Full Code of HewlettPackard/quartz for AI

master c22e1aa156a0 cached

92 files

375.4 KB

97.4k tokens

256 symbols

1 requests

Download .txt

Showing preview only (400K chars total). Download the full file or copy to clipboard to get everything.

Repository: HewlettPackard/quartz
Branch: master
Commit: c22e1aa156a0
Files: 92
Total size: 375.4 KB

Directory structure:
gitextract_aunglxr9/

├── AUTHORS
├── CMakeLists.txt
├── Doxyfile
├── README-BENCHMARKS-TESTING.md
├── README.md
├── TODO.dox
├── bench/
│   ├── CMakeLists.txt
│   ├── memlat/
│   │   ├── CMakeLists.txt
│   │   └── memlat.c
│   ├── multilat/
│   │   ├── CMakeLists.txt
│   │   └── multilat.c
│   └── new_memlat/
│       ├── CMakeLists.txt
│       ├── memlat.c
│       └── memlat.sh
├── benchmark-tests/
│   ├── bandwidth-model-building.sh
│   ├── memlat-bench-test-10M-single-socket.sh
│   ├── memlat-bench-test-10M.sh
│   ├── memlat-orig-lat-test-single-socket.sh
│   ├── memlat-orig-lat-test.sh
│   ├── nvmemul-bandwidth.ini
│   ├── nvmemul-debug.ini
│   ├── nvmemul-orig.ini
│   └── nvmemul.ini
├── license.txt
├── nvmemul-orig.ini
├── nvmemul.dox
├── nvmemul.ini
├── scripts/
│   ├── install.sh
│   ├── runenv.sh
│   ├── setupdev.sh
│   └── turboboost.sh
├── src/
│   ├── CMakeLists.txt
│   ├── dev/
│   │   ├── CMakeLists.txt
│   │   ├── Makefile
│   │   ├── ioctl_query.h
│   │   └── pmc.c
│   └── lib/
│       ├── CMakeLists.txt
│       ├── config.c
│       ├── config.h
│       ├── cpu/
│       │   ├── CMakeLists.txt
│       │   ├── cpu.c
│       │   ├── cpu.h
│       │   ├── haswell-papi.h
│       │   ├── haswell.h
│       │   ├── ivybridge-papi.h
│       │   ├── ivybridge.h
│       │   ├── known_cpus.h
│       │   ├── pmc-papi.c
│       │   ├── pmc-papi.h
│       │   ├── pmc.c
│       │   ├── pmc.h
│       │   ├── sandybridge-papi.h
│       │   ├── sandybridge.h
│       │   └── xeon-ex.h
│       ├── debug.c
│       ├── debug.h
│       ├── dev.c
│       ├── dev.h
│       ├── errno.h
│       ├── error.h
│       ├── init.c
│       ├── interpose.c
│       ├── interpose.h
│       ├── measure.h
│       ├── measure_bw.c
│       ├── measure_lat.c
│       ├── misc.c
│       ├── misc.h
│       ├── model.h
│       ├── model_bw.c
│       ├── model_lat.c
│       ├── monotonic_timer.c
│       ├── monotonic_timer.h
│       ├── pflush.c
│       ├── pflush.h
│       ├── pmalloc.c
│       ├── pmalloc.h
│       ├── process_rank.c
│       ├── stat.c
│       ├── stat.h
│       ├── thread.c
│       ├── thread.h
│       ├── topology.c
│       └── topology.h
└── test/
    ├── CMakeLists.txt
    ├── test_dev.cc
    ├── test_interpose.cc
    ├── test_multithread.c
    ├── test_mutex.cc
    ├── test_nvm.c
    ├── test_nvm_remote_dram.c
    └── test_thread.cc

================================================
FILE CONTENTS
================================================

================================================
FILE: AUTHORS
================================================
Haris Volos           (haris.volos@hpe.com)
Guilherme Magalhaes   (guilherme.magalhaes@hpe.com)
Lucy Cherkasova       (lucy.cherkasova@gmail.com)


================================================
FILE: CMakeLists.txt
================================================
# Top-level build script: pulls in the sources under src/ and the benchmarks
# under bench/.
#
# NOTE(review): CMake 4.0 and newer refuse to configure projects whose minimum
# version is below 3.5. Raising the floor changes policy defaults for every
# subdirectory, so audit the nested CMakeLists.txt files before doing so.
cmake_minimum_required(VERSION 2.8)

# Declare the project explicitly. A top-level CMakeLists.txt without a literal
# project() call makes CMake warn and fall back to an implicit project named
# "Project"; C and CXX are the languages that implicit project would enable,
# so listing them here keeps the enabled languages unchanged.
project(quartz C CXX)

# Disabled: third_party is not part of the build.
#add_subdirectory(third_party)
add_subdirectory(src)
add_subdirectory(bench)

# NOTE(review): enable_testing() runs after src/ and bench/ have already been
# added -- confirm whether tests declared in those directories are meant to be
# registered with CTest. The test/ subdirectory itself is currently disabled.
enable_testing()
#add_subdirectory(test)


================================================
FILE: Doxyfile
================================================
# Doxyfile 1.4.7

# This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project
#
# All text after a hash (#) is considered a comment and will be ignored
# The format is:
#       TAG = value [value, ...]
# For lists items can also be appended using:
#       TAG += value [value, ...]
# Values that contain spaces should be placed between quotes (" ")

#---------------------------------------------------------------------------
# Project related configuration options
#---------------------------------------------------------------------------

# The PROJECT_NAME tag is a single word (or a sequence of words surrounded 
# by quotes) that should identify the project.

PROJECT_NAME           = "Quartz"

# The PROJECT_NUMBER tag can be used to enter a project or revision number. 
# This could be handy for archiving the generated documentation or 
# if some version control system is used.

PROJECT_NUMBER         = 

# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) 
# base path where the generated documentation will be put. 
# If a relative path is entered, it will be relative to the location 
# where doxygen was started. If left blank the current directory will be used.

OUTPUT_DIRECTORY       = ./doc

# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create 
# 4096 sub-directories (in 2 levels) under the output directory of each output 
# format and will distribute the generated files over these directories. 
# Enabling this option can be useful when feeding doxygen a huge amount of 
# source files, where putting all generated files in the same directory would 
# otherwise cause performance problems for the file system.

CREATE_SUBDIRS         = NO

# The OUTPUT_LANGUAGE tag is used to specify the language in which all 
# documentation generated by doxygen is written. Doxygen will use this 
# information to generate all constant output in the proper language. 
# The default language is English, other supported languages are: 
# Brazilian, Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, 
# Dutch, Finnish, French, German, Greek, Hungarian, Italian, Japanese, 
# Japanese-en (Japanese with English messages), Korean, Korean-en, Norwegian, 
# Polish, Portuguese, Romanian, Russian, Serbian, Slovak, Slovene, Spanish, 
# Swedish, and Ukrainian.

OUTPUT_LANGUAGE        = English

# This tag can be used to specify the encoding used in the generated output. 
# The encoding is not always determined by the language that is chosen, 
# but also whether or not the output is meant for Windows or non-Windows users. 
# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES 
# forces the Windows encoding (this is the default for the Windows binary), 
# whereas setting the tag to NO uses a Unix-style encoding (the default for 
# all platforms other than Windows).

USE_WINDOWS_ENCODING   = NO

# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will 
# include brief member descriptions after the members that are listed in 
# the file and class documentation (similar to JavaDoc). 
# Set to NO to disable this.

BRIEF_MEMBER_DESC      = YES

# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend 
# the brief description of a member or function before the detailed description. 
# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the 
# brief descriptions will be completely suppressed.

REPEAT_BRIEF           = YES

# This tag implements a quasi-intelligent brief description abbreviator 
# that is used to form the text in various listings. Each string 
# in this list, if found as the leading text of the brief description, will be 
# stripped from the text and the result after processing the whole list, is 
# used as the annotated text. Otherwise, the brief description is used as-is. 
# If left blank, the following values are used ("$name" is automatically 
# replaced with the name of the entity): "The $name class" "The $name widget" 
# "The $name file" "is" "provides" "specifies" "contains" 
# "represents" "a" "an" "the"

ABBREVIATE_BRIEF       = 

# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then 
# Doxygen will generate a detailed section even if there is only a brief 
# description.

ALWAYS_DETAILED_SEC    = NO

# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all 
# inherited members of a class in the documentation of that class as if those 
# members were ordinary class members. Constructors, destructors and assignment 
# operators of the base classes will not be shown.

INLINE_INHERITED_MEMB  = NO

# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full 
# path before files name in the file list and in the header files. If set 
# to NO the shortest path that makes the file name unique will be used.

FULL_PATH_NAMES        = YES

# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag 
# can be used to strip a user-defined part of the path. Stripping is 
# only done if one of the specified strings matches the left-hand part of 
# the path. The tag can be used to show relative paths in the file list. 
# If left blank the directory from which doxygen is run is used as the 
# path to strip.

STRIP_FROM_PATH        = 

# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of 
# the path mentioned in the documentation of a class, which tells 
# the reader which header file to include in order to use a class. 
# If left blank only the name of the header file containing the class 
# definition is used. Otherwise one should specify the include paths that 
# are normally passed to the compiler using the -I flag.

STRIP_FROM_INC_PATH    = 

# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter 
# (but less readable) file names. This can be useful if your file system 
# doesn't support long names like on DOS, Mac, or CD-ROM.

SHORT_NAMES            = NO

# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen 
# will interpret the first line (until the first dot) of a JavaDoc-style 
# comment as the brief description. If set to NO, the JavaDoc 
# comments will behave just like the Qt-style comments (thus requiring an 
# explicit @brief command for a brief description).

JAVADOC_AUTOBRIEF      = NO

# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen 
# treat a multi-line C++ special comment block (i.e. a block of //! or /// 
# comments) as a brief description. This used to be the default behaviour. 
# The new default is to treat a multi-line C++ comment block as a detailed 
# description. Set this tag to YES if you prefer the old behaviour instead.

MULTILINE_CPP_IS_BRIEF = NO

# If the DETAILS_AT_TOP tag is set to YES then Doxygen 
# will output the detailed description near the top, like JavaDoc.
# If set to NO, the detailed description appears after the member 
# documentation.

DETAILS_AT_TOP         = NO

# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented 
# member inherits the documentation from any documented member that it 
# re-implements.

INHERIT_DOCS           = YES

# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce 
# a new page for each member. If set to NO, the documentation of a member will 
# be part of the file/class/namespace that contains it.

SEPARATE_MEMBER_PAGES  = NO

# The TAB_SIZE tag can be used to set the number of spaces in a tab. 
# Doxygen uses this value to replace tabs by spaces in code fragments.

TAB_SIZE               = 8

# This tag can be used to specify a number of aliases that acts 
# as commands in the documentation. An alias has the form "name=value". 
# For example adding "sideeffect=\par Side Effects:\n" will allow you to 
# put the command \sideeffect (or @sideeffect) in the documentation, which 
# will result in a user-defined paragraph with heading "Side Effects:". 
# You can put \n's in the value part of an alias to insert newlines.

ALIASES                = 

# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C 
# sources only. Doxygen will then generate output that is more tailored for C. 
# For instance, some of the names that are used will be different. The list 
# of all members will be omitted, etc.

OPTIMIZE_OUTPUT_FOR_C  = NO

# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java 
# sources only. Doxygen will then generate output that is more tailored for Java. 
# For instance, namespaces will be presented as packages, qualified scopes 
# will look different, etc.

OPTIMIZE_OUTPUT_JAVA   = NO

# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want to 
# include (a tag file for) the STL sources as input, then you should 
# set this tag to YES in order to let doxygen match functions declarations and 
# definitions whose arguments contain STL classes (e.g. func(std::string); vs. 
# func(std::string) {}). This also makes the inheritance and collaboration 
# diagrams that involve STL classes more complete and accurate.

BUILTIN_STL_SUPPORT    = NO

# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC 
# tag is set to YES, then doxygen will reuse the documentation of the first 
# member in the group (if any) for the other members of the group. By default 
# all members of a group must be documented explicitly.

DISTRIBUTE_GROUP_DOC   = NO

# Set the SUBGROUPING tag to YES (the default) to allow class member groups of 
# the same type (for instance a group of public functions) to be put as a 
# subgroup of that type (e.g. under the Public Functions section). Set it to 
# NO to prevent subgrouping. Alternatively, this can be done per class using 
# the \nosubgrouping command.

SUBGROUPING            = YES

#---------------------------------------------------------------------------
# Build related configuration options
#---------------------------------------------------------------------------

# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in 
# documentation are documented, even if no documentation was available. 
# Private class members and static file members will be hidden unless 
# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES

EXTRACT_ALL            = NO

# If the EXTRACT_PRIVATE tag is set to YES all private members of a class 
# will be included in the documentation.

EXTRACT_PRIVATE        = NO

# If the EXTRACT_STATIC tag is set to YES all static members of a file 
# will be included in the documentation.

EXTRACT_STATIC         = NO

# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) 
# defined locally in source files will be included in the documentation. 
# If set to NO only classes defined in header files are included.

EXTRACT_LOCAL_CLASSES  = YES

# This flag is only useful for Objective-C code. When set to YES local 
# methods, which are defined in the implementation section but not in 
# the interface are included in the documentation. 
# If set to NO (the default) only methods in the interface are included.

EXTRACT_LOCAL_METHODS  = NO

# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all 
# undocumented members of documented classes, files or namespaces. 
# If set to NO (the default) these members will be included in the 
# various overviews, but no documentation section is generated. 
# This option has no effect if EXTRACT_ALL is enabled.

HIDE_UNDOC_MEMBERS     = NO

# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all 
# undocumented classes that are normally visible in the class hierarchy. 
# If set to NO (the default) these classes will be included in the various 
# overviews. This option has no effect if EXTRACT_ALL is enabled.

HIDE_UNDOC_CLASSES     = NO

# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all 
# friend (class|struct|union) declarations. 
# If set to NO (the default) these declarations will be included in the 
# documentation.

HIDE_FRIEND_COMPOUNDS  = NO

# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any 
# documentation blocks found inside the body of a function. 
# If set to NO (the default) these blocks will be appended to the 
# function's detailed documentation block.

HIDE_IN_BODY_DOCS      = NO

# The INTERNAL_DOCS tag determines if documentation 
# that is typed after a \internal command is included. If the tag is set 
# to NO (the default) then the documentation will be excluded. 
# Set it to YES to include the internal documentation.

INTERNAL_DOCS          = NO

# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate 
# file names in lower-case letters. If set to YES upper-case letters are also 
# allowed. This is useful if you have classes or files whose names only differ 
# in case and if your file system supports case sensitive file names. Windows 
# and Mac users are advised to set this option to NO.

CASE_SENSE_NAMES       = YES

# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen 
# will show members with their full class and namespace scopes in the 
# documentation. If set to YES the scope will be hidden.

HIDE_SCOPE_NAMES       = NO

# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen 
# will put a list of the files that are included by a file in the documentation 
# of that file.

SHOW_INCLUDE_FILES     = YES

# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] 
# is inserted in the documentation for inline members.

INLINE_INFO            = YES

# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen 
# will sort the (detailed) documentation of file and class members 
# alphabetically by member name. If set to NO the members will appear in 
# declaration order.

SORT_MEMBER_DOCS       = YES

# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the 
# brief documentation of file, namespace and class members alphabetically 
# by member name. If set to NO (the default) the members will appear in 
# declaration order.

SORT_BRIEF_DOCS        = NO

# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be 
# sorted by fully-qualified names, including namespaces. If set to 
# NO (the default), the class list will be sorted only by class name, 
# not including the namespace part. 
# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
# Note: This option applies only to the class list, not to the 
# alphabetical list.

SORT_BY_SCOPE_NAME     = NO

# The GENERATE_TODOLIST tag can be used to enable (YES) or 
# disable (NO) the todo list. This list is created by putting \todo 
# commands in the documentation.

GENERATE_TODOLIST      = YES

# The GENERATE_TESTLIST tag can be used to enable (YES) or 
# disable (NO) the test list. This list is created by putting \test 
# commands in the documentation.

GENERATE_TESTLIST      = YES

# The GENERATE_BUGLIST tag can be used to enable (YES) or 
# disable (NO) the bug list. This list is created by putting \bug 
# commands in the documentation.

GENERATE_BUGLIST       = YES

# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or 
# disable (NO) the deprecated list. This list is created by putting 
# \deprecated commands in the documentation.

GENERATE_DEPRECATEDLIST= YES

# The ENABLED_SECTIONS tag can be used to enable conditional 
# documentation sections, marked by \if sectionname ... \endif.

ENABLED_SECTIONS       = 

# The MAX_INITIALIZER_LINES tag determines the maximum number of lines 
# the initial value of a variable or define consists of for it to appear in 
# the documentation. If the initializer consists of more lines than specified 
# here it will be hidden. Use a value of 0 to hide initializers completely. 
# The appearance of the initializer of individual variables and defines in the 
# documentation can be controlled using \showinitializer or \hideinitializer 
# command in the documentation regardless of this setting.

MAX_INITIALIZER_LINES  = 30

# Set the SHOW_USED_FILES tag to NO to disable the list of files generated 
# at the bottom of the documentation of classes and structs. If set to YES the 
# list will mention the files that were used to generate the documentation.

SHOW_USED_FILES        = YES

# If the sources in your project are distributed over multiple directories 
# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy 
# in the documentation. The default is NO.

SHOW_DIRECTORIES       = NO

# The FILE_VERSION_FILTER tag can be used to specify a program or script that 
# doxygen should invoke to get the current version for each file (typically from the 
# version control system). Doxygen will invoke the program by executing (via 
# popen()) the command <command> <input-file>, where <command> is the value of 
# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file 
# provided by doxygen. Whatever the program writes to standard output 
# is used as the file version. See the manual for examples.

FILE_VERSION_FILTER    = 

#---------------------------------------------------------------------------
# configuration options related to warning and progress messages
#---------------------------------------------------------------------------

# The QUIET tag can be used to turn on/off the messages that are generated 
# by doxygen. Possible values are YES and NO. If left blank NO is used.

QUIET                  = NO

# The WARNINGS tag can be used to turn on/off the warning messages that are 
# generated by doxygen. Possible values are YES and NO. If left blank 
# NO is used.

WARNINGS               = YES

# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings 
# for undocumented members. If EXTRACT_ALL is set to YES then this flag will 
# automatically be disabled.

WARN_IF_UNDOCUMENTED   = YES

# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for 
# potential errors in the documentation, such as not documenting some 
# parameters in a documented function, or documenting parameters that 
# don't exist or using markup commands wrongly.

WARN_IF_DOC_ERROR      = YES

# This WARN_NO_PARAMDOC option can be enabled to get warnings for 
# functions that are documented, but have no documentation for their parameters 
# or return value. If set to NO (the default) doxygen will only warn about 
# wrong or incomplete parameter documentation, but not about the absence of 
# documentation.

WARN_NO_PARAMDOC       = NO

# The WARN_FORMAT tag determines the format of the warning messages that 
# doxygen can produce. The string should contain the $file, $line, and $text 
# tags, which will be replaced by the file and line number from which the 
# warning originated and the warning text. Optionally the format may contain 
# $version, which will be replaced by the version of the file (if it could 
# be obtained via FILE_VERSION_FILTER)

WARN_FORMAT            = "$file:$line: $text"

# The WARN_LOGFILE tag can be used to specify a file to which warning 
# and error messages should be written. If left blank the output is written 
# to stderr.

WARN_LOGFILE           = 

#---------------------------------------------------------------------------
# configuration options related to the input files
#---------------------------------------------------------------------------

# The INPUT tag can be used to specify the files and/or directories that contain 
# documented source files. You may enter file names like "myfile.cpp" or 
# directories like "/usr/src/myproject". Separate the files or directories 
# with spaces.

INPUT                  = nvmemul.dox TODO.dox src/

# If the value of the INPUT tag contains directories, you can use the 
# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp 
# and *.h) to filter out the source-files in the directories. If left 
# blank the following patterns are tested: 
# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx 
# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py

FILE_PATTERNS          = 

# The RECURSIVE tag can be used to specify whether or not subdirectories 
# should be searched for input files as well. Possible values are YES and NO. 
# If left blank NO is used.

RECURSIVE              = YES

# The EXCLUDE tag can be used to specify files and/or directories that should 
# be excluded from the INPUT source files. This way you can easily exclude a 
# subdirectory from a directory tree whose root is specified with the INPUT tag.

EXCLUDE                = 

# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or 
# directories that are symbolic links (a Unix filesystem feature) are excluded 
# from the input.

EXCLUDE_SYMLINKS       = NO

# If the value of the INPUT tag contains directories, you can use the 
# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude 
# certain files from those directories. Note that the wildcards are matched 
# against the file with absolute path, so to exclude all test directories 
# for example use the pattern */test/*

EXCLUDE_PATTERNS       = 

# The EXAMPLE_PATH tag can be used to specify one or more files or 
# directories that contain example code fragments that are included (see 
# the \include command).

EXAMPLE_PATH           = 

# If the value of the EXAMPLE_PATH tag contains directories, you can use the 
# EXAMPLE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp 
# and *.h) to filter out the source-files in the directories. If left 
# blank all files are included.

EXAMPLE_PATTERNS       = 

# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be 
# searched for input files to be used with the \include or \dontinclude 
# commands irrespective of the value of the RECURSIVE tag. 
# Possible values are YES and NO. If left blank NO is used.

EXAMPLE_RECURSIVE      = NO

# The IMAGE_PATH tag can be used to specify one or more files or 
# directories that contain images that are included in the documentation (see 
# the \image command).

IMAGE_PATH             = ./doc/figures

# The INPUT_FILTER tag can be used to specify a program that doxygen should 
# invoke to filter for each input file. Doxygen will invoke the filter program 
# by executing (via popen()) the command <filter> <input-file>, where <filter> 
# is the value of the INPUT_FILTER tag, and <input-file> is the name of an 
# input file. Doxygen will then use the output that the filter program writes 
# to standard output.  If FILTER_PATTERNS is specified, this tag will be 
# ignored.

INPUT_FILTER           = 

# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern 
# basis.  Doxygen will compare the file name with each pattern and apply the 
# filter if there is a match.  The filters are a list of the form: 
# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further 
# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER 
# is applied to all files.

FILTER_PATTERNS        = 

# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using 
# INPUT_FILTER) will be used to filter the input files when producing source 
# files to browse (i.e. when SOURCE_BROWSER is set to YES).

FILTER_SOURCE_FILES    = NO

#---------------------------------------------------------------------------
# configuration options related to source browsing
#---------------------------------------------------------------------------

# If the SOURCE_BROWSER tag is set to YES then a list of source files will 
# be generated. Documented entities will be cross-referenced with these sources. 
# Note: To get rid of all source code in the generated output, make sure also 
# VERBATIM_HEADERS is set to NO.

SOURCE_BROWSER         = YES

# Setting the INLINE_SOURCES tag to YES will include the body 
# of functions and classes directly in the documentation.

INLINE_SOURCES         = NO

# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct 
# doxygen to hide any special comment blocks from generated source code 
# fragments. Normal C and C++ comments will always remain visible.

STRIP_CODE_COMMENTS    = YES

# If the REFERENCED_BY_RELATION tag is set to YES (the default) 
# then for each documented function all documented 
# functions referencing it will be listed.

REFERENCED_BY_RELATION = YES

# If the REFERENCES_RELATION tag is set to YES (the default) 
# then for each documented function all documented entities 
# called/used by that function will be listed.

REFERENCES_RELATION    = YES

# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
# link to the source code.  Otherwise they will link to the documentation.

REFERENCES_LINK_SOURCE = YES

# If the USE_HTAGS tag is set to YES then the references to source code 
# will point to the HTML generated by the htags(1) tool instead of doxygen 
# built-in source browser. The htags tool is part of GNU's global source 
# tagging system (see http://www.gnu.org/software/global/global.html). You 
# will need version 4.8.6 or higher.

USE_HTAGS              = NO

# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen 
# will generate a verbatim copy of the header file for each class for 
# which an include is specified. Set to NO to disable this.

VERBATIM_HEADERS       = YES

#---------------------------------------------------------------------------
# configuration options related to the alphabetical class index
#---------------------------------------------------------------------------

# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index 
# of all compounds will be generated. Enable this if the project 
# contains a lot of classes, structs, unions or interfaces.

ALPHABETICAL_INDEX     = YES

# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then 
# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns 
# in which this list will be split (can be a number in the range [1..20])

COLS_IN_ALPHA_INDEX    = 5

# In case all classes in a project start with a common prefix, all 
# classes will be put under the same header in the alphabetical index. 
# The IGNORE_PREFIX tag can be used to specify one or more prefixes that 
# should be ignored while generating the index headers.

IGNORE_PREFIX          = 

#---------------------------------------------------------------------------
# configuration options related to the HTML output
#---------------------------------------------------------------------------

# If the GENERATE_HTML tag is set to YES (the default) Doxygen will 
# generate HTML output.

GENERATE_HTML          = YES

# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. 
# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
# put in front of it. If left blank `html' will be used as the default path.

HTML_OUTPUT            = html

# The HTML_FILE_EXTENSION tag can be used to specify the file extension for 
# each generated HTML page (for example: .htm,.php,.asp). If it is left blank 
# doxygen will generate files with .html extension.

HTML_FILE_EXTENSION    = .html

# The HTML_HEADER tag can be used to specify a personal HTML header for 
# each generated HTML page. If it is left blank doxygen will generate a 
# standard header.

HTML_HEADER            = 

# The HTML_FOOTER tag can be used to specify a personal HTML footer for 
# each generated HTML page. If it is left blank doxygen will generate a 
# standard footer.

HTML_FOOTER            = 

# The HTML_STYLESHEET tag can be used to specify a user-defined cascading 
# style sheet that is used by each HTML page. It can be used to 
# fine-tune the look of the HTML output. If the tag is left blank doxygen 
# will generate a default style sheet. Note that doxygen will try to copy 
# the style sheet file to the HTML output directory, so don't put your own 
# stylesheet in the HTML output directory as well, or it will be erased!

HTML_STYLESHEET        = 

# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, 
# files or namespaces will be aligned in HTML using tables. If set to 
# NO a bullet list will be used.

HTML_ALIGN_MEMBERS     = YES

# If the GENERATE_HTMLHELP tag is set to YES, additional index files 
# will be generated that can be used as input for tools like the 
# Microsoft HTML help workshop to generate a compressed HTML help file (.chm) 
# of the generated HTML documentation.

GENERATE_HTMLHELP      = YES

# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can 
# be used to specify the file name of the resulting .chm file. You 
# can add a path in front of the file if the result should not be 
# written to the html output directory.

CHM_FILE               = 

# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can 
# be used to specify the location (absolute path including file name) of 
# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run 
# the HTML help compiler on the generated index.hhp.

HHC_LOCATION           = 

# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag 
# controls if a separate .chi index file is generated (YES) or that 
# it should be included in the master .chm file (NO).

GENERATE_CHI           = NO

# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag 
# controls whether a binary table of contents is generated (YES) or a 
# normal table of contents (NO) in the .chm file.

BINARY_TOC             = NO

# The TOC_EXPAND flag can be set to YES to add extra items for group members 
# to the contents of the HTML help documentation and to the tree view.

TOC_EXPAND             = YES

# The DISABLE_INDEX tag can be used to turn on/off the condensed index at 
# top of each HTML page. The value NO (the default) enables the index and 
# the value YES disables it.

DISABLE_INDEX          = NO

# This tag can be used to set the number of enum values (range [1..20]) 
# that doxygen will group on one line in the generated HTML documentation.

ENUM_VALUES_PER_LINE   = 4

# If the GENERATE_TREEVIEW tag is set to YES, a side panel will be
# generated containing a tree-like index structure (just like the one that 
# is generated for HTML Help). For this to work a browser that supports 
# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, 
# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are 
# probably better off using the HTML help feature.

GENERATE_TREEVIEW      = YES

# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be 
# used to set the initial width (in pixels) of the frame in which the tree 
# is shown.

TREEVIEW_WIDTH         = 250

#---------------------------------------------------------------------------
# configuration options related to the LaTeX output
#---------------------------------------------------------------------------

# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will 
# generate Latex output.

GENERATE_LATEX         = NO

# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. 
# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
# put in front of it. If left blank `latex' will be used as the default path.

LATEX_OUTPUT           = latex

# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be 
# invoked. If left blank `latex' will be used as the default command name.

LATEX_CMD_NAME         = latex

# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to 
# generate index for LaTeX. If left blank `makeindex' will be used as the 
# default command name.

MAKEINDEX_CMD_NAME     = makeindex

# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact 
# LaTeX documents. This may be useful for small projects and may help to 
# save some trees in general.

COMPACT_LATEX          = NO

# The PAPER_TYPE tag can be used to set the paper type that is used 
# by the printer. Possible values are: a4, a4wide, letter, legal and 
# executive. If left blank a4wide will be used.

PAPER_TYPE             = a4wide

# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX 
# packages that should be included in the LaTeX output.

EXTRA_PACKAGES         = 

# The LATEX_HEADER tag can be used to specify a personal LaTeX header for 
# the generated latex document. The header should contain everything until 
# the first chapter. If it is left blank doxygen will generate a 
# standard header. Notice: only use this tag if you know what you are doing!

LATEX_HEADER           = 

# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated 
# is prepared for conversion to pdf (using ps2pdf). The pdf file will 
# contain links (just like the HTML output) instead of page references 
# This makes the output suitable for online browsing using a pdf viewer.

PDF_HYPERLINKS         = NO

# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of 
# plain latex in the generated Makefile. Set this option to YES to get a 
# higher quality PDF documentation.

USE_PDFLATEX           = NO

# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode 
# command to the generated LaTeX files. This will instruct LaTeX to keep 
# running if errors occur, instead of asking the user for help. 
# This option is also used when generating formulas in HTML.

LATEX_BATCHMODE        = NO

# If LATEX_HIDE_INDICES is set to YES then doxygen will not 
# include the index chapters (such as File Index, Compound Index, etc.) 
# in the output.

LATEX_HIDE_INDICES     = NO

#---------------------------------------------------------------------------
# configuration options related to the RTF output
#---------------------------------------------------------------------------

# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output 
# The RTF output is optimized for Word 97 and may not look very pretty with 
# other RTF readers or editors.

GENERATE_RTF           = NO

# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. 
# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
# put in front of it. If left blank `rtf' will be used as the default path.

RTF_OUTPUT             = rtf

# If the COMPACT_RTF tag is set to YES Doxygen generates more compact 
# RTF documents. This may be useful for small projects and may help to 
# save some trees in general.

COMPACT_RTF            = NO

# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated 
# will contain hyperlink fields. The RTF file will 
# contain links (just like the HTML output) instead of page references. 
# This makes the output suitable for online browsing using WORD or other 
# programs which support those fields. 
# Note: wordpad (write) and others do not support links.

RTF_HYPERLINKS         = NO

# Load stylesheet definitions from file. Syntax is similar to doxygen's 
# config file, i.e. a series of assignments. You only have to provide 
# replacements, missing definitions are set to their default value.

RTF_STYLESHEET_FILE    = 

# Set optional variables used in the generation of an rtf document. 
# Syntax is similar to doxygen's config file.

RTF_EXTENSIONS_FILE    = 

#---------------------------------------------------------------------------
# configuration options related to the man page output
#---------------------------------------------------------------------------

# If the GENERATE_MAN tag is set to YES (the default) Doxygen will 
# generate man pages

GENERATE_MAN           = NO

# The MAN_OUTPUT tag is used to specify where the man pages will be put. 
# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
# put in front of it. If left blank `man' will be used as the default path.

MAN_OUTPUT             = man

# The MAN_EXTENSION tag determines the extension that is added to 
# the generated man pages (default is the subroutine's section .3)

MAN_EXTENSION          = .3

# If the MAN_LINKS tag is set to YES and Doxygen generates man output, 
# then it will generate one additional man file for each entity 
# documented in the real man page(s). These additional files 
# only source the real man page, but without them the man command 
# would be unable to find the correct page. The default is NO.

MAN_LINKS              = NO

#---------------------------------------------------------------------------
# configuration options related to the XML output
#---------------------------------------------------------------------------

# If the GENERATE_XML tag is set to YES Doxygen will 
# generate an XML file that captures the structure of 
# the code including all documentation.

GENERATE_XML           = NO

# The XML_OUTPUT tag is used to specify where the XML pages will be put. 
# If a relative path is entered the value of OUTPUT_DIRECTORY will be 
# put in front of it. If left blank `xml' will be used as the default path.

XML_OUTPUT             = xml

# The XML_SCHEMA tag can be used to specify an XML schema, 
# which can be used by a validating XML parser to check the 
# syntax of the XML files.

XML_SCHEMA             = 

# The XML_DTD tag can be used to specify an XML DTD, 
# which can be used by a validating XML parser to check the 
# syntax of the XML files.

XML_DTD                = 

# If the XML_PROGRAMLISTING tag is set to YES Doxygen will 
# dump the program listings (including syntax highlighting 
# and cross-referencing information) to the XML output. Note that 
# enabling this will significantly increase the size of the XML output.

XML_PROGRAMLISTING     = YES

#---------------------------------------------------------------------------
# configuration options for the AutoGen Definitions output
#---------------------------------------------------------------------------

# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will 
# generate an AutoGen Definitions (see autogen.sf.net) file 
# that captures the structure of the code including all 
# documentation. Note that this feature is still experimental 
# and incomplete at the moment.

GENERATE_AUTOGEN_DEF   = NO

#---------------------------------------------------------------------------
# configuration options related to the Perl module output
#---------------------------------------------------------------------------

# If the GENERATE_PERLMOD tag is set to YES Doxygen will 
# generate a Perl module file that captures the structure of 
# the code including all documentation. Note that this 
# feature is still experimental and incomplete at the 
# moment.

GENERATE_PERLMOD       = NO

# If the PERLMOD_LATEX tag is set to YES Doxygen will generate 
# the necessary Makefile rules, Perl scripts and LaTeX code to be able 
# to generate PDF and DVI output from the Perl module output.

PERLMOD_LATEX          = NO

# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be 
# nicely formatted so it can be parsed by a human reader.  This is useful 
# if you want to understand what is going on.  On the other hand, if this 
# tag is set to NO the size of the Perl module output will be much smaller 
# and Perl will parse it just the same.

PERLMOD_PRETTY         = YES

# The names of the make variables in the generated doxyrules.make file 
# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. 
# This is useful so different doxyrules.make files included by the same 
# Makefile don't overwrite each other's variables.

PERLMOD_MAKEVAR_PREFIX = 

#---------------------------------------------------------------------------
# Configuration options related to the preprocessor   
#---------------------------------------------------------------------------

# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will 
# evaluate all C-preprocessor directives found in the sources and include 
# files.

ENABLE_PREPROCESSING   = YES

# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro 
# names in the source code. If set to NO (the default) only conditional 
# compilation will be performed. Macro expansion can be done in a controlled 
# way by setting EXPAND_ONLY_PREDEF to YES.

MACRO_EXPANSION        = NO

# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES 
# then the macro expansion is limited to the macros specified with the 
# PREDEFINED and EXPAND_AS_DEFINED tags.

EXPAND_ONLY_PREDEF     = NO

# If the SEARCH_INCLUDES tag is set to YES (the default) the include files 
# in the INCLUDE_PATH (see below) will be searched if a #include is found.

SEARCH_INCLUDES        = YES

# The INCLUDE_PATH tag can be used to specify one or more directories that 
# contain include files that are not input files but should be processed by 
# the preprocessor.

INCLUDE_PATH           = 

# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard 
# patterns (like *.h and *.hpp) to filter out the header-files in the 
# directories. If left blank, the patterns specified with FILE_PATTERNS will 
# be used.

INCLUDE_FILE_PATTERNS  = 

# The PREDEFINED tag can be used to specify one or more macro names that 
# are defined before the preprocessor is started (similar to the -D option of 
# gcc). The argument of the tag is a list of macros of the form: name 
# or name=definition (no spaces). If the definition and the = are 
# omitted =1 is assumed. To prevent a macro definition from being 
# undefined via #undef or recursively expanded use the := operator 
# instead of the = operator.

PREDEFINED             = 

# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then 
# this tag can be used to specify a list of macro names that should be expanded. 
# The macro definition that is found in the sources will be used. 
# Use the PREDEFINED tag if you want to use a different macro definition.

EXPAND_AS_DEFINED      = 

# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then 
# doxygen's preprocessor will remove all function-like macros that are alone 
# on a line, have an all uppercase name, and do not end with a semicolon. Such 
# function macros are typically used for boiler-plate code, and will confuse 
# the parser if not removed.

SKIP_FUNCTION_MACROS   = YES

#---------------------------------------------------------------------------
# Configuration::additions related to external references   
#---------------------------------------------------------------------------

# The TAGFILES option can be used to specify one or more tagfiles. 
# Optionally an initial location of the external documentation 
# can be added for each tagfile. The format of a tag file without 
# this location is as follows: 
#   TAGFILES = file1 file2 ... 
# Adding location for the tag files is done as follows: 
#   TAGFILES = file1=loc1 "file2 = loc2" ... 
# where "loc1" and "loc2" can be relative or absolute paths or 
# URLs. If a location is present for each tag, the installdox tool 
# does not have to be run to correct the links.
# Note that each tag file must have a unique name
# (where the name does NOT include the path)
# If a tag file is not located in the directory in which doxygen 
# is run, you must also specify the path to the tagfile here.

TAGFILES               = 

# When a file name is specified after GENERATE_TAGFILE, doxygen will create 
# a tag file that is based on the input files it reads.

GENERATE_TAGFILE       = 

# If the ALLEXTERNALS tag is set to YES all external classes will be listed 
# in the class index. If set to NO only the inherited external classes 
# will be listed.

ALLEXTERNALS           = NO

# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed 
# in the modules index. If set to NO, only the current project's groups will 
# be listed.

EXTERNAL_GROUPS        = YES

# The PERL_PATH should be the absolute path and name of the perl script 
# interpreter (i.e. the result of `which perl').

PERL_PATH              = /usr/bin/perl

#---------------------------------------------------------------------------
# Configuration options related to the dot tool   
#---------------------------------------------------------------------------

# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will 
# generate an inheritance diagram (in HTML, RTF and LaTeX) for classes with base 
# or super classes. Setting the tag to NO turns the diagrams off. Note that 
# this option is superseded by the HAVE_DOT option below. This is only a 
# fallback. It is recommended to install and use dot, since it yields more 
# powerful graphs.

CLASS_DIAGRAMS         = YES

# If set to YES, the inheritance and collaboration graphs will hide 
# inheritance and usage relations if the target is undocumented 
# or is not a class.

HIDE_UNDOC_RELATIONS   = YES

# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is 
# available from the path. This tool is part of Graphviz, a graph visualization 
# toolkit from AT&T and Lucent Bell Labs. The other options in this section 
# have no effect if this option is set to NO (the default)

HAVE_DOT               = NO

# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen 
# will generate a graph for each documented class showing the direct and 
# indirect inheritance relations. Setting this tag to YES will force 
# the CLASS_DIAGRAMS tag to NO.

CLASS_GRAPH            = YES

# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen 
# will generate a graph for each documented class showing the direct and 
# indirect implementation dependencies (inheritance, containment, and 
# class references variables) of the class with other documented classes.

COLLABORATION_GRAPH    = YES

# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen 
# will generate a graph for groups, showing the direct groups dependencies

GROUP_GRAPHS           = YES

# If the UML_LOOK tag is set to YES doxygen will generate inheritance and 
# collaboration diagrams in a style similar to the OMG's Unified Modeling 
# Language.

UML_LOOK               = NO

# If set to YES, the inheritance and collaboration graphs will show the 
# relations between templates and their instances.

TEMPLATE_RELATIONS     = NO

# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT 
# tags are set to YES then doxygen will generate a graph for each documented 
# file showing the direct and indirect include dependencies of the file with 
# other documented files.

INCLUDE_GRAPH          = YES

# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and 
# HAVE_DOT tags are set to YES then doxygen will generate a graph for each 
# documented header file showing the documented files that directly or 
# indirectly include this file.

INCLUDED_BY_GRAPH      = YES

# If the CALL_GRAPH and HAVE_DOT tags are set to YES then doxygen will 
# generate a call dependency graph for every global function or class method. 
# Note that enabling this option will significantly increase the time of a run. 
# So in most cases it will be better to enable call graphs for selected 
# functions only using the \callgraph command.

CALL_GRAPH             = NO

# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then doxygen will 
# generate a caller dependency graph for every global function or class method. 
# Note that enabling this option will significantly increase the time of a run. 
# So in most cases it will be better to enable caller graphs for selected 
# functions only using the \callergraph command.

CALLER_GRAPH           = NO

# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen 
# will generate a graphical hierarchy of all classes instead of a textual one.

GRAPHICAL_HIERARCHY    = YES

# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES 
# then doxygen will show the dependencies a directory has on other directories 
# in a graphical way. The dependency relations are determined by the #include
# relations between the files in the directories.

DIRECTORY_GRAPH        = YES

# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images 
# generated by dot. Possible values are png, jpg, or gif
# If left blank png will be used.

DOT_IMAGE_FORMAT       = png

# The tag DOT_PATH can be used to specify the path where the dot tool can be 
# found. If left blank, it is assumed the dot tool can be found in the path.

DOT_PATH               = 

# The DOTFILE_DIRS tag can be used to specify one or more directories that 
# contain dot files that are included in the documentation (see the 
# \dotfile command).

DOTFILE_DIRS           = 

# The MAX_DOT_GRAPH_WIDTH tag can be used to set the maximum allowed width 
# (in pixels) of the graphs generated by dot. If a graph becomes larger than 
# this value, doxygen will try to truncate the graph, so that it fits within 
# the specified constraint. Beware that most browsers cannot cope with very 
# large images.

MAX_DOT_GRAPH_WIDTH    = 1024

# The MAX_DOT_GRAPH_HEIGHT tag can be used to set the maximum allowed height 
# (in pixels) of the graphs generated by dot. If a graph becomes larger than 
# this value, doxygen will try to truncate the graph, so that it fits within 
# the specified constraint. Beware that most browsers cannot cope with very 
# large images.

MAX_DOT_GRAPH_HEIGHT   = 1024

# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the 
# graphs generated by dot. A depth value of 3 means that only nodes reachable 
# from the root by following a path via at most 3 edges will be shown. Nodes 
# that lay further from the root node will be omitted. Note that setting this 
# option to 1 or 2 may greatly reduce the computation time needed for large 
# code bases. Also note that a graph may be further truncated if the graph's 
# image dimensions are not sufficient to fit the graph (see MAX_DOT_GRAPH_WIDTH 
# and MAX_DOT_GRAPH_HEIGHT). If 0 is used for the depth value (the default), 
# the graph is not depth-constrained.

MAX_DOT_GRAPH_DEPTH    = 0

# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent 
# background. This is disabled by default, which results in a white background. 
# Warning: Depending on the platform used, enabling this option may lead to 
# badly anti-aliased labels on the edges of a graph (i.e. they become hard to 
# read).

DOT_TRANSPARENT        = NO

# Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output 
# files in one run (i.e. multiple -o and -T options on the command line). This 
# makes dot run faster, but since only newer versions of dot (>1.8.10) 
# support this, this feature is disabled by default.

DOT_MULTI_TARGETS      = NO

# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will 
# generate a legend page explaining the meaning of the various boxes and 
# arrows in the dot generated graphs.

GENERATE_LEGEND        = YES

# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will 
# remove the intermediate dot files that are used to generate 
# the various graphs.

DOT_CLEANUP            = YES

#---------------------------------------------------------------------------
# Configuration::additions related to the search engine   
#---------------------------------------------------------------------------

# The SEARCHENGINE tag specifies whether or not a search engine should be 
# used. If set to NO the values of all tags below this one will be ignored.

SEARCHENGINE           = NO


================================================
FILE: README-BENCHMARKS-TESTING.md
================================================
**For testing whether your environment is configured correctly for
running Quartz** (e.g., whether you set all the required environmental
variables, etc.) **we have created a few scripts with benchmarks, which
can be executed automatically** and which can provide you with
feedback on Quartz performance in your environment.

**The directory with these scripts is called: *benchmark-tests*. There are three scripts which you can run:**
- **bandwidth-model-building.sh**

   This script will execute for approximately **10 min** and will build a memory
   bandwidth model that can be used in the experiments with memory bandwidth
   throttling. The configuration file uses a "debug" mode on purpose -- so that
   you can see the messages on the screen about the progress of the memory
   bandwidth model building, which can be found at */tmp/bandwidth_model*.

- **memlat-orig-lat-test.sh**

    This script will measure your server hardware *memory access latency* in nanoseconds: local
    and remote (for two-socket servers). It will execute the test 20 times, and write the results in directory *ORIG-lat-test*.
    You can find the summary of the results in the file *ORIG-lat-test/final-hw-latency.txt*.
    It will have measurements like:
    
               FORMAT:  1_min_local  2_aver_local  3_max_local  4_min_remote  5_aver_remote  6_max_remote
                           91             91.9           92           152        163.9           176
   
    The first three numbers show the minimal, average and maximum measured local
    memory access latency (in ns, over 20 measurements). The last three numbers
    show similar measurements for the access latency of the remote memory,
    i.e., in the second socket.

-  **memlat-bench-test-10M.sh**

    This script will execute memlat benchmark (pointer-chasing benchmark) with
    nine emulated memory access latencies: 200 ns, 300 ns,..., 1000 ns.
    It will run the benchmark with these emulated latencies in two settings:
    in the local socket (i.e., emulating a higher memory access latency in the
    local socket) and similarly, in the remote socket.
    Each test is repeated 10 times: this is used for assessing the variability
    of your environment. In some cases, we had issues with TurboBoost mode,
    which did impact the quality of the emulation...
    This test might take **approx. 30 min to finish** (since it executes 180 tests),
    and will create two output directories:  *FULL-RESULTS-test*  and
    *SUMMARY-RESULTS-test*.
    In the directory SUMMARY-RESULTS-test, you will find two files that
    summarize the outcome of the experiments in the local and remote sockets.
    The outcome should look like this:
    
          FORMAT: 1_emul_lat  2_min_meas_lat  3_aver_meas_lat  4_max_meas_lat  5_aver_error(%) 6_max_error(%)
                   200           177            197.9             204              1.05            11.5
                   300           259            289.5             300              3.5             13.6  
                   400           354            382.6             395              4.3             11.5
                   500           468            485.8             490              2.8             6.4
                   600           554            575.3             585              4.1             7.6
                   700           640            666.6             681              4.7             8.5
                   800           749            766.4             776              4.2             6.3
                   900           851            866.2             871              3.7             5.4
                   1000          926            956.5             966              4.35            7.4
    
          The format is the following:
          1st column:    emulated latency (in nanoseconds)
          2nd column:    minimum measured  latency (across 10 tests, in ns)
          3rd column:    average measured  latency (across 10 tests, in ns)
          4th column:    maximum measured  latency (across 10 tests, in ns)
          5th column:    average error (between emulated and measured latencies, in %)
          6th column:    max error (between emulated and measured latencies, in %)

One of the goals of the designed performance emulator is to provide a
framework for application sensitivity studies under different
latencies and memory bandwidths. Even if you see a 15% deviation (error)
from the targeted emulated latencies, consistent benchmark measurements
are a good sign that you can perform a good sensitivity study.


================================================
FILE: README.md
================================================

Quartz: A DRAM-based performance emulator for NVM
----------------------

Quartz leverages features available in commodity hardware to emulate
different latency and bandwidth characteristics of future
byte-addressable NVM technologies.

Quartz's design, implementation details, evaluation, and overhead  can be found 
in the following research paper:
 - **H. Volos, G. Magalhaes, L. Cherkasova, J. Li: Quartz: A Lightweight 
   Performance Emulator for Persistent Memory Software. In Proc. of the 
   16th ACM/IFIP/USENIX International Middleware Conference, (Middleware'2015),
   Vancouver, Canada, December 8-11, 2015. It can be downloaded from:
   http://www.jahrhundert.net/papers/middleware2015.pdf**

While the emulator is designed to cover three processor families:
*Sandy Bridge, Ivy Bridge*, and *Haswell* -- we have had the best results
on the *Ivy Bridge* platform. The Haswell processor has a TurboBoost feature
that causes higher variance and deviations when emulating higher range
latencies (above 600 ns).

Contributors
----------------------
For a list of contributors see [AUTHORS](https://github.hpe.com/labs/quartz/blob/master/AUTHORS). 

Extended documentation
----------------------
Extended documentation is available in Doxygen form. To build and view:

    doxygen
    xdg-open doc/html/index.html


Dependencies
------------
This is the list of libraries and tools used by Quartz:

On RPM based distributions:
- cmake 2.8
- libconfig and libconfig-devel
- numactl-devel
- uthash-devel
- kernel-devel

On Debian based distributions:
- cmake 2.8
- libconfig-dev
- libnuma-dev
- uthash-dev
- linux-headers

You can run 'sudo scripts/install.sh' in order to automatically install these 
dependencies.


Supported environment
---------------------
Currently the latency emulator can be used on Linux with *Sandy Bridge, 
Ivy Bridge*, and *Haswell* Intel processors. For bandwidth emulation support, Intel 
Thermal Memory Controller device is required.
No specific Linux distribution or kernel version is required.


Source code tree overview
-------------------------

    bench             Benchmarks
    doc               Documentation, including Doxygen generated documentation (doc/html)
    src/lib           Emulator main library code
    src/dev           Kernel-module for accessing performance counters and 
                      memory-controller PCI registers
    scripts           Helper scripts to run a program using the emulator and install 
                      dependencies
    test              Several tests and application code examples
    benchmark-tests   Several automated tests with benchmark runs and output analysis 
                      for testing the correctness of configured emulation environment and 
                      the accuracy of expected results

For more details, please see the extended documentation generated using Doxygen.

Building
--------
After installing the dependencies, go to the emulator's source code root folder 
and execute the following steps:

    mkdir build
    cd build
    cmake ..
    make clean all

In order to disable statistics support, replace the third step above with:

    cmake .. -DSTATISTICS=OFF

See more details about statistics in the respective section below.
The emulator library, benchmark and test binaries resulting from the build 
process will be available in the respective subfolder inside the 'build' folder.


Usage
-----
First, load the emulator's kernel module. From the emulator's source code root 
folder, execute:

    sudo scripts/setupdev.sh load

Set your processor to run at maximum frequency to ensure fixed cycle 
rate (as the cycle counter is used to project delay time). You can 
use the scaling governor:

    echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

Set the LD_PRELOAD and NVMEMUL_INI environment variables to point respectively 
to the emulator's library and the configuration file to be used. LD_PRELOAD 
is used for automatically loading the emulator's library when the user 
application is executed. Thus, there is no need to statically link the library 
to the user application. See the details about the configuration file in the 
respective section below.

Rather than configuring the scaling governor and the environment variables 
manually as indicated above, you can use the scripts/runenv.sh script. See 
below.

An additional configuration step may be required depending on the Linux Kernel
version. This emulator makes use of rdpmc x86 instruction to read CPU counters.
Before kernel 4.0, when rdpmc support was enabled, any process (not just ones
with an active perf event) could use the rdpmc instruction to access the counters.
Starting with Linux 4.0 rdpmc support is only allowed if an event is currently
enabled in a process's context. To restore the old behavior, write the value 2
to /sys/devices/cpu/rdpmc if kernel version is 4.0 or greater:

    echo 2 | sudo tee /sys/devices/cpu/rdpmc

Run your application:

    scripts/runenv.sh <your_app>

The runenv.sh script runs an application in a new shell environment that
properly sets LD_PRELOAD to the library available in the build folder. We do
not modify the current shell environment to avoid getting other applications 
interposed by the emulator unexpectedly. 

Alternatively, you may directly link 
the library to your application but the nvmemul library must come first in the 
linking order to ensure we properly interpose on necessary functions.
Additionally, this script sets the NVMEMUL_INI environment variable to point
to the nvmemul.ini configuration file available in the emulator's source code 
root folder.


Configuration file
------------------
Emulator runtime parameters can be defined in a configuration file. 

The default path is ./nvmemul.ini but you may change the path through the 
environment variable $NVMEMUL_INI (see scripts/runenv.sh).

The main available parameters are:

    - Latency:
      enable                  True means the latency emulation is on, false,
                              the latency emulation is disabled.
      inject_delay            True means the delay injection is on, false,
                              the emulator will skip the delay injection
      read                    The target read latency in nanoseconds. It must 
                              be greater than the hardware latency. This value
                              is automatically validated by the emulator.
      write                   The target write latency in nanoseconds. It must 
                              be greater than the hardware latency. This value
                              is automatically validated by the emulator.
      max_epoch_duration_us   This is the epoch duration in microseconds. 
                              Occasionally an epoch may last longer than this
                              value, depending on signal delivery by the kernel.
      min_epoch_duration_us   The minimum epoch duration. 
    - Bandwidth:
      enable                  True means the bandwidth emulation is on, false, 
                              it is disabled.
      model                   File path used by the emulator to cache the 
                              detected hardware bandwidth characteristics.
      read                    Target read bandwidth in MB/s.
      write                   Target write bandwidth in MB/s.
    - Topology:
      mc_pci                  File path used by the emulator to cache the PCI 
                              bus topology. It is not required if bandwidth 
                              emulation is disabled.
      physical_nodes          List all CPU sockets ids to be added to the known
                              topology. An odd number of CPU sockets means it
                              will not be possible to configure all CPUs in
                              pairs and then a single CPU will be used as NVM
                              only. See Emulation modes section below.
    - Statistics:
      enable                  True means the statistics collection and report is
                              enabled, false, it is disabled. See the Statistics
                              section below.
      file                    File path used by the emulator to write the 
                              statistics report. If not provided, emulator will 
                              use stdout.
    - Debug:
      level                   Shows debugging message with level up to this 
                              value, the greater this value is, the more verbose 
                              the debug log will be.
                              0: off; 1: critical; 2: error; 3: warning; 4: info;
                              5: debugging.
      verbose                 If greater than zero shows source code information
                              along with the debugging message.


Latency emulation modes
-----------------------
The emulator may run application threads in *NVM only* mode or *DRAM+NVM* mode.
Which mode is used depends on whether the system has more than one CPU socket and 
whether the topology configuration enables multiple CPU sockets.

For *NVM only* mode, the emulator will use a CPU socket with no sibling node and
make use of the DRAM available in that socket to emulate NVM. Any DRAM memory 
access on this socket will trigger delay injection to emulate the target 
latency.

For *DRAM+NVM* mode, the emulator will differentiate DRAM from virtual NVM 
latencies. It is supported only on IvyBridge, Haswell (and higher) Intel processor 
systems with 2 CPU sockets or more. A proper configuration as mentioned above and 
explicit calls to NVM memory allocation in the application's source code are required.

- The emulator will bind application threads to node 0 CPU and DRAM. The 
  other CPU socket will not be used for application threads and the DRAM 
  from this second socket will be used as virtual NVM.
- The application must explicitly allocate virtual NVRAM memory using the 
  pmalloc(size) and pfree(pointer, size) API provided by the emulator. 

See the NVM programming section below.


NVM programming
---------------
The emulator provides an API for allocating and deallocating memory from NVM
space. It is possible to use this API on both NVM only and DRAM+NVM modes. 
However, it is really required to use this API in the DRAM+NVM mode so the 
emulator can clearly differentiate DRAM from NVM memory access latencies.
This is the API available for user applications:

    void *pmalloc(size_t size);
    void pfree(void *start, size_t size);

The application can include the NVM_EMUL/src/lib/pmalloc.h header file to
properly declare these functions.
See test/test_nvm.c and test/test_nvm_remote_dram.c for examples of how to
allocate memory on, respectively, local DRAM or virtual NVM in the DRAM+NVM 
emulation mode.


Statistics
----------
The emulator collects statistical data to help validate emulation accuracy.
If enabled, by default the emulator will write the statistics report to the 
standard output when the user application terminates. Some applications suppress
output to stdout; you can still see the reports by defining a target file for 
the report in the configuration file. When using a file as output, the emulator
appends the result to the file, so previous reports are not overwritten.
The statistics source code can also be statically removed at compile time. See 
the Building section.

These are the reported statistics:

    - initialization duration   Time in microseconds taken by the emulator to 
                                initialize.
    - running threads           The number of threads still running. If the report
                                was called automatically by the emulator, all user 
                                threads are already terminated.
    - terminated threads        Number of terminated threads, including the main
                                thread.
    For each application thread:
    - thread id                 Thread id.
    - cpu id                    CPU id that the user thread was bound to.
    - spawn timestamp           Thread spawn timestamp as reported by the
                                monotonic time.
    - termination timestamp     Thread termination timestamp as reported by the
                                monotonic time.
    - execution time
    - stall cycles              Total number of CPU stalls caused by memory 
                                accesses made by this thread.
    - NVM accesses              Number of effective NVM accesses performed by
                                the application.
    - latency calculation overhead cycles     Overhead cycles caused by the 
                                              emulator and that could not be
                                              amortized. Zero is expected.
                                              Otherwise, consider increasing
                                              the epoch duration.
    - injected delay cycles     Total number of cycles injected by the emulator
                                to emulate the target latency.
    - injected delay in usec    Same value as above, but shown in micro seconds.
    - longest epoch duration    The effective longest epoch duration ever 
                                performed for this thread.
    - shortest epoch duration   The effective shortest epoch duration ever 
                                performed for this thread.
    - average epoch duration    The average epoch duration for this thread.
    - number of epochs          Total number of epochs performed for this 
                                thread.
    - epochs which didn't reach min duration   Number of epochs requested by 
                                               either Thread Monitor or thread 
                                               synchronizations, but were not 
                                               open since the epoch durations
                                               didn't reach the minimum epoch
                                               duration.
    - static epochs requested   Number of epochs requested by the Thread Monitor.


Support to PAPI
---------------
Performance API (PAPI) library may be used with the emulator and there are some 
hooks to switch the current CPU counters reading method to PAPI. Up to the time 
of this writing, there was no way to make PAPI CPU counter reading to perform 
at the performance level required by the emulation. In the future, if it is 
desired to switch to PAPI, follow these steps:
 - Device pmc_ioctl_setcounter() and emulator lib set_counter() in dev/pmc.c 
   calls can be deleted.
 - Define PAPI_SUPPORT for src/lib/* source code.
 - Compile with lib/cpu/pmc-papi.c rather than lib/cpu/pmc.c.
 - Link code with PAPI and add PAPI include directory.
 - Some extra tweaks may be required, check TODOs in the code.


Multiple emulated processes and MPI programs
--------------------------------------------
The emulator needs to bind user threads to specific CPU cores in order to 
optimize emulation results. It is required to export the EMUL_LOCAL_PROCESSES 
environment variable with the number of emulated processes on the host. The 
emulator will manage each emulated process to partition the available CPUs in 
a coordinated way. It is recommended to set EMUL_LOCAL_PROCESSES to at most half 
the number of available CPU cores (note that DRAM+NVM mode already reserves half 
of the available CPU cores).

If EMUL_LOCAL_PROCESSES is not set or set with a value lower than 2, the 
emulator will not partition CPU cores per process.

If a process crashes, the emulator might not have cleaned up the environment
and the process rank ids will not be correctly managed. In this case, close all
emulated processes and delete the files /tmp/emul_lock_file and 
/tmp/emul_process_local_rank if they exist.


Bandwidth emulation
-------------------
Quartz supports an emulation mode with "throttled" memory bandwidth. 

The memory bandwidth emulation makes use of the copy kernel from the OpenMP version 
of the Stream benchmark. When the bandwidth emulation is enabled for the first time, Quartz
creates a memory bandwidth model by utilizing the available *Thermal Registers* in the 
Memory Controller and measuring the corresponding memory bandwidth. This initial step of 
building a model might take several minutes **(~10min)**.

For the memory bandwidth emulation, *turn off the latency modeling*
in the configuration file and select all available NUMA nodes in the 
configuration file in order to prepare the model for any combination of NUMA
node selections.

Modeling data will be cached to these files:

    /tmp/bandwidth_model
    /tmp/mc_pci_bus
As a first step, the emulator will detect the Memory Controller Thermal Registers
Control PCI addresses and cache them to /tmp/mc_pci_bus. After this step, the 
emulator will terminate the current execution to safely clear NUMA bindings. Rerun
the process to resume the work. 

Quartz will create the file: **/tmp/bandwidth_model**. 

It reflects the relationship between Thermal Registers and achievable memory 
bandwidth (in a single socket). The line format in this file is:

    read <thermal register value> <memory bandwidth MB/s>
This file should present ascending values of memory bandwidth ranging from
hundreds of MiB/s to tens of GiB/s. These values (or their approximations) 
can be used for the experiments with memory bandwidth throttling. Note that 
the model is built once: it is cached and then used for all later experiments.
(You can also run the specially prepared automated script *bandwidth-model-building.sh* 
in the directory *benchmark-tests*. For details see 
[README-BENCHMARKS-TESTING.md](https://github.hpe.com/labs/quartz/blob/master/README-BENCHMARKS-TESTING.md).)

For example, to enable memory bandwidth throttling at 2 GB/s, you should change
the emulator configuration file  "nvmemul.ini" using the following settings:

    bandwidth:
    {
    enable = true;
    model = "/tmp/bandwidth_model";
    read = 2000;
    write = 2000;
    };

Both read and write bandwidth values must be set to the same value since the 
emulator does not model read/write independently in the current version. 
See the Limitations section.

The pmalloc() family is not intended to be used with the bandwidth modeling. Use,
for instance, numactl to bind the CPU and memory of the application to the 
intended NUMA node. The bandwidth emulator considers the virtual NVRAM 
node only (in the configuration with two sockets). The application is therefore 
required to keep processes/threads and data on the same NUMA node for bandwidth experiments.

Automated Benchmark Runs
-------------------------
We have created several automated tests with benchmark runs and output analysis 
for testing the correctness of the configured emulation environment and the accuracy 
of expected results. For details see 
[README-BENCHMARKS-TESTING.md](https://github.hpe.com/labs/quartz/blob/master/README-BENCHMARKS-TESTING.md).

Limitations
-----------
The emulator functionality may be affected by certain conditions in user 
applications:
 - application sets threads CPU and memory affinity.
 - application opens much more concurrent threads than available cores per 
   socket. Note that on DRAM+NVM emulation mode, half of the available CPU 
   cores is not used for user threads.
 - application sets handler for SIGUSR1.
Other:
 - Write memory latency is not yet implemented.
 - Write/Read memory bandwidth emulation cannot be set independently.
 - The signal handler may cause syscalls in the application to fail. It is
   recommended to implement retries at the application level as a good practice 
   for syscalls.
 - Child process from fork() calls are not tracked by the emulator. As a
   workaround, the emulator could make the library initialization function 
   available in the external API. Applications then should call this function
   in the beginning of the child process.
 - OpenMP applications may use synchronization primitives not based on
   pthreads which are currently not supported.
 - See the Todo list section for details.


Todo list
---------
Please see the accompanying TODO.dox or the extended documentation for an extensive 
list.

# License

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or (at
    your option) any later version. This program is distributed in the
    hope that it will be useful, but WITHOUT ANY WARRANTY; without even
    the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
    PURPOSE. See the GNU General Public License for more details. You
    should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation,
    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

    
# Copyright

	    (c) Copyright 2016 Hewlett Packard Enterprise Development LP

**NOTE**: This software depends on other packages that may be licensed under different open source licenses.



================================================
FILE: TODO.dox
================================================
/**
\file

\todo Improve performance counter API by making it more generic. For example, autogenerate pmc event_id using perf.
\todo Currently we may interrupt a thread to form a new epoch while it is blocked. This might cause accumulation of overhead cycles.
\todo Currently our bandwidth model cannot independently throttle read and write bandwidth as it relies on throttling DDR ACT transactions. We tried throttling DDR READ and DDR WRITE transactions but this didn't work.
\todo Extend library to interpose on other synchronization events we care about: semaphores, barriers, context switches, openMP sync primitives, etc.
\todo Currently our library does not support context switching. Extend the device driver to properly handle context switching: keep track of per-thread cpu counters, introduce proper delay at context switch points.
\todo Support uncacheable and write-through memory.
\todo Signal SIGUSR1 should be dedicated to the emulator. If the application makes use of this signal, the emulator will not work. Figure out a way to fix this limitation.
\todo Interpose pthread_cancel() and pthread_exit() to make sure the thread is always deregistered internally to the emulator?
\todo CPU counters overflow is not currently handled.
\todo Multiple processes emulation must be reviewed: log file per process, statistics report by process, process id and thread id indications in the log messages.
\todo See Limitations section in the README file.
*/


================================================
FILE: bench/CMakeLists.txt
================================================
# Register every benchmark directory; the order matches the original listing.
foreach(bench_dir memlat new_memlat multilat)
  add_subdirectory(${bench_dir})
endforeach()


================================================
FILE: bench/memlat/CMakeLists.txt
================================================
# memlat benchmark executable (see memlat.c).
add_executable(memlat memlat.c)

# Scope the emulator library headers to this target instead of the whole
# directory, so the include path does not leak to targets added later.
target_include_directories(memlat PRIVATE ${CMAKE_SOURCE_DIR}/src/lib)

# Use the keyword signature: nothing links against an executable, so the
# dependencies are PRIVATE.
target_link_libraries(memlat PRIVATE nvmemul pthread)


================================================
FILE: bench/memlat/memlat.c
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Capacity of the pthread_t array that main() allocates on its stack. */
#define MAX_NUM_THREADS 512

/*
 * Benchmark parameters. main() fills them from the command line before any
 * thread is created; every worker() passes them unchanged to
 * measure_latency2().
 */
uint64_t g_seed, g_nchains, g_nelems, g_from_node_id, g_to_node_id, g_element_size, g_access_size;

/*
 * Latency measurement routine, defined outside this file. worker() prints its
 * return value as "latency_ns".
 * NOTE(review): the globals above are uint64_t while most parameters here are
 * int, so large command-line values are narrowed at the call site.
 */
extern int measure_latency2(uint64_t seedin, int nchains, size_t nelems, int element_size, int access_size, int from_node_id, int to_node_id);

/*
 * Parse a base-10 unsigned integer from a string.
 *
 * s: non-NULL, non-empty string that must consist entirely of one number.
 *
 * Returns the parsed value. Invalid input trips an assertion (and is therefore
 * unchecked when NDEBUG is defined): a NULL or empty string, no digits
 * consumed, trailing characters after the number, or a value that does not
 * fit in unsigned long long (strtoull() reports that through ERANGE).
 */
static uint64_t safe_strtoull(const char *s) {
    char *ep;
    unsigned long long r;

    assert(NULL != s && '\0' != *s);

    /* strtoull() only signals overflow through errno, so clear it first. */
    errno = 0;
    r = strtoull(s, &ep, 10);

    /* At least one character was consumed and nothing is left over. */
    assert(ep != s && '\0' == *ep);
    /* Reject values that were clamped to ULLONG_MAX on overflow. */
    assert(0 == errno);

    return r;
}


/*
 * Thread entry point: run one latency measurement with the global benchmark
 * parameters and print the result. The thread argument is not used.
 */
void* worker(void* arg)
{
    const int measured_ns = measure_latency2(g_seed,
                                             g_nchains,
                                             g_nelems,
                                             g_element_size,
                                             g_access_size,
                                             g_from_node_id,
                                             g_to_node_id);

    (void) arg;
    printf("latency_ns: %d\n", measured_ns);

    return NULL;
}
/*
 * Parse the eight benchmark arguments, start Nthreads worker threads and wait
 * for all of them.
 *
 * Returns 0 on success, 1 on a usage error, when Nthreads exceeds
 * MAX_NUM_THREADS, or when a thread could not be created.
 */
int main(int argc, char *argv[]) {
    uint64_t i;
    uint64_t nthreads;
    uint64_t started;
    int rc = 0;
    pthread_t thread[MAX_NUM_THREADS];

    if (9 != argc) {
        fprintf(stderr, "usage: %s PRNGseed Nthreads Nchains Nelems SZelem SZaccess from_node to_node\n", argv[0]);
        return 1;
    }
    g_seed  = safe_strtoull(argv[1]);
    nthreads = safe_strtoull(argv[2]);
    g_nchains = safe_strtoull(argv[3]);
    g_nelems = safe_strtoull(argv[4]);
    g_element_size = safe_strtoull(argv[5]);
    g_access_size = safe_strtoull(argv[6]);
    g_from_node_id = safe_strtoull(argv[7]);
    g_to_node_id = safe_strtoull(argv[8]);

    /* thread[] lives on the stack; a larger count would write past its end. */
    if (nthreads > MAX_NUM_THREADS) {
        fprintf(stderr, "%s: Nthreads must not exceed %d\n", argv[0], MAX_NUM_THREADS);
        return 1;
    }

    for (started = 0; started < nthreads; started++) {
        int err = pthread_create(&thread[started], NULL, worker, NULL);
        if (0 != err) {
            fprintf(stderr, "%s: pthread_create failed for thread %llu (error %d)\n",
                    argv[0], (unsigned long long) started, err);
            rc = 1;
            break;
        }
    }

    /* Join only the threads that were actually created. */
    for (i = 0; i < started; i++) {
        pthread_join(thread[i], NULL);
    }
    return rc;
}


================================================
FILE: bench/multilat/CMakeLists.txt
================================================
# multilat benchmark executable (see multilat.c).
add_executable(multilat multilat.c)

# multilat.c includes emulator library headers (thread.h, pmalloc.h, debug.h);
# scope that include path to this target instead of the whole directory.
target_include_directories(multilat PRIVATE ${CMAKE_SOURCE_DIR}/src/lib)

# Use the keyword signature: nothing links against an executable, so the
# dependencies are PRIVATE.
target_link_libraries(multilat PRIVATE nvmemul pthread)


================================================
FILE: bench/multilat/multilat.c
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
//#include <pthread.h>

#include "thread.h"
#include <sys/time.h>
#include "pmalloc.h"
#include "debug.h"
//#include "stat.h"


#define NDEBUG

//#ifndef NDEBUG
#include <sys/syscall.h>
//#endif

// Packs the arguments received from the user; a pointer to this struct is the
// argument that thread_fn() unpacks and forwards to thread_iter().
typedef struct {
	int mem_refs_dram;   // total number of DRAM accesses to perform
	int mem_refs_nvm;    // total number of NVM accesses to perform
	int interleave_dram; // DRAM accesses performed per loop iteration in thread_iter()
	int interleave_nvm;  // NVM accesses performed per loop iteration in thread_iter()
	//int from_node;
	//int to_node;
} arg_s;


// For multi-thread management.
// MAX_NUM_THREADS is the largest thread count run_threads() accepts and the
// capacity of the thread_desc array.
#define MAX_NUM_THREADS 50
pthread_t thread_desc[MAX_NUM_THREADS];
//pthread_mutex_t mutex;


// For CPU cache trashing and pointer chasing.
#include <inttypes.h>
// One link of a pointer-chasing chain.
typedef struct {
	uint64_t val;    // index of the next element to visit (force_ldm_stalls() follows it)
	char padding[0]; // zero-length array (compiler extension); presumably marks the
	                 // start of per-element padding up to chain_t.element_size -- TODO confirm
} element_t;

// Descriptor of one chain as returned by alloc_chain().
// NOTE(review): alloc_chain() is defined outside this file; the field meanings
// below are inferred from the names and the alloc_chain() parameters -- confirm.
typedef struct {
    uint64_t   N;            // presumably the number of elements in the chain
    uint64_t   element_size; // presumably the size in bytes of each element
    element_t* head;         // presumably the first element of the chain storage
} chain_t;
// Helpers defined outside this file.
// trash_cache: called by thread_iter() after CPU binding, before the timed loop.
uint64_t trash_cache(uint64_t N);
// alloc_chain: thread_iter() passes (node_i, node_j) = (0, 0) for the DRAM
// chain and (0, 1) for the NVM chain.
chain_t* alloc_chain(uint64_t seedin, uint64_t N, uint64_t element_size, uint64_t node_i, uint64_t node_j);
// element: returns the element at position 'index' of 'chain'.
element_t* element(chain_t* chain, uint64_t index);
// read_element: called by force_ldm_stalls() when access_size > element_size;
// presumably copies element data into 'buf' -- TODO confirm.
void inline read_element(chain_t* chain, uint64_t index, char* buf, uint64_t buf_size);

// Factor is 10 (could be more), to make sure we have a buffer much bigger than the CPU cache.
// The memory buffer is NOT shared among threads.
// For now the cache size is hardcoded as 20 MB.
// NELEMS evaluates to 10 * 20480000 / 64 = 3,200,000 elements per chain.
#define NELEMS (10 * 20480000 / 64LLU)
#define PAGESZ 4096
// Capacity of the per-thread chain pointer arrays.
#define MAX_NUM_CHAINS 16
//#undef USE_HUGETLB
// Base seed for chain allocation; thread_iter() uses SEED_IN + j*j for chain j.
#define SEED_IN 1
// Number of chains actually allocated and walked per memory type.
#define NCHAINS 1


/*extern inline hrtime_t hrtime_cycles(void);
static inline void delay_cycles(hrtime_t cycles)
{
    hrtime_t start, stop;

    start = hrtime_cycles();
    do {
        stop = hrtime_cycles();
    } while (stop - start < cycles);
}*/


// Helper for fixing thread affinity to a single CPU after allocating memory
// chains and binding them to the local or remote nodes.
//
// Returns the size, in bits, of the affinity mask reported by the raw
// sched_getaffinity system call, or 0 (after printing a message) when it
// cannot be determined. The probe mask starts at 2048 bits and is doubled,
// up to 1M bits, while the kernel rejects it.
static int max_number_of_cpus(void)
{
    int n;
    int cpus = 2048;
    size_t setsize;
    cpu_set_t *set;

    for (;;) {
        /* Recompute the byte size on every attempt so a grown mask is really used. */
        setsize = CPU_ALLOC_SIZE(cpus);
        set = CPU_ALLOC(cpus);
        if (!set)
            break;

        CPU_ZERO_S(setsize, set);
        /* the library version does not return size of cpumask_t */
        n = syscall(SYS_sched_getaffinity, 0, setsize, set);
        CPU_FREE(set);

        if (n >= 0)
            return n * 8;   /* the syscall returns bytes; convert to bits */

        /* Give up instead of returning a negative count. */
        if (cpus >= 1024 * 1024)
            break;
        cpus *= 2;
    }

    printf("cannot determine NR_CPUS");
    return 0;
}

/*
 * Pin the given emulator thread to the single CPU stored in thread->cpu_id.
 *
 * thread: emulator thread descriptor; NULL means the emulator is disabled and
 *         nothing is done.
 *
 * Returns 0 when the thread is already bound to exactly that CPU, when the
 * binding succeeded, or when thread is NULL. Returns 1 when the CPU count
 * cannot be determined, when the CPU sets cannot be allocated, or when the
 * affinity cannot be read or set.
 */
static int bind_cpu(thread_t *thread) {
    size_t setsize;
    cpu_set_t *cur_cpuset;
    cpu_set_t *new_cpuset;
    int ncpus;
    int rc = 1;

    if (thread == NULL) {
        // if thread is NULL it means the emulator is disabled, return without setting CPU affinity
        return 0;
    }

    ncpus = max_number_of_cpus();
    if (ncpus <= 0) {
        return 1;
    }

    setsize = CPU_ALLOC_SIZE(ncpus);
    cur_cpuset = CPU_ALLOC(ncpus);
    new_cpuset = CPU_ALLOC(ncpus);
    if (cur_cpuset == NULL || new_cpuset == NULL) {
        DBG_LOG(ERROR, "Cannot allocate CPU sets for thread tid [%d]\n", thread->tid);
        goto out;
    }

    CPU_ZERO_S(setsize, cur_cpuset);
    CPU_ZERO_S(setsize, new_cpuset);
    CPU_SET_S(thread->cpu_id, setsize, new_cpuset);

    if (pthread_getaffinity_np(thread->pthread, setsize, cur_cpuset) != 0) {
        DBG_LOG(ERROR, "Cannot get thread tid [%d] affinity, pthread: 0x%lx on processor %d\n",
                thread->tid, thread->pthread, thread->cpu_id);
        goto out;
    }

    // The sets are dynamically sized, so compare exactly setsize bytes;
    // CPU_EQUAL() would compare sizeof(cpu_set_t) bytes and may read past
    // the end of a smaller allocation.
    if (CPU_EQUAL_S(setsize, cur_cpuset, new_cpuset)) {
        rc = 0;
        goto out;
    }

    DBG_LOG(INFO, "Binding thread tid [%d] pthread: 0x%lx on processor %d\n", thread->tid, thread->pthread, thread->cpu_id);

    if (pthread_setaffinity_np(thread->pthread, setsize, new_cpuset) != 0) {
        DBG_LOG(ERROR, "Cannot bind thread tid [%d] pthread: 0x%lx on processor %d\n", thread->tid, thread->pthread, thread->cpu_id);
        goto out;
    }

    rc = 0;

out:
    // Release both sets on every path; the original leaked them.
    if (cur_cpuset != NULL) {
        CPU_FREE(cur_cpuset);
    }
    if (new_cpuset != NULL) {
        CPU_FREE(new_cpuset);
    }
    return rc;
}

/*
 * Walk 'mem_refs' links of chain C[0] and time the walk with CLOCK_MONOTONIC.
 *
 * C:            array of chains; only C[0] is walked (NCHAINS is 1).
 * element_size: element size in bytes as seen by the caller.
 * access_size:  when larger than element_size, read_element() is also called
 *               for every visited element.
 * mem_refs:     number of pointers/elements to chase.
 * max_nelems:   max number of available elements/pointers.
 * it_n:         iteration number used to pick the first element to chase, so
 *               that consecutive calls do not start from the same element.
 * time_diff_ns: receives the elapsed time of the walk in nanoseconds. It is
 *               left untouched when mem_refs <= 0.
 *
 * Returns the sum of the 'val' fields visited, or 0 when mem_refs <= 0.
 */
uint64_t force_ldm_stalls(chain_t **C,
                          int element_size,
                          int access_size,
                          int mem_refs,               // number of pointers/elements to chase
                          uint64_t max_nelems,        // max number of available elements/pointers
                          int it_n,                   // seed to calculate the first pointer to chase, used to avoid repeating
                                                      // pointers during consecutive calls
	                      unsigned long *time_diff_ns) {
    /* Number of chains, not the PRNG seed (both happen to be 1). */
    const int nchains = NCHAINS;
    const uint64_t buf_size = 16384;
    uint64_t sum = 0;
    uint64_t i;
    uint64_t start;
    uint64_t it_limit;
    char *buf;
    int count;
    /* Zero-initialised so the elapsed time is 0 if no walk is performed. */
    struct timespec time_start = {0, 0};
    struct timespec time_end = {0, 0};

    assert(nchains < MAX_NUM_CHAINS);

    if (mem_refs <= 0) return 0;

    buf = (char*) malloc(buf_size);
    assert(buf != NULL);

    /* Number of disjoint windows of 'mem_refs' elements available. */
    if (max_nelems > (uint64_t) mem_refs) {
        it_limit = max_nelems / (uint64_t) mem_refs;
    } else {
        it_limit = 1;
    }
    /* Pick the window in 64-bit arithmetic so the product cannot overflow int. */
    start = ((uint64_t) it_n % it_limit) * (uint64_t) mem_refs;
    if ((start + (uint64_t) mem_refs) > max_nelems) {
        start = 0;
    }

    /* chase the pointers; walking several chains at once is not implemented */
    if (nchains == 1) {
        clock_gettime(CLOCK_MONOTONIC, &time_start);
        // chase pointers until the 'mem_refs' count is reached
        for (count = 0, i = start; count < mem_refs; i = element(C[0], i)->val, ++count) {
            __asm__("");
            sum += element(C[0], i)->val;
            if (access_size > element_size) {
                read_element(C[0], i, buf, buf_size);
            }
        }
        clock_gettime(CLOCK_MONOTONIC, &time_end);
    }

    *time_diff_ns = (unsigned long) ((time_end.tv_sec - time_start.tv_sec) * 1000000000L +
                                     (time_end.tv_nsec - time_start.tv_nsec));

    free(buf);
    return sum;
}

/*
 * Per-thread benchmark body: allocate one DRAM chain and one NVM chain, bind
 * the thread to its CPU, then alternate batches of DRAM and NVM pointer
 * chasing until the requested number of accesses has been made, and print the
 * average latency per access for each memory type.
 *
 * dram_refs:       total number of DRAM accesses to make.
 * nvm_refs:        total number of NVM accesses to make.
 * interleave_dram: DRAM accesses per batch; a value <= 0 means a single batch.
 * interleave_nvm:  NVM accesses per batch; a value <= 0 means a single batch.
 */
void thread_iter(int dram_refs, int nvm_refs, int interleave_dram, int interleave_nvm) {
    long it_n;
    unsigned long time_dram, time_nvm, total_time_dram_ns, total_time_nvm_ns;
    uint64_t seed;
    uint64_t j;
    chain_t *C_dram[MAX_NUM_CHAINS];
    chain_t *C_nvm[MAX_NUM_CHAINS];
    int missing_dram_refs, missing_nvm_refs;
    int dram_stalls, nvm_stalls;
    struct timespec task_time_start, task_time_end;
    unsigned long task_time_diff_ns;

    assert(NELEMS < UINT64_MAX);

    /* A batch size of zero would never reduce the remaining count and the
     * loop below would spin forever; fall back to one batch with everything. */
    if (interleave_dram <= 0) {
        interleave_dram = dram_refs;
    }
    if (interleave_nvm <= 0) {
        interleave_nvm = nvm_refs;
    }

    for (j=0; j < NCHAINS; j++) {
        seed = SEED_IN + j*j;
        C_dram[j] = alloc_chain(seed, NELEMS, 64LLU, 0, 0);
        C_nvm[j] = alloc_chain(seed, NELEMS, 64LLU, 0, 1);
        __asm__("");
    }

    bind_cpu(thread_self());

    // cache must be trashed after bind_cpu() call
    trash_cache(NELEMS);

    total_time_dram_ns = 0;
    total_time_nvm_ns = 0;

    missing_dram_refs = dram_refs;
    missing_nvm_refs = nvm_refs;

#ifndef NDEBUG
    printf("DRAM accesses to be made: %d\n", dram_refs);
    printf("NVM accesses to be made: %d\n", nvm_refs);
#endif

    clock_gettime(CLOCK_MONOTONIC, &task_time_start);

    for (it_n = 0; (missing_dram_refs > 0) || (missing_nvm_refs > 0); ++it_n) {
        __asm__("");

        // calculate the number of memory accesses to be made on each memory type
        if (missing_dram_refs > interleave_dram) {
            missing_dram_refs -= interleave_dram;
            dram_stalls = interleave_dram;
        } else {
            dram_stalls = missing_dram_refs;
            missing_dram_refs = 0;
        }

        if (missing_nvm_refs > interleave_nvm) {
            missing_nvm_refs -= interleave_nvm;
            nvm_stalls = interleave_nvm;
        } else {
            nvm_stalls = missing_nvm_refs;
            missing_nvm_refs = 0;
        }

        // force_ldm_stalls() leaves the time untouched for an empty batch
        time_dram = 0;
        time_nvm = 0;

        // do memory accesses interleaved by dividing the number of accesses in smaller amount
        // as configured by user
        force_ldm_stalls(C_dram, 64LLU, 8, dram_stalls, NELEMS, it_n, &time_dram);
        force_ldm_stalls(C_nvm, 64LLU, 8, nvm_stalls, NELEMS, it_n, &time_nvm);

        total_time_dram_ns += time_dram;
        total_time_nvm_ns += time_nvm;
#ifndef NDEBUG
        printf("%d DRAM accesses took: %lu ns\n", dram_stalls, time_dram);
        printf("%d NVM accesses took: %lu ns\n", nvm_stalls, time_nvm);
#endif
    }

    clock_gettime(CLOCK_MONOTONIC, &task_time_end);
    task_time_diff_ns = ((task_time_end.tv_sec * 1000000000) + task_time_end.tv_nsec) -
                        ((task_time_start.tv_sec * 1000000000) + task_time_start.tv_nsec);

    // the memory latency is the total time divided by the number of accesses for each memory type
    if (dram_refs > 0)
        total_time_dram_ns /= dram_refs;
    else
        total_time_dram_ns = 0;
    if (nvm_refs > 0)
        total_time_nvm_ns /= nvm_refs;
    else
        total_time_nvm_ns = 0;

    // the totals are unsigned long, so print them with %lu
    printf("DRAM latency: %lu ns\n", total_time_dram_ns);
    printf("NVM latency: %lu ns\n", total_time_nvm_ns);
    printf("Measure time: %.3lf ms\n", (double)task_time_diff_ns/1000000.0);
    
    printf("Expected time: %.3lu ms\n", ((total_time_dram_ns * dram_refs) + (total_time_nvm_ns * nvm_refs)) / 1000000);

    for (j=0; j < NCHAINS; j++) {
        free(C_dram[j]);
        free(C_nvm[j]);
    }
}

void *thread_fn(void *arg) {
	int interleave_dram = ((arg_s *) arg)->interleave_dram;
	int interleave_nvm = ((arg_s *) arg)->interleave_nvm;
	int dram_refs = ((arg_s *) arg)->mem_refs_dram;
	int nvm_refs = ((arg_s *) arg)->mem_refs_nvm;

	thread_iter(dram_refs, nvm_refs, interleave_dram, interleave_nvm);

	return 0;
}

/*
 * Validate the benchmark parameters, then run n_threads workers executing
 * thread_fn() and wait for all of them to finish.
 *
 * n_threads        - number of worker threads; must be in 1..MAX_NUM_THREADS
 * dram_refs        - total DRAM accesses requested for each thread (>= 0)
 * nvm_refs         - total NVM accesses requested for each thread (>= 0)
 * interleaved_dram - DRAM accesses issued in sequence per round; must be
 *                    non-zero when dram_refs > 0 and must not exceed dram_refs
 * interleaved_nvm  - same as interleaved_dram, for NVM accesses
 *
 * Every thread receives a pointer to the same argument block, which is only
 * written before the first thread is created.
 *
 * On invalid arguments, or when the threads cannot be set up or created, a
 * message is printed and the process terminates with exit(-1).
 */
void run_threads(int n_threads, int dram_refs, int nvm_refs, int interleaved_dram, int interleaved_nvm)
{
	pthread_attr_t attr;
    int i;
    int rc;
    int n_started = 0;
    arg_s args;

    if ((n_threads > MAX_NUM_THREADS) || (n_threads <= 0)) {
    	printf("INVALID RANGE:\n");
    	printf("\tMax number of threads is %d\n", MAX_NUM_THREADS);
    	exit(-1);
    }

    if (dram_refs < 0 || nvm_refs < 0 || interleaved_dram < 0 || interleaved_nvm < 0) {
    	printf("INVALID RANGE:\n");
    	printf("\tdram refs: %d, nvm refs: %d, interleaved dram refs: %d, interleaved nvm refs: %d\n",
    			dram_refs, nvm_refs, interleaved_dram, interleaved_nvm);
    	exit(-1);
    }

    if ((dram_refs > 0 && interleaved_dram == 0) || (nvm_refs > 0 && interleaved_nvm == 0)) {
    	printf("INVALID ARGUMENTS:\n");
    	printf("\tnumber of accesses in sequence cannot be zero if the number of accesses for the same memory type is greater than zero.\n");
    	exit(-1);
    }

    if (dram_refs < interleaved_dram) {
    	printf("INVALID ARGUMENTS:\n");
    	printf("\tnumber of DRAM accesses cannot be lower than the number of DRAM accesses in sequence\n");
    	exit(-1);
    }
    if (nvm_refs < interleaved_nvm) {
    	printf("INVALID ARGUMENTS:\n");
    	printf("\tnumber of NVM accesses cannot be lower than the number of NVM accesses in sequence\n");
    	exit(-1);
    }

    if (pthread_attr_init(&attr) != 0) {
		printf("pthread_attr_init failed\n");
		exit(-1);
	}

    args.interleave_dram = interleaved_dram;
    args.interleave_nvm = interleaved_nvm;
    args.mem_refs_dram = dram_refs;
    args.mem_refs_nvm = nvm_refs;

    for (i = 0; i < n_threads; ++i) {
        rc = pthread_create(&thread_desc[i], &attr, thread_fn, (void *)&args);
        if (rc != 0) {
            /* thread_desc[i] is not valid after a failed create: stop here so
             * that only successfully created threads are joined below */
            printf("pthread_create failed for thread %d (error %d)\n", i, rc);
            break;
        }
        ++n_started;
    }

    pthread_attr_destroy(&attr);

    for (i = 0; i < n_started; ++i) {
        pthread_join(thread_desc[i], NULL);
    }

    if (n_started != n_threads) {
        exit(-1);
    }
}

/*
 * Command-line entry point. Expects exactly five numeric arguments (thread
 * count, total DRAM accesses, total NVM accesses, DRAM accesses in sequence,
 * NVM accesses in sequence), converts them with atoi() and hands them to
 * run_threads(). Returns -1 on a wrong argument count, 0 otherwise.
 */
int main(int argn, char **argv)
{
    /* positions of the arguments in argv; ARG_COUNT is the required argn */
    enum {
        ARG_THREADS = 1,
        ARG_DRAM_TOTAL,
        ARG_NVM_TOTAL,
        ARG_DRAM_SEQ,
        ARG_NVM_SEQ,
        ARG_COUNT
    };

    if (argn != ARG_COUNT) {
        printf("INVALID ARGUMENTS:\n");
        printf("\t%s [# threads] [# total dram accesses] [# total nvm accesses] [# dram accesses in sequence] [# nvm accesses in sequence]\n", argv[0]);
        return -1;
    }

    run_threads(atoi(argv[ARG_THREADS]),
                atoi(argv[ARG_DRAM_TOTAL]),
                atoi(argv[ARG_NVM_TOTAL]),
                atoi(argv[ARG_DRAM_SEQ]),
                atoi(argv[ARG_NVM_SEQ]));

    return 0;
}


================================================
FILE: bench/new_memlat/CMakeLists.txt
================================================
# new_memlat: benchmark executable built from memlat.c.
add_executable(new_memlat memlat.c)

# memlat.c includes "model.h" and "thread.h", which are looked up under src/lib.
# The include path is attached to this target only instead of the whole directory.
target_include_directories(new_memlat PRIVATE "${CMAKE_SOURCE_DIR}/src/lib")

# Link against the emulator library and pthreads; nothing is re-exported.
target_link_libraries(new_memlat PRIVATE nvmemul pthread)


================================================
FILE: bench/new_memlat/memlat.c
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <pthread.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "model.h"
#include "thread.h"

/* Upper bound on worker threads; sizes the pthread_t array in main(). */
#define MAX_NUM_THREADS 512

/* Benchmark parameters: set from the command line in main(), read by worker(). */
uint64_t g_seed, g_nchains, g_nelems, g_from_node_id, g_to_node_id, g_element_size, g_access_size;

/* Defined outside this file; worker() prints its return value as a latency in ns. */
extern int measure_latency2(uint64_t seedin, int nchains, size_t nelems, int element_size, int access_size, int from_node_id, int to_node_id);

/*
 * Parse s as a base-10 unsigned integer and return its value.
 *
 * The whole string must be consumed by the conversion. A NULL or empty
 * string, trailing non-numeric characters, or a value that does not fit the
 * conversion range are reported on stderr and terminate the process with
 * EXIT_FAILURE. The checks are explicit (not assert-based) so they remain
 * active when the program is built with NDEBUG.
 */
static uint64_t safe_strtoull(const char *s) {
    char *end = NULL;
    unsigned long long value;

    if (NULL == s || '\0' == *s) {
        fprintf(stderr, "invalid numeric argument: empty string\n");
        exit(EXIT_FAILURE);
    }

    errno = 0; /* strtoull reports overflow only through errno */
    value = strtoull(s, &end, 10);

    if (end == s || '\0' != *end) {
        fprintf(stderr, "invalid numeric argument: '%s'\n", s);
        exit(EXIT_FAILURE);
    }
    if (ERANGE == errno) {
        fprintf(stderr, "numeric argument out of range: '%s'\n", s);
        exit(EXIT_FAILURE);
    }

    return (uint64_t) value;
}

/* Defined outside this file; worker() reads its read_latency field as the target latency. */
extern latency_model_t latency_model;

#ifdef MEMLAT_SUPPORT
/*
 * Thread-local values defined outside this file. worker() passes the two
 * tls_hw_*_latency values to ns_to_cycles() as nanoseconds and uses the two
 * tls_global_*_dram values as access counts (divisors of stall cycles and of
 * total time). NOTE(review): how and when they are updated is not visible here.
 */
extern __thread int tls_hw_local_latency;
extern __thread int tls_hw_remote_latency;
extern __thread uint64_t tls_global_remote_dram;
extern __thread uint64_t tls_global_local_dram;

/*
 * Convert a duration in nanoseconds into CPU cycles for a clock frequency
 * given in MHz: cycles = cpu_speed_mhz * ns / 1000 (integer division).
 * The product is formed in 64 bits, so large inputs do not overflow int.
 */
static inline uint64_t ns_to_cycles(int cpu_speed_mhz, int ns)
{
    return (uint64_t) (((int64_t) cpu_speed_mhz * (int64_t) ns) / 1000);
}
#endif

#ifdef MEMLAT_SUPPORT
/*
 * Absolute deviation of measured_ns from target_ns, expressed as a percentage
 * of target_ns. Returns 0 when target_ns is 0 so no division by zero occurs.
 */
static double latency_error_pct(double target_ns, double measured_ns)
{
    double delta = (measured_ns > target_ns) ? (measured_ns - target_ns)
                                             : (target_ns - measured_ns);

    if (target_ns == 0.0) {
        return 0.0;
    }
    return (delta * 100.0) / target_ns;
}
#endif

/*
 * Thread body: runs measure_latency2() with the global benchmark parameters
 * and prints the measured latency. With MEMLAT_SUPPORT it additionally prints
 * a comparison against latency_model.read_latency, derived from the calling
 * thread's stall-cycle counter and the tls_* counters.
 *
 * arg is unused. Always returns NULL.
 *
 * The "target latency" and "... measured latency" output lines are parsed by
 * memlat.sh (fields 3 and 4 respectively), so their layout is kept unchanged.
 */
void* worker(void* arg) 
{
    int latency_ns;
#ifdef MEMLAT_SUPPORT
    uint64_t exp_stalls;
    uint64_t calc_nvm_accesses = 0;
    uint64_t detected_hw_lat;
    uint64_t actual_lat = 0;
    uint64_t total_time;
    uint64_t fixed_latency_ns = 0;
    uint64_t nvm_accesses = 0;
    uint64_t nvm_hw_latency;
    uint64_t stall_cycles;
    int hw_latency_ns;
#endif

    (void) arg;

    latency_ns = measure_latency2(g_seed, g_nchains, g_nelems, g_element_size, g_access_size, g_from_node_id, g_to_node_id);
    printf("latency_ns: %d ns\n", latency_ns);

#ifdef MEMLAT_SUPPORT
    total_time = g_nelems * latency_ns;
    /* read the counter once so every figure below is derived from the same value */
    stall_cycles = (uint64_t) thread_self()->stall_cycles;

    /* different dram/nvram nodes select the "remote" counters, otherwise the "local" ones */
    if (thread_self()->virtual_node->dram_node != thread_self()->virtual_node->nvram_node) {
        hw_latency_ns = tls_hw_remote_latency;
        nvm_accesses = tls_global_remote_dram;
    } else {
        hw_latency_ns = tls_hw_local_latency;
        nvm_accesses = tls_global_local_dram;
    }
    detected_hw_lat = ns_to_cycles(thread_self()->cpu_speed_mhz, hw_latency_ns);
    nvm_hw_latency = hw_latency_ns;

    if (nvm_accesses > 0) {
        actual_lat = stall_cycles / nvm_accesses;
        fixed_latency_ns = total_time / nvm_accesses;
    }

    exp_stalls = g_nelems * detected_hw_lat;
    /* a zero hardware latency would otherwise be a division by zero */
    if (detected_hw_lat != 0) {
        calc_nvm_accesses = stall_cycles / detected_hw_lat;
    }

    printf("target latency: %d ns\n", latency_model.read_latency);
    printf("Error: %3.1f%%\n", latency_error_pct((double)latency_model.read_latency, (double)latency_ns));
    printf("target NVM accesses: %" PRIu64 "\n", g_nelems);
    printf("detected HW latency: %" PRIu64 " ns\n", nvm_hw_latency);
    printf("detected HW latency: %" PRIu64 " cycles (detected_hw_lat making use of cpu_speed_mhz)\n", detected_hw_lat);
    printf("expected CPU stalls: %" PRIu64 " cycles (target_nvm_accesses * detected_hw_lat)\n", exp_stalls);
    printf("actual CPU stalls: %" PRIu64 " cycles\n", stall_cycles);
    printf("calculated NVM accesses: %" PRIu64 " (actual_cpu_stalls / detected_hw_lat)\n", calc_nvm_accesses);
    if (nvm_accesses != 0) {
        printf("actual NVM accesses: %" PRIu64 "\n", nvm_accesses);
        printf("actual latency: %" PRIu64 " cyles (actual_stalls / actual_nvm_accesses)\n", actual_lat);
        printf("fixed measured latency: %" PRIu64 " ns (total_chasing_time / actual_nvm_accesses)\n", fixed_latency_ns);
        printf("fixed latency error: %3.1f%%\n", latency_error_pct((double)latency_model.read_latency, (double)fixed_latency_ns));
    } else if (calc_nvm_accesses != 0) {
        fixed_latency_ns = total_time / calc_nvm_accesses;
        printf("fixed measured latency: %" PRIu64 " ns (total_chasing_time / calculated_nvm_accesses)\n", fixed_latency_ns);
        printf("fixed latency error: %3.1f%%\n", latency_error_pct((double)latency_model.read_latency, (double)fixed_latency_ns));
    } else {
        /* neither an actual nor a calculated access count is available */
        printf("fixed latency: unavailable (no NVM accesses detected)\n");
    }
#endif
    return NULL;
}
/*
 * Command-line entry point.
 *
 * usage: PRNGseed Nthreads Nchains Nelems SZelem SZaccess from_node to_node
 *
 * Stores the parameters in the g_* globals, starts Nthreads threads running
 * worker() and waits for them. Returns 1 on a wrong argument count, when
 * Nthreads exceeds MAX_NUM_THREADS, or when a thread cannot be created;
 * returns 0 otherwise.
 */
int main(int argc, char *argv[]) {
    uint64_t i;
    uint64_t nthreads;
    uint64_t nstarted = 0;
    int status = 0;
    pthread_t thread[MAX_NUM_THREADS];

    if (9 != argc) {
        fprintf(stderr, "usage: %s PRNGseed Nthreads Nchains Nelems SZelem SZaccess from_node to_node\n", argv[0]);
        return 1;
    }
    g_seed  = safe_strtoull(argv[1]);
    nthreads = safe_strtoull(argv[2]);
    g_nchains = safe_strtoull(argv[3]);
    g_nelems = safe_strtoull(argv[4]);
    g_element_size = safe_strtoull(argv[5]);
    g_access_size = safe_strtoull(argv[6]);
    g_from_node_id = safe_strtoull(argv[7]);
    g_to_node_id = safe_strtoull(argv[8]);

    /* thread[] holds MAX_NUM_THREADS entries; a larger count would write past it */
    if (nthreads > MAX_NUM_THREADS) {
        fprintf(stderr, "Nthreads must not exceed %d\n", MAX_NUM_THREADS);
        return 1;
    }

    for (i = 0; i < nthreads; i++) {
        int err = pthread_create(&thread[i], NULL, worker, NULL);
        if (0 != err) {
            fprintf(stderr, "pthread_create failed for thread %" PRIu64 " (error %d)\n", i, err);
            status = 1;
            break;
        }
        nstarted++;
    }
    /* join only the threads that were actually created */
    for (i = 0; i < nstarted; i++) {
        pthread_join(thread[i], NULL);
    }
    return status;
}


================================================
FILE: bench/new_memlat/memlat.sh
================================================
#!/bin/bash
#################################################################
#Copyright 2016 Hewlett Packard Enterprise Development LP.  
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or (at
#your option) any later version. This program is distributed in the
#hope that it will be useful, but WITHOUT ANY WARRANTY; without even
#the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#PURPOSE. See the GNU General Public License for more details. You
#should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation,
#Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#################################################################

# The interpreter line must be the first line of the file to take effect; this
# script relies on bash-only syntax ([[ =~ ]], C-style for loops, &>).

# percentage of error as threshold to discard outliers, anything above this percentage will be discarded
MAX_ERROR_PERCENTAGE=10
# max number of tries to execute memlat
MAX_TRIES=10


# file that captures the output of each memlat run
TEMP_FILE=/tmp/tmp_memlat.out


# repository root, two levels above the directory containing this script
NVM_EMUL_PATH="$(dirname "$0")/../.."
# positional arguments: number of elements, and 0 (local dram) or 1 (remote dram)
NELEMS=$1
TARGET_DRAM=$2


# Print the expected command line on stdout and terminate the script with status 1.
usage()
{
    printf '%s\n' "$0 [number of elements] [0=local dram|1=remote dram]"
    exit 1
}

# Return 0 when $1 consists solely of one or more decimal digits, 1 otherwise.
validate_decimal()
{
    re='^[0-9]+$'
    if [[ $1 =~ $re ]]; then
        return 0
    fi
    return 1
}

# Validate the script arguments; on any problem print a message and call usage
# (which exits with status 1).
#   - exactly two arguments must be given
#   - NELEMS must be a decimal number
#   - TARGET_DRAM must be a decimal number equal to 0 or 1
function check_parameters()
{
    if [ $# -ne 2 ]; then
        echo "Incorrect arguments"
        usage
    fi

    if ! validate_decimal "${NELEMS}"; then
        echo "Invalid number of elements"
        usage
    fi

    # A non-numeric value must be rejected before the numeric comparison:
    # "[ abc -gt 1 ]" is an error, which an "if" would treat as "not greater".
    if ! validate_decimal "${TARGET_DRAM}" || [ "${TARGET_DRAM}" -gt 1 ]; then
        echo "Incorrect dram target"
        usage
    fi
}

# Check the memlat output captured in ${TEMP_FILE}.
#
# Takes the value from the first "target latency" line (3rd field) and the first
# "measured latency" line (4th field) and computes |measured - target| as an
# integer percentage of target.
#
# Returns 0 when that percentage does not exceed MAX_ERROR_PERCENTAGE.
# Returns 1 otherwise, including when either value is missing or is not a
# plain decimal number (for example because the benchmark crashed).
function verify_run
{
    local number_re='^[0-9]+$'
    local target measured delta error

    target=$(awk '/target latency/ { print $3; exit }' "${TEMP_FILE}")
    measured=$(awk '/measured latency/ { print $4; exit }' "${TEMP_FILE}")

    if ! [[ ${target} =~ ${number_re} && ${measured} =~ ${number_re} ]]; then
        return 1
    fi

    # 10# forces base 10 so that a value with leading zeros is not read as octal
    target=$(( 10#${target} ))
    measured=$(( 10#${measured} ))

    if [ "${measured}" -gt "${target}" ]; then
        delta=$(( measured - target ))
    else
        delta=$(( target - measured ))
    fi

    if [ "${target}" -gt 0 ]; then
        error=$(( delta * 100 / target ))
    else
        error=0
    fi

    if [ "${error}" -gt "${MAX_ERROR_PERCENTAGE}" ]; then
        return 1
    fi

    return 0
}

############ MAIN ######################

# "$@" keeps every argument intact, so the argument count seen by
# check_parameters matches what the user actually passed
check_parameters "$@"

# stays non-zero unless one run passes verification
ret=1

# execute memlat in loop until the result is within the threshold or the max tries is reached
for (( c=0; c<MAX_TRIES; c++ )); do
    "${NVM_EMUL_PATH}/scripts/runenv.sh" "${NVM_EMUL_PATH}/build/bench/new_memlat/new_memlat" 1 1 1 "${NELEMS}" 64 8 0 "${TARGET_DRAM}" &> "${TEMP_FILE}"

    verify_run

    ret=$?

    if [ ${ret} -eq 0 ]; then
        grep "measured latency" "${TEMP_FILE}"
        break
    fi
done

if [ ${ret} -ne 0 ]; then
    echo "Could not produce a valid run"
fi

rm -f "${TEMP_FILE}"

exit ${ret}


================================================
FILE: benchmark-tests/bandwidth-model-building.sh
================================================
#!/bin/bash
#################################################################
#Copyright 2016 Hewlett Packard Enterprise Development LP.  
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or (at
#your option) any later version. This program is distributed in the
#hope that it will be useful, but WITHOUT ANY WARRANTY; without even
#the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#PURPOSE. See the GNU General Public License for more details. You
#should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation,
#Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#################################################################

# Runs memlat with the bandwidth configuration. All paths are relative, so the
# script has to be started from the directory that contains it.

# switch every CPU to the "performance" cpufreq governor (needs sudo)
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

# activate the bandwidth configuration
cp  nvmemul-bandwidth.ini  nvmemul.ini

# drop any existing model file (the path configured as "model" in
# nvmemul-bandwidth.ini); -f keeps a first run without that file silent
rm -f /tmp/bandwidth_model

# NOTE(review): memlat is started twice with identical arguments; presumably the
# first run creates the model file and the second one uses it -- confirm.
../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0
../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0


================================================
FILE: benchmark-tests/memlat-bench-test-10M-single-socket.sh
================================================
#!/bin/bash
#################################################################
#Copyright 2016 Hewlett Packard Enterprise Development LP.  
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or (at
#your option) any later version. This program is distributed in the
#hope that it will be useful, but WITHOUT ANY WARRANTY; without even
#the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#PURPOSE. See the GNU General Public License for more details. You
#should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation,
#Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#################################################################

# Latency accuracy sweep using physical node 0 only. For each emulated read
# latency the configuration is regenerated, memlat is run 10 times and one
# summary line is appended. All paths are relative, so the script has to be
# started from the directory that contains it.

#awk '($1~/physical_nodes/) {print;}'  nvmemul.ini

# switch every CPU to the "performance" cpufreq governor (needs sudo)
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

dir_name_res=FULL-RESULTS-test
dir_name_sum=SUMMARY-RESULTS-test

# start from empty result directories and no leftover foo* scratch files
rm -rf $dir_name_sum
mkdir  $dir_name_sum

rm -f foo*
rm -rf $dir_name_res
mkdir $dir_name_res

# record the governor that is actually in effect next to the results
cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor >> $dir_name_res/foo-runs-test

# one run with the unmodified configuration; its output only goes to ./foo
cp nvmemul-orig.ini nvmemul.ini
../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0 >foo


    for numchains in 1
    do
        for epoch in 10000
        do
            echo "#FORMAT #1_emul_lat(ns) #2_min_meas_lat(ns)  #3_aver_meas_lat(ns)  #4_max_meas_lat(ns)  #5_aver_error(%) #6_max_error(%)" >  $dir_name_sum/summary-nvm-lat-accuracy-epoch-$epoch-numchains-$numchains.txt

            for lat in 200 300 400 500 600 700 800 900 1000
            do
                # Build nvmemul.ini from nvmemul-orig.ini. The values arrive as
                # "v=<value>" arguments and are read back from ARGV in BEGIN.
                # Lines are addressed by number: line 7 (read) gets $lat, lines
                # 9 and 10 (max/min epoch duration) get $epoch; the
                # physical_nodes line is set to "0". The layout of
                # nvmemul-orig.ini therefore must not change.
                awk 'BEGIN {read_lat = substr(ARGV[2],3); epoch_lat = substr(ARGV[3],3);}
(!(NR==7 || NR==9 || NR==10 || $1~/physical_nodes/)){ print;}
(NR==7){ print $1,$2, read_lat,";";}
(NR==9){ print $1,$2, epoch_lat,";";}
(NR==10){ print $1,$2, epoch_lat,";";}
($1~/physical_nodes/) {print $1,$2,"\"0\""";";}
' nvmemul-orig.ini v=$lat v=$epoch > foo-nvmemul-$lat-$epoch.ini
                mv foo-nvmemul-$lat-$epoch.ini  nvmemul.ini
                echo "lat epoch chains" $lat $epoch $numchains >>   $dir_name_res/foo-runs

                # 10 repetitions with 10M elements each
                for time in 1 2 3 4 5 6 7 8 9 10
                do
                    ../build/bench/memlat/memlat 1 1 $numchains 10000000 64 8 0 0 >> $dir_name_res/full_results-$lat-$epoch-$numchains.txt
                done
                # keep only the "latency_ns: <value> ns" lines; field 2 is the measurement
                grep latency_ns $dir_name_res/full_results-$lat-$epoch-$numchains.txt > $dir_name_res/results-$lat-$epoch-$numchains.txt
                # min / average / max of the measurements plus average and maximum
                # error, both as a percentage of the emulated latency
                awk 'BEGIN {max = 0; min = 1000000; sum = 0; aver=0.0; max_error=0.0; aver_error=0.0;read_lat = substr(ARGV[2],3);epoch_lat = substr(ARGV[3],3); MPL = substr(ARGV[4],3); }
($2 > max){max = $2;}
($2 < min){min = $2;}
{sum=sum+$2; if ($2 < read_lat*1.0) {error=read_lat -$2} else {error=$2 - read_lat}; if (error > max_error) max_error=error;}
END {aver=sum/NR; if (aver < read_lat*1.0) {aver_error = (read_lat - aver)*100.0/read_lat} else {aver_error = (aver - read_lat )*100.0/read_lat}; print read_lat, min,aver,max, aver_error,max_error*100.0/read_lat;} '   $dir_name_res/results-$lat-$epoch-$numchains.txt v=$lat v=$epoch v=$numchains >> $dir_name_sum/summary-nvm-lat-accuracy-epoch-$epoch-numchains-$numchains.txt

            done
        done
    done


#FORMAT_summary-results: #1_nvm_lat(ns) #2_min_nvm_lat(ns)  #3_aver_nvm_lat(ns)  #4_max_nvm_lat(ns)  #5_aver_error(%) #6_max_error(%)

#parameter is nvm_lat





================================================
FILE: benchmark-tests/memlat-bench-test-10M.sh
================================================
#!/bin/bash
#################################################################
#Copyright 2016 Hewlett Packard Enterprise Development LP.  
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or (at
#your option) any later version. This program is distributed in the
#hope that it will be useful, but WITHOUT ANY WARRANTY; without even
#the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#PURPOSE. See the GNU General Public License for more details. You
#should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation,
#Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#################################################################

# Latency accuracy sweep for the "local" and "remote" configurations. For each
# emulated read latency the configuration is regenerated, memlat is run 10
# times and one summary line is appended. All paths are relative, so the script
# has to be started from the directory that contains it.

#awk '($1~/physical_nodes/) {print;}'  nvmemul.ini

# On a machine with a single physical socket delegate to the single-socket
# variant and hand its exit status back to the caller.
num_sockets=$(cat /proc/cpuinfo | grep "physical id" | sort -u | wc -l)
if [ $num_sockets -eq 1 ];
then
echo "Single Socket"
./memlat-bench-test-10M-single-socket.sh
exit $?
fi

# switch every CPU to the "performance" cpufreq governor (needs sudo)
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

dir_name_res=FULL-RESULTS-test
dir_name_sum=SUMMARY-RESULTS-test

# start from empty result directories and no leftover foo* scratch files
rm -rf $dir_name_sum
mkdir  $dir_name_sum

rm -f foo*
rm -rf $dir_name_res
mkdir $dir_name_res

# record the governor that is actually in effect next to the results
cat /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor >> $dir_name_res/foo-runs-test

# one run with the unmodified configuration; its output only goes to ./foo
cp nvmemul-orig.ini nvmemul.ini
../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 1 >foo

for conf in local remote
do
    # last memlat argument: 0 for the local configuration, 1 for the remote one
    if [ $conf = local ]; then confpar=0
    else confpar=1
    fi
    for numchains in 1
    do
        for epoch in 10000
        do
            echo "#FORMAT #1_emul_lat(ns) #2_min_meas_lat(ns)  #3_aver_meas_lat(ns)  #4_max_meas_lat(ns)  #5_aver_error(%) #6_max_error(%)" >  $dir_name_sum/summary-nvm-lat-accuracy-$conf-epoch-$epoch-numchains-$numchains.txt

            for lat in 200 300 400 500 600 700 800 900 1000
            do
                # Build nvmemul.ini from nvmemul-orig.ini. The values arrive as
                # "v=<value>" arguments and are read back from ARGV in BEGIN.
                # Lines are addressed by number: line 7 (read) gets $lat, lines
                # 9 and 10 (max/min epoch duration) get $epoch; physical_nodes
                # becomes "0" for local and "0,1" for remote. The layout of
                # nvmemul-orig.ini therefore must not change.
                awk 'BEGIN {read_lat = substr(ARGV[2],3); epoch_lat = substr(ARGV[3],3); config = substr(ARGV[4],3);}
(!(NR==7 || NR==9 || NR==10 || $1~/physical_nodes/)){ print;}
(NR==7){ print $1,$2, read_lat,";";}
(NR==9){ print $1,$2, epoch_lat,";";}
(NR==10){ print $1,$2, epoch_lat,";";}
($1~/physical_nodes/ && config ~ /local/) {print $1,$2,"\"0\""";";}
($1~/physical_nodes/ && config ~ /remote/) {print $1,$2,"\"0,1\""";";}
' nvmemul-orig.ini v=$lat v=$epoch v=$conf > foo-nvmemul-$lat-$epoch.ini
                mv foo-nvmemul-$lat-$epoch.ini  nvmemul.ini
                echo "lat epoch chains" $lat $epoch $numchains >>   $dir_name_res/foo-runs

                # 10 repetitions with 10M elements each
                for time in 1 2 3 4 5 6 7 8 9 10
                do
                    ../build/bench/memlat/memlat 1 1 $numchains 10000000 64 8 0 $confpar >> $dir_name_res/full_results-$conf-$lat-$epoch-$numchains.txt
                done
                # keep only the "latency_ns: <value> ns" lines; field 2 is the measurement
                grep latency_ns $dir_name_res/full_results-$conf-$lat-$epoch-$numchains.txt > $dir_name_res/results-$conf-$lat-$epoch-$numchains.txt
                # min / average / max of the measurements plus average and maximum
                # error, both as a percentage of the emulated latency
                awk 'BEGIN {max = 0; min = 1000000; sum = 0; aver=0.0; max_error=0.0; aver_error=0.0;read_lat = substr(ARGV[2],3);epoch_lat = substr(ARGV[3],3); MPL = substr(ARGV[4],3); }
($2 > max){max = $2;}
($2 < min){min = $2;}
{sum=sum+$2; if ($2 < read_lat*1.0) {error=read_lat -$2} else {error=$2 - read_lat}; if (error > max_error) max_error=error;}
END {aver=sum/NR; if (aver < read_lat*1.0) {aver_error = (read_lat - aver)*100.0/read_lat} else {aver_error = (aver - read_lat )*100.0/read_lat}; print read_lat, min,aver,max, aver_error,max_error*100.0/read_lat;} '   $dir_name_res/results-$conf-$lat-$epoch-$numchains.txt v=$lat v=$epoch v=$numchains >> $dir_name_sum/summary-nvm-lat-accuracy-$conf-epoch-$epoch-numchains-$numchains.txt

            done
        done
    done
done


#FORMAT_summary-results: #1_nvm_lat(ns) #2_min_nvm_lat(ns)  #3_aver_nvm_lat(ns)  #4_max_nvm_lat(ns)  #5_aver_error(%) #6_max_error(%)

#parameter is nvm_lat





================================================
FILE: benchmark-tests/memlat-orig-lat-test-single-socket.sh
================================================
#!/bin/bash
#################################################################
#Copyright 2016 Hewlett Packard Enterprise Development LP.  
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or (at
#your option) any later version. This program is distributed in the
#hope that it will be useful, but WITHOUT ANY WARRANTY; without even
#the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#PURPOSE. See the GNU General Public License for more details. You
#should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation,
#Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#################################################################

# Collects the latency that memlat reports in its "measuring latency: latency
# is" output line over 20 runs and writes min / average / max to
# ORIG-lat-test/final-hw-latency.txt. All paths are relative, so the script has
# to be started from the directory that contains it.

#awk '($1~/physical_nodes/) {print;}'  nvmemul.ini

# switch every CPU to the "performance" cpufreq governor (needs sudo)
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

dir_name_res=ORIG-lat-test

# start from an empty result directory and no leftover foo* scratch files
rm -f foo*
rm -rf $dir_name_res
mkdir $dir_name_res


# activate the debug configuration and do one run whose output is not collected
cp  nvmemul-debug.ini  nvmemul.ini
../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0

for time in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
do
    ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 0 > $dir_name_res/foo-hw-latency.txt
    grep "measuring latency: latency is" $dir_name_res/foo-hw-latency.txt > $dir_name_res/foo
    # field 7 of the first matching line is taken as the latency value
    awk 'NR==1 {local=$7;}
         END {print local}'  $dir_name_res/foo >>  $dir_name_res/list-hw-latency.txt
done

echo "#FORMAT:#1_min #2_aver #3_max" > $dir_name_res/final-hw-latency.txt  

# the list holds one value per line, so only column 1 is aggregated
awk 'BEGIN {max1 = 0.0; min1 = 10000000.0; sum1 = 0.0;}
         ($1 > max1){max1 = $1;}
         ($1 < min1){min1 = $1;}
         {sum1=sum1+$1;}
         END {print min1, sum1/NR, max1;}'  $dir_name_res/list-hw-latency.txt  >> $dir_name_res/final-hw-latency.txt  

# remove the per-run scratch files, keeping the list and the final summary
rm  $dir_name_res/foo*





















================================================
FILE: benchmark-tests/memlat-orig-lat-test.sh
================================================
#!/bin/bash
#################################################################
#Copyright 2016 Hewlett Packard Enterprise Development LP.  
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or (at
#your option) any later version. This program is distributed in the
#hope that it will be useful, but WITHOUT ANY WARRANTY; without even
#the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#PURPOSE. See the GNU General Public License for more details. You
#should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation,
#Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#################################################################

# Collects the two latencies that memlat reports in its "measuring latency:
# latency is" output lines (first line: local, second line: remote) over 20
# runs and writes min / average / max of each to
# ORIG-lat-test/final-hw-latency.txt. All paths are relative, so the script has
# to be started from the directory that contains it.

#awk '($1~/physical_nodes/) {print;}'  nvmemul.ini

# On a machine with a single physical socket delegate to the single-socket
# variant and hand its exit status back to the caller.
num_sockets=$(cat /proc/cpuinfo | grep "physical id" | sort -u | wc -l)
if [ $num_sockets -eq 1 ];
then
echo "Single Socket"
./memlat-orig-lat-test-single-socket.sh
exit $?
fi

# switch every CPU to the "performance" cpufreq governor (needs sudo)
echo performance | sudo tee /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor

dir_name_res=ORIG-lat-test

# start from an empty result directory and no leftover foo* scratch files
rm -f foo*
rm -rf $dir_name_res
mkdir $dir_name_res


# activate the debug configuration and do one run whose output is not collected
cp  nvmemul-debug.ini  nvmemul.ini
../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 1

#FORMAT: ns
#FORMAT: min_local #2_aver_local max_local min_remote #5_aver_remote max_remote 
#FORMAT: 

for time in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
do
    ../build/bench/memlat/memlat 1 1 1 1000000 64 8 0 1 > $dir_name_res/foo-hw-latency.txt
    grep "measuring latency: latency is" $dir_name_res/foo-hw-latency.txt > $dir_name_res/foo
    # field 7 of the first matching line is the local value, of the second the remote value
    awk 'NR==1 {local=$7;}
         NR==2 {remote=$7;}
         END {print local , remote}'  $dir_name_res/foo >>  $dir_name_res/list-hw-latency.txt
done

echo "#FORMAT:#1_min_local #2_aver_local #3_max_local #4_min_remote #5_aver_remote #6_max_remote" > $dir_name_res/final-hw-latency.txt  

# column 1 holds the local values, column 2 the remote values
awk 'BEGIN {max1 = 0.0; min1 = 10000000.0; max2 = 0.0; min2 = 10000000.0; sum1 = 0.0; sum2 = 0.0;}
         ($1 > max1){max1 = $1;}
         ($1 < min1){min1 = $1;}
         ($2 > max2){max2 = $2;}
         ($2 < min2){min2 = $2;}
         {sum1=sum1+$1;sum2=sum2+$2;}
         END {print min1, sum1/NR, max1,  min2, sum2/NR, max2 ;}'  $dir_name_res/list-hw-latency.txt  >> $dir_name_res/final-hw-latency.txt  

# remove the per-run scratch files, keeping the list and the final summary
rm  $dir_name_res/foo*

#FORMAT:   ns
#FORMAT:#1_min_local #2_aver_local #3_max_local #4_min_remote #5_aver_remote #6_max_remote 





















================================================
FILE: benchmark-tests/nvmemul-bandwidth.ini
================================================
# Configuration file 

latency:
{
    enable = true;
    inject_delay = true;
    read = 1000;
    write = 1000;
    max_epoch_duration_us = 10000;
    min_epoch_duration_us = 10000;
    calibration = false;
};

bandwidth:
{
    enable = true;
    model = "/tmp/bandwidth_model";
    read = 2000;
    write = 2000;
};

topology:
{
    mc_pci = "/tmp/mc_pci_bus";
    physical_nodes = "0";
    hyperthreading = true; # do not use multiple hardware threads per core
};

statistics:
{
    enable = true;
    #file = "/tmp/statistics";
};

debug:
{
    # debugging level
    level = 5;
    verbose = 0;

    # modules set to True produce debugging output
    module:
    {
        all = False;
    };
};


================================================
FILE: benchmark-tests/nvmemul-debug.ini
================================================
# Configuration file 

latency:
{
    enable = true;
    inject_delay = true;
read = 1000 ;
    write = 1000;
max_epoch_duration_us = 10000 ;
min_epoch_duration_us = 10000 ;
    calibration = false;
};

bandwidth:
{
    enable = false;
    model = "/tmp/bandwidth_model";
    read = 2000;
    write = 2000;
};

topology:
{
    mc_pci = "/tmp/mc_pci_bus";
physical_nodes = "0,1";
    hyperthreading = true; # do not use multiple hardware threads per core
};

statistics:
{
    enable = true;
    #file = "/tmp/statistics";
};

debug:
{
    # debugging level
    level = 5;
    verbose = 0;

    # modules set to True produce debugging output
    module:
    {
        all = False;
    };
};


================================================
FILE: benchmark-tests/nvmemul-orig.ini
================================================
# Configuration file 

latency:
{
    enable = true;
    inject_delay = true;
read = 1000 ;
    write = 1000;
max_epoch_duration_us = 10000 ;
min_epoch_duration_us = 10000 ;
    calibration = false;
};

bandwidth:
{
    enable = false;
    model = "/tmp/bandwidth_model";
    read = 2000;
    write = 2000;
};

topology:
{
    mc_pci = "/tmp/mc_pci_bus";
physical_nodes = "0,1";
    hyperthreading = true; # do not use multiple hardware threads per core
};

statistics:
{
    enable = true;
    #file = "/tmp/statistics";
};

debug:
{
    # debugging level
    level = 3;
    verbose = 0;

    # modules set to True produce debugging output
    module:
    {
        all = False;
    };
};


================================================
FILE: benchmark-tests/nvmemul.ini
================================================
# Configuration file 

latency:
{
    enable = true;
    inject_delay = true;
read = 300 ;
    write = 200;
max_epoch_duration_us = 10000 ;
min_epoch_duration_us = 10000 ;
    calibration = false;
};

bandwidth:
{
    enable = false;
    model = "/tmp/bandwidth_model";
    read = 2000;
    write = 2000;
};

topology:
{
    mc_pci = "/tmp/mc_pci_bus";
physical_nodes = "0,1";
    hyperthreading = true; # do not use multiple hardware threads per core
};

statistics:
{
    enable = true;
    #file = "/tmp/statistics";
};

debug:
{
    # debugging level
    level = 5;
    verbose = 0;

    # modules set to True produce debugging output
    module:
    {
        all = False;
    };
};


================================================
FILE: license.txt
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/




================================================
FILE: nvmemul-orig.ini
================================================
# Configuration file 

latency:
{
    enable = true;
    inject_delay = true;
read = 1000 ;
    write = 1000;
max_epoch_duration_us = 10000 ;
min_epoch_duration_us = 10000 ;
    calibration = false;
};

bandwidth:
{
    enable = false;
    model = "/tmp/bandwidth_model";
    read = 2000;
    write = 2000;
};

topology:
{
    mc_pci = "/tmp/mc_pci_bus";
physical_nodes = "0,1";
    hyperthreading = true; # do not use multiple hardware threads per core
};

statistics:
{
    enable = true;
    #file = "/tmp/statistics";
};

debug:
{
    # debugging level
    level = 3;
    verbose = 0;

    # modules set to True produce debugging output
    module:
    {
        all = False;
    };
};


================================================
FILE: nvmemul.dox
================================================
/**

@mainpage Quartz:  A Lightweight  Performance Emulator for  Persistent Memory Software.


\section section-intro Introduction

Quartz: A DRAM-based performance emulation platform that leverages features 
available in commodity hardware to emulate different latency and bandwidth 
characteristics of future byte-addressable NVM technologies.

*/

    









================================================
FILE: nvmemul.ini
================================================
# Configuration file 

latency:
{
    enable = true;
    inject_delay = true;
read = 1000 ;
    write = 1000;
max_epoch_duration_us = 10000 ;
min_epoch_duration_us = 10000 ;
    calibration = false;
};

bandwidth:
{
    enable = false;
    model = "/tmp/bandwidth_model";
    read = 500;
    write = 500;
};

topology:
{
    mc_pci = "/tmp/mc_pci_bus";
physical_nodes = "0,1";
    hyperthreading = true; # do not use multiple hardware threads per core
};

statistics:
{
    enable = true;
    #file = "/tmp/statistics";
};

debug:
{
    # debugging level
    level = 1;
    verbose = 0;

    # modules set to True produce debugging output
    module:
    {
        all = False;
    };
};


================================================
FILE: scripts/install.sh
================================================
#!/bin/bash
#################################################################
#Copyright 2016 Hewlett Packard Enterprise Development LP.  
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or (at
#your option) any later version. This program is distributed in the
#hope that it will be useful, but WITHOUT ANY WARRANTY; without even
#the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#PURPOSE. See the GNU General Public License for more details. You
#should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation,
#Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#################################################################

# PAPI release compared by check_supported_papi (all three components
# must match exactly).
PAPI_MAJOR=5
PAPI_MINOR=1
PAPI_RELEASE=1

# Oldest CMake release (major.minor) accepted by check_supported_cmake.
CMAKE_MAJOR=2
CMAKE_MINOR=8

# Install the build dependencies with yum (RPM-based distributions).
# The kernel-devel package is requested for the running kernel release.
# Terminates the script when yum reports a failure.
function install_deps_rpm() {
    local running_kernel
    running_kernel=`uname -r`

    if ! yum install -q -y numactl-devel libconfig libconfig-devel cmake kernel-devel-${running_kernel} msr-tools uthash-devel; then
        echo "Dependencies installation failed"
        exit -1
    fi
}

# Install the build dependencies with apt-get (Debian-based distributions).
# Terminates the script when apt-get reports a failure.
function install_deps_deb() {
    if ! apt-get install -y libnuma-dev libconfig-dev cmake msr-tools uthash-dev; then
        echo "Dependencies installation failed"
        exit -1
    fi
}

# Verify that the installed PAPI release is exactly
# PAPI_MAJOR.PAPI_MINOR.PAPI_RELEASE; terminates the script otherwise.
#
# The version string is taken from the third space-separated field of the
# `papi_version` output, with anything after a '-' dropped.
function check_supported_papi() {
    local version

    # Query the tool once and split the dotted version afterwards.
    version=`papi_version | cut -d ' ' -f3 | cut -d '-' -f1`
    major=`echo "${version}" | cut -d '.' -f1`
    minor=`echo "${version}" | cut -d '.' -f2`
    release=`echo "${version}" | cut -d '.' -f3`

    if [ ${major} -ne ${PAPI_MAJOR} ] || [ ${minor} -ne ${PAPI_MINOR} ] || [ ${release} -ne ${PAPI_RELEASE} ]; then
        echo "PAPI version (${major}.${minor}.${release}) not supported (=${PAPI_MAJOR}.${PAPI_MINOR}.${PAPI_RELEASE})"
        exit -1
    fi
}

# Verify that the installed CMake is at least CMAKE_MAJOR.CMAKE_MINOR;
# terminates the script otherwise.
#
# The version is the third space-separated field of the first line printed
# by `cmake -version`, with anything after a '-' dropped.
function check_supported_cmake() {
    local dotted_version
    dotted_version=`cmake -version | head -1 | cut -d ' ' -f3 | cut -d '-' -f1`

    major=`echo "${dotted_version}" | cut -d '.' -f1`
    minor=`echo "${dotted_version}" | cut -d '.' -f2`

    # Too old when the major is lower, or the major matches and the minor is lower.
    if [ ${major} -lt ${CMAKE_MAJOR} ] || { [ ${major} -eq ${CMAKE_MAJOR} ] && [ ${minor} -lt ${CMAKE_MINOR} ]; }; then
        echo "CMake version (${major}.${minor}) not supported (>=${CMAKE_MAJOR}.${CMAKE_MINOR})"
        exit -1
    fi
}

# Run the tool version checks that are currently enabled.
# Only the CMake check runs; the PAPI check is commented out.
check_supported_versions() {
    check_supported_cmake
    # check_supported_papi
}


#################### MAIN ####################

# Installing packages requires root privileges.
if [ "$(id -u)" -ne 0 ]; then
   echo "You must be root to execute this script"
   exit -1
fi

# Select the package manager from the distribution's release file.
if [ -f /etc/redhat-release ] || [ -f /etc/centos-release ]; then
    install_deps_rpm
elif [ -f /etc/debian_version ] || [ -f /etc/debian-release ]; then
    install_deps_deb
else
    echo "Linux distribution not supported"
    exit -1
fi

check_supported_versions



================================================
FILE: scripts/runenv.sh
================================================
#!/bin/bash
#################################################################
#Copyright 2016 Hewlett Packard Enterprise Development LP.  
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or (at
#your option) any later version. This program is distributed in the
#hope that it will be useful, but WITHOUT ANY WARRANTY; without even
#the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#PURPOSE. See the GNU General Public License for more details. You
#should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation,
#Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#################################################################

# Root of the emulator checkout, derived from this script's location.
NVM_EMUL_PATH="$(dirname "$0")/.."


if [ -z "$1" ]; then
    echo "runenv.sh [cmd to run]"
    exit 1
fi

rootdir="$NVM_EMUL_PATH"
bindir="${rootdir}/build"

# Switch every CPU to the "performance" frequency governor when cpufreq is
# available and cpu0 is not already using it.
if [ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then
    current_scaling=$(cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor)

    if [ "${current_scaling}" != "performance" ]; then
        for cpu_file in /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor; do
            echo "performance" | sudo tee "${cpu_file}" > /dev/null
        done
    fi
fi

"${rootdir}/scripts/turboboost.sh" disable

# On kernels with major version >= 4, write "2" to the rdpmc control file.
v=$(uname -r | cut -d '.' -f1)
if [ "$v" -ge 4 ]; then
    echo "2" | sudo tee /sys/bus/event_source/devices/cpu/rdpmc
fi

# Preload the emulator library into the command and point it at its
# configuration file.
export LD_PRELOAD="${bindir}/src/lib/libnvmemul.so"
export NVMEMUL_INI="${rootdir}/nvmemul.ini"

if [ ! -f "${LD_PRELOAD}" ]; then
    echo "Library not found. Compile the emulator's library first."
    exit -1
fi

echo "$LD_PRELOAD"
echo "$NVMEMUL_INI"

# execute the command passed as argument
if [ $# -eq 1 ]; then
    # A single argument may hold a whole command line ("cmd arg1 arg2");
    # keep splitting it into words as earlier versions of this script did.
    $1
else
    # Several arguments: pass each one through unchanged so that arguments
    # containing spaces or glob characters are preserved.
    "$@"
fi



================================================
FILE: scripts/setupdev.sh
================================================
#!/bin/bash
#################################################################
#Copyright 2016 Hewlett Packard Enterprise Development LP.  
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or (at
#your option) any later version. This program is distributed in the
#hope that it will be useful, but WITHOUT ANY WARRANTY; without even
#the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#PURPOSE. See the GNU General Public License for more details. You
#should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation,
#Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#################################################################

# Root of the emulator checkout, derived from this script's location
# (quoted so that a checkout path containing spaces still works).
NVM_EMUL_PATH="$(dirname "$0")/.."

device_name="nvmemul"
device_module_name="${device_name}.ko"
device_path="/dev/${device_name}"
# Location of the compiled kernel module inside the build tree; empty when
# the module has not been built. Only the first match is kept.
device_module_path=$(find "${NVM_EMUL_PATH}/build" -name "${device_module_name}" | head -n 1)


# Load the kernel module and (re)create its character device node.
#
# Steps: insmod the module, look up the major number the kernel assigned to
# it in /proc/devices, recreate ${device_path} with that major and make it
# readable/writable by everyone. Terminates the script on any failure.
function loaddev {
    if [ -z "${device_module_path}" ]; then
        echo "Module not found. Compile the emulator's source code first."
        exit -1
    fi

    if ! /sbin/insmod "${device_module_path}" 2> /dev/null; then
        # Distinguish "already loaded" from a genuine load failure.
        if lsmod | grep "${device_name}" > /dev/null; then
            echo "Kernel module already loaded, please reload it."
            exit 1
        fi
        echo "Kernel module loading failed"
        exit 1
    fi

    # Match the device name column exactly so that another device whose
    # name merely contains ${device_name} is not picked up.
    device_major=$(awk -v name="${device_name}" '$2 == name { print $1; exit }' /proc/devices)
    if [ -z "${device_major}" ]; then
        echo "Failed to detect module major"
        exit 1
    fi

    if ! rm -f "${device_path}"; then
        echo "Failed to delete kernel module device file"
        exit 1
    fi

    # Without the device node the library cannot talk to the module, so a
    # failure here must not be reported as success.
    if ! mknod "${device_path}" c "${device_major}" 0 || ! chmod a+wr "${device_path}"; then
        echo "Failed to create kernel module device file"
        exit 1
    fi

    if lsmod | grep "${device_name}" > /dev/null; then
        echo "Kernel module loaded successfully"
    else
        echo "kernel module loading failed"
        exit 1
    fi
}

# Unload the kernel module and remove its device node.
# The rmmod result is not checked (its errors are discarded); only a
# failure to delete the device node terminates the script.
function unloaddev {
    /sbin/rmmod ${device_name} 2> /dev/null

    if rm -f ${device_path}; then
        echo "Kernel module unloaded successfully"
        return
    fi

    echo "Failed to delete kernel module device file"
    exit 1
}

# Print the command-line synopsis of this script.
help() {
    printf '%s\n' "$0 <load|unload|reload>"
}

### MAIN ###

# Loading/unloading kernel modules requires root privileges.
if [ "$(id -u)" -ne 0 ]; then
   echo "You must be root to execute this script"
   exit -1
fi

if [ $# -eq 0 ]; then
    help
    exit 1
fi

# Dispatch on the requested action; each action also has a one-letter alias.
case "$1" in
    load|l)
        loaddev
        ;;
    unload|u)
        unloaddev
        ;;
    reload|r)
        unloaddev
        loaddev
        ;;
    *)
        help
        exit 1
        ;;
esac

exit 0


================================================
FILE: scripts/turboboost.sh
================================================
#!/bin/bash
#################################################################
#Copyright 2016 Hewlett Packard Enterprise Development LP.  
#This program is free software; you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation; either version 2 of the License, or (at
#your option) any later version. This program is distributed in the
#hope that it will be useful, but WITHOUT ANY WARRANTY; without even
#the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
#PURPOSE. See the GNU General Public License for more details. You
#should have received a copy of the GNU General Public License along
#with this program; if not, write to the Free Software Foundation,
#Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#################################################################

# Print the command-line synopsis. The function names listed here are the
# ones accepted by the dispatcher at the bottom of this script.
function usage()
{
    echo "$0 <function> [target CPU id]"
    echo -e "\tfunctions:"
    echo -e "\t\t check: verifies if a given CPU id has Turbo Boost enabled"
    echo -e "\t\t disable: disables a given CPU id or all CPUs if not specified"
    echo -e "\t\t enable: enables a given CPU id or all CPUs if not specified"
}

# Terminate the script unless $1 consists solely of decimal digits.
function verify_cpu_id()
{
    re='^[0-9]+$'

    if [[ $1 =~ $re ]]; then
        return
    fi

    echo "CPU id is not a number"
    exit 1
}

# Try to load the msr kernel module when lsmod does not list it.
# A modprobe failure is ignored on purpose: some systems need the module,
# others don't.
function check_msr_module()
{
    if lsmod | grep msr > /dev/null; then
        return
    fi

    sudo modprobe msr &> /dev/null
}

# Report whether Turbo Boost is enabled for CPU $1.
#
# Reads bit 38 of MSR 0x1a0 with rdmsr; a value of 1 is reported as
# "disabled". Terminates the script when no CPU id is given or when the id
# is not below the CPU count reported by lscpu.
function check()
{
    cpu=$1

    if [ -z "${cpu}" ]; then
        usage
        exit 1
    fi

    # Look the "CPU(s):" field up by name: its line position in the lscpu
    # output differs between lscpu versions.
    cpus=$(LC_ALL=C lscpu | awk '/^CPU\(s\):/ { print $2; exit }')
    if ! [[ ${cpus} =~ ^[0-9]+$ ]]; then
        echo "Unable to determine the number of CPUs"
        exit 1
    fi

    if [ "${cpu}" -ge "${cpus}" ]; then
        echo "CPU id out of range"
        exit 1
    fi

    disabled=$(sudo rdmsr -p${cpu} 0x1a0 -f 38:38)

    if [ "${disabled}" == "1" ]; then
        echo "Turbo Boost for processor ${cpu} is disabled"
    else
        echo "Turbo Boost for processor ${cpu} is enabled"
    fi
}

# Enable Turbo Boost for CPU $1, or for every CPU when $1 is empty, by
# writing 0x850089 to MSR 0x1a0 with wrmsr.
# Terminates the script when the id is not below the CPU count reported by
# lscpu.
function enable()
{
    cpu=$1

    # Look the "CPU(s):" field up by name: its line position in the lscpu
    # output differs between lscpu versions.
    cpus=$(LC_ALL=C lscpu | awk '/^CPU\(s\):/ { print $2; exit }')
    if ! [[ ${cpus} =~ ^[0-9]+$ ]]; then
        echo "Unable to determine the number of CPUs"
        exit 1
    fi

    if [ -z "${cpu}" ]; then
        for (( i=0; i<cpus; i++ )); do
            sudo wrmsr -p$i 0x1a0 0x850089
        done
        echo "Turbo Boost enabled for all CPUs"
    else
        if [ "${cpu}" -ge "${cpus}" ]; then
            echo "CPU id out of range"
            exit 1
        fi
        sudo wrmsr -p${cpu} 0x1a0 0x850089
        echo "Turbo Boost enabled for CPU ${cpu}"
    fi
}

# Disable Turbo Boost for CPU $1, or for every CPU when $1 is empty, by
# writing 0x4000850089 to MSR 0x1a0 with wrmsr (same value as enable() with
# bit 38 set).
# Terminates the script when the id is not below the CPU count reported by
# lscpu.
function disable()
{
    cpu=$1

    # Look the "CPU(s):" field up by name: its line position in the lscpu
    # output differs between lscpu versions.
    cpus=$(LC_ALL=C lscpu | awk '/^CPU\(s\):/ { print $2; exit }')
    if ! [[ ${cpus} =~ ^[0-9]+$ ]]; then
        echo "Unable to determine the number of CPUs"
        exit 1
    fi

    if [ -z "${cpu}" ]; then
        for (( i=0; i<cpus; i++ )); do
            sudo wrmsr -p$i 0x1a0 0x4000850089
        done
        echo "Turbo Boost disabled for all CPUs"
    else
        if [ "${cpu}" -ge "${cpus}" ]; then
            echo "CPU id out of range"
            exit 1
        fi
        sudo wrmsr -p${cpu} 0x1a0 0x4000850089
        echo "Turbo Boost disabled for CPU ${cpu}"
    fi
}



### MAIN ###

# A function name is mandatory; the CPU id is optional.
if [ $# -eq 0 ]; then
    usage
    exit 1
fi

funct=$1
target_cpu=$2

check_msr_module

# Validate the CPU id only when one was supplied.
if [ -n "${target_cpu}" ]; then
    verify_cpu_id ${target_cpu}
fi

# Dispatch to the requested function.
if [ "${funct}" = "enable" ]; then
    enable ${target_cpu}
elif [ "${funct}" = "disable" ]; then
    disable ${target_cpu}
elif [ "${funct}" = "check" ]; then
    check ${target_cpu}
else
    usage
    exit 1
fi

exit 0



================================================
FILE: src/CMakeLists.txt
================================================
# Userspace emulation library (shared library target "nvmemul").
add_subdirectory(lib)
# Kernel module nvmemul.ko, built through its Kbuild Makefile.
add_subdirectory(dev)


================================================
FILE: src/dev/CMakeLists.txt
================================================
# Build NVM Emulation device driver (using Kbuild Makefile)

set(DEV_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
set(DEV_BIN_DIR "${CMAKE_CURRENT_BINARY_DIR}")
set(DEV_KERNEL_MODULE "${DEV_BIN_DIR}/nvmemul.ko")
mark_as_advanced(DEV_DIR DEV_BIN_DIR)

# The driver sources are copied into the build folder and make is invoked
# there, which keeps the driver's source folder free of build artifacts.
#
# NOTE: the build step runs ${CMAKE_MAKE_PROGRAM} on the Kbuild Makefile, so
# it assumes a Makefile-based generator.
file(MAKE_DIRECTORY "${DEV_BIN_DIR}")
add_custom_command(OUTPUT "${DEV_KERNEL_MODULE}"
    COMMAND ${CMAKE_COMMAND} -E copy_directory "${DEV_DIR}" "${DEV_BIN_DIR}"
    COMMAND ${CMAKE_MAKE_PROGRAM} -j
    # Quoted so the whole text is passed as the single COMMENT value.
    COMMENT "[Build-NVM Emulation Device]"
    WORKING_DIRECTORY "${DEV_BIN_DIR}"
    # Every input of the module build: a change to any of them must
    # trigger a rebuild of the module.
    DEPENDS
        "${DEV_DIR}/pmc.c"
        "${DEV_DIR}/ioctl_query.h"
        "${DEV_DIR}/Makefile"
    VERBATIM
)

# we use add_custom_command for the build itself because otherwise we have to build it
# every time. the following add_custom_target gives a name for the output.
add_custom_target(dev_build ALL DEPENDS "${DEV_KERNEL_MODULE}")


================================================
FILE: src/dev/Makefile
================================================
# build modules
obj-m = nvmemul.o
nvmemul-objs = pmc.o

# use the kernel build system
# (the backquoted commands are expanded by the shell that runs each recipe)
KERNEL_VERSION := `uname -r`
KERNEL_SOURCE := /lib/modules/$(KERNEL_VERSION)/build

SRCDIR=`pwd`
OBJDIR=`pwd`

# Neither target produces a file of the same name.
.PHONY: all clean

# $(MAKE) instead of a literal "make": the sub-make then inherits the
# command-line flags and the job server of the invoking make.
all:
	$(MAKE) -C $(KERNEL_SOURCE) M=$(OBJDIR) modules

clean:
	$(MAKE) -C $(KERNEL_SOURCE) M=$(OBJDIR) clean


================================================
FILE: src/dev/ioctl_query.h
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#ifndef __IOCTL_QUERY_H
#define __IOCTL_QUERY_H

#include <linux/ioctl.h>

#define MYDEV_MAGIC (0xAA)

typedef struct { 
    unsigned int counter_id;
    unsigned int event_id;
} ioctl_query_setcounter_t;

typedef struct { 
    unsigned int bus_id;
    unsigned int device_id;
    unsigned int function_id;
    unsigned int offset;
    unsigned int val;
} ioctl_query_setgetpci_t;

#define IOCTL_SETCOUNTER _IOR(MYDEV_MAGIC, 0, ioctl_query_setcounter_t *) 
#define IOCTL_SETPCI     _IOR(MYDEV_MAGIC, 1, ioctl_query_setgetpci_t *) 
#define IOCTL_GETPCI     _IOWR(MYDEV_MAGIC, 2, ioctl_query_setgetpci_t *) 


#endif /* __IOCTL_QUERY_H */


================================================
FILE: src/dev/pmc.c
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/major.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/proc_fs.h>
#include <linux/fcntl.h>
#include <linux/smp.h>
#include <linux/uaccess.h>

#include <asm/msr.h>
#include <asm/uaccess.h>

#include "ioctl_query.h"

static long pmc_ioctl(struct file *f, unsigned int cmd, unsigned long arg);
//unsigned long read_cr4(void);
//void write_cr4(unsigned long);
#ifndef read_cr4
#define read_cr4 native_read_cr4
#endif
#ifndef write_cr4
#define write_cr4 native_write_cr4
#endif

struct file_operations pmc_fops = {
	.unlocked_ioctl = pmc_ioctl,
	.compat_ioctl = pmc_ioctl,
};

static const char* module_name = "nvmemul";
static int mod_major = 0;
static const int NVMEMUL_MAJOR = 0;
const const int PERFCTR0 = 0xc1;
const const int PERFEVENTSEL0 = 0x186;


/*
 * Set the PCE bit (0x100) in the CR4 register of the CPU this function
 * runs on. The argument is unused; the signature matches what
 * smp_call_function() expects.
 */
void pmc_set_pce_bit(void* arg)
{
	write_cr4(read_cr4() | 0x100);
}

int pmc_init_module(void)
{
 	printk(KERN_INFO "%s: Loading. Initializing...\n", module_name);
	if ((mod_major = register_chrdev(NVMEMUL_MAJOR, module_name, &pmc_fops)) == -EBUSY) {
		printk(KERN_INFO "%s: Unable to get major for %s device\n", module_name, module_name);
		return -EIO;
	}

	if (mod_major <= 0) {
		printk(KERN_INFO "%s: Unable to get major for %s device\n", module_name, module_name);
		return -EIO;
	}

	printk(KERN_INFO "%s: major is %d\n", module_name, mod_major);

	/*
	 * In order to use the rdpmc instruction in user mode, we need to set the
	 * PCE bit of CR4. PCE is 8th bit of cr4, and 256 is 2 << 8
	 */

    pmc_set_pce_bit(NULL);
    smp_call_function(pmc_set_pce_bit, NULL, 1);

	return 0;
}	

/*
 * Module exit point: releases the character-device major number obtained
 * in pmc_init_module().
 */
void pmc_exit_module(void)
{
	printk(KERN_INFO "%s: Unloading. Cleaning up...\n", module_name);

	unregister_chrdev(mod_major, module_name);
}

/*
 * Argument bundle handed (as void*) to the per-CPU callbacks pmc_clear()
 * and set_counter().
 */
struct counter_s {
    int counter_id;     /* index of the performance counter */
    unsigned long val;  /* value for the event-select register; unused by pmc_clear() */
};


/*
 * __pmc_clear zeroes the performance counter selected by counter_id on the
 * CPU it runs on.
 * counter_id = 0 => perfctr0
 * counter_id = 1 => perfctr1
 * The counter's MSR number is PERFCTR0 + counter_id; it is written with
 * WRMSR, with both EDX (high half) and EAX (low half) set to zero.
 *
 * counter_id is not range-checked here.
 */
static void __pmc_clear(int counter_id) {
	int counterRegister = PERFCTR0 + counter_id;
	/* clear the old register */

	/* ECX = MSR number, EDX:EAX = 0, then WRMSR. */
	__asm__ __volatile__("mov %0, %%ecx\n\t"
	        "xor %%edx, %%edx\n\t"
            "xor %%eax, %%eax\n\t"
            "wrmsr\n\t"
	        : /* no outputs */
	        : "m" (counterRegister)
	        : "eax", "ecx", "edx" /* all clobbered */);
}

/*
 * Per-CPU callback: arg points to a struct counter_s whose counter_id
 * selects the counter to zero on the current CPU.
 */
static void pmc_clear(void* arg)
{
    __pmc_clear(((struct counter_s*) arg)->counter_id);
}

void pmc_clear_all_cpu(int counter_id)
{
    struct counter_s counter = { counter_id, 0};
    pmc_clear((void*) &counter);
    smp_call_function(pmc_clear, (void*) &counter, 1);
}

/*
 * __set_counter programs the event-select register of the counter given by
 * counter_id on the CPU it runs on: the counter itself is cleared first,
 * then val is written to MSR PERFEVENTSEL0 + counter_id.
 *
 * The high half of the MSR (EDX) is always written as zero; EAX is loaded
 * from val with a 32-bit move.
 */

static void __set_counter(int counter_id, unsigned long val) 
{
    int selectionRegister = PERFEVENTSEL0 + counter_id;
    __pmc_clear(counter_id);

    /* set the value */

    __asm__ __volatile__("mov %0, %%ecx\n\t" /* ecx contains the number of the MSR to set */
            "xor %%edx, %%edx\n\t"/* edx contains the high bits to set the MSR to */
            "mov %1, %%eax\n\t" /* eax contains the low bits to set the MSR to */
            "wrmsr\n\t"
            : /* no outputs */
            : "m" (selectionRegister), "m" (val)
            : "eax", "ecx", "edx" /* clobbered */);
}

/*
 * Per-CPU callback: arg points to a struct counter_s holding the counter
 * index and the event-select value to program on the current CPU.
 */
void set_counter(void* arg)
{
    struct counter_s* request = arg;
    int id = request->counter_id;
    unsigned long event = request->val;

    __set_counter(id, event);
}

void set_counter_all_cpu(int counter_id, unsigned long arg)
{
    struct counter_s counter = { counter_id, arg};

    set_counter((void*) &counter);    
    smp_call_function(set_counter, (void*) &counter, 1);
}

/*
 * IOCTL_SETCOUNTER handler: programs one performance counter on all CPUs.
 *
 * arg is a user-space pointer to an ioctl_query_setcounter_t. The counter
 * is first disabled (event select = 0) and cleared, then its event-select
 * register is set to event_id.
 *
 * Returns 0 on success, -EFAULT when the request cannot be copied from
 * user space, -ENXIO when counter_id is outside 0..3.
 */
static long pmc_ioctl_setcounter(struct file* f, unsigned int cmd, unsigned long arg)
{
    ioctl_query_setcounter_t q;

    if (copy_from_user(&q, (ioctl_query_setcounter_t*) arg, sizeof(ioctl_query_setcounter_t))) {
        return -EFAULT;
    }

    /* counter_id is unsigned, so only the upper bound needs checking. */
    if (q.counter_id > 3) {
        printk(KERN_INFO "%s: set_counter illegal value 0x%x for counter\n", module_name, q.counter_id);
        return -ENXIO;
    }

    /* disable counter */
    set_counter_all_cpu(q.counter_id, 0);
    pmc_clear_all_cpu(q.counter_id);
    /* set counter */
    set_counter_all_cpu(q.counter_id, q.event_id);
    printk(KERN_INFO "%s: setcounter counter_id: 0x%x event_id=0x%x\n", module_name, q.counter_id, q.event_id); 
    return 0;
}

static long pmc_ioctl_setpci(struct file* f, unsigned int cmd, unsigned long arg)
{
    ioctl_query_setgetpci_t q;
    struct pci_bus *bus = NULL;

    if (copy_from_user(&q, (ioctl_query_setgetpci_t*) arg, sizeof(ioctl_query_setgetpci_t))) {
        return -EFAULT;
    }

    while ((bus = pci_find_next_bus(bus))) {
        if (q.bus_id == bus->number) {
            pci_bus_write_config_word(bus, PCI_DEVFN(q.device_id, q.function_id), q.offset, (u16) q.val);
            printk(KERN_INFO "%s: setpci bus_id=0x%x device_id=0x%x, function_id=0x%x, val=0x%x\n",
                    module_name, q.bus_id, q.device_id, q.function_id, q.val);
            return 0;
        }
    }
    return -ENXIO;
}

static long pmc_ioctl_getpci(struct file* f, unsigned int cmd, unsigned long arg)
{
    ioctl_query_setgetpci_t q;
    struct pci_bus *bus = NULL;

    if (copy_from_user(&q, (ioctl_query_setgetpci_t*) arg, sizeof(ioctl_query_setgetpci_t))) {
        return -EFAULT;
    }

    while ((bus = pci_find_next_bus(bus))) {
        if (q.bus_id == bus->number) {
            unsigned int val = 0;
            pci_bus_read_config_word(bus, PCI_DEVFN(q.device_id, q.function_id), q.offset, (u16*) &val);
            printk(KERN_INFO "%s: getpci bus_id 0x%x device_id 0x%x, function_id 0x%x, offset 0x%x, val 0x%x\n",
                    module_name, q.bus_id, q.device_id, q.function_id, q.offset, val);
            q.val = val;
            if (copy_to_user((ioctl_query_setgetpci_t*) arg, &q, sizeof(ioctl_query_setgetpci_t))) {
                return -EFAULT;
            }
            return 0;
        }
    }
    return -ENXIO;
}

static long pmc_ioctl(struct file *f, unsigned int cmd, unsigned long arg) 
{
    int ret = -1;

	printk(KERN_INFO "%s: ioctl command: 0x%x\n", module_name, cmd);
	switch (cmd) {
		case IOCTL_SETCOUNTER:
            ret = pmc_ioctl_setcounter(f, cmd, arg);
            break;
        case IOCTL_SETPCI:
            ret = pmc_ioctl_setpci(f, cmd, arg);
            break;
        case IOCTL_GETPCI:
            ret = pmc_ioctl_getpci(f, cmd, arg);
            break;
		default:
			printk(KERN_INFO "%s: ioctl illegal command: 0x%x\n", module_name, cmd);
			break;
	}
	return ret;
}


/*
 * Declaration of the init and exit functions: pmc_init_module is the
 * module's load hook, pmc_exit_module its unload hook.
 */
module_init(pmc_init_module);
module_exit(pmc_exit_module);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("HPLabs");


================================================
FILE: src/lib/CMakeLists.txt
================================================
project(nvmemul)

# STATISTICS=ON compiles the library with -DUSE_STATISTICS.
option(STATISTICS "Enable statistics report" ON)

if(NOT STATISTICS)
  message(STATUS "WITHOUT STATISTICS")
else()
  message(STATUS "WITH STATISTICS")
  add_definitions(-DUSE_STATISTICS)
endif()

# Sources of the emulation library proper; the cpu/ subdirectory contributes
# its objects separately (see below).
set(nvmemul_src
    config.c
    debug.c
    dev.c
    init.c
    interpose.c
    measure_bw.c
    measure_lat.c
    misc.c
    monotonic_timer.c
    model_bw.c
    model_lat.c
    pflush.c
    pmalloc.c
    stat.c
    thread.c
    topology.c
    process_rank.c
)

# Directory-scoped on purpose: the include paths and compiler flags below
# are set before add_subdirectory(cpu) so that the cpu object library is
# built with them as well.
include_directories(
    ${CMAKE_SOURCE_DIR}/third_party
    ${CMAKE_SOURCE_DIR}/src
    ${CMAKE_SOURCE_DIR}/src/lib
)
add_definitions(
    -g
    -O2
    -fPIC
    -Wall
    -march=native
    -fopenmp
    -std=gnu89
    #-DNDEBUG
    #-std=c99
    -msse4
)

add_subdirectory(cpu)

# The shared library bundles its own sources with the objects of the cpu
# object library.
add_library(nvmemul SHARED ${nvmemul_src} $<TARGET_OBJECTS:cpu>)
target_link_libraries(nvmemul dl config numa rt m gomp)


================================================
FILE: src/lib/config.c
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#include "config.h"
#include <libconfig.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <ctype.h>

/* Size of the buffers used to build environment variable names,
 * including the terminating NUL. */
#define ENVVAR_MAX_LEN 128

/**
 * Look up the environment variable "<prefix>_<name>".
 *
 * \param prefix first part of the variable name
 * \param name   second part of the variable name
 * \return the variable's value as returned by getenv(), or NULL when the
 *         variable is not set or the combined name does not fit into
 *         ENVVAR_MAX_LEN bytes.
 */
static char* __getenv(const char* prefix, const char* name)
{
	char normalized_name[ENVVAR_MAX_LEN];

	/* prefix + '_' + name + terminating NUL must fit into the buffer */
	if ((strlen(name) + strlen(prefix) + 2) > ENVVAR_MAX_LEN) {
		return NULL;
	}

	strcpy(normalized_name, prefix);
	strcat(normalized_name, "_");
	strcat(normalized_name, name);

	return getenv(normalized_name);
}

/**
 * Look up the environment override of a configuration setting.
 *
 * The setting name is converted to upper case with every '.' replaced by
 * '_', and the variable "<ENVVAR_PREFIX>_<converted name>" is queried.
 *
 * \param name      setting name (e.g. "latency.read")
 * \param value_str receives the variable's value when it is set
 * \return CONFIG_TRUE when the variable is set, CONFIG_FALSE when it is not
 *         or when the name does not fit into ENVVAR_MAX_LEN bytes.
 */
static inline int 
env_setting_lookup(const char *name, char **value_str)
{
	char *val;
	char normalized_name[ENVVAR_MAX_LEN];
	int  i;

	/* the converted name plus its terminating NUL must fit into the buffer */
	if ((strlen(name)) >= ENVVAR_MAX_LEN) {
		return CONFIG_FALSE;
	}
	
	for (i=0; name[i]; i++) {
		if (name[i] == '.') {
			normalized_name[i] = '_';
		} else {
			/* toupper() requires a value representable as unsigned char */
			normalized_name[i] = (char) toupper((unsigned char) name[i]);
		}
	}
	normalized_name[i] = '\0';
	
	val = __getenv(ENVVAR_PREFIX, normalized_name);
	if (val) {
		*value_str = val;
		return CONFIG_TRUE;
	} else {
		return CONFIG_FALSE;
	}
}


/**
 * Integer variant of env_setting_lookup(): the variable's text is converted
 * with atoi().
 *
 * \return CONFIG_TRUE and sets *value when the variable is set,
 *         CONFIG_FALSE otherwise (*value is left untouched).
 */
static inline int
env_setting_lookup_int(const char *name, int *value)
{
	char *text;

	if (env_setting_lookup(name, &text) == CONFIG_FALSE || !text) {
		return CONFIG_FALSE;
	}

	*value = atoi(text);
	return CONFIG_TRUE;
}


static inline int
env_setting_lookup_bool(const char *name, int *value)
{
	return env_setting_lookup_int(name, value);
}


/**
 * String variant of env_setting_lookup(): hands the variable's text back
 * unchanged.
 *
 * \return the result of env_setting_lookup().
 */
static inline int 
env_setting_lookup_string(const char *name, char **value)
{
	int found = env_setting_lookup(name, value);

	return found;
}


/**
 * Look up a boolean setting; an environment override takes precedence over
 * the configuration file.
 *
 * \return CONFIG_TRUE and sets *value when either source provides the
 *         setting, CONFIG_FALSE otherwise (*value is left untouched).
 */
int
__cconfig_lookup_bool(config_t *cfg, const char *name, int *value) 
{
	int result;

	/* the file is consulted only when the environment has no override */
	if (env_setting_lookup_bool(name, &result) != CONFIG_TRUE &&
	    config_lookup_bool(cfg, name, &result) != CONFIG_TRUE) {
		return CONFIG_FALSE;
	}

	*value = result;
	return CONFIG_TRUE;
}


/**
 * Boolean lookup with the same signature as the other "valid" lookups.
 * validity_check and the variadic arguments are not examined; the call is
 * forwarded to __cconfig_lookup_bool().
 */
int
__cconfig_lookup_valid_bool(config_t *cfg, 
                     const char *name, 
                     int *value, 
                     int validity_check, ...)
{
	int status = __cconfig_lookup_bool(cfg, name, value);

	return status;
}


/**
 * Look up an integer setting; an environment override takes precedence over
 * the configuration file.
 *
 * \return CONFIG_TRUE and sets *value when either source provides the
 *         setting, CONFIG_FALSE otherwise (*value is left untouched).
 */
int
__cconfig_lookup_int(config_t *cfg, const char *name, int *value)
{
	int result;

	/*
	 * The file is consulted only when the environment has no override.
	 * Note: the third parameter of config_lookup_int changed from libconfig
	 * 1.3 to 1.4, it was 'long' and now it is 'int'.
	 */
	if (env_setting_lookup_int(name, &result) != CONFIG_TRUE &&
	    config_lookup_int(cfg, name, &result) != CONFIG_TRUE) {
		return CONFIG_FALSE;
	}

	*value = result;
	return CONFIG_TRUE;
}


/**
 * Look up an integer setting and validate it.
 *
 * \param validity_check selects the validation and the variadic arguments:
 *        - CONFIG_NO_CHECK:    no further arguments, every value is accepted
 *        - CONFIG_RANGE_CHECK: (int min, int max), accepted when
 *                              min <= value <= max
 *        - CONFIG_LIST_CHECK:  (int count, int v1, ..., int vcount), accepted
 *                              when the value equals one of the listed ones
 * \return CONFIG_TRUE and sets *value when the setting exists and passes
 *         the check, CONFIG_FALSE otherwise (*value is left untouched).
 */
int
__cconfig_lookup_valid_int(config_t *cfg, 
                           const char *name, 
                           int *value, 
                           int validity_check, ...)
{
	int              min;
	int              max;
	int              list_length;
	int              i;
	int              val;
	int              accepted = 0;
	va_list          ap;

	if (__cconfig_lookup_int(cfg, name, &val) != CONFIG_TRUE) {
		return CONFIG_FALSE;
	}

	switch (validity_check) {
		case CONFIG_NO_CHECK:
			accepted = 1;
			break;
		case CONFIG_RANGE_CHECK:
			va_start(ap, validity_check);
			min = va_arg(ap, int);
			max = va_arg(ap, int);
			va_end(ap);
			/* validate the value just looked up, not the caller's old *value */
			accepted = (val >= min && val <= max);
			break;
		case CONFIG_LIST_CHECK:
			va_start(ap, validity_check);
			list_length = va_arg(ap, int);
			for (i=0; i<list_length; i++) {
				if (val == va_arg(ap, int)) {
					accepted = 1;
					break;
				}
			}
			/* reached on both the match and the no-match path */
			va_end(ap);
			break;
	}

	if (!accepted) {
		return CONFIG_FALSE;
	}
	*value = val;
	return CONFIG_TRUE;
}


/**
 * Look up a string setting; an environment override takes precedence over
 * the configuration file.
 *
 * \return CONFIG_TRUE and sets *value when either source provides the
 *         setting, CONFIG_FALSE otherwise (*value is left untouched).
 */
int
__cconfig_lookup_string(config_t *cfg, const char *name, char **value)
{
	char *text;

	/* the file is consulted only when the environment has no override */
	if (env_setting_lookup_string(name, &text) != CONFIG_TRUE &&
	    config_lookup_string(cfg, name, (const char**) &text) != CONFIG_TRUE) {
		return CONFIG_FALSE;
	}

	*value = text;
	return CONFIG_TRUE;
}


/**
 * Look up a string setting and validate it.
 *
 * \param validity_check selects the validation and the variadic arguments:
 *        - CONFIG_NO_CHECK:    no further arguments, every value is accepted
 *        - CONFIG_RANGE_CHECK: not meaningful for strings, always rejected
 *        - CONFIG_LIST_CHECK:  (int count, char *s1, ..., char *scount),
 *                              accepted when the value equals one of the
 *                              listed strings
 * \return CONFIG_TRUE and sets *value when the setting exists and passes
 *         the check, CONFIG_FALSE otherwise (*value is left untouched).
 */
int
__cconfig_lookup_valid_string(config_t *cfg, 
                              const char *name, 
                              char **value, 
                              int validity_check, ...)
{
	int       list_length;
	int       i;
	int       accepted = 0;
	char      *val;
	va_list   ap;

	if (__cconfig_lookup_string(cfg, name, &val) != CONFIG_TRUE) {
		return CONFIG_FALSE;
	}

	switch (validity_check) {
		case CONFIG_NO_CHECK:
			accepted = 1;
			break;
		case CONFIG_RANGE_CHECK:
			break;
		case CONFIG_LIST_CHECK:
			va_start(ap, validity_check);
			list_length = va_arg(ap, int);
			for (i=0; i<list_length; i++) {
				if (strcmp(val, va_arg(ap, char *))==0) {
					accepted = 1;
					break;
				}
			}
			/* reached on both the match and the no-match path */
			va_end(ap);
			break;
	}

	if (!accepted) {
		return CONFIG_FALSE;
	}
	*value = val;
	return CONFIG_TRUE;
}


/**
 * Initialise a configuration object and load it from a file.
 *
 * The path given by the caller is replaced by the value returned from
 * __getenv(ENVVAR_PREFIX, "INI") when that call yields a non-NULL string.
 * NOTE(review): presumably this reads an environment variable derived from
 * the prefix and "INI" -- confirm against __getenv.
 *
 * \param cfg         configuration object to initialise and fill
 * \param config_file default path of the configuration file
 * \return the result of config_read_file(); on CONFIG_FALSE an error is
 *         printed to stderr
 */
int 
__cconfig_init(config_t *cfg, const char *config_file)
{
	const char *path = config_file;
	char *override = __getenv(ENVVAR_PREFIX, "INI");
	int status;

	if (override) {
		path = override;
	}

	config_init(cfg);
	status = config_read_file(cfg, path);
	if (status == CONFIG_FALSE) {
		fprintf(stderr, "ERROR: nvmemul: Configuration file %s not found.\n", path);
	}
	return status;
}


================================================
FILE: src/lib/config.h
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#ifndef __CONFIG_H
#define __CONFIG_H

/**
 * \file 
 * 
 * Runtime configuration parameters
 */


#include <stdio.h>
#include <libconfig.h>

#define ENVVAR_PREFIX "NVMEMUL"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Validity-check selectors passed as the `validity_check` argument of the
 * __cconfig_lookup_valid_*() functions.
 *
 * Each one is guarded so that compilation stops with an error if libconfig.h
 * (included above) already defines a macro of the same name.
 */

/* Accept any value that is found. */
#ifdef CONFIG_NO_CHECK
# error "ERROR: Redefining previously defined CONFIG_NO_CHECK"
#else
# define CONFIG_NO_CHECK    0
#endif

/* Range check; for integers the variadic arguments are (int min, int max). */
#ifdef CONFIG_RANGE_CHECK
# error "ERROR: Redefining previously defined CONFIG_RANGE_CHECK"
#else
# define CONFIG_RANGE_CHECK 1
#endif

/* List check; the variadic arguments are an int count followed by that many allowed values. */
#ifdef CONFIG_LIST_CHECK
# error "ERROR: Redefining previously defined CONFIG_LIST_CHECK"
#else
# define CONFIG_LIST_CHECK  2
#endif



/**
 * The lookup functions return the value of a configuration variable based on
 * the following order:
 *  1) value of environment variable
 *  2) value in configuration file variable
 *
 * If the variable is not found then a lookup function does not set the value.
 *
 * All lookup functions return CONFIG_TRUE on success and CONFIG_FALSE
 * otherwise.
 *
 * The *_valid_* variants additionally take a validity_check selector
 * (CONFIG_NO_CHECK, CONFIG_RANGE_CHECK or CONFIG_LIST_CHECK) followed by the
 * variadic arguments that selector requires; a value that is found but fails
 * the check is reported as CONFIG_FALSE and is not stored.
 * NOTE(review): the variadic layout is visible for the int and string
 * variants in config.c; confirm it for the bool variant.
 */

int __cconfig_lookup_bool(config_t *cfg, const char *name, int *value);
int __cconfig_lookup_int(config_t *cfg, const char *name, int *value);
int __cconfig_lookup_string(config_t *cfg, const char *name, char **value);
int __cconfig_lookup_valid_bool(config_t *cfg, const char *name, int *value, int validity_check, ...);
int __cconfig_lookup_valid_int(config_t *cfg, const char *name, int *value, int validity_check, ...);
int __cconfig_lookup_valid_string(config_t *cfg, const char *name, char **value, int validity_check, ...);
/* Initialises cfg and reads config_file into it; returns the config_read_file() result. */
int __cconfig_init(config_t *cfg, const char *config_file);

#ifdef __cplusplus
}
#endif

#endif /* __CONFIG_H */


================================================
FILE: src/lib/cpu/CMakeLists.txt
================================================
# Sources of the CPU detection / performance-counter layer.
set(nvmemul_cpu_src
    cpu.c
    pmc.c
)

# OBJECT library: the sources are compiled to object files but are not
# archived or linked here.
# NOTE(review): presumably consumed by a parent target through
# $<TARGET_OBJECTS:cpu> -- confirm in src/lib/CMakeLists.txt.
add_library(cpu OBJECT ${nvmemul_cpu_src})


================================================
FILE: src/lib/cpu/cpu.c
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <regex.h>
#include <string.h>
#include "cpu.h"
#include "dev.h"
#include "error.h"
#include "misc.h"
#include "known_cpus.h"
#include "xeon-ex.h"
#include <cpuid.h>

// Mainline architectures and processors available here:
// https://software.intel.com/en-us/articles/intel-architecture-and-processor-identification-with-cpuid-model-and-family-numbers
//
// It turns out that CPUID family/model numbers alone do not identify a
// processor precisely, as different processors may report the same values.
// So in addition we rely on the brand string from /proc/cpuinfo ("model name"),
// e.g. to tell Xeon and non-Xeon parts apart.

/*
 * Bit-field helpers for decoding the EAX value returned by CPUID leaf 1.
 *
 * MASK(msb, lsb)          32-bit mask with bits lsb..msb (inclusive) set
 * EXTRACT(val, msb, lsb)  value of bits lsb..msb of val, shifted down to bit 0
 *
 * All arithmetic is done on unsigned operands: left-shifting the signed
 * value ~0 (i.e. -1) is undefined behaviour in C, and the mask is built with
 * a right shift so that msb == 31 does not shift by the full type width.
 * Macro arguments are parenthesised so expressions can be passed safely.
 */
#define MASK(msb, lsb) ((~0u >> (31 - (msb))) & (~0u << (lsb)))
#define EXTRACT(val, msb, lsb) ((MASK(msb, lsb) & (val)) >> (lsb))
#define MODEL(eax) EXTRACT(eax, 7, 4)
#define EXTENDED_MODEL(eax) EXTRACT(eax, 19, 16)
/* display model: extended model in the high nibble, base model in the low nibble */
#define MODEL_NUMBER(eax) ((EXTENDED_MODEL(eax) << 4) | MODEL(eax))
#define FAMILY(eax) EXTRACT(eax, 11, 8)
#define Extended_Family(eax) EXTRACT(eax, 27, 20)
/* display family: base family plus extended family */
#define Family_Number(eax) (FAMILY(eax) + Extended_Family(eax))

// Executes the CPUID instruction with `info` loaded into EAX and stores the
// resulting EAX/EBX/ECX/EDX register values through the four output pointers.
// Only EAX is supplied as an input operand, so ECX (the sub-leaf selector) is
// whatever the register happens to contain when the instruction runs.
void cpuid(unsigned int info, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx)
{
    __asm__(
        "cpuid;"
        : "=a"(*eax), "=b"(*ebx), "=c"(*ecx), "=d"(*edx)
        : "a"(info));
}

// Queries CPUID leaf 1 and reports the processor family and model numbers.
// Either output pointer may be NULL, in which case that value is not stored.
// Both values are reported as 0 when __get_cpuid() reports failure.
void get_family_model(int *family, int *model)
{
    unsigned int eax, ebx, ecx, edx;
    int family_number = 0;
    int model_number = 0;

    if (__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    {
        family_number = Family_Number(eax);
        model_number = MODEL_NUMBER(eax);
    }

    if (family != NULL)
        *family = family_number;

    if (model != NULL)
        *model = model_number;
}

// Returns the value of the first line in /proc/cpuinfo that contains
// `valname` and has a "key : value" form.
//
// The returned string is everything after the ':' and the single character
// following it (normally a space), including the trailing newline if present.
// Returns NULL if valname is NULL, the file cannot be opened, no matching
// line exists, or memory allocation fails.
//
// caller is responsible for freeing memory allocated by this function
char *cpuinfo(char *valname)
{
    FILE *fp;
    char *line = NULL;
    char *result = NULL;
    size_t capacity = 0;

    if (valname == NULL)
    {
        return NULL;
    }

    fp = fopen("/proc/cpuinfo", "r");
    if (fp == NULL)
    {
        return NULL;
    }

    while (getline(&line, &capacity, fp) != -1)
    {
        const char *value;
        char *colon;
        size_t nbytes;

        if (strstr(line, valname) == NULL)
        {
            continue;
        }

        // a matching line without a separator carries no value: keep looking
        colon = strchr(line, ':');
        if (colon == NULL)
        {
            continue;
        }

        // skip the ':' and the one separator character after it, but never
        // step past the terminating NUL
        value = colon + 1;
        if (*value != '\0')
        {
            value++;
        }

        nbytes = strlen(value) + 1;
        result = malloc(nbytes);
        if (result != NULL)
        {
            memcpy(result, value, nbytes);
        }
        break;
    }

    free(line);
    fclose(fp);
    return result;
}

// reads current cpu frequency through the /proc/cpuinfo file
// ("cpu MHz" entry, converted with string_to_size()).
// Returns 0 when the entry cannot be read.
// avoid calling this function often
int cpu_speed_mhz()
{
    size_t val;
    char *str = cpuinfo("cpu MHz");

    // cpuinfo() returns NULL on any failure; do not hand that to the parser
    if (str == NULL)
    {
        return 0;
    }

    val = string_to_size(str);
    free(str);
    return (int)val;
}

// reads cpu LLC cache size through the /proc/cpuinfo file
// ("cache size" entry, converted with string_to_size()).
// Returns 0 when the entry cannot be read.
// avoid calling this function often
size_t cpu_llc_size_bytes()
{
    size_t val;
    char *str = cpuinfo("cache size");

    // cpuinfo() returns NULL on any failure; do not hand that to the parser
    if (str == NULL)
    {
        return 0;
    }

    val = string_to_size(str);
    free(str);
    return val;
}

// Returns the "model name" entry of /proc/cpuinfo, or NULL if cpuinfo() fails.
// The caller owns the returned buffer and must free() it.
char *cpu_model_name()
{
    char *brand = cpuinfo("model name");
    return brand;
}

// Tests whether `to_match` contains a match for the POSIX extended regular
// expression `regex_text` (compiled with REG_EXTENDED | REG_NEWLINE).
// Returns E_SUCCESS on a match; E_ERROR when there is no match or the
// expression fails to compile.
int match(const char *to_match, const char *regex_text)
{
    regex_t compiled;
    regmatch_t hit[1];
    int status;

    if (regcomp(&compiled, regex_text, REG_EXTENDED | REG_NEWLINE) != 0)
    {
        return E_ERROR;
    }

    status = regexec(&compiled, to_match, 1, hit, 0);
    regfree(&compiled);

    // regexec() yields 0 on a match, non-zero otherwise
    return (status == 0) ? E_SUCCESS : E_ERROR;
}

// Returns 1 if the processor's model name contains "Xeon", 0 otherwise
// (including when the model name cannot be read).
int is_Xeon()
{
    char *brand = cpu_model_name();
    int found;

    if (brand == NULL)
    {
        return 0;
    }

    found = (match(brand, "Xeon") == E_SUCCESS);
    free(brand);
    return found;
}

// Returns 1 if the processor's model name contains "Intel", 0 otherwise
// (including when the model name cannot be read).
int is_Intel()
{
    char *brand = cpu_model_name();
    int found;

    if (brand == NULL)
    {
        return 0;
    }

    found = (match(brand, "Intel") == E_SUCCESS);
    free(brand);
    return found;
}

// Detects the processor and returns the matching CPU model descriptor.
//
// Detection steps:
//   1. the brand string must contain "Intel";
//   2. the CPUID family/model pair is looked up in known_cpus[];
//   3. the micro-architecture selects one of the static xeon-ex descriptors;
//   4. the brand string decides between the Xeon and non-Xeon enumerator.
//
// Returns NULL for non-Intel, unknown or unsupported processors. The returned
// pointer refers to a static object and must not be freed.
cpu_model_t *cpu_model()
{
    int i, family, model, isXeon;
    cpu_model_t *cpu_model = NULL;

    if (!is_Intel())
        return NULL;

    get_family_model(&family, &model);

    isXeon = is_Xeon();

    for (i = 0; known_cpus[i].microarch != Invalid; i++)
    {
        const microarch_ID_t *c = &known_cpus[i];

        if (c->family != family || c->model != model)
            continue;

        switch (c->microarch)
        {
        case SandyBridge:
            cpu_model = &cpu_model_intel_xeon_ex;
            break;
        case IvyBridge:
            cpu_model = &cpu_model_intel_xeon_ex_v2;
            break;
        case Haswell:
            cpu_model = &cpu_model_intel_xeon_ex_v3;
            break;
        default:
            return NULL;
        }

        // Set the enumerator from the table entry instead of decrementing the
        // value stored in the shared static descriptor: the decrement was
        // applied again on every call, so repeated calls on a non-Xeon part
        // drifted to the wrong micro-architecture. In microarch_t each Xeon
        // enumerator directly follows its non-Xeon counterpart.
        // NOTE(review): assumes the static descriptors in xeon-ex.h are
        // initialised with the Xeon enumerator -- confirm.
        cpu_model->microarch = isXeon ? (microarch_t)(c->microarch + 1) : c->microarch;

        DBG_LOG(INFO, "Detected CPU model '%s'\n", microarch_strings[cpu_model->microarch]);
        break;
    }

    if (!cpu_model)
    {
        return NULL;
    }

    // complete the model with some runtime information
    cpu_model->llc_size_bytes = cpu_llc_size_bytes();
    //    cpu_model->speed_mhz = cpu_speed_mhz();

    return cpu_model;
}


================================================
FILE: src/lib/cpu/cpu.h
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#ifndef __CPU_H
#define __CPU_H

#include <stddef.h>
#include <stdint.h>
#include "dev.h"

#define MAX_THROTTLE_VALUE 1023

int set_throttle_register(int node, uint64_t val);
size_t cpu_llc_size_bytes();

struct pmc_set_s;

// Selects which throttle register the set/get_throttle_register callbacks of
// cpu_model_t operate on.
// NOTE(review): presumably DDR activate / read / write throttling -- confirm
// against the memory-controller register definitions.
typedef enum {
    THROTTLE_DDR_ACT = 0,
    THROTTLE_DDR_READ,
    THROTTLE_DDR_WRITE
} throttle_type_t;

// Supported micro-architectures.
// order matters. see cpu_model(): it derives the non-Xeon enumerator by
// subtracting 1 from the Xeon one, so each Xeon value must directly follow
// its non-Xeon counterpart. The values are also used as indices into
// microarch_strings[] (known_cpus.h), which must be kept in the same order.
typedef enum {
    Invalid,
    SandyBridge,
    SandyBridgeXeon,
    IvyBridge,
    IvyBridgeXeon,
    Haswell,
    HaswellXeon
} microarch_t;

// One entry of the known_cpus[] table: maps a CPUID family/model pair to a
// micro-architecture.
typedef struct
{
    int family;            // family number, compared with get_family_model() output
    int model;             // model number, compared with get_family_model() output
    microarch_t microarch; // micro-architecture; Invalid terminates the table
} microarch_ID_t;

/**
 *  CPU object that encapsulates processor-specific methods for accessing
 *  performance counters and memory controller PCI registers
 *
 *  microarch and llc_size_bytes are filled in at run time by cpu_model().
 */
typedef struct cpu_model_s {
    microarch_t microarch; // processor description
    size_t llc_size_bytes; // last level cache size
//    int speed_mhz; // cpu clock frequency
    struct pmc_events_s* pmc_events; // performance monitoring events supported by the processor
    // writes `val` to the throttle register selected by throttle_type
    // NOTE(review): return convention not visible here -- confirm in the implementations
    int (*set_throttle_register)(pci_regs_t *regs, throttle_type_t throttle_type, uint16_t val);
    // reads the throttle register selected by throttle_type into *val
    int (*get_throttle_register)(pci_regs_t *regs, throttle_type_t throttle_type, uint16_t* val);
} cpu_model_t;

cpu_model_t* cpu_model();
int cpu_speed_mhz();

#endif /* __CPU_H */


================================================
FILE: src/lib/cpu/haswell-papi.h
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#ifndef __CPU_HASWELL_H
#define __CPU_HASWELL_H

#include <papi.h>
#include "debug.h"

// Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
// applications to list all available performance events with their architecture specific
// detailed description and translate them to their respective event code. 'showevtinfo' application can
// be used to list all available performance event names with detailed description and 'check_events' application
// can be used to translate the performance event to the corresponding event code.  

// Native event names monitored on Haswell.
// These events will be initialized and started.
// Every event reading will return an array with the values for all these events.
// The array index is the same index used to define the event in this array:
//   [0] L2-pending stall cycles, [1] L3 hits, [2] remote-DRAM L3 misses,
//   [3] local-DRAM L3 misses.
const char *haswell_native_events[MAX_NUM_EVENTS] = {
    "CYCLE_ACTIVITY:STALLS_L2_PENDING",
    "MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE",
    "MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM",
    "MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM"
};

// Estimates the stall cycles caused by DRAM accesses (local and remote).
//
// The L2-pending stall cycles are scaled by the fraction of loads served by
// DRAM: (remote + local) / (remote + local + llc_hit).
// Returns 0 when the counters cannot be read or no loads were counted.
uint64_t haswell_read_stall_events_local() {
    long long counters[MAX_NUM_EVENTS];

    if (pmc_events_read_local_thread(counters) != PAPI_OK) {
        DBG_LOG(ERROR, "read stall cycles failed\n");
        return 0;
    }

    uint64_t l2_pending  = counters[0];
    uint64_t llc_hit     = counters[1];
    uint64_t remote_dram = counters[2];
    uint64_t local_dram  = counters[3];

    DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n",
        l2_pending, llc_hit, remote_dram, local_dram);

    double dram_loads = remote_dram + local_dram;
    double all_loads = dram_loads + llc_hit;
    if (all_loads == 0) {
        return 0;
    }

    return (uint64_t)((double)l2_pending * (dram_loads / all_loads));
}

// Estimates the stall cycles caused by remote-DRAM accesses.
//
// First the DRAM stall cycles are derived from the L2-pending stalls and the
// LLC miss/hit ratio; that figure is then scaled by the share of remote
// accesses among all DRAM accesses.
// Returns 0 when the counters cannot be read or a ratio is undefined.
uint64_t haswell_read_stall_events_remote() {
    long long counters[MAX_NUM_EVENTS];

    if (pmc_events_read_local_thread(counters) != PAPI_OK) {
        DBG_LOG(ERROR, "read stall cycles failed\n");
        return 0;
    }

    uint64_t l2_pending  = counters[0];
    uint64_t llc_hit     = counters[1];
    uint64_t remote_dram = counters[2];
    uint64_t local_dram  = counters[3];

    DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n",
        l2_pending, llc_hit, remote_dram, local_dram);

    // DRAM stalls: L2 stalls weighted by the LLC miss fraction
    double dram_loads = remote_dram + local_dram;
    double all_loads = dram_loads + llc_hit;
    if (all_loads == 0) {
        return 0;
    }
    double stalls = (double)l2_pending * (dram_loads / all_loads);

    // remote share of the DRAM stalls
    double dram_total = remote_dram + local_dram;
    if (dram_total == 0) {
        return 0;
    }

    return (uint64_t)(stalls * ((double)remote_dram / dram_total));
}

#endif /* __CPU_HASWELL_H */


================================================
FILE: src/lib/cpu/haswell.h
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#ifndef __CPU_HASWELL_H
#define __CPU_HASWELL_H

#include <math.h>
#include "thread.h"
#include "cpu/pmc.h"
#include "debug.h"

// Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
// applications to list all available performance events with their architecture specific
// detailed description and translate them to their respective event code. 'showevtinfo' application can
// be used to list all available performance event names with detailed description and 'check_events' application
// can be used to translate the performance event to the corresponding event code.  

// Per-thread latency values used to weight remote against local DRAM
// accesses in the remote_dram read function of this header.
extern __thread int tls_hw_local_latency;
extern __thread int tls_hw_remote_latency;
#ifdef MEMLAT_SUPPORT
// Per-thread running totals of remote/local DRAM accesses, updated by the
// read functions of this header.
extern __thread uint64_t tls_global_remote_dram;
extern __thread uint64_t tls_global_local_dram;
#endif

// Table of hardware events used on Haswell: (event name, NULL, event code).
// NOTE(review): the meaning of the second argument and the encoding of the
// code are defined by the ACTION macros in cpu/pmc.h -- confirm there.
#undef FOREACH_PMC_HW_EVENT
#define FOREACH_PMC_HW_EVENT(ACTION)                                                                       \
  ACTION("CYCLE_ACTIVITY:STALLS_L2_PENDING", NULL, 0x55305a3)                                              \
  ACTION("MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE", NULL, 0x5308d2)                                        \
  ACTION("MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM", NULL, 0x530cd3)                                     \
  ACTION("MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM", NULL, 0x5303d3)

// Table of the derived counters implemented in this header.
#undef FOREACH_PMC_EVENT
#define FOREACH_PMC_EVENT(ACTION, prefix)                                                                  \
  ACTION(ldm_stall_cycles, prefix)                                                                         \
  ACTION(remote_dram, prefix)

// Weight applied to the DRAM access count (LLC misses) relative to the LLC
// hit count when apportioning L2-pending stall cycles.
// NOTE(review): presumably an approximate DRAM-to-L3 latency ratio -- confirm.
#define L3_FACTOR 7.0

// Enable hook of the haswell "ldm_stall_cycles" counter: registers the four
// hardware events it is derived from under indices 0-3, the same indices the
// corresponding read function passes to READ_MY_HW_EVENT_DIFF().
// NOTE(review): the exact effect of ASSIGN_PMC_HW_EVENT_TO_ME is defined in
// cpu/pmc.h -- confirm there.
DECLARE_ENABLE_PMC(haswell, ldm_stall_cycles)
{
    ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE", 1);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM", 2);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM", 3);

    return E_SUCCESS;
}

// Clear hook of the haswell "ldm_stall_cycles" counter: intentionally empty,
// nothing is reset here.
DECLARE_CLEAR_PMC(haswell, ldm_stall_cycles)
{
}

// Read hook of the haswell "ldm_stall_cycles" counter.
//
// Returns an estimate of the stall cycles attributable to DRAM accesses:
//   l2_pending * (F * misses) / (F * misses + llc_hits)
// with misses = remote + local DRAM accesses and F = L3_FACTOR.
// Returns 0 when no DRAM access was counted.
// NOTE(review): READ_MY_HW_EVENT_DIFF(n) presumably yields the change of
// hardware event n since the previous read -- confirm in cpu/pmc.h.
DECLARE_READ_PMC(haswell, ldm_stall_cycles)
{
   uint64_t l2_pending_diff  = READ_MY_HW_EVENT_DIFF(0);
   uint64_t llc_hit_diff     = READ_MY_HW_EVENT_DIFF(1);
   uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2);
   uint64_t local_dram_diff  = READ_MY_HW_EVENT_DIFF(3);

   DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n",
		   l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff);

   // without any DRAM access there is nothing to attribute
   if ((remote_dram_diff == 0) && (local_dram_diff == 0)) return 0;
#ifdef MEMLAT_SUPPORT
   // only the local total is accumulated here; the remote total is
   // accumulated by the remote_dram read function
   tls_global_local_dram += local_dram_diff;
#endif

   // calculate stalls based on L2 stalls and LLC miss/hit
   double num = L3_FACTOR * (remote_dram_diff + local_dram_diff);
   double den = num + llc_hit_diff;
   if (den == 0) return 0;
   return (uint64_t) ((double)l2_pending_diff * (num / den));
}


// Enable hook of the haswell "remote_dram" counter: registers the same four
// hardware events as ldm_stall_cycles under indices 0-3, the indices the
// corresponding read function passes to READ_MY_HW_EVENT_DIFF().
// NOTE(review): the exact effect of ASSIGN_PMC_HW_EVENT_TO_ME is defined in
// cpu/pmc.h -- confirm there.
DECLARE_ENABLE_PMC(haswell, remote_dram)
{
    ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_HIT_RETIRED:XSNP_NONE", 1);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:REMOTE_DRAM", 2);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_L3_MISS_RETIRED:LOCAL_DRAM", 3);

    return E_SUCCESS;
}

// Clear hook of the haswell "remote_dram" counter: intentionally empty,
// nothing is reset here.
DECLARE_CLEAR_PMC(haswell, remote_dram)
{
}

// Read hook of the haswell "remote_dram" counter.
//
// Returns an estimate of the stall cycles attributable to remote DRAM
// accesses. The DRAM stall estimate (see the formula below) is scaled by the
// latency-weighted share of remote accesses:
//   remote * remote_latency / (remote * remote_latency + local * local_latency)
// Returns 0 when no DRAM access was counted or a denominator is zero.
// NOTE(review): READ_MY_HW_EVENT_DIFF(n) presumably yields the change of
// hardware event n since the previous read -- confirm in cpu/pmc.h.
DECLARE_READ_PMC(haswell, remote_dram)
{
   uint64_t l2_pending_diff  = READ_MY_HW_EVENT_DIFF(0);
   uint64_t llc_hit_diff     = READ_MY_HW_EVENT_DIFF(1);
   uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2);
   uint64_t local_dram_diff  = READ_MY_HW_EVENT_DIFF(3);

   DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n",
		   l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff);

   // without any DRAM access there is nothing to attribute
   if ((remote_dram_diff == 0) && (local_dram_diff == 0)) return 0;
#ifdef MEMLAT_SUPPORT
   // only the remote total is accumulated here; the local total is
   // accumulated by the ldm_stall_cycles read function
   tls_global_remote_dram += remote_dram_diff;
#endif

   // calculate stalls based on L2 stalls and LLC miss/hit
   double num = L3_FACTOR * (remote_dram_diff + local_dram_diff);
   double den = num + llc_hit_diff;
   if (den == 0) return 0;
   double stalls = (double)l2_pending_diff * (num / den);

   // calculate remote dram stalls based on total stalls and local/remote dram accesses
   // also consider the weight of remote memory access against local memory access
   den = (remote_dram_diff * tls_hw_remote_latency) + (local_dram_diff * tls_hw_local_latency);
   if (den == 0) return 0;
   return (uint64_t) (stalls * ((double)(remote_dram_diff * tls_hw_remote_latency) / den));
}


PMC_EVENTS(haswell, 4)
#endif /* __CPU_HASWELL_H */


================================================
FILE: src/lib/cpu/ivybridge-papi.h
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#ifndef __CPU_IVYBRIDGE_H
#define __CPU_IVYBRIDGE_H

#include <papi.h>
#include "debug.h"

// Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
// applications to list all available performance events with their architecture specific
// detailed description and translate them to their respective event code. 'showevtinfo' application can
// be used to list all available performance event names with detailed description and 'check_events' application
// can be used to translate the performance event to the corresponding event code.  

// Native event names monitored on Ivy Bridge.
// These events will be initialized and started.
// Every event reading will return an array with the values for all these events.
// The array index is the same index used to define the event in this array:
//   [0] L2-pending stall cycles, [1] LLC hits, [2] remote-DRAM LLC misses,
//   [3] local-DRAM LLC misses.
const char *ivybridge_native_events[MAX_NUM_EVENTS] = {
    "CYCLE_ACTIVITY:STALLS_L2_PENDING",
    "MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE",
    "MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM",
    "MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM"
};

// Estimates the stall cycles caused by DRAM accesses (local and remote).
//
// The L2-pending stall cycles are scaled by the fraction of loads served by
// DRAM: (remote + local) / (remote + local + llc_hit).
// Returns 0 when the counters cannot be read or no loads were counted.
uint64_t ivybridge_read_stall_events_local() {
    long long counters[MAX_NUM_EVENTS];

    if (pmc_events_read_local_thread(counters) != PAPI_OK) {
        DBG_LOG(ERROR, "read stall cycles failed\n");
        return 0;
    }

    uint64_t l2_pending  = counters[0];
    uint64_t llc_hit     = counters[1];
    uint64_t remote_dram = counters[2];
    uint64_t local_dram  = counters[3];

    DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n",
        l2_pending, llc_hit, remote_dram, local_dram);

    double dram_loads = remote_dram + local_dram;
    double all_loads = dram_loads + llc_hit;
    if (all_loads == 0) {
        return 0;
    }

    return (uint64_t)((double)l2_pending * (dram_loads / all_loads));
}

// Estimates the stall cycles caused by remote-DRAM accesses.
//
// First the DRAM stall cycles are derived from the L2-pending stalls and the
// LLC miss/hit ratio; that figure is then scaled by the share of remote
// accesses among all DRAM accesses.
// Returns 0 when the counters cannot be read or a ratio is undefined.
uint64_t ivybridge_read_stall_events_remote() {
    long long counters[MAX_NUM_EVENTS];

    if (pmc_events_read_local_thread(counters) != PAPI_OK) {
        DBG_LOG(ERROR, "read stall cycles failed\n");
        return 0;
    }

    uint64_t l2_pending  = counters[0];
    uint64_t llc_hit     = counters[1];
    uint64_t remote_dram = counters[2];
    uint64_t local_dram  = counters[3];

    DBG_LOG(DEBUG, "read stall L2 cycles %lu; llc_hit %lu; remote_dram %lu; local_dram %lu\n",
        l2_pending, llc_hit, remote_dram, local_dram);

    // DRAM stalls: L2 stalls weighted by the LLC miss fraction
    double dram_loads = remote_dram + local_dram;
    double all_loads = dram_loads + llc_hit;
    if (all_loads == 0) {
        return 0;
    }
    double stalls = (double)l2_pending * (dram_loads / all_loads);

    // remote share of the DRAM stalls
    double dram_total = remote_dram + local_dram;
    if (dram_total == 0) {
        return 0;
    }

    return (uint64_t)(stalls * ((double)remote_dram / dram_total));
}

#endif /* __CPU_IVYBRIDGE_H */


================================================
FILE: src/lib/cpu/ivybridge.h
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#ifndef __CPU_IVYBRIDGE_H
#define __CPU_IVYBRIDGE_H

#include <math.h>
#include "thread.h"
#include "cpu/pmc.h"
#include "debug.h"

// Perfmon2 is a library that provides a generic interface to access the PMU. It also comes with
// applications to list all available performance events with their architecture specific
// detailed description and translate them to their respective event code. 'showevtinfo' application can
// be used to list all available performance event names with detailed description and 'check_events' application
// can be used to translate the performance event to the corresponding event code.  

// Per-thread latency values used to weight remote against local DRAM
// accesses in the remote_dram read function of this header.
extern __thread int tls_hw_local_latency;
extern __thread int tls_hw_remote_latency;
#ifdef MEMLAT_SUPPORT
// Per-thread running totals of remote/local DRAM accesses, updated by the
// read functions of this header.
extern __thread uint64_t tls_global_remote_dram;
extern __thread uint64_t tls_global_local_dram;
#endif

// Table of hardware events used on Ivy Bridge: (event name, NULL, event code).
// NOTE(review): the meaning of the second argument and the encoding of the
// code are defined by the ACTION macros in cpu/pmc.h -- confirm there.
#undef FOREACH_PMC_HW_EVENT
#define FOREACH_PMC_HW_EVENT(ACTION)                                                                       \
  ACTION("CYCLE_ACTIVITY:STALLS_L2_PENDING", NULL, 0x55305a3)                                              \
  ACTION("MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE", NULL, 0x5308d2)                                        \
  ACTION("MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM", NULL, 0x530cd3)                                     \
  ACTION("MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM", NULL, 0x5303d3)

// Table of the derived counters implemented in this header.
#undef FOREACH_PMC_EVENT
#define FOREACH_PMC_EVENT(ACTION, prefix)                                                                  \
  ACTION(ldm_stall_cycles, prefix)                                                                         \
  ACTION(remote_dram, prefix)


// Weight applied to the DRAM access count (LLC misses) relative to the LLC
// hit count when apportioning L2-pending stall cycles.
// NOTE(review): presumably an approximate DRAM-to-L3 latency ratio -- confirm.
#define L3_FACTOR 7.0

// Enable hook of the ivybridge "ldm_stall_cycles" counter: registers the four
// hardware events it is derived from under indices 0-3, the same indices the
// corresponding read function passes to READ_MY_HW_EVENT_DIFF().
// NOTE(review): the exact effect of ASSIGN_PMC_HW_EVENT_TO_ME is defined in
// cpu/pmc.h -- confirm there.
DECLARE_ENABLE_PMC(ivybridge, ldm_stall_cycles)
{
    ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE", 1);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM", 2);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM", 3);

    return E_SUCCESS;
}

// Clear hook of the ivybridge "ldm_stall_cycles" counter: intentionally empty,
// nothing is reset here.
DECLARE_CLEAR_PMC(ivybridge, ldm_stall_cycles)
{
}

// Read hook of the ivybridge "ldm_stall_cycles" counter.
//
// Returns an estimate of the stall cycles attributable to DRAM accesses:
//   l2_pending * (F * misses) / (F * misses + llc_hits)
// with misses = remote + local DRAM accesses and F = L3_FACTOR.
// Returns 0 when no DRAM access was counted.
// NOTE(review): READ_MY_HW_EVENT_DIFF(n) presumably yields the change of
// hardware event n since the previous read -- confirm in cpu/pmc.h.
DECLARE_READ_PMC(ivybridge, ldm_stall_cycles)
{
   uint64_t l2_pending_diff  = READ_MY_HW_EVENT_DIFF(0);
   uint64_t llc_hit_diff     = READ_MY_HW_EVENT_DIFF(1);
   uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2);
   uint64_t local_dram_diff  = READ_MY_HW_EVENT_DIFF(3);

   DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n",
		   l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff);

   // without any DRAM access there is nothing to attribute
   if ((remote_dram_diff == 0) && (local_dram_diff == 0)) return 0;
#ifdef MEMLAT_SUPPORT
   // only the local total is accumulated here; the remote total is
   // accumulated by the remote_dram read function
   tls_global_local_dram += local_dram_diff;
#endif

   // calculate stalls based on L2 stalls and LLC miss/hit
   double num = L3_FACTOR * (remote_dram_diff + local_dram_diff);
   double den = num + llc_hit_diff;
   if (den == 0) return 0;
   return (uint64_t) ((double)l2_pending_diff * (num / den));
}


// Enable hook of the ivybridge "remote_dram" counter: registers the same four
// hardware events as ldm_stall_cycles under indices 0-3, the indices the
// corresponding read function passes to READ_MY_HW_EVENT_DIFF().
// NOTE(review): the exact effect of ASSIGN_PMC_HW_EVENT_TO_ME is defined in
// cpu/pmc.h -- confirm there.
DECLARE_ENABLE_PMC(ivybridge, remote_dram)
{
    ASSIGN_PMC_HW_EVENT_TO_ME("CYCLE_ACTIVITY:STALLS_L2_PENDING", 0);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_HIT_RETIRED:XSNP_NONE", 1);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:REMOTE_DRAM", 2);
    ASSIGN_PMC_HW_EVENT_TO_ME("MEM_LOAD_UOPS_LLC_MISS_RETIRED:LOCAL_DRAM", 3);

    return E_SUCCESS;
}

// Clear hook of the ivybridge "remote_dram" counter: intentionally empty,
// nothing is reset here.
DECLARE_CLEAR_PMC(ivybridge, remote_dram)
{
}

// Read hook of the ivybridge "remote_dram" counter.
//
// Returns an estimate of the stall cycles attributable to remote DRAM
// accesses. The DRAM stall estimate (see the formula below) is scaled by the
// latency-weighted share of remote accesses:
//   remote * remote_latency / (remote * remote_latency + local * local_latency)
// Returns 0 when no DRAM access was counted or a denominator is zero.
// NOTE(review): READ_MY_HW_EVENT_DIFF(n) presumably yields the change of
// hardware event n since the previous read -- confirm in cpu/pmc.h.
DECLARE_READ_PMC(ivybridge, remote_dram)
{
   uint64_t l2_pending_diff  = READ_MY_HW_EVENT_DIFF(0);
   uint64_t llc_hit_diff     = READ_MY_HW_EVENT_DIFF(1);
   uint64_t remote_dram_diff = READ_MY_HW_EVENT_DIFF(2);
   uint64_t local_dram_diff  = READ_MY_HW_EVENT_DIFF(3);

   DBG_LOG(DEBUG, "read stall L2 cycles diff %lu; llc_hit %lu; cycles diff remote_dram %lu; local_dram %lu\n",
		   l2_pending_diff, llc_hit_diff, remote_dram_diff, local_dram_diff);

   // without any DRAM access there is nothing to attribute
   if ((remote_dram_diff == 0) && (local_dram_diff == 0)) return 0;
#ifdef MEMLAT_SUPPORT
   // only the remote total is accumulated here; the local total is
   // accumulated by the ldm_stall_cycles read function
   tls_global_remote_dram += remote_dram_diff;
#endif

   // calculate stalls based on L2 stalls and LLC miss/hit
   double num = L3_FACTOR * (remote_dram_diff + local_dram_diff);
   double den = num + llc_hit_diff;
   if (den == 0) return 0;
   double stalls = (double)l2_pending_diff * (num / den);

   // calculate remote dram stalls based on total stalls and local/remote dram accesses
   // also consider the weight of remote memory access against local memory access
   den = (remote_dram_diff * tls_hw_remote_latency) + (local_dram_diff * tls_hw_local_latency);
   if (den == 0) return 0;
   return (uint64_t) (stalls * ((double)(remote_dram_diff * tls_hw_remote_latency) / den));
}


PMC_EVENTS(ivybridge, 4)
#endif /* __CPU_IVYBRIDGE_H */


================================================
FILE: src/lib/cpu/known_cpus.h
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#ifndef __KNOWN_CPUS_H
#define __KNOWN_CPUS_H

#include "cpu.h"

// later, cpu_model_name() is used to distinguish between
// Xeon and non-Xeon processors. It's much easier here
// to consider all processors non-Xeon.
// references:
// 1- http://a4lg.com/tech/x86/database/x86-families-and-models.en.html
// 2- Intel® Xeon® Processor E7-8800/4800 v3 Product Family Specification
// 3- https://software.intel.com/en-us/articles/intel-architecture-and-processor-identification-with-cpuid-model-and-family-numbers
// Table mapping a (family, model) pair to a microarchitecture identifier.
// The table ends with a sentinel entry whose microarch is Invalid.
// NOTE(review): this array is defined (not merely declared) in a header;
// presumably the header is included from a single translation unit -- confirm.
microarch_ID_t known_cpus[] =
    {
        // order does not matter
        // Sandy Bridge models
        {.family = 0x06, .model = 0x2A, .microarch = SandyBridge},
        {.family = 0x06, .model = 0x2D, .microarch = SandyBridge},

        // Ivy Bridge models
        {.family = 0x06, .model = 0x3A, .microarch = IvyBridge},
        {.family = 0x06, .model = 0x3E, .microarch = IvyBridge},

        // Haswell models
        {.family = 0x06, .model = 0x3C, .microarch = Haswell},
        {.family = 0x06, .model = 0x3F, .microarch = Haswell},
        {.family = 0x06, .model = 0x45, .microarch = Haswell},
        {.family = 0x06, .model = 0x46, .microarch = Haswell},

        // must be the last element
        {.family = 0x0, .model = 0x0, .microarch = Invalid}};

// Human-readable names for microarchitectures.
// order must correspond to microarch_t
// NOTE(review): presumably indexed directly by a microarch_t value, so any
// change to that enum must be mirrored here -- confirm against cpu.h.
char *microarch_strings[] =
    {
        "Invalid",
        "Sandy Bridge",
        "Sandy Bridge Xeon",
        "Ivy Bridge",
        "Ivy Bridge Xeon",
        "Haswell",
        "Haswell Xeon"};

#endif /* __KNOWN_CPUS_H */


================================================
FILE: src/lib/cpu/pmc-papi.c
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#include <papi.h>
#include <pthread.h>
#include <sys/syscall.h>
#include "cpu/pmc-papi.h"
#include "debug.h"

// Per-thread PAPI event set handle. It starts out as PAPI_NULL and is filled
// in by pmc_create_event_set_local_thread() through PAPI_create_eventset().
__thread int tls_event_set = PAPI_NULL;

// Size of the scratch buffer used to format error messages in this file.
#define STR_MAX_SIZE 256

// Writes a CRITICAL debug-log entry of the form "<msg> (<PAPI error text>)",
// where the error text is obtained from PAPI_strerror() for ret_val.
//
// ret_val: PAPI status code to translate into text.
// msg:     caller-supplied description of the operation that failed.
static void log_papi_critical(int ret_val, const char *msg) {
    DBG_LOG(CRITICAL,
            "%s (%s)\n",
            msg,
            PAPI_strerror(ret_val));
}

int pmc_init() {
	int ret_val;

    if ((ret_val = PAPI_library_init(PAPI_VER_CURRENT)) != PAPI_VER_CURRENT) {
        log_papi_critical(ret_val, "PMC library init error");
        return -1;
    }

    if ((ret_val = PAPI_thread_init(pthread_self)) != PAPI_OK) {
        log_papi_critical(ret_val, "PMC thread support init error");
        return -1;
    }

//    if ((ret_val = PAPI_set_domain(PAPI_DOM_ALL)) != PAPI_OK) {
//        log_papi_critical(ret_val, "PMC set domain error");
//        return -1;
//    }

    return 0;
}

// Shuts down the PAPI library by calling PAPI_shutdown().
void pmc_shutdown() {
    PAPI_shutdown();
}

int pmc_create_event_set_local_thread() {
	int ret_val;

    if ((ret_val = PAPI_create_eventset(&tls_event_set)) != PAPI_OK) {
        log_papi_critical(ret_val, "PMC event set init error");
        return -1;
    }

//    if ((ret_val = PAPI_set_granularity(PAPI_GRN_SYS)) != PAPI_OK) {
//        log_papi_critical(ret_val, "PMC set granularity error");
//        return -1;
//    }

    return 0;
}

// Tears down the calling thread's event set: first PAPI_cleanup_eventset()
// on tls_event_set, then PAPI_destroy_eventset() on it.
// The status codes returned by both PAPI calls are not checked.
void pmc_destroy_event_set_local_thread() {
    (void) PAPI_cleanup_eventset(tls_event_set);
    (void) PAPI_destroy_eventset(&tls_event_set);
}

// Thin wrapper around PAPI_register_thread(); returns its status code
// unchanged.
int pmc_register_thread() {
	return PAPI_register_thread();
}

// Thin wrapper around PAPI_unregister_thread(); returns its status code
// unchanged.
int pmc_unregister_thread() {
	return PAPI_unregister_thread();
}

// Adds the named event to the calling thread's event set (tls_event_set).
//
// event_name: name of the event to add; must not be NULL (asserted).
//
// Returns 0 on success. On failure a message containing the event name and
// the PAPI error text is logged and -1 is returned.
int pmc_register_event_local_thread(const char *event_name) {
    char err_text[STR_MAX_SIZE];
    int status;

    // The pthread scope for each thread should be set to PTHREAD_SCOPE_SYSTEM.
    // On linux, pthread supports only PTHREAD_SCOPE_SYSTEM.

    // The event set must already have been created for this thread.
    assert(tls_event_set != PAPI_NULL);
    assert(event_name);

    status = PAPI_add_named_event(tls_event_set, (char *)event_name);
    if (status == PAPI_OK) {
        return 0;
    }

    snprintf(err_text, sizeof(err_text), "PMC event (%s) register error", event_name);
    log_papi_critical(status, err_text);
    return -1;
}

int pmc_events_start_local_thread() {
    int ret_val;

    assert(tls_event_set != PAPI_NULL);

    if ((ret_val = PAPI_start(tls_event_set)) != PAPI_OK) {
    	log_papi_critical(ret_val, "PMC events start error");
        return -1;
    }

    return 0;
}

// Stops counting on the calling thread's event set via PAPI_stop().
// The counter values handed back by PAPI_stop() land in a local scratch
// array of MAX_NUM_EVENTS entries and are not used; the PAPI status code is
// not checked either.
void pmc_events_stop_local_thread() {
    long long discarded_counts[MAX_NUM_EVENTS];

    assert(tls_event_set != PAPI_NULL);

    PAPI_stop(tls_event_set, discarded_counts);
}

int pmc_events_read_local_thread(long long *values) {
    int ret_val;
//    int status = 0;

    assert(values);

//    PAPI_state(event_set, &status);
//    if (status != PAPI_RUNNING) {
//        DBG_LOG(CRITICAL, "PMC event set not in running state");
//        return -1;
//    }

    if ((ret_val = PAPI_read(tls_event_set, values)) != PAPI_OK) {
    	log_papi_critical(ret_val, "PMC events read error");
        return -1;
    }

    if ((ret_val = PAPI_reset(tls_event_set)) != PAPI_OK) {
        log_papi_critical(ret_val, "PMC events reset error");
        return -1;
    }

    return 0;
}


================================================
FILE: src/lib/cpu/pmc-papi.h
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#ifndef __CPU_PMC_H
#define __CPU_PMC_H

#include <stdint.h>


// Usually the architectures support up to 4 counters enabled at the same
// time per core when HT is enabled
#define MAX_NUM_EVENTS 4

// Signature of a callback that takes no arguments and returns a 64-bit
// unsigned value.
// NOTE(review): going by the name, the value is presumably a stall-cycle
// count -- confirm against the per-microarchitecture implementations.
typedef uint64_t (*read_stalls_t)(void);

// Describes the event configuration of one PMC backend.
typedef struct {
	// Array of event name strings.
	// NOTE(review): presumably NULL-terminated and passed one by one to
	// pmc_register_event_local_thread() -- confirm against the caller.
	const char **native_events;
	// Callback of type read_stalls_t; by its name, for local memory accesses.
	read_stalls_t read_stalls_events_local;
	// Callback of type read_stalls_t; by its name, for remote memory accesses.
	read_stalls_t read_stalls_events_remote;
} pmc_event_t;

// Initializes the PAPI library and its thread support; 0 on success, -1 on error.
int pmc_init();
// Shuts the PAPI library down.
void pmc_shutdown();
// Creates the calling thread's event set; 0 on success, -1 on error.
int pmc_create_event_set_local_thread();
// Cleans up and destroys the calling thread's event set.
void pmc_destroy_event_set_local_thread();
// Adds the named event to the calling thread's event set; 0 on success, -1 on error.
int pmc_register_event_local_thread(const char *event_name);
// Starts counting on the calling thread's event set; 0 on success, -1 on error.
int pmc_events_start_local_thread();
// Stops counting on the calling thread's event set; final values are discarded.
void pmc_events_stop_local_thread();
// Reads the counters into `values`, then resets them; 0 on success, -1 on error.
int pmc_events_read_local_thread(long long *values);

// Wrappers returning the status of PAPI_register_thread() /
// PAPI_unregister_thread() respectively.
int pmc_register_thread();
int pmc_unregister_thread();

#endif /* __CPU_PMC_H */


================================================
FILE: src/lib/cpu/pmc.c
================================================
/***************************************************************************
Copyright 2016 Hewlett Packard Enterprise Development LP.  
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or (at
your option) any later version. This program is distributed in the
hope that it will be useful, but WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the GNU General Public License for more details. You
should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
***************************************************************************/
#include <stdlib.h>
#include "cpu/pmc.h"
#include "dev.h"
#include "error.h"
#include "thread.h"
#include "topology.h"

#pragma GCC push_options
#pragma GCC optimize ("O0")

// The width of the general-purpose counters is 40 bits.
// https://www.felixcloutier.com/x86/RDPMC.html
#define RDPMC_MAX_VALUE 0xFFFFFFFFFF  

/*
 * Reads a performance-monitoring counter with the RDPMC instruction.
 *
 * counter: counter selector, loaded into ECX before RDPMC executes.
 *
 * Returns the counter value assembled from EDX:EAX. Only the low 8 bits of
 * EDX are kept, so the result is at most 40 bits wide.
 *
 * The operands are expressed through register constraints ("c" input,
 * "a"/"d" outputs) so the compiler knows that `counter` is read and that
 * EAX/EDX are written; the asm therefore no longer depends on being compiled
 * without optimization.
 *
 * NOTE(review): executing RDPMC from user space presumably requires the
 * kernel side to have enabled it (CR4.PCE) -- confirm against the driver.
 */
long long rdpmc(int counter) 
{
	unsigned eax;
	unsigned edx;

	__asm__ __volatile__ ("rdpmc"
	                      : "=a" (eax), "=d" (edx)
	                      : "c" (counter));

	/* Keep bits 32..39 from EDX and combine them with the low 32 bits. */
	return (long long) ((((unsigned long long) (edx & 0xFF)) << 32) | eax);
}

/*
 * Reads the low 32 bits of a performance-monitoring counter with RDPMC.
 *
 * counter: counter selector, loaded into ECX before RDPMC executes.
 *
 * Returns the content of EAX after RDPMC, converted to int. The upper part
 * delivered in EDX is discarded.
 *
 * The operands use register constraints ("c" input, "a"/"d" outputs) so the
 * compiler knows that `counter` is read and that EAX/EDX are written.
 */
int rdpmc32(int counter) {

	unsigned eax;
	unsigned edx;

	__asm__ __volatile__ ("rdpmc"
	                      : "=a" (eax), "=d" (edx)
	                      : "c" (counter));

	/* EDX is written by the instruction but its value is not needed. */
	(void) edx;

	return eax;

}
#pragma GCC pop_options


/*int num_used_hw_cntrs(pmc_events_t* events)
{
    int i;
    int used;
    pmc_hw_event_t* event = 0;

     // check if this a known registered hardware event
    for (i=0, used=0; events->known_hw_events[i].name; i++) {
        event = &events->known_hw_events[i];
        used += event->active ? 0 : 1;
    }
    return used;    
}*/

/*
 * Finds a hardware counter id that is not used by any active event.
 *
 * events: event table; known_hw_events is walked until an entry with a NULL
 *         name is reached, and num_avail_hw_cntrs gives the number of
 *         counter ids (0 .. num_avail_hw_cntrs - 1) to choose from.
 *
 * Returns the lowest counter id not claimed by an active event, or -1 when
 * no counters exist, every counter is in use, or the temporary bookkeeping
 * array cannot be allocated.
 */
int get_avail_hw_cntr_id(pmc_events_t* events)
{
    int i;
    int used = 0;
    int status = -1;
    int num_cntrs = events->num_avail_hw_cntrs;
    int* hw_cntr_id_status;

    if (num_cntrs <= 0) {
        return -1;
    }

    hw_cntr_id_status = calloc(num_cntrs, sizeof(int));
    if (hw_cntr_id_status == NULL) {
        return -1;
    }

    // Mark every counter id that is claimed by an active event.
    for (i = 0; events->known_hw_events[i].name; i++) {
        pmc_hw_event_t* event = &events->known_hw_events[i];
        int id;

        if (!event->active) {
            continue;
        }
        used++;
        id = event->hw_cntr_id;
        // Ignore ids outside the table instead of writing out of bounds.
        if (id >= 0 && id < num_cntrs) {
            hw_cntr_id_status[id] = 1;
        }
    }

    // Only scan when at least one counter must still be free; the scan is
    // bounded by the number of counters.
    if (used < num_cntrs) {
        for (i = 0; i < num_cntrs; i++) {
            if (hw_cntr_id_status[i] == 0) {
                status = i;
                break;
            }
        }
    }

    free(hw_cntr_id_status);
    return status;
}

pmc_hw_event_t* enable_pmc_hw_event(pmc_events_t* events, const char* name)
{
    int i;
    pmc_hw_event_t* event = 0;
    int found = 0;

     // check if this a known registered hardw

Download .txt

gitextract_aunglxr9/

├── AUTHORS
├── CMakeLists.txt
├── Doxyfile
├── README-BENCHMARKS-TESTING.md
├── README.md
├── TODO.dox
├── bench/
│   ├── CMakeLists.txt
│   ├── memlat/
│   │   ├── CMakeLists.txt
│   │   └── memlat.c
│   ├── multilat/
│   │   ├── CMakeLists.txt
│   │   └── multilat.c
│   └── new_memlat/
│       ├── CMakeLists.txt
│       ├── memlat.c
│       └── memlat.sh
├── benchmark-tests/
│   ├── bandwidth-model-building.sh
│   ├── memlat-bench-test-10M-single-socket.sh
│   ├── memlat-bench-test-10M.sh
│   ├── memlat-orig-lat-test-single-socket.sh
│   ├── memlat-orig-lat-test.sh
│   ├── nvmemul-bandwidth.ini
│   ├── nvmemul-debug.ini
│   ├── nvmemul-orig.ini
│   └── nvmemul.ini
├── license.txt
├── nvmemul-orig.ini
├── nvmemul.dox
├── nvmemul.ini
├── scripts/
│   ├── install.sh
│   ├── runenv.sh
│   ├── setupdev.sh
│   └── turboboost.sh
├── src/
│   ├── CMakeLists.txt
│   ├── dev/
│   │   ├── CMakeLists.txt
│   │   ├── Makefile
│   │   ├── ioctl_query.h
│   │   └── pmc.c
│   └── lib/
│       ├── CMakeLists.txt
│       ├── config.c
│       ├── config.h
│       ├── cpu/
│       │   ├── CMakeLists.txt
│       │   ├── cpu.c
│       │   ├── cpu.h
│       │   ├── haswell-papi.h
│       │   ├── haswell.h
│       │   ├── ivybridge-papi.h
│       │   ├── ivybridge.h
│       │   ├── known_cpus.h
│       │   ├── pmc-papi.c
│       │   ├── pmc-papi.h
│       │   ├── pmc.c
│       │   ├── pmc.h
│       │   ├── sandybridge-papi.h
│       │   ├── sandybridge.h
│       │   └── xeon-ex.h
│       ├── debug.c
│       ├── debug.h
│       ├── dev.c
│       ├── dev.h
│       ├── errno.h
│       ├── error.h
│       ├── init.c
│       ├── interpose.c
│       ├── interpose.h
│       ├── measure.h
│       ├── measure_bw.c
│       ├── measure_lat.c
│       ├── misc.c
│       ├── misc.h
│       ├── model.h
│       ├── model_bw.c
│       ├── model_lat.c
│       ├── monotonic_timer.c
│       ├── monotonic_timer.h
│       ├── pflush.c
│       ├── pflush.h
│       ├── pmalloc.c
│       ├── pmalloc.h
│       ├── process_rank.c
│       ├── stat.c
│       ├── stat.h
│       ├── thread.c
│       ├── thread.h
│       ├── topology.c
│       └── topology.h
└── test/
    ├── CMakeLists.txt
    ├── test_dev.cc
    ├── test_interpose.cc
    ├── test_multithread.c
    ├── test_mutex.cc
    ├── test_nvm.c
    ├── test_nvm_remote_dram.c
    └── test_thread.cc

Download .txt

SYMBOL INDEX (256 symbols across 45 files)

FILE: bench/memlat/memlat.c
  function safe_strtoull (line 26) | static uint64_t safe_strtoull(const char *s) {
  function main (line 45) | int main(int argc, char *argv[]) {

FILE: bench/multilat/multilat.c
  type arg_s (line 37) | typedef struct {
  type element_t (line 55) | typedef struct {
  type chain_t (line 60) | typedef struct {
  function max_number_of_cpus (line 94) | static int max_number_of_cpus(void)
  function bind_cpu (line 123) | static int bind_cpu(thread_t *thread) {
  function force_ldm_stalls (line 168) | uint64_t force_ldm_stalls(chain_t **C,
  function thread_iter (line 243) | void thread_iter(int dram_refs, int nvm_refs, int interleave_dram, int i...
  function run_threads (line 361) | void run_threads(int n_threads, int dram_refs, int nvm_refs, int interle...
  function main (line 420) | int main(int argn, char **argv)

FILE: bench/new_memlat/memlat.c
  function safe_strtoull (line 28) | static uint64_t safe_strtoull(const char *s) {
  function ns_to_cycles (line 45) | static inline uint64_t ns_to_cycles(int cpu_speed_mhz, int ns)
  function main (line 111) | int main(int argc, char *argv[]) {

FILE: src/dev/ioctl_query.h
  type ioctl_query_setcounter_t (line 21) | typedef struct {
  type ioctl_query_setgetpci_t (line 26) | typedef struct {

FILE: src/dev/pmc.c
  type file (line 34) | struct file
  type file_operations (line 44) | struct file_operations
  function pmc_set_pce_bit (line 56) | void pmc_set_pce_bit(void* arg)
  function pmc_init_module (line 65) | int pmc_init_module(void)
  function pmc_exit_module (line 91) | void pmc_exit_module(void) {
  type counter_s (line 97) | struct counter_s {
  function __pmc_clear (line 109) | static void __pmc_clear(int counter_id) {
  function pmc_clear (line 122) | static void pmc_clear(void* arg) {
  function pmc_clear_all_cpu (line 127) | void pmc_clear_all_cpu(int counter_id)
  function __set_counter (line 139) | static void __set_counter(int counter_id, unsigned long val)
  function set_counter (line 155) | void set_counter(void* arg)
  function set_counter_all_cpu (line 162) | void set_counter_all_cpu(int counter_id, unsigned long arg)
  function pmc_ioctl_setcounter (line 170) | static long pmc_ioctl_setcounter(struct file* f, unsigned int cmd, unsig...
  function pmc_ioctl_setpci (line 191) | static long pmc_ioctl_setpci(struct file* f, unsigned int cmd, unsigned ...
  function pmc_ioctl_getpci (line 211) | static long pmc_ioctl_getpci(struct file* f, unsigned int cmd, unsigned ...
  function pmc_ioctl (line 236) | static long pmc_ioctl(struct file *f, unsigned int cmd, unsigned long arg)

FILE: src/lib/config.c
  function env_setting_lookup (line 38) | static inline int
  function env_setting_lookup_int (line 68) | static inline int
  function env_setting_lookup_bool (line 86) | static inline int
  function env_setting_lookup_string (line 93) | static inline int
  function __cconfig_lookup_bool (line 100) | int
  function __cconfig_lookup_valid_bool (line 122) | int
  function __cconfig_lookup_int (line 132) | int
  function __cconfig_lookup_valid_int (line 155) | int
  function __cconfig_lookup_string (line 202) | int
  function __cconfig_lookup_valid_string (line 224) | int
  function __cconfig_init (line 259) | int

FILE: src/lib/cpu/cpu.c
  function cpuid (line 42) | void cpuid(unsigned int info, unsigned int *eax, unsigned int *ebx, unsi...
  function get_family_model (line 50) | void get_family_model(int *family, int *model)
  function cpu_speed_mhz (line 100) | int cpu_speed_mhz()
  function cpu_llc_size_bytes (line 111) | size_t cpu_llc_size_bytes()
  function match (line 126) | int match(const char *to_match, const char *regex_text)
  function is_Xeon (line 146) | int is_Xeon()
  function is_Intel (line 166) | int is_Intel()
  function cpu_model_t (line 186) | cpu_model_t *cpu_model()

FILE: src/lib/cpu/cpu.h
  type pmc_set_s (line 26) | struct pmc_set_s
  type throttle_type_t (line 28) | typedef enum {
  type microarch_t (line 35) | typedef enum {
  type microarch_ID_t (line 45) | typedef struct
  type cpu_model_t (line 56) | typedef struct cpu_model_s {

FILE: src/lib/cpu/haswell-papi.h
  function haswell_read_stall_events_local (line 36) | uint64_t haswell_read_stall_events_local() {
  function haswell_read_stall_events_remote (line 61) | uint64_t haswell_read_stall_events_remote() {

FILE: src/lib/cpu/ivybridge-papi.h
  function ivybridge_read_stall_events_local (line 36) | uint64_t ivybridge_read_stall_events_local() {
  function ivybridge_read_stall_events_remote (line 61) | uint64_t ivybridge_read_stall_events_remote() {

FILE: src/lib/cpu/pmc-papi.c
  function log_papi_critical (line 24) | static void log_papi_critical(int ret_val, const char *msg) {
  function pmc_init (line 30) | int pmc_init() {
  function pmc_shutdown (line 51) | void pmc_shutdown() {
  function pmc_create_event_set_local_thread (line 55) | int pmc_create_event_set_local_thread() {
  function pmc_destroy_event_set_local_thread (line 71) | void pmc_destroy_event_set_local_thread() {
  function pmc_register_thread (line 76) | int pmc_register_thread() {
  function pmc_unregister_thread (line 80) | int pmc_unregister_thread() {
  function pmc_register_event_local_thread (line 84) | int pmc_register_event_local_thread(const char *event_name) {
  function pmc_events_start_local_thread (line 103) | int pmc_events_start_local_thread() {
  function pmc_events_stop_local_thread (line 116) | void pmc_events_stop_local_thread() {
  function pmc_events_read_local_thread (line 124) | int pmc_events_read_local_thread(long long *values) {

FILE: src/lib/cpu/pmc-papi.h
  type pmc_event_t (line 26) | typedef struct {

FILE: src/lib/cpu/pmc.c
  function rdpmc (line 28) | long long rdpmc(int counter)
  function rdpmc32 (line 48) | int rdpmc32(int counter) {
  function get_avail_hw_cntr_id (line 78) | int get_avail_hw_cntr_id(pmc_events_t* events)
  function pmc_hw_event_t (line 111) | pmc_hw_event_t* enable_pmc_hw_event(pmc_events_t* events, const char* name)
  function disable_pmc_hw_event (line 159) | void disable_pmc_hw_event(pmc_events_t* events, const char* name)
  function clear_pmc_hw_event (line 185) | void clear_pmc_hw_event(pmc_hw_event_t* event)
  function read_pmc_hw_event_cur (line 190) | uint64_t read_pmc_hw_event_cur(pmc_hw_event_t* event)
  function read_pmc_hw_event_diff (line 195) | uint64_t read_pmc_hw_event_diff(pmc_hw_event_t* event)
  function pmc_event_t (line 210) | pmc_event_t* enable_pmc_event(cpu_model_t* cpu, const char* name)
  function assign_pmc_hw_event_to_event (line 243) | int assign_pmc_hw_event_to_event(pmc_events_t* events, const char* name,...
  function release_all_pmc_hw_events_of_event (line 261) | void release_all_pmc_hw_events_of_event(pmc_event_t* event)
  function disable_pmc_event (line 274) | void disable_pmc_event(cpu_model_t* cpu, const char* name)

FILE: src/lib/cpu/pmc.h
  type pmc_hw_event_t (line 54) | typedef struct {
  type pmc_event_t (line 63) | typedef struct pmc_event_s {
  type pmc_events_t (line 73) | typedef struct pmc_events_s {
  function clear_pmc_event (line 90) | static inline void clear_pmc_event(pmc_event_t* event)
  function read_pmc_event (line 97) | static inline uint64_t read_pmc_event(pmc_event_t* event)

FILE: src/lib/cpu/sandybridge-papi.h
  function sandybridge_latency_calibration_local (line 38) | void sandybridge_latency_calibration_local(int *hw_latency, int target_l...
  function sandybridge_latency_calibration_remote (line 43) | void sandybridge_latency_calibration_remote(int *hw_latency, int target_...
  function sandybridge_read_stall_events_local (line 48) | uint64_t sandybridge_read_stall_events_local() {

FILE: src/lib/cpu/xeon-ex.h
  function intel_xeon_ex_set_throttle_register (line 26) | int intel_xeon_ex_set_throttle_register(pci_regs_t *regs, throttle_type_...
  function intel_xeon_ex_get_throttle_register (line 58) | int intel_xeon_ex_get_throttle_register(pci_regs_t *regs, throttle_type_...

FILE: src/lib/debug.c
  function strrep (line 28) | static int
  function dbg_set_level (line 45) | void
  function dbg_init (line 52) | int
  function dbg_backtrace (line 95) | void

FILE: src/lib/debug.h
  type dbg_code (line 39) | enum dbg_code {

FILE: src/lib/dev.c
  function set_counter (line 28) | int set_counter(unsigned int counter_id, unsigned int event_id)
  function set_pci (line 50) | int set_pci(unsigned int bus_id, unsigned int device_id, unsigned int fu...
  function get_pci (line 74) | int get_pci(unsigned int bus_id, unsigned int device_id, unsigned int fu...

FILE: src/lib/dev.h
  type pci_addr (line 22) | typedef struct {
  type pci_regs_t (line 28) | typedef struct {

FILE: src/lib/init.c
  function finalize (line 36) | void finalize() {
  function init (line 65) | void init()

FILE: src/lib/interpose.c
  function init_interposition (line 46) | int init_interposition()
  type pthread_create_functor_t (line 72) | typedef struct {
  function pthread_create (line 96) | int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
  function pthread_mutex_lock (line 123) | int pthread_mutex_lock(pthread_mutex_t *mutex)
  function pthread_mutex_trylock (line 145) | int pthread_mutex_trylock(pthread_mutex_t *mutex)
  function pthread_mutex_unlock (line 165) | int pthread_mutex_unlock(pthread_mutex_t *mutex)

FILE: src/lib/measure_bw.c
  function to_bw (line 58) | static inline double to_bw(size_t bytes, double secs) {
  function timeitp (line 88) | int timeitp(void (*function)(void*, size_t), int nthreads, void* array, ...
  function timeit (line 148) | int timeit(void (*function)(void*, size_t), void* array, size_t size, in...
  function write_memory_nontemporal_sse (line 176) | void write_memory_nontemporal_sse(void* array, size_t size) {
  function write_memory_sse (line 187) | void write_memory_sse(void* array, size_t size) {
  function read_memory_sse (line 198) | void read_memory_sse(void* array, size_t size) {
  function measure_read_bw (line 216) | double measure_read_bw(int cpu_node, int mem_node)
  function measure_write_bw (line 233) | double measure_write_bw(int cpu_node, int mem_node)
  function measure_read_bw (line 290) | double measure_read_bw(int cpu_node, int mem_node)

FILE: src/lib/measure_lat.c
  type element_t (line 48) | typedef struct {
  type chain_t (line 53) | typedef struct {
  function min (line 59) | inline uint64_t min(uint64_t a, uint64_t b)
  function prng (line 67) | static uint64_t prng(uint64_t* seed) {
  type timeval (line 77) | struct timeval
  function element_t (line 89) | element_t* element(chain_t* chain, uint64_t index)
  function read_element (line 95) | void inline read_element(chain_t* chain, uint64_t index, char* buf, uint...
  function trash_cache (line 179) | uint64_t trash_cache(uint64_t N)
  function __measure_latency (line 209) | int __measure_latency(uint64_t seedin, int nchains, size_t nelems, int e...
  function measure_latency (line 278) | int measure_latency(cpu_model_t* cpu, int from_node_id, int to_node_id)
  function measure_latency2 (line 288) | int measure_latency2(uint64_t seedin, int nchains, size_t nelems, int el...
  function calibrate_load_from_file (line 311) | static int calibrate_load_from_file(virtual_node_t *virtual_node) {
  function calibrate_save_to_file (line 346) | static void calibrate_save_to_file(virtual_node_t *virtual_node, double ...
  function diff_target_latencies (line 366) | static int diff_target_latencies(int measured_latency, int target_latenc...
  function calibrate (line 371) | static double calibrate(virtual_node_t *virtual_node, double step_value,...
  function calibrate_with_size (line 426) | static double calibrate_with_size(virtual_node_t *virtual_node, double c...
  function latency_calibration (line 444) | void latency_calibration(virtual_node_t *virtual_node) {

FILE: src/lib/misc.c
  function string_to_size (line 21) | size_t string_to_size(char* str)

FILE: src/lib/model.h
  type latency_model_t (line 29) | typedef struct {
  type bw_model_t (line 52) | typedef struct {
  type virtual_topology_s (line 62) | struct virtual_topology_s
  type virtual_topology_s (line 63) | struct virtual_topology_s

FILE: src/lib/model_bw.c
  function train_model (line 51) | static int train_model(physical_node_t* phys_node, char model_type, bw_m...
  function load_model (line 105) | static int load_model(const char* path, const char* prefix, bw_model_t* ...
  function save_model (line 142) | static int save_model(const char* path, const char* prefix, bw_model_t* ...
  function find_data_point (line 163) | static int find_data_point(bw_model_t* model, double target_bw, unsigned...
  function __set_write_bw (line 180) | int __set_write_bw(physical_node_t* node, uint64_t target_bw)
  function set_write_bw (line 204) | int set_write_bw(config_t* cfg, physical_node_t* node)
  function __set_read_bw (line 212) | int __set_read_bw(physical_node_t* node, uint64_t target_bw)
  function set_read_bw (line 236) | int set_read_bw(config_t* cfg, physical_node_t* node)
  function init_bandwidth_model (line 244) | int init_bandwidth_model(config_t* cfg, virtual_topology_t* topology)

FILE: src/lib/model_lat.c
  function hrtime_t (line 43) | inline hrtime_t hrtime_cycles(void)
  function hrtime_t (line 58) | inline hrtime_t cycles_to_us(int cpu_speed_mhz, hrtime_t cycles)
  function create_delay_cycles (line 65) | static inline void create_delay_cycles(hrtime_t cycles)
  function check_target_latency_against_hw_latency (line 85) | static int check_target_latency_against_hw_latency(virtual_topology_t* v...
  function init_latency_model (line 109) | int init_latency_model(config_t* cfg, cpu_model_t* cpu, virtual_topology...
  function init_thread_latency_model (line 173) | void init_thread_latency_model(thread_t *thread)
  function create_latency_epoch (line 179) | void create_latency_epoch()

FILE: src/lib/monotonic_timer.c
  function monotonic_time (line 13) | double monotonic_time() {
  function monotonic_time_us (line 20) | double monotonic_time_us() {
  function rdtsc (line 46) | static inline uint64_t rdtsc() {
  function init_rdtsc_per_sec (line 58) | static void __attribute__((constructor)) init_rdtsc_per_sec() {
  function monotonic_time (line 74) | double monotonic_time() {
  function monotonic_time_us (line 79) | double monotonic_time_us() {

FILE: src/lib/pflush.c
  type hrtime_t (line 18) | typedef uint64_t hrtime_t;
  function asm_rdtsc (line 22) | static inline unsigned long long asm_rdtsc(void)
  function asm_rdtscp (line 29) | static inline unsigned long long asm_rdtscp(void)
  function asm_rdtsc (line 38) | static inline unsigned long long asm_rdtsc(void)
  function asm_rdtscp (line 45) | static inline unsigned long long asm_rdtscp(void)
  function init_pflush (line 71) | void init_pflush(int cpu_speed_mhz, int write_latency_ns)
  function hrtime_t (line 77) | inline hrtime_t cycles_to_ns(int cpu_speed_mhz, hrtime_t cycles)
  function hrtime_t (line 82) | inline hrtime_t ns_to_cycles(int cpu_speed_mhz, hrtime_t ns)
  function emulate_latency_ns (line 87) | static inline
  function pflush (line 109) | void

FILE: src/lib/pmalloc.c
  function pfree (line 53) | void pfree(void* start, size_t size)

FILE: src/lib/process_rank.c
  function set_process_local_rank (line 35) | int set_process_local_rank()
  function unset_process_local_rank (line 127) | int unset_process_local_rank()

FILE: src/lib/stat.c
  function stats_set_init_time (line 29) | void stats_set_init_time(double init_time_us) {
  function stats_enable (line 37) | void stats_enable(config_t *cfg) {
  function hrtime_t (line 59) | static inline hrtime_t ns_to_cycles(int cpu_speed_mhz, int ns)
  function show_thread_stats (line 67) | static void show_thread_stats(thread_t *thread, FILE *out_file) {
  function stats_report (line 107) | void stats_report() {
  function sum (line 166) | double sum(double array[], int n)
  function sumxy (line 178) | double sumxy(double x[], double y[], int n)
  function avg (line 190) | double avg(double array[], int n)
  function slope (line 198) | double slope(double x[], double y[], int n)

FILE: src/lib/stat.h
  type thread_s (line 22) | struct thread_s
  type stats_t (line 24) | typedef struct {
  type thread_stats_t (line 32) | typedef struct {

FILE: src/lib/thread.c
  function rr_next_cpu_id (line 34) | static void rr_next_cpu_id(thread_manager_t* thread_manager, int* next_v...
  function rr_set_next_cpu_based_on_rank (line 57) | void rr_set_next_cpu_based_on_rank(int rank, int max_rank)
  function partition_cpus_based_on_rank (line 74) | void partition_cpus_based_on_rank(int rank, int max_rank, int num_cpus,
  function bind_thread_on_cpu (line 110) | int bind_thread_on_cpu(thread_manager_t* thread_manager, thread_t* threa...
  function bind_thread_on_mem (line 125) | int bind_thread_on_mem(thread_manager_t* thread_manager, thread_t* threa...
  function thread_t (line 137) | thread_t* thread_self()
  function thread_interrupt_handler (line 142) | void thread_interrupt_handler(int signum)
  function setup_events_thread_self (line 150) | static int setup_events_thread_self(thread_t *thread, const char **nativ...
  function register_thread (line 179) | int register_thread(thread_manager_t* thread_manager, pthread_t pthread,...
  function unregister_thread (line 261) | int unregister_thread(thread_manager_t* thread_manager, thread_t * thread)
  function register_self (line 290) | int register_self()
  function unregister_self (line 303) | int unregister_self()
  function interrupt_threads (line 323) | void interrupt_threads(thread_manager_t* manager)
  type timespec (line 352) | struct timespec
  function set_epoch_duration (line 365) | static void set_epoch_duration(config_t* cfg, const char *config_str, in...
  function init_thread_manager (line 381) | int init_thread_manager(config_t* cfg, virtual_topology_t* virtual_topol...
  function reached_min_epoch_duration (line 427) | int reached_min_epoch_duration(thread_t* thread) {
  function reached_max_epoch_duration (line 466) | static int reached_max_epoch_duration(thread_t* thread) {
  function block_new_epoch (line 492) | void block_new_epoch() {
  function unblock_new_epoch (line 499) | void unblock_new_epoch() {
  function thread_manager_t (line 506) | thread_manager_t* get_thread_manager() {

FILE: src/lib/thread.h
  type thread_manager_s (line 27) | struct thread_manager_s
  type hrtime_t (line 29) | typedef uint64_t hrtime_t;
  type thread_t (line 34) | typedef struct thread_s {
  type thread_manager_t (line 53) | typedef struct thread_manager_s {
  type virtual_topology_s (line 66) | struct virtual_topology_s

FILE: src/lib/topology.c
  function select_cpus_based_on_local_rank (line 41) | int select_cpus_based_on_local_rank(virtual_topology_t* virtual_topology)
  function get_mc_pci_bus_list (line 84) | int get_mc_pci_bus_list(pci_regs_t *bus_id_list[], int max_list_size, in...
  function discover_mc_pci_topology (line 137) | int discover_mc_pci_topology(cpu_model_t* cpu_model, physical_node_t* ph...
  function load_mc_pci_topology (line 196) | static int load_mc_pci_topology(const char* path, physical_node_t* physi...
  function save_mc_pci_topology (line 251) | static int save_mc_pci_topology(const char* path, physical_node_t* physi...
  function num_cpus (line 274) | int num_cpus(struct bitmask* bitmask)
  function system_num_cpus (line 288) | int system_num_cpus()
  function print_bitmask (line 293) | void print_bitmask(struct bitmask* bitmask) {
  function next_cpu (line 303) | int next_cpu(struct bitmask* bitmask, int cpu_id)
  function first_cpu (line 316) | int first_cpu(struct bitmask* bitmask)
  function partition_cpus (line 321) | int partition_cpus(virtual_topology_t* virtual_topology)
  function init_virtual_topology (line 339) | int init_virtual_topology(config_t* cfg, cpu_model_t* cpu_model, virtual...

FILE: src/lib/topology.h
  type physical_node_t (line 47) | typedef struct {
  type virtual_node_t (line 61) | typedef struct virtual_node_s {
  type virtual_topology_t (line 68) | typedef struct virtual_topology_s {
  type bitmask (line 75) | struct bitmask
  type bitmask (line 76) | struct bitmask

FILE: test/test_dev.cc
  function main (line 20) | int main(int argc, char** argv)

FILE: test/test_interpose.cc
  function register_thread (line 32) | int register_thread(pthread_t thread)
  function interpose_pthread_create (line 47) | void interpose_pthread_create()
  function interpose_pthread_mutex_lock (line 57) | void interpose_pthread_mutex_lock(pthread_mutex_t* lock)
  function interpose_pthread_mutex_unlock (line 62) | void interpose_pthread_mutex_unlock(pthread_mutex_t* lock)
  function TEST (line 67) | TEST(Interpose, pthread_create)
  function TEST (line 74) | TEST(Interpose, pthread_mutex_lock)
  function main (line 80) | int main(int argc, char** argv)

FILE: test/test_multithread.c
  type arg_s (line 34) | typedef struct {
  type element_t (line 48) | typedef struct {
  type chain_t (line 53) | typedef struct {
  function max_number_of_cpus (line 76) | static int max_number_of_cpus(void)
  function bind_cpu (line 105) | static int bind_cpu(thread_t *thread) {
  function force_ldm_stalls (line 150) | uint64_t force_ldm_stalls(chain_t **C,
  function iter (line 219) | void iter(int cs_n, int cs_duration, int out_cs_duration, int from_node,...
  function manage_threads (line 290) | void manage_threads(int n_threads, int cs_n, int cs_duration, int out_cs...
  function main (line 336) | int main(int argn, char **argv)

FILE: test/test_mutex.cc
  function main (line 37) | int main(int argc, char** argv)

FILE: test/test_nvm.c
  function iter (line 23) | void iter()
  function main (line 49) | int main()

FILE: test/test_nvm_remote_dram.c
  function iter (line 24) | void iter()
  function main (line 56) | int main()

FILE: test/test_thread.cc
  function main (line 39) | int main(int argc, char** argv)

Download .json

Condensed preview — 92 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (404K chars).

[
  {
    "path": "AUTHORS",
    "chars": 146,
    "preview": "Haris Volos           (haris.volos@hpe.com)\nGuilherme Magalhaes   (guilherme.magalhaes@hpe.com)\nLucy Cherkasova       (l"
  },
  {
    "path": "CMakeLists.txt",
    "chars": 155,
    "preview": "cmake_minimum_required(VERSION 2.8)\n\n#add_subdirectory(third_party)\nadd_subdirectory(src)\nadd_subdirectory(bench)\nenable"
  },
  {
    "path": "Doxyfile",
    "chars": 51851,
    "preview": "# Doxyfile 1.4.7\n\n# This file describes the settings to be used by the documentation system\n# doxygen (www.doxygen.org) "
  },
  {
    "path": "README-BENCHMARKS-TESTING.md",
    "chars": 4605,
    "preview": "**For testing whether your environment is configured correctly for\nrunning Quartz** (e.g., whether you set all the requi"
  },
  {
    "path": "README.md",
    "chars": 21274,
    "preview": "\nQuartz: A DRAM-based performance emulator for NVM\n----------------------\n\nQuartz leverages features available in commod"
  },
  {
    "path": "TODO.dox",
    "chars": 1456,
    "preview": "/**\n\\file\n\n\\todo Improve performance counter API by making it more generic. For example, autogenerate pmc event_id using"
  },
  {
    "path": "bench/CMakeLists.txt",
    "chars": 81,
    "preview": "add_subdirectory(memlat)\nadd_subdirectory(new_memlat)\nadd_subdirectory(multilat)\n"
  },
  {
    "path": "bench/memlat/CMakeLists.txt",
    "chars": 127,
    "preview": "include_directories(${CMAKE_SOURCE_DIR}/src/lib)\nadd_executable(memlat memlat.c)\ntarget_link_libraries(memlat nvmemul pt"
  },
  {
    "path": "bench/memlat/memlat.c",
    "chars": 2469,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "bench/multilat/CMakeLists.txt",
    "chars": 134,
    "preview": "include_directories(${CMAKE_SOURCE_DIR}/src/lib)\n\nadd_executable(multilat multilat.c)\ntarget_link_libraries(multilat nvm"
  },
  {
    "path": "bench/multilat/multilat.c",
    "chars": 13413,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "bench/new_memlat/CMakeLists.txt",
    "chars": 135,
    "preview": "include_directories(${CMAKE_SOURCE_DIR}/src/lib)\nadd_executable(new_memlat memlat.c)\ntarget_link_libraries(new_memlat nv"
  },
  {
    "path": "bench/new_memlat/memlat.c",
    "chars": 5679,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "bench/new_memlat/memlat.sh",
    "chars": 2904,
    "preview": "#################################################################\n#Copyright 2016 Hewlett Packard Enterprise Development"
  },
  {
    "path": "benchmark-tests/bandwidth-model-building.sh",
    "chars": 1144,
    "preview": "#################################################################\n#Copyright 2016 Hewlett Packard Enterprise Development"
  },
  {
    "path": "benchmark-tests/memlat-bench-test-10M-single-socket.sh",
    "chars": 3415,
    "preview": "#################################################################\n#Copyright 2016 Hewlett Packard Enterprise Development"
  },
  {
    "path": "benchmark-tests/memlat-bench-test-10M.sh",
    "chars": 3863,
    "preview": "#################################################################\n#Copyright 2016 Hewlett Packard Enterprise Development"
  },
  {
    "path": "benchmark-tests/memlat-orig-lat-test-single-socket.sh",
    "chars": 1967,
    "preview": "#################################################################\n#Copyright 2016 Hewlett Packard Enterprise Development"
  },
  {
    "path": "benchmark-tests/memlat-orig-lat-test.sh",
    "chars": 2585,
    "preview": "#################################################################\n#Copyright 2016 Hewlett Packard Enterprise Development"
  },
  {
    "path": "benchmark-tests/nvmemul-bandwidth.ini",
    "chars": 700,
    "preview": "# Configuration file \n\nlatency:\n{\n    enable = true;\n    inject_delay = true;\n    read = 1000;\n    write = 1000;\n    max"
  },
  {
    "path": "benchmark-tests/nvmemul-debug.ini",
    "chars": 690,
    "preview": "# Configuration file \n\nlatency:\n{\n    enable = true;\n    inject_delay = true;\nread = 1000 ;\n    write = 1000;\nmax_epoch_"
  },
  {
    "path": "benchmark-tests/nvmemul-orig.ini",
    "chars": 690,
    "preview": "# Configuration file \n\nlatency:\n{\n    enable = true;\n    inject_delay = true;\nread = 1000 ;\n    write = 1000;\nmax_epoch_"
  },
  {
    "path": "benchmark-tests/nvmemul.ini",
    "chars": 688,
    "preview": "# Configuration file \n\nlatency:\n{\n    enable = true;\n    inject_delay = true;\nread = 300 ;\n    write = 200;\nmax_epoch_du"
  },
  {
    "path": "license.txt",
    "chars": 894,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "nvmemul-orig.ini",
    "chars": 690,
    "preview": "# Configuration file \n\nlatency:\n{\n    enable = true;\n    inject_delay = true;\nread = 1000 ;\n    write = 1000;\nmax_epoch_"
  },
  {
    "path": "nvmemul.dox",
    "chars": 363,
    "preview": "/**\n\n@mainpage Quartz:  A Lightweight  Performance Emulator for  Persistent Memory Software.\n\n\n\\section section-intro In"
  },
  {
    "path": "nvmemul.ini",
    "chars": 688,
    "preview": "# Configuration file \n\nlatency:\n{\n    enable = true;\n    inject_delay = true;\nread = 1000 ;\n    write = 1000;\nmax_epoch_"
  },
  {
    "path": "scripts/install.sh",
    "chars": 3398,
    "preview": "#!/bin/bash\n#################################################################\n#Copyright 2016 Hewlett Packard Enterprise"
  },
  {
    "path": "scripts/runenv.sh",
    "chars": 1916,
    "preview": "#!/bin/bash\n#################################################################\n#Copyright 2016 Hewlett Packard Enterprise"
  },
  {
    "path": "scripts/setupdev.sh",
    "chars": 2880,
    "preview": "#!/bin/bash\n#################################################################\n#Copyright 2016 Hewlett Packard Enterprise"
  },
  {
    "path": "scripts/turboboost.sh",
    "chars": 3506,
    "preview": "#!/bin/bash\n#################################################################\n#Copyright 2016 Hewlett Packard Enterprise"
  },
  {
    "path": "src/CMakeLists.txt",
    "chars": 44,
    "preview": "add_subdirectory(lib)\nadd_subdirectory(dev)\n"
  },
  {
    "path": "src/dev/CMakeLists.txt",
    "chars": 884,
    "preview": "# Build NVM Emulation device driver (using Kbuild Makefile)\n\nset(DEV_DIR \"${CMAKE_CURRENT_SOURCE_DIR}\")\nset(DEV_BIN_DIR "
  },
  {
    "path": "src/dev/Makefile",
    "chars": 302,
    "preview": "# build modules\nobj-m = nvmemul.o\nnvmemul-objs = pmc.o\n\n# use the kernel build system\nKERNEL_VERSION := `uname -r`\nKERNE"
  },
  {
    "path": "src/dev/ioctl_query.h",
    "chars": 1529,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/dev/pmc.c",
    "chars": 7977,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/CMakeLists.txt",
    "chars": 1170,
    "preview": "project(nvmemul)\n\noption(STATISTICS \"Enable statistics report\" ON)\n\nif(STATISTICS)\n  message(STATUS \"WITH STATISTICS\")\n "
  },
  {
    "path": "src/lib/config.c",
    "chars": 6081,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/config.h",
    "chars": 2507,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/CMakeLists.txt",
    "chars": 86,
    "preview": "set(nvmemul_cpu_src\n    cpu.c\n    pmc.c\n)\n\nadd_library(cpu OBJECT ${nvmemul_cpu_src})\n"
  },
  {
    "path": "src/lib/cpu/cpu.c",
    "chars": 6072,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/cpu.h",
    "chars": 2193,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/haswell-papi.h",
    "chars": 3733,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/haswell.h",
    "chars": 5541,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/ivybridge-papi.h",
    "chars": 3748,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/ivybridge.h",
    "chars": 5571,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/known_cpus.h",
    "chars": 2358,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/pmc-papi.c",
    "chars": 4114,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/pmc-papi.h",
    "chars": 1701,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/pmc.c",
    "chars": 7980,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/pmc.h",
    "chars": 4474,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/sandybridge-papi.h",
    "chars": 3274,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/sandybridge.h",
    "chars": 3495,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/cpu/xeon-ex.h",
    "chars": 3774,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/debug.c",
    "chars": 3169,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/debug.h",
    "chars": 6656,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/dev.c",
    "chars": 2896,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/dev.h",
    "chars": 1571,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/errno.h",
    "chars": 2584,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/error.h",
    "chars": 991,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/init.c",
    "chars": 5176,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/interpose.c",
    "chars": 6086,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/interpose.h",
    "chars": 1677,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/measure.h",
    "chars": 2154,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/measure_bw.c",
    "chars": 8874,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/measure_lat.c",
    "chars": 15205,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/misc.c",
    "chars": 1600,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/misc.h",
    "chars": 969,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/model.h",
    "chars": 2144,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/model_bw.c",
    "chars": 10324,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/model_lat.c",
    "chars": 9918,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/monotonic_timer.c",
    "chars": 2465,
    "preview": "// Copyright 2013 Alex Reece.\n//\n// A cross platform monotonic timer.\n\n#include <unistd.h>\n#include \"monotonic_timer.h\"\n"
  },
  {
    "path": "src/lib/monotonic_timer.h",
    "chars": 437,
    "preview": "// Copyright 2013 Alex Reece.\n//\n// A cross platform monotonic timer.\n\n#ifndef MONOTONIC_TIMER_H_\n#define MONOTONIC_TIME"
  },
  {
    "path": "src/lib/pflush.c",
    "chars": 3775,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/pflush.h",
    "chars": 1324,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/pmalloc.c",
    "chars": 2070,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/pmalloc.h",
    "chars": 1320,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/process_rank.c",
    "chars": 5933,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/stat.c",
    "chars": 7498,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/stat.h",
    "chars": 1924,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/thread.c",
    "chars": 16798,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/thread.h",
    "chars": 2504,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/topology.c",
    "chars": 18299,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "src/lib/topology.h",
    "chars": 3168,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "test/CMakeLists.txt",
    "chars": 1342,
    "preview": "include_directories(${CMAKE_SOURCE_DIR}/third_party/gtest-1.7.0/include)\ninclude_directories(${CMAKE_SOURCE_DIR}/src/lib"
  },
  {
    "path": "test/test_dev.cc",
    "chars": 1266,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "test/test_interpose.cc",
    "chars": 2503,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "test/test_multithread.c",
    "chars": 10342,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "test/test_mutex.cc",
    "chars": 1628,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "test/test_nvm.c",
    "chars": 1441,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "test/test_nvm_remote_dram.c",
    "chars": 1676,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  },
  {
    "path": "test/test_thread.cc",
    "chars": 1580,
    "preview": "/***************************************************************************\nCopyright 2016 Hewlett Packard Enterprise D"
  }
]

About this extraction

This page contains the full source code of the HewlettPackard/quartz GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 92 files (375.4 KB), approximately 97.4k tokens, and a symbol index with 256 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Extract another repo