Repository: facebookarchive/linkbench Branch: master Commit: ac67d54bf291 Files: 89 Total size: 521.8 KB Directory structure: gitextract_nozznvpx/ ├── .arcconfig ├── .gitignore ├── DataModel.md ├── LICENSE ├── NOTICES ├── README.md ├── bin/ │ ├── genswift │ ├── linkbench │ └── swift-generator-cli-0.11.0-standalone.jar ├── config/ │ ├── FBWorkload.properties │ ├── LinkConfigMysql.properties │ └── LinkConfigRocksDb.properties ├── pom.xml └── src/ ├── main/ │ └── java/ │ └── com/ │ └── facebook/ │ ├── LinkBench/ │ │ ├── Config.java │ │ ├── ConfigUtil.java │ │ ├── GraphStore.java │ │ ├── InvertibleShuffler.java │ │ ├── Link.java │ │ ├── LinkBenchConfigError.java │ │ ├── LinkBenchDriver.java │ │ ├── LinkBenchDriverMR.java │ │ ├── LinkBenchLoad.java │ │ ├── LinkBenchOp.java │ │ ├── LinkBenchRequest.java │ │ ├── LinkBenchTask.java │ │ ├── LinkCount.java │ │ ├── LinkStore.java │ │ ├── LinkStoreHBaseGeneralAtomicityTesting.java │ │ ├── LinkStoreMysql.java │ │ ├── LinkStoreRocksDb.java │ │ ├── MemoryLinkStore.java │ │ ├── Node.java │ │ ├── NodeLoader.java │ │ ├── NodeStore.java │ │ ├── Phase.java │ │ ├── RealDistribution.java │ │ ├── Shuffler.java │ │ ├── Timer.java │ │ ├── distributions/ │ │ │ ├── AccessDistributions.java │ │ │ ├── ApproxHarmonic.java │ │ │ ├── GeometricDistribution.java │ │ │ ├── Harmonic.java │ │ │ ├── ID2Chooser.java │ │ │ ├── LinkDistributions.java │ │ │ ├── LogNormalDistribution.java │ │ │ ├── PiecewiseLinearDistribution.java │ │ │ ├── ProbabilityDistribution.java │ │ │ ├── UniformDistribution.java │ │ │ └── ZipfDistribution.java │ │ ├── generators/ │ │ │ ├── DataGenerator.java │ │ │ ├── MotifDataGenerator.java │ │ │ └── UniformDataGenerator.java │ │ ├── stats/ │ │ │ ├── LatencyStats.java │ │ │ ├── RunningMean.java │ │ │ └── SampledStats.java │ │ └── util/ │ │ └── ClassLoadUtil.java │ └── rocks/ │ └── swift/ │ ├── rocks.thrift │ └── rocks_common.thrift └── test/ └── java/ └── com/ └── facebook/ └── LinkBench/ ├── DistributionTestBase.java ├── 
DummyLinkStore.java ├── DummyLinkStoreTest.java ├── GeneratedDataDump.java ├── GeomDistTest.java ├── GraphStoreTestBase.java ├── HarmonicTest.java ├── ID2ChooserTest.java ├── InvertibleShufflerTest.java ├── LinkStoreTestBase.java ├── LogNormalTest.java ├── MemoryGraphStoreTest.java ├── MemoryLinkStoreTest.java ├── MemoryNodeStoreTest.java ├── MySqlGraphStoreTest.java ├── MySqlLinkStoreTest.java ├── MySqlNodeStoreTest.java ├── MySqlTestConfig.java ├── NodeStoreTestBase.java ├── PiecewiseDistTest.java ├── TestAccessDistribution.java ├── TestDataGen.java ├── TestRealDistribution.java ├── TestStats.java ├── TimerTest.java ├── UniformDistTest.java ├── ZipfDistTest.java └── testtypes/ ├── MySqlTest.java ├── ProviderTest.java ├── RocksDbTest.java └── SlowTest.java ================================================ FILE CONTENTS ================================================ ================================================ FILE: .arcconfig ================================================ { "project_id" : "linkbench", "conduit_uri" : "https://reviews.facebook.net/", "copyright_holder" : "", "lint.engine" : "ArcanistSingleLintEngine", "lint.engine.single.linter" : "ArcanistTextLinter" } ================================================ FILE: .gitignore ================================================ target out .project .classpath .settings *.iws *.ipr *.iml .idea .DS_Store ================================================ FILE: DataModel.md ================================================ LinkBench Data and Queries ========================== Facebook has various types of object nodes, such as users, pages etc and various types of associations between those objects. The collection of objects and associations can be viewed as a social graph. A goal of Facebook's database infrastructure is to store this graph in a way to achieve good performance and high efficiency. LinkBench is an attempt to simulate the workload of Facebook's social graph database. 
This is done in two ways: * Using database schema and operations that are similar to Facebook schema and operations. * Generating data and queries which are broadly similar to the real production data and applications. This document describes the *node* and *link* concepts and outlines the concrete data representation and various SQL statements used for operations on links. Node ---- A node represents an object with arbitrary associated data. class Node { public long id; // Unique identifier for node public int type; // Type of node public long version; // Version, incremented each change public int time; // Last modification time public byte data[]; // Arbitrary payload data } Link ---- We use the term link to denote an association of some type between two objects. The term id1 is used to denote source object of an association, and the term id2 is used to denote destination object of an association. Here is a simple conceptual representation of a link object coming from an application. The key members of a link are type, id1, id2, time (while this could be the time of link creation, it could also be any other attribute on which we might want to sort all id2s associated with an id1 through this type), version, visibility (could be VISIBILITY_DEFAULT i.e. visible, or VISIBILITY_HIDDEN i.e. hidden) and some link data (uninterpreted stream of bytes). class Link { public long id1; // id of source node public long link_type; // type of link public long id2; // id of destination node public byte visibility; // is link visible? public byte[] data; // arbitrary data (must be short) public int version; // version of link public long time; // client-defined sort key (often timestamp) } SQL schema ---------- Nodes are stored in a straightforward way. 
In our production databases different types can be stored in different tables, but for LinkBench we use one big table: CREATE TABLE `nodetable` ( `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT, `type` int(10) unsigned NOT NULL, `version` bigint(20) unsigned NOT NULL, `time` int(10) unsigned NOT NULL, `data` mediumtext NOT NULL, PRIMARY KEY(`id`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; Links are stored as adjacency lists. Similarly to nodes, we use one big table. CREATE TABLE `linktable` ( `id1` bigint(20) unsigned NOT NULL DEFAULT '0', `id2` bigint(20) unsigned NOT NULL DEFAULT '0', `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', `visibility` tinyint(3) NOT NULL DEFAULT '0', `data` varchar(255) NOT NULL DEFAULT '', `time` bigint(20) unsigned NOT NULL DEFAULT '0', `version` int(11) unsigned NOT NULL DEFAULT '0', PRIMARY KEY (`id1`,`id2`,`link_type`), KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`version`,`data`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; We also have a separate table to track link counts, in order to allow efficient querying of link counts for long adjacency lists. CREATE TABLE `counttable` ( `id` bigint(20) unsigned NOT NULL DEFAULT '0', `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', `count` int(10) unsigned NOT NULL DEFAULT '0', `time` bigint(20) unsigned NOT NULL DEFAULT '0', `version` bigint(20) unsigned NOT NULL DEFAULT '0', PRIMARY KEY (`id`,`link_type`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; SQL for ADD_LINK(Link l) operation ---------------------------------- This takes a link object represented by l as argument. Here we step through the multiple stages of the add link transaction: START TRANSACTION First we try to insert into link table. INSERT INTO linktable ( id1, id2, link_type, visibility, data, time, version ) VALUES ( l.id1, l.id2, l.link_type, VISIBILITY_DEFAULT, l.data, l.time, 
l.version ) ON DUPLICATE KEY UPDATE visibility = VISIBILITY_DEFAULT; Depending upon the number of affected rows reported by MySQL we decide whether to update count and/or data. Here is pseudocode for that: if (affectedrows == 0) { // nothing changed. A row is found but was already visible update remaining link fields } else if (affectedrows == 1) { // a new row was inserted update count table } else { // affectedrows is 2 // link switched from hidden to visible update remaining link fields update count table } Here is the statement used for updating data (the first query only updates visibility): UPDATE linktable SET visibility = VISIBILITY_DEFAULT, data = l.data, time = l.time, version = l.version WHERE id1 = l.id1 AND id2 = l.id2 AND link_type = l.link_type; Updating counttable: INSERT INTO counttable( id, link_type, count, time, version ) VALUES ( l.id1, l.link_type, 1, l.time, l.version ) ON DUPLICATE KEY UPDATE count = count + 1; And finally we commit: COMMIT //commit transaction SQL for UPDATE_LINK(Link l) operation ----------------------------------- This also takes a link object as argument and we do the same thing as ADD_LINK(l). SQL for DELETE_LINK (id1, id2, link_type) ----------------------------------------- We only require l.id1, l.id2 and l.link_type to delete a link. We have the option of expunging (actually deleting), or just hiding the link. START TRANSACTION Updating linktable. First do a select to check if the link is not there, is there and hidden, or is there and visible. In case of a visible link, later we need to mark the link as hidden, and update counttable. 
SELECT visibility FROM linktable WHERE id1 = AND id2 = AND link_type = ; if (row does not exist || link is hidden) // do nothing } else if (link is visible) { // either delete or mark the link as hidden if (expunge) { DELETE FROM linktable WHERE id1 = AND id2 = AND link_type = ; } else { UPDATE linktable SET visibility = VISIBILITY_HIDDEN WHERE id1 = AND id2 = AND link_type = ; } } Then, if needed, we update the count table: if (update_count_needed) { INSERT INTO counttable ( id, assoc_type, count, time, version ) VALUES ( , , 0, getSystemTime(), 0 ) ON DUPLICATE KEY UPDATE count = IF (count = 0, 0, count - 1), time = getSystemTime(), version = version + 1; } That finishes the transaction: COMMIT SQL for COUNT_LINKS(id1, link_type) operation --------------------------------------------- SELECT COUNT FROM counttable WHERE id = AND link_type = ; SQL for MULTIGET_LINK(id1, link_type, id2s) operation ----------------------------------------------- SELECT id1, id2, link_type, visibility, data, time, version FROM linktable WHERE id1 = AND id2 in () AND link_type = ; SQL for GET_LINK_RANGE(id1, link_type, minTime, maxTime, offset, limit) operation ---------------------------------------------------------------------- SELECT id1, id2, link_type, visibility, data, time, version FROM linktable WHERE id1 = AND link_type = AND time >= AND time <= AND visibility = VISIBILITY_DEFAULT ORDER BY time DESC LIMIT , ; ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 
"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: NOTICES ================================================ Several third party components are included with the LinkBench source distribution. Apache Commons CLI, Apache Commons Math, Apache log4j, Apache Hadoop, and Apache HBase are licensed under the Apache 2.0 license, which is available at: http://www.apache.org/licenses/LICENSE-2.0 JUnit is licensed under the Common Public License v1.0, which is available at: http://junit.sourceforge.net/cpl-v10.html MySQL Connector/J is licensed under the General Public License v2.0 with the MySQL FOSS exception. Further license information is included in the Connector/J source distribution. The license can also be viewed at: http://www.mysql.com/about/legal/licensing/foss-exception/ ================================================ FILE: README.md ================================================ - - - **_This project is not actively maintained. 
Proceed at your own risk!_** - - - LinkBench Overview ==================== LinkBench is a database benchmark developed to evaluate database performance for workloads similar to those of Facebook's production MySQL deployment. LinkBench is highly configurable and extensible. It can be reconfigured to simulate a variety of workloads and plugins can be written for benchmarking additional database systems. LinkBench is released under the Apache License, Version 2.0. Background ---------- One way of modeling social network data is as a *social graph*, where entities or *nodes* such as people, posts, comments and pages are connected by *links* which model different relationships between the nodes. Different types of links can represent friendship between two users, a user liking another object, ownership of a post, or any relationship you like. These nodes and links carry metadata such as their type, timestamps and version numbers, along with arbitrary *payload data*. Facebook represents much of its data in this way, with the data stored in MySQL databases. The goal of LinkBench is to emulate the social graph database workload and provide a realistic benchmark for database performance on social workloads. LinkBench's data model is based on the social graph, and LinkBench has the ability to generate a large synthetic social graph with key properties similar to the real graph. The workload of database operations is based on Facebook's production workload, and is also generated in such a way that key properties of the workload match the production workload. LinkBench Architecture ----------------------
++====================================++
||          LinkBench Driver          ||
++====================================++
||   +---------------------------+    ||
||   | Graph      | Workload     |    ||      Open connections  +=======+
||   | Generator  | Generator    |    ||   /------------------> | Graph |
||   +---------------------------+    ||  /-------------------> | Store |
||   |                           |<======---------------------> | Shard |
||   |   Graph Store Adapter     |<======---------------------> |       |
||   |   (e.g. MySQL adapter)    |<======---------------------> | e.g.  |
||   +---------------------------+    ||  \-------------------> | MySQL |
||                                    ||   \------------------> | Server|
||   ~~~~~~~~~~~~    ~~~~~~~~~~~~     ||                        +=======+
||   ~~~~~~~~~~~~    ~~~~~~~~~~~~     ||
||   ~~~~~~~~~~~~    ~~~~~~~~~~~~     ||
||   ~~~~~~~~~~~~    ~~~~~~~~~~~~     ||
||     Requester Threads              ||
++====================================++
The main software component of LinkBench is the driver, which acts as the client to the database being benchmarked. LinkBench is designed to support benchmarking of any database system that can support all of the required graph operations through a *Graph Store Adapter*. The LinkBench benchmark typically proceeds in two phases. The first is the *load phase*, where an initial graph is generated using the *graph generator* and loaded into the graph store in bulk. On a large benchmark run, this graph might have a billion nodes, and occupy over a terabyte on disk. The generated graph is designed to have similar properties to the Facebook social graph. For example, the number of links out from each node follows a power-law distribution, where most nodes have at most a few links, but a few nodes have many more links. The second is the *request phase*, where the actual benchmarking occurs. In the request phase, the benchmark driver spawns many request threads, which make concurrent requests to the database. The *workload generator* is used by each request thread to generate a series of database operations that mimics the Facebook production workload in many aspects. For example, the mix of different varieties of read and write operations is the same, and the access patterns create a similar pattern of hot (frequently accessed) and cold nodes in the graph. At the end of the request phase LinkBench will report a range of statistics such as latency and throughput. Getting Started =============== In this README we'll walk you through compiling LinkBench and running a MySQL benchmark. Prerequisites: -------------- These instructions assume you are using a UNIX-like system such as a Linux distribution or Mac OS X. **Java**: You will need a Java 7+ runtime environment. LinkBench by default uses the version of Java on your path. You can override this by setting the JAVA\_HOME environment variable to the directory of the desired Java runtime version. 
You will also need a Java JDK to compile from source. **Maven**: To build LinkBench, you will need the Apache Maven build tool. If you do not have it already, it is available from http://maven.apache.org . **MySQL Connector**: To benchmark MySQL with LinkBench, you need MySQL Connector/J, A version of the MySQL connector is bundled with LinkBench. If you wish to use a more recent version, replace the mysql jar under lib/. See http://dev.mysql.com/downloads/connector/j/ **MySQL Server**: To benchmark MySQL you will need a running MySQL server with free disk space. Getting and Building LinkBench ---------------------------- First get the source code git clone git@github.com:facebook/linkbench.git Then enter the directory and build LinkBench cd linkbench mvn clean package In order to skip slower tests (some run quite long), type mvn clean package -P fast-test To skip all tests mvn clean package -DskipTests If the build is successful, you should get a message like this at the end of the output: BUILD SUCCESSFUL Total time: 3 seconds If the build fails while downloading required files, you may need to configure Maven, for example to use a proxy. Example Maven proxy configuration is shown here: http://maven.apache.org/guides/mini/guide-proxies.html Now you can run the LinkBench command line tool: ./bin/linkbench Running it without arguments will show a brief help message: Did not select benchmark mode usage: linkbench [-c ] [-csvstats ] [-csvstream ] [-D ] [-L ] [-l] [-r] -c Linkbench config file -csvstats,--csvstats CSV stats output -csvstream,--csvstream CSV streaming stats output -D Override a config setting -L Log to this file -l Execute loading stage of benchmark -r Execute request stage of benchmark Running a Benchmark with MySQL ============================== In this section we will document the process of setting up a new MySQL database and running a benchmark with LinkBench. MySQL Setup ----------- We need to create a new database and tables on the MySQL server. 
We'll create a new database called `linkdb` and the needed tables to store graph nodes, links and link counts. Run the following commands in the MySQL console: create database linkdb; use linkdb; CREATE TABLE `linktable` ( `id1` bigint(20) unsigned NOT NULL DEFAULT '0', `id2` bigint(20) unsigned NOT NULL DEFAULT '0', `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', `visibility` tinyint(3) NOT NULL DEFAULT '0', `data` varchar(255) NOT NULL DEFAULT '', `time` bigint(20) unsigned NOT NULL DEFAULT '0', `version` int(11) unsigned NOT NULL DEFAULT '0', PRIMARY KEY (link_type, `id1`,`id2`), KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1 PARTITION BY key(id1) PARTITIONS 16; CREATE TABLE `counttable` ( `id` bigint(20) unsigned NOT NULL DEFAULT '0', `link_type` bigint(20) unsigned NOT NULL DEFAULT '0', `count` int(10) unsigned NOT NULL DEFAULT '0', `time` bigint(20) unsigned NOT NULL DEFAULT '0', `version` bigint(20) unsigned NOT NULL DEFAULT '0', PRIMARY KEY (`id`,`link_type`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; CREATE TABLE `nodetable` ( `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT, `type` int(10) unsigned NOT NULL, `version` bigint(20) unsigned NOT NULL, `time` int(10) unsigned NOT NULL, `data` mediumtext NOT NULL, PRIMARY KEY(`id`) ) ENGINE=InnoDB DEFAULT CHARSET=latin1; You may want to set up a special database user account for benchmarking: -- Note: replace 'linkbench'@'localhost' with 'linkbench'@'%' to allow remote connections CREATE USER 'linkbench'@'localhost' IDENTIFIED BY 'mypassword'; -- Grant all privileges on linkdb to this user GRANT ALL ON linkdb TO 'linkbench'@'localhost' If you want to obtain representative benchmark results, we highly recommend that you invest some time configuring and tuning MySQL. 
MySQL performance tuning can be complex and a comprehensive guide is beyond the scope of this readme, but here are a few basic guidelines: * Read the [Optimization section of the MySQL user manual](http://dev.mysql.com/doc/refman/5.6/en/optimization.html). * Make sure you have a sensible size setting for the [InnoDB buffer pool size](http://dev.mysql.com/doc/refman/5.6/en/optimizing-innodb-diskio.html), so as to reduce disk I/O. * Table partitioning (as shown above) can eliminate some bottlenecks that occur with LinkBench where the linktable is heavily accessed. Configuration Files ------------------- LinkBench requires several configuration files that specify the benchmark setup, the parameters of the graph to be generated, etc. Before benchmarking you will want to make a copy of the example config file: cp config/LinkConfigMysql.properties config/MyConfig.properties Open MyConfig.properties. At a minimum you will need to fill in the settings under *MySQL Connection Information* to match the server, user and database you set up earlier. E.g. # MySQL connection information host = localhost user = linkbench password = your_password port = 3306 dbid = linkdb You can read through the settings in this file. There are a lot of settings that control the benchmark itself, and the output of the LinkBench command line tool. Notice that MyConfig.properties references another file in this line: workload_file = config/FBWorkload.properties This workload file defines how the social graph should be generated and what mix of operations should make up the benchmark. The included workload file has been tuned to match our production workload in query mix. If you want to change the scale of the benchmark (the default graph is quite small for benchmarking purposes), you should look at the maxid1 setting. This controls the number of nodes in the initial graph created in the load phase: increase it to get a larger database. 
# start node id (inclusive) startid1 = 1 # end node id for initial load (exclusive) # With default config and MySQL/InnoDB, 1M ids ~= 1GB maxid1 = 10000001 Loading Data ------------ First we need to do an initial load of data using our new config file: ./bin/linkbench -c config/MyConfig.properties -l This will take a while to load, and you should get frequent progress updates. Once loading is finished you should see a notification like: LOAD PHASE COMPLETED. Loaded 10000000 nodes (Expected 10000000). Loaded 47423071 links (4.74 links per node). Took 620.4 seconds. Links/second = 76435 At the end LinkBench reports a range of statistics on load time that are of limited interest at this stage. You can significantly speed up the LinkBench load phase by making these temporary changes in the MySQL command shell before loading: alter table linktable drop key `id1_type`; set global innodb_flush_log_at_trx_commit = 2; set global sync_binlog = 0; After loading you should revert the changes: set global innodb_flush_log_at_trx_commit = 1; set global sync_binlog = 1; alter table linktable add key `id1_type` (`id1`,`link_type`,`visibility`,`time`,`id2`,`version`,`data`); Request Phase ------------- Now you can do some benchmarking. Run the request phase using the below command: ./bin/linkbench -c config/MyConfig.properties -r LinkBench will log progress to the console, along with statistics. Once all requests have been sent, or the time limit has elapsed, LinkBench will notify you of completion: REQUEST PHASE COMPLETED. 25000000 requests done in 2266 seconds. Requests/second = 11029 You can also inspect the latency statistics. For example, the following line tells us the mean latency for link range scan operations, along with latency ranges for median (p50), 99th percentile (p99) and so on. 
GET_LINKS_LIST count = 12678653 p25 = [0.7,0.8]ms p50 = [1,2]ms p75 = [1,2]ms p95 = [10,11]ms p99 = [15,16]ms max = 2064.476ms mean = 2.427ms Advanced LinkBench Command Line Usage ------------------------------------- Here are some further examples of how to use the LinkBench command line utility. You can override any properties from the configuration file from the command line with -D key=value. For example, this runs the benchmark with a 10 minute warmup before collecting statistics: ./bin/linkbench -c config/MyConfig.properties -D warmup_time=600 -r This runs the benchmark with more detailed logging, and all output going to the file linkbench.log: ./bin/linkbench -c config/MyConfig.properties -D debuglevel=DEBUG -L linkbench.log -r LinkBench supports output of statistics in csv format for easier analysis. There are two categories of statistic: the final summary and per-thread statistics output periodically through the benchmark. -csvstats controls the former and -csvstream the latter: ./bin/linkbench -c config/MyConfig.properties -csvstats final-stats.csv -csvstream streaming-stats.csv -r Benchmark Guidelines ==================== Benchmarks are often controversial and are challenging to do well. Here are some guidelines for avoiding common pitfalls with LinkBench. Database Tuning --------------- To remove confounding factors in database setup, there are several steps you can take to obtain better results: * Warm up the databases before collecting statistics. LinkBench has a *warmup_time* setting that sends requests for a period before starting to collect statistics. * Run benchmarks for long periods of time (hours rather than minutes) to reduce impact of random variation and to allow the database to reach a steady state. * If at all possible, get expert help tuning the database for your hardware and workload. 
* Benchmarks where the database fits mostly or entirely in RAM are interesting but aren't comparable to benchmarks where the database is much larger than RAM. Typically for MySQL benchmarks our databases are 10-15x larger than the buffer pool. * Databases should be benchmarked in comparable configurations. We always run LinkBench with durable writes (i.e. so that after an operation returns, the data is written to persistent storage and can be recovered in the event of a system crash). Similarly, our LinkBench MySQL implementation provides serializable consistency of operations. Weaker durability or consistency properties should be disclosed alongside benchmark results. Understanding Performance Profile Under Varying Load ---------------------------------------------------- Different systems can behave differently when heavily or lightly loaded. The default benchmark settings simulate a heavily loaded database, with 100 concurrent request threads each sending requests as quickly as they can. Some database systems perform better than others with many concurrent clients or heavy load, so performance under heavy load does not give a complete picture of performance. Typically databases are not fully loaded all of the time, so latency of requests under moderate load is also an important measure of database performance. To get a better understanding of database performance under varying load it can be helpful to: * Modify the *requesters* parameter to test database performance with varying numbers of clients. * Modify the *requestrate* config setting so that requests are throttled. Request latency vs. throughput curves help with understanding the full performance profile of a database system. Understanding Resource Utilization ---------------------------------- If you are doing a benchmark exercise, it is often a good idea to collect additional information about system resource utilization, particularly for CPU and I/O. 
This can aid a lot in understanding and comparing benchmark results beyond headline performance numbers. It is easiest to make use of collected data if you can match up timestamps to your benchmark logs, so the examples here will append timestamps to each line of output. vmstat reports useful summary information on CPU and memory: vmstat 1 | gawk '{now=strftime("%Y-%m-%d %T "); print now $0}' > linkbench.run.1/vmstat.out iostat reports some useful I/O statistics: iostat -d -x 1 | gawk '{now=strftime("%Y-%m-%d %T "); print now $0}' > linkbench.run.1/iostat.out Extending/Customizing LinkBench =============================== You can customize LinkBench in several ways. Reconfiguring Workload --------------------- We have already introduced you to the LinkBench configuration files. All settings in these files are documented and a great deal can be changed simply through these configuration files. For example: * You can experiment with read-intensive or write-intensive workloads by modifying the mix of operations. * You can alter the mix of hot/cold rows by modifying the shape parameter for ZipfDistribution. If you set it close to 1, there will be only a few very hot nodes in the database, or if you set it close to 0, accesses will be spread evenly across all nodes. Additional Workload Generators ------------------------------ It is possible to further customize the data and workload by providing new implementations of some key classes: * ProbabilityDistribution: which can be used to control the distribution of out-edges in the graph, or the access patterns for requests. * DataGenerator: which can be used to generate data in different ways for requests. Additional Database Systems --------------------------- You can write plugins to benchmark additional database systems simply by writing a Java class implementing a small set of graph operations. 
Any classes implementing the `com.facebook.LinkBench.LinkStore` and `com.facebook.LinkBench.NodeStore` interfaces can be loaded through the *linkstore* and *nodestore* configuration file keys. There are several steps you will have to go through to add a new plugin. First you need to choose how you will represent LinkBench nodes and links. Several factors play a role in the design, but speed of range scans and atomicity of updates are particularly important. The MySQL schema from earlier in this README serves as a reference implementation. Next you need to create a new Java class, such as `public class MyStore extends GraphStore`, and implement all of the required methods of `LinkStore` and `NodeStore`. Two reference implementations are provided: `LinkStoreMysql`, a fully-fledged implementation, and `MemoryLinkStore`, a toy in-memory implementation. LinkBench provides some tests to validate your implementation that you can use during development. If you extend any of the test classes `LinkStoreTestBase`, `NodeStoreTestBase` and `GraphStoreTestBase` with the required methods that set up your database, then a range of tests will be run against it. These tests are sanity checks rather than comprehensive verification of your implementation. In particular, they do not try to verify the atomicity, consistency or durability properties of the implementation. Database-specific tests are not run by default. You can enable them with Maven profiles. For example, to run the MySQL tests you can run: mvn test -P mysql-tests The MySQL related unit tests are run against a test database that needs setting up before running the unit tests. The default settings for this test database are hardcoded in src/test/java/com/facebook/LinkBench/MySqlTestConfig.java. The default settings use localhost:3306 to connect to the database and uses username "linkbench" and password "linkbench". 
The unit test code creates all the required tables, so the developer needs to setup a MySql database called "linkbench_unittestdb" to which the linkbench user has permissions to create and drop tables. **If you implement a plugin for a new database, please consider contributing it back to the main LinkBench distribution with a pull request.** ================================================ FILE: bin/genswift ================================================ #!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # The command script # # Environment Variables # # JAVA_HOME The java implementation to use. Overrides JAVA_HOME. # BIN=$(dirname $(readlink -f "$0")) HOME=`dirname $BIN` echo "HOME is at $HOME" if [ "$JAVA_HOME" = "" ]; then JAVA=`which java` if [ ! -x $JAVA ]; then echo "Error: java not found, set JAVA_HOME or add java to PATH." exit 1 fi else JAVA=$JAVA_HOME/bin/java fi echo "Using java at: $JAVA" SWIFT="$BIN/swift-generator-cli-0.11.0-standalone.jar" # run it pushd "$HOME/src/main/java/com/facebook/rocks/swift" rm *.java rm -rf gen-swift "$JAVA" -jar "$SWIFT" -tweak ADD_CLOSEABLE_INTERFACE rocks_common.thrift "$JAVA" -jar "$SWIFT" -tweak ADD_CLOSEABLE_INTERFACE rocks.thrift mv gen-swift/com/facebook/rocks/swift/* . 
rm -rf gen-swift popd ================================================ FILE: bin/linkbench ================================================ #!/usr/bin/env bash # Licensed to the Apache Software Foundation (ASF) under one or more # contributor license agreements. See the NOTICE file distributed with # this work for additional information regarding copyright ownership. # The ASF licenses this file to You under the Apache License, Version 2.0 # (the "License"); you may not use this file except in compliance with # the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # The command script # # Environment Variables # # JAVA_HOME The java implementation to use. Overrides JAVA_HOME. # # HEAPSIZE The maximum amount of heap to use, in MB. # Default is 1000. # # OPTS Extra Java runtime options. # # CONF_DIR Alternate conf dir. Default is ./config. # # This script creates the benchmark data and then runs the workload # on it bin=`dirname "$0"` bin=`cd "$bin"; pwd` # Export LINKBENCH_HOME so that LinkBench Java code can access env var export LINKBENCH_HOME=`dirname $bin` if [ "$JAVA_HOME" = "" ]; then JAVA=`which java` if [ ! -x $JAVA ]; then echo "Error: java not found, set JAVA_HOME or add java to PATH." 
exit 1 fi else JAVA=$JAVA_HOME/bin/java fi echo "Using java at: $JAVA" JAVA_HEAP_MAX=-Xmx1000m # check envvars which might override default args if [ "$HEAPSIZE" != "" ]; then #echo "run with heapsize $HEAPSIZE" JAVA_HEAP_MAX="-Xmx""$HEAPSIZE""m" #echo $JAVA_HEAP_MAX fi # CLASSPATH initially contains $CONF_DIR CLASSPATH="${CONF_DIR}" CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar # so that filenames w/ spaces are handled correctly in loops below IFS= # add latest jar to CLASSPATH CLASSPATH=${CLASSPATH}:target/FacebookLinkBench.jar: # restore ordinary behaviour unset IFS # figure out which class to run CLASS='com.facebook.LinkBench.LinkBenchDriver' # run it exec "$JAVA" $JAVA_HEAP_MAX $OPTS $JMX_OPTS -classpath "$CLASSPATH" $CLASS \ "$@" ================================================ FILE: bin/swift-generator-cli-0.11.0-standalone.jar ================================================ ================================================ FILE: config/FBWorkload.properties ================================================ # LinkBench workload configuration file for Facebook social graph workload # # # Default parameters emulate a scaled-down version of Facebook's real # social graph workload. The default parameters generate a benchmark # database of approximately 10GB, which is approximate for testing, but # too small for full-scale benchmarking. To generate a bigger graph, # increase maxid1. # Optionally you can change workload parameters to modify benchmark data # and the request workload. ###################### # Data Files # ###################### # Path for file with real distributions for links, accesses, etc. 
# Can be absolute path, or relative path from LinkBench home directory data_file = config/Distribution.dat ##################################### # # # Graph Generation Configuration # # # ##################################### # start node id (inclusive) startid1 = 1 # end node id for initial load (exclusive) # With default config and MySQL/InnoDB, 1M ids ~= 1GB maxid1 = 10000001 # Number of distinct link types (link outdegree is shared among types) link_type_count = 2 # +----------------------------+ # |Graph outdegree distribution| # +----------------------------+ # These parameters control how the outdegree of each node in the graph # is chosen. # nlinks_func selects the outdegree distribution function. Options are: ## REAL: use the empirically observed distribution in the data file nlinks_func = real ## ProbabilityDistribution class name: use the probability distribution # with other parameters for that class with the nlinks_ prefix. E.g. # nlinks_func = com.facebook.LinkBench.distributions.ZipfDistribution # nlinks_shape = 1.5 # nlinks_mean = 2000000 ## A synthetic distribution # RECIPROCAL: small id1s tend to get more #links : # #links(id1) = maxid1/(1+id1) # MULTIPLES: id1s that are multiples of nlinks_config get # nlinks_config links (rest get nlinks_default) # PERFECT_POWERS means perfect squares/cubes/etc get more #links # (rest get nlinks_default) # the larger a perfect square is, the more #links it gets. 
# nlinks_config controls whether it is squares, cubes, etc # EXPONENTIAL means exponential i.e powers of nlinks_config get more #links #nlinks_func = RECIPROCAL # config param that goes along with nlinks_func #nlinks_config = 1 # minimum link count: use 0 or 1 for this #nlinks_default = 0 # +--------------------------+ # | Link ID2 selection | # +--------------------------+ # These options allow selection of alternative behavior for selecting # link id2s of edges in graph # if nonzero, generate id2 uniformly between 0 and this - 1 during load # and lookups. Must be < 2^31 # randomid2max = 0 # +----------------------+ # |Node/link payload data| # +----------------------+ # Median payload data size of links link_datasize = 8 # Data generator for new links # Default settings give ~30% compression ratio link_add_datagen = com.facebook.LinkBench.generators.MotifDataGenerator link_add_datagen_startbyte = 32 link_add_datagen_endbyte = 100 link_add_datagen_uniqueness = 0.225 link_add_datagen_motif_length = 128 # Data generator for link updates link_up_datagen = com.facebook.LinkBench.generators.MotifDataGenerator link_up_datagen_startbyte = 32 link_up_datagen_endbyte = 100 link_up_datagen_uniqueness = 0.225 link_up_datagen_motif_length = 128 # Median payload data size of nodes node_datasize = 128 # Data generator for new nodes # Node data generators give ~60% compression ratio node_add_datagen = com.facebook.LinkBench.generators.MotifDataGenerator node_add_datagen_startbyte = 50 node_add_datagen_endbyte = 220 node_add_datagen_uniqueness = 0.63 # Data generator for node updates node_up_datagen = com.facebook.LinkBench.generators.MotifDataGenerator node_up_datagen_startbyte = 50 node_up_datagen_endbyte = 220 node_up_datagen_uniqueness = 0.63 ##################################### # # # Request Workload Configuration # # # ##################################### # configuration for generating id2 in the request phase # 0 means thread i generates id2 randomly without restriction; 
# 1 means thread i generates id2 such that id2 % nrequesters = i, # this is to prevent threads from adding/deleting/updating same cells, # always use this configuration (1) when using HBaseGeneralAtomicityTesting; id2gen_config = 0 # Operation mix for request phase # numbers are percentages and must sum to 100 addlink = 8.9886601 deletelink = 2.9907664 updatelink = 8.0122125 countlink = 4.8863567 getlink = 0.5261142 getlinklist = 50.7119145 getnode = 12.9326683 addnode = 2.5732789 updatenode = 7.366437 deletenode = 1.0115914 # Controls what proportion of linklist queries above will try # to retrieve more history getlinklist_history = 0.3 # +-------------------------+ # |Node access distributions| # +-------------------------+ # These control the access patterns of different classes of operations. # The following distributions can be configured # read_* : link reads (dist is correlated with outdegree) # write_* : link writes (dist is correlated with outdegree) # read_uncorr_* : optionally, mix in an uncorrelated distribution # write_uncorr_* : optionally, mix in an uncorrelated distribution # node_read_* : node reads # node_update_* : node updates # node_delete_* : node deletes # For each of these the *_func parameter selects an access pattern. # The available options are: # * Any ProbabilityDistribution class (e.g. 
ZipfDistribution) # * REAL - Real empirical distribution for reads/writes as appropriate # * ROUND_ROBIN - Cycle through ids # * RECIPROCAL - Pick with probability # * MULTIPLE - Pick a multiple of config parameter # * POWER - Pick a power of config parameter # * PERFECT_POWER - Pick a perfect power (square, cube, etc) with exponent # as configured # read_function controls access patterns for link reads # shape for Zipf based on pareto parameter of 1.25 read_function = com.facebook.LinkBench.distributions.ZipfDistribution read_shape = 0.8 # Example of using POWER #read_function = POWER #read_param = 2 # read_uncorr_function is alternative to read_function that is # uncorrelated to outdegree # blend: % of link reads to use uncorrelated func for # Here we have high proportion uncorrelated to keep range scan size down read_uncorr_blend = 99.5 read_uncorr_function = com.facebook.LinkBench.distributions.ZipfDistribution read_uncorr_shape = 0.8 # write_function controls access patterns for link writes # shape for Zipf based on pareto parameter of 1.35 write_function = com.facebook.LinkBench.distributions.ZipfDistribution write_shape = 0.741 # write_uncorr_function is alternative to write_* that is uncorrelated # to outdegree # 95% uncorrelated give weak correlation with outdegree write_uncorr_blend = 95 write_uncorr_shape = 0.741 write_uncorr_config = 1 # node_read_function controls reads for graph nodes # shape for Zipf based on pareto parameter of 1.6 node_read_function = com.facebook.LinkBench.distributions.ZipfDistribution node_read_shape = 0.625 # node_update_functions controls updates for graph nodes # shape for Zipf based on pareto parameter of 1.65 node_update_function = com.facebook.LinkBench.distributions.ZipfDistribution node_update_shape = 0.606 # Use uniform rather than skewed distribution for deletes, because: # a) we don't want to delete the most frequently read nodes # b) nodes can only be deleted once node_delete_function = 
com.facebook.LinkBench.distributions.UniformDistribution # Distribution to select how many ids per link get request. Comment # out to only get one link at a time link_multiget_dist = com.facebook.LinkBench.distributions.GeometricDistribution link_multiget_dist_min = 1 link_multiget_dist_max = 128 # Prob param for geometric distribution approximating real mean of ~2.6 link_multiget_dist_prob = 0.382 ================================================ FILE: config/LinkConfigMysql.properties ================================================ # Sample MySQL LinkBench configuration file. # # This file contains settings for the data store, as well as controlling # benchmark output and behavior. The workload is defined in a separate # file. # # At a minimum to use this file, you will need to fill in MySQL # connection information. ########################## # Workload Configuration # ########################## # Path for workload properties file. Properties in this file will override # those in workload properties file. # Can be absolute path, or relative path from LinkBench home directory workload_file = config/FBWorkload.properties ################################# # # # Data Source Configuration # # # ################################# # Implementation of LinkStore and NodeStore to use linkstore = com.facebook.LinkBench.LinkStoreMysql nodestore = com.facebook.LinkBench.LinkStoreMysql # MySQL connection information host = yourhostname.here user = MySQLuser password = MySQLpass port = 3306 # dbid: the database name to use dbid = linkdb # database table names linktable = linktable # counttable not required for all databases counttable = counttable nodetable = nodetable ############################### # # # Logging and Stats Setup # # # ############################### # This controls logging output. 
Settings are, in order of increasing # verbosity: # ERROR: only output serious errors # WARN: output warnings # INFO: output additional information such as progress # DEBUG: output high-level debugging information # TRACE: output more detailed lower-level debugging information debuglevel = INFO # display frequency of per-thread progress in seconds progressfreq = 300 # display frequency of per-thread stats (latency, etc) in seconds displayfreq = 1800 # display global load update (% complete, etc) after this many links loaded load_progress_interval = 50000 # display global update on request phase (% complete, etc) after this many ops req_progress_interval = 10000 # max number of samples to store for each per-thread statistic maxsamples = 10000 ############################### # # # Load Phase Configuration # # # ############################### # number of threads to run during load phase loaders = 10 # whether to generate graph nodes during load process generate_nodes = true # partition loading work into chunks of id1s of this size loader_chunk_size = 2048 # seed for initial data load random number generation (optional) # load_random_seed = 12345 ################################## # # # Request Phase Configuration # # # ################################## # number of threads to run during request phase requesters = 100 # read + write requests per thread requests = 500000 # request rate per thread. <= 0 means unthrottled requests, > 0 limits # the average request rate to that number of requests per second per thread, # with the inter-request intervals governed by an exponential distribution requestrate = 0 # max duration in seconds for request phase of benchmark maxtime = 100000 # warmup time in seconds. The benchmark is run for a warmup period # during which no statistics are recorded. This allows database caches, # etc to warm up. 
warmup_time = 0 # seed for request random number generation (optional) # request_random_seed = 12345 # maximum number of failures per requester to tolerate before aborting # negative number means never abort max_failed_requests = 100 ############################### # # # MySQL Tuning # # # ############################### # Optional tuning parameters # # of link inserts to batch together when loading # MySQL_bulk_insert_batch = 1024 # optional tuning - disable binary logging during load phase # WARNING: do not use unless you know what you are doing, it can # break replication amongst other things # MySQL_disable_binlog_load = true ================================================ FILE: config/LinkConfigRocksDb.properties ================================================ # Sample RocksDb LinkBench configuration file. # # This file contains settings for the data store, as well as controlling # benchmark output and behavior. The workload is defined in a separate # file. # ########################## # Workload Configuration # ########################## # Path for workload properties file. Properties in this file will override # those in workload properties file. # Can be absolute path, or relative path from LinkBench home directory workload_file = config/FBWorkload.properties ################################# # # # Data Source Configuration # # # ################################# # Implementation of LinkStore and NodeStore to use linkstore = com.facebook.LinkBench.LinkStoreRocksDb nodestore = com.facebook.LinkBench.LinkStoreRocksDb # RocksDb connection information host = yourhostname.here port = 9090 # dbid: the database name to use dbid = linkdb ############################### # # # Logging and Stats Setup # # # ############################### # This controls logging output. 
Settings are, in order of increasing # verbosity: # ERROR: only output serious errors # WARN: output warnings # INFO: output additional information such as progress # DEBUG: output high-level debugging information # TRACE: output more detailed lower-level debugging information debuglevel = INFO # display frequency of per-thread progress in seconds progressfreq = 300 # display frequency of per-thread stats (latency, etc) in seconds displayfreq = 1800 # display global load update (% complete, etc) after this many links loaded load_progress_interval = 50000 # display global update on request phase (% complete, etc) after this many ops req_progress_interval = 10000 # max number of samples to store for each per-thread statistic maxsamples = 10000 ############################### # # # Load Phase Configuration # # # ############################### # number of threads to run during load phase loaders = 10 # whether to generate graph nodes during load process generate_nodes = true # partition loading work into chunks of id1s of this size loader_chunk_size = 2048 # seed for initial data load random number generation (optional) # load_random_seed = 12345 ################################## # # # Request Phase Configuration # # # ################################## # number of threads to run during request phase requesters = 100 # read + write requests per thread requests = 500000 # request rate per thread. <= 0 means unthrottled requests, > 0 limits # the average request rate to that number of requests per second per thread, # with the inter-request intervals governed by an exponential distribution requestrate = 0 # max duration in seconds for request phase of benchmark maxtime = 100000 # warmup time in seconds. The benchmark is run for a warmup period # during which no statistics are recorded. This allows database caches, # etc to warm up. 
warmup_time = 0 # seed for request random number generation (optional) # request_random_seed = 12345 # maximum number of failures per requester to tolerate before aborting # negative number means never abort max_failed_requests = 100 # write options write_options_sync = false write_options_disableWAL = false ================================================ FILE: pom.xml ================================================ 4.0.0 com.facebook.linkbench linkbench 0.1-SNAPSHOT jar https://github.com/facebook/linkbench 2012 tarmstrong Tim Armstrong tarmstrong@fb.com dhruba Dhruba Borthakur dhruba@fb.com amayank Mayank Agarwal amayank@fb.com andrewcox Andrew Cox andrewcox@fb.com scm:git:https://github.com/facebook/linkbench.git scm:git@github.com:facebook/linkbench.git https://github.com/facebook/linkbench HEAD commons-cli commons-cli 1.2 org.apache.commons commons-math3 3.0 org.apache.hadoop hadoop-core 0.20.2 org.apache.hbase hbase 0.94.3 com.google.guava guava org.jboss.netty netty asm asm com.facebook.swift swift-codec 0.13.2 com.facebook.swift swift-service 0.13.2 junit junit 4.11 log4j log4j 1.2.17 org.slf4j slf4j-simple 1.7.0 mysql mysql-connector-java 5.1.22 nexus central http://repo1.maven.org/maven2 true true central http://repo1.maven.org/maven2 true true mysql-tests org.apache.maven.plugins maven-surefire-plugin **/*MySql*.class fast-test org.apache.maven.plugins maven-surefire-plugin com.facebook.LinkBench.testtypes.ProviderTest, com.facebook.LinkBench.testtypes.SlowTest org.apache.maven.plugins maven-compiler-plugin 3.0 1.7 1.7 true true org.apache.maven.plugins maven-source-plugin 2.1.2 attach-sources verify jar-no-fork test-jar org.apache.maven.plugins maven-jar-plugin 2.4 test-jar org.apache.maven.plugins maven-assembly-plugin 2.4 false jar-with-dependencies FacebookLinkBench package single org.apache.maven.plugins maven-surefire-plugin 2.14 org.apache.maven.surefire surefire-junit47 2.14 true com.facebook.LinkBench.testtypes.ProviderTest com.facebook.mojo 
swift-maven-plugin 0.13.2 generate ${basedir}/src/main/java/com/facebook/rocks/swift/ * true ================================================ FILE: src/main/java/com/facebook/LinkBench/Config.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; /** * Consolidate shared config key strings in this file * See sample config file for documentation of config properties * @author tarmstrong * */ public class Config { public static final String DEBUGLEVEL = "debuglevel"; /* Control store implementations used */ public static final String LINKSTORE_CLASS = "linkstore"; public static final String NODESTORE_CLASS = "nodestore"; /* Schema and tables used */ public static final String DBID = "dbid"; public static final String LINK_TABLE = "linktable"; public static final String COUNT_TABLE = "counttable"; public static final String NODE_TABLE = "nodetable"; /* Control graph structure */ public static final String LOAD_RANDOM_SEED = "load_random_seed"; public static final String MIN_ID = "startid1"; public static final String MAX_ID = "maxid1"; public static final String GENERATE_NODES = "generate_nodes"; public static final String RANDOM_ID2_MAX = "randomid2max"; public static final String NLINKS_PREFIX = "nlinks_"; public static final String NLINKS_FUNC = "nlinks_func"; public static final String NLINKS_CONFIG = "nlinks_config"; public static final String NLINKS_DEFAULT = 
"nlinks_default"; public static final String LINK_TYPE_COUNT ="link_type_count"; /* Data generation */ public static final String LINK_DATASIZE = "link_datasize"; public static final String NODE_DATASIZE = "node_datasize"; public static final String UNIFORM_GEN_STARTBYTE = "startbyte"; public static final String UNIFORM_GEN_ENDBYTE = "endbyte"; public static final String MOTIF_GEN_UNIQUENESS = "uniqueness"; public static final String MOTIF_GEN_LENGTH = "motif_length"; public static final String LINK_ADD_DATAGEN = "link_add_datagen"; public static final String LINK_ADD_DATAGEN_PREFIX = "link_add_datagen_"; public static final String LINK_UP_DATAGEN = "link_up_datagen"; public static final String LINK_UP_DATAGEN_PREFIX = "link_up_datagen_"; public static final String NODE_ADD_DATAGEN = "node_add_datagen"; public static final String NODE_ADD_DATAGEN_PREFIX = "node_add_datagen_"; public static final String NODE_UP_DATAGEN = "node_up_datagen"; public static final String NODE_UP_DATAGEN_PREFIX = "node_up_datagen_"; // Sigma values control variance of data size log normal distribution public static final double LINK_DATASIZE_SIGMA = 1.0; public static final double NODE_DATASIZE_SIGMA = 1.0; /* Loading performance tuning */ public static final String NUM_LOADERS = "loaders"; public static final String LOADER_CHUNK_SIZE = "loader_chunk_size"; /* Request workload */ public static final String NUM_REQUESTERS = "requesters"; public static final String REQUEST_RANDOM_SEED = "request_random_seed"; // Distribution of accesses to IDs public static final String READ_CONFIG_PREFIX = "read_"; public static final String WRITE_CONFIG_PREFIX = "write_"; public static final String NODE_READ_CONFIG_PREFIX = "node_read_"; public static final String NODE_UPDATE_CONFIG_PREFIX = "node_update_"; public static final String NODE_DELETE_CONFIG_PREFIX = "node_delete_"; public static final String ACCESS_FUNCTION_SUFFIX = "function"; public static final String ACCESS_CONFIG_SUFFIX = "config"; public 
static final String READ_FUNCTION = "read_function";
  public static final String READ_CONFIG = "read_config";
  public static final String WRITE_FUNCTION = "write_function";
  public static final String WRITE_CONFIG = "write_config";

  public static final String READ_UNCORR_CONFIG_PREFIX = "read_uncorr_";
  // BUG FIX: this was "read_uncorr_" (copy-paste of the line above), which
  // made WRITE_UNCORR_FUNCTION and WRITE_UNCORR_BLEND below resolve to the
  // read_uncorr_* keys, so the uncorrelated *write* access distribution could
  // never be configured independently of the read one.
  public static final String WRITE_UNCORR_CONFIG_PREFIX = "write_uncorr_";

  public static final String READ_UNCORR_FUNCTION =
                          READ_UNCORR_CONFIG_PREFIX + ACCESS_FUNCTION_SUFFIX;
  public static final String WRITE_UNCORR_FUNCTION =
                          WRITE_UNCORR_CONFIG_PREFIX + ACCESS_FUNCTION_SUFFIX;
  public static final String BLEND_SUFFIX = "blend";
  public static final String READ_UNCORR_BLEND =
                          READ_UNCORR_CONFIG_PREFIX + BLEND_SUFFIX;
  public static final String WRITE_UNCORR_BLEND =
                          WRITE_UNCORR_CONFIG_PREFIX + BLEND_SUFFIX;

  // Probability of different operations
  public static final String PR_ADD_LINK = "addlink";
  public static final String PR_DELETE_LINK = "deletelink";
  public static final String PR_UPDATE_LINK = "updatelink";
  public static final String PR_COUNT_LINKS = "countlink";
  public static final String PR_GET_LINK = "getlink";
  public static final String PR_GET_LINK_LIST = "getlinklist";
  public static final String PR_ADD_NODE = "addnode";
  public static final String PR_UPDATE_NODE = "updatenode";
  public static final String PR_DELETE_NODE = "deletenode";
  public static final String PR_GET_NODE = "getnode";
  public static final String PR_GETLINKLIST_HISTORY = "getlinklist_history";

  public static final String WARMUP_TIME = "warmup_time";
  public static final String MAX_TIME = "maxtime";
  public static final String REQUEST_RATE = "requestrate";
  public static final String NUM_REQUESTS = "requests";
  public static final String MAX_FAILED_REQUESTS = "max_failed_requests";
  public static final String ID2GEN_CONFIG = "id2gen_config";
  public static final String LINK_MULTIGET_DIST = "link_multiget_dist";
  public static final String LINK_MULTIGET_DIST_MIN = "link_multiget_dist_min";
  public static final String LINK_MULTIGET_DIST_MAX = "link_multiget_dist_max";
  public static final String LINK_MULTIGET_DIST_PREFIX = "link_multiget_dist_";

  /* Probability distribution parameters */
  public static final String PROB_MEAN = "mean";

  /* Statistics collection and reporting */
  public static final String MAX_STAT_SAMPLES = "maxsamples";
  public static final String DISPLAY_FREQ = "displayfreq";
  public static final String MAPRED_REPORT_PROGRESS = "reportprogress";
  public static final String PROGRESS_FREQ = "progressfreq";

  /* Reporting for progress indicators */
  // NOTE(review): unlike the other keys these two are not declared final —
  // looks unintentional, but left as-is since something may assign them.
  public static String REQ_PROG_INTERVAL = "req_progress_interval";
  public static String LOAD_PROG_INTERVAL = "load_progress_interval";

  /* MapReduce specific configuration */
  public static final String TEMPDIR = "tempdir";
  public static final String LOAD_DATA = "loaddata";
  public static final String MAPRED_USE_INPUT_FILES = "useinputfiles";

  /* External data */
  public static final String DISTRIBUTION_DATA_FILE = "data_file";
  public static final String WORKLOAD_CONFIG_FILE = "workload_file";
}

================================================ FILE: src/main/java/com/facebook/LinkBench/ConfigUtil.java ================================================

/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ package com.facebook.LinkBench; import java.io.File; import java.io.IOException; import java.util.Properties; import org.apache.log4j.ConsoleAppender; import org.apache.log4j.EnhancedPatternLayout; import org.apache.log4j.FileAppender; import org.apache.log4j.Layout; import org.apache.log4j.Level; import org.apache.log4j.Logger; public class ConfigUtil { public static final String linkbenchHomeEnvVar = "LINKBENCH_HOME"; public static final String LINKBENCH_LOGGER = "com.facebook.linkbench"; /** * @return null if not set, or if not valid path */ public static String findLinkBenchHome() { String linkBenchHome = System.getenv("LINKBENCH_HOME"); if (linkBenchHome != null && linkBenchHome.length() > 0) { File dir = new File(linkBenchHome); if (dir.exists() && dir.isDirectory()) { return linkBenchHome; } } return null; } public static Level getDebugLevel(Properties props) throws LinkBenchConfigError { if (props == null) { return Level.DEBUG; } String levStr = props.getProperty(Config.DEBUGLEVEL); if (levStr == null) { return Level.DEBUG; } try { int level = Integer.parseInt(levStr); if (level <= 0) { return Level.INFO; } else if (level == 1) { return Level.DEBUG; } else { return Level.TRACE; } } catch (NumberFormatException e) { Level lev = Level.toLevel(levStr, null); if (lev != null) { return lev; } else { throw new LinkBenchConfigError("Invalid setting for debug level: " + levStr); } } } /** * Setup log4j to log to stderr with a timestamp and thread id * Could add in configuration from file later if it was really necessary * @param props * @param logFile if not null, info logging will be diverted to this file * @throws IOException * @throws Exception */ public static void setupLogging(Properties props, String logFile) throws LinkBenchConfigError, IOException { Layout fmt = new EnhancedPatternLayout("%p %d [%t]: %m%n%throwable{30}"); Level logLevel = ConfigUtil.getDebugLevel(props); Logger.getRootLogger().removeAllAppenders(); Logger lbLogger = 
Logger.getLogger(LINKBENCH_LOGGER); lbLogger.setLevel(logLevel); ConsoleAppender console = new ConsoleAppender(fmt, "System.err"); /* If logfile is specified, put full stream in logfile and only * print important messages to terminal */ if (logFile != null) { console.setThreshold(Level.WARN); // Only print salient messages lbLogger.addAppender(new FileAppender(fmt, logFile)); } lbLogger.addAppender(console); } /** * Look up key in props, failing if not present * @param props * @param key * @return * @throws LinkBenchConfigError thrown if key not present */ public static String getPropertyRequired(Properties props, String key) throws LinkBenchConfigError { String v = props.getProperty(key); if (v == null) { throw new LinkBenchConfigError("Expected configuration key " + key + " to be defined"); } return v; } public static int getInt(Properties props, String key) throws LinkBenchConfigError { return getInt(props, key, null); } /** * Retrieve a config key and convert to integer * @param props * @param key * @return a non-null string value * @throws LinkBenchConfigError if not present or not integer */ public static int getInt(Properties props, String key, Integer defaultVal) throws LinkBenchConfigError { if (defaultVal != null && !props.containsKey(key)) { return defaultVal; } String v = getPropertyRequired(props, key); try { return Integer.parseInt(v); } catch (NumberFormatException e) { throw new LinkBenchConfigError("Expected configuration key " + key + " to be integer, but was '" + v + "'"); } } public static long getLong(Properties props, String key) throws LinkBenchConfigError { return getLong(props, key, null); } /** * Retrieve a config key and convert to long integer * @param props * @param key * @param defaultVal default value if key not present * @return * @throws LinkBenchConfigError if not present or not integer */ public static long getLong(Properties props, String key, Long defaultVal) throws LinkBenchConfigError { if (defaultVal != null && 
!props.containsKey(key)) { return defaultVal; } String v = getPropertyRequired(props, key); try { return Long.parseLong(v); } catch (NumberFormatException e) { throw new LinkBenchConfigError("Expected configuration key " + key + " to be long integer, but was '" + v + "'"); } } public static double getDouble(Properties props, String key) throws LinkBenchConfigError { return getDouble(props, key, null); } /** * Retrieve a config key and convert to double * @param props * @param key * @param defaultVal default value if key not present * @return * @throws LinkBenchConfigError if not present or not double */ public static double getDouble(Properties props, String key, Double defaultVal) throws LinkBenchConfigError { if (defaultVal != null && !props.containsKey(key)) { return defaultVal; } String v = getPropertyRequired(props, key); try { return Double.parseDouble(v); } catch (NumberFormatException e) { throw new LinkBenchConfigError("Expected configuration key " + key + " to be double, but was '" + v + "'"); } } /** * Retrieve a config key and convert to boolean. 
* Valid boolean strings are "true" or "false", case insensitive * @param props * @param key * @return * @throws LinkBenchConfigError if not present or not boolean */ public static boolean getBool(Properties props, String key) throws LinkBenchConfigError { return getBool(props, key, null); } public static boolean getBool(Properties props, String key, Boolean defaultVal) throws LinkBenchConfigError { if (defaultVal != null && !props.containsKey(key)) { return defaultVal; } String v = getPropertyRequired(props, key).trim().toLowerCase(); // Parse manually since parseBoolean accepts many things as "false" if (v.equals("true")) { return true; } else if (v.equals("false")) { return false; } else { throw new LinkBenchConfigError("Expected configuration key " + key + " to be true or false, but was '" + v + "'"); } } } ================================================ FILE: src/main/java/com/facebook/LinkBench/GraphStore.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench; import java.util.List; /** * An abstract class for storing both nodes and edges * @author tarmstrong */ public abstract class GraphStore extends LinkStore implements NodeStore { /** Provide generic implementation */ public long[] bulkAddNodes(String dbid, List nodes) throws Exception { long ids[] = new long[nodes.size()]; int i = 0; for (Node node: nodes) { long id = addNode(dbid, node); ids[i++] = id; } return ids; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/InvertibleShuffler.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
 */

package com.facebook.LinkBench;

import java.util.Random;

/**
 * Shuffler designed to make computing permutation and inverse easy.
 *
 * The permutation partitions [0, n) into shuffleGroups residue classes
 * modulo shuffleGroups, and rotates each class by a per-group random
 * offset chosen at construction time.  Because each index stays inside
 * its own residue class, the inverse can be computed by applying the
 * opposite rotation (see permute(long, boolean)).
 */
public class InvertibleShuffler {
  // Per-group rotation amounts, each in [0, minGroupSize)
  private final long[] params;
  private final int shuffleGroups;
  long n;
  long nRoundedUp; // n rounded up to next multiple of shuffleGroups
  long nRoundedDown; // n rounded down to next multiple of shuffleGroups
  // Number of elements in each of the smaller residue classes; big groups
  // (the first n % shuffleGroups of them) have one element more.
  int minGroupSize;

  public InvertibleShuffler(long seed, int shuffleGroups, long n) {
    this(new Random(seed), shuffleGroups, n);
  }

  /**
   * @param rng source of the per-group rotation offsets
   * @param shuffleGroups number of residue classes; clamped to n
   * @param n size of the permuted domain [0, n)
   */
  // NOTE(review): n == 0 would make minGroupSize 0 and rng.nextInt(0)
  // throw IllegalArgumentException — callers presumably pass n >= 1; verify.
  public InvertibleShuffler(Random rng, int shuffleGroups, long n) {
    if (shuffleGroups > n) {
      // Can't have more shuffle groups than items
      shuffleGroups = (int)n;
    }
    this.shuffleGroups = shuffleGroups;
    this.n = n;
    this.params = new long[shuffleGroups];
    this.minGroupSize = (int)n / shuffleGroups;

    for (int i = 0; i < shuffleGroups; i++) {
      // nextInt(bound) is already non-negative; Math.abs is redundant but
      // harmless, kept as-is
      params[i] = Math.abs(rng.nextInt(minGroupSize));
    }
    this.nRoundedDown = (n / shuffleGroups) * shuffleGroups;
    this.nRoundedUp = n == nRoundedDown ? n : nRoundedDown + shuffleGroups;
  }

  /** Forward permutation of index i. */
  public long permute(long i) {
    return permute(i, false);
  }

  /** Inverse permutation: invertPermute(permute(i)) == i. */
  public long invertPermute(long i) {
    return permute(i, true);
  }

  /**
   * Apply the permutation (or its inverse) to index i.
   *
   * @param i index in [0, n)
   * @param inverse if true, apply the opposite rotation
   * @return permuted index, always in the same residue class as i
   * @throws IllegalArgumentException if i is out of range
   */
  public long permute(long i, boolean inverse) {
    if (i < 0 || i >= n) {
      throw new IllegalArgumentException("Bad index to permute: " + i
          + ": out of range [0:" + (n - 1) + "]");
    }
    // Number of the group
    int group = (int) (i % shuffleGroups);
    // Whether this is a big or small group
    boolean bigGroup = group < n % shuffleGroups;

    // Calculate the (positive) rotation
    long rotate = params[group];
    if (inverse) {
      // Reverse the rotation: group size is minGroupSize + 1 for big
      // groups, minGroupSize for small ones
      if (bigGroup) {
        rotate = minGroupSize + 1 - rotate;
      } else {
        rotate = minGroupSize - rotate;
      }
      assert(rotate >= 0);
    }

    // Step forward rotate positions within the residue class
    long j = (i + shuffleGroups * rotate);
    long result;
    if (j < n) {
      result = j;
    } else {
      // Depending on the group there might be different numbers of
      // ids in the ring
      if (bigGroup) {
        result = j % nRoundedUp;
      } else {
        result = j % nRoundedDown;
      }
      if (result >= n) {
        // Wrapped into the padding beyond n: land on the class's first
        // element instead
        result = group;
      }
    }
    assert(result % shuffleGroups == group);
    return result;
  }
}

================================================ FILE: src/main/java/com/facebook/LinkBench/Link.java ================================================

/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ package com.facebook.LinkBench; import java.util.Arrays; public class Link { public Link(long id1, long link_type, long id2, byte visibility, byte[] data, int version, long time) { this.id1 = id1; this.link_type = link_type; this.id2 = id2; this.visibility = visibility; this.data = data; this.version = version; this.time = time; } Link() { link_type = LinkStore.DEFAULT_LINK_TYPE; visibility = LinkStore.VISIBILITY_DEFAULT; } public boolean equals(Object other) { if (other instanceof Link) { Link o = (Link) other; return id1 == o.id1 && id2 == o.id2 && link_type == o.link_type && visibility == o.visibility && version == o.version && time == o.time && Arrays.equals(data, o.data); } else { return false; } } public String toString() { return String.format("Link(id1=%d, id2=%d, link_type=%d," + "visibility=%d, version=%d," + "time=%d, data=%s", id1, id2, link_type, visibility, version, time, data.toString()); } /** * Clone an existing link * @param l */ public Link clone() { Link l = new Link(); l.id1 = this.id1; l.link_type = this.link_type; l.id2 = this.id2; l.visibility = this.visibility; l.data = this.data.clone(); l.version = this.version; l.time = this.time; return l; } /** The node id of the source of directed edge */ public long id1; /** The node id of the target of directed edge */ public long id2; /** Type of link */ public long link_type; /** Visibility mode */ public byte visibility; /** Version of link */ public int version; /** time is the sort key for links. Often it contains a timestamp, but it can be used as a arbitrary user-defined sort key. */ public long time; /** Arbitrary payload data */ public byte[] data; } ================================================ FILE: src/main/java/com/facebook/LinkBench/LinkBenchConfigError.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.LinkBench;

/**
 * Unchecked exception thrown when a LinkBench configuration setting is
 * missing or malformed (see ConfigUtil accessors).
 */
public class LinkBenchConfigError extends RuntimeException {
  // RuntimeException is Serializable, so declare an explicit version id
  private static final long serialVersionUID = 1L;

  public LinkBenchConfigError(String msg) {
    super(msg);
  }
}

================================================ FILE: src/main/java/com/facebook/LinkBench/LinkBenchDriver.java ================================================

/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ package com.facebook.LinkBench; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.PrintStream; import java.nio.ByteBuffer; import java.security.NoSuchAlgorithmException; import java.security.SecureRandom; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import java.util.Properties; import java.util.Random; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CountDownLatch; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.atomic.AtomicLong; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.GnuParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.log4j.Logger; import com.facebook.LinkBench.LinkBenchLoad.LoadChunk; import com.facebook.LinkBench.LinkBenchLoad.LoadProgress; import com.facebook.LinkBench.LinkBenchRequest.RequestProgress; import com.facebook.LinkBench.stats.LatencyStats; import com.facebook.LinkBench.stats.SampledStats; import com.facebook.LinkBench.util.ClassLoadUtil; /* LinkBenchDriver class. First loads data using multi-threaded LinkBenchLoad class. Then does read and write requests of various types (addlink, deletelink, updatelink, getlink, countlinks, getlinklist) using multi-threaded LinkBenchRequest class. Config options are taken from config file passed as argument. 
 */
public class LinkBenchDriver {

  public static final int EXIT_BADARGS = 1;
  public static final int EXIT_BADCONFIG = 2;

  /* Command line arguments */
  private static String configFile = null;
  private static String workloadConfigFile = null;
  private static Properties cmdLineProps = null;
  private static String logFile = null;
  /** File for final statistics */
  private static PrintStream csvStatsFile = null;
  /** File for output of incremental csv data */
  private static PrintStream csvStreamFile = null;
  private static boolean doLoad = false;
  private static boolean doRequest = false;

  private Properties props;

  private final Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER);

  /**
   * Loads the config file, applies command-line -D overrides on top of it,
   * merges in the optional workload config file, then initializes logging.
   */
  LinkBenchDriver(String configfile, Properties
                  overrideProps, String logFile)
    throws java.io.FileNotFoundException, IOException, LinkBenchConfigError {
    // which link store to use
    props = new Properties();
    props.load(new FileInputStream(configfile));
    for (String key: overrideProps.stringPropertyNames()) {
      props.setProperty(key, overrideProps.getProperty(key));
    }

    loadWorkloadProps();

    ConfigUtil.setupLogging(props, logFile);

    logger.info("Config file: " + configfile);
    logger.info("Workload config file: " + workloadConfigFile);
  }

  /**
   * Load properties from auxiliary workload properties file if provided.
   * Properties from workload properties file do not override existing
   * properties
   * @throws IOException
   * @throws FileNotFoundException
   */
  private void loadWorkloadProps() throws IOException, FileNotFoundException {
    if (props.containsKey(Config.WORKLOAD_CONFIG_FILE)) {
      workloadConfigFile = props.getProperty(Config.WORKLOAD_CONFIG_FILE);
      if (!new File(workloadConfigFile).isAbsolute()) {
        // Relative paths are resolved against $LINKBENCH_HOME
        String linkBenchHome = ConfigUtil.findLinkBenchHome();
        if (linkBenchHome == null) {
          throw new RuntimeException("Data file config property "
              + Config.WORKLOAD_CONFIG_FILE
              + " was specified using a relative path, but linkbench home"
              + " directory was not specified through environment var "
              + ConfigUtil.linkbenchHomeEnvVar);
        } else {
          workloadConfigFile = linkBenchHome +
                File.separator + workloadConfigFile;
        }
      }
      Properties workloadProps = new Properties();
      workloadProps.load(new FileInputStream(workloadConfigFile));

      // Add workload properties, but allow other values to override
      for (String key: workloadProps.stringPropertyNames()) {
        if (props.getProperty(key) == null) {
          props.setProperty(key, workloadProps.getProperty(key));
        }
      }
    }
  }

  /** Simple pair of the LinkStore and NodeStore used by one requester. */
  private static class Stores {
    final LinkStore linkStore;
    final NodeStore nodeStore;

    public Stores(LinkStore linkStore, NodeStore nodeStore) {
      super();
      this.linkStore = linkStore;
      this.nodeStore = nodeStore;
    }
  }

  // generate instances of LinkStore and NodeStore
  private Stores initStores()
    throws Exception {
    LinkStore linkStore = createLinkStore();
    NodeStore nodeStore = createNodeStore(linkStore);
    return new Stores(linkStore, nodeStore);
  }

  private LinkStore createLinkStore() throws Exception, IOException {
    // The property "linkstore" defines the class name that will be used to
    // store data in a database. The following class names are pre-packaged
    // for easy access:
    //   LinkStoreMysql : run benchmark on mySQL
    //   LinkStoreHBaseGeneralAtomicityTesting : atomicity testing on HBase.
String linkStoreClassName = ConfigUtil.getPropertyRequired(props, Config.LINKSTORE_CLASS); logger.debug("Using LinkStore implementation: " + linkStoreClassName); LinkStore linkStore; try { linkStore = ClassLoadUtil.newInstance(linkStoreClassName, LinkStore.class); } catch (ClassNotFoundException nfe) { throw new IOException("Cound not find class for " + linkStoreClassName); } return linkStore; } /** * @param linkStore a LinkStore instance to be reused if it turns out * that linkStore and nodeStore classes are same * @return * @throws Exception * @throws IOException */ private NodeStore createNodeStore(LinkStore linkStore) throws Exception, IOException { String nodeStoreClassName = props.getProperty(Config.NODESTORE_CLASS); if (nodeStoreClassName == null) { logger.debug("No NodeStore implementation provided"); } else { logger.debug("Using NodeStore implementation: " + nodeStoreClassName); } if (linkStore != null && linkStore.getClass().getName().equals( nodeStoreClassName)) { // Same class, reuse object if (!NodeStore.class.isAssignableFrom(linkStore.getClass())) { throw new Exception("Specified NodeStore class " + nodeStoreClassName + " is not a subclass of NodeStore"); } return (NodeStore)linkStore; } else { NodeStore nodeStore; try { nodeStore = ClassLoadUtil.newInstance(nodeStoreClassName, NodeStore.class); return nodeStore; } catch (java.lang.ClassNotFoundException nfe) { throw new IOException("Cound not find class for " + nodeStoreClassName); } } } void load() throws IOException, InterruptedException, Throwable { if (!doLoad) { logger.info("Skipping load data per the cmdline arg"); return; } // load data int nLinkLoaders = ConfigUtil.getInt(props, Config.NUM_LOADERS); boolean bulkLoad = true; BlockingQueue chunk_q = new LinkedBlockingQueue(); // max id1 to generate long maxid1 = ConfigUtil.getLong(props, Config.MAX_ID); // id1 at which to start long startid1 = ConfigUtil.getLong(props, Config.MIN_ID); // Create loaders logger.info("Starting loaders " + 
                nLinkLoaders);
    logger.debug("Bulk Load setting: " + bulkLoad);

    Random masterRandom = createMasterRNG(props, Config.LOAD_RANDOM_SEED);

    boolean genNodes = ConfigUtil.getBool(props, Config.GENERATE_NODES);
    // One extra loader thread for nodes if node generation is enabled
    int nTotalLoaders = genNodes ? nLinkLoaders + 1 : nLinkLoaders;

    LatencyStats latencyStats = new LatencyStats(nTotalLoaders);
    List loaders = new ArrayList(nTotalLoaders);

    LoadProgress loadTracker = LoadProgress.create(logger, props);
    for (int i = 0; i < nLinkLoaders; i++) {
      LinkStore linkStore = createLinkStore();
      // Bulk loading stays on only while every store supports it
      bulkLoad = bulkLoad && linkStore.bulkLoadBatchSize() > 0;
      LinkBenchLoad l = new LinkBenchLoad(linkStore, props, latencyStats,
              csvStreamFile, i, maxid1 == startid1 + 1, chunk_q, loadTracker);
      loaders.add(l);
    }

    if (genNodes) {
      logger.info("Will generate graph nodes during loading");
      // The node loader takes the last loader id
      int loaderId = nTotalLoaders - 1;
      NodeStore nodeStore = createNodeStore(null);
      Random rng = new Random(masterRandom.nextLong());
      loaders.add(new NodeLoader(props, logger, nodeStore, rng,
          latencyStats, csvStreamFile, loaderId));
    }
    enqueueLoadWork(chunk_q, startid1, maxid1, nLinkLoaders,
                    new Random(masterRandom.nextLong()));
    // run loaders
    loadTracker.startTimer();
    long loadTime = concurrentExec(loaders);

    long expectedNodes = maxid1 - startid1;
    long actualLinks = 0;
    long actualNodes = 0;
    for (final Runnable l:loaders) {
      if (l instanceof LinkBenchLoad) {
        actualLinks += ((LinkBenchLoad)l).getLinksLoaded();
      } else {
        assert(l instanceof NodeLoader);
        actualNodes += ((NodeLoader)l).getNodesLoaded();
      }
    }

    latencyStats.displayLatencyStats();

    if (csvStatsFile != null) {
      latencyStats.printCSVStats(csvStatsFile, true);
    }

    double loadTime_s = (loadTime/1000.0);
    logger.info(String.format("LOAD PHASE COMPLETED. " +
        " Loaded %d nodes (Expected %d)." +
        " Loaded %d links (%.2f links per node). " +
        " Took %.1f seconds. Links/second = %d",
        actualNodes, expectedNodes, actualLinks,
        actualLinks / (double) actualNodes, loadTime_s,
        (long) Math.round(actualLinks / loadTime_s)));
  }

  /**
   * Create a new random number generator, optionally seeded to a known
   * value from the config file. If seed value not provided, a seed
   * is chosen. In either case the seed is logged for later reproducibility.
   * @param props
   * @param configKey config key for the seed value
   * @return a master RNG used to seed the per-thread RNGs
   */
  private Random createMasterRNG(Properties props, String configKey) {
    long seed;
    if (props.containsKey(configKey)) {
      seed = ConfigUtil.getLong(props, configKey);
      logger.info("Using configured random seed " + configKey + "=" + seed);
    } else {
      seed = System.nanoTime() ^ (long)configKey.hashCode();
      logger.info("Using random seed " + seed + " since " + configKey
          + " not specified");
    }

    SecureRandom masterRandom;
    try {
      masterRandom = SecureRandom.getInstance("SHA1PRNG");
    } catch (NoSuchAlgorithmException e) {
      logger.warn("SHA1PRNG not available, defaulting to default SecureRandom" +
          " implementation");
      masterRandom = new SecureRandom();
    }
    masterRandom.setSeed(ByteBuffer.allocate(8).putLong(seed).array());

    // Can be used to check that rng is behaving as expected
    logger.debug("First number generated by master " + configKey +
                 ": " + masterRandom.nextLong());
    return masterRandom;
  }

  private void enqueueLoadWork(BlockingQueue chunk_q, long startid1,
      long maxid1, int nloaders, Random rng) {
    // Enqueue work chunks. Do it in reverse order as a heuristic to improve
    // load balancing, since queue is FIFO and later chunks tend to be larger
    int chunkSize = ConfigUtil.getInt(props, Config.LOADER_CHUNK_SIZE, 2048);
    long chunk_num = 0;
    ArrayList stack = new ArrayList();
    for (long id1 = startid1; id1 < maxid1; id1 += chunkSize) {
      stack.add(new LoadChunk(chunk_num, id1,
                      Math.min(id1 + chunkSize, maxid1), rng));
      chunk_num++;
    }

    for (int i = stack.size() - 1; i >= 0; i--) {
      chunk_q.add(stack.get(i));
    }

    for (int i = 0; i < nloaders; i++) {
      // Add a shutdown signal for each loader
      chunk_q.add(LoadChunk.SHUTDOWN);
    }
  }

  void sendrequests() throws IOException, InterruptedException, Throwable {
    if (!doRequest) {
      logger.info("Skipping request phase per the cmdline arg");
      return;
    }

    // config info for requests
    int nrequesters = ConfigUtil.getInt(props, Config.NUM_REQUESTERS);
    if (nrequesters == 0) {
      logger.info("NO REQUEST PHASE CONFIGURED. ");
      return;
    }
    LatencyStats latencyStats = new LatencyStats(nrequesters);
    List requesters = new LinkedList();

    RequestProgress progress = LinkBenchRequest.createProgress(logger, props);

    Random masterRandom = createMasterRNG(props, Config.REQUEST_RANDOM_SEED);
    // create requesters
    for (int i = 0; i < nrequesters; i++) {
      Stores stores = initStores();
      LinkBenchRequest l = new LinkBenchRequest(stores.linkStore,
              stores.nodeStore, props, latencyStats, csvStreamFile,
              progress, new Random(masterRandom.nextLong()), i, nrequesters);
      requesters.add(l);
    }
    progress.startTimer();
    // run requesters
    concurrentExec(requesters);
    long finishTime = System.currentTimeMillis();
    // Calculate duration accounting for warmup time
    long benchmarkTime = finishTime - progress.getBenchmarkStartTime();

    long requestsdone = 0;
    int abortedRequesters = 0;
    // wait for requesters
    for (LinkBenchRequest requester: requesters) {
      requestsdone += requester.getRequestsDone();
      if (requester.didAbort()) {
        abortedRequesters++;
      }
    }

    latencyStats.displayLatencyStats();
    if (csvStatsFile != null) {
      latencyStats.printCSVStats(csvStatsFile, true);
    }

    logger.info("REQUEST PHASE COMPLETED. " + requestsdone +
                 " requests done in " + (benchmarkTime/1000) + " seconds." +
                 " Requests/second = " + (1000*requestsdone)/benchmarkTime);
    if (abortedRequesters > 0) {
      logger.error(String.format("Benchmark did not complete cleanly: %d/%d " +
          "request threads aborted. See error log entries for details.",
          abortedRequesters, nrequesters));
    }
  }

  /**
   * Start all runnables at the same time. Then block till all
   * tasks are completed. Returns the elapsed time (in millisec)
   * since the start of the first task to the completion of all tasks.
   */
  static long concurrentExec(final List tasks)
      throws Throwable {
    // Latches: all worker threads block on startSignal so they begin
    // together; doneSignal releases this thread once every task finishes.
    final CountDownLatch startSignal = new CountDownLatch(tasks.size());
    final CountDownLatch doneSignal = new CountDownLatch(tasks.size());
    final AtomicLong startTime = new AtomicLong(0);
    for (final Runnable task : tasks) {
      new Thread(new Runnable() {
        @Override
        public void run() {
          /*
           * Run a task. If an uncaught exception occurs, bail
           * out of the benchmark immediately, since any results
           * of the benchmark will no longer be valid anyway
           */
          try {
            startSignal.countDown();
            startSignal.await();
            long now = System.currentTimeMillis();
            // First thread to get here records the common start time
            startTime.compareAndSet(0, now);
            task.run();
          } catch (Throwable e) {
            Logger threadLog = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER);
            threadLog.error("Unrecoverable exception in worker thread:", e);
            Runtime.getRuntime().halt(1);
          }
          doneSignal.countDown();
        }
      }).start();
    }
    doneSignal.await(); // wait for all threads to finish
    long endTime = System.currentTimeMillis();
    return endTime - startTime.get();
  }

  void drive() throws IOException, InterruptedException, Throwable {
    load();
    sendrequests();
  }

  public static void main(String[] args)
    throws IOException, InterruptedException, Throwable {
    processArgs(args);
    LinkBenchDriver d = new LinkBenchDriver(configFile,
                                cmdLineProps, logFile);
    try {
      d.drive();
    } catch (LinkBenchConfigError e) {
      System.err.println("Configuration error: " + e.toString());
      System.exit(EXIT_BADCONFIG);
    }
  }

  private static void printUsage(Options options) {
    //PrintWriter writer = new PrintWriter(System.err);
    HelpFormatter fmt = new HelpFormatter();
    fmt.printHelp("linkbench", options, true);
  }

  /** Define the command-line options accepted by the driver. */
  private static Options initializeOptions() {
    Options options = new Options();
    Option config = new Option("c", true, "Linkbench config file");
    config.setArgName("file");
    options.addOption(config);

    Option log = new Option("L", true, "Log to this file");
    log.setArgName("file");
    options.addOption(log);

    Option csvStats = new Option("csvstats", "csvstats", true,
                                 "CSV stats output");
    csvStats.setArgName("file");
    options.addOption(csvStats);

    Option csvStream = new Option("csvstream", "csvstream", true,
                      "CSV streaming stats output");
    csvStream.setArgName("file");
    options.addOption(csvStream);

    options.addOption("l", false, "Execute loading stage of benchmark");
    options.addOption("r", false, "Execute request stage of benchmark");

    // Java-style properties to override config file
    // -Dkey=value
    Option property = new Option("D", "Override a config setting");
    property.setArgs(2);
    property.setArgName("property=value");
    property.setValueSeparator('=');
    options.addOption(property);

    return options;
  }

  /**
   * Process command line arguments and set static variables
   * exits program if invalid arguments provided
   * @param options
   * @param args
   * @throws ParseException
   */
  private static void processArgs(String[] args)
              throws ParseException {
    Options options = initializeOptions();

    CommandLine cmd = null;
    try {
      CommandLineParser parser = new GnuParser();
      cmd = parser.parse( options, args);
    } catch (ParseException ex) {
      // Use Apache CLI-provided messages
      System.err.println(ex.getMessage());
      printUsage(options);
      System.exit(EXIT_BADARGS);
    }

    /*
     * Apache CLI validates arguments, so can now assume
     * all required options are present, etc
     */
    if (cmd.getArgs().length > 0) {
      System.err.print("Invalid trailing arguments:");
      for (String arg: cmd.getArgs()) {
System.err.print(' '); System.err.print(arg); } System.err.println(); printUsage(options); System.exit(EXIT_BADARGS); } // Set static option variables doLoad = cmd.hasOption('l'); doRequest = cmd.hasOption('r'); logFile = cmd.getOptionValue('L'); // May be null configFile = cmd.getOptionValue('c'); if (configFile == null) { // Try to find in usual location String linkBenchHome = ConfigUtil.findLinkBenchHome(); if (linkBenchHome != null) { configFile = linkBenchHome + File.separator + "config" + File.separator + "LinkConfigMysql.properties"; } else { System.err.println("Config file not specified through command " + "line argument and " + ConfigUtil.linkbenchHomeEnvVar + " environment variable not set to valid directory"); printUsage(options); System.exit(EXIT_BADARGS); } } String csvStatsFileName = cmd.getOptionValue("csvstats"); // May be null if (csvStatsFileName != null) { try { csvStatsFile = new PrintStream(new FileOutputStream(csvStatsFileName)); } catch (FileNotFoundException e) { System.err.println("Could not open file " + csvStatsFileName + " for writing"); printUsage(options); System.exit(EXIT_BADARGS); } } String csvStreamFileName = cmd.getOptionValue("csvstream"); // May be null if (csvStreamFileName != null) { try { csvStreamFile = new PrintStream( new FileOutputStream(csvStreamFileName)); // File is written to by multiple threads, first write header SampledStats.writeCSVHeader(csvStreamFile); } catch (FileNotFoundException e) { System.err.println("Could not open file " + csvStreamFileName + " for writing"); printUsage(options); System.exit(EXIT_BADARGS); } } cmdLineProps = cmd.getOptionProperties("D"); if (!(doLoad || doRequest)) { System.err.println("Did not select benchmark mode"); printUsage(options); System.exit(EXIT_BADARGS); } } } ================================================ FILE: src/main/java/com/facebook/LinkBench/LinkBenchDriverMR.java ================================================ /* * Copyright 2012, Facebook, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.DataInput; import java.io.DataOutput; import java.io.FileInputStream; import java.io.IOException; import java.lang.reflect.Constructor; import java.util.Iterator; import java.util.LinkedList; import java.util.Properties; import java.util.Random; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.SequenceFile.CompressionType; import org.apache.hadoop.mapred.FileInputFormat; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.InputFormat; import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.mapred.SequenceFileInputFormat; import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Logger; import com.facebook.LinkBench.LinkBenchLoad.LoadProgress; import 
com.facebook.LinkBench.LinkBenchRequest.RequestProgress; import com.facebook.LinkBench.stats.LatencyStats; /** * LinkBenchDriverMR class. * First loads data using map-reduced LinkBenchLoad class. * Then does read and write requests of various types (addlink, deletelink, * updatelink, getlink, countlinks, getlinklist) using map-reduced * LinkBenchRequest class. * Config options are taken from config file passed as argument. */ public class LinkBenchDriverMR extends Configured implements Tool { public static final int LOAD = 1; public static final int REQUEST = 2; private static Path TMP_DIR = new Path("TMP_Link_Bench"); private static boolean REPORT_PROGRESS = false; private static boolean USE_INPUT_FILES = false; //use generate input by default private static final Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); static enum Counters { LINK_LOADED, REQUEST_DONE } private static Properties props; private static String store; private static final Class[] EMPTY_ARRAY = new Class[]{}; /** * generate an instance of LinkStore * @param currentphase LOAD or REQUEST * @param mapperid id of the mapper 0, 1, ... */ private static LinkStore initStore(Phase currentphase, int mapperid) throws IOException { LinkStore newstore = null; if (store == null) { store = ConfigUtil.getPropertyRequired(props, Config.LINKSTORE_CLASS); logger.info("Using store class: " + store); } // The property "store" defines the class name that will be used to // store data in a database. The folowing class names are pre-packaged // for easy access: // LinkStoreMysql : run benchmark on mySQL // LinkStoreHBase : run benchmark on HBase // LinkStoreHBaseGeneralAtomicityTesting : atomicity testing on HBase. 
// LinkStoreTaoAtomicityTesting: atomicity testing for Facebook's HBase // Class clazz = null; try { clazz = getClassByName(store); } catch (java.lang.ClassNotFoundException nfe) { throw new IOException("Cound not find class for " + store); } newstore = (LinkStore)newInstance(clazz); if (clazz == null) { throw new IOException("Unknown data store " + store); } return newstore; } /** * InputSplit for generated inputs */ private class LinkBenchInputSplit implements InputSplit { private int id; // id of mapper private int num; // total number of mappers LinkBenchInputSplit() {} public LinkBenchInputSplit(int i, int n) { this.id = i; this.num = n; } public int getID() {return this.id;} public int getNum() {return this.num;} public long getLength() {return 1;} public String[] getLocations() throws IOException { return new String[]{}; } public void readFields(DataInput in) throws IOException { this.id = in.readInt(); this.num = in.readInt(); } public void write(DataOutput out) throws IOException { out.writeInt(this.id); out.writeInt(this.num); } } /** * RecordReader for generated inputs */ private class LinkBenchRecordReader implements RecordReader { private int id; private int num; private boolean done; public LinkBenchRecordReader(LinkBenchInputSplit split) { this.id = split.getID(); this.num = split.getNum(); this.done = false; } public IntWritable createKey() {return new IntWritable();} public IntWritable createValue() {return new IntWritable();} public void close() throws IOException { } // one loader per split public float getProgress() { return 0.5f;} // one loader per split public long getPos() {return 1;} public boolean next(IntWritable key, IntWritable value) throws IOException { if (this.done) { return false; } else { key.set(this.id); value.set(this.num); this.done = true; } return true; } } /** * InputFormat for generated inputs */ private class LinkBenchInputFormat implements InputFormat { public InputSplit[] getSplits(JobConf conf, int numsplits) { 
InputSplit[] splits = new InputSplit[numsplits]; for (int i = 0; i < numsplits; ++i) { splits[i] = (InputSplit) new LinkBenchInputSplit(i, numsplits); } return splits; } public RecordReader getRecordReader( InputSplit split, JobConf conf, Reporter reporter) { return (RecordReader)(new LinkBenchRecordReader((LinkBenchInputSplit)split)); } public void validateInput(JobConf conf) {} // no need to validate } /** * create JobConf for map reduce job * @param currentphase LOAD or REQUEST * @param nmappers number of mappers (loader or requester) */ private JobConf createJobConf(int currentphase, int nmappers) { final JobConf jobconf = new JobConf(getConf(), getClass()); jobconf.setJobName("LinkBench MapReduce Driver"); if (USE_INPUT_FILES) { jobconf.setInputFormat(SequenceFileInputFormat.class); } else { jobconf.setInputFormat(LinkBenchInputFormat.class); } jobconf.setOutputKeyClass(IntWritable.class); jobconf.setOutputValueClass(LongWritable.class); jobconf.setOutputFormat(SequenceFileOutputFormat.class); if(currentphase == LOAD) { jobconf.setMapperClass(LoadMapper.class); } else { //REQUEST jobconf.setMapperClass(RequestMapper.class); } jobconf.setNumMapTasks(nmappers); jobconf.setReducerClass(LoadRequestReducer.class); jobconf.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. 
jobconf.setSpeculativeExecution(false); return jobconf; } /** * setup input files for map reduce job * @param jobconf configuration of the map reduce job * @param nmappers number of mappers (loader or requester) */ private static FileSystem setupInputFiles(JobConf jobconf, int nmappers) throws IOException, InterruptedException { //setup input/output directories final Path indir = new Path(TMP_DIR, "in"); final Path outdir = new Path(TMP_DIR, "out"); FileInputFormat.setInputPaths(jobconf, indir); FileOutputFormat.setOutputPath(jobconf, outdir); final FileSystem fs = FileSystem.get(jobconf); if (fs.exists(TMP_DIR)) { throw new IOException("Tmp directory " + fs.makeQualified(TMP_DIR) + " already exists. Please remove it first."); } if (!fs.mkdirs(indir)) { throw new IOException("Cannot create input directory " + indir); } //generate an input file for each map task if (USE_INPUT_FILES) { for(int i=0; i < nmappers; ++i) { final Path file = new Path(indir, "part"+i); final IntWritable mapperid = new IntWritable(i); final IntWritable nummappers = new IntWritable(nmappers); final SequenceFile.Writer writer = SequenceFile.createWriter( fs, jobconf, file, IntWritable.class, IntWritable.class, CompressionType.NONE); try { writer.append(mapperid, nummappers); } finally { writer.close(); } logger.info("Wrote input for Map #"+i); } } return fs; } /** * read output from the map reduce job * @param fs the DFS FileSystem * @param jobconf configuration of the map reduce job */ public static long readOutput(FileSystem fs, JobConf jobconf) throws IOException, InterruptedException { //read outputs final Path outdir = new Path(TMP_DIR, "out"); Path infile = new Path(outdir, "reduce-out"); IntWritable nworkers = new IntWritable(); LongWritable result = new LongWritable(); long output = 0; SequenceFile.Reader reader = new SequenceFile.Reader(fs, infile, jobconf); try { reader.next(nworkers, result); output = result.get(); } finally { reader.close(); } return output; } /** * Mapper for 
LOAD phase * Load data to the store * Output the number of loaded links */ public static class LoadMapper extends MapReduceBase implements Mapper { public void map(IntWritable loaderid, IntWritable nloaders, OutputCollector output, Reporter reporter) throws IOException { ConfigUtil.setupLogging(props, null); LinkStore store = initStore(Phase.LOAD, loaderid.get()); LatencyStats latencyStats = new LatencyStats(nloaders.get()); long maxid1 = ConfigUtil.getLong(props, Config.MAX_ID); long startid1 = ConfigUtil.getLong(props, Config.MIN_ID); LoadProgress prog_tracker = LoadProgress.create( Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER), props); LinkBenchLoad loader = new LinkBenchLoad(store, props, latencyStats, null, loaderid.get(), maxid1 == startid1 + 1, nloaders.get(), prog_tracker, new Random()); LinkedList tasks = new LinkedList(); tasks.add(loader); long linksloaded = 0; try { LinkBenchDriver.concurrentExec(tasks); linksloaded = loader.getLinksLoaded(); } catch (java.lang.Throwable t) { throw new IOException(t); } output.collect(new IntWritable(nloaders.get()), new LongWritable(linksloaded)); if (REPORT_PROGRESS) { reporter.incrCounter(Counters.LINK_LOADED, linksloaded); } } } /** * Mapper for REQUEST phase * Send requests * Output the number of finished requests */ public static class RequestMapper extends MapReduceBase implements Mapper { public void map(IntWritable requesterid, IntWritable nrequesters, OutputCollector output, Reporter reporter) throws IOException { ConfigUtil.setupLogging(props, null); LinkStore store = initStore(Phase.REQUEST, requesterid.get()); LatencyStats latencyStats = new LatencyStats(nrequesters.get()); RequestProgress progress = LinkBenchRequest.createProgress(logger, props); progress.startTimer(); // TODO: Don't support NodeStore yet final LinkBenchRequest requester = new LinkBenchRequest(store, null, props, latencyStats, null, progress, new Random(), requesterid.get(), nrequesters.get()); // Wrap in runnable to handle error Thread t 
= new Thread(new Runnable() { public void run() { try { requester.run(); } catch (Throwable t) { logger.error("Uncaught error in requester:", t); } } }); t.start(); long requestdone = 0; try { t.join(); requestdone = requester.getRequestsDone(); } catch (InterruptedException e) { } output.collect(new IntWritable(nrequesters.get()), new LongWritable(requestdone)); if (REPORT_PROGRESS) { reporter.incrCounter(Counters.REQUEST_DONE, requestdone); } } } /** * Reducer for both LOAD and REQUEST * Get the sum of "loaded links" or "finished requests" */ public static class LoadRequestReducer extends MapReduceBase implements Reducer { private long sum = 0; private int nummappers = 0; private JobConf conf; /** Store job configuration. */ @Override public void configure(JobConf job) { conf = job; } public void reduce(IntWritable nmappers, Iterator values, OutputCollector output, Reporter reporter) throws IOException { nummappers = nmappers.get(); while(values.hasNext()) { sum += values.next().get(); } output.collect(new IntWritable(nmappers.get()), new LongWritable(sum)); } /** * Reduce task done, write output to a file. 
*/ @Override public void close() throws IOException { //write output to a file Path outDir = new Path(TMP_DIR, "out"); Path outFile = new Path(outDir, "reduce-out"); FileSystem fileSys = FileSystem.get(conf); SequenceFile.Writer writer = SequenceFile.createWriter(fileSys, conf, outFile, IntWritable.class, LongWritable.class, CompressionType.NONE); writer.append(new IntWritable(nummappers), new LongWritable(sum)); writer.close(); } } /** * main route of the LOAD phase */ private void load() throws IOException, InterruptedException { boolean loaddata = (!props.containsKey(Config.LOAD_DATA)) || ConfigUtil.getBool(props, Config.LOAD_DATA); if (!loaddata) { logger.info("Skipping load data per the config"); return; } int nloaders = ConfigUtil.getInt(props, Config.NUM_LOADERS); final JobConf jobconf = createJobConf(LOAD, nloaders); FileSystem fs = setupInputFiles(jobconf, nloaders); try { logger.info("Starting loaders " + nloaders); final long starttime = System.currentTimeMillis(); JobClient.runJob(jobconf); long loadtime = (System.currentTimeMillis() - starttime); // compute total #links loaded long maxid1 = ConfigUtil.getLong(props, Config.MAX_ID); long startid1 = ConfigUtil.getLong(props, Config.MIN_ID); int nlinks_default = ConfigUtil.getInt(props, Config.NLINKS_DEFAULT); long expectedlinks = (1 + nlinks_default) * (maxid1 - startid1); long actuallinks = readOutput(fs, jobconf); logger.info("LOAD PHASE COMPLETED. Expected to load " + expectedlinks + " links. " + actuallinks + " loaded in " + (loadtime/1000) + " seconds." 
+ "Links/second = " + ((1000*actuallinks)/loadtime)); } finally { fs.delete(TMP_DIR, true); } } /** * main route of the REQUEST phase */ private void sendrequests() throws IOException, InterruptedException { // config info for requests int nrequesters = ConfigUtil.getInt(props, Config.NUM_REQUESTERS); final JobConf jobconf = createJobConf(REQUEST, nrequesters); FileSystem fs = setupInputFiles(jobconf, nrequesters); try { logger.info("Starting requesters " + nrequesters); final long starttime = System.currentTimeMillis(); JobClient.runJob(jobconf); long endtime = System.currentTimeMillis(); // request time in millis long requesttime = (endtime - starttime); long requestsdone = readOutput(fs, jobconf); logger.info("REQUEST PHASE COMPLETED. " + requestsdone + " requests done in " + (requesttime/1000) + " seconds." + "Requests/second = " + (1000*requestsdone)/requesttime); } finally { fs.delete(TMP_DIR, true); } } /** * read in configuration and invoke LOAD and REQUEST */ @Override public int run(String[] args) throws Exception { if (args.length < 1) { System.err.println("Args : LinkBenchDriver configfile"); ToolRunner.printGenericCommandUsage(System.err); return -1; } props = new Properties(); props.load(new FileInputStream(args[0])); // get name or temporary directory String tempdirname = props.getProperty(Config.TEMPDIR); if (tempdirname != null) { TMP_DIR = new Path(tempdirname); } // whether report progress through reporter REPORT_PROGRESS = (!props.containsKey(Config.MAPRED_REPORT_PROGRESS)) || ConfigUtil.getBool(props, Config.MAPRED_REPORT_PROGRESS); // whether store mapper input in files USE_INPUT_FILES = (!props.containsKey(Config.MAPRED_USE_INPUT_FILES)) || ConfigUtil.getBool(props, Config.MAPRED_USE_INPUT_FILES); load(); sendrequests(); return 0; } /** * Load a class by name. * @param name the class name. * @return the class object. * @throws ClassNotFoundException if the class is not found. 
*/ public static Class getClassByName(String name) throws ClassNotFoundException { ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); return Class.forName(name, true, classLoader); } /** Create an object for the given class and initialize it from conf * * @param theClass class of which an object is created * @param conf Configuration * @return a new object */ public static T newInstance(Class theClass) { T result; try { Constructor meth = theClass.getDeclaredConstructor(EMPTY_ARRAY); meth.setAccessible(true); result = meth.newInstance(); } catch (Exception e) { throw new RuntimeException(e); } return result; } public static void main(String[] args) throws Exception { System.exit(ToolRunner.run(null, new LinkBenchDriverMR(), args)); } } ================================================ FILE: src/main/java/com/facebook/LinkBench/LinkBenchLoad.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench; import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.Properties; import java.util.Random; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.BlockingQueue; import java.util.concurrent.atomic.AtomicLong; import org.apache.log4j.Level; import org.apache.log4j.Logger; import com.facebook.LinkBench.distributions.ID2Chooser; import com.facebook.LinkBench.distributions.LogNormalDistribution; import com.facebook.LinkBench.generators.DataGenerator; import com.facebook.LinkBench.stats.LatencyStats; import com.facebook.LinkBench.stats.SampledStats; import com.facebook.LinkBench.util.ClassLoadUtil; /* * Multi-threaded loader for loading graph edges (but not nodes) into * LinkStore. The range from startid1 to maxid1 is chunked up into equal sized * disjoint ranges. These are then enqueued for processing by a number * of loader threads to be loaded in parallel. The #links generated for * an id1 is based on the configured distribution. The # of link types, * and link payload data is also controlled by the configuration file. * The actual counts of #links generated is tracked in nlinks_counts. */ public class LinkBenchLoad implements Runnable { private final Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); private long maxid1; // max id1 to generate private long startid1; // id1 at which to start private int loaderID; // ID for this loader private LinkStore store;// store interface (several possible implementations // like mysql, hbase etc) private final LogNormalDistribution linkDataSize; private final DataGenerator linkDataGen; // Generate link data private SampledStats stats; private LatencyStats latencyStats; Level debuglevel; String dbid; private ID2Chooser id2chooser; // Counters for load statistics long sameShuffle; long diffShuffle; long linksloaded; /** * special case for single hot row benchmark. 
If singleAssoc is set, * then make this method not print any statistics message, all statistics * are collected at a higher layer. */ boolean singleAssoc; private BlockingQueue chunk_q; // Track last time stats were updated in ms private long lastDisplayTime; // How often stats should be reported private final long displayFreq_ms; private LoadProgress prog_tracker; private Properties props; /** * Convenience constructor * @param store2 * @param props * @param latencyStats * @param loaderID * @param nloaders */ public LinkBenchLoad(LinkStore store, Properties props, LatencyStats latencyStats, PrintStream csvStreamOut, int loaderID, boolean singleAssoc, int nloaders, LoadProgress prog_tracker, Random rng) { this(store, props, latencyStats, csvStreamOut, loaderID, singleAssoc, new ArrayBlockingQueue(2), prog_tracker); // Just add a single chunk to the queue chunk_q.add(new LoadChunk(loaderID, startid1, maxid1, rng)); chunk_q.add(LoadChunk.SHUTDOWN); } public LinkBenchLoad(LinkStore linkStore, Properties props, LatencyStats latencyStats, PrintStream csvStreamOut, int loaderID, boolean singleAssoc, BlockingQueue chunk_q, LoadProgress prog_tracker) throws LinkBenchConfigError { /* * Initialize fields from arguments */ this.store = linkStore; this.props = props; this.latencyStats = latencyStats; this.loaderID = loaderID; this.singleAssoc = singleAssoc; this.chunk_q = chunk_q; this.prog_tracker = prog_tracker; /* * Load settings from properties */ maxid1 = ConfigUtil.getLong(props, Config.MAX_ID); startid1 = ConfigUtil.getLong(props, Config.MIN_ID); // math functions may cause problems for id1 = 0. Start at 1. 
if (startid1 <= 0) { throw new LinkBenchConfigError("startid1 must be >= 1"); } debuglevel = ConfigUtil.getDebugLevel(props); double medianLinkDataSize = ConfigUtil.getDouble(props, Config.LINK_DATASIZE); linkDataSize = new LogNormalDistribution(); linkDataSize.init(0, LinkStore.MAX_LINK_DATA, medianLinkDataSize, Config.LINK_DATASIZE_SIGMA); try { linkDataGen = ClassLoadUtil.newInstance( ConfigUtil.getPropertyRequired(props, Config.LINK_ADD_DATAGEN), DataGenerator.class); linkDataGen.init(props, Config.LINK_ADD_DATAGEN_PREFIX); } catch (ClassNotFoundException ex) { logger.error(ex); throw new LinkBenchConfigError("Error loading data generator class: " + ex.getMessage()); } displayFreq_ms = ConfigUtil.getLong(props, Config.DISPLAY_FREQ) * 1000; int maxsamples = ConfigUtil.getInt(props, Config.MAX_STAT_SAMPLES); dbid = ConfigUtil.getPropertyRequired(props, Config.DBID); /* * Initialize statistics */ linksloaded = 0; sameShuffle = 0; diffShuffle = 0; stats = new SampledStats(loaderID, maxsamples, csvStreamOut); id2chooser = new ID2Chooser(props, startid1, maxid1, 1, 1); } public long getLinksLoaded() { return linksloaded; } @Override public void run() { try { this.store.initialize(props, Phase.LOAD, loaderID); } catch (Exception e) { logger.error("Error while initializing store", e); throw new RuntimeException(e); } int bulkLoadBatchSize = store.bulkLoadBatchSize(); boolean bulkLoad = bulkLoadBatchSize > 0; ArrayList loadBuffer = null; ArrayList countLoadBuffer = null; if (bulkLoad) { loadBuffer = new ArrayList(bulkLoadBatchSize); countLoadBuffer = new ArrayList(bulkLoadBatchSize); } logger.info("Starting loader thread #" + loaderID + " loading links"); lastDisplayTime = System.currentTimeMillis(); while (true) { LoadChunk chunk; try { chunk = chunk_q.take(); //logger.info("chunk end="+chunk.end); } catch (InterruptedException ie) { logger.warn("InterruptedException not expected, try again", ie); continue; } // Shutdown signal is received though special chunk type if 
(chunk.shutdown) { break; } // Load the link range specified in the chunk processChunk(chunk, bulkLoad, bulkLoadBatchSize, loadBuffer, countLoadBuffer); } if (bulkLoad) { // Load any remaining links or counts loadLinks(loadBuffer); loadCounts(countLoadBuffer); } if (!singleAssoc) { logger.debug(" Same shuffle = " + sameShuffle + " Different shuffle = " + diffShuffle); displayStats(lastDisplayTime, bulkLoad); } store.close(); } private void displayStats(long startTime, boolean bulkLoad) { long endTime = System.currentTimeMillis(); if (bulkLoad) { stats.displayStats(startTime, endTime, Arrays.asList(LinkBenchOp.LOAD_LINKS_BULK, LinkBenchOp.LOAD_COUNTS_BULK, LinkBenchOp.LOAD_LINKS_BULK_NLINKS, LinkBenchOp.LOAD_COUNTS_BULK_NLINKS)); } else { stats.displayStats(startTime, endTime, Arrays.asList(LinkBenchOp.LOAD_LINK)); } } private void processChunk(LoadChunk chunk, boolean bulkLoad, int bulkLoadBatchSize, ArrayList loadBuffer, ArrayList countLoadBuffer) { if (Level.DEBUG.isGreaterOrEqual(debuglevel)) { logger.debug("Loader thread #" + loaderID + " processing " + chunk.toString()); } // Counter for total number of links loaded in chunk; long links_in_chunk = 0; Link link = null; if (!bulkLoad) { // When bulk-loading, need to have multiple link objects at a time // otherwise reuse object link = initLink(); } long prevPercentPrinted = 0; for (long id1 = chunk.start; id1 < chunk.end; id1 += chunk.step) { long added_links= createOutLinks(chunk.rng, link, loadBuffer, countLoadBuffer, id1, singleAssoc, bulkLoad, bulkLoadBatchSize); links_in_chunk += added_links; if (!singleAssoc) { long nloaded = (id1 - chunk.start) / chunk.step; if (bulkLoad) { nloaded -= loadBuffer.size(); } long percent = 100 * nloaded/(chunk.size); if ((percent % 10 == 0) && (percent > prevPercentPrinted)) { logger.debug(chunk.toString() + ": Percent done = " + percent); prevPercentPrinted = percent; } } // Check if stats should be flushed and reset long now = System.currentTimeMillis(); if 
(lastDisplayTime + displayFreq_ms <= now) { displayStats(lastDisplayTime, bulkLoad); stats.resetSamples(); lastDisplayTime = now; } } // Update progress and maybe print message prog_tracker.update(chunk.size, links_in_chunk); } /** * Create the out links for a given id1 * @param link * @param loadBuffer * @param id1 * @param singleAssoc * @param bulkLoad * @param bulkLoadBatchSize * @return total number of links added */ private long createOutLinks(Random rng, Link link, ArrayList loadBuffer, ArrayList countLoadBuffer, long id1, boolean singleAssoc, boolean bulkLoad, int bulkLoadBatchSize) { Map linkTypeCounts = null; if (bulkLoad) { linkTypeCounts = new HashMap(); } long nlinks_total = 0; for (long link_type: id2chooser.getLinkTypes()) { long nlinks = id2chooser.calcLinkCount(id1, link_type); nlinks_total += nlinks; if (id2chooser.sameShuffle) { sameShuffle++; } else { diffShuffle++; } if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("id1 = " + id1 + " link_type = " + link_type + " nlinks = " + nlinks); } for (long j = 0; j < nlinks; j++) { if (bulkLoad) { // Can't reuse link object link = initLink(); } constructLink(rng, link, id1, link_type, j, singleAssoc); if (bulkLoad) { loadBuffer.add(link); if (loadBuffer.size() >= bulkLoadBatchSize) { loadLinks(loadBuffer); } // Update link counts for this type LinkCount count = linkTypeCounts.get(link.link_type); if (count == null) { count = new LinkCount(id1, link.link_type, link.time, link.version, 1); linkTypeCounts.put(link.link_type, count); } else { count.count++; count.time = Math.max(count.time, link.time); count.version = link.version; } } else { loadLink(link, j, nlinks, singleAssoc); } } } // Maintain the counts separately if (bulkLoad) { for (LinkCount count: linkTypeCounts.values()) { countLoadBuffer.add(count); if (countLoadBuffer.size() >= bulkLoadBatchSize) { loadCounts(countLoadBuffer); } } } return nlinks_total; } private Link initLink() { Link link = new Link(); link.link_type = 
LinkStore.DEFAULT_LINK_TYPE; link.visibility = LinkStore.VISIBILITY_DEFAULT; link.version = 0; link.data = new byte[0]; link.time = System.currentTimeMillis(); return link; } /** * Helper method to fill in link data * @param link this link is filled in. Should have been initialized with * initLink() earlier * @param outlink_ix the number of this link out of all outlinks from * id1 * @param singleAssoc whether we are in singleAssoc mode */ private void constructLink(Random rng, Link link, long id1, long link_type, long outlink_ix, boolean singleAssoc) { link.id1 = id1; link.link_type = link_type; // Using random number generator for id2 means we won't know // which id2s exist. So link id1 to // maxid1 + id1 + 1 thru maxid1 + id1 + nlinks(id1) UNLESS // config randomid2max is nonzero. if (singleAssoc) { link.id2 = 45; // some constant } else { link.id2 = id2chooser.chooseForLoad(rng, id1, link_type,outlink_ix); int datasize = (int)linkDataSize.choose(rng); link.data = linkDataGen.fill(rng, new byte[datasize]); } if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("id2 chosen is " + link.id2); } // Randomize time so that id2 and timestamp aren't closely correlated link.time = chooseInitialTimestamp(rng); } private long chooseInitialTimestamp(Random rng) { // Choose something from now back to about 50 days return (System.currentTimeMillis() - Integer.MAX_VALUE - 1L) + rng.nextInt(); } /** * Load an individual link into the db. * * If an error occurs during loading, this method will log it, * add stats, and reset the connection. 
* @param link * @param outlink_ix * @param nlinks * @param singleAssoc */ private void loadLink(Link link, long outlink_ix, long nlinks, boolean singleAssoc) { long timestart = 0; if (!singleAssoc) { timestart = System.nanoTime(); } try { // no inverses for now store.addLink(dbid, link, true); linksloaded++; if (!singleAssoc && outlink_ix == nlinks - 1) { long timetaken = (System.nanoTime() - timestart); // convert to microseconds stats.addStats(LinkBenchOp.LOAD_LINK, timetaken/1000, false); latencyStats.recordLatency(loaderID, LinkBenchOp.LOAD_LINK, timetaken/1000); } } catch (Throwable e){//Catch exception if any long endtime2 = System.nanoTime(); long timetaken2 = (endtime2 - timestart)/1000; logger.error("Error: " + e.getMessage(), e); stats.addStats(LinkBenchOp.LOAD_LINK, timetaken2, true); store.clearErrors(loaderID); } } private void loadLinks(ArrayList loadBuffer) { long timestart = System.nanoTime(); try { // no inverses for now int nlinks = loadBuffer.size(); store.addBulkLinks(dbid, loadBuffer, true); linksloaded += nlinks; loadBuffer.clear(); long timetaken = (System.nanoTime() - timestart); // convert to microseconds stats.addStats(LinkBenchOp.LOAD_LINKS_BULK, timetaken/1000, false); stats.addStats(LinkBenchOp.LOAD_LINKS_BULK_NLINKS, nlinks, false); latencyStats.recordLatency(loaderID, LinkBenchOp.LOAD_LINKS_BULK, timetaken/1000); } catch (Throwable e){//Catch exception if any long endtime2 = System.nanoTime(); long timetaken2 = (endtime2 - timestart)/1000; logger.error("Error: " + e.getMessage(), e); stats.addStats(LinkBenchOp.LOAD_LINKS_BULK, timetaken2, true); store.clearErrors(loaderID); } } private void loadCounts(ArrayList loadBuffer) { long timestart = System.nanoTime(); try { // no inverses for now int ncounts = loadBuffer.size(); store.addBulkCounts(dbid, loadBuffer); loadBuffer.clear(); long timetaken = (System.nanoTime() - timestart); // convert to microseconds stats.addStats(LinkBenchOp.LOAD_COUNTS_BULK, timetaken/1000, false); 
      stats.addStats(LinkBenchOp.LOAD_COUNTS_BULK_NLINKS, ncounts, false);
      latencyStats.recordLatency(loaderID, LinkBenchOp.LOAD_COUNTS_BULK,
                                 timetaken/1000);
    } catch (Throwable e){//Catch exception if any
      long endtime2 = System.nanoTime();
      long timetaken2 = (endtime2 - timestart)/1000;
      logger.error("Error: " + e.getMessage(), e);
      stats.addStats(LinkBenchOp.LOAD_COUNTS_BULK, timetaken2, true);
      store.clearErrors(loaderID);
    }
  }

  /**
   * Represents a portion of the id space, starting with
   * start, going up until end (non-inclusive) with step size
   * step
   */
  public static class LoadChunk {
    // Sentinel chunk used to tell worker threads to shut down
    public static LoadChunk SHUTDOWN =
                  new LoadChunk(true, 0, 0, 0, 1, null);

    public LoadChunk(long id, long start, long end, Random rng) {
      this(false, id, start, end, 1, rng);
    }
    public LoadChunk(boolean shutdown,
                      long id, long start, long end, long step, Random rng) {
      super();
      this.shutdown = shutdown;
      this.id = id;
      this.start = start;
      this.end = end;
      this.step = step;
      this.size = (end - start) / step;
      this.rng = rng;
    }
    public final boolean shutdown;
    public final long id;
    public final long start;
    public final long end;
    public final long step;
    public final long size;
    public Random rng;

    public String toString() {
      if (shutdown) {
        return "chunk SHUTDOWN";
      }
      String range;
      if (step == 1) {
        range = "[" + start + ":" + end + "]";
      } else {
        range = "[" + start + ":" + step + ":" + end + "]";
      }
      return "chunk " + id + range;
    }
  }

  public static class LoadProgress {
    /** report progress at intervals of progressReportInterval links */
    private final long progressReportInterval;

    public LoadProgress(Logger progressLogger,
                        long id1s_total, long progressReportInterval) {
      super();
      this.progressReportInterval = progressReportInterval;
      this.progressLogger = progressLogger;
      this.id1s_total = id1s_total;
      this.starttime_ms = 0;
      this.id1s_loaded = new AtomicLong();
      this.links_loaded = new AtomicLong();
    }

    /** Build a LoadProgress from config: id1 range and report interval */
    public static LoadProgress create(Logger progressLogger,
                                      Properties props) {
      long maxid1 = ConfigUtil.getLong(props,
                                       Config.MAX_ID);
      long startid1 = ConfigUtil.getLong(props, Config.MIN_ID);
      long nids = maxid1 - startid1;
      long progressReportInterval = ConfigUtil.getLong(props,
                              Config.LOAD_PROG_INTERVAL, 50000L);
      return new LoadProgress(progressLogger, nids, progressReportInterval);
    }

    private final Logger progressLogger;
    private final AtomicLong id1s_loaded; // progress
    private final AtomicLong links_loaded; // progress
    private final long id1s_total; // goal
    private long starttime_ms;

    /** Mark current time as start time for load */
    public void startTimer() {
      starttime_ms = System.currentTimeMillis();
    }

    /**
     * Update progress
     * @param id1_incr number of additional id1s loaded since last call
     * @param links_incr number of links loaded since last call
     */
    public void update(long id1_incr, long links_incr) {
      long curr_id1s = id1s_loaded.addAndGet(id1_incr);

      long curr_links = links_loaded.addAndGet(links_incr);
      long prev_links = curr_links - links_incr;

      // Only log when crossing an interval boundary or on completion,
      // so concurrent loader threads don't flood the log
      if ((curr_links / progressReportInterval) >
          (prev_links / progressReportInterval) || curr_id1s == id1s_total) {
        double percentage = (curr_id1s / (double)id1s_total) * 100.0;

        // Links per second loaded
        long now = System.currentTimeMillis();
        double link_rate = ((curr_links) / ((double) now - starttime_ms))*1000;
        double id1_rate = ((curr_id1s) / ((double) now - starttime_ms))*1000;
        progressLogger.info(String.format(
            "%d/%d id1s loaded (%.1f%% complete) at %.2f id1s/sec avg. " +
            "%d links loaded at %.2f links/sec avg.",
            curr_id1s, id1s_total, percentage, id1_rate,
            curr_links, link_rate));
      }
    }
  }
}


================================================
FILE: src/main/java/com/facebook/LinkBench/LinkBenchOp.java
================================================
/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.LinkBench;

// Various operation types for which we want to gather stats
public enum LinkBenchOp {
  ADD_NODE,
  UPDATE_NODE,
  DELETE_NODE,
  GET_NODE,
  ADD_LINK,
  DELETE_LINK,
  UPDATE_LINK,
  COUNT_LINK,
  MULTIGET_LINK,
  GET_LINKS_LIST,
  LOAD_NODE_BULK,
  LOAD_LINK,
  LOAD_LINKS_BULK,
  LOAD_COUNTS_BULK,
  // Although the following are not truly operations, we need stats
  // for them
  RANGE_SIZE, // how big range scans are
  LOAD_LINKS_BULK_NLINKS, // how many links inserted in bulk
  LOAD_COUNTS_BULK_NLINKS, // how many counts inserted in bulk
  UNKNOWN;

  /** Name used in stats reporting; currently just the enum name */
  public String displayName() {
    return name();
  }
}


================================================
FILE: src/main/java/com/facebook/LinkBench/LinkBenchRequest.java
================================================
/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.LinkBench;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Properties;
import java.util.Random;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import com.facebook.LinkBench.RealDistribution.DistributionType;
import com.facebook.LinkBench.distributions.AccessDistributions;
import com.facebook.LinkBench.distributions.AccessDistributions.AccessDistribution;
import com.facebook.LinkBench.distributions.ID2Chooser;
import com.facebook.LinkBench.distributions.LogNormalDistribution;
import com.facebook.LinkBench.distributions.ProbabilityDistribution;
import com.facebook.LinkBench.generators.DataGenerator;
import com.facebook.LinkBench.stats.LatencyStats;
import com.facebook.LinkBench.stats.SampledStats;
import com.facebook.LinkBench.util.ClassLoadUtil;

public class LinkBenchRequest implements Runnable {
  private final Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER);

  Properties props;
  LinkStore linkStore;
  NodeStore nodeStore;

  RequestProgress progressTracker;

  long numRequests;

  /** Requests per second: <= 0 for unlimited rate */
  private long requestrate;

  /** Maximum number of failed requests: < 0 for unlimited */
  private long maxFailedRequests;

  /**
   * Time to run benchmark for before collecting stats. Allows
   * caches, etc to warm up.
   */
  private long warmupTime;

  /** Maximum time to run benchmark for, not including warmup time */
  long maxTime;

  int nrequesters;
  int requesterID;
  long maxid1;
  long startid1;
  Level debuglevel;
  long displayFreq_ms;
  long progressFreq_ms;
  String dbid;

  // Set in the constructor when the id1 range contains exactly one id;
  // the workload then degenerates to a single-assoc read test
  boolean singleAssoc = false;

  // Control data generation settings
  private LogNormalDistribution linkDataSize;
  private DataGenerator linkAddDataGen;
  private DataGenerator linkUpDataGen;
  private LogNormalDistribution nodeDataSize;
  private DataGenerator nodeAddDataGen;
  private DataGenerator nodeUpDataGen;

  // cummulative percentages
  double pc_addlink;
  double pc_deletelink;
  double pc_updatelink;
  double pc_countlink;
  double pc_getlink;
  double pc_getlinklist;
  double pc_addnode;
  double pc_deletenode;
  double pc_updatenode;
  double pc_getnode;

  // Chance of doing historical range query
  double p_historical_getlinklist;

  /** Key (id1, link_type) identifying a link list in the tail cache */
  private static class HistoryKey {
    public final long id1;
    public final long link_type;
    public HistoryKey(long id1, long link_type) {
      super();
      this.id1 = id1;
      this.link_type = link_type;
    }

    public HistoryKey(Link l) {
      this(l.id1, l.link_type);
    }

    @Override
    public int hashCode() {
      final int prime = 31;
      int result = 1;
      result = prime * result + (int) (id1 ^ (id1 >>> 32));
      result = prime * result + (int) (link_type ^ (link_type >>> 32));
      return result;
    }

    @Override
    public boolean equals(Object obj) {
      if (!(obj instanceof HistoryKey))
        return false;
      HistoryKey other = (HistoryKey) obj;
      return id1 == other.id1 && link_type == other.link_type;
    }
  }

  // Cache of last link in lists where full list wasn't retrieved
  ArrayList listTailHistory;

  // Index of history to avoid duplicates
  HashMap listTailHistoryIndex;

  // Limit of cache size
  private int listTailHistoryLimit;

  // Probability distribution for ids in multiget
  ProbabilityDistribution multigetDist;

  // Statistics
  SampledStats stats;
  LatencyStats latencyStats;

  // Other informational counters
  long numfound = 0;
  long numnotfound = 0;
  long numHistoryQueries = 0;

  /**
   * Random number generator
use for generating workload. If
   * initialized with same seed, should generate same sequence of requests
   * so that tests and benchmarks are repeatable.
   */
  Random rng;

  // Last node id accessed
  long lastNodeId;

  long requestsDone = 0;
  long errors = 0;
  boolean aborted;

  // Access distributions
  private AccessDistribution writeDist; // link writes
  private AccessDistribution writeDistUncorr; // to blend with link writes
  private double writeDistUncorrBlend; // Percentage to used writeDist2 for

  private AccessDistribution readDist; // link reads
  private AccessDistribution readDistUncorr; // to blend with link reads
  private double readDistUncorrBlend; // Percentage to used readDist2 for

  private AccessDistribution nodeReadDist; // node reads
  private AccessDistribution nodeUpdateDist; // node writes
  private AccessDistribution nodeDeleteDist; // node deletes

  private ID2Chooser id2chooser;

  public LinkBenchRequest(LinkStore linkStore,
                          NodeStore nodeStore,
                          Properties props,
                          LatencyStats latencyStats,
                          PrintStream csvStreamOut,
                          RequestProgress progressTracker,
                          Random rng,
                          int requesterID,
                          int nrequesters) {
    assert(linkStore != null);
    if (requesterID < 0 || requesterID >= nrequesters) {
      throw new IllegalArgumentException("Bad requester id " +
          requesterID + "/" + nrequesters);
    }

    this.linkStore = linkStore;
    this.nodeStore = nodeStore;
    this.props = props;
    this.latencyStats = latencyStats;
    this.progressTracker = progressTracker;
    this.rng = rng;
    this.nrequesters = nrequesters;
    this.requesterID = requesterID;

    debuglevel = ConfigUtil.getDebugLevel(props);
    dbid = ConfigUtil.getPropertyRequired(props, Config.DBID);

    numRequests = ConfigUtil.getLong(props, Config.NUM_REQUESTS);
    requestrate = ConfigUtil.getLong(props, Config.REQUEST_RATE, 0L);
    maxFailedRequests = ConfigUtil.getLong(props, Config.MAX_FAILED_REQUESTS, 0L);

    warmupTime = Math.max(0, ConfigUtil.getLong(props, Config.WARMUP_TIME, 0L));

    maxTime = ConfigUtil.getLong(props, Config.MAX_TIME);
    maxid1 = ConfigUtil.getLong(props,
                                Config.MAX_ID);
    startid1 = ConfigUtil.getLong(props, Config.MIN_ID);

    // math functions may cause problems for id1 < 1
    if (startid1 <= 0) {
      throw new LinkBenchConfigError("startid1 must be >= 1");
    }
    if (maxid1 <= startid1) {
      throw new LinkBenchConfigError("maxid1 must be > startid1");
    }

    // is this a single assoc test?
    if (startid1 + 1 == maxid1) {
      singleAssoc = true;
      logger.info("Testing single row assoc read.");
    }

    initRequestProbabilities(props);
    initLinkDataGeneration(props);
    initLinkRequestDistributions(props, requesterID, nrequesters);
    if (pc_getnode > pc_getlinklist) {
      // Load stuff for node workload if needed
      if (nodeStore == null) {
        throw new IllegalArgumentException("nodeStore not provided but non-zero " +
                                           "probability of node operation");
      }
      initNodeDataGeneration(props);
      initNodeRequestDistributions(props);
    }

    displayFreq_ms = ConfigUtil.getLong(props, Config.DISPLAY_FREQ, 60L) * 1000;
    progressFreq_ms = ConfigUtil.getLong(props, Config.PROGRESS_FREQ, 6L) * 1000;
    int maxsamples = ConfigUtil.getInt(props, Config.MAX_STAT_SAMPLES);
    stats = new SampledStats(requesterID, maxsamples, csvStreamOut);

    listTailHistoryLimit = 2048; // Hardcoded limit for now
    listTailHistory = new ArrayList(listTailHistoryLimit);
    listTailHistoryIndex = new HashMap();
    p_historical_getlinklist = ConfigUtil.getDouble(props,
                Config.PR_GETLINKLIST_HISTORY, 0.0) / 100;

    lastNodeId = startid1;
  }

  /**
   * Compute cumulative request-type probabilities from config: each
   * pc_* field is the upper bound of that operation's slice of [0,100].
   */
  private void initRequestProbabilities(Properties props) {
    pc_addlink = ConfigUtil.getDouble(props, Config.PR_ADD_LINK);
    pc_deletelink = pc_addlink +
                    ConfigUtil.getDouble(props, Config.PR_DELETE_LINK);
    pc_updatelink = pc_deletelink +
                    ConfigUtil.getDouble(props, Config.PR_UPDATE_LINK);
    pc_countlink = pc_updatelink +
                   ConfigUtil.getDouble(props, Config.PR_COUNT_LINKS);
    pc_getlink = pc_countlink +
                 ConfigUtil.getDouble(props, Config.PR_GET_LINK);
    pc_getlinklist = pc_getlink +
                     ConfigUtil.getDouble(props, Config.PR_GET_LINK_LIST);
    pc_addnode = pc_getlinklist +
                 ConfigUtil.getDouble(props, Config.PR_ADD_NODE,
                                                      0.0);
    pc_updatenode = pc_addnode +
                    ConfigUtil.getDouble(props, Config.PR_UPDATE_NODE, 0.0);
    pc_deletenode = pc_updatenode +
                    ConfigUtil.getDouble(props, Config.PR_DELETE_NODE, 0.0);
    pc_getnode = pc_deletenode +
                 ConfigUtil.getDouble(props, Config.PR_GET_NODE, 0.0);

    if (Math.abs(pc_getnode - 100.0) > 1e-5) {//compare real numbers
      throw new LinkBenchConfigError("Percentages of request types do not " +
                                     "add to 100, only " + pc_getnode + "!");
    }
  }

  /**
   * Set up access distributions for picking id1s for link reads/writes,
   * the optional uncorrelated blend distributions, the id2 chooser and
   * the multiget size distribution.
   */
  private void initLinkRequestDistributions(Properties props, int requesterID,
                                            int nrequesters) {
    writeDist = AccessDistributions.loadAccessDistribution(props,
            startid1, maxid1, DistributionType.LINK_WRITES);
    readDist = AccessDistributions.loadAccessDistribution(props,
            startid1, maxid1, DistributionType.LINK_READS);

    // Load uncorrelated distributions for blending if needed
    writeDistUncorr = null;
    if (props.containsKey(Config.WRITE_UNCORR_BLEND)) {
      // Ratio of queries to use uncorrelated. Convert from percentage
      writeDistUncorrBlend = ConfigUtil.getDouble(props,
                      Config.WRITE_UNCORR_BLEND) / 100.0;
      if (writeDistUncorrBlend > 0.0) {
        writeDistUncorr = AccessDistributions.loadAccessDistribution(props,
                startid1, maxid1, DistributionType.LINK_WRITES_UNCORR);
      }
    }

    readDistUncorr = null;
    if (props.containsKey(Config.READ_UNCORR_BLEND)) {
      // Ratio of queries to use uncorrelated. Convert from percentage
      readDistUncorrBlend = ConfigUtil.getDouble(props,
                      Config.READ_UNCORR_BLEND) / 100.0;
      if (readDistUncorrBlend > 0.0) {
        readDistUncorr = AccessDistributions.loadAccessDistribution(props,
                startid1, maxid1, DistributionType.LINK_READS_UNCORR);
      }
    }

    id2chooser = new ID2Chooser(props, startid1, maxid1,
                                nrequesters, requesterID);

    // Distribution of #id2s per multiget
    String multigetDistClass = props.getProperty(Config.LINK_MULTIGET_DIST);
    if (multigetDistClass != null && multigetDistClass.trim().length() != 0) {
      int multigetMin = ConfigUtil.getInt(props, Config.LINK_MULTIGET_DIST_MIN);
      int multigetMax = ConfigUtil.getInt(props, Config.LINK_MULTIGET_DIST_MAX);
      try {
        multigetDist = ClassLoadUtil.newInstance(multigetDistClass,
                                            ProbabilityDistribution.class);
        multigetDist.init(multigetMin, multigetMax, props,
                          Config.LINK_MULTIGET_DIST_PREFIX);
      } catch (ClassNotFoundException e) {
        logger.error(e);
        throw new LinkBenchConfigError("Class" + multigetDistClass +
            " could not be loaded as ProbabilityDistribution");
      }
    } else {
      multigetDist = null;
    }
  }

  /**
   * Set up size distribution and data generators for link payloads.
   */
  private void initLinkDataGeneration(Properties props) {
    try {
      double medLinkDataSize = ConfigUtil.getDouble(props,
                                                    Config.LINK_DATASIZE);
      linkDataSize = new LogNormalDistribution();
      linkDataSize.init(0, LinkStore.MAX_LINK_DATA, medLinkDataSize,
                        Config.LINK_DATASIZE_SIGMA);
      linkAddDataGen = ClassLoadUtil.newInstance(
          ConfigUtil.getPropertyRequired(props, Config.LINK_ADD_DATAGEN),
          DataGenerator.class);
      linkAddDataGen.init(props, Config.LINK_ADD_DATAGEN_PREFIX);

      linkUpDataGen = ClassLoadUtil.newInstance(
          ConfigUtil.getPropertyRequired(props, Config.LINK_UP_DATAGEN),
          DataGenerator.class);
      linkUpDataGen.init(props, Config.LINK_UP_DATAGEN_PREFIX);
    } catch (ClassNotFoundException ex) {
      logger.error(ex);
      throw new LinkBenchConfigError("Error loading data generator class: "
            + ex.getMessage());
    }
  }

  /**
   * Set up node read/update/delete access distributions.  All three are
   * required once node operations have non-zero probability.
   */
  private void initNodeRequestDistributions(Properties props) {
    try {
      nodeReadDist =
          AccessDistributions.loadAccessDistribution(props,
              startid1, maxid1, DistributionType.NODE_READS);
    } catch (LinkBenchConfigError e) {
      // Not defined
      logger.info("Node access distribution not configured: " +
                  e.getMessage());
      throw new LinkBenchConfigError("Node read distribution not " +
          "configured but node read operations have non-zero probability");
    }

    try {
      nodeUpdateDist = AccessDistributions.loadAccessDistribution(props,
              startid1, maxid1, DistributionType.NODE_UPDATES);
    } catch (LinkBenchConfigError e) {
      // Not defined
      logger.info("Node access distribution not configured: " +
                  e.getMessage());
      throw new LinkBenchConfigError("Node write distribution not " +
          "configured but node write operations have non-zero probability");
    }

    try {
      nodeDeleteDist = AccessDistributions.loadAccessDistribution(props,
              startid1, maxid1, DistributionType.NODE_DELETES);
    } catch (LinkBenchConfigError e) {
      // Not defined
      logger.info("Node delete distribution not configured: " +
                  e.getMessage());
      throw new LinkBenchConfigError("Node delete distribution not " +
          "configured but node write operations have non-zero probability");
    }
  }

  /**
   * Set up size distribution and data generators for node payloads.
   */
  private void initNodeDataGeneration(Properties props) {
    try {
      double medNodeDataSize = ConfigUtil.getDouble(props,
                                                    Config.NODE_DATASIZE);
      nodeDataSize = new LogNormalDistribution();
      nodeDataSize.init(0, NodeStore.MAX_NODE_DATA, medNodeDataSize,
                        Config.NODE_DATASIZE_SIGMA);

      String dataGenClass = ConfigUtil.getPropertyRequired(props,
                                          Config.NODE_ADD_DATAGEN);
      nodeAddDataGen = ClassLoadUtil.newInstance(dataGenClass,
                                                 DataGenerator.class);
      nodeAddDataGen.init(props, Config.NODE_ADD_DATAGEN_PREFIX);

      dataGenClass = ConfigUtil.getPropertyRequired(props,
                                          Config.NODE_UP_DATAGEN);
      nodeUpDataGen = ClassLoadUtil.newInstance(dataGenClass,
                                                DataGenerator.class);
      nodeUpDataGen.init(props, Config.NODE_UP_DATAGEN_PREFIX);
    } catch (ClassNotFoundException ex) {
      logger.error(ex);
      throw new LinkBenchConfigError("Error loading data generator class: "
            + ex.getMessage());
    }
  }

  public long
       getRequestsDone() {
    return requestsDone;
  }
  public boolean didAbort() {
    return aborted;
  }

  // gets id1 for the request based on desired distribution
  private long chooseRequestID(DistributionType type, long previousId1) {
    AccessDistribution dist;
    switch (type) {
    case LINK_READS:
      // Blend between distributions if needed
      if (readDistUncorr == null || rng.nextDouble() >= readDistUncorrBlend) {
        dist = readDist;
      } else {
        dist = readDistUncorr;
      }
      break;
    case LINK_WRITES:
      // Blend between distributions if needed
      if (writeDistUncorr == null || rng.nextDouble() >= writeDistUncorrBlend) {
        dist = writeDist;
      } else {
        dist = writeDistUncorr;
      }
      break;
    case LINK_WRITES_UNCORR:
      dist = writeDistUncorr;
      break;
    case NODE_READS:
      dist = nodeReadDist;
      break;
    case NODE_UPDATES:
      dist = nodeUpdateDist;
      break;
    case NODE_DELETES:
      dist = nodeDeleteDist;
      break;
    default:
      throw new RuntimeException("Unknown value for type: " + type);
    }

    long newid1 = dist.nextID(rng, previousId1);
    // Distribution responsible for generating number in range
    assert((newid1 >= startid1) && (newid1 < maxid1));
    if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
      logger.trace("id1 generated = " + newid1 +
          " for access distribution: " + dist.getClass().getName() +
          ": " + dist.toString());
    }

    if (dist.getShuffler() != null) {
      // Shuffle to go from position in space ranked from most to least
      // accessed, to the real id space
      newid1 = startid1 + dist.getShuffler().permute(newid1 - startid1);
    }
    return newid1;
  }

  /**
   * Randomly choose a single request and execute it, updating statistics
   * @param recordStats If true, record latency and other stats.
   * @return true if successful, false on error
   */
  private boolean oneRequest(boolean recordStats) {
    // r selects the operation type against the cumulative pc_* thresholds
    double r = rng.nextDouble() * 100.0;

    long starttime = 0;
    long endtime = 0;

    LinkBenchOp type = LinkBenchOp.UNKNOWN; // initialize to invalid value
    Link link = new Link();

    try {
      if (r <= pc_addlink) {
        // generate add request
        type = LinkBenchOp.ADD_LINK;
        link.id1 = chooseRequestID(DistributionType.LINK_WRITES, link.id1);
        link.link_type = id2chooser.chooseRandomLinkType(rng);
        link.id2 = id2chooser.chooseForOp(rng, link.id1, link.link_type,
                                          ID2Chooser.P_ADD_EXIST);
        link.visibility = LinkStore.VISIBILITY_DEFAULT;
        link.version = 0;
        link.time = System.currentTimeMillis();
        link.data = linkAddDataGen.fill(rng,
                                  new byte[(int)linkDataSize.choose(rng)]);

        starttime = System.nanoTime();
        // no inverses for now
        boolean alreadyExists = linkStore.addLink(dbid, link, true);
        boolean added = !alreadyExists;
        endtime = System.nanoTime();
        if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
          logger.trace("addLink id1=" + link.id1 + " link_type=" +
                  link.link_type + " id2=" + link.id2 + " added=" + added);
        }
      } else if (r <= pc_deletelink) {
        type = LinkBenchOp.DELETE_LINK;
        long id1 = chooseRequestID(DistributionType.LINK_WRITES, link.id1);
        long link_type = id2chooser.chooseRandomLinkType(rng);
        long id2 = id2chooser.chooseForOp(rng, id1, link_type,
                                          ID2Chooser.P_DELETE_EXIST);
        starttime = System.nanoTime();
        linkStore.deleteLink(dbid, id1, link_type, id2,
                             true, // no inverse
                             false);
        endtime = System.nanoTime();
        if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
          logger.trace("deleteLink id1=" + id1 + " link_type=" + link_type +
                       " id2=" + id2);
        }
      } else if (r <= pc_updatelink) {
        type = LinkBenchOp.UPDATE_LINK;
        link.id1 = chooseRequestID(DistributionType.LINK_WRITES, link.id1);
        link.link_type = id2chooser.chooseRandomLinkType(rng);
        // Update one of the existing links
        link.id2 = id2chooser.chooseForOp(rng, link.id1, link.link_type,
                                          ID2Chooser.P_UPDATE_EXIST);
        link.visibility = LinkStore.VISIBILITY_DEFAULT;
        link.version = 0;
        link.time = System.currentTimeMillis();
        link.data = linkUpDataGen.fill(rng,
                                  new byte[(int)linkDataSize.choose(rng)]);

        starttime = System.nanoTime();
        // no inverses for now; the update is issued via addLink
        boolean found1 = linkStore.addLink(dbid, link, true);
        boolean found = found1;
        endtime = System.nanoTime();
        if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
          logger.trace("updateLink id1=" + link.id1 + " link_type=" +
                  link.link_type + " id2=" + link.id2 + " found=" + found);
        }
      } else if (r <= pc_countlink) {
        type = LinkBenchOp.COUNT_LINK;

        long id1 = chooseRequestID(DistributionType.LINK_READS, link.id1);
        long link_type = id2chooser.chooseRandomLinkType(rng);
        starttime = System.nanoTime();
        long count = linkStore.countLinks(dbid, id1, link_type);
        endtime = System.nanoTime();
        if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
          logger.trace("countLink id1=" + id1 + " link_type=" + link_type +
                       " count=" + count);
        }
      } else if (r <= pc_getlink) {
        type = LinkBenchOp.MULTIGET_LINK;

        long id1 = chooseRequestID(DistributionType.LINK_READS, link.id1);
        long link_type = id2chooser.chooseRandomLinkType(rng);
        int nid2s = 1;
        if (multigetDist != null) {
          nid2s = (int)multigetDist.choose(rng);
        }
        long id2s[] = id2chooser.chooseMultipleForOp(rng, id1, link_type,
                                              nid2s, ID2Chooser.P_GET_EXIST);

        starttime = System.nanoTime();
        int found = getLink(id1, link_type, id2s);
        assert(found >= 0 && found <= nid2s);
        endtime = System.nanoTime();

        // NOTE(review): partial misses (0 < found < nid2s) are not added
        // to numnotfound — confirm whether that is intended
        if (found > 0) {
          numfound += found;
        } else {
          numnotfound += nid2s - found;
        }
      } else if (r <= pc_getlinklist) {
        type = LinkBenchOp.GET_LINKS_LIST;
        Link links[];

        if (rng.nextDouble() < p_historical_getlinklist &&
                    !this.listTailHistory.isEmpty()) {
          links = getLinkListTail();
        } else {
          long id1 = chooseRequestID(DistributionType.LINK_READS, link.id1);
          long link_type = id2chooser.chooseRandomLinkType(rng);
          starttime = System.nanoTime();
          links = getLinkList(id1, link_type);
          endtime = System.nanoTime();
        }

        int count = ((links == null) ? 0 : links.length);
        if (recordStats) {
          stats.addStats(LinkBenchOp.RANGE_SIZE, count, false);
        }
      } else if (r <= pc_addnode) {
        type = LinkBenchOp.ADD_NODE;
        Node newNode = createAddNode();
        starttime = System.nanoTime();
        lastNodeId = nodeStore.addNode(dbid, newNode);
        endtime = System.nanoTime();
        if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
          logger.trace("addNode " + newNode);
        }
      } else if (r <= pc_updatenode) {
        type = LinkBenchOp.UPDATE_NODE;
        // Choose an id that has previously been created (but might have
        // been since deleted
        long upId = chooseRequestID(DistributionType.NODE_UPDATES,
                                     lastNodeId);
        // Generate new data randomly
        Node newNode = createUpdateNode(upId);
        starttime = System.nanoTime();
        boolean changed = nodeStore.updateNode(dbid, newNode);
        endtime = System.nanoTime();
        lastNodeId = upId;
        if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
          logger.trace("updateNode " + newNode + " changed=" + changed);
        }
      } else if (r <= pc_deletenode) {
        type = LinkBenchOp.DELETE_NODE;
        long idToDelete = chooseRequestID(DistributionType.NODE_DELETES,
                                          lastNodeId);
        starttime = System.nanoTime();
        boolean deleted = nodeStore.deleteNode(dbid, LinkStore.DEFAULT_NODE_TYPE,
                                               idToDelete);
        endtime = System.nanoTime();
        lastNodeId = idToDelete;
        if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
          logger.trace("deleteNode " + idToDelete + " deleted=" + deleted);
        }
      } else if (r <= pc_getnode) {
        type = LinkBenchOp.GET_NODE;
        starttime = System.nanoTime();
        long idToFetch = chooseRequestID(DistributionType.NODE_READS,
                                         lastNodeId);
        Node fetched = nodeStore.getNode(dbid, LinkStore.DEFAULT_NODE_TYPE,
                                         idToFetch);
        endtime = System.nanoTime();
        lastNodeId = idToFetch;
        if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
          if (fetched == null) {
            logger.trace("getNode " + idToFetch + " not found");
          } else {
            logger.trace("getNode " + fetched);
          }
        }
      } else {
        logger.error("No-op in requester: last probability < 1.0");
        return false;
      }

      // convert to microseconds
      long timetaken = (endtime - starttime)/1000;

      if (recordStats) {
        // record statistics
        stats.addStats(type, timetaken, false);
        latencyStats.recordLatency(requesterID, type, timetaken);
      }

      return true;
    } catch (Throwable e){//Catch exception if any
      long endtime2 = System.nanoTime();
      long timetaken2 = (endtime2 - starttime)/1000;
      logger.error(type.displayName() + " error " + e.getMessage(), e);
      if (recordStats) {
        stats.addStats(type, timetaken2, true);
      }
      linkStore.clearErrors(requesterID);
      return false;
    }
  }

  /**
   * Create a new node for adding to database
   * @return
   */
  private Node createAddNode() {
    byte data[] = nodeAddDataGen.fill(rng,
                            new byte[(int)nodeDataSize.choose(rng)]);
    return new Node(-1, LinkStore.DEFAULT_NODE_TYPE, 1,
                    (int)(System.currentTimeMillis()/1000), data);
  }

  /**
   * Create new node for updating in database
   */
  private Node createUpdateNode(long id) {
    byte data[] = nodeUpDataGen.fill(rng,
                            new byte[(int)nodeDataSize.choose(rng)]);
    return new Node(id, LinkStore.DEFAULT_NODE_TYPE, 2,
                    (int)(System.currentTimeMillis()/1000), data);
  }

  @Override
  public void run() {
    logger.info("Requester thread #" + requesterID + " started: will do "
        + numRequests + " ops after " + warmupTime + " second warmup");
    logger.debug("Requester thread #" + requesterID +
                 " first random number " + rng.nextLong());

    try {
      this.linkStore.initialize(props, Phase.REQUEST, requesterID);
      if (this.nodeStore != null && this.nodeStore != this.linkStore) {
        this.nodeStore.initialize(props, Phase.REQUEST, requesterID);
      }
    } catch (Exception e) {
      logger.error("Error while initializing store", e);
      throw new RuntimeException(e);
    }

    long warmupStartTime = System.currentTimeMillis();
    boolean warmupDone = warmupTime <= 0;
    long benchmarkStartTime;
    if (!warmupDone) {
      benchmarkStartTime = warmupStartTime + warmupTime * 1000;
    } else {
      benchmarkStartTime = warmupStartTime;
    }
    long endTime = benchmarkStartTime + maxTime * 1000;
    long lastUpdate = warmupStartTime;
    long curTime = warmupStartTime;
    long i;

    if (singleAssoc) {
      LinkBenchOp type = LinkBenchOp.UNKNOWN;
      try {
        Link link = new
Link(); // add a single assoc to the database link.id1 = 45; link.id1 = 46; type = LinkBenchOp.ADD_LINK; // no inverses for now linkStore.addLink(dbid, link, true); // read this assoc from the database over and over again type = LinkBenchOp.MULTIGET_LINK; for (i = 0; i < numRequests; i++) { int found = getLink(link.id1, link.link_type, new long[]{link.id2}); if (found == 1) { requestsDone++; } else { logger.warn("ThreadID = " + requesterID + " not found link for id1=45"); } } } catch (Throwable e) { logger.error(type.displayName() + "error " + e.getMessage(), e); aborted = true; } closeStores(); return; } long warmupRequests = 0; long requestsSinceLastUpdate = 0; long lastStatDisplay_ms = curTime; long reqTime_ns = System.nanoTime(); double requestrate_ns = ((double)requestrate)/1e9; while (requestsDone < numRequests) { if (requestrate > 0) { reqTime_ns = Timer.waitExpInterval(rng, reqTime_ns, requestrate_ns); } boolean success = oneRequest(warmupDone); if (!success) { errors++; if (maxFailedRequests >= 0 && errors > maxFailedRequests) { logger.error(String.format("Requester #%d aborting: %d failed requests" + " (out of %d total) ", requesterID, errors, requestsDone)); aborted = true; break; } } curTime = System.currentTimeMillis(); // Track requests done if (warmupDone) { requestsDone++; requestsSinceLastUpdate++; if (requestsSinceLastUpdate >= RequestProgress.THREAD_REPORT_INTERVAL) { progressTracker.update(requestsSinceLastUpdate); requestsSinceLastUpdate = 0; } } else { warmupRequests++; } // Per-thread periodic progress updates if (curTime > lastUpdate + progressFreq_ms) { if (warmupDone) { logger.info(String.format("Requester #%d %d/%d requests done", requesterID, requestsDone, numRequests)); lastUpdate = curTime; } else { logger.info(String.format("Requester #%d warming up. " + " %d warmup requests done. 
%d/%d seconds of warmup done", requesterID, warmupRequests, (curTime - warmupStartTime) / 1000, warmupTime)); lastUpdate = curTime; } } // Per-thread periodic stat dumps after warmup done if (warmupDone && (lastStatDisplay_ms + displayFreq_ms) <= curTime) { displayStats(lastStatDisplay_ms, curTime); stats.resetSamples(); lastStatDisplay_ms = curTime; } // Check if warmup completed if (!warmupDone && curTime >= benchmarkStartTime) { warmupDone = true; lastUpdate = curTime; lastStatDisplay_ms = curTime; requestsSinceLastUpdate = 0; logger.info(String.format("Requester #%d warmup finished " + " after %d warmup requests. 0/%d requests done", requesterID, warmupRequests, numRequests)); lastUpdate = curTime; } // Enforce time limit if (curTime > endTime) { logger.info(String.format("Requester #%d: time limit of %ds elapsed" + ", shutting down.", requesterID, maxTime)); break; } } // Do final update of statistics progressTracker.update(requestsSinceLastUpdate); displayStats(lastStatDisplay_ms, System.currentTimeMillis()); // Report final stats logger.info("ThreadID = " + requesterID + " total requests = " + requestsDone + " requests/second = " + ((1000 * requestsDone)/ Math.max(1, (curTime - benchmarkStartTime))) + " found = " + numfound + " not found = " + numnotfound + " history queries = " + numHistoryQueries + "/" + stats.getCount(LinkBenchOp.GET_LINKS_LIST)); closeStores(); } /** * Close datastores before finishing */ private void closeStores() { linkStore.close(); if (nodeStore != null && nodeStore != linkStore) { nodeStore.close(); } } private void displayStats(long lastStatDisplay_ms, long now_ms) { stats.displayStats(lastStatDisplay_ms, now_ms, Arrays.asList( LinkBenchOp.MULTIGET_LINK, LinkBenchOp.GET_LINKS_LIST, LinkBenchOp.COUNT_LINK, LinkBenchOp.UPDATE_LINK, LinkBenchOp.ADD_LINK, LinkBenchOp.RANGE_SIZE, LinkBenchOp.ADD_NODE, LinkBenchOp.UPDATE_NODE, LinkBenchOp.DELETE_NODE, LinkBenchOp.GET_NODE)); } int getLink(long id1, long link_type, long id2s[]) throws 
                                                          Exception {
    Link links[] = linkStore.multigetLinks(dbid, id1, link_type, id2s);
    return links == null ? 0 : links.length;
  }

  Link[] getLinkList(long id1, long link_type) throws Exception {
    Link links[] = linkStore.getLinkList(dbid, id1, link_type);
    if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
      logger.trace("getLinkList(id1=" + id1 + ", link_type=" + link_type +
                         ") => count=" + (links == null ? 0 : links.length));
    }
    // If there were more links than limit, record
    if (links != null && links.length >= linkStore.getRangeLimit()) {
      Link lastLink = links[links.length-1];
      if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
        logger.trace("Maybe more history for (" + id1 +"," +
                      link_type + " older than " + lastLink.time);
      }
      // Remember the oldest link seen so a later historical query can
      // page further back through this list
      addTailCacheEntry(lastLink);
    }
    return links;
  }

  Link[] getLinkListTail() throws Exception {
    assert(!listTailHistoryIndex.isEmpty());
    assert(!listTailHistory.isEmpty());
    // Pick a random cached tail entry to continue paging from
    int choice = rng.nextInt(listTailHistory.size());
    Link prevLast = listTailHistory.get(choice);

    // Get links past the oldest last retrieved
    Link links[] = linkStore.getLinkList(dbid, prevLast.id1,
        prevLast.link_type, 0, prevLast.time, 1, linkStore.getRangeLimit());

    if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
      logger.trace("getLinkListTail(id1=" + prevLast.id1 + ", link_type="
              + prevLast.link_type + ", max_time=" + prevLast.time +
              " => count=" + (links == null ? 0 : links.length));
    }

    if (Level.TRACE.isGreaterOrEqual(debuglevel)) {
      logger.trace("Historical range query for (" + prevLast.id1 +"," +
                    prevLast.link_type + " older than " + prevLast.time +
                    ": " + (links == null ?
0 : links.length) + " results"); } if (links != null && links.length == linkStore.getRangeLimit()) { // There might be yet more history Link last = links[links.length-1]; if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("might be yet more history for (" + last.id1 +"," + last.link_type + " older than " + last.time); } // Update in place listTailHistory.set(choice, last.clone()); } else { // No more history after this, remove from cache removeTailCacheEntry(choice, null); } numHistoryQueries++; return links; } /** * Add a new link to the history cache, unless already present * @param lastLink the last (i.e. lowest timestamp) link retrieved */ private void addTailCacheEntry(Link lastLink) { HistoryKey key = new HistoryKey(lastLink); if (listTailHistoryIndex.containsKey(key)) { // Already present return; } if (listTailHistory.size() < listTailHistoryLimit) { listTailHistory.add(lastLink.clone()); listTailHistoryIndex.put(key, listTailHistory.size() - 1); } else { // Need to evict entry int choice = rng.nextInt(listTailHistory.size()); removeTailCacheEntry(choice, lastLink.clone()); } } /** * Remove or replace entry in listTailHistory and update index * @param pos index of entry in listTailHistory * @param repl replace with this if not null */ private void removeTailCacheEntry(int pos, Link repl) { Link entry = listTailHistory.get(pos); if (pos == listTailHistory.size() - 1) { // removing from last position, don't need to fill gap listTailHistoryIndex.remove(new HistoryKey(entry)); int lastIx = listTailHistory.size() - 1; if (repl == null) { listTailHistory.remove(lastIx); } else { listTailHistory.set(lastIx, repl); listTailHistoryIndex.put(new HistoryKey(repl), lastIx); } } else { if (repl == null) { // Replace with last entry in cache to fill gap repl = listTailHistory.get(listTailHistory.size() - 1); listTailHistory.remove(listTailHistory.size() - 1); } listTailHistory.set(pos, repl); listTailHistoryIndex.put(new HistoryKey(repl), pos); } } public static 
class RequestProgress { // How many ops before a thread should register its progress static final int THREAD_REPORT_INTERVAL = 250; /** How many ops before a progress update should be printed to console */ private final long interval; private final Logger progressLogger; private long totalRequests; private final AtomicLong requestsDone; private long benchmarkStartTime; private long warmupTime_s; private long timeLimit_s; public RequestProgress(Logger progressLogger, long totalRequests, long timeLimit_s, long warmupTime_s, long interval) { this.interval = interval; this.progressLogger = progressLogger; this.totalRequests = totalRequests; this.requestsDone = new AtomicLong(); this.timeLimit_s = timeLimit_s; this.warmupTime_s = warmupTime_s; } public void startTimer() { benchmarkStartTime = System.currentTimeMillis() + warmupTime_s * 1000; } public long getBenchmarkStartTime() { return benchmarkStartTime; } public void update(long requestIncr) { long curr = requestsDone.addAndGet(requestIncr); long prev = curr - requestIncr; if ((curr / interval) > (prev / interval) || curr == totalRequests) { float progressPercent = ((float) curr) / totalRequests * 100; long now = System.currentTimeMillis(); long elapsed = now - benchmarkStartTime; float elapsed_s = ((float) elapsed) / 1000; float limitPercent = (elapsed_s / ((float) timeLimit_s)) * 100; float rate = curr / ((float)elapsed_s); progressLogger.info(String.format( "%d/%d requests finished: %.1f%% complete at %.1f ops/sec" + " %.1f/%d secs elapsed: %.1f%% of time limit used", curr, totalRequests, progressPercent, rate, elapsed_s, timeLimit_s, limitPercent)); } } } public static RequestProgress createProgress(Logger logger, Properties props) { long total_requests = ConfigUtil.getLong(props, Config.NUM_REQUESTS) * ConfigUtil.getLong(props, Config.NUM_REQUESTERS); long progressInterval = ConfigUtil.getLong(props, Config.REQ_PROG_INTERVAL, 10000L); long warmupTime = ConfigUtil.getLong(props, Config.WARMUP_TIME, 0L); long 
maxTime = ConfigUtil.getLong(props, Config.MAX_TIME); return new RequestProgress(logger, total_requests, maxTime, warmupTime, progressInterval); } } ================================================ FILE: src/main/java/com/facebook/LinkBench/LinkBenchTask.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; /** * The same as runnable, except run() can throw Exceptions * to be handled by the caller. */ public interface LinkBenchTask { public abstract void run() throws Exception; } ================================================ FILE: src/main/java/com/facebook/LinkBench/LinkCount.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench; public class LinkCount { public final long id1; public final long link_type; public long time; public long version; public long count; public LinkCount(long id1, long link_type, long time, long version, long init_count) { super(); this.id1 = id1; this.link_type = link_type; this.time = time; this.version = version; this.count = init_count; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/LinkStore.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench; import java.io.*; import java.util.ArrayList; import java.util.List; import java.util.Properties; public abstract class LinkStore { // void createLinkTable(); public static final long DEFAULT_LINK_TYPE = 123456789; public static final long MAX_ID2 = Long.MAX_VALUE; public static final int DEFAULT_NODE_TYPE = 2048; // visibility public static final byte VISIBILITY_HIDDEN = 0; public static final byte VISIBILITY_DEFAULT = 1; public static final int MAX_OPTYPES = LinkBenchOp.values().length; public static final int DEFAULT_LIMIT = 10000; public static final long MAX_LINK_DATA = 255; /** Controls the current setting for range limit */ protected int rangeLimit; /** The default constructor */ public LinkStore() { this.rangeLimit = DEFAULT_LIMIT; } public int getRangeLimit() { return rangeLimit; } public void setRangeLimit(int rangeLimit) { this.rangeLimit = rangeLimit; } /** initialize the store object */ public abstract void initialize(Properties p, Phase currentPhase, int threadId) throws IOException, Exception; /** * Do any cleanup. After this is called, store won't be reused */ public abstract void close(); // this is invoked when an error happens in case connection needs to be // cleaned up, reset, reopened, whatever public abstract void clearErrors(int threadID); /** * Add provided link to the store. If already exists, update with new data * @param dbid * @param a * @param noinverse * @return true if new link added, false if updated. Implementation is * optional, for informational purposes only. * @throws Exception */ public abstract boolean addLink(String dbid, Link a, boolean noinverse) throws Exception; /** * Delete link identified by parameters from store * @param dbid * @param id1 * @param link_type * @param id2 * @param noinverse * @param expunge if true, delete permanently. If false, hide instead * @return true if row existed. Implementation is optional, for informational * purposes only. 
* @throws Exception */ public abstract boolean deleteLink(String dbid, long id1, long link_type, long id2, boolean noinverse, boolean expunge) throws Exception; /** * Update a link in the database, or add if not found * @param dbid * @param a * @param noinverse * @return true if link found, false if new link created. Implementation is * optional, for informational purposes only. * @throws Exception */ public abstract boolean updateLink(String dbid, Link a, boolean noinverse) throws Exception; /** * lookup using id1, type, id2 * Returns hidden links. * @param dbid * @param id1 * @param link_type * @param id2 * @return * @throws Exception */ public abstract Link getLink(String dbid, long id1, long link_type, long id2) throws Exception; /** * Lookup multiple links: same as getlink but retrieve * multiple ids * @return list of matching links found, in any order */ public Link[] multigetLinks(String dbid, long id1, long link_type, long id2s[]) throws Exception { // Default implementation ArrayList res = new ArrayList(id2s.length); for (int i = 0; i < id2s.length; i++) { Link l = getLink(dbid, id1, link_type, id2s[i]); if (l != null) { res.add(l); } } return res.toArray(new Link[res.size()]); } /** * lookup using just id1, type * Does not return hidden links * @param dbid * @param id1 * @param link_type * @return list of links in descending order of time, or null * if no matching links * @throws Exception */ public abstract Link[] getLinkList(String dbid, long id1, long link_type) throws Exception; /** * lookup using just id1, type * Does not return hidden links * @param dbid * @param id1 * @param link_type * @param minTimestamp * @param maxTimestamp * @param offset * @param limit * @return list of links in descending order of time, or null * if no matching links * @throws Exception */ public abstract Link[] getLinkList(String dbid, long id1, long link_type, long minTimestamp, long maxTimestamp, int offset, int limit) throws Exception; // count the #links public abstract 
long countLinks(String dbid, long id1, long link_type) throws Exception; /** * @return 0 if it doesn't support addBulkLinks and recalculateCounts methods * If it does support them, return the maximum number of links that * can be added at a time */ public int bulkLoadBatchSize() { return 0; } /** Add a batch of links without updating counts */ public void addBulkLinks(String dbid, List a, boolean noinverse) throws Exception { throw new UnsupportedOperationException("addBulkLinks not supported for " + "LinkStore subclass " + this.getClass().getName()); } /** Add a batch of counts */ public void addBulkCounts(String dbid, List a) throws Exception { throw new UnsupportedOperationException("addBulkCounts not supported for " + "LinkStore subclass " + this.getClass().getName()); } } ================================================ FILE: src/main/java/com/facebook/LinkBench/LinkStoreHBaseGeneralAtomicityTesting.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Properties; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.util.Bytes; import org.apache.log4j.Level; /* This class is a stress test for verifying HBase API get/put operations. It basically checks, at a reasonable frequency, whether these opertations behave as expected. At first, run CreateTaoTable in hbase/shell to create the required table, then run LinkBenchDriver with config option: store="HBaseGeneralAtomicityTesting" (in file LinkConfig.peroperties) Other configs (in file LinkConfig.properties) that need to be set: + table_name: should be the same as the name used with CreateTaoTable in the previous step. + id2gen_config: this specifies genereting disjoint id2 for threads. Must be set to 1. + countlink, getlink, getlinklist: must be set to 0. + sleeprate + sleeptime */ public class LinkStoreHBaseGeneralAtomicityTesting extends LinkStore { private final boolean DEBUG = false; HTable table; Level debuglevel; ArrayList columnfamilies; Phase currentphase; int threadid; String threadname; double sleeprate; int sleeptime; int maxsleepingthreads; static int counter = 0; static Object lock = new Object(); // Caution: Because character dot (.) is used as a separation indicator, // there must be no (.) in the link data. // If this property is violated, // an exception will be thrown in method bytesToLink(...) private byte[] linkToBytes(Link a) { String temp = Long.toString(a.id1) + "." + Long.toString(a.link_type) + "." + Long.toString(a.id2) + "." + Byte.toString(a.visibility) + "." 
+ Bytes.toString(a.data) + "." + // there must be no (.) in a.data Integer.toString(a.version) + "." + Long.toString(a.time) + "."; return Bytes.toBytes(temp); } // concatanate id1, link_typ1, id2 separated by "." private String combine(long id1, long link_type, long id2) { String temp = Long.toString(id1) + "." + Long.toString(link_type) + "." + Long.toString(id2); return temp; } private Link bytesToLink(byte[] blink) throws Exception { String slink = new String(blink); String[] tokens = slink.split("."); assertTrue(tokens.length == 9, "wrong link format"); // number of identities in a link must be 9 Link a = new Link(); a.id1 = Long.parseLong(tokens[0]); a.link_type = Long.parseLong(tokens[1]); a.id2 = Long.parseLong(tokens[2]); a.visibility = Byte.parseByte(tokens[3]); a.data = tokens[4].getBytes(); a.version = Integer.parseInt(tokens[5]); a.time = Long.parseLong(tokens[6]); return a; } private String bytesToString(byte[] value) { String st = new String(value); return st; } private void assertTrue(boolean expression, String message) throws Exception { if (!expression) { System.err.println("-------------------------------------------"); System.err.println("Test failure: " + message); (new Exception()).printStackTrace(); System.exit(1); } } /* * Constructor */ public LinkStoreHBaseGeneralAtomicityTesting( Phase input_currentphase, int input_threadid, Properties props) throws IOException { initialize(props, input_currentphase, input_threadid); } public LinkStoreHBaseGeneralAtomicityTesting() { super(); } @Override public void initialize(Properties props, Phase currentphase, int threadid) throws IOException { this.currentphase = currentphase; this.threadid = threadid; if (currentphase == Phase.LOAD) { threadname = "Loader " + threadid; } else if (currentphase == Phase.REQUEST) { threadname = "Requester " + threadid; } else { System.err.println("Fatal error: Phase " + currentphase + "does not exists."); System.exit(1); } Configuration conf = 
HBaseConfiguration.create(); String tablename = ConfigUtil.getPropertyRequired(props, Config.LINK_TABLE); table = new HTable(conf, tablename); debuglevel = ConfigUtil.getDebugLevel(props); sleeprate = ConfigUtil.getDouble(props, "sleeprate"); sleeptime = ConfigUtil.getInt(props, "sleeptime"); maxsleepingthreads = ConfigUtil.getInt(props, "maxsleepingthreads"); if (ConfigUtil.getInt(props, "id2gen_config") != 1) { System.err.println("Fatal error: id2gen_config must be 1."); System.err.println("Please check config file."); System.exit(1); } // create a list that stores column family names of assoc_tien columnfamilies = new ArrayList(); columnfamilies.add("cf1"); columnfamilies.add("cf2"); columnfamilies.add("cf3"); } @Override public void close() { //TODO } /* * Interface implementation */ @Override public void clearErrors(int threadID) { try { System.err.println("Clearing region cache in threadId " + threadID); HConnection hm = table.getConnection(); hm.clearRegionCache(); } catch (Throwable e) { e.printStackTrace(); return; } } @Override public boolean addLink(String dbid, Link a, boolean noinverse) throws Exception { String linkHead = combine(a.id1, a.link_type, a.id2); byte[] row = linkHead.getBytes(); byte[] value = linkToBytes(a); // put data into table Put p = new Put(row); for (String cf : columnfamilies) { p.add(Bytes.toBytes(cf), Bytes.toBytes(""), value); } if (DEBUG) { System.out.println(threadname + ": addLink " + a.id1 + "." + a.link_type + "." + a.id2); } table.put(p); // sleep for some time if (currentphase == Phase.REQUEST && Math.random() < sleeprate) { synchronized(lock) { if (counter < maxsleepingthreads) { ++counter; System.out.println(threadname + " goes to sleep. " + "Number of sleeping threads is: " + counter); try { lock.wait(sleeptime); } catch (InterruptedException e) { e.printStackTrace(); } --counter; System.out.println(threadname + " woke up. 
" + "Number of sleeping threads is: " + counter); } } } // make sure the new data is there, and value stored // in three column families are identical Get g = new Get(row); Result result = table.get(g); assertTrue(!result.isEmpty(), linkHead); for (String cf : columnfamilies) { byte[] tempvalue = result.getValue(Bytes.toBytes(cf), Bytes.toBytes("")); assertTrue(Arrays.equals(value, tempvalue), "rowid = " + linkHead + "; column family = " + cf + "; get value = " + bytesToString(value) + "; expected value = " + bytesToString(tempvalue)); } return true; // always pretend was added } @Override public boolean deleteLink(String dbid, long id1, long link_type, long id2, boolean noinverse, boolean expunge) throws Exception { String linkHead = combine(id1, link_type, id2); byte[] row = linkHead.getBytes(); // delete data from table Delete d = new Delete(row); if (DEBUG) { System.out.println(threadname + ": deleteLink " + id1 + "." + link_type + "." + id2); } table.delete(d); // sleep for some time if (currentphase == Phase.REQUEST && Math.random() < sleeprate) { synchronized(lock) { if (counter < maxsleepingthreads) { ++counter; System.out.println(threadname + " goes to sleep. " + "Number of sleeping threads is: " + counter); try { lock.wait(sleeptime); } catch (InterruptedException e) { e.printStackTrace(); } --counter; System.out.println(threadname + " woke up. 
" + "Number of sleeping threads is: " + counter); } } } // check if data has been actually deleted Get g = new Get(row); Result result = table.get(g); assertTrue(result.isEmpty(), linkHead); return true; // always pretend was found } @Override public boolean updateLink(String dbid, Link a, boolean noinverse) throws Exception { addLink(dbid, a, noinverse); return true; // always pretend was updated } @Override public Link getLink(String dbid, long id1, long link_type, long id2) throws Exception { String linkHead = combine(id1, link_type, id2); byte[] row = linkHead.getBytes(); // get data from table Get g = new Get(row); Result result = table.get(g); assertTrue(!result.isEmpty(), linkHead); // ensure values stored in three column families are identical byte[] value = null; for (String cf: columnfamilies) { byte[] tempvalue = result.getValue(Bytes.toBytes(cf), Bytes.toBytes("")); if (value == null) value = tempvalue; else assertTrue(Arrays.equals(value, tempvalue), id1 + "." + link_type + "." 
+ id2); } // return link Link a; if (value == null) a = null; else { a = bytesToLink(value); assertTrue(a.id1 == id1, linkHead); assertTrue(a.id2 == id2, linkHead); assertTrue(a.link_type == link_type, linkHead); } return a; } @Override public Link[] getLinkList(String dbid, long id1, long link_type) throws Exception { throw new Exception("Don't use getLinkList in HBaseGeneralAtomicityTest"); } @Override public Link[] getLinkList(String dbid, long id1, long link_type, long minTimestamp, long maxTimestamp, int offset, int limit) throws Exception { throw new Exception("Don't use getLinkList in HBaseGeneralAtomicityTest"); } // count the #links @Override public long countLinks(String dbid, long id1, long link_type) throws Exception { throw new Exception("Don't use countLinks in HBaseGeneralAtomicityTest"); } } ================================================ FILE: src/main/java/com/facebook/LinkBench/LinkStoreMysql.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.Collections; import java.util.Date; import java.util.HashSet; import java.util.List; import java.util.Properties; import java.util.Random; import org.apache.log4j.Level; import org.apache.log4j.Logger; public class LinkStoreMysql extends GraphStore { /* MySql database server configuration keys */ public static final String CONFIG_HOST = "host"; public static final String CONFIG_PORT = "port"; public static final String CONFIG_USER = "user"; public static final String CONFIG_PASSWORD = "password"; public static final String CONFIG_BULK_INSERT_BATCH = "mysql_bulk_insert_batch"; public static final String CONFIG_DISABLE_BINLOG_LOAD = "mysql_disable_binlog_load"; public static final int DEFAULT_BULKINSERT_SIZE = 1024; private static final boolean INTERNAL_TESTING = false; String linktable; String counttable; String nodetable; String host; String user; String pwd; String port; String defaultDB; Level debuglevel; // Use read-only and read-write connections and statements to avoid toggling // auto-commit. Connection conn_ro, conn_rw; Statement stmt_ro, stmt_rw; private Phase phase; int bulkInsertSize = DEFAULT_BULKINSERT_SIZE; // Optional optimization: disable binary logging boolean disableBinLogForLoad = false; private final Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); public LinkStoreMysql() { super(); } public LinkStoreMysql(Properties props) throws IOException, Exception { super(); initialize(props, Phase.LOAD, 0); } public void initialize(Properties props, Phase currentPhase, int threadId) throws IOException, Exception { counttable = ConfigUtil.getPropertyRequired(props, Config.COUNT_TABLE); if (counttable.equals("")) { String msg = "Error! 
" + Config.COUNT_TABLE + " is empty!" + "Please check configuration file."; logger.error(msg); throw new RuntimeException(msg); } nodetable = props.getProperty(Config.NODE_TABLE); if (nodetable.equals("")) { // For now, don't assume that nodetable is provided String msg = "Error! " + Config.NODE_TABLE + " is empty!" + "Please check configuration file."; logger.error(msg); throw new RuntimeException(msg); } host = ConfigUtil.getPropertyRequired(props, CONFIG_HOST); user = ConfigUtil.getPropertyRequired(props, CONFIG_USER); pwd = ConfigUtil.getPropertyRequired(props, CONFIG_PASSWORD); port = props.getProperty(CONFIG_PORT); defaultDB = ConfigUtil.getPropertyRequired(props, Config.DBID); if (port == null || port.equals("")) port = "3306"; //use default port debuglevel = ConfigUtil.getDebugLevel(props); phase = currentPhase; if (props.containsKey(CONFIG_BULK_INSERT_BATCH)) { bulkInsertSize = ConfigUtil.getInt(props, CONFIG_BULK_INSERT_BATCH); } if (props.containsKey(CONFIG_DISABLE_BINLOG_LOAD)) { disableBinLogForLoad = ConfigUtil.getBool(props, CONFIG_DISABLE_BINLOG_LOAD); } // connect try { openConnection(); } catch (Exception e) { logger.error("error connecting to database:", e); throw e; } linktable = ConfigUtil.getPropertyRequired(props, Config.LINK_TABLE); } // connects to test database private void openConnection() throws Exception { conn_ro = null; conn_rw = null; stmt_ro = null; stmt_rw = null; Random rng = new Random(); String jdbcUrl = "jdbc:mysql://"+ host + ":" + port + "/"; if (defaultDB != null) { jdbcUrl += defaultDB; } Class.forName("com.mysql.jdbc.Driver").newInstance(); jdbcUrl += "?elideSetAutoCommits=true" + "&useLocalTransactionState=true" + "&allowMultiQueries=true" + "&useLocalSessionState=true" + /* Need affected row count from queries to distinguish updates/inserts * consistently across different MySql versions (see MySql bug 46675) */ "&useAffectedRows=true"; /* Fix for failing connections at high concurrency, short random delay for * each */ 
try { int t = rng.nextInt(1000) + 100; //System.err.println("Sleeping " + t + " msecs"); Thread.sleep(t); } catch (InterruptedException ie) { } conn_rw = DriverManager.getConnection(jdbcUrl, user, pwd); conn_rw.setAutoCommit(false); try { int t = rng.nextInt(1000) + 100; //System.err.println("Sleeping " + t + " msecs"); Thread.sleep(t); } catch (InterruptedException ie) { } conn_ro = DriverManager.getConnection(jdbcUrl, user, pwd); conn_ro.setAutoCommit(true); //System.err.println("connected"); stmt_rw = conn_rw.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY); stmt_ro = conn_ro.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY); if (phase == Phase.LOAD && disableBinLogForLoad) { // Turn binary logging off for duration of connection stmt_rw.executeUpdate("SET SESSION sql_log_bin=0"); stmt_ro.executeUpdate("SET SESSION sql_log_bin=0"); } } @Override public void close() { try { if (stmt_rw != null) stmt_rw.close(); if (stmt_ro != null) stmt_ro.close(); if (conn_rw != null) conn_rw.close(); if (conn_ro != null) conn_ro.close(); } catch (SQLException e) { logger.error("Error while closing MySQL connection: ", e); } } public void clearErrors(int threadID) { logger.info("Reopening MySQL connection in threadID " + threadID); try { if (conn_rw != null) { conn_rw.close(); } if (conn_ro != null) { conn_ro.close(); } openConnection(); } catch (Throwable e) { e.printStackTrace(); return; } } /** * Set of all JDBC SQLState strings that indicate a transient MySQL error * that should be handled by retrying */ private static final HashSet retrySQLStates = populateRetrySQLStates(); /** * Populate retrySQLStates * SQLState codes are defined in MySQL Connector/J documentation: * http://dev.mysql.com/doc/refman/5.6/en/connector-j-reference-error-sqlstates.html */ private static HashSet populateRetrySQLStates() { HashSet states = new HashSet(); states.add("41000"); // ER_LOCK_WAIT_TIMEOUT states.add("40001"); // ER_LOCK_DEADLOCK 
return states; } /** * Handle SQL exception by logging error and selecting how to respond * @param ex SQLException thrown by MySQL JDBC driver * @return true if transaction should be retried */ private boolean processSQLException(SQLException ex, String op) { boolean retry = retrySQLStates.contains(ex.getSQLState()); String msg = "SQLException thrown by MySQL driver during execution of " + "operation: " + op + ". "; msg += "Message was: '" + ex.getMessage() + "'. "; msg += "SQLState was: " + ex.getSQLState() + ". "; if (retry) { msg += "Error is probably transient, retrying operation."; logger.warn(msg); } else { msg += "Error is probably non-transient, will abort operation."; logger.error(msg); } return retry; } // get count for testing purpose private void testCount(Statement stmt, String dbid, String assoctable, String counttable, long id, long link_type) throws Exception { String select1 = "SELECT COUNT(id2)" + " FROM " + dbid + "." + assoctable + " WHERE id1 = " + id + " AND link_type = " + link_type + " AND visibility = " + LinkStore.VISIBILITY_DEFAULT; String select2 = "SELECT COALESCE (SUM(count), 0)" + " FROM " + dbid + "." + counttable + " WHERE id = " + id + " AND link_type = " + link_type; String verify = "SELECT IF ((" + select1 + ") = (" + select2 + "), 1, 0) as result"; ResultSet result = stmt.executeQuery(verify); int ret = -1; while (result.next()) { ret = result.getInt("result"); } if (ret != 1) { throw new Exception("Data inconsistency between " + assoctable + " and " + counttable); } } @Override public boolean addLink(String dbid, Link l, boolean noinverse) throws Exception { while (true) { try { return addLinkImpl(dbid, l, noinverse); } catch (SQLException ex) { if (!processSQLException(ex, "addLink")) { throw ex; } } } } private boolean addLinkImpl(String dbid, Link l, boolean noinverse) throws Exception { if (Level.DEBUG.isGreaterOrEqual(debuglevel)) { logger.debug("addLink " + l.id1 + "." + l.id2 + "." 
+ l.link_type); } // if the link is already there then update its visibility // only update visibility; skip updating time, version, etc. int nrows = addLinksNoCount(dbid, Collections.singletonList(l)); // Note: at this point, we have an exclusive lock on the link // row until the end of the transaction, so can safely do // further updates without concurrency issues. if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("nrows = " + nrows); } // based on nrows, determine whether the previous query was an insert // or update boolean row_found; boolean update_data = false; int update_count = 0; switch (nrows) { case 1: // a new row was inserted --> need to update counttable if (l.visibility == VISIBILITY_DEFAULT) { update_count = 1; } row_found = false; break; case 0: // A row is found but its visibility was unchanged // --> need to update other data update_data = true; row_found = true; break; case 2: // a visibility was changed from VISIBILITY_HIDDEN to DEFAULT // or vice-versa // --> need to update both counttable and other data if (l.visibility == VISIBILITY_DEFAULT) { update_count = 1; } else { update_count = -1; } update_data = true; row_found = true; break; default: String msg = "Value of affected-rows number is not valid" + nrows; logger.error("SQL Error: " + msg); throw new Exception(msg); } if (update_count != 0) { int base_count = update_count < 0 ? 0 : 1; // query to update counttable // if (id, link_type) is not there yet, add a new record with count = 1 // The update happens atomically, with the latest count and version long currentTime = (new Date()).getTime(); String updatecount = "INSERT INTO " + dbid + "." 
+ counttable + "(id, link_type, count, time, version) " + "VALUES (" + l.id1 + ", " + l.link_type + ", " + base_count + ", " + currentTime + ", " + 0 + ") " + "ON DUPLICATE KEY UPDATE" + " count = count + " + update_count + ", version = version + 1 " + ", time = " + currentTime + ";"; if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace(updatecount); } // This is the last statement of transaction - append commit to avoid // extra round trip if (!update_data) { updatecount += " commit;"; } stmt_rw.executeUpdate(updatecount); } if (update_data) { // query to update link data (the first query only updates visibility) String updatedata = "UPDATE " + dbid + "." + linktable + " SET" + " visibility = " + l.visibility + ", data = " + stringLiteral(l.data)+ ", time = " + l.time + ", version = " + l.version + " WHERE id1 = " + l.id1 + " AND id2 = " + l.id2 + " AND link_type = " + l.link_type + "; commit;"; if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace(updatedata); } stmt_rw.executeUpdate(updatedata); } if (INTERNAL_TESTING) { testCount(stmt_ro, dbid, linktable, counttable, l.id1, l.link_type); } return row_found; } /** * Internal method: add links without updating the count * @param dbid * @param links * @return * @throws SQLException */ private int addLinksNoCount(String dbid, List links) throws SQLException { if (links.size() == 0) return 0; // query to insert a link; StringBuilder sb = new StringBuilder(); sb.append("INSERT INTO " + dbid + "." 
+ linktable + "(id1, id2, link_type, " + "visibility, data, time, version) VALUES "); boolean first = true; for (Link l : links) { if (first) { first = false; } else { sb.append(','); } sb.append("(" + l.id1 + ", " + l.id2 + ", " + l.link_type + ", " + l.visibility + ", " + stringLiteral(l.data) + ", " + l.time + ", " + l.version + ")"); } sb.append(" ON DUPLICATE KEY UPDATE visibility = VALUES(visibility)"); String insert = sb.toString(); if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace(insert); } int nrows = stmt_rw.executeUpdate(insert); return nrows; } @Override public boolean deleteLink(String dbid, long id1, long link_type, long id2, boolean noinverse, boolean expunge) throws Exception { while (true) { try { return deleteLinkImpl(dbid, id1, link_type, id2, noinverse, expunge); } catch (SQLException ex) { if (!processSQLException(ex, "deleteLink")) { throw ex; } } } } private boolean deleteLinkImpl(String dbid, long id1, long link_type, long id2, boolean noinverse, boolean expunge) throws Exception { if (Level.DEBUG.isGreaterOrEqual(debuglevel)) { logger.debug("deleteLink " + id1 + "." + id2 + "." + link_type); } // First do a select to check if the link is not there, is there and // hidden, or is there and visible; // Result could be either NULL, VISIBILITY_HIDDEN or VISIBILITY_DEFAULT. // In case of VISIBILITY_DEFAULT, later we need to mark the link as // hidden, and update counttable. // We lock the row exclusively because we rely on getting the correct // value of visible to maintain link counts. Without the lock, // a concurrent transaction could also see the link as visible and // we would double-decrement the link count. String select = "SELECT visibility" + " FROM " + dbid + "." 
+ linktable + " WHERE id1 = " + id1 + " AND id2 = " + id2 + " AND link_type = " + link_type + " FOR UPDATE;"; if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace(select); } ResultSet result = stmt_rw.executeQuery(select); int visibility = -1; boolean found = false; while (result.next()) { visibility = result.getInt("visibility"); found = true; } if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace(String.format("(%d, %d, %d) visibility = %d", id1, link_type, id2, visibility)); } if (!found) { // do nothing } else if (visibility == VISIBILITY_HIDDEN && !expunge) { // do nothing } else { // Only update count if link is present and visible boolean updateCount = (visibility != VISIBILITY_HIDDEN); // either delete or mark the link as hidden String delete; if (!expunge) { delete = "UPDATE " + dbid + "." + linktable + " SET visibility = " + VISIBILITY_HIDDEN + " WHERE id1 = " + id1 + " AND id2 = " + id2 + " AND link_type = " + link_type + ";"; } else { delete = "DELETE FROM " + dbid + "." + linktable + " WHERE id1 = " + id1 + " AND id2 = " + id2 + " AND link_type = " + link_type + ";"; } if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace(delete); } stmt_rw.executeUpdate(delete); // update count table // * if found (id1, link_type) in count table, set // count = (count == 1) ? 0) we decrease the value of count // column by 1; // * otherwise, insert new link with count column = 0 // The update happens atomically, with the latest count and version long currentTime = (new Date()).getTime(); String update = "INSERT INTO " + dbid + "." 
+ counttable + " (id, link_type, count, time, version) " + "VALUES (" + id1 + ", " + link_type + ", 0" + ", " + currentTime + ", " + 0 + ") " + "ON DUPLICATE KEY UPDATE" + " count = IF (count = 0, 0, count - 1)" + ", time = " + currentTime + ", version = version + 1;"; if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace(update); } stmt_rw.executeUpdate(update); } conn_rw.commit(); if (INTERNAL_TESTING) { testCount(stmt_ro, dbid, linktable, counttable, id1, link_type); } return found; } @Override public boolean updateLink(String dbid, Link l, boolean noinverse) throws Exception { // Retry logic is in addLink boolean added = addLink(dbid, l, noinverse); return !added; // return true if updated instead of added } // lookup using id1, type, id2 @Override public Link getLink(String dbid, long id1, long link_type, long id2) throws Exception { while (true) { try { return getLinkImpl(dbid, id1, link_type, id2); } catch (SQLException ex) { if (!processSQLException(ex, "getLink")) { throw ex; } } } } private Link getLinkImpl(String dbid, long id1, long link_type, long id2) throws Exception { Link res[] = multigetLinks(dbid, id1, link_type, new long[] {id2}); if (res == null) return null; assert(res.length <= 1); return res.length == 0 ? null : res[0]; } @Override public Link[] multigetLinks(String dbid, long id1, long link_type, long[] id2s) throws Exception { while (true) { try { return multigetLinksImpl(dbid, id1, link_type, id2s); } catch (SQLException ex) { if (!processSQLException(ex, "multigetLinks")) { throw ex; } } } } private Link[] multigetLinksImpl(String dbid, long id1, long link_type, long[] id2s) throws Exception { StringBuilder querySB = new StringBuilder(); querySB.append(" select id1, id2, link_type," + " visibility, data, time, " + " version from " + dbid + "." 
+ linktable + " where id1 = " + id1 + " and link_type = " + link_type + " and id2 in ("); boolean first = true; for (long id2: id2s) { if (first) { first = false; } else { querySB.append(","); } querySB.append(id2); } querySB.append(");"); String query = querySB.toString(); if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("Query is " + query); } ResultSet rs = stmt_ro.executeQuery(query); // Get the row count to allocate result array assert(rs.getType() != ResultSet.TYPE_FORWARD_ONLY); rs.last(); int count = rs.getRow(); rs.beforeFirst(); Link results[] = new Link[count]; int i = 0; while (rs.next()) { Link l = createLinkFromRow(rs); if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("Lookup result: " + id1 + "," + link_type + "," + l.id2 + " found"); } results[i++] = l; } return results; } // lookup using just id1, type @Override public Link[] getLinkList(String dbid, long id1, long link_type) throws Exception { // Retry logic in getLinkList return getLinkList(dbid, id1, link_type, 0, Long.MAX_VALUE, 0, rangeLimit); } @Override public Link[] getLinkList(String dbid, long id1, long link_type, long minTimestamp, long maxTimestamp, int offset, int limit) throws Exception { while (true) { try { return getLinkListImpl(dbid, id1, link_type, minTimestamp, maxTimestamp, offset, limit); } catch (SQLException ex) { if (!processSQLException(ex, "getLinkListImpl")) { throw ex; } } } } private Link[] getLinkListImpl(String dbid, long id1, long link_type, long minTimestamp, long maxTimestamp, int offset, int limit) throws Exception { String query = " select id1, id2, link_type," + " visibility, data, time," + " version from " + dbid + "." 
+ linktable + " FORCE INDEX(`id1_type`) " + " where id1 = " + id1 + " and link_type = " + link_type + " and time >= " + minTimestamp + " and time <= " + maxTimestamp + " and visibility = " + LinkStore.VISIBILITY_DEFAULT + " order by time desc " + " limit " + offset + "," + limit + ";"; if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("Query is " + query); } ResultSet rs = stmt_ro.executeQuery(query); // Find result set size // be sure we fast forward to find result set size assert(rs.getType() != ResultSet.TYPE_FORWARD_ONLY); rs.last(); int count = rs.getRow(); rs.beforeFirst(); if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("Range lookup result: " + id1 + "," + link_type + " is " + count); } if (count == 0) { return null; } // Fetch the link data Link links[] = new Link[count]; int i = 0; while (rs.next()) { Link l = createLinkFromRow(rs); links[i] = l; i++; } assert(i == count); return links; } private Link createLinkFromRow(ResultSet rs) throws SQLException { Link l = new Link(); l.id1 = rs.getLong(1); l.id2 = rs.getLong(2); l.link_type = rs.getLong(3); l.visibility = rs.getByte(4); l.data = rs.getBytes(5); l.time = rs.getLong(6); l.version = rs.getInt(7); return l; } // count the #links @Override public long countLinks(String dbid, long id1, long link_type) throws Exception { while (true) { try { return countLinksImpl(dbid, id1, link_type); } catch (SQLException ex) { if (!processSQLException(ex, "countLinks")) { throw ex; } } } } private long countLinksImpl(String dbid, long id1, long link_type) throws Exception { long count = 0; String query = " select count from " + dbid + "." 
+ counttable + " where id = " + id1 + " and link_type = " + link_type + ";"; ResultSet rs = stmt_ro.executeQuery(query); boolean found = false; while (rs.next()) { // found if (found) { logger.trace("Count query 2nd row!: " + id1 + "," + link_type); } found = true; count = rs.getLong(1); } if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("Count result: " + id1 + "," + link_type + " is " + found + " and " + count); } return count; } @Override public int bulkLoadBatchSize() { return bulkInsertSize; } @Override public void addBulkLinks(String dbid, List links, boolean noinverse) throws Exception { while (true) { try { addBulkLinksImpl(dbid, links, noinverse); return; } catch (SQLException ex) { if (!processSQLException(ex, "addBulkLinks")) { throw ex; } } } } private void addBulkLinksImpl(String dbid, List links, boolean noinverse) throws Exception { if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("addBulkLinks: " + links.size() + " links"); } addLinksNoCount(dbid, links); conn_rw.commit(); } @Override public void addBulkCounts(String dbid, List counts) throws Exception { while (true) { try { addBulkCountsImpl(dbid, counts); return; } catch (SQLException ex) { if (!processSQLException(ex, "addBulkCounts")) { throw ex; } } } } private void addBulkCountsImpl(String dbid, List counts) throws Exception { if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("addBulkCounts: " + counts.size() + " link counts"); } if (counts.size() == 0) return; StringBuilder sqlSB = new StringBuilder(); sqlSB.append("REPLACE INTO " + dbid + "." 
+ counttable + "(id, link_type, count, time, version) " + "VALUES "); boolean first = true; for (LinkCount count: counts) { if (first) { first = false; } else { sqlSB.append(","); } sqlSB.append("(" + count.id1 + ", " + count.link_type + ", " + count.count + ", " + count.time + ", " + count.version + ")"); } String sql = sqlSB.toString(); if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace(sql); } stmt_rw.executeUpdate(sql); conn_rw.commit(); } private void checkNodeTableConfigured() throws Exception { if (this.nodetable == null) { throw new Exception("Nodetable not specified: cannot perform node" + " operation"); } } @Override public void resetNodeStore(String dbid, long startID) throws Exception { checkNodeTableConfigured(); // Truncate table deletes all data and allows us to reset autoincrement stmt_rw.execute(String.format("TRUNCATE TABLE `%s`.`%s`;", dbid, nodetable)); stmt_rw.execute(String.format("ALTER TABLE `%s`.`%s` " + "AUTO_INCREMENT = %d;", dbid, nodetable, startID)); } @Override public long addNode(String dbid, Node node) throws Exception { while (true) { try { return addNodeImpl(dbid, node); } catch (SQLException ex) { if (!processSQLException(ex, "addNode")) { throw ex; } } } } private long addNodeImpl(String dbid, Node node) throws Exception { long ids[] = bulkAddNodes(dbid, Collections.singletonList(node)); assert(ids.length == 1); return ids[0]; } @Override public long[] bulkAddNodes(String dbid, List nodes) throws Exception { while (true) { try { return bulkAddNodesImpl(dbid, nodes); } catch (SQLException ex) { if (!processSQLException(ex, "bulkAddNodes")) { throw ex; } } } } private long[] bulkAddNodesImpl(String dbid, List nodes) throws Exception { checkNodeTableConfigured(); StringBuilder sql = new StringBuilder(); sql.append("INSERT INTO `" + dbid + "`.`" + nodetable + "` " + "(type, version, time, data) " + "VALUES "); boolean first = true; for (Node node: nodes) { if (first) { first = false; } else { sql.append(","); } 
sql.append("(" + node.type + "," + node.version + "," + node.time + "," + stringLiteral(node.data) + ")"); } sql.append("; commit;"); if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace(sql); } stmt_rw.executeUpdate(sql.toString(), Statement.RETURN_GENERATED_KEYS); ResultSet rs = stmt_rw.getGeneratedKeys(); long newIds[] = new long[nodes.size()]; // Find the generated id int i = 0; while (rs.next() && i < nodes.size()) { newIds[i++] = rs.getLong(1); } if (i != nodes.size()) { throw new Exception("Wrong number of generated keys on insert: " + " expected " + nodes.size() + " actual " + i); } assert(!rs.next()); // check done rs.close(); return newIds; } @Override public Node getNode(String dbid, int type, long id) throws Exception { while (true) { try { return getNodeImpl(dbid, type, id); } catch (SQLException ex) { if (!processSQLException(ex, "getNode")) { throw ex; } } } } private Node getNodeImpl(String dbid, int type, long id) throws Exception { checkNodeTableConfigured(); ResultSet rs = stmt_ro.executeQuery( "SELECT id, type, version, time, data " + "FROM `" + dbid + "`.`" + nodetable + "` " + "WHERE id=" + id + ";"); if (rs.next()) { Node res = new Node(rs.getLong(1), rs.getInt(2), rs.getLong(3), rs.getInt(4), rs.getBytes(5)); // Check that multiple rows weren't returned assert(rs.next() == false); rs.close(); if (res.type != type) { return null; } else { return res; } } return null; } @Override public boolean updateNode(String dbid, Node node) throws Exception { while (true) { try { return updateNodeImpl(dbid, node); } catch (SQLException ex) { if (!processSQLException(ex, "updateNode")) { throw ex; } } } } private boolean updateNodeImpl(String dbid, Node node) throws Exception { checkNodeTableConfigured(); String sql = "UPDATE `" + dbid + "`.`" + nodetable + "`" + " SET " + "version=" + node.version + ", time=" + node.time + ", data=" + stringLiteral(node.data) + " WHERE id=" + node.id + " AND type=" + node.type + "; commit;"; if 
(Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace(sql); } int rows = stmt_rw.executeUpdate(sql); if (rows == 1) return true; else if (rows == 0) return false; else throw new Exception("Did not expect " + rows + "affected rows: only " + "expected update to affect at most one row"); } @Override public boolean deleteNode(String dbid, int type, long id) throws Exception { while (true) { try { return deleteNodeImpl(dbid, type, id); } catch (SQLException ex) { if (!processSQLException(ex, "deleteNode")) { throw ex; } } } } private boolean deleteNodeImpl(String dbid, int type, long id) throws Exception { checkNodeTableConfigured(); int rows = stmt_rw.executeUpdate( "DELETE FROM `" + dbid + "`.`" + nodetable + "` " + "WHERE id=" + id + " and type =" + type + "; commit;"); if (rows == 0) { return false; } else if (rows == 1) { return true; } else { throw new Exception(rows + " rows modified on delete: should delete " + "at most one"); } } /** * Convert a byte array into a valid mysql string literal, assuming that * it will be inserted into a column with latin-1 encoding. * Based on information at * http://dev.mysql.com/doc/refman/5.1/en/string-literals.html * @param arr * @return */ private static String stringLiteral(byte arr[]) { CharBuffer cb = Charset.forName("ISO-8859-1").decode(ByteBuffer.wrap(arr)); StringBuilder sb = new StringBuilder(); sb.append('\''); for (int i = 0; i < cb.length(); i++) { char c = cb.get(i); switch (c) { case '\'': sb.append("\\'"); break; case '\\': sb.append("\\\\"); break; case '\0': sb.append("\\0"); break; case '\b': sb.append("\\b"); break; case '\n': sb.append("\\n"); break; case '\r': sb.append("\\r"); break; case '\t': sb.append("\\t"); break; default: if (Character.getNumericValue(c) < 0) { // Fall back on hex string for values not defined in latin-1 return hexStringLiteral(arr); } else { sb.append(c); } } } sb.append('\''); return sb.toString(); } /** * Create a mysql hex string literal from array: * E.g. 
[0xf, bc, 4c, 4] converts to x'0fbc4c04'
   * @param arr
   * @return the mysql hex literal including quotes
   */
  private static String hexStringLiteral(byte[] arr) {
    StringBuilder sb = new StringBuilder();
    sb.append("x'");
    for (int i = 0; i < arr.length; i++) {
      byte b = arr[i];
      int lo = b & 0xf;        // low nibble
      int hi = (b >> 4) & 0xf; // high nibble
      // Emit two hex digits per byte, high nibble first, zero-padded
      sb.append(Character.forDigit(hi, 16));
      sb.append(Character.forDigit(lo, 16));
    }
    sb.append("'");
    return sb.toString();
  }
}

================================================ FILE: src/main/java/com/facebook/LinkBench/LinkStoreRocksDb.java ================================================

/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ package com.facebook.LinkBench; import com.facebook.rocks.swift.*; import com.facebook.swift.service.ThriftClientManager; import com.facebook.nifty.client.FramedClientConnector; import com.google.common.net.HostAndPort; import org.apache.thrift.transport.TTransportException; import java.io.IOException; import java.io.ByteArrayOutputStream; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.Charset; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.Collections; import java.util.Date; import java.util.HashSet; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Properties; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.apache.commons.codec.binary.Hex; import static com.google.common.net.HostAndPort.fromParts; /* * This file implements Linkbench methods for loading/requesting data to rocksDb * database server by calling thrift apis after creating a java thrift client * through swift : rocksClient for the link and the node operations. 
*/ public class LinkStoreRocksDb extends GraphStore { private static final ThriftClientManager clientManager = new ThriftClientManager(); private ThreadLocal rocksClient = new ThreadLocal(); /* RocksDb database server configuration keys */ public static final String CONFIG_HOST = "host"; public static final String CONFIG_PORT = "port"; public static final String CONFIG_WRITE_SYNC = "write_options_sync"; public static final String CONFIG_WRITE_DISABLE_WAL = "write_options_disableWAL"; public static final String CONFIG_USER = "user"; public static final String CONFIG_PASSWORD = "password"; public static final int DEFAULT_BULKINSERT_SIZE = 1024; private static final boolean INTERNAL_TESTING = false; private static int totalThreads = 0; String host; int port; WriteOptions writeOptions; String user; String pwd; Level debuglevel; int bulkInsertSize = DEFAULT_BULKINSERT_SIZE; private final Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); private RocksService getRocksClient() throws Exception { if (rocksClient.get() == null) { try { rocksClient.set(clientManager.createClient( new FramedClientConnector(fromParts(host, port)), RocksService.class).get()); logger.info("Opened Rocksdb connection to " + host + ":" + port); } catch (Exception e) { logger.error("Error in open rocksdb to " + host + ":" + port + " " + e); throw e; } } return rocksClient.get(); } static synchronized void incrThreads() { totalThreads++; } static synchronized boolean isLastThread() { if (--totalThreads == 0) { return true; } else { return false; } } @Override public void close() { try { if (!isLastThread()) { return; } if (clientManager != null) { clientManager.close(); } } catch (Exception ioex) { logger.error("Error while closing client connection: " + ioex); } } @Override public void initialize(Properties p, Phase currentPhase, int threadId) throws IOException, Exception { incrThreads(); host = ConfigUtil.getPropertyRequired(p, CONFIG_HOST); port = ConfigUtil.getInt(p, CONFIG_PORT); 
writeOptions = new WriteOptions(); writeOptions.setSync(ConfigUtil.getBool(p, CONFIG_WRITE_SYNC, false)); writeOptions.setDisableWAL( ConfigUtil.getBool(p, CONFIG_WRITE_DISABLE_WAL, false)); debuglevel = ConfigUtil.getDebugLevel(p); } public LinkStoreRocksDb() { super(); } public LinkStoreRocksDb(Properties props) throws IOException, Exception { super(); initialize(props, Phase.LOAD, 0); } public void clearErrors(int threadID) { logger.warn("Closing Rocksdb connection in threadID " + threadID); try { if (rocksClient.get() != null) { rocksClient.get().close(); rocksClient.remove(); } } catch (Throwable e) { logger.error("Error in Reopen!" + e); e.printStackTrace(); } } @Override public boolean addLink(String dbid, Link l, boolean noinverse) throws Exception{ try { return addLinkImpl(dbid, l, noinverse); } catch (Exception ex) { logger.error("addlink failed! " + ex); throw ex; } } private boolean addLinkImpl(String dbid, Link l, boolean noinverse) throws Exception { if (Level.DEBUG.isGreaterOrEqual(debuglevel)) { logger.debug("addLink " + l.id1 + "." + l.id2 + "." 
+ l.link_type); } AssocVisibility av = AssocVisibility.values()[l.visibility]; String s = "wormhole..."; dbid += "assocs"; long result = getRocksClient().TaoAssocPut( dbid.getBytes(), l.link_type, l.id1, l.id2, l.time, av, true, Long.valueOf(l.version), l.data, s.getBytes(), writeOptions); return result == 1; } /** * Internal method: add links without updating the count */ private boolean addLinksNoCount(String dbid, List links) throws Exception { if (links.size() == 0) return false; dbid += "assocs"; for (Link l:links) { AssocVisibility av = AssocVisibility.values()[l.visibility]; String s = "wormhole..."; long result = getRocksClient().TaoAssocPut(dbid.getBytes(), l.link_type, l.id1, l.id2, l.time, av, false, Long.valueOf(l.version), l.data, s.getBytes(), writeOptions); } return true; } @Override public boolean deleteLink(String dbid, long id1, long link_type, long id2, boolean noinverse, boolean expunge) throws Exception { try { return deleteLinkImpl(dbid, id1, link_type, id2, noinverse, expunge); } catch (Exception ex) { logger.error("deletelink failed! " + ex); throw ex; } } private boolean deleteLinkImpl(String dbid, long id1, long link_type, long id2, boolean noinverse, boolean expunge) throws Exception { if (Level.DEBUG.isGreaterOrEqual(debuglevel)) { logger.debug("deleteLink " + id1 + "." + id2 + "." 
+ link_type); } String s = "wormhole..."; dbid += "assocs"; long result = getRocksClient().TaoAssocDelete( dbid.getBytes() , link_type, id1, id2, -1 /*version ignored*/, AssocVisibility.HARD_DELETE, true, s.getBytes(), writeOptions); return result == 1; } @Override public boolean updateLink(String dbid, Link l, boolean noinverse) throws Exception { // Retry logic is in addLink boolean added = addLink(dbid, l, noinverse); return !added; // return true if updated instead of added } // lookup using id1, type, id2 @Override public Link getLink(String dbid, long id1, long link_type, long id2) throws Exception { try { return getLinkImpl(dbid, id1, link_type, id2); } catch (Exception ex) { logger.error("getLink failed! " + ex); throw ex; } } private Link getLinkImpl(String dbid, long id1, long link_type, long id2) throws Exception { Link res[] = multigetLinks(dbid, id1, link_type, new long[] {id2}); if (res == null) return null; assert(res.length <= 1); return res.length == 0 ? null : res[0]; } @Override public Link[] multigetLinks(String dbid, long id1, long link_type, long[] id2s) throws Exception { try { return multigetLinksImpl(dbid, id1, link_type, id2s); } catch (Exception ex) { logger.error("multigetlinks failed! 
" + ex); throw ex; } } private Link[] multigetLinksImpl(String dbid, long id1, long link_type, long[] id2s) throws Exception { List l = new ArrayList(); for (int i = 0; i < id2s.length; i++) { l.add(new Long(id2s[i])); } dbid += "assocs"; List tr = getRocksClient().TaoAssocGetID2s( dbid.getBytes(), link_type, id1, l); Link results[] = new Link[tr.size()]; int i = 0; for (TaoAssocGetEntry tar : tr) { results[i] = new Link(id1, link_type, tar.getId2(), LinkStore.VISIBILITY_DEFAULT, tar.getData(), (int)(tar.getVersion()), tar.getTime()); } return results; } // lookup using just id1, type @Override public Link[] getLinkList(String dbid, long id1, long link_type) throws Exception { return getLinkListImpl( dbid, id1, link_type, 0, Long.MAX_VALUE, 0, rangeLimit); } @Override public Link[] getLinkList(String dbid, long id1, long link_type, long minTimestamp, long maxTimestamp, int offset, int limit) throws Exception { try { return getLinkListImpl(dbid, id1, link_type, minTimestamp, maxTimestamp, offset, limit); } catch (Exception ex) { logger.error("getLinkList failed! " + ex); throw ex; } } private Link[] getLinkListImpl(String dbid, long id1, long link_type, long minTimestamp, long maxTimestamp, int offset, int limit) throws Exception { dbid += "assocs"; List tr = getRocksClient().TaoAssocGetTimeRange( dbid.getBytes(), link_type, id1, minTimestamp, maxTimestamp, Long.valueOf(offset), Long.valueOf(limit)); Link results[] = new Link[tr.size()]; int i = 0; for (TaoAssocGetEntry tar : tr) { results[i] = new Link(id1, link_type, tar.getId2(), LinkStore.VISIBILITY_DEFAULT, tar.getData(), (int)(tar.getVersion()), tar.getTime()); i++; } return results; } // count the #links @Override public long countLinks(String dbid, long id1, long link_type) throws Exception { try { return countLinksImpl(dbid, id1, link_type); } catch (Exception ex) { logger.error("countLinks failed! 
" + ex); throw ex; } } private long countLinksImpl(String dbid, long id1, long link_type) throws Exception { dbid += "assocs"; long count = getRocksClient().TaoAssocCount( dbid.getBytes(), link_type, id1); if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("Count result: " + id1 + "," + link_type + " is " + count); } return count; } @Override public int bulkLoadBatchSize() { return bulkInsertSize; } @Override public void addBulkLinks(String dbid, List links, boolean noinverse) throws Exception { try { addBulkLinksImpl(dbid, links, noinverse); } catch (Exception ex) { logger.error("addBulkLinks failed! " + ex); throw ex; } } private void addBulkLinksImpl(String dbid, List links, boolean noinverse) throws Exception { if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("addBulkLinks: " + links.size() + " links"); } addLinksNoCount(dbid, links); } @Override public void addBulkCounts(String dbid, List counts) throws Exception { try { addBulkCountsImpl(dbid, counts); } catch (Exception ex) { logger.error("addbulkCounts failed! " + ex); throw ex; } } private void addBulkCountsImpl(String dbid, List counts) throws Exception { if (Level.TRACE.isGreaterOrEqual(debuglevel)) { logger.trace("addBulkCounts: " + counts.size() + " link counts"); } if (counts.size() == 0) return; dbid += "assocs"; for (LinkCount count: counts) { getRocksClient().TaoAssocCountPut( dbid.getBytes(), count.link_type, count.id1, count.count, null, writeOptions); } } @Override public void resetNodeStore(String dbid, long startID) throws Exception { //doesn't have a defined utility for Rocksdb } @Override public long addNode(String dbid, Node node) throws Exception { try { return addNodeImpl(dbid, node); } catch (Exception ex) { logger.error("addNode failed! 
" + ex);
      throw ex;
    }
  }

  /** Add a single node by delegating to the bulk path. */
  private long addNodeImpl(String dbid, Node node) throws Exception {
    long ids[] = bulkAddNodes(dbid, Collections.singletonList(node));
    assert(ids.length == 1);
    return ids[0];
  }

  @Override
  public long[] bulkAddNodes(String dbid, List nodes) throws Exception {
    try {
      return bulkAddNodesImpl(dbid, nodes);
    } catch (Exception ex) {
      logger.error("bulkAddNodes failed! " + ex);
      throw ex;
    }
  }

  /**
   * Store each node via TaoFBObjectPut and return the ids.
   * Unlike the MySQL store, ids come from the Node objects themselves
   * rather than being allocated by the database.
   */
  private long[] bulkAddNodesImpl(String dbid, List nodes) throws Exception {
    long newIds[] = new long[nodes.size()];
    int i = 0;
    for (Node n : nodes) {
      // NOTE(review): n.version is passed for both the 3rd and 4th
      // arguments — presumably (version, updateVersion); confirm against
      // the TaoFBObjectPut definition in rocks.thrift.
      getRocksClient().TaoFBObjectPut(
        n.id, n.type, (int) n.version, (int) n.version, (long) n.time,
        n.data, true, null, writeOptions);
      newIds[i++] = n.id;
    }
    return newIds;
  }

  @Override
  public Node getNode(String dbid, int type, long id) throws Exception {
    try {
      return getNodeImpl(dbid, type, id);
    } catch (Exception ex) {
      logger.error("getnode failed! " + ex);
      throw ex;
    }
  }

  /** Fetch a node by id; returns null when it is not found. */
  private Node getNodeImpl(String dbid, int type, long id) throws Exception {
    // NOTE(review): these read options are built but never passed to
    // TaoFBObjectGet below — either dead code or a missing argument;
    // confirm against the thrift API.
    ReadOptions ropts = new ReadOptions();
    ropts.setVerifyChecksums(true);
    ropts.setFillCache(true);
    TaoFBObjectGetResult rgr = getRocksClient().TaoFBObjectGet(id, type);
    if (!rgr.isFound()) {
      return null; //Node was not found
    } else {
      return new Node(
        id, type, rgr.getVersion(), (int) rgr.getUpdateTime(),
        rgr.getData());
    }
  }

  @Override
  public boolean updateNode(String dbid, Node node) throws Exception {
    try {
      return updateNodeImpl(dbid, node);
    } catch (Exception ex) {
      logger.error("updateNode failed! " + ex);
      throw ex;
    }
  }

  // NOTE(review): addNode returns the node's id (see bulkAddNodesImpl),
  // so "== 1" only holds when node.id happens to be 1 — the result says
  // nothing about whether an update occurred. Likely a bug; confirm the
  // intended contract against GraphStore.updateNode and the MySQL store,
  // which returns whether a row was actually updated.
  private boolean updateNodeImpl(String dbid, Node node) throws Exception {
    return addNode(dbid, node) == 1;
  }

  @Override
  public boolean deleteNode(String dbid, int type, long id) throws Exception {
    try {
      return deleteNodeImpl(dbid, type, id);
    } catch (Exception ex) {
      logger.error("deleteNode failed! " + ex);
      throw ex;
    }
  }

  /** Delete a node. Always reports success — no existence check is made. */
  private boolean deleteNodeImpl(String dbid, int type, long id)
    throws Exception {
    getRocksClient().
TaoFBObjectDel(id, type, null, writeOptions); return true; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/MemoryLinkStore.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.IOException; import java.util.Comparator; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Properties; import java.util.SortedSet; import java.util.TreeSet; /** * Simple in-memory implementation of GraphStore * Not efficient or optimized at all, just for testing purposes. * * MemoryLinkStore instances sharing the same data can be created * using the newInstance() method. 
* MemoryLinkStore can be accessed concurrently from multiple threads,
 * but a simple mutex is used so there is no internal concurrency (requests
 * are serialized)
 */
public class MemoryLinkStore extends GraphStore {

  /** Hash key for the per-db link map: a (id1, link_type) pair. */
  private static class LinkLookupKey {
    final long id1;
    final long link_type;

    public LinkLookupKey(long id1, long link_type) {
      super();
      this.id1 = id1;
      this.link_type = link_type;
    }

    @Override
    public boolean equals(Object other) {
      if (!(other instanceof LinkLookupKey)) {
        return false;
      }
      LinkLookupKey other2 = (LinkLookupKey)other;
      return id1 == other2.id1 && link_type == other2.link_type;
    }

    @Override
    public int hashCode() {
      return Long.valueOf(id1).hashCode() ^ Long.valueOf(link_type).hashCode();
    }
  }

  /** Order links from most to least recent */
  private static class LinkTimeStampComparator implements Comparator<Link> {
    @Override
    public int compare(Link l1, Link l2) {
      // ascending order of id1
      if (l1.id1 != l2.id1) {
        // Bug fix: previously compared l1.id1 against l2.id2, which is an
        // inconsistent ordering (violates the Comparator contract). The
        // primary sort field must be compared against itself.
        if (l1.id1 < l2.id1) {
          return -1;
        } else {
          return 1;
        }
      }
      if (l1.time != l2.time) {
        // descending order of time
        if (l1.time < l2.time) {
          return 1;
        } else {
          return -1;
        }
      }
      // ascending order of id2 breaks ties
      if (l1.id2 == l2.id2) {
        return 0;
      } else if (l1.id2 < l2.id2) {
        return -1;
      } else {
        return 1;
      }
    }
  }

  /**
   * Class for allocating IDs and storing objects
   */
  private static class NodeDB {
    private long nextID; // Next id to allocate
    Map<Long, Node> data = new HashMap<Long, Node>();

    /** Construct a new instance allocating ids from 1 */
    NodeDB() {
      this(1);
    }

    NodeDB(long startID) {
      this.nextID = startID;
    }

    long allocateID() {
      return nextID++;
    }

    void reset(long startID) {
      nextID = startID;
      data.clear();
    }
  }

  /** Simple implementation of LinkStore with nested maps and a set of
   * links sorted by timestamp:
   * dbid -> (id1, assoc_type) -> links */
  private final Map<String, Map<LinkLookupKey, SortedSet<Link>>> linkdbs;

  private final Map<String, NodeDB> nodedbs;

  /**
   * Storage for objects
   */
  private static final Comparator<Link> LINK_COMPARATOR =
                                              new LinkTimeStampComparator();

  /**
   * Create a new MemoryLinkStore instance with fresh data
   */
  public MemoryLinkStore()
{ this(new HashMap>>(), new HashMap()); } /** * Create a new MemoryLinkStore handle sharing data with existing instance */ private MemoryLinkStore(Map>> linkdbs, Map nodedbs) { this.linkdbs = linkdbs; this.nodedbs = nodedbs; } /** * Find a list of links based on * @param dbid * @param id1 * @param link_type * @param createPath if true, create nested structures. If false, return * null if map for (dbid, id, link_type) does not exist and * do not modify the structure; * @return */ private SortedSet findLinkByKey(String dbid, long id1, long link_type, boolean createPath) { Map> db = linkdbs.get(dbid); if (db == null) { if (createPath) { // Autocreate db db = new HashMap>(); linkdbs.put(dbid, db); } else { return null; } } LinkLookupKey key = new LinkLookupKey(id1, link_type); SortedSet links = db.get(key); if (links == null) { if (createPath) { links = newSortedLinkSet(); db.put(key, links); } else { return null; } } return links; } private TreeSet newSortedLinkSet() { return new TreeSet(LINK_COMPARATOR); } /** Create a new MemoryLinkStore sharing the same data structures as * this one */ public MemoryLinkStore newHandle() { return new MemoryLinkStore(linkdbs, nodedbs); } @Override public void initialize(Properties p, Phase currentPhase, int threadId) throws IOException, Exception { } @Override public void close() { } @Override public void clearErrors(int threadID) { } @Override public boolean addLink(String dbid, Link a, boolean noinverse) throws Exception { synchronized (linkdbs) { SortedSet links = findLinkByKey(dbid, a.id1, a.link_type, true); boolean exists = false; // Check for duplicates Iterator it = links.iterator(); while (it.hasNext()) { Link existing = it.next(); if (existing.id2 == a.id2) { it.remove(); exists = true; } } // Clone argument before inserting links.add(a.clone()); /*System.err.println(String.format("added link (%d, %d, %d), %d in list", a.id1, a.link_type, a.id2, links.size()));*/ return !exists; } } @Override public boolean 
deleteLink(String dbid, long id1, long link_type, long id2, boolean noinverse, boolean expunge) throws Exception { synchronized (linkdbs) { //NOTE: does not reclaim space from unused structures SortedSet linkSet = findLinkByKey(dbid, id1, link_type, false); if (linkSet != null) { Iterator it = linkSet.iterator(); while (it.hasNext()) { Link l = it.next(); if (l.id2 == id2) { if (!expunge) { l.visibility = VISIBILITY_HIDDEN; } else { it.remove(); } return true; // found it! } } } } return false; } @Override public boolean updateLink(String dbid, Link a, boolean noinverse) throws Exception { synchronized (linkdbs) { SortedSet linkSet = findLinkByKey(dbid, a.id1, a.link_type, false); if (linkSet != null) { Iterator it = linkSet.iterator(); while (it.hasNext()) { Link l = it.next(); if (l.id2 == a.id2) { it.remove(); linkSet.add(a.clone()); return true; } } } // Throw error if updating non-existing link throw new Exception(String.format("Link not found: (%d, %d, %d)", a.id1, a.link_type, a.id2)); } } @Override public Link getLink(String dbid, long id1, long link_type, long id2) throws Exception { synchronized (linkdbs) { SortedSet linkSet = findLinkByKey(dbid, id1, link_type, false); if (linkSet != null) { for (Link l: linkSet) { if (l.id2 == id2) { return l.clone(); } } } return null; } } @Override public Link[] getLinkList(String dbid, long id1, long link_type) throws Exception { return getLinkList(dbid, id1, link_type, 0, Long.MAX_VALUE, 0, rangeLimit); } @Override public Link[] getLinkList(String dbid, long id1, long link_type, long minTimestamp, long maxTimestamp, int offset, int limit) throws Exception { int skipped = 0; // used for offset synchronized (linkdbs) { SortedSet linkSet = findLinkByKey(dbid, id1, link_type, false); if (linkSet == null || linkSet.size() == 0) { return null; } else { // Do a first pass to find size of result array int matching = 0; for (Link l: linkSet) { if (l.visibility == VISIBILITY_DEFAULT && l.time >= minTimestamp && l.time <= 
maxTimestamp) { if (skipped < offset) { skipped++; continue; } matching++; if (matching >= limit) { break; } } } if (matching == 0) { return null; } Link res[] = new Link[matching]; // Iterate in desc order of timestamp, break ties by id2 int i = 0; skipped = 0; for (Link l: linkSet) { if (l.visibility == VISIBILITY_DEFAULT && l.time >= minTimestamp && l.time <= maxTimestamp) { if (skipped < offset) { skipped++; continue; } res[i++] = l; if (i >= limit) { break; } } } return res; } } } @Override public long countLinks(String dbid, long id1, long link_type) throws Exception { synchronized(linkdbs) { SortedSet linkSet = findLinkByKey(dbid, id1, link_type, false); if (linkSet == null) { return 0; } else { // Count the number of visible links int visible = 0; for (Link l: linkSet) { if (l.visibility == VISIBILITY_DEFAULT) { visible++; } } /*System.err.println( String.format("Lookup (%d, %d): %d visible, %d total", id1, link_type, visible, linkSet.size()));*/ return visible; } } } /** * Should be called with lock on nodedbs held * @param dbid * @param autocreate * @return */ private NodeDB getNodeDB(String dbid, boolean autocreate) throws Exception { NodeDB db = nodedbs.get(dbid); if (db == null) { if (autocreate) { db = new NodeDB(); nodedbs.put(dbid, db); } else { /* Not initialized.. 
can't autocreate since we don't know the desired * start ID */ throw new Exception("dbid " + dbid + " was not initialized"); } } return db; } @Override public void resetNodeStore(String dbid, long startID) { synchronized(nodedbs) { NodeDB db = nodedbs.get(dbid); if (db == null) { nodedbs.put(dbid, new NodeDB(startID)); } else { db.reset(startID); } } } @Override public long addNode(String dbid, Node node) throws Exception { synchronized(nodedbs) { NodeDB db = getNodeDB(dbid, false); long newId = db.allocateID(); // Put copy of node in map Node inserted = node.clone(); inserted.id = newId; Node prev = db.data.put(newId, inserted); if (prev != null) { throw new Exception("Internal error: node " + prev.toString() + " already existing in dbid " + dbid); } return newId; } } @Override public Node getNode(String dbid, int type, long id) throws Exception { synchronized(nodedbs) { NodeDB db = getNodeDB(dbid, false); Node n = db.data.get(id); if (n == null || n.type != type) { // Shouldn't return lookup on type mismatch return null; } else { return n.clone(); // return copy } } } @Override public boolean updateNode(String dbid, Node node) throws Exception { synchronized(nodedbs) { NodeDB db = getNodeDB(dbid, false); Node n = db.data.get(node.id); if (n == null || n.type != node.type) { // don't update on type mismatch return false; } else { // Store copy db.data.put(node.id, node.clone()); return true; } } } @Override public boolean deleteNode(String dbid, int type, long id) throws Exception { synchronized(nodedbs) { NodeDB db = getNodeDB(dbid, false); Node n = db.data.get(id); if (n == null || n.type != type) { // don't delete on type mismatch return false; } else { db.data.remove(id); return true; } } } } ================================================ FILE: src/main/java/com/facebook/LinkBench/Node.java ================================================ /* * Copyright 2012, Facebook, Inc. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.util.Arrays; /** * Object node in social graph * @author tarmstrong */ public class Node { /** Unique identifier for node */ public long id; /** Type of node */ public int type; /** Version of node: typically updated on every change */ public long version; /** Last update time of node as UNIX timestamp */ public int time; /** Arbitrary payload data */ public byte data[]; public Node(long id, int type, long version, int time, byte data[]) { super(); this.id = id; this.type = type; this.version = version; this.time = time; this.data = data; } public Node clone() { return new Node(id, type, version, time, data); } @Override public boolean equals(Object other) { if (!(other instanceof Node)) { return false; } Node o = (Node) other; return id == o.id && type == o.type && version == o.version && time == o.time && Arrays.equals(data, o.data); } public String toString() { return "Node(" + "id=" + id + ",type=" + type + ",version=" + version + "," + "timestamp=" + time + ",data=" + Arrays.toString(data) + ")"; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/NodeLoader.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.PrintStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Properties; import java.util.Random; import org.apache.log4j.Level; import org.apache.log4j.Logger; import com.facebook.LinkBench.distributions.LogNormalDistribution; import com.facebook.LinkBench.generators.DataGenerator; import com.facebook.LinkBench.stats.LatencyStats; import com.facebook.LinkBench.stats.SampledStats; import com.facebook.LinkBench.util.ClassLoadUtil; /** * Load class for generating node data * * This is separate from link loading because we can't have multiple parallel * loaders loading nodes, as the order of IDs being assigned would be messed up * @author tarmstrong */ public class NodeLoader implements Runnable { private static final long REPORT_INTERVAL = 25000; private final Properties props; private final Logger logger; private final NodeStore nodeStore; private final Random rng; private final String dbid; // Data generation settings private final DataGenerator nodeDataGen; private final LogNormalDistribution nodeDataLength; private final Level debuglevel; private final int loaderId; private final SampledStats stats; private final LatencyStats latencyStats; private long startTime_ms; private long nodesLoaded = 0; private long totalNodes = 0; /** Next node count to report on */ private long nextReport = 0; /** Last time stat update displayed */ private long lastDisplayTime_ms; /** How often to display stat updates */ private final long displayFreq_ms; public NodeLoader(Properties props, Logger logger, NodeStore 
nodeStore, Random rng, LatencyStats latencyStats, PrintStream csvStreamOut, int loaderId) { super(); this.props = props; this.logger = logger; this.nodeStore = nodeStore; this.rng = rng; this.latencyStats = latencyStats; this.loaderId = loaderId; double medianDataLength = ConfigUtil.getDouble(props, Config.NODE_DATASIZE); nodeDataLength = new LogNormalDistribution(); nodeDataLength.init(0, NodeStore.MAX_NODE_DATA, medianDataLength, Config.NODE_DATASIZE_SIGMA); try { nodeDataGen = ClassLoadUtil.newInstance( ConfigUtil.getPropertyRequired(props, Config.NODE_ADD_DATAGEN), DataGenerator.class); nodeDataGen.init(props, Config.NODE_ADD_DATAGEN_PREFIX); } catch (ClassNotFoundException ex) { logger.error(ex); throw new LinkBenchConfigError("Error loading data generator class: " + ex.getMessage()); } debuglevel = ConfigUtil.getDebugLevel(props); dbid = ConfigUtil.getPropertyRequired(props, Config.DBID); displayFreq_ms = ConfigUtil.getLong(props, Config.DISPLAY_FREQ) * 1000; int maxsamples = ConfigUtil.getInt(props, Config.MAX_STAT_SAMPLES); this.stats = new SampledStats(loaderId, maxsamples, csvStreamOut); } @Override public void run() { logger.info("Starting loader thread #" + loaderId + " loading nodes"); try { this.nodeStore.initialize(props, Phase.LOAD, loaderId); } catch (Exception e) { logger.error("Error while initializing store", e); throw new RuntimeException(e); } try { // Set up ids to start at desired range nodeStore.resetNodeStore(dbid, ConfigUtil.getLong(props, Config.MIN_ID)); } catch (Exception e) { logger.error("Error while resetting IDs, cannot proceed with " + "node loading", e); return; } int bulkLoadBatchSize = nodeStore.bulkLoadBatchSize(); ArrayList nodeLoadBuffer = new ArrayList(bulkLoadBatchSize); long maxId = ConfigUtil.getLong(props, Config.MAX_ID); long startId = ConfigUtil.getLong(props, Config.MIN_ID); totalNodes = maxId - startId; nextReport = startId + REPORT_INTERVAL; startTime_ms = System.currentTimeMillis(); lastDisplayTime_ms = 
startTime_ms; for (long id = startId; id < maxId; id++) { genNode(rng, id, nodeLoadBuffer, bulkLoadBatchSize); long now = System.currentTimeMillis(); if (lastDisplayTime_ms + displayFreq_ms <= now) { displayAndResetStats(); } } // Load any remaining data loadNodes(nodeLoadBuffer); logger.info("Loading of nodes [" + startId + "," + maxId + ") done"); displayAndResetStats(); nodeStore.close(); } private void displayAndResetStats() { long now = System.currentTimeMillis(); stats.displayStats(lastDisplayTime_ms, now, Arrays.asList(LinkBenchOp.LOAD_NODE_BULK)); stats.resetSamples(); lastDisplayTime_ms = now; } /** * Create and insert the node into the DB * @param rng * @param id1 */ private void genNode(Random rng, long id1, ArrayList nodeLoadBuffer, int bulkLoadBatchSize) { int dataLength = (int)nodeDataLength.choose(rng); Node node = new Node(id1, LinkStore.DEFAULT_NODE_TYPE, 1, (int)(System.currentTimeMillis()/1000), nodeDataGen.fill(rng, new byte[dataLength])); nodeLoadBuffer.add(node); if (nodeLoadBuffer.size() >= bulkLoadBatchSize) { loadNodes(nodeLoadBuffer); nodeLoadBuffer.clear(); } } private void loadNodes(ArrayList nodeLoadBuffer) { long actualIds[] = null; long timestart = System.nanoTime(); try { actualIds = nodeStore.bulkAddNodes(dbid, nodeLoadBuffer); long timetaken = (System.nanoTime() - timestart); nodesLoaded += nodeLoadBuffer.size(); // Check that expected ids were allocated assert(actualIds.length == nodeLoadBuffer.size()); for (int i = 0; i < actualIds.length; i++) { if (nodeLoadBuffer.get(i).id != actualIds[i]) { logger.warn("Expected ID of node: " + nodeLoadBuffer.get(i).id + " != " + actualIds[i] + " the actual ID"); } } nodeLoadBuffer.clear(); // convert to microseconds stats.addStats(LinkBenchOp.LOAD_NODE_BULK, timetaken/1000, false); latencyStats.recordLatency(loaderId, LinkBenchOp.LOAD_NODE_BULK, timetaken/1000); if (nodesLoaded >= nextReport) { double totalTimeTaken = (System.currentTimeMillis() - startTime_ms) / 1000.0; 
logger.debug(String.format( "Loader #%d: %d/%d nodes loaded at %f nodes/sec", loaderId, nodesLoaded, totalNodes, nodesLoaded / totalTimeTaken)); nextReport += REPORT_INTERVAL; } } catch (Throwable e){//Catch exception if any long endtime2 = System.nanoTime(); long timetaken2 = (endtime2 - timestart)/1000; logger.error("Error: " + e.getMessage(), e); stats.addStats(LinkBenchOp.LOAD_NODE_BULK, timetaken2, true); nodeStore.clearErrors(loaderId); nodeLoadBuffer.clear(); return; } } public long getNodesLoaded() { return nodesLoaded; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/NodeStore.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.IOException; import java.util.List; import java.util.Properties; /** * Some implementations of NodeStore may require that each * dbid be initialized with reset before any data is written to it (so as to * ensure that the starting id is actually specified. 
*/ public interface NodeStore { // Limit data to 1MB public static final long MAX_NODE_DATA = 1024 * 1024; /** initialize the store object */ public void initialize(Properties p, Phase currentPhase, int threadId) throws IOException, Exception; /** * Reset node storage to a clean state in shard: * deletes all stored nodes * resets id allocation, with new IDs to be allocated starting from startID */ public void resetNodeStore(String dbid, long startID) throws Exception; /** * Adds a new node object to the database. * * This allocates a new id for the object and returns i. * * The benchmark assumes that, after resetStore() is called, * node IDs are allocated in sequence, i.e. startID, startID + 1, ... * Add node should return the next ID in the sequence. * * @param dbid the db shard to put that object in * @param node a node with all data aside from id filled in. The id * field is *not* updated to the new value by this function * @return the id allocated for the node */ public long addNode(String dbid, Node node) throws Exception; /** * Bulk loading to more efficiently load nodes. * Calling this is equivalent to calling addNode multiple times. * * @param dbid * @param nodes * @return the actual IDs allocated to the nodes * @throws Exception */ public long[] bulkAddNodes(String dbid, List nodes) throws Exception; /** * Preferred size of data to load * @return */ public int bulkLoadBatchSize(); /** * Get a node of the specified type * @param dbid the db shard the id is mapped to * @param type the type of the object * @param id the id of the object * @return null if not found, a Node with all fields filled in otherwise */ public Node getNode(String dbid, int type, long id) throws Exception; /** * Update all parameters of the node specified. 
* @param dbid * @param node * @return true if the update was successful, false if not present */ public boolean updateNode(String dbid, Node node) throws Exception; /** * Delete the object specified by the arguments * @param dbid * @param type * @param id * @return true if the node was deleted, false if not present */ public boolean deleteNode(String dbid, int type, long id) throws Exception; public void clearErrors(int loaderId); /** * Close the node store and clean up any resources */ public void close(); } ================================================ FILE: src/main/java/com/facebook/LinkBench/Phase.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; /** * Different phases of the benchmark * */ public enum Phase { LOAD, REQUEST } ================================================ FILE: src/main/java/com/facebook/LinkBench/RealDistribution.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.File; import java.io.FileNotFoundException; import java.util.ArrayList; import java.util.InputMismatchException; import java.util.Locale; import java.util.NavigableMap; import java.util.NoSuchElementException; import java.util.Properties; import java.util.Random; import java.util.Scanner; import java.util.TreeMap; import org.apache.log4j.Logger; import com.facebook.LinkBench.distributions.PiecewiseLinearDistribution; /* * This class simulates the real distribution based on statistical data. */ public class RealDistribution extends PiecewiseLinearDistribution { /** The locale used for number formats, etc in distribution file */ private static final Locale INPUT_FILE_LOCALE = Locale.ENGLISH; public static final String DISTRIBUTION_CONFIG = "realdist"; private static final Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); /* params to shuffler for link degree */ public static final long NLINKS_SHUFFLER_SEED = 20343988438726021L; public static final int NLINKS_SHUFFLER_GROUPS = 1024; /* shufflers to generate distributions uncorrelated to above */ public static final long UNCORR_SHUFFLER_SEED = 53238253823453L; public static final int UNCORR_SHUFFLER_GROUPS = 1024; /* Shufflers for requests that are correlated with link degree */ public static final long WRITE_CORR_SHUFFLER_SEED = NLINKS_SHUFFLER_SEED; public static final int WRITE_CORR_SHUFFLER_GROUPS = NLINKS_SHUFFLER_GROUPS; public static final long READ_CORR_SHUFFLER_SEED = NLINKS_SHUFFLER_SEED; public static final int READ_CORR_SHUFFLER_GROUPS = NLINKS_SHUFFLER_GROUPS; /* Shufflers for requests that are uncorrelated with link degree */ public static final long WRITE_UNCORR_SHUFFLER_SEED = UNCORR_SHUFFLER_SEED; public static final int WRITE_UNCORR_SHUFFLER_GROUPS = UNCORR_SHUFFLER_GROUPS; public static final long READ_UNCORR_SHUFFLER_SEED = 
UNCORR_SHUFFLER_SEED; public static final int READ_UNCORR_SHUFFLER_GROUPS = UNCORR_SHUFFLER_GROUPS; public static final long NODE_READ_SHUFFLER_SEED = 4766565305853767165L; public static final int NODE_READ_SHUFFLER_GROUPS = 1024; public static final long NODE_UPDATE_SHUFFLER_SEED = NODE_READ_SHUFFLER_SEED; public static final int NODE_UPDATE_SHUFFLER_GROUPS = NODE_READ_SHUFFLER_GROUPS; public static final long NODE_DELETE_SHUFFLER_SEED = NODE_READ_SHUFFLER_SEED; public static final int NODE_DELETE_SHUFFLER_GROUPS = NODE_READ_SHUFFLER_GROUPS; public static enum DistributionType { LINKS, LINK_READS, LINK_READS_UNCORR, LINK_WRITES, LINK_WRITES_UNCORR, NODE_READS, NODE_UPDATES, NODE_DELETES, } private DistributionType type = null; public RealDistribution() { this.type = null; } @Override public void init(long min, long max, Properties props, String keyPrefix) { this.min = min; this.max = max; String dist = ConfigUtil.getPropertyRequired(props, keyPrefix + DISTRIBUTION_CONFIG); DistributionType configuredType; if (dist.equals("link_reads")) { configuredType = DistributionType.LINK_READS; } else if (dist.equals("link_writes")) { configuredType = DistributionType.LINK_WRITES; } else if (dist.equals("node_reads")) { configuredType = DistributionType.NODE_READS; } else if (dist.equals("node_writes")) { configuredType = DistributionType.NODE_UPDATES; } else if (dist.equals("links")) { configuredType = DistributionType.LINKS; } else { throw new RuntimeException("Invalid distribution type for " + "RealDistribution: " + dist); } init(props, min, max, configuredType); } /* * Initialize this with one of the empirical distribution types * This will automatically load the data file if needed */ public void init(Properties props, long min, long max, DistributionType type) { loadOneShot(props); switch (type) { case LINKS: init(min, max, nlinks_cdf, null, null, nlinks_expected_val); break; case LINK_WRITES: init(min, max, link_nwrites_cdf, nwrites_cs, nwrites_right_points, 
link_nwrites_expected_val); break; case LINK_READS: init(min, max, link_nreads_cdf, link_nreads_cs, link_nreads_right_points, link_nreads_expected_val); break; case NODE_UPDATES: init(min, max, node_nwrites_cdf, nwrites_cs, nwrites_right_points, node_nwrites_expected_val); break; case NODE_READS: init(min, max, node_nreads_cdf, node_nreads_cs, node_nreads_right_points, node_nreads_expected_val); break; default: throw new RuntimeException("Unknown distribution type: " + type); } } private static ArrayList nlinks_cdf, link_nreads_cdf, link_nwrites_cdf, node_nreads_cdf, node_nwrites_cdf; private static double[] link_nreads_cs, nwrites_cs, node_nreads_cs, node_nwrites_cs; /** * These right_points arrays are used to keep track of state of * the id1 generation, with each cell holding the next id to * return. These are shared between RealDistribution instances * and different threads. * * It is not clear that this works entirely as intended and it * certainly is non-deterministic when multiple threads are * involved. */ private static long[] link_nreads_right_points, nwrites_right_points, node_nreads_right_points, node_nwrites_right_points; private static double nlinks_expected_val, link_nreads_expected_val, link_nwrites_expected_val, node_nreads_expected_val, node_nwrites_expected_val; /* * This method loads data from data file into memory; * must be called before any getNlinks or getNextId1s; * must be declared as synchronized method to prevent race condition. */ public static synchronized void loadOneShot(Properties props) { if (nlinks_cdf == null) { try { getStatisticalData(props); } catch (FileNotFoundException e) { throw new RuntimeException(e); } } } /* * This method get the area below the distribution nreads_ccdf or * nwrite_ccdf. This helps to determine the number of nreads after which * the generating distribution would be approximately equal to real * distribution. 
* * Keep in mind the because the number of id1s is constant, the * generating #reads distribution keeps changing. It starts at "100% 0", * keeps growing and eventually at some point (after certain number of * reads) it should be equal to the real #reads distribution. * * Because the number of id1s is constant (equal to maxid1 - startid1), * the total number of reads is also a constant, according to the * following fomular: * * (number of reads) = (number of id1s) x (area below nreads_pdf) * * To illustrate, consider the following nreads_pdf distribution: * 60%=0; 20%=1; 10%=2; 10%=3; and there are 100 id1s. * * The number of reads would be a constanst: * 100 * (20% * 1 + 10% * 2 + 10% * 3) = 100 * 80%. * The multiplication factor (20% * 1 + 10% * 2 + 10% * 3) is what we * want this method to return. * * If we already have the ccdf (comlementary cumulative distribution * function): 40%>=1; 20%>=2; 10%>=3; and its cumulative sum: * [40%, 40%+20%, 40%+20%+10%] = [40%, 60%, 80%], then just need to * return the last cumulative sum (80%). 
*/ static double getArea(DistributionType type) { if (type == DistributionType.LINK_READS) return link_nreads_cs[link_nreads_cs.length - 1]; else if (type == DistributionType.LINK_WRITES) return nwrites_cs[nwrites_cs.length - 1]; else return 0; } //helper function: private static ArrayList readCDF(String filePath, Scanner scanner) { ArrayList points = new ArrayList(); while (scanner.hasNextInt()) { int value = scanner.nextInt(); // File on disk has percentages try { double percent = scanner.nextDouble(); double probability = percent / 100; Point temp = new Point(value, probability); points.add(temp); } catch (InputMismatchException ex) { throw new LinkBenchConfigError("Expected to find floating point " + "value in input file" + filePath + " but found token \"" + scanner.next() + "\""); } catch (NoSuchElementException ex) { throw new LinkBenchConfigError("Expected to find floating point " + "value in input file" + filePath + " but found end of file"); } } return points; } //convert CDF from ArrayList to Map static NavigableMap getCDF(DistributionType dist) { ArrayList points = dist == DistributionType.LINKS ? nlinks_cdf : dist == DistributionType.LINK_READS? link_nreads_cdf : dist == DistributionType.LINK_WRITES ? link_nwrites_cdf : dist == DistributionType.NODE_READS ? node_nreads_cdf : dist == DistributionType.NODE_UPDATES ? node_nwrites_cdf : null; if (points == null) return null; TreeMap map = new TreeMap(); for (Point point : points) { map.put(point.value, point.probability); } return map; } /* * This method reads from data_file nlinks, nreads, nwrites discreate * cumulative distribution function (CDF) and produces corresponding * pdf and ccdf. * * The data file is generated by LinkBenchConfigGenerator, and can be * located by parameter data_file in the config file. * * CDF is returned under the form of an array whose elements are pairs of * value and the cumulative distribution at that value i.e. . 
*/ private static void getStatisticalData(Properties props) throws FileNotFoundException { String filename = ConfigUtil.getPropertyRequired(props, Config.DISTRIBUTION_DATA_FILE); // If relative path, should be relative to linkbench home directory String fileAbsPath; if (new File(filename).isAbsolute()) { fileAbsPath = filename; } else { String linkBenchHome = ConfigUtil.findLinkBenchHome(); if (linkBenchHome == null) { throw new RuntimeException("Data file config property " + Config.DISTRIBUTION_DATA_FILE + " was specified using a relative path, but linkbench home" + " directory was not specified through environment var " + ConfigUtil.linkbenchHomeEnvVar); } else { fileAbsPath = linkBenchHome + File.separator + filename; } } logger.info("Loading real distribution data from " + fileAbsPath); Scanner scanner = new Scanner(new File(fileAbsPath)); scanner.useLocale(INPUT_FILE_LOCALE); while (scanner.hasNext()) { String type = scanner.next(); if (type.equals("nlinks")) { nlinks_cdf = readCDF(fileAbsPath, scanner); nlinks_expected_val = expectedValue(nlinks_cdf); } else if (type.equals("link_nreads")) { link_nreads_cdf = readCDF(fileAbsPath, scanner); double[] nreads_pdf = getPDF(link_nreads_cdf); double[] nreads_ccdf = getCCDF(nreads_pdf); link_nreads_cs = getCumulativeSum(nreads_ccdf); link_nreads_right_points = new long[link_nreads_cs.length]; for (int i = 0; i < link_nreads_right_points.length; ++i) { link_nreads_right_points[i] = 0; } link_nreads_expected_val = expectedValue(link_nreads_cdf); } else if (type.equals("link_nwrites")) { link_nwrites_cdf = readCDF(fileAbsPath, scanner); double[] nwrites_pdf = getPDF(link_nwrites_cdf); double[] nwrites_ccdf = getCCDF(nwrites_pdf); nwrites_cs = getCumulativeSum(nwrites_ccdf); nwrites_right_points = new long[nwrites_cs.length]; for (int i = 0; i < nwrites_right_points.length; ++i) { nwrites_right_points[i] = 0; } link_nwrites_expected_val = expectedValue(link_nwrites_cdf); } else if (type.equals("node_nreads")) { 
node_nreads_cdf = readCDF(fileAbsPath, scanner); double[] node_nreads_pdf = getPDF(node_nreads_cdf); double[] node_nreads_ccdf = getCCDF(node_nreads_pdf); node_nreads_cs = getCumulativeSum(node_nreads_ccdf); node_nreads_right_points = new long[node_nreads_cs.length]; for (int i = 0; i < node_nreads_right_points.length; ++i) { node_nreads_right_points[i] = 0; } node_nreads_expected_val = expectedValue(node_nreads_cdf); } else if (type.equals("node_nwrites")) { node_nwrites_cdf = readCDF(fileAbsPath, scanner); double[] node_nwrites_pdf = getPDF(node_nwrites_cdf); double[] node_nwrites_ccdf = getCCDF(node_nwrites_pdf); node_nwrites_cs = getCumulativeSum(node_nwrites_ccdf); node_nwrites_right_points = new long[node_nwrites_cs.length]; for (int i = 0; i < node_nwrites_right_points.length; ++i) { node_nwrites_right_points[i] = 0; } node_nwrites_expected_val = expectedValue(node_nwrites_cdf); } else { throw new RuntimeException("Unexpected token in distribution file, " + "expected name of next distribution: \"" + type + "\""); } } } static long getNlinks(long id1, long startid1, long maxid1) { // simple workload balancing return (long)expectedCount(startid1, maxid1, id1, nlinks_cdf); } @Override public long choose(Random rng) { if (type == DistributionType.LINKS) { throw new RuntimeException("choose not supported for LINKS"); } return super.choose(rng); } public static InvertibleShuffler getShuffler(DistributionType type, long n) { switch (type) { case LINK_READS: return new InvertibleShuffler(READ_CORR_SHUFFLER_SEED, READ_CORR_SHUFFLER_GROUPS, n); case LINK_READS_UNCORR: return new InvertibleShuffler(READ_UNCORR_SHUFFLER_SEED, READ_UNCORR_SHUFFLER_GROUPS, n); case LINK_WRITES: return new InvertibleShuffler(WRITE_CORR_SHUFFLER_SEED, WRITE_CORR_SHUFFLER_GROUPS, n); case LINK_WRITES_UNCORR: return new InvertibleShuffler(WRITE_UNCORR_SHUFFLER_SEED, WRITE_UNCORR_SHUFFLER_GROUPS, n); case NODE_READS: return new InvertibleShuffler(NODE_READ_SHUFFLER_SEED, 
/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Generates a permutation of 0, 1, 2, ..., (N-1) using O(1) memory.
 */
class Shuffler {
  /*
   * Worked example with 10 elements and group size m = 4:
   *
   * 1. Split 0..9 into consecutive groups of m = 4:
   *      0 1 2 3 | 4 5 6 7 | 8 9
   * 2. Gather elements sharing the same position within their group:
   *      0 4 8 | 1 5 9 | 2 6 | 3 7
   * 3. T[i] = position of element i in the gathered sequence:
   *      T = {0 3 6 8 1 4 7 9 2 5}
   */

  /**
   * Position of element i in the gathered sequence for n elements,
   * group size m (i.e. T[i] in the example above).
   */
  static long getPermutationValue(long i, long n, long m) {
    long shortLen = n / m;                  // size of the smaller gathered groups
    long numLong = n % m;                   // how many gathered groups get one extra element
    long longLen = (numLong == 0) ? shortLen : shortLen + 1;
    long group = i % m;                     // which gathered group i falls into
    long slot = i / m;                      // offset of i within that group
    return (group < numLong)
        ? group * longLen + slot
        : numLong * longLen + (group - numLong) * shortLen + slot;
  }

  /** Same permutation, applied to the half-open id range [start, end). */
  static long getPermutationValue(long i, long start, long end, long m) {
    return start + getPermutationValue(i - start, end - start, m);
  }

  /**
   * Composition of several rounds of the transformation, producing a
   * more thoroughly mixed permutation of [start, end).
   */
  static long getPermutationValue(long i, long start, long end, long[] ms) {
    for (long m : ms) {
      i = getPermutationValue(i, start, end, m);
    }
    return i;
  }
}
public class Timer {

  /** Nanoseconds per millisecond, for splitting a nano wait into sleep() args. */
  private static final long NANOS_PER_MILLI = 1000000L;

  /**
   * Wait an amount of time since the last event determined by the
   * exponential distribution.
   * @param rng random number generator to use
   * @param lasteventTime_ns last event time (units same as System.nanoTime())
   * @param arrivalRate_ns arrival rate: events per nanosecond
   * @return time of the next event
   */
  public static long waitExpInterval(Random rng,
                long lasteventTime_ns, double arrivalRate_ns) {
    // Inverse-transform sample of an exponential inter-arrival gap.
    double u = rng.nextDouble();
    while (u == 0.0) {
      // BUG FIX: nextDouble() can return exactly 0.0; Math.log(0) is
      // -Infinity, which made the next event time infinite and hung the
      // wait below. Redraw in that (astronomically rare) case.
      u = rng.nextDouble();
    }
    long nextTime_ns = lasteventTime_ns +
                Math.round(-1 * Math.log(u) / arrivalRate_ns);
    Timer.waitUntil(nextTime_ns);
    return nextTime_ns;
  }

  /**
   * Block until System.nanoTime() >= time_ns.
   * Interrupts do not abort the wait: the loop re-checks the clock and
   * keeps sleeping until the deadline has passed.
   * @param time_ns deadline on the System.nanoTime() clock
   */
  public static void waitUntil(long time_ns) {
    long now = System.nanoTime();
    while (now < time_ns) {
      long wait = time_ns - now;
      try {
        // Split the remaining wait into whole milliseconds plus the
        // leftover nanoseconds.
        // BUG FIX: the nano argument was previously (wait % 1000), which
        // dropped up to ~1ms of the remainder on every iteration;
        // Thread.sleep expects the sub-millisecond remainder, i.e.
        // wait % 1000000 (range 0..999999).
        Thread.sleep(wait / NANOS_PER_MILLI, (int) (wait % NANOS_PER_MILLI));
      } catch (InterruptedException ignored) {
        // Deliberately ignored: restart loop and re-check the clock
      }
      now = System.nanoTime();
    }
  }
}
*/ package com.facebook.LinkBench.distributions; import java.util.Properties; import java.util.Random; import org.apache.log4j.Logger; import com.facebook.LinkBench.Config; import com.facebook.LinkBench.ConfigUtil; import com.facebook.LinkBench.InvertibleShuffler; import com.facebook.LinkBench.LinkBenchConfigError; import com.facebook.LinkBench.RealDistribution; import com.facebook.LinkBench.RealDistribution.DistributionType; import com.facebook.LinkBench.util.ClassLoadUtil; /** * Module for id access patterns that allows different implementations * of the AccessDistribution interface to be instantiated for configurable * access patterns. * @author tarmstrong * */ public class AccessDistributions { public interface AccessDistribution { /** * Choose the next id to be accessed * @param rng random number generator * @param previousId previous ID (for stateful generators) * @return */ public abstract long nextID(Random rng, long previousId); /** * A shuffler to shuffle the results, or * null if the results shouldn't be shuffled * @return */ public abstract InvertibleShuffler getShuffler(); } public static class BuiltinAccessDistribution implements AccessDistribution { private AccessDistMode mode; protected long minid; protected long maxid; private long config; /** Use to generate decent quality random longs in range */ UniformDistribution uniform; public BuiltinAccessDistribution(AccessDistMode mode, long minid, long maxid, long config) { this.mode = mode; this.minid = minid; this.maxid = maxid; this.config = config; uniform = new UniformDistribution(); uniform.init(minid, maxid, null, null); } @Override public long nextID(Random rng, long previousid) { long newid; double drange = (double)(maxid - minid); switch(mode) { case ROUND_ROBIN: //sequential from startid1 to maxid1 (circular) if (previousid <= minid) { newid = minid; } else { newid = previousid+1; if (newid >= maxid) { newid = minid; } } break; case RECIPROCAL: // inverse function f(x) = 1/x. 
newid = (long)(Math.ceil(drange/uniform.choose(rng))); if (newid < minid) newid = minid; if (newid >= maxid) newid = maxid; break; case MULTIPLE: // generate id1 that is even multiple of config newid = config * (long)(Math.ceil(uniform.choose(rng)/config)); break; case POWER: // generate id1 that is power of config double log = Math.ceil(Math.log(uniform.choose(rng))/Math.log(config)); newid = Math.min(maxid - 1, (long)Math.pow(config, log)); break; case PERFECT_POWER: // generate id1 that is perfect square if config is 2, // perfect cube if config is 3 etc // get the nth root where n = distrconfig long nthroot = (long)Math.ceil(Math.pow(uniform.choose(rng), (1.0)/config)); // get nthroot raised to power n newid = Math.min(maxid - 1, (long)Math.pow(nthroot, config)); break; default: throw new RuntimeException("Unknown access dist mode: " + mode); } return newid; } @Override public InvertibleShuffler getShuffler() { // Don't shuffle these distributions return null; } } public static class ProbAccessDistribution implements AccessDistribution { private final ProbabilityDistribution dist; private InvertibleShuffler shuffler; public ProbAccessDistribution(ProbabilityDistribution dist, InvertibleShuffler shuffler) { super(); this.dist = dist; this.shuffler = shuffler; } @Override public long nextID(Random rng, long previousId) { return dist.choose(rng); } @Override public InvertibleShuffler getShuffler() { return shuffler; } } public static enum AccessDistMode { REAL, // Real empirical distribution ROUND_ROBIN, // Cycle through ids RECIPROCAL, // Pick with probability MULTIPLE, // Pick a multiple of config parameter POWER, // Pick a power of config parameter PERFECT_POWER // Pick a perfect power (square, cube, etc) with exponent // as configured } public static AccessDistribution loadAccessDistribution(Properties props, long minid, long maxid, DistributionType kind) throws LinkBenchConfigError { Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); String 
keyPrefix; switch(kind) { case LINK_READS: keyPrefix = Config.READ_CONFIG_PREFIX; break; case LINK_READS_UNCORR: keyPrefix = Config.READ_UNCORR_CONFIG_PREFIX; break; case LINK_WRITES: keyPrefix = Config.WRITE_CONFIG_PREFIX; break; case LINK_WRITES_UNCORR: keyPrefix = Config.WRITE_UNCORR_CONFIG_PREFIX; break; case NODE_READS: keyPrefix = Config.NODE_READ_CONFIG_PREFIX; break; case NODE_UPDATES: keyPrefix = Config.NODE_UPDATE_CONFIG_PREFIX; break; case NODE_DELETES: keyPrefix = Config.NODE_DELETE_CONFIG_PREFIX; break; default: throw new RuntimeException("Bad kind " + kind); } String func_key = keyPrefix + Config.ACCESS_FUNCTION_SUFFIX; String access_func = ConfigUtil.getPropertyRequired(props, func_key); try { AccessDistMode mode = AccessDistMode.valueOf(access_func.toUpperCase()); if (mode == AccessDistMode.REAL) { RealDistribution realDist = new RealDistribution(); realDist.init(props, minid, maxid, kind); InvertibleShuffler shuffler = RealDistribution.getShuffler(kind, maxid - minid); logger.debug("Using real access distribution" + " for " + kind.toString().toLowerCase()); return new ProbAccessDistribution(realDist, shuffler); } else { String config_key = keyPrefix + Config.ACCESS_CONFIG_SUFFIX; long config_val = ConfigUtil.getLong(props, config_key); logger.debug("Using built-in access distribution " + mode + " with config param " + config_val + " for " + kind.toString().toLowerCase()); return new BuiltinAccessDistribution(mode, minid, maxid, config_val); } } catch (IllegalArgumentException e) { return tryDynamicLoad(access_func, props, keyPrefix, minid, maxid, kind); } } /** * * @param className ProbabilityDistribution class name * @param props * @param keyPrefix prefix to use for looking up keys in props * @param minid * @param maxid * @return */ private static AccessDistribution tryDynamicLoad(String className, Properties props, String keyPrefix, long minid, long maxid, DistributionType kind) { try { Logger logger = 
Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); logger.debug("Using ProbabilityDistribution class " + className + " for " + kind.toString().toLowerCase()); ProbabilityDistribution pDist = ClassLoadUtil.newInstance(className, ProbabilityDistribution.class); pDist.init(minid, maxid, props, keyPrefix); InvertibleShuffler shuffler = RealDistribution.getShuffler(kind, maxid - minid); return new ProbAccessDistribution(pDist, shuffler); } catch (ClassNotFoundException e) { throw new LinkBenchConfigError("Access distribution class " + className + " not successfully loaded: " + e.getMessage()); } } } ================================================ FILE: src/main/java/com/facebook/LinkBench/distributions/ApproxHarmonic.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench.distributions; /** * Approximations to harmonic numbers that speed up calculation time. */ public class ApproxHarmonic { private static final long APPROX_THRESHOLD = 100000; // Euler Mascheroni constant private static final double EULER_MASCHERONI = 0.5772156649015328606065120900824024310421; /** * Approximation to generalized harmonic for 0 >= m >= 1. 
* Designed to not take more than a couple of seconds to calculate, * and the have error of < 0.05% * @param n * @param m assume > 0 <= * @return */ public static double generalizedHarmonic(final long n, final double m) { if (n < 0) { throw new IllegalArgumentException("n must be non-negative"); } if (m < 0 || m > 1) { throw new IllegalArgumentException("m = " + m + " outside " + "range [0, 1]"); } if (n < APPROX_THRESHOLD) { // Approximation less accurate for small n, and full calculation // doesn't take as long return Harmonic.generalizedHarmonic(n, m); } if (m == 1) { // Standard approximation for regular harmonic numbers return Math.log(n) + EULER_MASCHERONI + 1 / (2 * n); } else { // Rough approximation for generalized harmonic for // m >= 0 and m <= 1 // Standard integral of 1/(n^k) double integral = (1 / (1 - m)) * Math.pow(n, 1 - m); // Empirically derived correction factor that is good enough // to get to within 0.2% or so of exact number double correction = 0.58 - 1 / (1 - m); return integral + correction; } } } ================================================ FILE: src/main/java/com/facebook/LinkBench/distributions/GeometricDistribution.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench.distributions; import java.util.Properties; import java.util.Random; import org.apache.commons.math3.util.FastMath; import com.facebook.LinkBench.Config; import com.facebook.LinkBench.ConfigUtil; /** * Geometric distribution * * NOTE: this generates values in the range [min, max). Since the * real geometric distribution generates values in range [min, inf), * we truncate anything >= max */ public class GeometricDistribution implements ProbabilityDistribution { /** The probability parameter that defines the distribution */ private double p = 0.0; /** Valid range */ private long min = 0, max = 0; private double scale = 0.0; public static final String PROB_PARAM_KEY = "prob"; @Override public void init(long min, long max, Properties props, String keyPrefix) { double parsedP = ConfigUtil.getDouble(props, keyPrefix + PROB_PARAM_KEY); double scaleVal = 1.0;; if (props.containsKey(Config.PROB_MEAN)) { scaleVal = (max - min) * ConfigUtil.getDouble(props, keyPrefix + Config.PROB_MEAN); } init(min, max, parsedP, scaleVal); } public void init(long min, long max, double p, double scale) { this.min = min; this.max = max; this.p = p; this.scale = scale; } @Override public double pdf(long id) { return scaledPdf(id, 1.0); } @Override public double expectedCount(long id) { return scaledPdf(id, scale); } private double scaledPdf(long id, double scaleFactor) { if (id < min || id >= max) return 0.0; long x = id - min; return FastMath.pow(1 - p, x) * scaleFactor * p; } @Override public double cdf(long id) { if (id < min) return 0.0; if (id >= max) return 1.0; return 1 - FastMath.pow(1 - p, id - min + 1); } @Override public long choose(Random rng) { return quantile(rng.nextDouble()); } @Override public long quantile(double r) { /* * Quantile function for geometric distribution over * range [0, inf) where 0 < r < 1 * quantile(r) = ceiling(ln(1 - r) / ln (1 - p)) * Source: http://www.math.uah.edu/stat/bernoulli/Geometric.html */ if (r == 0.0) return min; 
// 0.0 must be handled specially long x = min + (long)FastMath.ceil( FastMath.log(1 - r) / FastMath.log(1 - p)); // truncate over max return Math.min(x, max - 1); } } ================================================ FILE: src/main/java/com/facebook/LinkBench/distributions/Harmonic.java ================================================ package com.facebook.LinkBench.distributions; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /** * This code was derived and modified from the Apache Commons * Math 3.0 source release and modified for use in LinkBench * * @author tarmstrong */ import org.apache.commons.math3.util.FastMath; public class Harmonic { /** * Calculates the Nth generalized harmonic number. See * Harmonic * Series. * * @param n Term in the series to calculate (must be larger than 1) * @param m Exponent (special case {@code m = 1} is the harmonic series). * @return the nth generalized harmonic number. 
*/ public static double generalizedHarmonic(final long n, final double m) { double value = 0; for (long k = n; k > 0; --k) { value += 1.0 / FastMath.pow(k, m); } return value; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/distributions/ID2Chooser.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench.distributions; import java.util.Properties; import java.util.Random; import com.facebook.LinkBench.Config; import com.facebook.LinkBench.ConfigUtil; import com.facebook.LinkBench.InvertibleShuffler; import com.facebook.LinkBench.LinkStore; import com.facebook.LinkBench.RealDistribution; import com.facebook.LinkBench.RealDistribution.DistributionType; import com.facebook.LinkBench.distributions.LinkDistributions.LinkDistribution; /** * Encapsulate logic for choosing id2s for request workload. * @author tarmstrong * */ public class ID2Chooser { /* * Constants controlling the desired probability of a link for (id1, link_type, id2) * existing for a given operation. 
must be > 0 */ public static final double P_GET_EXIST = 0.5; // Mix of new and pre-loaded public static final double P_UPDATE_EXIST = 0.9; // Mostly pre-loaded public static final double P_DELETE_EXIST = 0.9; public static final double P_ADD_EXIST = 0.05; // Avoid colliding with pre-loaded too much // How many times to try to find a unique id2 private static final int MAX_UNIQ_ITERS = 100; private final long startid1; private long maxid1; /** if > 0, choose id2s in range [startid1, randomid2max) */ private final long randomid2max; /** Number of distinct link types */ private final int linkTypeCount; private final InvertibleShuffler nLinksShuffler; // #links distribution from properties file private final LinkDistribution linkDist; // configuration for generating id2 private final int id2gen_config; // Information about number of request threads, used to generate // thread-unique id2s private final int nrequesters; private final int requesterID; public ID2Chooser(Properties props, long startid1, long maxid1, int nrequesters, int requesterID) { this.startid1 = startid1; this.maxid1 = maxid1; this.nrequesters = nrequesters; this.requesterID = requesterID; // random number generator for id2 randomid2max = ConfigUtil.getLong(props, Config.RANDOM_ID2_MAX, 0L); // configuration for generating id2 id2gen_config = ConfigUtil.getInt(props, Config.ID2GEN_CONFIG, 0); linkTypeCount = ConfigUtil.getInt(props, Config.LINK_TYPE_COUNT, 1); linkDist = LinkDistributions.loadLinkDistribution(props, startid1, maxid1); nLinksShuffler = RealDistribution.getShuffler(DistributionType.LINKS, maxid1 - startid1); } /** * Choose an ids * @param rng * @param id1 * @param link_type * @param outlink_ix this is the ith link of this type for this id1 * @return */ public long chooseForLoad(Random rng, long id1, long link_type, long outlink_ix) { if (randomid2max == 0) { return id1 + outlink_ix; } else { return rng.nextInt((int)randomid2max); } } /** * Choose an id2 for an operation given an id1 * 
@param id1 * @param linkType * @param pExisting approximate probability that id should be in * existing range * @return */ public long chooseForOp(Random rng, long id1, long linkType, double pExisting) { long nlinks = calcLinkCount(id1, linkType); long range = calcID2Range(pExisting, nlinks); return chooseForOpInternal(rng, id1, range); } public long[] chooseMultipleForOp(Random rng, long id1, long linkType, int nid2s, double pExisting) { long id2s[] = new long[nid2s]; long nlinks = calcLinkCount(id1, linkType); long range = calcID2Range(pExisting, nlinks); if (range <= nid2s && randomid2max == 0) { // Range is smaller than required # of ids, fill in all from range for (int i = 0; i < nid2s; i++) { long id2 = id1 + i; if (id2gen_config == 1) { id2 = fixId2(id2, nrequesters, requesterID, randomid2max); } id2s[i] = id2; } } else { for (int i = 0; i < nid2s; i++) { long id2; int iters = 0; // avoid long or infinite loop do { // Find a unique id2 id2 = chooseForOpInternal(rng, id1, range); iters++; } while (contains(id2s, i, id2) && iters <= MAX_UNIQ_ITERS); id2s[i] = id2; } } return id2s; } /** * Check if id2 is in first n elements of id2s * @param id2s * @param i * @param id2 * @return */ private boolean contains(long[] id2s, int n, long id2) { for (int i = 0; i < n; i++) { if (id2s[i] == id2) { return true; } } return false; } private long calcID2Range(double pExisting, long nlinks) { long range = (long) Math.ceil((1/pExisting) * nlinks); range = Math.max(1, range);// Ensure non-empty range return range; } /** * Internal helper to choose id * @param rng * @param id1 * @param range range size of id2s to select within * @return */ private long chooseForOpInternal(Random rng, long id1, long range) { assert(range >= 1); // We want to sometimes add a link that already exists and sometimes // add a new link. So generate id2 such that it has roughly pExisting // chance of already existing. 
// This happens unless randomid2max is non-zero (in which case just pick a // random id2 upto randomid2max). long id2; if (randomid2max == 0) { id2 = id1 + rng.nextInt((int)range); } else { id2 = rng.nextInt((int)randomid2max); } if (id2gen_config == 1) { return fixId2(id2, nrequesters, requesterID, randomid2max); } else { return id2; } } public boolean sameShuffle; /** * Calculates the original number of outlinks for a given id1 (i.e. the * number that would have been loaded) * Sets sameShuffle field to true if shuffled was same as original * @return number of links for this id1 */ public long calcTotalLinkCount(long id1) { assert(id1 >= startid1 && id1 < maxid1); // Shuffle. A low id after shuffling means many links, a high means few long shuffled; if (linkDist.doShuffle()) { shuffled = startid1 + nLinksShuffler.invertPermute(id1 - startid1); } else { shuffled = id1; } assert(shuffled >= startid1 && shuffled < maxid1); sameShuffle = shuffled == id1; long nlinks = linkDist.getNlinks(shuffled); return nlinks; } // return a new id2 that satisfies 3 conditions: // 1. close to current id2 (can be slightly smaller, equal, or larger); // 2. new_id2 % nrequesters = requestersId; // 3. smaller or equal to randomid2max unless randomid2max = 0 private static long fixId2(long id2, long nrequesters, long requesterID, long randomid2max) { long newid2 = id2 - (id2 % nrequesters) + requesterID; if ((newid2 > randomid2max) && (randomid2max > 0)) newid2 -= nrequesters; return newid2; } public long[] getLinkTypes() { long res[] = new long[linkTypeCount]; // Just have link types in a sequence starting at the default one for (int i = 0; i < linkTypeCount; i++) { res[i] = LinkStore.DEFAULT_LINK_TYPE + i; } return res; } /** * Choose a link type. * For now just select each type with equal probability. 
*/ public long chooseRandomLinkType(Random rng) { return LinkStore.DEFAULT_LINK_TYPE + rng.nextInt(linkTypeCount); } public long calcLinkCount(long id1, long linkType) { // Divide total links between types so that total is correct long totCount = calcTotalLinkCount(id1); long minCount = totCount / linkTypeCount; long leftOver = totCount - minCount; int typeNum = (int)(linkType -LinkStore.DEFAULT_LINK_TYPE); if (typeNum < leftOver) { return minCount + 1; } else { return minCount; } } } ================================================ FILE: src/main/java/com/facebook/LinkBench/distributions/LinkDistributions.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench.distributions; import java.util.Properties; import org.apache.log4j.Logger; import com.facebook.LinkBench.Config; import com.facebook.LinkBench.ConfigUtil; import com.facebook.LinkBench.LinkBenchConfigError; import com.facebook.LinkBench.RealDistribution; import com.facebook.LinkBench.RealDistribution.DistributionType; import com.facebook.LinkBench.util.ClassLoadUtil; public class LinkDistributions { public static interface LinkDistribution { public abstract long getNlinks(long id1); /** * Let caller know it should shuffle IDs * @return */ public boolean doShuffle(); } public static class ProbLinkDistribution implements LinkDistribution { private ProbabilityDistribution dist; public ProbLinkDistribution(ProbabilityDistribution dist) { this.dist = dist; } @Override public long getNlinks(long id1) { return (long) Math.round( dist.expectedCount(id1)); } /** shuffle, otherwise ids will be in order of most to least ids */ @Override public boolean doShuffle() { return true; } } /** * Built-in distributions */ public static enum LinkDistMode { REAL, // observed distribution CONST, // Constant value RECIPROCAL, // 1/x MULTIPLES, // boost multiples of param PERFECT_SQUARES, EXPONENTIAL } /** * Some link distributions using arithmetic tricks */ public static class ArithLinkDistribution implements LinkDistribution { private LinkDistMode mode; private long nlinks_config; private long nlinks_default; private long minid1, maxid1; public ArithLinkDistribution(long minid1, long maxid1, LinkDistMode mode, long nlinks_config, long nlinks_default) { this.minid1 = minid1; this.maxid1 = maxid1; this.mode = mode; this.nlinks_config = nlinks_config; this.nlinks_default = nlinks_default; } /** * Gets the #links to generate for an id1 based on distribution specified by * nlinks_func, nlinks_config */ @Override public long getNlinks(long id1) { switch (mode) { case CONST: // Constant return nlinks_default; case RECIPROCAL: // Corresponds to function 1/x 
long n = maxid1 - minid1; long off = id1 - minid1; return nlinks_default + (long) Math.ceil((double) n / (double) off); case MULTIPLES: // if id1 is multiple of nlinks_config, then add nlinks_config return nlinks_default + (id1 % nlinks_config == 0 ? nlinks_config : 0); case EXPONENTIAL: // Corresponds to exponential distribution // If id1 is nlinks_config^k, then add // nlinks_config^k - nlinks_config^(k-1) more links long log = (long) Math.ceil(Math.log(id1) / Math.log(nlinks_config)); long temp = (long) Math.pow(nlinks_config, log); return nlinks_default + (temp == id1 ? (id1 - (long) Math.pow(nlinks_config, log - 1)) : 0); case PERFECT_SQUARES: // if nlinks_func is 2 then // if id1 is K * K, then add K * K - (K - 1) * (K - 1) more links. // The idea is to give more #links to perfect squares. The larger // the perfect square is, the more #links it will get. // Generalize the above for nlinks_func is n: // if id1 is K^n, then add K^n - (K - 1)^n more links long nthroot = (long) Math.ceil(Math.pow(id1, (1.0) / nlinks_config)); long temp2 = (long) Math.pow(nthroot, nlinks_config); return nlinks_default += (temp2 == id1 ? 
(id1 - (long) Math.pow( nthroot - 1, nlinks_config)) : 0); default: throw new RuntimeException("Unknown mode: " + mode); } } @Override public boolean doShuffle() { // don't shuffle: these methods already randomize order by design return false; } } public static LinkDistribution loadLinkDistribution(Properties props, long minid1, long maxid1) { Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); String nlinks_func; // distribution function for #links nlinks_func = ConfigUtil.getPropertyRequired(props, Config.NLINKS_FUNC); // We have built-in versions defined by LinkDistMode, and also support // dynamic loading of ProbabilityDistribution instances LinkDistMode mode; try { // Try to see if it is built-in mode = LinkDistMode.valueOf(nlinks_func.toUpperCase()); } catch (IllegalArgumentException ex) { // If not built-in, assume it's a class name return tryDynamicLoad(nlinks_func, props, minid1, maxid1); } // real distribution has it own initialization if (mode == LinkDistMode.REAL) { logger.debug("Using real link distribution"); RealDistribution realDist = new RealDistribution(); realDist.init(props, minid1, maxid1, DistributionType.LINKS); return new ProbLinkDistribution(realDist); } else { // Various arithmetic modes // an additional parameter for the function int nlinks_config = ConfigUtil.getInt(props, Config.NLINKS_CONFIG); // minimum #links - expected to be 0 or 1 int nlinks_default = ConfigUtil.getInt(props, Config.NLINKS_DEFAULT); logger.debug("Using built-in arithmetic link distribution " + mode + " with default #links " + nlinks_config + " and " + " config parameter " + nlinks_config); return new ArithLinkDistribution(minid1, maxid1, mode, nlinks_config, nlinks_default); // throw new LinkBenchConfigError("Unknown setting for links function: " + // nlinks_func); } } /** * Try to dynamically load a ProbabilityDistribution class * @param className * @param props * @param minid1 * @param maxid1 * @return */ private static LinkDistribution 
tryDynamicLoad(String className, Properties props, long minid1, long maxid1) { try { Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); logger.debug("Using LinkDistribution class " + className); ProbabilityDistribution pDist = ClassLoadUtil.newInstance(className, ProbabilityDistribution.class); pDist.init(minid1, maxid1, props, Config.NLINKS_PREFIX); return new ProbLinkDistribution(pDist); } catch (ClassNotFoundException e) { throw new LinkBenchConfigError("Link distribution class " + className + " not successfully loaded: " + e.getMessage()); } } } ================================================ FILE: src/main/java/com/facebook/LinkBench/distributions/LogNormalDistribution.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench.distributions; import java.util.Properties; import java.util.Random; import org.apache.commons.math3.util.FastMath; import com.facebook.LinkBench.ConfigUtil; public class LogNormalDistribution implements ProbabilityDistribution { private long min; private long max; private double mu; // mean of the natural log of random variable private double sigma; // standard deviation of natural log of random variable public static final String CONFIG_MEDIAN = "median"; public static final String CONFIG_SIGMA = "sigma"; @Override public void init(long min, long max, Properties props, String keyPrefix) { double sigma = ConfigUtil.getDouble(props, CONFIG_SIGMA); double median = ConfigUtil.getDouble(props, CONFIG_MEDIAN); init(min, max, median, sigma); } /** * * @param min * @param max * @param median the median value of the distribution * @param sigma the standard deviation of the natural log of the variable * @param scale */ public void init(long min, long max, double median, double sigma) { this.min = min; this.max = max; this.mu = FastMath.log(median); this.sigma = sigma; } @Override public double pdf(long id) { throw new RuntimeException("pdf not implemented"); } @Override public double expectedCount(long id) { throw new RuntimeException("expectedCount not implemented"); } @Override public double cdf(long id) { if (id < min) return 0.0; if (id >= max) return 1.0; org.apache.commons.math3.distribution.LogNormalDistribution d = new org.apache.commons.math3.distribution.LogNormalDistribution(mu, sigma); return d.cumulativeProbability(id); } @Override public long choose(Random rng) { long choice = (long) Math.round(FastMath.exp((rng.nextGaussian() * sigma) + mu)); if (choice < min) return min; else if (choice >= max) return max - 1; else return choice; } @Override public long quantile(double p) { throw new RuntimeException("Quantile not implemented"); } } ================================================ FILE: 
src/main/java/com/facebook/LinkBench/distributions/PiecewiseLinearDistribution.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench.distributions; import java.util.ArrayList; import java.util.Arrays; import java.util.Random; /** * A distribution where the cumulative density function is an arbitrary * piecewise linear function. * * Rather confusingly there are two possible ways of looking at the * distribution. The first is to divide the keyspace by ids, and order * these IDs by the number of accesses. Then DIST-A determines how likely * it is that that given key will be chosen. The second is to divide the * keyspace into buckets, where there are multiple keys in each bucket which * have been accessed the same number of times. There DIST-B determines how * likely a random key is to fall into each bucket. The input data is * represented as DIST-B, but the probability distribution represented by * this class is DIST-A, so we need to convert from one representation to * another. * * The conversion process works as follows. * Suppose you have items numbered 0 to n - 1. Then item i gets assigned * the percentile rank p = i / (n - 1), a number between 0 and 1. * * The input is a set of tuples (r, v), where v is the total number of * observations of the item at percentile p. 
So the values of the are * denominated not in probability density, but rather in number of observation. * * This means that to convert the input to a probability density distribution, * we need to calculate the expected value of the distribution, and then divide * the value by that. * * This is an abstract class: the init method needs to be implemented * @author tarmstrong * */ public abstract class PiecewiseLinearDistribution implements ProbabilityDistribution { //helper class to store (value, probability) public static class Point implements Comparable { public int value; public double probability; public Point(int input_value, double input_probability) { this.value = input_value; this.probability = input_probability; } public int compareTo(Point obj) { Point p = (Point)obj; return this.value - p.value; } public String toString() { return "(" + value + ", " + probability + ")"; } } protected void init(long min, long max, ArrayList cdf) { double pdf[] = getPDF(cdf); double ccdf[] = getCCDF(pdf); double cs[] = getCumulativeSum(ccdf); long right_points[] = new long[cs.length]; init(min, max, cdf, cs, right_points, expectedValue(cdf)); } /** * Init with precalculated values * @param min * @param max * @param cdf * @param cs * @param right_points * @param expectedValue */ protected void init(long min, long max, ArrayList cdf, double cs[], long right_points[], double expectedValue) { this.min = min; this.max = max; this.cdf = cdf; this.cs = cs; this.right_points = right_points; this.expected_val = expectedValue; } protected long max; protected long min; protected ArrayList cdf; protected double[] cs; protected long[] right_points; /** * Total number of observations in data */ private double expected_val; @Override public double pdf(long id) { long n = (max - min); double totalSum = expected_val * n; return expectedCount(id) / totalSum; } @Override public double expectedCount(long id) { return expectedCount(min, max, id, cdf); } public static double expectedCount(long 
min, long max, long id, ArrayList cdf) { if (id < min || id >= max) { return 0.0; } long n = (max - min); // Put in into range [0.0, 1.0] with most popular at 0.0 double u = 1.0 - (id - min) / (double) n; int ix = binarySearch(cdf, u); Point p1 = cdf.get(ix); assert(u <= p1.probability); // Assuming piecewise linear, so equally as probably as p1.value return p1.value; } @Override public double cdf(long id) { // Since this should be the CDF function for DIST-A, rather // than DIST-B, it is non-trivial to calculate (requires some kind // of integration of DIST-B). throw new RuntimeException("Cdf not implemented yet"); } @Override public long quantile(double p) { // This is not implemented, due to similar reasons to cdf throw new RuntimeException("Quantile not implemented yet"); } @Override public long choose(Random rng) { return choose(rng, min, max, cs, right_points); } protected static long choose(Random rng, long startid1, long maxid1, double[] cs, long[] right_points) { double max_probability = cs[cs.length - 1]; double p = max_probability * rng.nextDouble(); int idx = binarySearch(cs, p); if (idx == 0) idx = 1; /* * TODO: this algorithm does not appear to generate data * faithful to the distribution. * Additional problems include data races if multiple threads are * concurrently modifying the shared arrays, and the fact * that a workload cannot be reproduced. */ long result = right_points[idx] % (maxid1 - startid1); right_points[idx] = (result + 1) % (maxid1 - startid1); long id1 = startid1 + result; return id1; } /** * Get the expected value of the distribution (e.g. 
the * average number of links * @param cdf * @return */ protected static double expectedValue(ArrayList cdf) { // This function is not entirely precise since it assumes // that the ID space is continuous, which is not an accurate // approximation for small ID counts if (cdf.size() == 0) return 0; // Assume CDF is piecewise linear double sum = 0; sum = cdf.get(0).probability * cdf.get(0).value; for (int i = 1; i < cdf.size(); i++) { Point prev = cdf.get(i-1); Point curr = cdf.get(i); double p = curr.probability - prev.probability; sum += p * curr.value; } return sum; } public static int binarySearch(ArrayList points, double p) { int left = 0, right = points.size() - 1; while (left < right) { int mid = (left + right)/2; if (points.get(mid).probability >= p) { right = mid; } else { left = mid + 1; } } if (points.get(left).probability >= p) { return left; } else { return left + 1; } } public static int binarySearch(double[] a, double p) { // Use built-in binary search int res = Arrays.binarySearch(a, p); if (res >= 0) { return res; } else { // Arrays.binarySearch returns (-(insertion point) - 1) when not found return -(res + 1); } } protected static double[] getPDF(ArrayList cdf) { int max_value = cdf.get(cdf.size() - 1).value; double[] pdf = new double[max_value + 1]; // set all 0 for (int i = 0; i < pdf.length; ++i) pdf[i] = 0; // convert cdf to pdf pdf[cdf.get(0).value] = cdf.get(0).probability; for (int i = 1; i < cdf.size(); ++i) { pdf[cdf.get(i).value] = cdf.get(i).probability - cdf.get(i - 1).probability; } return pdf; } protected static double[] getCCDF(double[] pdf) { int length = pdf.length; double[] ccdf = new double[length]; ccdf[length - 1] = pdf[length - 1]; for (int i = length - 2; i >= 0; --i) { ccdf[i] = ccdf[i + 1] + pdf[i]; } return ccdf; } protected static double[] getCumulativeSum(double[] cdf) { int length = cdf.length; double[] cs = new double[length]; cs[0] = 0; //ignore cdf[0] for (int i = 1; i < length; ++i) { cs[i] = cs[i - 1] + cdf[i]; } 
return cs; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/distributions/ProbabilityDistribution.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench.distributions; import java.util.Properties; import java.util.Random; /** * Probability distribution over a range of integers [min, max), ranked * in descending order of probability, where min is most probable and * max - 1 is least probable. * @author tarmstrong * */ public interface ProbabilityDistribution { /** * Initialize probability distribution for range [min, max) with additional * parameters pulled from properties dictionary by implementation. * @param min * @param max * @param props Properties dictionary for any extra parameters * @param keyPrefix In case there are multiple distributions with * different parameters in properties, this prefix can be * provided to distinguish when looking up keys */ public abstract void init(long min, long max, Properties props, String keyPrefix); /** * Probability density function, i.e. P(X = id) * @param id * @return */ public abstract double pdf(long id); /** * Probability density function scaled by an implementation-defined * factor (e.g. the number of trials, giving the expected number of values) * @param id * @return */ public abstract double expectedCount(long id); /** * Cumulative distribution function, i.e. 
for a random variable * X chosen accord to the distribution P(X <= id). * E.g. cdf(min - 1) = 0.0, and cdf(max - 1) = 1.0 * @param id * @return a probability in range [0.0, 1.0] */ public abstract double cdf(long id); /** * Choose a random id in range [min, max) according to the probability * distribution. * @param rng a random number generator to use for random choice * @return the chosen id */ public abstract long choose(Random rng); /** * Quantile function for the distribution * @return x such that Pr(X <= x) = p */ public abstract long quantile(double p); } ================================================ FILE: src/main/java/com/facebook/LinkBench/distributions/UniformDistribution.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench.distributions; import java.util.Properties; import java.util.Random; import com.facebook.LinkBench.Config; import com.facebook.LinkBench.ConfigUtil; /** * Uniform distribution over integers in range [minID, maxID), * where minID is included in range and maxID excluded * */ public class UniformDistribution implements ProbabilityDistribution { private long min = 0; private long max = 1; private double scale = 1.0; public void init(long min, long max, Properties props, String keyPrefix) { if (max <= min) { throw new IllegalArgumentException("max = " + max + " <= min = " + min + ": probability distribution cannot have zero or negative domain"); } this.min = min; this.max = max; if (props != null && props.containsKey(keyPrefix + Config.PROB_MEAN)) { scale = (max - min) * ConfigUtil.getDouble(props, keyPrefix + Config.PROB_MEAN); } else { scale = 1.0; } } public void init(long min, long max, double scale) { this.min = min; this.max = max; this.scale = scale; } @Override public double pdf(long id) { return scaledPDF(id, 1.0); } @Override public double expectedCount(long id) { return scaledPDF(id, scale); } private double scaledPDF(long id, double scale) { // Calculate this way to avoid losing precision by calculating very // small pdf number if (id < min || id >= max) return 0.0; return scale / (double) (max - min); } /** * Cumulative distribution function for distribution * @param id * @return */ public double cdf(long id) { if (id >= max) { return 1.0; } if (id < min) { return 0.0; } long n = max - min; long rank = id - min + 1; return rank / (double)n; } /** * Quantile function */ public long quantile(double p) { assert(p >= 0.0 && p <= 1.0); long n = max - min; long i = (long)Math.floor(p * n); if (i == n) return max - 1; return i + min; } // Total number of representable numbers by int private static final long UINT_RANGE = Integer.MAX_VALUE - (long) Integer.MIN_VALUE; /** Choose an id X uniformly in the range*/ public long 
choose(Random rng) { long n = max - min; // Java's random number generator has less randomness in lower bits // so just taking a mod doesn't give a good quality result. if (n <= Integer.MAX_VALUE) { return min + (long)rng.nextInt((int)n); } else if (n < UINT_RANGE) { return randint2(rng, n); } else { return UINT_RANGE * rng.nextInt((int)(n / UINT_RANGE)) + randint2(rng, n % UINT_RANGE); } } /** * Produce a random integer in range [0, n] * n must be in range [0, MAX_INT - MIN_INT] * @param rng * @param n * @return */ private long randint2(Random rng, long n) { assert(n < UINT_RANGE); double p = Integer.MAX_VALUE / (double)n; if (rng.nextDouble() < p) { return rng.nextInt(Integer.MAX_VALUE); } else { return Integer.MAX_VALUE + (long)(rng.nextInt((int)(n - Integer.MAX_VALUE))); } } } ================================================ FILE: src/main/java/com/facebook/LinkBench/distributions/ZipfDistribution.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench.distributions; import java.util.ArrayList; import java.util.Properties; import java.util.Random; import org.apache.commons.math3.util.FastMath; import org.apache.log4j.Logger; import com.facebook.LinkBench.Config; import com.facebook.LinkBench.ConfigUtil; public class ZipfDistribution implements ProbabilityDistribution { private final Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); private long min = 0; private long max = 1; private double shape = 0.0; /** The total number of items in the world */ private double scale; // precomputed values private double alpha = 0.0; private double eta = 0.0; private double zetan = 0.0; private double point5theta = 0.0; @Override public void init(long min, long max, Properties props, String keyPrefix) { if (max <= min) { throw new IllegalArgumentException("max = " + max + " <= min = " + min + ": probability distribution cannot have zero or negative domain"); } this.min = min; this.max = max; String shapeS = props != null ? ConfigUtil.getPropertyRequired(props, keyPrefix + "shape") : null; if (shapeS == null ) { throw new IllegalArgumentException("ZipfDistribution must be provided " + keyPrefix + "shape parameter"); } shape = Double.valueOf(shapeS); if (shape <= 0.0) { throw new IllegalArgumentException("Zipf shape parameter " + shape + " is not positive"); } if (props != null && props.containsKey(keyPrefix + Config.PROB_MEAN)) { scale = (max - min) * ConfigUtil.getDouble(props, keyPrefix + Config.PROB_MEAN); } else { scale = 1.0; } // Precompute some values to speed up future method calls long n = max - min; alpha = 1 / (1 - shape); zetan = calcZetan(n); eta = (1 - FastMath.pow(2.0 / n, 1 - shape)) / (1 - Harmonic.generalizedHarmonic(2, shape) / zetan); point5theta = FastMath.pow(0.5, shape); } // For large n, calculating zetan takes a long time. 
This is a simple // but effective caching technique that speeds up startup a lot // when multiple instances of the distribution are initialized in // close succession. private static class CacheEntry { long n; double shape; double zetan; } /** Min value of n to cache */ private static final long MIN_CACHE_VALUE = 1000; private static final int MAX_CACHE_ENTRIES = 1024; private static ArrayList zetanCache = new ArrayList(MAX_CACHE_ENTRIES); private double calcZetan(long n) { if (n < MIN_CACHE_VALUE) { return uncachedCalcZetan(n); } synchronized(ZipfDistribution.class) { for (int i = 0; i < zetanCache.size(); i++) { CacheEntry ce = zetanCache.get(i); if (ce.n == n && ce.shape == shape) { return ce.zetan; } } } double calcZetan = uncachedCalcZetan(n); synchronized (ZipfDistribution.class) { CacheEntry ce = new CacheEntry(); ce.zetan = calcZetan; ce.n = n; ce.shape = shape; if (zetanCache.size() >= MAX_CACHE_ENTRIES) { zetanCache.remove(0); } zetanCache.add(ce); } return calcZetan; } private double uncachedCalcZetan(long n) { double calcZetan; if (shape <= 1.0) { // use approximation calcZetan = ApproxHarmonic.generalizedHarmonic(n, shape); } else { // Can't use approximation // If calculation will take more than 5 or so seconds, let user know // what is happening if (n > 20000000) { logger.info("Precalculating constants for Zipf distribution over " + n + " items with shape = " + shape + ". 
Please be patient, this can take a little time."); } calcZetan = Harmonic.generalizedHarmonic(n, shape); } return calcZetan; } @Override public double pdf(long id) { return scaledPDF(id, 1.0); } @Override public double expectedCount(long id) { return scaledPDF(id, scale); } private double scaledPDF(long id, double scale) { // Calculate this way to avoid losing precision by calculating very // small pdf number if (id < min || id >= max) return 0.0; return (scale / (double) FastMath.pow(id + 1 - min, shape))/ zetan; } @Override public double cdf(long id) { if (id < min) return 0.0; if (id >= max) return 1.0; double harm; if (shape <= 1.0) { harm = ApproxHarmonic.generalizedHarmonic(id + 1 - min, shape); } else { harm = Harmonic.generalizedHarmonic(id + 1 - min, shape); } return harm / zetan; } /** * Algorithm from "Quickly Generating Billion-Record Synthetic Databases", * Gray et. al., 1994 * * Pick a value in range [min, max) according to zipf distribution, * with min being the most likely to be chosen */ @Override public long choose(Random rng) { return quantile(rng.nextDouble()); } /** * Quantile function * * parts of formula are precomputed in init since they are expensive * to calculate and only depend on the distribution parameters */ public long quantile(double p) { double uz = p * zetan; long n = max - min; if (uz < 1) return min; if (uz < 1 + point5theta) return min + 1; long offset = (long) (n * FastMath.pow(eta * p - eta + 1, alpha)); if (offset >= n) return max - 1; return min + offset; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/generators/DataGenerator.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench.generators; import java.util.Properties; import java.util.Random; public interface DataGenerator { public void init(Properties props, String keyPrefix); /** * Fill the provided array with randomly generated data * @param data * @return the argument, as a convenience so that an array can be * constructed and filled in a single statement */ public byte[] fill(Random rng, byte data[]); } ================================================ FILE: src/main/java/com/facebook/LinkBench/generators/MotifDataGenerator.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench.generators; import java.util.Properties; import java.util.Random; import com.facebook.LinkBench.Config; import com.facebook.LinkBench.ConfigUtil; import com.facebook.LinkBench.LinkBenchConfigError; /** * A simple data generator where the same sequences of bytes, or "motifs" occur * multiple times. 
This is designed to emulate one particular property of real * data that is exploited by compression algorithms. Typically a short sequence * of data generated by this generator will not be very compressible on its own, * as no motifs will recur, but if multiple output strings are concatenated * together then the same motifs will recur repeatedly and the data will be * compressible. * * The motif data generator has a buffer of "shared" motifs, which reoccur * frequently in the output of the generator * * The data generator generates bytes from within the range of values [min, max). * There is an additional parameter, which is called uniqueness for lack of a * better name. The generator fills a buffer with data in chunks. A chunk * is either generated as random new bytes, or is drawn from the "motifs", * * The uniqueness parameter controls the proportion of new chunks versus duplicated * motifs. It is a probability between 0.0 and 1.0. It can also be seen as the expected * percentage of bytes are generated from scratch. * * Control how often motifs appear in data * uniqueness = 0.0: all data drawn from motifs * uniqueness 1.0: completely independent bytes */ public class MotifDataGenerator implements DataGenerator { private static final int MAX_CHUNK_SIZE = 128; public static final int DEFAULT_MOTIF_BUFFER_SIZE = 512; /** Lowest byte to appear in output */ private int start; /** Number of distinct bytes to appear in output */ private int range; /** percentage of data drawn from motifs */ private double uniqueness; /** * Buffer with a sequence of random bytes that are * pasted into output. Starts off null, initialized * on demand. 
*/ private byte motifs[]; /** Size of motif buffer */ private int motifBytes; public MotifDataGenerator() { start = '\0'; range = 1; uniqueness = 0.0; } /** * Generate characters from start to end (inclusive both ends) * @param start * @param end */ public void init(int start, int end, double uniqueness) { init(start, end, uniqueness, DEFAULT_MOTIF_BUFFER_SIZE); } public void init(int start, int end, double uniqueness, int motifBytes) { if (start < 0 || start >= 256) { throw new LinkBenchConfigError("start " + start + " out of range [0,255]"); } if (end < 0 || end >= 256) { throw new LinkBenchConfigError("endbyte " + end + " out of range [0,255]"); } if (start >= end) { throw new LinkBenchConfigError("startByte " + start + " >= endByte " + end); } this.start = (byte)start; this.range = end - start + 1; this.uniqueness = uniqueness; this.motifBytes = motifBytes; this.motifs = null; } @Override public void init(Properties props, String keyPrefix) { int startByte = ConfigUtil.getInt(props, keyPrefix + Config.UNIFORM_GEN_STARTBYTE); int endByte = ConfigUtil.getInt(props, keyPrefix + Config.UNIFORM_GEN_ENDBYTE); double uniqueness = ConfigUtil.getDouble(props, keyPrefix + Config.MOTIF_GEN_UNIQUENESS); if (props.contains(keyPrefix + Config.MOTIF_GEN_LENGTH)) { int motifBytes = ConfigUtil.getInt(props, keyPrefix + Config.MOTIF_GEN_LENGTH); init(startByte, endByte, uniqueness, motifBytes); } else { init(startByte, endByte, uniqueness); } } /** * Give an upper bound for the compression ratio for the algorithm * @return number between 0.0 and 1.0 - 0.0 is perfectly compressible, * 1.0 is incompressible */ public double estMaxCompression() { // Avg bytes required to represent each character (uniformly distributed) double charCompression = range / (double) 255; // random data shouldn't have any inter-character correlations that can // be compressed. 
Upper bound derived by assuming motif is completely // compressible return charCompression * uniqueness; } @Override public byte[] fill(Random rng, byte[] data) { // Fill motifs now so that we can use rng if (motifs == null) { motifs = new byte[motifBytes]; for (int i = 0; i < motifs.length; i++) { motifs[i] = (byte) (start + rng.nextInt(range)); } } int n = data.length; int chunk = Math.min(MAX_CHUNK_SIZE, motifBytes); for (int i = 0; i < n; i += chunk) { if (rng.nextDouble() < uniqueness) { int chunkEnd = Math.min(n, i + chunk); // New sequence of unique bytes for (int j = i; j < chunkEnd; j++) { data[j] = (byte) (start + rng.nextInt(range)); } } else { int thisChunk = Math.min(chunk, n - i); int k = rng.nextInt(motifBytes - thisChunk + 1); // Copy previous sequence of bytes System.arraycopy(motifs, k, data, i, thisChunk); } } return data; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/generators/UniformDataGenerator.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench.generators; import java.util.Properties; import java.util.Random; import com.facebook.LinkBench.Config; import com.facebook.LinkBench.ConfigUtil; import com.facebook.LinkBench.LinkBenchConfigError; /** * A super simple data generator that generates a string of * characters chosen uniformly from a range. 
* * This probably isn't a good generator to use if you want something realistic, * especially if compressibility properties of the data will affect your * experiment. */ public class UniformDataGenerator implements DataGenerator { private int range; private int start; public UniformDataGenerator() { start = '\0'; range = 1; } /** * Generate characters from start to end (inclusive both ends) * @param start * @param end */ public void init(int start, int end) { if (start < 0 || start >= 256) { throw new LinkBenchConfigError("start " + start + " out of range [0,255]"); } if (end < 0 || end >= 256) { throw new LinkBenchConfigError("endbyte " + end + " out of range [0,255]"); } if (start >= end) { throw new LinkBenchConfigError("startByte " + start + " >= endByte " + end); } this.start = (byte)start; this.range = end - start + 1; } @Override public void init(Properties props, String keyPrefix) { int startByte = ConfigUtil.getInt(props, keyPrefix + Config.UNIFORM_GEN_STARTBYTE); int endByte = ConfigUtil.getInt(props, keyPrefix + Config.UNIFORM_GEN_ENDBYTE); init(startByte, endByte); } @Override public byte[] fill(Random rng, byte[] data) { return gen(rng, data, start, range); } public static byte[] gen(Random rng, byte[] data, int startByte, int range) { int n = data.length; for (int i = 0; i < n; i++) { data[i] = (byte) (startByte + rng.nextInt(range)); } return data; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/stats/LatencyStats.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench.stats; import java.io.PrintStream; import java.text.DecimalFormat; import org.apache.log4j.Logger; import com.facebook.LinkBench.ConfigUtil; import com.facebook.LinkBench.LinkBenchOp; import com.facebook.LinkBench.LinkStore; /** * Class used to track and compute latency statistics, particularly * percentiles. Times are divided into buckets, with counts maintained * per bucket. The division into buckets is based on typical latencies * for database operations: most are in the range of 0.1ms to 100ms. * we have 0.1ms-granularity buckets up to 1ms, then 1ms-granularity from * 1-100ms, then 100ms-granularity, and then 1s-granularity. 
*/ public class LatencyStats { public static int MAX_MILLIS = 100; /** * Keep track of running mean per thread and op type */ private RunningMean means[][]; /** Final means per op type */ double finalMeans[]; // Displayed along with stats private int maxThreads; public LatencyStats(int maxThreads) { this.maxThreads = maxThreads; means = new RunningMean[maxThreads][LinkStore.MAX_OPTYPES]; bucketCounts = new long[maxThreads][LinkStore.MAX_OPTYPES][NUM_BUCKETS]; maxLatency = new long[maxThreads][LinkStore.MAX_OPTYPES]; } private static final int SUB_MS_BUCKETS = 10; // Sub-ms granularity private static final int MS_BUCKETS = 99; // ms-granularity buckets private static final int HUNDREDMS_BUCKETS = 9; // 100ms-granularity buckets private static final int SEC_BUCKETS = 9; // 1s-granularity buckets public static final int NUM_BUCKETS = SUB_MS_BUCKETS + MS_BUCKETS + HUNDREDMS_BUCKETS + SEC_BUCKETS + 1; /** Counts of operations falling into each bucket */ private final long bucketCounts[][][]; /** Counts of samples per type */ private long sampleCounts[]; /** Cumulative bucket counts keyed by type, bucket# (calculated at end) */ private long bucketCountsCumulative[][]; /** Maximum latency by thread and type */ private long maxLatency[][]; public static int latencyToBucket(long microTime) { long ms = 1000; long msTime = microTime / ms; // Floored if (msTime == 0) { // Bucket per 0.1 ms return (int) (microTime / 100); } else if (msTime < 100) { // msBucket = 0 means 1-2 ms int msBucket = (int) msTime - 1; // Bucket per ms return SUB_MS_BUCKETS + msBucket; } else if (msTime < 1000){ int hundredMSBucket = (int) ( msTime / 100 ) - 1; return SUB_MS_BUCKETS + MS_BUCKETS + hundredMSBucket; } else if (msTime < 10000) { int secBucket = (int) (msTime / 1000) - 1; return SUB_MS_BUCKETS + MS_BUCKETS + HUNDREDMS_BUCKETS + secBucket; } else { return NUM_BUCKETS - 1; } } /** * * @param bucket * @return inclusive min and exclusive max time in microsecs for bucket */ public static long[] 
bucketBound(int bucket) { int ms = 1000; long s = ms * 1000; long res[] = new long[2]; if (bucket < SUB_MS_BUCKETS) { res[0] = bucket * 100; res[1] = (bucket+1) * 100; } else if (bucket < SUB_MS_BUCKETS + MS_BUCKETS) { res[0] = (bucket - SUB_MS_BUCKETS + 1) * ms; res[1] = (bucket - SUB_MS_BUCKETS + 2) * ms; } else if (bucket < SUB_MS_BUCKETS + MS_BUCKETS + HUNDREDMS_BUCKETS) { int hundredMS = bucket - SUB_MS_BUCKETS - MS_BUCKETS + 1; res[0] = hundredMS * 100 * ms; res[1] = (hundredMS + 1) * 100 * ms; } else if (bucket < SUB_MS_BUCKETS + MS_BUCKETS + HUNDREDMS_BUCKETS + SEC_BUCKETS) { int secBucket = bucket - SUB_MS_BUCKETS - MS_BUCKETS - SEC_BUCKETS + 1; res[0] = secBucket * s; res[1] = (secBucket + 1) * s; } else { res[0] = (SEC_BUCKETS + 1)* s; res[1] = 100 * s; } return res; } /** * Used by the linkbench driver to record latency of each * individual call */ public void recordLatency(int threadid, LinkBenchOp type, long microtimetaken) { long opBuckets[] = bucketCounts[threadid][type.ordinal()]; int bucket = latencyToBucket(microtimetaken); opBuckets[bucket]++; double time_ms = microtimetaken / 1000.0; if (means[threadid][type.ordinal()] == null) { means[threadid][type.ordinal()] = new RunningMean(time_ms); } else { means[threadid][type.ordinal()].addSample(time_ms); } if (maxLatency[threadid][type.ordinal()] < microtimetaken) { maxLatency[threadid][type.ordinal()] = microtimetaken; } } /** * Print out percentile values */ public void displayLatencyStats() { calcMeans(); calcCumulativeBuckets(); Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); // print percentiles for (LinkBenchOp type: LinkBenchOp.values()) { if (sampleCounts[type.ordinal()] == 0) { // no samples of this type continue; } DecimalFormat df = new DecimalFormat("#.###"); // Format to max 3 decimal place logger.info(type.displayName() + " count = " + sampleCounts[type.ordinal()] + " " + " p25 = " + percentileString(type, 25) + "ms " + " p50 = " + percentileString(type, 50) + "ms " + " 
p75 = " + percentileString(type, 75) + "ms " + " p95 = " + percentileString(type, 95) + "ms " + " p99 = " + percentileString(type, 99) + "ms " + " max = " + df.format(getMax(type)) + "ms " + " mean = " + df.format(getMean(type))+ "ms"); } } public void printCSVStats(PrintStream out, boolean header) { printCSVStats(out, header, LinkBenchOp.values()); } public void printCSVStats(PrintStream out, boolean header, LinkBenchOp... ops) { int percentiles[] = new int[] {25, 50, 75, 95, 99}; // Write out the header if (header) { out.print("op,count"); for (int percentile: percentiles) { out.print(String.format(",p%d_low,p%d_high", percentile, percentile)); } out.print(",max,mean"); out.println(); } // Print in milliseconds down to 10us granularity DecimalFormat df = new DecimalFormat("#.##"); for (LinkBenchOp op: ops) { long samples = sampleCounts[op.ordinal()]; if (samples == 0) { continue; } out.print(op.name()); out.print(","); out.print(samples); for (int percentile: percentiles) { long bounds[] = getBucketBounds(op, percentile); out.print(","); out.print(df.format(bounds[0] / 1000.0)); out.print(","); out.print(df.format(bounds[1] / 1000.0)); } out.print(","); out.print(df.format(getMax(op))); out.print(","); out.print(df.format(getMean(op))); out.println(); } } /** * Fill in the counts and means arrays */ private void calcMeans() { sampleCounts = new long[LinkStore.MAX_OPTYPES]; finalMeans = new double[LinkStore.MAX_OPTYPES]; for (int i = 0; i < LinkStore.MAX_OPTYPES; i++) { long samples = 0; for (int thread = 0; thread < maxThreads; thread++) { if (means[thread][i] != null) { samples += means[thread][i].samples(); } } sampleCounts[i] = samples; double weightedMean = 0.0; for (int thread = 0; thread < maxThreads; thread++) { if (means[thread][i] != null) { weightedMean += (means[thread][i].samples() / (double) samples) * means[thread][i].mean(); } } finalMeans[i] = weightedMean; } } private void calcCumulativeBuckets() { // Calculate the cumulative operation counts by 
bucket for each type bucketCountsCumulative = new long[LinkStore.MAX_OPTYPES][NUM_BUCKETS]; for (int type = 0; type < LinkStore.MAX_OPTYPES; type++) { long count = 0; for (int bucket = 0; bucket < NUM_BUCKETS; bucket++) { for (int thread = 0; thread < maxThreads; thread++) { count += bucketCounts[thread][type][bucket]; } bucketCountsCumulative[type][bucket] = count; } } } private long[] getBucketBounds(LinkBenchOp type, long percentile) { long n = sampleCounts[type.ordinal()]; // neededRank is the rank of the sample at the desired percentile long neededRank = (long) ((percentile / 100.0) * n); int bucketNum = -1; for (int i = 0; i < NUM_BUCKETS; i++) { long rank = bucketCountsCumulative[type.ordinal()][i]; if (neededRank <= rank) { // We have found the right bucket bucketNum = i; break; } } assert(bucketNum >= 0); // Should definitely be found; return bucketBound(bucketNum); } /** * * @return A human-readable string for the bucket bounds */ private String percentileString(LinkBenchOp type, long percentile) { return boundsToString(getBucketBounds(type, percentile)); } static String boundsToString(long[] bucketBounds) { double minMs = bucketBounds[0] / 1000.0; double maxMs = bucketBounds[1] / 1000.0; DecimalFormat df = new DecimalFormat("#.##"); // Format to max 1 decimal place return "["+ df.format(minMs) + "," + df.format(maxMs) + "]"; } private double getMean(LinkBenchOp type) { return finalMeans[type.ordinal()]; } private double getMax(LinkBenchOp type) { long max_us = 0; for (int thread = 0; thread < maxThreads; thread++) { max_us = Math.max(max_us, maxLatency[thread][type.ordinal()]); } return max_us / 1000.0; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/stats/RunningMean.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
/**
 * Numerically stable tracker of the running mean of a stream of samples.
 *
 * Instead of summing raw values (which loses precision when values are large
 * relative to their spread), differences from the first sample are
 * accumulated and the mean reconstructed from that offset.
 * See "Comparison of Several Algorithms for Computing Sample Means and
 * Variances", Ling, 1974, J. American Stat. Assoc.
 */
public class RunningMean {

  /** How many samples have been folded in so far */
  private long count;

  /** The first sample, used as the offset for all later accumulation */
  private final double offset;

  /** Running sum of (sample - offset) over samples 2..count */
  private double deltaSum;

  /**
   * Initialize with the first sample; the mean starts out equal to it.
   *
   * @param v1 the first sample value
   */
  public RunningMean(double v1) {
    super();
    this.offset = v1;
    this.count = 1;
    this.deltaSum = 0.0;
  }

  /** Fold one more sample into the running mean. */
  public void addSample(double vi) {
    count++;
    deltaSum += (vi - offset);
  }

  /** @return the mean of all samples seen so far */
  public double mean() {
    return offset + deltaSum / count;
  }

  /** @return the number of samples seen so far */
  public long samples() {
    return count;
  }
}
*/ package com.facebook.LinkBench.stats; import java.io.PrintStream; import java.text.DecimalFormat; import java.util.Arrays; import java.util.Collection; import java.util.Random; import org.apache.log4j.Logger; import com.facebook.LinkBench.ConfigUtil; import com.facebook.LinkBench.LinkBenchOp; import com.facebook.LinkBench.LinkStore; /** * This class is used to keep track of statistics. It collects a sample of the * total data (controlled by maxsamples) and can then compute statistics based * on that sample. * * The overall idea is to compute reasonably accurate statistics using bounded * space. * * Currently the class is used to print out stats at given intervals, with the * sample taken over a given time interval and printed at the end of the interval. */ public class SampledStats { // Actual number of operations per type that caller did private long numops[]; // Max samples per type private int maxsamples; // samples for various optypes private long[][] samples; /** Number of operations that the sample is drawn from */ private int opsSinceReset[]; // minimums encounetered per operation type private long minimums[]; // maximums encountered per operation type private long maximums[]; // #errors encountered per type private long errors[]; // Displayed along with stats private int threadID; private final Logger logger = Logger.getLogger(ConfigUtil.LINKBENCH_LOGGER); /** Stream to write csv output to ( null if no csv output ) */ private final PrintStream csvOutput; /** Random number generator used to decide which to include in sample */ private Random rng; public SampledStats(int input_threadID, int input_maxsamples, PrintStream csvOutput) { threadID = input_threadID; maxsamples = input_maxsamples; this.csvOutput = csvOutput; samples = new long[LinkStore.MAX_OPTYPES][maxsamples]; opsSinceReset = new int[LinkStore.MAX_OPTYPES]; minimums = new long[LinkStore.MAX_OPTYPES]; maximums = new long[LinkStore.MAX_OPTYPES]; numops = new long[LinkStore.MAX_OPTYPES]; errors = 
new long[LinkStore.MAX_OPTYPES]; rng = new Random(); csvOutput = null; } public void addStats(LinkBenchOp type, long timetaken, boolean error) { if (error) { errors[type.ordinal()]++; } if ((minimums[type.ordinal()] == 0) || (minimums[type.ordinal()] > timetaken)) { minimums[type.ordinal()] = timetaken; } if (timetaken > maximums[type.ordinal()]) { maximums[type.ordinal()] = timetaken; } numops[type.ordinal()]++; int opIndex = opsSinceReset[type.ordinal()]; opsSinceReset[type.ordinal()]++; if (opIndex < maxsamples) { samples[type.ordinal()][opIndex] = timetaken; } else { // Replacing with the probability guarantees that each measurement // has an equal probability of being included in the sample double pReplace = ((double)maxsamples) / opIndex; if (rng.nextDouble() < pReplace) { // Select sample to replace randomly samples[type.ordinal()][rng.nextInt(maxsamples)] = timetaken; } } } public void resetSamples() { for (LinkBenchOp type: LinkBenchOp.values()) { opsSinceReset[type.ordinal()] = 0; } } /** * display stats for samples from start (inclusive) to end (exclusive) * @param type * @param start * @param end * @param startTime_ms * @param nowTime_ms */ private void displayStats(LinkBenchOp type, int start, int end, long sampleStartTime_ms, long nowTime_ms) { int elems = end - start; long timestamp = nowTime_ms / 1000; long sampleDuration = nowTime_ms - sampleStartTime_ms; if (elems <= 0) { logger.info("ThreadID = " + threadID + " " + type.displayName() + " totalops = " + numops[type.ordinal()] + " totalErrors = " + errors[type.ordinal()] + " ops = " + opsSinceReset[type.ordinal()] + " sampleDuration = " + sampleDuration + "ms" + " samples = " + elems); if (csvOutput != null) { csvOutput.println(threadID + "," + timestamp + "," + type.name() + "," + numops[type.ordinal()] + "," + errors[type.ordinal()] + "," + 0 + "," + sampleDuration + ",0,,,,,,,,,"); } return; } // sort from start (inclusive) to end (exclusive) Arrays.sort(samples[type.ordinal()], start, end); 
RunningMean meanCalc = new RunningMean(samples[type.ordinal()][0]); for (int i = start + 1; i < end; i++) { meanCalc.addSample(samples[type.ordinal()][i]); } long min = samples[type.ordinal()][start]; long p25 = samples[type.ordinal()][start + elems/4]; long p50 = samples[type.ordinal()][start + elems/2]; long p75 = samples[type.ordinal()][end - 1 - elems/4]; long p90 = samples[type.ordinal()][end - 1 - elems/10]; long p95 = samples[type.ordinal()][end - 1 - elems/20]; long p99 = samples[type.ordinal()][end - 1 - elems/100]; long max = samples[type.ordinal()][end - 1]; double mean = meanCalc.mean(); DecimalFormat df = new DecimalFormat("#.##"); logger.info("ThreadID = " + threadID + " " + type.displayName() + " totalOps = " + numops[type.ordinal()] + " totalErrors = " + errors[type.ordinal()] + " ops = " + opsSinceReset[type.ordinal()] + " sampleDuration = " + sampleDuration + "ms" + " samples = " + elems + " mean = " + df.format(mean) + " min = " + min + " 25% = " + p25 + " 50% = " + p50 + " 75% = " + p75 + " 90% = " + p90 + " 95% = " + p95 + " 99% = " + p99 + " max = " + max); if (csvOutput != null) { csvOutput.println(threadID + "," + timestamp + "," + type.name() + "," + numops[type.ordinal()] + "," + errors[type.ordinal()] + "," + opsSinceReset[type.ordinal()] + "," + sampleDuration + "," + elems + "," + mean + "," + min + "," + p25 + "," + p50 + "," + p75 + "," + p90 + "," + p95 + "," + p99 + "," + max); } } /** * Write a header with column names for a csv file showing progress * @param out */ public static void writeCSVHeader(PrintStream out) { out.println("threadID,timestamp,op,totalops,totalerrors,ops," + "sampleDuration_us,sampleOps,mean_us,min_us,p25_us,p50_us," + "p75_us,p90_us,p95_us,p99_us,max_us"); } public void displayStatsAll(long sampleStartTime_ms, long nowTime_ms) { displayStats(sampleStartTime_ms, nowTime_ms, Arrays.asList(LinkBenchOp.values())); } public void displayStats(long sampleStartTime_ms, long nowTime_ms, Collection ops) { for 
(LinkBenchOp op: ops) { displayStats(op, 0, Math.min(maxsamples, opsSinceReset[op.ordinal()]), sampleStartTime_ms, nowTime_ms); } } /** * @return total operation count so far for type */ public long getCount(LinkBenchOp type) { return this.numops[type.ordinal()]; } } ================================================ FILE: src/main/java/com/facebook/LinkBench/util/ClassLoadUtil.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench.util; import java.lang.reflect.Constructor; /** * Utility methods for dynamic loading of classes */ public class ClassLoadUtil { private static final Class[] EMPTY_ARRAY = new Class[]{}; /** * Load a class by name. * @param name the class name. * @return the class object. * @throws ClassNotFoundException if the class is not found. 
/**
 * Utility methods for dynamic loading of classes.
 *
 * Generic type parameters were restored here: the extracted text had the
 * angle brackets stripped (raw {@code Class}, undeclared {@code T}) and
 * could not compile as shown.
 */
public class ClassLoadUtil {

  private static final Class<?>[] EMPTY_ARRAY = new Class<?>[]{};

  /**
   * Load a class by name.
   * @param name the class name.
   * @return the class object.
   * @throws ClassNotFoundException if the class is not found.
   */
  public static Class<?> getClassByName(String name)
      throws ClassNotFoundException {
    // Use the context class loader so classes provided on an
    // application-managed classpath are visible.
    ClassLoader classLoader = Thread.currentThread().getContextClassLoader();
    return Class.forName(name, true, classLoader);
  }

  /**
   * Create an object for the given class via its no-arg constructor.
   *
   * @param theClass class of which an object is created
   * @param expected the expected parent class or interface
   * @return a new object
   * @throws RuntimeException wrapping any reflection failure, or if theClass
   *         does not extend/implement expected
   */
  public static <T> T newInstance(Class<?> theClass, Class<T> expected) {
    T result;
    try {
      if (!expected.isAssignableFrom(theClass)) {
        // BUGFIX: added the missing space before "does not" so the message
        // no longer reads e.g. "Specified class Foodoes not extend..."
        throw new Exception("Specified class " + theClass.getName() + " " +
            "does not extend/implement " + expected.getName());
      }
      @SuppressWarnings("unchecked") // guarded by isAssignableFrom above
      Class<? extends T> clazz = (Class<? extends T>) theClass;
      Constructor<? extends T> meth = clazz.getDeclaredConstructor(EMPTY_ARRAY);
      meth.setAccessible(true); // allow non-public constructors
      result = meth.newInstance();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
    return result;
  }

  /**
   * Create an object of the named class via its no-arg constructor.
   *
   * @param className fully-qualified name of the class to instantiate
   * @param expected the expected parent class or interface
   * @return a new object
   * @throws ClassNotFoundException if the named class is not found
   */
  public static <T> T newInstance(String className, Class<T> expected)
      throws ClassNotFoundException {
    return newInstance(getClassByName(className), expected);
  }
}
If you do not have a // snapshot, set snapshot.snapshotid = 0 struct ReadOptions { 1:bool verify_checksums, 2:bool fill_cache, 3:Snapshot snapshot } // // Visibility state for assoc // enum AssocVisibility { VISIBLE = 0, // live object, include in lookups and count DELETED = 1, // exclude from lookup queries and count, ok to // delete permanently from persistent store UNUSED1 = 2, // not used HIDDEN = 3, // exclude from lookup queries and count UNUSED2 = 4, // not used HARD_DELETE = 5 // deleted by calling expunge, will be swept // as soon as possible } service RocksService {//extends fb303.FacebookService { // fbtype related rocks_common.TaoFBTypeGetResult TaoFBTypeGet( // fbid info to get 1:i64 fbid, ) throws (1:RocksException io); rocks_common.TaoFBTypeCreateResult TaoFBTypeCreate( // dbid 1:i32 dbid, // Creation time 2:i64 ctime, // Type of fbid to create 3:i32 fbtype, /** wormhole comment */ 4:Text wormhole_comment, 5:WriteOptions woptions, ) throws (1:RocksException io); rocks_common.RetCode TaoFBTypeResurrect( // FBID to resurrect 1:i64 fbid, // FbType 2:i32 fbtype, // Deletion flags 3:i32 deletion_flags, /** wormhole comment */ 4:Text wormhole_comment, 5:WriteOptions woptions, ) throws (1:RocksException io); rocks_common.RetCode TaoFBTypeDel( // fbid to delete 1:i64 fbid, // FbType 2:i32 fbtype, // Type of delete to perform 3:i32 flags, /** wormhole comment */ 4:Text wormhole_comment, 5:WriteOptions woptions, ) throws (1:RocksException io); void TaoFBObjectPut( // fbid to delete 1:i64 fbid, // FbType of the fbid 2:i32 fbtype, // version 3:i32 version, // new version 4:i32 new_version, // time 5:i64 time, // data 6:Text data, // is create 7:bool is_create, /** wormhole comment */ 8:Text wormhole_comment, 9:WriteOptions woptions, ) throws (1:RocksException io); rocks_common.TaoFBObjectGetResult TaoFBObjectGet( // fbid to delete 1:i64 fbid, // FbType of the fbid 2:i32 fbtype, ) throws (1:RocksException io); void TaoFBObjectDel( // fbid to delete 1:i64 fbid, // 
FbType of the fbid 2:i32 fbtype, /** wormhole comment */ 3:Text wormhole_comment, 4:WriteOptions woptions, ) throws (1:RocksException io); /** * TAO Assoc Put operation. * Note that currently the argument visibility has no effect. * * @if update_count is true, then return the updated count for this assoc * @if update_count is false, then return 0 * @return negative number if failure */ i64 TaoAssocPut( /** name of table */ 1:Text tableName, /** type assoc */ 2:i64 assocType, /** id1 of assoc */ 3:i64 id1, /** id2 of assoc */ 4:i64 id2, /** timestamp of assoc */ 5:i64 timestamp, /** visibility */ 6:AssocVisibility visibility, /** whether to keep the count or not */ 7:bool update_count, /** version of the data blob */ 8:i64 version, /** serialized data of assoc */ 9:Text data, /** wormhole comment */ 10:Text wormhole_comment, 11:WriteOptions woptions, ) throws (1:RocksException io), /** * TAO Assoc Delete operation. * * @return the updated count for this assoc */ i64 TaoAssocDelete( /** name of table */ 1:Text tableName, /** type assoc */ 2:i64 assocType, /** id1 of assoc */ 3:i64 id1, /** id2 of assoc */ 4:i64 id2, 5:i64 version, /** visibility flag for this delete */ 6:AssocVisibility visibility, /** whether to keep the count or not */ 7:bool update_count, /** wormhole comment */ 8:Text wormhole_comment, 9:WriteOptions woptions, ) throws (1:RocksException io), /** * TAO Assoc Get TimeRange operation. * Obtain assocs in bewteen starTime and endTime in the given order. * The range check is inclusive: startTime <= time && time <= endTime. 
*/ list TaoAssocGetTimeRange( /** name of table */ 1:Text tableName, /** type of assoc */ 2:i64 assocType, /** id1 of assoc */ 3:i64 id1, /** maximum timestamp of assocs to retrieve */ 4:i64 startTime, /** minimum timestamp of assocs to retrieve */ 5:i64 endTime, /** number of assocs to skip from start */ 6:i64 offset, /** max number of assocs (columns) returned */ 7:i64 limit ) throws (1:RocksException io), /** * TAO Assoc Get CursorRange operation. * Obtain assocs after in the given order. */ list TaoAssocGetCursorRange( /** name of table */ 1:Text tableName, /** type of assoc */ 2:i64 assocType, /** id1 of assoc */ 3:i64 id1, /** cursor: id2 **/ 4:i64 id2, /** cursor: time */ 5:i64 time, /** number of assocs to skip from start */ 6:i64 offset, /** max number of assocs (columns) returned */ 7:i64 limit ) throws (1:RocksException io), /** * TAO Assoc Get operation. * Obtain assocs with the given id2s */ list TaoAssocGetID2s( /** name of table */ 1:Text tableName, /** type of assoc */ 2:i64 assocType, /** id1 of assoc */ 3:i64 id1, /** list of id2 need to be fetch */ 4:list id2s ) throws (1:RocksException ex), /** * TAO Assoc Count Get operation. 
* Returns the number of assocs for given id1 and assoc type */ i64 TaoAssocCount( /** name of table */ 1:Text tableName, /** type of assoc */ 2:i64 assocType, /** id1 of assoc */ 3:i64 id1, ) throws (1:RocksException ex), i64 TaoAssocCountPut( /** name of table */ 1:Text tableName, /** type of assoc */ 2:i64 assocType, /** id1 of assoc */ 3:i64 id1, 4:i64 count, /** wormhole comment */ 5:Text wormhole_comment, 6:WriteOptions woptions, ) throws (1:RocksException io), } ================================================ FILE: src/main/java/com/facebook/rocks/swift/rocks_common.thrift ================================================ // Copyright 2012 Facebook namespace cpp facebook.rocks namespace cpp2 facebook.rocks namespace java facebook.rocks namespace java.swift com.facebook.rocks.swift namespace php rocks namespace py rocks_common // Types typedef binary Text typedef binary Bytes typedef binary Slice enum Code { kOk = 0, kEnd = 1, kNotFound = 2, kCorruption = 3, kNotSupported = 4, kInvalidArgument = 5, kIOError = 6, kSnapshotNotExists = 7, kWrongShard = 8, kUnknownError = 9, kClientError = 10, kEnsembleError = 11, kWriterAlreadyExistsError = 12, kReplicationError = 13, kRetryableError = 14, kTimeoutError = 15, kOpTooLarge = 16, kQueryFailed = 17, } struct RetCode { 1: Code state, 2: Slice msg, } /** * Response from fbtype_get */ struct TaoFBTypeGetResult { 1: RetCode retCode, 2:i32 fbtype, // Deletion flags 3:i32 flags, // Creation time 4:i64 ctime, } /** * Response from fbtype_create */ struct TaoFBTypeCreateResult { 1: RetCode retCode, // Creation time 2:i64 fbid, } struct TaoFBObjectGetResult { 1: bool found, 2:i32 version, 3:i64 updateTime, 4:Text data, } /** * Holds the assoc get result of a id2 */ struct TaoAssocGetEntry { /** id2 of assoc */ 2:i64 id2, /** time stamp of the assoc */ 3:i64 time, /** version of the data blob */ 4:i64 version, /** serialized data of the assoc */ 5:Text data, } struct TaoAssocGetResult { 1: RetCode retCode, 2: list entries, } 
struct TaoAssocCountResult { 1: RetCode retCode, 2: i64 count, } service RocksServiceCommon { } ================================================ FILE: src/test/java/com/facebook/LinkBench/DistributionTestBase.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.util.Properties; import java.util.Random; import junit.framework.TestCase; import org.junit.Test; import com.facebook.LinkBench.distributions.ProbabilityDistribution; /** * This test implements generic unit tests for different implementations of * ProbabilityDistribution. * * Most of these tests are either sanity tests (check that output is within * expected range and obeys basic invariants), and consistency tests * (check that the output of two different methods is consistent). * * While these tests go a long way to checking the consistency of the * behavior of the ProbabilityDistribution, it cannot check that the * specific correct values are generated: it is helpful to implement * additional tests for each concrete implementation. 
* * @author tarmstrong */ public abstract class DistributionTestBase extends TestCase { protected abstract ProbabilityDistribution getDist(); protected Properties getDistParams() { return new Properties(); } /** Number of cdf checks to perform */ protected int cdfChecks() { return 50000; } /** Number of pdf checks */ protected int pdfChecks() { return 50000; } protected Bucketer getBucketer() { return new UniformBucketer(cdfChecks()); } /** Percentage difference between cdf and choose() to tolerate */ protected double tolerance() { return 0.002; } public Random initRandom(String testName) { long seed = System.currentTimeMillis(); System.err.println("Choose seed " + seed + " for test " + testName); return new Random(seed); } /** * Check a few invariants cdf should adhere to */ @Test public void testCDFSanity() { ProbabilityDistribution dist = getDist(); long min = 453, max = 26546454; dist.init(min, max, getDistParams(), ""); assertEquals(dist.cdf(min-1), 0.0); assertEquals(dist.cdf(min-234321), 0.0); assertEquals(dist.cdf(max), 1.0); assertEquals(dist.cdf(max+2343242224234L), 1.0); // Check cdf is monotonically increasing double last = 0.0; long step = (max - min) / cdfChecks(); for (long id = min; id < max; id += step) { double p = dist.cdf(id); assertTrue(p >= last); last = p; } } @Test public void testPDFSanity() { ProbabilityDistribution dist = getDist(); long min = 453, max = 26546454; dist.init(min, max, getDistParams(), ""); assertEquals(0.0, dist.pdf(min-1)); assertEquals(0.0, dist.pdf(min-234321)); assertEquals(0.0, dist.pdf(max)); assertEquals(0.0, dist.pdf(max+2343242224234L)); // Check pdf is in correct range double total = 0.0; long step = (max - min) / pdfChecks(); for (long id = min; id < max; id += step) { double p = dist.pdf(id); if ((id - min) < step * 100) { System.err.println("p(X=" + id + ") = " + p); } assertTrue(p >= 0.0); assertTrue(p <= 1.0); total += p; } assert(total <= 1.0); } @Test public void testPDFSum() { ProbabilityDistribution dist 
= getDist(); long min = 1, max = 50; dist.init(min, max, getDistParams(), ""); // Check sum of pdf over small range // Order of least to most probably to minimize sum error double total = 0.0; for (long id = max - 1; id >= min; id--) { double p = dist.pdf(id); assertTrue(p >= 0.0); assertTrue(p <= 1.0); System.err.println("p(X=" + id + ") = " + p); total += p; } System.err.println("Total = " + total); // Give significant tolerance due to rounding errors assertTrue(total <= 1.05); assertTrue(total >= 0.95 ); } @Test public void testChooseSanity() { ProbabilityDistribution dist = getDist(); long min = 453, max = 26546454; dist.init(min, max, getDistParams(), ""); Random rng = initRandom("testChooseSanity"); for (int i = 0; i < 100000; i++) { long id = dist.choose(rng); assertTrue(id >= min); assertTrue(id < max); } } /** * Check that choose() and cdf() are returning consistent results * (i.e. that the result of choose are distributed according to cdf) */ @Test public void testCDFChooseConsistency() { long min = 100, max = 100000; Bucketer bucketer = getBucketer(); int bucketCount = bucketer.getBucketCount(); int buckets[] = new int[bucketCount]; long n = max - min; Random rng = initRandom("testCDFChooseConsistency"); ProbabilityDistribution dist = getDist(); dist.init(min, max, getDistParams(), ""); int trials = 1000000; for (int i = 0; i < trials; i++) { long id = dist.choose(rng); long off = id - min; int bucket = bucketer.chooseBucket(off, n); buckets[bucket]++; } int totalCount = 0; boolean fail = false; for (int b = 0; b < bucketCount; b++) { totalCount += buckets[b]; long bucketTop = bucketer.bucketMax(b, n) + min; double actCDF = ((double)totalCount) / trials; double expCDF = dist.cdf(bucketTop); // 0.2% error if (Math.abs(expCDF - actCDF) > tolerance()) { System.err.println(String.format("Divergence between CDF and " + "choose function: P(X <= %d) act: %f exp: %f", bucketTop, actCDF, expCDF)); fail = true; } } if (fail) { fail("Divergence between cdf and 
choose methods: see preceding output " + "for details"); } } @Test public void testCDFPDFConsistency() { long min = 252352, max = 6544543; ProbabilityDistribution dist = getDist(); dist.init(min, max, getDistParams(), ""); long step = (max - min) / cdfChecks(); for (long id = min + 1; id < max; id += step) { double c = dist.cdf(id); double c1 = dist.cdf(id - 1); double p = dist.pdf(id); double err = Math.abs((c - c1) - p); if (err > 0.0001) { fail(String.format("Error > 0.001: cdf(%d) - cdf(%d) = %f, pdf(%d) = %f", id, id -1, c1 - c, id, p)); } } } @Test public void testQuantileSanity() { long min = 0, max = 1000; ProbabilityDistribution dist = getDist(); dist.init(min, max, getDistParams(), ""); long last = dist.quantile(0.0); for (double q = 0.0; q <= 1.0; q += 0.125) { long id = dist.quantile(q); System.err.format("quantile(%f) = %d\n", q, id); assertTrue(id >= min); assertTrue(id < max); assertTrue(id >= last); last = id; } // min should be most probable, and therefore should definitely // be returned by quantile assertEquals(min, dist.quantile(0.0)); } /** * Different distributions should be bucketed in different ways * to test their fit. 
For example, the zipf distribution treats * lower keys specially so we want to have better resolution for * those */ static interface Bucketer { public int getBucketCount(); public int chooseBucket(long i, long n); public long bucketMax(int bucket, long n); } static class UniformBucketer implements Bucketer { final int bucketCount; public UniformBucketer(int bucketCount) { this.bucketCount = bucketCount; } public int getBucketCount() { return bucketCount; } public int chooseBucket(long i, long n) { return (int)((i * bucketCount) / n); } public long bucketMax(int bucket, long n) { return ((long)((((double)bucket+1)/bucketCount)*n)) - 1; } } } ================================================ FILE: src/test/java/com/facebook/LinkBench/DummyLinkStore.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench; import java.io.IOException; import java.util.List; import java.util.Properties; import com.facebook.LinkBench.Link; import com.facebook.LinkBench.LinkCount; import com.facebook.LinkBench.LinkStore; import com.facebook.LinkBench.Phase; /** * Can either be used as a wrapper around an existing LinkStore instance that * logs operations, or as a dummy linkstore instance that does nothing * */ public class DummyLinkStore extends GraphStore { public LinkStore wrappedStore; public GraphStore wrappedGraphStore; public DummyLinkStore() { this(null); } public DummyLinkStore(LinkStore wrappedStore) { this(wrappedStore, false); } public DummyLinkStore(LinkStore wrappedStore, boolean alreadyInitialized) { this.wrappedStore = wrappedStore; if (wrappedStore instanceof GraphStore) { wrappedGraphStore = (GraphStore) wrappedStore; } this.initialized = alreadyInitialized; } /** * @return true if real data is written and can be queried */ public boolean isRealLinkStore() { return wrappedStore != null; } /** * @return true if real node data is written and can be queried */ public boolean isRealGraphStore() { return wrappedGraphStore != null; } public boolean initialized = false; public long adds = 0; public long deletes = 0; public long updates = 0; public long multigetLinks = 0; public long getLinks = 0; public long getLinkLists = 0; public long getLinkListsHistory = 0; public long countLinks = 0; public long addNodes = 0; public long updateNodes = 0; public long deleteNodes = 0; public long getNodes = 0; public int bulkLoadBatchSize; public long bulkLoadLinkOps; public long bulkLoadLinkRows; public long bulkLoadCountOps; public long bulkLoadCountRows; @Override public void initialize(Properties p, Phase currentPhase, int threadId) throws IOException, Exception { if (initialized) { throw new Exception("Double initialization"); } initialized = true; if (wrappedStore != null) { wrappedStore.initialize(p, currentPhase, threadId); } } @Override public 
void close() { checkInitialized(); initialized = false; if (wrappedStore != null) { wrappedStore.close(); } } @Override public void clearErrors(int threadID) { checkInitialized(); if (wrappedStore != null) { wrappedStore.clearErrors(threadID); } } @Override public boolean addLink(String dbid, Link a, boolean noinverse) throws Exception { checkInitialized(); adds++; if (wrappedStore != null) { return wrappedStore.addLink(dbid, a, noinverse); } else { return true; } } @Override public boolean deleteLink(String dbid, long id1, long link_type, long id2, boolean noinverse, boolean expunge) throws Exception { checkInitialized(); deletes++; if (wrappedStore != null) { return wrappedStore.deleteLink(dbid, id1, link_type, id2, noinverse, expunge); } else { return true; } } @Override public boolean updateLink(String dbid, Link a, boolean noinverse) throws Exception { checkInitialized(); updates++; if (wrappedStore != null) { return wrappedStore.updateLink(dbid, a, noinverse); } else { return true; } } @Override public Link[] multigetLinks(String dbid, long id1, long link_type, long[] id2s) throws Exception { checkInitialized(); multigetLinks++; if (wrappedStore != null) { return wrappedStore.multigetLinks(dbid, id1, link_type, id2s); } else { return null; } } @Override public Link getLink(String dbid, long id1, long link_type, long id2) throws Exception { checkInitialized(); getLinks++; if (wrappedStore != null) { return wrappedStore.getLink(dbid, id1, link_type, id2); } else { return null; } } @Override public Link[] getLinkList(String dbid, long id1, long link_type) throws Exception { checkInitialized(); getLinkLists++; if (wrappedStore != null) { return wrappedStore.getLinkList(dbid, id1, link_type); } else { return null; } } @Override public Link[] getLinkList(String dbid, long id1, long link_type, long minTimestamp, long maxTimestamp, int offset, int limit) throws Exception { checkInitialized(); getLinkLists++; getLinkListsHistory++; if (wrappedStore != null) { return 
wrappedStore.getLinkList(dbid, id1, link_type, minTimestamp, maxTimestamp, offset, limit); } else { return null; } } @Override public long countLinks(String dbid, long id1, long link_type) throws Exception { checkInitialized(); countLinks++; if (wrappedStore != null) { return wrappedStore.countLinks(dbid, id1, link_type); } else { return 0; } } private void checkInitialized() { if (!initialized) { throw new RuntimeException("Expected store to be initialized"); } } @Override public int bulkLoadBatchSize() { if (wrappedStore != null) { return wrappedStore.bulkLoadBatchSize(); } else{ return bulkLoadBatchSize; } } @Override public void addBulkLinks(String dbid, List a, boolean noinverse) throws Exception { bulkLoadLinkOps++; bulkLoadLinkRows += a.size(); if (wrappedStore != null) { wrappedStore.addBulkLinks(dbid, a, noinverse); } } @Override public void addBulkCounts(String dbid, List a) throws Exception { bulkLoadCountOps++; bulkLoadCountRows += a.size(); if (wrappedStore != null) { wrappedStore.addBulkCounts(dbid, a); } } @Override public int getRangeLimit() { if (wrappedStore != null) { return wrappedStore.getRangeLimit(); } else { return rangeLimit; } } @Override public void setRangeLimit(int rangeLimit) { if (wrappedStore != null) { wrappedStore.setRangeLimit(rangeLimit); } else { this.rangeLimit = rangeLimit; } } @Override public void resetNodeStore(String dbid, long startID) throws Exception { if (wrappedGraphStore != null) { wrappedGraphStore.resetNodeStore(dbid, startID); } } @Override public long addNode(String dbid, Node node) throws Exception { addNodes++; if (wrappedGraphStore != null) { return wrappedGraphStore.addNode(dbid, node); } return 0; } @Override public Node getNode(String dbid, int type, long id) throws Exception { getNodes++; if (wrappedGraphStore != null) { return wrappedGraphStore.getNode(dbid, type, id); } return null; } @Override public boolean updateNode(String dbid, Node node) throws Exception { updateNodes++; if (wrappedGraphStore != 
null) { return wrappedGraphStore.updateNode(dbid, node); } return false; } @Override public boolean deleteNode(String dbid, int type, long id) throws Exception { deleteNodes++; if (wrappedGraphStore != null) { return wrappedGraphStore.deleteNode(dbid, type, id); } return false; } } ================================================ FILE: src/test/java/com/facebook/LinkBench/DummyLinkStoreTest.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.IOException; import java.util.Properties; public class DummyLinkStoreTest extends LinkStoreTestBase { private Properties props; @Override protected void initStore(Properties props) { // Do nothing this.props = props; } @Override protected DummyLinkStore getStoreHandle(boolean initialized) throws IOException, Exception { DummyLinkStore store = new DummyLinkStore(); if (initialized) { store.initialize(props, Phase.REQUEST, 0); } return store; } } ================================================ FILE: src/test/java/com/facebook/LinkBench/GeneratedDataDump.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.BufferedOutputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.Random; import com.facebook.LinkBench.generators.DataGenerator; import com.facebook.LinkBench.generators.MotifDataGenerator; import com.facebook.LinkBench.generators.UniformDataGenerator; /** * Generate some sample data using data generators, in order to test out compressibility * of randomly generated data. * * This generates several output files consistent of many generated payload data fields * separated by newlines */ public class GeneratedDataDump { private static final Random rng = new Random(); public static void main(String args[]) { String outputDir = ""; if (args.length == 0) { outputDir = "."; } else if (args.length == 1) { outputDir = args[0]; } else { System.err.println("GeneratedDataDump "); System.exit(1); } // Number of bytes per row final int objBytes = 256; final int assocBytes = 6; // Number of rows to generate final int objRows = 250000; final int assocRows = 10000000; writeGeneratedDataFile(outputDir + "/gen-data-motif.txt", makeMotifObj(), objRows, objBytes); writeGeneratedDataFile(outputDir + "/gen-data-uniform.txt", makeUniformObj(), objRows, objBytes); writeGeneratedDataFile(outputDir + "/gen-data-assoc-motif.txt", makeMotifAssoc(), assocRows, assocBytes); writeGeneratedDataFile(outputDir + "/gen-data-assoc-uniform.txt", makeUniformAssoc(), assocRows, assocBytes); } private static void writeGeneratedDataFile(String outFileName, 
DataGenerator gen, int rows, int bytes) { OutputStream out = null; try { out = new BufferedOutputStream(new FileOutputStream(outFileName)); } catch (FileNotFoundException e) { System.err.println("file " + outFileName + " could not be opened"); System.exit(1); } byte buf[] = new byte[bytes]; try { for (int i = 0; i < rows; i++) { gen.fill(rng, buf); out.write(buf); out.write('\n'); } out.close(); } catch (IOException e) { e.printStackTrace(); System.exit(1); } } private static DataGenerator makeUniformObj() { UniformDataGenerator gen = new UniformDataGenerator(); gen.init(50, 75); return gen; } private static MotifDataGenerator makeMotifObj() { MotifDataGenerator gen = new MotifDataGenerator(); int start = 50; int end = 220; double uniqueness = 0.63; gen.init(start, end, uniqueness); return gen; } private static DataGenerator makeUniformAssoc() { UniformDataGenerator gen = new UniformDataGenerator(); gen.init(50, 75); return gen; } private static MotifDataGenerator makeMotifAssoc() { MotifDataGenerator gen = new MotifDataGenerator(); int start = 32; int end = 100; double uniqueness = 0.225; int motifSize = 128; gen.init(start, end, uniqueness, motifSize); return gen; } } ================================================ FILE: src/test/java/com/facebook/LinkBench/GeomDistTest.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/

package com.facebook.LinkBench;

import java.util.Properties;

import org.junit.Test;

import com.facebook.LinkBench.distributions.GeometricDistribution;
import com.facebook.LinkBench.distributions.ProbabilityDistribution;

/**
 * Runs the generic distribution test suite against the geometric
 * distribution, plus spot-checks of cdf/pdf values.
 */
public class GeomDistTest extends DistributionTestBase {

  @Override
  protected ProbabilityDistribution getDist() {
    return new GeometricDistribution();
  }

  @Override
  protected Properties getDistParams() {
    // Success probability p = 0.2 for the base-class tests
    Properties props = new Properties();
    props.setProperty(GeometricDistribution.PROB_PARAM_KEY, "0.2");
    return props;
  }

  /**
   * Test cdf and pdf against precalculated values
   */
  @Test
  public void testGeom() {
    GeometricDistribution d = new GeometricDistribution();
    // Range [1, Long.MAX_VALUE), p = 0.3, scale = 1.0
    d.init(1, Long.MAX_VALUE, 0.3, 1.0);

    // cdf(k) = 1 - (1-p)^k, precomputed for p = 0.3
    assertEquals(0.3, d.cdf(1), 0.001);
    assertEquals(0.51, d.cdf(2), 0.001);
    assertEquals(0.657, d.cdf(3), 0.001);
    assertEquals(0.917646, d.cdf(7), 0.001);
    assertEquals(0.971752, d.cdf(10), 0.001);
    assertEquals(0.995252, d.cdf(15), 0.001);

    // pdf(k) = p * (1-p)^(k-1), precomputed for p = 0.3
    assertEquals(0.3, d.pdf(1), 0.001);
    assertEquals(0.21, d.pdf(2), 0.001);
    assertEquals(0.147, d.pdf(3), 0.001);
    assertEquals(0.035, d.pdf(7), 0.001);
    assertEquals(0.012106, d.pdf(10), 0.001);
    assertEquals(0.002035, d.pdf(15), 0.001);
  }
}

================================================ FILE: src/test/java/com/facebook/LinkBench/GraphStoreTestBase.java ================================================
/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

package com.facebook.LinkBench;

import java.io.IOException;
import java.util.Properties;
import java.util.Random;

import junit.framework.TestCase;

import org.apache.log4j.Logger;
import org.junit.Test;

import com.facebook.LinkBench.LinkBenchRequest.RequestProgress;
import com.facebook.LinkBench.distributions.AccessDistributions.AccessDistMode;
import com.facebook.LinkBench.distributions.UniformDistribution;
import com.facebook.LinkBench.generators.UniformDataGenerator;
import com.facebook.LinkBench.stats.LatencyStats;

/**
 * Base test suite for GraphStore implementations: exercises the full
 * LinkBench workload (link ops plus node ops) against a store handle
 * supplied by the concrete subclass.
 */
public abstract class GraphStoreTestBase extends TestCase {
  protected String testDB = "linkbench_unittestdb";

  private Logger logger = Logger.getLogger("");

  /**
   * Reinitialize link store database properties.
   * Should attempt to clean database
   * @param props Properties for test DB.
   *        Override any required properties in this property dict
   */
  protected abstract void initStore(Properties props)
      throws IOException, Exception;

  /**
   * Override to vary size of test
   * @return number of ids to use in testing
   */
  protected long getIDCount() {
    return 50000;
  }

  /**
   * Override to vary number of requests in test
   */
  protected int getRequestCount() {
    return 100000;
  }

  /**
   * Override to vary maximum number of threads
   */
  protected int maxConcurrentThreads() {
    return Integer.MAX_VALUE;
  }

  /** Get a new handle to the initialized store, wrapped in
   * DummyLinkStore
   * @return new handle to linkstore
   */
  protected abstract DummyLinkStore getStoreHandle(boolean initialized)
      throws IOException, Exception;

  @Override
  protected void setUp() throws Exception {
    super.setUp();
    initStore(basicProps());
  }

  /**
   * Provide properties for basic test store
   * @return
   */
  protected Properties basicProps() {
    Properties props = new Properties();
    props.setProperty(Config.DBID, testDB);
    return props;
  }

  /**
   * Fill in load-phase properties: delegates the link settings to
   * LinkStoreTestBase then adds node payload generator settings.
   */
  public static void fillLoadProps(Properties props, long startId,
      long idCount, int linksPerId) {
    LinkStoreTestBase.fillLoadProps(props, startId, idCount, linksPerId);
    props.setProperty(Config.NODE_DATASIZE, "512.0");
    props.setProperty(Config.NODE_ADD_DATAGEN,
        UniformDataGenerator.class.getName());
    props.setProperty(
        Config.NODE_ADD_DATAGEN_PREFIX + Config.UNIFORM_GEN_STARTBYTE, "0");
    props.setProperty(
        Config.NODE_ADD_DATAGEN_PREFIX + Config.UNIFORM_GEN_ENDBYTE, "255");
  }

  /**
   * Fill in request-phase properties: link-op mix is delegated to
   * LinkStoreTestBase, then node-op probabilities, node access
   * distributions and node payload generators are added.
   */
  public static void fillReqProps(Properties props, long startId, long idCount,
      int requests, long timeLimit, double p_addlink, double p_deletelink,
      double p_updatelink, double p_countlink, double p_multigetlink,
      double p_getlinklist, double p_addnode, double p_updatenode,
      double p_deletenode, double p_getnode) {
    LinkStoreTestBase.fillReqProps(props, startId, idCount, requests,
        timeLimit, p_addlink, p_deletelink, p_updatelink, p_countlink,
        p_multigetlink, p_getlinklist, true);

    props.setProperty(Config.PR_ADD_NODE, Double.toString(p_addnode));
    props.setProperty(Config.PR_UPDATE_NODE, Double.toString(p_updatenode));
    props.setProperty(Config.PR_DELETE_NODE, Double.toString(p_deletenode));
    props.setProperty(Config.PR_GET_NODE, Double.toString(p_getnode));

    // Node reads: uniform access; node updates: round-robin; deletes: uniform
    props.setProperty(
        Config.NODE_READ_CONFIG_PREFIX + Config.ACCESS_FUNCTION_SUFFIX,
        UniformDistribution.class.getName());
    props.setProperty(
        Config.NODE_UPDATE_CONFIG_PREFIX + Config.ACCESS_FUNCTION_SUFFIX,
        AccessDistMode.ROUND_ROBIN.name());
    props.setProperty(
        Config.NODE_UPDATE_CONFIG_PREFIX + Config.ACCESS_CONFIG_SUFFIX, "0");
    props.setProperty(
        Config.NODE_DELETE_CONFIG_PREFIX + Config.ACCESS_FUNCTION_SUFFIX,
        UniformDistribution.class.getName());

    props.setProperty(Config.NODE_DATASIZE, "1024");
    props.setProperty(Config.NODE_ADD_DATAGEN,
        UniformDataGenerator.class.getName());
    props.setProperty(
        Config.NODE_ADD_DATAGEN_PREFIX + Config.UNIFORM_GEN_STARTBYTE, "0");
    props.setProperty(
        Config.NODE_ADD_DATAGEN_PREFIX + Config.UNIFORM_GEN_ENDBYTE, "255");
    // NOTE(review): NODE_DATASIZE is set twice with the same value — the
    // second set below is redundant but harmless
    props.setProperty(Config.NODE_DATASIZE, "1024");
    props.setProperty(Config.NODE_UP_DATAGEN,
        UniformDataGenerator.class.getName());
    props.setProperty(
        Config.NODE_UP_DATAGEN_PREFIX + Config.UNIFORM_GEN_STARTBYTE, "0");
    props.setProperty(
        Config.NODE_UP_DATAGEN_PREFIX + Config.UNIFORM_GEN_ENDBYTE, "255");
  }

  /**
   * Test the full workload with node and link ops to exercise the
   * requester
   * @throws Exception
   * @throws IOException
   */
  @Test
  public void testFullWorkload() throws IOException, Exception {
    long startId = 532;
    long idCount = getIDCount();
    int linksPerId = 5;

    int requests = getRequestCount();
    long timeLimit = requests;

    Properties props = basicProps();
    fillLoadProps(props, startId, idCount, linksPerId);

    // Op mix as fractions of all requests; converted to percentages below
    double p_add = 0.1, p_del = 0.05, p_up = 0.05, p_count = 0.05,
           p_multiget = 0.05, p_getlinks = 0.1,
           p_add_node = 0.2, p_up_node = 0.05, p_del_node = 0.05,
           p_get_node = 0.3;
    fillReqProps(props, startId, idCount, requests, timeLimit,
        p_add * 100, p_del * 100, p_up * 100, p_count * 100,
        p_multiget * 100, p_getlinks * 100,
        p_add_node * 100, p_up_node * 100, p_del_node * 100,
        p_get_node * 100);

    try {
      Random rng = LinkStoreTestBase.createRNG();

      // Load links, then nodes, with fresh store handles for each phase
      LinkStoreTestBase.serialLoad(rng, logger, props, getStoreHandle(false));
      serialLoadNodes(rng, logger, props, getStoreHandle(false));

      DummyLinkStore reqStore = getStoreHandle(false);
      LatencyStats latencyStats = new LatencyStats(1);
      RequestProgress tracker =
          new RequestProgress(logger, requests, timeLimit, 1, 10000);

      // Test both link and node requests
      LinkBenchRequest requester = new LinkBenchRequest(reqStore, reqStore,
          props, latencyStats, System.out, tracker, rng, 0, 1);

      tracker.startTimer();
      requester.run();

      latencyStats.displayLatencyStats();
      latencyStats.printCSVStats(System.out, true);

      // Every request must have been accounted for by exactly one counter
      assertEquals(requests, reqStore.adds + reqStore.updates +
          reqStore.deletes + reqStore.countLinks + reqStore.multigetLinks +
          reqStore.getLinkLists + reqStore.addNodes + reqStore.updateNodes +
          reqStore.deleteNodes + reqStore.getNodes);

      // Check that the proportion of operations is roughly right - within 1%
      // For now, updates are actually implemented as add operations
      assertTrue(Math.abs(
          reqStore.adds / (double)requests - (p_add + p_up)) < 0.01);
      assertTrue(Math.abs(reqStore.updates / (double)requests - 0.0) < 0.01);
      assertTrue(Math.abs(reqStore.deletes / (double)requests - p_del) < 0.01);
      assertTrue(Math.abs(
          reqStore.countLinks / (double)requests - p_count) < 0.01);
      assertTrue(Math.abs(
          reqStore.multigetLinks / (double)requests - p_multiget) < 0.01);
      assertTrue(Math.abs(
          reqStore.getLinkLists / (double)requests - p_getlinks) < 0.01);
      assertTrue(Math.abs(
          reqStore.addNodes / (double)requests - p_add_node) < 0.01);
      assertTrue(Math.abs(
          reqStore.updateNodes / (double)requests - p_up_node) < 0.01);
      assertTrue(Math.abs(
          reqStore.deleteNodes / (double)requests - p_del_node) < 0.01);
      assertTrue(Math.abs(
          reqStore.getNodes / (double)requests - p_get_node) < 0.01);

      // Request phase should never use bulk-load operations
      assertEquals(0, reqStore.bulkLoadCountOps);
      assertEquals(0, reqStore.bulkLoadLinkOps);
    } finally {
      // Best-effort cleanup: never mask the test result with a cleanup error
      try {
        LinkStoreTestBase.deleteIDRange(testDB, getStoreHandle(true),
            startId, idCount);
        deleteNodeIDRange(testDB, LinkStore.DEFAULT_NODE_TYPE,
            getStoreHandle(true), startId, idCount);
      } catch (Throwable t) {
        System.err.println("Error during cleanup:");
        t.printStackTrace();
      }
    }
  }

  /**
   * Delete all nodes in ID range specified
   */
  static void deleteNodeIDRange(String testDB, int type,
      DummyLinkStore storeHandle, long startId, long idCount)
      throws Exception {
    for (long i = startId; i < startId + idCount; i++) {
      storeHandle.deleteNode(testDB, type, i);
    }
  }

  /** Single-threaded node load via NodeLoader, after resetting the store. */
  private void serialLoadNodes(Random rng, Logger logger, Properties props,
      DummyLinkStore storeHandle) throws Exception {
    storeHandle.initialize(props, Phase.LOAD, 0);
    storeHandle.resetNodeStore(testDB, ConfigUtil.getLong(props,
        Config.MIN_ID));
    storeHandle.close(); // Close before passing to loader
    LatencyStats stats = new LatencyStats(1);
    NodeLoader loader = new NodeLoader(props, logger, storeHandle, rng,
        stats, System.out, 0);
    loader.run();
  }
}

================================================ FILE:
src/test/java/com/facebook/LinkBench/HarmonicTest.java ================================================
/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.LinkBench;

import junit.framework.TestCase;

import org.junit.Test;
import org.junit.experimental.categories.Category;

import com.facebook.LinkBench.distributions.ApproxHarmonic;
import com.facebook.LinkBench.distributions.Harmonic;
import com.facebook.LinkBench.testtypes.SlowTest;

/**
 * Checks exact generalized-harmonic sums against precomputed values and
 * verifies the fast approximation stays within 0.05% of the exact result.
 */
@Category(SlowTest.class)
public class HarmonicTest extends TestCase {

  @Test
  @Category(SlowTest.class)
  public void testHarmonic() {
    // Precalculated values of H(n, m) = sum_{k=1..n} 1/k^m
    assertEquals(1, Harmonic.generalizedHarmonic(1, 0.8), 0.001);
    assertEquals(1.99534, Harmonic.generalizedHarmonic(10, 1.5), 0.00001);
    assertEquals(61.8010, Harmonic.generalizedHarmonic(1000, 0.5), 0.001);
    assertEquals(207.541, Harmonic.generalizedHarmonic(1000000, 0.7), 0.002);
    assertEquals(2679914.0, Harmonic.generalizedHarmonic(12345678, 0.1), 1);
  }

  // Range of shape parameters to try for the approximation
  static final double SHAPES[] = {0.01, 0.1, 0.5, 0.9, 0.99};

  @Test
  public void testApproxFast() {
    // Small n (up to 2^12): quick enough for the default test run
    for (long i = 0; i < 16; i+=4) {
      testApproxHelper(i);
    }
  }

  @Test
  @Category(SlowTest.class)
  public void testApproxSlow() {
    // Large n (2^16 up to 2^28): slow, gated behind the SlowTest category
    for (long i = 16; i < 30; i+=4) {
      testApproxHelper(i);
    }
  }

  /** Test that approximation is close to actual for a range of shapes and ns */
  private void testApproxHelper(long i) {
    long n = (long)Math.pow(2, i);
    for (double shape: SHAPES) {
      double exact = Harmonic.generalizedHarmonic(n, shape);
      long start = System.currentTimeMillis();
      double approx = ApproxHarmonic.generalizedHarmonic(n, shape);
      long end = System.currentTimeMillis();
      System.err.format("ApproxHarmonic.generalizedHarmonic(%d, %f) " +
          "took %.3fs\n", n, shape, (end - start) / 1000.0);

      double err = approx - exact;
      double errPc = (err / exact) * 100.0;
      System.err.format("ApproxHarmonic.generalizedHarmonic(%d, %f) = %f. " +
          "exact=%f err=%f err%%=%.2f\n", n, shape, approx, exact, err,
          errPc);
      double errThresh = 0.05;
      assertTrue(String.format("Err%%=%.3f must be < 0.05%%",
          Math.abs(errPc)), Math.abs(errPc) < errThresh);
    }
  }
}

================================================ FILE: src/test/java/com/facebook/LinkBench/ID2ChooserTest.java ================================================
/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ package com.facebook.LinkBench; import java.util.HashMap; import java.util.HashSet; import java.util.Properties; import java.util.Random; import junit.framework.TestCase; import org.junit.Test; import com.facebook.LinkBench.distributions.ID2Chooser; import com.facebook.LinkBench.distributions.ZipfDistribution; public class ID2ChooserTest extends TestCase { Properties props; @Override public void setUp() { props = new Properties(); props.setProperty(Config.RANDOM_ID2_MAX, "0"); props.setProperty(Config.NLINKS_FUNC, ZipfDistribution.class.getName()); props.setProperty(Config.NLINKS_PREFIX + "shape", "0.5"); props.setProperty(Config.NLINKS_PREFIX + "scale", "1000000"); } @Test public void testNoLoadCollisions() { long n = 10000; long min = 500; long max = n + min; long seed = 5313242; Random rng = new Random(seed); ID2Chooser c = new ID2Chooser(props, min, max, 1, 1); // Check we don't get the same id2 more than once (i.e. duplicate links) int nlinks = 50; HashMap seen = new HashMap(); long id1 = 1234; for (int i = 0; i < nlinks; i++) { long id2 = c.chooseForLoad(rng, id1, LinkStore.DEFAULT_LINK_TYPE, i); Integer j = seen.get(id2); if (j != null) { fail("Same link generated twice: (" + id1 + ", " + id2 + ") for " + " indices " + j + " and " + i); } seen.put(id2, i); } } @Test public void testChooseForOp() { // Currently just exercise the code: I don't have a good way to verify the // output is as intended long seed = 1643; Random rng = new Random(seed); long n = 10000; long min = 500; long max = n + min; int trials = 1000; ID2Chooser c = new ID2Chooser(props, min, max, 1, 1); for (int i = 0; i < trials; i++) { long id2 = c.chooseForOp(rng, i + min, LinkStore.DEFAULT_LINK_TYPE, 1.0); assert(id2 >= min); id2 = c.chooseForOp(rng, i + min, LinkStore.DEFAULT_LINK_TYPE, 0.5); assert(id2 >= min); } } /** * Check that the choosing mechanism is generating id2s with the * right probability of a loaded link matching */ @Test public void testMatchPercent() { long seed = 
15325435L; Random rng = new Random(seed); int minid = 500, maxid=1000000; ID2Chooser chooser = new ID2Chooser(props, minid, maxid, 1, 0); for (int id1 = minid; id1 < maxid; id1 += 3763) { HashSet existing = new HashSet(); long nlinks = chooser.calcTotalLinkCount(id1); for (long i = 0; i < nlinks; i++) { long id2 = chooser.chooseForLoad(rng, id1, LinkStore.DEFAULT_LINK_TYPE, i); existing.add(id2); } int trials = 10000; int hit = 0; // hit for prob = 50% for (int i = 0; i < trials; i++) { // Test with 100% prob of hit long id2 = chooser.chooseForOp(rng, id1, LinkStore.DEFAULT_LINK_TYPE, 1.0); assertTrue(existing.contains(id2) || existing.size() == 0); // Test with 50% prob of hit id2 = chooser.chooseForOp(rng, id1, LinkStore.DEFAULT_LINK_TYPE, 0.5); if (existing.contains(id2)) { hit++; } } double hitPercent = hit / (double)trials; if (existing.size() > 0 && Math.abs(0.5 - hitPercent) > 0.05) { fail(hitPercent * 100 + "% of ids2 were hits for id1 " + id1); } } } /** * Check that link counts work for multiple link types */ @Test public void testLinkCount() { long startid = 1, maxid = 1000; Properties newProps = new Properties(props); int nLinkTypes = 10; newProps.setProperty(Config.LINK_TYPE_COUNT, Integer.toString(nLinkTypes)); ID2Chooser chooser = new ID2Chooser(newProps, startid, maxid, 1, 0); long linkTypes[] = chooser.getLinkTypes(); assertEquals(nLinkTypes, linkTypes.length); // Check it works for some different IDs for (long id = startid; id < maxid; id += 7) { long totalCount = 0; for (long linkType: linkTypes) { totalCount += chooser.calcLinkCount(id, linkType); } assertEquals(chooser.calcTotalLinkCount(id), totalCount); } } } ================================================ FILE: src/test/java/com/facebook/LinkBench/InvertibleShufflerTest.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.facebook.LinkBench;

import java.util.Arrays;

import org.junit.Test;

import junit.framework.TestCase;

/**
 * Check that the shuffler obeys expected invariants
 */
public class InvertibleShufflerTest extends TestCase {

  @Test
  public void testShuffleSmallRange() {
    // Exhaustively verify small ranges with a variety of shuffler params
    long seed = 1234;
    testShuffle(0, 10, true, seed, 1);
    testShuffle(0, 4, true, seed, 2);
    testShuffle(0, 10, true, seed, 5);
    testShuffle(0, 7, true, seed, 3);
    testShuffle(0, 10, true, seed, 7);
    testShuffle(0, 10, true, seed, 11);
    testShuffle(0, 10, true, seed, 15);
    testShuffle(0, 100, true, seed, 11);
  }

  @Test
  public void testShuffleMedRange() {
    testShuffle(512, 10543, false, 13, 7);
    testShuffle(512, 10543, false, 13, 7, 27, 140);
  }

  @Test
  public void testShuffleLargeRange() {
    testShuffle(12345, 123456, false, 13, 7, 27, 140);
  }

  /**
   * Check that result is a valid permutation (i.e. a 1->1 mapping)
   * @param minId
   * @param maxId
   * @param params shuffler parameters; only the first two are used by
   *        InvertibleShuffler (seed, shuffle groups) — extras appear to be
   *        leftovers from an older Shuffler API (see commented-out code)
   */
  public static void testShuffle(int minId, int maxId, boolean print,
      long... params) {
    String shuffleDesc = String.format(
        "Permuting range [%d,%d) with params %s", minId, maxId,
        Arrays.toString(params));
    if (print) {
      System.err.println(shuffleDesc);
    }
    int n = maxId - minId;
    //ProbDistShuffler shuf = new ProbDistShuffler(params[0], (int)params[1], n);
    InvertibleShuffler shuf = new InvertibleShuffler(params[0],
        (int)params[1], n);

    long reverse[] = new long[n]; // Store the reverse permutation
    // Store if ID has appeared (inited to false)
    boolean exists[] = new boolean[n];

    for (int i = minId; i < maxId; i++) {
      //long lj = Shuffler.getPermutationValue(i, minId, maxId, params);
      long lj = minId + shuf.permute(i - minId);
      // Round-trip: inverting the permuted value must give back i
      assertEquals(i, minId + shuf.invertPermute(lj - minId));

      if (lj < minId || lj >= maxId) {
        fail(String.format("Error with test %s, permutation result p(%d) = %d"
            + " out of range [%d, %d)", shuffleDesc, i, lj, minId, maxId));
      }
      assertTrue(lj >= minId);
      assertTrue(lj < maxId);
      int j = (int)lj; // Must be in integer range

      // Injectivity: no two inputs may map to the same output
      if (exists[j - minId]) {
        fail(String.format(
            "Error with test %s: collision. p(%d) = p(%d) = %d",
            shuffleDesc, i, reverse[j - minId], j));
      }
      reverse[j - minId] = i;
      exists[j - minId] = true;
      if (print) {
        System.err.print(" " + j);
      }
    }
    if (print) {
      System.err.println();
    }
    /* If we made it to here there were no collisions and we know all
     * n ids appeared in the permutation */
  }
}

================================================ FILE: src/test/java/com/facebook/LinkBench/LinkStoreTestBase.java ================================================
/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.IOException; import java.util.Arrays; import java.util.Properties; import java.util.Random; import java.util.concurrent.BlockingQueue; import java.util.concurrent.LinkedBlockingQueue; import junit.framework.TestCase; import org.apache.log4j.Logger; import org.junit.Test; import com.facebook.LinkBench.LinkBenchLoad.LoadChunk; import com.facebook.LinkBench.LinkBenchLoad.LoadProgress; import com.facebook.LinkBench.LinkBenchRequest.RequestProgress; import com.facebook.LinkBench.distributions.AccessDistributions.AccessDistMode; import com.facebook.LinkBench.distributions.GeometricDistribution; import com.facebook.LinkBench.distributions.ZipfDistribution; import com.facebook.LinkBench.distributions.LinkDistributions.LinkDistMode; import com.facebook.LinkBench.distributions.UniformDistribution; import com.facebook.LinkBench.generators.UniformDataGenerator; import com.facebook.LinkBench.stats.LatencyStats; /** * This test implements unit tests that *all* implementations of LinkStore * should pass. * * Different implementations of LinkStore will require different configuration * and different setups for testing, so in order to test out a particular * LinkStore implementation, you can subclass this test and implement the * required abstract methods so that the test store is initialized correctly * and all required configuration properties are filled in. * * @author tarmstrong */ public abstract class LinkStoreTestBase extends TestCase { protected String testDB = "linkbench_unittestdb"; private Logger logger = Logger.getLogger(""); /** * Reinitialize link store database properties. * Should attempt to clean database * @param props Properties for test DB. 
   * Override any required properties in this property dict */
  protected abstract void initStore(Properties props)
    throws IOException, Exception;

  /**
   * Override to vary size of test
   * @return number of ids to use in testing
   */
  protected long getIDCount() {
    return 50000;
  }

  /**
   * Override to vary number of requests in test
   */
  protected int getRequestCount() {
    return 100000;
  }

  /**
   * Override to vary maximum number of threads
   */
  protected int maxConcurrentThreads() {
    return Integer.MAX_VALUE;
  }

  /** Get a new handle to the initialized store, wrapped in
   * DummyLinkStore
   * @return new handle to linkstore
   */
  protected abstract DummyLinkStore getStoreHandle(boolean initialized)
    throws IOException, Exception;

  // Re-create a clean test store before each test case runs
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    initStore(basicProps());
  }

  /**
   * Provide properties for basic test store
   * @return minimal property set: just the test database id
   */
  protected Properties basicProps() {
    Properties props = new Properties();
    props.setProperty(Config.DBID, testDB);
    return props;
  }

  /**
   * Fill in the properties needed to drive a LinkBenchLoad run:
   * id range [startId, startId + idCount), a constant number of links
   * per id, and a uniform data generator over bytes 0-255.
   */
  public static void fillLoadProps(Properties props, long startId, long idCount,
      int linksPerId) {
    props.setProperty(Config.MIN_ID, Long.toString(startId));
    props.setProperty(Config.MAX_ID, Long.toString(startId + idCount));
    props.setProperty(Config.RANDOM_ID2_MAX, "0");
    // Fixed number of rows
    props.setProperty(Config.NLINKS_FUNC, LinkDistMode.CONST.name());
    props.setProperty(Config.NLINKS_CONFIG, "0"); // ignored
    props.setProperty(Config.NLINKS_DEFAULT, Integer.toString(linksPerId));
    props.setProperty(Config.DISPLAY_FREQ, "10"); // Show stats frequently
    props.setProperty(Config.MAX_STAT_SAMPLES, "10000");
    props.setProperty(Config.LINK_DATASIZE, "100.0");
    props.setProperty(Config.LINK_ADD_DATAGEN,
                      UniformDataGenerator.class.getName());
    props.setProperty(Config.LINK_ADD_DATAGEN_PREFIX +
                      Config.UNIFORM_GEN_STARTBYTE, "0");
    props.setProperty(Config.LINK_ADD_DATAGEN_PREFIX +
                      Config.UNIFORM_GEN_ENDBYTE, "255");
  }

  /**
   * Fill in the properties needed to drive a LinkBenchRequest run.
   * Operation probabilities are expressed as percentages.
   */
  public static void fillReqProps(Properties props, long startId, long idCount,
      int requests,
long timeLimit, double p_addlink, double p_deletelink, double p_updatelink, double p_countlink, double p_getlink, double p_getlinklist, boolean enableMultiget) { props.setProperty(Config.MIN_ID,Long.toString(startId)); props.setProperty(Config.MAX_ID, Long.toString(startId + idCount)); props.setProperty(Config.NUM_REQUESTS, Long.toString(requests)); props.setProperty(Config.MAX_TIME, Long.toString(timeLimit)); props.setProperty(Config.RANDOM_ID2_MAX, "0"); props.setProperty(Config.ID2GEN_CONFIG, "0"); props.setProperty(Config.PR_ADD_LINK, Double.toString(p_addlink)); props.setProperty(Config.PR_DELETE_LINK, Double.toString(p_deletelink)); props.setProperty(Config.PR_UPDATE_LINK, Double.toString(p_updatelink)); props.setProperty(Config.PR_COUNT_LINKS, Double.toString(p_countlink)); props.setProperty(Config.PR_GET_LINK, Double.toString(p_getlink)); props.setProperty(Config.PR_GET_LINK_LIST, Double.toString(p_getlinklist)); props.setProperty(Config.WRITE_FUNCTION, UniformDistribution.class.getName()); props.setProperty(Config.READ_FUNCTION, AccessDistMode.RECIPROCAL.name()); props.setProperty(Config.READ_CONFIG, "0"); // Test blending on reads props.setProperty(Config.READ_UNCORR_BLEND, "0.5"); props.setProperty(Config.READ_UNCORR_FUNCTION, ZipfDistribution.class.getName()); props.setProperty(Config.READ_UNCORR_CONFIG_PREFIX + "shape", "0.5"); if (enableMultiget) { props.setProperty(Config.LINK_MULTIGET_DIST, GeometricDistribution.class.getName()); props.setProperty(Config.LINK_MULTIGET_DIST_MIN, "0"); props.setProperty(Config.LINK_MULTIGET_DIST_MAX, "10"); props.setProperty(Config.LINK_MULTIGET_DIST_PREFIX + GeometricDistribution.PROB_PARAM_KEY, "0.8"); } props.setProperty(Config.LINK_DATASIZE, "200"); props.setProperty(Config.LINK_ADD_DATAGEN, UniformDataGenerator.class.getName()); props.setProperty(Config.LINK_ADD_DATAGEN_PREFIX + Config.UNIFORM_GEN_STARTBYTE, "0"); props.setProperty(Config.LINK_ADD_DATAGEN_PREFIX + Config.UNIFORM_GEN_ENDBYTE, "255"); 
props.setProperty(Config.LINK_UP_DATAGEN, UniformDataGenerator.class.getName()); props.setProperty(Config.LINK_UP_DATAGEN_PREFIX + Config.UNIFORM_GEN_STARTBYTE, "0"); props.setProperty(Config.LINK_UP_DATAGEN_PREFIX + Config.UNIFORM_GEN_ENDBYTE, "255"); } /** * Utility to create a random number generator and print * the seed for later reproducibility of test failures * @return */ static Random createRNG() { long randSeed = System.currentTimeMillis(); System.out.println("Random seed: " + randSeed); Random rng = new Random(randSeed); return rng; } /** Simple test with multiple operations on single link */ @Test public void testOneLink() throws IOException, Exception { DummyLinkStore store = getStoreHandle(true); long id1 = 1123, id2 = 1124, ltype = 321; Link writtenLink = new Link(id1, ltype, id2, LinkStore.VISIBILITY_DEFAULT, new byte[] {0x1}, 1, 1994); store.addLink(testDB, writtenLink, true); if (store.isRealLinkStore()) { Link readBack = store.getLink(testDB, id1, ltype, id2); assertNotNull(readBack); if (!writtenLink.equals(readBack)) { throw new Exception("Expected " + readBack.toString() + " to equal " + writtenLink.toString()); } assertEquals(1, store.countLinks(testDB, id1, ltype)); } // Try expunge store.deleteLink(testDB, id1, ltype, id2, true, true); assertNull(store.getLink(testDB, id1, ltype, id2)); assertNull(store.getLinkList(testDB, id1, ltype)); assertEquals(0, store.countLinks(testDB, id1, ltype)); store.addLink(testDB, writtenLink, true); if (store.isRealLinkStore()) { assertNotNull(store.getLink(testDB, id1, ltype, id2)); assertEquals(1, store.countLinks(testDB, id1, ltype)); } // try hiding store.deleteLink(testDB, id1, ltype, id2, true, false); if (store.isRealLinkStore()) { Link hidden = store.getLink(testDB, id1, ltype, id2); assertNotNull(hidden); assertEquals(LinkStore.VISIBILITY_HIDDEN, hidden.visibility); // Check it is same up to visibility Link check = hidden.clone(); check.visibility = LinkStore.VISIBILITY_DEFAULT; 
assertTrue(writtenLink.equals(check)); assertEquals(0, store.countLinks(testDB, id1, ltype)); assertNull(store.getLinkList(testDB, id1, ltype)); } // Update link: check it is unhidden store.updateLink(testDB, writtenLink, true); if (store.isRealLinkStore()) { assertTrue(writtenLink.equals(store.getLink(testDB, id1, ltype, id2))); assertEquals(1, store.countLinks(testDB, id1, ltype)); Link links[] = store.getLinkList(testDB, id1, ltype); assertEquals(1, links.length); assertTrue(writtenLink.equals(links[0])); } // Update link but don't change, check nothing changes store.updateLink(testDB, writtenLink, true); if (store.isRealLinkStore()) { assertTrue(writtenLink.equals(store.getLink(testDB, id1, ltype, id2))); assertEquals(1, store.countLinks(testDB, id1, ltype)); Link links[] = store.getLinkList(testDB, id1, ltype); assertEquals(1, links.length); assertTrue(writtenLink.equals(links[0])); } store.deleteLink(testDB, id1, ltype, id2, true, true); } @Test public void testMultipleLinks() throws Exception, IOException { DummyLinkStore store = getStoreHandle(true); long ida = 5434, idb = 5435, idc = 9999, idd = 9998; long ltypea = 1, ltypeb = 2; byte data[] = new byte[] {0xf, 0xa, 0xc, 0xe, 0xb, 0x0, 0x0, 0xc}; long t = 10000000; Link links[] = new Link[] { new Link(ida, ltypea, idc, LinkStore.VISIBILITY_DEFAULT, data, 1, System.currentTimeMillis()), new Link(ida, ltypeb, idc, LinkStore.VISIBILITY_DEFAULT, data, 1, System.currentTimeMillis()), new Link(idb, ltypeb, ida, LinkStore.VISIBILITY_DEFAULT, data, 1, t + 1), new Link(idb, ltypeb, idb, LinkStore.VISIBILITY_DEFAULT, data, 1, t), new Link(idb, ltypeb, idc, LinkStore.VISIBILITY_HIDDEN, data, 1, t - 2), new Link(idb, ltypeb, idd, LinkStore.VISIBILITY_DEFAULT, data, 1, t + 3), }; for (Link l: links) { store.addLink(testDB, l, true); } if (store.isRealLinkStore()) { // Check counts assertEquals(1, store.countLinks(testDB, ida, ltypea)); assertEquals(1, store.countLinks(testDB, ida, ltypeb)); assertEquals(0, 
store.countLinks(testDB, idb, ltypea)); assertEquals(3, store.countLinks(testDB, idb, ltypeb)); Link retrieved[]; retrieved = store.getLinkList(testDB, ida, ltypea); assertEquals(1, retrieved.length); assertTrue(links[0].equals(retrieved[0])); retrieved = store.getLinkList(testDB, ida, ltypeb); assertEquals(1, retrieved.length); assertTrue(links[1].equals(retrieved[0])); retrieved = store.getLinkList(testDB, idb, ltypeb); // Check link list, Four matching links, one hidden checkExpectedList(store, idb, ltypeb, links[5], links[2], links[3]); // Check limit retrieved = store.getLinkList(testDB, idb, ltypeb, 0, t + 100, 0, 1); assertEquals(1, retrieved.length); assertTrue(links[5].equals(retrieved[0])); //Check offset + limit retrieved = store.getLinkList(testDB, idb, ltypeb, 0, t + 100, 1, 2); assertEquals(2, retrieved.length); assertTrue(links[2].equals(retrieved[0])); assertTrue(links[3].equals(retrieved[1])); // Check range filtering retrieved = store.getLinkList(testDB, idb, ltypeb, t + 1, t + 2, 0, Integer.MAX_VALUE); assertEquals(1, retrieved.length); assertTrue(links[2].equals(retrieved[0])); } } /** * Simple test to make sure multiget works * @throws IOException * @throws Exception */ @Test public void testMultiget() throws IOException, Exception { DummyLinkStore store = getStoreHandle(true); long id1 = 99999999999L; Link a = new Link(id1, LinkStore.DEFAULT_LINK_TYPE, 42, LinkStore.VISIBILITY_DEFAULT, new byte[0], 1, System.currentTimeMillis()); Link b = a.clone(); b.id2 = 43; store.addLink(testDB, a, true); store.addLink(testDB, b, true); // Retrieve the two added links Link l[] = store.multigetLinks(testDB, a.id1, a.link_type, new long[] {a.id2, b.id2, 1234}); if (store.isRealLinkStore()) { assertEquals(2, l.length); // Could be returned in either order if (a.equals(l[0])) { assertTrue(b.equals(l[1])); } else { assertTrue(b.equals(l[0])); assertTrue(a.equals(l[1])); } } } /** * Regression test for flaw in MySql where visibility is assumed to * be default on 
add */ @Test public void testHiding() throws Exception { DummyLinkStore store = getStoreHandle(true); Link l = new Link(1, 1, 1, LinkStore.VISIBILITY_HIDDEN, new byte[] {0x1}, 1, System.currentTimeMillis()); store.addLink(testDB, l, true); checkExpectedList(store, 1, 1, new Link[0]); // Check that updating works right store.deleteLink(testDB, 1, 1, 1, true, false); checkExpectedList(store, 1, 1, new Link[0]); // Make it visible l.visibility = LinkStore.VISIBILITY_DEFAULT; store.addLink(testDB, l, true); checkExpectedList(store, 1, 1, l); // Expunge store.deleteLink(testDB, 1, 1, 1, true, true); checkExpectedList(store, 1, 1, new Link[0]); } /** * Test that all fields are updated correctly on update * @throws Exception * @throws IOException */ @Test public void testOverwrite() throws IOException, Exception { long id1 = 314214212421L; Link orig = new Link(id1, 1, 1, LinkStore.VISIBILITY_DEFAULT, new byte[] {'1','1','1'}, 0, 1); Link changed = orig.clone(); changed.data = new byte[] {'2', '2', '2'}; changed.version = 1; changed.time = 2; DummyLinkStore store = getStoreHandle(true); store.addLink(testDB, orig, true); // Check added ok Link tmp = store.getLink(testDB, orig.id1, orig.link_type, orig.id2); if (store.isRealLinkStore()) { assertTrue(orig.equals(tmp)); assertEquals(1, store.countLinks(testDB, orig.id1, orig.link_type)); } // Overwrite, then check update worked for all fields store.addLink(testDB, changed, true); tmp = store.getLink(testDB, orig.id1, orig.link_type, orig.id2); if (store.isRealLinkStore()) { assertTrue(changed.equals(tmp)); assertEquals(1, store.countLinks(testDB, orig.id1, orig.link_type)); } // Add hidden link, check update happened Link hidden = orig.clone(); hidden.visibility = LinkStore.VISIBILITY_HIDDEN; store.addLink(testDB, hidden, true); tmp = store.getLink(testDB, orig.id1, orig.link_type, orig.id2); if (store.isRealLinkStore()) { assertTrue(hidden.equals(tmp)); assertEquals(0, store.countLinks(testDB, orig.id1, orig.link_type)); } } 
/** * Regression test for bad handling of string escaping */ @Test public void testSqlInjection() throws IOException, Exception { Link l = new Link(1, 1, 1, LinkStore.VISIBILITY_DEFAULT, "' asdfasdf".getBytes(), 1, 1); byte updateData[] = "';\\".getBytes(); testAddThenUpdate(l, updateData); } private void testAddThenUpdate(Link l, byte[] updateData) throws IOException, Exception { DummyLinkStore ls = getStoreHandle(true); ls.addLink(testDB, l, true); Link l2 = ls.getLink(testDB, 1, 1, 1); if (ls.isRealLinkStore()) { assertNotNull(l2); assertTrue(l.equals(l2)); } l.data = updateData; ls.updateLink(testDB, l, true); l2 = ls.getLink(testDB, 1, 1, 1); if (ls.isRealLinkStore()) { assertNotNull(l2); assertTrue(l.equals(l2)); } } /** Check handling of bytes 0-127 */ @Test public void testBinary1() throws IOException, Exception { binaryDataTest(0, 128); } /** Check handling of bytes 160-256 */ @Test public void testBinary2() throws IOException, Exception { int start = 160; binaryDataTest(start, 256-start); } /** Check handling of bytes 128-159 */ @Test public void testBinary3() throws IOException, Exception { int start = 128; binaryDataTest(start, 159-start); } /** * Test insertion/update of binary data: insert binary string with * bytes [startByte:startByte + dataMaxSize) and read back * @throws IOException * @throws Exception */ private void binaryDataTest(int startByte, int dataMaxSize) throws IOException, Exception { byte data[] = new byte[dataMaxSize]; for (int i = 0; i < data.length; i++) { byte b = (byte)((i + startByte) % 256); data[i] = b; } Link l = new Link(1, 1, 1, LinkStore.VISIBILITY_DEFAULT, data, 1, 1); // Different length and data byte updateData[] = new byte[dataMaxSize/2]; for (int i = 0; i < updateData.length; i++) { updateData[i] = (byte)((i + startByte ) % 256); } testAddThenUpdate(l, updateData); } /** * Generic test for a loader using a wrapped LinkStore * implementation * @throws Exception * @throws IOException */ @Test public void testLoader() 
      throws IOException, Exception {
    long startId = 1;
    long idCount = getIDCount();
    int linksPerId = 3;

    Properties props = basicProps();
    fillLoadProps(props, startId, idCount, linksPerId);

    // Re-init with the load properties; initStore should clean the database
    initStore(props);
    DummyLinkStore store = getStoreHandle(false);

    try {
      Random rng = createRNG();

      serialLoad(rng, logger, props, store);
      long testEndTime = System.currentTimeMillis();

      assertFalse(store.initialized); // Check was closed

      /* Validate results */
      if (store.bulkLoadBatchSize() > 0) {
        assertEquals(idCount, store.bulkLoadCountRows);
      }
      // Every link must have been stored either via bulk load or a single add
      assertEquals(idCount * linksPerId,
                   store.bulkLoadLinkRows + store.adds);

      if (store.isRealLinkStore()) {
        // old store was closed by loader
        store.initialize(props, Phase.REQUEST, 0);
        // read back data and sanity check
        validateLoadedData(logger, store, startId, idCount, linksPerId,
                           testEndTime);
      }
    } finally {
      // Ensure the handle is usable for cleanup even if the test failed early
      if (!store.initialized) {
        store.initialize(props, Phase.REQUEST, 0);
      }
      deleteIDRange(testDB, store, startId, idCount);
    }
  }

  /**
   * Run the requester against the store.
   * This test validates both the requester (by looking at counts to make
   * sure it at least did the right number of ops) and the LinkStore
   * (by stress-testing it).
* @throws Exception * @throws IOException */ @Test public void testRequester() throws IOException, Exception { long startId = 532; long idCount = getIDCount(); int linksPerId = 5; int requests = getRequestCount(); long timeLimit = requests; Properties props = basicProps(); fillLoadProps(props, startId, idCount, linksPerId); double p_add = 0.2, p_del = 0.2, p_up = 0.1, p_count = 0.1, p_multiget = 0.2, p_getlinks = 0.2; fillReqProps(props, startId, idCount, requests, timeLimit, p_add * 100, p_del * 100, p_up * 100, p_count * 100, p_multiget * 100, p_getlinks * 100, true); try { Random rng = createRNG(); serialLoad(rng, logger, props, getStoreHandle(false)); DummyLinkStore reqStore = getStoreHandle(false); LatencyStats latencyStats = new LatencyStats(1); RequestProgress tracker = new RequestProgress(logger, requests, timeLimit, 0, 1000); LinkBenchRequest requester = new LinkBenchRequest(reqStore, null, props, latencyStats, System.out, tracker, rng, 0, 1); tracker.startTimer(); requester.run(); latencyStats.displayLatencyStats(); latencyStats.printCSVStats(System.out, true); assertEquals(requests, reqStore.adds + reqStore.updates + reqStore.deletes + reqStore.countLinks + reqStore.multigetLinks + reqStore.getLinkLists); // Check that the proportion of operations is roughly right - within 1% // For now, updates are actually implemented as add operations assertTrue(Math.abs(reqStore.adds / (double)requests - (p_add + p_up)) < 0.01); assertTrue(Math.abs(reqStore.updates / (double)requests - 0.0) < 0.01); assertTrue(Math.abs(reqStore.deletes / (double)requests - p_del) < 0.01); assertTrue(Math.abs(reqStore.countLinks / (double)requests - p_count) < 0.01); assertTrue(Math.abs(reqStore.multigetLinks / (double)requests - p_multiget) < 0.01); assertTrue(Math.abs(reqStore.getLinkLists / (double)requests - p_getlinks) < 0.01); assertEquals(0, reqStore.bulkLoadCountOps); assertEquals(0, reqStore.bulkLoadLinkOps); } finally { deleteIDRange(testDB, getStoreHandle(true), startId, 
          idCount);
    }
    System.err.println("Done!");
  }

  /**
   * Test that the requester throttling slows down requests
   * @throws Exception
   * @throws IOException
   */
  @Test
  public void testRequesterThrottling() throws IOException, Exception {
    long startId = 1000000;
    // Small test
    long idCount = getIDCount() / 10;
    int linksPerId = 3;

    Properties props = basicProps();
    int requests = 2000;
    long timeLimit = requests;
    int requestsPerSec = 500; // Limit to fairly low rate

    fillLoadProps(props, startId, idCount, linksPerId);
    fillReqProps(props, startId, idCount, requests, timeLimit,
                 20, 20, 10, 10, 20, 20, false);
    props.setProperty("requestrate", Integer.toString(requestsPerSec));

    try {
      Random rng = createRNG();
      serialLoad(rng, logger, props, getStoreHandle(false));

      RequestProgress tracker = new RequestProgress(logger, requests,
                                                    timeLimit, 2, 1000);
      DummyLinkStore reqStore = getStoreHandle(false);
      LinkBenchRequest requester = new LinkBenchRequest(reqStore, null, props,
              new LatencyStats(1), System.out, tracker, rng, 0, 1);

      long startTime = System.currentTimeMillis();
      tracker.startTimer();
      requester.run();
      long endTime = System.currentTimeMillis();

      // Every request must be accounted for by exactly one operation type
      assertEquals(requests, reqStore.adds + reqStore.updates +
          reqStore.deletes + reqStore.countLinks + reqStore.multigetLinks +
          reqStore.getLinkLists);

      double actualArrivalRate = 1000 * requests / (double)(endTime - startTime);
      System.err.println("Expected request rate: " + requestsPerSec +
          " actual request rate: " + actualArrivalRate);
      // Check that the actual rate isn't more than 10% faster than the
      // configured rate. Only the upper bound is asserted: a slow machine
      // can legitimately run below the target rate.
      assertTrue("arrival rate within 10% of expected",
                 actualArrivalRate <= 1.1 * requestsPerSec);
    } finally {
      deleteIDRange(testDB, getStoreHandle(true), startId, idCount);
    }
    System.err.println("Done!");
  }

  /**
   * Check that the get link list history requests occur
   */
  @Test
  public void testHistoryRequests() throws Exception {
    long startId = 1000000;
    // Few ids with many links
    long idCount = 10;
    int rangeLimit = 10;
    int linksPerId = (int) (rangeLimit * 20);
Properties props = basicProps(); double pHistory = 0.25; // Quarter history requests int requests = 50000; // enough requests that we should get 20%+ history // queries with something in cache. Many requests to // ensure we cycle through lists multiple times long timeLimit = requests; fillLoadProps(props, startId, idCount, linksPerId); fillReqProps(props, startId, idCount, requests, timeLimit, 0, 0, 0, 0, 0, 100, false); // Use uniform distribution to make sure we get lots of lists in history props.setProperty(Config.READ_FUNCTION, UniformDistribution.class.getName()); // Test blending on reads props.setProperty(Config.READ_UNCORR_BLEND, "0.0"); props.setProperty(Config.PR_GETLINKLIST_HISTORY, Double.toString( pHistory * 100)); try { Random rng = createRNG(); serialLoad(rng, logger, props, getStoreHandle(false)); RequestProgress tracker = new RequestProgress(logger, requests, timeLimit, 0, 1000); DummyLinkStore reqStore = getStoreHandle(false); reqStore.setRangeLimit(rangeLimit); // Small limit for testing LatencyStats latencyStats = new LatencyStats(1); LinkBenchRequest requester = new LinkBenchRequest(reqStore, null, props, latencyStats, System.out, tracker, rng, 0, 1); tracker.startTimer(); requester.run(); latencyStats.displayLatencyStats(); assertEquals(requests, reqStore.getLinkLists); double actualPHistory = reqStore.getLinkListsHistory / (double) reqStore.getLinkLists; System.err.println("# getLinkLists: " + reqStore.getLinkLists + " # getLinkLists for history: " + reqStore.getLinkListsHistory + " " + (actualPHistory * 100) + "%"); if (reqStore.isRealLinkStore()) { assertTrue(actualPHistory <= 1.05 * pHistory); // Can be substantially lower due to history cache being empty assertTrue(actualPHistory >= 0.75 * pHistory); } } finally { deleteIDRange(testDB, getStoreHandle(true), startId, idCount); } } private void checkExpectedList(DummyLinkStore store, long id1, long ltype, Link... 
expected) throws Exception { if (!store.isRealLinkStore()) return; assertEquals(expected.length, store.countLinks(testDB, id1, ltype)); Link actual[] = store.getLinkList(testDB, id1, ltype); if (expected.length == 0) { assertNull(actual); } else { assertEquals(expected.length, actual.length); for (int i = 0; i < expected.length; i++) { if (!expected[i].equals(actual[i])) { fail("Mismatch between result lists. Expected: " + Arrays.toString(expected) + " Actual: " + Arrays.toString(actual)); } } } } /** * Use the LinkBenchLoad class to do a serial load of data * @param logger * @param props * @param store * @param idCount * @throws IOException * @throws Exception */ static void serialLoad(Random rng, Logger logger, Properties props, DummyLinkStore store) throws IOException, Exception { LatencyStats latencyStats = new LatencyStats(1); /* Load up queue with work */ BlockingQueue chunk_q = new LinkedBlockingQueue(); long startId = ConfigUtil.getLong(props, Config.MIN_ID); long idCount = ConfigUtil.getLong(props, Config.MAX_ID) - startId; int chunkSize = 128; int seq = 0; for (long i = startId; i < startId + idCount; i+= chunkSize) { LoadChunk chunk = new LoadChunk(seq, i, Math.min(idCount + startId, i + chunkSize), rng); chunk_q.add(chunk); seq++; } chunk_q.add(LoadChunk.SHUTDOWN); LoadProgress tracker = new LoadProgress(logger, idCount, 1000); tracker.startTimer(); LinkBenchLoad loader = new LinkBenchLoad(store, props, latencyStats, System.out, 0, false, chunk_q, tracker); /* Run the loading process */ loader.run(); logger.info("Loaded " + (store.adds + store.bulkLoadLinkRows) + " links. 
" + store.adds + " individually " + " and " + store.bulkLoadLinkRows + " in rows"); } private void validateLoadedData(Logger logger, DummyLinkStore wrappedStore, long startId, long idCount, int linksPerId, long maxTimestamp) throws Exception { for (long i = startId; i < startId + idCount; i++) { assertEquals(wrappedStore.countLinks(testDB, i, LinkStore.DEFAULT_LINK_TYPE), linksPerId); Link links[] = wrappedStore.getLinkList(testDB, i, LinkStore.DEFAULT_LINK_TYPE); if (linksPerId == 0) { assertTrue(links == null); } else { assertEquals(links.length, linksPerId); long lastTimestamp = Long.MAX_VALUE; for (Link l: links) { assertEquals(l.id1, i); assertEquals(l.link_type, LinkStore.DEFAULT_LINK_TYPE); assertEquals(l.visibility, LinkStore.VISIBILITY_DEFAULT); // Check timestamp correc if (l.time > maxTimestamp) { System.err.println(l.time + ", " + maxTimestamp); } assertTrue(l.time <= maxTimestamp); // Check descending assertTrue(lastTimestamp >= l.time); lastTimestamp = l.time; } } } logger.info("Successfully sanity checked data for " + idCount + " ids"); } static void deleteIDRange(String testDB, DummyLinkStore store, long startId, long idCount) throws Exception { // attempt to delete data for (long i = startId; i < startId + idCount; i++) { Link links[] = store.getLinkList(testDB, i, LinkStore.DEFAULT_LINK_TYPE); if (links != null) { for (Link l: links) { assert(l != null); store.deleteLink(testDB, l.id1, l.link_type, l.id2, true, true); } } } } } ================================================ FILE: src/test/java/com/facebook/LinkBench/LogNormalTest.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.LinkBench;

import java.util.Properties;

import com.facebook.LinkBench.distributions.LogNormalDistribution;
import com.facebook.LinkBench.distributions.ProbabilityDistribution;

/**
 * Instantiation of the generic distribution test suite for
 * LogNormalDistribution, plus precomputed CDF spot checks.
 */
public class LogNormalTest extends DistributionTestBase {

  @Override
  protected ProbabilityDistribution getDist() {
    return new LogNormalDistribution();
  }

  // Parameters the base-class tests use to initialize the distribution
  @Override
  protected Properties getDistParams() {
    Properties props = new Properties();
    props.setProperty(LogNormalDistribution.CONFIG_SIGMA, "1");
    props.setProperty(LogNormalDistribution.CONFIG_MEDIAN, "5000");
    return props;
  }

  // Tolerance used by the base-class comparisons for this distribution
  @Override
  protected double tolerance() {
    return 0.05;
  }

  /**
   * Sanity check values
   */
  public void testLogNormal() {
    LogNormalDistribution d = new LogNormalDistribution();
    int median = 10;
    d.init(0, 100, median, 1);
    // CDF of median should be 0.5 by def.
    assertEquals(0.5, d.cdf(median), 0.01);

    // Precomputed points
    d.init(0, 1000, 100, 1);
    assertEquals(0.033434, d.cdf(16), 0.0001);
    assertEquals(0.327695, d.cdf(64), 0.0001);
    assertEquals(0.597491, d.cdf(128), 0.0001);
    assertEquals(0.94878, d.cdf(512), 0.0001);
  }

  // The base-class PDF/quantile tests are overridden to no-ops below —
  // presumably because LogNormalDistribution does not implement those
  // operations. NOTE(review): confirm against LogNormalDistribution.
  @Override
  public void testPDFSanity() {
    System.err.println("test not implemented");
  }

  @Override
  public void testPDFSum() {
    System.err.println("test not implemented");
  }

  @Override
  public void testCDFPDFConsistency() {
    System.err.println("test not implemented");
  }

  @Override
  public void testQuantileSanity() {
    System.err.println("test not implemented");
  }
}

================================================ FILE: src/test/java/com/facebook/LinkBench/MemoryGraphStoreTest.java ================================================

/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/ package com.facebook.LinkBench; import java.io.IOException; import java.util.Properties; public class MemoryGraphStoreTest extends GraphStoreTestBase { MemoryLinkStore store; @Override protected void initStore(Properties props) throws IOException, Exception { store = new MemoryLinkStore(); } @Override protected DummyLinkStore getStoreHandle(boolean initialized) throws IOException, Exception { return new DummyLinkStore(store.newHandle(), initialized); } } ================================================ FILE: src/test/java/com/facebook/LinkBench/MemoryLinkStoreTest.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.IOException; import java.util.Properties; public class MemoryLinkStoreTest extends LinkStoreTestBase { MemoryLinkStore store; @Override public void setUp() throws Exception { super.setUp(); } @Override protected Properties basicProps() { Properties props = super.basicProps(); props.setProperty(Config.LINKSTORE_CLASS, MemoryLinkStore.class.getName()); return props; } @Override protected void initStore(Properties props) throws IOException, Exception { store = new MemoryLinkStore(); } @Override protected DummyLinkStore getStoreHandle(boolean initialized) { // Return a new memory link store handle. 
The underlying link store doesn't // need to be initialized, so just set wrapper to correct init status return new DummyLinkStore(store.newHandle(), initialized); } } ================================================ FILE: src/test/java/com/facebook/LinkBench/MemoryNodeStoreTest.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.IOException; import java.util.Properties; public class MemoryNodeStoreTest extends NodeStoreTestBase { MemoryLinkStore store; @Override protected void initNodeStore(Properties props) throws Exception, IOException { store = new MemoryLinkStore(); store.initialize(props, Phase.REQUEST, 0); } @Override protected NodeStore getNodeStoreHandle(boolean initialized) throws Exception, IOException { return new DummyLinkStore(store.newHandle(), initialized); } } ================================================ FILE: src/test/java/com/facebook/LinkBench/MySqlGraphStoreTest.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.IOException; import java.sql.Connection; import java.util.Properties; import org.junit.experimental.categories.Category; import com.facebook.LinkBench.testtypes.MySqlTest; @Category(MySqlTest.class) public class MySqlGraphStoreTest extends GraphStoreTestBase { private Properties props; private Connection conn; @Override protected void initStore(Properties props) throws IOException, Exception { this.props = props; this.conn = MySqlTestConfig.createConnection(testDB); MySqlTestConfig.dropTestTables(conn, testDB); MySqlTestConfig.createTestTables(conn, testDB); } @Override protected long getIDCount() { // Make quicker return 500; } @Override protected int getRequestCount() { // Make quicker, enough requests that we can reasonably check // that operation percentages are about about right return 10000; } @Override protected void tearDown() throws Exception { super.tearDown(); MySqlTestConfig.dropTestTables(conn, testDB); } @Override protected Properties basicProps() { Properties props = super.basicProps(); MySqlTestConfig.fillMySqlTestServerProps(props); return props; } @Override protected DummyLinkStore getStoreHandle(boolean initialize) throws IOException, Exception { DummyLinkStore result = new DummyLinkStore(new LinkStoreMysql()); if (initialize) { result.initialize(props, Phase.REQUEST, 0); } return result; } } ================================================ FILE: src/test/java/com/facebook/LinkBench/MySqlLinkStoreTest.java ================================================ /* * Copyright 2012, Facebook, Inc. 
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.LinkBench;

import java.io.IOException;
import java.sql.Connection;
import java.util.Properties;

import org.junit.experimental.categories.Category;

import com.facebook.LinkBench.testtypes.MySqlTest;

/**
 * Test the MySQL LinkStore implementation.
 *
 * Assumes that the database specified by the testDB field has been created
 * with permissions for a user/pass linkbench/linkbench to create tables, select,
 * insert, delete, etc.
 */
@Category(MySqlTest.class)
public class MySqlLinkStoreTest extends LinkStoreTestBase {

  /** Connection used for test table setup and teardown */
  private Connection conn;

  /** Properties for last initStore call */
  private Properties currProps;

  @Override
  protected long getIDCount() {
    // Make test smaller so that it doesn't take too long
    return 5000;
  }

  @Override
  protected int getRequestCount() {
    // Fewer requests to keep test quick
    return 10000;
  }

  // NOTE(review): missing @Override annotation on basicProps(); the method
  // does override the base class version.
  protected Properties basicProps() {
    Properties props = super.basicProps();
    MySqlTestConfig.fillMySqlTestServerProps(props);
    return props;
  }

  @Override
  protected void initStore(Properties props) throws IOException, Exception {
    // Clone so later mutation of the caller's Properties can't affect us
    this.currProps = (Properties)props.clone();
    if (conn != null) {
      // Close connection from any previous initStore() call before reconnecting
      conn.close();
    }
    conn = MySqlTestConfig.createConnection(testDB);
    MySqlTestConfig.dropTestTables(conn, testDB);
    MySqlTestConfig.createTestTables(conn, testDB);
  }

  @Override
  public DummyLinkStore getStoreHandle(boolean initialize) throws
      IOException, Exception {
    DummyLinkStore result = new DummyLinkStore(new LinkStoreMysql());
    if (initialize) {
      result.initialize(currProps, Phase.REQUEST, 0);
    }
    return result;
  }

  @Override
  protected void tearDown() throws Exception {
    super.tearDown();
    MySqlTestConfig.dropTestTables(conn, testDB);
    conn.close();
  }
}
================================================ FILE: src/test/java/com/facebook/LinkBench/MySqlNodeStoreTest.java ================================================
/*
 * Copyright 2012, Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.LinkBench;

import java.io.IOException;
import java.sql.Connection;
import java.util.Properties;

import org.junit.experimental.categories.Category;

import com.facebook.LinkBench.testtypes.MySqlTest;

/**
 * Runs the shared NodeStore test suite against the MySQL implementation.
 *
 * NOTE(review): unlike MySqlLinkStoreTest, this class neither closes a
 * previous connection in initNodeStore() nor overrides tearDown() to close
 * conn — the JDBC connection leaks across tests. Verify whether this is
 * intentional before relying on it.
 */
@Category(MySqlTest.class)
public class MySqlNodeStoreTest extends NodeStoreTestBase {

  // Connection used for test table setup
  Connection conn;
  // Properties from the last initNodeStore call, used to initialize handles
  Properties currProps;

  @Override
  protected Properties basicProps() {
    Properties props = super.basicProps();
    MySqlTestConfig.fillMySqlTestServerProps(props);
    return props;
  }

  @Override
  protected void initNodeStore(Properties props) throws Exception,
      IOException {
    currProps = props;
    conn = MySqlTestConfig.createConnection(testDB);
    MySqlTestConfig.dropTestTables(conn, testDB);
    MySqlTestConfig.createTestTables(conn, testDB);
  }

  @Override
  protected NodeStore getNodeStoreHandle(boolean initialize) throws Exception,
      IOException {
    DummyLinkStore result = new DummyLinkStore(new LinkStoreMysql());
    if (initialize) {
      result.initialize(currProps, Phase.REQUEST, 0);
    }
    return result;
  }
}
================================================ FILE: src/test/java/com/facebook/LinkBench/MySqlTestConfig.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; import java.sql.Statement; import java.util.Properties; /** * Class containing hardcoded parameters and helper functions used to create * and connect to the unit test database for MySql * @author tarmstrong */ public class MySqlTestConfig { // Hardcoded parameters for now static String host = "localhost"; static int port = 3306; static String user = "linkbench"; static String pass = "linkbench"; static String linktable = "test_linktable"; static String counttable = "test_counttable"; static String nodetable = "test_nodetable"; public static void fillMySqlTestServerProps(Properties props) { props.setProperty(Config.LINKSTORE_CLASS, LinkStoreMysql.class.getName()); props.setProperty(Config.NODESTORE_CLASS, LinkStoreMysql.class.getName()); props.setProperty(LinkStoreMysql.CONFIG_HOST, host); props.setProperty(LinkStoreMysql.CONFIG_PORT, Integer.toString(port)); props.setProperty(LinkStoreMysql.CONFIG_USER, user); props.setProperty(LinkStoreMysql.CONFIG_PASSWORD, pass); props.setProperty(Config.LINK_TABLE, linktable); props.setProperty(Config.COUNT_TABLE, counttable); props.setProperty(Config.NODE_TABLE, nodetable); } 
static Connection createConnection(String testDB) throws InstantiationException, IllegalAccessException, ClassNotFoundException, SQLException { Class.forName("com.mysql.jdbc.Driver").newInstance(); return DriverManager.getConnection( "jdbc:mysql://"+ MySqlTestConfig.host + ":" + MySqlTestConfig.port + "/" + testDB + "?elideSetAutoCommits=true" + "&useLocalTransactionState=true" + "&allowMultiQueries=true" + "&useLocalSessionState=true", MySqlTestConfig.user, MySqlTestConfig.pass); } static void createTestTables(Connection conn, String testDB) throws SQLException { Statement stmt = conn.createStatement(); stmt.executeUpdate(String.format( "CREATE TABLE `%s`.`%s` (" + "`id1` bigint(20) unsigned NOT NULL DEFAULT '0'," + "`id2` bigint(20) unsigned NOT NULL DEFAULT '0'," + "`link_type` bigint(20) unsigned NOT NULL DEFAULT '0'," + "`visibility` tinyint(3) NOT NULL DEFAULT '0'," + "`data` varchar(255) NOT NULL DEFAULT ''," + "`time` bigint(20) unsigned NOT NULL DEFAULT '0'," + "`version` int(11) unsigned NOT NULL DEFAULT '0'," + "PRIMARY KEY (`id1`,`id2`,`link_type`)," + "KEY `id1_type` (`id1`,`link_type`,`visibility`,`time`,`version`,`data`)" + ") ENGINE=InnoDB DEFAULT CHARSET=latin1;", testDB, MySqlTestConfig.linktable)); stmt.executeUpdate(String.format("CREATE TABLE `%s`.`%s` (" + "`id` bigint(20) unsigned NOT NULL DEFAULT '0'," + "`link_type` bigint(20) unsigned NOT NULL DEFAULT '0'," + "`count` int(10) unsigned NOT NULL DEFAULT '0'," + "`time` bigint(20) unsigned NOT NULL DEFAULT '0'," + "`version` bigint(20) unsigned NOT NULL DEFAULT '0'," + "PRIMARY KEY (`id`,`link_type`)" + ") ENGINE=InnoDB DEFAULT CHARSET=latin1;", testDB, MySqlTestConfig.counttable)); stmt.executeUpdate(String.format( "CREATE TABLE `%s`.`%s` (" + "`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT," + "`type` int(10) unsigned NOT NULL," + "`version` bigint(20) unsigned NOT NULL," + "`time` int(10) unsigned NOT NULL," + "`data` mediumtext NOT NULL," + "primary key(`id`)" + ") ENGINE=InnoDB DEFAULT 
CHARSET=latin1;", testDB, MySqlTestConfig.nodetable)); } static void dropTestTables(Connection conn, String testDB) throws SQLException { Statement stmt = conn.createStatement(); stmt.executeUpdate(String.format("DROP TABLE IF EXISTS `%s`.`%s`;", testDB, MySqlTestConfig.linktable)); stmt.executeUpdate(String.format("DROP TABLE IF EXISTS `%s`.`%s`;", testDB, MySqlTestConfig.counttable)); stmt.executeUpdate(String.format("DROP TABLE IF EXISTS `%s`.`%s`;", testDB, MySqlTestConfig.nodetable)); } } ================================================ FILE: src/test/java/com/facebook/LinkBench/NodeStoreTestBase.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.IOException; import java.util.Arrays; import java.util.Properties; import junit.framework.TestCase; import org.junit.Test; /** * This test implements unit tests that *all* implementations of NodeStore * should pass. * * Different implementations of NodeStore will require different configuration * and different setups for testing, so in order to test out a particular * NodeStore implementation, you can subclass this test and implement the * required abstract methods so that the test store is initialized correctly * and all required configuration properties are filled in. 
 *
 * @author tarmstrong
 */
public abstract class NodeStoreTestBase extends TestCase {

  // Database name used for all node store operations in these tests
  protected String testDB = "linkbench_unittestdb";

  /** Set up the backing store so that handles can be created. */
  protected abstract void initNodeStore(Properties props)
          throws Exception, IOException;

  /** Return a handle to the store, optionally already initialized. */
  protected abstract NodeStore getNodeStoreHandle(boolean initialized)
          throws Exception, IOException;

  protected Properties basicProps() {
    Properties props = new Properties();
    props.setProperty(Config.DBID, testDB);
    return props;
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
    Properties props = basicProps();

    // Set up db
    initNodeStore(props);
    getNodeStoreHandle(true).resetNodeStore(testDB, 0);
  }

  /**
   * Exercise ID allocation: IDs are assigned sequentially from the reset
   * start value, addNode does not mutate its argument, and retrieval,
   * deletion and reset all behave consistently.
   */
  @Test
  public void testIDAlloc() throws IOException, Exception {
    int now = (int)(System.currentTimeMillis()/1000L);
    NodeStore store = getNodeStoreHandle(true);
    final int nodeType = 2048;
    final long initId = 4; // We always start counting from 4 at Facebook
    store.resetNodeStore(testDB, initId); // Start from clean store
    byte data[] = new byte[] {0xb, 0xe, 0xa, 0x5, 0x7};
    Node test = new Node(-1, nodeType, 1, now, data);
    long id = store.addNode(testDB, test);
    // Check id allocated
    assertEquals("expected first ID allocated after reset", initId, id);
    assertEquals("addNode should not modify arguments", -1, test.id);
    test.id = id;

    // Allocate another
    id = store.addNode(testDB, test);
    test.id = id;
    long secondId = initId + 1;
    assertEquals("expected second ID allocated after reset", secondId, id);

    // Check retrieval
    Node fetched = store.getNode(testDB, nodeType, secondId);
    assertNotSame("Fetched nodes should not alias", fetched, test);
    assertEquals("Check fetched node" + fetched + ".equals(" + test + ")",
        test, fetched); // but should have same data

    // Check deletion
    assertTrue(store.deleteNode(testDB, nodeType, secondId));
    assertNull(store.getNode(testDB, nodeType, secondId));
    // Delete non-existent data
    assertFalse("Deleting non-existent node should fail",
        store.deleteNode(testDB, nodeType, 8));
    int otherType = nodeType + 1;
    assertFalse("Node should not be deleted if types don't match",
        store.deleteNode(testDB, otherType, initId));

    // Check reset works right
    long newInitId = initId - 1;
    store.resetNodeStore(testDB, newInitId);
    assertNull("Nodes should be deleted after reset",
        store.getNode(testDB, nodeType, newInitId));
    assertEquals("Correct ID after second reset", newInitId,
        store.addNode(testDB, test));
    assertEquals("Correct ID after second reset", newInitId + 1,
        store.addNode(testDB, test));
    assertNotNull("Added node should exist",
        store.getNode(testDB, nodeType, newInitId));
    assertNotNull("Added node should exist",
        store.getNode(testDB, nodeType, newInitId + 1));
  }

  /** Updating a node's data should be visible on a subsequent fetch. */
  @Test
  public void testUpdate() throws IOException, Exception {
    NodeStore store = getNodeStoreHandle(true);
    store.resetNodeStore(testDB, 0);
    Node test = new Node(-1, 1234, 3, 3, "the quick brown fox".getBytes());
    test.id = store.addNode(testDB, test);
    test.data = "jumped over the lazy dog".getBytes();
    assertTrue(store.updateNode(testDB, test));
    Node test2 = store.getNode(testDB, test.type, test.id);
    assertNotNull(test2);
    assertTrue(test.equals(test2));
  }

  /** Binary payloads (including a larger update) must round-trip intact. */
  @Test
  public void testBinary() throws IOException, Exception {
    byte data[] = new byte[4096];
    for (int i = 0; i < data.length; i++) {
      data[i] = (byte)(i % 256);
    }

    NodeStore store = getNodeStoreHandle(true);
    store.resetNodeStore(testDB, 0);
    Node test = new Node(-1, 1234, 3, 3, data);
    test.id = store.addNode(testDB, test);
    Node test2 = store.getNode(testDB, test.type, test.id);
    assertNotNull(test2);
    assertTrue(Arrays.equals(data, test2.data));

    // Grow the payload to make sure variable-length updates work
    byte data2[] = new byte[data.length * 2];
    for (int i = 0; i < data2.length; i++) {
      data2[i] = (byte)((i + 52) % 256);
    }
    test.data = data2;
    assertTrue(store.updateNode(testDB, test));
    Node test3 = store.getNode(testDB, test.type, test.id);
    assertNotNull(test3);
    assertTrue(Arrays.equals(data2, test3.data));
  }
}
================================================ FILE: src/test/java/com/facebook/LinkBench/PiecewiseDistTest.java
================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.util.ArrayList; import java.util.Properties; import com.facebook.LinkBench.distributions.PiecewiseLinearDistribution; import com.facebook.LinkBench.distributions.PiecewiseLinearDistribution.Point; import com.facebook.LinkBench.distributions.ProbabilityDistribution; public class PiecewiseDistTest extends DistributionTestBase { ArrayList testDistribution = null; @Override public void setUp() throws Exception { super.setUp(); // Make up an arbitrary distribution testDistribution = new ArrayList(); testDistribution.add(new Point(0, 0.1)); testDistribution.add(new Point(1, 0.15)); testDistribution.add(new Point(2, 0.17)); testDistribution.add(new Point(3, 0.20)); testDistribution.add(new Point(4, 0.23)); testDistribution.add(new Point(10, 0.26)); testDistribution.add(new Point(20, 0.4)); testDistribution.add(new Point(30, 0.45)); testDistribution.add(new Point(40, 0.6)); testDistribution.add(new Point(55, 0.64)); testDistribution.add(new Point(70, 0.70)); testDistribution.add(new Point(90, 0.75)); testDistribution.add(new Point(100, 0.82)); testDistribution.add(new Point(110, 0.92)); testDistribution.add(new Point(120, 1.0)); } @Override protected int cdfChecks() { return 50; } @Override protected ProbabilityDistribution getDist() { return new PiecewiseLinearDistribution() { @Override public void init(long 
min, long max, Properties props, String keyPrefix) { init(min, max, testDistribution); } }; } @Override public void testCDFSanity() { System.err.println("CDF not implemented"); } @Override public void testCDFChooseConsistency() { System.err.println("CDF not implemented"); } @Override public void testCDFPDFConsistency() { System.err.println("CDF not implemented"); } @Override public void testQuantileSanity() { System.err.println("Quantile not implemented"); } } ================================================ FILE: src/test/java/com/facebook/LinkBench/TestAccessDistribution.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
 */
package com.facebook.LinkBench;

import java.io.File;
import java.util.Properties;
import java.util.Random;

import org.junit.Test;

import com.facebook.LinkBench.RealDistribution.DistributionType;
import com.facebook.LinkBench.distributions.AccessDistributions.AccessDistMode;
import com.facebook.LinkBench.distributions.AccessDistributions.AccessDistribution;
import com.facebook.LinkBench.distributions.AccessDistributions.BuiltinAccessDistribution;
import com.facebook.LinkBench.distributions.AccessDistributions.ProbAccessDistribution;
import com.facebook.LinkBench.distributions.UniformDistribution;
import com.facebook.LinkBench.distributions.ZipfDistribution;

import junit.framework.AssertionFailedError;
import junit.framework.TestCase;

/**
 * Sanity checks for all the access distribution variants: every generated
 * id must fall in the configured [minid, maxid) range, with and without
 * shuffling where applicable.
 */
public class TestAccessDistribution extends TestCase {

  @Test
  public void testMultiple() {
    testSanityBuiltinDist(AccessDistMode.MULTIPLE, 3);
  }

  @Test
  public void testPerfectPower() {
    testSanityBuiltinDist(AccessDistMode.PERFECT_POWER, 3);
  }

  @Test
  public void testPower() {
    testSanityBuiltinDist(AccessDistMode.POWER, 3);
  }

  @Test
  public void testReciprocal() {
    testSanityBuiltinDist(AccessDistMode.RECIPROCAL, 3);
  }

  @Test
  public void testRoundRobin() {
    // Round-robin ignores the config parameter, so pass 0
    testSanityBuiltinDist(AccessDistMode.ROUND_ROBIN, 0);
  }

  @Test
  public void testUniform() {
    UniformDistribution u = new UniformDistribution();
    Properties props = new Properties();
    int min = 100, max = 200;
    u.init(min, max, props, "");
    // Check both the plain distribution and a shuffled variant
    ProbAccessDistribution unshuffled = new ProbAccessDistribution(u, null);
    testSanityAccessDist(unshuffled, min, max);
    ProbAccessDistribution shuffled = new ProbAccessDistribution(u,
                              new InvertibleShuffler(13, 25, max - min));
    testSanityAccessDist(shuffled, min, max);
  }

  @Test
  public void testZipf() {
    ZipfDistribution z = new ZipfDistribution();
    Properties props = new Properties();
    props.setProperty("shape", "0.5");
    int min = 100, max = 200;
    z.init(min, max, props, "");
    ProbAccessDistribution unshuffled = new ProbAccessDistribution(z, null);
    testSanityAccessDist(unshuffled, min, max);
    ProbAccessDistribution shuffled = new ProbAccessDistribution(z,
                              new InvertibleShuffler(13, 25, max - min));
    testSanityAccessDist(shuffled, min, max);
  }

  @Test
  public void testReal() {
    // Uses the empirical distribution shipped in config/Distribution.dat
    RealDistribution r = new RealDistribution();
    Properties props = new Properties();
    props.setProperty(Config.DISTRIBUTION_DATA_FILE,
                      new File("config/Distribution.dat").getAbsolutePath());
    int min = 100, max = 200;
    r.init(props, min, max, DistributionType.LINK_READS);
    ProbAccessDistribution unshuffled = new ProbAccessDistribution(r, null);
    testSanityAccessDist(unshuffled, min, max);
    ProbAccessDistribution shuffled = new ProbAccessDistribution(r,
                              new InvertibleShuffler(13, 25, max - min));
    testSanityAccessDist(shuffled, min, max);
  }

  /** Build a builtin access distribution with the given mode/config and
   *  check the ids it produces stay in range. */
  public static void testSanityBuiltinDist(AccessDistMode mode, long config) {
    long minid = 123;
    long maxid = 12345;
    BuiltinAccessDistribution dist = new BuiltinAccessDistribution(mode,
                                                    minid, maxid, config);
    testSanityAccessDist(dist, minid, maxid);
  }

  /**
   * Check that results are in range, etc.
   */
  public static void testSanityAccessDist(AccessDistribution dist,
      long minid, long maxid) {
    // Log the seed so a failing run can be reproduced
    long seed = System.currentTimeMillis();
    System.err.println("Using seed " + seed);
    Random rng = new Random(seed);
    long id = 1;
    int trials = 10000;
    long start = System.currentTimeMillis();
    for (int i = 0; i < trials; i++) {
      id = dist.nextID(rng, id);
      try {
        assertTrue(id >= minid);
        assertTrue(id < maxid);
      } catch (AssertionFailedError e) {
        // Add context before rethrowing so the offending id is visible
        System.err.println("Error: on trial " + i + " id returned: " + id
            + " not in range [" + minid + "," + maxid + ")");
        throw e;
      }
    }
    long end = System.currentTimeMillis();
    System.err.println("Took " + (end - start) + " ms for "
                       + trials + " trials");
  }
}
================================================ FILE: src/test/java/com/facebook/LinkBench/TestDataGen.java ================================================
/*
 * Copyright 2012, Facebook, Inc.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.Random; import java.util.zip.Deflater; import java.util.zip.DeflaterOutputStream; import junit.framework.TestCase; import org.junit.Test; import org.junit.experimental.categories.Category; import com.facebook.LinkBench.generators.DataGenerator; import com.facebook.LinkBench.generators.MotifDataGenerator; import com.facebook.LinkBench.generators.UniformDataGenerator; import com.facebook.LinkBench.testtypes.SlowTest; @Category(SlowTest.class) public class TestDataGen extends TestCase { public static void printByteGrid(byte[] data) { for (int i = 0; i < data.length; i += 32) { for (int j = i; j < Math.min(i + 32, data.length); j++) { System.err.format("%3d ", data[j]); } System.err.println(); } } /** * Test how quickly uniform data generator can generate patterns */ @Test public void testTimingUniform() { System.err.println("Testing uniform generator"); System.err.println("========================="); DataGenFactory fact = new DataGenFactory() { public DataGenerator make(double param) { UniformDataGenerator gen = new UniformDataGenerator(); gen.init(0, 8); return gen; } }; testTiming(fact, 128); testTiming(fact, 1024); } /** * Test how quickly motif data generator can generate patterns to make * sure its not */ @Test public void testTimingMotif() { System.err.println("Testing motif generator"); 
System.err.println("========================="); DataGenFactory fact = new DataGenFactory() { public DataGenerator make(double param) { MotifDataGenerator gen = new MotifDataGenerator(); gen.init(0, 8, param); return gen; } }; testTiming(fact, 128); testTiming(fact, 1024); } private void testTiming(DataGenFactory fact, int bufSize) { byte buf[] = new byte[bufSize]; Random rng = new Random(); int trials = 200000; double params[] = new double[] {0.0, 0.25, 0.5, 0.75, 1.0}; long times_ns[] = new long[params.length]; for (int i = 0; i < params.length; i++) { double param = params[i]; // Warm up doTest(fact, buf, rng, trials, param); // Make sure hotspot will have compiled try { Thread.sleep(50); } catch (InterruptedException e) { e.printStackTrace(); } long timeTaken = doTest(fact, buf, rng, trials, param); times_ns[i] = timeTaken; } for (int i = 0; i < params.length; i++) { double trialTime = times_ns[i] / (double) trials; double byteTime = trialTime / buf.length; System.err.format("uniqueness = %.3f, time for %d byte buffer = %.1f ns, time per byte = %.1fns\n", params[i], buf.length, trialTime, byteTime); } } private static interface DataGenFactory { public abstract DataGenerator make(double param); } private long doTest(DataGenFactory fact, byte[] buf, Random rng, int trials, double param) { DataGenerator gen = fact.make(param); long start = System.nanoTime(); for (int j = 0; j < trials; j++) { gen.fill(rng, buf); } long end = System.nanoTime(); long timeTaken = end - start; return timeTaken; } /** * Exercise the motif data generator and print the output. * * Currently difficult to automatically verify output. 
*/ @Test public void testMotif() { MotifDataGenerator gen = new MotifDataGenerator(); System.err.println("uniqueness 0.25"); gen.init(0, 8, 0.25); byte data[] = gen.fill(new Random(), new byte[64]); printByteGrid(data); System.err.println("uniqueness 0.0"); gen.init(0, 8, 0.0); data = gen.fill(new Random(), new byte[64]); printByteGrid(data); System.err.println("uniqueness 0.05"); gen.init(0, 8, 0.05); data = gen.fill(new Random(), new byte[64]); printByteGrid(data); System.err.println("uniqueness 1.0"); gen.init(0, 8, 1.0); data = gen.fill(new Random(), new byte[64]); printByteGrid(data); } /** * Estimate the compressibility of randomly generated data by * compressing a long stream of the data * @throws IOException */ @Test public void testCompressibility() throws IOException { MotifDataGenerator gen = new MotifDataGenerator(); gen.init(0, 255, 0.5); System.err.println("\nUniqueness=0.5 Range=255\n==============="); testCompressibility(gen, 1024, 10000); testCompressibility(gen, 64, 1); gen.init(0, 127, 0.5); System.err.println("\nUniqueness=0.5 Range=127\n==============="); testCompressibility(gen, 1024, 10000); testCompressibility(gen, 64, 1); gen.init(0, 255, 0.0); System.err.println("\nUniqueness=0.0 Range=255\n==============="); testCompressibility(gen, 1024, 10000); testCompressibility(gen, 64, 1); gen.init(0, 255, 1.0); System.err.println("\nUniqueness=1.0 Range=255\n==============="); testCompressibility(gen, 1024, 10000); testCompressibility(gen, 64, 1); gen.init(0, 255, 1.0); gen.init(0, 127, 1.0); System.err.println("\nUniqueness=1.0 Range=127\n==============="); testCompressibility(gen, 1024, 10000); testCompressibility(gen, 64, 1); gen.init(0, 1, 1.0); System.err.println("\nUniqueness=1.0 Range=1\n==============="); testCompressibility(gen, 1024, 10000); testCompressibility(gen, 64, 1); } private void testCompressibility(MotifDataGenerator gen, int blockSize, int blocks) throws IOException { long seed = System.nanoTime(); System.err.println("seed = " 
+ seed); Random rng = new Random(seed); ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); Deflater def = new Deflater(Deflater.BEST_COMPRESSION); DeflaterOutputStream gzipOut = new DeflaterOutputStream(byteOut, def); byte block[] = new byte[blockSize]; for (int i = 0; i < blocks; i++) { gen.fill(rng, block); gzipOut.write(block); } gzipOut.close(); byte compressed[] = byteOut.toByteArray(); int origLen = blockSize * blocks; System.err.format("%dx%d blocks. Compressed %d bytes to %d: %.2f. Bound: %.2f\n", blocks, blockSize, origLen, compressed.length, compressed.length / (double) origLen, gen.estMaxCompression()); } } ================================================ FILE: src/test/java/com/facebook/LinkBench/TestRealDistribution.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.io.File; import java.util.ArrayList; import java.util.Map.Entry; import java.util.NavigableMap; import java.util.Properties; import java.util.Random; import java.util.SortedMap; import java.util.TreeMap; import junit.framework.TestCase; import org.junit.Test; import com.facebook.LinkBench.RealDistribution.DistributionType; import com.facebook.LinkBench.distributions.LinkDistributions.ProbLinkDistribution; /* * This class measures how similar data generated by class RealDistribution * and the data file is. 
*/ public class TestRealDistribution extends TestCase { Properties props; @Override public void setUp() throws Exception { props = new Properties(); String distFile = new File("config/Distribution.dat").getAbsolutePath(); props.setProperty(Config.DISTRIBUTION_DATA_FILE, distFile); RealDistribution.loadOneShot(props); } /** regression test for binary search bug */ public void testBinSearchRegression1() { ArrayList l = new ArrayList(); l.add(new RealDistribution.Point(0, 0.0)); l.add(new RealDistribution.Point(1, 0.5)); l.add(new RealDistribution.Point(2, 1.0)); assertEquals(0, RealDistribution.binarySearch(l, 0.00)); assertEquals(1, RealDistribution.binarySearch(l, 0.001)); assertEquals(1, RealDistribution.binarySearch(l, 0.4)); assertEquals(1, RealDistribution.binarySearch(l, 0.5)); assertEquals(2, RealDistribution.binarySearch(l, 0.50001)); assertEquals(2, RealDistribution.binarySearch(l, 1.0)); assertEquals(3, RealDistribution.binarySearch(l, 1.33)); } /** regression test for binary search bug */ public void testBinSearchRegression2() { double l[] = new double[3]; l[0] = 0.0; l[1] = 0.5; l[2] = 1.0; assertEquals(0, RealDistribution.binarySearch(l, 0.00)); assertEquals(1, RealDistribution.binarySearch(l, 0.001)); assertEquals(1, RealDistribution.binarySearch(l, 0.4)); assertEquals(1, RealDistribution.binarySearch(l, 0.5)); assertEquals(2, RealDistribution.binarySearch(l, 0.50001)); assertEquals(2, RealDistribution.binarySearch(l, 1.0)); assertEquals(3, RealDistribution.binarySearch(l, 1.33)); } @Test public void testGetNLinks() throws Exception { //TODO: would be good to have some real measure for whether these // error values are in the "proper" range System.out.println("testGetNLinks\n==========="); double err; err = testGetNlinks(props, 1000000, 2000001); System.out.println("testGetNlinks(1000000, 2000001) err=" + err); assertTrue(err < 0.0001); err = testGetNlinks(props, 1234567, 7654321); System.out.println("testGetNlinks(1234567, 7654321) err=" + err); 
assertTrue(err < 0.0001); err = testGetNlinks(props, 97, 10000097); System.out.println("testGetNlinks(97, 10000097) err=" + err); assertTrue(err < 0.0001); System.out.println(); } @Test public void testGetNextId1() throws Exception { //TODO: would be good to have some real measure for whether these // error values are in the "proper" range System.out.println("testGetNextId1\n==========="); long randSeed = System.currentTimeMillis(); System.out.println("random seed: " + randSeed); Random rng = new Random(randSeed); double err; err = testGetNextId1(props, rng, 1000000, 2000001, DistributionType.LINK_READS); System.out.println("testGetNextId1(1000000, 2000001, nreads) err=" + err); err = testGetNextId1(props, rng, 1000000, 2000001, DistributionType.LINK_WRITES); System.out.println("testGetNextId1(1000000, 2000001, nwrites) err=" + err); err = testGetNextId1(props, rng, 1000000, 2000001, DistributionType.NODE_READS); System.out.println("testGetNextId1(1000000, 2000001, node_nreads) err=" + err); err = testGetNextId1(props, rng, 1000000, 2000001, DistributionType.NODE_UPDATES); System.out.println("testGetNextId1(1000000, 2000001, node_nwrites) err=" + err); err = testGetNextId1(props, rng, 1234567, 7654321, DistributionType.LINK_READS); System.out.println("testGetNextId1(1234567, 7654321, nreads) err=" + err); err = testGetNextId1(props, rng, 1234567, 7654321, DistributionType.LINK_WRITES); System.out.println("testGetNextId1(1234567, 7654321, nwrites) err=" + err); err = testGetNextId1(props, rng, 97, 10000097, DistributionType.LINK_READS); System.out.println("testGetNextId1(97, 10000097, nreads) err=" + err); err = testGetNextId1(props, rng, 97, 10000097, DistributionType.LINK_WRITES); System.out.println("testGetNextId1(97, 10000097, nwrites) err=" + err); System.out.println(); } //return the distribution for a sequence of numbers private static NavigableMap getDistribution(int[] seq, int start, int end) { //create a map from values to number of times they appears in 
the sequence SortedMap map = new TreeMap(); for (int i = start; i < end; ++i) { Integer p = map.get(seq[i]); if (p==null) map.put(seq[i], 1); else map.put(seq[i], p + 1); } //calculate the cumulative distribution of map TreeMap cdf = new TreeMap(); double sum = 0; for (Object key : map.keySet()) { sum += map.get((Integer)key) / (double) (end - start); cdf.put((Integer)key, sum); } return cdf; } //return the rms error between two distributions private static double getComparisonError(NavigableMap act, NavigableMap exp, boolean printBigErrors) { int min = Math.min(act.firstKey(), exp.firstKey()); int max = Math.min(act.lastKey(), exp.lastKey()); int samplePoints = Math.min(10000, max - min + 1); double cumulative = 0; // Sample linearly interpolated distribution at many points for (int i = 0; i < samplePoints; i++) { int k = min + (int)Math.round(((max - min) * ((double)i)/((double)samplePoints))); double pa = interpolatedValue(act, k); double pe = interpolatedValue(exp, k); cumulative += (pa - pe) * (pa - pe); // Print errors > 0.1% if (printBigErrors && Math.abs(pa - pe) > 0.001) { System.err.println(String.format("Large divergence %f " + "cdf_exp(%d) = %f, cdf_act(%d) = %f", Math.abs(pa - pe), k, pe, k, pa)); } } return cumulative / samplePoints; } private static double interpolatedValue(NavigableMap a, int k) { Entry floor = a.floorEntry(k); Entry ceil = a.ceilingEntry(k); if (ceil.getKey() == floor.getKey()) { return ceil.getValue(); } double mix = (k - floor.getKey()) / (double)(ceil.getKey() - floor.getKey()); return mix * ceil.getValue() + (1 - mix) * floor.getValue(); } //test RealDistribution.getNextId1 //type is either "nlinks" or "nwrites" //maxid1 is exclusive private static double testGetNextId1(Properties props, Random rng, int startid1, int maxid1, DistributionType type) throws Exception { int[] cnt = new int[maxid1]; double nqueries = (maxid1 - startid1)*RealDistribution.getArea(type) /100.0; RealDistribution dist = new RealDistribution(); 
dist.init(props, startid1, maxid1, type); for (int i = 0; i < nqueries; ++i) { long x = dist.choose(rng); if (x < startid1 || x >= maxid1) { throw new Exception("Invalid value of id1: " + x); } cnt[(int)x]++; } NavigableMap generated_data = getDistribution(cnt, startid1, maxid1); NavigableMap real_data = RealDistribution.getCDF(type); return getComparisonError(generated_data, real_data, true); } //test getNLinks private static double testGetNlinks(Properties props, int startid1, int maxid1) throws Exception { RealDistribution rDist = new RealDistribution(); rDist.init(props, startid1, maxid1, DistributionType.LINKS); ProbLinkDistribution dist = new ProbLinkDistribution(rDist); int[] nlinks = new int[maxid1]; for (int i = startid1; i < maxid1; ++i) { long x = dist.getNlinks(i); if (x < 0) { fail("x is negative: " + x + " for i=" + i); } nlinks[i] = (int)x; } NavigableMap generated_data = getDistribution(nlinks, startid1, maxid1); NavigableMap real_data = RealDistribution.getCDF(DistributionType.LINKS); return getComparisonError(generated_data, real_data, true); } } ================================================ FILE: src/test/java/com/facebook/LinkBench/TestStats.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench; import junit.framework.AssertionFailedError; import junit.framework.TestCase; import org.junit.Test; import com.facebook.LinkBench.stats.LatencyStats; public class TestStats extends TestCase { @Test public void testBucketing() { // 0 microseconds until 100 seconds for (long us = 0; us < 100 * 1000 * 1000; us += 100 ) { int bucket = LatencyStats.latencyToBucket(us); try { assertTrue(bucket >= 0); assertTrue(bucket < LatencyStats.NUM_BUCKETS); long range[] = LatencyStats.bucketBound(bucket); assertTrue(us >= range[0]); assertTrue(us < range[1]); } catch (AssertionFailedError e) { System.err.println("Failed for " + us + "us, bucket=" + bucket); throw e; } } } } ================================================ FILE: src/test/java/com/facebook/LinkBench/TimerTest.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.util.Random; import org.junit.Test; import junit.framework.TestCase; public class TimerTest extends TestCase { @Test public void testTimer1() { for (int i = 0; i < 100; i++) { long wakeTime = System.nanoTime() + i * 1000 * 500; Timer.waitUntil(wakeTime); long now = System.nanoTime(); assertTrue(now >= wakeTime); // Check that the precision isn't so awful that it would // indicate a definite bug (e.g. 
100ms) assertTrue(now <= wakeTime + 1e7); } } /** * Test that we can use the timer to wait for short intervals * without getting far behind */ @Test public void testTimer2() { // Repeatedly wait for 10us long waits = 100 * 100; // 100ms total long time = System.nanoTime(); long startTime = time; for (int i = 0; i < waits; i++) { time += 1e4; // 10 us Timer.waitUntil(time); } long endTime = System.nanoTime(); System.err.println("took " + ((endTime - startTime) / 1000000) + "ms"); assertTrue(endTime - startTime >= 1e8); assertTrue(endTime - startTime < 1.02e8); // no longer than 102ms } @Test public void testExponentialArrivals() { long randSeed = System.currentTimeMillis(); System.err.println("Random seed: " + randSeed); Random rng = new Random(randSeed); int trials = 40000; int arrivalRate_s = 200000; double arrivalRate_ns = arrivalRate_s / (double)1e9; // Check that the exponential distribution is creating correct arrival rate. long startTime = System.nanoTime(); long time = startTime; for (int i = 0; i < trials; i++) { time = Timer.waitExpInterval(rng, time, arrivalRate_ns); } long endTime = System.nanoTime(); double actualArrivalRate_ns = trials / (double) (endTime - startTime); System.err.println("actual arrival rate: " + actualArrivalRate_ns * 1e9 + " /s " + " expected " + arrivalRate_s + "/s"); assertTrue(actualArrivalRate_ns >= arrivalRate_ns * 0.9); assertTrue(actualArrivalRate_ns <= arrivalRate_ns * 1.1); } } ================================================ FILE: src/test/java/com/facebook/LinkBench/UniformDistTest.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.facebook.LinkBench; import java.util.Random; import org.junit.Test; import com.facebook.LinkBench.distributions.ProbabilityDistribution; import com.facebook.LinkBench.distributions.UniformDistribution; public class UniformDistTest extends DistributionTestBase { @Override public ProbabilityDistribution getDist() { return new UniformDistribution(); } @Test public void testInRange() { // Check 2^31 < n < 2^32 and n > 2^32 long maxes[] = {(long)Math.pow(2, 31.5), (long)Math.pow(2, 34.23)}; int trials = 10000; Random rng = new Random(); for (long max: maxes) { UniformDistribution dist = new UniformDistribution(); dist.init(0, max, 1); for (int trial = 0; trial < trials; trial++) { long i = dist.choose(rng); assertTrue(i >= 0); assertTrue(i < max); } } } } ================================================ FILE: src/test/java/com/facebook/LinkBench/ZipfDistTest.java ================================================ /* * Copyright 2012, Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. 
*/ package com.facebook.LinkBench; import java.util.Properties; import com.facebook.LinkBench.distributions.ProbabilityDistribution; import com.facebook.LinkBench.distributions.ZipfDistribution; public class ZipfDistTest extends DistributionTestBase { @Override protected int cdfChecks() { // Don't do many checks, since its expensive return 5; } @Override public ProbabilityDistribution getDist() { return new ZipfDistribution(); } @Override public Properties getDistParams() { Properties props = new Properties(); props.setProperty("shape", "0.9"); return props; } @Override protected Bucketer getBucketer() { return new ZipfBucketer(); } @Override protected double tolerance() { /* * Method for choosing IDs isn't 100% precise * but something is more seriously wrong if it is more than 1% off */ return 0.01; } /** * Check distribution more closely at low values */ static class ZipfBucketer implements Bucketer { private static final long bucketBounds[] = {0, 1, 2, 3, 4, 10, 20, 30, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000}; public int getBucketCount() { return bucketBounds.length + 1; } public int chooseBucket(long i, long n) { for (int j = 0; j < bucketBounds.length; j++) { if (i <= bucketBounds[j]) { return j; } } return bucketBounds.length; } public long bucketMax(int bucket, long n) { if (bucket >= bucketBounds.length) return n; else return bucketBounds[bucket]; } } } ================================================ FILE: src/test/java/com/facebook/LinkBench/testtypes/MySqlTest.java ================================================ package com.facebook.LinkBench.testtypes; public interface MySqlTest extends ProviderTest { } ================================================ FILE: src/test/java/com/facebook/LinkBench/testtypes/ProviderTest.java ================================================ package com.facebook.LinkBench.testtypes; /** * Marker interface for tests for specific data store providers, particualrly * those that require an external data store to 
be setup. */ public interface ProviderTest { } ================================================ FILE: src/test/java/com/facebook/LinkBench/testtypes/RocksDbTest.java ================================================ package com.facebook.LinkBench.testtypes; public interface RocksDbTest extends ProviderTest { } ================================================ FILE: src/test/java/com/facebook/LinkBench/testtypes/SlowTest.java ================================================ package com.facebook.LinkBench.testtypes; /** * Marker interface for slow unit tests that should not be run as part of default * unit tests */ public interface SlowTest { }