Repository: burmanm/gorilla-tsc Branch: master Commit: f1a28ee3aed3 Files: 24 Total size: 113.1 KB Directory structure: gitextract_m85owujp/ ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.adoc ├── pom.xml └── src/ ├── main/ │ └── java/ │ └── fi/ │ └── iki/ │ └── yak/ │ └── ts/ │ └── compression/ │ └── gorilla/ │ ├── BitInput.java │ ├── BitOutput.java │ ├── ByteBufferBitInput.java │ ├── ByteBufferBitOutput.java │ ├── Compressor.java │ ├── Decompressor.java │ ├── GorillaCompressor.java │ ├── GorillaDecompressor.java │ ├── LongArrayInput.java │ ├── LongArrayOutput.java │ ├── Pair.java │ ├── Predictor.java │ ├── ValueCompressor.java │ ├── ValueDecompressor.java │ ├── benchmark/ │ │ └── EncodingBenchmark.java │ └── predictors/ │ ├── DifferentialFCM.java │ └── LastValuePredictor.java └── test/ └── java/ └── fi/ └── iki/ └── yak/ └── ts/ └── compression/ └── gorilla/ ├── EncodeGorillaTest.java └── EncodeTest.java ================================================ FILE CONTENTS ================================================ ================================================ FILE: .gitignore ================================================ *.class # Mobile Tools for Java (J2ME) .mtj.tmp/ # Package Files # *.jar *.war *.ear # virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml hs_err_pid* .idea/ *.iml target/ ================================================ FILE: .travis.yml ================================================ # Enable container-based infrastructure sudo: false language: java install: mvn install -DskipTests -Dgpg.skip jdk: - oraclejdk8 ================================================ FILE: LICENSE ================================================ Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 
"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the 
Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. 
Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. 
To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ================================================ FILE: README.adoc ================================================ = Time series compression library, based on Facebook's Gorilla paper :source-language: java ifdef::env-github[] [link=https://travis-ci.org/burmanm/gorilla-tsc] image::https://travis-ci.org/burmanm/gorilla-tsc.svg?branch=master[Build Status,70,18] [link=https://maven-badges.herokuapp.com/maven-central/fi.iki.yak/compression-gorilla] image::https://img.shields.io/maven-central/v/fi.iki.yak/compression-gorilla.svg[Maven central] endif::[] == Introduction This is a Java-based implementation of the compression methods described in the paper link:http://www.vldb.org/pvldb/vol8/p1816-teller.pdf["Gorilla: A Fast, Scalable, In-Memory Time Series Database"]. For an explanation of how the compression methods work, read the excellent paper. 
In comparison to the original paper, this implementation allows using both integer values (`long`) as well as floating point values (`double`), both 64 bit in length. Versions 1.x and 2.x are not compatible with each other due to small differences to the stored array. Versions 2.x will support reading and storing older format also, see usage for more details. == Usage The included tests are a good source for examples. === Maven [source, xml] ---- fi.iki.yak compression-gorilla ---- You can find latest version from the maven logo link above. === Compressing To compress in the older 1.x format, use class ``Compressor``. For 2.x, use ``GorillaCompressor`` (recommended). ``LongArrayOutput`` is also recommended compared to ``ByteBufferBitOutput`` because of performance. One can supply alternative predictor to the ``GorillaCompressor`` if required. One such implementation is included, ``DifferentialFCM`` that provides better compression ratio for some data patterns. [source, java] ---- long now = LocalDateTime.now(ZoneOffset.UTC).truncatedTo(ChronoUnit.HOURS) .toInstant(ZoneOffset.UTC).toEpochMilli(); LongArrayOutput output = new LongArrayOutput(); GorillaCompressor c = new GorillaCompressor(now, output); ---- Compression class requires a block timestamp and an implementation of `BitOutput` interface. [source, java] ---- c.addValue(long, double); ---- Adds a new floating-point value to the time series. If you wish to store only long values, use `c.addValue(long, long)`, however do `not` mix these in the same series. After the block is ready, remember to call: [source, java] ---- c.close(); ---- which flushes the remaining data to the stream and writes closing information. === Decompressing To decompress from the older 1.x format, use class ``Decompressor``. For 2.x, use ``GorillaDecompressor`` (recommended). ``LongArrayInput`` is also recommended compared to ``ByteBufferBitInput`` because of performance if the 2.x format was used to compress the time series. 
If the original compressor used a different predictor than ``LastValuePredictor`` it must be defined in the constructor. [source, java] ---- LongArrayInput input = new LongArrayInput(byteBuffer); GorillaDecompressor d = new GorillaDecompressor(input); ---- To decompress a stream of bytes, supply `GorillaDecompressor` with a suitable implementation of `BitInput` interface. The LongArrayInput allows decompressing a long array or an existing `ByteBuffer` presentation with 8 byte word length. [source, java] ---- Pair pair = d.readPair(); ---- Requesting the next pair with `readPair()` returns the following series value or a `null` once the series is completely read. The pair is a simple placeholder object with `getTimestamp()` and `getDoubleValue()` or `getLongValue()`. == Performance The following performance is reached in a Linux VM running on VMware Player on a Windows 8.1 host. i7 2600K at 4GHz. The benchmark used is the ``EncodingBenchmark``. These results should not be directly compared to other implementations unless a similar dataset is used. Results are in millions of datapoints (timestamp + value) pairs per second. The values in this benchmark are in doubles (performance with longs is slightly higher, around ~2-3M/s). .Compression |=== |GorillaCompressor (2.0.0) |Compressor (1.1.0) |83.5M/s (~1.34GB/s) |31.2M/s (~499MB/s) |=== .Decompression |=== |GorillaDecompressor (2.0.0) |Decompressor (1.1.0) |77.9M/s (~1.25GB/s) |51.4M/s (~822MB/s) |=== Most of the differences in decompression / compression speed between versions come from implementation changes and not from the small changes to the output format. == Roadmap There were a few things I wanted to get to 2.0.0, but had to decide against due to lack of time. 
I will implement these later with potentially some breaking API changes: * Support timestamp-only compression (2.2.x) * Include ByteBufferLongOutput/ByteBufferLongInput in the package (2.2.x) * Move bit operations to inside the GorillaCompressor/GorillaDecompressor to allow easier usage with other allocators (2.2.x) == Internals === Differences to the original paper * The maximum number of leading zeros is stored with 6 bits to allow up to 63 leading zeros, which are necessary when storing long values. (>= 2.0.0) * Timestamp delta-of-deltas are stored by first turning them with ZigZag encoding into positive integers and then reducing them by one to fit in the necessary bits. In the decoding phase all the values are incremented by one to fetch the original value. (>= 2.0.0) * The compressed blocks are created with a 27 bit delta header (unlike in the original paper, which uses a 14 bit delta header). This allows using up to a one-day block size with millisecond precision. (>= 1.0.0) === Data structure Values must be inserted in increasing time order, out-of-order insertions are not supported. The included ByteBufferBitInput and ByteBufferBitOutput classes use big-endian order for the data. == Contributing File an issue and/or send a pull request. === License .... Copyright 2016-2018 Michael Burman and/or other contributors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. .... 
================================================ FILE: pom.xml ================================================ 4.0.0 fi.iki.yak compression-gorilla 2.1.2-SNAPSHOT Gorilla time series compression in Java Implements the time series compression methods as described in the Facebook's Gorilla paper https://github.com/burmanm/gorilla-tsc Apache License, Version 2.0 http://www.apache.org/licenses/LICENSE-2.0.txt repo 1.8 5.0.0-M4 1.0.0-M4 1.18 benchmark 2.5.3 https://github.com/burmanm/gorilla-tsc scm:git:git://github.com/burmam/gorilla-tsc.git scm:git:git@github.com:burmanm/gorilla-tsc.git HEAD yak@iki.fi Michael Burman https://github.com/burmanm burmanm ossrh https://oss.sonatype.org/service/local/staging/deploy/maven2/ org.junit.jupiter junit-jupiter-engine ${junit.jupiter.version} test org.openjdk.jmh jmh-core ${jmh.version} org.openjdk.jmh jmh-generator-annprocess ${jmh.version} provided maven-compiler-plugin 3.1 ${java.version} ${java.version} maven-surefire-plugin 2.19 org.junit.platform junit-platform-surefire-provider ${junit.platform.version} org.apache.maven.plugins maven-shade-plugin 2.4.3 package shade ${jar.name} org.openjdk.jmh.Main org.apache.maven.plugins maven-release-plugin ${maven.release.plugin.version} true false release deploy org.sonatype.plugins nexus-staging-maven-plugin 1.6.7 true ossrh https://oss.sonatype.org/ false org.apache.maven.plugins maven-javadoc-plugin jar org.apache.maven.plugins maven-source-plugin jar org.apache.maven.plugins maven-gpg-plugin 1.5 sign-artifacts verify sign ================================================ FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/BitInput.java ================================================ package fi.iki.yak.ts.compression.gorilla; /** * This interface is used for reading a compressed time series. * * @author Michael Burman */ public interface BitInput { /** * Reads the next bit and returns true if bit is set and false if not. 
* * @return true == 1, false == 0 */ boolean readBit(); /** * Returns a long that was stored in the next X bits in the stream. * * @param bits Amount of least significant bits to read from the stream. * @return reads the next long in the series using bits meaningful bits */ long getLong(int bits); /** * Read until next unset bit is found, or until maxBits has been reached. * * @param maxBits How many bits at maximum until returning * @return Integer value of the read bits */ int nextClearBit(int maxBits); } ================================================ FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/BitOutput.java ================================================ package fi.iki.yak.ts.compression.gorilla; /** * This interface is used to write a compressed timeseries. * * @author Michael Burman */ public interface BitOutput { /** * Stores a single bit and increases the bitcount by 1 */ void writeBit(); /** * Stores a 0 and increases the bitcount by 1 */ void skipBit(); /** * Write the given long value using the defined amount of least significant bits. * * @param value The long value to be written * @param bits How many bits are stored to the stream */ void writeBits(long value, int bits); /** * Flushes the current byte to the underlying stream */ void flush(); } ================================================ FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/ByteBufferBitInput.java ================================================ package fi.iki.yak.ts.compression.gorilla; import java.nio.ByteBuffer; /** * An implementation of BitInput that parses the data from byte array or existing ByteBuffer. * * @author Michael Burman */ public class ByteBufferBitInput implements BitInput { private ByteBuffer bb; private byte b; private int bitsLeft = 0; /** * Uses an existing ByteBuffer to read the stream. Starts at the ByteBuffer's current position. 
* * @param buf Use existing ByteBuffer */ public ByteBufferBitInput(ByteBuffer buf) { bb = buf; flipByte(); } public ByteBufferBitInput(byte[] input) { this(ByteBuffer.wrap(input)); } /** * Reads the next bit and returns a boolean representing it. * * @return true if the next bit is 1, otherwise 0. */ public boolean readBit() { boolean bit = ((b >> (bitsLeft - 1)) & 1) == 1; bitsLeft--; flipByte(); return bit; } /** * Reads a long from the next X bits that represent the least significant bits in the long value. * * @param bits How many next bits are read from the stream * @return long value that was read from the stream */ public long getLong(int bits) { long value = 0; while(bits > 0) { if(bits > bitsLeft || bits == Byte.SIZE) { // Take only the bitsLeft "least significant" bits byte d = (byte) (b & ((1<>> (bitsLeft - bits)) & ((1< 0) { int shift = bits - bitsLeft; if(shift >= 0) { b |= (byte) ((value >> shift) & ((1 << bitsLeft) - 1)); bits -= bitsLeft; bitsLeft = 0; } else { shift = bitsLeft - bits; b |= (byte) (value << shift); bitsLeft -= bits; bits = 0; } flipByte(); } } /** * Causes the currently handled byte to be written to the stream */ @Override public void flush() { bitsLeft = 0; flipByte(); // Causes write to the ByteBuffer } /** * Returns the underlying DirectByteBuffer * * @return ByteBuffer of type DirectByteBuffer */ public ByteBuffer getByteBuffer() { return this.bb; } } ================================================ FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/Compressor.java ================================================ package fi.iki.yak.ts.compression.gorilla; /** * Implements the time series compression as described in the Facebook's Gorilla Paper. Value compression * is for floating points only. 
* * @author Michael Burman */ public class Compressor { private int storedLeadingZeros = Integer.MAX_VALUE; private int storedTrailingZeros = 0; private long storedVal = 0; private long storedTimestamp = 0; private long storedDelta = 0; private long blockTimestamp = 0; public final static short FIRST_DELTA_BITS = 27; private BitOutput out; // We should have access to the series? public Compressor(long timestamp, BitOutput output) { blockTimestamp = timestamp; out = output; addHeader(timestamp); } private void addHeader(long timestamp) { // One byte: length of the first delta // One byte: precision of timestamps out.writeBits(timestamp, 64); } /** * Adds a new long value to the series. Note, values must be inserted in order. * * @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision) * @param value next floating point value in the series */ public void addValue(long timestamp, long value) { if(storedTimestamp == 0) { writeFirst(timestamp, value); } else { compressTimestamp(timestamp); compressValue(value); } } /** * Adds a new double value to the series. Note, values must be inserted in order. * * @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision) * @param value next floating point value in the series */ public void addValue(long timestamp, double value) { if(storedTimestamp == 0) { writeFirst(timestamp, Double.doubleToRawLongBits(value)); } else { compressTimestamp(timestamp); compressValue(Double.doubleToRawLongBits(value)); } } private void writeFirst(long timestamp, long value) { storedDelta = timestamp - blockTimestamp; storedTimestamp = timestamp; storedVal = value; out.writeBits(storedDelta, FIRST_DELTA_BITS); out.writeBits(storedVal, 64); } /** * Closes the block and writes the remaining stuff to the BitOutput. 
*/ public void close() { // These are selected to test interoperability and correctness of the solution, this can be read with go-tsz out.writeBits(0x0F, 4); out.writeBits(0xFFFFFFFF, 32); out.skipBit(); out.flush(); } /** * Difference to the original Facebook paper, we store the first delta as 27 bits to allow * millisecond accuracy for a one day block. * * Also, the timestamp delta-delta is not good for millisecond compressions.. * * @param timestamp epoch */ private void compressTimestamp(long timestamp) { // a) Calculate the delta of delta long newDelta = (timestamp - storedTimestamp); long deltaD = newDelta - storedDelta; // If delta is zero, write single 0 bit if(deltaD == 0) { out.skipBit(); } else if(deltaD >= -63 && deltaD <= 64) { out.writeBits(0x02, 2); // store '10' out.writeBits(deltaD, 7); // Using 7 bits, store the value.. } else if(deltaD >= -255 && deltaD <= 256) { out.writeBits(0x06, 3); // store '110' out.writeBits(deltaD, 9); // Use 9 bits } else if(deltaD >= -2047 && deltaD <= 2048) { out.writeBits(0x0E, 4); // store '1110' out.writeBits(deltaD, 12); // Use 12 bits } else { out.writeBits(0x0F, 4); // Store '1111' out.writeBits(deltaD, 32); // Store delta using 32 bits } storedDelta = newDelta; storedTimestamp = timestamp; } private void compressValue(long value) { // TODO Fix already compiled into a big method long xor = storedVal ^ value; if(xor == 0) { // Write 0 out.skipBit(); } else { int leadingZeros = Long.numberOfLeadingZeros(xor); int trailingZeros = Long.numberOfTrailingZeros(xor); // Check overflow of leading? Can't be 32! 
if(leadingZeros >= 32) { leadingZeros = 31; } // Store bit '1' out.writeBit(); if(leadingZeros >= storedLeadingZeros && trailingZeros >= storedTrailingZeros) { writeExistingLeading(xor); } else { writeNewLeading(xor, leadingZeros, trailingZeros); } } storedVal = value; } /** * If there at least as many leading zeros and as many trailing zeros as previous value, control bit = 0 (type a) * store the meaningful XORed value * * @param xor XOR between previous value and current */ private void writeExistingLeading(long xor) { out.skipBit(); int significantBits = 64 - storedLeadingZeros - storedTrailingZeros; out.writeBits(xor >>> storedTrailingZeros, significantBits); } /** * store the length of the number of leading zeros in the next 5 bits * store length of the meaningful XORed value in the next 6 bits, * store the meaningful bits of the XORed value * (type b) * * @param xor XOR between previous value and current * @param leadingZeros New leading zeros * @param trailingZeros New trailing zeros */ private void writeNewLeading(long xor, int leadingZeros, int trailingZeros) { out.writeBit(); out.writeBits(leadingZeros, 5); // Number of leading zeros in the next 5 bits int significantBits = 64 - leadingZeros - trailingZeros; out.writeBits(significantBits, 6); // Length of meaningful bits in the next 6 bits out.writeBits(xor >>> trailingZeros, significantBits); // Store the meaningful bits of XOR storedLeadingZeros = leadingZeros; storedTrailingZeros = trailingZeros; } } ================================================ FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/Decompressor.java ================================================ package fi.iki.yak.ts.compression.gorilla; /** * Decompresses a compressed stream created by the Compressor. Returns pairs of timestamp and floating point value. 
* * @author Michael Burman */ public class Decompressor { private int storedLeadingZeros = Integer.MAX_VALUE; private int storedTrailingZeros = 0; private long storedVal = 0; private long storedTimestamp = 0; private long storedDelta = 0; private long blockTimestamp = 0; private boolean endOfStream = false; private BitInput in; public Decompressor(BitInput input) { in = input; readHeader(); } private void readHeader() { blockTimestamp = in.getLong(64); } /** * Returns the next pair in the time series, if available. * * @return Pair if there's next value, null if series is done. */ public Pair readPair() { next(); if(endOfStream) { return null; } return new Pair(storedTimestamp, storedVal); } private void next() { if (storedTimestamp == 0) { // First item to read storedDelta = in.getLong(Compressor.FIRST_DELTA_BITS); if(storedDelta == (1<<27) - 1) { endOfStream = true; return; } storedVal = in.getLong(64); storedTimestamp = blockTimestamp + storedDelta; } else { nextTimestamp(); } } private int bitsToRead() { int val = in.nextClearBit(4); int toRead = 0; switch(val) { case 0x00: break; case 0x02: toRead = 7; // '10' break; case 0x06: toRead = 9; // '110' break; case 0x0e: toRead = 12; break; case 0x0F: toRead = 32; break; } return toRead; } private void nextTimestamp() { // Next, read timestamp long deltaDelta = 0; int toRead = bitsToRead(); if (toRead > 0) { deltaDelta = in.getLong(toRead); if(toRead == 32) { if ((int) deltaDelta == 0xFFFFFFFF) { // End of stream endOfStream = true; return; } } else { // Turn "unsigned" long value back to signed one if(deltaDelta > (1 << (toRead - 1))) { deltaDelta -= (1 << toRead); } } deltaDelta = (int) deltaDelta; } storedDelta = storedDelta + deltaDelta; storedTimestamp = storedDelta + storedTimestamp; nextValue(); } private void nextValue() { // Read value if (in.readBit()) { // else -> same value as before if (in.readBit()) { // New leading and trailing zeros storedLeadingZeros = (int) in.getLong(5); byte significantBits = 
(byte) in.getLong(6); if(significantBits == 0) { significantBits = 64; } storedTrailingZeros = 64 - significantBits - storedLeadingZeros; } long value = in.getLong(64 - storedLeadingZeros - storedTrailingZeros); value <<= storedTrailingZeros; value = storedVal ^ value; storedVal = value; } } } ================================================ FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/GorillaCompressor.java ================================================ package fi.iki.yak.ts.compression.gorilla; import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor; /** * Implements a slightly modified version of the time series compression as described in the Facebook's Gorilla * Paper. * * @author Michael Burman */ public class GorillaCompressor { private long storedTimestamp = 0; private int storedDelta = 0; private long blockTimestamp = 0; public final static int FIRST_DELTA_BITS = 27; private static int DELTAD_7_MASK = 0x02 << 7; private static int DELTAD_9_MASK = 0x06 << 9; private static int DELTAD_12_MASK = 0x0E << 12; private BitOutput out; private ValueCompressor valueCompressor; public GorillaCompressor(long timestamp, BitOutput output) { this(timestamp, output, new LastValuePredictor()); } public GorillaCompressor(long timestamp, BitOutput output, Predictor predictor) { blockTimestamp = timestamp; out = output; addHeader(timestamp); this.valueCompressor = new ValueCompressor(output, predictor); } private void addHeader(long timestamp) { out.writeBits(timestamp, 64); } /** * Adds a new long value to the series. Note, values must be inserted in order. 
*
     * @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision)
     * @param value next long value in the series (stored verbatim, not converted to double bits)
     */
    public void addValue(long timestamp, long value) {
        if(storedTimestamp == 0) {
            writeFirst(timestamp, value);
        } else {
            compressTimestamp(timestamp);
            valueCompressor.compressValue(value);
        }
    }

    /**
     * Adds a new double value to the series. Note, values must be inserted in order.
     *
     * @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision)
     * @param value next floating point value in the series
     */
    public void addValue(long timestamp, double value) {
        if(storedTimestamp == 0) {
            writeFirst(timestamp, Double.doubleToRawLongBits(value));
            return;
        }
        compressTimestamp(timestamp);
        valueCompressor.compressValue(Double.doubleToRawLongBits(value));
    }

    // First pair of the block: delta from the block start in FIRST_DELTA_BITS bits,
    // value written out in full through the value compressor.
    private void writeFirst(long timestamp, long value) {
        storedDelta = (int) (timestamp - blockTimestamp);
        storedTimestamp = timestamp;

        out.writeBits(storedDelta, FIRST_DELTA_BITS);
        valueCompressor.writeFirst(value);
    }

    /**
     * Closes the block and writes the remaining stuff to the BitOutput.
     */
    public void close() {
        // End marker: '1111' control code followed by 32 one-bits, then flush the word
        out.writeBits(0x0F, 4);
        out.writeBits(0xFFFFFFFF, 32);
        out.skipBit();
        out.flush();
    }

    /**
     * Difference to the original Facebook paper, we store the first delta as 27 bits to allow
     * millisecond accuracy for a one day block.
     *
     * Also, the timestamp delta-delta is not good for millisecond compressions..
*
     * @param timestamp epoch
     */
    private void compressTimestamp(long timestamp) {
        // a) Calculate the delta of delta
        int newDelta = (int) (timestamp - storedTimestamp);
        int deltaD = newDelta - storedDelta;

        if(deltaD == 0) {
            // Same delta as previous pair -> a single '0' bit
            out.skipBit();
        } else {
            deltaD = encodeZigZag32(deltaD);
            deltaD--; // Increase by one in the decompressing phase as we have one free bit
            int bitsRequired = 32 - Integer.numberOfLeadingZeros(deltaD); // Faster than highestSetBit

            // Turns to inlineable tableswitch
            switch(bitsRequired) {
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7:
                    deltaD |= DELTAD_7_MASK;
                    out.writeBits(deltaD, 9);
                    break;
                case 8:
                case 9:
                    deltaD |= DELTAD_9_MASK;
                    out.writeBits(deltaD, 12);
                    break;
                case 10:
                case 11:
                case 12:
                    out.writeBits(deltaD | DELTAD_12_MASK, 16);
                    break;
                default:
                    out.writeBits(0x0F, 4); // Store '1111'
                    out.writeBits(deltaD, 32); // Store delta using 32 bits
                    break;
            }
            storedDelta = newDelta;
        }

        storedTimestamp = timestamp;
    }

    // START: From protobuf

    /**
     * Encode a ZigZag-encoded 32-bit value. ZigZag encodes signed integers
     * into values that can be efficiently encoded with varint. (Otherwise,
     * negative values must be sign-extended to 64 bits to be varint encoded,
     * thus always taking 10 bytes on the wire.)
     *
     * @param n A signed 32-bit integer.
     * @return An unsigned 32-bit integer, stored in a signed int because
     * Java has no explicit unsigned support.
     */
    public static int encodeZigZag32(final int n) {
        // Note: the right-shift must be arithmetic
        return (n << 1) ^ (n >> 31);
    }

    // END: From protobuf
}

================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/GorillaDecompressor.java
================================================
package fi.iki.yak.ts.compression.gorilla;

import java.util.stream.Stream;

import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor;

/**
 * Decompresses a compressed stream created by the GorillaCompressor.
 *
 * @author Michael Burman
 */
public class GorillaDecompressor {

    private long storedTimestamp = 0;
    private long storedDelta = 0;

    private long blockTimestamp = 0;
    private long storedVal = 0;

    private boolean endOfStream = false;

    private final BitInput in;
    private final ValueDecompressor decompressor;

    public GorillaDecompressor(BitInput input) {
        this(input, new LastValuePredictor());
    }

    public GorillaDecompressor(BitInput input, Predictor predictor) {
        in = input;
        readHeader();
        this.decompressor = new ValueDecompressor(input, predictor);
    }

    // Block header is the 64-bit block start timestamp written by GorillaCompressor
    private void readHeader() {
        blockTimestamp = in.getLong(64);
    }

    /**
     * Returns the next pair in the time series, if available.
     *
     * @return Pair if there's next value, null if series is done.
     */
    public Pair readPair() {
        next();
        if(endOfStream) {
            return null;
        }
        Pair pair = new Pair(storedTimestamp, storedVal);
        return pair;
    }

    private void next() {
        // TODO I could implement a non-streaming solution also.. is there ever a need for streaming solution?

        if(storedTimestamp == 0) {
            first();
            return;
        }

        nextTimestamp();
    }

    private void first() {
        // First item to read
        storedDelta = in.getLong(Compressor.FIRST_DELTA_BITS);

        // All ones in the first-delta slot marks an empty stream
        if(storedDelta == (1<<27) - 1) {
            endOfStream = true;
            return;
        }
        storedVal = decompressor.readFirst();
//        storedVal = in.getLong(64);
        storedTimestamp = blockTimestamp + storedDelta;
    }

    private void nextTimestamp() {
        // Next, read timestamp
        int readInstruction = in.nextClearBit(4);
        long deltaDelta;

        switch(readInstruction) {
            case 0x00:
                // '0' -> delta unchanged; only a value follows
                storedTimestamp = storedDelta + storedTimestamp;
                storedVal = decompressor.nextValue();
                return;
            case 0x02:
                deltaDelta = in.getLong(7);
                break;
            case 0x06:
                deltaDelta = in.getLong(9);
                break;
            case 0x0e:
                deltaDelta = in.getLong(12);
                break;
            case 0x0F:
                deltaDelta = in.getLong(32);
                // For storage save.. if this is the last available word, check if remaining bits are all 1
                if ((int) deltaDelta == 0xFFFFFFFF) {
                    // End of stream
                    endOfStream = true;
                    return;
                }
                break;
            default:
                return;
        }

        // Compressor stored (zigzag - 1) to gain one free bit; undo both transforms
        deltaDelta++;
        deltaDelta = decodeZigZag32((int) deltaDelta);
        storedDelta = storedDelta + deltaDelta;

        storedTimestamp = storedDelta + storedTimestamp;
        storedVal = decompressor.nextValue();
    }

    // START: From protobuf

    /**
     * Decode a ZigZag-encoded 32-bit value. ZigZag encodes signed integers into values that can be
     * efficiently encoded with varint. (Otherwise, negative values must be sign-extended to 64 bits
     * to be varint encoded, thus always taking 10 bytes on the wire.)
     *
     * @param n An unsigned 32-bit integer, stored in a signed int because Java has no explicit
     * unsigned support.
     * @return A signed 32-bit integer.
     */
    public static int decodeZigZag32(final int n) {
        return (n >>> 1) ^ -(n & 1);
    }

    // END: From protobuf
}

================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/LongArrayInput.java
================================================
/*
 * Copyright 2016 Red Hat, Inc. and/or its affiliates
 * and other contributors as indicated by the @author tags.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/
package fi.iki.yak.ts.compression.gorilla;

/**
 * Implements on-heap long array input stream
 *
 * @author Michael Burman
 */
public class LongArrayInput implements BitInput {
    private final long[] longArray;

    // TODO Investigate also the ByteBuffer performance here.. or Unsafe
    // lB is the current 64-bit word being consumed; bitsLeft counts unread bits in it
    private long lB;
    private int position = 0;
    private int bitsLeft = 0;

    public LongArrayInput(long[] array) {
        this.longArray = array;
        flipByte();
    }

    // Reads a single bit from the current word, refilling from the array when exhausted
    @Override
    public boolean readBit() {
        boolean bit = (lB & LongArrayOutput.BIT_SET_MASK[bitsLeft - 1]) != 0;
        bitsLeft--;
        checkAndFlipByte();
        return bit;
    }

    // Loads the next word from the backing array
    private void flipByte() {
        lB = longArray[position++];
        bitsLeft = Long.SIZE;
    }

    private void checkAndFlipByte() {
        if(bitsLeft == 0) {
            flipByte();
        }
    }

    // Reads the requested number of bits (max 64), possibly spanning two words
    @Override
    public long getLong(int bits) {
        long value;
        if(bits <= bitsLeft) {
            // We can read from this word only
            // Shift to correct position and take only n least significant bits
            value = (lB >>> (bitsLeft - bits)) & LongArrayOutput.MASK_ARRAY[bits - 1];
            bitsLeft -= bits; // We ate n bits from it
            checkAndFlipByte();
        } else {
            // This word and next one, no more (max bits is 64)
            value = lB & LongArrayOutput.MASK_ARRAY[bitsLeft - 1]; // Read what's left first
            bits -= bitsLeft;
            flipByte(); // We need the next one
            value <<= bits; // Give n bits of space to value
            value |= (lB >>> (bitsLeft - bits));
            bitsLeft -= bits;
        }
        return value;
    }

    // Reads bits until a zero bit or maxBits is reached; returns the bit pattern read,
    // e.g. '110' -> 0x06. Used to decode the unary-ish control codes.
    @Override
    public int nextClearBit(int maxBits) {
        int val = 0x00;

        for(int i = 0; i < maxBits; i++) {
            val <<= 1;
            // TODO This loop has too many branches and unnecessary boolean casts
            boolean bit = readBit();

            if(bit) {
                val |= 0x01;
            } else {
                break;
            }
        }
        return val;
    }
}

================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/LongArrayOutput.java
================================================
package fi.iki.yak.ts.compression.gorilla;

import java.util.Arrays;

/**
 * An implementation of BitOutput interface that uses on-heap long array.
*
 * @author Michael Burman
 */
public class LongArrayOutput implements BitOutput {
    public static final int DEFAULT_ALLOCATION = 256;

    private long[] longArray;
    private int position = 0;

    // Current word under construction and how many bits remain free in it
    protected long lB;
    protected int bitsLeft = Long.SIZE;

    // MASK_ARRAY[i] has the (i+1) least significant bits set; BIT_SET_MASK[i] has only bit i set
    public final static long[] MASK_ARRAY;
    public final static long[] BIT_SET_MASK;

    // Java does not allow creating 64 bit masks with (1L << 64) - 1; (end result is 0)
    static {
        MASK_ARRAY = new long[64];
        long mask = 1;
        long value = 0;
        for (int i = 0; i < MASK_ARRAY.length; i++) {
            value = value | mask;
            mask = mask << 1;

            MASK_ARRAY[i] = value;
        }

        BIT_SET_MASK = new long[64];
        for(int i = 0; i < BIT_SET_MASK.length; i++) {
            BIT_SET_MASK[i] = (1L << i);
        }
    }

    /**
     * Creates a new LongArrayOutput with a default allocation of DEFAULT_ALLOCATION (256) longs.
     */
    public LongArrayOutput() {
        this(DEFAULT_ALLOCATION);
    }

    /**
     * Give an initialSize different than DEFAULT_ALLOCATION. Recommended to use values which are dividable by 4096.
     *
     * @param initialSize New initial size to use, in longs
     */
    public LongArrayOutput(int initialSize) {
        longArray = new long[initialSize];
        lB = longArray[position];
    }

    // Doubles the backing array, preserving existing content
    protected void expandAllocation() {
        long[] largerArray = new long[longArray.length*2];
        System.arraycopy(longArray, 0, largerArray, 0, longArray.length);
        longArray = largerArray;
    }

    private void checkAndFlipByte() {
        // Wish I could avoid this check in most cases...
        if(bitsLeft == 0) {
            flipWord();
        }
    }

    protected int capacityLeft() {
        return longArray.length - position;
    }

    // Commits the current word to the array and starts a fresh one, growing if needed
    protected void flipWord() {
        if(capacityLeft() <= 2) { // We want to have always at least 2 longs available
            expandAllocation();
        }
        flipWordWithoutExpandCheck();
    }

    protected void flipWordWithoutExpandCheck() {
        longArray[position] = lB;
        ++position;
        resetInternalWord();
    }

    private void resetInternalWord() {
        lB = 0;
        bitsLeft = Long.SIZE;
    }

    /**
     * Sets the next bit (or not) and moves the bit pointer.
*/
    public void writeBit() {
        lB |= BIT_SET_MASK[bitsLeft - 1];
        bitsLeft--;
        checkAndFlipByte();
    }

    // Writes a '0' bit by simply advancing the bit pointer (word starts zeroed)
    public void skipBit() {
        bitsLeft--;
        checkAndFlipByte();
    }

    /**
     * Writes the given long to the stream using bits amount of meaningful bits. This command does not
     * check input values, so if they're larger than what can fit the bits (you should check this before writing),
     * expect some weird results.
     *
     * @param value Value to be written to the stream
     * @param bits How many bits are stored to the stream
     */
    public void writeBits(long value, int bits) {
        if(bits <= bitsLeft) {
            // Fits entirely into the current word
            int lastBitPosition = bitsLeft - bits;
            lB |= (value << lastBitPosition) & MASK_ARRAY[bitsLeft - 1];
            bitsLeft -= bits;
            checkAndFlipByte(); // We could be at 0 bits left because of the <= condition .. would it be faster with
                                // the other one?
        } else {
            // Spans the current word and the next one
            value &= MASK_ARRAY[bits - 1];
            int firstBitPosition = bits - bitsLeft;
            lB |= value >>> firstBitPosition;
            bits -= bitsLeft;
            flipWord();
            lB |= value << (64 - bits);
            bitsLeft -= bits;
        }
    }

    /**
     * Causes the currently handled word to be written to the stream
     */
    @Override
    public void flush() {
        flipWord();
    }

    // Returns a snapshot of the written words; the trailing element is the in-progress word
    // NOTE(review): after flush() the in-progress word is empty, so the copy may carry one extra zero word
    public long[] getLongArray() {
        long[] copy = Arrays.copyOf(longArray, position + 1);
        copy[copy.length - 1] = lB;
        return copy;
    }
}

================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/Pair.java
================================================
package fi.iki.yak.ts.compression.gorilla;

/**
 * Pair is an extracted timestamp,value pair from the stream
 *
 * @author Michael Burman
 */
public class Pair {
    private long timestamp;
    // Raw 64 bits of the value; interpret via getDoubleValue() or getLongValue()
    private long value;

    public Pair(long timestamp, long value) {
        this.timestamp = timestamp;
        this.value = value;
    }

    public long getTimestamp() {
        return timestamp;
    }

    // Reinterprets the stored bits as a double
    public double getDoubleValue() {
        return Double.longBitsToDouble(value);
    }

    public long getLongValue() {
        return value;
    }
}

================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/Predictor.java
================================================
/*
 * Copyright 2017 Red Hat, Inc. and/or its affiliates
 * and other contributors as indicated by the @author tags.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package fi.iki.yak.ts.compression.gorilla;

/**
 * Value predictor contract used by the value (de)compressor; the XOR of prediction
 * and actual value is what gets bit-packed.
 *
 * @author miburman
 */
public interface Predictor {

    /**
     * Give the real value
     *
     * @param value Long / bits of Double
     */
    void update(long value);

    /**
     * Predicts the next value
     *
     * @return Predicted value
     */
    long predict();
}

================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/ValueCompressor.java
================================================
package fi.iki.yak.ts.compression.gorilla;

import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor;

/**
 * ValueCompressor for the Gorilla encoding format.
Supply with long presentation of the value,
 * in case of doubles use Double.doubleToRawLongBits(value)
 *
 * @author Michael Burman
 */
public class ValueCompressor {
    private int storedLeadingZeros = Integer.MAX_VALUE;
    private int storedTrailingZeros = 0;

    private Predictor predictor;
    private BitOutput out;

    public ValueCompressor(BitOutput out) {
        this(out, new LastValuePredictor());
    }

    public ValueCompressor(BitOutput out, Predictor predictor) {
        this.out = out;
        this.predictor = predictor;
    }

    // First value of the block is stored in full 64 bits and primes the predictor
    void writeFirst(long value) {
        predictor.update(value);
        out.writeBits(value, 64);
    }

    protected void compressValue(long value) {
        // In original Gorilla, Last-Value predictor is used
        long diff = predictor.predict() ^ value;
        predictor.update(value);

        if(diff == 0) {
            // Write 0
            out.skipBit();
        } else {
            int leadingZeros = Long.numberOfLeadingZeros(diff);
            int trailingZeros = Long.numberOfTrailingZeros(diff);

            out.writeBit(); // Optimize to writeNewLeading / writeExistingLeading?

            if(leadingZeros >= storedLeadingZeros && trailingZeros >= storedTrailingZeros) {
                writeExistingLeading(diff);
            } else {
                writeNewLeading(diff, leadingZeros, trailingZeros);
            }
        }
    }

    /**
     * If there at least as many leading zeros and as many trailing zeros as previous value, control bit = 0 (type a)
     * store the meaningful XORed value
     *
     * @param xor XOR between previous value and current
     */
    private void writeExistingLeading(long xor) {
        out.skipBit();

        int significantBits = 64 - storedLeadingZeros - storedTrailingZeros;
        xor >>>= storedTrailingZeros;
        out.writeBits(xor, significantBits);
    }

    /**
     * store the length of the number of leading zeros in the next 5 bits
     * store length of the meaningful XORed value in the next 6 bits,
     * store the meaningful bits of the XORed value
     * (type b)
     *
     * @param xor XOR between previous value and current
     * @param leadingZeros New leading zeros
     * @param trailingZeros New trailing zeros
     */
    private void writeNewLeading(long xor, int leadingZeros, int trailingZeros) {
        out.writeBit();

        // Different from version 1.x, use (significantBits - 1) in storage - avoids a branch
        int significantBits = 64 - leadingZeros - trailingZeros;

        // Different from original, bits 5 -> 6, avoids a branch, allows storing small longs
        out.writeBits(leadingZeros, 6); // Number of leading zeros in the next 6 bits
        out.writeBits(significantBits - 1, 6); // Length of meaningful bits in the next 6 bits
        out.writeBits(xor >>> trailingZeros, significantBits); // Store the meaningful bits of XOR

        storedLeadingZeros = leadingZeros;
        storedTrailingZeros = trailingZeros;
    }
}

================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/ValueDecompressor.java
================================================
package fi.iki.yak.ts.compression.gorilla;

import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor;

/**
 * Value decompressor for Gorilla encoded values
 *
 * @author Michael Burman
 */
public class ValueDecompressor {
    private final BitInput in;
    private final Predictor predictor;

    private int storedLeadingZeros = Integer.MAX_VALUE;
    private int storedTrailingZeros = 0;

    public ValueDecompressor(BitInput input) {
        this(input, new LastValuePredictor());
    }

    public ValueDecompressor(BitInput input, Predictor predictor) {
        this.in = input;
        this.predictor = predictor;
    }

    // First value is stored verbatim in 64 bits; primes the predictor
    public long readFirst() {
        long value = in.getLong(Long.SIZE);
        predictor.update(value);
        return value;
    }

    // Decodes the next value using the two control bits written by ValueCompressor:
    // '0' -> prediction was exact, '10' -> reuse previous leading/trailing zeros,
    // '11' -> new leading/trailing zero lengths follow.
    public long nextValue() {
        int val = in.nextClearBit(2);

        switch(val) {
            case 3:
                // New leading and trailing zeros
                storedLeadingZeros = (int) in.getLong(6);

                byte significantBits = (byte) in.getLong(6);
                significantBits++; // Compressor stored (significantBits - 1)

                storedTrailingZeros = Long.SIZE - significantBits - storedLeadingZeros;
                // missing break is intentional, we want to overflow to next one
            case 2:
                long value = in.getLong(Long.SIZE - storedLeadingZeros - storedTrailingZeros);
                value <<= storedTrailingZeros;
                value = predictor.predict() ^ value;
                predictor.update(value);
                return value;
        }
        // Control bit '0': value equals the prediction
        return predictor.predict();
    }
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/benchmark/EncodingBenchmark.java
================================================
package fi.iki.yak.ts.compression.gorilla.benchmark;

import fi.iki.yak.ts.compression.gorilla.*;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;

import java.nio.ByteBuffer;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;

/**
 * JMH benchmarks comparing the legacy ByteBuffer-based codec (Compressor/Decompressor)
 * against the long-array based Gorilla codec (GorillaCompressor/GorillaDecompressor).
 *
 * @author Michael Burman
 */
@BenchmarkMode(Mode.Throughput)
@State(Scope.Benchmark)
@Fork(1)
@Warmup(iterations = 5)
@Measurement(iterations = 10) // Reduce the amount of iterations if you start to see GC interference
public class EncodingBenchmark {

    @State(Scope.Benchmark)
    public static class DataGenerator {
        public List insertList; // NOTE(review): raw type; presumably List<Pair>
        @Param({"100000"})
        public int amountOfPoints;

        public long blockStart;

        public long[] uncompressedTimestamps;
        public long[] uncompressedValues;
        public double[] uncompressedDoubles;

        public long[] compressedArray;

        public ByteBuffer uncompressedBuffer;
        public ByteBuffer compressedBuffer;

        public List pairs; // NOTE(review): raw type; presumably List<Pair>

        // Generates one synthetic series (60 ms apart, value == index) and pre-compresses
        // it with both codecs so the decode benchmarks have input.
        @Setup(Level.Trial)
        public void setup() {
            blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
                    .toInstant(ZoneOffset.UTC).toEpochMilli();

            long now = blockStart + 60;
            uncompressedTimestamps = new long[amountOfPoints];
            uncompressedDoubles = new double[amountOfPoints];
            uncompressedValues = new long[amountOfPoints];

            insertList = new ArrayList<>(amountOfPoints);

            ByteBuffer bb = ByteBuffer.allocate(amountOfPoints * 2*Long.BYTES);

            pairs = new ArrayList<>(amountOfPoints);

            for(int i = 0; i < amountOfPoints; i++) {
                now += 60;
                bb.putLong(now);
                bb.putDouble(i);
                uncompressedTimestamps[i] = now;
                uncompressedDoubles[i] = i;
                uncompressedValues[i] = i;
                pairs.add(new Pair(now, i));
//                bb.putLong(i);
            }

            if (bb.hasArray()) {
                uncompressedBuffer = bb.duplicate();
                uncompressedBuffer.flip();
            }
            ByteBufferBitOutput output = new ByteBufferBitOutput();
            LongArrayOutput arrayOutput = new LongArrayOutput(amountOfPoints);

            Compressor c = new Compressor(blockStart, output);
            GorillaCompressor gc = new GorillaCompressor(blockStart, arrayOutput);

            bb.flip();

            for(int j = 0; j < amountOfPoints; j++) {
//                c.addValue(bb.getLong(), bb.getLong());
                c.addValue(bb.getLong(), bb.getDouble());
                gc.addValue(uncompressedTimestamps[j], uncompressedDoubles[j]);
            }

            gc.close();
            c.close();

            ByteBuffer byteBuffer = output.getByteBuffer();
            byteBuffer.flip();

            compressedBuffer = byteBuffer;
            compressedArray = arrayOutput.getLongArray();
        }
    }

//    @Benchmark
    @OperationsPerInvocation(100000)
    public void encodingBenchmark(DataGenerator dg) {
        ByteBufferBitOutput output = new ByteBufferBitOutput();
        Compressor c = new Compressor(dg.blockStart, output);

        for(int j = 0; j < dg.amountOfPoints; j++) {
            c.addValue(dg.uncompressedBuffer.getLong(), dg.uncompressedBuffer.getDouble());
        }
        c.close();
        dg.uncompressedBuffer.rewind();
    }

    @Benchmark
    @OperationsPerInvocation(100000)
    public void decodingBenchmark(DataGenerator dg, Blackhole bh) throws Exception {
        ByteBuffer duplicate = dg.compressedBuffer.duplicate();
        ByteBufferBitInput input = new ByteBufferBitInput(duplicate);
        Decompressor d = new Decompressor(input);
        Pair pair;
        while((pair = d.readPair()) != null) {
            bh.consume(pair);
        }
    }

    @Benchmark
    @OperationsPerInvocation(100000)
    public void encodingGorillaBenchmark(DataGenerator dg) {
        LongArrayOutput output = new LongArrayOutput();
        GorillaCompressor c = new GorillaCompressor(dg.blockStart, output);

        for(int j = 0; j < dg.amountOfPoints; j++) {
            c.addValue(dg.uncompressedTimestamps[j], dg.uncompressedDoubles[j]);
        }
        c.close();
    }

    @Benchmark
    @OperationsPerInvocation(100000)
    public void encodingGorillaBenchmarkLong(DataGenerator dg) {
        LongArrayOutput output = new LongArrayOutput();
        GorillaCompressor c = new GorillaCompressor(dg.blockStart, output);

        for(int j = 0; j < dg.amountOfPoints; j++) {
            c.addValue(dg.uncompressedTimestamps[j], dg.uncompressedValues[j]);
        }
        c.close();
    }

//    @Benchmark
//    @OperationsPerInvocation(100000)
//    public void encodingGorillaStreamBenchmark(DataGenerator dg) {
//        LongArrayOutput output = new LongArrayOutput();
//        GorillaCompressor c = new GorillaCompressor(dg.blockStart, output);
//
//        c.compressLongStream(dg.pairs.stream());
//        c.close();
//    }

    @Benchmark
    @OperationsPerInvocation(100000)
    public void decodingGorillaBenchmark(DataGenerator dg, Blackhole bh) throws Exception {
        LongArrayInput input = new LongArrayInput(dg.compressedArray);
        GorillaDecompressor d = new GorillaDecompressor(input);

        Pair pair;
        while((pair = d.readPair()) != null) {
            bh.consume(pair);
        }
    }
}

================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/predictors/DifferentialFCM.java
================================================
package fi.iki.yak.ts.compression.gorilla.predictors;

import fi.iki.yak.ts.compression.gorilla.Predictor;

/**
 * Differential Finite Context Method (DFCM) is a context based predictor.
*
 * @author Michael Burman
 */
public class DifferentialFCM implements Predictor {
    private long lastValue = 0L;
    private final long[] table;
    private int lastHash = 0;

    // Power-of-two table size minus one, used to wrap the hash into table bounds
    private final int mask;

    /**
     * Create a new DFCM predictor
     *
     * @param size Prediction table size, will be rounded to the next power of two and must be larger than 0
     */
    public DifferentialFCM(int size) {
        if(size > 0) {
            size--;
            // Round up to the next power of two via the highest set bit of (size - 1)
            int leadingZeros = Long.numberOfLeadingZeros(size);
            int newSize = 1 << (Long.SIZE - leadingZeros);

            this.table = new long[newSize];
            this.mask = newSize - 1;
        } else {
            throw new IllegalArgumentException("Size must be positive");
        }
    }

    @Override
    public void update(long value) {
        // Store the observed delta under the current context hash, then advance the hash
        table[lastHash] = value - lastValue;
        lastHash = (int) (((lastHash << 5) ^ ((value - lastValue) >> 50)) & this.mask);
        lastValue = value;
    }

    @Override
    public long predict() {
        // Prediction = last value plus the delta previously seen in this context
        return table[lastHash] + lastValue;
    }
}

================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/predictors/LastValuePredictor.java
================================================
package fi.iki.yak.ts.compression.gorilla.predictors;

import fi.iki.yak.ts.compression.gorilla.Predictor;

/**
 * Last-Value predictor, a computational predictor using previous value as a prediction for the next one
 *
 * @author Michael Burman
 */
public class LastValuePredictor implements Predictor {

    private long storedVal = 0;

    public LastValuePredictor() {}

    public void update(long value) {
        this.storedVal = value;
    }

    public long predict() {
        return storedVal;
    }
}

================================================
FILE: src/test/java/fi/iki/yak/ts/compression/gorilla/EncodeGorillaTest.java
================================================
package fi.iki.yak.ts.compression.gorilla;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;

import java.nio.ByteBuffer;
import java.time.LocalDateTime;
import java.time.Month;
import java.time.ZoneOffset;
import
java.time.temporal.ChronoUnit; import java.util.Arrays; import java.util.concurrent.ThreadLocalRandom; import org.junit.jupiter.api.Test; import fi.iki.yak.ts.compression.gorilla.predictors.DifferentialFCM; /** * These are generic tests to test that input matches the output after compression + decompression cycle, using * both the timestamp and value compression. * * @author Michael Burman */ public class EncodeGorillaTest { private void comparePairsToCompression(long blockTimestamp, Pair[] pairs) { LongArrayOutput output = new LongArrayOutput(); GorillaCompressor c = new GorillaCompressor(blockTimestamp, output); Arrays.stream(pairs).forEach(p -> c.addValue(p.getTimestamp(), p.getDoubleValue())); c.close(); LongArrayInput input = new LongArrayInput(output.getLongArray()); GorillaDecompressor d = new GorillaDecompressor(input); // Replace with stream once GorillaDecompressor supports it for(int i = 0; i < pairs.length; i++) { Pair pair = d.readPair(); assertEquals(pairs[i].getTimestamp(), pair.getTimestamp(), "Timestamp did not match"); assertEquals(pairs[i].getDoubleValue(), pair.getDoubleValue(), "Value did not match"); } assertNull(d.readPair()); } @Test void simpleEncodeAndDecodeTest() throws Exception { long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) .toInstant(ZoneOffset.UTC).toEpochMilli(); Pair[] pairs = { new Pair(now + 10, Double.doubleToRawLongBits(1.0)), new Pair(now + 20, Double.doubleToRawLongBits(-2.0)), new Pair(now + 28, Double.doubleToRawLongBits(-2.5)), new Pair(now + 84, Double.doubleToRawLongBits(65537)), new Pair(now + 400, Double.doubleToRawLongBits(2147483650.0)), new Pair(now + 2300, Double.doubleToRawLongBits(-16384)), new Pair(now + 16384, Double.doubleToRawLongBits(2.8)), new Pair(now + 16500, Double.doubleToRawLongBits(-38.0)) }; comparePairsToCompression(now, pairs); } @Test public void willItBlend() throws Exception { long blockTimestamp = 1500400800000L; Pair[] pairs = { new Pair(1500405481623L, 69087), new 
Pair(1500405488693L, 65640), new Pair(1500405495993L, 58155), new Pair(1500405503743L, 61025), new Pair(1500405511623L, 91156), new Pair(1500405519803L, 37516), new Pair(1500405528313L, 93515), new Pair(1500405537233L, 96226), new Pair(1500405546453L, 23833), new Pair(1500405556103L, 73186), new Pair(1500405566143L, 96947), new Pair(1500405576163L, 46927), new Pair(1500405586173L, 77954), new Pair(1500405596183L, 29302), new Pair(1500405606213L, 6700), new Pair(1500405616163L, 71971), new Pair(1500405625813L, 8528), new Pair(1500405635763L, 85321), new Pair(1500405645634L, 83229), new Pair(1500405655633L, 78298), new Pair(1500405665623L, 87122), new Pair(1500405675623L, 82055), new Pair(1500405685723L, 75067), new Pair(1500405695663L, 33680), new Pair(1500405705743L, 17576), new Pair(1500405715813L, 89701), new Pair(1500405725773L, 21427), new Pair(1500405735883L, 58255), new Pair(1500405745903L, 3768), new Pair(1500405755863L, 62086), new Pair(1500405765843L, 66965), new Pair(1500405775773L, 35801), new Pair(1500405785883L, 72169), new Pair(1500405795843L, 43089), new Pair(1500405805733L, 31418), new Pair(1500405815853L, 84781), new Pair(1500405825963L, 36103), new Pair(1500405836004L, 87431), new Pair(1500405845953L, 7379), new Pair(1500405855913L, 66919), new Pair(1500405865963L, 30906), new Pair(1500405875953L, 88630), new Pair(1500405885943L, 27546), new Pair(1500405896033L, 43813), new Pair(1500405906094L, 2124), new Pair(1500405916063L, 49399), new Pair(1500405926143L, 94577), new Pair(1500405936123L, 98459), new Pair(1500405946033L, 49457), new Pair(1500405956023L, 92838), new Pair(1500405966023L, 15628), new Pair(1500405976043L, 53916), new Pair(1500405986063L, 90387), new Pair(1500405996123L, 43176), new Pair(1500406006123L, 18838), new Pair(1500406016174L, 78847), new Pair(1500406026173L, 39591), new Pair(1500406036004L, 77070), new Pair(1500406045964L, 56788), new Pair(1500406056043L, 96706), new Pair(1500406066123L, 20756), new Pair(1500406076113L, 
64433), new Pair(1500406086133L, 45791), new Pair(1500406096123L, 75028), new Pair(1500406106193L, 55403), new Pair(1500406116213L, 36991), new Pair(1500406126073L, 92929), new Pair(1500406136103L, 60416), new Pair(1500406146183L, 55485), new Pair(1500406156383L, 53525), new Pair(1500406166313L, 96021), new Pair(1500406176414L, 22705), new Pair(1500406186613L, 89801), new Pair(1500406196543L, 51975), new Pair(1500406206483L, 86741), new Pair(1500406216483L, 22440), new Pair(1500406226433L, 51818), new Pair(1500406236403L, 61965), new Pair(1500406246413L, 19074), new Pair(1500406256494L, 54521), new Pair(1500406266413L, 59315), new Pair(1500406276303L, 19171), new Pair(1500406286213L, 98800), new Pair(1500406296183L, 7086), new Pair(1500406306103L, 60578), new Pair(1500406316073L, 96828), new Pair(1500406326143L, 83746), new Pair(1500406336153L, 85481), new Pair(1500406346113L, 22346), new Pair(1500406356133L, 80976), new Pair(1500406366065L, 43586), new Pair(1500406376074L, 82500), new Pair(1500406386184L, 13576), new Pair(1500406396113L, 77871), new Pair(1500406406094L, 60978), new Pair(1500406416203L, 35264), new Pair(1500406426323L, 79733), new Pair(1500406436343L, 29140), new Pair(1500406446323L, 7237), new Pair(1500406456344L, 52866), new Pair(1500406466393L, 88456), new Pair(1500406476493L, 33533), new Pair(1500406486524L, 96961), new Pair(1500406496453L, 16389), new Pair(1500406506453L, 31181), new Pair(1500406516433L, 63282), new Pair(1500406526433L, 92857), new Pair(1500406536413L, 4582), new Pair(1500406546383L, 46832), new Pair(1500406556473L, 6335), new Pair(1500406566413L, 44367), new Pair(1500406576513L, 84640), new Pair(1500406586523L, 36174), new Pair(1500406596553L, 40075), new Pair(1500406606603L, 80886), new Pair(1500406616623L, 43784), new Pair(1500406626623L, 25077), new Pair(1500406636723L, 18617), new Pair(1500406646723L, 72681), new Pair(1500406656723L, 84811), new Pair(1500406666783L, 90053), new Pair(1500406676685L, 25708), new 
Pair(1500406686713L, 57134), new Pair(1500406696673L, 87193), new Pair(1500406706743L, 66057), new Pair(1500406716724L, 51404), new Pair(1500406726753L, 90141), new Pair(1500406736813L, 10434), new Pair(1500406746803L, 29056), new Pair(1500406756833L, 48160), new Pair(1500406766924L, 96652), new Pair(1500406777113L, 64141), new Pair(1500406787113L, 22143), new Pair(1500406797093L, 20561), new Pair(1500406807113L, 66401), new Pair(1500406817283L, 76802), new Pair(1500406827284L, 37555), new Pair(1500406837323L, 63169), new Pair(1500406847463L, 45712), new Pair(1500406857513L, 44751), new Pair(1500406867523L, 98891), new Pair(1500406877523L, 38122), new Pair(1500406887623L, 46202), new Pair(1500406897703L, 5875), new Pair(1500406907663L, 17397), new Pair(1500406917603L, 39994), new Pair(1500406927633L, 82385), new Pair(1500406937623L, 15598), new Pair(1500406947693L, 36235), new Pair(1500406957703L, 97536), new Pair(1500406967673L, 28557), new Pair(1500406977723L, 13985), new Pair(1500406987663L, 64304), new Pair(1500406997573L, 83693), new Pair(1500407007494L, 6574), new Pair(1500407017493L, 25134), new Pair(1500407027503L, 50383), new Pair(1500407037523L, 55922), new Pair(1500407047603L, 73436), new Pair(1500407057473L, 68235), new Pair(1500407067553L, 1469), new Pair(1500407077463L, 44315), new Pair(1500407087463L, 95064), new Pair(1500407097443L, 1997), new Pair(1500407107473L, 17247), new Pair(1500407117453L, 42454), new Pair(1500407127413L, 73631), new Pair(1500407137363L, 96890), new Pair(1500407147343L, 43450), new Pair(1500407157363L, 42042), new Pair(1500407167403L, 83014), new Pair(1500407177473L, 32051), new Pair(1500407187523L, 69280), new Pair(1500407197495L, 21425), new Pair(1500407207453L, 93748), new Pair(1500407217413L, 64151), new Pair(1500407227443L, 38791), new Pair(1500407237463L, 5248), new Pair(1500407247523L, 92935), new Pair(1500407257513L, 18516), new Pair(1500407267584L, 98870), new Pair(1500407277573L, 82244), new Pair(1500407287723L, 
65464), new Pair(1500407297723L, 33801), new Pair(1500407307673L, 18331), new Pair(1500407317613L, 89744), new Pair(1500407327553L, 98460), new Pair(1500407337503L, 24709), new Pair(1500407347423L, 8407), new Pair(1500407357383L, 69451), new Pair(1500407367333L, 51100), new Pair(1500407377373L, 25309), new Pair(1500407387443L, 16148), new Pair(1500407397453L, 98974), new Pair(1500407407543L, 80284), new Pair(1500407417583L, 170), new Pair(1500407427453L, 34706), new Pair(1500407437433L, 39681), new Pair(1500407447603L, 6140), new Pair(1500407457513L, 64595), new Pair(1500407467564L, 59862), new Pair(1500407477563L, 53795), new Pair(1500407487593L, 83493), new Pair(1500407497584L, 90639), new Pair(1500407507623L, 16777), new Pair(1500407517613L, 11096), new Pair(1500407527673L, 38512), new Pair(1500407537963L, 52759), new Pair(1500407548023L, 79567), new Pair(1500407558033L, 48664), new Pair(1500407568113L, 10710), new Pair(1500407578164L, 25635), new Pair(1500407588213L, 40985), new Pair(1500407598163L, 94089), new Pair(1500407608163L, 50056), new Pair(1500407618223L, 15550), new Pair(1500407628143L, 78823), new Pair(1500407638223L, 9044), new Pair(1500407648173L, 20782), new Pair(1500407658023L, 86390), new Pair(1500407667903L, 79444), new Pair(1500407677903L, 84051), new Pair(1500407687923L, 91554), new Pair(1500407697913L, 58777), new Pair(1500407708003L, 89474), new Pair(1500407718083L, 94026), new Pair(1500407728034L, 41613), new Pair(1500407738083L, 64667), new Pair(1500407748034L, 5160), new Pair(1500407758003L, 45140), new Pair(1500407768033L, 53704), new Pair(1500407778083L, 68097), new Pair(1500407788043L, 81137), new Pair(1500407798023L, 59657), new Pair(1500407808033L, 56572), new Pair(1500407817983L, 1993), new Pair(1500407828063L, 62608), new Pair(1500407838213L, 76489), new Pair(1500407848203L, 22147), new Pair(1500407858253L, 92829), new Pair(1500407868073L, 48499), new Pair(1500407878053L, 89152), new Pair(1500407888073L, 9191), new 
Pair(1500407898033L, 49881), new Pair(1500407908113L, 96020), new Pair(1500407918213L, 90203), new Pair(1500407928234L, 32217), new Pair(1500407938253L, 94302), new Pair(1500407948293L, 83111), new Pair(1500407958234L, 75576), new Pair(1500407968073L, 5973), new Pair(1500407978023L, 5175), new Pair(1500407987923L, 63350), new Pair(1500407997833L, 44081) }; comparePairsToCompression(blockTimestamp, pairs); } /** * Tests encoding of similar floats, see https://github.com/dgryski/go-tsz/issues/4 for more information. */ @Test void testEncodeSimilarFloats() throws Exception { long now = LocalDateTime.of(2015, Month.MARCH, 02, 00, 00).toInstant(ZoneOffset.UTC).toEpochMilli(); LongArrayOutput output = new LongArrayOutput(); GorillaCompressor c = new GorillaCompressor(now, output); ByteBuffer bb = ByteBuffer.allocate(5 * 2*Long.BYTES); bb.putLong(now + 1); bb.putDouble(6.00065e+06); bb.putLong(now + 2); bb.putDouble(6.000656e+06); bb.putLong(now + 3); bb.putDouble(6.000657e+06); bb.putLong(now + 4); bb.putDouble(6.000659e+06); bb.putLong(now + 5); bb.putDouble(6.000661e+06); bb.flip(); for(int j = 0; j < 5; j++) { c.addValue(bb.getLong(), bb.getDouble()); } c.close(); bb.flip(); LongArrayInput input = new LongArrayInput(output.getLongArray()); GorillaDecompressor d = new GorillaDecompressor(input); // Replace with stream once GorillaDecompressor supports it for(int i = 0; i < 5; i++) { Pair pair = d.readPair(); assertEquals(bb.getLong(), pair.getTimestamp(), "Timestamp did not match"); assertEquals(bb.getDouble(), pair.getDoubleValue(), "Value did not match"); } assertNull(d.readPair()); } /** * Tests writing enough large amount of datapoints that causes the included LongArrayOutput to do * internal byte array expansion. 
*/ @Test void testEncodeLargeAmountOfData() throws Exception { // This test should trigger ByteBuffer reallocation int amountOfPoints = 100000; long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) .toInstant(ZoneOffset.UTC).toEpochMilli(); LongArrayOutput output = new LongArrayOutput(); long now = blockStart + 60; ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2*Long.BYTES); for(int i = 0; i < amountOfPoints; i++) { bb.putLong(now + i*60); bb.putDouble(i * Math.random()); } GorillaCompressor c = new GorillaCompressor(blockStart, output); bb.flip(); for(int j = 0; j < amountOfPoints; j++) { c.addValue(bb.getLong(), bb.getDouble()); } c.close(); bb.flip(); LongArrayInput input = new LongArrayInput(output.getLongArray()); GorillaDecompressor d = new GorillaDecompressor(input); for(int i = 0; i < amountOfPoints; i++) { long tStamp = bb.getLong(); double val = bb.getDouble(); Pair pair = d.readPair(); assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i); assertEquals(val, pair.getDoubleValue()); } assertNull(d.readPair()); } @Test void testEncodeLargeAmountOfDataOldBuffer() throws Exception { // This test should trigger ByteBuffer reallocation int amountOfPoints = 100000; long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) .toInstant(ZoneOffset.UTC).toEpochMilli(); ByteBufferBitOutput output = new ByteBufferBitOutput(); long now = blockStart + 60; ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2*Long.BYTES); for(int i = 0; i < amountOfPoints; i++) { bb.putLong(now + i*60); bb.putDouble(i * Math.random()); } GorillaCompressor c = new GorillaCompressor(blockStart, output); bb.flip(); for(int j = 0; j < amountOfPoints; j++) { c.addValue(bb.getLong(), bb.getDouble()); } c.close(); bb.flip(); ByteBuffer byteBuffer = output.getByteBuffer(); byteBuffer.flip(); ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer); GorillaDecompressor d = new GorillaDecompressor(input); for(int 
i = 0; i < amountOfPoints; i++) { long tStamp = bb.getLong(); double val = bb.getDouble(); Pair pair = d.readPair(); assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i); assertEquals(val, pair.getDoubleValue()); } assertNull(d.readPair()); } /** * Although not intended usage, an empty block should not cause errors */ @Test void testEmptyBlock() throws Exception { long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) .toInstant(ZoneOffset.UTC).toEpochMilli(); LongArrayOutput output = new LongArrayOutput(); GorillaCompressor c = new GorillaCompressor(now, output); c.close(); LongArrayInput input = new LongArrayInput(output.getLongArray()); GorillaDecompressor d = new GorillaDecompressor(input); assertNull(d.readPair()); } @Test void testCopyFlush() { long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) .toInstant(ZoneOffset.UTC).toEpochMilli(); LongArrayOutput output = new LongArrayOutput(); GorillaCompressor c = new GorillaCompressor(now, output); c.addValue(now + 1, 1.0); c.addValue(now + 2, 1.0); LongArrayInput input = new LongArrayInput(output.getLongArray()); GorillaDecompressor d = new GorillaDecompressor(input); assertEquals(now + 1, d.readPair().getTimestamp()); assertEquals(now + 2, d.readPair().getTimestamp()); } /** * Long values should be compressable and decompressable in the stream */ @Test void testLongEncoding() throws Exception { // This test should trigger ByteBuffer reallocation int amountOfPoints = 10000; long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) .toInstant(ZoneOffset.UTC).toEpochMilli(); LongArrayOutput output = new LongArrayOutput(); long now = blockStart + 60; ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2*Long.BYTES); for(int i = 0; i < amountOfPoints; i++) { bb.putLong(now + i*60); bb.putLong(ThreadLocalRandom.current().nextLong(Integer.MAX_VALUE)); } GorillaCompressor c = new GorillaCompressor(blockStart, output); bb.flip(); for(int j = 0; j < 
amountOfPoints; j++) { c.addValue(bb.getLong(), bb.getLong()); } c.close(); bb.flip(); LongArrayInput input = new LongArrayInput(output.getLongArray()); GorillaDecompressor d = new GorillaDecompressor(input); for(int i = 0; i < amountOfPoints; i++) { long tStamp = bb.getLong(); long val = bb.getLong(); Pair pair = d.readPair(); assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i); assertEquals(val, pair.getLongValue()); } assertNull(d.readPair()); } /** * Tests writing enough large amount of datapoints that causes the included LongArrayOutput to do * internal byte array expansion. */ @Test void testDifferentialFCM() throws Exception { // This test should trigger ByteBuffer reallocation int amountOfPoints = 100000; long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS) .toInstant(ZoneOffset.UTC).toEpochMilli(); LongArrayOutput output = new LongArrayOutput(); long now = blockStart + 60; ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2*Long.BYTES); for(int i = 0; i < amountOfPoints; i++) { bb.putLong(now + i*60); bb.putDouble(i * Math.random()); } GorillaCompressor c = new GorillaCompressor(blockStart, output, new DifferentialFCM(1024)); bb.flip(); for(int j = 0; j < amountOfPoints; j++) { c.addValue(bb.getLong(), bb.getDouble()); } c.close(); bb.flip(); LongArrayInput input = new LongArrayInput(output.getLongArray()); GorillaDecompressor d = new GorillaDecompressor(input, new DifferentialFCM(1024)); for(int i = 0; i < amountOfPoints; i++) { long tStamp = bb.getLong(); double val = bb.getDouble(); Pair pair = d.readPair(); assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i); assertEquals(val, pair.getDoubleValue()); } assertNull(d.readPair()); } } ================================================ FILE: src/test/java/fi/iki/yak/ts/compression/gorilla/EncodeTest.java ================================================ package fi.iki.yak.ts.compression.gorilla; import 
static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;

import java.nio.ByteBuffer;
import java.time.LocalDateTime;
import java.time.Month;
import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.concurrent.ThreadLocalRandom;

import org.junit.jupiter.api.Test;

/**
 * These are generic tests to test that input matches the output after compression + decompression cycle, using
 * both the timestamp and value compression.
 *
 * @author Michael Burman
 */
public class EncodeTest {

    /**
     * Compresses the given pairs with {@link Compressor}, decompresses them with {@link Decompressor},
     * and asserts that every timestamp and double value survives the round trip unchanged.
     *
     * @param blockTimestamp block start timestamp handed to the compressor
     * @param pairs          datapoints to compress; also serves as the expected decompressed output
     */
    private void comparePairsToCompression(long blockTimestamp, Pair[] pairs) {
        ByteBufferBitOutput output = new ByteBufferBitOutput();
        Compressor c = new Compressor(blockTimestamp, output);
        Arrays.stream(pairs).forEach(p -> c.addValue(p.getTimestamp(), p.getDoubleValue()));
        c.close();

        // flip() readies the freshly written buffer for reading by the decompressor.
        ByteBuffer byteBuffer = output.getByteBuffer();
        byteBuffer.flip();

        ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer);
        Decompressor d = new Decompressor(input);

        // Replace with stream once decompressor supports it
        for(int i = 0; i < pairs.length; i++) {
            Pair pair = d.readPair();
            assertEquals(pairs[i].getTimestamp(), pair.getTimestamp(), "Timestamp did not match");
            assertEquals(pairs[i].getDoubleValue(), pair.getDoubleValue(), "Value did not match");
        }
        // After the last pair, the stream must signal its end with null.
        assertNull(d.readPair());
    }

    @Test
    void simpleEncodeAndDecodeTest() throws Exception {
        long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
                .toInstant(ZoneOffset.UTC).toEpochMilli();

        // Mix of positive/negative doubles and integer-valued doubles with varying timestamp deltas.
        Pair[] pairs = {
                new Pair(now + 10, Double.doubleToRawLongBits(1.0)),
                new Pair(now + 20, Double.doubleToRawLongBits(-2.0)),
                new Pair(now + 28, Double.doubleToRawLongBits(-2.5)),
                new Pair(now + 84, Double.doubleToRawLongBits(65537)),
                new Pair(now + 400, Double.doubleToRawLongBits(2147483650.0)),
                new Pair(now + 2300, Double.doubleToRawLongBits(-16384)),
                new Pair(now + 16384, Double.doubleToRawLongBits(2.8)),
                new Pair(now + 16500, Double.doubleToRawLongBits(-38.0))
        };
comparePairsToCompression(now, pairs); } @Test public void willItBlend() throws Exception { long blockTimestamp = 1500400800000L; Pair[] pairs = { new Pair(1500405481623L, 69087), new Pair(1500405488693L, 65640), new Pair(1500405495993L, 58155), new Pair(1500405503743L, 61025), new Pair(1500405511623L, 91156), new Pair(1500405519803L, 37516), new Pair(1500405528313L, 93515), new Pair(1500405537233L, 96226), new Pair(1500405546453L, 23833), new Pair(1500405556103L, 73186), new Pair(1500405566143L, 96947), new Pair(1500405576163L, 46927), new Pair(1500405586173L, 77954), new Pair(1500405596183L, 29302), new Pair(1500405606213L, 6700), new Pair(1500405616163L, 71971), new Pair(1500405625813L, 8528), new Pair(1500405635763L, 85321), new Pair(1500405645634L, 83229), new Pair(1500405655633L, 78298), new Pair(1500405665623L, 87122), new Pair(1500405675623L, 82055), new Pair(1500405685723L, 75067), new Pair(1500405695663L, 33680), new Pair(1500405705743L, 17576), new Pair(1500405715813L, 89701), new Pair(1500405725773L, 21427), new Pair(1500405735883L, 58255), new Pair(1500405745903L, 3768), new Pair(1500405755863L, 62086), new Pair(1500405765843L, 66965), new Pair(1500405775773L, 35801), new Pair(1500405785883L, 72169), new Pair(1500405795843L, 43089), new Pair(1500405805733L, 31418), new Pair(1500405815853L, 84781), new Pair(1500405825963L, 36103), new Pair(1500405836004L, 87431), new Pair(1500405845953L, 7379), new Pair(1500405855913L, 66919), new Pair(1500405865963L, 30906), new Pair(1500405875953L, 88630), new Pair(1500405885943L, 27546), new Pair(1500405896033L, 43813), new Pair(1500405906094L, 2124), new Pair(1500405916063L, 49399), new Pair(1500405926143L, 94577), new Pair(1500405936123L, 98459), new Pair(1500405946033L, 49457), new Pair(1500405956023L, 92838), new Pair(1500405966023L, 15628), new Pair(1500405976043L, 53916), new Pair(1500405986063L, 90387), new Pair(1500405996123L, 43176), new Pair(1500406006123L, 18838), new Pair(1500406016174L, 78847), new 
Pair(1500406026173L, 39591), new Pair(1500406036004L, 77070), new Pair(1500406045964L, 56788), new Pair(1500406056043L, 96706), new Pair(1500406066123L, 20756), new Pair(1500406076113L, 64433), new Pair(1500406086133L, 45791), new Pair(1500406096123L, 75028), new Pair(1500406106193L, 55403), new Pair(1500406116213L, 36991), new Pair(1500406126073L, 92929), new Pair(1500406136103L, 60416), new Pair(1500406146183L, 55485), new Pair(1500406156383L, 53525), new Pair(1500406166313L, 96021), new Pair(1500406176414L, 22705), new Pair(1500406186613L, 89801), new Pair(1500406196543L, 51975), new Pair(1500406206483L, 86741), new Pair(1500406216483L, 22440), new Pair(1500406226433L, 51818), new Pair(1500406236403L, 61965), new Pair(1500406246413L, 19074), new Pair(1500406256494L, 54521), new Pair(1500406266413L, 59315), new Pair(1500406276303L, 19171), new Pair(1500406286213L, 98800), new Pair(1500406296183L, 7086), new Pair(1500406306103L, 60578), new Pair(1500406316073L, 96828), new Pair(1500406326143L, 83746), new Pair(1500406336153L, 85481), new Pair(1500406346113L, 22346), new Pair(1500406356133L, 80976), new Pair(1500406366065L, 43586), new Pair(1500406376074L, 82500), new Pair(1500406386184L, 13576), new Pair(1500406396113L, 77871), new Pair(1500406406094L, 60978), new Pair(1500406416203L, 35264), new Pair(1500406426323L, 79733), new Pair(1500406436343L, 29140), new Pair(1500406446323L, 7237), new Pair(1500406456344L, 52866), new Pair(1500406466393L, 88456), new Pair(1500406476493L, 33533), new Pair(1500406486524L, 96961), new Pair(1500406496453L, 16389), new Pair(1500406506453L, 31181), new Pair(1500406516433L, 63282), new Pair(1500406526433L, 92857), new Pair(1500406536413L, 4582), new Pair(1500406546383L, 46832), new Pair(1500406556473L, 6335), new Pair(1500406566413L, 44367), new Pair(1500406576513L, 84640), new Pair(1500406586523L, 36174), new Pair(1500406596553L, 40075), new Pair(1500406606603L, 80886), new Pair(1500406616623L, 43784), new Pair(1500406626623L, 
25077), new Pair(1500406636723L, 18617), new Pair(1500406646723L, 72681), new Pair(1500406656723L, 84811), new Pair(1500406666783L, 90053), new Pair(1500406676685L, 25708), new Pair(1500406686713L, 57134), new Pair(1500406696673L, 87193), new Pair(1500406706743L, 66057), new Pair(1500406716724L, 51404), new Pair(1500406726753L, 90141), new Pair(1500406736813L, 10434), new Pair(1500406746803L, 29056), new Pair(1500406756833L, 48160), new Pair(1500406766924L, 96652), new Pair(1500406777113L, 64141), new Pair(1500406787113L, 22143), new Pair(1500406797093L, 20561), new Pair(1500406807113L, 66401), new Pair(1500406817283L, 76802), new Pair(1500406827284L, 37555), new Pair(1500406837323L, 63169), new Pair(1500406847463L, 45712), new Pair(1500406857513L, 44751), new Pair(1500406867523L, 98891), new Pair(1500406877523L, 38122), new Pair(1500406887623L, 46202), new Pair(1500406897703L, 5875), new Pair(1500406907663L, 17397), new Pair(1500406917603L, 39994), new Pair(1500406927633L, 82385), new Pair(1500406937623L, 15598), new Pair(1500406947693L, 36235), new Pair(1500406957703L, 97536), new Pair(1500406967673L, 28557), new Pair(1500406977723L, 13985), new Pair(1500406987663L, 64304), new Pair(1500406997573L, 83693), new Pair(1500407007494L, 6574), new Pair(1500407017493L, 25134), new Pair(1500407027503L, 50383), new Pair(1500407037523L, 55922), new Pair(1500407047603L, 73436), new Pair(1500407057473L, 68235), new Pair(1500407067553L, 1469), new Pair(1500407077463L, 44315), new Pair(1500407087463L, 95064), new Pair(1500407097443L, 1997), new Pair(1500407107473L, 17247), new Pair(1500407117453L, 42454), new Pair(1500407127413L, 73631), new Pair(1500407137363L, 96890), new Pair(1500407147343L, 43450), new Pair(1500407157363L, 42042), new Pair(1500407167403L, 83014), new Pair(1500407177473L, 32051), new Pair(1500407187523L, 69280), new Pair(1500407197495L, 21425), new Pair(1500407207453L, 93748), new Pair(1500407217413L, 64151), new Pair(1500407227443L, 38791), new 
Pair(1500407237463L, 5248), new Pair(1500407247523L, 92935), new Pair(1500407257513L, 18516), new Pair(1500407267584L, 98870), new Pair(1500407277573L, 82244), new Pair(1500407287723L, 65464), new Pair(1500407297723L, 33801), new Pair(1500407307673L, 18331), new Pair(1500407317613L, 89744), new Pair(1500407327553L, 98460), new Pair(1500407337503L, 24709), new Pair(1500407347423L, 8407), new Pair(1500407357383L, 69451), new Pair(1500407367333L, 51100), new Pair(1500407377373L, 25309), new Pair(1500407387443L, 16148), new Pair(1500407397453L, 98974), new Pair(1500407407543L, 80284), new Pair(1500407417583L, 170), new Pair(1500407427453L, 34706), new Pair(1500407437433L, 39681), new Pair(1500407447603L, 6140), new Pair(1500407457513L, 64595), new Pair(1500407467564L, 59862), new Pair(1500407477563L, 53795), new Pair(1500407487593L, 83493), new Pair(1500407497584L, 90639), new Pair(1500407507623L, 16777), new Pair(1500407517613L, 11096), new Pair(1500407527673L, 38512), new Pair(1500407537963L, 52759), new Pair(1500407548023L, 79567), new Pair(1500407558033L, 48664), new Pair(1500407568113L, 10710), new Pair(1500407578164L, 25635), new Pair(1500407588213L, 40985), new Pair(1500407598163L, 94089), new Pair(1500407608163L, 50056), new Pair(1500407618223L, 15550), new Pair(1500407628143L, 78823), new Pair(1500407638223L, 9044), new Pair(1500407648173L, 20782), new Pair(1500407658023L, 86390), new Pair(1500407667903L, 79444), new Pair(1500407677903L, 84051), new Pair(1500407687923L, 91554), new Pair(1500407697913L, 58777), new Pair(1500407708003L, 89474), new Pair(1500407718083L, 94026), new Pair(1500407728034L, 41613), new Pair(1500407738083L, 64667), new Pair(1500407748034L, 5160), new Pair(1500407758003L, 45140), new Pair(1500407768033L, 53704), new Pair(1500407778083L, 68097), new Pair(1500407788043L, 81137), new Pair(1500407798023L, 59657), new Pair(1500407808033L, 56572), new Pair(1500407817983L, 1993), new Pair(1500407828063L, 62608), new Pair(1500407838213L, 
76489), new Pair(1500407848203L, 22147), new Pair(1500407858253L, 92829), new Pair(1500407868073L, 48499), new Pair(1500407878053L, 89152), new Pair(1500407888073L, 9191), new Pair(1500407898033L, 49881), new Pair(1500407908113L, 96020), new Pair(1500407918213L, 90203), new Pair(1500407928234L, 32217), new Pair(1500407938253L, 94302), new Pair(1500407948293L, 83111), new Pair(1500407958234L, 75576), new Pair(1500407968073L, 5973), new Pair(1500407978023L, 5175), new Pair(1500407987923L, 63350), new Pair(1500407997833L, 44081) }; comparePairsToCompression(blockTimestamp, pairs); } /** * Tests encoding of similar floats, see https://github.com/dgryski/go-tsz/issues/4 for more information. */ @Test void testEncodeSimilarFloats() throws Exception { long now = LocalDateTime.of(2015, Month.MARCH, 02, 00, 00).toInstant(ZoneOffset.UTC).toEpochMilli(); ByteBufferBitOutput output = new ByteBufferBitOutput(); Compressor c = new Compressor(now, output); ByteBuffer bb = ByteBuffer.allocate(5 * 2*Long.BYTES); bb.putLong(now + 1); bb.putDouble(6.00065e+06); bb.putLong(now + 2); bb.putDouble(6.000656e+06); bb.putLong(now + 3); bb.putDouble(6.000657e+06); bb.putLong(now + 4); bb.putDouble(6.000659e+06); bb.putLong(now + 5); bb.putDouble(6.000661e+06); bb.flip(); for(int j = 0; j < 5; j++) { c.addValue(bb.getLong(), bb.getDouble()); } c.close(); bb.flip(); ByteBuffer byteBuffer = output.getByteBuffer(); byteBuffer.flip(); ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer); Decompressor d = new Decompressor(input); // Replace with stream once decompressor supports it for(int i = 0; i < 5; i++) { Pair pair = d.readPair(); assertEquals(bb.getLong(), pair.getTimestamp(), "Timestamp did not match"); assertEquals(bb.getDouble(), pair.getDoubleValue(), "Value did not match"); } assertNull(d.readPair()); } /** * Tests writing enough large amount of datapoints that causes the included ByteBufferBitOutput to do * internal byte array expansion. 
*/
    @Test
    void testEncodeLargeAmountOfData() throws Exception {
        // Point count chosen large enough to force the ByteBuffer-backed output to reallocate.
        int amountOfPoints = 100000;
        long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
                .toInstant(ZoneOffset.UTC).toEpochMilli();
        ByteBufferBitOutput output = new ByteBufferBitOutput();

        long now = blockStart + 60;

        // Stage reference data off-heap so the decompressed stream can be compared against it.
        ByteBuffer reference = ByteBuffer.allocateDirect(amountOfPoints * 2 * Long.BYTES);
        for (int point = 0; point < amountOfPoints; point++) {
            reference.putLong(now + point * 60);
            reference.putDouble(point * Math.random());
        }

        Compressor compressor = new Compressor(blockStart, output);
        reference.flip();
        for (int point = 0; point < amountOfPoints; point++) {
            compressor.addValue(reference.getLong(), reference.getDouble());
        }
        compressor.close();

        // Rewind so the staged data can be replayed for verification.
        reference.flip();

        ByteBuffer compressed = output.getByteBuffer();
        compressed.flip();

        Decompressor decompressor = new Decompressor(new ByteBufferBitInput(compressed));
        for (int point = 0; point < amountOfPoints; point++) {
            long tStamp = reference.getLong();
            double val = reference.getDouble();
            Pair pair = decompressor.readPair();
            assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + point);
            assertEquals(val, pair.getDoubleValue());
        }
        assertNull(decompressor.readPair());
    }

    /**
     * Although not intended usage, an empty block should not cause errors
     */
    @Test
    void testEmptyBlock() throws Exception {
        long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
                .toInstant(ZoneOffset.UTC).toEpochMilli();

        ByteBufferBitOutput output = new ByteBufferBitOutput();
        Compressor compressor = new Compressor(now, output);
        compressor.close();

        ByteBuffer compressed = output.getByteBuffer();
        compressed.flip();

        // With no datapoints added, the very first read must report end-of-stream.
        Decompressor decompressor = new Decompressor(new ByteBufferBitInput(compressed));
        assertNull(decompressor.readPair());
    }

    @Test
    void testLongEncoding() throws Exception {
        // This test should trigger ByteBuffer reallocation
        int amountOfPoints = 10000;
        long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
                .toInstant(ZoneOffset.UTC).toEpochMilli();
        ByteBufferBitOutput output = new ByteBufferBitOutput();
        long now =
blockStart + 60;

        ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2*Long.BYTES);

        // Random non-negative longs below Integer.MAX_VALUE exercise the long-value code path.
        for(int i = 0; i < amountOfPoints; i++) {
            bb.putLong(now + i*60);
            bb.putLong(ThreadLocalRandom.current().nextLong(Integer.MAX_VALUE));
        }

        Compressor c = new Compressor(blockStart, output);

        bb.flip();

        for(int j = 0; j < amountOfPoints; j++) {
            c.addValue(bb.getLong(), bb.getLong());
        }

        c.close();

        // Rewind the staging buffer so the same data can be replayed for verification.
        bb.flip();

        ByteBuffer byteBuffer = output.getByteBuffer();
        byteBuffer.flip();

        ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer);
        Decompressor d = new Decompressor(input);

        // Every decoded pair must match the staged timestamp and long value exactly.
        for(int i = 0; i < amountOfPoints; i++) {
            long tStamp = bb.getLong();
            long val = bb.getLong();
            Pair pair = d.readPair();
            assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i);
            assertEquals(val, pair.getLongValue());
        }
        // After the last pair, the stream must signal its end with null.
        assertNull(d.readPair());
    }
}