Repository: burmanm/gorilla-tsc
Branch: master
Commit: f1a28ee3aed3
Files: 24
Total size: 113.1 KB
Directory structure:
gitextract_m85owujp/
├── .gitignore
├── .travis.yml
├── LICENSE
├── README.adoc
├── pom.xml
└── src/
├── main/
│ └── java/
│ └── fi/
│ └── iki/
│ └── yak/
│ └── ts/
│ └── compression/
│ └── gorilla/
│ ├── BitInput.java
│ ├── BitOutput.java
│ ├── ByteBufferBitInput.java
│ ├── ByteBufferBitOutput.java
│ ├── Compressor.java
│ ├── Decompressor.java
│ ├── GorillaCompressor.java
│ ├── GorillaDecompressor.java
│ ├── LongArrayInput.java
│ ├── LongArrayOutput.java
│ ├── Pair.java
│ ├── Predictor.java
│ ├── ValueCompressor.java
│ ├── ValueDecompressor.java
│ ├── benchmark/
│ │ └── EncodingBenchmark.java
│ └── predictors/
│ ├── DifferentialFCM.java
│ └── LastValuePredictor.java
└── test/
└── java/
└── fi/
└── iki/
└── yak/
└── ts/
└── compression/
└── gorilla/
├── EncodeGorillaTest.java
└── EncodeTest.java
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.class
# Mobile Tools for Java (J2ME)
.mtj.tmp/
# Package Files #
*.jar
*.war
*.ear
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
.idea/
*.iml
target/
================================================
FILE: .travis.yml
================================================
# Enable container-based infrastructure
sudo: false
language: java
install: mvn install -DskipTests -Dgpg.skip
jdk:
- oraclejdk8
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "{}"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright {yyyy} {name of copyright owner}
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.adoc
================================================
= Time series compression library, based on Facebook's Gorilla paper
:source-language: java
ifdef::env-github[]
[link=https://travis-ci.org/burmanm/gorilla-tsc]
image::https://travis-ci.org/burmanm/gorilla-tsc.svg?branch=master[Build Status,70,18]
[link=https://maven-badges.herokuapp.com/maven-central/fi.iki.yak/compression-gorilla]
image::https://img.shields.io/maven-central/v/fi.iki.yak/compression-gorilla.svg[Maven central]
endif::[]
== Introduction
This is a Java-based implementation of the compression methods described in the paper link:http://www.vldb.org/pvldb/vol8/p1816-teller.pdf["Gorilla: A Fast, Scalable, In-Memory Time Series Database"]. For an explanation of how the compression methods work, read the excellent paper.
In comparison to the original paper, this implementation allows using both integer values (`long`) as well as
floating point values (`double`), both 64 bit in length.
Versions 1.x and 2.x are not compatible with each other due to small differences to the stored array. Versions 2.x
will support reading and storing older format also, see usage for more details.
== Usage
The included tests are a good source for examples.
=== Maven
[source, xml]
----
<dependency>
    <groupId>fi.iki.yak</groupId>
    <artifactId>compression-gorilla</artifactId>
</dependency>
----
You can find latest version from the maven logo link above.
=== Compressing
To compress in the older 1.x format, use class ``Compressor``. For 2.x, use ``GorillaCompressor`` (recommended).
``LongArrayOutput`` is also recommended compared to ``ByteBufferBitOutput`` because of performance. One can supply
alternative predictor to the ``GorillaCompressor`` if required. One such implementation is included,
``DifferentialFCM`` that provides better compression ratio for some data patterns.
[source, java]
----
long now = LocalDateTime.now(ZoneOffset.UTC).truncatedTo(ChronoUnit.HOURS)
.toInstant(ZoneOffset.UTC).toEpochMilli();
LongArrayOutput output = new LongArrayOutput();
GorillaCompressor c = new GorillaCompressor(now, output);
----
Compression class requires a block timestamp and an implementation of `BitOutput` interface.
[source, java]
----
c.addValue(long, double);
----
Adds a new floating-point value to the time series. If you wish to store only long values, use `c.addValue(long,
long)`, however do *not* mix these in the same series.
After the block is ready, remember to call:
[source, java]
----
c.close();
----
which flushes the remaining data to the stream and writes closing information.
=== Decompressing
To decompress from the older 1.x format, use class ``Decompressor``. For 2.x, use ``GorillaDecompressor`` (recommended).
``LongArrayInput`` is also recommended compared to ``ByteBufferBitInput`` because of performance if the 2.x
format was used to compress the time series. If the original compressor used different predictor than
``LastValuePredictor`` it must be defined in the constructor.
[source, java]
----
LongArrayInput input = new LongArrayInput(byteBuffer);
GorillaDecompressor d = new GorillaDecompressor(input);
----
To decompress a stream of bytes, supply `GorillaDecompressor` with a suitable implementation of `BitInput` interface.
The `LongArrayInput` allows decompressing a long array or an existing `ByteBuffer` representation with an 8 byte
word length.
[source, java]
----
Pair pair = d.readPair();
----
Requesting next pair with `readPair()` returns the following series value or a `null` once the series is completely
read. The pair is a simple placeholder object with `getTimestamp()` and `getDoubleValue()` or `getLongValue()`.
== Performance
The following performance is reached in a Linux VM running on VMware Player in a Windows 8.1 host, i7 2600K at 4GHz.
The benchmark used is the ``EncodingBenchmark``. These results should not be directly compared to other
implementations unless similar dataset is used.
Results are in millions of datapoints (timestamp + value) pairs per second. The values in this benchmark are
in doubles (performance with longs is slightly higher, around ~2-3M/s).
.Compression
|===
|GorillaCompressor (2.0.0) |Compressor (1.1.0)
|83.5M/s (~1.34GB/s)
|31.2M/s (~499MB/s)
|===
.Decompression
|===
|GorillaDecompressor (2.0.0) |Decompressor (1.1.0)
|77.9M/s (~1.25GB/s)
|51.4M/s (~822MB/s)
|===
Most of the differences in decompression / compression speed between versions come from implementation changes and
not from the small changes to the output format.
== Roadmap
There were few things I wanted to get to 2.0.0, but had to decide against due to lack of time. I will implement these
later with potentially some breaking API changes:
* Support timestamp only compressions (2.2.x)
* Include ByteBufferLongOutput/ByteBufferLongInput in the package (2.2.x)
* Move bit operations to inside the GorillaCompressor/GorillaDecompressor to allow easier usage with
other allocators (2.2.x)
== Internals
=== Differences to the original paper
* Maximum number of leadingZeros is stored with 6 bits to allow up to 63 leading zeros, which are necessary when
storing long values. (>= 2.0.0)
* Timestamp delta-of-delta are stored by first turning them with ZigZag encoding to positive integers and then
reduced by one to fit in the necessary bits. In the decoding phase all the values are incremented by one to fetch the
original value. (>= 2.0.0)
* The compressed blocks are created with a 27 bit delta header (unlike in the original paper, which uses a 14 bit delta
header). This allows to use up to one day block size using millisecond precision. (>= 1.0.0)
=== Data structure
Values must be inserted in the increasing time order, out-of-order insertions are not supported.
The included ByteBufferBitInput and ByteBufferBitOutput classes use a big endian order for the data.
== Contributing
File an issue and/or send a pull request.
=== License
....
Copyright 2016-2018 Michael Burman and/or other contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
....
================================================
FILE: pom.xml
================================================
4.0.0fi.iki.yakcompression-gorilla2.1.2-SNAPSHOTGorilla time series compression in JavaImplements the time series compression methods as described in the Facebook's Gorilla
paperhttps://github.com/burmanm/gorilla-tscApache License, Version 2.0http://www.apache.org/licenses/LICENSE-2.0.txtrepo1.85.0.0-M41.0.0-M41.18benchmark2.5.3https://github.com/burmanm/gorilla-tscscm:git:git://github.com/burmam/gorilla-tsc.gitscm:git:git@github.com:burmanm/gorilla-tsc.gitHEADyak@iki.fiMichael Burmanhttps://github.com/burmanmburmanmossrhhttps://oss.sonatype.org/service/local/staging/deploy/maven2/org.junit.jupiterjunit-jupiter-engine${junit.jupiter.version}testorg.openjdk.jmhjmh-core${jmh.version}org.openjdk.jmhjmh-generator-annprocess${jmh.version}providedmaven-compiler-plugin3.1${java.version}${java.version}maven-surefire-plugin2.19org.junit.platformjunit-platform-surefire-provider${junit.platform.version}org.apache.maven.pluginsmaven-shade-plugin2.4.3packageshade${jar.name}org.openjdk.jmh.Mainorg.apache.maven.pluginsmaven-release-plugin${maven.release.plugin.version}truefalsereleasedeployorg.sonatype.pluginsnexus-staging-maven-plugin1.6.7trueossrhhttps://oss.sonatype.org/falseorg.apache.maven.pluginsmaven-javadoc-pluginjarorg.apache.maven.pluginsmaven-source-pluginjarorg.apache.maven.pluginsmaven-gpg-plugin1.5sign-artifactsverifysign
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/BitInput.java
================================================
package fi.iki.yak.ts.compression.gorilla;
/**
* This interface is used for reading a compressed time series.
*
* @author Michael Burman
*/
public interface BitInput {
/**
* Reads the next bit and returns true if bit is set and false if not.
*
* @return true == 1, false == 0
*/
boolean readBit();
/**
* Returns a long that was stored in the next X bits in the stream.
*
* @param bits Amount of least significant bits to read from the stream.
* @return reads the next long in the series using bits meaningful bits
*/
long getLong(int bits);
/**
* Read until next unset bit is found, or until maxBits has been reached.
*
* @param maxBits How many bits at maximum until returning
* @return Integer value of the read bits
*/
int nextClearBit(int maxBits);
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/BitOutput.java
================================================
package fi.iki.yak.ts.compression.gorilla;
/**
* This interface is used to write a compressed timeseries.
*
* @author Michael Burman
*/
public interface BitOutput {
/**
* Stores a single bit and increases the bitcount by 1
*/
void writeBit();
/**
* Stores a 0 and increases the bitcount by 1
*/
void skipBit();
/**
* Write the given long value using the defined amount of least significant bits.
*
* @param value The long value to be written
* @param bits How many bits are stored to the stream
*/
void writeBits(long value, int bits);
/**
* Flushes the current byte to the underlying stream
*/
void flush();
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/ByteBufferBitInput.java
================================================
package fi.iki.yak.ts.compression.gorilla;
import java.nio.ByteBuffer;
/**
* An implementation of BitInput that parses the data from byte array or existing ByteBuffer.
*
* @author Michael Burman
*/
public class ByteBufferBitInput implements BitInput {
private ByteBuffer bb;
private byte b;
private int bitsLeft = 0;
/**
* Uses an existing ByteBuffer to read the stream. Starts at the ByteBuffer's current position.
*
* @param buf Use existing ByteBuffer
*/
public ByteBufferBitInput(ByteBuffer buf) {
bb = buf;
flipByte();
}
public ByteBufferBitInput(byte[] input) {
this(ByteBuffer.wrap(input));
}
/**
* Reads the next bit and returns a boolean representing it.
*
* @return true if the next bit is 1, otherwise 0.
*/
public boolean readBit() {
boolean bit = ((b >> (bitsLeft - 1)) & 1) == 1;
bitsLeft--;
flipByte();
return bit;
}
/**
* Reads a long from the next X bits that represent the least significant bits in the long value.
*
* @param bits How many next bits are read from the stream
* @return long value that was read from the stream
*/
public long getLong(int bits) {
long value = 0;
while(bits > 0) {
if(bits > bitsLeft || bits == Byte.SIZE) {
// Take only the bitsLeft "least significant" bits
byte d = (byte) (b & ((1<>> (bitsLeft - bits)) & ((1< 0) {
int shift = bits - bitsLeft;
if(shift >= 0) {
b |= (byte) ((value >> shift) & ((1 << bitsLeft) - 1));
bits -= bitsLeft;
bitsLeft = 0;
} else {
shift = bitsLeft - bits;
b |= (byte) (value << shift);
bitsLeft -= bits;
bits = 0;
}
flipByte();
}
}
/**
* Causes the currently handled byte to be written to the stream
*/
@Override
public void flush() {
bitsLeft = 0;
flipByte(); // Causes write to the ByteBuffer
}
/**
* Returns the underlying DirectByteBuffer
*
* @return ByteBuffer of type DirectByteBuffer
*/
public ByteBuffer getByteBuffer() {
return this.bb;
}
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/Compressor.java
================================================
package fi.iki.yak.ts.compression.gorilla;
/**
* Implements the time series compression as described in the Facebook's Gorilla Paper. Value compression
* is for floating points only.
*
* @author Michael Burman
*/
public class Compressor {
private int storedLeadingZeros = Integer.MAX_VALUE;
private int storedTrailingZeros = 0;
private long storedVal = 0;
private long storedTimestamp = 0;
private long storedDelta = 0;
private long blockTimestamp = 0;
public final static short FIRST_DELTA_BITS = 27;
private BitOutput out;
// We should have access to the series?
public Compressor(long timestamp, BitOutput output) {
blockTimestamp = timestamp;
out = output;
addHeader(timestamp);
}
private void addHeader(long timestamp) {
// One byte: length of the first delta
// One byte: precision of timestamps
out.writeBits(timestamp, 64);
}
/**
* Adds a new long value to the series. Note, values must be inserted in order.
*
* @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision)
* @param value next floating point value in the series
*/
public void addValue(long timestamp, long value) {
if(storedTimestamp == 0) {
writeFirst(timestamp, value);
} else {
compressTimestamp(timestamp);
compressValue(value);
}
}
/**
* Adds a new double value to the series. Note, values must be inserted in order.
*
* @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision)
* @param value next floating point value in the series
*/
public void addValue(long timestamp, double value) {
if(storedTimestamp == 0) {
writeFirst(timestamp, Double.doubleToRawLongBits(value));
} else {
compressTimestamp(timestamp);
compressValue(Double.doubleToRawLongBits(value));
}
}
private void writeFirst(long timestamp, long value) {
storedDelta = timestamp - blockTimestamp;
storedTimestamp = timestamp;
storedVal = value;
out.writeBits(storedDelta, FIRST_DELTA_BITS);
out.writeBits(storedVal, 64);
}
/**
* Closes the block and writes the remaining stuff to the BitOutput.
*/
public void close() {
// These are selected to test interoperability and correctness of the solution, this can be read with go-tsz
out.writeBits(0x0F, 4);
out.writeBits(0xFFFFFFFF, 32);
out.skipBit();
out.flush();
}
/**
* Difference to the original Facebook paper, we store the first delta as 27 bits to allow
* millisecond accuracy for a one day block.
*
* Also, the timestamp delta-delta is not good for millisecond compressions..
*
* @param timestamp epoch
*/
private void compressTimestamp(long timestamp) {
// a) Calculate the delta of delta
long newDelta = (timestamp - storedTimestamp);
long deltaD = newDelta - storedDelta;
// If delta is zero, write single 0 bit
if(deltaD == 0) {
out.skipBit();
} else if(deltaD >= -63 && deltaD <= 64) {
out.writeBits(0x02, 2); // store '10'
out.writeBits(deltaD, 7); // Using 7 bits, store the value..
} else if(deltaD >= -255 && deltaD <= 256) {
out.writeBits(0x06, 3); // store '110'
out.writeBits(deltaD, 9); // Use 9 bits
} else if(deltaD >= -2047 && deltaD <= 2048) {
out.writeBits(0x0E, 4); // store '1110'
out.writeBits(deltaD, 12); // Use 12 bits
} else {
out.writeBits(0x0F, 4); // Store '1111'
out.writeBits(deltaD, 32); // Store delta using 32 bits
}
storedDelta = newDelta;
storedTimestamp = timestamp;
}
private void compressValue(long value) {
// TODO Fix already compiled into a big method
long xor = storedVal ^ value;
if(xor == 0) {
// Write 0
out.skipBit();
} else {
int leadingZeros = Long.numberOfLeadingZeros(xor);
int trailingZeros = Long.numberOfTrailingZeros(xor);
// Check overflow of leading? Can't be 32!
if(leadingZeros >= 32) {
leadingZeros = 31;
}
// Store bit '1'
out.writeBit();
if(leadingZeros >= storedLeadingZeros && trailingZeros >= storedTrailingZeros) {
writeExistingLeading(xor);
} else {
writeNewLeading(xor, leadingZeros, trailingZeros);
}
}
storedVal = value;
}
/**
* If there at least as many leading zeros and as many trailing zeros as previous value, control bit = 0 (type a)
* store the meaningful XORed value
*
* @param xor XOR between previous value and current
*/
private void writeExistingLeading(long xor) {
out.skipBit();
int significantBits = 64 - storedLeadingZeros - storedTrailingZeros;
out.writeBits(xor >>> storedTrailingZeros, significantBits);
}
/**
* store the length of the number of leading zeros in the next 5 bits
* store length of the meaningful XORed value in the next 6 bits,
* store the meaningful bits of the XORed value
* (type b)
*
* @param xor XOR between previous value and current
* @param leadingZeros New leading zeros
* @param trailingZeros New trailing zeros
*/
private void writeNewLeading(long xor, int leadingZeros, int trailingZeros) {
out.writeBit();
out.writeBits(leadingZeros, 5); // Number of leading zeros in the next 5 bits
int significantBits = 64 - leadingZeros - trailingZeros;
out.writeBits(significantBits, 6); // Length of meaningful bits in the next 6 bits
out.writeBits(xor >>> trailingZeros, significantBits); // Store the meaningful bits of XOR
storedLeadingZeros = leadingZeros;
storedTrailingZeros = trailingZeros;
}
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/Decompressor.java
================================================
package fi.iki.yak.ts.compression.gorilla;
/**
* Decompresses a compressed stream created by the Compressor. Returns pairs of timestamp and floating point value.
*
* @author Michael Burman
*/
public class Decompressor {
private int storedLeadingZeros = Integer.MAX_VALUE;
private int storedTrailingZeros = 0;
private long storedVal = 0;
private long storedTimestamp = 0;
private long storedDelta = 0;
private long blockTimestamp = 0;
private boolean endOfStream = false;
private BitInput in;
public Decompressor(BitInput input) {
in = input;
readHeader();
}
private void readHeader() {
blockTimestamp = in.getLong(64);
}
/**
* Returns the next pair in the time series, if available.
*
* @return Pair if there's next value, null if series is done.
*/
public Pair readPair() {
next();
if(endOfStream) {
return null;
}
return new Pair(storedTimestamp, storedVal);
}
private void next() {
if (storedTimestamp == 0) {
// First item to read
storedDelta = in.getLong(Compressor.FIRST_DELTA_BITS);
if(storedDelta == (1<<27) - 1) {
endOfStream = true;
return;
}
storedVal = in.getLong(64);
storedTimestamp = blockTimestamp + storedDelta;
} else {
nextTimestamp();
}
}
private int bitsToRead() {
int val = in.nextClearBit(4);
int toRead = 0;
switch(val) {
case 0x00:
break;
case 0x02:
toRead = 7; // '10'
break;
case 0x06:
toRead = 9; // '110'
break;
case 0x0e:
toRead = 12;
break;
case 0x0F:
toRead = 32;
break;
}
return toRead;
}
private void nextTimestamp() {
// Next, read timestamp
long deltaDelta = 0;
int toRead = bitsToRead();
if (toRead > 0) {
deltaDelta = in.getLong(toRead);
if(toRead == 32) {
if ((int) deltaDelta == 0xFFFFFFFF) {
// End of stream
endOfStream = true;
return;
}
} else {
// Turn "unsigned" long value back to signed one
if(deltaDelta > (1 << (toRead - 1))) {
deltaDelta -= (1 << toRead);
}
}
deltaDelta = (int) deltaDelta;
}
storedDelta = storedDelta + deltaDelta;
storedTimestamp = storedDelta + storedTimestamp;
nextValue();
}
private void nextValue() {
// Read value
if (in.readBit()) {
// else -> same value as before
if (in.readBit()) {
// New leading and trailing zeros
storedLeadingZeros = (int) in.getLong(5);
byte significantBits = (byte) in.getLong(6);
if(significantBits == 0) {
significantBits = 64;
}
storedTrailingZeros = 64 - significantBits - storedLeadingZeros;
}
long value = in.getLong(64 - storedLeadingZeros - storedTrailingZeros);
value <<= storedTrailingZeros;
value = storedVal ^ value;
storedVal = value;
}
}
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/GorillaCompressor.java
================================================
package fi.iki.yak.ts.compression.gorilla;
import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor;
/**
 * Implements a slightly modified version of the time series compression as described in the Facebook's Gorilla
 * Paper.
 *
 * Differences to the paper: the first delta is stored with 27 bits (millisecond precision for a
 * one day block) and the timestamp delta-of-delta is ZigZag-encoded before storage.
 *
 * @author Michael Burman
 */
public class GorillaCompressor {
    private long storedTimestamp = 0;
    private int storedDelta = 0;
    private final long blockTimestamp;

    /** Amount of bits used to store the first delta against the block timestamp. */
    public final static int FIRST_DELTA_BITS = 27;

    // Control-bit prefixes ('10', '110', '1110') pre-shifted past the payload width so the
    // prefix and the delta-of-delta payload can be written with a single writeBits call.
    private static final int DELTAD_7_MASK = 0x02 << 7;
    private static final int DELTAD_9_MASK = 0x06 << 9;
    private static final int DELTAD_12_MASK = 0x0E << 12;

    private final BitOutput out;
    private final ValueCompressor valueCompressor;

    /**
     * Creates a compressor using the default LastValuePredictor.
     *
     * @param timestamp Block timestamp (start of the compressed block)
     * @param output Stream the compressed bits are written to
     */
    public GorillaCompressor(long timestamp, BitOutput output) {
        this(timestamp, output, new LastValuePredictor());
    }

    /**
     * @param timestamp Block timestamp (start of the compressed block)
     * @param output Stream the compressed bits are written to
     * @param predictor Value predictor; the decompressor must use the same implementation
     */
    public GorillaCompressor(long timestamp, BitOutput output, Predictor predictor) {
        blockTimestamp = timestamp;
        out = output;
        addHeader(timestamp);
        this.valueCompressor = new ValueCompressor(output, predictor);
    }

    // The block header is the uncompressed 64-bit block timestamp
    private void addHeader(long timestamp) {
        out.writeBits(timestamp, 64);
    }

    /**
     * Adds a new long value to the series. Note, values must be inserted in order.
     *
     * @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision)
     * @param value next long value in the series
     */
    public void addValue(long timestamp, long value) {
        if(storedTimestamp == 0) {
            writeFirst(timestamp, value);
        } else {
            compressTimestamp(timestamp);
            valueCompressor.compressValue(value);
        }
    }

    /**
     * Adds a new double value to the series. Note, values must be inserted in order.
     *
     * @param timestamp Timestamp which is inside the allowed time block (default 24 hours with millisecond precision)
     * @param value next floating point value in the series
     */
    public void addValue(long timestamp, double value) {
        if(storedTimestamp == 0) {
            writeFirst(timestamp, Double.doubleToRawLongBits(value));
        } else {
            compressTimestamp(timestamp);
            valueCompressor.compressValue(Double.doubleToRawLongBits(value));
        }
    }

    // First pair: delta against the block timestamp in FIRST_DELTA_BITS bits, value in full 64 bits
    private void writeFirst(long timestamp, long value) {
        storedDelta = (int) (timestamp - blockTimestamp);
        storedTimestamp = timestamp;

        out.writeBits(storedDelta, FIRST_DELTA_BITS);
        valueCompressor.writeFirst(value);
    }

    /**
     * Closes the block and writes the remaining stuff to the BitOutput.
     */
    public void close() {
        // End-of-stream marker: '1111' control bits followed by 32 one bits
        out.writeBits(0x0F, 4);
        out.writeBits(0xFFFFFFFF, 32);
        out.skipBit();
        out.flush();
    }

    /**
     * Difference to the original Facebook paper, we store the first delta as 27 bits to allow
     * millisecond accuracy for a one day block.
     *
     * Also, the timestamp delta-delta is not good for millisecond compressions..
     *
     * @param timestamp epoch
     */
    private void compressTimestamp(long timestamp) {
        // a) Calculate the delta of delta
        int newDelta = (int) (timestamp - storedTimestamp);
        int deltaD = newDelta - storedDelta;

        if(deltaD == 0) {
            // Unchanged delta is the common case -> a single '0' bit
            out.skipBit();
        } else {
            deltaD = encodeZigZag32(deltaD);
            deltaD--; // Increase by one in the decompressing phase as we have one free bit

            int bitsRequired = 32 - Integer.numberOfLeadingZeros(deltaD); // Faster than highestSetBit

            // Turns to inlineable tableswitch
            switch(bitsRequired) {
                case 0: // deltaD == 0 after the decrement (delta-of-delta was -1); the 7-bit
                        // '10' encoding decodes it correctly and is far cheaper than 36 bits
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7:
                    deltaD |= DELTAD_7_MASK;
                    out.writeBits(deltaD, 9);
                    break;
                case 8:
                case 9:
                    deltaD |= DELTAD_9_MASK;
                    out.writeBits(deltaD, 12);
                    break;
                case 10:
                case 11:
                case 12:
                    out.writeBits(deltaD | DELTAD_12_MASK, 16);
                    break;
                default:
                    out.writeBits(0x0F, 4); // Store '1111'
                    out.writeBits(deltaD, 32); // Store delta using 32 bits
                    break;
            }

            storedDelta = newDelta;
        }

        storedTimestamp = timestamp;
    }

    // START: From protobuf

    /**
     * Encode a ZigZag-encoded 32-bit value. ZigZag encodes signed integers
     * into values that can be efficiently encoded with varint. (Otherwise,
     * negative values must be sign-extended to 64 bits to be varint encoded,
     * thus always taking 10 bytes on the wire.)
     *
     * @param n A signed 32-bit integer.
     * @return An unsigned 32-bit integer, stored in a signed int because
     *         Java has no explicit unsigned support.
     */
    public static int encodeZigZag32(final int n) {
        // Note: the right-shift must be arithmetic
        return (n << 1) ^ (n >> 31);
    }

    // END: From protobuf
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/GorillaDecompressor.java
================================================
package fi.iki.yak.ts.compression.gorilla;
import java.util.stream.Stream;
import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor;
/**
 * Decompresses a compressed stream created by the GorillaCompressor.
 *
 * @author Michael Burman
 */
public class GorillaDecompressor {
    private long storedTimestamp = 0;
    private long storedDelta = 0;
    private long blockTimestamp = 0;
    private long storedVal = 0;
    private boolean endOfStream = false;

    private final BitInput in;
    private final ValueDecompressor decompressor;

    /**
     * Creates a decompressor using the default LastValuePredictor.
     *
     * @param input Stream to decompress
     */
    public GorillaDecompressor(BitInput input) {
        this(input, new LastValuePredictor());
    }

    /**
     * @param input Stream to decompress
     * @param predictor Must be the same predictor implementation that was used when compressing
     */
    public GorillaDecompressor(BitInput input, Predictor predictor) {
        in = input;
        readHeader();
        this.decompressor = new ValueDecompressor(input, predictor);
    }

    // The block header is the uncompressed 64-bit block timestamp
    private void readHeader() {
        blockTimestamp = in.getLong(64);
    }

    /**
     * Returns the next pair in the time series, if available.
     *
     * @return Pair if there's next value, null if series is done.
     */
    public Pair readPair() {
        next();
        if(endOfStream) {
            return null;
        }
        return new Pair(storedTimestamp, storedVal);
    }

    private void next() {
        if(storedTimestamp == 0) {
            first();
            return;
        }
        nextTimestamp();
    }

    // First pair: delta against the block timestamp was stored with FIRST_DELTA_BITS bits
    private void first() {
        storedDelta = in.getLong(GorillaCompressor.FIRST_DELTA_BITS);

        // All ones in the first delta means the compressor closed the stream before
        // any value was added
        if(storedDelta == (1 << GorillaCompressor.FIRST_DELTA_BITS) - 1) {
            endOfStream = true;
            return;
        }
        storedVal = decompressor.readFirst();
        storedTimestamp = blockTimestamp + storedDelta;
    }

    private void nextTimestamp() {
        // Control bits tell with how many bits the delta-of-delta was stored
        int readInstruction = in.nextClearBit(4);
        long deltaDelta;

        switch(readInstruction) {
            case 0x00:
                // '0' bit -> delta is unchanged
                storedTimestamp = storedDelta + storedTimestamp;
                storedVal = decompressor.nextValue();
                return;
            case 0x02: // '10'
                deltaDelta = in.getLong(7);
                break;
            case 0x06: // '110'
                deltaDelta = in.getLong(9);
                break;
            case 0x0e: // '1110'
                deltaDelta = in.getLong(12);
                break;
            case 0x0F: // '1111'
                deltaDelta = in.getLong(32);
                // For storage save.. if this is the last available word, check if remaining bits are all 1
                if ((int) deltaDelta == 0xFFFFFFFF) {
                    // End of stream
                    endOfStream = true;
                    return;
                }
                break;
            default:
                // nextClearBit(4) cannot produce other values; bail out defensively
                return;
        }

        // The compressor stored (zigzag(deltaD) - 1); undo both steps
        deltaDelta++;
        deltaDelta = decodeZigZag32((int) deltaDelta);
        storedDelta = storedDelta + deltaDelta;

        storedTimestamp = storedDelta + storedTimestamp;
        storedVal = decompressor.nextValue();
    }

    // START: From protobuf

    /**
     * Decode a ZigZag-encoded 32-bit value. ZigZag encodes signed integers into values that can be
     * efficiently encoded with varint. (Otherwise, negative values must be sign-extended to 64 bits
     * to be varint encoded, thus always taking 10 bytes on the wire.)
     *
     * @param n An unsigned 32-bit integer, stored in a signed int because Java has no explicit
     *          unsigned support.
     * @return A signed 32-bit integer.
     */
    public static int decodeZigZag32(final int n) {
        return (n >>> 1) ^ -(n & 1);
    }

    // END: From protobuf
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/LongArrayInput.java
================================================
/*
* Copyright 2016 Red Hat, Inc. and/or its affiliates
* and other contributors as indicated by the @author tags.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package fi.iki.yak.ts.compression.gorilla;
/**
 * Implements on-heap long array input stream
 *
 * Bits are consumed from the most significant end of each word, matching the
 * write order of LongArrayOutput.
 *
 * @author Michael Burman
 */
public class LongArrayInput implements BitInput {
    private final long[] longArray; // TODO Investigate also the ByteBuffer performance here.. or Unsafe
    // Current 64-bit word being consumed
    private long lB;
    // Index of the next word to load from longArray
    private int position = 0;
    // Unread bits remaining in lB
    private int bitsLeft = 0;

    public LongArrayInput(long[] array) {
        this.longArray = array;
        flipByte();
    }

    /**
     * Reads the next bit from the stream.
     *
     * @return true for a set bit, false for a clear bit
     */
    @Override
    public boolean readBit() {
        boolean bit = (lB & LongArrayOutput.BIT_SET_MASK[bitsLeft - 1]) != 0;
        bitsLeft--;
        checkAndFlipByte();
        return bit;
    }

    // Loads the next word; reading past the end of the backing array throws
    // ArrayIndexOutOfBoundsException
    private void flipByte() {
        lB = longArray[position++];
        bitsLeft = Long.SIZE;
    }

    private void checkAndFlipByte() {
        if(bitsLeft == 0) {
            flipByte();
        }
    }

    /**
     * Reads the given amount of bits and returns them in the least significant
     * positions of the returned long.
     *
     * @param bits Amount of bits to read, 1..64
     * @return the read bits as an unsigned value (64 bits returns the raw word)
     */
    @Override
    public long getLong(int bits) {
        long value;
        if(bits <= bitsLeft) {
            // We can read from this word only
            // Shift to correct position and take only n least significant bits
            value = (lB >>> (bitsLeft - bits)) & LongArrayOutput.MASK_ARRAY[bits - 1];
            bitsLeft -= bits; // We ate n bits from it
            checkAndFlipByte();
        } else {
            // This word and next one, no more (max bits is 64)
            value = lB & LongArrayOutput.MASK_ARRAY[bitsLeft - 1]; // Read what's left first
            bits -= bitsLeft;
            flipByte(); // We need the next one
            value <<= bits; // Give n bits of space to value
            value |= (lB >>> (bitsLeft - bits));
            bitsLeft -= bits;
        }
        return value;
    }

    /**
     * Reads bits one at a time until a clear bit or maxBits bits have been read,
     * returning the bits read so far as an integer (e.g. '110' -> 0x06).
     *
     * @param maxBits Maximum amount of bits to read
     * @return the read control bits
     */
    @Override
    public int nextClearBit(int maxBits) {
        int val = 0x00;
        for(int i = 0; i < maxBits; i++) {
            val <<= 1;
            // TODO This loop has too many branches and unnecessary boolean casts
            boolean bit = readBit();
            if(bit) {
                val |= 0x01;
            } else {
                break;
            }
        }
        return val;
    }
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/LongArrayOutput.java
================================================
package fi.iki.yak.ts.compression.gorilla;
import java.util.Arrays;
/**
 * An implementation of BitOutput interface that uses on-heap long array.
 *
 * Bits are written from the most significant end of each word; full words are
 * flushed to the backing array, which grows by doubling when needed.
 *
 * @author Michael Burman
 */
public class LongArrayOutput implements BitOutput {
    public static final int DEFAULT_ALLOCATION = 256;

    private long[] longArray;
    private int position = 0;

    // Current word being filled
    protected long lB;
    // Amount of unwritten bits still available in lB
    protected int bitsLeft = Long.SIZE;

    // MASK_ARRAY[n] has the lowest (n + 1) bits set; BIT_SET_MASK[n] has only bit n set
    public final static long[] MASK_ARRAY;
    public final static long[] BIT_SET_MASK;

    // Java does not allow creating 64 bit masks with (1L << 64) - 1; (end result is 0)
    static {
        MASK_ARRAY = new long[64];
        long mask = 1;
        long value = 0;
        for (int i = 0; i < MASK_ARRAY.length; i++) {
            value = value | mask;
            mask = mask << 1;

            MASK_ARRAY[i] = value;
        }

        BIT_SET_MASK = new long[64];
        for(int i = 0; i < BIT_SET_MASK.length; i++) {
            BIT_SET_MASK[i] = (1L << i);
        }
    }

    /**
     * Creates a new LongArrayOutput with a default allocation of DEFAULT_ALLOCATION longs.
     */
    public LongArrayOutput() {
        this(DEFAULT_ALLOCATION);
    }

    /**
     * Give an initialSize different than DEFAULT_ALLOCATION. The backing array is grown
     * by doubling whenever it runs out of space.
     *
     * @param initialSize Initial allocation size in longs
     */
    public LongArrayOutput(int initialSize) {
        longArray = new long[initialSize];
        lB = longArray[position];
    }

    // Doubles the backing array, preserving the words written so far
    protected void expandAllocation() {
        long[] largerArray = new long[longArray.length*2];
        System.arraycopy(longArray, 0, largerArray, 0, longArray.length);
        longArray = largerArray;
    }

    private void checkAndFlipByte() {
        // Wish I could avoid this check in most cases...
        if(bitsLeft == 0) {
            flipWord();
        }
    }

    // Amount of unwritten words left in the backing array
    protected int capacityLeft() {
        return longArray.length - position;
    }

    protected void flipWord() {
        if(capacityLeft() <= 2) { // We want to have always at least 2 longs available
            expandAllocation();
        }
        flipWordWithoutExpandCheck();
    }

    // Commits the current word to the array and starts a fresh one
    protected void flipWordWithoutExpandCheck() {
        longArray[position] = lB;
        ++position;
        resetInternalWord();
    }

    private void resetInternalWord() {
        lB = 0;
        bitsLeft = Long.SIZE;
    }

    /**
     * Sets the next bit (or not) and moves the bit pointer.
     */
    public void writeBit() {
        lB |= BIT_SET_MASK[bitsLeft - 1];
        bitsLeft--;
        checkAndFlipByte();
    }

    /**
     * Moves the bit pointer, leaving the bit clear.
     */
    public void skipBit() {
        bitsLeft--;
        checkAndFlipByte();
    }

    /**
     * Writes the given long to the stream using bits amount of meaningful bits. This command does not
     * check input values, so if they're larger than what can fit the bits (you should check this before writing),
     * expect some weird results.
     *
     * @param value Value to be written to the stream
     * @param bits How many bits are stored to the stream
     */
    public void writeBits(long value, int bits) {
        if(bits <= bitsLeft) {
            // Everything fits into the current word
            int lastBitPosition = bitsLeft - bits;
            lB |= (value << lastBitPosition) & MASK_ARRAY[bitsLeft - 1];
            bitsLeft -= bits;
            checkAndFlipByte(); // We could be at 0 bits left because of the <= condition .. would it be faster with
                                // the other one?
        } else {
            // Value is split between the current word and the next one
            value &= MASK_ARRAY[bits - 1];
            int firstBitPosition = bits - bitsLeft;
            lB |= value >>> firstBitPosition;
            bits -= bitsLeft;
            flipWord();
            lB |= value << (64 - bits);
            bitsLeft -= bits;
        }
    }

    /**
     * Causes the currently handled word to be written to the stream
     */
    @Override
    public void flush() {
        flipWord();
    }

    /**
     * Returns a copy of the written content; the partially filled current word is
     * included as the last element.
     *
     * @return Compressed data as a long array
     */
    public long[] getLongArray() {
        long[] copy = Arrays.copyOf(longArray, position + 1);
        copy[copy.length - 1] = lB;
        return copy;
    }
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/Pair.java
================================================
package fi.iki.yak.ts.compression.gorilla;
/**
 * Pair is an extracted timestamp,value pair from the stream. Instances are immutable.
 *
 * @author Michael Burman
 */
public class Pair {
    private final long timestamp;
    private final long value;

    /**
     * @param timestamp Timestamp of the datapoint
     * @param value Value as long bits; for doubles use Double.doubleToRawLongBits(value)
     */
    public Pair(long timestamp, long value) {
        this.timestamp = timestamp;
        this.value = value;
    }

    /**
     * @return Timestamp of this datapoint
     */
    public long getTimestamp() {
        return timestamp;
    }

    /**
     * @return Stored bits interpreted as a double
     */
    public double getDoubleValue() {
        return Double.longBitsToDouble(value);
    }

    /**
     * @return Raw long value (or bits of the stored double)
     */
    public long getLongValue() {
        return value;
    }
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/Predictor.java
================================================
/*
* Copyright 2017 Red Hat, Inc. and/or its affiliates
* and other contributors as indicated by the @author tags.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package fi.iki.yak.ts.compression.gorilla;
/**
 * A predictor produces a guess for the next value in the series; the value codec
 * stores only the XOR difference between the prediction and the actual value.
 *
 * Compressor and decompressor must be configured with the same predictor
 * implementation for the stream to decode correctly.
 *
 * @author miburman
 */
public interface Predictor {
    /**
     * Give the real value
     *
     * @param value Long / bits of Double
     */
    void update(long value);

    /**
     * Predicts the next value
     *
     * @return Predicted value
     */
    long predict();
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/ValueCompressor.java
================================================
package fi.iki.yak.ts.compression.gorilla;
import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor;
/**
 * ValueCompressor for the Gorilla encoding format. Supply with long presentation of the value,
 * in case of doubles use Double.doubleToRawLongBits(value)
 *
 * @author Michael Burman
 */
public class ValueCompressor {
    // Integer.MAX_VALUE marks "no previous window", forcing the first compressed
    // value down the writeNewLeading path
    private int storedLeadingZeros = Integer.MAX_VALUE;
    private int storedTrailingZeros = 0;

    private Predictor predictor;
    private BitOutput out;

    public ValueCompressor(BitOutput out) {
        this(out, new LastValuePredictor());
    }

    public ValueCompressor(BitOutput out, Predictor predictor) {
        this.out = out;
        this.predictor = predictor;
    }

    /**
     * Writes the first value of the block uncompressed with full 64 bits.
     *
     * @param value Long / bits of Double to store
     */
    void writeFirst(long value) {
        predictor.update(value);
        out.writeBits(value, 64);
    }

    /**
     * Compresses the value as an XOR against the predictor's prediction.
     *
     * @param value Long / bits of Double to store
     */
    protected void compressValue(long value) {
        // In original Gorilla, Last-Value predictor is used
        long diff = predictor.predict() ^ value;
        predictor.update(value);

        if(diff == 0) {
            // Write 0
            out.skipBit();
        } else {
            int leadingZeros = Long.numberOfLeadingZeros(diff);
            int trailingZeros = Long.numberOfTrailingZeros(diff);

            out.writeBit(); // Optimize to writeNewLeading / writeExistingLeading?

            // Reuse the previous leading/trailing window when the XOR fits inside it
            if(leadingZeros >= storedLeadingZeros && trailingZeros >= storedTrailingZeros) {
                writeExistingLeading(diff);
            } else {
                writeNewLeading(diff, leadingZeros, trailingZeros);
            }
        }
    }

    /**
     * If there at least as many leading zeros and as many trailing zeros as previous value, control bit = 0 (type a)
     * store the meaningful XORed value
     *
     * @param xor XOR between previous value and current
     */
    private void writeExistingLeading(long xor) {
        out.skipBit();

        int significantBits = 64 - storedLeadingZeros - storedTrailingZeros;
        xor >>>= storedTrailingZeros;
        out.writeBits(xor, significantBits);
    }

    /**
     * store the length of the number of leading zeros in the next 5 bits
     * store length of the meaningful XORed value in the next 6 bits,
     * store the meaningful bits of the XORed value
     * (type b)
     *
     * @param xor XOR between previous value and current
     * @param leadingZeros New leading zeros
     * @param trailingZeros New trailing zeros
     */
    private void writeNewLeading(long xor, int leadingZeros, int trailingZeros) {
        out.writeBit();

        // Different from version 1.x, use (significantBits - 1) in storage - avoids a branch
        int significantBits = 64 - leadingZeros - trailingZeros;

        // Different from original, bits 5 -> 6, avoids a branch, allows storing small longs
        out.writeBits(leadingZeros, 6); // Number of leading zeros in the next 6 bits
        out.writeBits(significantBits - 1, 6); // Length of meaningful bits in the next 6 bits
        out.writeBits(xor >>> trailingZeros, significantBits); // Store the meaningful bits of XOR

        storedLeadingZeros = leadingZeros;
        storedTrailingZeros = trailingZeros;
    }
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/ValueDecompressor.java
================================================
package fi.iki.yak.ts.compression.gorilla;
import fi.iki.yak.ts.compression.gorilla.predictors.LastValuePredictor;
/**
 * Value decompressor for Gorilla encoded values
 *
 * Mirrors ValueCompressor: control bits '0' -> value equals the prediction,
 * '10' -> reuse the previous leading/trailing window, '11' -> a new window
 * (6 + 6 bits) precedes the significant XOR bits.
 *
 * @author Michael Burman
 */
public class ValueDecompressor {
    private final BitInput in;
    private final Predictor predictor;

    // Integer.MAX_VALUE marks that no window has been read yet
    private int storedLeadingZeros = Integer.MAX_VALUE;
    private int storedTrailingZeros = 0;

    public ValueDecompressor(BitInput input) {
        this(input, new LastValuePredictor());
    }

    public ValueDecompressor(BitInput input, Predictor predictor) {
        this.in = input;
        this.predictor = predictor;
    }

    /**
     * Reads the first, uncompressed 64-bit value of the block.
     *
     * @return the stored long value
     */
    public long readFirst() {
        long value = in.getLong(Long.SIZE);
        predictor.update(value);
        return value;
    }

    /**
     * Decompresses the next value in the stream.
     *
     * @return the decompressed long value
     */
    public long nextValue() {
        // '11' -> 3, '10' -> 2, '0' -> 0
        int val = in.nextClearBit(2);

        switch(val) {
            case 3:
                // New leading and trailing zeros
                storedLeadingZeros = (int) in.getLong(6);

                byte significantBits = (byte) in.getLong(6);
                // The compressor stored (significantBits - 1); undo it here
                significantBits++;

                storedTrailingZeros = Long.SIZE - significantBits - storedLeadingZeros;
                // missing break is intentional, we want to overflow to next one
            case 2:
                // Read only the significant bits, shift them into place and XOR
                // against the prediction
                long value = in.getLong(Long.SIZE - storedLeadingZeros - storedTrailingZeros);
                value <<= storedTrailingZeros;
                value = predictor.predict() ^ value;
                predictor.update(value);
                return value;
        }
        // '0' control bit: the value equals the prediction
        return predictor.predict();
    }
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/benchmark/EncodingBenchmark.java
================================================
package fi.iki.yak.ts.compression.gorilla.benchmark;
import fi.iki.yak.ts.compression.gorilla.*;
import org.openjdk.jmh.annotations.*;
import org.openjdk.jmh.infra.Blackhole;
import java.nio.ByteBuffer;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
/**
 * JMH benchmarks comparing the ByteBuffer-backed codec (Compressor/Decompressor)
 * with the long-array-backed one (GorillaCompressor/GorillaDecompressor).
 *
 * @author Michael Burman
 */
@BenchmarkMode(Mode.Throughput)
@State(Scope.Benchmark)
@Fork(1)
@Warmup(iterations = 5)
@Measurement(iterations = 10) // Reduce the amount of iterations if you start to see GC interference
public class EncodingBenchmark {

    @State(Scope.Benchmark)
    public static class DataGenerator {
        public List insertList;

        @Param({"100000"})
        public int amountOfPoints;

        // Block start timestamp shared by all benchmark runs
        public long blockStart;

        public long[] uncompressedTimestamps;
        public long[] uncompressedValues;
        public double[] uncompressedDoubles;

        // Pre-compressed data for the decoding benchmarks
        public long[] compressedArray;
        public ByteBuffer uncompressedBuffer;
        public ByteBuffer compressedBuffer;

        public List pairs;

        @Setup(Level.Trial)
        public void setup() {
            // Align the block start to the current hour (epoch millis)
            blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
                    .toInstant(ZoneOffset.UTC).toEpochMilli();

            long now = blockStart + 60;
            uncompressedTimestamps = new long[amountOfPoints];
            uncompressedDoubles = new double[amountOfPoints];
            uncompressedValues = new long[amountOfPoints];

            insertList = new ArrayList<>(amountOfPoints);

            ByteBuffer bb = ByteBuffer.allocate(amountOfPoints * 2*Long.BYTES);
            pairs = new ArrayList<>(amountOfPoints);

            // Generate 60ms-spaced timestamps with monotonically increasing values
            for(int i = 0; i < amountOfPoints; i++) {
                now += 60;
                bb.putLong(now);
                bb.putDouble(i);
                uncompressedTimestamps[i] = now;
                uncompressedDoubles[i] = i;
                uncompressedValues[i] = i;
                pairs.add(new Pair(now, i));
                // bb.putLong(i);
            }

            if (bb.hasArray()) {
                uncompressedBuffer = bb.duplicate();
                uncompressedBuffer.flip();
            }

            // Compress the same data with both implementations for the decoding benchmarks
            ByteBufferBitOutput output = new ByteBufferBitOutput();
            LongArrayOutput arrayOutput = new LongArrayOutput(amountOfPoints);

            Compressor c = new Compressor(blockStart, output);
            GorillaCompressor gc = new GorillaCompressor(blockStart, arrayOutput);

            bb.flip();

            for(int j = 0; j < amountOfPoints; j++) {
                // c.addValue(bb.getLong(), bb.getLong());
                c.addValue(bb.getLong(), bb.getDouble());
                gc.addValue(uncompressedTimestamps[j], uncompressedDoubles[j]);
            }

            gc.close();
            c.close();

            ByteBuffer byteBuffer = output.getByteBuffer();
            byteBuffer.flip();

            compressedBuffer = byteBuffer;
            compressedArray = arrayOutput.getLongArray();
        }
    }

    // Disabled: ByteBuffer-based encoder benchmark
//    @Benchmark
    @OperationsPerInvocation(100000)
    public void encodingBenchmark(DataGenerator dg) {
        ByteBufferBitOutput output = new ByteBufferBitOutput();
        Compressor c = new Compressor(dg.blockStart, output);

        for(int j = 0; j < dg.amountOfPoints; j++) {
            c.addValue(dg.uncompressedBuffer.getLong(), dg.uncompressedBuffer.getDouble());
        }
        c.close();
        dg.uncompressedBuffer.rewind();
    }

    @Benchmark
    @OperationsPerInvocation(100000)
    public void decodingBenchmark(DataGenerator dg, Blackhole bh) throws Exception {
        ByteBuffer duplicate = dg.compressedBuffer.duplicate();
        ByteBufferBitInput input = new ByteBufferBitInput(duplicate);
        Decompressor d = new Decompressor(input);
        Pair pair;
        while((pair = d.readPair()) != null) {
            bh.consume(pair);
        }
    }

    @Benchmark
    @OperationsPerInvocation(100000)
    public void encodingGorillaBenchmark(DataGenerator dg) {
        LongArrayOutput output = new LongArrayOutput();
        GorillaCompressor c = new GorillaCompressor(dg.blockStart, output);

        for(int j = 0; j < dg.amountOfPoints; j++) {
            c.addValue(dg.uncompressedTimestamps[j], dg.uncompressedDoubles[j]);
        }
        c.close();
    }

    @Benchmark
    @OperationsPerInvocation(100000)
    public void encodingGorillaBenchmarkLong(DataGenerator dg) {
        LongArrayOutput output = new LongArrayOutput();
        GorillaCompressor c = new GorillaCompressor(dg.blockStart, output);

        for(int j = 0; j < dg.amountOfPoints; j++) {
            c.addValue(dg.uncompressedTimestamps[j], dg.uncompressedValues[j]);
        }
        c.close();
    }

//    @Benchmark
//    @OperationsPerInvocation(100000)
//    public void encodingGorillaStreamBenchmark(DataGenerator dg) {
//        LongArrayOutput output = new LongArrayOutput();
//        GorillaCompressor c = new GorillaCompressor(dg.blockStart, output);
//
//        c.compressLongStream(dg.pairs.stream());
//        c.close();
//    }

    @Benchmark
    @OperationsPerInvocation(100000)
    public void decodingGorillaBenchmark(DataGenerator dg, Blackhole bh) throws Exception {
        LongArrayInput input = new LongArrayInput(dg.compressedArray);
        GorillaDecompressor d = new GorillaDecompressor(input);
        Pair pair;
        while((pair = d.readPair()) != null) {
            bh.consume(pair);
        }
    }
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/predictors/DifferentialFCM.java
================================================
package fi.iki.yak.ts.compression.gorilla.predictors;
import fi.iki.yak.ts.compression.gorilla.Predictor;
/**
 * Differential Finite Context Method (DFCM) is a context based predictor.
 *
 * The table stores previously observed value deltas, indexed by a hash of the
 * recent delta history; the prediction is the last value plus the table's delta.
 *
 * @author Michael Burman
 */
public class DifferentialFCM implements Predictor {
    private long lastValue = 0L;
    private final long[] table;
    private int lastHash = 0;

    private final int mask;

    /**
     * Create a new DFCM predictor
     *
     * @param size Prediction table size, will be rounded to the next power of two and must be larger than 0
     */
    public DifferentialFCM(int size) {
        if(size <= 0) {
            throw new IllegalArgumentException("Size must be positive");
        }
        // Round up to the next power of two so indexing can use a bit mask
        size--;
        int lz = Long.numberOfLeadingZeros(size);
        int tableSize = 1 << (Long.SIZE - lz);
        this.table = new long[tableSize];
        this.mask = tableSize - 1;
    }

    @Override
    public void update(long value) {
        // Remember the delta for the current context, then advance the context hash
        long delta = value - lastValue;
        table[lastHash] = delta;
        lastHash = (int) (((lastHash << 5) ^ (delta >> 50)) & this.mask);
        lastValue = value;
    }

    @Override
    public long predict() {
        return table[lastHash] + lastValue;
    }
}
================================================
FILE: src/main/java/fi/iki/yak/ts/compression/gorilla/predictors/LastValuePredictor.java
================================================
package fi.iki.yak.ts.compression.gorilla.predictors;
import fi.iki.yak.ts.compression.gorilla.Predictor;
/**
* Last-Value predictor, a computational predictor using previous value as a prediction for the next one
*
* @author Michael Burman
*/
public class LastValuePredictor implements Predictor {
private long storedVal = 0;
public LastValuePredictor() {}
public void update(long value) {
this.storedVal = value;
}
public long predict() {
return storedVal;
}
}
================================================
FILE: src/test/java/fi/iki/yak/ts/compression/gorilla/EncodeGorillaTest.java
================================================
package fi.iki.yak.ts.compression.gorilla;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import java.nio.ByteBuffer;
import java.time.LocalDateTime;
import java.time.Month;
import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.concurrent.ThreadLocalRandom;
import org.junit.jupiter.api.Test;
import fi.iki.yak.ts.compression.gorilla.predictors.DifferentialFCM;
/**
* These are generic tests to test that input matches the output after compression + decompression cycle, using
* both the timestamp and value compression.
*
* @author Michael Burman
*/
public class EncodeGorillaTest {
/**
 * Compresses the given pairs with GorillaCompressor, decompresses the result with
 * GorillaDecompressor and verifies every pair round-trips unchanged.
 *
 * @param blockTimestamp Block start timestamp for the compressor
 * @param pairs Series to round-trip, in timestamp order
 */
private void comparePairsToCompression(long blockTimestamp, Pair[] pairs) {
    LongArrayOutput out = new LongArrayOutput();
    GorillaCompressor compressor = new GorillaCompressor(blockTimestamp, out);
    for (Pair p : pairs) {
        compressor.addValue(p.getTimestamp(), p.getDoubleValue());
    }
    compressor.close();

    GorillaDecompressor decompressor =
            new GorillaDecompressor(new LongArrayInput(out.getLongArray()));
    // Replace with stream once GorillaDecompressor supports it
    for (Pair expected : pairs) {
        Pair actual = decompressor.readPair();
        assertEquals(expected.getTimestamp(), actual.getTimestamp(), "Timestamp did not match");
        assertEquals(expected.getDoubleValue(), actual.getDoubleValue(), "Value did not match");
    }
    assertNull(decompressor.readPair());
}
@Test
void simpleEncodeAndDecodeTest() throws Exception {
    // Block timestamp aligned to the start of the current hour (epoch millis)
    long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
            .toInstant(ZoneOffset.UTC).toEpochMilli();

    // Mix of positive/negative doubles and growing timestamp gaps so several
    // delta-of-delta bit widths get exercised
    Pair[] pairs = {
            new Pair(now + 10, Double.doubleToRawLongBits(1.0)),
            new Pair(now + 20, Double.doubleToRawLongBits(-2.0)),
            new Pair(now + 28, Double.doubleToRawLongBits(-2.5)),
            new Pair(now + 84, Double.doubleToRawLongBits(65537)),
            new Pair(now + 400, Double.doubleToRawLongBits(2147483650.0)),
            new Pair(now + 2300, Double.doubleToRawLongBits(-16384)),
            new Pair(now + 16384, Double.doubleToRawLongBits(2.8)),
            new Pair(now + 16500, Double.doubleToRawLongBits(-38.0))
    };

    comparePairsToCompression(now, pairs);
}
@Test
public void willItBlend() throws Exception {
long blockTimestamp = 1500400800000L;
Pair[] pairs = {
new Pair(1500405481623L, 69087),
new Pair(1500405488693L, 65640),
new Pair(1500405495993L, 58155),
new Pair(1500405503743L, 61025),
new Pair(1500405511623L, 91156),
new Pair(1500405519803L, 37516),
new Pair(1500405528313L, 93515),
new Pair(1500405537233L, 96226),
new Pair(1500405546453L, 23833),
new Pair(1500405556103L, 73186),
new Pair(1500405566143L, 96947),
new Pair(1500405576163L, 46927),
new Pair(1500405586173L, 77954),
new Pair(1500405596183L, 29302),
new Pair(1500405606213L, 6700),
new Pair(1500405616163L, 71971),
new Pair(1500405625813L, 8528),
new Pair(1500405635763L, 85321),
new Pair(1500405645634L, 83229),
new Pair(1500405655633L, 78298),
new Pair(1500405665623L, 87122),
new Pair(1500405675623L, 82055),
new Pair(1500405685723L, 75067),
new Pair(1500405695663L, 33680),
new Pair(1500405705743L, 17576),
new Pair(1500405715813L, 89701),
new Pair(1500405725773L, 21427),
new Pair(1500405735883L, 58255),
new Pair(1500405745903L, 3768),
new Pair(1500405755863L, 62086),
new Pair(1500405765843L, 66965),
new Pair(1500405775773L, 35801),
new Pair(1500405785883L, 72169),
new Pair(1500405795843L, 43089),
new Pair(1500405805733L, 31418),
new Pair(1500405815853L, 84781),
new Pair(1500405825963L, 36103),
new Pair(1500405836004L, 87431),
new Pair(1500405845953L, 7379),
new Pair(1500405855913L, 66919),
new Pair(1500405865963L, 30906),
new Pair(1500405875953L, 88630),
new Pair(1500405885943L, 27546),
new Pair(1500405896033L, 43813),
new Pair(1500405906094L, 2124),
new Pair(1500405916063L, 49399),
new Pair(1500405926143L, 94577),
new Pair(1500405936123L, 98459),
new Pair(1500405946033L, 49457),
new Pair(1500405956023L, 92838),
new Pair(1500405966023L, 15628),
new Pair(1500405976043L, 53916),
new Pair(1500405986063L, 90387),
new Pair(1500405996123L, 43176),
new Pair(1500406006123L, 18838),
new Pair(1500406016174L, 78847),
new Pair(1500406026173L, 39591),
new Pair(1500406036004L, 77070),
new Pair(1500406045964L, 56788),
new Pair(1500406056043L, 96706),
new Pair(1500406066123L, 20756),
new Pair(1500406076113L, 64433),
new Pair(1500406086133L, 45791),
new Pair(1500406096123L, 75028),
new Pair(1500406106193L, 55403),
new Pair(1500406116213L, 36991),
new Pair(1500406126073L, 92929),
new Pair(1500406136103L, 60416),
new Pair(1500406146183L, 55485),
new Pair(1500406156383L, 53525),
new Pair(1500406166313L, 96021),
new Pair(1500406176414L, 22705),
new Pair(1500406186613L, 89801),
new Pair(1500406196543L, 51975),
new Pair(1500406206483L, 86741),
new Pair(1500406216483L, 22440),
new Pair(1500406226433L, 51818),
new Pair(1500406236403L, 61965),
new Pair(1500406246413L, 19074),
new Pair(1500406256494L, 54521),
new Pair(1500406266413L, 59315),
new Pair(1500406276303L, 19171),
new Pair(1500406286213L, 98800),
new Pair(1500406296183L, 7086),
new Pair(1500406306103L, 60578),
new Pair(1500406316073L, 96828),
new Pair(1500406326143L, 83746),
new Pair(1500406336153L, 85481),
new Pair(1500406346113L, 22346),
new Pair(1500406356133L, 80976),
new Pair(1500406366065L, 43586),
new Pair(1500406376074L, 82500),
new Pair(1500406386184L, 13576),
new Pair(1500406396113L, 77871),
new Pair(1500406406094L, 60978),
new Pair(1500406416203L, 35264),
new Pair(1500406426323L, 79733),
new Pair(1500406436343L, 29140),
new Pair(1500406446323L, 7237),
new Pair(1500406456344L, 52866),
new Pair(1500406466393L, 88456),
new Pair(1500406476493L, 33533),
new Pair(1500406486524L, 96961),
new Pair(1500406496453L, 16389),
new Pair(1500406506453L, 31181),
new Pair(1500406516433L, 63282),
new Pair(1500406526433L, 92857),
new Pair(1500406536413L, 4582),
new Pair(1500406546383L, 46832),
new Pair(1500406556473L, 6335),
new Pair(1500406566413L, 44367),
new Pair(1500406576513L, 84640),
new Pair(1500406586523L, 36174),
new Pair(1500406596553L, 40075),
new Pair(1500406606603L, 80886),
new Pair(1500406616623L, 43784),
new Pair(1500406626623L, 25077),
new Pair(1500406636723L, 18617),
new Pair(1500406646723L, 72681),
new Pair(1500406656723L, 84811),
new Pair(1500406666783L, 90053),
new Pair(1500406676685L, 25708),
new Pair(1500406686713L, 57134),
new Pair(1500406696673L, 87193),
new Pair(1500406706743L, 66057),
new Pair(1500406716724L, 51404),
new Pair(1500406726753L, 90141),
new Pair(1500406736813L, 10434),
new Pair(1500406746803L, 29056),
new Pair(1500406756833L, 48160),
new Pair(1500406766924L, 96652),
new Pair(1500406777113L, 64141),
new Pair(1500406787113L, 22143),
new Pair(1500406797093L, 20561),
new Pair(1500406807113L, 66401),
new Pair(1500406817283L, 76802),
new Pair(1500406827284L, 37555),
new Pair(1500406837323L, 63169),
new Pair(1500406847463L, 45712),
new Pair(1500406857513L, 44751),
new Pair(1500406867523L, 98891),
new Pair(1500406877523L, 38122),
new Pair(1500406887623L, 46202),
new Pair(1500406897703L, 5875),
new Pair(1500406907663L, 17397),
new Pair(1500406917603L, 39994),
new Pair(1500406927633L, 82385),
new Pair(1500406937623L, 15598),
new Pair(1500406947693L, 36235),
new Pair(1500406957703L, 97536),
new Pair(1500406967673L, 28557),
new Pair(1500406977723L, 13985),
new Pair(1500406987663L, 64304),
new Pair(1500406997573L, 83693),
new Pair(1500407007494L, 6574),
new Pair(1500407017493L, 25134),
new Pair(1500407027503L, 50383),
new Pair(1500407037523L, 55922),
new Pair(1500407047603L, 73436),
new Pair(1500407057473L, 68235),
new Pair(1500407067553L, 1469),
new Pair(1500407077463L, 44315),
new Pair(1500407087463L, 95064),
new Pair(1500407097443L, 1997),
new Pair(1500407107473L, 17247),
new Pair(1500407117453L, 42454),
new Pair(1500407127413L, 73631),
new Pair(1500407137363L, 96890),
new Pair(1500407147343L, 43450),
new Pair(1500407157363L, 42042),
new Pair(1500407167403L, 83014),
new Pair(1500407177473L, 32051),
new Pair(1500407187523L, 69280),
new Pair(1500407197495L, 21425),
new Pair(1500407207453L, 93748),
new Pair(1500407217413L, 64151),
new Pair(1500407227443L, 38791),
new Pair(1500407237463L, 5248),
new Pair(1500407247523L, 92935),
new Pair(1500407257513L, 18516),
new Pair(1500407267584L, 98870),
new Pair(1500407277573L, 82244),
new Pair(1500407287723L, 65464),
new Pair(1500407297723L, 33801),
new Pair(1500407307673L, 18331),
new Pair(1500407317613L, 89744),
new Pair(1500407327553L, 98460),
new Pair(1500407337503L, 24709),
new Pair(1500407347423L, 8407),
new Pair(1500407357383L, 69451),
new Pair(1500407367333L, 51100),
new Pair(1500407377373L, 25309),
new Pair(1500407387443L, 16148),
new Pair(1500407397453L, 98974),
new Pair(1500407407543L, 80284),
new Pair(1500407417583L, 170),
new Pair(1500407427453L, 34706),
new Pair(1500407437433L, 39681),
new Pair(1500407447603L, 6140),
new Pair(1500407457513L, 64595),
new Pair(1500407467564L, 59862),
new Pair(1500407477563L, 53795),
new Pair(1500407487593L, 83493),
new Pair(1500407497584L, 90639),
new Pair(1500407507623L, 16777),
new Pair(1500407517613L, 11096),
new Pair(1500407527673L, 38512),
new Pair(1500407537963L, 52759),
new Pair(1500407548023L, 79567),
new Pair(1500407558033L, 48664),
new Pair(1500407568113L, 10710),
new Pair(1500407578164L, 25635),
new Pair(1500407588213L, 40985),
new Pair(1500407598163L, 94089),
new Pair(1500407608163L, 50056),
new Pair(1500407618223L, 15550),
new Pair(1500407628143L, 78823),
new Pair(1500407638223L, 9044),
new Pair(1500407648173L, 20782),
new Pair(1500407658023L, 86390),
new Pair(1500407667903L, 79444),
new Pair(1500407677903L, 84051),
new Pair(1500407687923L, 91554),
new Pair(1500407697913L, 58777),
new Pair(1500407708003L, 89474),
new Pair(1500407718083L, 94026),
new Pair(1500407728034L, 41613),
new Pair(1500407738083L, 64667),
new Pair(1500407748034L, 5160),
new Pair(1500407758003L, 45140),
new Pair(1500407768033L, 53704),
new Pair(1500407778083L, 68097),
new Pair(1500407788043L, 81137),
new Pair(1500407798023L, 59657),
new Pair(1500407808033L, 56572),
new Pair(1500407817983L, 1993),
new Pair(1500407828063L, 62608),
new Pair(1500407838213L, 76489),
new Pair(1500407848203L, 22147),
new Pair(1500407858253L, 92829),
new Pair(1500407868073L, 48499),
new Pair(1500407878053L, 89152),
new Pair(1500407888073L, 9191),
new Pair(1500407898033L, 49881),
new Pair(1500407908113L, 96020),
new Pair(1500407918213L, 90203),
new Pair(1500407928234L, 32217),
new Pair(1500407938253L, 94302),
new Pair(1500407948293L, 83111),
new Pair(1500407958234L, 75576),
new Pair(1500407968073L, 5973),
new Pair(1500407978023L, 5175),
new Pair(1500407987923L, 63350),
new Pair(1500407997833L, 44081)
};
comparePairsToCompression(blockTimestamp, pairs);
}
/**
 * Verifies that doubles whose bit patterns differ only slightly survive the
 * compress/decompress round trip, see https://github.com/dgryski/go-tsz/issues/4 for more information.
 */
@Test
void testEncodeSimilarFloats() throws Exception {
    long blockStart = LocalDateTime.of(2015, Month.MARCH, 2, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli();
    LongArrayOutput output = new LongArrayOutput();
    GorillaCompressor compressor = new GorillaCompressor(blockStart, output);

    // Stage the five points in a scratch buffer; it is replayed below to verify the output.
    double[] values = {6.00065e+06, 6.000656e+06, 6.000657e+06, 6.000659e+06, 6.000661e+06};
    ByteBuffer buffer = ByteBuffer.allocate(5 * 2 * Long.BYTES);
    for (int i = 0; i < values.length; i++) {
        buffer.putLong(blockStart + i + 1);
        buffer.putDouble(values[i]);
    }
    buffer.flip();

    while (buffer.hasRemaining()) {
        compressor.addValue(buffer.getLong(), buffer.getDouble());
    }
    compressor.close();
    buffer.flip();

    GorillaDecompressor decompressor =
            new GorillaDecompressor(new LongArrayInput(output.getLongArray()));
    // Replace with stream once GorillaDecompressor supports it
    for (int i = 0; i < values.length; i++) {
        Pair pair = decompressor.readPair();
        assertEquals(buffer.getLong(), pair.getTimestamp(), "Timestamp did not match");
        assertEquals(buffer.getDouble(), pair.getDoubleValue(), "Value did not match");
    }
    assertNull(decompressor.readPair());
}
/**
 * Writes enough datapoints that the included LongArrayOutput must expand its
 * internal long array while compressing, then verifies the full round trip.
 */
@Test
void testEncodeLargeAmountOfData() throws Exception {
    // Large enough to force internal buffer expansion in the output implementation.
    int amountOfPoints = 100000;
    long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
            .toInstant(ZoneOffset.UTC).toEpochMilli();
    LongArrayOutput output = new LongArrayOutput();
    long now = blockStart + 60;
    // Seeded PRNG instead of Math.random() so a failing run can be reproduced exactly.
    java.util.Random random = new java.util.Random(1337);
    ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2 * Long.BYTES);
    for (int i = 0; i < amountOfPoints; i++) {
        bb.putLong(now + i * 60);
        bb.putDouble(i * random.nextDouble());
    }
    GorillaCompressor c = new GorillaCompressor(blockStart, output);
    bb.flip();
    for (int j = 0; j < amountOfPoints; j++) {
        c.addValue(bb.getLong(), bb.getDouble());
    }
    c.close();
    bb.flip(); // rewind the staging buffer to replay the expected values
    LongArrayInput input = new LongArrayInput(output.getLongArray());
    GorillaDecompressor d = new GorillaDecompressor(input);
    for (int i = 0; i < amountOfPoints; i++) {
        long tStamp = bb.getLong();
        double val = bb.getDouble();
        Pair pair = d.readPair();
        assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i);
        assertEquals(val, pair.getDoubleValue());
    }
    assertNull(d.readPair());
}
/**
 * Same large-volume round trip as {@link #testEncodeLargeAmountOfData()} but through the
 * older ByteBufferBitOutput/ByteBufferBitInput implementations.
 */
@Test
void testEncodeLargeAmountOfDataOldBuffer() throws Exception {
    // Large enough to force the output ByteBuffer to be reallocated while writing.
    int amountOfPoints = 100000;
    long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
            .toInstant(ZoneOffset.UTC).toEpochMilli();
    ByteBufferBitOutput output = new ByteBufferBitOutput();
    long now = blockStart + 60;
    // Seeded PRNG instead of Math.random() so a failing run can be reproduced exactly.
    java.util.Random random = new java.util.Random(42);
    ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2 * Long.BYTES);
    for (int i = 0; i < amountOfPoints; i++) {
        bb.putLong(now + i * 60);
        bb.putDouble(i * random.nextDouble());
    }
    GorillaCompressor c = new GorillaCompressor(blockStart, output);
    bb.flip();
    for (int j = 0; j < amountOfPoints; j++) {
        c.addValue(bb.getLong(), bb.getDouble());
    }
    c.close();
    bb.flip(); // rewind the staging buffer to replay the expected values
    ByteBuffer byteBuffer = output.getByteBuffer();
    byteBuffer.flip();
    ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer);
    GorillaDecompressor d = new GorillaDecompressor(input);
    for (int i = 0; i < amountOfPoints; i++) {
        long tStamp = bb.getLong();
        double val = bb.getDouble();
        Pair pair = d.readPair();
        assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i);
        assertEquals(val, pair.getDoubleValue());
    }
    assertNull(d.readPair());
}
/**
 * Although not intended usage, an empty block should not cause errors
 */
@Test
void testEmptyBlock() throws Exception {
    long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
            .toInstant(ZoneOffset.UTC).toEpochMilli();
    LongArrayOutput output = new LongArrayOutput();
    GorillaCompressor compressor = new GorillaCompressor(blockStart, output);
    compressor.close();

    // An immediately closed block must decode to "no pairs" rather than fail.
    GorillaDecompressor decompressor =
            new GorillaDecompressor(new LongArrayInput(output.getLongArray()));
    assertNull(decompressor.readPair());
}
/**
 * Values appended so far must be readable through getLongArray() even though
 * close() is never called on the compressor.
 */
@Test
void testCopyFlush() {
    long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
            .toInstant(ZoneOffset.UTC).toEpochMilli();
    LongArrayOutput output = new LongArrayOutput();
    GorillaCompressor compressor = new GorillaCompressor(blockStart, output);
    compressor.addValue(blockStart + 1, 1.0);
    compressor.addValue(blockStart + 2, 1.0);

    // NOTE(review): intentionally no close() here - presumably getLongArray() flushes
    // pending bits into the returned copy; confirm against LongArrayOutput.
    GorillaDecompressor decompressor =
            new GorillaDecompressor(new LongArrayInput(output.getLongArray()));
    assertEquals(blockStart + 1, decompressor.readPair().getTimestamp());
    assertEquals(blockStart + 2, decompressor.readPair().getTimestamp());
}
/**
 * Long values should be compressable and decompressable in the stream
 */
@Test
void testLongEncoding() throws Exception {
    int amountOfPoints = 10000;
    long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
            .toInstant(ZoneOffset.UTC).toEpochMilli();
    long firstTimestamp = blockStart + 60;

    // Stage the input points in a scratch buffer so they can be replayed for verification.
    ByteBuffer buffer = ByteBuffer.allocateDirect(amountOfPoints * 2 * Long.BYTES);
    for (int i = 0; i < amountOfPoints; i++) {
        buffer.putLong(firstTimestamp + i * 60);
        buffer.putLong(ThreadLocalRandom.current().nextLong(Integer.MAX_VALUE));
    }
    buffer.flip();

    LongArrayOutput output = new LongArrayOutput();
    GorillaCompressor compressor = new GorillaCompressor(blockStart, output);
    while (buffer.hasRemaining()) {
        compressor.addValue(buffer.getLong(), buffer.getLong());
    }
    compressor.close();
    buffer.flip();

    GorillaDecompressor decompressor =
            new GorillaDecompressor(new LongArrayInput(output.getLongArray()));
    for (int i = 0; i < amountOfPoints; i++) {
        long expectedTimestamp = buffer.getLong();
        long expectedValue = buffer.getLong();
        Pair pair = decompressor.readPair();
        assertEquals(expectedTimestamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i);
        assertEquals(expectedValue, pair.getLongValue());
    }
    assertNull(decompressor.readPair());
}
/**
 * Round-trips a large dataset using the DifferentialFCM value predictor; the
 * decompressor must be configured with an identically sized predictor.
 */
@Test
void testDifferentialFCM() throws Exception {
    int amountOfPoints = 100000;
    long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
            .toInstant(ZoneOffset.UTC).toEpochMilli();
    LongArrayOutput output = new LongArrayOutput();
    long now = blockStart + 60;
    // Seeded PRNG instead of Math.random() so a failing run can be reproduced exactly.
    java.util.Random random = new java.util.Random(2674);
    ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2 * Long.BYTES);
    for (int i = 0; i < amountOfPoints; i++) {
        bb.putLong(now + i * 60);
        bb.putDouble(i * random.nextDouble());
    }
    GorillaCompressor c = new GorillaCompressor(blockStart, output, new DifferentialFCM(1024));
    bb.flip();
    for (int j = 0; j < amountOfPoints; j++) {
        c.addValue(bb.getLong(), bb.getDouble());
    }
    c.close();
    bb.flip(); // rewind the staging buffer to replay the expected values
    LongArrayInput input = new LongArrayInput(output.getLongArray());
    // Predictor state is not stored in the stream, so the same table size (1024) is required here.
    GorillaDecompressor d = new GorillaDecompressor(input, new DifferentialFCM(1024));
    for (int i = 0; i < amountOfPoints; i++) {
        long tStamp = bb.getLong();
        double val = bb.getDouble();
        Pair pair = d.readPair();
        assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i);
        assertEquals(val, pair.getDoubleValue());
    }
    assertNull(d.readPair());
}
}
================================================
FILE: src/test/java/fi/iki/yak/ts/compression/gorilla/EncodeTest.java
================================================
package fi.iki.yak.ts.compression.gorilla;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;
import java.nio.ByteBuffer;
import java.time.LocalDateTime;
import java.time.Month;
import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;
import java.util.Arrays;
import java.util.concurrent.ThreadLocalRandom;
import org.junit.jupiter.api.Test;
/**
* These are generic tests to test that input matches the output after compression + decompression cycle, using
* both the timestamp and value compression.
*
* @author Michael Burman
*/
public class EncodeTest {
/**
 * Compresses the given pairs, decompresses the result and asserts that the round
 * trip reproduces every timestamp and value exactly, followed by end-of-stream.
 */
private void comparePairsToCompression(long blockTimestamp, Pair[] pairs) {
    ByteBufferBitOutput output = new ByteBufferBitOutput();
    Compressor compressor = new Compressor(blockTimestamp, output);
    for (Pair p : pairs) {
        compressor.addValue(p.getTimestamp(), p.getDoubleValue());
    }
    compressor.close();

    ByteBuffer compressed = output.getByteBuffer();
    compressed.flip();
    Decompressor decompressor = new Decompressor(new ByteBufferBitInput(compressed));
    // Replace with stream once decompressor supports it
    for (Pair expected : pairs) {
        Pair actual = decompressor.readPair();
        assertEquals(expected.getTimestamp(), actual.getTimestamp(), "Timestamp did not match");
        assertEquals(expected.getDoubleValue(), actual.getDoubleValue(), "Value did not match");
    }
    assertNull(decompressor.readPair());
}
/**
 * Round-trips a small hand-picked set of points covering positive, negative,
 * fractional and large-magnitude values at irregular timestamp offsets.
 */
@Test
void simpleEncodeAndDecodeTest() throws Exception {
    long now = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
            .toInstant(ZoneOffset.UTC).toEpochMilli();
    long[] offsets = {10, 20, 28, 84, 400, 2300, 16384, 16500};
    double[] values = {1.0, -2.0, -2.5, 65537, 2147483650.0, -16384, 2.8, -38.0};
    Pair[] pairs = new Pair[offsets.length];
    for (int i = 0; i < offsets.length; i++) {
        pairs[i] = new Pair(now + offsets[i], Double.doubleToRawLongBits(values[i]));
    }
    comparePairsToCompression(now, pairs);
}
/**
 * Round-trips a larger fixed sample of irregularly spaced (roughly 10 s apart)
 * timestamp/value points through the compress/decompress cycle.
 */
@Test
public void willItBlend() throws Exception {
// Block-aligned start timestamp preceding the first sample.
long blockTimestamp = 1500400800000L;
Pair[] pairs = {
new Pair(1500405481623L, 69087),
new Pair(1500405488693L, 65640),
new Pair(1500405495993L, 58155),
new Pair(1500405503743L, 61025),
new Pair(1500405511623L, 91156),
new Pair(1500405519803L, 37516),
new Pair(1500405528313L, 93515),
new Pair(1500405537233L, 96226),
new Pair(1500405546453L, 23833),
new Pair(1500405556103L, 73186),
new Pair(1500405566143L, 96947),
new Pair(1500405576163L, 46927),
new Pair(1500405586173L, 77954),
new Pair(1500405596183L, 29302),
new Pair(1500405606213L, 6700),
new Pair(1500405616163L, 71971),
new Pair(1500405625813L, 8528),
new Pair(1500405635763L, 85321),
new Pair(1500405645634L, 83229),
new Pair(1500405655633L, 78298),
new Pair(1500405665623L, 87122),
new Pair(1500405675623L, 82055),
new Pair(1500405685723L, 75067),
new Pair(1500405695663L, 33680),
new Pair(1500405705743L, 17576),
new Pair(1500405715813L, 89701),
new Pair(1500405725773L, 21427),
new Pair(1500405735883L, 58255),
new Pair(1500405745903L, 3768),
new Pair(1500405755863L, 62086),
new Pair(1500405765843L, 66965),
new Pair(1500405775773L, 35801),
new Pair(1500405785883L, 72169),
new Pair(1500405795843L, 43089),
new Pair(1500405805733L, 31418),
new Pair(1500405815853L, 84781),
new Pair(1500405825963L, 36103),
new Pair(1500405836004L, 87431),
new Pair(1500405845953L, 7379),
new Pair(1500405855913L, 66919),
new Pair(1500405865963L, 30906),
new Pair(1500405875953L, 88630),
new Pair(1500405885943L, 27546),
new Pair(1500405896033L, 43813),
new Pair(1500405906094L, 2124),
new Pair(1500405916063L, 49399),
new Pair(1500405926143L, 94577),
new Pair(1500405936123L, 98459),
new Pair(1500405946033L, 49457),
new Pair(1500405956023L, 92838),
new Pair(1500405966023L, 15628),
new Pair(1500405976043L, 53916),
new Pair(1500405986063L, 90387),
new Pair(1500405996123L, 43176),
new Pair(1500406006123L, 18838),
new Pair(1500406016174L, 78847),
new Pair(1500406026173L, 39591),
new Pair(1500406036004L, 77070),
new Pair(1500406045964L, 56788),
new Pair(1500406056043L, 96706),
new Pair(1500406066123L, 20756),
new Pair(1500406076113L, 64433),
new Pair(1500406086133L, 45791),
new Pair(1500406096123L, 75028),
new Pair(1500406106193L, 55403),
new Pair(1500406116213L, 36991),
new Pair(1500406126073L, 92929),
new Pair(1500406136103L, 60416),
new Pair(1500406146183L, 55485),
new Pair(1500406156383L, 53525),
new Pair(1500406166313L, 96021),
new Pair(1500406176414L, 22705),
new Pair(1500406186613L, 89801),
new Pair(1500406196543L, 51975),
new Pair(1500406206483L, 86741),
new Pair(1500406216483L, 22440),
new Pair(1500406226433L, 51818),
new Pair(1500406236403L, 61965),
new Pair(1500406246413L, 19074),
new Pair(1500406256494L, 54521),
new Pair(1500406266413L, 59315),
new Pair(1500406276303L, 19171),
new Pair(1500406286213L, 98800),
new Pair(1500406296183L, 7086),
new Pair(1500406306103L, 60578),
new Pair(1500406316073L, 96828),
new Pair(1500406326143L, 83746),
new Pair(1500406336153L, 85481),
new Pair(1500406346113L, 22346),
new Pair(1500406356133L, 80976),
new Pair(1500406366065L, 43586),
new Pair(1500406376074L, 82500),
new Pair(1500406386184L, 13576),
new Pair(1500406396113L, 77871),
new Pair(1500406406094L, 60978),
new Pair(1500406416203L, 35264),
new Pair(1500406426323L, 79733),
new Pair(1500406436343L, 29140),
new Pair(1500406446323L, 7237),
new Pair(1500406456344L, 52866),
new Pair(1500406466393L, 88456),
new Pair(1500406476493L, 33533),
new Pair(1500406486524L, 96961),
new Pair(1500406496453L, 16389),
new Pair(1500406506453L, 31181),
new Pair(1500406516433L, 63282),
new Pair(1500406526433L, 92857),
new Pair(1500406536413L, 4582),
new Pair(1500406546383L, 46832),
new Pair(1500406556473L, 6335),
new Pair(1500406566413L, 44367),
new Pair(1500406576513L, 84640),
new Pair(1500406586523L, 36174),
new Pair(1500406596553L, 40075),
new Pair(1500406606603L, 80886),
new Pair(1500406616623L, 43784),
new Pair(1500406626623L, 25077),
new Pair(1500406636723L, 18617),
new Pair(1500406646723L, 72681),
new Pair(1500406656723L, 84811),
new Pair(1500406666783L, 90053),
new Pair(1500406676685L, 25708),
new Pair(1500406686713L, 57134),
new Pair(1500406696673L, 87193),
new Pair(1500406706743L, 66057),
new Pair(1500406716724L, 51404),
new Pair(1500406726753L, 90141),
new Pair(1500406736813L, 10434),
new Pair(1500406746803L, 29056),
new Pair(1500406756833L, 48160),
new Pair(1500406766924L, 96652),
new Pair(1500406777113L, 64141),
new Pair(1500406787113L, 22143),
new Pair(1500406797093L, 20561),
new Pair(1500406807113L, 66401),
new Pair(1500406817283L, 76802),
new Pair(1500406827284L, 37555),
new Pair(1500406837323L, 63169),
new Pair(1500406847463L, 45712),
new Pair(1500406857513L, 44751),
new Pair(1500406867523L, 98891),
new Pair(1500406877523L, 38122),
new Pair(1500406887623L, 46202),
new Pair(1500406897703L, 5875),
new Pair(1500406907663L, 17397),
new Pair(1500406917603L, 39994),
new Pair(1500406927633L, 82385),
new Pair(1500406937623L, 15598),
new Pair(1500406947693L, 36235),
new Pair(1500406957703L, 97536),
new Pair(1500406967673L, 28557),
new Pair(1500406977723L, 13985),
new Pair(1500406987663L, 64304),
new Pair(1500406997573L, 83693),
new Pair(1500407007494L, 6574),
new Pair(1500407017493L, 25134),
new Pair(1500407027503L, 50383),
new Pair(1500407037523L, 55922),
new Pair(1500407047603L, 73436),
new Pair(1500407057473L, 68235),
new Pair(1500407067553L, 1469),
new Pair(1500407077463L, 44315),
new Pair(1500407087463L, 95064),
new Pair(1500407097443L, 1997),
new Pair(1500407107473L, 17247),
new Pair(1500407117453L, 42454),
new Pair(1500407127413L, 73631),
new Pair(1500407137363L, 96890),
new Pair(1500407147343L, 43450),
new Pair(1500407157363L, 42042),
new Pair(1500407167403L, 83014),
new Pair(1500407177473L, 32051),
new Pair(1500407187523L, 69280),
new Pair(1500407197495L, 21425),
new Pair(1500407207453L, 93748),
new Pair(1500407217413L, 64151),
new Pair(1500407227443L, 38791),
new Pair(1500407237463L, 5248),
new Pair(1500407247523L, 92935),
new Pair(1500407257513L, 18516),
new Pair(1500407267584L, 98870),
new Pair(1500407277573L, 82244),
new Pair(1500407287723L, 65464),
new Pair(1500407297723L, 33801),
new Pair(1500407307673L, 18331),
new Pair(1500407317613L, 89744),
new Pair(1500407327553L, 98460),
new Pair(1500407337503L, 24709),
new Pair(1500407347423L, 8407),
new Pair(1500407357383L, 69451),
new Pair(1500407367333L, 51100),
new Pair(1500407377373L, 25309),
new Pair(1500407387443L, 16148),
new Pair(1500407397453L, 98974),
new Pair(1500407407543L, 80284),
new Pair(1500407417583L, 170),
new Pair(1500407427453L, 34706),
new Pair(1500407437433L, 39681),
new Pair(1500407447603L, 6140),
new Pair(1500407457513L, 64595),
new Pair(1500407467564L, 59862),
new Pair(1500407477563L, 53795),
new Pair(1500407487593L, 83493),
new Pair(1500407497584L, 90639),
new Pair(1500407507623L, 16777),
new Pair(1500407517613L, 11096),
new Pair(1500407527673L, 38512),
new Pair(1500407537963L, 52759),
new Pair(1500407548023L, 79567),
new Pair(1500407558033L, 48664),
new Pair(1500407568113L, 10710),
new Pair(1500407578164L, 25635),
new Pair(1500407588213L, 40985),
new Pair(1500407598163L, 94089),
new Pair(1500407608163L, 50056),
new Pair(1500407618223L, 15550),
new Pair(1500407628143L, 78823),
new Pair(1500407638223L, 9044),
new Pair(1500407648173L, 20782),
new Pair(1500407658023L, 86390),
new Pair(1500407667903L, 79444),
new Pair(1500407677903L, 84051),
new Pair(1500407687923L, 91554),
new Pair(1500407697913L, 58777),
new Pair(1500407708003L, 89474),
new Pair(1500407718083L, 94026),
new Pair(1500407728034L, 41613),
new Pair(1500407738083L, 64667),
new Pair(1500407748034L, 5160),
new Pair(1500407758003L, 45140),
new Pair(1500407768033L, 53704),
new Pair(1500407778083L, 68097),
new Pair(1500407788043L, 81137),
new Pair(1500407798023L, 59657),
new Pair(1500407808033L, 56572),
new Pair(1500407817983L, 1993),
new Pair(1500407828063L, 62608),
new Pair(1500407838213L, 76489),
new Pair(1500407848203L, 22147),
new Pair(1500407858253L, 92829),
new Pair(1500407868073L, 48499),
new Pair(1500407878053L, 89152),
new Pair(1500407888073L, 9191),
new Pair(1500407898033L, 49881),
new Pair(1500407908113L, 96020),
new Pair(1500407918213L, 90203),
new Pair(1500407928234L, 32217),
new Pair(1500407938253L, 94302),
new Pair(1500407948293L, 83111),
new Pair(1500407958234L, 75576),
new Pair(1500407968073L, 5973),
new Pair(1500407978023L, 5175),
new Pair(1500407987923L, 63350),
new Pair(1500407997833L, 44081)
};
comparePairsToCompression(blockTimestamp, pairs);
}
/**
 * Verifies that doubles whose bit patterns differ only slightly survive the
 * compress/decompress round trip, see https://github.com/dgryski/go-tsz/issues/4 for more information.
 */
@Test
void testEncodeSimilarFloats() throws Exception {
    long blockStart = LocalDateTime.of(2015, Month.MARCH, 2, 0, 0).toInstant(ZoneOffset.UTC).toEpochMilli();
    ByteBufferBitOutput output = new ByteBufferBitOutput();
    Compressor compressor = new Compressor(blockStart, output);

    // Stage the five points in a scratch buffer; it is replayed below to verify the output.
    double[] values = {6.00065e+06, 6.000656e+06, 6.000657e+06, 6.000659e+06, 6.000661e+06};
    ByteBuffer buffer = ByteBuffer.allocate(5 * 2 * Long.BYTES);
    for (int i = 0; i < values.length; i++) {
        buffer.putLong(blockStart + i + 1);
        buffer.putDouble(values[i]);
    }
    buffer.flip();

    while (buffer.hasRemaining()) {
        compressor.addValue(buffer.getLong(), buffer.getDouble());
    }
    compressor.close();
    buffer.flip();

    ByteBuffer compressed = output.getByteBuffer();
    compressed.flip();
    Decompressor decompressor = new Decompressor(new ByteBufferBitInput(compressed));
    // Replace with stream once decompressor supports it
    for (int i = 0; i < values.length; i++) {
        Pair pair = decompressor.readPair();
        assertEquals(buffer.getLong(), pair.getTimestamp(), "Timestamp did not match");
        assertEquals(buffer.getDouble(), pair.getDoubleValue(), "Value did not match");
    }
    assertNull(decompressor.readPair());
}
/**
 * Tests writing enough large amount of datapoints that causes the included ByteBufferBitOutput to do
 * internal byte array expansion, then verifies the full round trip.
 */
@Test
void testEncodeLargeAmountOfData() throws Exception {
    // Large enough to trigger ByteBuffer reallocation in the output implementation.
    int amountOfPoints = 100000;
    long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
            .toInstant(ZoneOffset.UTC).toEpochMilli();
    ByteBufferBitOutput output = new ByteBufferBitOutput();
    long now = blockStart + 60;
    // Seeded PRNG instead of Math.random() so a failing run can be reproduced exactly.
    java.util.Random random = new java.util.Random(1337);
    ByteBuffer bb = ByteBuffer.allocateDirect(amountOfPoints * 2 * Long.BYTES);
    for (int i = 0; i < amountOfPoints; i++) {
        bb.putLong(now + i * 60);
        bb.putDouble(i * random.nextDouble());
    }
    Compressor c = new Compressor(blockStart, output);
    bb.flip();
    for (int j = 0; j < amountOfPoints; j++) {
        c.addValue(bb.getLong(), bb.getDouble());
    }
    c.close();
    bb.flip(); // rewind the staging buffer to replay the expected values
    ByteBuffer byteBuffer = output.getByteBuffer();
    byteBuffer.flip();
    ByteBufferBitInput input = new ByteBufferBitInput(byteBuffer);
    Decompressor d = new Decompressor(input);
    for (int i = 0; i < amountOfPoints; i++) {
        long tStamp = bb.getLong();
        double val = bb.getDouble();
        Pair pair = d.readPair();
        assertEquals(tStamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i);
        assertEquals(val, pair.getDoubleValue());
    }
    assertNull(d.readPair());
}
/**
 * Although not intended usage, an empty block should not cause errors
 */
@Test
void testEmptyBlock() throws Exception {
    long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
            .toInstant(ZoneOffset.UTC).toEpochMilli();
    ByteBufferBitOutput output = new ByteBufferBitOutput();
    Compressor compressor = new Compressor(blockStart, output);
    compressor.close();

    // An immediately closed block must decode to "no pairs" rather than fail.
    ByteBuffer compressed = output.getByteBuffer();
    compressed.flip();
    Decompressor decompressor = new Decompressor(new ByteBufferBitInput(compressed));
    assertNull(decompressor.readPair());
}
/**
 * Long values should be compressable and decompressable in the stream
 */
@Test
void testLongEncoding() throws Exception {
    int amountOfPoints = 10000;
    long blockStart = LocalDateTime.now().truncatedTo(ChronoUnit.HOURS)
            .toInstant(ZoneOffset.UTC).toEpochMilli();
    long firstTimestamp = blockStart + 60;

    // Stage the input points in a scratch buffer so they can be replayed for verification.
    ByteBuffer buffer = ByteBuffer.allocateDirect(amountOfPoints * 2 * Long.BYTES);
    for (int i = 0; i < amountOfPoints; i++) {
        buffer.putLong(firstTimestamp + i * 60);
        buffer.putLong(ThreadLocalRandom.current().nextLong(Integer.MAX_VALUE));
    }
    buffer.flip();

    ByteBufferBitOutput output = new ByteBufferBitOutput();
    Compressor compressor = new Compressor(blockStart, output);
    while (buffer.hasRemaining()) {
        compressor.addValue(buffer.getLong(), buffer.getLong());
    }
    compressor.close();
    buffer.flip();

    ByteBuffer compressed = output.getByteBuffer();
    compressed.flip();
    Decompressor decompressor = new Decompressor(new ByteBufferBitInput(compressed));
    for (int i = 0; i < amountOfPoints; i++) {
        long expectedTimestamp = buffer.getLong();
        long expectedValue = buffer.getLong();
        Pair pair = decompressor.readPair();
        assertEquals(expectedTimestamp, pair.getTimestamp(), "Expected timestamp did not match at point " + i);
        assertEquals(expectedValue, pair.getLongValue());
    }
    assertNull(decompressor.readPair());
}
}