{
/*
/**********************************************************************
/* Configuration
/**********************************************************************
*/
protected CsvCharacterEscapes _characterEscapes;
/**
* Maximum length of a column value for which quoting-need check is performed
* when not using strict ({@link CsvWriteFeature#STRICT_CHECK_FOR_QUOTING}) checking:
* values longer than this threshold will always be quoted.
* Default value is {@value tools.jackson.dataformat.csv.impl.CsvEncoder#DEFAULT_MAX_QUOTE_CHECK}.
*
* @since 3.2
*/
protected int _maxQuoteCheckChars = CsvEncoder.DEFAULT_MAX_QUOTE_CHECK;
/*
/**********************************************************************
/* Life cycle
/**********************************************************************
*/
// Default constructor: starts from factory-wide default constraints and
// default CSV read/write feature flags.
protected CsvFactoryBuilder() {
super(StreamReadConstraints.defaults(), StreamWriteConstraints.defaults(),
ErrorReportConfiguration.defaults(),
CsvFactory.DEFAULT_CSV_PARSER_FEATURE_FLAGS,
CsvFactory.DEFAULT_CSV_GENERATOR_FEATURE_FLAGS);
}
// Copy constructor used when rebuilding a builder from an existing factory:
// carries over CSV-specific settings (escapes, quote-check threshold) in
// addition to base-class state.
public CsvFactoryBuilder(CsvFactory base) {
super(base);
_characterEscapes = base._characterEscapes;
_maxQuoteCheckChars = base._maxQuoteCheckChars;
}
@Override
public CsvFactory build() {
// 28-Dec-2017, tatu: No special settings beyond base class ones, so:
return new CsvFactory(this);
}
// // // Parser features
// Enables given CSV parser feature by setting its bit in the read-feature mask.
public CsvFactoryBuilder enable(CsvReadFeature f) {
    _formatReadFeatures = _formatReadFeatures | f.getMask();
    return _this();
}
// Enables one or more CSV parser features; each is handled by the
// single-feature variant above.
public CsvFactoryBuilder enable(CsvReadFeature first, CsvReadFeature... other) {
    enable(first);
    for (CsvReadFeature feature : other) {
        enable(feature);
    }
    return _this();
}
// Disables given CSV parser feature by clearing its bit in the read-feature mask.
public CsvFactoryBuilder disable(CsvReadFeature f) {
    _formatReadFeatures = _formatReadFeatures & ~f.getMask();
    return _this();
}
// Disables one or more CSV parser features; each is handled by the
// single-feature variant above.
public CsvFactoryBuilder disable(CsvReadFeature first, CsvReadFeature... other) {
    disable(first);
    for (CsvReadFeature feature : other) {
        disable(feature);
    }
    return _this();
}
// Enables or disables given parser feature, depending on {@code state}.
public CsvFactoryBuilder configure(CsvReadFeature f, boolean state) {
    if (state) {
        return enable(f);
    }
    return disable(f);
}
// // // Generator features
// Enables given CSV generator feature by setting its bit in the write-feature mask.
public CsvFactoryBuilder enable(CsvWriteFeature f) {
    _formatWriteFeatures = _formatWriteFeatures | f.getMask();
    return _this();
}
// Enables one or more CSV generator features; each is handled by the
// single-feature variant above.
public CsvFactoryBuilder enable(CsvWriteFeature first, CsvWriteFeature... other) {
    enable(first);
    for (CsvWriteFeature feature : other) {
        enable(feature);
    }
    return _this();
}
// Disables given CSV generator feature by clearing its bit in the write-feature mask.
public CsvFactoryBuilder disable(CsvWriteFeature f) {
    _formatWriteFeatures = _formatWriteFeatures & ~f.getMask();
    return _this();
}
// Disables one or more CSV generator features; each is handled by the
// single-feature variant above.
public CsvFactoryBuilder disable(CsvWriteFeature first, CsvWriteFeature... other) {
    disable(first);
    for (CsvWriteFeature feature : other) {
        disable(feature);
    }
    return _this();
}
// Enables or disables given generator feature, depending on {@code state}.
public CsvFactoryBuilder configure(CsvWriteFeature f, boolean state) {
    if (state) {
        return enable(f);
    }
    return disable(f);
}
// // // Other CSV-specific configuration
/**
 * Method for defining custom escapes factory uses for {@link JsonGenerator}s
 * it creates.
 *
 * @param esc Custom escape definitions to use, if any; {@code null} if none
 * @return This builder instance (for call chaining)
 */
public CsvFactoryBuilder characterEscapes(CsvCharacterEscapes esc) {
_characterEscapes = esc;
return this;
}
/**
 * Accessor for custom character escapes configured via
 * {@link #characterEscapes(CsvCharacterEscapes)}, if any.
 *
 * @return Currently configured {@link CsvCharacterEscapes}, or {@code null}
 *    if none explicitly configured
 */
public CsvCharacterEscapes characterEscapes() {
    // Removed a dead empty `if (_characterEscapes == null) { }` block that
    // had no effect; null is simply returned when nothing configured
    // (CsvGenerator falls back to CsvCharacterEscapes.fromCsvFeatures()).
    return _characterEscapes;
}
/**
* Method for configuring the maximum length of column values that are
* checked for quoting necessity when not using strict quoting
* ({@link CsvWriteFeature#STRICT_CHECK_FOR_QUOTING}).
* Values longer than this threshold are always quoted without checking content.
*
* Default value is {@value tools.jackson.dataformat.csv.impl.CsvEncoder#DEFAULT_MAX_QUOTE_CHECK}.
*
* @param maxChars Maximum number of characters to check; values longer
* than this will always be quoted. Negative values are normalized to
* the default ({@value tools.jackson.dataformat.csv.impl.CsvEncoder#DEFAULT_MAX_QUOTE_CHECK}).
* @return this builder (for call chaining)
*
* @since 3.2
*/
public CsvFactoryBuilder maxQuoteCheckChars(int maxChars) {
    // Negative values are normalized back to the encoder default
    if (maxChars < 0) {
        maxChars = CsvEncoder.DEFAULT_MAX_QUOTE_CHECK;
    }
    _maxQuoteCheckChars = maxChars;
    return this;
}
/**
 * Accessor for setting configurable via {@link #maxQuoteCheckChars(int)}.
 *
 * @return Currently configured maximum quote check length; default
 * is {@value tools.jackson.dataformat.csv.impl.CsvEncoder#DEFAULT_MAX_QUOTE_CHECK}.
 *
 * @since 3.2
 */
public int maxQuoteCheckChars() {
return _maxQuoteCheckChars;
}
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/CsvGenerator.java
================================================
package tools.jackson.dataformat.csv;
import java.io.*;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import tools.jackson.core.*;
import tools.jackson.core.base.GeneratorBase;
import tools.jackson.core.io.CharacterEscapes;
import tools.jackson.core.json.DupDetector;
import tools.jackson.core.util.SimpleStreamWriteContext;
import tools.jackson.dataformat.csv.impl.CsvEncoder;
import tools.jackson.core.io.IOContext;
import tools.jackson.core.util.JacksonFeatureSet;
public class CsvGenerator extends GeneratorBase
{
protected final static long MIN_INT_AS_LONG = Integer.MIN_VALUE;
protected final static long MAX_INT_AS_LONG = Integer.MAX_VALUE;
private final static CsvSchema EMPTY_SCHEMA = CsvSchema.emptySchema();
/*
/**********************************************************************
/* Configuration
/**********************************************************************
*/
/**
* Bit flag composed of bits that indicate which
* {@link CsvWriteFeature}s
* are enabled.
*/
protected final int _formatFeatures;
/**
* Definition of columns being written, if available.
*/
protected final CsvSchema _schema;
// note: can not be final since we may need to re-create it for new schema
protected CsvEncoder _writer;
/*
/**********************************************************************
/* Output state
/**********************************************************************
*/
/**
* Object that keeps track of the current contextual state of the generator.
*/
protected SimpleStreamWriteContext _streamWriteContext;
/**
* Flag that indicates that we need to write header line, if
* one is needed. Used because schema may be specified after
* instance is constructed.
*/
protected boolean _handleFirstLine = true;
/**
* Index of column that we will be getting next, based on
* the property name call that was made.
*/
protected int _nextColumnByName = -1;
/**
* Decorator to use for decorating the column value to follow, if any;
* {@code null} if none.
*
* @since 2.18
*/
protected CsvValueDecorator _nextColumnDecorator;
/**
* Flag set when property to write is unknown, and the matching value
* is to be skipped quietly.
*/
protected boolean _skipValue;
/**
* Flag set when a row has just been finished, used to distinguish between
* null values within a row vs null rows.
* Only relevant for Array-wrapped rows.
*
* @since 2.21
*/
protected boolean _justFinishedRow = false;
/**
* Separator to use during writing of (simple) array value, to be encoded as a
* single column value, if any.
*/
protected String _arraySeparator = CsvSchema.NO_ARRAY_ELEMENT_SEPARATOR;
/**
* Accumulated contents of an array cell, if any
*/
protected StringBuilder _arrayContents;
/**
* Additional counter that indicates number of value entries in the
* array. Needed because `null` entries do not add content, but need
* to be separated by array cell separator
*/
protected int _arrayElements;
/**
* When skipping output (for "unknown" output), outermost write context
* where skipping should occur
*/
protected SimpleStreamWriteContext _skipWithin;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
/**
 * Main constructor: creates generator that writes to given {@link Writer},
 * constructing the underlying {@link CsvEncoder} internally.
 *
 * @param characterEscapes Custom escapes to use; if {@code null}, escapes
 *    implied by enabled CSV features are used instead
 * @param maxQuoteCheckChars Threshold passed through to the encoder for
 *    limiting quoting-need checks on long values
 */
public CsvGenerator(ObjectWriteContext writeCtxt, IOContext ioCtxt,
int streamWriteFeatures, int csvFeatures,
Writer out, CsvSchema schema, CsvCharacterEscapes characterEscapes,
int maxQuoteCheckChars)
{
super(writeCtxt, ioCtxt, streamWriteFeatures);
_formatFeatures = csvFeatures;
// Duplicate-name detection only if stream-level feature enabled
final DupDetector dups = StreamWriteFeature.STRICT_DUPLICATE_DETECTION.enabledIn(streamWriteFeatures)
? DupDetector.rootDetector(this) : null;
_streamWriteContext = SimpleStreamWriteContext.createRootContext(dups);
_schema = schema;
if (characterEscapes == null) {
// fall back to escapes derived from CSV feature flags
characterEscapes = CsvCharacterEscapes.fromCsvFeatures(csvFeatures);
}
boolean useFastDoubleWriter = isEnabled(StreamWriteFeature.USE_FAST_DOUBLE_WRITER);
_writer = new CsvEncoder(ioCtxt, csvFeatures, out, schema, characterEscapes,
useFastDoubleWriter, maxQuoteCheckChars);
}
/**
 * Alternate constructor used when a pre-built {@link CsvEncoder} is
 * supplied directly; schema defaults to an empty one.
 */
public CsvGenerator(ObjectWriteContext writeCtxt, IOContext ioCtxt,
int streamWriteFeatures, int csvFeatures,
CsvEncoder csvWriter)
{
super(writeCtxt, ioCtxt, streamWriteFeatures);
_formatFeatures = csvFeatures;
final DupDetector dups = StreamWriteFeature.STRICT_DUPLICATE_DETECTION.enabledIn(streamWriteFeatures)
? DupDetector.rootDetector(this) : null;
_streamWriteContext = SimpleStreamWriteContext.createRootContext(dups);
_schema = EMPTY_SCHEMA;
_writer = csvWriter;
}
/*
/**********************************************************************
/* Versioned
/**********************************************************************
*/
@Override
public Version version() {
// Version injected by build (generated PackageVersion class)
return PackageVersion.VERSION;
}
/*
/**********************************************************************
/* Overrides: capability introspection methods
/**********************************************************************
 */
@Override
public boolean canOmitProperties() {
// Nope: CSV requires at least a placeholder
return false;
}
@Override
public boolean has(StreamWriteCapability capability) {
// CSV exposes the shared textual-format capability set
return DEFAULT_TEXTUAL_WRITE_CAPABILITIES.isEnabled(capability);
}
/**
 * Accessor for the full set of stream-write capabilities: same shared
 * textual-format set checked by {@link #has}.
 */
@Override
public JacksonFeatureSet<StreamWriteCapability> streamWriteCapabilities() {
    // Fixed raw-type usage: parameterize to match base-class declaration
    return DEFAULT_TEXTUAL_WRITE_CAPABILITIES;
}
/*
/**********************************************************************
/* Overrides: output state handling methods
/**********************************************************************
*/
@Override
public final TokenStreamContext streamWriteContext() { return _streamWriteContext; }
@Override
public final Object currentValue() {
// current value is tracked per write-context
return _streamWriteContext.currentValue();
}
@Override
public final void assignCurrentValue(Object v) {
_streamWriteContext.assignCurrentValue(v);
}
/*
/**********************************************************************
/* Overridden methods, configuration
/**********************************************************************
 */
@Override
public Object streamWriteOutputTarget() {
return _writer.getOutputTarget();
}
/**
 * NOTE: while this method will return some information on amount of data buffered, it
 * may be an incomplete view as some buffering happens at a higher level, as not-yet-serialized
 * values.
 */
@Override
public int streamWriteOutputBuffered() {
return _writer.getOutputBuffered();
}
@Override
public CsvGenerator setCharacterEscapes(CharacterEscapes esc) {
// NOTE(review): a null argument is silently ignored here (does not reset
// escapes back to defaults) -- confirm this is intended
if (esc != null) {
_writer.setOutputEscapes(esc.getEscapeCodesForAscii());
}
return this;
}
@Override
public CharacterEscapes getCharacterEscapes() {
// Not really true but... we have no access to full original escapes
return null;
}
@Override
public PrettyPrinter getPrettyPrinter() {
// CSV output does not use a pretty-printer
return null;
}
/*
/**********************************************************************
/* Overridden methods; writing property names
/**********************************************************************
*/
/* And then methods overridden to make final, streamline some
* aspects...
*/
@Override
public JsonGenerator writeName(String name) throws JacksonException
{
    // First verify that a property name is syntactically allowed here
    final boolean nameAccepted = _streamWriteContext.writeName(name);
    if (!nameAccepted) {
        _reportError("Cannot write a property name, expecting a value");
    }
    // then resolve the matching column for the value write to follow
    _writeFieldName(name);
    return this;
}
@Override
public JsonGenerator writePropertyId(long id) throws JacksonException {
    // 15-Aug-2019, tatu: should be improved to avoid String generation
    return writeName(String.valueOf(id));
}
@Override
public JsonGenerator writeName(SerializableString name) throws JacksonException
{
    // Delegate to String-based variant: context check and column
    // resolution are identical
    return writeName(name.getValue());
}
// Resolves given property name to a schema column, recording the column
// index (and optional value decorator) for the value write that follows.
// Throws (via _reportCsvWriteError) if the column is unknown and
// IGNORE_UNKNOWN is not enabled.
private final void _writeFieldName(String name) throws JacksonException
{
// just find the matching index -- must have schema for that
if (_schema == null) {
// not a low-level error, so:
_reportCsvWriteError("Unrecognized column '"+name+"', can not resolve without CsvSchema");
}
if (_skipWithin != null) { // new in 2.7
// already skipping an enclosing structured value: skip this too
_skipValue = true;
_nextColumnByName = -1;
_nextColumnDecorator = null;
return;
}
// note: we are likely to get next column name, so pass it as hint
CsvSchema.Column col = _schema.column(name, _nextColumnByName+1);
if (col == null) {
_nextColumnByName = -1;
_nextColumnDecorator = null;
if (isEnabled(StreamWriteFeature.IGNORE_UNKNOWN)) {
_skipValue = true;
return;
}
// not a low-level error, so:
_reportCsvWriteError("Unrecognized column '"+name+"': known columns: "+_schema.getColumnDesc());
}
_skipValue = false;
// and all we do is just note index to use for following value write
_nextColumnByName = col.getIndex();
_nextColumnDecorator = col.getValueDecorator();
}
/*
/**********************************************************************
/* Extended API, configuration
/**********************************************************************
*/
/**
 * Accessor for checking whether given CSV-specific write feature is
 * enabled for this generator.
 */
public final boolean isEnabled(CsvWriteFeature f) {
    return 0 != (f.getMask() & _formatFeatures);
}
/*
/**********************************************************************
/* Public API: low-level I/O
/**********************************************************************
*/
@Override
public final void flush() {
try {
// whether underlying target is flushed too depends on stream feature
_writer.flush(isEnabled(StreamWriteFeature.FLUSH_PASSED_TO_STREAM));
} catch (IOException e) {
throw _wrapIOFailure(e);
}
}
@Override
public void close()
{
try {
if (!isClosed()) {
// Let's mark row as closed, if we had any...
finishRow();
// Write the header if necessary, occurs when no rows written
if (_handleFirstLine) {
_handleFirstLine();
}
}
} finally {
// base class handles state flag and releasing buffers
super.close();
}
}
@Override
protected void _closeInput() throws IOException
{
// Closing of underlying target depends on ownership/feature settings
_writer.close(_ioContext.isResourceManaged() || isEnabled(StreamWriteFeature.AUTO_CLOSE_TARGET),
isEnabled(StreamWriteFeature.FLUSH_PASSED_TO_STREAM));
}
/*
/**********************************************************************
/* Public API: structural output
/**********************************************************************
*/
// Starts a logical Array: allowed at root level (sequence of rows) or,
// when schema defines an array element separator, as an in-column array
// value whose elements are buffered into _arrayContents.
@Override
public JsonGenerator writeStartArray() throws JacksonException
{
_verifyValueWrite("start an array");
_justFinishedRow = false; // Clear flag when starting new array
// Ok to create root-level array to contain Objects/Arrays, but
// can not nest arrays in objects
if (_streamWriteContext.inObject()) {
if ((_skipWithin == null)
&& _skipValue && isEnabled(StreamWriteFeature.IGNORE_UNKNOWN)) {
// start of an ignored structured value: remember where to stop skipping
_skipWithin = _streamWriteContext;
} else if (!_skipValue) {
// First: column may have its own separator
String sep;
if (_nextColumnByName >= 0) {
CsvSchema.Column col = _schema.column(_nextColumnByName);
sep = col.isArray() ? col.getArrayElementSeparator() : CsvSchema.NO_ARRAY_ELEMENT_SEPARATOR;
} else {
sep = CsvSchema.NO_ARRAY_ELEMENT_SEPARATOR;
}
if (sep.isEmpty()) {
// no column-level separator: fall back to schema-wide one, if defined
if (!_schema.hasArrayElementSeparator()) {
_reportError("CSV generator does not support Array values for properties without setting 'arrayElementSeparator' in schema");
}
sep = _schema.getArrayElementSeparator();
}
_arraySeparator = sep;
// reuse element buffer across arrays
if (_arrayContents == null) {
_arrayContents = new StringBuilder();
} else {
_arrayContents.setLength(0);
}
_arrayElements = 0;
}
} else {
if (!_arraySeparator.isEmpty()) {
// also: no nested arrays, yet
_reportError("CSV generator does not support nested Array values");
}
}
_streamWriteContext = _streamWriteContext.createChildArrayContext(null);
streamWriteConstraints().validateNestingDepth(_streamWriteContext.getNestingDepth());
// and that's about it, really
return this;
}
@Override
public JsonGenerator writeStartArray(Object currValue) throws JacksonException {
writeStartArray();
assignCurrentValue(currValue);
return this;
}
// Ends a logical Array: flushes buffered in-column array contents (if any)
// as a single column value, and finishes the row when this was a
// root-level (row-wrapping) array.
@Override
public JsonGenerator writeEndArray() throws JacksonException
{
if (!_streamWriteContext.inArray()) {
_reportError("Current context not Array but "+_streamWriteContext.typeDesc());
}
_streamWriteContext = _streamWriteContext.getParent();
// 14-Dec-2015, tatu: To complete skipping of ignored structured value, need this:
if (_skipWithin != null) {
if (_streamWriteContext == _skipWithin) {
_skipWithin = null;
}
return this;
}
if (!_arraySeparator.isEmpty()) {
// in-column array: join buffered elements into one column value
String value = _arrayContents.toString();
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations!
if (_nextColumnDecorator != null) {
value = _nextColumnDecorator.decorateValue(this, value);
}
_arraySeparator = CsvSchema.NO_ARRAY_ELEMENT_SEPARATOR;
_writer.write(_columnIndex(), value);
}
// 20-Nov-2014, tatu: When doing "untyped"/"raw" output, this means that row
// is now done. But not if writing such an array property, so:
if (!_streamWriteContext.inObject()) {
finishRow();
}
return this;
}
// Starts a logical Object: each Object maps to one CSV row; nested
// Objects are not supported (unless being skipped via IGNORE_UNKNOWN).
@Override
public JsonGenerator writeStartObject() throws JacksonException
{
_verifyValueWrite("start an object");
_justFinishedRow = false;
// No nesting for objects; can write Objects inside logical root-level arrays.
// 14-Dec-2015, tatu: ... except, should be fine if we are ignoring the property
if (_streamWriteContext.inObject() ||
// 07-Nov-2017, tatu: But we may actually be nested indirectly; so check
(_streamWriteContext.inArray() && !_streamWriteContext.getParent().inRoot())) {
if (_skipWithin == null) { // new in 2.7
if (_skipValue && isEnabled(StreamWriteFeature.IGNORE_UNKNOWN)) {
// ignored property: skip the whole nested Object quietly
_skipWithin = _streamWriteContext;
} else {
_reportCsvWriteError("CSV generator does not support Object values for properties (nested Objects)");
}
}
}
_streamWriteContext = _streamWriteContext.createChildObjectContext(null);
streamWriteConstraints().validateNestingDepth(_streamWriteContext.getNestingDepth());
return this;
}
@Override
public JsonGenerator writeStartObject(Object currValue) throws JacksonException {
writeStartObject();
assignCurrentValue(currValue);
return this;
}
// Ends a logical Object, which usually completes the current CSV row.
@Override
public JsonGenerator writeEndObject() throws JacksonException
{
if (!_streamWriteContext.inObject()) {
_reportError("Current context not Object but "+_streamWriteContext.typeDesc());
}
_streamWriteContext = _streamWriteContext.getParent();
// 14-Dec-2015, tatu: To complete skipping of ignored structured value, need this:
if (_skipWithin != null) {
if (_streamWriteContext == _skipWithin) {
_skipWithin = null;
}
return this;
}
// not 100% fool-proof, but chances are row should be done now
finishRow();
return this;
}
/*
/**********************************************************************
/* Output method implementations, textual
/**********************************************************************
*/
// Writes a String column value; routed to array-element buffer when inside
// an in-column array, otherwise decorated (if configured) and written to
// the resolved column.
@Override
public JsonGenerator writeString(String text) throws JacksonException
{
if (text == null) {
return writeNull();
}
_verifyValueWrite("write String value");
if (!_skipValue) {
if (!_arraySeparator.isEmpty()) {
_addToArray(text);
} else {
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations!
if (_nextColumnDecorator != null) {
text = _nextColumnDecorator.decorateValue(this, text);
}
_writer.write(_columnIndex(), text);
}
}
return this;
}
@Override
public JsonGenerator writeString(char[] text, int offset, int len) throws JacksonException
{
_verifyValueWrite("write String value");
if (!_skipValue) {
if (!_arraySeparator.isEmpty()) {
// array/decorator paths need a String; only the direct path can
// pass the char[] through without copying
_addToArray(new String(text, offset, len));
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations!
} else if (_nextColumnDecorator != null) {
String str = new String(text, offset, len);
_writer.write(_columnIndex(),
_nextColumnDecorator.decorateValue(this, str));
} else {
_writer.write(_columnIndex(), text, offset, len);
}
}
return this;
}
@Override
public JsonGenerator writeString(SerializableString sstr) throws JacksonException
{
_verifyValueWrite("write String value");
if (!_skipValue) {
if (!_arraySeparator.isEmpty()) {
_addToArray(sstr.getValue());
} else {
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations!
String text = sstr.getValue();
if (_nextColumnDecorator != null) {
text = _nextColumnDecorator.decorateValue(this, text);
}
_writer.write(_columnIndex(), text);
}
}
return this;
}
// Raw (pre-escaped) UTF-8 output is not supported for CSV
@Override
public JsonGenerator writeRawUTF8String(byte[] text, int offset, int len) throws JacksonException {
return _reportUnsupportedOperation();
}
@Override
public JsonGenerator writeUTF8String(byte[] text, int offset, int len) throws JacksonException {
// decode and delegate to regular String write
return writeString(new String(text, offset, len, StandardCharsets.UTF_8));
}
/*
/**********************************************************************
/* Output method implementations, unprocessed ("raw")
/**********************************************************************
*/
// writeRaw() variants pass text straight to the encoder: no quoting,
// escaping, column resolution or value-write verification.
@Override
public JsonGenerator writeRaw(String text) throws JacksonException {
_writer.writeRaw(text);
return this;
}
@Override
public JsonGenerator writeRaw(String text, int offset, int len) throws JacksonException {
_writer.writeRaw(text, offset, len);
return this;
}
@Override
public JsonGenerator writeRaw(char[] text, int offset, int len) throws JacksonException {
_writer.writeRaw(text, offset, len);
return this;
}
@Override
public JsonGenerator writeRaw(char c) throws JacksonException {
_writer.writeRaw(c);
return this;
}
// writeRawValue() variants write an unescaped value into the current
// column; array buffering and decorators are intentionally bypassed.
@Override
public JsonGenerator writeRawValue(String text) throws JacksonException {
_verifyValueWrite("write Raw value");
if (!_skipValue) {
// NOTE: ignore array stuff
_writer.writeNonEscaped(_columnIndex(), text);
}
return this;
}
@Override
public JsonGenerator writeRawValue(String text, int offset, int len) throws JacksonException {
_verifyValueWrite("write Raw value");
if (!_skipValue) {
// NOTE: ignore array stuff
_writer.writeNonEscaped(_columnIndex(), text.substring(offset, offset+len));
}
return this;
}
@Override
public JsonGenerator writeRawValue(char[] text, int offset, int len) throws JacksonException {
_verifyValueWrite("write Raw value");
if (!_skipValue) {
// NOTE: ignore array stuff
_writer.writeNonEscaped(_columnIndex(), new String(text, offset, len));
}
return this;
}
/*
/**********************************************************************
/* Output method implementations, base64-encoded binary
/**********************************************************************
*/
// Binary values are written as Base64-encoded Strings (CSV has no
// native binary representation).
@Override
public JsonGenerator writeBinary(Base64Variant b64variant, byte[] data, int offset, int len)
throws JacksonException
{
if (data == null) {
return writeNull();
}
_verifyValueWrite("write Binary value");
if (!_skipValue) {
// ok, better just Base64 encode as a String...
if (offset > 0 || (offset+len) != data.length) {
// encoder needs full array: copy requested slice
data = Arrays.copyOfRange(data, offset, offset+len);
}
String encoded = b64variant.encode(data);
if (!_arraySeparator.isEmpty()) {
_addToArray(encoded);
} else {
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations!
if (_nextColumnDecorator != null) {
encoded = _nextColumnDecorator.decorateValue(this, encoded);
}
_writer.write(_columnIndex(), encoded);
}
}
return this;
}
/*
/**********************************************************************
/* Output method implementations, primitive
/**********************************************************************
*/
// Boolean values are written as "true"/"false"; decorated variant is
// forced through the String path since decoration produces a String.
@Override
public JsonGenerator writeBoolean(boolean state) throws JacksonException
{
_verifyValueWrite("write boolean value");
if (!_skipValue) {
if (!_arraySeparator.isEmpty()) {
_addToArray(state ? "true" : "false");
} else {
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations!
if (_nextColumnDecorator != null) {
String text = _nextColumnDecorator.decorateValue(this, state ? "true" : "false");
_writer.write(_columnIndex(), text);
} else {
_writer.write(_columnIndex(), state);
}
}
}
return this;
}
// Writes a null value; exact handling depends on context: schema null-value
// inside in-column arrays, encoder null for Object properties, and
// context-dependent handling for array-wrapped rows (see comments below).
@Override
public JsonGenerator writeNull() throws JacksonException
{
_verifyValueWrite("write null value");
if (!_skipValue) {
if (!_arraySeparator.isEmpty()) {
_addToArray(_schema.getNullValueOrEmpty());
} else if (_streamWriteContext.inObject()) {
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations?
if (_nextColumnDecorator != null) {
// decorator may supply a replacement value for null
String nvl = _nextColumnDecorator.decorateNull(this);
if (nvl != null) {
_writer.write(_columnIndex(), nvl);
return this;
}
}
_writer.writeNull(_columnIndex());
} else if (_streamWriteContext.inArray()) {
// [dataformat-csv#106]: Need to make sure we don't swallow nulls in arrays either
// 04-Jan-2016, tatu: but check for case of array-wrapping, in which case null stands for absence
// of Object. In this case, could either add an empty row, or skip -- for now, we'll
// just skip; can change, if so desired, to expose "root null" as empty rows, possibly
// based on either schema property, or CsvGenerator.Feature.
// Note: if nulls are to be written that way, would need to call `finishRow()` right after `writeNull()`
// [dataformats-text#10]: When we have a schema and we haven't just finished a row,
// it means we're inside an array-as-row (like Object[]), so null is a column value
boolean writeNullValue = !_streamWriteContext.getParent().inRoot()
|| (_schema.size() > 0 && !_justFinishedRow);
if (writeNullValue) {
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations?
if (_nextColumnDecorator != null) {
String nvl = _nextColumnDecorator.decorateNull(this);
if (nvl != null) {
_writer.write(_columnIndex(), nvl);
return this;
}
}
_writer.writeNull(_columnIndex());
}
// ... so, for "root-level nulls" (with or without array-wrapping), we would do:
/*
_writer.writeNull(_columnIndex());
finishRow();
*/
}
}
return this;
}
@Override
public JsonGenerator writeNumber(short v) throws JacksonException {
// no short-specific handling; widen to int
return writeNumber((int) v);
}
@Override
public JsonGenerator writeNumber(int v) throws JacksonException
{
_verifyValueWrite("write number");
if (!_skipValue) {
if (!_arraySeparator.isEmpty()) {
_addToArray(String.valueOf(v));
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations?
} else if (_nextColumnDecorator != null) {
// decorator path requires String form
_writer.write(_columnIndex(),
_nextColumnDecorator.decorateValue(this, String.valueOf(v)));
} else {
_writer.write(_columnIndex(), v);
}
}
return this;
}
@Override
public JsonGenerator writeNumber(long v) throws JacksonException
{
// First: maybe 32 bits is enough?
if (v <= MAX_INT_AS_LONG && v >= MIN_INT_AS_LONG) {
return writeNumber((int) v);
}
_verifyValueWrite("write number");
if (!_skipValue) {
if (!_arraySeparator.isEmpty()) {
_addToArray(String.valueOf(v));
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations?
} else if (_nextColumnDecorator != null) {
_writer.write(_columnIndex(),
_nextColumnDecorator.decorateValue(this, String.valueOf(v)));
} else {
_writer.write(_columnIndex(), v);
}
}
return this;
}
@Override
public JsonGenerator writeNumber(BigInteger v) throws JacksonException
{
if (v == null) {
return writeNull();
}
_verifyValueWrite("write number");
if (!_skipValue) {
if (!_arraySeparator.isEmpty()) {
_addToArray(String.valueOf(v));
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations?
} else if (_nextColumnDecorator != null) {
// decorator path requires String form
_writer.write(_columnIndex(),
_nextColumnDecorator.decorateValue(this, String.valueOf(v)));
} else {
_writer.write(_columnIndex(), v);
}
}
return this;
}
@Override
public JsonGenerator writeNumber(double v) throws JacksonException
{
_verifyValueWrite("write number");
if (!_skipValue) {
if (!_arraySeparator.isEmpty()) {
_addToArray(String.valueOf(v));
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations?
} else if (_nextColumnDecorator != null) {
_writer.write(_columnIndex(),
_nextColumnDecorator.decorateValue(this, String.valueOf(v)));
} else {
_writer.write(_columnIndex(), v);
}
}
return this;
}
@Override
public JsonGenerator writeNumber(float v) throws JacksonException
{
_verifyValueWrite("write number");
if (!_skipValue) {
if (!_arraySeparator.isEmpty()) {
_addToArray(String.valueOf(v));
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations?
} else if (_nextColumnDecorator != null) {
_writer.write(_columnIndex(),
_nextColumnDecorator.decorateValue(this, String.valueOf(v)));
} else {
_writer.write(_columnIndex(), v);
}
}
return this;
}
@Override
public JsonGenerator writeNumber(BigDecimal v) throws JacksonException
{
if (v == null) {
return writeNull();
}
_verifyValueWrite("write number");
if (!_skipValue) {
// plain notation (no exponent) if feature enabled
boolean plain = isEnabled(StreamWriteFeature.WRITE_BIGDECIMAL_AS_PLAIN);
if (!_arraySeparator.isEmpty()) {
_addToArray(plain ? v.toPlainString() : v.toString());
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations?
} else if (_nextColumnDecorator != null) {
String numStr = plain ? v.toPlainString() : v.toString();
_writer.write(_columnIndex(),
_nextColumnDecorator.decorateValue(this, numStr));
} else {
_writer.write(_columnIndex(), v, plain);
}
}
return this;
}
// Pre-encoded number: passed through as-is (no re-parsing or validation here)
@Override
public JsonGenerator writeNumber(String encodedValue) throws JacksonException
{
if (encodedValue == null) {
return writeNull();
}
_verifyValueWrite("write number");
if (!_skipValue) {
if (!_arraySeparator.isEmpty()) {
_addToArray(encodedValue);
// 26-Aug-2024, tatu: [dataformats-text#495] Decorations?
} else if (_nextColumnDecorator != null) {
_writer.write(_columnIndex(),
_nextColumnDecorator.decorateValue(this, encodedValue));
} else {
_writer.write(_columnIndex(), encodedValue);
}
}
return this;
}
/*
/**********************************************************************
/* Overrides for property write methods
/**********************************************************************
*/
/**
 * Handles an omitted (filtered-out) property: if the column still exists
 * in the schema, an empty placeholder value is written (CSV requires
 * positional placeholders); if the column is unknown, the call is a no-op.
 */
@Override
public JsonGenerator writeOmittedProperty(String propName) throws JacksonException
{
    // Hmmh. Should we require a match? Actually, let's use logic: if property found,
    // assumption is we must add a placeholder; if not, we can merely ignore
    CsvSchema.Column col = _schema.column(propName);
    // Inverted former empty "if (col == null) { }" branch for clarity;
    // unknown column (assumed removed from schema too) is simply ignored
    if (col != null) {
        // basically combination of "writeName()" and "writeNull()"
        if (!_streamWriteContext.writeName(propName)) {
            _reportError("Cannot skip a property, expecting a value");
        }
        // and all we do is just note index to use for following value write
        _nextColumnByName = col.getIndex();
        // We can basically copy what 'writeNull()' does...
        _verifyValueWrite("skip positional value due to filtering");
        _writer.write(_columnIndex(), "");
    }
    return this;
}
/*
/**********************************************************************
/* Implementations for methods from base class
/**********************************************************************
*/
// Verifies a value write is allowed in current context; also triggers
// deferred header-line output on the very first value write.
@Override
protected final void _verifyValueWrite(String typeMsg) throws JacksonException
{
if (!_streamWriteContext.writeValue()) {
_reportError("Cannot "+typeMsg+", expecting a property name");
}
if (_handleFirstLine) {
_handleFirstLine();
}
}
@Override
protected void _releaseBuffers() {
// delegate buffer recycling to encoder
_writer._releaseBuffers();
}
/*
/**********************************************************************
/* Internal methods, error reporting
/**********************************************************************
*/
/**
 * Method called when there is a problem related to mapping data
 * (compared to a low-level generation); if so, should be surfaced
 * as a {@link CsvWriteException} (which carries the active schema).
 */
protected void _reportCsvWriteError(String msg) throws JacksonException {
throw CsvWriteException.from(this, msg, _schema);
}
/*
/**********************************************************************
/* Internal methods, other
/**********************************************************************
*/
// Resolves column index for next value: the index recorded from the last
// property-name write if one was set, otherwise the encoder's next
// positional index.
protected final int _columnIndex()
{
    final int byName = _nextColumnByName;
    return (byName >= 0) ? byName : _writer.nextColumnIndex();
}
/**
* Method called when the current row is complete; typically
* will flush possibly buffered column values, append linefeed
* and reset state appropriately.
*/
protected void finishRow() throws JacksonException
{
_writer.endRow();
_nextColumnByName = -1;
_justFinishedRow = true;
}
/**
 * Writes out the header row, if the active schema requests one; called
 * lazily before the very first value is written.
 */
protected void _handleFirstLine() throws JacksonException
{
    // Clear flag first so header output is attempted at most once
    _handleFirstLine = false;
    if (!_schema.usesHeader()) {
        return;
    }
    if (_schema.size() == 0) {
        _reportCsvWriteError("Schema specified that header line is to be written; but contains no column names");
    }
    for (CsvSchema.Column column : _schema) {
        _writer.writeColumnName(column.getName());
    }
    _writer.endRow();
}
/**
 * Appends one String element to the in-progress array value, inserting
 * the configured separator before every element except the first.
 */
protected void _addToArray(String value) {
    if (_arrayElements++ > 0) {
        _arrayContents.append(_arraySeparator);
    }
    _arrayContents.append(value);
}
/**
 * Appends one char[] element to the in-progress array value, inserting
 * the configured separator before every element except the first.
 */
protected void _addToArray(char[] value) {
    if (_arrayElements++ > 0) {
        _arrayContents.append(_arraySeparator);
    }
    _arrayContents.append(value);
}
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/CsvMapper.java
================================================
package tools.jackson.dataformat.csv;
import java.util.Collection;
import java.util.Objects;
import tools.jackson.core.type.TypeReference;
import tools.jackson.databind.*;
import tools.jackson.databind.cfg.MapperBuilder;
import tools.jackson.databind.cfg.MapperBuilderState;
import tools.jackson.databind.introspect.AnnotatedMember;
import tools.jackson.databind.introspect.BeanPropertyDefinition;
import tools.jackson.databind.util.NameTransformer;
import tools.jackson.databind.util.SimpleLookupCache;
import tools.jackson.databind.util.ViewMatcher;
/**
* Specialized {@link ObjectMapper}, with extended functionality to
* produce {@link CsvSchema} instances out of POJOs.
*/
public class CsvMapper extends ObjectMapper
{
private static final long serialVersionUID = 1;
/**
 * Base implementation for "Vanilla" {@link ObjectMapper}, used with
 * CSV backend.
 *
 * @since 3.0
 */
public static class Builder extends MapperBuilder<CsvMapper, Builder>
{
    public Builder(CsvFactory f) {
        super(f);
    }

    public Builder(StateImpl state) {
        super(state);
    }

    @Override
    public CsvMapper build() {
        return new CsvMapper(this);
    }

    @Override
    protected MapperBuilderState _saveState() {
        // nothing extra, just format features
        return new StateImpl(this);
    }

    /*
    /******************************************************************
    /* Format features
    /******************************************************************
     */

    /**
     * Method for enabling specified CSV read (parser) features.
     */
    public Builder enable(CsvReadFeature... features) {
        for (CsvReadFeature f : features) {
            _formatReadFeatures |= f.getMask();
        }
        return this;
    }

    /**
     * Method for disabling specified CSV read (parser) features.
     */
    public Builder disable(CsvReadFeature... features) {
        for (CsvReadFeature f : features) {
            _formatReadFeatures &= ~f.getMask();
        }
        return this;
    }

    /**
     * Method for enabling or disabling specified CSV read (parser) feature.
     */
    public Builder configure(CsvReadFeature feature, boolean state)
    {
        if (state) {
            _formatReadFeatures |= feature.getMask();
        } else {
            _formatReadFeatures &= ~feature.getMask();
        }
        return this;
    }

    /**
     * Method for enabling specified CSV write (generator) features.
     */
    public Builder enable(CsvWriteFeature... features) {
        for (CsvWriteFeature f : features) {
            _formatWriteFeatures |= f.getMask();
        }
        return this;
    }

    /**
     * Method for disabling specified CSV write (generator) features.
     */
    public Builder disable(CsvWriteFeature... features) {
        for (CsvWriteFeature f : features) {
            _formatWriteFeatures &= ~f.getMask();
        }
        return this;
    }

    /**
     * Method for enabling or disabling specified CSV write (generator) feature.
     */
    public Builder configure(CsvWriteFeature feature, boolean state)
    {
        if (state) {
            _formatWriteFeatures |= feature.getMask();
        } else {
            _formatWriteFeatures &= ~feature.getMask();
        }
        return this;
    }

    /**
     * Serializable snapshot of builder settings; used for JDK serialization
     * of the mapper (see {@code writeReplace()}).
     */
    protected static class StateImpl extends MapperBuilderState
        implements java.io.Serializable // important!
    {
        private static final long serialVersionUID = 3L;

        public StateImpl(Builder src) {
            super(src);
        }

        // We also need actual instance of state as base class can not implement logic
        // for reinstating mapper (via mapper builder) from state.
        @Override
        protected Object readResolve() {
            return new Builder(this).build();
        }
    }
}
/*
/**********************************************************************
/* Caching of schemas
/**********************************************************************
*/
/**
 * Simple caching for schema instances, given that they are relatively expensive
 * to construct; this one is for "loose" (non-typed) schemas
 */
// NOTE(review): raw type -- presumably SimpleLookupCache<ViewKey,CsvSchema>
// (keyed by ViewKey below); confirm and restore type parameters
protected final SimpleLookupCache _untypedSchemas;

/**
 * Simple caching for schema instances, given that they are relatively expensive
 * to construct; this one is for typed schemas
 */
// NOTE(review): raw type -- same parameterization as _untypedSchemas
protected final SimpleLookupCache _typedSchemas;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
/**
 * Default constructor: constructs mapper with a default {@link CsvFactory}.
 */
public CsvMapper() {
    this(new Builder(new CsvFactory()));
}

/**
 * Constructs mapper using given {@link CsvFactory} with otherwise
 * default settings.
 */
public CsvMapper(CsvFactory f) {
    this(new Builder(f));
}

/**
 * Builder-based constructor; also initializes per-mapper schema caches
 * (initial size 8, maximum 32 entries).
 *
 * @since 3.0
 */
public CsvMapper(CsvMapper.Builder b) {
    super(b);
    _untypedSchemas = new SimpleLookupCache<>(8,32);
    _typedSchemas = new SimpleLookupCache<>(8,32);
}

// Factory for a builder with all-default settings
public static CsvMapper.Builder builder() {
    return new CsvMapper.Builder(new CsvFactory());
}

// Factory for a builder using given (possibly pre-configured) stream factory
public static CsvMapper.Builder builder(CsvFactory streamFactory) {
    return new CsvMapper.Builder(streamFactory);
}

@SuppressWarnings("unchecked")
@Override
public Builder rebuild() {
    // Saved state is always produced by Builder._saveState(), so cast is safe
    return new Builder((Builder.StateImpl) _savedBuilderState);
}
/*
/**********************************************************************
/* Life-cycle, shared "vanilla" (default configuration) instance
/**********************************************************************
*/
/**
 * Accessor method for getting globally shared "default" {@link CsvMapper}
 * instance: one that has default configuration, no modules registered, no
 * config overrides. Usable mostly when dealing "untyped" or Tree-style
 * content reading and writing.
 */
public static CsvMapper shared() {
    return SharedWrapper.wrapped();
}

/*
/**********************************************************************
/* Life-cycle: JDK serialization support
/**********************************************************************
 */

// 27-Feb-2018, tatu: Not sure why but it seems base class definitions
//   are not sufficient alone; sub-classes must re-define.
@Override
protected Object writeReplace() {
    // Serialize the saved builder state instead of the mapper itself
    return _savedBuilderState;
}

@Override
protected Object readResolve() {
    // Deserialization must always go through saved builder state
    // (see writeReplace() above); direct deserialization is a bug
    throw new IllegalStateException("Should never deserialize `"+getClass().getName()+"` directly");
}

/*
/**********************************************************************
/* Additional typed accessors
/**********************************************************************
 */

/**
 * Overridden with more specific type, since factory we have
 * is always of type {@link CsvFactory}
 */
@Override
public CsvFactory tokenStreamFactory() {
    return (CsvFactory) _streamFactory;
}
/*
/**********************************************************************
/* Format-specific
/**********************************************************************
*/
/**
 * Checks whether given CSV-parsing feature is enabled
 * (via deserialization config).
 */
public boolean isEnabled(CsvReadFeature f) {
    return _deserializationConfig.hasFormatFeature(f);
}

/**
 * Checks whether given CSV-generation feature is enabled
 * (via serialization config).
 */
public boolean isEnabled(CsvWriteFeature f) {
    return _serializationConfig.hasFormatFeature(f);
}
/*
/**********************************************************************
/* Additional ObjectReader factory methods
/**********************************************************************
*/
/**
* Convenience method which is functionally equivalent to:
*
* reader(pojoType).withSchema(schemaFor(pojoType));
*
* that is, constructs a {@link ObjectReader} which both binds to
* specified type and uses "loose" {@link CsvSchema} introspected from
* specified type (one without strict inferred typing).
*
* @param pojoType Type used both for data-binding (result type) and for
* schema introspection. NOTE: must NOT be an array or Collection type, since
* these only make sense for data-binding (like arrays of objects to bind),
* but not for schema construction (no CSV types can be mapped to arrays
* or Collections)
*/
public ObjectReader readerWithSchemaFor(Class> pojoType)
{
JavaType type = constructType(pojoType);
/* sanity check: not useful for structured types, since
* schema type will need to differ from data-bind type
*/
if (type.isArrayType() || type.isCollectionLikeType()) {
throw new IllegalArgumentException("Type can NOT be a Collection or array type");
}
return readerFor(type).with(schemaFor(type));
}
/**
* Convenience method which is functionally equivalent to:
*
* reader(pojoType).withSchema(typedSchemaFor(pojoType));
*
* that is, constructs a {@link ObjectReader} which both binds to
* specified type and uses "strict" {@link CsvSchema} introspected from
* specified type (one where typing is inferred).
*/
public ObjectReader readerWithTypedSchemaFor(Class> pojoType)
{
JavaType type = constructType(pojoType);
// sanity check: not useful for structured types, since
// schema type will need to differ from data-bind type
if (type.isArrayType() || type.isCollectionLikeType()) {
throw new IllegalArgumentException("Type can NOT be a Collection or array type");
}
return readerFor(type).with(typedSchemaFor(type));
}
/*
/**********************************************************************
/* Additional ObjectWriter factory methods
/**********************************************************************
*/
/**
* Convenience method which is functionally equivalent to:
*
* writer(pojoType).with(schemaFor(pojoType));
*
* that is, constructs a {@link ObjectWriter} which both binds to
* specified type and uses "loose" {@link CsvSchema} introspected from
* specified type (one without strict inferred typing).
*
* @param pojoType Type used both for data-binding (result type) and for
* schema introspection. NOTE: must NOT be an array or Collection type, since
* these only make sense for data-binding (like arrays of objects to bind),
* but not for schema construction (no root-level CSV types can be mapped to arrays
* or Collections)
*/
public ObjectWriter writerWithSchemaFor(Class> pojoType)
{
JavaType type = constructType(pojoType);
// sanity check as per javadoc above
if (type.isArrayType() || type.isCollectionLikeType()) {
throw new IllegalArgumentException("Type can NOT be a Collection or array type");
}
return writerFor(type).with(schemaFor(type));
}
/**
* Convenience method which is functionally equivalent to:
*
* writer(pojoType).with(typedSchemaFor(pojoType));
*
* that is, constructs a {@link ObjectWriter} which both binds to
* specified type and uses "strict" {@link CsvSchema} introspected from
* specified type (one where typing is inferred).
*/
public ObjectWriter writerWithTypedSchemaFor(Class> pojoType)
{
JavaType type = constructType(pojoType);
// sanity check as per javadoc above
if (type.isArrayType() || type.isCollectionLikeType()) {
throw new IllegalArgumentException("Type can NOT be a Collection or array type");
}
return writerFor(type).with(typedSchemaFor(type));
}
/*
/**********************************************************************
/* CsvSchema construction; overrides, new methods
/**********************************************************************
*/
/**
 * Convenience method that is same as
 *<pre>
 *   CsvSchema.emptySchema().withHeader();
 *</pre>
 * and returns a {@link CsvSchema} instance that uses default configuration
 * with additional setting that the first content line contains intended
 * column names.
 */
public CsvSchema schemaWithHeader() {
    return CsvSchema.emptySchema().withHeader();
}

/**
 * Convenience method that is same as
 *<pre>
 *   CsvSchema.emptySchema()
 *</pre>
 * that is, returns an "empty" Schema; one with default values and no
 * column definitions.
 */
public CsvSchema schema() {
    return CsvSchema.emptySchema();
}
/**
* Method that can be used to determine a CSV schema to use for given
* POJO type, using default serialization settings including ordering.
* Definition will not be strictly typed (that is, all columns are
* just defined to be exposed as String tokens).
*
* NOTE: the column order in the resulting schema is determined by POJO
* property introspection order (which may differ from CSV column order).
* If the schema is used with
* {@link CsvSchema#withHeader() withHeader()} to read CSV data whose
* column order differs from the POJO property order, also enable
* {@link CsvSchema#withColumnReordering(boolean) withColumnReordering(true)}
* so that columns are matched by name rather than position.
*/
public CsvSchema schemaFor(JavaType pojoType) {
return _schemaFor(pojoType, _untypedSchemas, false, null);
}
public CsvSchema schemaForWithView(JavaType pojoType, Class> view) {
return _schemaFor(pojoType, _untypedSchemas, false, view);
}
public final CsvSchema schemaFor(Class> pojoType) {
return _schemaFor(constructType(pojoType), _untypedSchemas, false, null);
}
public final CsvSchema schemaForWithView(Class> pojoType, Class> view) {
return _schemaFor(constructType(pojoType), _untypedSchemas, false, view);
}
public final CsvSchema schemaFor(TypeReference> pojoTypeRef) {
return _schemaFor(constructType(pojoTypeRef.getType()), _untypedSchemas, false, null);
}
public final CsvSchema schemaForWithView(TypeReference> pojoTypeRef, Class> view) {
return _schemaFor(constructType(pojoTypeRef.getType()), _untypedSchemas, false, view);
}
/**
* Method that can be used to determine a CSV schema to use for given
* POJO type, using default serialization settings including ordering.
* Definition WILL be strictly typed: that is, code will try to
* determine type limitations which may make parsing more efficient
* (especially for numeric types like java.lang.Integer).
*
* NOTE: the column order in the resulting schema is determined by POJO
* property introspection order (which may differ from CSV column order).
* If the schema is used with
* {@link CsvSchema#withHeader() withHeader()} to read CSV data whose
* column order differs from the POJO property order, also enable
* {@link CsvSchema#withColumnReordering(boolean) withColumnReordering(true)}
* so that columns are matched by name rather than position.
*/
public CsvSchema typedSchemaFor(JavaType pojoType) {
return _schemaFor(pojoType, _typedSchemas, true, null);
}
public CsvSchema typedSchemaForWithView(JavaType pojoType, Class> view) {
return _schemaFor(pojoType, _typedSchemas, true, view);
}
public final CsvSchema typedSchemaFor(Class> pojoType) {
return _schemaFor(constructType(pojoType), _typedSchemas, true, null);
}
public final CsvSchema typedSchemaForWithView(Class> pojoType, Class> view) {
return _schemaFor(constructType(pojoType), _typedSchemas, true, view);
}
public final CsvSchema typedSchemaFor(TypeReference> pojoTypeRef) {
return _schemaFor(constructType(pojoTypeRef.getType()), _typedSchemas, true, null);
}
public final CsvSchema typedSchemaForWithView(TypeReference> pojoTypeRef, Class> view) {
return _schemaFor(constructType(pojoTypeRef.getType()), _typedSchemas, true, view);
}
/*
/**********************************************************************
/* Internal methods
/**********************************************************************
*/
protected CsvSchema _schemaFor(JavaType pojoType, SimpleLookupCache schemas,
boolean typed, Class> view)
{
final ViewKey viewKey = new ViewKey(pojoType, view);
CsvSchema s = schemas.get(viewKey);
if (s != null) {
return s;
}
// 15-Oct-2019, tatu: Since 3.0, need context for introspection
final SerializationContext ctxt = _serializationContext();
CsvSchema.Builder builder = CsvSchema.builder();
_addSchemaProperties(ctxt, builder, typed, pojoType, null, view);
CsvSchema result = builder.build();
schemas.put(viewKey, result);
return result;
}
/**
 * Helper method used to check whether given type is one for which a real
 * CSV schema cannot be constructed (primitives, enums, well-known scalar
 * wrapper types).
 *
 * @return True if type is a non-POJO (scalar-like) type for which no
 *   column definitions should be added
 */
protected boolean _nonPojoType(JavaType t)
{
    if (t.isPrimitive() || t.isEnumType()) {
        return true;
    }
    Class<?> raw = t.getRawClass();
    // Wrapper types for numbers
    if (Number.class.isAssignableFrom(raw)) {
        if ((raw == Byte.class)
                || (raw == Short.class)
                || (raw == Integer.class)
                || (raw == Long.class)
                || (raw == Float.class)
                || (raw == Double.class)
                ) {
            return true;
        }
    }
    // Some other well-known non-POJO types.
    // NOTE: Character is NOT a Number subtype, so it must be checked here --
    // previously it was (unreachably) checked inside the Number branch above
    if ((raw == Boolean.class)
            || (raw == Character.class)
            || (raw == String.class)
            ) {
        return true;
    }
    return false;
}
protected void _addSchemaProperties(SerializationContext ctxt, CsvSchema.Builder builder,
boolean typed, JavaType pojoType, NameTransformer unwrapper, Class> view)
{
// 09-Aug-2015, tatu: From [dataformat-csv#87], realized that one can not have
// real schemas for primitive/wrapper
if (_nonPojoType(pojoType)) {
return;
}
BeanDescription beanDesc = ctxt.introspectBeanDescription(pojoType);
final AnnotationIntrospector intr = ctxt.getAnnotationIntrospector();
final boolean includeByDefault = isEnabled(MapperFeature.DEFAULT_VIEW_INCLUSION);
for (BeanPropertyDefinition prop : beanDesc.findProperties()) {
if (view != null) {
Class>[] views = prop.findViews();
if (views == null) {
views = beanDesc.findDefaultViews();
}
// If property defines no Views AND non-view-enabled included by default,
// should include
if ((views == null) && includeByDefault) {
;
} else if (!ViewMatcher.construct(views).isVisibleForView(view)) {
continue;
}
}
// ignore setter-only properties:
if (!prop.couldSerialize()) {
continue;
}
// [dataformat-csv#15]: handle unwrapped props
AnnotatedMember m = prop.getPrimaryMember();
if (m != null) {
NameTransformer nextUnwrapper = intr.findUnwrappingNameTransformer(ctxt.getConfig(),
prop.getPrimaryMember());
if (nextUnwrapper != null) {
if (unwrapper != null) {
nextUnwrapper = NameTransformer.chainedTransformer(unwrapper, nextUnwrapper);
}
JavaType nextType = m.getType();
_addSchemaProperties(ctxt, builder, typed, nextType, nextUnwrapper, view);
continue;
}
}
// Then name wrapping/unwrapping
String name = prop.getName();
if (unwrapper != null) {
name = unwrapper.transform(name);
}
if (typed && m != null) {
builder.addColumn(name, _determineType(m.getRawType()));
} else {
builder.addColumn(name);
}
}
}
// should not be null since couldSerialize() returned true, so:
protected CsvSchema.ColumnType _determineType(Class> propType)
{
// very first thing: arrays
if (propType.isArray()) {
// one exception; byte[] assumed to come in as Base64 encoded
if (propType == byte[].class) {
return CsvSchema.ColumnType.STRING;
}
return CsvSchema.ColumnType.ARRAY;
}
// First let's check certain cases that ought to be just presented as Strings...
if (propType == String.class
|| propType == Character.TYPE
|| propType == Character.class) {
return CsvSchema.ColumnType.STRING;
}
if (propType == Boolean.class
|| propType == Boolean.TYPE) {
return CsvSchema.ColumnType.BOOLEAN;
}
// all primitive types are good for NUMBER, since 'char', 'boolean' handled above
if (propType.isPrimitive()) {
return CsvSchema.ColumnType.NUMBER;
}
if (Number.class.isAssignableFrom(propType)) {
return CsvSchema.ColumnType.NUMBER;
}
if (Collection.class.isAssignableFrom(propType)) { // since 2.5
return CsvSchema.ColumnType.ARRAY;
}
// but in general we will just do what we can:
return CsvSchema.ColumnType.NUMBER_OR_STRING;
}
/*
/**********************************************************************
/* Helper classes
/**********************************************************************
*/
/**
* Simple class in order to create a map key based on {@link JavaType} and a given view.
* Used for caching associated schemas in {@code _untypedSchemas} and {@code _typedSchemas}.
*/
public static final class ViewKey
implements java.io.Serializable
{
private static final long serialVersionUID = 1L;
private final JavaType _pojoType;
private final Class> _view;
private final int _hashCode;
public ViewKey(final JavaType pojoType, final Class> view)
{
_pojoType = pojoType;
_view = view;
_hashCode = Objects.hash(pojoType, view);
}
@Override
public int hashCode() { return _hashCode; }
@Override
public boolean equals(final Object o)
{
if (o == this) { return true; }
if (o == null || o.getClass() != getClass()) { return false; }
final ViewKey other = (ViewKey) o;
if (_hashCode != other._hashCode || _view != other._view) { return false; }
return Objects.equals(_pojoType, other._pojoType);
}
@Override
public String toString()
{
String viewName = _view != null ? _view.getName() : null;
return "[ViewKey: pojoType=" + _pojoType + ", view=" + viewName + "]";
}
}
/**
 * Helper class to contain dynamically constructed "shared" instance of
 * mapper, should one be needed via {@link #shared}.
 */
private final static class SharedWrapper {
    // Initialization-on-demand holder idiom: instance constructed lazily
    // and thread-safely on first access of this nested class
    private final static CsvMapper MAPPER = CsvMapper.builder().build();

    public static CsvMapper wrapped() { return MAPPER; }
}
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/CsvParser.java
================================================
package tools.jackson.dataformat.csv;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.Set;
import tools.jackson.core.*;
import tools.jackson.core.base.ParserMinimalBase;
import tools.jackson.core.exc.StreamReadException;
import tools.jackson.core.io.IOContext;
import tools.jackson.core.json.DupDetector;
import tools.jackson.core.util.ByteArrayBuilder;
import tools.jackson.core.util.JacksonFeatureSet;
import tools.jackson.core.util.SimpleStreamReadContext;
import tools.jackson.core.util.TextBuffer;
import tools.jackson.dataformat.csv.impl.CsvDecoder;
/**
* {@link JsonParser} implementation used to expose CSV documents
* in form that allows other Jackson functionality to deal
* with it.
*
* Implementation is based on a state-machine that pulls information
* using {@link CsvDecoder}.
*/
public class CsvParser
extends ParserMinimalBase
{
// Just to protect against bugs, DoS, limit number of column defs we may read
private final static int MAX_COLUMNS = 99999;
private final static CsvSchema EMPTY_SCHEMA;
static {
EMPTY_SCHEMA = CsvSchema.emptySchema();
}
/**
* CSV is slightly different from defaults, having essentially untyped
* scalars except if indicated by schema
*
* @since 2.12
*/
protected final static JacksonFeatureSet STREAM_READ_CAPABILITIES =
DEFAULT_READ_CAPABILITIES
.with(StreamReadCapability.UNTYPED_SCALARS)
;
/*
/**********************************************************************
/* State constants
/**********************************************************************
*/
/**
* Initial state before anything is read from document.
*/
protected final static int STATE_DOC_START = 0;
/**
* State before logical start of a record, in which next
* token to return will be {@link JsonToken#START_OBJECT}
* (or if no Schema is provided, {@link JsonToken#START_ARRAY}).
*/
protected final static int STATE_RECORD_START = 1;
/**
* State in which next entry will be available, returning
* either {@link JsonToken#PROPERTY_NAME} or value
* (depending on whether entries are expressed as
* Objects or just Arrays); or
* matching close marker.
*/
protected final static int STATE_NEXT_ENTRY = 2;
/**
* State in which value matching property name will
* be returned.
*/
protected final static int STATE_NAMED_VALUE = 3;
/**
* State in which "unnamed" value (entry in an array)
* will be returned, if one available; otherwise
* end-array is returned.
*/
protected final static int STATE_UNNAMED_VALUE = 4;
/**
* State in which a column value has been determined to be of
* an array type, and will need to be split into multiple
* values. This can currently only occur for named values.
*/
protected final static int STATE_IN_ARRAY = 5;
/**
* State in which we have encountered more column values than there should be,
* and need to basically skip extra values if callers tries to advance parser
* state.
*/
protected final static int STATE_SKIP_EXTRA_COLUMNS = 6;
/**
* State in which we should expose name token for a "missing column"
* (for which placeholder `null` value is to be added as well);
* see {@link CsvReadFeature#INSERT_NULLS_FOR_MISSING_COLUMNS} for details.
*/
protected final static int STATE_MISSING_NAME = 7;
/**
* State in which we should expose `null` value token as a value for
* "missing" column;
* see {@link CsvReadFeature#INSERT_NULLS_FOR_MISSING_COLUMNS} for details.
*/
protected final static int STATE_MISSING_VALUE = 8;
/**
* State in which end marker is returned; either
* null (if no array wrapping), or
* {@link JsonToken#END_ARRAY} for wrapping.
* This step will loop, returning series of nulls
* if {@link #nextToken} is called multiple times.
*/
protected final static int STATE_DOC_END = 9;
/*
/**********************************************************************
/* Configuration
/**********************************************************************
*/
protected int _formatFeatures;
/**
* Definition of columns being read.
*/
protected CsvSchema _schema;
/**
* Number of columns defined by schema.
*/
protected int _columnCount = 0;
protected final boolean _cfgEmptyStringAsNull;
/**
* @since 2.18
*/
protected final boolean _cfgEmptyUnquotedStringAsNull;
/**
* @since 3.1
*/
protected final boolean _cfgOnlyUnquotedNullValuesAsNull;
/**
* @since 3.2
*/
protected final boolean _cfgEmptyStringAsMissing;
/*
/**********************************************************************
/* State
/**********************************************************************
*/
/**
* Information about parser context, context in which
* the next token is to be parsed (root, array, object).
*/
protected SimpleStreamReadContext _streamReadContext;
/**
* Name of column that we exposed most recently, accessible after
* {@link JsonToken#PROPERTY_NAME} as well as value tokens immediately
* following property name.
*/
protected String _currentName;
/**
* String value for the current column, if accessed.
*/
protected String _currentValue;
/**
* Index of the column we are exposing
*/
protected int _columnIndex;
/**
* Current logical state of the parser; one of STATE_
* constants.
*/
protected int _state = STATE_DOC_START;
/**
* We will hold on to decoded binary data, for duration of
* current event, so that multiple calls to
* {@link #getBinaryValue} will not need to decode data more
* than once.
*/
protected byte[] _binaryValue;
/**
* Pointer to the first character of the next array value to return.
*/
protected int _arrayValueStart;
/**
* Contents of the cell, to be split into distinct array values.
*/
protected String _arrayValue;
protected String _arraySeparator;
protected String _nullValue;
/*
/**********************************************************************
/* Helper objects
/**********************************************************************
*/
/**
* Thing that actually reads the CSV content
*/
protected final CsvDecoder _reader;
/**
* Buffer that contains contents of all values after processing
* of doubled-quotes, escaped characters.
*/
protected final TextBuffer _textBuffer;
protected ByteArrayBuilder _byteArrayBuilder;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
/**
 * Main constructor.
 *
 * @param readCtxt Object read context (databind-level)
 * @param ioCtxt I/O context for buffer management
 * @param stdFeatures Bitmask of standard {@code StreamReadFeature}s
 * @param csvFeatures Bitmask of {@link CsvReadFeature}s
 * @param schema CSV schema to use (may be empty, not null as used here)
 * @param reader Underlying source to read content from; must not be null
 */
public CsvParser(ObjectReadContext readCtxt, IOContext ioCtxt,
        int stdFeatures, int csvFeatures, CsvSchema schema,
        Reader reader)
{
    super(readCtxt, ioCtxt, stdFeatures);
    if (reader == null) {
        throw new IllegalArgumentException("Can not pass `null` as `java.io.Reader` to read from");
    }
    _formatFeatures = csvFeatures;
    // Optional duplicate property-name detection, per standard stream feature
    DupDetector dups = StreamReadFeature.STRICT_DUPLICATE_DETECTION.enabledIn(stdFeatures)
            ? DupDetector.rootDetector(this) : null;
    _streamReadContext = SimpleStreamReadContext.createRootContext(dups);
    _textBuffer = ioCtxt.constructReadConstrainedTextBuffer();
    _reader = new CsvDecoder(ioCtxt, this, reader, schema, _textBuffer,
            stdFeatures, csvFeatures);
    _setSchema(schema);
    // Cache feature-derived flags used for per-value null/missing handling
    _cfgEmptyStringAsNull = CsvReadFeature.EMPTY_STRING_AS_NULL.enabledIn(csvFeatures);
    _cfgEmptyUnquotedStringAsNull = CsvReadFeature.EMPTY_UNQUOTED_STRING_AS_NULL.enabledIn(csvFeatures);
    _cfgOnlyUnquotedNullValuesAsNull = CsvReadFeature.ONLY_UNQUOTED_NULL_VALUES_AS_NULL.enabledIn(csvFeatures);
    _cfgEmptyStringAsMissing = CsvReadFeature.EMPTY_UNQUOTED_STRING_AS_MISSING.enabledIn(csvFeatures);
}
/*
/**********************************************************************
/* Versioned
/**********************************************************************
 */

@Override
public Version version() {
    return PackageVersion.VERSION;
}

/*
/**********************************************************************
/* Overrides: capability introspection methods
/**********************************************************************
 */

// CSV format has no native Object Ids
@Override
public boolean canReadObjectId() { return false; }

// CSV format has no native Type Ids
@Override
public boolean canReadTypeId() { return false; }

// @since 3.2
// Property names come from schema/header line, not a symbol table,
// so no String interning is performed
@Override
public boolean willInternPropertyNames() { return false; }

@Override
public JacksonFeatureSet streamReadCapabilities() {
    return STREAM_READ_CAPABILITIES;
}

/*
/**********************************************************************
/* Overridden methods
/**********************************************************************
 */

@Override
public int releaseBuffered(Writer out) {
    try {
        return _reader.releaseBuffered(out);
    } catch (IOException e) {
        // Translate checked I/O failure into Jackson's unchecked exception
        throw _wrapIOFailure(e);
    }
}

// Default close() works fine
//
// public void close() throws JacksonException

@Override
protected void _closeInput() throws IOException {
    _reader.close();
}

@Override
protected void _releaseBuffers() { }
/*
/**********************************************************************
/* Public API, configuration
/**********************************************************************
*/
// [dataformats-text#604]: remove these 3 methods from 3.1
/*
public JsonParser enable(CsvReadFeature f)
{
_formatFeatures |= f.getMask();
_cfgEmptyStringAsNull = CsvReadFeature.EMPTY_STRING_AS_NULL.enabledIn(_formatFeatures);
_cfgEmptyUnquotedStringAsNull = CsvReadFeature.EMPTY_UNQUOTED_STRING_AS_NULL.enabledIn(_formatFeatures);
_cfgOnlyUnquotedNullValuesAsNull = CsvReadFeature.ONLY_UNQUOTED_NULL_VALUES_AS_NULL.enabledIn(_formatFeatures);
_cfgEmptyStringAsMissing = CsvReadFeature.EMPTY_STRING_AS_MISSING.enabledIn(_formatFeatures);
return this;
}
public JsonParser disable(CsvReadFeature f)
{
_formatFeatures &= ~f.getMask();
_cfgEmptyStringAsNull = CsvReadFeature.EMPTY_STRING_AS_NULL.enabledIn(_formatFeatures);
_cfgEmptyUnquotedStringAsNull = CsvReadFeature.EMPTY_UNQUOTED_STRING_AS_NULL.enabledIn(_formatFeatures);
_cfgOnlyUnquotedNullValuesAsNull = CsvReadFeature.ONLY_UNQUOTED_NULL_VALUES_AS_NULL.enabledIn(_formatFeatures);
_cfgEmptyStringAsMissing = CsvReadFeature.EMPTY_STRING_AS_MISSING.enabledIn(_formatFeatures);
return this;
}
public JsonParser configure(CsvReadFeature f, boolean state)
{
if (state) {
enable(f);
} else {
disable(f);
}
return this;
}
*/
/**
 * Method for checking whether specified CSV {@link CsvReadFeature}
 * is enabled.
 */
public boolean isEnabled(CsvReadFeature f) {
    return (_formatFeatures & f.getMask()) != 0;
}

/**
 * Accessor for getting active schema definition: it may be
 * "empty" (no column definitions), but will never be null
 * since it defaults to an empty schema (and default configuration)
 */
@Override
public CsvSchema getSchema() {
    return _schema;
}

/*
/**********************************************************************
/* Location info
/**********************************************************************
 */

@Override
public TokenStreamContext streamReadContext() { return _streamReadContext; }

// Current-value accessors simply delegate to the active read context
@Override public void assignCurrentValue(Object v) { _streamReadContext.assignCurrentValue(v); }
@Override public Object currentValue() { return _streamReadContext.currentValue(); }

@Override
public TokenStreamLocation currentTokenLocation() {
    return _reader.getTokenLocation();
}

@Override
public TokenStreamLocation currentLocation() {
    return _reader.getCurrentLocation();
}

@Override
public Object streamReadInputSource() {
    return _reader.getInputSource();
}
/*
/**********************************************************************
/* Parsing, basic
/**********************************************************************
*/
/**
 * We need to override this method to support coercion from basic
 * String value into array, in cases where schema does not
 * specify actual type.
 */
@Override
public boolean isExpectedStartArrayToken() {
    if (_currToken == null) {
        return false;
    }
    switch (_currToken.id()) {
    // structural tokens (other than START_ARRAY) and names can never be coerced
    case JsonTokenId.ID_PROPERTY_NAME:
    case JsonTokenId.ID_START_OBJECT:
    case JsonTokenId.ID_END_OBJECT:
    case JsonTokenId.ID_END_ARRAY:
        return false;
    case JsonTokenId.ID_START_ARRAY:
        return true;
    }
    // Otherwise: may coerce into array, iff we have essentially "untyped" column
    if (_columnIndex < _columnCount) {
        CsvSchema.Column column = _schema.column(_columnIndex);
        if (column.getType() == CsvSchema.ColumnType.STRING) {
            _startArray(column);
            return true;
        }
    }
    // 30-Dec-2014, tatu: Seems like it should be possible to allow this
    //    in non-array-wrapped case too (for 2.5), so let's try that:
    // NOTE(review): this `else` binds to `if (_columnIndex < _columnCount)` above,
    //    so the VALUE_STRING fallback is only reached when there is NO schema
    //    column for the current index -- confirm this is the intended behavior
    else if (_currToken == JsonToken.VALUE_STRING) {
        _startArray(CsvSchema.Column.PLACEHOLDER);
        return true;
    }
    return false;
}
// Overridden to allow in-place coercion of a textual value into
// VALUE_NUMBER_INT when the underlying decoder vouches for integer contents
@Override
public boolean isExpectedNumberIntToken()
{
    if (_currToken != JsonToken.VALUE_STRING) {
        // Non-textual: only an actual int token qualifies
        return (_currToken == JsonToken.VALUE_NUMBER_INT);
    }
    if (!_reader.isExpectedNumberIntToken()) {
        return false;
    }
    _updateToken(JsonToken.VALUE_NUMBER_INT);
    return true;
}
@Override
public String currentName() {
    // Name of the column being decoded (or configured "any property" name), if any
    return _currentName;
}
/**
 * Main token-advance method: dispatches on the parser's internal state
 * machine to decode the next logical token from CSV input.
 */
@Override
public JsonToken nextToken() throws JacksonException
{
    _binaryValue = null;
    switch (_state) {
    case STATE_DOC_START:
        return _updateToken(_handleStartDoc());
    case STATE_RECORD_START:
        return _updateToken(_handleRecordStart());
    case STATE_NEXT_ENTRY:
        return _updateToken(_handleNextEntry());
    case STATE_NAMED_VALUE:
        return _updateToken(_handleNamedValue());
    case STATE_UNNAMED_VALUE:
        return _updateToken(_handleUnnamedValue());
    case STATE_IN_ARRAY:
        return _updateToken(_handleArrayValue());
    case STATE_SKIP_EXTRA_COLUMNS:
        // Need to just skip whatever remains; helper calls _updateToken() itself
        return _skipUntilEndOfLine();
    case STATE_MISSING_NAME:
        return _updateToken(_handleMissingName());
    case STATE_MISSING_VALUE:
        return _updateToken(_handleMissingValue());
    case STATE_DOC_END:
        try {
            _reader.close();
        } catch (IOException e) {
            throw _wrapIOFailure(e);
        }
        if (_streamReadContext.inRoot()) {
            return null;
        }
        // should always be in array, actually... but:
        boolean inArray = _streamReadContext.inArray();
        _streamReadContext = _streamReadContext.clearAndGetParent();
        // Wrap via _updateToken() like every other branch (and like
        // _skipUntilEndOfLine()) so currentToken() stays in sync
        return _updateToken(inArray ? JsonToken.END_ARRAY : JsonToken.END_OBJECT);
    default:
        throw new IllegalStateException("Unrecognized parser state: "+_state);
    }
}
/*
/**********************************************************************
/* Parsing, optimized methods
/**********************************************************************
*/
// Optimized variant for matching an expected property name
@Override
public boolean nextName(SerializableString str) throws JacksonException
{
    // Fast path: positioned to decode a property name directly
    if (_state == STATE_NEXT_ENTRY) {
        _binaryValue = null;
        JsonToken t = _updateToken(_handleNextEntry());
        return (t == JsonToken.PROPERTY_NAME)
                && str.getValue().equals(_currentName);
    }
    // Fallback: go through full token advancement
    if (nextToken() != JsonToken.PROPERTY_NAME) {
        return false;
    }
    return str.getValue().equals(currentName());
}
// Optimized variant for advancing to (and returning) the next property name
@Override
public String nextName() throws JacksonException
{
    // Fast path for the common case of an upcoming property name
    if (_state == STATE_NEXT_ENTRY) {
        _binaryValue = null;
        return (_updateToken(_handleNextEntry()) == JsonToken.PROPERTY_NAME)
                ? _currentName : null;
    }
    // Otherwise use regular token advancement
    JsonToken t = nextToken();
    if (t == JsonToken.PROPERTY_NAME) {
        return currentName();
    }
    return null;
}
// Optimized variant for advancing to (and returning) the next String value
@Override
public String nextStringValue() throws JacksonException
{
    _binaryValue = null;
    final JsonToken t;
    switch (_state) {
    case STATE_NAMED_VALUE:
        t = _updateToken(_handleNamedValue());
        return (t == JsonToken.VALUE_STRING) ? _currentValue : null;
    case STATE_UNNAMED_VALUE:
        t = _updateToken(_handleUnnamedValue());
        return (t == JsonToken.VALUE_STRING) ? _currentValue : null;
    default:
        // Fallback: regular token advancement
        t = nextToken();
        return (t == JsonToken.VALUE_STRING) ? getString() : null;
    }
}
/*
/**********************************************************************
/* Parsing, helper methods, regular
/**********************************************************************
*/
/**
 * Method called to process the expected header line: either validates
 * it against an existing schema, or (re)builds the active schema from
 * the column names encountered.
 *
 * @throws JacksonException for decoding problems, read-constraint
 *    violations (header name length) or header validation failures
 */
protected void _readHeaderLine() throws JacksonException {
    /*
    When the header line is present and the settings ask for it
    to be processed, two different options are possible:

    a) The schema has been populated. In this case, build a new
    schema where the order matches the *actual* order in which
    the given CSV file offers its columns, if _schema.reordersColumns()
    is set to true; there are cases where the consumer of the CSV file
    knows about the columns but not necessarily the order in
    which they are defined.

    b) The schema has not been populated. In this case, build a
    default schema based on the columns found in the header.
    */
    final int schemaColumnCount = _schema.size();
    if (schemaColumnCount > 0 && !_schema.reordersColumns()) {
        if (_schema.strictHeaders()) {
            // Strict mode: header must match schema columns exactly, in order
            String name;
            int ix = 0;
            for (CsvSchema.Column column : _schema._columns) {
                name = _reader.nextString();
                ++ix;
                if (name == null) {
                    _reportError(String.format("Missing header column #%d, expecting \"%s\"", ix, column.getName()));
                } else {
                    // [dataformats-text#634]: validate header name length
                    _streamReadConstraints.validateNameLength(name.length());
                    if (!column.getName().equals(name)) {
                        _reportError(String.format(
                                "Mismatched header column #%d: expected \"%s\", actual \"%s\"", ix, column.getName(), name));
                    }
                }
            }
            if ((name = _reader.nextString()) != null) {
                _reportError(String.format("Extra header column \"%s\"", name));
            }
        } else {
            // Non-strict: simply consume the header line
            int allowed = MAX_COLUMNS;
            String name;
            while ((name = _reader.nextString()) != null) {
                // [dataformats-text#634]: validate header name length
                _streamReadConstraints.validateNameLength(name.length());
                // If we don't care about validation, just skip. But protect against infinite loop
                if (--allowed < 0) {
                    _reportError("Internal error: skipped "+MAX_COLUMNS+" header columns");
                }
            }
        }
        return;
    }

    // Either the schema is empty or the reorder-columns flag is set:
    // rebuild the schema based on the actual header
    String name;
    CsvSchema.Builder builder = _schema.rebuild().clearColumns();
    int count = 0;
    // [dataformats-text#327]: Optionally check for duplicate column names
    final boolean failOnDupHeaders = CsvReadFeature.FAIL_ON_DUPLICATE_HEADER_COLUMNS.enabledIn(_formatFeatures);
    final Set<String> seenNames = failOnDupHeaders ? new HashSet<>() : null;
    // [dataformats-text#657]: Optionally match header names case-insensitively
    final boolean caseInsensitive = CsvReadFeature.CASE_INSENSITIVE_HEADERS.enabledIn(_formatFeatures);
    final boolean trimHeaderNames = CsvReadFeature.TRIM_HEADER_SPACES.enabledIn(_formatFeatures);
    while ((name = _reader.nextString()) != null) {
        // [dataformats-text#31]: Trimming of header names is configurable
        if (trimHeaderNames) {
            name = name.trim();
        }
        // [dataformats-text#634]: validate header name length
        _streamReadConstraints.validateNameLength(name.length());
        // [dataformats-text#327]: check for duplicate column names
        if (failOnDupHeaders && !seenNames.add(name)) {
            _reportCsvReadError(String.format(
                    "Duplicate header column \"%s\"", name));
        }
        // See if "old" schema defined type; if so, use that type...
        // [dataformats-text#657]: optionally use case-insensitive lookup
        CsvSchema.Column prev = caseInsensitive
                ? _schema.columnIgnoreCase(name)
                : _schema.column(name);
        if (prev != null) {
            builder.addColumn(prev.getName(), prev.getType());
        } else {
            builder.addColumn(name);
        }
        if (++count > MAX_COLUMNS) {
            _reportError("Internal error: reached maximum of "+MAX_COLUMNS+" header columns");
        }
    }
    // [dataformats-text#204]: Drop trailing empty name if so instructed
    if (CsvReadFeature.ALLOW_TRAILING_COMMA.enabledIn(_formatFeatures)) {
        builder.dropLastColumnIfEmpty();
    }
    // Ok: did we get any columns?
    CsvSchema newSchema = builder.build();
    int newColumnCount = newSchema.size();
    if (newColumnCount < 2) { // 1 just because we may get 'empty' header name
        String first = (newColumnCount == 0) ? "" : newSchema.columnName(0).trim();
        if (first.isEmpty()) {
            _reportCsvReadError("Empty header line: can not bind data");
        }
    }
    // [dataformats-text#285]: Are we missing something?
    if (CsvReadFeature.FAIL_ON_MISSING_HEADER_COLUMNS.enabledIn(_formatFeatures)) {
        Set<String> oldColumnNames = new LinkedHashSet<>();
        _schema.getColumnNames(oldColumnNames);
        oldColumnNames.removeAll(newSchema.getColumnNames());
        int diff = oldColumnNames.size();
        if (diff > 0) {
            _reportCsvReadError(String.format("Missing %d header column%s: [\"%s\"]",
                    diff, (diff == 1) ? "" : "s", String.join("\",\"", oldColumnNames)
            ));
        }
    }
    // Otherwise use what we got: no need to call builder.build() a second
    // time since builder was not modified after constructing newSchema
    _setSchema(newSchema);
}
/**
 * Method called to handle details of initializing things to return
 * the very first token. Handles optional header line, optional
 * skipping of the first data row, and optional array wrapping.
 */
protected JsonToken _handleStartDoc() throws JacksonException
{
    // also, if comments enabled, or skip empty lines, may need to skip leading ones
    _reader.skipLinesWhenNeeded();
    // First things first: are we expecting header line? If so, read, process
    if (_schema.usesHeader()) {
        _readHeaderLine();
        _reader.skipLinesWhenNeeded();
    }
    // and if we are to skip the first data line, skip it
    if (_schema.skipsFirstDataRow()) {
        _reader.skipLine();
        _reader.skipLinesWhenNeeded();
    }
    // Only one real complication, actually; empty documents (zero bytes).
    // Those have no entries. Should be easy enough to detect like so:
    final boolean wrapAsArray = CsvReadFeature.WRAP_AS_ARRAY.enabledIn(_formatFeatures);
    if (!_reader.hasMoreInput()) {
        _state = STATE_DOC_END;
        // but even empty sequence must still be wrapped in logical array
        if (wrapAsArray) {
            _streamReadContext = _reader.childArrayContext(_streamReadContext);
            return JsonToken.START_ARRAY;
        }
        return null;
    }
    if (wrapAsArray) {
        _streamReadContext = _reader.childArrayContext(_streamReadContext);
        _state = STATE_RECORD_START;
        return JsonToken.START_ARRAY;
    }
    // otherwise, same as regular new entry...
    return _handleRecordStart();
}
// Called at the start of each logical record (row); decides whether the
// record is exposed as an Object (schema defines columns) or as an Array
protected JsonToken _handleRecordStart() throws JacksonException
{
    _columnIndex = 0;
    if (_columnCount != 0) {
        // Schema available: record exposed as an Object with named properties
        _streamReadContext = _reader.childObjectContext(_streamReadContext);
        _state = STATE_NEXT_ENTRY;
        return JsonToken.START_OBJECT;
    }
    // No schema: record exposed as a plain Array of values
    _streamReadContext = _reader.childArrayContext(_streamReadContext);
    _state = STATE_UNNAMED_VALUE;
    return JsonToken.START_ARRAY;
}
/**
 * Method called to decode the next column value and expose it as a
 * property name (value itself exposed on the following call).
 * Handles end-of-record, extra columns, and optional skipping of
 * empty unquoted cells.
 */
protected JsonToken _handleNextEntry() throws JacksonException
{
    // NOTE: only called when we do have real Schema
    String next;
    // [dataformats-text#355]: loop to skip empty unquoted values when
    // EMPTY_STRING_AS_MISSING enabled (avoids recursion for consecutive empty cells)
    while (true) {
        try {
            next = _reader.nextString();
        } catch (RuntimeException e) {
            // 12-Oct-2015, tatu: Need to resync here as well...
            _state = STATE_SKIP_EXTRA_COLUMNS;
            throw e;
        }
        if (next == null) { // end of record or input...
            // 16-Mar-2017, tatu: [dataformat-csv#137] Missing column(s)?
            if (_columnIndex < _columnCount) {
                return _handleMissingColumns();
            }
            return _handleObjectRowEnd();
        }
        if (_columnIndex >= _columnCount) {
            // More values than declared columns: extra-column handling
            _currentValue = next;
            return _handleExtraColumn(next);
        }
        // [dataformats-text#355]: skip empty unquoted values when EMPTY_STRING_AS_MISSING enabled
        if (_cfgEmptyStringAsMissing
                && next.isEmpty()
                && !_reader.isCurrentTokenQuoted()) {
            ++_columnIndex;
            continue;
        }
        break;
    }
    final CsvSchema.Column column = _schema.column(_columnIndex);
    _state = STATE_NAMED_VALUE;
    _currentName = column.getName();
    // 25-Aug-2024, tatu: [dataformats-text#442] May have value decorator
    CsvValueDecorator dec = column.getValueDecorator();
    if (dec == null) {
        _currentValue = next;
    } else {
        _currentValue = dec.undecorateValue(this, next);
    }
    return JsonToken.PROPERTY_NAME;
}
// Called to expose the value for the property name returned by the
// preceding _handleNextEntry() call
protected JsonToken _handleNamedValue() throws JacksonException
{
    // 06-Oct-2015, tatu: During recovery we may be positioned past all
    //   declared columns; only consult the schema while within bounds
    if (_columnIndex < _columnCount) {
        final CsvSchema.Column column = _schema.column(_columnIndex++);
        if (column.isArray()) {
            _startArray(column);
            return JsonToken.START_ARRAY;
        }
    }
    _state = STATE_NEXT_ENTRY;
    return _isNullValue(_currentValue)
            ? JsonToken.VALUE_NULL : JsonToken.VALUE_STRING;
}
// Called for schema-less decoding, where each record is exposed as an
// Array of (unnamed) values
protected JsonToken _handleUnnamedValue() throws JacksonException
{
    final String next = _reader.nextString();
    if (next != null) {
        // More values on this row; state remains STATE_UNNAMED_VALUE
        _currentValue = next;
        ++_columnIndex;
        return _isNullValue(next)
                ? JsonToken.VALUE_NULL : JsonToken.VALUE_STRING;
    }
    // End of record (or input): close the logical array
    _streamReadContext = _streamReadContext.clearAndGetParent();
    _state = _reader.startNewLine() ? STATE_RECORD_START : STATE_DOC_END;
    return JsonToken.END_ARRAY;
}
/**
 * Method called to expose elements of an in-column array value, split
 * by the configured array element separator.
 */
protected JsonToken _handleArrayValue() throws JacksonException
{
    int offset = _arrayValueStart;
    if (offset < 0) { // just returned last value
        _streamReadContext = _streamReadContext.clearAndGetParent();
        // [dataformats-text#9]: for schema-less case (unnamed values),
        // need to go back to unnamed value state, not named entry state
        if (_columnCount == 0) {
            _state = STATE_UNNAMED_VALUE;
        } else {
            _state = STATE_NEXT_ENTRY;
        }
        return JsonToken.END_ARRAY;
    }
    int end = _arrayValue.indexOf(_arraySeparator, offset);
    if (end < 0) { // last value
        _arrayValueStart = end; // end marker, regardless
        // 11-Feb-2015, tatu: Tricky, As per [dataformat-csv#66]; empty Strings really
        // should not emit any values. Not sure if trim
        if (offset == 0) { // no separator
            // for now, let's use trimming for checking
            if (_arrayValue.isEmpty() || _arrayValue.trim().isEmpty()) {
                _streamReadContext = _streamReadContext.clearAndGetParent();
                // NOTE(review): this early exit always sets STATE_NEXT_ENTRY,
                // whereas the normal end path above distinguishes the schema-less
                // case (STATE_UNNAMED_VALUE when _columnCount == 0) -- confirm
                // whether empty array values can occur in the schema-less case
                _state = STATE_NEXT_ENTRY;
                return JsonToken.END_ARRAY;
            }
            _currentValue = _arrayValue;
        } else {
            _currentValue = _arrayValue.substring(offset);
        }
    } else {
        // More elements follow: remember where the next one starts
        _currentValue = _arrayValue.substring(offset, end);
        _arrayValueStart = end+_arraySeparator.length();
    }
    if (isEnabled(CsvReadFeature.TRIM_SPACES)) {
        _currentValue = _currentValue.trim();
    }
    if (_isNullValue(_currentValue)) {
        return JsonToken.VALUE_NULL;
    }
    return JsonToken.VALUE_STRING;
}
/*
/**********************************************************************
/* Parsing, helper methods, extra column(s)
/**********************************************************************
*/
/**
 * Helper method called when an extraneous column value is found.
 * What happens then depends on configuration, but there are three
 * main choices: ignore value (and rest of line); expose extra value
 * as "any property" using configured name, or throw an exception.
 */
protected JsonToken _handleExtraColumn(String value) throws JacksonException
{
    // If "any properties" enabled, expose as such
    String anyProp = _schema.getAnyPropertyName();
    if (anyProp != null) {
        _currentName = anyProp;
        _state = STATE_NAMED_VALUE;
        return JsonToken.PROPERTY_NAME;
    }
    _currentName = null;
    // With [dataformat-csv#95] we'll simply ignore extra
    if (CsvReadFeature.IGNORE_TRAILING_UNMAPPABLE.enabledIn(_formatFeatures)) {
        _state = STATE_SKIP_EXTRA_COLUMNS;
        return _skipUntilEndOfLine();
    }
    // 14-Mar-2012, tatu: As per [dataformat-csv#1], let's allow one specific case
    // of extra: if we get just one all-whitespace entry, that can be just skipped
    // (enter recovery mode first so that remainder of line gets skipped either way)
    _state = STATE_SKIP_EXTRA_COLUMNS;
    if (_columnIndex == _columnCount && CsvReadFeature.ALLOW_TRAILING_COMMA.enabledIn(_formatFeatures)) {
        value = value.trim();
        if (value.isEmpty()) {
            // if so, need to verify we then get the end-of-record;
            // easiest to do by just calling ourselves again...
            String next = _reader.nextString();
            if (next == null) { // should end of record or input
                return _handleObjectRowEnd();
            }
        }
    }
    // 21-May-2015, tatu: Need to enter recovery mode, to skip remainder of the line
    return _reportCsvReadError("Too many entries: expected at most %d (value #%d (%d chars) \"%s\")",
            _columnCount, _columnIndex, value.length(), value);
}
/*
/**********************************************************************
/* Parsing, helper methods, missing column(s)
/**********************************************************************
*/
/**
 * Helper method called when end of row occurs before finding values for
 * all schema-specified columns: behavior then depends on configured
 * features (fail, insert nulls, or simply end the row).
 */
protected JsonToken _handleMissingColumns() throws JacksonException
{
    if (CsvReadFeature.FAIL_ON_MISSING_COLUMNS.enabledIn(_formatFeatures)) {
        // Advance state first so that caller could recover and read next row
        _handleObjectRowEnd();
        return _reportCsvReadError("Not enough column values: expected %d, found %d",
                _columnCount, _columnIndex);
    }
    if (!CsvReadFeature.INSERT_NULLS_FOR_MISSING_COLUMNS.enabledIn(_formatFeatures)) {
        return _handleObjectRowEnd();
    }
    // Expose synthetic name/null pairs for remaining declared columns
    _state = STATE_MISSING_VALUE;
    _currentName = _schema.columnName(_columnIndex);
    _currentValue = null;
    return JsonToken.PROPERTY_NAME;
}
// Called to expose the name of the next synthesized "missing column"
// property (see INSERT_NULLS_FOR_MISSING_COLUMNS)
protected JsonToken _handleMissingName() throws JacksonException
{
    ++_columnIndex;
    if (_columnIndex >= _columnCount) {
        // Ran out of declared columns: close the row
        return _handleObjectRowEnd();
    }
    _state = STATE_MISSING_VALUE;
    _currentName = _schema.columnName(_columnIndex);
    // _currentValue was already cleared when missing-column handling started
    return JsonToken.PROPERTY_NAME;
}

// Called to expose the (always null) value of a synthesized missing column
protected JsonToken _handleMissingValue() throws JacksonException
{
    _state = STATE_MISSING_NAME;
    return JsonToken.VALUE_NULL;
}
/*
/**********************************************************************
/* Parsing, helper methods: row end handling, recover
/**********************************************************************
*/
/**
 * Helper method called to handle details of state update when end of logical
 * record occurs: closes the record's Object context and prepares for
 * either the next record or end of document.
 */
protected final JsonToken _handleObjectRowEnd() throws JacksonException
{
    _streamReadContext = _streamReadContext.clearAndGetParent();
    _state = _reader.startNewLine() ? STATE_RECORD_START : STATE_DOC_END;
    return JsonToken.END_OBJECT;
}
// Recovery helper: consumes and discards any remaining column values on
// the current line, then closes the enclosing context
protected final JsonToken _skipUntilEndOfLine() throws JacksonException
{
    while (_reader.nextString() != null) { }
    // But once we hit the end of the logical line, get out
    // NOTE: seems like we should always be within Object, but let's be conservative
    // and check just in case
    // NOTE(review): inArray() below is evaluated on the PARENT context (after
    // clearAndGetParent()), whereas STATE_DOC_END handling in nextToken()
    // captures inArray() before popping -- confirm the difference is intended
    _streamReadContext = _streamReadContext.clearAndGetParent();
    _state = _reader.startNewLine() ? STATE_RECORD_START : STATE_DOC_END;
    return _updateToken(_streamReadContext.inArray()
            ? JsonToken.END_ARRAY : JsonToken.END_OBJECT);
}
/*
/**********************************************************************
/* String value handling
/**********************************************************************
*/
// For now we do not store char[] representation...
// NOTE(review): hasStringCharacters(), getStringCharacters() and
// getStringLength() consult _textBuffer while getString() returns
// _currentValue; presumably the reader materializes values through
// _textBuffer so the two agree -- confirm for VALUE_STRING tokens
@Override
public boolean hasStringCharacters() {
    if (_currToken == JsonToken.PROPERTY_NAME) {
        return false;
    }
    return _textBuffer.hasTextAsCharacters();
}

@Override
public String getString() throws JacksonException {
    if (_currToken == JsonToken.PROPERTY_NAME) {
        return _currentName;
    }
    // 08-Sep-2020, tatu: Used to check for empty String wrt EMPTY_STRING_AS_NULL
    // here, but now demoted to actual "nextToken()" handling
    return _currentValue;
}

@Override
public char[] getStringCharacters() throws JacksonException {
    if (_currToken == JsonToken.PROPERTY_NAME) {
        return _currentName.toCharArray();
    }
    return _textBuffer.contentsAsArray();
}

@Override
public int getStringLength() throws JacksonException {
    if (_currToken == JsonToken.PROPERTY_NAME) {
        return _currentName.length();
    }
    return _textBuffer.size();
}

@Override
public int getStringOffset() throws JacksonException {
    // Buffered content always starts at offset 0
    return 0;
}

@Override
public int getString(Writer w) throws JacksonException {
    String value = (_currToken == JsonToken.PROPERTY_NAME) ?
            _currentName : _currentValue;
    if (value == null) {
        return 0;
    }
    try {
        w.write(value);
    } catch (IOException e) {
        throw _wrapIOFailure(e);
    }
    return value.length();
}
/*
/**********************************************************************
/* Binary (base64)
/**********************************************************************
*/
@Override
public Object getEmbeddedObject() throws JacksonException {
    // Binary content (if already decoded) may be accessed as embedded Object
    return _binaryValue;
}

// Lazily base64-decodes the current String value; result is cached in
// _binaryValue until the next token is advanced to
@SuppressWarnings("resource")
@Override
public byte[] getBinaryValue(Base64Variant variant) throws JacksonException
{
    if (_binaryValue != null) {
        return _binaryValue;
    }
    if (_currToken != JsonToken.VALUE_STRING) {
        _reportError("Current token (%s) not VALUE_STRING, can not access as binary", _currToken);
    }
    final ByteArrayBuilder builder = _getByteArrayBuilder();
    _decodeBase64(_currentValue, builder, variant);
    _binaryValue = builder.toByteArray();
    return _binaryValue;
}
/*
/**********************************************************************
/* Number accessors
/**********************************************************************
*/
@Override
public NumberType getNumberType() throws JacksonException {
    // 31-May-2025, tatu: in 3.x no longer exception but null for non-number token
    if (_currToken == JsonToken.VALUE_NUMBER_INT) {
        return _reader.getNumberType();
    }
    return null;
}

// All numeric accessors below first verify the current token is
// VALUE_NUMBER_INT (the only numeric token this parser produces),
// then delegate actual decoding to the low-level reader

@Override
public Number getNumberValue() throws JacksonException {
    _verifyNumberToken();
    return _reader.getNumberValue(false);
}

@Override
public Number getNumberValueExact() throws JacksonException {
    _verifyNumberToken();
    // "true": exact representation required
    return _reader.getNumberValue(true);
}

@Override
public int getIntValue() throws JacksonException {
    _verifyNumberToken();
    return _reader.getIntValue();
}

@Override
public long getLongValue() throws JacksonException {
    _verifyNumberToken();
    return _reader.getLongValue();
}

@Override
public BigInteger getBigIntegerValue() throws JacksonException {
    _verifyNumberToken();
    return _reader.getBigIntegerValue();
}

@Override
public float getFloatValue() throws JacksonException {
    _verifyNumberToken();
    return _reader.getFloatValue();
}

@Override
public double getDoubleValue() throws JacksonException {
    _verifyNumberToken();
    return _reader.getDoubleValue();
}

@Override
public BigDecimal getDecimalValue() throws JacksonException {
    _verifyNumberToken();
    return _reader.getDecimalValue();
}

// NaN values not yet supported by this backend
@Override
public boolean isNaN() {
    return false;
}
/*
/**********************************************************************
/* Helper methods from base class
/**********************************************************************
*/
@Override
protected void _handleEOF() throws StreamReadException {
    // EOF is generally unproblematic for CSV; the main case that reaches
    // here would be EOF within an unclosed quoted value
    _reportInvalidEOF(": expected closing quote character", null);
}
/*
/**********************************************************************
/* Internal methods, error reporting
/**********************************************************************
*/
/**
 * Method called when there is a problem related to mapping CSV columns
 * to property names, i.e. a CSV-specific aspect; throws a
 * {@link CsvReadException} carrying the active schema.
 */
public T _reportCsvReadError(String msg, Object... args) throws JacksonException {
    // Treat the message as a format String only if arguments were given
    if (args.length > 0) {
        msg = String.format(msg, args);
    }
    throw CsvReadException.from(this, msg, _schema);
}

// Delegates to base implementation; exists to widen visibility for the decoder
public T _reportUnexpectedCsvChar(int ch, String msg) throws JacksonException {
    return super._reportUnexpectedChar(ch, msg);
}

@Override // just to make visible to decoder
public T _reportError(String msg) throws StreamReadException {
    return super._reportError(msg);
}

@Override // just to make visible to decoder
public JacksonException _wrapIOFailure(IOException e) {
    return super._wrapIOFailure(e);
}
// Verifies current token is numeric (VALUE_NUMBER_INT, the only numeric
// token this parser produces); reports an error otherwise
protected void _verifyNumberToken() throws JacksonException {
    if (_currToken == JsonToken.VALUE_NUMBER_INT) {
        return;
    }
    _reportNotNumericError();
}

// Always throws; nominal return type only to satisfy caller expressions
protected T _reportNotNumericError() throws JacksonException {
    _reportError("Current token (%s) not numeric, cannot use numeric value accessors",
            _currToken);
    return null;
}
/*
/**********************************************************************
/* Internal methods
/**********************************************************************
*/
// Assigns the active schema (null resets back to the empty schema) and
// propagates derived settings to the low-level reader
protected void _setSchema(CsvSchema schema)
{
    if (schema == null) {
        // NOTE(review): _nullValue is NOT reset here, so a "null value"
        // String from a previously assigned schema would remain in effect
        // after clearing -- confirm whether that is intended
        _schema = EMPTY_SCHEMA;
    } else {
        _schema = schema;
        _nullValue = _schema.getNullValueString();
    }
    _columnCount = _schema.size();
    _reader.setSchema(_schema);
}
// Lazily allocates the shared byte-array builder, resetting it for reuse
// on subsequent calls
public ByteArrayBuilder _getByteArrayBuilder()
{
    ByteArrayBuilder b = _byteArrayBuilder;
    if (b == null) {
        b = new ByteArrayBuilder();
        _byteArrayBuilder = b;
    } else {
        b.reset();
    }
    return b;
}
// Initializes state for exposing the current (String) value as an array
// of elements split by the effective array element separator
protected void _startArray(CsvSchema.Column column)
{
    _updateToken(JsonToken.START_ARRAY);
    _streamReadContext = _streamReadContext.createChildArrayContext(_reader.getCurrentRow(),
            _reader.getCurrentColumn());
    _state = STATE_IN_ARRAY;
    _arrayValueStart = 0;
    _arrayValue = _currentValue;
    // Column-level separator overrides schema default; empty means "not set"
    final String colSep = column.getArrayElementSeparator();
    _arraySeparator = colSep.isEmpty()
            ? _schema.getArrayElementSeparator() : colSep;
}
/**
 * Helper method called to check whether specified String value should be
 * considered a {@code null} value, per the active configuration
 * (configured "null value" String, empty-String coercion, quoting rules).
 */
protected boolean _isNullValue(String value) {
    // First: does it match an explicitly configured "null value" String?
    if ((_nullValue != null) && _nullValue.equals(value)) {
        // [dataformats-text#601]: possibly only when unquoted
        if (!_cfgOnlyUnquotedNullValuesAsNull) {
            return true;
        }
        return !_reader.isCurrentTokenQuoted();
    }
    // Second: empty-String coercion, depending on quoting status
    if (!value.isEmpty()) {
        return false;
    }
    if (_cfgEmptyStringAsNull) {
        return true;
    }
    return _cfgEmptyUnquotedStringAsNull && !_reader.isCurrentTokenQuoted();
}
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/CsvReadException.java
================================================
package tools.jackson.dataformat.csv;
import tools.jackson.core.exc.StreamReadException;
/**
 * Format-specific {@link StreamReadException} used to indicate problems
 * regarding low-level decoding/parsing issues specific to CSV content;
 * usually problems with field-to-column mapping as defined by {@link CsvSchema}.
 *<p>
 * NOTE: in Jackson 2.x this type used to extend
 * {@code tools.jackson.databind.DatabindException}; since Jackson 3.0
 * it is a streaming-level exception.
 */
public class CsvReadException
    extends StreamReadException
{
    private static final long serialVersionUID = 3L;

    // Schema in use when the problem occurred; retained for diagnostics
    protected final CsvSchema _schema;

    public CsvReadException(CsvParser p, String msg, CsvSchema schema) {
        super(p, msg);
        _schema = schema;
    }

    public static CsvReadException from(CsvParser p, String msg, CsvSchema schema) {
        return new CsvReadException(p, msg, schema);
    }

    /**
     * Accessor for the {@link CsvSchema} that was active when this
     * exception was constructed.
     */
    public CsvSchema getSchema() { return _schema; }
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/CsvReadFeature.java
================================================
package tools.jackson.dataformat.csv;
import tools.jackson.core.FormatFeature;
/**
* Enumeration that defines all togglable features for CSV parsers
*
* NOTE: in Jackson 2.x this was named {@code CsvParser.Feature}.
*/
public enum CsvReadFeature
implements FormatFeature
{
/**
* Feature determines whether spaces around separator characters
* (commas) are to be automatically trimmed before being reported
* or not.
* Note that this does NOT force trimming of possible white space from
* within double-quoted values, but only those surrounding unquoted
* values (white space outside of double-quotes is never included regardless
* of trimming).
*
* Default value is false, as per RFC-4180.
*/
TRIM_SPACES(false),
/**
* Feature determines whether spaces around separator characters
* (commas) in header line entries (header names) are to be automatically
* trimmed before being reported or not.
* Note that this does NOT force trimming of possible white space from
* within double-quoted values, but only those surrounding unquoted
* values (white space outside of double-quotes is never included regardless
* of trimming).
*
* Default value is {@code true}.
*/
TRIM_HEADER_SPACES(true),
/**
* Feature that determines how stream of records (usually CSV lines, but sometimes
* multiple lines when line-feeds are included in quoted values) is exposed:
* either as a sequence of Objects (false), or as an Array of Objects (true).
* Using stream of Objects is convenient when using
* ObjectMapper.readValues(...)
* and array of Objects convenient when binding to Lists or
* arrays of values.
*
* Default value is false, meaning that by default a CSV document is exposed as
* a sequence of root-level Object entries.
*/
WRAP_AS_ARRAY(false),
/**
* Feature that allows ignoring of unmappable "extra" columns; that is, values for
* columns that appear after columns for which types are defined. When disabled,
* an exception is thrown for such column values, but if enabled, they are
* silently ignored.
*
* Feature is disabled by default.
*/
IGNORE_TRAILING_UNMAPPABLE(false),
/**
* Feature that allows skipping input lines that are completely empty or blank (composed only of whitespace),
* instead of being decoded as lines of just a single column with an empty/blank String value (or,
* depending on binding, `null`).
*
* Feature is disabled by default.
*/
SKIP_EMPTY_LINES(false),
/**
* Feature that allows there to be a trailing single extraneous data
* column that is empty. When this feature is disabled, any extraneous
* column, regardless of content will cause an exception to be thrown.
* Disabling this feature is only useful when
* IGNORE_TRAILING_UNMAPPABLE is also disabled.
*/
ALLOW_TRAILING_COMMA(true),
/**
* Feature that allows accepting "hash comments" by default, similar to
* {@link CsvSchema#withAllowComments(boolean)}. If enabled, such comments
* are by default allowed on all columns of all documents.
*/
ALLOW_COMMENTS(false),
/**
* Feature that allows failing (with a {@link CsvReadException}) in cases
* where number of column values encountered is less than number of columns
* declared in the active schema ("missing columns").
*
* Note that this feature has precedence over {@link #INSERT_NULLS_FOR_MISSING_COLUMNS}
*
* Feature is disabled by default.
*/
FAIL_ON_MISSING_COLUMNS(false),
/**
* Feature that allows failing (with a {@link CsvReadException}) in cases
* where number of header columns encountered is less than number of columns
* declared in the active schema (if there is one).
*
* Feature is enabled by default.
*/
FAIL_ON_MISSING_HEADER_COLUMNS(true),
/**
* Feature that allows "inserting" virtual key / `null` value pairs in case
* a row contains fewer columns than declared by configured schema.
* This typically has the effect of forcing an explicit `null` assigment (or
* corresponding "null value", if so configured) at databinding level.
* If disabled, no extra work is done and values for "missing" columns are
* not exposed as part of the token stream.
*
* Note that this feature is only considered if
* {@link #FAIL_ON_MISSING_COLUMNS}
* is disabled.
*
* Feature is disabled by default.
*/
INSERT_NULLS_FOR_MISSING_COLUMNS(false),
/**
* Feature that enables coercing an empty {@link String} (quoted or unquoted)
* to {@code null}.
*
* Note that if this setting is enabled, {@link #EMPTY_UNQUOTED_STRING_AS_NULL}
* has no effect.
*
* Feature is disabled by default for backwards compatibility.
*/
EMPTY_STRING_AS_NULL(false),
/**
* Feature that enables coercing an empty un-quoted {@link String} to {@code null}.
* This feature allow differentiating between an empty quoted {@link String} and an empty un-quoted {@link String}.
*
* Note that this feature is only considered if
* {@link #EMPTY_STRING_AS_NULL}
* is disabled.
*
* Feature is disabled by default for backwards compatibility.
*/
EMPTY_UNQUOTED_STRING_AS_NULL(false),
/**
* Feature that enables treating empty unquoted cell values as "missing",
* effectively suppressing the token pair (property name + value) for such cells.
* This means that if the target POJO field has a default value, it will be
* preserved instead of being overwritten with an empty String.
*
* This is different from {@link #EMPTY_STRING_AS_NULL} which coerces the value
* to {@code null}: this feature causes the value to not be included in the token
* stream at all, similar to how truly missing columns (row shorter than schema)
* are handled.
*
* Only applies to unquoted empty values; a quoted empty string ({@code ""}) is
* still reported normally.
*
* Feature is disabled by default for backwards compatibility.
*
* @since 3.2
*/
EMPTY_UNQUOTED_STRING_AS_MISSING(false),
/**
* Feature that enables treating only un-quoted values matching the configured
* "null value" String (see {@link CsvSchema#getNullValueString()}) as {@code null},
* but not quoted values:
* differentiating between a quoted null value String (like {@code "null"})
* which remains as a String, and an unquoted null value (like {@code null})
* which becomes {@code null}.
*
* This is similar to {@link #EMPTY_UNQUOTED_STRING_AS_NULL} but applies to the
* explicitly configured null value rather than empty strings.
*
* Note: This feature only has an effect if a null value is configured via
* {@link CsvSchema.Builder#setNullValue(String)}.
*
* Feature is disabled by default for backwards compatibility.
*
* @since 3.1
*/
ONLY_UNQUOTED_NULL_VALUES_AS_NULL(false),
/**
* Feature that allows skipping input rows that consist solely of column separator
* characters (for example, a line containing only {@code ,,} with the default
* comma separator).
* This is different from {@link #SKIP_EMPTY_LINES} which only skips lines that are
* completely empty or blank (whitespace only): this feature skips lines that
* contain only consecutive separator characters followed by a linefeed.
*
* Feature is disabled by default.
*
* @since 3.2
*/
SKIP_EMPTY_ROWS(false),
/**
* Feature that enables failing (with a {@link CsvReadException}) when
* duplicate column names are encountered in the header line.
*
* When enabled, parsing will fail if the header line contains two or more
* columns with the same name. When disabled, duplicates are allowed and
* the last column with a given name will be the one accessible by name
* (earlier columns with the same name are effectively hidden).
*
* Feature is enabled by default.
*
* @since 3.2
*/
FAIL_ON_DUPLICATE_HEADER_COLUMNS(true),
/**
* Feature that enables case-insensitive matching of header column names
* against schema column names. When enabled, a CSV header column named
* "TEMP_MAX" will match a schema column named "temp_max" (and vice versa).
*
* This is useful when used together with
* {@code MapperFeature.ACCEPT_CASE_INSENSITIVE_PROPERTIES} to allow
* case-insensitive header matching at the parser level, preventing
* {@link #FAIL_ON_MISSING_HEADER_COLUMNS} from incorrectly reporting
* columns as missing when they differ only by case.
*
* Feature is disabled by default.
*
* @since 3.2
*/
CASE_INSENSITIVE_HEADERS(false),
;
// Whether this feature is enabled when no explicit configuration is given
private final boolean _defaultState;
// Pre-computed bit mask for this feature (derived from ordinal; see constructor)
private final int _mask;
/**
 * Method that calculates bit set (flags) of all features that
 * are enabled by default.
 *
 * @return Bit mask with bits set for all features enabled by default
 */
public static int collectDefaults()
{
    int defaults = 0;
    for (CsvReadFeature feature : CsvReadFeature.values()) {
        defaults |= feature.enabledByDefault() ? feature.getMask() : 0;
    }
    return defaults;
}
// NOTE: bit mask is derived from ordinal(), so the declaration order of
// enum constants determines flag bit positions
private CsvReadFeature(boolean defaultState) {
_defaultState = defaultState;
_mask = (1 << ordinal());
}
// Accessor for default on/off state, as given to constructor
@Override
public boolean enabledByDefault() { return _defaultState; }
// True if this feature's bit is set in the given bit mask
@Override
public boolean enabledIn(int flags) { return (flags & _mask) != 0; }
// Accessor for the bit mask of this feature
@Override
public int getMask() { return _mask; }
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/CsvSchema.java
================================================
package tools.jackson.dataformat.csv;
import java.util.*;
import java.util.function.UnaryOperator;
import tools.jackson.core.FormatSchema;
/**
* Simple {@link FormatSchema} sub-type that defines properties of
* a CSV document to read or write.
* Instances are thread-safe and immutable.
*
* Properties supported currently are:
*
* - {@code columns} (List of {@link Column}) [default: empty List]: Ordered list of columns (which may be empty, see below).
* Each column has name (mandatory) as well as type (optional; if not
* defined, defaults to "String").
*
*
* - {@code useHeader} (boolean) [default: false]: whether the first line of physical document defines
* column names (true) or not (false): if enabled, parser will take
* first-line values to define column names; and generator will output
* column names as the first line
*
* - {@code quoteChar} (char) [default: double-quote ('"')]: character used for quoting values
* that contain quote characters or linefeeds.
*
* - {@code columnSeparator} (char) [default: comma (',')]: character used to separate values.
* Other commonly used values include tab ('\t') and pipe ('|')
*
* - {@code arrayElementSeparator} (String) [default: semicolon (";")]: string used to separate array elements.
*
* - {@code lineSeparator} (String) [default: "\n"]: character used to separate data rows.
* Only used by generator; parser accepts three standard linefeeds ("\r", "\r\n", "\n").
*
* - {@code escapeChar} (int) [default: -1 meaning "none"]: character, if any, used to
* escape values. Most commonly defined as backslash ('\'). Only used by parser;
* generator only uses quoting, including doubling up of quotes to indicate quote char
* itself.
*
* - {@code skipFirstDataRow} (boolean) [default: false]: whether the first data line (either
* first line of the document, if useHeader=false, or second, if useHeader=true)
* should be completely ignored by parser. Needed to support CSV-like file formats
* that include additional non-data content before real data begins (specifically
* some database dumps do this)
*
* - {@code nullValue} (String) [default: "" (empty String)]: When asked to write Java `null`,
* this String value will be used instead.
* Null values will also be recognized during value reads.
*
* - {@code strictHeaders} (boolean) [default: false] (added in Jackson 2.7): whether names of
* columns defined in the schema MUST match the actual declarations from
* the header row (if header row handling is enabled): if true, an exception
* is thrown if the order differs; if false, no verification is performed.
*
* - {@code allowComments} (boolean) [default: false]: whether lines that start with character "#"
* are processed as comment lines and skipped/ignored.
*
* - {@code anyProperty} (String) [default: none]: if "any properties" (properties for
* 'extra' columns; ones not specified in schema) are enabled, they are mapped to
* this name: leaving it as {@code null} disables use of
* "any properties" (and they are either ignored, or an exception
* is thrown, depending on other settings); setting it to a non-null
* String value will expose all extra properties under one specified name.
* Most often used with Jackson {@code @JsonAnySetter} annotation.
*
* Note that schemas without any columns are legal, but if no columns
* are added, behavior of parser/generator is usually different and
* content will be exposed as logical Arrays instead of Objects.
*
* There are 4 ways to create CsvSchema instances:
*
* - Manually build one, using {@link Builder}
*
* - Modify existing schema (using
withXxx methods
* or {@link #rebuild} for creating {@link Builder})
*
* - Create schema based on a POJO definition (Class), using
* {@link CsvMapper} methods like {@link CsvMapper#schemaFor(java.lang.Class)}.
*
* - Request that {@link CsvParser} reads schema from the first line:
* enable "useHeader" property for the initial schema, and let parser
* read column names from the document itself.
*
*
*/
public class CsvSchema
implements FormatSchema,
Iterable,
java.io.Serializable
{
private static final long serialVersionUID = 3L;
/*
/**********************************************************************
/* Constants, feature flags
/**********************************************************************
*/
// On/off encoding features, packed into a single int bit mask
// (see Builder._encodingFeatures and CsvSchema._features)
protected final static int ENCODING_FEATURE_USE_HEADER = 0x0001;
protected final static int ENCODING_FEATURE_SKIP_FIRST_DATA_ROW = 0x0002;
protected final static int ENCODING_FEATURE_ALLOW_COMMENTS = 0x0004;
protected final static int ENCODING_FEATURE_REORDER_COLUMNS = 0x0008;
protected final static int ENCODING_FEATURE_STRICT_HEADERS = 0x0010;
// By default, none of the optional features are enabled
protected final static int DEFAULT_ENCODING_FEATURES = 0;
// Shared empty array, to avoid repeated allocation
protected final static char[] NO_CHARS = new char[0];
/*
/**********************************************************************
/* Constants, default settings
/**********************************************************************
*/
/**
* Default separator for column values is comma (hence "Comma-Separated Values")
*/
public final static char DEFAULT_COLUMN_SEPARATOR = ',';
/**
* Default separator for array elements within a column value is
* semicolon.
*/
public final static String DEFAULT_ARRAY_ELEMENT_SEPARATOR = ";";
/**
* Marker for the case where no array element separator is used
*/
public final static String NO_ARRAY_ELEMENT_SEPARATOR = "";
/**
* By default no "any properties" (properties for 'extra' columns; ones
* not specified in schema) are used, so null is used as marker.
*/
public final static String DEFAULT_ANY_PROPERTY_NAME = null;
/**
* Default character used for quoting values is double-quote ('"').
*/
public final static char DEFAULT_QUOTE_CHAR = '"';
/**
* By default, nulls are written as empty Strings (""); and no coercion
* is performed from any String (higher level databind may, however,
* coerce Strings into Java nulls).
* To use automatic coercion on reading, null value must be set explicitly
* to empty String ("").
*/
public final static char[] DEFAULT_NULL_VALUE = null;
/**
* By default, no escape character is used -- this is denoted by
* int value that does not map to a valid character
*/
public final static int DEFAULT_ESCAPE_CHAR = -1;
/**
* Default line separator used for writing is Unix linefeed ("\n").
*/
public final static char[] DEFAULT_LINEFEED = "\n".toCharArray();
/*
/**********************************************************************
/* Constants, other
/**********************************************************************
*/
/**
* Marker used for schemas that have no columns defined.
*/
protected final static Column[] NO_COLUMNS = new Column[0];
/*
/**********************************************************************
/* Helper classes
/**********************************************************************
*/
/**
 * Enumeration that defines optional type indicators that can be passed
 * with schema: if defined, the type is used to determine the type of
 * {@link tools.jackson.core.JsonToken}
 * that column values are exposed as.
 */
public enum ColumnType
{
/**
* Default type if not explicitly defined; value will
* be presented as VALUE_STRING by parser,
* that is, no type-inference is performed, and value is
* not trimmed.
*
* Note that this type allows coercion into array, if higher
* level application calls
* {@link tools.jackson.core.JsonParser#isExpectedStartArrayToken},
* unlike more explicit types.
*/
STRING,
/**
* Value is considered to be a String, except that tokens
* "null", "true" and "false" are recognized as matching
* tokens and reported as such;
* and values are trimmed (leading/trailing white space)
*/
STRING_OR_LITERAL,
/**
* Value should be a number, but literals "null", "true" and "false"
* are also understood, and an empty String is considered null.
* Values are also trimmed (leading/trailing white space).
* Other non-numeric Strings may cause parsing exception.
*/
NUMBER,
/**
* Value is taken to be a number (if it matches valid JSON number
* formatting rules), literal (null, true or false) or String,
* depending on best match.
* Values are also trimmed (leading/trailing white space)
*/
NUMBER_OR_STRING,
/**
* Value is expected to be a boolean ("true", "false") String,
* or "null", or empty String (equivalent to null).
* Values are trimmed (leading/trailing white space).
* Values other than indicated above may result in an exception.
*/
BOOLEAN,
/**
* Value will be a multi-value sequence, separated by array element
* separator. Element type itself may be any scalar type (that is, number
* or String) and will not be optimized.
* Separator may be overridden on per-column basis.
*
* Note that this type is used for generic concept of multiple values, and
* not specifically to match Java arrays: data-binding may match such columns
* to {@link java.util.Collection}s as well, or even other types as necessary.
*/
ARRAY,
;
}
/**
 * Representation of the definition of a single column within a schema:
 * name, index, type, and optional array-element separator and value
 * decorator. Instances are immutable (all fields final; changes are made
 * via {@code withXxx()} mutant factories).
 */
public static class Column implements java.io.Serializable
{
private static final long serialVersionUID = 1L;
// Shared placeholder instance: index 0, empty name, type STRING
public final static Column PLACEHOLDER = new Column(0, "");
// Name of this column
private final String _name;
// Zero-based position of this column within the enclosing schema
private final int _index;
// Type of values this column contains
private final ColumnType _type;
// Array-element separator override for this column; "" if none
private final String _arrayElementSeparator;
/**
* Value decorator used for this column, if any; {@code null} if none.
* Used to add decoration on serialization (writing) and remove decoration
* on deserialization (reading).
*/
private final CsvValueDecorator _valueDecorator;
/**
* Link to the next column within schema, if one exists;
* null for the last column.
*/
private final Column _next;
// Constructs a STRING-typed column with no array-element separator
public Column(int index, String name) {
this(index, name, ColumnType.STRING, "");
}
// Constructs a column of given type with no array-element separator
public Column(int index, String name, ColumnType type) {
this(index, name, type, "");
}
// Canonical public constructor. NOTE: value decorator and next-linkage
// start out as null; they are established later (via withValueDecorator()
// and schema-level linking, respectively)
public Column(int index, String name, ColumnType type, String arrayElementSep)
{
_index = index;
_name = name;
_type = type;
_arrayElementSeparator = _validArrayElementSeparator(arrayElementSep);
_valueDecorator = null;
_next = null;
}
// Copy constructor that only changes next-linkage
public Column(Column src, Column next) {
this(src, src._index, src._valueDecorator, next);
}
// Copy constructor that changes index and next-linkage
protected Column(Column src, int index, Column next) {
this(src, index, src._valueDecorator, next);
}
// Copy constructor that only changes the value decorator
protected Column(Column src, CsvValueDecorator valueDecorator) {
this(src, src._index, valueDecorator, src._next);
}
// Full copy constructor: name, type and array-element separator copied
// from source; index, decorator and next-linkage as given
protected Column(Column src, int index, CsvValueDecorator valueDecorator,
Column next)
{
_index = index;
_name = src._name;
_type = src._type;
_arrayElementSeparator = src._arrayElementSeparator;
_valueDecorator = valueDecorator;
_next = next;
}
// Mutant factory for changing name. NOTE: the instance returned on an
// actual change has no value decorator and no next-linkage (see the
// 4-argument constructor); linkage is re-established by schema linking
public Column withName(String newName) {
// identity comparison is an optimization: an equal-but-distinct name
// simply produces an equivalent new instance
if (_name == newName) {
return this;
}
return new Column(_index, newName, _type, _arrayElementSeparator);
}
// Mutant factory for changing type: same caveat as withName() regarding
// decorator and next-linkage being reset
public Column withType(ColumnType newType) {
if (newType == _type) {
return this;
}
return new Column(_index, _name, newType, _arrayElementSeparator);
}
// Mutant factory for changing array-element separator: same caveat as
// withName() regarding decorator and next-linkage being reset
public Column withArrayElementSeparator(String separator) {
String sep = _validArrayElementSeparator(separator);
if (_arrayElementSeparator.equals(sep)) {
return this;
}
return new Column(_index, _name, _type, sep);
}
// Mutant factory for changing value decorator (retains all other state)
public Column withValueDecorator(CsvValueDecorator valueDecorator) {
if (valueDecorator == _valueDecorator) {
return this;
}
return new Column(this, valueDecorator);
}
// Mutant factory for changing next-linkage (retains all other state)
public Column withNext(Column next) {
if (_next == next) {
return this;
}
return new Column(this, next);
}
// Mutant factory for changing index and next-linkage (retains other state)
public Column withNext(int index, Column next) {
if ((_index == index) && (_next == next)) {
return this;
}
return new Column(this, index, next);
}
public int getIndex() { return _index; }
public String getName() { return _name; }
public ColumnType getType() { return _type; }
public Column getNext() { return _next; }
/**
* Accessor that returns same as {@link #getNext} if (and only if) name
* of that column is same as given name; otherwise returns {@code null}.
*/
public Column getNextWithName(String name) {
if (_next != null && name.equals(_next._name)) {
return _next;
}
return null;
}
// Name match check; identity comparison first as an optimization
public boolean hasName(String n) {
return (_name == n) || _name.equals(n);
}
public String getArrayElementSeparator() { return _arrayElementSeparator; }
public CsvValueDecorator getValueDecorator() { return _valueDecorator; }
// True if this column is of type ARRAY
public boolean isArray() {
return (_type == ColumnType.ARRAY);
}
}
/**
 * Class used for building {@link CsvSchema} instances.
 *
 * Note: builder instances are mutable (unsynchronized state) and hence not
 * safe for concurrent use from multiple threads; built {@link CsvSchema}
 * instances are immutable.
 */
public static class Builder
{
// Columns defined so far, in order of addition
protected final ArrayList _columns = new ArrayList();
/**
* Bit-flag for general-purpose on/off features.
*/
protected int _encodingFeatures = DEFAULT_ENCODING_FEATURES;
// Character used to separate column values; comma by default
protected char _columnSeparator = DEFAULT_COLUMN_SEPARATOR;
// Separator between elements of ARRAY-typed values; semicolon by default
protected String _arrayElementSeparator = DEFAULT_ARRAY_ELEMENT_SEPARATOR;
/**
* If "any properties" (properties for 'extra' columns; ones
* not specified in schema) are enabled, they are mapped to
* this name: leaving it as {@code null} disables use of
* "any properties" (and they are either ignored, or an exception
* is thrown, depending on other settings); setting it to a non-null
* String value will expose all extra properties under one specified
* name.
*/
protected String _anyPropertyName = DEFAULT_ANY_PROPERTY_NAME;
// note: need to use int to allow -1 for 'none'
protected int _quoteChar = DEFAULT_QUOTE_CHAR;
// note: need to use int to allow -1 for 'none'
protected int _escapeChar = DEFAULT_ESCAPE_CHAR;
// Line separator used when writing
protected char[] _lineSeparator = DEFAULT_LINEFEED;
// Representation used in place of Java null, if any; null for default handling
protected char[] _nullValue = DEFAULT_NULL_VALUE;
public Builder() { }
/**
* "Copy" constructor which creates builder that has settings of
* given source schema
*/
public Builder(CsvSchema src)
{
for (Column col : src._columns) {
_columns.add(col);
}
_encodingFeatures = src._features;
_columnSeparator = src._columnSeparator;
_arrayElementSeparator = src._arrayElementSeparator;
_quoteChar = src._quoteChar;
_escapeChar = src._escapeChar;
_lineSeparator = src._lineSeparator;
_nullValue = src._nullValue;
_anyPropertyName = src._anyPropertyName;
}
/**
* Method for adding a column with given name, of default type (STRING).
*
* NOTE: does NOT check for duplicate column names so it is possible to
* accidentally add duplicates.
*/
public Builder addColumn(String name) {
int index = _columns.size();
return addColumn(new Column(index, name));
}
/**
* Add column with given name, and with changes to apply (as specified
* by second argument, {@code transformer}).
* NOTE: does NOT check for duplicate column names so it is possible to
* accidentally add duplicates.
*
* @param name Name of column to add
* @param transformer Changes to apply to column definition
*/
public Builder addColumn(String name, UnaryOperator transformer) {
Column col = transformer.apply(new Column(_columns.size(), name));
return addColumn(col);
}
/**
* Method for adding a column with given name and type.
*
* NOTE: does NOT check for duplicate column names so it is possible to
* accidentally add duplicates.
*/
public Builder addColumn(String name, ColumnType type) {
int index = _columns.size();
return addColumn(new Column(index, name, type));
}
/**
* Add column with given name and type, and with changes to apply (as
* specified by the last argument, {@code transformer}).
* NOTE: does NOT check for duplicate column names so it is possible to
* accidentally add duplicates.
*
* @param name Name of column to add
* @param type Type of the column to add
* @param transformer Changes to apply to column definition
*/
public Builder addColumn(String name, ColumnType type,
UnaryOperator transformer) {
Column col = transformer.apply(new Column(_columns.size(), name, type));
return addColumn(col);
}
/**
* Method for adding a fully specified column.
*
* NOTE: does NOT check for duplicate column names so it is possible to
* accidentally add duplicates.
*/
public Builder addColumn(Column c) {
_columns.add(c);
return this;
}
/**
* Method for adding all given columns, in iteration order.
*
* NOTE: does NOT check for duplicate column names so it is possible to
* accidentally add duplicates.
*/
public Builder addColumns(Iterable cs) {
for (Column c : cs) {
_columns.add(c);
}
return this;
}
/**
* Method for adding columns with given names, all of given type.
*
* NOTE: does NOT check for duplicate column names so it is possible to
* accidentally add duplicates.
*/
public Builder addColumns(Iterable names, ColumnType type) {
Builder result = this;
for (String name : names) {
result = addColumn(name, type);
}
return result;
}
/**
* NOTE: unlike many other add methods, this method DOES check for, and
* discard, possible duplicate columns: that is, if this builder already
* has a column with same name as column to be added, existing column
* is retained and new column ignored.
*/
public Builder addColumnsFrom(CsvSchema schema) {
Builder result = this;
for (Column col : schema) {
if (!hasColumn(col.getName())) {
result = result.addColumn(col);
}
}
return result;
}
// Convenience method for adding a column of type ARRAY, with no
// per-column element separator override
public Builder addArrayColumn(String name) {
int index = _columns.size();
return addColumn(new Column(index, name, ColumnType.ARRAY, ""));
}
/**
* Convenience method for adding a column of type ARRAY, with given
* per-column element separator.
*
* @since 2.7
*/
public Builder addArrayColumn(String name, String elementSeparator) {
int index = _columns.size();
return addColumn(new Column(index, name, ColumnType.ARRAY, elementSeparator));
}
// Convenience method for adding a column of type NUMBER
public Builder addNumberColumn(String name) {
int index = _columns.size();
return addColumn(new Column(index, name, ColumnType.NUMBER));
}
// Convenience method for adding a column of type BOOLEAN
public Builder addBooleanColumn(String name) {
int index = _columns.size();
return addColumn(new Column(index, name, ColumnType.BOOLEAN));
}
/**
* Method for changing name of column at given index.
*
* @throws IllegalArgumentException if index is out of valid range
*/
public Builder renameColumn(int index, String newName) {
_checkIndex(index);
_columns.set(index, _columns.get(index).withName(newName));
return this;
}
/**
* Method for replacing column at given index with given definition.
*
* @throws IllegalArgumentException if index is out of valid range
*/
public Builder replaceColumn(int index, Column c) {
_checkIndex(index);
_columns.set(index, c);
return this;
}
/**
* Method for removing column at given index.
*
* @throws IllegalArgumentException if index is out of valid range
*
* @since 2.16
*/
public Builder removeColumn(int index) {
_checkIndex(index);
_columns.remove(index);
return this;
}
/**
* Helper method called to drop the last collected column name if
* it is empty: called if {@code CsvParser.Feature#ALLOW_TRAILING_COMMA}
* enabled to remove the last entry after being added initially.
*
* @since 2.11.2
*/
public void dropLastColumnIfEmpty() {
final int ix = _columns.size() - 1;
if (ix >= 0) {
if (_columns.get(ix).getName().isEmpty()) {
_columns.remove(ix);
}
}
}
/**
* Method for changing type of column at given index.
*
* @throws IllegalArgumentException if index is out of valid range
*/
public Builder setColumnType(int index, ColumnType type) {
_checkIndex(index);
_columns.set(index, _columns.get(index).withType(type));
return this;
}
/**
* Method for removing per-column array-element separator override of
* column at given index.
*
* @throws IllegalArgumentException if index is out of valid range
*/
public Builder removeArrayElementSeparator(int index) {
_checkIndex(index);
_columns.set(index, _columns.get(index).withArrayElementSeparator(""));
return this;
}
/**
* Method for setting per-column array-element separator of column at
* given index.
*
* @throws IllegalArgumentException if index is out of valid range
*
* @since 2.7
*/
public Builder setArrayElementSeparator(int index, String sep) {
_checkIndex(index);
_columns.set(index, _columns.get(index).withArrayElementSeparator(sep));
return this;
}
// Method for setting name under which "extra" columns (ones not declared
// in schema) are exposed; null (default) disables "any property" handling
public Builder setAnyPropertyName(String name) {
_anyPropertyName = name;
return this;
}
// Method for removing all columns added so far
public Builder clearColumns() {
_columns.clear();
return this;
}
// Accessor for number of columns added so far
public int size() {
return _columns.size();
}
// Accessor for iterating over columns added so far
public Iterator getColumns() {
return _columns.iterator();
}
/**
* Method for checking whether this builder already has a column with
* given name.
*
* NOTE: this method requires linear scan over existing columns
* so it may be more efficient to use other types of lookups if
* available (for example, {@link CsvSchema#column(String)} has a
* hash lookup to use).
*
* @since 2.9
*/
public boolean hasColumn(String name) {
for (int i = 0, end = _columns.size(); i < end; ++i) {
if (_columns.get(i).getName().equals(name)) {
return true;
}
}
return false;
}
/**
* Method for specifying whether Schema should indicate that
* a header line (first row that contains column names) is to be
* used for reading and writing or not.
*
* NOTE: when the schema already has columns defined, enabling this flag
* alone will cause the header line to be consumed but will NOT reorder
* the schema columns to match the CSV header order — columns are matched
* by position. To match columns by header name instead, also call
* {@link #setReorderColumns(boolean) setReorderColumns(true)}.
*
* @see #setReorderColumns(boolean)
*/
public Builder setUseHeader(boolean b) {
_feature(ENCODING_FEATURE_USE_HEADER, b);
return this;
}
/**
* Use in combination with {@link #setUseHeader(boolean)}. When the use-header
* flag is set, enabling this setting will cause the parser to rebuild the
* schema column order from the actual CSV header line, matching columns
* by name (while preserving their types from the original schema).
* Without this, columns are matched by position only, regardless of header names.
*
* @param b {@code true} to enable column reordering from CSV header,
* {@code false} to disable (default)
* @return This Builder instance
*
* @see #setUseHeader(boolean)
* @since 2.7
*/
public Builder setReorderColumns(boolean b) {
_feature(ENCODING_FEATURE_REORDER_COLUMNS, b);
return this;
}
/**
* Use in combination with {@link #setUseHeader}. When `strict-headers`
* is set, encoder will ensure the headers are in the order
* of the schema; if order differs, an exception is thrown.
*
* @param b Enable / Disable this setting
* @return This Builder instance
*
* @since 2.7
*/
public Builder setStrictHeaders(boolean b) {
_feature(ENCODING_FEATURE_STRICT_HEADERS, b);
return this;
}
/**
* Method for specifying whether Schema should indicate that
* the first line that is not a header (if header handling enabled)
* should be skipped in its entirety.
*/
public Builder setSkipFirstDataRow(boolean b) {
_feature(ENCODING_FEATURE_SKIP_FIRST_DATA_ROW, b);
return this;
}
/**
* Method for specifying whether Schema should indicate that
* "hash comments" (lines where the first non-whitespace character
* is '#') are allowed; if so, they will be skipped without processing.
*
* @since 2.5
*/
public Builder setAllowComments(boolean b) {
_feature(ENCODING_FEATURE_ALLOW_COMMENTS, b);
return this;
}
// Helper method for setting or clearing a single feature bit
protected final void _feature(int feature, boolean state) {
_encodingFeatures = state ? (_encodingFeatures | feature) : (_encodingFeatures & ~feature);
}
/**
* Method for specifying character used to separate column
* values.
* Default is comma (',').
*/
public Builder setColumnSeparator(char c) {
_columnSeparator = c;
return this;
}
/**
* Method for specifying character used to separate array element
* values.
* Default value is semicolon (";")
*
* @since 2.7
*/
public Builder setArrayElementSeparator(String separator) {
_arrayElementSeparator = _validArrayElementSeparator(separator);
return this;
}
/**
* Method for indicating that no array element separator is to be used.
*
* @since 2.7
*/
public Builder disableArrayElementSeparator() {
_arrayElementSeparator = NO_ARRAY_ELEMENT_SEPARATOR;
return this;
}
/**
* Method for specifying character used for optional quoting
* of values.
* Default is double-quote ('"').
*/
public Builder setQuoteChar(char c) {
_quoteChar = c;
return this;
}
/**
* Method for indicating that no quote character is to be used.
*
* @since 2.4
*/
public Builder disableQuoteChar() {
_quoteChar = -1;
return this;
}
/**
* Method for specifying character used for optional escaping
* of characters in quoted String values.
* Default is "not used", meaning that no escaping used.
*/
public Builder setEscapeChar(char c) {
_escapeChar = c;
return this;
}
/**
* Method for specifying that no escape character is to be used
* with CSV documents this schema defines.
*/
public Builder disableEscapeChar() {
_escapeChar = -1;
return this;
}
// Method for specifying line separator used when writing (reading
// accepts standard linefeeds regardless; see class documentation)
public Builder setLineSeparator(String lf) {
_lineSeparator = lf.toCharArray();
return this;
}
// Single-character variant of the String-argument setLineSeparator()
public Builder setLineSeparator(char lf) {
_lineSeparator = new char[] { lf };
return this;
}
// Method for specifying String to write in place of Java null (and
// recognized as null when reading); null argument to use default handling
public Builder setNullValue(String nvl) {
return setNullValue((nvl == null) ? null : nvl.toCharArray());
}
// Variant of the String-argument setNullValue(), taking char[]
public Builder setNullValue(char[] nvl) {
_nullValue = nvl;
return this;
}
/**
* Method for constructing an immutable {@link CsvSchema} instance with
* the settings and columns currently configured in this builder.
*/
public CsvSchema build()
{
Column[] cols = _columns.toArray(new Column[_columns.size()]);
return new CsvSchema(cols, _encodingFeatures,
_columnSeparator, _quoteChar, _escapeChar,
_lineSeparator, _arrayElementSeparator,
_nullValue, _anyPropertyName);
}
// Helper method for verifying that given column index is within bounds
protected void _checkIndex(int index) {
if (index < 0 || index >= _columns.size()) {
throw new IllegalArgumentException("Illegal index "+index+"; only got "+_columns.size()+" columns");
}
}
}
/*
/**********************************************************************
/* Configuration, construction
/**********************************************************************
*/
/**
 * Column definitions, needed for optional header and/or mapping
 * of field names to column positions.
 */
protected final Column[] _columns;
// Mapping from column name to column definition, for by-name lookups;
// built eagerly by constructors (empty Map if there are no columns)
protected final Map _columnsByName;
/**
* Bitflag for general-purpose on/off features.
*
* @since 2.5 (final since 2.19)
*/
protected final int _features;
// Character used to separate column values
protected final char _columnSeparator;
// Separator used between elements of ARRAY-typed values
protected final String _arrayElementSeparator;
// Quote character, or -1 for "no quoting"
protected final int _quoteChar;
// Escape character, or -1 for "no escaping"
protected final int _escapeChar;
// Line separator used when writing
protected final char[] _lineSeparator;
/**
* Representation used in place of Java null, if any; {@code null} for
* default handling.
*
* @since 2.5
*/
protected final char[] _nullValue;
// Cached String form of _nullValue; transient and not populated by
// constructors -- presumably constructed lazily by an accessor (and
// simply rebuilt after deserialization); verify against usage
protected transient String _nullValueAsString;
/**
* If "any properties" (properties for 'extra' columns; ones
* not specified in schema) are enabled, they are mapped to
* this name: leaving it as null disables use of
* "any properties" (and they are either ignored, or an exception
* is thrown, depending on other settings); setting it to a non-null
* String value will expose all extra properties under one specified
* name.
*
* @since 2.7
*/
protected final String _anyPropertyName;
/**
 * Canonical constructor: links given columns (assigning indexes and
 * next-column linkage; see {@code _link}) and builds the by-name
 * lookup map.
 *
 * @since 2.7
 */
public CsvSchema(Column[] columns, int features,
char columnSeparator, int quoteChar, int escapeChar,
char[] lineSeparator, String arrayElementSeparator,
char[] nullValue, String anyPropertyName)
{
if (columns == null) {
columns = NO_COLUMNS;
} else {
columns = _link(columns);
}
_columns = columns;
_features = features;
_columnSeparator = columnSeparator;
_arrayElementSeparator = arrayElementSeparator;
_quoteChar = quoteChar;
_escapeChar = escapeChar;
_lineSeparator = lineSeparator;
_nullValue = nullValue;
_anyPropertyName = anyPropertyName;
// and then we may need to create a mapping
// NOTE: for duplicate names, the later column wins (overwrites earlier
// mapping); keep this logic in sync with CsvSchema(CsvSchema, Column[])
if (_columns.length == 0) {
_columnsByName = Collections.emptyMap();
} else {
_columnsByName = new LinkedHashMap<>(4 + _columns.length);
for (Column c : _columns) {
_columnsByName.put(c.getName(), c);
}
}
}
/**
* Copy constructor used for creating variants using
* withXxx() methods.
*
* NOTE: assumes given columns are already linked and that the given
* by-name Map matches them; no validation or re-linking is performed.
*/
protected CsvSchema(Column[] columns, int features,
char columnSeparator, int quoteChar, int escapeChar,
char[] lineSeparator, String arrayElementSeparator,
char[] nullValue,
Map columnsByName, String anyPropertyName)
{
_columns = columns;
_features = features;
_columnSeparator = columnSeparator;
_quoteChar = quoteChar;
_escapeChar = escapeChar;
_lineSeparator = lineSeparator;
_arrayElementSeparator = arrayElementSeparator;
_nullValue = nullValue;
_columnsByName = columnsByName;
_anyPropertyName = anyPropertyName;
}
/**
* Copy constructor used for creating variants using
* sortedBy() methods: re-links given columns and rebuilds the
* by-name lookup map.
*/
protected CsvSchema(CsvSchema base, Column[] columns)
{
_columns = _link(columns);
_features = base._features;
_columnSeparator = base._columnSeparator;
_quoteChar = base._quoteChar;
_escapeChar = base._escapeChar;
_lineSeparator = base._lineSeparator;
_arrayElementSeparator = base._arrayElementSeparator;
_nullValue = base._nullValue;
_anyPropertyName = base._anyPropertyName;
// and then we may need to create a mapping
// NOTE: keep this logic in sync with the canonical constructor above
if (_columns.length == 0) {
_columnsByName = Collections.emptyMap();
} else {
_columnsByName = new LinkedHashMap<>(4 + _columns.length);
for (Column c : _columns) {
_columnsByName.put(c.getName(), c);
}
}
}
/**
* Copy constructor used for creating variants for on/off features:
* all other state (including column linkage and by-name lookup map)
* is shared with the base instance.
*
* @since 2.5
*/
protected CsvSchema(CsvSchema base, int features) {
_columns = base._columns;
_features = features;
_columnSeparator = base._columnSeparator;
_quoteChar = base._quoteChar;
_escapeChar = base._escapeChar;
_lineSeparator = base._lineSeparator;
_arrayElementSeparator = base._arrayElementSeparator;
_nullValue = base._nullValue;
_anyPropertyName = base._anyPropertyName;
_columnsByName = base._columnsByName;
}
/**
 * Helper method used for chaining columns together via next-linkage,
 * also ensuring that each column's index matches its array position.
 */
private static Column[] _link(Column[] orig)
{
    final int len = orig.length;
    Column[] linked = new Column[len];
    Column following = null;
    // Traverse backwards so that each column can be linked to its
    // already-constructed successor
    for (int ix = len - 1; ix >= 0; --ix) {
        following = orig[ix].withNext(ix, following);
        linked[ix] = following;
    }
    return linked;
}
/**
 * Accessor for getting a {@link Builder} for constructing a new schema
 * from scratch.
 */
public static Builder builder() {
return new Builder();
}
/**
 * Accessor for creating a "default" CSV schema instance, with following
 * settings:
 *
 * - Does NOT use header line
 *
 * - Uses double quotes ('"') for quoting of field values (if necessary)
 *
 * - Uses comma (',') as the field separator
 *
 * - Uses Unix linefeed ('\n') as row separator
 *
 * - Does NOT use any escape characters
 *
 * - Does NOT have any columns defined
 */
public static CsvSchema emptySchema() {
return builder().build();
}
/**
 * Helper method for constructing Builder that can be used to create modified
 * schema.
 */
public Builder rebuild() {
return new Builder(this);
}
/*
/**********************************************************************
/* Mutant factories
/**********************************************************************
*/
public CsvSchema withUseHeader(boolean state) {
return _withFeature(ENCODING_FEATURE_USE_HEADER, state);
}
/**
* Returns a clone of this instance with column reordering enabled or disabled.
* Only meaningful when used in combination with {@link #withHeader()}.
*
* When this is enabled (and the schema uses a header line), the parser will
* rebuild the column ordering from the actual CSV header, matching columns
* by name against those defined in the schema (preserving their types).
* This allows parsing CSV files whose column order differs from the order
* in which columns were defined in the schema (for example, via
* {@link CsvMapper#schemaFor(Class)}).
*
* When disabled (the default), columns are matched by position only:
* the first CSV value maps to the first schema column, the second to the
* second, and so on — regardless of what the header names say.
*
* @param state {@code true} to enable column reordering from CSV header,
* {@code false} to disable (default)
* @return A copy of this schema with the updated setting
*
* @see #withHeader()
* @since 2.7
*/
public CsvSchema withColumnReordering(boolean state) {
return _withFeature(ENCODING_FEATURE_REORDER_COLUMNS, state);
}
/**
* Returns a clone of this instance by changing or setting the
* strict headers flag
*
* @param state New value for setting
* @return A copy of itself, ensuring the setting for
* the strict headers feature.
* @since 2.7
*/
public CsvSchema withStrictHeaders(boolean state) {
return _withFeature(ENCODING_FEATURE_STRICT_HEADERS, state);
}
/**
* Helper method for constructing and returning schema instance that
* is similar to this one, except that it will be using a header line
* (first row of CSV content used for reading and/or writing column names).
*<p>
* NOTE: when this schema already has columns defined (for example, via
* {@link CsvMapper#schemaFor(Class)}), and the CSV input has a header line
* with columns in a different order than defined in the schema, columns
* will be matched by their position in the schema — not by
* the header names — unless {@link #withColumnReordering(boolean)} is
* also enabled. Without column reordering, the header line is consumed
* (validated or skipped) but does NOT change the column order of the schema.
* This can lead to values being mapped to wrong properties if the CSV column
* order differs from the schema column order.
*<p>
* To use header names for column matching regardless of order, call:
*<pre>
*   schema.withHeader().withColumnReordering(true)
*</pre>
* Alternatively, if you do not need type information from the schema,
* use {@link CsvSchema#emptySchema()}{@code .withHeader()} which always
* builds its column definitions from the header line.
*
* @see #withColumnReordering(boolean)
*/
public CsvSchema withHeader() {
return _withFeature(ENCODING_FEATURE_USE_HEADER, true);
}
/**
* Helper method for constructing and returning schema instance that
* is similar to this one, except that it will not be using a header line.
*/
public CsvSchema withoutHeader() {
return _withFeature(ENCODING_FEATURE_USE_HEADER, false);
}
/**
* Returns a copy of this schema (or {@code this} if no change) with the
* skip-first-data-row flag set to given state.
*
* @param state Whether the first data row should be skipped when reading
*/
public CsvSchema withSkipFirstDataRow(boolean state) {
return _withFeature(ENCODING_FEATURE_SKIP_FIRST_DATA_ROW, state);
}
/**
* Method to indicate whether "hash comments" are allowed
* for document described by this schema.
*
* @since 2.5
*/
public CsvSchema withAllowComments(boolean state) {
return _withFeature(ENCODING_FEATURE_ALLOW_COMMENTS, state);
}
/**
* Method to indicate that "hash comments" ARE allowed
* for document described by this schema.
*
* @since 2.5
*/
public CsvSchema withComments() {
return _withFeature(ENCODING_FEATURE_ALLOW_COMMENTS, true);
}
/**
* Method to indicate that "hash comments" are NOT allowed for document
* described by this schema.
*
* @since 2.5
*/
public CsvSchema withoutComments() {
return _withFeature(ENCODING_FEATURE_ALLOW_COMMENTS, false);
}
/**
 * Helper method for computing a copy of this schema with given encoding
 * feature bit set or cleared; returns {@code this} when the resulting
 * bitmask is unchanged (no new instance needed).
 *
 * @param feature Bitmask of the feature to change
 * @param state {@code true} to set the feature bit; {@code false} to clear it
 * @return Schema instance with requested feature state
 */
protected CsvSchema _withFeature(int feature, boolean state) {
    final int newFeatures;
    if (state) {
        newFeatures = _features | feature;
    } else {
        newFeatures = _features & ~feature;
    }
    if (newFeatures == _features) {
        return this;
    }
    return new CsvSchema(this, newFeatures);
}
/**
* Returns a copy of this schema (or {@code this} if no change) that uses
* specified character as the column (value) separator.
*/
public CsvSchema withColumnSeparator(char sep) {
return (_columnSeparator == sep) ? this :
new CsvSchema(_columns, _features,
sep, _quoteChar, _escapeChar, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
/**
* Returns a copy of this schema (or {@code this} if no change) that uses
* specified character for quoting values.
*/
public CsvSchema withQuoteChar(char c) {
return (_quoteChar == c) ? this :
new CsvSchema(_columns, _features,
_columnSeparator, c, _escapeChar, _lineSeparator,_arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
/**
* Returns a copy of this schema (or {@code this} if no change) that does
* not use any quote character (internally marked as {@code -1}).
*/
public CsvSchema withoutQuoteChar() {
return (_quoteChar == -1) ? this :
new CsvSchema(_columns, _features,
_columnSeparator, -1, _escapeChar, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
/**
* Returns a copy of this schema (or {@code this} if no change) that uses
* specified character for escaping.
*/
public CsvSchema withEscapeChar(char c) {
return (_escapeChar == c) ? this
: new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, c, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
/**
* Returns a copy of this schema (or {@code this} if no change) that does
* not use any escape character (internally marked as {@code -1}).
*/
public CsvSchema withoutEscapeChar() {
return (_escapeChar == -1) ? this
: new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, -1, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
/**
 * Returns a copy of this schema (or {@code this} if no change) that uses
 * specified String as separator between elements of array values encoded
 * in a single column.
 *
 * @param separator Separator to use; {@code null} is normalized to empty
 *    String (meaning "no array element separator")
 * @return This schema if setting unchanged; otherwise newly constructed instance
 *
 * @since 2.7
 */
public CsvSchema withArrayElementSeparator(String separator) {
    // Normalize null to "" so "_arrayElementSeparator" never becomes null
    String sep = (separator == null) ? "" : separator;
    if (_arrayElementSeparator.equals(sep)) {
        return this;
    }
    // Fix: must pass normalized "sep", not raw (possibly null) "separator",
    // to avoid leaking null into the constructed instance
    return new CsvSchema(_columns, _features,
            _columnSeparator, _quoteChar, _escapeChar, _lineSeparator, sep,
            _nullValue, _columnsByName, _anyPropertyName);
}
/**
* Returns a copy of this schema (or {@code this} if no change) that does
* not use any array element separator (marked by empty String).
*
* @since 2.5
*/
public CsvSchema withoutArrayElementSeparator() {
return (_arrayElementSeparator.isEmpty()) ? this
: new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, _escapeChar, _lineSeparator, "",
_nullValue, _columnsByName, _anyPropertyName);
}
/**
* Returns a copy of this schema that uses specified String as the
* line separator written between rows.
*<p>
* NOTE(review): passing {@code null} here throws {@code NullPointerException}
* from {@code sep.toCharArray()} — callers must pass a non-null value.
*/
public CsvSchema withLineSeparator(String sep) {
return new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, _escapeChar, sep.toCharArray(),
_arrayElementSeparator, _nullValue, _columnsByName, _anyPropertyName);
}
/**
* Returns a copy of this schema that recognizes given String as "null value"
* marker; {@code null} argument means no null-value marker is used.
*
* @since 2.5
*/
public CsvSchema withNullValue(String nvl) {
return new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, _escapeChar, _lineSeparator,
_arrayElementSeparator,
(nvl == null) ? null : nvl.toCharArray(),
_columnsByName, _anyPropertyName);
}
/**
* Returns a copy of this schema with all column definitions removed,
* retaining all other settings.
*/
public CsvSchema withoutColumns() {
return new CsvSchema(NO_COLUMNS, _features,
_columnSeparator, _quoteChar, _escapeChar, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, _anyPropertyName);
}
/**
 * Mutant factory method that combines columns of this schema with those
 * from {@code toAppend}: columns of this instance come first, and columns
 * from the argument whose names are already defined here are ignored.
 * All settings aside from column sets are copied from {@code this} instance.
 *<p>
 * As with all {@code withXxx()} methods this method never modifies
 * {@code this}: it either returns it unmodified (if no new columns were
 * found in {@code toAppend}), or constructs and returns a new instance.
 *
 * @return Either this schema (if nothing changed), or newly constructed
 *    {@link CsvSchema} with appended columns.
 *
 * @since 2.9
 */
public CsvSchema withColumnsFrom(CsvSchema toAppend) {
    final int count = toAppend.size();
    if (count == 0) {
        // Nothing to append; retain this instance as-is
        return this;
    }
    Builder b = rebuild();
    for (int i = 0; i < count; ++i) {
        Column candidate = toAppend.column(i);
        // Only append columns whose name this schema does not yet define
        if (column(candidate.getName()) == null) {
            b.addColumn(candidate);
        }
    }
    return b.build();
}
/**
 * Mutant factory method that will try to replace specified column with
 * changed definition (but same name), leaving other columns as-is.
 *<p>
 * As with all {@code withXxx()} methods this method never modifies
 * {@code this}: it either returns it unmodified (if no change to column),
 * or constructs and returns a new schema instance.
 *
 * @param columnName Name of column to replace
 * @param transformer Transformation to apply to the column
 *
 * @return Either this schema (if column did not change), or newly constructed
 *    {@link CsvSchema} with changed column
 *
 * @throws IllegalArgumentException if no column with given name exists
 *
 * @since 2.18
 */
public CsvSchema withColumn(String columnName, UnaryOperator<Column> transformer) {
    // Restored stripped generic type <Column>: raw UnaryOperator would not
    // compile here since apply() would return Object
    Column old = column(columnName);
    if (old == null) {
        throw new IllegalArgumentException("No column '"+columnName+"' in CsvSchema (known columns: "
                +getColumnNames()+")");
    }
    Column newColumn = transformer.apply(old);
    if (newColumn == old) {
        // Identity result means no change; avoid rebuilding
        return this;
    }
    return _withColumn(old.getIndex(), newColumn);
}
/**
 * Mutant factory method that will try to replace specified column with
 * changed definition (but same name), leaving other columns as-is.
 *<p>
 * As with all {@code withXxx()} methods this method never modifies
 * {@code this}: it either returns it unmodified (if no change to column),
 * or constructs and returns a new schema instance.
 *
 * @param columnIndex Index of column to replace
 * @param transformer Transformation to apply to the column
 *
 * @return Either this schema (if column did not change), or newly constructed
 *    {@link CsvSchema} with changed column
 *
 * @throws IllegalArgumentException if given index is out of bounds
 *
 * @since 2.18
 */
public CsvSchema withColumn(int columnIndex, UnaryOperator<Column> transformer) {
    // Restored stripped generic type <Column>: raw UnaryOperator would not
    // compile here since apply() would return Object
    if (columnIndex < 0 || columnIndex >= size()) {
        throw new IllegalArgumentException("Illegal index "+columnIndex+"; `CsvSchema` has "+size()+" columns");
    }
    Column old = _columns[columnIndex];
    Column newColumn = transformer.apply(old);
    if (newColumn == old) {
        // Identity result means no change; avoid rebuilding
        return this;
    }
    return _withColumn(old.getIndex(), newColumn);
}
/**
 * Helper method that replaces the column at given index with given
 * (non-null) replacement, building a new schema instance via the builder.
 *
 * @param ix Index of column to replace (must be valid)
 * @param toReplace Replacement column definition (must not be {@code null})
 * @return Newly constructed schema with replaced column
 * @throws IllegalArgumentException if index is out of bounds
 *
 * @since 2.18
 */
protected CsvSchema _withColumn(int ix, Column toReplace) {
    Objects.requireNonNull(toReplace);
    if ((ix < 0) || (ix >= size())) {
        throw new IllegalArgumentException("Illegal index for column '"+toReplace.getName()+"': "
                +ix+" (column count: "+size()+")");
    }
    Builder b = rebuild();
    b.replaceColumn(ix, toReplace);
    return b.build();
}
/**
* Returns a copy of this schema that uses given name for the virtual
* "any property" column (used for properties not matched by declared columns).
*
* @since 2.7
*/
public CsvSchema withAnyPropertyName(String name) {
return new CsvSchema(_columns, _features,
_columnSeparator, _quoteChar, _escapeChar, _lineSeparator, _arrayElementSeparator,
_nullValue, _columnsByName, name);
}
/**
 * Mutant factory method that will construct a new instance in which columns
 * are sorted based on names given as argument. Columns not listed in the
 * argument will be sorted after those within the list, retaining their
 * existing relative ordering.
 *<p>
 * For example, a schema with columns {@code "a", "d", "c", "b"},
 * ordered with {@code schema.sortedBy("a", "b")}, would result in an
 * instance with columns in order {@code "a", "b", "d", "c"}.
 *
 * @since 2.4
 */
public CsvSchema sortedBy(String... columnNames)
{
    // Restored stripped generics: raw map's toArray(T[]) would not compile.
    // LinkedHashMap retains insertion order: explicitly listed names come
    // first; the second loop's put() keeps existing keys in place.
    LinkedHashMap<String,Column> map = new LinkedHashMap<>();
    for (String colName : columnNames) {
        Column col = _columnsByName.get(colName);
        if (col != null) {
            map.put(col.getName(), col);
        }
    }
    for (Column col : _columns) {
        map.put(col.getName(), col);
    }
    return new CsvSchema(this, map.values().toArray(new Column[map.size()]));
}
/**
 * Mutant factory method that will construct a new instance in which columns
 * are sorted using given {@link Comparator} over column names.
 *
 * @since 2.4
 */
public CsvSchema sortedBy(Comparator<String> cmp) {
    // Restored stripped generics: raw map's toArray(T[]) would not compile
    TreeMap<String,Column> map = new TreeMap<>(cmp);
    for (Column col : _columns) {
        map.put(col.getName(), col);
    }
    return new CsvSchema(this, map.values().toArray(new Column[map.size()]));
}
/*
/**********************************************************************
/* Public API, FormatSchema
/**********************************************************************
*/
@Override
public String getSchemaType() {
return "CSV";
}
/*
/**********************************************************************
/* Public API, extended, properties
/**********************************************************************
*/
// Simple accessors for encoding-feature flags and separator/quote settings
public boolean usesHeader() { return (_features & ENCODING_FEATURE_USE_HEADER) != 0; }
/**
* Whether column reordering from the CSV header line is enabled.
*
* @see #withColumnReordering(boolean)
* @since 2.7
*/
public boolean reordersColumns() { return (_features & ENCODING_FEATURE_REORDER_COLUMNS) != 0; }
public boolean skipsFirstDataRow() { return (_features & ENCODING_FEATURE_SKIP_FIRST_DATA_ROW) != 0; }
public boolean allowsComments() { return (_features & ENCODING_FEATURE_ALLOW_COMMENTS) != 0; }
public boolean strictHeaders() { return (_features & ENCODING_FEATURE_STRICT_HEADERS) != 0; }
public char getColumnSeparator() { return _columnSeparator; }
public String getArrayElementSeparator() { return _arrayElementSeparator; }
// Quote/escape chars are ints so that -1 can mark "not used"
public int getQuoteChar() { return _quoteChar; }
public int getEscapeChar() { return _escapeChar; }
public char[] getLineSeparator() { return _lineSeparator; }
/**
* @return Null value defined, as char array, if one is defined to be recognized; Java null
*   if not.
*
* @since 2.5
*/
public char[] getNullValue() { return _nullValue; }
/**
 * Same as {@link #getNullValue()} except that an undefined null value
 * (one left as, or explicitly set to, {@code null}) is returned as an
 * empty {@code char[]} instead.
 *
 * @since 2.6
 */
public char[] getNullValueOrEmpty() {
    return (_nullValue == null) ? NO_CHARS : _nullValue;
}
/**
 * Accessor for the "null value" marker as a String; result is lazily
 * computed from {@code _nullValue} and cached in {@code _nullValueAsString}.
 *
 * @return Null-value marker as String, or Java {@code null} if none defined
 *
 * @since 2.6
 */
public String getNullValueString() {
    String str = _nullValueAsString;
    if (str != null) {
        // Cached from an earlier call
        return str;
    }
    if (_nullValue == null) {
        // No marker defined; nothing to cache
        return null;
    }
    str = (_nullValue.length == 0) ? "" : new String(_nullValue);
    _nullValueAsString = str;
    return str;
}
// Negative quote/escape char (-1) marks "not used"
public boolean usesQuoteChar() { return _quoteChar >= 0; }
public boolean usesEscapeChar() { return _escapeChar >= 0; }
/**
* Whether a (non-empty) array element separator has been configured.
*
* @since 2.5
*/
public boolean hasArrayElementSeparator() { return !_arrayElementSeparator.isEmpty(); }
/**
* Name of the virtual "any property" column, if one configured; {@code null} if not.
*
* @since 2.7
*/
public String getAnyPropertyName() { return _anyPropertyName; }
/*
/**********************************************************************
/* Public API, extended; column access
/**********************************************************************
*/
/**
 * Iterator over the column definitions, in schema order.
 * (Restored stripped generic type {@code <Column>} on the return type.)
 */
@Override
public Iterator<Column> iterator() {
    return Arrays.asList(_columns).iterator();
}
/**
* Accessor for finding out how many columns this schema defines.
*
* @return Number of columns this schema defines
*/
public int size() { return _columns.length; }
/**
* Accessor for column at specified index (0-based); index must be within
* {@code 0 <= index < size()} — out-of-range values throw
* {@code ArrayIndexOutOfBoundsException}.
*/
public Column column(int index) {
return _columns[index];
}
/**
* Method for finding index of a named column within this schema.
*
* @param name Name of column to find
* @return Index of the specified column, if one exists; {@code -1} if not
*
* @since 2.18
*/
public int columnIndex(String name) {
Column col = column(name);
return (col == null) ? -1 : col.getIndex();
}
/**
* Accessor for name of column at given index (0-based).
*
* @since 2.6
*/
public String columnName(int index) {
return _columns[index].getName();
}
// Exact (case-sensitive) lookup by name; null if no such column
public Column column(String name) {
return _columnsByName.get(name);
}
/**
 * Case-insensitive variant of {@link #column(String)}: looks up a column
 * by name, ignoring case differences.
 *
 * @param name Column name to look up (case-insensitive)
 * @return Column with matching name, or {@code null} if not found
 *
 * @since 3.2
 */
public Column columnIgnoreCase(String name) {
    // Fast path: exact (case-sensitive) lookup via the name index
    Column exact = _columnsByName.get(name);
    if (exact != null) {
        return exact;
    }
    // Slow path: linear scan, comparing names case-insensitively
    for (int i = 0, end = _columns.length; i < end; ++i) {
        Column candidate = _columns[i];
        if (candidate.getName().equalsIgnoreCase(name)) {
            return candidate;
        }
    }
    return null;
}
/**
 * Optimized variant of {@link #column(String)} where a hint is given as to
 * the likely index of the column name: checks the hinted slot first, falling
 * back to name lookup if it does not match.
 *
 * @param name Name of column to find
 * @param probableIndex Likely index of the column; invalid hints (negative
 *    or too large) are simply ignored
 *
 * @since 2.6
 */
public Column column(String name, int probableIndex) {
    // Fix: also guard against negative hints — original checked only the
    // upper bound, so a negative probableIndex threw
    // ArrayIndexOutOfBoundsException instead of falling back to name lookup
    if ((probableIndex >= 0) && (probableIndex < _columns.length)) {
        Column col = _columns[probableIndex];
        if (col.hasName(name)) {
            return col;
        }
    }
    return _columnsByName.get(name);
}
/**
 * Accessor for getting names of included columns, in the order they are
 * included in the schema.
 * (Restored stripped generic types {@code <String>}.)
 *
 * @since 2.14
 */
public List<String> getColumnNames() {
    return (List<String>) getColumnNames(new ArrayList<String>(_columns.length));
}
/**
 * Accessor for getting names of included columns, added into given
 * {@code Collection} (in schema order).
 * (Restored stripped generic types {@code <String>}.)
 *
 * @param names Collection to add column names into
 * @return The given collection, with names added
 *
 * @since 2.14
 */
public Collection<String> getColumnNames(Collection<String> names) {
    final int len = _columns.length;
    for (int i = 0; i < len; ++i) {
        names.add(_columns[i].getName());
    }
    return names;
}
/**
 * Method for getting a developer-readable description of the column
 * definitions, as a bracketed list of double-quoted names, e.g.
 * {@code ["a","b"]}.
 */
public String getColumnDesc()
{
    StringBuilder sb = new StringBuilder(100);
    sb.append('[');
    boolean first = true;
    for (Column col : _columns) {
        if (!first) {
            sb.append(',');
        }
        first = false;
        sb.append('"').append(col.getName()).append('"');
    }
    sb.append(']');
    return sb.toString();
}
/*
/**********************************************************************
/* Other overrides
/**********************************************************************
*/
/**
 * Diagnostic description: column names with their types, followed by the
 * main on/off settings and the "any property" name (or {@code N/A}).
 */
@Override
public String toString()
{
    StringBuilder sb = new StringBuilder(150);
    sb.append("[CsvSchema: ").append("columns=[");
    String delim = "";
    for (Column col : _columns) {
        sb.append(delim);
        delim = ",";
        sb.append('"').append(col.getName()).append("\"/").append(col.getType());
    }
    sb.append(']');
    sb.append(", header? ").append(usesHeader());
    sb.append(", skipFirst? ").append(skipsFirstDataRow());
    sb.append(", comments? ").append(allowsComments());
    sb.append(", any-properties? ");
    String anyProp = getAnyPropertyName();
    if (anyProp == null) {
        sb.append("N/A");
    } else {
        sb.append("as '").append(anyProp).append("'");
    }
    sb.append(']');
    return sb.toString();
}
/*
/**********************************************************************
/* Helper methods
/**********************************************************************
*/
/**
 * Normalizes an array-element-separator value: {@code null} and empty
 * String both map to the shared "no separator" marker.
 */
protected static String _validArrayElementSeparator(String sep) {
    return (sep == null || sep.isEmpty()) ? NO_ARRAY_ELEMENT_SEPARATOR : sep;
}
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/CsvValueDecorator.java
================================================
package tools.jackson.dataformat.csv;
import tools.jackson.core.JacksonException;
/**
* Interface defining API for handlers that can add and remove "decorations"
* to CSV values: for example, brackets around Array (List) values encoded
* in a single physical String column.
*<p>
* Decorations are handled after handling other encoding aspects such as
* optional quoting and/or escaping.
*<p>
* Decorators can be registered on specific columns of {@link CsvSchema}.
*
* @since 2.18
*/
public interface CsvValueDecorator
{
/**
* Method called during serialization when encoding a value,
* to produce "decorated" value to include in output (possibly
* escaped and/or quoted).
* Note that possible escaping and/or quoting (as per configuration
* of {@link CsvSchema}) is applied on the decorated value.
*
* @param gen Generator that will be used for actual serialization
* @param plainValue Value to decorate
*
* @return Decorated value (which may be {@code plainValue} as-is) but
*   must NOT be {@code null}
*
* @throws JacksonException if attempt to decorate the value somehow fails
*/
public String decorateValue(CsvGenerator gen, String plainValue)
throws JacksonException;
/**
* Method called instead of {@link #decorateValue} in case where value being
* written is from Java {@code null} value: this is often left as-is, without
* decoration (and this is the default implementation), but may be
* decorated.
* To let default Null Value Replacement be used, should return {@code null}:
* this is the default implementation.
*
* @param gen Generator that will be used for actual serialization
*
* @return Decorated value to use, IF NOT {@code null}: if {@code null} will use
*   default null replacement value.
*
* @throws JacksonException if attempt to decorate the value somehow fails
*/
public default String decorateNull(CsvGenerator gen)
throws JacksonException
{
return null;
}
/**
* Method called during deserialization, to remove possible decoration
* applied with {@link #decorateValue}.
* Call is made after textual value for a cell (column
* value) has been read using {@code parser} and after removing (decoding)
* possible quoting and/or escaping of the value. Value passed in
* has no escaping or quoting left.
*
* @param parser Parser that was used to decode textual value from input
* @param decoratedValue Value from which to remove decorations, if any
*   (some decorators can allow optional decorations; others may fail
*   if none found)
*
* @return Value after removing decorations, if any.
*
* @throws JacksonException if attempt to un-decorate the value fails
*/
public String undecorateValue(CsvParser parser, String decoratedValue)
throws JacksonException;
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/CsvValueDecorators.java
================================================
package tools.jackson.dataformat.csv;
import java.util.Objects;
import tools.jackson.core.JacksonException;
/**
* Container class for default {@link CsvValueDecorator} implementations
*
* @since 2.18
*/
public class CsvValueDecorators
{
/**
* {@link StringPrefixSuffixDecorator} that uses square brackets ({@code []})
* around decorated value, but does not require their use (removes decoration
* if used, ignores if not).
*/
public final static CsvValueDecorator OPTIONAL_BRACKETS_DECORATOR
= new StringPrefixSuffixDecorator("[", "]", true);
/**
* {@link StringPrefixSuffixDecorator} that uses square brackets ({@code []})
* around decorated value, and requires their use (if value has no matching
* decoration, an exception is thrown when attempting to read the value).
*/
public final static CsvValueDecorator STRICT_BRACKETS_DECORATOR
= new StringPrefixSuffixDecorator("[", "]", false);
/**
* Factory method for constructing a {@link StringPrefixSuffixDecorator} with
* given prefix and suffix, both optional.
*/
public static CsvValueDecorator optionalPrefixSuffixDecorator(String prefix, String suffix) {
return new StringPrefixSuffixDecorator(prefix, suffix, true);
}
/**
* Factory method for constructing a {@link StringPrefixSuffixDecorator} with
* given prefix and suffix, both required.
*/
public static CsvValueDecorator requiredPrefixSuffixDecorator(String prefix, String suffix) {
return new StringPrefixSuffixDecorator(prefix, suffix, false);
}
/**
* Decorator that adds a static prefix and suffix around value to decorate;
* removes the same when un-decorating. Handling of the case where decoration
* is missing on deserialization (reading) depends on whether decorator is
* created with "optional" or "strict" setting
* (see {@link StringPrefixSuffixDecorator#StringPrefixSuffixDecorator}).
*/
public static class StringPrefixSuffixDecorator
implements CsvValueDecorator
{
/**
* Decoration added before value being decorated: for example, if decorating
* with brackets, this would be opening bracket {@code [ }.
*/
protected final String _prefix;
/**
* Decoration added after value being decorated: for example, if decorating
* with brackets, this would be closing bracket {@code ] }.
*/
protected final String _suffix;
/**
* Whether existence of prefix and suffix decoration is optional
* ({@code true}) or required ({@code false}): if required
* and value does not have decorations, deserialization (reading)
* will fail with an exception; if optional, value is exposed as-is.
*/
protected final boolean _optional;
public StringPrefixSuffixDecorator(String prefix, String suffix, boolean optional) {
_prefix = Objects.requireNonNull(prefix);
_suffix = Objects.requireNonNull(suffix);
_optional = optional;
}
@Override
public String decorateValue(CsvGenerator gen, String plainValue) throws JacksonException {
// Pre-size to exact final length to avoid StringBuilder growth
return new StringBuilder(plainValue.length() + _prefix.length() + _suffix.length())
.append(_prefix)
.append(plainValue)
.append(_suffix)
.toString()
;
}
@Override
public String undecorateValue(CsvParser parser, String decoratedValue) throws JacksonException {
// Missing prefix: fail in strict mode, pass value through if optional
if (!decoratedValue.startsWith(_prefix)) {
if (!_optional) {
parser._reportCsvReadError(String.format(
"Decorated value of column '%s' does not start with expected prefix (\"%s\"); value: \"%s\"",
parser.currentName(), _prefix, decoratedValue));
}
return decoratedValue;
}
// Missing suffix: same handling as missing prefix
if (!decoratedValue.endsWith(_suffix)) {
if (!_optional) {
parser._reportCsvReadError(String.format(
"Decorated value of column '%s' does not end with expected suffix (\"%s\"); value: \"%s\"",
parser.currentName(), _suffix, decoratedValue));
}
return decoratedValue;
}
int start = _prefix.length();
int end = decoratedValue.length() - _suffix.length();
// One minor complication: suffix and prefix could overlap
if (start >= end) {
return "";
}
return decoratedValue.substring(start, end);
}
}
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/CsvWriteException.java
================================================
package tools.jackson.dataformat.csv;
import tools.jackson.core.exc.StreamWriteException;
/**
* Format-specific exception used to indicate problems regarding low-level
* generation issues specific to CSV content;
* usually problems with field-to-column mapping as defined by {@link CsvSchema}.
*/
public class CsvWriteException
extends StreamWriteException
{
private static final long serialVersionUID = 3L;
// Schema in use when the problem occurred; retained for error reporting
protected final CsvSchema _schema;
public CsvWriteException(CsvGenerator gen, String msg, CsvSchema schema) {
super(gen, msg);
_schema = schema;
}
/**
* Factory method, equivalent to calling the constructor directly.
*/
public static CsvWriteException from(CsvGenerator gen, String msg, CsvSchema schema) {
return new CsvWriteException(gen, msg, schema);
}
/**
* Accessor for {@link CsvSchema} in use when this exception was constructed.
*/
public CsvSchema getSchema() { return _schema; }
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/CsvWriteFeature.java
================================================
package tools.jackson.dataformat.csv;
import tools.jackson.core.FormatFeature;
/**
* Enumeration that defines all togglable features for CSV writers
*<p>
* NOTE: in Jackson 2.x this was named {@code CsvGenerator.Feature}.
*/
public enum CsvWriteFeature
implements FormatFeature
{
/**
* Feature that determines how much work is done before determining that
* a column value requires quoting: when set as true, full
* check is made to only use quoting when it is strictly necessary;
* but when {@code false}, a faster but more conservative check
* is made, and possibly quoting is used for values that might not need it.
* Trade-off is basically between optimal/minimal quoting ({@code true}), and
* faster handling ({@code false}).
* Faster check involves only checking first N characters of value, as well
* as possible looser checks.
*<p>
* Note, however, that regardless of setting, all values that need to be quoted
* will be: it is just that when set to {@code false}, other values may
* also be quoted (to avoid having to do more expensive checks).
*<p>
* Default value is {@code false} for "loose" (approximate, conservative)
* checking.
*/
STRICT_CHECK_FOR_QUOTING(false),
/**
* Feature that determines whether columns without matching value may be omitted,
* when they are the last values of the row.
* If {@code true}, values and separators between values may be omitted, to slightly reduce
* length of the row; if {@code false}, separators need to stay in place and values
* are indicated by empty Strings.
*/
OMIT_MISSING_TAIL_COLUMNS(false),
/**
* Feature that determines whether values written as Strings (from java.lang.String
* valued POJO properties) should be forced to be quoted, regardless of whether they
* actually need this.
* Note that this feature has precedence over {@link #STRICT_CHECK_FOR_QUOTING}, when
* both would be applicable.
* Note that this setting does NOT affect quoting of typed values like {@code Number}s
* or {@code Boolean}s.
*/
ALWAYS_QUOTE_STRINGS(false),
/**
* Feature that determines whether values written as empty Strings (from java.lang.String
* valued POJO properties) should be forced to be quoted.
*/
ALWAYS_QUOTE_EMPTY_STRINGS(false),
/**
* Feature that determines whether String values with leading or trailing
* whitespace (any character {@code <= 0x0020}, including space and tab)
* should be forced to be quoted.
* This is useful for interoperability with CSV parsers that trim unquoted
* whitespace.
*<p>
* Default value is {@code false} so that leading/trailing whitespace
* does not by itself trigger quoting.
*
* @since 3.2
*/
QUOTE_STRINGS_WITH_LEADING_TRAILING_WHITESPACE(false),
/**
* Feature that determines whether values written as Numbers (from {@code java.lang.Number}
* valued POJO properties) should be forced to be quoted, regardless of whether they
* actually need this.
*/
ALWAYS_QUOTE_NUMBERS(false),
/**
* Feature that determines whether quote characters within quoted String values are escaped
* using configured escape character, instead of being "doubled up" (that is: a quote character
* is written twice in a row).
*<p>
* Default value is false so that quotes are doubled as necessary, not escaped.
*/
ESCAPE_QUOTE_CHAR_WITH_ESCAPE_CHAR(false),
/**
* Feature that determines whether control characters (non-printable) are escaped using the
* configured escape character. This feature allows LF and CR characters to be output as
* {@code \n} and {@code \r} instead of being echoed out. This is a compatibility feature for some
* parsers that can not read such output back in.
*<p>
* Default value is false so that control characters are echoed out (backwards compatible).
*/
ESCAPE_CONTROL_CHARS_WITH_ESCAPE_CHAR(false),
/**
* Feature that determines whether a line-feed will be written at the end of content,
* after the last row of output.
*<p>
* NOTE! When disabling this feature it is important that
* {@link CsvGenerator#flush()} is NOT called before {@link CsvGenerator#close()} is called;
* the current implementation relies on ability to essentially remove the
* last linefeed that was appended in the output buffer.
*<p>
* Default value is {@code true} so all rows, including the last, are terminated by
* a line feed.
*/
WRITE_LINEFEED_AFTER_LAST_ROW(true)
;
private final boolean _defaultState;
private final int _mask;
/**
* Method that calculates bit set (flags) of all features that
* are enabled by default.
*/
public static int collectDefaults()
{
int flags = 0;
for (CsvWriteFeature f : values()) {
if (f.enabledByDefault()) {
flags |= f.getMask();
}
}
return flags;
}
private CsvWriteFeature(boolean defaultState) {
_defaultState = defaultState;
// Mask derived from ordinal: each feature occupies one distinct bit
_mask = (1 << ordinal());
}
@Override
public boolean enabledIn(int flags) { return (flags & _mask) != 0; }
@Override
public boolean enabledByDefault() { return _defaultState; }
@Override
public int getMask() { return _mask; }
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/PackageVersion.java.in
================================================
package @package@;
import tools.jackson.core.Version;
import tools.jackson.core.Versioned;
import tools.jackson.core.util.VersionUtil;
/**
* Automatically generated from PackageVersion.java.in during
* packageVersion-generate execution of maven-replacer-plugin in
* pom.xml.
*<p>
* NOTE: {@code @xxx@} placeholders are substituted at build time; this
* template is not compilable as-is.
*/
public final class PackageVersion implements Versioned {
public final static Version VERSION = VersionUtil.parseVersion(
"@projectversion@", "@projectgroupid@", "@projectartifactid@");
@Override
public Version version() {
return VERSION;
}
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/impl/BufferedValue.java
================================================
package tools.jackson.dataformat.csv.impl;
import tools.jackson.core.JacksonException;
/**
* Helper class used for holding values for a while until output
* can proceed in expected order. Each concrete subtype wraps one typed
* value and knows how to append it to a {@link CsvEncoder}.
*/
public abstract class BufferedValue
{
protected BufferedValue() { }
/**
* Appends the buffered value to given encoder, using the append method
* appropriate for the wrapped type.
*/
public abstract void write(CsvEncoder w) throws JacksonException;
// Factory methods: one per wrapped value type
public static BufferedValue buffered(String v) { return new TextValue(v); }
public static BufferedValue bufferedRaw(String v) { return new RawValue(v); }
public static BufferedValue buffered(int v) { return new IntValue(v); }
public static BufferedValue buffered(long v) { return new LongValue(v); }
public static BufferedValue buffered(float v) { return new FloatValue(v); }
public static BufferedValue buffered(double v) { return new DoubleValue(v); }
public static BufferedValue bufferedNumber(String numStr) { return new BigNumberValue(numStr); }
public static BufferedValue buffered(boolean v) {
// Booleans have only two values; reuse shared singletons
return v ? BooleanValue.TRUE : BooleanValue.FALSE;
}
public static BufferedValue bufferedNull() {
// Nulls carry no state; reuse shared singleton
return NullValue.std;
}
// Buffered String value, written with regular value escaping/quoting
protected final static class TextValue extends BufferedValue
{
private final String _value;
public TextValue(String v) { _value = v; }
@Override
public void write(CsvEncoder w) throws JacksonException {
w.appendValue(_value);
}
}
/**
* Buffered String value written as-is (raw), without escaping/quoting.
*
* @since 2.5
*/
protected final static class RawValue extends BufferedValue
{
private final String _value;
public RawValue(String v) { _value = v; }
@Override
public void write(CsvEncoder w) throws JacksonException {
w.appendRawValue(_value);
}
}
protected final static class IntValue extends BufferedValue
{
private final int _value;
public IntValue(int v) { _value = v; }
@Override
public void write(CsvEncoder w) throws JacksonException {
w.appendValue(_value);
}
}
protected final static class LongValue extends BufferedValue
{
private final long _value;
public LongValue(long v) { _value = v; }
@Override
public void write(CsvEncoder w) throws JacksonException {
w.appendValue(_value);
}
}
// @since 2.16
protected final static class FloatValue extends BufferedValue
{
private final float _value;
public FloatValue(float v) { _value = v; }
@Override
public void write(CsvEncoder w) throws JacksonException {
w.appendValue(_value);
}
}
protected final static class DoubleValue extends BufferedValue
{
private final double _value;
public DoubleValue(double v) { _value = v; }
@Override
public void write(CsvEncoder w) throws JacksonException {
w.appendValue(_value);
}
}
// Buffered BigInteger/BigDecimal value, retained in String form
protected final static class BigNumberValue extends BufferedValue
{
private final String _value;
public BigNumberValue(String v) { _value = v; }
@Override
public void write(CsvEncoder w) throws JacksonException {
w.appendNumberValue(_value);
}
}
protected final static class BooleanValue extends BufferedValue
{
public final static BooleanValue FALSE = new BooleanValue(false);
public final static BooleanValue TRUE = new BooleanValue(true);
private final boolean _value;
public BooleanValue(boolean v) { _value = v; }
@Override
public void write(CsvEncoder w) throws JacksonException {
w.appendValue(_value);
}
}
protected final static class NullValue extends BufferedValue {
public final static NullValue std = new NullValue();
private NullValue() { }
@Override
public void write(CsvEncoder w) throws JacksonException {
w.appendNull();
}
}
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/impl/CsvDecoder.java
================================================
package tools.jackson.dataformat.csv.impl;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Arrays;
import tools.jackson.core.*;
import tools.jackson.core.JsonParser.NumberType;
import tools.jackson.core.StreamReadFeature;
import tools.jackson.core.exc.StreamReadException;
import tools.jackson.core.io.NumberInput;
import tools.jackson.core.io.IOContext;
import tools.jackson.core.util.SimpleStreamReadContext;
import tools.jackson.core.util.TextBuffer;
import tools.jackson.dataformat.csv.CsvParser;
import tools.jackson.dataformat.csv.CsvReadFeature;
import tools.jackson.dataformat.csv.CsvSchema;
/**
* Low-level helper class that handles actual reading of CSV,
* purely based on indexes given without worrying about reordering etc.
*/
public class CsvDecoder
{
// Character codes with special handling during decoding
private final static int INT_SPACE = 0x0020;
private final static int INT_CR = '\r';
private final static int INT_LF = '\n';
/*
/**********************************************************************
/* Input handling, configuration
/**********************************************************************
*/
/**
 * Unfortunate back reference, needed for error reporting
 */
protected final CsvParser _owner;

/**
 * I/O context for this reader. It handles buffer allocation
 * for the reader.
 */
protected final IOContext _ioContext;

/**
 * Input stream that can be used for reading more content, if one
 * in use. May be null, if input comes just as a full buffer,
 * or if the stream has been closed.
 *
 * NOTE: renamed in 2.13 from {@code _inputSource}.
 */
protected Reader _inputReader;

/**
 * Flag that indicates whether the input buffer is recyclable (and
 * needs to be returned to recycler once we are done) or not.
 *
 * If it is not, it also means that parser can NOT modify underlying
 * buffer.
 */
protected boolean _bufferRecyclable;

// Whether the underlying Reader is closed by this decoder on close()
// (from StreamReadFeature.AUTO_CLOSE_SOURCE)
protected boolean _autoCloseInput;

/**
 * Configuration flag that determines whether spaces surrounding
 * separator characters are to be automatically trimmed or not.
 */
protected boolean _trimSpaces;

// Whether '#'-prefixed comment lines may be skipped; enabled either by
// CsvReadFeature.ALLOW_COMMENTS or by the active schema (see setSchema())
protected boolean _allowComments;

// Whether lines consisting only of white space are to be skipped
protected boolean _skipBlankLines;

// Whether rows consisting solely of separator characters are to be skipped
protected boolean _skipEmptyRows;

/**
 * Number of separator characters consumed by {@link #_trySkipEmptyRow()} when
 * a row starting with separators turns out to contain non-empty values.
 * These consumed separators are replayed as empty String values by
 * {@link #nextString()}.
 *
 * @since 3.2
 */
protected int _pendingEmptyColumns;

/**
 * Maximum of separator character, quote character, escape character,
 * and linefeeds (\r and \n): used as a fast pre-filter in scan loops
 * (characters above this bound can never need special handling).
 */
protected int _maxSpecialChar;

// Schema-configured special characters (see setSchema());
// NOTE(review): quote/escape presumably may be negative for "none" -- confirm
// against CsvSchema
protected int _separatorChar;
protected int _quoteChar;
protected int _escapeChar;
/*
/**********************************************************************
/* Input handling, state
/**********************************************************************
 */

/**
 * Buffer that contains contents of all values after processing
 * of doubled-quotes, escaped characters.
 */
protected final TextBuffer _textBuffer;

/**
 * Current buffer from which data is read; generally data is read into
 * buffer from input source, but in some cases pre-loaded buffer
 * is handed to the parser.
 */
protected char[] _inputBuffer;

/**
 * Pointer to next available character in buffer
 */
protected int _inputPtr = 0;

/**
 * Index of character after last available one in the buffer.
 */
protected int _inputEnd = 0;

/**
 * Lazily-allocated buffer for tracking leading whitespace consumed
 * while looking ahead for a quote character ([dataformats-text#643]).
 * Reused across calls to avoid repeated allocation.
 *
 * @since 3.2
 */
protected char[] _leadingSpaceBuf;

/**
 * Marker to indicate that a linefeed was encountered and now
 * needs to be handled (indicates end-of-record). Contains the actual
 * linefeed character ('\r' or '\n'); value 1 is used as a marker for
 * end-of-input/closed state, and 0 means "nothing pending".
 */
protected int _pendingLF = 0;

/**
 * Flag that indicates whether parser is closed or not. Gets
 * set when parser is either closed by explicit call
 * ({@link #close}) or when end-of-input is reached.
 */
protected boolean _closed;
/*
/**********************************************************************
/* Current input location information
/**********************************************************************
 */

/**
 * Number of characters that were contained in previous blocks
 * (blocks that were already processed prior to the current buffer).
 */
protected long _currInputProcessed = 0L;

/**
 * Current row location of current point in input buffer, starting
 * from 1, if available.
 */
protected int _currInputRow = 1;

/**
 * Current index of the first character of the current row in input
 * buffer. Needed to calculate column position, if necessary; benefit
 * of not having column itself is that this only has to be updated
 * once per line. May go negative when buffer is reloaded (see loadMore()).
 */
protected int _currInputRowStart = 0;

/**
 * Flag that indicates whether the current token has been quoted or not.
 *
 * @since 2.18
 */
protected boolean _currInputQuoted = false;

// // // Location info at point when current token was started

/**
 * Total number of bytes/characters read before start of current token.
 * For big (gigabyte-sized) sizes are possible, needs to be long,
 * unlike pointers and sizes related to in-memory buffers.
 */
protected long _tokenInputTotal = 0;

/**
 * Input row on which current token starts, 1-based
 */
protected int _tokenInputRow = 1;

/**
 * Column on input row that current token starts; 0-based (although
 * in the end it'll be converted to 1-based)
 */
protected int _tokenInputCol = 0;
/*
/**********************************************************************
/* Constants and fields of former 'JsonNumericParserBase'
/**********************************************************************
 */

// Bit flags for _numTypesValid, indicating which representations
// of the current number have been computed
final protected static int NR_UNKNOWN = 0;

// First, integer types
final protected static int NR_INT = 0x0001;
final protected static int NR_LONG = 0x0002;
final protected static int NR_BIGINT = 0x0004;

// And then floating point types
final protected static int NR_DOUBLE = 0x008;
final protected static int NR_BIGDECIMAL = 0x0010;

// Also, we need some numeric constants (copied from ParserBase)
final static BigInteger BI_MIN_INT = BigInteger.valueOf(Integer.MIN_VALUE);
final static BigInteger BI_MAX_INT = BigInteger.valueOf(Integer.MAX_VALUE);
final static BigInteger BI_MIN_LONG = BigInteger.valueOf(Long.MIN_VALUE);
final static BigInteger BI_MAX_LONG = BigInteger.valueOf(Long.MAX_VALUE);

final static BigDecimal BD_MIN_LONG = new BigDecimal(Long.MIN_VALUE);
final static BigDecimal BD_MAX_LONG = new BigDecimal(Long.MAX_VALUE);
// 28-xxx: fixed to use `int` bounds (was erroneously using `long` bounds,
// which would let out-of-int-range BigDecimals pass int range checks)
final static BigDecimal BD_MIN_INT = new BigDecimal(Integer.MIN_VALUE);
final static BigDecimal BD_MAX_INT = new BigDecimal(Integer.MAX_VALUE);

final static long MIN_INT_L = Integer.MIN_VALUE;
final static long MAX_INT_L = Integer.MAX_VALUE;

// These are not very accurate, but have to do... (for bounds checks)
final static double MIN_LONG_D = Long.MIN_VALUE;
final static double MAX_LONG_D = Long.MAX_VALUE;
final static double MIN_INT_D = Integer.MIN_VALUE;
final static double MAX_INT_D = Integer.MAX_VALUE;

// Digits, numeric
final protected static int INT_0 = '0';
final protected static int INT_1 = '1';
final protected static int INT_2 = '2';
final protected static int INT_3 = '3';
final protected static int INT_4 = '4';
final protected static int INT_5 = '5';
final protected static int INT_6 = '6';
final protected static int INT_7 = '7';
final protected static int INT_8 = '8';
final protected static int INT_9 = '9';

final protected static int INT_MINUS = '-';
final protected static int INT_PLUS = '+';
final protected static int INT_DECIMAL_POINT = '.';

final protected static int INT_e = 'e';
final protected static int INT_E = 'E';

final protected static char CHAR_NULL = '\0';

// Numeric value holders: multiple fields used for efficiency

/**
 * Bitfield that indicates which numeric representations
 * have been calculated for the current type
 */
protected int _numTypesValid = NR_UNKNOWN;

// First primitives
protected int _numberInt;
protected long _numberLong;
protected double _numberDouble;

// And then object types
protected BigInteger _numberBigInt;
protected BigDecimal _numberBigDecimal;

/**
 * Textual number representation captured from input in cases lazy-parsing
 * is desired.
 *
 * As of 2.14, this only applies to {@link BigInteger} and {@link BigDecimal}.
 *
 * @since 2.14
 */
protected String _numberString;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
/**
 * Constructs decoder, resolving configuration from both standard stream
 * features and CSV-specific features, and pre-computing schema-derived
 * special characters (via {@link #setSchema}).
 *
 * @param ctxt I/O context used for buffer allocation/release
 * @param owner Parser that owns this decoder (back-reference for error reporting)
 * @param r Input source to read from (may be null)
 * @param schema Schema to take separator/quote/escape characters from
 * @param textBuffer Buffer for assembling decoded values
 * @param stdFeatures Bitmask of {@link StreamReadFeature}s
 * @param csvFeatures Bitmask of {@link CsvReadFeature}s
 */
public CsvDecoder(IOContext ctxt, CsvParser owner, Reader r,
        CsvSchema schema, TextBuffer textBuffer,
        int stdFeatures, int csvFeatures)
{
    _owner = owner;
    _ioContext = ctxt;
    _inputReader = r;
    _textBuffer = textBuffer;
    _autoCloseInput = StreamReadFeature.AUTO_CLOSE_SOURCE.enabledIn(stdFeatures);
    _allowComments = CsvReadFeature.ALLOW_COMMENTS.enabledIn(csvFeatures);
    _trimSpaces = CsvReadFeature.TRIM_SPACES.enabledIn(csvFeatures);
    _skipBlankLines = CsvReadFeature.SKIP_EMPTY_LINES.enabledIn(csvFeatures);
    _skipEmptyRows = CsvReadFeature.SKIP_EMPTY_ROWS.enabledIn(csvFeatures);
    _inputBuffer = ctxt.allocTokenBuffer();
    _bufferRecyclable = true; // since we allocated it
    // -1 means "not available" until first token is started
    _tokenInputRow = -1;
    _tokenInputCol = -1;
    setSchema(schema);
}
/**
 * (Re)configures decoder with schema-defined special characters,
 * and re-computes {@link #_maxSpecialChar} pre-filter bound.
 */
public void setSchema(CsvSchema schema) {
    _separatorChar = schema.getColumnSeparator();
    _quoteChar = schema.getQuoteChar();
    _escapeChar = schema.getEscapeChar();
    // Schema may enable comment handling even when feature did not
    if (!_allowComments) {
        _allowComments = schema.allowsComments();
    }
    // Upper bound of all characters needing special handling, used as
    // a single-comparison fast path in scanning loops
    _maxSpecialChar = Math.max(
            Math.max(_separatorChar, _quoteChar),
            Math.max(_escapeChar, Math.max('\r', '\n')));
}
/*
/**********************************************************************
/* JsonParser implementations passed-through by CsvParser
/**********************************************************************
*/
// Accessor for underlying input source (the Reader); null if closed or
// input came as pre-loaded buffer
public Object getInputSource() {
    return _inputReader;
}
/**
 * Closes decoder: closes input source (if owned or auto-close enabled)
 * and releases internal buffers. Safe to call multiple times.
 */
public void close() throws IOException {
    _pendingLF = 1; // just to ensure we'll also check _closed flag later on
    if (!_closed) {
        _closed = true;
        try {
            _closeInput();
        } finally {
            // Also, internal buffer(s) can now be released as well;
            // done in finally so buffers are returned even if close fails
            releaseBuffers();
        }
    }
}
/**
 * Writes out all remaining buffered (not yet decoded) input into given
 * {@link Writer}, marking it as consumed.
 *
 * @param out Writer to copy remaining buffered content into
 * @return Number of characters written; 0 if nothing was buffered
 */
public int releaseBuffered(Writer out) throws IOException {
    int count = _inputEnd - _inputPtr;
    if (count < 1) {
        return 0;
    }
    int origPtr = _inputPtr;
    // Advance pointer to end so released content is not decoded again
    // (the original comment stated this intent but the code never did it)
    _inputPtr = _inputEnd;
    out.write(_inputBuffer, origPtr, count);
    return count;
}
// Creates child Array context positioned at current input location
public SimpleStreamReadContext childArrayContext(SimpleStreamReadContext context) {
    // column is 1-based offset from start of current row
    final int column = 1 + (_inputPtr - _currInputRowStart);
    return context.createChildArrayContext(_currInputRow, column);
}
// Creates child Object context positioned at current input location
public SimpleStreamReadContext childObjectContext(SimpleStreamReadContext context) {
    // column is 1-based offset from start of current row
    final int column = 1 + (_inputPtr - _currInputRowStart);
    return context.createChildObjectContext(_currInputRow, column);
}
// Location where the current (last started) token began
public TokenStreamLocation getTokenLocation()
{
    return new TokenStreamLocation(_ioContext.contentReference(),
            getTokenCharacterOffset(),
            getTokenLineNr(), getTokenColumnNr());
}
// Current location of decoding, in the input source
public TokenStreamLocation getCurrentLocation() {
    int ptr = _inputPtr;
    // One twist: when dealing with a "pending LF", need to
    // go back one position when calculating location
    if (_pendingLF > 1) { // 1 is used as marker for end-of-input
        --ptr;
    }
    int col = ptr - _currInputRowStart + 1; // 1-based
    return new TokenStreamLocation(_ioContext.contentReference(),
            _currInputProcessed + ptr - 1L, _currInputRow, col);
}
// Current input row, 1-based
public final int getCurrentRow() {
    return _currInputRow;
}
// Current input column, 1-based
public final int getCurrentColumn() {
    int ptr = _inputPtr;
    // When a linefeed is pending (values above 1; 1 itself marks
    // end-of-input), pointer has already moved past it: step back
    // so reported column stays on the line
    if (_pendingLF > 1) {
        ptr -= 1;
    }
    return (ptr - _currInputRowStart) + 1;
}
/**
 * Tell if the current token has been quoted or not.
 *
 * @return True if the current token has been quoted, false otherwise
 *
 * @since 2.18
 */
public final boolean isCurrentTokenQuoted() {
    return _currInputQuoted;
}
/*
/**********************************************************************
/* Helper methods, input handling
/**********************************************************************
*/
// Total character offset at which current token started
protected final long getTokenCharacterOffset() {
    return _tokenInputTotal;
}

// Input row (1-based) on which current token started; -1 if not available
protected final int getTokenLineNr() {
    return _tokenInputRow;
}
// Column (1-based) at which current token started; -1 if not available
protected final int getTokenColumnNr() {
    final int col = _tokenInputCol;
    if (col < 0) { // -1 means "not available"
        return col;
    }
    return col + 1; // convert 0-based to 1-based
}
// Returns internal buffers to the recycler; idempotent.
protected void releaseBuffers() throws JacksonException {
    _textBuffer.releaseBuffers();
    final char[] buf = _inputBuffer;
    if (buf == null) {
        return; // already released
    }
    // null out first to guard against double-release
    _inputBuffer = null;
    _ioContext.releaseTokenBuffer(buf);
}
// Closes the underlying Reader if owned (resource-managed) or if
// auto-close feature is enabled; always drops the reference.
protected void _closeInput() throws IOException {
    _pendingLF = 1; // just to ensure we'll also check _closed flag later on
    /* 25-Nov-2008, tatu: As per [JACKSON-16] we are not to call close()
     * on the underlying Reader, unless we "own" it, or auto-closing
     * feature is enabled.
     * One downside is that when using our optimized
     * Reader (granted, we only do that for UTF-32...) this
     * means that buffer recycling won't work correctly.
     */
    if (_inputReader != null) {
        if (_autoCloseInput || _ioContext.isResourceManaged()) {
            _inputReader.close();
        }
        _inputReader = null;
    }
}
/**
 * Tries to refill input buffer from the underlying Reader, updating
 * processed-character count and row-start offset accordingly.
 *
 * @return True if at least one more character is available; false at
 *    end of input (in which case input has also been closed)
 */
protected final boolean loadMore() throws JacksonException {
    _currInputProcessed += _inputEnd;
    // row start may go negative; only the difference (column) matters
    _currInputRowStart -= _inputEnd;

    // [dataformats-text#613]: enforce max document length
    _ioContext.streamReadConstraints().validateDocumentLength(_currInputProcessed);

    try {
        if (_inputReader != null) {
            int count = _inputReader.read(_inputBuffer, 0, _inputBuffer.length);
            _inputEnd = count;
            _inputPtr = 0;
            if (count > 0) {
                return true;
            }
            // End of input; close here -- but note, do NOT yet call releaseBuffers()
            // as there may be buffered input to handle
            _closeInput();
            // Reader.read() must return -1 at EOF, never 0, so fail if it does
            // (fixed message: this is a Reader reading chars, not an InputStream)
            if (count == 0) {
                throw new IOException("Reader.read() returned 0 characters when trying to read "+_inputBuffer.length+" characters");
            }
        }
    } catch (IOException e) {
        throw _owner._wrapIOFailure(e);
    }
    return false;
}
/*
/**********************************************************************
/* Actual parsing, access methods
/**********************************************************************
*/
// Textual contents of the current (most recently decoded) value
public String getText() throws JacksonException {
    return _textBuffer.contentsAsString();
}
/**
 * Method that can be called to see if there is at least one more
 * character to be parsed (loading more input if necessary).
 */
public boolean hasMoreInput() throws JacksonException {
    return (_inputPtr < _inputEnd) || loadMore();
}
/**
 * Method called to handle details of starting a new line, which may
 * include skipping a linefeed.
 *
 * @return True if there is a new data line to handle; false if not
 */
public boolean startNewLine() throws JacksonException {
    // discard any replay state left over from previous row
    _pendingEmptyColumns = 0;
    // first: if pending LF, skip it
    if (_pendingLF != 0) {
        if (_inputReader == null) { // closed; nothing more to read
            return false;
        }
        _handleLF();
    }
    return skipLinesWhenNeeded();
}
/**
 * Optionally skip lines that are empty or are comments, depending on the feature activated in the parser
 *
 * @return false if the end of input was reached
 * @throws JacksonException
 */
public boolean skipLinesWhenNeeded() throws JacksonException {
    if (_allowComments) {
        // comment handling subsumes blank/empty-row skipping
        return _skipCommentLines();
    }
    if (!_skipBlankLines && !_skipEmptyRows) {
        // nothing to skip; just check there is more input
        return hasMoreInput();
    }
    // only need to skip fully empty lines (and optionally empty rows)
    while (hasMoreInput()) {
        char ch = _inputBuffer[_inputPtr];
        if (ch == '\r' || ch == '\n') { // empty line: consume linefeed
            ++_inputPtr;
            _pendingLF = ch;
            _handleLF();
            continue;
        }
        if (ch == ' ' && _skipBlankLines) { // blank (space-only) prefix
            ++_inputPtr;
            continue;
        }
        // [dataformats-text#368]: Row of only separator characters?
        if (_skipEmptyRows && ch == _separatorChar) {
            if (_trySkipEmptyRow()) {
                continue;
            }
        }
        return true; // processing can go on
    }
    return false; // end of input
}
// Skips comment lines ('#'-prefixed), blank prefixes and empty lines/rows
// until real content or end-of-input is found.
//
// @return True if there is content to process; false at end of input
public boolean _skipCommentLines() throws JacksonException
{
    while ((_inputPtr < _inputEnd) || loadMore()) {
        char ch = _inputBuffer[_inputPtr];
        switch (ch) {
        case '#': // comment: skip rest of line
            ++_inputPtr;
            _skipCommentContents();
            continue;
        case '\r':
        case '\n': // empty line
            ++_inputPtr;
            _pendingLF = ch;
            _handleLF();
            continue;
        case ' ':
            // skip all blanks (in both comments/blanks skip mode)
            ++_inputPtr;
            continue;
        default:
            // [dataformats-text#368]: Check if line consists only of separators
            if (_skipEmptyRows && ch == _separatorChar) {
                if (_trySkipEmptyRow()) {
                    continue;
                }
            }
            return true;
        }
    }
    return false; // end of input
}
// Skips rest of the current comment line, consuming the terminating
// linefeed (via _handleLF()) if one is found before end of input.
protected void _skipCommentContents() throws JacksonException
{
    while (hasMoreInput()) {
        final char c = _inputBuffer[_inputPtr++];
        if (c != '\r' && c != '\n') {
            continue;
        }
        _pendingLF = c;
        _handleLF();
        return;
    }
}
/**
 * Helper method called when we see a separator character at the start of a line
 * and need to determine if the entire row consists only of consecutive separator
 * characters (no other content) followed by a linefeed or EOF.
 *
 * Consumes separator characters one at a time. If a linefeed or EOF is reached,
 * the row is empty and is skipped (returns {@code true}). If any other character
 * is found, the consumed separators are recorded in {@link #_pendingEmptyColumns}
 * so that {@link #nextString()} can replay them as empty values.
 *
 * @return {@code true} if the row was determined to be empty and was skipped;
 *    {@code false} if the row contains non-empty content (consumed separators
 *    will be replayed via {@link #_pendingEmptyColumns})
 */
protected boolean _trySkipEmptyRow() throws JacksonException
{
    int separatorCount = 0;
    while (true) {
        // Consume the separator character at current position
        ++_inputPtr;
        ++separatorCount;
        // Need more input?
        if (_inputPtr >= _inputEnd) {
            if (!loadMore()) {
                // EOF after separators only: empty row at end of input
                return true;
            }
        }
        char ch = _inputBuffer[_inputPtr];
        if (ch == _separatorChar) {
            continue;
        }
        if (ch == '\r' || ch == '\n') {
            // Row consisted only of separators: skip it
            ++_inputPtr;
            _pendingLF = ch;
            _handleLF();
            return true;
        }
        // Found non-separator content: not an empty row.
        // Record consumed separators so nextString() replays them.
        _pendingEmptyColumns = separatorCount;
        return false;
    }
}
/**
 * Method called to blindly skip a single line of content, without considering
 * aspects like quoting or escaping. Used currently simply to skip the first
 * line of input document, if instructed to do so.
 *
 * @return True if a line was skipped; false at end of input
 */
public boolean skipLine() throws JacksonException {
    if (_pendingLF != 0) {
        if (_inputReader == null) { // closed: nothing left to skip
            return false;
        }
        _handleLF();
    }
    while (hasMoreInput()) {
        final char ch = _inputBuffer[_inputPtr++];
        if (ch != '\r' && ch != '\n') {
            continue;
        }
        // important: handle trailing linefeed now, so caller need not bother
        _pendingLF = ch;
        _handleLF();
        return true;
    }
    return false;
}
/**
 * Method called to parse the next token when we don't have any type
 * information, so that all tokens are exposed as basic String
 * values.
 *
 * @return Column value if more found; null to indicate end of line
 *   of input
 */
public String nextString() throws JacksonException {
    _numTypesValid = NR_UNKNOWN;
    _currInputQuoted = false; // Reset; set to true below only if opening quote found
    // [dataformats-text#368]: Replay separator characters consumed by _trySkipEmptyRow()
    // as empty column values
    if (_pendingEmptyColumns > 0) {
        --_pendingEmptyColumns;
        _textBuffer.resetWithString("");
        return "";
    }
    if (_pendingLF > 0) { // either pendingLF, or closed
        if (_inputReader != null) { // if closed, we just need to return null
            _handleLF();
        }
        return null; // end of line without new value
    }
    int i;
    if (_trimSpaces) {
        i = _skipLeadingSpace();
    } else {
        i = _nextChar();
        // [dataformats-text#643]: Even without TRIM_SPACES, spaces before
        // a quoted value are insignificant and should be skipped.
        // Consume whitespace across buffer boundaries; if a quote follows,
        // discard the spaces and parse as quoted. If not, restore consumed
        // spaces into the unquoted output.
        if (i <= ' ' && i > 0
            && i != _separatorChar && i != INT_CR && i != INT_LF) {
            // Lazily allocate / reuse buffer for consumed whitespace
            char[] spaceBuf = _leadingSpaceBuf;
            if (spaceBuf == null) {
                _leadingSpaceBuf = spaceBuf = new char[16];
            }
            int spaceCount = 0;
            spaceBuf[spaceCount++] = (char) i;
            boolean foundQuote = false;
            while (true) {
                if (_inputPtr >= _inputEnd) {
                    if (!loadMore()) {
                        break; // EOF: treat consumed spaces as unquoted content
                    }
                }
                final char ch = _inputBuffer[_inputPtr];
                if (ch == _quoteChar) {
                    // Found quote: discard leading spaces, proceed as quoted
                    _inputPtr++;
                    i = _quoteChar;
                    foundQuote = true;
                    break;
                }
                if (ch > ' ' || ch == _separatorChar
                    || ch == '\r' || ch == '\n') {
                    break; // significant character: value is unquoted
                }
                // Consume this whitespace character, tracking it for restoration
                _inputPtr++;
                if (spaceCount >= spaceBuf.length) {
                    _leadingSpaceBuf = spaceBuf =
                        Arrays.copyOf(spaceBuf, spaceBuf.length * 2);
                }
                spaceBuf[spaceCount++] = ch;
            }
            if (!foundQuote && spaceCount > 0) {
                // Not a quoted value: restore consumed whitespace into unquoted output
                _tokenInputTotal = _currInputProcessed + _inputPtr - 1;
                _tokenInputRow = _currInputRow;
                _tokenInputCol = _inputPtr - _currInputRowStart - 1;
                _textBuffer.resetWithCopy(spaceBuf, 0, spaceCount);
                return _nextUnquotedString(_textBuffer.getBufferWithoutReset(),
                        _textBuffer.getCurrentSegmentSize());
            }
        }
    }
    // First, need to ensure we know the starting location of token
    _tokenInputTotal = _currInputProcessed + _inputPtr - 1;
    _tokenInputRow = _currInputRow;
    _tokenInputCol = _inputPtr - _currInputRowStart - 1;
    if (i < 0) { // EOF at this point signifies empty value
        _textBuffer.resetWithString("");
        return "";
    }
    if (i == INT_CR || i == INT_LF) { // end-of-line means end of record; but also need to handle LF later on
        _pendingLF = i;
        _textBuffer.resetWithString("");
        return "";
    }
    // two modes: quoted, unquoted
    _currInputQuoted = i == _quoteChar; // Keep track of quoting
    if (_currInputQuoted) { // offline quoted case (longer)
        return _nextQuotedString();
    }
    if (i == _separatorChar) { // empty value between two separators
        _textBuffer.resetWithString("");
        return "";
    }
    char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
    outBuf[0] = (char) i;
    int outPtr = 1;
    if (i == _escapeChar) {
        // Reset the escaped character
        outBuf[0] = _unescape();
        return _nextUnquotedString(outBuf, outPtr);
    }
    int ptr = _inputPtr;
    if (ptr >= _inputEnd) {
        if (!loadMore()) { // ok to have end-of-input but...
            return _textBuffer.finishAndReturn(outPtr, _trimSpaces);
        }
        ptr = _inputPtr;
    }
    // end of fast-path scan: bounded by both remaining input and output segment
    final int end;
    {
        int max = Math.min(_inputEnd - ptr, outBuf.length - outPtr);
        end = ptr + max;
    }
    // handle unquoted case locally if it can be handled without
    // crossing buffer boundary...
    char[] inputBuffer = _inputBuffer;
    while (ptr < end) {
        char c = inputBuffer[ptr++];
        if (c <= _maxSpecialChar) { // fast pre-filter: only these can be special
            if (c == _separatorChar) { // end of value, yay!
                _inputPtr = ptr;
                return _textBuffer.finishAndReturn(outPtr, _trimSpaces);
            }
            if (c == '\r' || c == '\n') {
                _pendingLF = c;
                _inputPtr = ptr;
                return _textBuffer.finishAndReturn(outPtr, _trimSpaces);
            }
            if (c == _escapeChar) {
                --ptr; // push back; slow path handles escapes
                break;
            }
        }
        outBuf[outPtr++] = c;
    }
    // ok, either input or output across buffer boundary, offline
    _inputPtr = ptr;
    return _nextUnquotedString(outBuf, outPtr);
}
// Like nextString(), but returning token type; literal detection
// not yet implemented so everything is VALUE_STRING.
public JsonToken nextStringOrLiteral() throws JacksonException {
    _numTypesValid = NR_UNKNOWN;
    // !!! TODO: implement properly
    return (nextString() == null) ? null : JsonToken.VALUE_STRING;
}
// Like nextString(), but for expected-number column; number detection
// not yet implemented so everything is VALUE_STRING.
public JsonToken nextNumber() throws JacksonException {
    _numTypesValid = NR_UNKNOWN;
    // !!! TODO: implement properly
    return (nextString() == null) ? null : JsonToken.VALUE_STRING;
}
// Like nextString(), but for number-or-string column; number detection
// not yet implemented so everything is VALUE_STRING.
public JsonToken nextNumberOrString() throws JacksonException {
    _numTypesValid = NR_UNKNOWN;
    // !!! TODO: implement properly
    return (nextString() == null) ? null : JsonToken.VALUE_STRING;
}
/*
/**********************************************************************
/* Actual parsing, private helper methods
/**********************************************************************
*/
// Slow-path continuation for decoding an unquoted value that crosses a
// buffer boundary (or contains escapes). Appends into given output buffer
// (already holding outPtr chars) until separator, linefeed or EOF.
protected String _nextUnquotedString(char[] outBuf, int outPtr) throws JacksonException {
    int c;
    final char[] inputBuffer = _inputBuffer;
    main_loop:
    while (true) {
        int ptr = _inputPtr;
        if (ptr >= _inputEnd) {
            if (!loadMore()) { // ok to have end-of-input, are done
                _inputPtr = ptr;
                break main_loop;
            }
            ptr = _inputPtr;
        }
        if (outPtr >= outBuf.length) { // output segment full; roll to next
            outBuf = _textBuffer.finishCurrentSegment();
            outPtr = 0;
        }
        // scan bounded by both remaining input and remaining output space
        final int max = Math.min(_inputEnd, (ptr + (outBuf.length - outPtr)));
        while (ptr < max) {
            c = inputBuffer[ptr++];
            if (c <= _maxSpecialChar) { // fast pre-filter for special chars
                if (c == _separatorChar) { // end of value, yay!
                    _inputPtr = ptr;
                    break main_loop;
                }
                if (c == '\r' || c == '\n') { // end of line is end of value as well
                    _inputPtr = ptr;
                    _pendingLF = c;
                    break main_loop;
                }
                if (c == _escapeChar) {
                    _inputPtr = ptr;
                    outBuf[outPtr++] = _unescape();
                    // May have passed input boundary, need to re-set
                    continue main_loop;
                }
            }
            outBuf[outPtr++] = (char) c;
        }
        _inputPtr = ptr;
    }
    return _textBuffer.finishAndReturn(outPtr, _trimSpaces);
}
// Decodes a quoted value: opening quote has already been consumed.
// Handles doubled-up quotes ("" -> "), escapes, and embedded linefeeds
// (which update row accounting but do not terminate the value); then skips
// trailing space up to (and including) the following separator or
// end-of-line, reporting an error on any other trailing character.
protected String _nextQuotedString() throws JacksonException {
    char[] outBuf = _textBuffer.emptyAndGetCurrentSegment();
    int outPtr = 0;
    final char[] inputBuffer = _inputBuffer;
    boolean checkLF = false; // marker for split CR+LF
    main_loop:
    while (true) {
        int ptr = _inputPtr;
        if (ptr >= _inputEnd) {
            if (!loadMore()) { // not ok, missing end quote
                _owner._reportError("Missing closing quote for value"); // should indicate start position?
            }
            ptr = _inputPtr;
            if (checkLF) { // CR was last char of previous buffer
                checkLF = false; // better reset
                if (inputBuffer[ptr] == '\n') {
                    // undo earlier advancement, to keep line number correct
                    // (the '\n' branch below will increment again)
                    --_currInputRow;
                }
            }
        }
        // 11-Feb-2024, tatu: Not quite sure what was supposed to happen here;
        //    but nothing was done. Leaving for now, remove from 2.18 or later
        /*
        if (checkLF) { // had a "hanging" CR in parse loop; check now
        }
        */
        if (outPtr >= outBuf.length) { // output segment full; roll to next
            outBuf = _textBuffer.finishCurrentSegment();
            outPtr = 0;
        }
        // scan bounded by both remaining input and remaining output space
        final int max = Math.min(_inputEnd, (ptr + (outBuf.length - outPtr)));
        inner_loop:
        while (true) {
            char c = inputBuffer[ptr++];
            if (c <= _maxSpecialChar) { // fast pre-filter for special chars
                if (c == _quoteChar) {
                    _inputPtr = ptr;
                    break;
                }
                // Embedded linefeeds are fine
                if (c == '\r') {
                    // bit crappy check but has to do:
                    if (ptr >= max) {
                        checkLF = true; // will need to be checked in beginning of next loop
                        ++_currInputRow;
                        _currInputRowStart = ptr;
                    } else if (inputBuffer[ptr] != '\n') {
                        // lone CR: counts as a line by itself
                        ++_currInputRow;
                        _currInputRowStart = ptr;
                    }
                } else if (c == '\n') {
                    ++_currInputRow;
                    _currInputRowStart = ptr;
                } else if (c == _escapeChar) {
                    _inputPtr = ptr;
                    c = _unescape();
                    outBuf[outPtr++] = c;
                    // May have passed input boundary, need to re-set
                    continue main_loop;
                }
            }
            outBuf[outPtr++] = c;
            if (ptr >= max) {
                _inputPtr = ptr;
                continue main_loop;
            }
            continue inner_loop;
        }
        // We get here if we hit a quote: check if it's doubled up, or end of value:
        if (_inputPtr < _inputEnd || loadMore()) {
            if (_inputBuffer[_inputPtr] == _quoteChar) { // doubled up, append
                // note: should have enough room, is safe
                outBuf[outPtr++] = (char) _quoteChar;
                ++_inputPtr;
                continue main_loop;
            }
        }
        // Not doubled; leave next char as is
        break;
    }
    // note: do NOT trim from within quoted Strings
    String result = _textBuffer.finishAndReturn(outPtr, false);
    // good, but we also need to locate and skip trailing space, separator
    // (note: space outside quotes never included, but must be skipped)
    while (_inputPtr < _inputEnd || loadMore()) { // end-of-input is fine
        int ch = _inputBuffer[_inputPtr++];
        if (ch == _separatorChar) { // common case, separator between columns
            break;
        }
        if (ch <= INT_SPACE) { // extra space, fine as well
            if (ch == INT_CR || ch == INT_LF) { // but end-of-line can't be yet skipped
                _pendingLF = ch;
                break;
            }
            continue;
        }
        // anything else after closing quote is malformed
        _owner._reportUnexpectedCsvChar(ch, String.format(
                "Expected column separator character (%s) or end-of-line", _getCharDesc(_separatorChar)));
    }
    return result;
}
// Completes handling of a pending linefeed: for CR, also consumes a
// directly following '\n' (CRLF pair), then updates row accounting.
protected final void _handleLF() throws JacksonException
{
    // already skipped past first part; but may get \r\n so skip the other char too?
    if (_pendingLF == INT_CR) {
        if (_inputPtr < _inputEnd || loadMore()) {
            if (_inputBuffer[_inputPtr] == '\n') {
                ++_inputPtr;
            }
        }
    }
    _pendingLF = 0;
    ++_currInputRow;
    _currInputRowStart = _inputPtr;
}
// Decodes the single character following an escape character: a small
// set of codes is translated ('0', 'n', 'r', 't'); anything else is
// passed through unchanged. Fails on end-of-input.
protected char _unescape() throws JacksonException {
    if (_inputPtr >= _inputEnd) {
        if (!loadMore()) {
            _reportError("Unexpected EOF in escaped character");
        }
    }
    // Some characters are more special than others, so:
    final char c = _inputBuffer[_inputPtr++];
    if (c == '0') {
        return '\0';
    }
    if (c == 'n') {
        return '\n';
    }
    if (c == 'r') {
        return '\r';
    }
    if (c == 't') {
        return '\t';
    }
    // others, return as is...
    return c;
}
// Returns next input character, or -1 at end of input.
protected final int _nextChar() throws JacksonException {
    if ((_inputPtr >= _inputEnd) && !loadMore()) {
        return -1;
    }
    return _inputBuffer[_inputPtr++];
}
// Skips leading white space (characters at or below 0x20), returning the
// first significant character: anything above space, the separator, or
// an end-of-line character. Returns -1 at end of input.
protected final int _skipLeadingSpace() throws JacksonException {
    final int sep = _separatorChar;
    while ((_inputPtr < _inputEnd) || loadMore()) {
        final char ch = _inputBuffer[_inputPtr++];
        if ((ch > ' ') || (ch == sep) || (ch == '\r') || (ch == '\n')) {
            return ch;
        }
        // otherwise space/control character: keep skipping
    }
    return -1;
}
/*
/**********************************************************************
/* Numeric accessors for CsvParser
/**********************************************************************
*/
/**
 * Method used by {@link CsvParser#isExpectedNumberIntToken()} to coerce
 * current token into integer number, if it looks like one.
 */
public boolean isExpectedNumberIntToken() throws JacksonException
{
    if (!looksLikeInt()) {
        return false;
    }
    _parseIntValue();
    return true;
}
/**
 * Returns current number as the narrowest representation already computed
 * (int, long, BigInteger, BigDecimal, double -- in that order of preference).
 *
 * @param exact Whether we should try to retain maximum precision or not;
 *   passed as {@code true} by {@code getNumberValueExact()}, and as
 *   {@code false} by regular {@code getNumberValue)}.
 */
public Number getNumberValue(boolean exact) throws JacksonException
{
    if (_numTypesValid == NR_UNKNOWN) {
        _parseNumericValue(exact); // will also check event type
    }
    // Separate types for int types
    if ((_numTypesValid & NR_INT) != 0) {
        return Integer.valueOf(_numberInt);
    }
    if ((_numTypesValid & NR_LONG) != 0) {
        return Long.valueOf(_numberLong);
    }
    if ((_numTypesValid & NR_BIGINT) != 0) {
        return _getBigInteger();
    }
    // And then floating point types. But here optimal type
    // needs to be big decimal, to avoid losing any data?
    if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
        return _getBigDecimal();
    }
    if ((_numTypesValid & NR_DOUBLE) == 0) { // sanity check
        _throwInternal();
    }
    return Double.valueOf(_numberDouble);
}
// Returns the narrowest NumberType for which a representation of the
// current number has already been computed.
public NumberType getNumberType() throws JacksonException {
    if (_numTypesValid == NR_UNKNOWN) {
        _parseNumericValue(false); // will also check event type
    }
    // Integral types, narrowest first
    if ((_numTypesValid & NR_INT) != 0) {
        return NumberType.INT;
    }
    if ((_numTypesValid & NR_LONG) != 0) {
        return NumberType.LONG;
    }
    if ((_numTypesValid & NR_BIGINT) != 0) {
        return NumberType.BIG_INTEGER;
    }
    // Floating point: report BigDecimal only if that representation was
    // actually computed (it is costlier than double)
    return ((_numTypesValid & NR_BIGDECIMAL) != 0)
            ? NumberType.BIG_DECIMAL : NumberType.DOUBLE;
}
// Current number as int; parses and/or coerces from another numeric
// representation if needed.
public int getIntValue() throws JacksonException {
    if ((_numTypesValid & NR_INT) == 0) {
        if (_numTypesValid == NR_UNKNOWN) { // not parsed at all
            _parseNumericValue(false); // will also check event type
        }
        if ((_numTypesValid & NR_INT) == 0) { // wasn't an int natively?
            convertNumberToInt(); // let's make it so, if possible
        }
    }
    return _numberInt;
}

// Current number as long; parses and/or coerces if needed.
public long getLongValue() throws JacksonException {
    if ((_numTypesValid & NR_LONG) == 0) {
        if (_numTypesValid == NR_UNKNOWN) {
            _parseNumericValue(false);
        }
        if ((_numTypesValid & NR_LONG) == 0) {
            convertNumberToLong();
        }
    }
    return _numberLong;
}

// Current number as BigInteger; parsed with "exact" mode to retain precision.
public BigInteger getBigIntegerValue() throws JacksonException {
    if ((_numTypesValid & NR_BIGINT) == 0) {
        if (_numTypesValid == NR_UNKNOWN) {
            _parseNumericValue(true);
        }
        if ((_numTypesValid & NR_BIGINT) == 0) {
            convertNumberToBigInteger();
        }
    }
    return _getBigInteger();
}
// Current number as float, narrowed from double representation.
// Bounds/range checks would be tricky here, so let's not bother...
public float getFloatValue() throws JacksonException {
    return (float) getDoubleValue();
}
public double getDoubleValue() throws JacksonException {
if ((_numTypesValid & NR_DOUBLE) == 0) {
if (_numTypesValid == NR_UNKNOWN) {
_parseNumericValue(false);
}
if ((_numTypesValid & NR_DOUBLE) == 0) {
convertNumberToDouble();
}
}
return _numberDouble;
}
/**
 * Returns current numeric value as {@link BigDecimal}, parsing in "exact"
 * mode and/or converting as needed; materialization is deferred to
 * {@link #_getBigDecimal()}.
 */
public BigDecimal getDecimalValue() throws JacksonException {
    if ((_numTypesValid & NR_BIGDECIMAL) == 0) {
        if (_numTypesValid == NR_UNKNOWN) {
            _parseNumericValue(true); // exact: retain full precision
        }
        if ((_numTypesValid & NR_BIGDECIMAL) == 0) {
            convertNumberToBigDecimal();
        }
    }
    return _getBigDecimal();
}
/**
 * Internal accessor that needs to be used for accessing number value of type
 * {@link BigInteger} which -- as of 2.14 -- is typically lazily parsed.
 *
 * @return parsed (and now cached) {@code BigInteger} value
 * @throws IllegalStateException if neither a parsed value nor its textual
 *   representation is available
 */
protected BigInteger _getBigInteger() throws JacksonException {
    if (_numberBigInt != null) {
        return _numberBigInt; // already materialized
    } else if (_numberString == null) {
        throw new IllegalStateException("cannot get BigInteger from current parser state");
    }
    // Enforce configured length limit before the (potentially expensive) parse
    _ioContext.streamReadConstraints().validateIntegerLength(_numberString.length());
    _numberBigInt = NumberInput.parseBigInteger(
            _numberString, _owner.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
    _numberString = null; // consumed; value cached in _numberBigInt
    return _numberBigInt;
}
/**
 * Internal accessor that needs to be used for accessing number value of type
 * {@link BigDecimal} which -- as of 2.14 -- is typically lazily parsed.
 *
 * @return parsed (and now cached) {@code BigDecimal} value
 * @throws IllegalStateException if neither a parsed value nor its textual
 *   representation is available
 */
protected BigDecimal _getBigDecimal() throws JacksonException {
    if (_numberBigDecimal != null) {
        return _numberBigDecimal; // already materialized
    } else if (_numberString == null) {
        throw new IllegalStateException("cannot get BigDecimal from current parser state");
    }
    // Enforce configured length limit before the (potentially expensive) parse
    _ioContext.streamReadConstraints().validateFPLength(_numberString.length());
    _numberBigDecimal = NumberInput.parseBigDecimal(
            _numberString, _owner.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
    _numberString = null; // consumed; value cached in _numberBigDecimal
    return _numberBigDecimal;
}
/*
/**********************************************************************
/* Conversion from textual to numeric representation
/**********************************************************************
*/
/**
 * Method that will parse actual numeric value out of a syntactically
 * valid number value. Type it will parse into depends on whether
 * it is a floating point number, as well as its magnitude: smallest
 * legal type (of ones available) is used for efficiency.
 *
 * @param exactNumber Whether to try to retain the highest precision for
 *    floating-point values or not
 */
protected void _parseNumericValue(boolean exactNumber)
    throws JacksonException
{
    if (looksLikeInt()) {
        _parseIntValue();
        return;
    }
    // Not integral: must be floating-point (token was already validated
    // as numeric before we get here)
    _parseSlowFloatValue(exactNumber);
}
/**
 * Checks whether the current token text is a plain (optionally signed)
 * sequence of decimal digits -- i.e. parseable as an integral number
 * without decimal point or exponent.
 */
private boolean looksLikeInt() throws JacksonException {
    final char[] chars = _textBuffer.contentsAsArray();
    final int end = chars.length;
    if (end == 0) {
        return false; // empty token is not a number
    }
    int idx = 0;
    final char first = chars[0];
    if (first == '-' || first == '+') {
        if (end == 1) {
            return false; // bare sign, no digits
        }
        idx = 1;
    }
    while (idx < end) {
        final char c = chars[idx++];
        if (c < '0' || c > '9') {
            return false;
        }
    }
    return true;
}
// @since 2.12
/**
 * Parses the current integral token into the smallest sufficient
 * representation: {@code int} (up to 9 digits), {@code long} (up to 18
 * digits, with 10-digit values demoted back to int if they fit), and
 * otherwise deferring to {@link #_parseSlowIntValue} for long/BigInteger.
 *
 * NOTE(review): token length is computed as {@code buf.length - offset},
 * i.e. this assumes the text buffer segment is exactly sized to the token
 * -- TODO confirm against TextBuffer contract (looksLikeInt uses
 * contentsAsArray() instead).
 */
protected void _parseIntValue() throws JacksonException
{
    char[] buf = _textBuffer.getTextBuffer();
    int offset = _textBuffer.getTextOffset();
    char c = buf[offset];
    boolean neg;
    // Consume optional leading sign
    if (c == '-') {
        neg = true;
        ++offset;
    } else {
        neg = false;
        if (c == '+') {
            ++offset;
        }
    }
    int len = buf.length - offset;
    if (len <= 9) { // definitely fits in int
        int i = NumberInput.parseInt(buf, offset, len);
        _numberInt = neg ? -i : i;
        _numTypesValid = NR_INT;
        return;
    }
    if (len <= 18) { // definitely fits AND is easy to parse using 2 int parse calls
        long l = NumberInput.parseLong(buf, offset, len);
        if (neg) {
            l = -l;
        }
        // [JACKSON-230] Could still fit in int, need to check
        if (len == 10) {
            if (neg) {
                if (l >= MIN_INT_L) {
                    _numberInt = (int) l;
                    _numTypesValid = NR_INT;
                    return;
                }
            } else {
                if (l <= MAX_INT_L) {
                    _numberInt = (int) l;
                    _numTypesValid = NR_INT;
                    return;
                }
            }
        }
        _numberLong = l;
        _numTypesValid = NR_LONG;
        return;
    }
    // 19+ digits: may or may not fit in long; slow path decides
    _parseSlowIntValue(buf, offset, len, neg);
}
/**
 * Parses the current token as a floating-point value: either an exact
 * (lazily materialized) {@link BigDecimal}, or a fast-but-lossy
 * {@code double}.
 *
 * @param exactNumber if true, retain full textual precision for later
 *   BigDecimal parsing; if false, parse directly to double
 */
private final void _parseSlowFloatValue(boolean exactNumber)
    throws JacksonException
{
    /* Nope: floating point. Here we need to be careful to get
     * optimal parsing strategy: choice is between accurate but
     * slow (BigDecimal) and lossy but fast (Double). For now
     * let's only use BD when explicitly requested -- it can
     * still be constructed correctly at any point since we do
     * retain textual representation
     */
    try {
        if (exactNumber) {
            // Defer actual BigDecimal parse: just stash the text
            _numberBigDecimal = null;
            _numberString = _textBuffer.contentsAsString();
            _numTypesValid = NR_BIGDECIMAL;
        } else {
            // Otherwise double has to do
            _numberDouble = _textBuffer.contentsAsDouble(_owner.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
            _numTypesValid = NR_DOUBLE;
        }
    } catch (NumberFormatException nex) {
        // Can this ever occur? Due to overflow, maybe?
        throw _constructReadException(nex, "Malformed numeric value '" + _textBuffer.contentsAsString() + "'");
    }
}
/**
 * Parses an integral token of 19+ digits: uses {@code long} if the value
 * fits in its range, otherwise defers to lazily-parsed {@link BigInteger}
 * (only the textual form is stored here).
 *
 * @param buf character buffer holding the digits
 * @param offset index of first digit (sign already consumed by caller)
 * @param len number of digits
 * @param neg whether the value is negative
 */
private final void _parseSlowIntValue(char[] buf, int offset, int len,
        boolean neg)
    throws JacksonException
{
    String numStr = _textBuffer.contentsAsString();
    try {
        if (NumberInput.inLongRange(buf, offset, len, neg)) {
            // Probably faster to construct a String, call parse, than to use BigInteger
            // (note: numStr still contains the sign, so no negation needed here)
            _numberLong = Long.parseLong(numStr);
            _numTypesValid = NR_LONG;
        } else {
            // nope, need the heavy guns... (rare case)
            _numberBigInt = null;
            _numberString = numStr; // lazily parsed by _getBigInteger()
            _numTypesValid = NR_BIGINT;
        }
    } catch (NumberFormatException nex) {
        // Can this ever occur? Due to overflow, maybe?
        throw _constructReadException(nex, "Malformed numeric value '" + numStr + "'");
    }
}
/*
/**********************************************************************
/* Numeric conversions
/**********************************************************************
*/
/**
 * Converts the already-parsed numeric value (long, BigInteger, double or
 * BigDecimal) into {@code int}, reporting overflow if it does not fit.
 */
protected void convertNumberToInt() throws JacksonException {
    // First, converting from long ought to be easy
    if ((_numTypesValid & NR_LONG) != 0) {
        // Let's verify it's lossless conversion by simple roundtrip
        int result = (int) _numberLong;
        if (result != _numberLong) {
            // Use same overflow reporting as the other conversion paths below
            // (was: ad-hoc _reportError without the valid range in message)
            reportOverflowInt();
        }
        _numberInt = result;
    } else if ((_numTypesValid & NR_BIGINT) != 0) {
        final BigInteger bigInteger = _getBigInteger();
        if (BI_MIN_INT.compareTo(bigInteger) > 0
                || BI_MAX_INT.compareTo(bigInteger) < 0) {
            reportOverflowInt();
        }
        _numberInt = bigInteger.intValue();
    } else if ((_numTypesValid & NR_DOUBLE) != 0) {
        // Need to check boundaries
        if (_numberDouble < MIN_INT_D || _numberDouble > MAX_INT_D) {
            reportOverflowInt();
        }
        _numberInt = (int) _numberDouble;
    } else if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
        final BigDecimal bigDecimal = _getBigDecimal();
        if (BD_MIN_INT.compareTo(bigDecimal) > 0
                || BD_MAX_INT.compareTo(bigDecimal) < 0) {
            reportOverflowInt();
        }
        _numberInt = bigDecimal.intValue();
    } else {
        _throwInternal(); // should never get here
    }
    _numTypesValid |= NR_INT;
}
/**
 * Converts the already-parsed numeric value (int, BigInteger, double or
 * BigDecimal) into {@code long}, reporting overflow if it does not fit.
 */
protected void convertNumberToLong() throws JacksonException {
    if ((_numTypesValid & NR_INT) != 0) {
        _numberLong = _numberInt; // widening; always safe
    } else if ((_numTypesValid & NR_BIGINT) != 0) {
        final BigInteger bigInteger = _getBigInteger();
        if (BI_MIN_LONG.compareTo(bigInteger) > 0
                || BI_MAX_LONG.compareTo(bigInteger) < 0) {
            reportOverflowLong();
        }
        _numberLong = bigInteger.longValue();
    } else if ((_numTypesValid & NR_DOUBLE) != 0) {
        // Need to check boundaries
        if (_numberDouble < MIN_LONG_D || _numberDouble > MAX_LONG_D) {
            reportOverflowLong();
        }
        _numberLong = (long) _numberDouble;
    } else if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
        final BigDecimal bigDecimal = _getBigDecimal();
        if (BD_MIN_LONG.compareTo(bigDecimal) > 0
                || BD_MAX_LONG.compareTo(bigDecimal) < 0) {
            reportOverflowLong();
        }
        _numberLong = bigDecimal.longValue();
    } else {
        _throwInternal(); // should never get here
    }
    _numTypesValid |= NR_LONG;
}
/**
 * Converts the already-parsed numeric value into {@link BigInteger};
 * floating-point sources are truncated (fraction dropped), not rounded.
 */
protected void convertNumberToBigInteger()
    throws JacksonException {
    if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
        // here it'll just get truncated, no exceptions thrown
        final BigDecimal bd = _getBigDecimal();
        // guard against pathological scale causing huge expansion
        _ioContext.streamReadConstraints().validateBigIntegerScale(bd.scale());
        _numberBigInt = bd.toBigInteger();
    } else if ((_numTypesValid & NR_LONG) != 0) {
        _numberBigInt = BigInteger.valueOf(_numberLong);
    } else if ((_numTypesValid & NR_INT) != 0) {
        _numberBigInt = BigInteger.valueOf(_numberInt);
    } else if ((_numTypesValid & NR_DOUBLE) != 0) {
        // via BigDecimal to avoid double->long truncation surprises
        _numberBigInt = BigDecimal.valueOf(_numberDouble).toBigInteger();
    } else {
        _throwInternal(); // should never get here
    }
    _numTypesValid |= NR_BIGINT;
}
/**
 * Converts the already-parsed numeric value into {@code double}.
 * Conversion may be lossy for BigDecimal/BigInteger/long sources.
 */
protected void convertNumberToDouble()
    throws JacksonException {
    // 05-Aug-2008, tatus: Important note: this MUST start with
    // more accurate representations, since we don't know which
    // value is the original one (others get generated when
    // requested)
    if ((_numTypesValid & NR_BIGDECIMAL) != 0) {
        _numberDouble = _getBigDecimal().doubleValue();
    } else if ((_numTypesValid & NR_BIGINT) != 0) {
        _numberDouble = _getBigInteger().doubleValue();
    } else if ((_numTypesValid & NR_LONG) != 0) {
        _numberDouble = _numberLong;
    } else if ((_numTypesValid & NR_INT) != 0) {
        _numberDouble = _numberInt;
    } else {
        _throwInternal(); // should never get here
    }
    _numTypesValid |= NR_DOUBLE;
}
/**
 * Converts the already-parsed numeric value into {@link BigDecimal}.
 * Double sources are re-parsed from textual form to avoid binary
 * floating-point rounding artifacts.
 */
protected void convertNumberToBigDecimal() throws JacksonException {
    if ((_numTypesValid & NR_DOUBLE) != 0) {
        // Let's actually parse from String representation, to avoid
        // rounding errors that non-decimal floating operations could incur
        final String text = getText();
        _ioContext.streamReadConstraints().validateFPLength(text.length());
        _numberBigDecimal = NumberInput.parseBigDecimal(
                text, _owner.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
    } else if ((_numTypesValid & NR_BIGINT) != 0) {
        _numberBigDecimal = new BigDecimal(_getBigInteger());
    } else if ((_numTypesValid & NR_LONG) != 0) {
        _numberBigDecimal = BigDecimal.valueOf(_numberLong);
    } else if ((_numTypesValid & NR_INT) != 0) {
        _numberBigDecimal = BigDecimal.valueOf(_numberInt);
    } else {
        _throwInternal(); // should never get here
    }
    _numTypesValid |= NR_BIGDECIMAL;
}
/*
/**********************************************************
/* Number handling exceptions
/**********************************************************
*/
/**
 * Reports an unexpected character found within a numeric value,
 * appending the optional explanatory comment if given.
 */
protected void reportUnexpectedNumberChar(int ch, String comment)
    throws StreamReadException
{
    final StringBuilder sb = new StringBuilder("Unexpected character (")
            .append(_getCharDesc(ch))
            .append(") in numeric value");
    if (comment != null) {
        sb.append(": ").append(comment);
    }
    _reportError(sb.toString());
}
/** Reports a syntactically invalid numeric value, with given detail message. */
protected void reportInvalidNumber(String msg) throws StreamReadException {
    _reportError("Invalid numeric value: " + msg);
}
/** Reports that the current numeric value does not fit in {@code int} range. */
protected void reportOverflowInt() throws StreamReadException {
    _reportError("Numeric value (" + getText() + ") out of range of int (" + Integer.MIN_VALUE + " - " + Integer.MAX_VALUE + ")");
}
/** Reports that the current numeric value does not fit in {@code long} range. */
protected void reportOverflowLong() throws StreamReadException {
    _reportError("Numeric value (" + getText() + ") out of range of long (" + Long.MIN_VALUE + " - " + Long.MAX_VALUE + ")");
}
/**
 * Wraps given cause into a {@link StreamReadException} with the given
 * message, preserving the original exception as cause.
 */
protected final StreamReadException _constructReadException(Throwable t, String msg) {
    return new StreamReadException(_owner, msg, t);
}
/**
 * Returns a human-readable description of given character code, for use
 * in error messages: control characters described by code only; others
 * quoted with decimal code, plus hex for codes above 255.
 */
protected final static String _getCharDesc(int ch) {
    final char c = (char) ch;
    if (Character.isISOControl(c)) {
        return "(CTRL-CHAR, code " + ch + ")";
    }
    if (ch <= 255) {
        return "'" + c + "' (code " + ch + ")";
    }
    return "'" + c + "' (code " + ch + " / 0x" + Integer.toHexString(ch) + ")";
}
/** Signals an impossible internal state (a bug in this class, not bad input). */
private void _throwInternal() {
    throw new IllegalStateException("Internal error: code path should never get executed");
}
/**
 * Method for reporting low-level decoding (parsing) problems: always
 * throws a {@link StreamReadException} with the given message.
 */
protected final void _reportError(String msg) throws StreamReadException {
    throw new StreamReadException(_owner, msg);
}
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/impl/CsvEncoder.java
================================================
package tools.jackson.dataformat.csv.impl;
import java.io.IOException;
import java.io.Writer;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Arrays;
import tools.jackson.core.JacksonException;
import tools.jackson.core.exc.JacksonIOException;
import tools.jackson.core.io.CharTypes;
import tools.jackson.core.io.CharacterEscapes;
import tools.jackson.core.io.IOContext;
import tools.jackson.core.io.NumberOutput;
import tools.jackson.dataformat.csv.CsvSchema;
import tools.jackson.dataformat.csv.CsvWriteException;
import tools.jackson.dataformat.csv.CsvWriteFeature;
/**
* Helper class that handles actual low-level construction of
* CSV output, based only on indexes given without worrying about reordering,
* or binding from logical properties.
*/
public class CsvEncoder
{
// Default set of escaped characters (none)
private static final int [] sOutputEscapes = new int[0];

// Upper case hex chars:
protected final static char[] HEX_CHARS = CharTypes.copyHexChars(true);

/**
 * As an optimization we try coalescing short writes into
 * buffer; but pass longer directly.
 */
protected final static int SHORT_WRITE = 32;

/**
 * Default maximum length of a column value that will be inspected
 * character-by-character to determine whether quoting is needed
 * (when not using {@link CsvWriteFeature#STRICT_CHECK_FOR_QUOTING}).
 * Values longer than this are always quoted as a performance optimization.
 *
 * Configurable via {@link tools.jackson.dataformat.csv.CsvFactoryBuilder#maxQuoteCheckChars(int)}.
 *
 * @since 3.2
 */
public final static int DEFAULT_MAX_QUOTE_CHECK = 24;

// Shared empty marker meaning "no values buffered for column reordering"
protected final BufferedValue[] NO_BUFFERED = new BufferedValue[0];

// Pre-encoded boolean literals, written via _append(char[])
private final static char[] TRUE_CHARS = "true".toCharArray();
private final static char[] FALSE_CHARS = "false".toCharArray();

/**
 * Currently active set of output escape code definitions (whether
 * and how to escape or not).
 */
protected int[] _outputEscapes = sOutputEscapes;
/*
/**********************************************************************
/* Configuration
/**********************************************************************
*/
protected final IOContext _ioContext;

/**
 * Underlying {@link Writer} used for output.
 */
protected final Writer _out;

// Schema-derived settings: separator, quote/escape chars, line ends, null
protected final char _cfgColumnSeparator;

// Quote character code; non-positive value means "no quoting" (code checks > 0)
protected final int _cfgQuoteCharacter;

// Escape character code; non-positive value means "none" (code checks > 0)
protected final int _cfgEscapeCharacter;

protected final char[] _cfgLineSeparator;

// Characters output for null values (possibly empty array)
protected final char[] _cfgNullValue;

protected final int _cfgLineSeparatorLength;

// Maximum value length for which per-character quoting check is performed
protected final int _cfgMaxQuoteCheckChars;

/**
 * Lowest-valued character that is safe to output without using
 * quotes around value, NOT including possible escape character.
 */
protected final int _cfgMinSafeChar;

// Bit set of enabled CsvWriteFeature flags
protected final int _csvFeatures;

/**
 * Marker flag used to determine if to do optimal (aka "strict") quoting
 * checks or not (looser conservative check)
 */
protected final boolean _cfgOptimalQuoting;

protected final boolean _cfgAllowsComments;

// Whether separators are appended for missing trailing columns (see endRow())
protected final boolean _cfgIncludeMissingTail;

protected final boolean _cfgAlwaysQuoteStrings;

protected final boolean _cfgAlwaysQuoteEmptyStrings;

// @since 2.16
protected final boolean _cfgAlwaysQuoteNumbers;

// @since 3.2
protected final boolean _cfgQuoteLeadingTrailingWhitespace;

protected final boolean _cfgEscapeQuoteCharWithEscapeChar;

protected final boolean _cfgEscapeControlCharWithEscapeChar;

/**
 * @since 2.14
 */
protected final boolean _cfgUseFastDoubleWriter;

// Character used to escape the quote character within quoted values
protected final char _cfgQuoteCharEscapeChar;

// Character used to escape control characters (when that feature is enabled)
protected final char _cfgControlCharEscapeChar;
/*
/**********************************************************************
/* Output state
/**********************************************************************
*/
// Number of columns defined by the active schema
protected int _columnCount;

/**
 * Index of column we expect to write next
 */
protected int _nextColumnToWrite = 0;

/**
 * And if output comes in shuffled order we will need to do
 * bit of ordering.
 */
protected BufferedValue[] _buffered = NO_BUFFERED;

/**
 * Index of the last buffered value
 */
protected int _lastBuffered = -1;

protected boolean _trailingLFRemoved = false;
/*
/**********************************************************************
/* Output buffering, low-level
/**********************************************************************
*/
/**
 * Intermediate buffer in which contents are buffered before
 * being written using {@link #_out}.
 */
protected char[] _outputBuffer;

/**
 * Flag that indicates whether the _outputBuffer is recyclable (and
 * needs to be returned to recycler once we are done) or not.
 */
protected boolean _bufferRecyclable;

/**
 * Pointer to the next available char position in {@link #_outputBuffer}
 */
protected int _outputTail = 0;

/**
 * Offset to index after the last valid index in {@link #_outputBuffer}.
 * Typically same as length of the buffer.
 */
protected final int _outputEnd;

/**
 * Let's keep track of how many bytes have been output, may prove useful
 * when debugging. This does not include bytes buffered in
 * the output buffer, just bytes that have been written using underlying
 * stream writer.
 */
protected int _charsWritten;
/*
/**********************************************************************
/* Construction, (re)configuration
/**********************************************************************
*/
/**
 * Main constructor: resolves write-feature flags and schema-derived
 * settings (separators, quote/escape characters, null value), allocates
 * the recyclable output buffer, and verifies configuration consistency.
 *
 * @since 3.2
 */
public CsvEncoder(IOContext ctxt, int csvFeatures, Writer out, CsvSchema schema,
        CharacterEscapes esc, boolean useFastDoubleWriter,
        int maxQuoteCheckChars)
{
    _ioContext = ctxt;
    _csvFeatures = csvFeatures;
    _cfgUseFastDoubleWriter = useFastDoubleWriter;
    _cfgOptimalQuoting = CsvWriteFeature.STRICT_CHECK_FOR_QUOTING.enabledIn(csvFeatures);
    _cfgIncludeMissingTail = !CsvWriteFeature.OMIT_MISSING_TAIL_COLUMNS.enabledIn(_csvFeatures);
    _cfgAlwaysQuoteStrings = CsvWriteFeature.ALWAYS_QUOTE_STRINGS.enabledIn(csvFeatures);
    _cfgAlwaysQuoteEmptyStrings = CsvWriteFeature.ALWAYS_QUOTE_EMPTY_STRINGS.enabledIn(csvFeatures);
    _cfgAlwaysQuoteNumbers = CsvWriteFeature.ALWAYS_QUOTE_NUMBERS.enabledIn(csvFeatures);
    _cfgQuoteLeadingTrailingWhitespace = CsvWriteFeature.QUOTE_STRINGS_WITH_LEADING_TRAILING_WHITESPACE.enabledIn(csvFeatures);
    _cfgEscapeQuoteCharWithEscapeChar = CsvWriteFeature.ESCAPE_QUOTE_CHAR_WITH_ESCAPE_CHAR.enabledIn(csvFeatures);
    _cfgEscapeControlCharWithEscapeChar = CsvWriteFeature.ESCAPE_CONTROL_CHARS_WITH_ESCAPE_CHAR.enabledIn(csvFeatures);
    _outputBuffer = ctxt.allocConcatBuffer();
    _bufferRecyclable = true; // buffer owned here; must be released on close
    _outputEnd = _outputBuffer.length;
    _out = out;
    _cfgColumnSeparator = schema.getColumnSeparator();
    _cfgQuoteCharacter = schema.getQuoteChar();
    _cfgEscapeCharacter = schema.getEscapeChar();
    _cfgLineSeparator = schema.getLineSeparator();
    _cfgLineSeparatorLength = (_cfgLineSeparator == null) ? 0 : _cfgLineSeparator.length;
    _cfgNullValue = schema.getNullValueOrEmpty();
    _cfgAllowsComments = schema.allowsComments();
    _columnCount = schema.size();
    _outputEscapes = (esc == null) ? sOutputEscapes : esc.getEscapeCodesForAscii();
    _cfgMinSafeChar = _calcSafeChar();
    _cfgMaxQuoteCheckChars = maxQuoteCheckChars;
    _cfgQuoteCharEscapeChar = _getQuoteCharEscapeChar(
            _cfgEscapeQuoteCharWithEscapeChar,
            _cfgQuoteCharacter,
            _cfgEscapeCharacter
    );
    _cfgControlCharEscapeChar = _cfgEscapeCharacter > 0 ? (char) _cfgEscapeCharacter : '\\';
    _verifyConfiguration(schema);
}
/**
 * @deprecated Since 3.2: use the variant that also takes
 *   {@code maxQuoteCheckChars} (this one uses {@link #DEFAULT_MAX_QUOTE_CHECK})
 */
@Deprecated // @since 3.2
public CsvEncoder(IOContext ctxt, int csvFeatures, Writer out, CsvSchema schema,
        CharacterEscapes esc, boolean useFastDoubleWriter)
{
    this(ctxt, csvFeatures, out, schema, esc, useFastDoubleWriter,
            DEFAULT_MAX_QUOTE_CHECK);
}
/**
 * Reconfiguration constructor used when switching to a different
 * {@link CsvSchema}: shares output target, buffer and feature-derived
 * settings of {@code base}, but re-derives all schema-dependent settings.
 */
public CsvEncoder(CsvEncoder base, CsvSchema newSchema)
{
    _ioContext = base._ioContext;
    _csvFeatures = base._csvFeatures;
    _cfgUseFastDoubleWriter = base._cfgUseFastDoubleWriter;
    _cfgOptimalQuoting = base._cfgOptimalQuoting;
    _cfgIncludeMissingTail = base._cfgIncludeMissingTail;
    _cfgAlwaysQuoteStrings = base._cfgAlwaysQuoteStrings;
    _cfgAlwaysQuoteEmptyStrings = base._cfgAlwaysQuoteEmptyStrings;
    _cfgAlwaysQuoteNumbers = base._cfgAlwaysQuoteNumbers;
    _cfgQuoteLeadingTrailingWhitespace = base._cfgQuoteLeadingTrailingWhitespace;
    _cfgEscapeQuoteCharWithEscapeChar = base._cfgEscapeQuoteCharWithEscapeChar;
    _cfgEscapeControlCharWithEscapeChar = base._cfgEscapeControlCharWithEscapeChar;
    _outputBuffer = base._outputBuffer;
    _bufferRecyclable = base._bufferRecyclable;
    _outputEnd = base._outputEnd;
    _out = base._out;
    _cfgMaxQuoteCheckChars = base._cfgMaxQuoteCheckChars;
    _outputEscapes = base._outputEscapes;
    _cfgColumnSeparator = newSchema.getColumnSeparator();
    _cfgQuoteCharacter = newSchema.getQuoteChar();
    _cfgEscapeCharacter = newSchema.getEscapeChar();
    _cfgLineSeparator = newSchema.getLineSeparator();
    // Null-guard for consistency with the main constructor (avoids NPE
    // for a schema without a line separator)
    _cfgLineSeparatorLength = (_cfgLineSeparator == null) ? 0 : _cfgLineSeparator.length;
    _cfgNullValue = newSchema.getNullValueOrEmpty();
    _cfgAllowsComments = newSchema.allowsComments();
    _cfgMinSafeChar = _calcSafeChar();
    _columnCount = newSchema.size();
    _cfgQuoteCharEscapeChar = _getQuoteCharEscapeChar(
            base._cfgEscapeQuoteCharWithEscapeChar,
            newSchema.getQuoteChar(),
            newSchema.getEscapeChar()
    );
    _cfgControlCharEscapeChar = _cfgEscapeCharacter > 0 ? (char) _cfgEscapeCharacter : '\\';
    _verifyConfiguration(newSchema);
}
/**
 * Verifies configuration consistency: escape-with-escape-char features
 * require the schema to actually define an escape character.
 *
 * @throws CsvWriteException if an escape-char feature is enabled but no
 *   escape character is defined by the schema
 */
private void _verifyConfiguration(CsvSchema schema)
{
    // 21-Feb-2023, tatu: [dataformats-text#374]: Need to verify that Escape character
    //   is defined if need to use it
    if (_cfgEscapeQuoteCharWithEscapeChar || _cfgEscapeControlCharWithEscapeChar) {
        if (!schema.usesEscapeChar()) {
            throw CsvWriteException.from(null,
                    "Cannot use `CsvGenerator.Feature.ESCAPE_QUOTE_CHAR_WITH_ESCAPE_CHAR` or `CsvGenerator.Feature.ESCAPE_CONTROL_CHARS_WITH_ESCAPE_CHAR`"
                    +" if no escape character defined in `CsvSchema`",
                    schema);
        }
    }
}
/**
 * Chooses the character used to escape the quote character within quoted
 * values: configured escape char (if the feature is enabled and the char
 * defined), otherwise the quote char itself (doubling), otherwise backslash.
 */
private final char _getQuoteCharEscapeChar(
        final boolean escapeQuoteCharWithEscapeChar,
        final int quoteCharacter,
        final int escapeCharacter)
{
    if (escapeQuoteCharWithEscapeChar && escapeCharacter > 0) {
        return (char) escapeCharacter;
    }
    if (quoteCharacter > 0) {
        return (char) quoteCharacter;
    }
    return '\\';
}
/**
 * Computes the lowest character code that can be output without quoting:
 * one above the highest of the column separator, quote character and any
 * line-separator character.
 */
private final int _calcSafeChar()
{
    // note: quote char may be -1 to signify "no quoting":
    int min = Math.max(_cfgColumnSeparator, _cfgQuoteCharacter);
    // 06-Nov-2015, tatu: We will NOT apply escape character, because it usually
    //   has higher ascii value (with backslash); better handle separately.
    // 23-Sep-2020, tatu: Should not actually need to consider anything but the
    //   first character when checking... but leaving rest for now
    for (int i = 0; i < _cfgLineSeparatorLength; ++i) {
        min = Math.max(min, _cfgLineSeparator[i]);
    }
    return min+1;
}
/**
 * Returns an encoder reconfigured for given schema, sharing this
 * encoder's output target and buffer (see copy constructor).
 */
public CsvEncoder withSchema(CsvSchema schema) {
    return new CsvEncoder(this, schema);
}
/**
 * Replaces the active set of output escape definitions; passing
 * {@code null} resets to the default (no escapes). Returns {@code this}
 * for call chaining.
 */
public CsvEncoder setOutputEscapes(int[] esc) {
    _outputEscapes = (esc == null) ? sOutputEscapes : esc;
    return this;
}
/*
/**********************************************************************
/* Read-access to output state
/**********************************************************************
*/
/** Returns the underlying {@link Writer} output is sent to. */
public Object getOutputTarget() {
    return _out;
}
/**
 * NOTE: while value does indeed indicate amount that has been written in the buffer,
 * there may be more intermediate data that is buffered as values but not yet in
 * buffer.
 */
public int getOutputBuffered() {
    return _outputTail;
}

/** Returns the index of the column expected to be written next. */
public int nextColumnIndex() {
    return _nextColumnToWrite;
}
/*
/**********************************************************************
/* Writer API, writes from generator
/**********************************************************************
*/
/**
 * Writes a String value for given column: appended directly (with
 * quoting/escaping as needed) when the column is the next expected one,
 * otherwise buffered for reordering at {@code endRow()}.
 */
public final void write(int columnIndex, String value) throws JacksonException
{
    // easy case: all in order
    if (columnIndex == _nextColumnToWrite) {
        // inlined 'appendValue(String)`
        if (_outputTail >= _outputEnd) {
            _flushBuffer();
        }
        if (_nextColumnToWrite > 0) {
            appendColumnSeparator();
        }
        final int len = value.length();
        if (_cfgAlwaysQuoteStrings || _mayNeedQuotes(value, len, columnIndex)) {
            if (_cfgEscapeCharacter > 0) {
                _writeQuotedAndEscaped(value, (char) _cfgEscapeCharacter);
            } else {
                _writeQuoted(value);
            }
        } else {
            writeRaw(value);
        }
        ++_nextColumnToWrite;
        return;
    }
    _buffer(columnIndex, BufferedValue.buffered(value));
}
/**
 * Writes a character-array value for given column, by delegating to the
 * String variant.
 */
public final void write(int columnIndex, char[] ch, int offset, int len) throws JacksonException
{
    // !!! TODO: optimize
    write(columnIndex, new String(ch, offset, len));
}
/**
 * Writes an {@code int} value for given column: appended directly when
 * in order (quoted if {@code ALWAYS_QUOTE_NUMBERS} enabled), otherwise
 * buffered for reordering.
 */
public final void write(int columnIndex, int value) throws JacksonException
{
    // easy case: all in order
    if (columnIndex == _nextColumnToWrite) {
        // inlined 'appendValue(int)'
        // up to 10 digits and possible minus sign, leading comma, possible quotes
        if ((_outputTail + 14) > _outputEnd) {
            _flushBuffer();
        }
        if (_nextColumnToWrite > 0) {
            _outputBuffer[_outputTail++] = _cfgColumnSeparator;
        }
        if (_cfgAlwaysQuoteNumbers) {
            _outputBuffer[_outputTail++] = (char) _cfgQuoteCharacter;
        }
        _outputTail = NumberOutput.outputInt(value, _outputBuffer, _outputTail);
        if (_cfgAlwaysQuoteNumbers) {
            _outputBuffer[_outputTail++] = (char) _cfgQuoteCharacter;
        }
        ++_nextColumnToWrite;
        return;
    }
    _buffer(columnIndex, BufferedValue.buffered(value));
}
/**
 * Writes a {@code long} value for given column: appended directly when
 * in order (quoted if {@code ALWAYS_QUOTE_NUMBERS} enabled), otherwise
 * buffered for reordering.
 */
public final void write(int columnIndex, long value) throws JacksonException
{
    // easy case: all in order
    if (columnIndex == _nextColumnToWrite) {
        // inlined 'appendValue(int)'
        // up to 20 digits, minus sign, leading comma, possible quotes
        if ((_outputTail + 24) > _outputEnd) {
            _flushBuffer();
        }
        if (_nextColumnToWrite > 0) {
            _outputBuffer[_outputTail++] = _cfgColumnSeparator;
        }
        if (_cfgAlwaysQuoteNumbers) {
            _outputBuffer[_outputTail++] = (char) _cfgQuoteCharacter;
        }
        _outputTail = NumberOutput.outputLong(value, _outputBuffer, _outputTail);
        if (_cfgAlwaysQuoteNumbers) {
            _outputBuffer[_outputTail++] = (char) _cfgQuoteCharacter;
        }
        ++_nextColumnToWrite;
        return;
    }
    _buffer(columnIndex, BufferedValue.buffered(value));
}
/**
 * Writes a {@link BigInteger} value for given column, pre-encoding it as
 * a String (appended or buffered depending on column order).
 */
public final void write(int columnIndex, BigInteger value) throws JacksonException
{
    // easy case: all in order
    final String numStr = value.toString();
    if (columnIndex == _nextColumnToWrite) {
        appendNumberValue(numStr);
        ++_nextColumnToWrite;
        return;
    }
    _buffer(columnIndex, BufferedValue.bufferedNumber(numStr));
}
/**
 * Writes a {@code float} value for given column (appended or buffered
 * depending on column order).
 */
public final void write(int columnIndex, float value) throws JacksonException
{
    // easy case: all in order
    if (columnIndex == _nextColumnToWrite) {
        appendValue(value);
        ++_nextColumnToWrite;
        return;
    }
    _buffer(columnIndex, BufferedValue.buffered(value));
}
/**
 * Writes a {@code double} value for given column (appended or buffered
 * depending on column order).
 */
public final void write(int columnIndex, double value) throws JacksonException
{
    // easy case: all in order
    if (columnIndex == _nextColumnToWrite) {
        appendValue(value);
        ++_nextColumnToWrite;
        return;
    }
    _buffer(columnIndex, BufferedValue.buffered(value));
}
/**
 * Writes a {@link BigDecimal} value for given column, pre-encoded as a
 * String; {@code plain} selects non-engineering notation
 * ({@code toPlainString()}) over default {@code toString()}.
 */
public final void write(int columnIndex, BigDecimal value, boolean plain) throws JacksonException
{
    final String numStr = plain ? value.toPlainString() : value.toString();
    // easy case: all in order
    if (columnIndex == _nextColumnToWrite) {
        appendNumberValue(numStr);
        ++_nextColumnToWrite;
        return;
    }
    _buffer(columnIndex, BufferedValue.bufferedNumber(numStr));
}
/**
 * Writes a {@code boolean} value ("true"/"false") for given column
 * (appended or buffered depending on column order).
 */
public final void write(int columnIndex, boolean value) throws JacksonException
{
    // easy case: all in order
    if (columnIndex == _nextColumnToWrite) {
        appendValue(value);
        ++_nextColumnToWrite;
        return;
    }
    _buffer(columnIndex, BufferedValue.buffered(value));
}
/**
 * Writes a value verbatim -- no quoting or escaping applied -- for given
 * column (appended or buffered depending on column order).
 */
public final void writeNonEscaped(int columnIndex, String rawValue) throws JacksonException
{
    if (columnIndex == _nextColumnToWrite) {
        appendRawValue(rawValue);
        ++_nextColumnToWrite;
        return;
    }
    _buffer(columnIndex, BufferedValue.bufferedRaw(rawValue));
}
/**
 * Writes the configured null-value representation for given column
 * (appended or buffered depending on column order).
 */
public final void writeNull(int columnIndex) throws JacksonException
{
    if (columnIndex == _nextColumnToWrite) {
        appendNull();
        ++_nextColumnToWrite;
        return;
    }
    _buffer(columnIndex, BufferedValue.bufferedNull());
}
/**
 * Writes a header-row column name (subject to the same quoting rules
 * as regular String values).
 */
public final void writeColumnName(String name) throws JacksonException
{
    appendValue(name);
    ++_nextColumnToWrite;
}
/**
 * Finishes the current row: first flushes any out-of-order buffered
 * values (emitting bare separators for gaps), then -- unless disabled --
 * pads missing trailing columns with separators, and finally appends the
 * configured line separator. A completely empty row produces no output.
 */
public void endRow() throws JacksonException
{
    // First things first; any buffered?
    if (_lastBuffered >= 0) {
        final int last = _lastBuffered;
        _lastBuffered = -1;
        for (; _nextColumnToWrite <= last; ++_nextColumnToWrite) {
            BufferedValue value = _buffered[_nextColumnToWrite];
            if (value != null) {
                _buffered[_nextColumnToWrite] = null; // clear for reuse
                value.write(this);
            } else if (_nextColumnToWrite > 0) {
                // note: write method triggers prepending of separator; but for missing
                // values we need to do it explicitly.
                appendColumnSeparator();
            }
        }
    } else if (_nextColumnToWrite <= 0) { // empty line; do nothing
        return;
    }
    // Any missing values?
    if (_nextColumnToWrite < _columnCount) {
        if (_cfgIncludeMissingTail) {
            do {
                appendColumnSeparator();
            } while (++_nextColumnToWrite < _columnCount);
        }
    }
    // write line separator
    _nextColumnToWrite = 0;
    if ((_outputTail + _cfgLineSeparatorLength) > _outputEnd) {
        _flushBuffer();
    }
    System.arraycopy(_cfgLineSeparator, 0, _outputBuffer, _outputTail, _cfgLineSeparatorLength);
    _outputTail += _cfgLineSeparatorLength;
}
/*
/**********************************************************************
/* Writer API, writes via buffered values
/**********************************************************************
*/
/**
 * Appends a String value at the current column position, prepending a
 * separator if not the first column, and applying quoting/escaping as
 * configured.
 */
protected void appendValue(String value) throws JacksonException
{
    if (_outputTail >= _outputEnd) {
        _flushBuffer();
    }
    if (_nextColumnToWrite > 0) {
        appendColumnSeparator();
    }
    // First: determine if we need quotes; simple heuristics;
    // only check for short Strings, stop if something found
    final int len = value.length();
    if (_cfgAlwaysQuoteStrings || _mayNeedQuotes(value, len, _nextColumnToWrite)) {
        if (_cfgEscapeCharacter > 0) {
            _writeQuotedAndEscaped(value, (char) _cfgEscapeCharacter);
        } else {
            _writeQuoted(value);
        }
    } else {
        writeRaw(value);
    }
}
/**
 * Appends a value verbatim (no quoting or escaping) at the current
 * column position, prepending a separator if not the first column.
 */
protected void appendRawValue(String value) throws JacksonException
{
    if (_outputTail >= _outputEnd) {
        _flushBuffer();
    }
    if (_nextColumnToWrite > 0) {
        appendColumnSeparator();
    }
    writeRaw(value);
}
/**
 * Appends an {@code int} value at the current column position, quoting
 * it if {@code ALWAYS_QUOTE_NUMBERS} is enabled.
 */
protected void appendValue(int value) throws JacksonException
{
    // up to 10 digits and possible minus sign, leading comma, possible quotes
    if ((_outputTail + 14) > _outputEnd) {
        _flushBuffer();
    }
    if (_nextColumnToWrite > 0) {
        _outputBuffer[_outputTail++] = _cfgColumnSeparator;
    }
    if (_cfgAlwaysQuoteNumbers) {
        _outputBuffer[_outputTail++] = (char) _cfgQuoteCharacter;
    }
    _outputTail = NumberOutput.outputInt(value, _outputBuffer, _outputTail);
    if (_cfgAlwaysQuoteNumbers) {
        _outputBuffer[_outputTail++] = (char) _cfgQuoteCharacter;
    }
}
/**
 * Appends a {@code long} value at the current column position, quoting
 * it if {@code ALWAYS_QUOTE_NUMBERS} is enabled.
 */
protected void appendValue(long value) throws JacksonException
{
    // up to 20 digits, minus sign, leading comma, possible quotes
    if ((_outputTail + 24) > _outputEnd) {
        _flushBuffer();
    }
    if (_nextColumnToWrite > 0) {
        _outputBuffer[_outputTail++] = _cfgColumnSeparator;
    }
    if (_cfgAlwaysQuoteNumbers) {
        _outputBuffer[_outputTail++] = (char) _cfgQuoteCharacter;
    }
    _outputTail = NumberOutput.outputLong(value, _outputBuffer, _outputTail);
    if (_cfgAlwaysQuoteNumbers) {
        _outputBuffer[_outputTail++] = (char) _cfgQuoteCharacter;
    }
}
/**
 * Appends a {@code float} value at the current column position, encoded
 * via {@link NumberOutput#toString}.
 * NOTE(review): room check covers value + separator but not possible
 * quotes added by {@code writeNumber} -- confirm writeNumber re-checks.
 */
protected void appendValue(float value) throws JacksonException
{
    String str = NumberOutput.toString(value, _cfgUseFastDoubleWriter);
    final int len = str.length();
    if ((_outputTail + len) >= _outputEnd) { // >= to include possible comma too
        _flushBuffer();
    }
    if (_nextColumnToWrite > 0) {
        _outputBuffer[_outputTail++] = _cfgColumnSeparator;
    }
    writeNumber(str);
}
/**
 * Appends a {@code double} value at the current column position, encoded
 * via {@link NumberOutput#toString}.
 */
protected void appendValue(double value) throws JacksonException
{
    String str = NumberOutput.toString(value, _cfgUseFastDoubleWriter);
    final int len = str.length();
    if ((_outputTail + len) >= _outputEnd) { // >= to include possible comma too
        _flushBuffer();
    }
    if (_nextColumnToWrite > 0) {
        _outputBuffer[_outputTail++] = _cfgColumnSeparator;
    }
    writeNumber(str);
}
// @since 2.16: pre-encoded BigInteger/BigDecimal value
/**
 * Appends an already String-encoded number at the current column position,
 * prepending a separator if needed; quoting is delegated to
 * {@code writeNumber}.
 */
protected void appendNumberValue(String numStr) throws JacksonException
{
    // Same as "appendRawValue()", except may want quoting
    if (_outputTail >= _outputEnd) {
        _flushBuffer();
    }
    if (_nextColumnToWrite > 0) {
        appendColumnSeparator();
    }
    writeNumber(numStr);
}
/** Appends {@code true}/{@code false} literal at the current column position. */
protected void appendValue(boolean value) throws JacksonException {
    _append(value ? TRUE_CHARS : FALSE_CHARS);
}

/** Appends the configured null-value representation (possibly empty). */
protected void appendNull() throws JacksonException {
    _append(_cfgNullValue);
}
/**
 * Low-level append of a pre-encoded character sequence at the current
 * column position, prepending a separator if not the first column.
 */
protected void _append(char[] ch) throws JacksonException {
    final int len = ch.length;
    if ((_outputTail + len) >= _outputEnd) { // >= to include possible comma too
        _flushBuffer();
    }
    if (_nextColumnToWrite > 0) {
        _outputBuffer[_outputTail++] = _cfgColumnSeparator;
    }
    if (len > 0) {
        System.arraycopy(ch, 0, _outputBuffer, _outputTail, len);
    }
    _outputTail += len;
}
/**
 * Appends a single column-separator character, flushing the output
 * buffer first if it is full.
 */
protected void appendColumnSeparator() throws JacksonException {
    if (_outputEnd <= _outputTail) {
        _flushBuffer();
    }
    _outputBuffer[_outputTail++] = _cfgColumnSeparator;
}
/*
/**********************************************************************
/* Output methods, unprocessed ("raw")
/**********************************************************************
*/
/**
 * Writes given text as-is, with no separator, quoting or escaping.
 */
public void writeRaw(String text) throws JacksonException
{
    // Nothing to check, can just output as is
    int len = text.length();
    int room = _outputEnd - _outputTail;

    if (room == 0) {
        _flushBuffer();
        room = _outputEnd - _outputTail;
    }
    // But would it nicely fit in? If yes, it's easy
    if (room >= len) {
        text.getChars(0, len, _outputBuffer, _outputTail);
        _outputTail += len;
    } else {
        // too big for remaining space: chunked copy instead
        writeRawLong(text);
    }
}
/**
 * Writes a sub-sequence of given text as-is, with no separator,
 * quoting or escaping.
 *
 * @param start Offset of first character to write
 * @param len Number of characters to write
 */
public void writeRaw(String text, int start, int len) throws JacksonException
{
    // Nothing to check, can just output as is
    int room = _outputEnd - _outputTail;

    if (room < len) {
        _flushBuffer();
        room = _outputEnd - _outputTail;
    }
    // But would it nicely fit in? If yes, it's easy
    if (room >= len) {
        text.getChars(start, start+len, _outputBuffer, _outputTail);
        _outputTail += len;
    } else {
        // longer than whole buffer: fall back to chunked copy of the slice
        writeRawLong(text.substring(start, start+len));
    }
}
/**
 * Writes given characters as-is: short writes are buffered, longer ones
 * passed directly to the underlying {@code Writer} after a flush.
 */
public void writeRaw(char[] text, int offset, int len) throws JacksonException
{
    // Only worth buffering if it's a short write?
    if (len < SHORT_WRITE) {
        int room = _outputEnd - _outputTail;
        if (len > room) {
            _flushBuffer();
        }
        System.arraycopy(text, offset, _outputBuffer, _outputTail, len);
        _outputTail += len;
        return;
    }
    // Otherwise, better just pass through:
    _flushBuffer();
    try {
        _out.write(text, offset, len);
    } catch (IOException e) {
        throw _wrapIOFailure(e);
    }
}
/**
 * Writes a single character as-is, flushing the output buffer first
 * if it is full.
 */
public void writeRaw(char c) throws JacksonException
{
    if (_outputEnd <= _outputTail) {
        _flushBuffer();
    }
    _outputBuffer[_outputTail++] = c;
}
/**
 * Slow path for {@link #writeRaw(String)}: copies text that does not fit
 * in the remaining buffer space, one buffer-full at a time.
 */
private void writeRawLong(String text) throws JacksonException
{
    int room = _outputEnd - _outputTail;
    // If not, need to do it by looping
    // First: top up and flush whatever fits in current buffer
    text.getChars(0, room, _outputBuffer, _outputTail);
    _outputTail += room;
    _flushBuffer();

    int offset = room;
    int len = text.length() - room;

    // Then copy full-buffer chunks, flushing each
    while (len > _outputEnd) {
        int amount = _outputEnd;
        text.getChars(offset, offset+amount, _outputBuffer, 0);
        _outputTail = amount;
        _flushBuffer();
        offset += amount;
        len -= amount;
    }
    // And last piece (at most length of buffer)
    text.getChars(offset, offset+len, _outputBuffer, 0);
    _outputTail = len;
}
// @since 2.16
/**
 * Writes a number (already rendered as String), surrounding it with
 * quote characters if so configured.
 */
private void writeNumber(String text) throws JacksonException
{
    final int len = text.length();
    // NOTE(review): assumes `len + 2` fits in a freshly flushed buffer —
    // TODO confirm numeric representations never exceed buffer size
    if ((_outputTail + len + 2) > _outputEnd) { // +2 for possible quotes
        _flushBuffer();
    }
    if (_cfgAlwaysQuoteNumbers) {
        _outputBuffer[_outputTail++] = (char) _cfgQuoteCharacter;
        text.getChars(0, len, _outputBuffer, _outputTail);
        _outputTail += len;
        _outputBuffer[_outputTail++] = (char) _cfgQuoteCharacter;
    } else {
        text.getChars(0, len, _outputBuffer, _outputTail);
        _outputTail += len;
    }
}
/*
/**********************************************************************
/* Output methods, with quoting and escaping
/**********************************************************************
*/
/**
 * Writes {@code text} surrounded by the configured quote character,
 * using a fast speculative-copy path when no contained character needs
 * quoting or escaping.
 */
public void _writeQuoted(String text) throws JacksonException
{
    final int[] escCodes = _outputEscapes;
    final int escLen = escCodes.length;
    if (_outputTail >= _outputEnd) {
        _flushBuffer();
    }
    // NOTE: caller should guarantee quote char is valid (not -1) at this point:
    final char q = (char) _cfgQuoteCharacter;
    _outputBuffer[_outputTail++] = q; // opening quote

    // simple case: if we have enough room, no need for boundary checks
    final int len = text.length();
    // (2*len is conservative worst case: every char doubled by escaping)
    if ((_outputTail + len + len) >= _outputEnd) {
        _writeLongQuoted(text, q);
        return;
    }
    // 22-Jan-2015, tatu: Common case is that of no quoting needed, so let's
    //   make a speculative copy, then scan
    // 06-Nov-2015, tatu: Not sure if copy actually improves perf; it did with
    //   older JVMs (1.5 at least), but not sure about 1.8 and later
    final char[] buf = _outputBuffer;
    int ptr = _outputTail;
    text.getChars(0, len, buf, ptr);
    final int end = ptr+len;
    for (; ptr < end; ++ptr) {
        char c = buf[ptr];
        // see if any of the characters need escaping.
        // if yes, fall back to the more convoluted write method
        if ((c == q) || (c < escLen && escCodes[c] != 0)) {
            break; // for
        }
    }
    if (ptr == end) { // all good, no quoting or escaping!
        _outputBuffer[ptr] = q; // closing quote right after copied content
        _outputTail = ptr+1;
    } else { // doh. do need quoting
        // restart escape-aware processing at the first problematic character;
        // chars before it were already copied by the speculative copy
        _writeQuoted(text, q, ptr - _outputTail);
    }
}
/**
 * Slow(er) path for quoted writes: continues from index {@code i},
 * escaping/doubling characters as needed; characters before {@code i}
 * were already copied to the output buffer by the caller.
 *
 * @param q Quote character in use
 * @param i Index of first character still needing processing
 */
protected void _writeQuoted(String text, char q, int i) throws JacksonException
{
    final int[] escCodes = _outputEscapes;
    final int escLen = escCodes.length;
    final char[] buf = _outputBuffer;
    // account for prefix the caller already copied speculatively
    _outputTail += i;

    final int len = text.length();
    for (; i < len; ++i) {
        char c = text.charAt(i);
        if (c < escLen) {
            int escCode = escCodes[c];
            if (escCode != 0) { // for escape control and double quotes, c will be 0
                _appendCharacterEscape(c, escCode);
                continue; // for
            }
        }
        if (c == q) { // double up
            if (_outputTail >= _outputEnd) {
                _flushBuffer();
            }
            buf[_outputTail++] = _cfgQuoteCharEscapeChar; // this will be the quote
        }
        if (_outputTail >= _outputEnd) {
            _flushBuffer();
        }
        buf[_outputTail++] = c;
    }
    // closing quote
    if (_outputTail >= _outputEnd) {
        _flushBuffer();
    }
    buf[_outputTail++] = q;
}
/**
 * Quoted write for content too long for the speculative-copy fast path:
 * processes character by character with per-character bounds checks.
 * Caller has already written the opening quote.
 */
private final void _writeLongQuoted(String text, char q) throws JacksonException
{
    final int[] escCodes = _outputEscapes;
    final int escLen = escCodes.length;

    final int len = text.length();
    for (int i = 0; i < len; ++i) {
        if (_outputTail >= _outputEnd) {
            _flushBuffer();
        }
        char c = text.charAt(i);
        if (c < escLen) {
            int escCode = escCodes[c];
            if (escCode != 0) { // for escape control and double quotes, c will be 0
                _appendCharacterEscape(c, escCode);
                continue; // for
            }
        }
        if (c == q) { // double up
            _outputBuffer[_outputTail++] = _cfgQuoteCharEscapeChar;
            if (_outputTail >= _outputEnd) {
                _flushBuffer();
            }
        }
        _outputBuffer[_outputTail++] = c;
    }
    // closing quote
    if (_outputTail >= _outputEnd) {
        _flushBuffer();
    }
    _outputBuffer[_outputTail++] = q;
}
/**
 * Like {@link #_writeQuoted(String)} but additionally treats {@code esc}
 * (configured escape character) as a character that requires escaping.
 */
public void _writeQuotedAndEscaped(String text, char esc) throws JacksonException
{
    final int[] escCodes = _outputEscapes;
    final int escLen = escCodes.length;
    if (_outputTail >= _outputEnd) {
        _flushBuffer();
    }
    // NOTE: caller should guarantee quote char is valid (not -1) at this point:
    final char q = (char) _cfgQuoteCharacter;
    _outputBuffer[_outputTail++] = q; // opening quote

    // (2*len is conservative worst case: every char doubled by escaping)
    final int len = text.length();
    if ((_outputTail + len + len) >= _outputEnd) {
        _writeLongQuotedAndEscaped(text, esc);
        return;
    }
    // speculative copy: scan copied chars for anything needing escaping
    final char[] buf = _outputBuffer;
    int ptr = _outputTail;
    text.getChars(0, len, buf, ptr);
    final int end = ptr+len;
    for (; ptr < end; ++ptr) {
        char c = buf[ptr];
        if ((c == q) || (c == esc) || (c < escLen && escCodes[c] != 0)) {
            break;
        }
    }
    if (ptr == end) { // all good, no quoting or escaping!
        _outputBuffer[ptr] = q; // closing quote
        _outputTail = ptr+1;
    } else { // quoting AND escaping
        // restart escape-aware processing at first problematic character
        _writeQuotedAndEscaped(text, q, esc, ptr - _outputTail);
    }
}
/**
 * Slow(er) path for quoted-and-escaped writes: continues from index
 * {@code i}, escaping quote and escape characters; characters before
 * {@code i} were already copied by the caller.
 */
protected void _writeQuotedAndEscaped(String text, char q, char esc, int i) throws JacksonException
{
    final int[] escCodes = _outputEscapes;
    final int escLen = escCodes.length;
    final char[] buf = _outputBuffer;
    // account for prefix the caller already copied speculatively
    _outputTail += i;

    final int len = text.length();
    for (; i < len; ++i) {
        char c = text.charAt(i);
        if (c < escLen) {
            int escCode = escCodes[c];
            if (escCode != 0) { // for escape control and double quotes, c will be 0
                _appendCharacterEscape(c, escCode);
                continue; // for
            }
        }
        if (c == q) { // double up
            if (_outputTail >= _outputEnd) {
                _flushBuffer();
            }
            _outputBuffer[_outputTail++] = _cfgQuoteCharEscapeChar;
        } else if (c == esc) { // double up
            if (_outputTail >= _outputEnd) {
                _flushBuffer();
            }
            _outputBuffer[_outputTail++] = _cfgControlCharEscapeChar;
        }
        if (_outputTail >= _outputEnd) {
            _flushBuffer();
        }
        buf[_outputTail++] = c;
    }
    // closing quote
    if (_outputTail >= _outputEnd) {
        _flushBuffer();
    }
    buf[_outputTail++] = q;
}
/**
 * Quoted-and-escaped write for content too long for the speculative-copy
 * fast path; processes character by character with per-character bounds
 * checks. Caller has already written the opening quote.
 */
private final void _writeLongQuotedAndEscaped(String text, char esc) throws JacksonException
{
    final int[] escCodes = _outputEscapes;
    final int escLen = escCodes.length;
    final int len = text.length();

    // NOTE: caller should guarantee quote char is valid (not -1) at this point:
    final char q = (char) _cfgQuoteCharacter;
    // 23-Sep-2020, tatu: Why was this defined but not used? Commented out in 2.11.3
    //  final char quoteEscape = _cfgEscapeQuoteCharWithEscapeChar ? esc : q;
    for (int i = 0; i < len; ++i) {
        if (_outputTail >= _outputEnd) {
            _flushBuffer();
        }
        char c = text.charAt(i);
        if (c < escLen) {
            int escCode = escCodes[c];
            if (escCode != 0) { // for escape control and double quotes, c will be 0
                _appendCharacterEscape(c, escCode);
                continue; // for
            }
        }
        if (c == q) { // double up
            _outputBuffer[_outputTail++] = _cfgQuoteCharEscapeChar;
            if (_outputTail >= _outputEnd) {
                _flushBuffer();
            }
        } else if (c == esc) { // double up
            _outputBuffer[_outputTail++] = _cfgControlCharEscapeChar;
            if (_outputTail >= _outputEnd) {
                _flushBuffer();
            }
        }
        _outputBuffer[_outputTail++] = c;
    }
    // closing quote
    if (_outputTail >= _outputEnd) {
        _flushBuffer();
    }
    _outputBuffer[_outputTail++] = q;
}
/*
/**********************************************************************
/* Writer API, state changes
/**********************************************************************
*/
/**
 * Flushes buffered content to the underlying {@code Writer}; optionally
 * also flushes the {@code Writer} itself.
 *
 * @param flushStream Whether to also flush the underlying stream
 */
public void flush(boolean flushStream) throws IOException
{
    _flushBuffer();
    if (!flushStream) {
        return;
    }
    _out.flush();
}
/**
 * Flushes remaining output and releases buffers; optionally closes or
 * flushes the underlying {@code Writer}.
 *
 * @param autoClose Whether the underlying {@code Writer} should be closed
 * @param flushStream Whether (if not closing) the {@code Writer} should be flushed
 */
public void close(boolean autoClose, boolean flushStream) throws IOException
{
    // May need to remove the linefeed appended after the last row written
    // (if not yet done)
    if (!CsvWriteFeature.WRITE_LINEFEED_AFTER_LAST_ROW.enabledIn(_csvFeatures)) {
        _removeTrailingLF();
    }
    try {
        _flushBuffer();
    } finally {
        if (autoClose) {
            _out.close();
        } else if (flushStream) {
            // If we can't close it, we should at least flush
            _out.flush();
        }
        // Internal buffer(s) generator has can now be released as well
        _releaseBuffers();
    }
}
/**
 * Backs the output pointer up over the line separator appended after the
 * last row, if that has not been done yet. Only effective while the
 * separator is still in the output buffer (i.e. not yet flushed).
 */
private void _removeTrailingLF() throws IOException {
    if (_trailingLFRemoved) {
        return;
    }
    _trailingLFRemoved = true;
    final int newTail = _outputTail - _cfgLineSeparatorLength;
    _outputTail = (newTail < 0) ? 0 : newTail;
}
/*
/**********************************************************************
/* Internal methods
/**********************************************************************
*/
/**
 * Helper method that determines whether given String is likely
 * to require quoting; check tries to optimize for speed.
 *
 * @param value Column value to check
 * @param length Length of {@code value} (pre-computed by caller)
 * @param columnIndex Index of the column being written (0-based)
 */
protected boolean _mayNeedQuotes(String value, int length, int columnIndex)
{
    // 21-Mar-2014, tatu: If quoting disabled, don't quote
    if (_cfgQuoteCharacter < 0) {
        return false;
    }
    // [dataformats-text#210]: check for leading/trailing whitespace
    if (_cfgQuoteLeadingTrailingWhitespace && length > 0) {
        char first = value.charAt(0);
        char last = value.charAt(length - 1);
        if (first <= ' ' || last <= ' ') {
            return true;
        }
    }
    // may skip checks unless we want exact checking
    if (_cfgOptimalQuoting) {
        // 31-Dec-2014, tatu: Comment lines start with # so quote if starts with #
        // 28-May-2021, tatu: As per [dataformats-text#270] only check if first column
        if (_cfgAllowsComments && (columnIndex == 0)
                && (length > 0) && (value.charAt(0) == '#')) {
            return true;
        }
        if (_cfgEscapeCharacter > 0) {
            return _needsQuotingStrict(value, _cfgEscapeCharacter);
        }
        return _needsQuotingStrict(value);
    }
    // non-strict mode: long values are always quoted, skipping the scan
    if (length > _cfgMaxQuoteCheckChars) {
        return true;
    }
    // NOTE(review): `_cfgAlwaysQuoteEmptyStrings` is only consulted below,
    // on the loose path WITHOUT an escape character; with an escape char
    // configured (or in strict mode above) empty values are never
    // force-quoted — confirm this asymmetry is intended.
    if (_cfgEscapeCharacter > 0) {
        return _needsQuotingLoose(value, _cfgEscapeCharacter);
    }
    if (_cfgAlwaysQuoteEmptyStrings && length == 0) {
        return true;
    }
    return _needsQuotingLoose(value);
}
/**
 * Loose (approximate) quoting check: quoting is needed if the value
 * contains any character below the minimum "safe" code point, or either
 * of the configured escape characters.
 *
 * @since 2.4
 */
protected final boolean _needsQuotingLoose(String value)
{
    final char quoteEsc = _cfgQuoteCharEscapeChar;
    final char ctrlEsc = _cfgControlCharEscapeChar;
    final int end = value.length();
    for (int i = 0; i < end; ++i) {
        final char ch = value.charAt(i);
        if (ch < _cfgMinSafeChar || ch == quoteEsc || ch == ctrlEsc) {
            return true;
        }
    }
    return false;
}
/**
 * Loose (approximate) quoting check, variant used when an explicit
 * escape character is configured: quoting is needed if the value contains
 * any character below the minimum "safe" code point, or the escape char.
 */
protected final boolean _needsQuotingLoose(String value, int esc)
{
    final int end = value.length();
    for (int i = 0; i < end; ++i) {
        final int ch = value.charAt(i);
        if (ch == esc || ch < _cfgMinSafeChar) {
            return true;
        }
    }
    return false;
}
/**
 * Strict (exact) quoting check: quoting is needed only if the value
 * contains the column separator, quote character, a custom-escaped
 * character, or a line separator (LF/CR or configured separator).
 */
protected boolean _needsQuotingStrict(String value)
{
    final int minSafe = _cfgMinSafeChar;
    final int[] escCodes = _outputEscapes;
    final int escLen = escCodes.length;

    // 23-Sep-2020, tatu: [dataformats-text#217] Must also ensure line separator
    //   leads to quoting
    // 17-Dec-2025, tatu: [dataformats-text#479] Must check for ALL line separators
    //   (LF, CR) per RFC 4180, not just the configured schema line separator.
    //   NOTE: escCodes is empty by default (unless custom CharacterEscapes provided),
    //   so we cannot rely on `escCodes[c] != 0` check to catch newlines - we must
    //   check for them explicitly.
    final int lfFirst = (_cfgLineSeparatorLength == 0) ? 0 : _cfgLineSeparator[0];
    for (int i = 0, len = value.length(); i < len; ++i) {
        int c = value.charAt(i);
        if (c < minSafe) { // only chars below "safe" threshold can need quoting
            if (c == _cfgColumnSeparator || c == _cfgQuoteCharacter
                    || (c < escLen && escCodes[c] != 0)
                    || (c == lfFirst)
                    // Per RFC 4180: must quote if contains LF or CR
                    || (c == '\n') || (c == '\r')) {
                return true;
            }
        }
    }
    return false;
}
/**
 * Strict (exact) quoting check, variant used when an explicit escape
 * character is configured: in addition to separators/quotes/line breaks,
 * presence of the escape character forces quoting.
 */
protected boolean _needsQuotingStrict(String value, int esc)
{
    final int minSafe = _cfgMinSafeChar;
    final int[] escCodes = _outputEscapes;
    final int escLen = escCodes.length;

    // 23-Sep-2020, tatu: [dataformats-text#217] Must also ensure line separator
    //   leads to quoting
    // 17-Dec-2025, tatu: [dataformats-text#479] Must check for ALL line separators
    //   (LF, CR) per RFC 4180, not just the configured schema line separator.
    //   NOTE: escCodes is empty by default (unless custom CharacterEscapes provided),
    //   so we cannot rely on `escCodes[c] != 0` check to catch newlines - we must
    //   check for them explicitly.
    final int lfFirst = (_cfgLineSeparatorLength == 0) ? 0 : _cfgLineSeparator[0];
    for (int i = 0, len = value.length(); i < len; ++i) {
        int c = value.charAt(i);
        if (c < minSafe) {
            if (c == _cfgColumnSeparator || c == _cfgQuoteCharacter
                    || (c < escLen && escCodes[c] != 0)
                    || (c == lfFirst)
                    // Per RFC 4180: must quote if contains LF or CR
                    || (c == '\n') || (c == '\r')) {
                return true;
            }
        } else if (c == esc) { // escape char itself also forces quoting
            return true;
        }
    }
    return false;
}
/**
 * Stores given value for out-of-order column writes, growing the buffer
 * array if the column index falls outside it.
 */
protected void _buffer(int index, BufferedValue v)
{
    if (index > _lastBuffered) {
        _lastBuffered = index;
    }
    if (index >= _buffered.length) {
        // grow to at least full column count to limit reallocations
        final int newSize = Math.max(index + 1, _columnCount);
        _buffered = Arrays.copyOf(_buffered, newSize);
    }
    _buffered[index] = v;
}
/**
 * Writes any buffered output to the underlying {@code Writer} and resets
 * the buffer pointer; also updates total written-character count.
 */
protected void _flushBuffer() throws JacksonException
{
    if (_outputTail > 0) {
        _charsWritten += _outputTail;
        try {
            _out.write(_outputBuffer, 0, _outputTail);
        } catch (IOException e) {
            throw _wrapIOFailure(e);
        }
        _outputTail = 0;
    }
}
/**
 * Returns the recyclable output buffer (if any) to the {@code IOContext}
 * buffer pool; safe to call multiple times.
 */
public void _releaseBuffers()
{
    final char[] released = _outputBuffer;
    if (_bufferRecyclable && (released != null)) {
        _outputBuffer = null; // prevent double release
        _ioContext.releaseConcatBuffer(released);
    }
}
/**
 * Method called to append escape sequence for given character, at the
 * end of standard output buffer; or if not possible, write out directly.
 *
 * @param ch Character to escape
 * @param escCode Non-negative: single escape char to follow backslash;
 *   negative: write as backslash-u hex escape
 */
private void _appendCharacterEscape(char ch, int escCode)
    throws JacksonException
{
    if (escCode >= 0) { // \N (2 char)
        if ((_outputTail + 2) > _outputEnd) {
            _flushBuffer();
        }
        _outputBuffer[_outputTail++] = _cfgControlCharEscapeChar;
        _outputBuffer[_outputTail++] = (char) escCode;
        return;
    }
    // generic "\\uXXXX" escape: needs 6 chars of room
    if ((_outputTail + 5) >= _outputEnd) {
        _flushBuffer();
    }
    int ptr = _outputTail;
    char[] buf = _outputBuffer;
    buf[ptr++] = '\\';
    buf[ptr++] = 'u';
    // We know it's a control char, so only the last 2 chars are non-0
    if (ch > 0xFF) { // beyond 8 bytes
        int hi = (ch >> 8) & 0xFF;
        buf[ptr++] = HEX_CHARS[hi >> 4];
        buf[ptr++] = HEX_CHARS[hi & 0xF];
        ch &= 0xFF;
    } else {
        buf[ptr++] = '0';
        buf[ptr++] = '0';
    }
    buf[ptr++] = HEX_CHARS[ch >> 4];
    buf[ptr++] = HEX_CHARS[ch & 0xF];
    _outputTail = ptr;
    return;
}
// @since 3.0: defined by basic JsonParser/JsonGenerator but since we are
//    not extending need to copy here
/**
 * Wraps a low-level {@link IOException} in Jackson's unchecked
 * {@code JacksonIOException}, preserving it as the cause.
 */
protected JacksonException _wrapIOFailure(IOException e) {
    return JacksonIOException.construct(e);
}
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/impl/CsvParserBootstrapper.java
================================================
package tools.jackson.dataformat.csv.impl;
import java.io.*;
import tools.jackson.core.*;
import tools.jackson.core.exc.JacksonIOException;
import tools.jackson.core.io.IOContext;
import tools.jackson.core.io.MergedStream;
import tools.jackson.core.io.UTF32Reader;
import tools.jackson.dataformat.csv.CsvParser;
import tools.jackson.dataformat.csv.CsvSchema;
/**
 * This class is used to determine the encoding of byte stream
 * that is to contain CSV document.
 * Since there is no real specification for how this should work
 * with CSV, it will be based on rules used with JSON (which themselves
 * are similar to those used with XML); main points are to check for
 * BOM first, then look for multi-byted fixed-length encodings
 * (UTF-16, UTF-32). And finally, if neither found, must decide
 * between most likely alternatives, UTF-8 and Latin-1.
 */
public final class CsvParserBootstrapper
{
    final static byte UTF8_BOM_1 = (byte) 0xEF;
    final static byte UTF8_BOM_2 = (byte) 0xBB;
    final static byte UTF8_BOM_3 = (byte) 0xBF;

    /*
    /**********************************************************************
    /* Configuration
    /**********************************************************************
     */

    protected final IOContext _context;

    /*
    /**********************************************************************
    /* Input buffering
    /**********************************************************************
     */

    // Underlying stream to read more bytes from; null if bootstrapping
    // from an already-complete byte[]
    protected final InputStream _in;

    protected final byte[] _inputBuffer;

    private int _inputPtr;

    private int _inputEnd;

    /*
    /**********************************************************************
    /* Input location
    /**********************************************************************
     */

    /**
     * Current number of input units (bytes or chars) that were processed in
     * previous blocks,
     * before contents of current input buffer.
     *
     * Note: includes possible BOMs, if those were part of the input.
     */
    protected int _inputProcessed;

    /*
    /**********************************************************************
    /* Data gathered
    /**********************************************************************
     */

    protected boolean _bigEndian = true;
    protected int _bytesPerChar = 0; // 0 means "dunno yet"

    /*
    /**********************************************************************
    /* Life-cycle
    /**********************************************************************
     */

    public CsvParserBootstrapper(IOContext ctxt, InputStream in)
    {
        _context = ctxt;
        _in = in;
        _inputBuffer = ctxt.allocReadIOBuffer();
        _inputEnd = _inputPtr = 0;
        _inputProcessed = 0;
    }

    public CsvParserBootstrapper(IOContext ctxt,
            byte[] inputBuffer, int inputStart, int inputLen)
    {
        _context = ctxt;
        _in = null;
        _inputBuffer = inputBuffer;
        _inputPtr = inputStart;
        _inputEnd = (inputStart + inputLen);
        // Need to offset this for correct location info
        _inputProcessed = -inputStart;
    }

    /*
    /**********************************************************************
    /* Public API
    /**********************************************************************
     */

    /**
     * Detects input encoding (via BOM and/or multi-byte pattern sniffing)
     * and constructs a {@link CsvParser} over an appropriately decoding
     * {@link Reader}.
     */
    public CsvParser constructParser(ObjectReadContext readCtxt,
            int parserFeatures, int csvFeatures,
            CsvSchema schema)
        throws JacksonException
    {
        boolean foundEncoding = false;

        // First things first: BOM handling
        if (ensureLoaded(4)) {
            int quad = (_inputBuffer[_inputPtr] << 24)
                | ((_inputBuffer[_inputPtr+1] & 0xFF) << 16)
                | ((_inputBuffer[_inputPtr+2] & 0xFF) << 8)
                | (_inputBuffer[_inputPtr+3] & 0xFF);

            if (handleBOM(quad)) {
                foundEncoding = true;
            } else {
                /* If no BOM, let's see if it's a fixed-width multi-byte
                 * (since we can be fairly certain no CSV document should
                 * start with null bytes otherwise...)
                 */
                // UTF-32?
                if (checkUTF32(quad)) {
                    foundEncoding = true;
                } else if (checkUTF16(quad >>> 16)) {
                    foundEncoding = true;
                }
            }
        } else if (ensureLoaded(2)) {
            // Very short documents (2-3 bytes): can still detect UTF-16
            int i16 = ((_inputBuffer[_inputPtr] & 0xFF) << 8)
                | (_inputBuffer[_inputPtr+1] & 0xFF);
            if (checkUTF16(i16)) {
                foundEncoding = true;
            }
        }

        JsonEncoding enc;

        /* Not found yet? As per specs, this means it must be UTF-8. */
        if (!foundEncoding || _bytesPerChar == 1) {
            enc = JsonEncoding.UTF8;
        } else if (_bytesPerChar == 2) {
            enc = _bigEndian ? JsonEncoding.UTF16_BE : JsonEncoding.UTF16_LE;
        } else if (_bytesPerChar == 4) {
            enc = _bigEndian ? JsonEncoding.UTF32_BE : JsonEncoding.UTF32_LE;
        } else {
            throw new RuntimeException("Internal error"); // should never get here
        }
        _context.setEncoding(enc);
        final boolean autoClose = _context.isResourceManaged()
                || StreamReadFeature.AUTO_CLOSE_SOURCE.enabledIn(parserFeatures);
        return new CsvParser(readCtxt, _context,
                parserFeatures, csvFeatures, schema,
                _createReader(enc, autoClose));
    }

    @SuppressWarnings("resource")
    private Reader _createReader(JsonEncoding enc, boolean autoClose)
        throws JacksonException
    {
        switch (enc) {
        case UTF32_BE:
        case UTF32_LE:
            return new UTF32Reader(_context, _in, autoClose,
                    _inputBuffer, _inputPtr, _inputEnd, enc.isBigEndian());

        case UTF16_BE:
        case UTF16_LE:
            {
                // First: do we have a Stream? If not, need to create one:
                InputStream in = _in;

                if (in == null) {
                    // NOTE: third argument of ByteArrayInputStream is a LENGTH,
                    // not an end offset; passing `_inputEnd` here used to expose
                    // stale bytes past the valid range when bootstrapped from
                    // a byte[] slice with a non-zero start offset.
                    in = new ByteArrayInputStream(_inputBuffer, _inputPtr, _inputEnd - _inputPtr);
                } else {
                    // Also, if we have any read but unused input (usually true),
                    // need to merge that input in:
                    if (_inputPtr < _inputEnd) {
                        in = new MergedStream(_context, in, _inputBuffer, _inputPtr, _inputEnd);
                    }
                }
                try {
                    return new InputStreamReader(in, enc.getJavaName());
                } catch (IOException e) {
                    throw _wrapIOFailure(e);
                }
            }
        case UTF8:
            // Important: do not pass context, if we got byte[], nothing to release
            return new UTF8Reader((_in == null) ? null : _context, _in, autoClose,
                    _inputBuffer, _inputPtr, _inputEnd - _inputPtr);
        default:
            throw new RuntimeException("Internal error: unrecognized encoding: "+enc);
        }
    }

    /*
    /**********************************************************************
    /* Internal methods, parsing
    /**********************************************************************
     */

    /**
     * @return True if a BOM was successfully found, and encoding
     *   thereby recognized.
     */
    private boolean handleBOM(int quad) throws JacksonException
    {
        // Handling of (usually) optional BOM (required for
        // multi-byte formats); first 32-bit charsets:
        switch (quad) {
        case 0x0000FEFF: // UCS-4, BE
            _bigEndian = true;
            _inputPtr += 4;
            _bytesPerChar = 4;
            return true;
        case 0xFFFE0000: // UCS-4, LE?
            _inputPtr += 4;
            _bytesPerChar = 4;
            _bigEndian = false;
            return true;
        case 0x0000FFFE: // UCS-4, in-order...
            reportWeirdUCS4("2143"); // throws exception (no fall-through)
        case 0xFEFF0000: // UCS-4, in-order...
            reportWeirdUCS4("3412"); // throws exception
        }
        // Ok, if not, how about 16-bit encoding BOMs?
        int msw = quad >>> 16;
        if (msw == 0xFEFF) { // UTF-16, BE
            _inputPtr += 2;
            _bytesPerChar = 2;
            _bigEndian = true;
            return true;
        }
        if (msw == 0xFFFE) { // UTF-16, LE
            _inputPtr += 2;
            _bytesPerChar = 2;
            _bigEndian = false;
            return true;
        }
        // And if not, then UTF-8 BOM?
        if ((quad >>> 8) == 0xEFBBBF) { // UTF-8
            _inputPtr += 3;
            _bytesPerChar = 1;
            _bigEndian = true; // doesn't really matter
            return true;
        }
        return false;
    }

    /**
     * Checks whether the first four bytes look like UTF-32 content
     * (by position of null bytes), even without a BOM.
     */
    private boolean checkUTF32(int quad) throws JacksonException
    {
        /* Handling of (usually) optional BOM (required for
         * multi-byte formats); first 32-bit charsets:
         */
        if ((quad >> 8) == 0) { // 0x000000?? -> UTF32-BE
            _bigEndian = true;
        } else if ((quad & 0x00FFFFFF) == 0) { // 0x??000000 -> UTF32-LE
            _bigEndian = false;
        } else if ((quad & ~0x00FF0000) == 0) { // 0x00??0000 -> UTF32-in-order
            reportWeirdUCS4("3412");
        } else if ((quad & ~0x0000FF00) == 0) { // 0x0000??00 -> UTF32-in-order
            reportWeirdUCS4("2143");
        } else {
            // Can not be valid UTF-32 encoded JSON...
            return false;
        }
        // Not BOM (just regular content), nothing to skip past:
        //_inputPtr += 4;
        _bytesPerChar = 4;
        return true;
    }

    /**
     * Checks whether the first two bytes look like UTF-16 content
     * (by position of a null byte), even without a BOM.
     */
    private boolean checkUTF16(int i16)
    {
        if ((i16 & 0xFF00) == 0) { // UTF-16BE
            _bigEndian = true;
        } else if ((i16 & 0x00FF) == 0) { // UTF-16LE
            _bigEndian = false;
        } else { // nope, not UTF-16
            return false;
        }
        // Not BOM (just regular content), nothing to skip past:
        //_inputPtr += 2;
        _bytesPerChar = 2;
        return true;
    }

    /*
    /**********************************************************************
    /* Internal methods, problem reporting
    /**********************************************************************
     */

    private void reportWeirdUCS4(String type) throws JacksonException {
        throw _createIOFailure("Unsupported UCS-4 endianness ("+type+") detected");
    }

    /*
    /**********************************************************************
    /* Internal methods, raw input access
    /**********************************************************************
     */

    /**
     * Tries to ensure at least {@code minimum} bytes are available in the
     * input buffer, reading more from the underlying stream as needed.
     *
     * @return True if at least {@code minimum} bytes are available;
     *   false if end-of-input was reached first
     */
    protected boolean ensureLoaded(int minimum) throws JacksonException
    {
        /* Let's assume here buffer has enough room -- this will always
         * be true for the limited use this method gets
         */
        int gotten = (_inputEnd - _inputPtr);
        while (gotten < minimum) {
            int count;

            if (_in == null) { // block source; nothing more to read
                count = -1;
            } else {
                try {
                    count = _in.read(_inputBuffer, _inputEnd, _inputBuffer.length - _inputEnd);
                } catch (IOException e) {
                    throw _wrapIOFailure(e);
                }
            }
            if (count < 1) {
                return false;
            }
            _inputEnd += count;
            gotten += count;
        }
        return true;
    }

    /*
    /**********************************************************************
    /* Internal methods, exception handling
    /**********************************************************************
     */

    private static JacksonException _createIOFailure(String msg) throws JacksonException {
        // 12-Jan-2021, tatu: Couple of alternatives, but since this is before
        //    actual parser created, seems best to simply fake this was "true"
        //    IOException
        return _wrapIOFailure(new IOException(msg));
    }

    private static JacksonException _wrapIOFailure(IOException e) throws JacksonException {
        return JacksonIOException.construct(e);
    }
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/impl/UTF8Reader.java
================================================
package tools.jackson.dataformat.csv.impl;
import java.io.*;
import java.util.Objects;
import tools.jackson.core.io.IOContext;
/**
* Optimized Reader that reads UTF-8 encoded content from an input stream.
* In addition to doing (hopefully) optimal conversion, it can also take
* array of "pre-read" (leftover) bytes; this is necessary when preliminary
* stream/reader is trying to figure out underlying character encoding.
*/
public final class UTF8Reader
extends Reader
{
/**
* IO context to use for returning input buffer, iff
* buffer is to be recycled when input ends.
*/
private final IOContext _ioContext;
private InputStream _inputSource;
private final boolean _autoClose;
private byte[] _inputBuffer;
/**
* Flag set to indicate {@code inputBuffer} is read-only, and its
* content should not be modified. This is the case when caller
* has passed in a buffer of contents already read, instead of Jackson
* allocating read buffer.
*
* @since 2.19
*/
private final boolean _inputBufferReadOnly;
/**
* Pointer to the next available byte (if any), iff less than
* mByteBufferEnd
*/
private int _inputPtr;
/**
* Pointed to the end marker, that is, position one after the last
* valid available byte.
*/
private int _inputEnd;
/**
* Decoded first character of a surrogate pair, if one needs to be buffered
*/
private int _surrogate = -1;
/**
* Total read character count; used for error reporting purposes
*/
private int _charCount = 0;
/**
* Total read byte count; used for error reporting purposes
*/
private int _byteCount = 0;
/**
* Flag that is set when a pending decode error has been detected; needed
* to properly handle deferred reporting.
*/
private int _decodeErrorOffset;
/*
/**********************************************************************
/* Life-cycle
/**********************************************************************
*/
/**
 * Constructor used when both an {@code InputStream} and pre-read
 * ("leftover") bytes from encoding detection may be available.
 *
 * @param ctxt IO context for buffer recycling; may be null
 * @param in Stream to read more bytes from; null if only buffered content
 * @param autoClose Whether {@link #close} should close {@code in}
 * @param buf Buffer with pre-read bytes
 * @param ptr Offset of first valid byte in {@code buf}
 * @param len Number of valid bytes starting at {@code ptr}
 */
public UTF8Reader(IOContext ctxt, InputStream in, boolean autoClose,
        byte[] buf, int ptr, int len)
{
    super((in == null) ? buf : in); // Reader's lock object
    _ioContext = ctxt;
    _inputSource = in;
    _inputBuffer = buf;
    _inputPtr = ptr;
    _inputEnd = ptr+len;
    _autoClose = autoClose;
    // Unmodifiable if there is no stream to actually read from
    // (ideally caller should pass explicitly)
    _inputBufferReadOnly = (in == null);
}
/**
 * Constructor used when all content is already available in a byte
 * buffer; no underlying stream. Buffer is treated as read-only.
 */
public UTF8Reader(IOContext ctxt, byte[] buf, int ptr, int len)
{
    super(new Object()); // no stream/buffer to use as Reader's lock object
    _ioContext = ctxt;
    _inputSource = null;
    _inputBuffer = buf;
    _inputPtr = ptr;
    _inputEnd = ptr+len;
    _autoClose = true;
    // This is the case when we have a buffer of contents already read
    _inputBufferReadOnly = true;
}
/**
 * Constructor used when reading purely from a stream, with a read buffer
 * allocated (and later recycled) via the {@code IOContext}.
 */
public UTF8Reader(IOContext ctxt, InputStream in, boolean autoClose)
{
    super(in); // Reader's lock object
    _ioContext = ctxt;
    _inputSource = in;
    _inputBuffer = ctxt.allocReadIOBuffer();
    _inputPtr = 0;
    _inputEnd = 0;
    _autoClose = autoClose;
    // Buffer allocated above, modifiable as needed
    _inputBufferReadOnly = false;
}
/*
/**********************************************************************
/* Reader API
/**********************************************************************
*/
/**
 * Closes the underlying stream (if any, and if auto-close enabled) and
 * releases the recyclable input buffer. Safe to call multiple times.
 */
@Override
public void close() throws IOException
{
    final InputStream src = _inputSource;
    if (src != null) {
        _inputSource = null; // mark closed regardless of autoClose setting
        if (_autoClose) {
            src.close();
        }
    }
    freeBuffers();
}
private char[] _tmpBuffer = null;
/**
 * Although this method is implemented by the base class, AND it should
 * never be called by parser code, let's still implement it bit more
 * efficiently just in case: delegates to the bulk read via a lazily
 * allocated single-char scratch buffer.
 */
@Override
public int read() throws IOException
{
    char[] tmp = _tmpBuffer;
    if (tmp == null) {
        _tmpBuffer = tmp = new char[1];
    }
    if (read(tmp, 0, 1) < 1) {
        return -1; // EOF
    }
    return tmp[0];
}
/**
 * Reads into the whole given buffer; delegates to the main decode method.
 */
@Override
public int read(char[] cbuf) throws IOException {
    final int max = cbuf.length;
    return read(cbuf, 0, max);
}
/**
 * Main decode method: decodes UTF-8 bytes from the input buffer (loading
 * more from the stream only when necessary) into {@code cbuf}.
 * Decode errors are reported via helper methods which may defer reporting
 * so that already-decoded content can be returned first.
 *
 * @return Number of chars decoded, or -1 at end of input
 */
@Override
public int read(final char[] cbuf, final int start, int len) throws IOException
{
    // validate input parameters
    Objects.requireNonNull(cbuf, "cbuf == null");
    Objects.checkFromIndexSize(start, len, cbuf.length);

    // Already EOF?
    if (_inputBuffer == null) {
        return -1;
    } else if (len == 0) {
        // if len=0, we don't need to return anything
        return 0;
    }
    len += start; // NOTE: from here on `len` is the exclusive END index in cbuf
    int outPtr = start;

    // Ok, first; do we have a surrogate from last round?
    if (_surrogate >= 0) {
        cbuf[outPtr++] = (char) _surrogate;
        _surrogate = -1;
        // No need to load more, already got one char
        // 15-Sep-2022, tatu: But need to avoid having empty buffer
        if (_inputPtr >= _inputEnd) {
            _charCount += 1;
            return 1;
        }
        // otherwise let things work the way they should
    } else {
        // report any error deferred from the previous call
        if (_decodeErrorOffset != 0) {
            reportDeferredInvalid();
        }
        // To prevent unnecessary blocking (esp. with network streams),
        // we'll only require decoding of a single char
        int left = (_inputEnd - _inputPtr);

        // So; only need to load more if we can't provide at least one more character.
        // We need not do thorough check here, but let's check the common cases here:
        // either completely empty buffer (left == 0), or one with less than max. byte
        // count for a single char, and starting of a multi-byte encoding (this leaves
        // possibility of a 2/3-byte char that is still fully accessible...
        // but that can be checked by the load method)
        if (left < 4) {
            // Need to load more?
            if (left < 1 || _inputBuffer[_inputPtr] < 0) {
                if (!loadMore(left)) { // (legal) EOF?
                    return -1;
                }
            }
        }
    }

    final byte[] buf = _inputBuffer;
    int inPtr = _inputPtr;
    final int inBufLen = _inputEnd;

    main_loop:
    while (outPtr < len) {
        // At this point we have at least one byte available
        int c = buf[inPtr++];

        // Let's first do the quickie loop for common case; 7-bit ASCII
        if (c >= 0) { // ASCII? can probably loop, then
            cbuf[outPtr++] = (char) c; // ok since MSB is never on

            // Ok, how many such chars could we safely process without overruns?
            // (will combine 2 in-loop comparisons into just one)
            int outMax = (len - outPtr); // max output
            int inMax = (inBufLen - inPtr); // max input
            int inEnd = inPtr + ((inMax < outMax) ? inMax : outMax);

            ascii_loop:
            while (true) {
                if (inPtr >= inEnd) {
                    break main_loop;
                }
                c = buf[inPtr++];
                if (c < 0) { // or multi-byte
                    break ascii_loop;
                }
                cbuf[outPtr++] = (char) c;
            }
        }

        int needed; // number of continuation bytes for this lead byte

        // Ok; if we end here, we got multi-byte combination
        if ((c & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF), from 110xxxxx
            c = (c & 0x1F);
            needed = 1;
        } else if ((c & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF), from 1110xxxx
            c = (c & 0x0F);
            needed = 2;
        } else if ((c & 0xF8) == 0xF0) { // 4 bytes from 11110xxx (double-char w/ surrogates and all)
            c = (c & 0x0F);
            needed = 3;
        } else {
            reportInvalidInitial(c & 0xFF, outPtr-start);
            // 25-Aug-2016, tatu: As per [dataformat-csv#132], only returns
            //    if we are ok to return already decoded content and defer error reporting
            --inPtr;
            break main_loop;
        }

        /* Do we have enough bytes? If not, let's just push back the
         * byte and leave, since we have already gotten at least one
         * char decoded. This way we will only block (with read from
         * input stream) when absolutely necessary.
         */
        if ((inBufLen - inPtr) < needed) {
            --inPtr;
            break main_loop;
        }

        int d = buf[inPtr]; // 2nd byte
        if ((d & 0xC0) != 0x080) { // not a valid continuation byte (10xxxxxx)
            reportInvalidOther(d & 0xFF, outPtr-start, 2);
            break main_loop;
        }
        ++inPtr;
        c = (c << 6) | (d & 0x3F);

        if (needed > 1) { // needed == 1 means 2 bytes total
            d = buf[inPtr]; // 3rd byte
            if ((d & 0xC0) != 0x080) {
                reportInvalidOther(d & 0xFF, outPtr-start, 3);
                break main_loop;
            }
            ++inPtr;
            c = (c << 6) | (d & 0x3F);
            if (needed > 2) { // 4 bytes? (need surrogates)
                d = buf[inPtr]; // 4th byte
                if ((d & 0xC0) != 0x080) {
                    reportInvalidOther(d & 0xFF, outPtr-start, 4);
                    break main_loop;
                }
                ++inPtr;
                c = (c << 6) | (d & 0x3F);
                /* Ugh. Need to mess with surrogates. Ok; let's inline them
                 * there, then, if there's room: if only room for one,
                 * need to save the surrogate for the rainy day...
                 */
                c -= 0x10000; // to normalize it starting with 0x0
                cbuf[outPtr++] = (char) (0xD800 + (c >> 10)); // high surrogate
                // hmmh. can this ever be 0? (not legal, at least?)
                c = (0xDC00 | (c & 0x03FF)); // low surrogate

                // Room for second part?
                if (outPtr >= len) { // nope: buffer it for the next call
                    _surrogate = c;
                    break main_loop;
                }
                // sure, let's fall back to normal processing:
            }
            // Otherwise, should we check that 3-byte chars are
            // legal ones (should not expand to surrogates?
            // For now, let's not...
            /*
            else {
                if (c >= 0xD800 && c < 0xE000) {
                    reportInvalid(c, outPtr-start, "(a surrogate character) ");
                }
            }
            */
        }
        cbuf[outPtr++] = (char) c;
        if (inPtr >= inBufLen) {
            break main_loop;
        }
    }

    _inputPtr = inPtr;
    final int actualLen = outPtr - start;
    _charCount += actualLen;
    return actualLen;
}
/*
/**********************************************************************
/* Internal/package methods:
/**********************************************************************
*/
// Accessor for underlying InputStream; may be null (readBytes()/readBytesAt() guard for that)
protected final InputStream getStream() { return _inputSource; }
/**
 * Reads a fresh chunk from the underlying stream into the start of the
 * input buffer, resetting both buffer pointers first.
 *
 * @return Number of bytes read and made available; 0 if stream returned
 *   nothing, -1 on EOF or if there is no stream to read from
 */
protected final int readBytes()
    throws IOException
{
    _inputPtr = 0;
    _inputEnd = 0;
    final InputStream in = _inputSource;
    if (in == null) {
        return -1;
    }
    final int count = in.read(_inputBuffer, 0, _inputBuffer.length);
    if (count > 0) {
        _inputEnd = count;
    }
    return count;
}
/**
 * Reads more bytes from the underlying stream, appending at the given
 * buffer offset; the input pointer itself is left untouched (it is
 * assumed to relate to {@code offset}).
 *
 * @param offset Offset in input buffer to append read content at
 * @return Number of bytes read, if any; -1 to indicate none available
 *   (that is, end of input)
 */
protected final int readBytesAt(int offset)
    throws IOException
{
    final InputStream in = _inputSource;
    if (in == null) {
        return -1;
    }
    final int count = in.read(_inputBuffer, offset, _inputBuffer.length - offset);
    if (count > 0) {
        _inputEnd += count;
    }
    return count;
}
/**
 * Releases the read buffer (if still held) back to the owning context,
 * for possible recycling. Should be called along with (or instead of)
 * normal close; no further reads should be attempted afterwards.
 * Safe to call multiple times.
 */
public final void freeBuffers()
{
    if (_ioContext == null) {
        return;
    }
    final byte[] buf = _inputBuffer;
    if (buf != null) {
        _inputBuffer = null;
        _ioContext.releaseReadIOBuffer(buf);
    }
}
/*
/**********************************************************************
/* Internal methods
/**********************************************************************
*/
/**
 * Tries to ensure that bytes for at least one complete UTF-8 encoded
 * character are available in the input buffer, reading more from the
 * underlying stream if necessary.
 *
 * @param available Number of "unused" bytes in the input buffer
 *
 * @return True, if enough bytes were read to allow decoding of at least
 * one full character; false if EOF was encountered instead.
 */
private boolean loadMore(int available) throws IOException
{
// Bytes before the unconsumed remainder are now permanently accounted for:
_byteCount += (_inputEnd - available);
if (available > 0) {
// Should we move bytes to the beginning of buffer?
if (_inputPtr > 0) {
// Can only do so if buffer mutable
if (!_inputBufferReadOnly) {
for (int i = 0; i < available; ++i) {
_inputBuffer[i] = _inputBuffer[_inputPtr+i];
}
_inputPtr = 0;
_inputEnd = available;
}
}
} else {
// Ok; here we can actually reasonably expect an EOF, so let's do a separate read right away:
int count = readBytes();
if (count < 1) {
freeBuffers(); // to help GC?
if (count < 0) { // -1
return false;
}
// 0 count is no good; let's err out
reportStrangeStream();
}
}
// We now have at least one byte... and that allows us to
// calculate exactly how many bytes we need!
@SuppressWarnings("cast")
int c = (int) _inputBuffer[_inputPtr];
if (c >= 0) { // single byte (ascii) char... cool, can return
return true;
}
// Ok, a multi-byte char, let's check how many bytes we'll need:
int needed;
if ((c & 0xE0) == 0xC0) { // 2 bytes (0x0080 - 0x07FF)
needed = 2;
} else if ((c & 0xF0) == 0xE0) { // 3 bytes (0x0800 - 0xFFFF)
needed = 3;
} else if ((c & 0xF8) == 0xF0) {
// 4 bytes; double-char BS, with surrogates and all...
needed = 4;
} else {
// 25-Aug-2016, tatu: As per [dataformat-csv#132], let's not throw
// exception from here but let caller handle (invalid lead byte)
return true;
}
// And then we'll just need to load up to that many bytes;
// if an EOF is hit, that'll be an error. But we need not do
// actual decoding here, just load enough bytes.
while ((_inputPtr + needed) > _inputEnd) {
int count = readBytesAt(_inputEnd);
if (count < 1) {
if (count < 0) { // -1, EOF... no good!
freeBuffers();
reportUnexpectedEOF(_inputEnd - _inputPtr, needed);
}
// 0 count is no good; let's err out
reportStrangeStream();
}
}
return true;
}
/**
 * Reports an invalid (start, len) range for the caller-supplied output buffer.
 */
protected void reportBounds(char[] cbuf, int start, int len) throws IOException {
throw new ArrayIndexOutOfBoundsException("read(buf,"+start+","+len+"), cbuf["+cbuf.length+"]");
}
/**
 * Reports the broken case of the underlying stream returning 0 bytes from a read.
 */
protected void reportStrangeStream() throws IOException {
throw new IOException("Strange I/O stream, returned 0 bytes on read");
}
/**
 * Called on an invalid UTF-8 lead byte. If some characters have already been
 * decoded in the current call ({@code outputDecoded > 0}) and no error is
 * pending, reporting is deferred (recorded in {@code _decodeErrorOffset}) so
 * decoded content can be returned first; otherwise throws
 * {@link CharConversionException} with byte/char positions.
 */
protected void reportInvalidInitial(int mask, int outputDecoded) throws IOException
{
// 25-Aug-2016, tatu: As per [dataformat-csv#132] defer error reporting if
// (but only if) some content has been decoded successfully
if (_decodeErrorOffset == 0) {
if (outputDecoded > 0) {
_decodeErrorOffset = 1;
return;
}
}
// input (byte) ptr has been advanced by one, by now:
int bytePos = _byteCount + _inputPtr - 1;
int charPos = _charCount + outputDecoded + 1;
throw new CharConversionException(String.format(
"Invalid UTF-8 start byte 0x%s (at char #%d, byte #%d): check content encoding, does not look like UTF-8",
Integer.toHexString(mask), charPos, bytePos));
}
/**
 * Called on an invalid UTF-8 continuation byte; {@code errorPosition}
 * indicates which byte of the multi-byte sequence was broken. Like
 * {@code reportInvalidInitial}, defers reporting (via
 * {@code _decodeErrorOffset}) when some content was already decoded.
 */
protected void reportInvalidOther(int mask, int outputDecoded, int errorPosition) throws IOException
{
// 25-Aug-2016, tatu: As per [dataformat-csv#132] defer error reporting if
// (but only if) some content has been decoded successfully
if (_decodeErrorOffset == 0) {
if (outputDecoded > 0) {
_decodeErrorOffset = errorPosition;
return;
}
}
int bytePos = _byteCount + _inputPtr - 1;
int charPos = _charCount + outputDecoded + 1;
throw new CharConversionException(String.format(
"Invalid UTF-8 middle byte 0x%s (at char #%d, byte #%d): check content encoding, does not look like UTF-8",
Integer.toHexString(mask), charPos, bytePos));
}
/**
 * Reports a decoding error that was deferred from an earlier call (so that
 * already-decoded content could be returned first): {@code _decodeErrorOffset}
 * of 1 means a bad lead byte; higher values, a bad continuation byte at
 * that position of the sequence.
 */
protected void reportDeferredInvalid() throws IOException
{
int ch = _inputBuffer[_inputPtr] & 0xFF;
if (_decodeErrorOffset == 1) {
reportInvalidInitial(ch, 0);
} else {
reportInvalidOther(ch, 0, _decodeErrorOffset);
}
}
/**
 * Reports EOF encountered in the middle of a multi-byte character.
 *
 * @param gotBytes Number of bytes of the sequence that were available
 * @param needed Total number of bytes the sequence requires
 */
protected void reportUnexpectedEOF(int gotBytes, int needed) throws IOException
{
int bytePos = _byteCount + gotBytes;
int charPos = _charCount;
throw new CharConversionException(String.format(
"Unexpected EOF in the middle of a multi-byte UTF-8 character: got %d, needed %d, at char #%d, byte #%d)",
gotBytes, needed, charPos, bytePos));
}
}
================================================
FILE: csv/src/main/java/tools/jackson/dataformat/csv/impl/UTF8Writer.java
================================================
package tools.jackson.dataformat.csv.impl;
import java.io.*;
import tools.jackson.core.io.IOContext;
/**
* Efficient UTF-8 backed writer.
*
* Note: original implementation based on writer from Jackson core package;
* modified slightly, copied to reduce dependency to impl details.
*/
public final class UTF8Writer
extends Writer
{
// UTF-16 surrogate-pair code unit ranges: first (high) and second (low) halves
final private static int SURR1_FIRST = 0xD800;
final private static int SURR1_LAST = 0xDBFF;
final private static int SURR2_FIRST = 0xDC00;
final private static int SURR2_LAST = 0xDFFF;
// Context the encoding buffer is allocated from (and released back to)
final private IOContext _context;
// Underlying stream; nulled out by close()
private OutputStream _out;
// Encoding buffer; nulled out once released back to _context
private byte[] _outBuffer;
// Last offset safe to append at: buffer length minus 4 "spare" bytes,
// so any single encoded code point fits without a mid-sequence check
final private int _outBufferEnd;
// Next append offset in _outBuffer
private int _outPtr;
/**
* When outputting chars from BMP, surrogate pairs need to be coalesced.
* To do this, both pairs must be known first; and since it is possible
* pairs may be split, we need temporary storage for the first half
*/
private int _surrogate = 0;
public UTF8Writer(IOContext ctxt, OutputStream out)
{
_context = ctxt;
_out = out;
_outBuffer = ctxt.allocWriteEncodingBuffer();
// Max. expansion for a single char (in unmodified UTF-8) is 4 bytes (or 3 depending
// on how you view it -- 4 when recombining surrogate pairs)
_outBufferEnd = _outBuffer.length - 4;
_outPtr = 0;
}
@Override
public Writer append(char c) throws IOException
{
write(c);
return this;
}
/**
* Flushes buffered bytes, releases the encoding buffer, closes the
* underlying stream; finally fails (throws) if an unpaired first
* surrogate half was left dangling.
*/
@Override
public void close() throws IOException
{
if (_out != null) {
if (_outPtr > 0) {
_out.write(_outBuffer, 0, _outPtr);
_outPtr = 0;
}
OutputStream out = _out;
_out = null;
byte[] buf = _outBuffer;
if (buf != null) {
_outBuffer = null;
_context.releaseWriteEncodingBuffer(buf);
}
out.close();
/* Let's 'flush' orphan surrogate, no matter what; but only
* after cleanly closing everything else.
*/
int code = _surrogate;
_surrogate = 0;
if (code > 0) {
throwIllegal(code);
}
}
}
// Writes out buffered bytes (if any), then flushes underlying stream
@Override
public void flush() throws IOException
{
if (_out != null) {
if (_outPtr > 0) {
_out.write(_outBuffer, 0, _outPtr);
_outPtr = 0;
}
_out.flush();
}
}
@Override
public void write(char[] cbuf) throws IOException {
write(cbuf, 0, cbuf.length);
}
/**
* Bulk write: UTF-8 encodes given char range; an ASCII fast path copies
* runs of 7-bit chars, and surrogate pairs (possibly split across calls,
* via {@code _surrogate}) are recombined into 4-byte sequences.
*/
@Override
public void write(char[] cbuf, int off, int len) throws IOException
{
if (len < 2) {
if (len == 1) {
write(cbuf[off]);
}
return;
}
// First: do we have a leftover surrogate to deal with?
if (_surrogate > 0) {
char second = cbuf[off++];
--len;
write(convertSurrogate(second));
// will have at least one more char
}
int outPtr = _outPtr;
byte[] outBuf = _outBuffer;
int outBufLast = _outBufferEnd; // has 4 'spare' bytes
// All right; can just loop it nice and easy now:
len += off; // len will now be the end of input buffer
output_loop:
for (; off < len; ) {
/* First, let's ensure we can output at least 4 bytes
* (longest UTF-8 encoded codepoint):
*/
if (outPtr >= outBufLast) {
_out.write(outBuf, 0, outPtr);
outPtr = 0;
}
int c = cbuf[off++];
// And then see if we have an ASCII char:
if (c < 0x80) { // If so, can do a tight inner loop:
outBuf[outPtr++] = (byte)c;
// Let's calc how many ascii chars we can copy at most:
int maxInCount = (len - off);
int maxOutCount = (outBufLast - outPtr);
if (maxInCount > maxOutCount) {
maxInCount = maxOutCount;
}
maxInCount += off;
ascii_loop:
while (true) {
if (off >= maxInCount) { // done with max. ascii seq
continue output_loop;
}
c = cbuf[off++];
if (c >= 0x80) {
break ascii_loop;
}
outBuf[outPtr++] = (byte) c;
}
}
// Nope, multi-byte:
if (c < 0x800) { // 2-byte
outBuf[outPtr++] = (byte) (0xc0 | (c >> 6));
outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
} else { // 3 or 4 bytes
// Surrogates?
if (c < SURR1_FIRST || c > SURR2_LAST) {
outBuf[outPtr++] = (byte) (0xe0 | (c >> 12));
outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
continue;
}
// Yup, a surrogate:
if (c > SURR1_LAST) { // must be from first range
_outPtr = outPtr;
throwIllegal(c);
}
_surrogate = c;
// and if so, followed by another from next range
if (off >= len) { // unless we hit the end?
break;
}
c = convertSurrogate(cbuf[off++]);
if (c > 0x10FFFF) { // illegal in JSON as well as in XML
_outPtr = outPtr;
throwIllegal(c);
}
outBuf[outPtr++] = (byte) (0xf0 | (c >> 18));
outBuf[outPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
}
}
_outPtr = outPtr;
}
/**
* Single-char write; a first surrogate half is only stashed in
* {@code _surrogate} (nothing output until its pair arrives).
*/
@Override
public void write(int c) throws IOException
{
// First; do we have a left over surrogate?
if (_surrogate > 0) {
c = convertSurrogate(c);
// If not, do we start with a surrogate?
} else if (c >= SURR1_FIRST && c <= SURR2_LAST) {
// Illegal to get second part without first:
if (c > SURR1_LAST) {
throwIllegal(c);
}
// First part just needs to be held for now
_surrogate = c;
return;
}
if (_outPtr >= _outBufferEnd) { // let's require enough room, first
_out.write(_outBuffer, 0, _outPtr);
_outPtr = 0;
}
if (c < 0x80) { // ascii
_outBuffer[_outPtr++] = (byte) c;
} else {
int ptr = _outPtr;
if (c < 0x800) { // 2-byte
_outBuffer[ptr++] = (byte) (0xc0 | (c >> 6));
_outBuffer[ptr++] = (byte) (0x80 | (c & 0x3f));
} else if (c <= 0xFFFF) { // 3 bytes
_outBuffer[ptr++] = (byte) (0xe0 | (c >> 12));
_outBuffer[ptr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
_outBuffer[ptr++] = (byte) (0x80 | (c & 0x3f));
} else { // 4 bytes
if (c > 0x10FFFF) { // illegal
throwIllegal(c);
}
_outBuffer[ptr++] = (byte) (0xf0 | (c >> 18));
_outBuffer[ptr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
_outBuffer[ptr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
_outBuffer[ptr++] = (byte) (0x80 | (c & 0x3f));
}
_outPtr = ptr;
}
}
@Override
public void write(String str) throws IOException {
write(str, 0, str.length());
}
/**
* Bulk write for String content: same logic as the char[] variant
* (ASCII fast path; surrogate pairs recombined, possibly across calls).
*/
@Override
public void write(String str, int off, int len) throws IOException
{
if (len < 2) {
if (len == 1) {
write(str.charAt(off));
}
return;
}
// First: do we have a leftover surrogate to deal with?
if (_surrogate > 0) {
char second = str.charAt(off++);
--len;
write(convertSurrogate(second));
// will have at least one more char (case of 1 char was checked earlier on)
}
int outPtr = _outPtr;
byte[] outBuf = _outBuffer;
int outBufLast = _outBufferEnd; // has 4 'spare' bytes
// All right; can just loop it nice and easy now:
len += off; // len will now be the end of input buffer
output_loop:
for (; off < len; ) {
/* First, let's ensure we can output at least 4 bytes
* (longest UTF-8 encoded codepoint):
*/
if (outPtr >= outBufLast) {
_out.write(outBuf, 0, outPtr);
outPtr = 0;
}
int c = str.charAt(off++);
// And then see if we have an Ascii char:
if (c < 0x80) { // If so, can do a tight inner loop:
outBuf[outPtr++] = (byte)c;
// Let's calc how many ascii chars we can copy at most:
int maxInCount = (len - off);
int maxOutCount = (outBufLast - outPtr);
if (maxInCount > maxOutCount) {
maxInCount = maxOutCount;
}
maxInCount += off;
ascii_loop:
while (true) {
if (off >= maxInCount) { // done with max. ascii seq
continue output_loop;
}
c = str.charAt(off++);
if (c >= 0x80) {
break ascii_loop;
}
outBuf[outPtr++] = (byte) c;
}
}
// Nope, multi-byte:
if (c < 0x800) { // 2-byte
outBuf[outPtr++] = (byte) (0xc0 | (c >> 6));
outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
} else { // 3 or 4 bytes
// Surrogates?
if (c < SURR1_FIRST || c > SURR2_LAST) {
outBuf[outPtr++] = (byte) (0xe0 | (c >> 12));
outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
continue;
}
// Yup, a surrogate:
if (c > SURR1_LAST) { // must be from first range
_outPtr = outPtr;
throwIllegal(c);
}
_surrogate = c;
// and if so, followed by another from next range
if (off >= len) { // unless we hit the end?
break;
}
c = convertSurrogate(str.charAt(off++));
if (c > 0x10FFFF) { // illegal, as per RFC 4627
_outPtr = outPtr;
throwIllegal(c);
}
outBuf[outPtr++] = (byte) (0xf0 | (c >> 18));
outBuf[outPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f));
outBuf[outPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f));
outBuf[outPtr++] = (byte) (0x80 | (c & 0x3f));
}
}
_outPtr = outPtr;
}
/*
/**********************************************************
/* Internal methods
/**********************************************************
*/
/**
* Method called to calculate UTF code point, from a surrogate pair.
* Clears the stashed first half; throws if second half is not in the
* low-surrogate range.
*/
private int convertSurrogate(int secondPart) throws IOException
{
int firstPart = _surrogate;
_surrogate = 0;
// Ok, then, is the second part valid?
if (secondPart < SURR2_FIRST || secondPart > SURR2_LAST) {
throw new IOException("Broken surrogate pair: first char 0x"+Integer.toHexString(firstPart)+", second 0x"+Integer.toHexString(secondPart)+"; illegal combination");
}
return 0x10000 + ((firstPart - SURR1_FIRST) << 10) + (secondPart - SURR2_FIRST);
}
/**
* Throws an {@link IOException} describing why given code point cannot
* be output (above 0x10FFFF, or an unmatched surrogate half).
*/
private void throwIllegal(int code) throws IOException
{
if (code > 0x10FFFF) { // over max?
throw new IOException("Illegal character point (0x"+Integer.toHexString(code)+") to output; max is 0x10FFFF as per RFC 4627");
}
if (code >= SURR1_FIRST) {
if (code <= SURR1_LAST) { // Unmatched first part (closing without second part?)
throw new IOException("Unmatched first part of surrogate pair (0x"+Integer.toHexString(code)+")");
}
throw new IOException("Unmatched second part of surrogate pair (0x"+Integer.toHexString(code)+")");
}
// should we ever get this?
throw new IOException("Illegal character point (0x"+Integer.toHexString(code)+") to output");
}
}
================================================
FILE: csv/src/main/resources/META-INF/LICENSE
================================================
This copy of Jackson JSON processor CSV module is licensed under the
Apache (Software) License, version 2.0 ("the License").
See the License for details about distribution rights, and the
specific rights regarding derivative works.
You may obtain a copy of the License at:
http://www.apache.org/licenses/LICENSE-2.0
================================================
FILE: csv/src/main/resources/META-INF/NOTICE
================================================
# Jackson JSON processor
Jackson is a high-performance, Free/Open Source JSON processing library.
It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has
been in development since 2007.
It is currently developed by a community of developers.
## Copyright
Copyright 2007-, Tatu Saloranta (tatu.saloranta@iki.fi)
## Licensing
Jackson components are licensed under Apache (Software) License, version 2.0,
as per accompanying LICENSE file.
## Credits
A list of contributors may be found from CREDITS file, which is included
in some artifacts (usually source distributions); but is always available
from the source code management (SCM) system project uses.
================================================
FILE: csv/src/main/resources/META-INF/services/tools.jackson.core.TokenStreamFactory
================================================
tools.jackson.dataformat.csv.CsvFactory
================================================
FILE: csv/src/main/resources/META-INF/services/tools.jackson.databind.ObjectMapper
================================================
tools.jackson.dataformat.csv.CsvMapper
================================================
FILE: csv/src/test/java/module-info.java
================================================
// CSV unit test Module descriptor
module tools.jackson.dataformat.csv
{
// Since we are not split from Main artifact, will not
// need to depend on Main artifact -- but need its dependencies
requires tools.jackson.core;
requires tools.jackson.databind;
// Additional test lib/framework dependencies
requires org.assertj.core;
requires org.junit.jupiter.api;
requires org.junit.jupiter.params;
// Further, need to open up some packages for JUnit et al
// ("opens" grants run-time reflective access test frameworks need)
opens tools.jackson.dataformat.csv;
opens tools.jackson.dataformat.csv.deser;
opens tools.jackson.dataformat.csv.filter;
opens tools.jackson.dataformat.csv.fuzz;
opens tools.jackson.dataformat.csv.limits;
opens tools.jackson.dataformat.csv.schema;
opens tools.jackson.dataformat.csv.ser;
opens tools.jackson.dataformat.csv.ser.dos;
opens tools.jackson.dataformat.csv.testutil;
opens tools.jackson.dataformat.csv.testutil.failure;
opens tools.jackson.dataformat.csv.tofix;
}
================================================
FILE: csv/src/test/java/perf/BogusOutputStream.java
================================================
package perf;
import java.io.IOException;
import java.io.OutputStream;
/**
 * No-op {@link OutputStream} for performance testing: discards all
 * content written, only maintaining a running count of byte length.
 */
public class BogusOutputStream extends OutputStream
{
    // Running total of bytes "written" so far
    protected int _bytes;

    @Override
    public void write(int b) throws IOException {
        _bytes++;
    }

    @Override
    public void write(byte[] buf, int offset, int len) {
        _bytes += len;
    }

    @Override
    public void write(byte[] buf) { write(buf, 0, buf.length); }

    /**
     * @return Total number of bytes written so far
     */
    public int length() { return _bytes; }
}
================================================
FILE: csv/src/test/java/perf/F5500Entry.java
================================================
package perf;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * POJO representing data from form F-5500; a plain data-transfer class
 * used for manual performance testing of CSV data binding.
 *<p>
 * Property names are bound via {@code @JsonProperty} to the column names
 * of the source data; numbered comment groups below follow the column
 * order of the input file.
 */
public class F5500Entry
{
// 1 - 10
@JsonProperty("ACK_ID") public String ackId;
@JsonProperty("SF_PLAN_YEAR_BEGIN_DATE") public String sfPlanYearBeginDate;
@JsonProperty("SF_TAX_PRD") public String sfTaxPrd;
@JsonProperty("SF_PLAN_ENTITY_CD") public String sfPlanEntityCD;
@JsonProperty("SF_INITIAL_FILING_IND") public String sfInitialFilingInd;
@JsonProperty("SF_AMENDED_IND") public String sfAmendedInd;
@JsonProperty("SF_FINAL_FILING_IND") public String sfFinalFilingInd;
@JsonProperty("SF_SHORT_PLAN_YR_IND") public String sfShortPlanYearInd;
@JsonProperty("SF_5558_APPLICATION_FILED_IND") public String sf5558ApplicationFiledInd;
@JsonProperty("SF_EXT_AUTOMATIC_IND") public String sfExtAutomaticInd;
// 11 - 20
@JsonProperty("SF_DFVC_PROGRAM_IND") public String sfDFVCProgramInd;
@JsonProperty("SF_EXT_SPECIAL_IND") public String sfExtSpecialInd;
@JsonProperty("SF_EXT_SPECIAL_TEXT") public String sfExtSpecialText;
@JsonProperty("SF_PLAN_NAME") public String sfPlanName;
@JsonProperty("SF_PLAN_NUM") public long sfPlanNumber;
@JsonProperty("SF_PLAN_EFF_DATE") public String sfPlanEffectiveDate;
@JsonProperty("SF_SPONSOR_NAME") public String sfSponsorName;
@JsonProperty("SF_SPONSOR_DFE_DBA_NAME") public String sfSponsorDFEDBAName;
@JsonProperty("SF_SPONS_US_ADDRESS1") public String sfSponsorUSAddress1;
@JsonProperty("SF_SPONS_US_ADDRESS2") public String sfSponsorUSAddress2;
// 21 - 30
@JsonProperty("SF_SPONS_US_CITY") public String sfSponsorUSCity;
@JsonProperty("SF_SPONS_US_STATE") public String sfSponsorUSState;
@JsonProperty("SF_SPONS_US_ZIP") public String sfSponsorUSZip;
@JsonProperty("SF_SPONS_FOREIGN_ADDRESS1") public String sfSponsorForeignAddress1;
@JsonProperty("SF_SPONS_FOREIGN_ADDRESS2") public String sfSponsorForeignAddress2;
@JsonProperty("SF_SPONS_FOREIGN_CITY") public String sfSponsorForeignCity;
@JsonProperty("SF_SPONS_FOREIGN_PROV_STATE") public String sfSponsorForeignProvinceOrState;
@JsonProperty("SF_SPONS_FOREIGN_CNTRY") public String sfSponsorForeignCountry;
@JsonProperty("SF_SPONS_FOREIGN_POSTAL_CD") public String sfSponsorForeignPostalCD;
@JsonProperty("SF_SPONS_EIN") public String sfSponsorEIN;
// 31 - 40
@JsonProperty("SF_SPONS_PHONE_NUM") public String sfSponsorPhoneNumber;
@JsonProperty("SF_BUSINESS_CODE") public String sfBusinessCode;
@JsonProperty("SF_ADMIN_NAME") public String sfAdminName;
@JsonProperty("SF_ADMIN_CARE_OF_NAME") public String sfAdminCareOfName;
@JsonProperty("SF_ADMIN_US_ADDRESS1") public String sfAdminUSAddress1;
@JsonProperty("SF_ADMIN_US_ADDRESS2") public String sfAdminUSAddress2;
@JsonProperty("SF_ADMIN_US_CITY") public String sfAdminUSCity;
@JsonProperty("SF_ADMIN_US_STATE") public String sfAdminUSState;
@JsonProperty("SF_ADMIN_US_ZIP") public String sfAdminUSZip;
@JsonProperty("SF_ADMIN_FOREIGN_ADDRESS1") public String sfAdminForeignAddress1;
// 41-50
@JsonProperty("SF_ADMIN_FOREIGN_ADDRESS2") public String sfAdminForeignAddress2;
@JsonProperty("SF_ADMIN_FOREIGN_CITY") public String sfAdminForeignCity;
@JsonProperty("SF_ADMIN_FOREIGN_PROV_STATE") public String sfAdminForeignProvinceState;
@JsonProperty("SF_ADMIN_FOREIGN_CNTRY") public String sfAdminForeignCountry;
@JsonProperty("SF_ADMIN_FOREIGN_POSTAL_CD") public String sfAdminForeignPostalCD;
@JsonProperty("SF_ADMIN_EIN") public String sfAdminEin;
@JsonProperty("SF_ADMIN_PHONE_NUM") public String sfAdminPhoneNumber;
@JsonProperty("SF_LAST_RPT_SPONS_NAME") public String sfLastRptSponsorName;
@JsonProperty("SF_LAST_RPT_SPONS_EIN") public String sfLastRptSponsorEIN;
@JsonProperty("SF_LAST_RPT_PLAN_NUM") public String sfLastRptPlanNumber;
// 51-60
@JsonProperty("SF_TOT_PARTCP_BOY_CNT") public int sfTotalParcpBoyCount;
@JsonProperty("SF_TOT_ACT_RTD_SEP_BENEF_CNT") public int sfTotalAccountRtdSepBenefCount;
@JsonProperty("SF_PARTCP_ACCOUNT_BAL_CNT") public int sfPartcpAccountBalanceCount;
@JsonProperty("SF_ELIGIBLE_ASSETS_IND") public String sfEligibleAssetsInd;
@JsonProperty("SF_IQPA_WAIVER_IND") public String sfIQPAWaiverInd;
@JsonProperty("SF_TOT_ASSETS_BOY_AMT") public int sfTotalAssetsBoyAmount;
@JsonProperty("SF_TOT_LIABILITIES_BOY_AMT") public int sfTotalLiabilitiesBoyAmount;
@JsonProperty("SF_NET_ASSETS_BOY_AMT") public int sfNetAssetsBoyAmt;
@JsonProperty("SF_TOT_ASSETS_EOY_AMT") public int sfTotAssetsEoyAmt;
@JsonProperty("SF_TOT_LIABILITIES_EOY_AMT") public int sfTotalLiabilitiesEOYAmount;
// 61-70
@JsonProperty("SF_NET_ASSETS_EOY_AMT") public int sfNetAssetsEoyAmount;
@JsonProperty("SF_EMPLR_CONTRIB_INCOME_AMT") public int sfEmployerContribIncomeAmount;
@JsonProperty("SF_PARTICIP_CONTRIB_INCOME_AMT") public int sfParticipContribIncomeAmount;
@JsonProperty("SF_OTH_CONTRIB_RCVD_AMT") public int sfOtherContribReceivedAmount;
@JsonProperty("SF_OTHER_INCOME_AMT") public int sfOtherIncomeAmount;
@JsonProperty("SF_TOT_INCOME_AMT") public int sfTotalIncomeAmount;
@JsonProperty("SF_TOT_DISTRIB_BNFT_AMT") public int sftotalDistribuBenefitAmount;
@JsonProperty("SF_CORRECTIVE_DEEMED_DISTR_AMT") public int sfCorrectiveDeemedDistrAmount;
@JsonProperty("SF_ADMIN_SRVC_PROVIDERS_AMT") public int sfAdminSrvcProvidersAmount;
@JsonProperty("SF_OTH_EXPENSES_AMT") public int sfOtherExpensesAmount;
// 71-80
@JsonProperty("SF_TOT_EXPENSES_AMT") public int sfTotalExpensesAmount;
@JsonProperty("SF_NET_INCOME_AMT") public int sfNetIncomeAmount;
@JsonProperty("SF_TOT_PLAN_TRANSFERS_AMT") public int sfTotalPlanTransfersAmount;
@JsonProperty("SF_TYPE_PENSION_BNFT_CODE") public String sfTypePensionBenefitCode;
@JsonProperty("SF_TYPE_WELFARE_BNFT_CODE") public String sfTypeWelfareBenefitCode;
@JsonProperty("SF_FAIL_TRANSMIT_CONTRIB_IND") public String sfFailTransmitContribInd;
@JsonProperty("SF_FAIL_TRANSMIT_CONTRIB_AMT") public int sfFailTransmitContribAmount;
@JsonProperty("SF_PARTY_IN_INT_NOT_RPTD_IND") public String sfPartyInIntNotRptdInd;
@JsonProperty("SF_PARTY_IN_INT_NOT_RPTD_AMT") public int sfPartyInIntNotRptdAmount;
@JsonProperty("SF_PLAN_INS_FDLTY_BOND_IND") public String sfPanInsFidelityBondInd;
// 81-90
@JsonProperty("SF_PLAN_INS_FDLTY_BOND_AMT") public long sfPlanInsFidelityBondAmount;
@JsonProperty("SF_LOSS_DISCV_DUR_YEAR_IND") public String sfLossDiscvDuringYearInd;
@JsonProperty("SF_LOSS_DISCV_DUR_YEAR_AMT") public int sfLossDiscvDuringYearAmount;
@JsonProperty("SF_BROKER_FEES_PAID_IND") public String sfBrokerFeesPaidInd;
@JsonProperty("SF_BROKER_FEES_PAID_AMT") public int sfBrokerFeesPaidAmount;
@JsonProperty("SF_FAIL_PROVIDE_BENEF_DUE_IND") public String sfFailProvideBenefitDueInd;
@JsonProperty("SF_FAIL_PROVIDE_BENEF_DUE_AMT") public int sfFailProvideBenefitDueAmount;
@JsonProperty("SF_PARTCP_LOANS_IND") public String sfPartcpLoansInd;
@JsonProperty("SF_PARTCP_LOANS_EOY_AMT") public int sfPartcpLoansEOYAmount;
@JsonProperty("SF_PLAN_BLACKOUT_PERIOD_IND") public String sfPlanBlackoutPeriodInd;
// 91-100
@JsonProperty("SF_COMPLY_BLACKOUT_NOTICE_IND") public String sfComplyBlackoutNoticeInd;
@JsonProperty("SF_DB_PLAN_FUNDING_REQD_IND") public String sfDBPlanFundingRequiredInd;
@JsonProperty("SF_DC_PLAN_FUNDING_REQD_IND") public String sfDCPlanFundingRequiredInd;
@JsonProperty("SF_RULING_LETTER_GRANT_DATE") public String sfRulingLetterGrantDate;
@JsonProperty("SF_SEC_412_REQ_CONTRIB_AMT") public int sfSec412RequiredContribAmount;
@JsonProperty("SF_EMPLR_CONTRIB_PAID_AMT") public int sfEmployerContribPaidAmount;
@JsonProperty("SF_FUNDING_DEFICIENCY_AMT") public int sfFundingDeficiencyAmount;
@JsonProperty("SF_FUNDING_DEADLINE_IND") public String sfFundingDeadlineInd;
@JsonProperty("SF_RES_TERM_PLAN_ADPT_IND") public String sfResTermPlanAdptInd;
@JsonProperty("SF_RES_TERM_PLAN_ADPT_AMT") public int sfResTermPlanAdptAmount;
// 100-109
@JsonProperty("SF_ALL_PLAN_AST_DISTRIB_IND") public String sfAllPlanAstDistribInd;
@JsonProperty("SF_ADMIN_SIGNED_DATE") public String sfAdminSignedDate;
@JsonProperty("SF_ADMIN_SIGNED_NAME") public String sfAdminSignedName;
@JsonProperty("SF_SPONS_SIGNED_DATE") public String sfSponsorSignedDate;
@JsonProperty("SF_SPONS_SIGNED_NAME") public String sfSponsorSignedName;
@JsonProperty("FILING_STATUS") public String filingStatus;
@JsonProperty("DATE_RECEIVED") public String dateReceived;
@JsonProperty("VALID_ADMIN_SIGNATURE") public String validAdminSignature;
@JsonProperty("VALID_SPONSOR_SIGNATURE") public String validSponsorSignature;
}
================================================
FILE: csv/src/test/java/perf/F5500Reader.java
================================================
package perf;
import java.io.File;
import java.io.IOException;
import java.util.Map;
import tools.jackson.databind.MappingIterator;
import tools.jackson.dataformat.csv.CsvMapper;
import tools.jackson.dataformat.csv.CsvSchema;
/**
 * Manual test for checking how fast a F-5500 file
 * (from http://www.dol.gov/) can be read from a file.
 */
public final class F5500Reader
{
    public static void main(String[] args) throws Exception
    {
        if (args.length != 1) {
            System.err.println("Usage: java .... [input file]");
            System.exit(1);
        }
        new F5500Reader().read(new File(args[0]));
    }

    /**
     * Endless measurement loop: alternates between binding rows as
     * {@code Map}s and as {@link F5500Entry} POJOs, printing timings.
     */
    private void read(File inputFile) throws IOException, InterruptedException
    {
        int x = 1;
        while (true) {
            // Alternate between Map-based and POJO-based binding:
            Class<?> cls = ((x & 1) == 0) ? Map.class : F5500Entry.class;
            ++x;
            long now = System.currentTimeMillis();
            int count = readAll(inputFile, cls);
            long time = System.currentTimeMillis() - now;
            System.out.printf("DONE! Read %d rows as %s in %.1f seconds.\n",
                    count, cls.getName(), time / 1000.0);
            Thread.sleep(500L);
        }
    }

    /**
     * Reads all rows of given file, binding each to an instance of given
     * type (header row used for column names).
     *
     * @return Number of rows read
     */
    private <T> int readAll(File inputFile, Class<T> cls) throws IOException
    {
        System.out.print("Reading input as "+cls.getName()+" instances: ");
        int count = 0;
        CsvMapper mapper = new CsvMapper();
        CsvSchema schema = CsvSchema.builder()
                .setUseHeader(true)
                .build();
        MappingIterator<T> it = mapper.readerFor(cls)
                .with(schema).readValues(inputFile);
        while (it.hasNext()) {
            @SuppressWarnings("unused")
            T row = it.nextValue();
            ++count;
            // Progress marker, once per 16k rows:
            if ((count & 0x3FFF) == 0) {
                System.out.print('.');
            }
        }
        System.out.println();
        it.close();
        return count;
    }
}
================================================
FILE: csv/src/test/java/perf/ManualPerfComparison.java
================================================
package perf;
import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import tools.jackson.core.JsonGenerator;
import tools.jackson.databind.*;
import tools.jackson.dataformat.csv.CsvMapper;
import tools.jackson.dataformat.csv.CsvSchema;
/**
* Simple manual performance micro-benchmark for testing of CSV
* reading and writing performance.
*/
@SuppressWarnings("resource")
public final class ManualPerfComparison
{
private ObjectMapper jsonMapper;
private ObjectReader csvReader;
private ObjectWriter csvWriter;
/**
 * Sets up JSON mapper, plus CSV reader/writer bound to {@code RequestEntry}
 * with pipe ('|') column separator, header row enabled and first data
 * row skipped on reading.
 */
public ManualPerfComparison()
{
jsonMapper = new ObjectMapper();
CsvMapper mapper = new CsvMapper();
CsvSchema schema = mapper.schemaFor(RequestEntry.class)
.withColumnSeparator('|')
.withUseHeader(true)
.withSkipFirstDataRow(true)
;
csvReader = mapper.readerFor(RequestEntry.class).with(schema);
csvWriter = mapper.writer(schema);
}
/**
 * Reads all entries from given CSV content into an array, using the
 * pre-configured {@code csvReader}.
 */
private RequestEntry[] readCsv(byte[] csvInput) throws IOException
{
    // Restore generic typing (raw types do not compile cleanly / lose type safety):
    ArrayList<RequestEntry> entries = new ArrayList<RequestEntry>();
    Iterator<RequestEntry> it = csvReader.readValues(new ByteArrayInputStream(csvInput));
    while (it.hasNext()) {
        entries.add(it.next());
    }
    return entries.toArray(new RequestEntry[entries.size()]);
}
private byte[] writeAsJson(RequestEntry[] entries) throws IOException
{
ByteArrayOutputStream bytes = new ByteArrayOutputStream(256 + entries.length * 100);
JsonGenerator jgen = jsonMapper.createGenerator(bytes);
for (RequestEntry entry : entries) {
jsonMapper.writeValue(jgen, entry);
}
jgen.close();
return bytes.toByteArray();
}
private void test(byte[] csvInput) throws IOException
{
final RequestEntry[] entries = readCsv(csvInput);
final byte[] jsonInput = writeAsJson(entries);
// Let's try to guestimate suitable size... to get to 10 megs to process
final int REPS = (int) ((double) (10 * 1000 * 1000) / (double) csvInput.length);
System.out.printf("Input: %d entries; %d bytes as CSV, %d bytes as JSON\n",
entries.length, csvInput.length, jsonInput.length);
System.out.printf("Will do %d repetitions per test.\n\n", REPS);
int i = 0;
while (true) {
try { Thread.sleep(100L); } catch (InterruptedException ie) { }
int round = (i++ % 4);
// if (true) round = 0;
String msg;
boolean lf = (round == 0);
long msecs;
switch (round) {
case 0:
msg = "CSV, read";
msecs = testCsvRead(REPS, csvInput);
break;
case 1:
msg = "CSV, write";
msecs = testCsvWrite(REPS, entries);
break;
case 2:
msg = "JSON, read";
msecs = testJsonRead(REPS, jsonInput);
break;
case 3:
msg = "JSON, write";
msecs = testJsonWrite(REPS, entries);
break;
default:
throw new Error();
}
if (lf) {
System.out.println();
}
System.out.println("Test '"+msg+"' -> "+msecs+" msecs");
}
}
private final long testJsonRead(int REPS, byte[] input) throws IOException
{
long start = System.currentTimeMillis();
while (--REPS >= 0) {
Iterator it = jsonMapper.readerFor(RequestEntry.class).readValues(
input, 0, input.length);
while (it.hasNext()) {
it.next();
}
}
return System.currentTimeMillis() - start;
}
private final long testCsvRead(int REPS, byte[] input) throws IOException
{
long start = System.currentTimeMillis();
while (--REPS >= 0) {
Iterator it = csvReader.readValues(input, 0, input.length);
while (it.hasNext()) {
it.next();
}
}
return System.currentTimeMillis() - start;
}
private final long testJsonWrite(int REPS, RequestEntry[] entries) throws IOException
{
long start = System.currentTimeMillis();
@SuppressWarnings("unused")
int size = 0;
while (--REPS >= 0) {
BogusOutputStream bogus = new BogusOutputStream();
jsonMapper.writeValue(bogus, entries);
size = bogus.length();
}
return System.currentTimeMillis() - start;
}
private final long testCsvWrite(int REPS, RequestEntry[] entries) throws IOException
{
long start = System.currentTimeMillis();
@SuppressWarnings("unused")
int size = 0;
while (--REPS >= 0) {
BogusOutputStream bogus = new BogusOutputStream();
csvWriter.writeValue(bogus, entries);
size = bogus.length();
}
return System.currentTimeMillis() - start;
}
public static void main(String[] args) throws IOException
{
if (args.length != 1) {
System.err.println("Usage: java ... [file]");
System.exit(1);
}
new ManualPerfComparison().test(readAll(args[0]));
}
public static byte[] readAll(String filename) throws IOException
{
File f = new File(filename);
ByteArrayOutputStream bytes = new ByteArrayOutputStream((int) f.length());
byte[] buffer = new byte[4000];
int count;
FileInputStream in = new FileInputStream(f);
while ((count = in.read(buffer)) > 0) {
bytes.write(buffer, 0, count);
}
in.close();
return bytes.toByteArray();
}
}
================================================
FILE: csv/src/test/java/perf/RequestEntry.java
================================================
package perf;
import com.fasterxml.jackson.annotation.JsonProperty;
/**
 * Simple value POJO used by the manual perf benchmarks: maps upper-case
 * CSV/JSON column names to Java fields via {@code @JsonProperty}.
 * All fields are public and mutable by design (test fixture, not API).
 */
public class RequestEntry
{
    @JsonProperty("APP_ID") public long appId;
    @JsonProperty("USER_SCREEN_NAME") public String userScreenName;
    @JsonProperty("REPORTER_SCREEN_NAME") public String reportScreenName;
    @JsonProperty("EVENT_DATE") public String eventDate;
    @JsonProperty("HOST") public String host;
    @JsonProperty("PATH") public String path;
    @JsonProperty("USER_AGENT") public String userAgent;
    @JsonProperty("IP") public String ip;
    @JsonProperty("COOKIE") public String cookie;
    @JsonProperty("SUBDOMAIN") public String subdomain;
    @JsonProperty("REQUEST_METHOD") public String requestMethod; // or Enum
    @JsonProperty("TRACE") public String trace;
    @JsonProperty("REFERRER") public String referrer;
    @JsonProperty("RELOAD_COUNT") public int reloadCount;
    @JsonProperty("SESSION_ID") public String sessionId;
    @JsonProperty("ACTION") public String action;
    @JsonProperty("CONTENT") public String content;
    @JsonProperty("KILL_COUNT") public int killCount;
    @JsonProperty("ABUSE_TYPE") public String abuseType;
}
================================================
FILE: csv/src/test/java/tools/jackson/dataformat/csv/CSVFactoryFeaturesTest.java
================================================
package tools.jackson.dataformat.csv;
import java.io.StringReader;
import java.io.StringWriter;
import org.junit.jupiter.api.Test;
import tools.jackson.core.StreamReadFeature;
import tools.jackson.core.StreamWriteFeature;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
import tools.jackson.core.*;
/**
 * Tests for capability flags and stream-feature wiring of {@link CsvFactory}.
 *
 * Fixed: parsers/generators created in the fast-feature tests were never
 * closed (resource leak); they are now closed explicitly, matching the
 * style of {@code testFactoryFeatures}.
 */
public class CSVFactoryFeaturesTest extends ModuleTestBase
{
    @Test
    public void testFactoryFeatures() throws Exception
    {
        final CsvMapper mapper = mapperForCsv();
        CsvFactory f = mapper.tokenStreamFactory();
        assertFalse(f.canHandleBinaryNatively());
        assertFalse(f.canUseCharArrays());
        assertTrue(f.canUseSchema(CsvSchema.emptySchema()));

        JsonParser p = mapper.createParser("");
        assertFalse(p.canReadObjectId());
        assertFalse(p.canReadTypeId());
        p.close();

        JsonGenerator g = mapper.createGenerator(new StringWriter());
        assertFalse(g.canOmitProperties());
        assertFalse(g.canWriteObjectId());
        assertFalse(g.canWriteTypeId());
        g.close();
    }

    // Enabling fast double read/write should propagate to parser and generator
    @Test
    public void testFactoryFastFeatures() throws Exception
    {
        CsvFactory f = CsvFactory.builder()
                .enable(StreamReadFeature.USE_FAST_DOUBLE_PARSER)
                .enable(StreamWriteFeature.USE_FAST_DOUBLE_WRITER)
                .build();
        assertTrue(f.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
        assertTrue(f.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
        assertTrue(f.isEnabled(StreamWriteFeature.USE_FAST_DOUBLE_WRITER));
        final CsvMapper mapper = mapperForCsv(f);
        JsonParser parser = mapper.createParser(new StringReader(""));
        assertTrue(parser.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
        assertTrue(parser.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
        parser.close(); // fixed: was leaked
        JsonGenerator generator = mapper.createGenerator(new StringWriter());
        assertTrue(generator.isEnabled(StreamWriteFeature.USE_FAST_DOUBLE_WRITER));
        generator.close(); // fixed: was leaked
    }

    // Disabling the big-number parser must not affect the (default-on) double parser
    @Test
    public void testFactoryFastBigNumberFeature() throws Exception
    {
        CsvFactory f = CsvFactory.builder()
                .disable(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER)
                .build();
        assertTrue(f.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
        assertFalse(f.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
        final CsvMapper mapper = mapperForCsv(f);
        JsonParser parser = mapper.createParser(new StringReader(""));
        assertTrue(parser.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
        assertFalse(parser.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
        parser.close(); // fixed: was leaked
    }

    // Disabling fast double read/write should likewise propagate
    @Test
    public void testFactoryBuilderFastFeatures() throws Exception
    {
        CsvFactory f = CsvFactory.builder()
                .disable(StreamReadFeature.USE_FAST_DOUBLE_PARSER)
                .disable(StreamWriteFeature.USE_FAST_DOUBLE_WRITER)
                .build();
        assertFalse(f.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
        assertTrue(f.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
        assertFalse(f.isEnabled(StreamWriteFeature.USE_FAST_DOUBLE_WRITER));
        final CsvMapper mapper = mapperForCsv(f);
        JsonParser parser = mapper.createParser(new StringReader(""));
        assertFalse(parser.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
        assertTrue(parser.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
        parser.close(); // fixed: was leaked
        JsonGenerator generator = mapper.createGenerator(new StringWriter());
        assertFalse(generator.isEnabled(StreamWriteFeature.USE_FAST_DOUBLE_WRITER));
        generator.close(); // fixed: was leaked
    }

    @Test
    public void testFactoryBuilderFastBigNumberFeature() throws Exception
    {
        CsvFactory f = CsvFactory.builder()
                .disable(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER)
                .build();
        assertFalse(f.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
        assertTrue(f.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
        final CsvMapper mapper = mapperForCsv(f);
        JsonParser parser = mapper.createParser(new StringReader(""));
        assertFalse(parser.isEnabled(StreamReadFeature.USE_FAST_BIG_NUMBER_PARSER));
        assertTrue(parser.isEnabled(StreamReadFeature.USE_FAST_DOUBLE_PARSER));
        parser.close(); // fixed: was leaked
    }

    // for [dataformats-text#581]
    @Test
    void testFormatFeatureDefaults() {
        CsvMapper mapper = CsvMapper.shared();
        assertFalse(mapper.isEnabled(CsvReadFeature.ALLOW_COMMENTS));
        assertFalse(mapper.isEnabled(CsvWriteFeature.ALWAYS_QUOTE_EMPTY_STRINGS));
    }
}
================================================
FILE: csv/src/test/java/tools/jackson/dataformat/csv/ModuleTestBase.java
================================================
package tools.jackson.dataformat.csv;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import tools.jackson.core.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.fail;
public abstract class ModuleTestBase
{
public enum Gender { MALE, FEMALE };
public static class Address {
private String streetName;
private String city;
public Address(String streetName, String city) {
this.streetName = streetName;
this.city = city;
}
public String getStreetName() {
return streetName;
}
public void setStreetName(String streetName) {
this.streetName = streetName;
}
public String getCity() {
return city;
}
public void setCity(String city) {
this.city = city;
}
}
public static class LocalizedValue {
private String value;
public LocalizedValue(String value) {
this.value = value;
}
public String getValue() {
return value;
}
public void setValue(String value) {
this.value = value;
}
}
/**
* Slightly modified sample class from Jackson tutorial ("JacksonInFiveMinutes")
*/
@JsonPropertyOrder({"firstName", "lastName", "gender" ,"verified", "userImage"})
public static class FiveMinuteUser
{
private Gender _gender;
public String firstName, lastName;
private boolean _isVerified;
private byte[] _userImage;
public FiveMinuteUser() { }
public FiveMinuteUser(String first, String last, boolean verified, Gender g, byte[] data)
{
firstName = first;
lastName = last;
_isVerified = verified;
_gender = g;
_userImage = data;
}
public boolean isVerified() { return _isVerified; }
public Gender getGender() { return _gender; }
public byte[] getUserImage() { return _userImage; }
public void setVerified(boolean b) { _isVerified = b; }
public void setGender(Gender g) { _gender = g; }
public void setUserImage(byte[] b) { _userImage = b; }
@Override
public boolean equals(Object o)
{
if (o == this) return true;
if (o == null || o.getClass() != getClass()) return false;
FiveMinuteUser other = (FiveMinuteUser) o;
if (_isVerified != other._isVerified) return false;
if (_gender != other._gender) return false;
if (!firstName.equals(other.firstName)) return false;
if (!lastName.equals(other.lastName)) return false;
byte[] otherImage = other._userImage;
if (otherImage.length != _userImage.length) return false;
for (int i = 0, len = _userImage.length; i < len; ++i) {
if (_userImage[i] != otherImage[i]) {
return false;
}
}
return true;
}
@Override
public int hashCode() {
// not really good but whatever:
return firstName.hashCode();
}
}
public static class TenMinuteUser extends FiveMinuteUser {
private Address _address;
public TenMinuteUser(String first, String last, boolean verified, Gender g, byte[] data, Address address)
{
super(first, last, verified, g, data);
_address = address;
}
public Address getAddress() {
return _address;
}
public void setAddress(Address address) {
this._address = address;
}
}
public static class FifteenMinuteUser extends FiveMinuteUser {
private Map localizedName;
public FifteenMinuteUser(String first, String last, boolean verified, Gender g, byte[] data, Map localizedName) {
super(first, last, verified, g, data);
this.localizedName = localizedName;
}
public Map getLocalizedName() {
return localizedName;
}
public void setLocalizedName(Map localizedName) {
this.localizedName = localizedName;
}
}
@JsonPropertyOrder({"id", "desc"})
public static class IdDesc {
public String id, desc;
protected IdDesc() { }
public IdDesc(String id, String desc) {
this.id = id;
this.desc = desc;
}
}
@JsonPropertyOrder({ "x", "y" })
public static class Point {
public int x, y;
protected Point() { }
public Point(int x0, int y0) {
x = x0;
y = y0;
}
}
public static class Points {
public List p;
protected Points() { }
public Points(Point... p0) {
p = Arrays.asList(p0);
}
}
protected ModuleTestBase() { }
/*
/**********************************************************************
/* Helper methods, setup
/**********************************************************************
*/
protected CsvFactoryBuilder streamFactoryBuilder() {
return CsvFactory.builder();
}
protected CsvMapper mapperForCsv() {
return new CsvMapper();
}
protected CsvMapper mapperForCsv(CsvFactory f) {
return new CsvMapper(f);
}
protected CsvMapper newObjectMapper() {
return CsvMapper.builder().build();
}
protected CsvMapper.Builder mapperBuilder() {
return CsvMapper.builder();
}
protected CsvMapper.Builder mapperBuilder(CsvFactory f) {
return CsvMapper.builder(f);
}
/*
/**********************************************************
/* Helper methods; low-level
/**********************************************************
*/
public String q(String str) {
return '"'+str+'"';
}
public byte[] utf8(String str) {
return str.getBytes(StandardCharsets.UTF_8);
}
protected String a2q(String json) {
return json.replace("'", "\"");
}
protected static Map mapOf(Object...strings)
{
final Map map = new LinkedHashMap<>();
for (int i = 0, end = strings.length; i < end; i += 2) {
map.put(strings[i].toString(), strings[i+1]);
}
return map;
}
protected void assertToken(JsonToken expToken, JsonToken actToken) {
if (actToken != expToken) {
fail("Expected token "+expToken+", current token "+actToken);
}
}
protected void assertToken(JsonToken expToken, JsonParser jp) {
assertToken(expToken, jp.currentToken());
}
protected void assertType(Object ob, Class> expType)
{
if (ob == null) {
fail("Expected an object of type "+expType.getName()+", got null");
}
Class> cls = ob.getClass();
if (!expType.isAssignableFrom(cls)) {
fail("Expected type "+expType.getName()+", got "+cls.getName());
}
}
/**
* Method that gets textual contents of the current token using
* available methods, and ensures results are consistent, before
* returning them
*/
protected String getAndVerifyText(JsonParser jp)
{
// Ok, let's verify other accessors
int actLen = jp.getStringLength();
char[] ch = jp.getStringCharacters();
String str2 = new String(ch, jp.getStringOffset(), actLen);
String str = jp.getString();
if (str.length() != actLen) {
fail("Internal problem (jp.token == "+jp.currentToken()+"): jp.getText().length() ['"+str+"'] == "+str.length()+"; jp.getTextLength() == "+actLen);
}
assertEquals("String access via getText(), getTextXxx() must be the same", str, str2);
return str;
}
protected void verifyFieldName(JsonParser p, String expName)
{
assertEquals(expName, p.getString());
assertEquals(expName, p.currentName());
}
protected void verifyIntValue(JsonParser jp, long expValue)
{
// First, via textual
assertEquals(String.valueOf(expValue), jp.getString());
}
protected void verifyException(Throwable e, String... matches)
{
String msg = e.getMessage();
String lmsg = (msg == null) ? "" : msg.toLowerCase();
for (String match : matches) {
String lmatch = match.toLowerCase();
if (lmsg.indexOf(lmatch) >= 0) {
return;
}
}
fail("Expected an exception with one of substrings ("+Arrays.asList(matches)+"): got one with message \""+msg+"\"");
}
protected byte[] readResource(String ref)
{
ByteArrayOutputStream bytes = new ByteArrayOutputStream();
final byte[] buf = new byte[4000];
try (InputStream in = getClass().getResourceAsStream(ref)) {
if (in != null) {
int len;
while ((len = in.read(buf)) > 0) {
bytes.write(buf, 0, len);
}
}
} catch (IOException e) {
throw new RuntimeException("Failed to read resource '"+ref+"': "+e);
}
if (bytes.size() == 0) {
throw new IllegalArgumentException("Failed to read resource '"+ref+"': empty resource?");
}
return bytes.toByteArray();
}
}
================================================
FILE: csv/src/test/java/tools/jackson/dataformat/csv/NullReader122Test.java
================================================
package tools.jackson.dataformat.csv;
import java.io.Reader;
import java.util.Map;
import org.junit.jupiter.api.Test;
import tools.jackson.databind.ObjectReader;
import static org.junit.jupiter.api.Assertions.fail;
/**
 * Regression test for [dataformats-text#122]: passing a {@code null}
 * {@link Reader} must fail fast with {@link IllegalArgumentException}
 * instead of looping forever.
 */
public class NullReader122Test extends ModuleTestBase
{
    private final CsvMapper MAPPER = mapperForCsv();

    // for [dataformats-text#122]: passing `null` Reader leads to infinite loop
    @Test
    public void testEmptyStream() throws Exception {
        final CsvSchema headerSchema = CsvSchema.emptySchema()
                .withHeader()
                .withColumnSeparator(';');
        final ObjectReader reader = MAPPER.readerFor(Map.class).with(headerSchema);
        try {
            reader.readValue((Reader) null);
        } catch (IllegalArgumentException e) {
            // expected: argument validation must reject the null Reader
            verifyException(e, "Argument \"r\" is null");
            return;
        }
        fail("Should not pass");
    }
}
================================================
FILE: csv/src/test/java/tools/jackson/dataformat/csv/SchemaCaching288Test.java
================================================
package tools.jackson.dataformat.csv;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import com.fasterxml.jackson.annotation.JsonView;
import static org.junit.jupiter.api.Assertions.assertEquals;
// Regression tests for [dataformats-text#288]: cached CsvSchemas must be
// keyed per (type, view) so that view-filtered schemas never shadow (or get
// shadowed by) the unfiltered one. Asserts on CsvMapper._untypedSchemas.size()
// to observe cache growth directly.
public class SchemaCaching288Test extends ModuleTestBase
{
    static class ViewA { }
    static class ViewB { }

    // Bean with one property in both views, one in A only, one in B only
    @JsonPropertyOrder({ "a", "aa", "b" })
    static class Bean288
    {
        @JsonView({ ViewA.class, ViewB.class })
        public String a = "1";

        @JsonView({ViewA.class })
        public String aa = "2";

        @JsonView(ViewB.class)
        public String b = "3";
    }

    /*
    /**********************************************************
    /* Test methods
    /**********************************************************
     */

    // [dataformats-text#288]: caching should not overlap with View
    @Test
    public void testCachingNoViewFirst() throws Exception
    {
        CsvMapper mapper1 = mapperForCsv();
        CsvSchema schemaNoView = mapper1.schemaFor(Bean288.class);
        // no view: all three columns serialized
        assertEquals("1,2,3",
                mapper1.writer(schemaNoView).writeValueAsString(new Bean288()).trim());
        assertEquals(1, mapper1._untypedSchemas.size());

        CsvSchema schemaB = mapper1.schemaForWithView(Bean288.class, ViewB.class);
        // ViewB: only "a" and "b" visible
        assertEquals("1,3", mapper1.writer(schemaB).withView(ViewB.class)
                .writeValueAsString(new Bean288()).trim());
        assertEquals(2, mapper1._untypedSchemas.size());

        // check hash
        // repeated lookups must hit the cache (size unchanged)...
        mapper1.schemaFor(Bean288.class);
        assertEquals(2, mapper1._untypedSchemas.size());
        // ...while a not-yet-seen view adds a new entry
        mapper1.schemaForWithView(Bean288.class, ViewA.class);
        assertEquals(3, mapper1._untypedSchemas.size());
        mapper1.schemaForWithView(Bean288.class, ViewB.class);
        assertEquals(3, mapper1._untypedSchemas.size());
    }

    // [dataformats-text#288]: caching should not overlap with View
    // Same as above but priming the cache with a view-filtered schema first
    @Test
    public void testCachingWithViewFirst() throws Exception
    {
        CsvMapper mapper1 = mapperForCsv();
        CsvSchema schemaA = mapper1.schemaForWithView(Bean288.class, ViewA.class);
        // ViewA: only "a" and "aa" visible
        assertEquals("1,2", mapper1.writer(schemaA).withView(ViewA.class)
                .writeValueAsString(new Bean288()).trim());
        assertEquals(1, mapper1._untypedSchemas.size());
        CsvSchema schemaNoView = mapper1.schemaFor(Bean288.class);
        assertEquals("1,2,3",
                mapper1.writer(schemaNoView).writeValueAsString(new Bean288()).trim());
        assertEquals(2, mapper1._untypedSchemas.size());

        // check hash
        mapper1.schemaFor(Bean288.class);
        assertEquals(2, mapper1._untypedSchemas.size());
        mapper1.schemaForWithView(Bean288.class, ViewA.class);
        assertEquals(2, mapper1._untypedSchemas.size());
        mapper1.schemaForWithView(Bean288.class, ViewB.class);
        assertEquals(3, mapper1._untypedSchemas.size());
    }
}
================================================
FILE: csv/src/test/java/tools/jackson/dataformat/csv/TestVersions.java
================================================
package tools.jackson.dataformat.csv;
import java.io.*;

import org.junit.jupiter.api.Test;

import tools.jackson.core.*;
import tools.jackson.databind.MapperFeature;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
/**
 * Verifies that factory, parser and generator all report the module's
 * own version, and that mapper defaults hold.
 *
 * Fixed: both test methods were missing {@code @Test}, so under JUnit 5
 * they were silently never executed (other tests in this package are
 * annotation-driven).
 */
public class TestVersions extends ModuleTestBase
{
    @Test
    public void testMapperVersions() throws IOException
    {
        // Test shared instance for funsies
        CsvMapper mapper = CsvMapper.shared();
        assertVersion(mapper.tokenStreamFactory());
        JsonParser p = mapper.createParser("abc");
        assertVersion(p);
        p.close();
        JsonGenerator g = mapper.createGenerator(new ByteArrayOutputStream());
        assertVersion(g);
        g.close();
    }

    // Mostly to verify #11
    @Test
    public void testMapperDefaults()
    {
        CsvMapper mapper = new CsvMapper();
        assertTrue(mapper.isEnabled(MapperFeature.SORT_PROPERTIES_ALPHABETICALLY));
    }

    /*
    /**********************************************************
    /* Helper methods
    /**********************************************************
     */

    /** Asserts that the given component reports this module's version. */
    private void assertVersion(Versioned vers)
    {
        assertEquals(PackageVersion.VERSION, vers.version());
    }
}
================================================
FILE: csv/src/test/java/tools/jackson/dataformat/csv/deser/AnySetterTest.java
================================================
package tools.jackson.dataformat.csv.deser;
import java.util.LinkedHashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.annotation.JsonAnySetter;
import tools.jackson.dataformat.csv.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
/**
 * Tests binding of unmapped CSV columns via {@code @JsonAnySetter}.
 *
 * Fixed: {@code Entry.stuff} was a raw {@code Map}/{@code LinkedHashMap}
 * (generics evidently stripped); restored to {@code Map<String, Object>},
 * matching the {@code set(String, Object)} any-setter signature.
 */
public class AnySetterTest extends ModuleTestBase
{
    static class Entry {
        // Collects properties that are not bound to `age`/`name`
        Map<String, Object> stuff = new LinkedHashMap<>();
        public int age;
        public String name;

        @JsonAnySetter
        public void set(String key, Object value) {
            // for secondary test, where name remains empty:
            if (key.isEmpty()) {
                key = String.valueOf(stuff.size());
            }
            stuff.put(key, value);
        }
    }

    /*
    /**********************************************************************
    /* Test methods
    /**********************************************************************
     */

    // Header-named extra columns should land in the any-setter map by name
    @Test
    public void testSimpleHeader() throws Exception
    {
        CsvMapper mapper = mapperForCsv();
        CsvSchema schema = CsvSchema.emptySchema().withHeader();
        Entry entry = mapper.readerFor(Entry.class).with(schema).readValue(
                "name,age,gender,extra\nBarbara,35,F,1246\n");
        assertEquals(35, entry.age);
        assertEquals("F", entry.stuff.get("gender"));
        assertEquals("1246", entry.stuff.get("extra"));
        assertEquals(2, entry.stuff.size());
    }

    // [dataformat-csv@109]: allow "any-setter-like"
    // Columns beyond the header get empty names, mapped to positional keys
    @Test
    public void testWithMapToAny() throws Exception
    {
        CsvMapper mapper = mapperForCsv();
        CsvSchema schema = CsvSchema.emptySchema().withHeader()
                .withAnyPropertyName("");
        Entry entry = mapper.readerFor(Entry.class).with(schema)
                .readValue("name,age\nJoe,28,first,second\n");
        assertEquals("Joe", entry.name);
        assertEquals(28, entry.age);
        assertEquals("first", entry.stuff.get("0"));
        assertEquals("second", entry.stuff.get("1"));
        assertEquals(2, entry.stuff.size());
    }
}
================================================
FILE: csv/src/test/java/tools/jackson/dataformat/csv/deser/ArrayReadTest.java
================================================
package tools.jackson.dataformat.csv.deser;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.annotation.*;
import tools.jackson.dataformat.csv.*;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
// for [dataformat-csv#57]
// Tests for array-valued CSV columns ([dataformat-csv#57]): values packed
// into a single column with a configurable element separator.
public class ArrayReadTest extends ModuleTestBase
{
    @JsonPropertyOrder({"id", "values", "extra"})
    static class ValueEntry {
        public String id, extra;
        public int[] values;

        @JsonCreator
        public ValueEntry(@JsonProperty("id") String id,
                @JsonProperty("extra") String extra,
                @JsonProperty("values") int[] values) {
            this.id = id;
            this.extra = extra;
            this.values = values;
        }
    }

    /*
    /**********************************************************************
    /* Test methods
    /**********************************************************************
     */

    private final CsvMapper MAPPER = mapperForCsv();

    // Default (untyped) schema: array column parsed from ';'-separated segment
    @Test
    public void testSimpleExplicitLooseTyping() throws Exception
    {
        ValueEntry value = MAPPER.readerWithSchemaFor(ValueEntry.class)
                .readValue("foo,1;2;3,stuff");
        assertNotNull(value);
        assertEquals("foo", value.id);
        assertEquals("stuff", value.extra);
        int[] v = value.values;
        assertNotNull(v);
        assertEquals(3, v.length);
        assertEquals(1, v[0]);
        assertEquals(2, v[1]);
        assertEquals(3, v[2]);
    }

    // Same as above, but Array value in double-quotes:
    @Test
    public void testSimpleExplicitLooseTypingWithQuotes() throws Exception
    {
        ValueEntry value = MAPPER.readerWithSchemaFor(ValueEntry.class)
                .readValue("foo,\"1;2;3\",stuff");
        assertNotNull(value);
        assertEquals("foo", value.id);
        assertEquals("stuff", value.extra);
        int[] v = value.values;
        assertNotNull(v);
        assertEquals(3, v.length);
        assertEquals(1, v[0]);
        assertEquals(2, v[1]);
        assertEquals(3, v[2]);
    }

    // Typed schema: column declared as ARRAY type
    @Test
    public void testSimpleExplicitStrictTyping() throws Exception
    {
        ValueEntry value = MAPPER.readerWithTypedSchemaFor(ValueEntry.class)
                .readValue("foo,1;2;3,stuff");
        assertNotNull(value);
        assertEquals("foo", value.id);
        assertEquals("stuff", value.extra);
        int[] v = value.values;
        assertNotNull(v);
        assertEquals(3, v.length);
        assertEquals(1, v[0]);
        assertEquals(2, v[1]);
        assertEquals(3, v[2]);

        // one more thing: for [dataformat-csv#66]:
        // empty array column must bind to an empty (not null) array
        value = MAPPER.readerWithTypedSchemaFor(ValueEntry.class)
                .readValue("foo,,stuff");
        assertNotNull(value);
        assertEquals("foo", value.id);
        assertEquals("stuff", value.extra);
        v = value.values;
        assertNotNull(v);
        assertEquals(0, v.length);
    }

    // Round-trip with a single-space element separator (forces quoting on write)
    @Test
    public void testSeparatorOverrideSpace() throws Exception
    {
        ValueEntry input = new ValueEntry("foo", "stuff", new int[] {1, 2, 3});

        String csv = MAPPER.writer(CsvSchema.builder()
                .addColumn("id")
                .addArrayColumn("values", " ")
                .addColumn("extra")
                .build())
                .writeValueAsString(input)
                .trim();
        // gets quoted due to white space
        assertEquals("foo,\"1 2 3\",stuff", csv);

        ValueEntry value = MAPPER.reader(MAPPER.schemaFor(ValueEntry.class).withArrayElementSeparator(" ")).forType(ValueEntry.class)
                .readValue(csv);
        assertEquals("foo", value.id);
        assertEquals("stuff", value.extra);
        int[] v = value.values;
        assertNotNull(v);
        assertEquals(3, v.length);
        assertEquals(1, v[0]);
        assertEquals(2, v[1]);
        assertEquals(3, v[2]);
    }

    // Round-trip with a multi-character ("::") element separator
    @Test
    public void testSeparatorOverrideMulti() throws Exception
    {
        ValueEntry input = new ValueEntry("foo", "stuff", new int[] {1, 2, 3});

        String csv = MAPPER.writer(CsvSchema.builder()
                .addColumn("id")
                .addArrayColumn("values", "::")
                .addColumn("extra")
                .build())
                .writeValueAsString(input)
                .trim();
        assertEquals("foo,1::2::3,stuff", csv);

        ValueEntry value = MAPPER.reader(MAPPER.schemaFor(ValueEntry.class).withArrayElementSeparator("::")).forType(ValueEntry.class)
                .readValue(csv);
        assertEquals("foo", value.id);
        assertEquals("stuff", value.extra);
        int[] v = value.values;
        assertNotNull(v);
        assertEquals(3, v.length);
        assertEquals(1, v[0]);
        assertEquals(2, v[1]);
        assertEquals(3, v[2]);
    }
}
================================================
FILE: csv/src/test/java/tools/jackson/dataformat/csv/deser/BasicCSVParserTest.java
================================================
package tools.jackson.dataformat.csv.deser;
import java.io.ByteArrayOutputStream;
import java.util.*;
import org.junit.jupiter.api.Test;
import com.fasterxml.jackson.annotation.JsonPropertyOrder;
import tools.jackson.core.JsonParser;
import tools.jackson.core.JsonToken;
import tools.jackson.core.exc.StreamReadException;
import tools.jackson.databind.*;
import tools.jackson.dataformat.csv.*;
import static org.junit.jupiter.api.Assertions.*;
public class BasicCSVParserTest extends ModuleTestBase
{
    // NOTE(review): shadows ModuleTestBase.Point (which has int x,y only);
    // this variant adds a nullable y and a z defaulting to 8, presumably for
    // missing-column/default-value tests — confirm against the methods below.
    @JsonPropertyOrder({"x", "y", "z"})
    public static class Point {
        public int x;
        public Integer y;
        public Integer z = 8;
    }
    // Shared 5-column schema matching FiveMinuteUser's properties; no header
    // line, columns bound purely by position.
    final static CsvSchema SIMPLE_SCHEMA = CsvSchema.builder()
            .addColumn("firstName")
            .addColumn("lastName")
            .addColumn("gender")
            .addColumn("userImage")
            .addColumn("verified")
            .build();
/*
/**********************************************************
/* Test methods
/**********************************************************
*/
final CsvMapper MAPPER = mapperForCsv();
    @Test
    public void testSimpleExplicit() throws Exception
    {
        ObjectReader r = MAPPER.reader(SIMPLE_SCHEMA);
        // exercise both input flavors handled by the helper
        _testSimpleExplicit(r, false);
        _testSimpleExplicit(r, true);
    }
private void _testSimpleExplicit(ObjectReader r, boolean useBytes) throws Exception
{
r = r.forType(FiveMinuteUser.class);
FiveMinuteUser user;
final String INPUT = "Bob,Robertson,MALE,AQIDBAU=,false\n";
if (useBytes) {
user = r.readValue(INPUT);
} else {
user = r.readValue(utf8(INPUT));
}
assertEquals("Bob", user.firstName);
assertEquals("Robertson", user.lastName);
assertEquals(Gender.MALE, user.getGender());
assertFalse(user.isVerified());
assertArrayEquals(new byte[]{1, 2, 3, 4, 5}, user.getUserImage());
}
    // Input prefixed with a UTF-8 BOM must parse identically; a stale BOM
    // would corrupt the first column's value, hence the length-reporting fail()
    @Test
    public void testSimpleExplicitWithBOM() throws Exception {
        ObjectReader r = MAPPER.reader(SIMPLE_SCHEMA);
        r = r.forType(FiveMinuteUser.class);
        FiveMinuteUser user;

        ByteArrayOutputStream b = new ByteArrayOutputStream();
        // first, UTF-8 BOM:
        b.write(new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF});
        b.write(utf8("Bob,Robertson,MALE,AQIDBAU=,false\n"));
        b.close();

        user = r.readValue(b.toByteArray());
        String fn = user.firstName;

        if (!fn.equals("Bob")) {
            fail("Expected 'Bob' (3), got '" + fn + "' (" + fn.length() + ")");
        }
        assertEquals("Robertson", user.lastName);
        assertEquals(Gender.MALE, user.getGender());
        assertFalse(user.isVerified());
        assertArrayEquals(new byte[]{1, 2, 3, 4, 5}, user.getUserImage());
    }
    // Schema generated from the POJO itself; column order follows the
    // @JsonPropertyOrder on FiveMinuteUser, not SIMPLE_SCHEMA
    @Test
    public void testSimpleWithAutoSchema() throws Exception {
        CsvSchema schema = MAPPER.schemaFor(FiveMinuteUser.class);
        // NOTE: order different from above test (as per POJO def!)
        FiveMinuteUser user = MAPPER.reader(schema).forType(FiveMinuteUser.class).readValue("Joe,Josephson,MALE,true,AwE=\n");
        assertEquals("Joe", user.firstName);
        assertEquals("Josephson", user.lastName);
        assertEquals(Gender.MALE, user.getGender());
        assertTrue(user.isVerified());
        assertArrayEquals(new byte[]{3, 1}, user.getUserImage());
    }
/**
* Test to verify that we can mix "untyped" access as Maps
* with schema information...
*/
@Test
public void testSimpleAsMaps() throws Exception {
CsvSchema schema = MAPPER.schemaFor(FiveMinuteUser.class);
MappingIterator