*
* @since 0.0.1
*/
public class DataType {
    /** Integral types (tinyint, int, bigint, long). */
    public static final byte INTEGER = 0;
    /** Floating-point types (double, float). */
    public static final byte DOUBLE = 1;
    /** Boolean types (boolean, bool). */
    public static final byte BOOLEAN = 2;
    /** Character string type. */
    public static final byte STRING = 3;
    /** Date-time type. */
    public static final byte DATETIME = 4;

    /**
     * Returns the type name for one of the byte constants of this class.
     *
     * @param type one of {@link #INTEGER}, {@link #DOUBLE}, {@link #BOOLEAN},
     *             {@link #STRING} or {@link #DATETIME}
     * @return "bigint", "double", "boolean", "string" or "datetime" respectively
     * @throws IllegalArgumentException if {@code type} is not one of the constants above
     */
    public static String toString(int type) {
        switch (type) {
            case INTEGER:
                return "bigint";
            case DOUBLE:
                return "double";
            case BOOLEAN:
                return "boolean";
            case STRING:
                return "string";
            case DATETIME:
                return "datetime";
            default:
                throw new IllegalArgumentException("type=" + type);
        }
    }

    /**
     * Converts a textual type name into the matching byte constant of this class.
     *
     * <p>Matching ignores case and surrounding whitespace. Conversion rules:
     * <ul>
     * <li>tinyint, int, bigint, long - {@link #INTEGER}</li>
     * <li>double, float - {@link #DOUBLE}</li>
     * <li>string - {@link #STRING}</li>
     * <li>boolean, bool - {@link #BOOLEAN}</li>
     * <li>datetime - {@link #DATETIME}</li>
     * </ul>
     *
     * @param type textual type name
     * @return the byte constant for that type
     * @throws IllegalArgumentException if {@code type} is null or not a known type name
     */
    public static byte convertToDataType(String type) throws IllegalArgumentException {
        if (type == null) {
            throw new IllegalArgumentException("unknown type: null");
        }
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale;
        // with a Turkish default locale "INT" would otherwise become "ınt" and not match.
        String normalized = type.trim().toLowerCase(java.util.Locale.ROOT);
        if ("string".equals(normalized)) {
            return STRING;
        } else if ("bigint".equals(normalized) || "int".equals(normalized)
                || "tinyint".equals(normalized) || "long".equals(normalized)) {
            return INTEGER;
        } else if ("boolean".equals(normalized) || "bool".equals(normalized)) {
            return BOOLEAN;
        } else if ("double".equals(normalized) || "float".equals(normalized)) {
            return DOUBLE;
        } else if ("datetime".equals(normalized)) {
            return DATETIME;
        } else {
            throw new IllegalArgumentException("unknown type: " + normalized);
        }
    }
}
================================================
FILE: adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/FieldSchema.java
================================================
package com.alibaba.datax.plugin.writer.adswriter.odps;
/**
 * Column attributes of an ODPS table: a column name and a column type. Per the original
 * author, both match what SQL DESC shows for the table or partition.
 *
 * @since 0.0.1
 */
public class FieldSchema {

    /** Column name. */
    private String name;

    /** Column type, e.g. string, bigint, boolean, datetime. */
    private String type;

    /** Optional column comment; may be null or empty. */
    private String comment;

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getType() {
        return this.type;
    }

    public void setType(String type) {
        this.type = type;
    }

    public String getComment() {
        return this.comment;
    }

    public void setComment(String comment) {
        this.comment = comment;
    }

    @Override
    public String toString() {
        return "FieldSchema [name=" + name + ", type=" + type + ", comment=" + comment + "]";
    }

    /**
     * Renders this column as a DDL fragment.
     *
     * @return "col_name data_type [COMMENT col_comment]"; the COMMENT clause is emitted
     *         only when the comment is neither null nor empty
     */
    public String toDDL() {
        String columnDdl = name + " " + type;
        if (comment == null || comment.isEmpty()) {
            return columnDdl;
        }
        return columnDdl + " COMMENT \"" + comment + "\"";
    }
}
================================================
FILE: adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/TableMeta.java
================================================
package com.alibaba.datax.plugin.writer.adswriter.odps;
import java.util.Iterator;
import java.util.List;
/**
 * ODPS table metadata: table name, columns, partition keys, lifecycle and comment.
 *
 * @since 0.0.1
 */
public class TableMeta {
    private String tableName;
    /** Regular (non-partition) columns; may be null. */
    private List<FieldSchema> cols;
    /** Partition key columns; may be null. */
    private List<FieldSchema> partitionKeys;
    /** Lifecycle value; only emitted into the DDL when greater than zero. */
    private int lifeCycle;
    private String comment;

    public String getTableName() {
        return tableName;
    }

    public void setTableName(String tableName) {
        this.tableName = tableName;
    }

    public List<FieldSchema> getCols() {
        return cols;
    }

    public void setCols(List<FieldSchema> cols) {
        this.cols = cols;
    }

    public List<FieldSchema> getPartitionKeys() {
        return partitionKeys;
    }

    public void setPartitionKeys(List<FieldSchema> partitionKeys) {
        this.partitionKeys = partitionKeys;
    }

    public int getLifeCycle() {
        return lifeCycle;
    }

    public void setLifeCycle(int lifeCycle) {
        this.lifeCycle = lifeCycle;
    }

    public String getComment() {
        return comment;
    }

    public void setComment(String comment) {
        this.comment = comment;
    }

    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        builder.append("TableMeta [tableName=").append(tableName).append(", cols=").append(cols)
                .append(", partitionKeys=").append(partitionKeys).append(", lifeCycle=").append(lifeCycle)
                .append(", comment=").append(comment).append("]");
        return builder.toString();
    }

    /**
     * Renders a CREATE TABLE statement for this table.
     *
     * @return
     *         "CREATE TABLE table_name
     *         [(col_name data_type [COMMENT col_comment], ...)]
     *         [COMMENT table_comment]
     *         [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
     *         [LIFECYCLE days] ;"
     *         where each optional clause is emitted only when the corresponding
     *         field is non-empty (or, for the lifecycle, greater than zero)
     */
    public String toDDL() {
        StringBuilder builder = new StringBuilder();
        builder.append("CREATE TABLE ").append(tableName).append(" ");
        if (cols != null && !cols.isEmpty()) {
            builder.append("(").append(toDDL(cols)).append(")").append(" ");
        }
        if (comment != null && comment.length() > 0) {
            builder.append("COMMENT \"").append(comment).append("\" ");
        }
        if (partitionKeys != null && !partitionKeys.isEmpty()) {
            builder.append("PARTITIONED BY ");
            builder.append("(").append(toDDL(partitionKeys)).append(")").append(" ");
        }
        if (lifeCycle > 0) {
            builder.append("LIFECYCLE ").append(lifeCycle).append(" ");
        }
        builder.append(";");
        return builder.toString();
    }

    /**
     * Joins the DDL fragments of the given columns with ", ".
     * Returns an empty string for an empty list.
     */
    private String toDDL(List<FieldSchema> fields) {
        StringBuilder builder = new StringBuilder();
        String separator = "";
        for (FieldSchema field : fields) {
            builder.append(separator).append(field.toDDL());
            separator = ", ";
        }
        return builder.toString();
    }
}
================================================
FILE: adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/odps/package-info.java
================================================
/**
* ODPS meta.
*
* @since 0.0.1
*/
package com.alibaba.datax.plugin.writer.adswriter.odps;
================================================
FILE: adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/package-info.java
================================================
/**
* ADS Writer.
*
* @since 0.0.1
*/
package com.alibaba.datax.plugin.writer.adswriter;
================================================
FILE: adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/AdsUtil.java
================================================
package com.alibaba.datax.plugin.writer.adswriter.util;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.util.DBUtil;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.writer.adswriter.load.AdsHelper;
import com.alibaba.datax.plugin.writer.adswriter.AdsWriterErrorCode;
import com.alibaba.datax.plugin.writer.adswriter.load.TransferProjectConf;
import com.alibaba.datax.plugin.writer.adswriter.odps.FieldSchema;
import com.alibaba.datax.plugin.writer.adswriter.odps.TableMeta;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.sql.Connection;
import java.util.ArrayList;
import java.util.List;
public class AdsUtil {
    private static final Logger LOG = LoggerFactory.getLogger(AdsUtil.class);

    /**
     * Checks that every mandatory configuration item has been supplied.
     *
     * <p>url, username, password and schema are always required. In load mode lifeCycle
     * (greater than zero), table and overWrite are required too; in stream mode opIndex
     * is required.
     *
     * @param originalConfig the writer configuration to validate
     * @param writeMode      the configured write mode
     */
    public static void checkNecessaryConfig(Configuration originalConfig, String writeMode) {
        // ADS connection parameters are needed regardless of the write mode.
        originalConfig.getNecessaryValue(Key.ADS_URL, AdsWriterErrorCode.REQUIRED_VALUE);
        originalConfig.getNecessaryValue(Key.USERNAME, AdsWriterErrorCode.REQUIRED_VALUE);
        originalConfig.getNecessaryValue(Key.PASSWORD, AdsWriterErrorCode.REQUIRED_VALUE);
        originalConfig.getNecessaryValue(Key.SCHEMA, AdsWriterErrorCode.REQUIRED_VALUE);

        if (Constant.LOADMODE.equals(writeMode)) {
            originalConfig.getNecessaryValue(Key.Life_CYCLE, AdsWriterErrorCode.REQUIRED_VALUE);
            Integer lifeCycle = originalConfig.getInt(Key.Life_CYCLE);
            if (lifeCycle == null || lifeCycle <= 0) {
                throw DataXException.asDataXException(AdsWriterErrorCode.INVALID_CONFIG_VALUE, "配置项[lifeCycle]的值必须大于零.");
            }
            originalConfig.getNecessaryValue(Key.ADS_TABLE, AdsWriterErrorCode.REQUIRED_VALUE);
            Boolean overwrite = originalConfig.getBool(Key.OVER_WRITE);
            if (overwrite == null) {
                throw DataXException.asDataXException(AdsWriterErrorCode.REQUIRED_VALUE, "配置项[overWrite]是必填项.");
            }
        }
        if (Constant.STREAMMODE.equalsIgnoreCase(writeMode)) {
            originalConfig.getNecessaryValue(Key.OPIndex, AdsWriterErrorCode.REQUIRED_VALUE);
        }
    }

    /**
     * Creates an {@link AdsHelper} from the url, username, password, schema, socket
     * timeout and JDBC URL suffix found in the configuration.
     */
    public static AdsHelper createAdsHelper(Configuration originalConfig) {
        return newAdsHelper(originalConfig,
                originalConfig.getString(Key.USERNAME),
                originalConfig.getString(Key.PASSWORD));
    }

    /**
     * Same as {@link #createAdsHelper(Configuration)} but takes the credentials from the
     * {@link TransferProjectConf#KEY_ACCESS_ID} / {@link TransferProjectConf#KEY_ACCESS_KEY}
     * configuration items instead of username / password.
     */
    public static AdsHelper createAdsHelperWithOdpsAccount(Configuration originalConfig) {
        return newAdsHelper(originalConfig,
                originalConfig.getString(TransferProjectConf.KEY_ACCESS_ID),
                originalConfig.getString(TransferProjectConf.KEY_ACCESS_KEY));
    }

    /** Builds an AdsHelper with the given credentials and the shared connection settings. */
    private static AdsHelper newAdsHelper(Configuration originalConfig, String userName, String password) {
        String adsUrl = originalConfig.getString(Key.ADS_URL);
        String schema = originalConfig.getString(Key.SCHEMA);
        Long socketTimeout = originalConfig.getLong(Key.SOCKET_TIMEOUT, Constant.DEFAULT_SOCKET_TIMEOUT);
        String suffix = originalConfig.getString(Key.JDBC_URL_SUFFIX, "");
        return new AdsHelper(adsUrl, userName, password, schema, socketTimeout, suffix);
    }

    /**
     * Generates the configuration handed to the ODPS writer plugin.
     *
     * <p>The result is a clone of {@code originalConfig} with the ODPS table name, server,
     * tunnel, credentials and project filled in, truncate set to true, partition set to
     * null, and the column list replaced by the names of {@code tableMeta}'s columns.
     *
     * @return the new configuration; {@code originalConfig} itself is not modified here
     */
    public static Configuration generateConf(Configuration originalConfig, String odpsTableName, TableMeta tableMeta, TransferProjectConf transConf) {
        Configuration newConfig = originalConfig.clone();
        newConfig.set(Key.ODPSTABLENAME, odpsTableName);
        newConfig.set(Key.ODPS_SERVER, transConf.getOdpsServer());
        newConfig.set(Key.TUNNEL_SERVER, transConf.getOdpsTunnel());
        newConfig.set(Key.ACCESS_ID, transConf.getAccessId());
        newConfig.set(Key.ACCESS_KEY, transConf.getAccessKey());
        newConfig.set(Key.PROJECT, transConf.getProject());
        newConfig.set(Key.TRUNCATE, true);
        newConfig.set(Key.PARTITION, null);

        List<FieldSchema> cols = tableMeta.getCols();
        List<String> allColumns = new ArrayList<String>();
        if (cols != null) {
            for (FieldSchema col : cols) {
                allColumns.add(col.getName());
            }
        }
        newConfig.set(Key.COLUMN, allColumns);
        return newConfig;
    }

    /**
     * Generates the source_path used for an ADS data load:
     * {@code odps://project/table[/partition]}.
     *
     * @param odpsPartition ODPS partition spec; when null or empty no partition is appended
     */
    public static String generateSourcePath(String project, String tmpOdpsTableName, String odpsPartition) {
        String partition = transferOdpsPartitionToAds(odpsPartition);
        StringBuilder builder = new StringBuilder();
        builder.append("odps://").append(project).append("/").append(tmpOdpsTableName);
        if (odpsPartition != null && !odpsPartition.isEmpty()) {
            builder.append("/").append(partition);
        }
        return builder.toString();
    }

    /**
     * Converts an ODPS partition spec into the form used in an ADS source path.
     *
     * <p>The spec is normalised with {@link #formatPartition(String)}; trailing levels
     * whose value is a bare wildcard ({@code *} or {@code .*}) are then dropped.
     *
     * @param odpsPartition ODPS partition spec, may be null or empty
     * @return the converted partition, or null when {@code odpsPartition} is null or empty
     * @throws DataXException with ODPS_PARTITION_FAILED when a level has no
     *         {@code key=value} form or when a {@code *} remains after trailing
     *         wildcard levels were dropped
     */
    public static String transferOdpsPartitionToAds(String odpsPartition) {
        if (odpsPartition == null || odpsPartition.isEmpty()) {
            return null;
        }
        String adsPartition = formatPartition(odpsPartition);
        String[] partitions = adsPartition.split("/");
        for (int last = partitions.length; last > 0; last--) {
            String partitionPart = partitions[last - 1];
            // Normalise both "*" and ".*" to ".*" before comparing the value.
            String newPart = partitionPart.replace(".*", "*").replace("*", ".*");
            String[] keyValue = newPart.split("=");
            if (keyValue.length < 2) {
                // Previously surfaced as an ArrayIndexOutOfBoundsException.
                throw DataXException.asDataXException(AdsWriterErrorCode.ODPS_PARTITION_FAILED,
                        String.format("ODPS分区[%s]格式不正确, 分区级别[%s]应为 key=value 形式.",
                                odpsPartition, partitionPart));
            }
            if (!keyValue[1].equals(".*")) {
                break;
            }
            adsPartition = adsPartition.substring(0, adsPartition.length() - partitionPart.length());
            if (adsPartition.endsWith("/")) {
                adsPartition = adsPartition.substring(0, adsPartition.length() - 1);
            }
        }
        if (adsPartition.contains("*")) {
            throw DataXException.asDataXException(AdsWriterErrorCode.ODPS_PARTITION_FAILED,
                    String.format("ODPS分区[%s]无法转换为ADS分区: 通配符*只允许作为末尾若干级分区的取值.",
                            odpsPartition));
        }
        return adsPartition;
    }

    /**
     * Normalises a partition spec: trims it, removes spaces around {@code =}, removes
     * single quotes and turns every {@code /} or {@code ,} separator (with any
     * surrounding spaces) into {@code /}. Example: {@code "pt = '1' , ds = 2"} becomes
     * {@code "pt=1/ds=2"}.
     */
    public static String formatPartition(String partition) {
        return partition.trim().replaceAll(" *= *", "=")
                .replaceAll(" */ *", ",").replaceAll(" *, *", ",")
                .replaceAll("'", "").replaceAll(",", "/");
    }

    /**
     * Builds the JDBC URL from the url, schema, socket timeout and URL suffix found in
     * the configuration.
     */
    public static String prepareJdbcUrl(Configuration conf) {
        String adsURL = conf.getString(Key.ADS_URL);
        String schema = conf.getString(Key.SCHEMA);
        Long socketTimeout = conf.getLong(Key.SOCKET_TIMEOUT, Constant.DEFAULT_SOCKET_TIMEOUT);
        String suffix = conf.getString(Key.JDBC_URL_SUFFIX, "");
        return AdsUtil.prepareJdbcUrl(adsURL, schema, socketTimeout, suffix);
    }

    /**
     * Builds a {@code jdbc:mysql://} URL for the given endpoint and schema.
     *
     * @param suffix extra URL parameters such as
     *               {@code autoReconnect=true&failOverReadOnly=false&maxReconnects=10};
     *               appended after {@code &} only when not blank
     */
    public static String prepareJdbcUrl(String adsURL, String schema,
            Long socketTimeout, String suffix) {
        String jdbcUrl = String.format(
                "jdbc:mysql://%s/%s?useUnicode=true&characterEncoding=UTF-8&socketTimeout=%s",
                adsURL, schema, socketTimeout);
        if (StringUtils.isNotBlank(suffix)) {
            jdbcUrl = jdbcUrl + "&" + suffix;
        }
        return jdbcUrl;
    }

    /**
     * Opens a connection through {@link DBUtil#getConnection} using
     * {@link DataBaseType#ADS}, the JDBC URL from {@link #prepareJdbcUrl(Configuration)}
     * and the configured username / password.
     */
    public static Connection getAdsConnect(Configuration conf) {
        String userName = conf.getString(Key.USERNAME);
        String passWord = conf.getString(Key.PASSWORD);
        String jdbcUrl = AdsUtil.prepareJdbcUrl(conf);
        return DBUtil.getConnection(DataBaseType.ADS, jdbcUrl, userName, passWord);
    }
}
================================================
FILE: adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/Constant.java
================================================
package com.alibaba.datax.plugin.writer.adswriter.util;
public class Constant {

    // Names of the write modes.
    public static final String LOADMODE = "load";
    public static final String INSERTMODE = "insert";
    public static final String DELETEMODE = "delete";
    public static final String REPLACEMODE = "replace";
    public static final String STREAMMODE = "stream";

    // Default values for optional settings.
    public static final int DEFAULT_BATCH_SIZE = 32;
    // Default for the socketTimeout setting; presumably milliseconds (one hour) - TODO confirm.
    public static final long DEFAULT_SOCKET_TIMEOUT = 3600000L;
    public static final int DEFAULT_RETRY_TIMES = 3;

    // SQL statement prefixes, filled in with String.format-style arguments.
    public static final String INSERT_TEMPLATE = "insert into %s ( %s ) values ";
    public static final String DELETE_TEMPLATE = "delete from %s where ";

    public static final String ADS_TABLE_INFO = "adsTableInfo";

    // Back-tick; presumably used to quote identifiers in generated SQL - TODO confirm.
    public static final String ADS_QUOTE_CHARACTER = "`";
}
================================================
FILE: adswriter/src/main/java/com/alibaba/datax/plugin/writer/adswriter/util/Key.java
================================================
package com.alibaba.datax.plugin.writer.adswriter.util;
public final class Key {

    // Configuration keys of the ADS writer itself.
    public static final String ADS_URL = "url";
    public static final String USERNAME = "username";
    public static final String PASSWORD = "password";
    public static final String SCHEMA = "schema";
    public static final String ADS_TABLE = "table";
    public static final String Life_CYCLE = "lifeCycle";
    public static final String OVER_WRITE = "overWrite";
    public static final String WRITE_MODE = "writeMode";
    public static final String COLUMN = "column";
    public static final String OPIndex = "opIndex";
    public static final String EMPTY_AS_NULL = "emptyAsNull";
    public static final String BATCH_SIZE = "batchSize";
    public static final String BUFFER_SIZE = "bufferSize";
    public static final String IGNORE_INSERT = "ignoreInsert";
    public static final String PRE_SQL = "preSql";
    public static final String POST_SQL = "postSql";
    public static final String SOCKET_TIMEOUT = "socketTimeout";
    public static final String RETRY_CONNECTION_TIME = "retryTimes";
    public static final String RETRY_INTERVAL_TIME = "retryIntervalTime";
    public static final String JDBC_URL_SUFFIX = "urlSuffix";

    /**
     * The keys below belong to the ODPS writer.
     * Note that ODPSTABLENAME has the same value ("table") as ADS_TABLE.
     */
    public static final String PARTITION = "partition";
    public static final String ODPSTABLENAME = "table";
    public static final String ODPS_SERVER = "odpsServer";
    public static final String TUNNEL_SERVER = "tunnelServer";
    public static final String ACCESS_ID = "accessId";
    public static final String ACCESS_KEY = "accessKey";
    public static final String PROJECT = "project";
    public static final String TRUNCATE = "truncate";
}
================================================
FILE: adswriter/src/main/resources/plugin.json
================================================
{
"name": "adswriter",
"class": "com.alibaba.datax.plugin.writer.adswriter.AdsWriter",
"description": "",
"developer": "alibaba"
}
================================================
FILE: adswriter/src/main/resources/plugin_job_template.json
================================================
{
"name": "adswriter",
"parameter": {
"url": "",
"username": "",
"password": "",
"schema": "",
"table": "",
"partition": "",
"overWrite": "",
"lifeCycle": 2
}
}
================================================
FILE: cassandrareader/doc/cassandrareader.md
================================================
# CassandraReader 插件文档
___
## 1 快速介绍
CassandraReader插件实现了从Cassandra读取数据。在底层实现上,CassandraReader通过datastax的java driver连接Cassandra实例,并执行相应的cql语句将数据从cassandra中SELECT出来。
## 2 实现原理
简而言之,CassandraReader通过java driver连接到Cassandra实例,并根据用户配置的信息生成查询SELECT CQL语句,然后发送到Cassandra,并将该CQL执行返回结果使用DataX自定义的数据类型拼装为抽象的数据集,并传递给下游Writer处理。
对于用户配置Table、Column的信息,CassandraReader将其拼接为CQL语句发送到Cassandra。
## 3 功能说明
### 3.1 配置样例
* 配置一个从Cassandra同步抽取数据到本地的作业:
```
{
  "job": {
    "setting": {
      "speed": {
        "channel": 3
      }
    },
    "content": [
      {
        "reader": {
          "name": "cassandrareader",
          "parameter": {
            "host": "localhost",
            "port": 9042,
            "useSSL": false,
            "keyspace": "test",
            "table": "datax_src",
            "column": [
              "textCol",
              "blobCol",
              "writetime(blobCol)",
              "boolCol",
              "smallintCol",
              "tinyintCol",
              "intCol",
              "bigintCol",
              "varintCol",
              "floatCol",
              "doubleCol",
              "decimalCol",
              "dateCol",
              "timeCol",
              "timeStampCol",
              "uuidCol",
              "inetCol",
              "durationCol",
              "listCol",
              "mapCol",
              "setCol",
              "tupleCol",
              "udtCol"
            ]
          }
        },
        "writer": {
          "name": "streamwriter",
          "parameter": {
            "print": true
          }
        }
      }
    ]
  }
}
```
### 3.2 参数说明
* **host**
* 描述:Cassandra连接点的域名或ip,多个node之间用逗号分隔。
* 必选:是
* 默认值:无
* **port**
* 描述:Cassandra端口。
* 必选:是
* 默认值:9042
* **username**
* 描述:数据源的用户名
* 必选:否
* 默认值:无
* **password**
* 描述:数据源指定用户名的密码
* 必选:否
* 默认值:无
* **useSSL**
* 描述:是否使用SSL连接。
* 必选:否
* 默认值:false
* **keyspace**
* 描述:需要同步的表所在的keyspace。
* 必选:是
* 默认值:无
* **table**
* 描述:所选取的需要同步的表。
* 必选:是
* 默认值:无
* **column**
* 描述:所配置的表中需要同步的列集合。
其中的元素可以指定列的名称或writetime(column_name),后一种形式会读取column_name列的时间戳而不是数据。
* 必选:是
* 默认值:无
* **where**
* 描述:数据筛选条件的cql表达式,例如:
```
"where":"textcol='a'"
```
* 必选:否
* 默认值:无
* **allowFiltering**
* 描述:是否在服务端过滤数据。参考cassandra文档中ALLOW FILTERING关键字的相关描述。
* 必选:否
* 默认值:无
* **consistancyLevel**
* 描述:数据一致性级别。可选ONE|QUORUM|LOCAL_QUORUM|EACH_QUORUM|ALL|ANY|TWO|THREE|LOCAL_ONE
* 必选:否
* 默认值:LOCAL_QUORUM
### 3.3 类型转换
目前CassandraReader支持除counter和Custom类型之外的所有类型。
下面列出CassandraReader针对Cassandra类型转换列表:
| DataX 内部类型| Cassandra 数据类型 |
| -------- | ----- |
| Long |int, tinyint, smallint,varint,bigint,time|
| Double |float, double, decimal|
| String |ascii,varchar, text,uuid,timeuuid,duration,list,map,set,tuple,udt,inet |
| Date |date, timestamp |
| Boolean |bool |
| Bytes |blob |
请注意:
* 目前不支持counter类型和custom类型。
## 4 性能报告
略
## 5 约束限制
### 5.1 主备同步数据恢复问题
略
## 6 FAQ
================================================
FILE: cassandrareader/pom.xml
================================================
4.0.0com.alibaba.dataxdatax-all0.0.1-SNAPSHOTcassandrareadercassandrareaderjarcom.alibaba.dataxdatax-common${datax-project-version}slf4j-log4j12org.slf4jorg.slf4jslf4j-apich.qos.logbacklogback-classiccom.datastax.cassandracassandra-driver-core3.7.2shadedcom.google.guavaguavacom.google.guavaguava16.0.1commons-codeccommons-codec1.9junitjunittestcom.alibaba.dataxdatax-core${datax-project-version}com.alibaba.dataxdatax-service-faceorg.apache.hadoophadoop-commonorg.apache.hivehive-execorg.apache.hivehive-serdejavolutionjavolutiontestorg.mockitomockito-all1.9.5testmaven-compiler-plugin${jdk-version}${jdk-version}${project-sourceEncoding}maven-assembly-pluginsrc/main/assembly/package.xmldataxdwzippackagesingle
================================================
FILE: cassandrareader/src/main/assembly/package.xml
================================================
dirfalsesrc/main/resourcesplugin.jsonplugin_job_template.jsonplugin/reader/cassandrareadertarget/cassandrareader-0.0.1-SNAPSHOT.jarplugin/reader/cassandrareaderfalseplugin/reader/cassandrareader/libsruntime
================================================
FILE: cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/CassandraReader.java
================================================
package com.alibaba.datax.plugin.reader.cassandrareader;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.ConsistencyLevel;
import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.SimpleStatement;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
public class CassandraReader extends Reader {
    private static final Logger LOG = LoggerFactory
            .getLogger(CassandraReader.class);

    /** Port used when the configuration does not specify one. */
    private static final int DEFAULT_PORT = 9042;

    /**
     * Builds a driver {@link Cluster} from the host, port, credential and SSL settings
     * of the given configuration.
     *
     * <p>Credentials are applied only when a non-empty username is configured. SSL is
     * applied whenever useSSL is true, with or without credentials; a missing useSSL
     * item counts as false.
     */
    private static Cluster buildCluster(Configuration conf) {
        String username = conf.getString(Key.USERNAME);
        String password = conf.getString(Key.PASSWORD);
        String hosts = conf.getString(Key.HOST);
        int port = conf.getInt(Key.PORT, DEFAULT_PORT);
        // getBool yields null for an absent item; unboxing it directly would throw.
        boolean useSSL = Boolean.TRUE.equals(conf.getBool(Key.USESSL));

        Cluster.Builder clusterBuilder = Cluster.builder();
        if ((username != null) && !username.isEmpty()) {
            clusterBuilder = clusterBuilder.withCredentials(username, password);
        }
        clusterBuilder = clusterBuilder.withPort(port).addContactPoints(hosts.split(","));
        if (useSSL) {
            clusterBuilder = clusterBuilder.withSSL();
        }
        return clusterBuilder.build();
    }

    public static class Job extends Reader.Job {
        private Configuration jobConfig = null;
        private Cluster cluster = null;

        /** Reads the job configuration, builds the cluster handle and validates the configuration. */
        @Override
        public void init() {
            this.jobConfig = super.getPluginJobConf();
            cluster = buildCluster(jobConfig);
            CassandraReaderHelper.checkConfig(jobConfig, cluster);
        }

        /** Releases the cluster handle created in {@link #init()}. */
        @Override
        public void destroy() {
            if (cluster != null) {
                cluster.close();
                cluster = null;
            }
        }

        /** Delegates splitting of the job into task configurations to {@link CassandraReaderHelper}. */
        @Override
        public List<Configuration> split(int adviceNumber) {
            List<Configuration> splittedConfigs = CassandraReaderHelper.splitJob(adviceNumber, jobConfig, cluster);
            return splittedConfigs;
        }
    }

    public static class Task extends Reader.Task {
        private Configuration taskConfig;
        private Cluster cluster = null;
        private Session session = null;
        private String queryString = null;
        private ConsistencyLevel consistencyLevel;
        private int columnNumber = 0;
        private List<String> columnMeta = null;

        /**
         * Connects to the configured keyspace, resolves the consistency level
         * (LOCAL_QUORUM when none is configured) and prepares the query string.
         */
        @Override
        public void init() {
            this.taskConfig = super.getPluginJobConf();
            String keyspace = taskConfig.getString(Key.KEYSPACE);
            this.columnMeta = taskConfig.getList(Key.COLUMN, String.class);
            columnNumber = columnMeta.size();

            cluster = buildCluster(taskConfig);
            session = cluster.connect(keyspace);

            String cl = taskConfig.getString(Key.CONSITANCY_LEVEL);
            if (cl != null && !cl.isEmpty()) {
                consistencyLevel = ConsistencyLevel.valueOf(cl);
            } else {
                consistencyLevel = ConsistencyLevel.LOCAL_QUORUM;
            }

            queryString = CassandraReaderHelper.getQueryString(taskConfig, cluster);
            LOG.info("query = {}", queryString);
        }

        /**
         * Executes the prepared query and sends every row that
         * {@link CassandraReaderHelper#buildRecord} turns into a non-null record
         * to the writer.
         */
        @Override
        public void startRead(RecordSender recordSender) {
            ResultSet r = session.execute(new SimpleStatement(queryString).setConsistencyLevel(consistencyLevel));
            for (Row row : r) {
                Record record = recordSender.createRecord();
                record = CassandraReaderHelper.buildRecord(record, row, r.getColumnDefinitions(), columnNumber,
                        super.getTaskPluginCollector());
                if (record != null) {
                    recordSender.sendToWriter(record);
                }
            }
        }

        /** Closes the session and cluster handle opened in {@link #init()}. */
        @Override
        public void destroy() {
            try {
                if (session != null) {
                    session.close();
                    session = null;
                }
            } finally {
                // Close the cluster even if closing the session failed.
                if (cluster != null) {
                    cluster.close();
                    cluster = null;
                }
            }
        }
    }
}
================================================
FILE: cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/CassandraReaderErrorCode.java
================================================
package com.alibaba.datax.plugin.reader.cassandrareader;
import com.alibaba.datax.common.spi.ErrorCode;
/**
 * Error codes reported by the Cassandra reader plugin.
 */
public enum CassandraReaderErrorCode implements ErrorCode {
    /** Configuration error. */
    CONF_ERROR("CassandraReader-00", "配置错误."),
    ;

    private final String code;

    private final String description;

    CassandraReaderErrorCode(String errorCode, String errorDescription) {
        code = errorCode;
        description = errorDescription;
    }

    @Override
    public String getCode() {
        return code;
    }

    @Override
    public String getDescription() {
        return description;
    }

    /** Renders the code and description as {@code "Code:[..], Description:[..]. "}. */
    @Override
    public String toString() {
        return "Code:[" + code + "], Description:[" + description + "]. ";
    }
}
================================================
FILE: cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/CassandraReaderHelper.java
================================================
package com.alibaba.datax.plugin.reader.cassandrareader;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.InetAddress;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.alibaba.datax.common.element.BoolColumn;
import com.alibaba.datax.common.element.BytesColumn;
import com.alibaba.datax.common.element.DateColumn;
import com.alibaba.datax.common.element.DoubleColumn;
import com.alibaba.datax.common.element.LongColumn;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.TaskPluginCollector;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.fastjson2.JSON;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.CodecRegistry;
import com.datastax.driver.core.ColumnDefinitions;
import com.datastax.driver.core.ColumnMetadata;
import com.datastax.driver.core.DataType;
import com.datastax.driver.core.Duration;
import com.datastax.driver.core.LocalDate;
import com.datastax.driver.core.Row;
import com.datastax.driver.core.TableMetadata;
import com.datastax.driver.core.TupleType;
import com.datastax.driver.core.TupleValue;
import com.datastax.driver.core.UDTValue;
import com.datastax.driver.core.UserType;
import com.google.common.reflect.TypeToken;
import org.apache.commons.codec.binary.Base64;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Created by mazhenlin on 2019/8/21.
*/
public class CassandraReaderHelper {
/** Codec registry used to look up the Java type that corresponds to a Cassandra DataType. */
static CodecRegistry registry = new CodecRegistry();
// Use this class as the logger category; it previously used CassandraReader.class,
// which attributed the helper's log lines to the reader class.
private static final Logger LOG = LoggerFactory
        .getLogger(CassandraReaderHelper.class);
/** Thrown when a Cassandra type cannot be converted by this helper. */
static class TypeNotSupported extends Exception{}
/**
 * Serialises a Cassandra value to a JSON string.
 *
 * <p>LIST, MAP, SET, TUPLE and UDT values are first converted with
 * {@code transferObjectForJson}; every other value is serialised as-is.
 * A null value is serialised through {@code JSON.toJSONString(null)}.
 */
static String toJSonString(Object o, DataType type ) throws Exception{
    if (o == null) {
        return JSON.toJSONString(null);
    }
    Object jsonReady;
    switch (type.getName()) {
        case LIST:
        case MAP:
        case SET:
        case TUPLE:
        case UDT:
            jsonReady = transferObjectForJson(o, type);
            break;
        default:
            jsonReady = o;
            break;
    }
    return JSON.toJSONString(jsonReady);
}
/**
 * Converts a Cassandra value into an object suitable for JSON serialisation.
 *
 * <p>Text, boolean, numeric, UUID and TIME values are returned unchanged. BLOB becomes
 * a Base64 string, DATE and TIMESTAMP become epoch milliseconds, DURATION becomes its
 * {@code toString()} form, INET becomes the host address, and collection, tuple and
 * UDT values are converted recursively.
 *
 * @param o    the value, may be null
 * @param type the Cassandra type of {@code o}
 * @return the converted value, or null when {@code o} is null
 * @throws TypeNotSupported for any type not listed above
 */
static Object transferObjectForJson(Object o,DataType type) throws TypeNotSupported{
    if (o == null) {
        return o;
    }
    switch (type.getName()) {
        case ASCII:
        case TEXT:
        case VARCHAR:
        case BOOLEAN:
        case SMALLINT:
        case TINYINT:
        case INT:
        case BIGINT:
        case VARINT:
        case FLOAT:
        case DOUBLE:
        case DECIMAL:
        case UUID:
        case TIMEUUID:
        case TIME:
            return o;
        case BLOB: {
            // Copy the remaining bytes through a duplicate instead of reading array()
            // at position..limit: that ignored arrayOffset() and failed for direct or
            // read-only buffers. The duplicate leaves the caller's position untouched.
            ByteBuffer view = ((ByteBuffer) o).duplicate();
            byte[] bytes = new byte[view.remaining()];
            view.get(bytes);
            return Base64.encodeBase64String(bytes);
        }
        case DATE:
            return ((LocalDate) o).getMillisSinceEpoch();
        case TIMESTAMP:
            return ((Date) o).getTime();
        case DURATION:
            return o.toString();
        case INET:
            return ((InetAddress) o).getHostAddress();
        case LIST: {
            return transferListForJson((List) o, type.getTypeArguments().get(0));
        }
        case MAP: {
            DataType keyType = type.getTypeArguments().get(0);
            DataType valType = type.getTypeArguments().get(1);
            return transferMapForJson((Map) o, keyType, valType);
        }
        case SET: {
            return transferSetForJson((Set) o, type.getTypeArguments().get(0));
        }
        case TUPLE: {
            return transferTupleForJson((TupleValue) o, ((TupleType) type).getComponentTypes());
        }
        case UDT: {
            return transferUDTForJson((UDTValue) o);
        }
        default:
            throw new TypeNotSupported();
    }
}
/**
 * Converts the elements of a Cassandra list for JSON serialisation.
 *
 * <p>When the element type needs no conversion (text, boolean, numeric, TIME, UUID),
 * the input list itself is returned. Otherwise a new list is built by converting each
 * element with {@code transferObjectForJson}.
 *
 * @throws TypeNotSupported when the element type is not one of the handled types
 */
static List transferListForJson(List clist, DataType eleType) throws TypeNotSupported {
    switch (eleType.getName()) {
        case ASCII:
        case TEXT:
        case VARCHAR:
        case BOOLEAN:
        case SMALLINT:
        case TINYINT:
        case INT:
        case BIGINT:
        case VARINT:
        case FLOAT:
        case DOUBLE:
        case DECIMAL:
        case TIME:
        case UUID:
        case TIMEUUID:
            // Elements are already JSON-friendly; hand back the original list.
            return clist;
        case BLOB:
        case DATE:
        case TIMESTAMP:
        case DURATION:
        case INET:
        case LIST:
        case MAP:
        case SET:
        case TUPLE:
        case UDT:
            // Fall out of the switch and convert element by element.
            break;
        default:
            throw new TypeNotSupported();
    }
    List converted = new ArrayList();
    for (Object element : clist) {
        converted.add(transferObjectForJson(element, eleType));
    }
    return converted;
}
/**
 * Converts the elements of a Cassandra set for JSON serialisation.
 *
 * <p>When the element type needs no conversion (text, boolean, numeric, TIME, UUID),
 * the input set itself is returned. Otherwise a new {@code HashSet} is built by
 * converting each element with {@code transferObjectForJson}.
 *
 * @throws TypeNotSupported when the element type is not one of the handled types
 */
static Set transferSetForJson(Set cset,DataType eleType) throws TypeNotSupported{
    switch (eleType.getName()) {
        case ASCII:
        case TEXT:
        case VARCHAR:
        case BOOLEAN:
        case SMALLINT:
        case TINYINT:
        case INT:
        case BIGINT:
        case VARINT:
        case FLOAT:
        case DOUBLE:
        case DECIMAL:
        case TIME:
        case UUID:
        case TIMEUUID:
            // Elements are already JSON-friendly; hand back the original set.
            return cset;
        case BLOB:
        case DATE:
        case TIMESTAMP:
        case DURATION:
        case INET:
        case LIST:
        case MAP:
        case SET:
        case TUPLE:
        case UDT:
            // Fall out of the switch and convert element by element.
            break;
        default:
            throw new TypeNotSupported();
    }
    Set converted = new HashSet();
    for (Object element : cset) {
        converted.add(transferObjectForJson(element, eleType));
    }
    return converted;
}
/**
 * Converts a Cassandra map for JSON serialisation.
 *
 * <p>Keys and values are converted with {@code transferObjectForJson}. A converted key
 * that is not a {@code String} is replaced by its JSON string form.
 *
 * @return a new {@code HashMap} holding the converted entries
 * @throws TypeNotSupported when the key or value type is not supported
 */
static Map transferMapForJson(Map cmap,DataType keyType,DataType valueType) throws TypeNotSupported {
    Map converted = new HashMap();
    for (Object rawEntry : cmap.entrySet()) {
        Map.Entry entry = (Map.Entry) rawEntry;
        Object jsonKey = transferObjectForJson(entry.getKey(), keyType);
        Object jsonValue = transferObjectForJson(entry.getValue(), valueType);
        converted.put(jsonKey instanceof String ? jsonKey : JSON.toJSONString(jsonKey), jsonValue);
    }
    return converted;
}
/**
 * Converts a Cassandra tuple into a list suitable for JSON serialisation.
 *
 * <p>Each component is read using the Java type the codec registry reports for its
 * Cassandra type, then converted with {@code transferObjectForJson}.
 *
 * @param tupleValue     the tuple to convert
 * @param componentTypes the Cassandra types of the tuple components, in order
 * @return a new list with one converted element per component
 * @throws TypeNotSupported when a component type is not supported
 */
static List transferTupleForJson(TupleValue tupleValue,List<DataType> componentTypes) throws TypeNotSupported {
    List<Object> converted = new ArrayList<Object>(componentTypes.size());
    for (int j = 0; j < componentTypes.size(); j++) {
        DataType dataType = componentTypes.get(j);
        TypeToken<?> eltClass = registry.codecFor(dataType).getJavaType();
        Object ele = tupleValue.get(j, eltClass);
        converted.add(transferObjectForJson(ele, dataType));
    }
    return converted;
}
/**
 * Converts a Cassandra user-defined-type value into a map suitable for JSON
 * serialisation, keyed by field name.
 *
 * <p>Fields are read by position, in the iteration order of the value's
 * {@code UserType}, using the Java type the codec registry reports for each field.
 *
 * @return a new {@code HashMap} from field name to converted field value
 * @throws TypeNotSupported when a field type is not supported
 */
static Map transferUDTForJson(UDTValue udtValue) throws TypeNotSupported {
    Map<String, Object> converted = new HashMap<String, Object>();
    int j = 0;
    for (UserType.Field f : udtValue.getType()) {
        DataType dataType = f.getType();
        TypeToken<?> eltClass = registry.codecFor(dataType).getJavaType();
        Object ele = udtValue.get(j, eltClass);
        converted.put(f.getName(), transferObjectForJson(ele, dataType));
        j++;
    }
    return converted;
}
static Record buildRecord(Record record, Row rs, ColumnDefinitions metaData, int columnNumber,
TaskPluginCollector taskPluginCollector) {
try {
for (int i = 0; i < columnNumber; i++)
try {
if (rs.isNull(i)) {
record.addColumn(new StringColumn());
continue;
}
switch (metaData.getType(i).getName()) {
case ASCII:
case TEXT:
case VARCHAR:
record.addColumn(new StringColumn(rs.getString(i)));
break;
case BLOB:
record.addColumn(new BytesColumn(rs.getBytes(i).array()));
break;
case BOOLEAN:
record.addColumn(new BoolColumn(rs.getBool(i)));
break;
case SMALLINT:
record.addColumn(new LongColumn((int)rs.getShort(i)));
break;
case TINYINT:
record.addColumn(new LongColumn((int)rs.getByte(i)));
break;
case INT:
record.addColumn(new LongColumn(rs.getInt(i)));
break;
case COUNTER:
case BIGINT:
record.addColumn(new LongColumn(rs.getLong(i)));
break;
case VARINT:
record.addColumn(new LongColumn(rs.getVarint(i)));
break;
case FLOAT:
record.addColumn(new DoubleColumn(rs.getFloat(i)));
break;
case DOUBLE:
record.addColumn(new DoubleColumn(rs.getDouble(i)));
break;
case DECIMAL:
record.addColumn(new DoubleColumn(rs.getDecimal(i)));
break;
case DATE:
record.addColumn(new DateColumn(rs.getDate(i).getMillisSinceEpoch()));
break;
case TIME:
record.addColumn(new LongColumn(rs.getTime(i)));
break;
case TIMESTAMP:
record.addColumn(new DateColumn(rs.getTimestamp(i)));
break;
case UUID:
case TIMEUUID:
record.addColumn(new StringColumn(rs.getUUID(i).toString()));
break;
case INET:
record.addColumn(new StringColumn(rs.getInet(i).getHostAddress()));
break;
case DURATION:
record.addColumn(new StringColumn(rs.get(i,Duration.class).toString()));
break;
case LIST: {
TypeToken listEltClass = registry.codecFor(metaData.getType(i).getTypeArguments().get(0)).getJavaType();
List> l = rs.getList(i, listEltClass);
record.addColumn(new StringColumn(toJSonString(l,metaData.getType(i))));
}
break;
case MAP: {
DataType keyType = metaData.getType(i).getTypeArguments().get(0);
DataType valType = metaData.getType(i).getTypeArguments().get(1);
TypeToken> keyEltClass = registry.codecFor(keyType).getJavaType();
TypeToken> valEltClass = registry.codecFor(valType).getJavaType();
Map,?> m = rs.getMap(i, keyEltClass, valEltClass);
record.addColumn(new StringColumn(toJSonString(m,metaData.getType(i))));
}
break;
case SET: {
TypeToken> setEltClass = registry.codecFor(metaData.getType(i).getTypeArguments().get(0))
.getJavaType();
Set> set = rs.getSet(i, setEltClass);
record.addColumn(new StringColumn(toJSonString(set,metaData.getType(i))));
}
break;
case TUPLE: {
TupleValue t = rs.getTupleValue(i);
record.addColumn(new StringColumn(toJSonString(t,metaData.getType(i))));
}
break;
case UDT: {
UDTValue t = rs.getUDTValue(i);
record.addColumn(new StringColumn(toJSonString(t,metaData.getType(i))));
}
break;
default:
throw DataXException
.asDataXException(
CassandraReaderErrorCode.CONF_ERROR,
String.format(
"您的配置文件中的列配置信息有误. 因为DataX 不支持数据库读取这种字段类型. 字段名:[%s], "
+ "字段类型:[%s]. ",
metaData.getName(i),
metaData.getType(i)));
}
} catch (TypeNotSupported t) {
throw DataXException
.asDataXException(
CassandraReaderErrorCode.CONF_ERROR,
String.format(
"您的配置文件中的列配置信息有误. 因为DataX 不支持数据库读取这种字段类型. 字段名:[%s], "
+ "字段类型:[%s]. ",
metaData.getName(i),
metaData.getType(i)));
}
} catch (Exception e) {
//TODO 这里识别为脏数据靠谱吗?
taskPluginCollector.collectDirtyRecord(record, e);
if (e instanceof DataXException) {
throw (DataXException) e;
}
return null;
}
return record;
}
/**
 * Splits a read job into per-task configurations that each cover one token range.
 *
 * <p>The job is returned unsplit (a single-element list holding {@code jobConfig}) when
 * {@code adviceNumber <= 1}, when the configured WHERE clause already contains {@code token(},
 * or when the cluster's partitioner is neither RandomPartitioner nor Murmur3Partitioner.
 * Otherwise {@code adviceNumber} clones of {@code jobConfig} are produced, each carrying
 * {@code Key.MIN_TOKEN} and {@code Key.MAX_TOKEN}.
 *
 * @param adviceNumber suggested number of tasks
 * @param jobConfig    job-level configuration
 * @param cluster      connected cluster, used to read the partitioner name
 * @return the task configurations
 */
public static List<Configuration> splitJob(int adviceNumber, Configuration jobConfig, Cluster cluster) {
    List<Configuration> splittedConfigs = new ArrayList<Configuration>();
    if (adviceNumber <= 1) {
        splittedConfigs.add(jobConfig);
        return splittedConfigs;
    }
    String where = jobConfig.getString(Key.WHERE);
    if (where != null && where.toLowerCase().contains("token(")) {
        // The user already restricts by token; adding our own range would conflict.
        splittedConfigs.add(jobConfig);
        return splittedConfigs;
    }
    String partitioner = cluster.getMetadata().getPartitioner();
    if (partitioner.endsWith("RandomPartitioner")) {
        addTokenRangeTasks(splittedConfigs, jobConfig, adviceNumber,
                BigDecimal.valueOf(-1), new BigDecimal(new BigInteger("2").pow(127)));
    } else if (partitioner.endsWith("Murmur3Partitioner")) {
        addTokenRangeTasks(splittedConfigs, jobConfig, adviceNumber,
                BigDecimal.valueOf(Long.MIN_VALUE), BigDecimal.valueOf(Long.MAX_VALUE));
    } else {
        splittedConfigs.add(jobConfig);
    }
    return splittedConfigs;
}

/** Appends {@code taskCount} clones of {@code jobConfig}, each bounded by one slice of [minToken, maxToken]. */
private static void addTokenRangeTasks(List<Configuration> target, Configuration jobConfig, int taskCount,
        BigDecimal minToken, BigDecimal maxToken) {
    BigDecimal step = maxToken.subtract(minToken)
            .divide(BigDecimal.valueOf(taskCount), 2, java.math.RoundingMode.HALF_EVEN);
    for (int i = 0; i < taskCount; i++) {
        BigInteger lower = minToken.add(step.multiply(BigDecimal.valueOf(i))).toBigInteger();
        // The last slice is pinned to maxToken so rounding of the step cannot leave a gap at the end.
        BigInteger upper = (i == taskCount - 1)
                ? maxToken.toBigInteger()
                : minToken.add(step.multiply(BigDecimal.valueOf(i + 1))).toBigInteger();
        Configuration taskConfig = jobConfig.clone();
        taskConfig.set(Key.MIN_TOKEN, lower.toString());
        taskConfig.set(Key.MAX_TOKEN, upper.toString());
        target.add(taskConfig);
    }
}
/**
 * Builds the CQL SELECT statement for one task.
 *
 * <p>The statement selects the configured columns from the configured table. The WHERE clause is
 * the user's {@code Key.WHERE} text, followed (joined with {@code AND}) by
 * {@code token(<partition key columns>) > minToken} and/or {@code <= maxToken} when those task
 * settings are non-empty. {@code ALLOW FILTERING} is appended when {@code Key.ALLOW_FILTERING}
 * is true.
 *
 * @param taskConfig task-level configuration
 * @param cluster    connected cluster, used to look up the table's partition key
 * @return the CQL text, terminated by {@code ;}
 */
public static String getQueryString(Configuration taskConfig, Cluster cluster) {
    List<String> columnMeta = taskConfig.getList(Key.COLUMN, String.class);
    String keyspace = taskConfig.getString(Key.KEYSPACE);
    String table = taskConfig.getString(Key.TABLE);

    StringBuilder columns = new StringBuilder();
    for (String column : columnMeta) {
        if (columns.length() > 0) {
            columns.append(",");
        }
        columns.append(column);
    }

    StringBuilder where = new StringBuilder();
    String whereString = taskConfig.getString(Key.WHERE);
    if (whereString != null && !whereString.isEmpty()) {
        where.append(whereString);
    }

    String minToken = taskConfig.getString(Key.MIN_TOKEN);
    String maxToken = taskConfig.getString(Key.MAX_TOKEN);
    if (minToken != null || maxToken != null) {
        LOG.info("range:" + minToken + "~" + maxToken);
        List<ColumnMetadata> pks = cluster.getMetadata().getKeyspace(keyspace).getTable(table).getPartitionKey();
        StringBuilder pkNames = new StringBuilder();
        for (ColumnMetadata pk : pks) {
            if (pkNames.length() > 0) {
                pkNames.append(",");
            }
            pkNames.append(pk.getName());
        }
        String tokenExpr = "token(" + pkNames + ")";
        if (minToken != null && !minToken.isEmpty()) {
            if (where.length() > 0) {
                where.append(" AND ");
            }
            // Lower bound is exclusive and upper bound inclusive, so adjacent ranges do not overlap.
            where.append(tokenExpr).append(" > ").append(minToken);
        }
        if (maxToken != null && !maxToken.isEmpty()) {
            if (where.length() > 0) {
                where.append(" AND ");
            }
            where.append(tokenExpr).append(" <= ").append(maxToken);
        }
    }

    boolean allowFiltering = taskConfig.getBool(Key.ALLOW_FILTERING, false);
    StringBuilder select = new StringBuilder();
    select.append("SELECT ").append(columns.toString()).append(" FROM ").append(table);
    if (where.length() > 0) {
        select.append(" where ").append(where.toString());
    }
    if (allowFiltering) {
        select.append(" ALLOW FILTERING");
    }
    select.append(";");
    return select.toString();
}
/**
 * Validates the reader job configuration against the connected cluster.
 *
 * <p>Checks that host, keyspace and table are non-empty strings, that a column setting is present
 * and non-empty, that the keyspace and table exist in the cluster metadata, and that no configured
 * column name is null or empty.
 *
 * @param jobConfig job-level configuration to validate
 * @param cluster   connected cluster whose metadata is consulted
 * @throws DataXException with {@code CONF_ERROR} on the first violated rule
 */
public static void checkConfig(Configuration jobConfig, Cluster cluster) {
    ensureStringExists(jobConfig, Key.HOST);
    ensureStringExists(jobConfig, Key.KEYSPACE);
    ensureStringExists(jobConfig, Key.TABLE);
    ensureExists(jobConfig, Key.COLUMN);

    // Verify that the keyspace and the table exist.
    String keyspace = jobConfig.getString(Key.KEYSPACE);
    if (cluster.getMetadata().getKeyspace(keyspace) == null) {
        throw DataXException
                .asDataXException(
                        CassandraReaderErrorCode.CONF_ERROR,
                        String.format(
                                "配置信息有错误.keyspace'%s'不存在 .",
                                keyspace));
    }
    String table = jobConfig.getString(Key.TABLE);
    TableMetadata tableMetadata = cluster.getMetadata().getKeyspace(keyspace).getTable(table);
    if (tableMetadata == null) {
        throw DataXException
                .asDataXException(
                        CassandraReaderErrorCode.CONF_ERROR,
                        String.format(
                                "配置信息有错误.表'%s'不存在 .",
                                table));
    }

    List<String> columns = jobConfig.getList(Key.COLUMN, String.class);
    if (columns == null || columns.isEmpty()) {
        // An empty column list would otherwise surface later as an invalid SELECT statement.
        throw DataXException
                .asDataXException(
                        CassandraReaderErrorCode.CONF_ERROR,
                        String.format(
                                "配置信息有错误.参数'%s'不能为空 .",
                                Key.COLUMN));
    }
    for (String name : columns) {
        if (name == null || name.isEmpty()) {
            throw DataXException
                    .asDataXException(
                            CassandraReaderErrorCode.CONF_ERROR,
                            String.format(
                                    "配置信息有错误.列信息中需要包含'%s'字段 .", Key.COLUMN_NAME));
        }
    }
}
/**
 * Fails with a configuration error when the given setting is absent.
 *
 * @param jobConfig configuration to inspect
 * @param keyword   path of the required setting
 * @throws DataXException with {@code CONF_ERROR} if {@code jobConfig.get(keyword)} is null
 */
static void ensureExists(Configuration jobConfig, String keyword) {
    if (jobConfig.get(keyword) != null) {
        return;
    }
    String message = String.format("配置信息有错误.参数'%s'为必填项 .", keyword);
    throw DataXException.asDataXException(CassandraReaderErrorCode.CONF_ERROR, message);
}
/**
 * Fails with a configuration error when the given setting is absent or an empty string.
 *
 * @param jobConfig configuration to inspect
 * @param keyword   path of the required string setting
 * @throws DataXException with {@code CONF_ERROR} if the setting is missing or empty
 */
static void ensureStringExists(Configuration jobConfig, String keyword) {
    // Presence is checked first, so the string lookup below is only reached for existing settings.
    ensureExists(jobConfig, keyword);
    String value = jobConfig.getString(keyword);
    if (!value.isEmpty()) {
        return;
    }
    String message = String.format("配置信息有错误.参数'%s'不能为空 .", keyword);
    throw DataXException.asDataXException(CassandraReaderErrorCode.CONF_ERROR, message);
}
}
================================================
FILE: cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/Key.java
================================================
package com.alibaba.datax.plugin.reader.cassandrareader;
/**
 * Configuration key names and related string constants used by the Cassandra reader plugin.
 *
 * Created by mazhenlin on 2019/8/19.
 */
public class Key {

    // ---- connection settings ----
    public static final String USERNAME = "username";
    public static final String PASSWORD = "password";
    public static final String HOST = "host";
    public static final String PORT = "port";
    public static final String USESSL = "useSSL";

    // ---- what to read ----
    public static final String KEYSPACE = "keyspace";
    public static final String TABLE = "table";
    public static final String COLUMN = "column";
    public static final String WHERE = "where";
    public static final String ALLOW_FILTERING = "allowFiltering";

    /** Consistency level setting; the spelling of both the constant and its value is kept as-is. */
    public static final String CONSITANCY_LEVEL = "consistancyLevel";

    // ---- token range assigned to a task ----
    public static final String MIN_TOKEN = "minToken";
    public static final String MAX_TOKEN = "maxToken";

    /** Name of each column. */
    public static final String COLUMN_NAME = "name";

    /** Column separator. */
    public static final String COLUMN_SPLITTER = "format";

    public static final String WRITE_TIME = "writetime(";
    public static final String ELEMENT_SPLITTER = "splitter";
    public static final String ENTRY_SPLITTER = "entrySplitter";
    public static final String KV_SPLITTER = "kvSplitter";
    public static final String ELEMENT_CONFIG = "element";
    public static final String TUPLE_CONNECTOR = "_";
    public static final String KEY_CONFIG = "key";
    public static final String VALUE_CONFIG = "value";
}
================================================
FILE: cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/LocalStrings.properties
================================================
errorcode.config_invalid_exception=\u914D\u7F6E\u9519\u8BEF
================================================
FILE: cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/LocalStrings_en_US.properties
================================================
================================================
FILE: cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/LocalStrings_ja_JP.properties
================================================
errorcode.config_invalid_exception=\u914D\u7F6E\u9519\u8BEF
================================================
FILE: cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/LocalStrings_zh_CN.properties
================================================
errorcode.config_invalid_exception=\u914D\u7F6E\u9519\u8BEF
================================================
FILE: cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/LocalStrings_zh_HK.properties
================================================
errorcode.config_invalid_exception=\u914D\u7F6E\u9519\u8BEF
================================================
FILE: cassandrareader/src/main/java/com/alibaba/datax/plugin/reader/cassandrareader/LocalStrings_zh_TW.properties
================================================
errorcode.config_invalid_exception=\u914D\u7F6E\u9519\u8BEF
================================================
FILE: cassandrareader/src/main/resources/plugin.json
================================================
{
"name": "cassandrareader",
"class": "com.alibaba.datax.plugin.reader.cassandrareader.CassandraReader",
"description": "useScene: prod. mechanism: execute select cql, retrieve data from the ResultSet. warn: The more you know about the database, the less problems you encounter.",
"developer": "alibaba"
}
================================================
FILE: cassandrareader/src/main/resources/plugin_job_template.json
================================================
{
"name": "cassandrareader",
"parameter": {
"username": "",
"password": "",
"host": "",
"port": "",
"useSSL": false,
"keyspace": "",
"table": "",
"column": [
"c1","c2","c3"
]
}
}
================================================
FILE: cassandrawriter/doc/cassandrawriter.md
================================================
# CassandraWriter 插件文档
___
## 1 快速介绍
CassandraWriter插件实现了向Cassandra写入数据。在底层实现上,CassandraWriter通过datastax的java driver连接Cassandra实例,并执行相应的cql语句将数据写入cassandra中。
## 2 实现原理
简而言之,CassandraWriter通过java driver连接到Cassandra实例,并根据用户配置的信息生成INSERT CQL语句,然后发送到Cassandra。
对于用户配置Table、Column的信息,CassandraWriter将其拼接为CQL语句发送到Cassandra。
## 3 功能说明
### 3.1 配置样例
* 配置一个从内存产生到Cassandra导入的作业:
```
{
"job": {
"setting": {
"speed": {
"channel": 5
}
},
"content": [
{
"reader": {
"name": "streamreader",
"parameter": {
"column": [
{"value":"name","type": "string"},
{"value":"false","type":"bool"},
{"value":"1988-08-08 08:08:08","type":"date"},
{"value":"addr","type":"bytes"},
{"value":1.234,"type":"double"},
{"value":12345678,"type":"long"},
{"value":2.345,"type":"double"},
{"value":3456789,"type":"long"},
{"value":"4a0ef8c0-4d97-11d0-db82-ebecdb03ffa5","type":"string"},
{"value":"value","type":"bytes"},
{"value":"-838383838,37377373,-383883838,27272772,393993939,-38383883,83883838,-1350403181,817650816,1630642337,251398784,-622020148","type":"string"}
],
"sliceRecordCount": 10000000
}
},
"writer": {
"name": "cassandrawriter",
"parameter": {
"host": "localhost",
"port": 9042,
"useSSL": false,
"keyspace": "stresscql",
"table": "dst",
"batchSize":10,
"column": [
"name",
"choice",
"date",
"address",
"dbl",
"lval",
"fval",
"ival",
"uid",
"value",
"listval"
]
}
}
}
]
}
}
```
### 3.2 参数说明
* **host**
* 描述:Cassandra连接点的域名或ip,多个node之间用逗号分隔。
* 必选:是
* 默认值:无
* **port**
* 描述:Cassandra端口。
* 必选:否
* 默认值:9042
* **username**
* 描述:数据源的用户名
* 必选:否
* 默认值:无
* **password**
* 描述:数据源指定用户名的密码
* 必选:否
* 默认值:无
* **useSSL**
* 描述:是否使用SSL连接。
* 必选:否
* 默认值:false
* **connectionsPerHost**
* 描述:客户端连接池配置:与服务器每个节点建多少个连接。
* 必选:否
* 默认值:8
* **maxPendingPerConnection**
* 描述:客户端连接池配置:每个连接最大请求数。
* 必选:否
* 默认值:128
* **keyspace**
* 描述:需要同步的表所在的keyspace。
* 必选:是
* 默认值:无
* **table**
* 描述:所选取的需要同步的表。
* 必选:是
* 默认值:无
* **column**
* 描述:所配置的表中需要同步的列集合。
内容可以是列的名称或"writetime()"。如果将列名配置为writetime(),会将这一列的内容作为时间戳。
* 必选:是
* 默认值:无
* **consistancyLevel**
* 描述:数据一致性级别。可选ONE|QUORUM|LOCAL_QUORUM|EACH_QUORUM|ALL|ANY|TWO|THREE|LOCAL_ONE
* 必选:否
* 默认值:LOCAL_QUORUM
* **batchSize**
* 描述:一次批量提交(UNLOGGED BATCH)的记录数大小(条数)。注意batch的大小有如下限制:
(1)不能超过65535。
(2) batch中的内容大小受到服务器端batch_size_fail_threshold_in_kb的限制。
(3) 如果batch中的内容超过了batch_size_warn_threshold_in_kb的限制,会打出warn日志,但并不影响写入,忽略即可。
如果批量提交失败,会把这个批量的所有内容重新逐条写入一遍。
* 必选:否
* 默认值:1
### 3.3 类型转换
目前CassandraWriter支持除counter和Custom类型之外的所有类型。
下面列出CassandraWriter针对Cassandra类型转换列表:
| DataX 内部类型| Cassandra 数据类型 |
| -------- | ----- |
| Long |int, tinyint, smallint,varint,bigint,time|
| Double |float, double, decimal|
| String |ascii,varchar, text,uuid,timeuuid,duration,list,map,set,tuple,udt,inet |
| Date |date, timestamp |
| Boolean |boolean |
| Bytes |blob |
请注意:
* 目前不支持counter类型和custom类型。
## 4 性能报告
略
## 5 约束限制
### 5.1 主备同步数据恢复问题
略
## 6 FAQ
================================================
FILE: cassandrawriter/pom.xml
================================================
datax-allcom.alibaba.datax0.0.1-SNAPSHOT4.0.0cassandrawritercassandrawriter0.0.1-SNAPSHOTjarcom.alibaba.dataxdatax-common${datax-project-version}slf4j-log4j12org.slf4jcom.datastax.cassandracassandra-driver-core3.7.2commons-codeccommons-codec1.9junitjunittestcom.alibaba.dataxdatax-core${datax-project-version}com.alibaba.dataxdatax-service-faceorg.apache.hadoophadoop-commonorg.apache.hivehive-execorg.apache.hivehive-serdejavolutionjavolutiontestorg.mockitomockito-all1.9.5testsrc/main/java**/*.propertiesmaven-compiler-plugin${jdk-version}${jdk-version}${project-sourceEncoding}maven-assembly-pluginsrc/main/assembly/package.xmldataxdwzippackagesingle
================================================
FILE: cassandrawriter/src/main/assembly/package.xml
================================================
dirfalsesrc/main/resourcesplugin.jsonplugin_job_template.jsonplugin/writer/cassandrawritertarget/cassandrawriter-0.0.1-SNAPSHOT.jarplugin/writer/cassandrawriterfalseplugin/writer/cassandrawriter/libsruntime
================================================
FILE: cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/CassandraWriter.java
================================================
package com.alibaba.datax.plugin.writer.cassandrawriter;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.datastax.driver.core.BatchStatement;
import com.datastax.driver.core.BatchStatement.Type;
import com.datastax.driver.core.BoundStatement;
import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.ColumnMetadata;
import com.datastax.driver.core.ConsistencyLevel;
import com.datastax.driver.core.DataType;
import com.datastax.driver.core.HostDistance;
import com.datastax.driver.core.PoolingOptions;
import com.datastax.driver.core.PreparedStatement;
import com.datastax.driver.core.ResultSetFuture;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.TableMetadata;
import com.datastax.driver.core.querybuilder.Insert;
import com.datastax.driver.core.querybuilder.QueryBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.datastax.driver.core.querybuilder.QueryBuilder.timestamp;
/**
 * DataX writer plugin that inserts records into a Cassandra table through the DataStax Java driver.
 *
 * Created by mazhenlin on 2019/8/19.
 */
public class CassandraWriter extends Writer {
    private static final Logger LOG = LoggerFactory
            .getLogger(CassandraWriter.class);

    /** Job side of the plugin: hands every task an identical copy of the job configuration. */
    public static class Job extends Writer.Job {
        private Configuration originalConfig = null;

        /**
         * Produces one configuration per task.
         *
         * @param mandatoryNumber number of task configurations to create
         * @return {@code mandatoryNumber} clones of the job configuration
         */
        @Override
        public List<Configuration> split(int mandatoryNumber) {
            List<Configuration> splitResultConfigs = new ArrayList<Configuration>();
            for (int j = 0; j < mandatoryNumber; j++) {
                splitResultConfigs.add(originalConfig.clone());
            }
            return splitResultConfigs;
        }

        @Override
        public void init() {
            originalConfig = getPluginJobConf();
        }

        @Override
        public void destroy() {
        }
    }

    /** Task side of the plugin: owns the connection and writes the records it receives. */
    public static class Task extends Writer.Task {
        /** How long to wait for a single asynchronous write to complete, in milliseconds. */
        private static final long ASYNC_WRITE_TIMEOUT_MS = 10000;

        private Configuration taskConfig;
        private Cluster cluster = null;
        private Session session = null;
        private PreparedStatement statement = null;
        /** Number of configured columns, including the optional writetime() pseudo column. */
        private int columnNumber = 0;
        /** CQL types of the real (non-writetime) columns, in bind-marker order. */
        private List<DataType> columnTypes;
        private List<String> columnMeta = null;
        /** Position of the writetime() pseudo column in the configured column list, or -1. */
        private int writeTimeCol = -1;
        private boolean asyncWrite = false;
        private long batchSize = 1;
        /** Pending asynchronous writes; only allocated when batchSize > 1 and asyncWrite is on. */
        private List<ResultSetFuture> unConfirmedWrite;
        /** Statements waiting for the next batch; only allocated when batchSize > 1 and asyncWrite is off. */
        private List<BoundStatement> bufferedWrite;

        /**
         * Reads records until the receiver is exhausted and writes each one to Cassandra.
         *
         * <p>With {@code batchSize <= 1} every record is executed synchronously. Otherwise records
         * are either sent asynchronously and awaited in groups of {@code batchSize}, or buffered and
         * sent as one UNLOGGED batch per {@code batchSize} records. Anything still pending when the
         * input ends is flushed before returning.
         *
         * @param lineReceiver source of the records to write
         * @throws DataXException with {@code WRITE_DATA_ERROR} wrapping any failure, or
         *                        {@code CONF_ERROR} when a record's column count differs from the
         *                        configured one
         */
        @Override
        public void startWrite(RecordReceiver lineReceiver) {
            try {
                Record record;
                while ((record = lineReceiver.getFromReader()) != null) {
                    if (record.getColumnNumber() != columnNumber) {
                        // The source column count differs from the target column count: fail fast.
                        throw DataXException
                                .asDataXException(
                                        CassandraWriterErrorCode.CONF_ERROR,
                                        String.format(
                                                "列配置信息有错误. 因为您配置的任务中,源头读取字段数:%s 与 目的表要写入的字段数:%s 不相等. 请检查您的配置并作出修改.",
                                                record.getColumnNumber(),
                                                this.columnNumber));
                    }
                    BoundStatement boundStmt = bindRecord(record);
                    if (batchSize <= 1) {
                        session.execute(boundStmt);
                    } else if (asyncWrite) {
                        unConfirmedWrite.add(session.executeAsync(boundStmt));
                        if (unConfirmedWrite.size() >= batchSize) {
                            awaitPendingWrites();
                        }
                    } else {
                        bufferedWrite.add(boundStmt);
                        if (bufferedWrite.size() >= batchSize) {
                            flushBufferedWrites();
                        }
                    }
                }
                if (unConfirmedWrite != null && !unConfirmedWrite.isEmpty()) {
                    awaitPendingWrites();
                }
                if (bufferedWrite != null && !bufferedWrite.isEmpty()) {
                    flushBufferedWrites();
                }
            } catch (Exception e) {
                throw DataXException.asDataXException(
                        CassandraWriterErrorCode.WRITE_DATA_ERROR, e);
            }
        }

        /** Binds one record's columns (and the optional write timestamp) to the prepared INSERT. */
        private BoundStatement bindRecord(Record record) throws Exception {
            BoundStatement boundStmt = statement.bind();
            for (int i = 0; i < columnNumber; i++) {
                if (writeTimeCol != -1 && i == writeTimeCol) {
                    continue;
                }
                // Columns after the writetime() pseudo column shift down by one bind position.
                int pos = (writeTimeCol != -1 && i > writeTimeCol) ? i - 1 : i;
                CassandraWriterHelper.setupColumn(boundStmt, pos, columnTypes.get(pos), record.getColumn(i));
            }
            if (writeTimeCol != -1) {
                // The USING TIMESTAMP marker is the last bind marker of the statement.
                Column col = record.getColumn(writeTimeCol);
                boundStmt.setLong(columnNumber - 1, col.asLong());
            }
            return boundStmt;
        }

        /** Waits for every pending asynchronous write, then clears the pending list. */
        private void awaitPendingWrites() throws Exception {
            for (ResultSetFuture write : unConfirmedWrite) {
                write.getUninterruptibly(ASYNC_WRITE_TIMEOUT_MS, TimeUnit.MILLISECONDS);
            }
            unConfirmedWrite.clear();
        }

        /** Sends the buffered statements as one UNLOGGED batch, falling back to row-by-row writes on failure. */
        private void flushBufferedWrites() {
            BatchStatement batchStatement = new BatchStatement(Type.UNLOGGED);
            batchStatement.addAll(bufferedWrite);
            try {
                session.execute(batchStatement);
            } catch (Exception e) {
                LOG.error("batch写入失败,尝试逐条写入.", e);
                for (BoundStatement stmt : bufferedWrite) {
                    session.execute(stmt);
                }
            }
            bufferedWrite.clear();
        }

        /**
         * Connects to the cluster and prepares the INSERT statement described by the task
         * configuration.
         *
         * @throws DataXException with {@code CONF_ERROR} when the table or a configured column is
         *                        not found, or when more than one writetime() column is configured
         */
        @Override
        public void init() {
            this.taskConfig = super.getPluginJobConf();
            String username = taskConfig.getString(Key.USERNAME);
            String password = taskConfig.getString(Key.PASSWORD);
            String hosts = taskConfig.getString(Key.HOST);
            Integer port = taskConfig.getInt(Key.PORT, 9042);
            // Default to false so a job that omits the setting does not fail on unboxing null.
            boolean useSSL = taskConfig.getBool(Key.USESSL, false);
            String keyspace = taskConfig.getString(Key.KEYSPACE);
            String table = taskConfig.getString(Key.TABLE);
            batchSize = taskConfig.getLong(Key.BATCH_SIZE, 1);
            this.columnMeta = taskConfig.getList(Key.COLUMN, String.class);
            columnTypes = new ArrayList<DataType>(columnMeta.size());
            columnNumber = columnMeta.size();
            asyncWrite = taskConfig.getBool(Key.ASYNC_WRITE, false);

            int connectionsPerHost = taskConfig.getInt(Key.CONNECTIONS_PER_HOST, 8);
            int maxPendingPerConnection = taskConfig.getInt(Key.MAX_PENDING_CONNECTION, 128);
            PoolingOptions poolingOpts = new PoolingOptions()
                    .setConnectionsPerHost(HostDistance.LOCAL, connectionsPerHost, connectionsPerHost)
                    .setMaxRequestsPerConnection(HostDistance.LOCAL, maxPendingPerConnection)
                    .setNewConnectionThreshold(HostDistance.LOCAL, 100);

            Cluster.Builder clusterBuilder = Cluster.builder()
                    .withPoolingOptions(poolingOpts)
                    .withPort(port)
                    .addContactPoints(hosts.split(","));
            if ((username != null) && !username.isEmpty()) {
                clusterBuilder = clusterBuilder.withCredentials(username, password);
            }
            // SSL is independent of authentication, so honor it whether or not credentials are set.
            if (useSSL) {
                clusterBuilder = clusterBuilder.withSSL();
            }
            cluster = clusterBuilder.build();
            session = cluster.connect(keyspace);

            TableMetadata meta = cluster.getMetadata().getKeyspace(keyspace).getTable(table);
            if (meta == null) {
                throw DataXException
                        .asDataXException(
                                CassandraWriterErrorCode.CONF_ERROR,
                                String.format(
                                        "配置信息有错误.表'%s'不存在 .",
                                        table));
            }

            Insert insertStmt = QueryBuilder.insertInto(table);
            for (String columnName : columnMeta) {
                if (columnName.toLowerCase().equals(Key.WRITE_TIME)) {
                    if (writeTimeCol != -1) {
                        throw DataXException
                                .asDataXException(
                                        CassandraWriterErrorCode.CONF_ERROR,
                                        "列配置信息有错误. 只能有一个时间戳列(writetime())");
                    }
                    // Remember where the pseudo column sits; it gets no bind marker of its own here.
                    writeTimeCol = columnTypes.size();
                    continue;
                }
                ColumnMetadata col = meta.getColumn(columnName);
                if (col == null) {
                    throw DataXException
                            .asDataXException(
                                    CassandraWriterErrorCode.CONF_ERROR,
                                    String.format(
                                            "列配置信息有错误. 表中未找到列名 '%s' .",
                                            columnName));
                }
                insertStmt.value(columnName, QueryBuilder.bindMarker());
                columnTypes.add(col.getType());
            }
            if (writeTimeCol != -1) {
                insertStmt.using(timestamp(QueryBuilder.bindMarker()));
            }

            String cl = taskConfig.getString(Key.CONSITANCY_LEVEL);
            if (cl != null && !cl.isEmpty()) {
                insertStmt.setConsistencyLevel(ConsistencyLevel.valueOf(cl));
            } else {
                insertStmt.setConsistencyLevel(ConsistencyLevel.LOCAL_QUORUM);
            }
            statement = session.prepare(insertStmt);

            if (batchSize > 1) {
                if (asyncWrite) {
                    unConfirmedWrite = new ArrayList<ResultSetFuture>();
                } else {
                    bufferedWrite = new ArrayList<BoundStatement>();
                }
            }
        }

        /** Closes the session and the cluster opened by {@link #init()}, if any. */
        @Override
        public void destroy() {
            try {
                if (session != null) {
                    session.close();
                    session = null;
                }
            } finally {
                if (cluster != null) {
                    cluster.close();
                    cluster = null;
                }
            }
        }
    }
}
================================================
FILE: cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/CassandraWriterErrorCode.java
================================================
package com.alibaba.datax.plugin.writer.cassandrawriter;
import com.alibaba.datax.common.spi.ErrorCode;
/**
 * Error codes reported by the Cassandra writer plugin.
 *
 * Created by mazhenlin on 2019/8/19.
 */
public enum CassandraWriterErrorCode implements ErrorCode {
    /** Raised for configuration problems. */
    CONF_ERROR("CassandraWriter-00", "配置错误."),
    /** Raised when writing data fails. */
    WRITE_DATA_ERROR("CassandraWriter-01", "写入数据时失败.");

    private final String code;
    private final String description;

    CassandraWriterErrorCode(String code, String description) {
        this.code = code;
        this.description = description;
    }

    /** @return the code string of this error */
    @Override
    public String getCode() {
        return code;
    }

    /** @return the description text of this error */
    @Override
    public String getDescription() {
        return description;
    }

    /** @return the code and description combined into one line */
    @Override
    public String toString() {
        return "Code:[" + code + "], Description:[" + description + "].";
    }
}
================================================
FILE: cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/CassandraWriterHelper.java
================================================
package com.alibaba.datax.plugin.writer.cassandrawriter;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.InetAddress;
import java.nio.ByteBuffer;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import com.alibaba.fastjson2.JSONException;
import com.alibaba.fastjson2.JSONObject;
import com.datastax.driver.core.BoundStatement;
import com.datastax.driver.core.CodecRegistry;
import com.datastax.driver.core.DataType;
import com.datastax.driver.core.DataType.Name;
import com.datastax.driver.core.Duration;
import com.datastax.driver.core.LocalDate;
import com.datastax.driver.core.TupleType;
import com.datastax.driver.core.TupleValue;
import com.datastax.driver.core.UDTValue;
import com.datastax.driver.core.UserType;
import com.datastax.driver.core.UserType.Field;
import com.google.common.base.Splitter;
import org.apache.commons.codec.binary.Base64;
/**
* Created by mazhenlin on 2019/8/21.
*/
public class CassandraWriterHelper {
static CodecRegistry registry = new CodecRegistry();
public static Object parseFromString(String s, DataType sqlType ) throws Exception {
if (s == null || s.isEmpty()) {
if (sqlType.getName() == Name.ASCII || sqlType.getName() == Name.TEXT ||
sqlType.getName() == Name.VARCHAR) {
return s;
} else {
return null;
}
}
switch (sqlType.getName()) {
case ASCII:
case TEXT:
case VARCHAR:
return s;
case BLOB:
if (s.length() == 0) {
return new byte[0];
}
byte[] byteArray = new byte[s.length() / 2];
for (int i = 0; i < byteArray.length; i++) {
String subStr = s.substring(2 * i, 2 * i + 2);
byteArray[i] = ((byte) Integer.parseInt(subStr, 16));
}
return ByteBuffer.wrap(byteArray);
case BOOLEAN:
return Boolean.valueOf(s);
case TINYINT:
return Byte.valueOf(s);
case SMALLINT:
return Short.valueOf(s);
case INT:
return Integer.valueOf(s);
case BIGINT:
return Long.valueOf(s);
case VARINT:
return new BigInteger(s, 10);
case FLOAT:
return Float.valueOf(s);
case DOUBLE:
return Double.valueOf(s);
case DECIMAL:
return new BigDecimal(s);
case DATE: {
String[] a = s.split("-");
if (a.length != 3) {
throw new Exception(String.format("DATE类型数据 '%s' 格式不正确,必须为yyyy-mm-dd格式", s));
}
return LocalDate.fromYearMonthDay(Integer.valueOf(a[0]), Integer.valueOf(a[1]),
Integer.valueOf(a[2]));
}
case TIME:
return Long.valueOf(s);
case TIMESTAMP:
return new Date(Long.valueOf(s));
case UUID:
case TIMEUUID:
return UUID.fromString(s);
case INET:
String[] b = s.split("/");
if (b.length < 2) {
return InetAddress.getByName(s);
}
byte[] addr = InetAddress.getByName(b[1]).getAddress();
return InetAddress.getByAddress(b[0], addr);
case DURATION:
return Duration.from(s);
case LIST:
case MAP:
case SET:
case TUPLE:
case UDT:
Object jsonObject = JSON.parse(s);
return parseFromJson(jsonObject,sqlType);
default:
throw DataXException.asDataXException(CassandraWriterErrorCode.CONF_ERROR,
"不支持您配置的列类型:" + sqlType + ", 请检查您的配置 或者 联系 管理员.");
} // end switch
}
public static Object parseFromJson(Object jsonObject,DataType type) throws Exception {
if( jsonObject == null ) return null;
switch (type.getName()) {
case ASCII:
case TEXT:
case VARCHAR:
case BOOLEAN:
case TIME:
return jsonObject;
case TINYINT:
return ((Number)jsonObject).byteValue();
case SMALLINT:
return ((Number)jsonObject).shortValue();
case INT:
return ((Number)jsonObject).intValue();
case BIGINT:
return ((Number)jsonObject).longValue();
case VARINT:
return new BigInteger(jsonObject.toString());
case FLOAT:
return ((Number)jsonObject).floatValue();
case DOUBLE:
return ((Number)jsonObject).doubleValue();
case DECIMAL:
return new BigDecimal(jsonObject.toString());
case BLOB:
return ByteBuffer.wrap(Base64.decodeBase64((String)jsonObject));
case DATE:
return LocalDate.fromMillisSinceEpoch(((Number)jsonObject).longValue());
case TIMESTAMP:
return new Date(((Number)jsonObject).longValue());
case DURATION:
return Duration.from(jsonObject.toString());
case UUID:
case TIMEUUID:
return UUID.fromString(jsonObject.toString());
case INET:
return InetAddress.getByName((String)jsonObject);
case LIST:
List l = new ArrayList();
for( Object o : (JSONArray)jsonObject ) {
l.add(parseFromJson(o,type.getTypeArguments().get(0)));
}
return l;
case MAP: {
Map m = new HashMap();
for (Map.Entry e : ((JSONObject)jsonObject).entrySet()) {
Object k = parseFromString((String) e.getKey(), type.getTypeArguments().get(0));
Object v = parseFromJson(e.getValue(), type.getTypeArguments().get(1));
m.put(k,v);
}
return m;
}
case SET:
Set s = new HashSet();
for( Object o : (JSONArray)jsonObject ) {
s.add(parseFromJson(o,type.getTypeArguments().get(0)));
}
return s;
case TUPLE: {
TupleValue t = ((TupleType) type).newValue();
int j = 0;
for (Object e : (JSONArray)jsonObject) {
DataType eleType = ((TupleType) type).getComponentTypes().get(j);
t.set(j, parseFromJson(e, eleType), registry.codecFor(eleType).getJavaType());
j++;
}
return t;
}
case UDT: {
UDTValue t = ((UserType) type).newValue();
UserType userType = t.getType();
for (Map.Entry e : ((JSONObject)jsonObject).entrySet()) {
DataType eleType = userType.getFieldType((String)e.getKey());
t.set((String)e.getKey(), parseFromJson(e.getValue(), eleType), registry.codecFor(eleType).getJavaType());
}
return t;
}
}
return null;
}
public static void setupColumn(BoundStatement ps, int pos, DataType sqlType, Column col) throws Exception {
if (col.getRawData() != null) {
switch (sqlType.getName()) {
case ASCII:
case TEXT:
case VARCHAR:
ps.setString(pos, col.asString());
break;
case BLOB:
ps.setBytes(pos, ByteBuffer.wrap(col.asBytes()));
break;
case BOOLEAN:
ps.setBool(pos, col.asBoolean());
break;
case TINYINT:
ps.setByte(pos, col.asLong().byteValue());
break;
case SMALLINT:
ps.setShort(pos, col.asLong().shortValue());
break;
case INT:
ps.setInt(pos, col.asLong().intValue());
break;
case BIGINT:
ps.setLong(pos, col.asLong());
break;
case VARINT:
ps.setVarint(pos, col.asBigInteger());
break;
case FLOAT:
ps.setFloat(pos, col.asDouble().floatValue());
break;
case DOUBLE:
ps.setDouble(pos, col.asDouble());
break;
case DECIMAL:
ps.setDecimal(pos, col.asBigDecimal());
break;
case DATE:
ps.setDate(pos, LocalDate.fromMillisSinceEpoch(col.asDate().getTime()));
break;
case TIME:
ps.setTime(pos, col.asLong());
break;
case TIMESTAMP:
ps.setTimestamp(pos, col.asDate());
break;
case UUID:
case TIMEUUID:
ps.setUUID(pos, UUID.fromString(col.asString()));
break;
case INET:
ps.setInet(pos, InetAddress.getByName(col.asString()));
break;
case DURATION:
ps.set(pos, Duration.from(col.asString()), Duration.class);
break;
case LIST:
ps.setList(pos, (List>) parseFromString(col.asString(), sqlType));
break;
case MAP:
ps.setMap(pos, (Map) parseFromString(col.asString(), sqlType));
break;
case SET:
ps.setSet(pos, (Set) parseFromString(col.asString(), sqlType));
break;
case TUPLE:
ps.setTupleValue(pos, (TupleValue) parseFromString(col.asString(), sqlType));
break;
case UDT:
ps.setUDTValue(pos, (UDTValue) parseFromString(col.asString(), sqlType));
break;
default:
throw DataXException.asDataXException(CassandraWriterErrorCode.CONF_ERROR,
"不支持您配置的列类型:" + sqlType + ", 请检查您的配置 或者 联系 管理员.");
} // end switch
} else {
ps.setToNull(pos);
}
}
}
================================================
FILE: cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/Key.java
================================================
package com.alibaba.datax.plugin.writer.cassandrawriter;
/**
* Created by mazhenlin on 2019/8/19.
*/
public class Key {

    // ---- connection settings ----
    public static final String USERNAME = "username";
    public static final String PASSWORD = "password";
    public static final String HOST = "host";
    public static final String PORT = "port";
    public static final String USESSL = "useSSL";

    // ---- target table ----
    public static final String KEYSPACE = "keyspace";
    public static final String TABLE = "table";
    public static final String COLUMN = "column";
    public static final String WRITE_TIME = "writetime()";

    // ---- write behaviour and connection pool ----
    public static final String ASYNC_WRITE = "asyncWrite";
    // The constant name and its value keep their historical spelling; both are
    // left untouched so existing references and job files keep working.
    public static final String CONSITANCY_LEVEL = "consistancyLevel";
    public static final String CONNECTIONS_PER_HOST = "connectionsPerHost";
    public static final String MAX_PENDING_CONNECTION = "maxPendingPerConnection";

    /**
     * Batch size for asynchronous writes; defaults to 1 (no asynchronous writing).
     */
    public static final String BATCH_SIZE = "batchSize";

    /**
     * Name of each column.
     */
    public static final String COLUMN_NAME = "name";

    /**
     * Column separator.
     */
    public static final String COLUMN_SPLITTER = "format";

    public static final String ELEMENT_SPLITTER = "splitter";
    public static final String ENTRY_SPLITTER = "entrySplitter";
    public static final String KV_SPLITTER = "kvSplitter";
    public static final String ELEMENT_CONFIG = "element";
    public static final String TUPLE_CONNECTOR = "_";
    public static final String KEY_CONFIG = "key";
    public static final String VALUE_CONFIG = "value";
}
================================================
FILE: cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/LocalStrings.properties
================================================
errorcode.config_invalid_exception=\u914D\u7F6E\u9519\u8BEF.
errorcode.write_failed_exception=\u5199\u5165\u6570\u636E\u65F6\u5931\u8D25
================================================
FILE: cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/LocalStrings_en_US.properties
================================================
errorcode.config_invalid_exception=Error in parameter configuration.
errorcode.write_failed_exception=Failed to write data.
================================================
FILE: cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/LocalStrings_ja_JP.properties
================================================
errorcode.config_invalid_exception=\u914D\u7F6E\u9519\u8BEF.
errorcode.write_failed_exception=\u5199\u5165\u6570\u636E\u65F6\u5931\u8D25
================================================
FILE: cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/LocalStrings_zh_CN.properties
================================================
errorcode.config_invalid_exception=\u914D\u7F6E\u9519\u8BEF.
errorcode.write_failed_exception=\u5199\u5165\u6570\u636E\u65F6\u5931\u8D25
================================================
FILE: cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/LocalStrings_zh_HK.properties
================================================
errorcode.config_invalid_exception=\u914D\u7F6E\u9519\u8BEF.
errorcode.write_failed_exception=\u5199\u5165\u6570\u636E\u65F6\u5931\u8D25
================================================
FILE: cassandrawriter/src/main/java/com/alibaba/datax/plugin/writer/cassandrawriter/LocalStrings_zh_TW.properties
================================================
errorcode.config_invalid_exception=\u914D\u7F6E\u9519\u8BEF.
errorcode.write_failed_exception=\u5199\u5165\u6570\u636E\u65F6\u5931\u8D25
================================================
FILE: cassandrawriter/src/main/resources/plugin.json
================================================
{
"name": "cassandrawriter",
"class": "com.alibaba.datax.plugin.writer.cassandrawriter.CassandraWriter",
"description": "useScene: prod. mechanism: use datax driver, execute insert sql.",
"developer": "alibaba"
}
================================================
FILE: cassandrawriter/src/main/resources/plugin_job_template.json
================================================
{
"name": "cassandrawriter",
"parameter": {
"username": "",
"password": "",
"host": "",
"port": "",
"useSSL": false,
"keyspace": "",
"table": "",
"column": [
"c1","c2","c3"
]
}
}
================================================
FILE: clickhousereader/doc/clickhousereader.md
================================================
# ClickhouseReader 插件文档
___
## 1 快速介绍
ClickhouseReader插件实现了从Clickhouse读取数据。在底层实现上,ClickhouseReader通过JDBC连接远程Clickhouse数据库,并执行相应的sql语句将数据从Clickhouse库中SELECT出来。
## 2 实现原理
简而言之,ClickhouseReader通过JDBC连接器连接到远程的Clickhouse数据库,并根据用户配置的信息生成查询SELECT SQL语句并发送到远程Clickhouse数据库,并将该SQL执行返回结果使用DataX自定义的数据类型拼装为抽象的数据集,并传递给下游Writer处理。
对于用户配置Table、Column、Where的信息,ClickhouseReader将其拼接为SQL语句发送到Clickhouse数据库;对于用户配置querySql信息,ClickhouseReader直接将其发送到Clickhouse数据库。
## 3 功能说明
### 3.1 配置样例
* 配置一个从Clickhouse数据库同步抽取数据到本地的作业:
```
{
"job": {
"setting": {
"speed": {
//设置传输速度 byte/s 尽量逼近这个速度但是不高于它.
// channel 表示通道数量,byte表示通道速度,如果单通道速度1MB,配置byte为1048576表示一个channel
"byte": 1048576
},
//出错限制
"errorLimit": {
//先选择record
"record": 0,
//百分比 1表示100%
"percentage": 0.02
}
},
"content": [
{
"reader": {
"name": "clickhousereader",
"parameter": {
// 数据库连接用户名
"username": "root",
// 数据库连接密码
"password": "root",
"column": [
"id","name"
],
"connection": [
{
"table": [
"table"
],
"jdbcUrl": [
"jdbc:clickhouse://[HOST_NAME]:PORT/[DATABASE_NAME]"
]
}
]
}
},
"writer": {
//writer类型
"name": "streamwriter",
// 是否打印内容
"parameter": {
"print": true
}
}
}
]
}
}
```
* 配置一个自定义SQL的数据库同步任务到本地内容的作业:
```
{
"job": {
"setting": {
"speed": {
"channel": 5
}
},
"content": [
{
"reader": {
"name": "clickhousereader",
"parameter": {
"username": "root",
"password": "root",
"where": "",
"connection": [
{
"querySql": [
"select db_id,on_line_flag from db_info where db_id < 10"
],
"jdbcUrl": [
"jdbc:clickhouse://1.1.1.1:8123/default"
]
}
]
}
},
"writer": {
"name": "streamwriter",
"parameter": {
"visible": false,
"encoding": "UTF-8"
}
}
}
]
}
}
```
### 3.2 参数说明
* **jdbcUrl**
* 描述:描述的是到对端数据库的JDBC连接信息,使用JSON的数组描述,并支持一个库填写多个连接地址。之所以使用JSON数组描述连接信息,是因为阿里集团内部支持多个IP探测,如果配置了多个,ClickhouseReader可以依次探测ip的可连接性,直到选择一个合法的IP。如果全部连接失败,ClickhouseReader报错。 注意,jdbcUrl必须包含在connection配置单元中。对于阿里集团外部使用情况,JSON数组填写一个JDBC连接即可。
jdbcUrl按照Clickhouse官方规范,并可以填写连接附件控制信息。具体请参看[Clickhouse官方文档](https://clickhouse.com/docs/en/engines/table-engines/integrations/jdbc)。
* 必选:是
* 默认值:无
* **username**
* 描述:数据源的用户名
* 必选:是
* 默认值:无
* **password**
* 描述:数据源指定用户名的密码
* 必选:是
* 默认值:无
* **table**
* 描述:所选取的需要同步的表。使用JSON的数组描述,因此支持多张表同时抽取。当配置为多张表时,用户自己需保证多张表是同一schema结构,ClickhouseReader不予检查表是否同一逻辑表。注意,table必须包含在connection配置单元中。
* 必选:是
* 默认值:无
* **column**
* 描述:所配置的表中需要同步的列名集合,使用JSON的数组描述字段信息。用户使用\*代表默认使用所有列配置,例如['\*']。
支持列裁剪,即列可以挑选部分列进行导出。
支持列换序,即列可以不按照表schema信息进行导出。
支持常量配置,用户需要按照JSON格式:
["id", "`table`", "1", "'bazhen.csy'", "null", "to_char(a + 1)", "2.3" , "true"]
id为普通列名,\`table\`为包含保留字的列名,1为整形数字常量,'bazhen.csy'为字符串常量,null为空指针,to_char(a + 1)为表达式,2.3为浮点数,true为布尔值。
Column必须显式填写,不允许为空!
* 必选:是
* 默认值:无
* **splitPk**
* 描述:ClickhouseReader进行数据抽取时,如果指定splitPk,表示用户希望使用splitPk代表的字段进行数据分片,DataX因此会启动并发任务进行数据同步,这样可以大大提高数据同步的效能。
推荐splitPk用户使用表主键,因为表主键通常情况下比较均匀,因此切分出来的分片也不容易出现数据热点。
目前splitPk仅支持整形数据切分,`不支持浮点、日期等其他类型`。如果用户指定其他非支持类型,ClickhouseReader将报错!
splitPk如果不填写,将视作用户不对单表进行切分,ClickhouseReader使用单通道同步全量数据。
* 必选:否
* 默认值:无
* **where**
* 描述:筛选条件,ClickhouseReader根据指定的column、table、where条件拼接SQL,并根据这个SQL进行数据抽取。在实际业务场景中,往往会选择当天的数据进行同步,可以将where条件指定为gmt_create > $bizdate 。注意:不可以将where条件指定为limit 10,limit不是SQL的合法where子句。
where条件可以有效地进行业务增量同步。
* 必选:否
* 默认值:无
* **querySql**
* 描述:在有些业务场景下,where这一配置项不足以描述所筛选的条件,用户可以通过该配置项来自定义筛选SQL。当用户配置了这一项之后,DataX系统就会忽略table,column这些配置项,直接使用这个配置项的内容对数据进行筛选,例如需要进行多表join后同步数据,使用select a,b from table_a join table_b on table_a.id = table_b.id
`当用户配置querySql时,ClickhouseReader直接忽略table、column、where条件的配置`。
* 必选:否
* 默认值:无
* **fetchSize**
* 描述:该配置项定义了插件和数据库服务器端每次批量数据获取条数,该值决定了DataX和服务器端的网络交互次数,能够较大的提升数据抽取性能。
`注意,该值过大(>2048)可能造成DataX进程OOM。`。
* 必选:否
* 默认值:1000
* **session**
* 描述:控制写入数据的时间格式,时区等的配置,如果表中有时间字段,配置该值以明确告知写入 clickhouse 的时间格式。通常配置的参数为:NLS_DATE_FORMAT,NLS_TIME_FORMAT。其配置的值为 json 格式,例如:
```
"session": [
"alter session set NLS_DATE_FORMAT='yyyy-mm-dd hh24:mi:ss'",
"alter session set NLS_TIMESTAMP_FORMAT='yyyy-mm-dd hh24:mi:ss'",
"alter session set NLS_TIMESTAMP_TZ_FORMAT='yyyy-mm-dd hh24:mi:ss'",
"alter session set TIME_ZONE='US/Pacific'"
]
```
`(注意&quot;是 " 的转义字符串)`。
* 必选:否
* 默认值:无
### 3.3 类型转换
目前ClickhouseReader支持大部分Clickhouse类型,但也存在部分个别类型没有支持的情况,请注意检查你的类型。
下面列出ClickhouseReader针对Clickhouse类型转换列表:
| DataX 内部类型| Clickhouse 数据类型 |
| -------- |--------------------------------------------------------------------------------------------|
| Long | UInt8, UInt16, UInt32, UInt64, UInt128, UInt256, Int8, Int16, Int32, Int64, Int128, Int256 |
| Double | Float32, Float64, Decimal |
| String | String, FixedString |
| Date | DATE, Date32, DateTime, DateTime64 |
| Boolean | Boolean |
| Bytes | BLOB,BFILE,RAW,LONG RAW |
请注意:
* `除上述罗列字段类型外,其他类型均不支持`。
## 4 性能报告
### 4.1 环境准备
#### 4.1.1 数据特征
为了模拟线上真实数据,我们设计两个Clickhouse数据表,分别为:
#### 4.1.2 机器参数
* 执行DataX的机器参数为:
* Clickhouse数据库机器参数为:
### 4.2 测试报告
#### 4.2.1 表1测试报告
| 并发任务数| DataX速度(Rec/s)|DataX流量|网卡流量|DataX运行负载|DB运行负载|
|--------| --------|--------|--------|--------|--------|
|1| DataX 统计速度(Rec/s)|DataX统计流量|网卡流量|DataX运行负载|DB运行负载|
## 5 约束限制
### 5.1 主备同步数据恢复问题
主备同步问题指Clickhouse使用主从灾备,备库从主库不间断通过binlog恢复数据。由于主备数据同步存在一定的时间差,特别在于某些特定情况,例如网络延迟等问题,导致备库同步恢复的数据与主库有较大差别,导致从备库同步的数据不是一份当前时间的完整镜像。
针对这个问题,我们提供了preSql功能,该功能待补充。
### 5.2 一致性约束
Clickhouse在数据存储划分中属于RDBMS系统,对外可以提供强一致性数据查询接口。例如当一次同步任务启动运行过程中,当该库存在其他数据写入方写入数据时,ClickhouseReader完全不会获取到写入更新数据,这是由于数据库本身的快照特性决定的。关于数据库快照特性,请参看[MVCC Wikipedia](https://en.wikipedia.org/wiki/Multiversion_concurrency_control)
上述是在ClickhouseReader单线程模型下数据同步一致性的特性,由于ClickhouseReader可以根据用户配置信息使用了并发数据抽取,因此不能严格保证数据一致性:当ClickhouseReader根据splitPk进行数据切分后,会先后启动多个并发任务完成数据同步。由于多个并发任务相互之间不属于同一个读事务,同时多个并发任务存在时间间隔。因此这份数据并不是`完整的`、`一致的`数据快照信息。
针对多线程的一致性快照需求,在技术上目前无法实现,只能从工程角度解决,工程化的方式存在取舍,我们提供几个解决思路给用户,用户可以自行选择:
1. 使用单线程同步,即不再进行数据切片。缺点是速度比较慢,但是能够很好保证一致性。
2. 关闭其他数据写入方,保证当前数据为静态数据,例如,锁表、关闭备库同步等等。缺点是可能影响在线业务。
### 5.3 数据库编码问题
ClickhouseReader底层使用JDBC进行数据抽取,JDBC天然适配各类编码,并在底层进行了编码转换。因此ClickhouseReader不需用户指定编码,可以自动获取编码并转码。
对于Clickhouse底层写入编码和其设定的编码不一致的混乱情况,ClickhouseReader对此无法识别,对此也无法提供解决方案,对于这类情况,`导出有可能为乱码`。
### 5.4 增量数据同步
ClickhouseReader使用JDBC SELECT语句完成数据抽取工作,因此可以使用SELECT...WHERE...进行增量数据抽取,方式有多种:
* 数据库在线应用写入数据库时,填充modify字段为更改时间戳,包括新增、更新、删除(逻辑删)。对于这类应用,ClickhouseReader只需要WHERE条件跟上一同步阶段时间戳即可。
* 对于新增流水型数据,ClickhouseReader可以WHERE条件后跟上一阶段最大自增ID即可。
对于业务上无字段区分新增、修改数据情况,ClickhouseReader也无法进行增量数据同步,只能同步全量数据。
### 5.5 Sql安全性
ClickhouseReader提供querySql语句交给用户自己实现SELECT抽取语句,ClickhouseReader本身对querySql不做任何安全性校验。这块交由DataX用户方自己保证。
## 6 FAQ
***
**Q: ClickhouseReader同步报错,报错信息为XXX**
A: 网络或者权限问题,请使用Clickhouse命令行测试
如果上述命令也报错,那可以证实是环境问题,请联系你的DBA。
**Q: ClickhouseReader抽取速度很慢怎么办?**
A: 影响抽取时间的原因大概有如下几个:(来自专业 DBA 卫绾)
1. 由于SQL的plan异常,导致的抽取时间长; 在抽取时,尽可能使用全表扫描代替索引扫描;
2. 合理sql的并发度,减少抽取时间;
3. 抽取sql要简单,尽量不用replace等函数,这个非常消耗cpu,会严重影响抽取速度;
================================================
FILE: clickhousereader/pom.xml
================================================
datax-allcom.alibaba.datax0.0.1-SNAPSHOT4.0.0clickhousereaderclickhousereaderjarru.yandex.clickhouseclickhouse-jdbc0.2.4com.alibaba.dataxdatax-core${datax-project-version}com.alibaba.dataxdatax-common${datax-project-version}org.slf4jslf4j-apich.qos.logbacklogback-classiccom.alibaba.dataxplugin-rdbms-util${datax-project-version}src/main/java**/*.propertiesmaven-compiler-plugin${jdk-version}${jdk-version}${project-sourceEncoding}maven-assembly-pluginsrc/main/assembly/package.xmldataxdwzippackagesingle
================================================
FILE: clickhousereader/src/main/assembly/package.xml
================================================
dirfalsesrc/main/resourcesplugin.jsonplugin_job_template.jsonplugin/reader/clickhousereadertarget/clickhousereader-0.0.1-SNAPSHOT.jarplugin/reader/clickhousereaderfalseplugin/reader/clickhousereader/libsruntime
================================================
FILE: clickhousereader/src/main/java/com/alibaba/datax/plugin/reader/clickhousereader/ClickhouseReader.java
================================================
package com.alibaba.datax.plugin.reader.clickhousereader;
import java.sql.Array;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Types;
import java.util.List;
import com.alibaba.datax.common.element.Record;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.plugin.RecordSender;
import com.alibaba.datax.common.plugin.TaskPluginCollector;
import com.alibaba.datax.common.spi.Reader;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.common.util.MessageSource;
import com.alibaba.datax.plugin.rdbms.reader.CommonRdbmsReader;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.fastjson2.JSON;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * DataX reader plugin for ClickHouse.
 *
 * <p>Both the job and the task side delegate all of their work to
 * {@link CommonRdbmsReader}, configured with {@link DataBaseType#ClickHouse}.
 */
public class ClickhouseReader extends Reader {
    private static final DataBaseType DATABASE_TYPE = DataBaseType.ClickHouse;
    private static final Logger LOG = LoggerFactory.getLogger(ClickhouseReader.class);

    /**
     * Job-level half of the plugin: initialises the shared RDBMS reader with the
     * plugin configuration and splits the job into task configurations.
     */
    public static class Job extends Reader.Job {
        private Configuration jobConfig = null;
        private CommonRdbmsReader.Job commonRdbmsReaderMaster;

        @Override
        public void init() {
            this.jobConfig = super.getPluginJobConf();
            this.commonRdbmsReaderMaster = new CommonRdbmsReader.Job(DATABASE_TYPE);
            this.commonRdbmsReaderMaster.init(this.jobConfig);
        }

        /**
         * Splits the job configuration into per-task configurations.
         *
         * @param mandatoryNumber split count forwarded unchanged to the common RDBMS reader
         * @return the task configurations produced by the common RDBMS reader
         */
        @Override
        public List<Configuration> split(int mandatoryNumber) {
            return this.commonRdbmsReaderMaster.split(this.jobConfig, mandatoryNumber);
        }

        @Override
        public void post() {
            this.commonRdbmsReaderMaster.post(this.jobConfig);
        }

        @Override
        public void destroy() {
            this.commonRdbmsReaderMaster.destroy(this.jobConfig);
        }
    }

    /**
     * Task-level half of the plugin: reads one slice through the shared RDBMS reader.
     */
    public static class Task extends Reader.Task {
        private Configuration jobConfig;
        private CommonRdbmsReader.Task commonRdbmsReaderSlave;

        @Override
        public void init() {
            this.jobConfig = super.getPluginJobConf();
            this.commonRdbmsReaderSlave = new CommonRdbmsReader.Task(DATABASE_TYPE, super.getTaskGroupId(), super.getTaskId());
            this.commonRdbmsReaderSlave.init(this.jobConfig);
        }

        /**
         * Reads the slice and hands the records to {@code recordSender}.
         * The fetch size comes from the task configuration and falls back to 1000.
         */
        @Override
        public void startRead(RecordSender recordSender) {
            int fetchSize = this.jobConfig.getInt(com.alibaba.datax.plugin.rdbms.reader.Constant.FETCH_SIZE, 1000);
            this.commonRdbmsReaderSlave.startRead(this.jobConfig, recordSender, super.getTaskPluginCollector(), fetchSize);
        }

        @Override
        public void post() {
            this.commonRdbmsReaderSlave.post(this.jobConfig);
        }

        @Override
        public void destroy() {
            this.commonRdbmsReaderSlave.destroy(this.jobConfig);
        }
    }
}
================================================
FILE: clickhousereader/src/main/resources/plugin.json
================================================
{
"name": "clickhousereader",
"class": "com.alibaba.datax.plugin.reader.clickhousereader.ClickhouseReader",
"description": "useScene: prod. mechanism: Jdbc connection using the database, execute select sql.",
"developer": "alibaba"
}
================================================
FILE: clickhousereader/src/main/resources/plugin_job_template.json
================================================
{
"name": "clickhousereader",
"parameter": {
"username": "username",
"password": "password",
"column": ["col1", "col2", "col3"],
"connection": [
{
"jdbcUrl": "jdbc:clickhouse://<host>:<port>[/<database>]",
"table": ["table1", "table2"]
}
],
"preSql": [],
"postSql": []
}
}
================================================
FILE: clickhousereader/src/test/resources/basic1.json
================================================
{
"job": {
"setting": {
"speed": {
"channel": 5
}
},
"content": [
{
"reader": {
"name": "clickhousereader",
"parameter": {
"username": "XXXX",
"password": "XXXX",
"column": [
"uint8_col",
"uint16_col",
"uint32_col",
"uint64_col",
"int8_col",
"int16_col",
"int32_col",
"int64_col",
"float32_col",
"float64_col",
"bool_col",
"str_col",
"fixedstr_col",
"uuid_col",
"date_col",
"datetime_col",
"enum_col",
"ary_uint8_col",
"ary_str_col",
"tuple_col",
"nullable_col",
"nested_col.nested_id",
"nested_col.nested_str",
"ipv4_col",
"ipv6_col",
"decimal_col"
],
"connection": [
{
"table": [
"all_type_tbl"
],
"jdbcUrl":["jdbc:clickhouse://XXXX:8123/default"]
}
]
}
},
"writer": {}
}
]
}
}
================================================
FILE: clickhousereader/src/test/resources/basic1.sql
================================================
-- Test fixture: creates default.all_type_tbl (only if it does not exist yet).
-- The column names match the "column" list in clickhousereader/src/test/resources/basic1.json.
CREATE TABLE IF NOT EXISTS default.all_type_tbl
(
-- unsigned integers
`uint8_col` UInt8,
`uint16_col` UInt16,
uint32_col UInt32,
uint64_col UInt64,
-- signed integers
int8_col Int8,
int16_col Int16,
int32_col Int32,
int64_col Int64,
-- floating point
float32_col Float32,
float64_col Float64,
-- named bool_col but declared as UInt8
bool_col UInt8,
-- strings and identifiers
str_col String,
fixedstr_col FixedString(3),
uuid_col UUID,
-- date and time
date_col Date,
datetime_col DateTime,
enum_col Enum('hello' = 1, 'world' = 2),
-- composite types
ary_uint8_col Array(UInt8),
ary_str_col Array(String),
tuple_col Tuple(UInt8, String),
nullable_col Nullable(UInt8),
-- nested structure; basic1.json addresses its fields as nested_col.nested_id and nested_col.nested_str
nested_col Nested
(
nested_id UInt32,
nested_str String
),
-- network addresses
ipv4_col IPv4,
ipv6_col IPv6,
decimal_col Decimal(5,3)
)
ENGINE = MergeTree()
ORDER BY (uint8_col);
================================================
FILE: clickhousewriter/pom.xml
================================================
datax-allcom.alibaba.datax0.0.1-SNAPSHOT4.0.0clickhousewriterclickhousewriterjarru.yandex.clickhouseclickhouse-jdbc0.2.4com.alibaba.dataxdatax-core${datax-project-version}com.alibaba.dataxdatax-common${datax-project-version}org.slf4jslf4j-apich.qos.logbacklogback-classiccom.alibaba.dataxplugin-rdbms-util${datax-project-version}src/main/java**/*.propertiesmaven-compiler-plugin${jdk-version}${jdk-version}${project-sourceEncoding}maven-assembly-pluginsrc/main/assembly/package.xmldataxdwzippackagesingle
================================================
FILE: clickhousewriter/src/main/assembly/package.xml
================================================
dirfalsesrc/main/resourcesplugin.jsonplugin_job_template.jsonplugin/writer/clickhousewritertarget/clickhousewriter-0.0.1-SNAPSHOT.jarplugin/writer/clickhousewriterfalseplugin/writer/clickhousewriter/libsruntime
================================================
FILE: clickhousewriter/src/main/java/com/alibaba/datax/plugin/writer/clickhousewriter/ClickhouseWriter.java
================================================
package com.alibaba.datax.plugin.writer.clickhousewriter;
import com.alibaba.datax.common.element.Column;
import com.alibaba.datax.common.element.StringColumn;
import com.alibaba.datax.common.exception.CommonErrorCode;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.plugin.RecordReceiver;
import com.alibaba.datax.common.spi.Writer;
import com.alibaba.datax.common.util.Configuration;
import com.alibaba.datax.plugin.rdbms.util.DBUtilErrorCode;
import com.alibaba.datax.plugin.rdbms.util.DataBaseType;
import com.alibaba.datax.plugin.rdbms.writer.CommonRdbmsWriter;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONArray;
import java.sql.Array;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.sql.Types;
import java.util.List;
import java.util.regex.Pattern;
/**
 * DataX writer plugin for ClickHouse.
 *
 * <p>The job side delegates entirely to {@link CommonRdbmsWriter.Job}. The task side
 * uses a {@link CommonRdbmsWriter.Task} whose per-column binding is overridden below,
 * including handling for {@code Types.OTHER} and {@code Types.ARRAY} columns.
 */
public class ClickhouseWriter extends Writer {
    private static final DataBaseType DATABASE_TYPE = DataBaseType.ClickHouse;

    /**
     * Shape of a string handed directly to {@link Timestamp#valueOf(String)}, e.g.
     * "2017-07-12 14:39:00.123566". The fraction separator is an escaped literal dot
     * so that other separators fall through to the generic date conversion instead
     * of reaching {@code Timestamp.valueOf}. Compiled once rather than per value.
     */
    private static final Pattern JDBC_TIMESTAMP_PATTERN =
            Pattern.compile("^\\d+-\\d+-\\d+ \\d+:\\d+:\\d+\\.\\d+");

    /**
     * Job-level half of the plugin; every lifecycle call is forwarded to the common RDBMS writer.
     */
    public static class Job extends Writer.Job {
        private Configuration originalConfig = null;
        private CommonRdbmsWriter.Job commonRdbmsWriterMaster;

        @Override
        public void init() {
            this.originalConfig = super.getPluginJobConf();
            this.commonRdbmsWriterMaster = new CommonRdbmsWriter.Job(DATABASE_TYPE);
            this.commonRdbmsWriterMaster.init(this.originalConfig);
        }

        @Override
        public void prepare() {
            this.commonRdbmsWriterMaster.prepare(this.originalConfig);
        }

        /**
         * Splits the job configuration into per-task configurations.
         *
         * @param mandatoryNumber split count forwarded unchanged to the common RDBMS writer
         * @return the task configurations produced by the common RDBMS writer
         */
        @Override
        public List<Configuration> split(int mandatoryNumber) {
            return this.commonRdbmsWriterMaster.split(this.originalConfig, mandatoryNumber);
        }

        @Override
        public void post() {
            this.commonRdbmsWriterMaster.post(this.originalConfig);
        }

        @Override
        public void destroy() {
            this.commonRdbmsWriterMaster.destroy(this.originalConfig);
        }
    }

    /**
     * Task-level half of the plugin; writes one slice through a customised common RDBMS writer task.
     */
    public static class Task extends Writer.Task {
        private Configuration writerSliceConfig;
        private CommonRdbmsWriter.Task commonRdbmsWriterSlave;

        @Override
        public void init() {
            this.writerSliceConfig = super.getPluginJobConf();
            this.commonRdbmsWriterSlave = new CommonRdbmsWriter.Task(DATABASE_TYPE) {

                /**
                 * Binds one column value to the prepared statement according to its JDBC type.
                 *
                 * @param preparedStatement statement being filled
                 * @param columnIndex       zero-based column index (JDBC parameter index is {@code columnIndex + 1})
                 * @param columnSqltype     {@link Types} constant of the target column
                 * @param typeName          type name of the target column (not used by this override)
                 * @param column            source value
                 * @return the same prepared statement
                 * @throws SQLException if a date/time value cannot be converted or the driver rejects the value
                 */
                @Override
                protected PreparedStatement fillPreparedStatementColumnType(PreparedStatement preparedStatement, int columnIndex, int columnSqltype, String typeName, Column column) throws SQLException {
                    try {
                        if (column.getRawData() == null) {
                            preparedStatement.setNull(columnIndex + 1, columnSqltype);
                            return preparedStatement;
                        }

                        java.util.Date utilDate;
                        switch (columnSqltype) {
                            case Types.CHAR:
                            case Types.NCHAR:
                            case Types.CLOB:
                            case Types.NCLOB:
                            case Types.VARCHAR:
                            case Types.LONGVARCHAR:
                            case Types.NVARCHAR:
                            case Types.LONGNVARCHAR:
                                preparedStatement.setString(columnIndex + 1, column
                                        .asString());
                                break;

                            case Types.TINYINT:
                            case Types.SMALLINT:
                            case Types.INTEGER:
                            case Types.BIGINT:
                            case Types.DECIMAL:
                            case Types.FLOAT:
                            case Types.REAL:
                            case Types.DOUBLE:
                                String strValue = column.asString();
                                if (emptyAsNull && "".equals(strValue)) {
                                    preparedStatement.setNull(columnIndex + 1, columnSqltype);
                                } else {
                                    switch (columnSqltype) {
                                        case Types.TINYINT:
                                        case Types.SMALLINT:
                                        case Types.INTEGER:
                                            preparedStatement.setInt(columnIndex + 1, column.asBigInteger().intValue());
                                            break;
                                        case Types.BIGINT:
                                            preparedStatement.setLong(columnIndex + 1, column.asLong());
                                            break;
                                        case Types.DECIMAL:
                                            preparedStatement.setBigDecimal(columnIndex + 1, column.asBigDecimal());
                                            break;
                                        case Types.REAL:
                                        case Types.FLOAT:
                                            preparedStatement.setFloat(columnIndex + 1, column.asDouble().floatValue());
                                            break;
                                        case Types.DOUBLE:
                                            preparedStatement.setDouble(columnIndex + 1, column.asDouble());
                                            break;
                                    }
                                }
                                break;

                            case Types.DATE:
                                if (this.resultSetMetaData.getRight().get(columnIndex)
                                        .equalsIgnoreCase("year")) {
                                    if (column.asBigInteger() == null) {
                                        preparedStatement.setString(columnIndex + 1, null);
                                    } else {
                                        preparedStatement.setInt(columnIndex + 1, column.asBigInteger().intValue());
                                    }
                                } else {
                                    java.sql.Date sqlDate = null;
                                    try {
                                        utilDate = column.asDate();
                                    } catch (DataXException e) {
                                        // Keep the original failure as the cause for diagnosis.
                                        throw new SQLException(String.format(
                                                "Date 类型转换错误:[%s]", column), e);
                                    }

                                    if (null != utilDate) {
                                        sqlDate = new java.sql.Date(utilDate.getTime());
                                    }
                                    preparedStatement.setDate(columnIndex + 1, sqlDate);
                                }
                                break;

                            case Types.TIME:
                                java.sql.Time sqlTime = null;
                                try {
                                    utilDate = column.asDate();
                                } catch (DataXException e) {
                                    throw new SQLException(String.format(
                                            "Date 类型转换错误:[%s]", column), e);
                                }

                                if (null != utilDate) {
                                    sqlTime = new java.sql.Time(utilDate.getTime());
                                }
                                preparedStatement.setTime(columnIndex + 1, sqlTime);
                                break;

                            case Types.TIMESTAMP:
                                Timestamp sqlTimestamp = null;
                                if (column instanceof StringColumn && column.asString() != null) {
                                    String timeStampStr = column.asString();
                                    // Timestamp.valueOf requires input shaped like "2017-07-12 14:39:00.123566";
                                    // anything else goes through the generic date conversion below.
                                    boolean isMatch = JDBC_TIMESTAMP_PATTERN.matcher(timeStampStr).matches();
                                    if (isMatch) {
                                        sqlTimestamp = Timestamp.valueOf(timeStampStr);
                                        preparedStatement.setTimestamp(columnIndex + 1, sqlTimestamp);
                                        break;
                                    }
                                }
                                try {
                                    utilDate = column.asDate();
                                } catch (DataXException e) {
                                    throw new SQLException(String.format(
                                            "Date 类型转换错误:[%s]", column), e);
                                }

                                if (null != utilDate) {
                                    sqlTimestamp = new Timestamp(
                                            utilDate.getTime());
                                }
                                preparedStatement.setTimestamp(columnIndex + 1, sqlTimestamp);
                                break;

                            case Types.BINARY:
                            case Types.VARBINARY:
                            case Types.BLOB:
                            case Types.LONGVARBINARY:
                                preparedStatement.setBytes(columnIndex + 1, column
                                        .asBytes());
                                break;

                            case Types.BOOLEAN:
                                preparedStatement.setInt(columnIndex + 1, column.asBigInteger().intValue());
                                break;

                            // warn: bit(1) -> Types.BIT can use setBoolean
                            // warn: bit(>1) -> Types.VARBINARY can use setBytes
                            case Types.BIT:
                                if (this.dataBaseType == DataBaseType.MySql) {
                                    Boolean asBoolean = column.asBoolean();
                                    if (asBoolean != null) {
                                        preparedStatement.setBoolean(columnIndex + 1, asBoolean);
                                    } else {
                                        preparedStatement.setNull(columnIndex + 1, Types.BIT);
                                    }
                                } else {
                                    preparedStatement.setString(columnIndex + 1, column.asString());
                                }
                                break;

                            default:
                                boolean isHandled = fillPreparedStatementColumnType4CustomType(preparedStatement,
                                        columnIndex, columnSqltype, column);
                                if (isHandled) {
                                    break;
                                }
                                throw DataXException
                                        .asDataXException(
                                                DBUtilErrorCode.UNSUPPORTED_TYPE,
                                                String.format(
                                                        "您的配置文件中的列配置信息有误. 因为DataX 不支持数据库写入这种字段类型. 字段名:[%s], 字段类型:[%d], 字段Java类型:[%s]. 请修改表中该字段的类型或者不同步该字段.",
                                                        this.resultSetMetaData.getLeft()
                                                                .get(columnIndex),
                                                        this.resultSetMetaData.getMiddle()
                                                                .get(columnIndex),
                                                        this.resultSetMetaData.getRight()
                                                                .get(columnIndex)));
                        }
                        return preparedStatement;
                    } catch (DataXException e) {
                        // On conversion or overflow failures, report which column was affected.
                        if (e.getErrorCode() == CommonErrorCode.CONVERT_NOT_SUPPORT ||
                                e.getErrorCode() == CommonErrorCode.CONVERT_OVER_FLOW) {
                            throw DataXException
                                    .asDataXException(
                                            e.getErrorCode(),
                                            String.format(
                                                    "类型转化错误. 字段名:[%s], 字段类型:[%d], 字段Java类型:[%s]. 请修改表中该字段的类型或者不同步该字段.",
                                                    this.resultSetMetaData.getLeft()
                                                            .get(columnIndex),
                                                    this.resultSetMetaData.getMiddle()
                                                            .get(columnIndex),
                                                    this.resultSetMetaData.getRight()
                                                            .get(columnIndex)));
                        } else {
                            throw e;
                        }
                    }
                }

                /**
                 * Recursively converts a parsed JSON value into plain Java arrays:
                 * a {@link JSONArray} becomes an {@code Object[]} whose elements are converted
                 * the same way; any other value (including {@code null}) is returned as is.
                 */
                private Object toJavaArray(Object val) {
                    if (null == val) {
                        return null;
                    } else if (val instanceof JSONArray) {
                        Object[] valArray = ((JSONArray) val).toArray();
                        for (int i = 0; i < valArray.length; i++) {
                            valArray[i] = this.toJavaArray(valArray[i]);
                        }
                        return valArray;
                    } else {
                        return val;
                    }
                }

                /**
                 * Handles the JDBC types that the main switch does not cover.
                 *
                 * <p>{@code Types.OTHER} is bound as a string unless the column's type name
                 * starts with "Tuple", which is rejected. {@code Types.ARRAY} parses the
                 * column's string form as a JSON array and binds it as a SQL array.
                 *
                 * @return {@code true} if the value was bound here, {@code false} if the type is not handled
                 * @throws SQLException if the driver rejects the value
                 */
                boolean fillPreparedStatementColumnType4CustomType(PreparedStatement ps,
                                                                   int columnIndex, int columnSqltype,
                                                                   Column column) throws SQLException {
                    switch (columnSqltype) {
                        case Types.OTHER:
                            if (this.resultSetMetaData.getRight().get(columnIndex).startsWith("Tuple")) {
                                throw DataXException
                                        .asDataXException(ClickhouseWriterErrorCode.TUPLE_NOT_SUPPORTED_ERROR, ClickhouseWriterErrorCode.TUPLE_NOT_SUPPORTED_ERROR.getDescription());
                            } else {
                                ps.setString(columnIndex + 1, column.asString());
                            }
                            return true;

                        case Types.ARRAY:
                            Connection conn = ps.getConnection();
                            List<Object> values = JSON.parseArray(column.asString(), Object.class);
                            for (int i = 0; i < values.size(); i++) {
                                values.set(i, this.toJavaArray(values.get(i)));
                            }
                            // NOTE(review): the element type name is always "String" here — confirm
                            // the driver handles non-string element types as intended.
                            Array array = conn.createArrayOf("String", values.toArray());
                            ps.setArray(columnIndex + 1, array);
                            return true;

                        default:
                            break;
                    }

                    return false;
                }
            };

            this.commonRdbmsWriterSlave.init(this.writerSliceConfig);
        }

        @Override
        public void prepare() {
            this.commonRdbmsWriterSlave.prepare(this.writerSliceConfig);
        }

        @Override
        public void startWrite(RecordReceiver recordReceiver) {
            this.commonRdbmsWriterSlave.startWrite(recordReceiver, this.writerSliceConfig, super.getTaskPluginCollector());
        }

        @Override
        public void post() {
            this.commonRdbmsWriterSlave.post(this.writerSliceConfig);
        }

        @Override
        public void destroy() {
            this.commonRdbmsWriterSlave.destroy(this.writerSliceConfig);
        }
    }
}
================================================
FILE: clickhousewriter/src/main/java/com/alibaba/datax/plugin/writer/clickhousewriter/ClickhouseWriterErrorCode.java
================================================
package com.alibaba.datax.plugin.writer.clickhousewriter;
import com.alibaba.datax.common.spi.ErrorCode;
/**
 * Error codes raised by the ClickHouse writer plugin.
 */
public enum ClickhouseWriterErrorCode implements ErrorCode {
    TUPLE_NOT_SUPPORTED_ERROR("ClickhouseWriter-00", "不支持TUPLE类型导入.");

    private final String code;
    private final String description;

    // Enum constructors are implicitly private.
    ClickhouseWriterErrorCode(String code, String description) {
        this.code = code;
        this.description = description;
    }

    /** @return the code string of this error */
    @Override
    public String getCode() {
        return code;
    }

    /** @return the description of this error */
    @Override
    public String getDescription() {
        return description;
    }

    /** @return the code and description rendered as {@code Code:[..], Description:[..].} */
    @Override
    public String toString() {
        return "Code:[" + code + "], Description:[" + description + "].";
    }
}
================================================
FILE: clickhousewriter/src/main/resources/plugin.json
================================================
{
"name": "clickhousewriter",
"class": "com.alibaba.datax.plugin.writer.clickhousewriter.ClickhouseWriter",
"description": "useScene: prod. mechanism: Jdbc connection using the database, execute insert sql.",
"developer": "alibaba"
}
================================================
FILE: clickhousewriter/src/main/resources/plugin_job_template.json
================================================
{
"name": "clickhousewriter",
"parameter": {
"username": "username",
"password": "password",
"column": ["col1", "col2", "col3"],
"connection": [
{
"jdbcUrl": "jdbc:clickhouse://<host>:<port>[/<database>]",
"table": ["table1", "table2"]
}
],
"preSql": [],
"postSql": [],
"batchSize": 65536,
"batchByteSize": 134217728,
"dryRun": false,
"writeMode": "insert"
}
}
================================================
FILE: common/pom.xml
================================================
4.0.0com.alibaba.dataxdatax-all0.0.1-SNAPSHOTdatax-commondatax-commonjarorg.apache.commonscommons-lang3com.alibaba.fastjson2fastjson2commons-iocommons-iojunitjunittestorg.slf4jslf4j-apich.qos.logbacklogback-classicorg.apache.httpcomponentshttpclient4.4testorg.apache.httpcomponentsfluent-hc4.4testorg.apache.commonscommons-math33.1.1src/main/java**/*.propertiesmaven-compiler-plugin${jdk-version}${jdk-version}${project-sourceEncoding}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/base/BaseObject.java
================================================
package com.alibaba.datax.common.base;
import org.apache.commons.lang3.builder.EqualsBuilder;
import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.apache.commons.lang3.builder.ToStringStyle;
/**
 * Base class whose {@code hashCode}, {@code equals} and {@code toString} are all
 * derived reflectively from the object's fields via commons-lang3 builders.
 */
public class BaseObject {

    /** Reflection-based hash code; transient fields are not included. */
    @Override
    public int hashCode() {
        final boolean testTransients = false;
        return HashCodeBuilder.reflectionHashCode(this, testTransients);
    }

    /** Reflection-based equality; transient fields are not compared. */
    @Override
    public boolean equals(Object object) {
        final boolean testTransients = false;
        return EqualsBuilder.reflectionEquals(this, object, testTransients);
    }

    /** Reflection-based multi-line rendering of the object's fields. */
    @Override
    public String toString() {
        final ToStringStyle style = ToStringStyle.MULTI_LINE_STYLE;
        return ToStringBuilder.reflectionToString(this, style);
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/constant/CommonConstant.java
================================================
package com.alibaba.datax.common.constant;
/**
 * Shared constant definitions.
 */
public final class CommonConstant {
    /**
     * Key a plugin uses to label each task produced by its own split with the
     * resource that task uses. It tells core that, when stitching the
     * reader/writer tasks together after the split, a more meaningful shuffle
     * should be performed according to the resource label.
     *
     * Declared {@code final} so the shared key cannot be reassigned at runtime.
     */
    public static final String LOAD_BALANCE_RESOURCE_MARK = "loadBalanceResourceMark";
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/constant/PluginType.java
================================================
package com.alibaba.datax.common.constant;
/**
 * Kinds of plugin; each constant carries the lower-case name returned by
 * {@link #toString()}.
 *
 * Created by jingxing on 14-8-31.
 */
public enum PluginType {
    // pluginType also denotes the resource directory, so it is hard to extend,
    // or rather it should only be extended when really necessary. HANDLER is
    // marked here for now (it is effectively the same as transformer); to be
    // discussed later.
    READER("reader"),
    TRANSFORMER("transformer"),
    WRITER("writer"),
    HANDLER("handler");

    /** Name reported by {@link #toString()}. */
    private final String pluginType;

    PluginType(String pluginType) {
        this.pluginType = pluginType;
    }

    /**
     * @return the plugin type name given to the constant
     */
    @Override
    public String toString() {
        return pluginType;
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/element/BoolColumn.java
================================================
package com.alibaba.datax.common.element;
import com.alibaba.datax.common.exception.CommonErrorCode;
import com.alibaba.datax.common.exception.DataXException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Date;
/**
 * {@link Column} of type {@code BOOL} whose raw data is a {@link Boolean}.
 *
 * Created by jingxing on 14-8-24.
 */
public class BoolColumn extends Column {

    /**
     * Builds a column with null raw data and a byte size of 1.
     */
    public BoolColumn() {
        super(null, Column.Type.BOOL, 1);
    }

    /**
     * Builds a column around {@code bool} with a byte size of 1.
     */
    public BoolColumn(Boolean bool) {
        super(bool, Column.Type.BOOL, 1);
    }

    /**
     * Builds a column from text. Only {@code null} or a case-insensitive
     * "true"/"false" is accepted; {@code null} yields null raw data with byte
     * size 0, anything else yields the parsed value with byte size 1.
     *
     * @param data textual boolean, may be null
     * @throws DataXException if {@code data} is non-null and not "true"/"false"
     */
    public BoolColumn(final String data) {
        // Placeholder value; raw data and byte size are overwritten below.
        this(true);
        this.validate(data);

        final boolean absent = (null == data);
        this.setRawData(absent ? null : Boolean.valueOf(data));
        this.setByteSize(absent ? 0 : 1);
    }

    /**
     * @return the stored value, or null when there is none
     */
    @Override
    public Boolean asBoolean() {
        final Object raw = super.getRawData();
        return raw == null ? null : (Boolean) raw;
    }

    /**
     * @return 1 for true, 0 for false, null when there is no value
     */
    @Override
    public Long asLong() {
        if (this.getRawData() == null) {
            return null;
        }
        final long numeric = this.asBoolean() ? 1L : 0L;
        return numeric;
    }

    /**
     * @return 1.0 for true, 0.0 for false, null when there is no value
     */
    @Override
    public Double asDouble() {
        if (this.getRawData() == null) {
            return null;
        }
        final double numeric = this.asBoolean() ? 1.0d : 0.0d;
        return numeric;
    }

    /**
     * @return "true" or "false", or null when there is no value
     */
    @Override
    public String asString() {
        if (super.getRawData() == null) {
            return null;
        }
        if (this.asBoolean()) {
            return "true";
        }
        return "false";
    }

    /**
     * @return the {@link #asLong()} value as a BigInteger, or null when there is no value
     */
    @Override
    public BigInteger asBigInteger() {
        return this.getRawData() == null ? null : BigInteger.valueOf(this.asLong());
    }

    /**
     * @return the {@link #asLong()} value as a BigDecimal, or null when there is no value
     */
    @Override
    public BigDecimal asBigDecimal() {
        return this.getRawData() == null ? null : BigDecimal.valueOf(this.asLong());
    }

    /** Not supported: always throws. */
    @Override
    public Date asDate() {
        throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT,
                "Bool类型不能转为Date .");
    }

    /** Not supported: always throws. */
    @Override
    public Date asDate(String dateFormat) {
        throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT,
                "Bool类型不能转为Date .");
    }

    /** Not supported: always throws. */
    @Override
    public byte[] asBytes() {
        throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT,
                "Boolean类型不能转为Bytes .");
    }

    /**
     * Rejects any non-null text other than a case-insensitive "true"/"false".
     */
    private void validate(final String data) {
        final boolean acceptable = null == data
                || "true".equalsIgnoreCase(data)
                || "false".equalsIgnoreCase(data);
        if (!acceptable) {
            throw DataXException.asDataXException(
                    CommonErrorCode.CONVERT_NOT_SUPPORT,
                    String.format("String[%s]不能转为Bool .", data));
        }
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/element/BytesColumn.java
================================================
package com.alibaba.datax.common.element;
import com.alibaba.datax.common.exception.CommonErrorCode;
import com.alibaba.datax.common.exception.DataXException;
import org.apache.commons.lang3.ArrayUtils;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Date;
/**
 * {@link Column} of type {@code BYTES} whose raw data is a {@code byte[]}.
 * Only the bytes and string views are supported; every other conversion throws.
 *
 * Created by jingxing on 14-8-24.
 */
public class BytesColumn extends Column {

    /**
     * Builds a column with null raw data and byte size 0.
     */
    public BytesColumn() {
        this(null);
    }

    /**
     * Builds a column holding a clone of {@code bytes}; the byte size is the
     * array length, or 0 for null.
     */
    public BytesColumn(byte[] bytes) {
        super(ArrayUtils.clone(bytes), Column.Type.BYTES, null == bytes ? 0
                : bytes.length);
    }

    /**
     * @return the stored array itself (not a copy), or null when there is none
     */
    @Override
    public byte[] asBytes() {
        if (null == this.getRawData()) {
            return null;
        }

        return (byte[]) this.getRawData();
    }

    /**
     * Decodes the bytes through {@link ColumnCast#bytes2String(BytesColumn)}.
     *
     * @return the decoded text, or null when there is no value
     * @throws DataXException if decoding fails
     */
    @Override
    public String asString() {
        if (null == this.getRawData()) {
            return null;
        }

        try {
            return ColumnCast.bytes2String(this);
        } catch (Exception e) {
            throw DataXException.asDataXException(
                    CommonErrorCode.CONVERT_NOT_SUPPORT,
                    String.format("Bytes[%s]不能转为String .", this.toString()));
        }
    }

    /** Not supported: always throws. */
    @Override
    public Long asLong() {
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Long .");
    }

    /** Not supported: always throws. */
    @Override
    public BigDecimal asBigDecimal() {
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为BigDecimal .");
    }

    /** Not supported: always throws. */
    @Override
    public BigInteger asBigInteger() {
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为BigInteger .");
    }

    /** Not supported: always throws. */
    @Override
    public Double asDouble() {
        // The message previously named Long, a copy-paste slip from asLong().
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Double .");
    }

    /** Not supported: always throws. */
    @Override
    public Date asDate() {
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Date .");
    }

    /** Not supported: always throws. */
    @Override
    public Date asDate(String dateFormat) {
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Date .");
    }

    /** Not supported: always throws. */
    @Override
    public Boolean asBoolean() {
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT, "Bytes类型不能转为Boolean .");
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/element/Column.java
================================================
package com.alibaba.datax.common.element;
import com.alibaba.fastjson2.JSON;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Date;
/**
 * Abstract typed value: a raw object, its {@link Type} and a byte size, plus
 * the set of conversions every concrete column has to provide.
 *
 * Created by jingxing on 14-8-24.
 */
public abstract class Column {

    /** Supported column types. */
    public enum Type {
        BAD, NULL, INT, LONG, DOUBLE, STRING, BOOL, DATE, BYTES
    }

    private Type type;

    private Object rawData;

    private int byteSize;

    /**
     * @param object   raw value to store, may be null
     * @param type     column type
     * @param byteSize size recorded for this value
     */
    public Column(final Object object, final Type type, int byteSize) {
        this.type = type;
        this.rawData = object;
        this.byteSize = byteSize;
    }

    public Object getRawData() {
        return rawData;
    }

    public Type getType() {
        return type;
    }

    public int getByteSize() {
        return byteSize;
    }

    protected void setType(Type type) {
        this.type = type;
    }

    protected void setRawData(Object rawData) {
        this.rawData = rawData;
    }

    protected void setByteSize(int byteSize) {
        this.byteSize = byteSize;
    }

    // Conversions implemented by each concrete column type.

    public abstract Long asLong();

    public abstract Double asDouble();

    public abstract String asString();

    public abstract Date asDate();

    public abstract Date asDate(String dateFormat);

    public abstract byte[] asBytes();

    public abstract Boolean asBoolean();

    public abstract BigDecimal asBigDecimal();

    public abstract BigInteger asBigInteger();

    /**
     * @return this column serialized with {@code JSON.toJSONString}
     */
    @Override
    public String toString() {
        return JSON.toJSONString(this);
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/element/ColumnCast.java
================================================
package com.alibaba.datax.common.element;
import com.alibaba.datax.common.exception.CommonErrorCode;
import com.alibaba.datax.common.exception.DataXException;
import com.alibaba.datax.common.util.Configuration;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.commons.lang3.time.FastDateFormat;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
import java.util.*;
/**
 * Entry point for conversions between column types. Every method delegates to
 * one of the package-private helpers {@code StringCast}, {@code DateCast} or
 * {@code BytesCast}.
 */
public final class ColumnCast {

    /**
     * Passes {@code configuration} to the {@code init} method of each helper,
     * in the order StringCast, DateCast, BytesCast.
     */
    public static void bind(final Configuration configuration) {
        StringCast.init(configuration);
        DateCast.init(configuration);
        BytesCast.init(configuration);
    }

    /** Delegates to {@code StringCast.asDate(column)}. */
    public static Date string2Date(final StringColumn column) throws ParseException {
        final Date parsed = StringCast.asDate(column);
        return parsed;
    }

    /** Delegates to {@code StringCast.asDate(column, dateFormat)}. */
    public static Date string2Date(final StringColumn column, String dateFormat) throws ParseException {
        final Date parsed = StringCast.asDate(column, dateFormat);
        return parsed;
    }

    /** Delegates to {@code StringCast.asBytes(column)}. */
    public static byte[] string2Bytes(final StringColumn column) throws UnsupportedEncodingException {
        final byte[] encoded = StringCast.asBytes(column);
        return encoded;
    }

    /** Delegates to {@code DateCast.asString(column)}. */
    public static String date2String(final DateColumn column) {
        final String formatted = DateCast.asString(column);
        return formatted;
    }

    /** Delegates to {@code BytesCast.asString(column)}. */
    public static String bytes2String(final BytesColumn column) throws UnsupportedEncodingException {
        final String decoded = BytesCast.asString(column);
        return decoded;
    }
}
/**
 * Conversions from a {@link StringColumn} to dates and bytes.
 *
 * The formatter fields and {@code timeZoner} have no initializer: they are
 * only populated by {@link #init(Configuration)}, so they stay null until that
 * method has run.
 */
class StringCast {
    static String datetimeFormat = "yyyy-MM-dd HH:mm:ss";

    static String dateFormat = "yyyy-MM-dd";

    static String timeFormat = "HH:mm:ss";

    // Typed as List<String>: asDate iterates it as String, which a raw List does not allow.
    static List<String> extraFormats = Collections.emptyList();

    static String timeZone = "GMT+8";

    static FastDateFormat dateFormatter;

    static FastDateFormat timeFormatter;

    static FastDateFormat datetimeFormatter;

    static TimeZone timeZoner;

    static String encoding = "UTF-8";

    /**
     * Reads the {@code common.column.*} settings (falling back to the current
     * field values) and rebuilds the time zone and the three formatters.
     *
     * @param configuration source of the settings
     */
    static void init(final Configuration configuration) {
        StringCast.datetimeFormat = configuration.getString(
                "common.column.datetimeFormat", StringCast.datetimeFormat);
        StringCast.dateFormat = configuration.getString(
                "common.column.dateFormat", StringCast.dateFormat);
        StringCast.timeFormat = configuration.getString(
                "common.column.timeFormat", StringCast.timeFormat);
        StringCast.extraFormats = configuration.getList(
                "common.column.extraFormats", Collections.<String>emptyList(), String.class);

        StringCast.timeZone = configuration.getString("common.column.timeZone",
                StringCast.timeZone);
        StringCast.timeZoner = TimeZone.getTimeZone(StringCast.timeZone);

        StringCast.datetimeFormatter = FastDateFormat.getInstance(
                StringCast.datetimeFormat, StringCast.timeZoner);
        StringCast.dateFormatter = FastDateFormat.getInstance(
                StringCast.dateFormat, StringCast.timeZoner);
        StringCast.timeFormatter = FastDateFormat.getInstance(
                StringCast.timeFormat, StringCast.timeZoner);

        StringCast.encoding = configuration.getString("common.column.encoding",
                StringCast.encoding);
    }

    /**
     * Parses the column text by trying, in order, the datetime, date and time
     * formatters and then every pattern in {@code extraFormats}.
     *
     * @param column column to convert
     * @return the first successful parse, or null when the column text is null
     * @throws ParseException the failure of the last format tried when none matches
     */
    static Date asDate(final StringColumn column) throws ParseException {
        final String text = column.asString();
        if (null == text) {
            return null;
        }

        try {
            return StringCast.datetimeFormatter.parse(text);
        } catch (ParseException ignored) {
            // not a datetime; try the next format
        }

        try {
            return StringCast.dateFormatter.parse(text);
        } catch (ParseException ignored) {
            // not a date; try the next format
        }

        ParseException lastFailure;
        try {
            return StringCast.timeFormatter.parse(text);
        } catch (ParseException ex) {
            lastFailure = ex;
        }

        for (String format : StringCast.extraFormats) {
            try {
                return FastDateFormat.getInstance(format, StringCast.timeZoner).parse(text);
            } catch (ParseException ex) {
                lastFailure = ex;
            }
        }
        throw lastFailure;
    }

    /**
     * Parses the column text with the given pattern in the configured time zone.
     *
     * @param column     column to convert
     * @param dateFormat pattern to parse with
     * @return the parsed date, or null when the column text is null (same
     *         contract as {@link #asDate(StringColumn)})
     * @throws ParseException if the text does not match {@code dateFormat}
     */
    static Date asDate(final StringColumn column, String dateFormat) throws ParseException {
        final String text = column.asString();
        if (null == text) {
            // Without this guard a null value was handed straight to the parser.
            return null;
        }
        return FastDateFormat.getInstance(dateFormat, StringCast.timeZoner).parse(text);
    }

    /**
     * Encodes the column text with the configured encoding.
     *
     * @return the encoded bytes, or null when the column text is null
     * @throws UnsupportedEncodingException if the configured encoding is unknown
     */
    static byte[] asBytes(final StringColumn column)
            throws UnsupportedEncodingException {
        final String text = column.asString();
        if (null == text) {
            return null;
        }

        return text.getBytes(StringCast.encoding);
    }
}
/**
 * Formats a {@link DateColumn} as text according to its sub type.
 *
 * For maintainability, consider using Apache's DateFormatUtils directly later on.
 *
 * Chi Nan has already fixed the issue, but for maintainability the built-in
 * Apache function is still used directly.
 */
class DateCast {

    static String datetimeFormat = "yyyy-MM-dd HH:mm:ss";

    static String dateFormat = "yyyy-MM-dd";

    static String timeFormat = "HH:mm:ss";

    static String timeZone = "GMT+8";

    static TimeZone timeZoner = TimeZone.getTimeZone(DateCast.timeZone);

    /**
     * Reads the {@code common.column.*} format and time-zone settings, falling
     * back to the current field values, and rebuilds {@code timeZoner}.
     */
    static void init(final Configuration configuration) {
        DateCast.datetimeFormat = configuration.getString("common.column.datetimeFormat", datetimeFormat);
        DateCast.timeFormat = configuration.getString("common.column.timeFormat", timeFormat);
        DateCast.dateFormat = configuration.getString("common.column.dateFormat", dateFormat);

        DateCast.timeZone = configuration.getString("common.column.timeZone", DateCast.timeZone);
        DateCast.timeZoner = TimeZone.getTimeZone(DateCast.timeZone);
    }

    /**
     * Formats the column's date with the pattern matching its sub type
     * (DATE, TIME or DATETIME) in the configured time zone.
     *
     * @return the formatted text, or null when the column has no date
     * @throws DataXException for any other sub type
     */
    static String asString(final DateColumn column) {
        final Date value = column.asDate();
        if (value == null) {
            return null;
        }

        final String pattern;
        switch (column.getSubType()) {
            case DATE:
                pattern = DateCast.dateFormat;
                break;
            case TIME:
                pattern = DateCast.timeFormat;
                break;
            case DATETIME:
                pattern = DateCast.datetimeFormat;
                break;
            default:
                throw DataXException.asDataXException(
                        CommonErrorCode.CONVERT_NOT_SUPPORT,
                        "时间类型出现不支持类型,目前仅支持DATE/TIME/DATETIME。该类型属于编程错误,请反馈给DataX开发团队 .");
        }
        return DateFormatUtils.format(value, pattern, DateCast.timeZoner);
    }
}
/**
 * Decodes a {@link BytesColumn} into text using a configurable encoding.
 */
class BytesCast {

    static String encoding = "utf-8";

    /**
     * Reads {@code common.column.encoding}, keeping the current value as default.
     */
    static void init(final Configuration configuration) {
        final String configured = configuration.getString("common.column.encoding", BytesCast.encoding);
        BytesCast.encoding = configured;
    }

    /**
     * @return the column bytes decoded with {@code encoding}, or null when the
     *         column has no bytes
     * @throws UnsupportedEncodingException if {@code encoding} is unknown
     */
    static String asString(final BytesColumn column) throws UnsupportedEncodingException {
        final byte[] payload = column.asBytes();
        if (payload == null) {
            return null;
        }
        return new String(payload, encoding);
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/element/DateColumn.java
================================================
package com.alibaba.datax.common.element;
import com.alibaba.datax.common.exception.CommonErrorCode;
import com.alibaba.datax.common.exception.DataXException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.sql.Time;
import java.util.Date;
/**
 * {@link Column} of type {@code DATE}. The value is stored as epoch
 * milliseconds in a {@link Long}; {@link DateType} records whether it stands
 * for a date, a time or a datetime.
 *
 * Created by jingxing on 14-8-24.
 */
public class DateColumn extends Column {

    /** Flavour of the stored value. */
    public static enum DateType {
        DATE, TIME, DATETIME
    }

    private DateType subType = DateType.DATETIME;

    private int nanos = 0;

    private int precision = -1;

    /**
     * Builds a DateColumn with a null value; the sub type is DATETIME.
     */
    public DateColumn() {
        this((Long) null);
    }

    /**
     * Builds a DateColumn from a timestamp; the sub type is DATETIME.
     * The value is stored as a long in milliseconds instead of a Date to save space.
     */
    public DateColumn(final Long stamp) {
        super(stamp, Column.Type.DATE, (null == stamp ? 0 : 8));
    }

    /**
     * Builds a DateColumn from a java.util.Date; the sub type is DATETIME.
     */
    public DateColumn(final Date date) {
        this(date == null ? null : date.getTime());
    }

    /**
     * Builds a DateColumn from a java.sql.Date; the sub type is DATE
     * (date only, no time).
     */
    public DateColumn(final java.sql.Date date) {
        this(date == null ? null : date.getTime());
        this.setSubType(DateType.DATE);
    }

    /**
     * Builds a DateColumn from a java.sql.Time; the sub type is TIME
     * (time only, no date).
     */
    public DateColumn(final java.sql.Time time) {
        this(time == null ? null : time.getTime());
        this.setSubType(DateType.TIME);
    }

    /**
     * Builds a DateColumn from a java.sql.Time; the sub type is TIME
     * (time only, no date). {@code nanos} is recorded only when {@code time}
     * is non-null. A {@code jdbcPrecision} of 10 sets precision 0; a value in
     * 12..17 sets precision {@code jdbcPrecision - 11}; any other value leaves
     * the precision untouched.
     */
    public DateColumn(Time time, int nanos, int jdbcPrecision) {
        this(time);
        if (null != time) {
            setNanos(nanos);
        }
        if (jdbcPrecision == 10) {
            setPrecision(0);
        } else if (jdbcPrecision >= 12 && jdbcPrecision <= 17) {
            setPrecision(jdbcPrecision - 11);
        }
    }

    /**
     * Builds a DateColumn from a java.sql.Timestamp; the sub type is DATETIME.
     */
    public DateColumn(final java.sql.Timestamp ts) {
        this(ts == null ? null : ts.getTime());
        this.setSubType(DateType.DATETIME);
    }

    public long getNanos() {
        return nanos;
    }

    public void setNanos(int nanos) {
        this.nanos = nanos;
    }

    public int getPrecision() {
        return precision;
    }

    public void setPrecision(int precision) {
        this.precision = precision;
    }

    public DateType getSubType() {
        return subType;
    }

    public void setSubType(DateType subType) {
        this.subType = subType;
    }

    /**
     * @return the stored epoch milliseconds, or null when there is no value
     */
    @Override
    public Long asLong() {
        final Object raw = this.getRawData();
        return (Long) raw;
    }

    /**
     * @return the value formatted by {@link ColumnCast#date2String(DateColumn)}
     * @throws DataXException if formatting fails
     */
    @Override
    public String asString() {
        try {
            return ColumnCast.date2String(this);
        } catch (Exception e) {
            final String detail = String.format("Date[%s]类型不能转为String .", this.toString());
            throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, detail);
        }
    }

    /**
     * @return a new Date built from the stored milliseconds, or null when
     *         there is no value
     */
    @Override
    public Date asDate() {
        final Long millis = (Long) this.getRawData();
        if (millis == null) {
            return null;
        }
        return new Date(millis);
    }

    /**
     * Same as {@link #asDate()}; {@code dateFormat} is not used.
     */
    @Override
    public Date asDate(String dateFormat) {
        return asDate();
    }

    /** Not supported: always throws. */
    @Override
    public byte[] asBytes() {
        throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT,
                "Date类型不能转为Bytes .");
    }

    /** Not supported: always throws. */
    @Override
    public Boolean asBoolean() {
        throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT,
                "Date类型不能转为Boolean .");
    }

    /** Not supported: always throws. */
    @Override
    public Double asDouble() {
        throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT,
                "Date类型不能转为Double .");
    }

    /** Not supported: always throws. */
    @Override
    public BigInteger asBigInteger() {
        throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT,
                "Date类型不能转为BigInteger .");
    }

    /** Not supported: always throws. */
    @Override
    public BigDecimal asBigDecimal() {
        throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT,
                "Date类型不能转为BigDecimal .");
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/element/DoubleColumn.java
================================================
package com.alibaba.datax.common.element;
import com.alibaba.datax.common.exception.CommonErrorCode;
import com.alibaba.datax.common.exception.DataXException;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Date;
/**
 * {@link Column} of type {@code DOUBLE}. The value is kept as its string
 * representation so that no precision is lost; the special values NaN and
 * +/-Infinity are stored as text as well.
 */
public class DoubleColumn extends Column {

    /**
     * Builds a column from text. The byte size is the text length (0 for null).
     *
     * @param data decimal text, "NaN", "Infinity" or "-Infinity" (the special
     *             values are matched case-insensitively), or null
     * @throws DataXException if {@code data} is none of the above
     */
    public DoubleColumn(final String data) {
        this(data, null == data ? 0 : data.length());
        this.validate(data);
    }

    public DoubleColumn(Long data) {
        this(data == null ? (String) null : String.valueOf(data));
    }

    public DoubleColumn(Integer data) {
        this(data == null ? (String) null : String.valueOf(data));
    }

    /**
     * Double cannot represent decimal fractions exactly, so storing Double data
     * through this constructor is not recommended; prefer the String constructor.
     *
     * NaN and the infinities are stored as "NaN", "Infinity" and "-Infinity".
     */
    public DoubleColumn(final Double data) {
        this(data == null ? (String) null : plainStringOf(String.valueOf(data)));
    }

    /**
     * Float cannot represent decimal fractions exactly, so storing Float data
     * through this constructor is not recommended; prefer the String constructor.
     *
     * NaN and the infinities are stored as "NaN", "Infinity" and "-Infinity".
     */
    public DoubleColumn(final Float data) {
        this(data == null ? (String) null : plainStringOf(String.valueOf(data)));
    }

    public DoubleColumn(final BigDecimal data) {
        this(null == data ? (String) null : data.toPlainString());
    }

    public DoubleColumn(final BigInteger data) {
        this(null == data ? (String) null : data.toString());
    }

    public DoubleColumn() {
        this((String) null);
    }

    private DoubleColumn(final String data, int byteSize) {
        super(data, Column.Type.DOUBLE, byteSize);
    }

    /**
     * @return the stored text as a BigDecimal, or null when there is no value
     * @throws DataXException if the text is not a decimal number (this includes
     *                        NaN and the infinities)
     */
    @Override
    public BigDecimal asBigDecimal() {
        if (null == this.getRawData()) {
            return null;
        }

        try {
            return new BigDecimal((String) this.getRawData());
        } catch (NumberFormatException e) {
            throw DataXException.asDataXException(
                    CommonErrorCode.CONVERT_NOT_SUPPORT,
                    String.format("String[%s] 无法转换为Double类型 .",
                            (String) this.getRawData()));
        }
    }

    /**
     * @return the value as a Double, mapping the special texts to
     *         {@code Double.NaN} / the infinities, or null when there is no value
     * @throws DataXException if the value does not fit a double
     */
    @Override
    public Double asDouble() {
        if (null == this.getRawData()) {
            return null;
        }

        // Every special text that validate() lets through must be handled
        // here; BigDecimal cannot parse any of them.
        final Double special = specialValueOf((String) this.getRawData());
        if (special != null) {
            return special;
        }

        BigDecimal result = this.asBigDecimal();
        OverFlowUtil.validateDoubleNotOverFlow(result);

        return result.doubleValue();
    }

    /**
     * @return the integer part of the value, or null when there is no value
     * @throws DataXException if the integer part does not fit a long
     */
    @Override
    public Long asLong() {
        if (null == this.getRawData()) {
            return null;
        }

        BigDecimal result = this.asBigDecimal();
        OverFlowUtil.validateLongNotOverFlow(result.toBigInteger());

        return result.longValue();
    }

    /**
     * @return the integer part of the value, or null when there is no value
     */
    @Override
    public BigInteger asBigInteger() {
        if (null == this.getRawData()) {
            return null;
        }

        return this.asBigDecimal().toBigInteger();
    }

    /**
     * @return the stored text, or null when there is no value
     */
    @Override
    public String asString() {
        if (null == this.getRawData()) {
            return null;
        }
        return (String) this.getRawData();
    }

    /** Not supported: always throws. */
    @Override
    public Boolean asBoolean() {
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Bool .");
    }

    /** Not supported: always throws. */
    @Override
    public Date asDate() {
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Date类型 .");
    }

    /** Not supported: always throws. */
    @Override
    public Date asDate(String dateFormat) {
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Date类型 .");
    }

    /** Not supported: always throws. */
    @Override
    public byte[] asBytes() {
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT, "Double类型无法转为Bytes类型 .");
    }

    /**
     * Accepts null, "NaN", "Infinity", "-Infinity" (case-insensitive) or any
     * text BigDecimal can parse; rejects everything else.
     */
    private void validate(final String data) {
        if (null == data) {
            return;
        }

        if (data.equalsIgnoreCase("NaN") || data.equalsIgnoreCase("-Infinity")
                || data.equalsIgnoreCase("Infinity")) {
            return;
        }

        try {
            new BigDecimal(data);
        } catch (Exception e) {
            throw DataXException.asDataXException(
                    CommonErrorCode.CONVERT_NOT_SUPPORT,
                    String.format("String[%s]无法转为Double类型 .", data));
        }
    }

    /**
     * Maps the textual special values, case-insensitively, to their double
     * counterpart; returns null for any other text.
     */
    private static Double specialValueOf(final String text) {
        if ("NaN".equalsIgnoreCase(text)) {
            return Double.NaN;
        }
        if ("Infinity".equalsIgnoreCase(text) || "+Infinity".equalsIgnoreCase(text)) {
            return Double.POSITIVE_INFINITY;
        }
        if ("-Infinity".equalsIgnoreCase(text)) {
            return Double.NEGATIVE_INFINITY;
        }
        return null;
    }

    /**
     * Turns the text of a float/double into plain (non-scientific) notation,
     * passing NaN and the infinities through unchanged since BigDecimal cannot
     * represent them.
     */
    private static String plainStringOf(final String literal) {
        if (specialValueOf(literal) != null) {
            return literal;
        }
        return new BigDecimal(literal).toPlainString();
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/element/LongColumn.java
================================================
package com.alibaba.datax.common.element;
import com.alibaba.datax.common.exception.CommonErrorCode;
import com.alibaba.datax.common.exception.DataXException;
import org.apache.commons.lang3.math.NumberUtils;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Date;
/**
 * {@link Column} of type {@code LONG} whose raw data is a {@link BigInteger}.
 */
public class LongColumn extends Column {

    /**
     * Builds a LongColumn from the string representation of an integer; Java
     * scientific notation is supported.
     *
     * NOTE:
     * if data is the string representation of a floating-point number the
     * value is distorted; use DoubleColumn for floating-point strings.
     *
     * @throws DataXException if {@code data} cannot be parsed as a number
     */
    public LongColumn(final String data) {
        super(null, Column.Type.LONG, 0);
        if (data == null) {
            return;
        }

        try {
            final BigInteger parsed = NumberUtils.createBigDecimal(data).toBigInteger();
            super.setRawData(parsed);

            // When the value is in [0-127], bitLength() < 8, so bitLength() / 8
            // would give a byte size of 0; for simplicity the size is taken to
            // be data.length().
            super.setByteSize(data.length());
        } catch (Exception e) {
            final String detail = String.format("String[%s]不能转为Long .", data);
            throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, detail);
        }
    }

    public LongColumn(Long data) {
        this(null == data ? (BigInteger) null : BigInteger.valueOf(data));
    }

    public LongColumn(Integer data) {
        this(null == data ? (BigInteger) null : BigInteger.valueOf(data));
    }

    /**
     * Builds a column around {@code data}; the byte size is 8, or 0 for null.
     */
    public LongColumn(BigInteger data) {
        this(data, null == data ? 0 : 8);
    }

    private LongColumn(BigInteger data, int byteSize) {
        super(data, Column.Type.LONG, byteSize);
    }

    public LongColumn() {
        this((BigInteger) null);
    }

    /**
     * @return the stored value, or null when there is none
     */
    @Override
    public BigInteger asBigInteger() {
        final Object raw = this.getRawData();
        return raw == null ? null : (BigInteger) raw;
    }

    /**
     * @return the value as a Long, or null when there is none
     * @throws DataXException if the value does not fit a long
     */
    @Override
    public Long asLong() {
        final BigInteger value = (BigInteger) this.getRawData();
        if (value == null) {
            return null;
        }

        OverFlowUtil.validateLongNotOverFlow(value);
        return value.longValue();
    }

    /**
     * @return the value as a Double, or null when there is none
     * @throws DataXException if the value does not fit a double
     */
    @Override
    public Double asDouble() {
        if (this.getRawData() == null) {
            return null;
        }

        final BigDecimal asDecimal = this.asBigDecimal();
        OverFlowUtil.validateDoubleNotOverFlow(asDecimal);
        return asDecimal.doubleValue();
    }

    /**
     * @return false for zero, true for any other value, null when there is none
     */
    @Override
    public Boolean asBoolean() {
        if (this.getRawData() == null) {
            return null;
        }

        final boolean nonZero = this.asBigInteger().compareTo(BigInteger.ZERO) != 0;
        return nonZero;
    }

    /**
     * @return the value as a BigDecimal, or null when there is none
     */
    @Override
    public BigDecimal asBigDecimal() {
        if (this.getRawData() == null) {
            return null;
        }

        return new BigDecimal(this.asBigInteger());
    }

    /**
     * @return the decimal text of the value, or null when there is none
     */
    @Override
    public String asString() {
        final Object raw = this.getRawData();
        if (raw == null) {
            return null;
        }
        return ((BigInteger) raw).toString();
    }

    /**
     * @return a Date built from {@link #asLong()}, or null when there is no value
     */
    @Override
    public Date asDate() {
        if (this.getRawData() == null) {
            return null;
        }
        return new Date(this.asLong());
    }

    /**
     * Same as {@link #asDate()}; {@code dateFormat} is not used.
     */
    @Override
    public Date asDate(String dateFormat) {
        return this.asDate();
    }

    /** Not supported: always throws. */
    @Override
    public byte[] asBytes() {
        throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT,
                "Long类型不能转为Bytes .");
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/element/OverFlowUtil.java
================================================
package com.alibaba.datax.common.element;
import java.math.BigDecimal;
import java.math.BigInteger;
import com.alibaba.datax.common.exception.CommonErrorCode;
import com.alibaba.datax.common.exception.DataXException;
/**
 * Range checks used before narrowing a BigInteger to {@code long} or a
 * BigDecimal to {@code double}.
 */
public final class OverFlowUtil {

    public static final BigInteger MAX_LONG = BigInteger.valueOf(Long.MAX_VALUE);

    public static final BigInteger MIN_LONG = BigInteger.valueOf(Long.MIN_VALUE);

    public static final BigDecimal MIN_DOUBLE_POSITIVE =
            new BigDecimal(String.valueOf(Double.MIN_VALUE));

    public static final BigDecimal MAX_DOUBLE_POSITIVE =
            new BigDecimal(String.valueOf(Double.MAX_VALUE));

    /**
     * @return true when {@code integer} lies outside [MIN_LONG, MAX_LONG]
     */
    public static boolean isLongOverflow(final BigInteger integer) {
        if (integer.compareTo(OverFlowUtil.MAX_LONG) > 0) {
            return true;
        }
        return integer.compareTo(OverFlowUtil.MIN_LONG) < 0;
    }

    /**
     * @throws DataXException when {@link #isLongOverflow(BigInteger)} is true
     */
    public static void validateLongNotOverFlow(final BigInteger integer) {
        if (!OverFlowUtil.isLongOverflow(integer)) {
            return;
        }
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_OVER_FLOW,
                String.format("[%s] 转为Long类型出现溢出 .", integer.toString()));
    }

    /**
     * Zero never overflows. Otherwise the absolute value must lie within
     * [MIN_DOUBLE_POSITIVE, MAX_DOUBLE_POSITIVE].
     *
     * @return true when the magnitude of {@code decimal} is outside that range
     */
    public static boolean isDoubleOverFlow(final BigDecimal decimal) {
        final int sign = decimal.signum();
        if (sign == 0) {
            return false;
        }

        // Compare on the magnitude so one pair of bounds covers both signs.
        final BigDecimal magnitude = (sign == 1) ? decimal : decimal.negate();

        final boolean tooSmall = magnitude.compareTo(MIN_DOUBLE_POSITIVE) < 0;
        final boolean tooLarge = magnitude.compareTo(MAX_DOUBLE_POSITIVE) > 0;
        return tooSmall || tooLarge;
    }

    /**
     * @throws DataXException when {@link #isDoubleOverFlow(BigDecimal)} is true
     */
    public static void validateDoubleNotOverFlow(final BigDecimal decimal) {
        if (!OverFlowUtil.isDoubleOverFlow(decimal)) {
            return;
        }
        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_OVER_FLOW,
                String.format("[%s]转为Double类型出现溢出 .", decimal.toPlainString()));
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/element/Record.java
================================================
package com.alibaba.datax.common.element;
import java.util.Map;
/**
 * An indexed sequence of {@link Column}s together with size accessors and a
 * meta map.
 *
 * Created by jingxing on 14-8-24.
 */
public interface Record {

    /** Adds {@code column} to the record. */
    void addColumn(Column column);

    /** Sets the column at index {@code i}. */
    void setColumn(int i, final Column column);

    /** @return the column at index {@code i} */
    Column getColumn(int i);

    String toString();

    /** @return the number of columns */
    int getColumnNumber();

    int getByteSize();

    int getMemorySize();

    void setMeta(Map meta);

    Map getMeta();
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/element/StringColumn.java
================================================
package com.alibaba.datax.common.element;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Date;
import com.alibaba.datax.common.exception.CommonErrorCode;
import com.alibaba.datax.common.exception.DataXException;
/**
 * {@link Column} of type {@code STRING}. The byte size is the length of the
 * text (0 for null).
 *
 * Created by jingxing on 14-8-24.
 */
public class StringColumn extends Column {

    public StringColumn() {
        this((String) null);
    }

    public StringColumn(final String rawData) {
        super(rawData, Column.Type.STRING, (null == rawData ? 0 : rawData.length()));
    }

    /**
     * @return the stored text, or null when there is none
     */
    @Override
    public String asString() {
        final Object raw = this.getRawData();
        return raw == null ? null : (String) raw;
    }

    /**
     * Rejects the texts "NaN", "Infinity" and "-Infinity", which only make
     * sense as doubles.
     */
    private void validateDoubleSpecific(final String data) {
        final boolean special = "NaN".equals(data)
                || "Infinity".equals(data)
                || "-Infinity".equals(data);
        if (special) {
            throw DataXException.asDataXException(
                    CommonErrorCode.CONVERT_NOT_SUPPORT,
                    String.format("String[\"%s\"]属于Double特殊类型,不能转为其他类型 .", data));
        }
    }

    /**
     * @return the integer part of the text parsed as a decimal, or null when
     *         there is no value
     * @throws DataXException if the text is a double special value or not a number
     */
    @Override
    public BigInteger asBigInteger() {
        if (this.getRawData() == null) {
            return null;
        }

        this.validateDoubleSpecific((String) this.getRawData());

        try {
            return this.asBigDecimal().toBigInteger();
        } catch (Exception e) {
            final String detail = String.format("String[\"%s\"]不能转为BigInteger .", this.asString());
            throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, detail);
        }
    }

    /**
     * @return the text as a Long, or null when there is no value
     * @throws DataXException if the text is a double special value, not a
     *                        number, or does not fit a long
     */
    @Override
    public Long asLong() {
        if (this.getRawData() == null) {
            return null;
        }

        this.validateDoubleSpecific((String) this.getRawData());

        try {
            final BigInteger whole = this.asBigInteger();
            OverFlowUtil.validateLongNotOverFlow(whole);
            return whole.longValue();
        } catch (Exception e) {
            final String detail = String.format("String[\"%s\"]不能转为Long .", this.asString());
            throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, detail);
        }
    }

    /**
     * @return the text as a BigDecimal, or null when there is no value
     * @throws DataXException if the text is a double special value or not a number
     */
    @Override
    public BigDecimal asBigDecimal() {
        if (this.getRawData() == null) {
            return null;
        }

        this.validateDoubleSpecific((String) this.getRawData());

        try {
            return new BigDecimal(this.asString());
        } catch (Exception e) {
            final String detail = String.format("String [\"%s\"] 不能转为BigDecimal .", this.asString());
            throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, detail);
        }
    }

    /**
     * @return the text as a Double, with "NaN", "Infinity" and "-Infinity"
     *         mapped to the matching constants, or null when there is no value
     * @throws DataXException if the text is not a number or does not fit a double
     */
    @Override
    public Double asDouble() {
        if (this.getRawData() == null) {
            return null;
        }

        final String text = (String) this.getRawData();
        if ("NaN".equals(text)) {
            return Double.NaN;
        } else if ("Infinity".equals(text)) {
            return Double.POSITIVE_INFINITY;
        } else if ("-Infinity".equals(text)) {
            return Double.NEGATIVE_INFINITY;
        }

        final BigDecimal asDecimal = this.asBigDecimal();
        OverFlowUtil.validateDoubleNotOverFlow(asDecimal);
        return asDecimal.doubleValue();
    }

    /**
     * @return true/false for a case-insensitive "true"/"false", or null when
     *         there is no value
     * @throws DataXException for any other text
     */
    @Override
    public Boolean asBoolean() {
        if (this.getRawData() == null) {
            return null;
        }

        final String text = this.asString();
        if ("true".equalsIgnoreCase(text)) {
            return true;
        }
        if ("false".equalsIgnoreCase(text)) {
            return false;
        }

        throw DataXException.asDataXException(
                CommonErrorCode.CONVERT_NOT_SUPPORT,
                String.format("String[\"%s\"]不能转为Bool .", this.asString()));
    }

    /**
     * @return the text parsed by {@link ColumnCast#string2Date(StringColumn)}
     * @throws DataXException if parsing fails
     */
    @Override
    public Date asDate() {
        try {
            return ColumnCast.string2Date(this);
        } catch (Exception e) {
            final String detail = String.format("String[\"%s\"]不能转为Date .", this.asString());
            throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, detail);
        }
    }

    /**
     * @return the text parsed by
     *         {@link ColumnCast#string2Date(StringColumn, String)}
     * @throws DataXException if parsing fails
     */
    @Override
    public Date asDate(String dateFormat) {
        try {
            return ColumnCast.string2Date(this, dateFormat);
        } catch (Exception e) {
            final String detail = String.format("String[\"%s\"]不能转为Date .", this.asString());
            throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, detail);
        }
    }

    /**
     * @return the text encoded by {@link ColumnCast#string2Bytes(StringColumn)}
     * @throws DataXException if encoding fails
     */
    @Override
    public byte[] asBytes() {
        try {
            return ColumnCast.string2Bytes(this);
        } catch (Exception e) {
            final String detail = String.format("String[\"%s\"]不能转为Bytes .", this.asString());
            throw DataXException.asDataXException(CommonErrorCode.CONVERT_NOT_SUPPORT, detail);
        }
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/exception/CommonErrorCode.java
================================================
package com.alibaba.datax.common.exception;
import com.alibaba.datax.common.spi.ErrorCode;
/**
 * Error codes of the common module; each constant pairs a code with a
 * description.
 */
public enum CommonErrorCode implements ErrorCode {

    CONFIG_ERROR("Common-00", "您提供的配置文件存在错误信息,请检查您的作业配置 ."),

    CONVERT_NOT_SUPPORT("Common-01", "同步数据出现业务脏数据情况,数据类型转换错误 ."),

    CONVERT_OVER_FLOW("Common-02", "同步数据出现业务脏数据情况,数据类型转换溢出 ."),

    RETRY_FAIL("Common-10", "方法调用多次仍旧失败 ."),

    RUNTIME_ERROR("Common-11", "运行时内部调用错误 ."),

    HOOK_INTERNAL_ERROR("Common-12", "Hook运行错误 ."),

    SHUT_DOWN_TASK("Common-20", "Task收到了shutdown指令,为failover做准备"),

    WAIT_TIME_EXCEED("Common-21", "等待时间超出范围"),

    TASK_HUNG_EXPIRED("Common-22", "任务hung住,Expired");

    private final String code;

    private final String describe;

    CommonErrorCode(String code, String describe) {
        this.code = code;
        this.describe = describe;
    }

    /**
     * @return the code given to the constant
     */
    @Override
    public String getCode() {
        return code;
    }

    /**
     * @return the description given to the constant
     */
    @Override
    public String getDescription() {
        return describe;
    }

    /**
     * @return the code and the description combined into one line
     */
    @Override
    public String toString() {
        final Object[] parts = {this.code, this.describe};
        return String.format("Code:[%s], Describe:[%s]", parts);
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/exception/DataXException.java
================================================
package com.alibaba.datax.common.exception;
import com.alibaba.datax.common.spi.ErrorCode;
import java.io.PrintWriter;
import java.io.StringWriter;
/**
 * Runtime exception that optionally carries an {@link ErrorCode}. Instances
 * are normally obtained through the {@code asDataXException} factories.
 */
public class DataXException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    private ErrorCode errorCode;

    /**
     * The message is the error code's text followed by {@code errorMessage}.
     */
    public DataXException(ErrorCode errorCode, String errorMessage) {
        super(errorCode.toString() + " - " + errorMessage);
        this.errorCode = errorCode;
    }

    /**
     * Builds an exception without an error code.
     */
    public DataXException(String errorMessage) {
        super(errorMessage);
    }

    /**
     * The message is the error code's text, {@code errorMessage} and the
     * stack trace of {@code cause}, joined with " - ".
     */
    private DataXException(ErrorCode errorCode, String errorMessage, Throwable cause) {
        super(errorCode.toString() + " - " + getMessage(errorMessage) + " - " + getMessage(cause), cause);
        this.errorCode = errorCode;
    }

    public static DataXException asDataXException(ErrorCode errorCode, String message) {
        return new DataXException(errorCode, message);
    }

    public static DataXException asDataXException(String message) {
        return new DataXException(message);
    }

    /**
     * @return {@code cause} itself when it already is a DataXException,
     *         otherwise a new exception wrapping it
     */
    public static DataXException asDataXException(ErrorCode errorCode, String message, Throwable cause) {
        return (cause instanceof DataXException)
                ? (DataXException) cause
                : new DataXException(errorCode, message, cause);
    }

    /**
     * @return {@code cause} itself when it already is a DataXException,
     *         otherwise a new exception wrapping it, with the stack trace of
     *         {@code cause} used as the message
     */
    public static DataXException asDataXException(ErrorCode errorCode, Throwable cause) {
        return (cause instanceof DataXException)
                ? (DataXException) cause
                : new DataXException(errorCode, getMessage(cause), cause);
    }

    /**
     * @return the error code, or null when the exception was built from a
     *         message only
     */
    public ErrorCode getErrorCode() {
        return this.errorCode;
    }

    /**
     * Text for {@code obj}: empty for null, the printed stack trace for a
     * Throwable, {@code toString()} for anything else.
     */
    private static String getMessage(Object obj) {
        if (obj == null) {
            return "";
        }
        if (!(obj instanceof Throwable)) {
            return obj.toString();
        }

        final StringWriter buffer = new StringWriter();
        final PrintWriter printer = new PrintWriter(buffer);
        ((Throwable) obj).printStackTrace(printer);
        return buffer.toString();
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/exception/ExceptionTracker.java
================================================
package com.alibaba.datax.common.exception;
import java.io.PrintWriter;
import java.io.StringWriter;
/**
 * Helper that renders the stack trace of a {@link Throwable} as a string.
 */
public final class ExceptionTracker {

    /** Initial capacity of the buffer the stack trace is printed into. */
    public static final int STRING_BUFFER = 1024;

    /**
     * Prints the stack trace of {@code ex} into an in-memory buffer.
     *
     * @param ex the throwable to render; must not be null
     * @return the text produced by {@link Throwable#printStackTrace(PrintWriter)}
     */
    public static String trace(Throwable ex) {
        final StringWriter buffer = new StringWriter(STRING_BUFFER);
        ex.printStackTrace(new PrintWriter(buffer));
        return buffer.toString();
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/plugin/AbstractJobPlugin.java
================================================
package com.alibaba.datax.common.plugin;
/**
 * Abstract plugin base that additionally holds a {@link JobPluginCollector}.
 *
 * Created by jingxing on 14-8-24.
 */
public abstract class AbstractJobPlugin extends AbstractPlugin {

    /** Collector attached to this plugin; {@code null} until the setter is called. */
    private JobPluginCollector jobPluginCollector;

    /**
     * @return the collector currently attached to this plugin
     */
    public JobPluginCollector getJobPluginCollector() {
        return this.jobPluginCollector;
    }

    /**
     * @param jobPluginCollector the collector to attach to this plugin
     */
    public void setJobPluginCollector(JobPluginCollector jobPluginCollector) {
        this.jobPluginCollector = jobPluginCollector;
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/plugin/AbstractPlugin.java
================================================
package com.alibaba.datax.common.plugin;
import com.alibaba.datax.common.base.BaseObject;
import com.alibaba.datax.common.util.Configuration;
import java.util.List;
/**
 * Skeleton {@link Pluginable} implementation that stores the configurations handed to a
 * plugin and supplies empty default hooks.
 */
public abstract class AbstractPlugin extends BaseObject implements Pluginable {

    /** Configuration of the job. */
    private Configuration pluginJobConf;

    /** Configuration of the plugin itself. */
    private Configuration pluginConf;

    /** Job configuration of the peer side (changed by qiangsi.lq). */
    private Configuration peerPluginJobConf;

    private String peerPluginName;

    // NOTE(review): raw List; the element type is not visible here - confirm before adding generics.
    private List readerPluginSplitConf;

    /** Reads one string attribute from the plugin's own configuration. */
    private String readPluginAttribute(String key) {
        assert this.pluginConf != null;
        return this.pluginConf.getString(key);
    }

    /** @return the {@code "name"} entry of the plugin configuration */
    @Override
    public String getPluginName() {
        return readPluginAttribute("name");
    }

    /** @return the {@code "developer"} entry of the plugin configuration */
    @Override
    public String getDeveloper() {
        return readPluginAttribute("developer");
    }

    /** @return the {@code "description"} entry of the plugin configuration */
    @Override
    public String getDescription() {
        return readPluginAttribute("description");
    }

    @Override
    public Configuration getPluginJobConf() {
        return this.pluginJobConf;
    }

    @Override
    public void setPluginJobConf(Configuration pluginJobConf) {
        this.pluginJobConf = pluginJobConf;
    }

    @Override
    public void setPluginConf(Configuration pluginConf) {
        this.pluginConf = pluginConf;
    }

    @Override
    public Configuration getPeerPluginJobConf() {
        return this.peerPluginJobConf;
    }

    @Override
    public void setPeerPluginJobConf(Configuration peerPluginJobConf) {
        this.peerPluginJobConf = peerPluginJobConf;
    }

    @Override
    public String getPeerPluginName() {
        return this.peerPluginName;
    }

    @Override
    public void setPeerPluginName(String peerPluginName) {
        this.peerPluginName = peerPluginName;
    }

    /** Hook with an empty default body. */
    public void preCheck() {
        // intentionally empty
    }

    /** Hook with an empty default body. */
    public void prepare() {
        // intentionally empty
    }

    /** Hook with an empty default body. */
    public void post() {
        // intentionally empty
    }

    /** Hook with an empty default body. */
    public void preHandler(Configuration jobConfiguration) {
        // intentionally empty
    }

    /** Hook with an empty default body. */
    public void postHandler(Configuration jobConfiguration) {
        // intentionally empty
    }

    public List getReaderPluginSplitConf() {
        return readerPluginSplitConf;
    }

    public void setReaderPluginSplitConf(List readerPluginSplitConf) {
        this.readerPluginSplitConf = readerPluginSplitConf;
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/plugin/AbstractTaskPlugin.java
================================================
package com.alibaba.datax.common.plugin;
/**
 * Abstract plugin base that additionally holds a task id, a task group id and a
 * {@link TaskPluginCollector}.
 *
 * Created by jingxing on 14-8-24.
 */
public abstract class AbstractTaskPlugin extends AbstractPlugin {

    /** Collector attached to this plugin; {@code null} until the setter is called. */
    private TaskPluginCollector taskPluginCollector;

    // A TaskPlugin should carry a taskId.
    private int taskId;

    private int taskGroupId;

    /** @return the collector currently attached to this plugin */
    public TaskPluginCollector getTaskPluginCollector() {
        return this.taskPluginCollector;
    }

    /** @param taskPluginCollector the collector to attach to this plugin */
    public void setTaskPluginCollector(TaskPluginCollector taskPluginCollector) {
        this.taskPluginCollector = taskPluginCollector;
    }

    /** @return the stored task id ({@code 0} until set) */
    public int getTaskId() {
        return this.taskId;
    }

    /** @param taskId the task id to store */
    public void setTaskId(int taskId) {
        this.taskId = taskId;
    }

    /** @return the stored task group id ({@code 0} until set) */
    public int getTaskGroupId() {
        return this.taskGroupId;
    }

    /** @param taskGroupId the task group id to store */
    public void setTaskGroupId(int taskGroupId) {
        this.taskGroupId = taskGroupId;
    }
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/plugin/JobPluginCollector.java
================================================
package com.alibaba.datax.common.plugin;
import java.util.List;
import java.util.Map;
/**
* Created by jingxing on 14-9-9.
*/
public interface JobPluginCollector extends PluginCollector {
/**
* 从Task获取自定义收集信息
*
* */
Map> getMessage();
/**
* 从Task获取自定义收集信息
*
* */
List getMessage(String key);
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/plugin/PluginCollector.java
================================================
package com.alibaba.datax.common.plugin;
/**
 * Marker type only: it declares no members.
 */
public interface PluginCollector {
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/plugin/Pluginable.java
================================================
package com.alibaba.datax.common.plugin;
import com.alibaba.datax.common.util.Configuration;
/**
 * Contract for a plugin: descriptive metadata, lifecycle hooks, and accessors for the
 * plugin's own configuration, its job configuration, and the peer plugin's name and
 * job configuration.
 */
public interface Pluginable {

    // ---- descriptive metadata ----

    String getPluginName();

    String getDeveloper();

    String getDescription();

    // ---- lifecycle (presumably invoked by the framework around plugin use - confirm) ----

    void init();

    void destroy();

    // ---- configuration of this plugin ----

    void setPluginConf(Configuration pluginConf);

    Configuration getPluginJobConf();

    void setPluginJobConf(Configuration jobConf);

    // ---- information about the peer plugin ----

    Configuration getPeerPluginJobConf();

    void setPeerPluginJobConf(Configuration peerPluginJobConf);

    String getPeerPluginName();

    void setPeerPluginName(String peerPluginName);
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/plugin/RecordReceiver.java
================================================
/**
* (C) 2010-2013 Alibaba Group Holding Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alibaba.datax.common.plugin;
import com.alibaba.datax.common.element.Record;
/**
 * Source of {@link Record} instances.
 */
public interface RecordReceiver {

    /**
     * @return a record; presumably one produced by the reader side - confirm with implementations
     */
    Record getFromReader();

    /** Shuts this receiver down. */
    void shutdown();
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/plugin/RecordSender.java
================================================
/**
* (C) 2010-2013 Alibaba Group Holding Limited.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.alibaba.datax.common.plugin;
import com.alibaba.datax.common.element.Record;
/**
 * Sink for {@link Record} instances that also creates the records to be filled.
 */
public interface RecordSender {

    /**
     * @return a record instance for the caller to populate
     */
    Record createRecord();

    /**
     * @param record the record to hand over; presumably forwarded to the writer side - confirm
     */
    void sendToWriter(Record record);

    /** Flushes this sender; exact buffering semantics are defined by implementations. */
    void flush();

    /** Terminates this sender; exact semantics are defined by implementations. */
    void terminate();

    /** Shuts this sender down. */
    void shutdown();
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/plugin/TaskPluginCollector.java
================================================
package com.alibaba.datax.common.plugin;
import com.alibaba.datax.common.element.Record;
/**
 * Offered to Task plugins for recording dirty data and custom messages.
 *
 * <ol>
 * <li>Dirty-data recording: TaskPluginCollector provides adapters for several ways of
 * recording dirty data, including local output, centralized reporting, and so on.</li>
 * <li>Custom messages: every task plugin may collect messages through
 * TaskPluginCollector while it runs; the Job plugin retrieves them through the
 * getMessage() interface during its POST phase.</li>
 * </ol>
 */
public abstract class TaskPluginCollector implements PluginCollector {

    /**
     * Collects a dirty record.
     *
     * @param dirtyRecord  the dirty record
     * @param t            the exception information
     * @param errorMessage the error hint message
     */
    public abstract void collectDirtyRecord(Record dirtyRecord, Throwable t, String errorMessage);

    /**
     * Collects a dirty record without an exception; delegates to the three-argument
     * variant with a {@code null} throwable.
     *
     * @param dirtyRecord  the dirty record
     * @param errorMessage the error hint message
     */
    public void collectDirtyRecord(Record dirtyRecord, String errorMessage) {
        collectDirtyRecord(dirtyRecord, null, errorMessage);
    }

    /**
     * Collects a dirty record without a hint message; delegates to the three-argument
     * variant with an empty message.
     *
     * @param dirtyRecord the dirty record
     * @param t           the exception information
     */
    public void collectDirtyRecord(Record dirtyRecord, Throwable t) {
        collectDirtyRecord(dirtyRecord, t, "");
    }

    /**
     * Collects a custom message; Job plugins can obtain it through getMessage.
     * When the same key is reported more than once, a List is used internally to keep
     * every value recorded for that key.
     *
     * @param key   the message key
     * @param value the message value
     */
    public abstract void collectMessage(String key, String value);
}
================================================
FILE: common/src/main/java/com/alibaba/datax/common/spi/ErrorCode.java
================================================
package com.alibaba.datax.common.spi;
/**
* 尤其注意:最好提供toString()实现。例如:
*
*