Repository: caiiiycuk/postgresql-to-sqlite Branch: master Commit: 33162203dba8 Files: 31 Total size: 46.2 KB Directory structure: gitextract_jwx89zhw/ ├── .github/ │ └── workflows/ │ └── build.yml ├── .gitignore ├── .travis.yml ├── Dockerfile ├── LICENSE ├── README.md ├── project/ │ ├── Build.scala │ ├── build.properties │ └── plugins.sbt ├── scalastyle-config.xml └── src/ ├── main/ │ └── scala/ │ └── com/ │ └── github/ │ └── caiiiycuk/ │ └── pg2sqlite/ │ ├── Boot.scala │ ├── Config.scala │ ├── Connection.scala │ ├── DumpInserter.scala │ ├── Log.scala │ ├── LoggedIterator.scala │ ├── command/ │ │ ├── Command.scala │ │ ├── CommandException.scala │ │ ├── Copy.scala │ │ ├── CreateIndex.scala │ │ └── CreateTable.scala │ ├── dsl/ │ │ └── DSL.scala │ ├── iterator/ │ │ ├── Line.scala │ │ └── LineIterator.scala │ ├── schema/ │ │ ├── Column.scala │ │ └── Schema.scala │ └── values/ │ ├── LineToValues.scala │ ├── Value.scala │ └── ValueParseException.scala └── test/ └── scala/ └── com/ └── github/ └── caiiiycuk/ └── pg2sqlite/ └── dsl/ ├── DSLTest.scala └── DumperTest.scala ================================================ FILE CONTENTS ================================================ ================================================ FILE: .github/workflows/build.yml ================================================ # This workflow uses actions that are not certified by GitHub. # They are provided by a third-party and are governed by # separate terms of service, privacy policy, and support # documentation. 
name: Build on: push: branches: [ "master" ] pull_request: branches: [ "master" ] permissions: contents: read jobs: build: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - name: Set up JDK 8 uses: actions/setup-java@v3 with: java-version: '8' distribution: 'temurin' cache: 'sbt' - name: Run tests run: sbt test - name: Build one-jar run: sbt one-jar - name: Upload a Build Artifact uses: actions/upload-artifact@v3.1.2 with: name: postgresql-to-sqlite path: target/scala-2.11/postgresql-to-sqlite*.jar ================================================ FILE: .gitignore ================================================ *.class *.log # sbt specific .cache .history .lib/ dist/* target/ lib_managed/ src_managed/ project/boot/ project/plugins/project/ # Scala-IDE specific .scala_dependencies .worksheet /bin/ ================================================ FILE: .travis.yml ================================================ language: scala scala: - 2.11.12 branches: only: - master jdk: - openjdk9 script: - sbt ++$TRAVIS_SCALA_VERSION test - sbt ++$TRAVIS_SCALA_VERSION one-jar ================================================ FILE: Dockerfile ================================================ FROM hseeberger/scala-sbt:8u222_1.3.5_2.13.1 ENV psource=database.dump ENV starget=sqllight.db RUN mkdir -p /p2s WORKDIR /p2s COPY . 
./ RUN sbt one-jar RUN cp target/scala-2.11/postgresql-to-sqlite_2.11-*-one-jar.jar pg2sqlite.jar CMD exec java -jar pg2sqlite.jar -d "$psource" -o "$starget" ================================================ FILE: LICENSE ================================================ The MIT License (MIT) Copyright (c) 2015 Aleksander Guryanov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ================================================ FILE: README.md ================================================ # postgresql-to-sqlite (pg2sqlite) [![Build](https://github.com/caiiiycuk/postgresql-to-sqlite/actions/workflows/build.yml/badge.svg)](https://github.com/caiiiycuk/postgresql-to-sqlite/actions/workflows/build.yml) Easy to use solution to create sqlite database from postgresql dump. 
* default [`pg_dump`](http://www.postgresql.org/docs/9.4/static/app-pgdump.html) script format * as fast as possible * silently ignore unsupported postgresql features * gzip support ## Installing In [release section](https://github.com/caiiiycuk/postgresql-to-sqlite/releases/) you can download a pre-built version of pg2sqlite.jar ## How to use 1. Install jre (java) on your PC 2. Create dump from postgresql database ```sh pg_dump -h host -U user -f database.dump database ``` 3. Make sqlite database from it ``` java -jar pg2sqlite-1.0.3.jar -d database.dump -o sqlite.db ``` ## Command line arguments `pg2sqlite -d <file> -o <file> [-f <true|false>] [-t <integer|text|real>]` * **-d** `<file>` - file that contains dump of postgresql database (made by pg_dump, accepts .gz) * **-o** `<file>` - file name of newly created sqlite3 database * **-f** `<true|false>` - default: false, force database re-creation if database file already exists * **-t** `<integer|text|real>` - default: integer, change sqlite3 date class (read below) ## Timestamps SQLite does not have a storage class set aside for storing dates and/or times. Instead, the built-in [Date And Time Functions](https://www.sqlite.org/lang_datefunc.html) of SQLite are capable of storing dates and times as TEXT, REAL, or INTEGER values: * TEXT as ISO8601 strings ("YYYY-MM-DD HH:MM:SS.SSS"). * REAL as Julian day numbers, the number of days since noon in Greenwich on November 24, 4714 B.C. according to the proleptic Gregorian calendar. * INTEGER as Unix Time, the number of seconds since 1970-01-01 00:00:00 UTC. By default pg2sqlite uses **INTEGER** to store dates, but you can change this with **-t** argument (`-t text` or `-t real`), use it like this: ```sh java -jar pg2sqlite-1.0.3.jar -d database.dump -o sqlite.db -t text ``` ## Tips pg2sqlite does not support database schemas.
If your dump file includes a schema definition, it will print errors like this: ``` Create Table - Exception: unknown database <schema> [SQL] 'CREATE TABLE <schema>.table (...;' ``` You can easily fix the dump file with `sed`: ```sh # sed 's/<schema name>\.//' -i database.dump sed 's/public\.//' -i database.dump pg2sqlite -d database.dump -o sqlite.db ``` Where `public` is a schema name. ## How to build ```sh git clone https://github.com/caiiiycuk/postgresql-to-sqlite.git cd postgresql-to-sqlite sbt one-jar cp target/scala-2.11/postgresql-to-sqlite_2.11-*-one-jar.jar pg2sqlite.jar ``` ## Docker Clone the repository and run ``` docker build -t postgresql-to-sqlite:latest . ``` inside the postgresql-to-sqlite folder. Use ``` docker run -v /home/john/dbdata:/dbdata -e psource='/dbdata/pqdump.sql' -e starget='/dbdata/output.sqlite' -it postgresql-to-sqlite:latest ``` where - -v: is the volume where the pqdump file is located. (and later the output file) - -e: `psource` is the pqdump filename and folder & `starget` the sqlite filename and folder p.s.
the schema removal has to be done outside the container ## Support If you appreciate this project, please consider voting for it on Stack Overflow: https://stackoverflow.com/questions/6148421/how-to-convert-a-postgres-database-to-sqlite/69293251#69293251
================================================
FILE: project/Build.scala
================================================
import com.github.retronym.SbtOneJar
import sbt._
import Keys._

// Single-project build definition; the one-jar settings are appended to the project settings.
object Build extends Build {
  lazy val project = Project("root", file("."), settings = Seq(
    name := "postgresql-to-sqlite",
    organization := "com.github.caiiiycuk",
    version := "1.1.1",
    scalaVersion := "2.11.12",
    libraryDependencies ++= Seq(
      "com.github.scopt" %% "scopt" % "3.3.0",
      "ch.qos.logback" % "logback-classic" % "1.1.2",
      "org.xerial" % "sqlite-jdbc" % "3.42.0.0",
      "org.scalatest" %% "scalatest" % "2.2.4" % "test"
    )
  ) ++ SbtOneJar.oneJarSettings)
}
================================================
FILE: project/build.properties
================================================
sbt.version=0.13.18
================================================
FILE: project/plugins.sbt
================================================
addSbtPlugin("com.typesafe.sbteclipse" % "sbteclipse-plugin" % "4.0.0")

addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "0.7.0")

addSbtPlugin("org.scala-sbt.plugins" % "sbt-onejar" % "0.8")
================================================
FILE: scalastyle-config.xml
================================================
Scalastyle standard configuration
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/Boot.scala
================================================
package com.github.caiiiycuk.pg2sqlite

import com.github.caiiiycuk.pg2sqlite.command.CommandException
import com.github.caiiiycuk.pg2sqlite.iterator.LineIterator
import com.github.caiiiycuk.pg2sqlite.values.ValueParseException
import ch.qos.logback.classic.Level

/**
 * Command-line entry point: parses the arguments, opens the sqlite database,
 * feeds every line of the dump through [[DumpInserter]] and reports the outcome.
 */
object Boot extends App with Log {

  // Set the root logback logger to INFO.
  val root = org.slf4j.LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME).asInstanceOf[ch.qos.logback.classic.Logger]
  root.setLevel(Level.INFO)

  val config = Config.parse(args)
  import config._

  // Dump size as reported by the file system; used as the denominator of the progress.
  val size = pgdump.length()

  val connection = Connection.sqlite(sqlite, config.dateClass)
  val iterator = LineIterator(pgdump)
  // Progress is reported in percent: amount read so far relative to the file size.
  val loggedIterator = LoggedIterator(iterator, () => 100.0 * iterator.readed / size)
  val dumpInserter = new DumpInserter(connection)

  log.info(s"'$pgdump' (${toMb(size)} Mb) -> '$sqlite'")

  val success = try {
    dumpInserter.insert(loggedIterator)
    true
  } catch {
    // Command and value errors already carry a full description in their message.
    case e: CommandException =>
      log.error(e.getMessage)
      false
    case e: ValueParseException =>
      log.error(e.getMessage)
      false
    // Anything else is logged together with its stack trace.
    case e: Throwable =>
      log.error(e.getMessage, e)
      false
  }

  iterator.close
  connection.close

  if (success) {
    log.info("Well done...")
  } else {
    log.error("Task failed...")
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/Config.scala
================================================
package com.github.caiiiycuk.pg2sqlite

import java.io.File

/**
 * Parsed command-line options.
 *
 * @param pgdump    dump file to read
 * @param sqlite    sqlite database file to create
 * @param force     when true an existing database file is deleted during validation
 * @param dateClass value passed to sqlite as its `date_class` setting
 */
case class Config(pgdump: File = new File("dump"),
                  sqlite: File = new File("db"),
                  force: Boolean = false,
                  dateClass: String = Connection.DEFAULT_DATE_CLASS)

object Config extends Log {

  private val parser = new scopt.OptionParser[Config]("postgresql-to-sqlite") {
    head("postgresql-to-sqlite")

    opt[File]('d', "dump").required().valueName("").action { (v, c) =>
      c.copy(pgdump = v)
    }.text("postgresql dump generated by pg_dump")

    opt[File]('o', "out").required().valueName("").action { (v, c) =>
      c.copy(sqlite = v)
    }.text("sqlite3 database to create")

    opt[Boolean]('f', "force").optional().valueName("").action { (v, c) =>
      c.copy(force = v)
    }.text("recreate database if exists")

    opt[String]('t', "timestamps").optional().valueName("").action { (v, c) =>
      // Only TEXT and REAL switch the date class; any other value keeps the current one.
      val dc = v.toUpperCase()
      if (dc.equals(Connection.TEXT_DATE_CLASS) || dc.equals(Connection.REAL_DATE_CLASS)) {
        c.copy(dateClass = dc)
      } else {
        c
      }
    }.text("Change sqlite3 date class (default: INTEGER)")

    checkConfig { c =>
      import c._
      if (!pgdump.exists()) {
        failure(s"Dump '${pgdump}' does not exists")
      } else if (sqlite.exists()) {
        if (force) {
          // Side effect of validation: the existing database file is removed here.
          sqlite.delete()
          success
        } else {
          failure(s"Database '${sqlite}' already exists")
        }
      } else {
        success
      }
    }
  }

  /**
   * Parses the command line.
   *
   * Logs a warning when the SQLITE_TMPDIR environment variable is not set.
   * Calls `System.exit(1)` when the arguments cannot be parsed or validated.
   *
   * @param args raw command-line arguments
   * @return the parsed configuration
   */
  def parse(args: Array[String]) = {
    parser.parse(args, Config()) match {
      case Some(config) =>
        Option(System.getenv("SQLITE_TMPDIR")) match {
          case None =>
            log.warn("You should set SQLITE_TMPDIR environment variable to control where sqlite stores temp files")
          case _ =>
        }
        config
      case _ =>
        System.exit(1)
        ???
    }
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/Connection.scala
================================================
package com.github.caiiiycuk.pg2sqlite

import org.sqlite.SQLiteConfig
import java.sql.DriverManager
import java.sql.Statement
import java.sql.PreparedStatement
import scala.collection.mutable.ListBuffer
import java.sql.ResultSet
import scala.annotation.tailrec
import java.io.File
import java.util.Properties

/** Factory for the underlying JDBC connection plus the database name it points to. */
trait ConnectionHolder {
  def makeConnection: java.sql.Connection
  def db: String
}

object Connection {
  final val DEFAULT_DATE_CLASS = "INTEGER"
  final val TEXT_DATE_CLASS = "TEXT"
  final val REAL_DATE_CLASS = "REAL"

  private final val DATE_CLASS_PRAGMA = "date_class"
  private final val FETCH_SIZE = 8192
  private final val MAX_VARIABLE_NUMBER = 999

  /**
   * Builds a [[Connection]] for the given sqlite file.
   *
   * The JDBC connection itself is created lazily by the returned wrapper.
   *
   * @param dbFile    sqlite database file
   * @param dateClass value of the `date_class` connection property
   */
  def sqlite(dbFile: File, dateClass: String = DEFAULT_DATE_CLASS): Connection = {
    val connectionHolder = new ConnectionHolder {
      override def makeConnection: java.sql.Connection = {
        val properties = new Properties()
        properties.setProperty(DATE_CLASS_PRAGMA, dateClass)
        implicit val connection = DriverManager.getConnection(s"jdbc:sqlite:$dbFile", properties)
        try {
          // Pragmas are executed in auto-commit mode; afterwards commits are explicit.
          connection.setAutoCommit(true)
          sqlitePragmas()
          connection.setAutoCommit(false)
          connection
        } catch {
          case e: Throwable =>
            // Do not leak the freshly opened connection when its setup fails.
            try connection.close() catch { case c: Throwable => e.addSuppressed(c) }
            throw e
        }
      }

      override def db = dbFile.toString
    }

    new Connection(connectionHolder)
  }

  /** Executes the fixed list of PRAGMA statements below on the given connection. */
  private def sqlitePragmas()(implicit connection: java.sql.Connection) = {
    assert(SQLiteConfig.Pragma.DATE_CLASS.pragmaName.equals(DATE_CLASS_PRAGMA));
    val statement = connection.createStatement()
    try {
      statement.executeUpdate(s"PRAGMA ${SQLiteConfig.Pragma.SYNCHRONOUS.pragmaName} = OFF")
      statement.executeUpdate(s"PRAGMA ${SQLiteConfig.Pragma.JOURNAL_MODE.pragmaName} = OFF")
      statement.executeUpdate(s"PRAGMA ${SQLiteConfig.Pragma.LIMIT_WORKER_THREADS.pragmaName} = 64")
      statement.executeUpdate(s"PRAGMA ${SQLiteConfig.Pragma.MAX_PAGE_COUNT.pragmaName} = 2147483646")
      statement.executeUpdate(s"PRAGMA ${SQLiteConfig.Pragma.CACHE_SIZE.pragmaName} = 65536")
      statement.executeUpdate("PRAGMA cache_spill = true")
    } finally {
      // Close the statement even when one of the pragmas is rejected.
      statement.close
    }
  }
}

/**
 * Thin wrapper around a lazily created JDBC connection.
 *
 * @param connectionHolder supplies the JDBC connection and the database name
 */
class Connection(connectionHolder: ConnectionHolder) {
  import Connection._

  final val MAX_VARIABLE_NUMBER = Connection.MAX_VARIABLE_NUMBER

  lazy val connection = connectionHolder.makeConnection
  lazy val db = connectionHolder.db

  /**
   * Runs `block` with a fresh statement.
   * The statement is closed afterwards, also when `block` throws.
   */
  def withStatement[T](block: (Statement) => T): T = {
    val statement = connection.createStatement()
    try {
      block(statement)
    } finally {
      statement.close
    }
  }

  /**
   * Runs `block` with a prepared statement for `sql`.
   *
   * @param sql       statement text
   * @param keepAlive when true the statement is left open for the caller;
   *                  otherwise it is closed afterwards, also when `block` throws
   */
  def withPreparedStatement[T](sql: String, keepAlive: Boolean = false)(block: (PreparedStatement) => T): T = {
    val statement = connection.prepareStatement(sql)
    try {
      statement.setFetchSize(FETCH_SIZE)
      block(statement)
    } finally {
      if (!keepAlive) statement.close
    }
  }

  /** Commits pending work and closes the connection; the close happens even if the commit fails. */
  def close = {
    try {
      connection.commit
    } finally {
      connection.close
    }
  }

  /** Executes a single update statement and returns the value reported by `executeUpdate`. */
  def execute(sql: String) = {
    withStatement { statement =>
      statement.executeUpdate(sql)
    }
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/DumpInserter.scala
================================================
package com.github.caiiiycuk.pg2sqlite

import scala.annotation.tailrec
import com.github.caiiiycuk.pg2sqlite.command._
import com.github.caiiiycuk.pg2sqlite.iterator.Line
import com.github.caiiiycuk.pg2sqlite.schema.Schema

object DumpInserter {
  // Commands are probed in this order; the first whose matchHead accepts the line wins.
  val COMMANDS = List(CreateTable, Copy, CreateIndex)
}

/**
 * Walks over the dump line by line and hands matching lines to the supported commands.
 *
 * @param connection target database
 */
class DumpInserter(connection: Connection) {
  import DumpInserter._

  // Shared by all commands: filled by CreateTable, read by Copy and CreateIndex.
  implicit val schema = new Schema()

  /**
   * Consumes `iterator` until it is exhausted. Lines that no command recognises are dropped.
   */
  @tailrec
  final def insert(iterator: Iterator[Line]): Unit = {
    if (iterator.hasNext) {
      val head = iterator.next()
      // The command receives the already consumed head followed by the remaining lines.
      val fullIterator = Iterator(head) ++ iterator

      COMMANDS.find(_.matchHead(head)).foreach { command =>
        command.apply(connection, fullIterator)
      }

      insert(iterator)
    }
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/Log.scala
================================================
package com.github.caiiiycuk.pg2sqlite

import org.slf4j.LoggerFactory

/** Logger access plus a few formatting helpers for progress messages. */
trait Log {
  protected lazy val log = LoggerFactory.getLogger(getClass)

  /** Converts a byte count to whole mebibytes (integer division). */
  def toMb(length: Long) = {
    length / 1024 / 1024
  }

  /** Formats a duration in milliseconds as "<minutes>m <seconds>s <millis>ms". */
  def humanizeMsTime(time: Long) = {
    val ms = time % 1000
    val s = time / 1000 % 60
    val m = time / 1000 / 60
    s"${m}m ${s}s ${ms}ms"
  }

  /**
   * Describes the elapsed time and a linear estimate of the remaining time.
   *
   * @param startAt  start time in milliseconds, same clock as `System.currentTimeMillis`
   * @param progress completed fraction; callers in this file pass a value in (0, 1]
   */
  def humanizeElapsedAndRemaning(startAt: Long, progress: Double): String = {
    val elapsed = System.currentTimeMillis - startAt
    // Kept as Long: the estimate is in milliseconds and must not be narrowed to Int.
    val remaining = (elapsed / progress - elapsed).toLong
    s"elapsed: ${humanizeMsTime(elapsed)} / remaining: ${humanizeMsTime(remaining)}"
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/LoggedIterator.scala
================================================
package com.github.caiiiycuk.pg2sqlite

object LoggedIterator {
  final val DEFAULT_SENSIVITY = 10
}

/**
 * Iterator decorator that logs the progress while the wrapped iterator is consumed.
 *
 * @param iterator  wrapped iterator
 * @param progress  returns the current progress in percent
 * @param sensivity number of log steps per percent (10 means one message per 0.1%)
 */
case class LoggedIterator[T](iterator: Iterator[T], progress: () => Double, sensivity: Int = LoggedIterator.DEFAULT_SENSIVITY) extends Iterator[T] with Log {

  val startAt = System.currentTimeMillis
  // Last logged step, i.e. percent multiplied by sensivity and truncated.
  var currentProgress: Long = 0L

  override def hasNext = iterator.hasNext

  override def next(): T = {
    val value = iterator.next
    val newProgress = progress()
    val intProgress = (newProgress * sensivity).toLong

    // Log only when the progress crossed into the next step.
    if (intProgress > currentProgress) {
      val elapsedAndRemaining = humanizeElapsedAndRemaning(startAt, newProgress / 100)
      log.info(s"Progress ${intProgress.toDouble / sensivity}%, ${elapsedAndRemaining}...\t")
      currentProgress = intProgress
    }

    value
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/command/Command.scala
================================================
package com.github.caiiiycuk.pg2sqlite.command

import scala.util.matching.Regex
import scala.annotation.tailrec
import com.github.caiiiycuk.pg2sqlite.Connection
import com.github.caiiiycuk.pg2sqlite.iterator.Line
import com.github.caiiiycuk.pg2sqlite.schema.Schema

/** A dump statement that can be recognised by its first line and replayed against sqlite. */
trait Command {

  /** True when the given first line starts a statement handled by this command. */
  def matchHead(head: Line): Boolean = matchHead(head.text)

  def matchHead(head: String): Boolean

  /**
   * Executes the command.
   *
   * @param iterator positioned at the first line of the statement; the command consumes what it needs
   */
  def apply(connection: Connection, iterator: Iterator[Line])(implicit schema: Schema): Unit

  /**
   * Collects lines up to and including the first one whose text satisfies `when`.
   * When no line matches, all remaining lines are returned.
   */
  @tailrec
  final protected def takeUntil(iterator: Iterator[Line], when: (String) => Boolean, buffer: List[Line] = Nil): List[Line] = {
    if (!iterator.hasNext) {
      buffer.reverse
    } else {
      val line = iterator.next
      val newBuffer = line :: buffer
      if (when(line.text)) {
        newBuffer.reverse
      } else {
        takeUntil(iterator, when, newBuffer)
      }
    }
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/command/CommandException.scala
================================================
package com.github.caiiiycuk.pg2sqlite.command

import com.github.caiiiycuk.pg2sqlite.iterator.Line

/**
 * Failure of a command, carrying the command name, the original cause and
 * context lines that are rendered into the exception message.
 */
case class CommandException(command: String, cause: Throwable, context: List[String]) extends Exception(s""" $command - Exception: \t${cause.getMessage} \t${context.mkString("\n\t")}, """, cause)

object CommandException {

  /**
   * Builds an exception whose context starts with the SQL text and the source lines of the statement.
   *
   * @param rows    lines of the statement; the number of the first one is shown in the message
   * @param context additional context lines appended after the default ones
   */
  def apply(command: String, cause: Throwable, sql: String, rows: List[Line], context: List[String] = Nil): CommandException = {
    // An empty row list must not hide the original error behind a NoSuchElementException.
    val lineNum = rows.headOption.map(_.num.toString).getOrElse("?")
    val default = List(s"[SQL] '$sql'", s"[LINE #$lineNum] ${rows.mkString(" ")}")
    CommandException(command, cause, default ++ context)
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/command/Copy.scala
================================================
package com.github.caiiiycuk.pg2sqlite.command

import com.github.caiiiycuk.pg2sqlite.Connection
import com.github.caiiiycuk.pg2sqlite.Log
import com.github.caiiiycuk.pg2sqlite.values.LineToValues
import java.sql.SQLException
import com.github.caiiiycuk.pg2sqlite.iterator.Line
import com.github.caiiiycuk.pg2sqlite.schema.Schema
import com.github.caiiiycuk.pg2sqlite.values.ValueParseException
import com.github.caiiiycuk.pg2sqlite.dsl.DSL._

/**
 * Handles `COPY table (columns) ...;` followed by data rows terminated by a line starting with `\.`.
 * Every data row is inserted through one prepared `insert` statement.
 */
object Copy extends Command with Log {

  import LineToValues._

  private val TABLE_NAME_POSITION = 1

  private val activator = "^(?i)copy".r

  override def matchHead(head: String): Boolean = {
    activator.findFirstIn(head).isDefined
  }

  override def apply(connection: Connection, iterator: Iterator[Line])(implicit schema: Schema) = {
    // The statement header ends at the first line containing ';'.
    val rows = takeUntil(iterator, _.contains(";"))
    val rawSql = rows.mkString(" ")

    val (tableName, sql, columnTypes) = try {
      val tableName = rawSql.tokens(TABLE_NAME_POSITION)
      val columns = rawSql.takeBraces.head.columns.map(_.name).toList
      // One '?' placeholder per column.
      val marks = ("?," * columns.size).dropRight(1)
      val sql = s"insert into $tableName(${columns.map(column => s"[$column]").mkString(",")}) values($marks)"
      val columnTypes = schema.columnsToTypeConstants(tableName, columns)
      (tableName, sql, columnTypes)
    } catch {
      // Fatal JVM errors are not wrapped; they propagate unchanged.
      case scala.util.control.NonFatal(t) =>
        throw CommandException(s"COPY - Unable to find TABLE NAME or COLUMNS in '$rawSql'", t, rawSql, rows)
    }

    if (schema.shouldExcludeTable(tableName)) {
      log.info(s"Skipping '$sql'")
      // Consume the data rows of the skipped table so that the caller does not
      // re-scan them as potential commands.
      iterator.takeWhile(!_.startsWith("\\.")).foreach { _ => () }
    } else {
      log.info(s"COPY table '$tableName'")
      connection.withPreparedStatement(sql) { statement =>
        iterator.takeWhile(!_.startsWith("\\.")).foreach { row =>
          val values = try {
            toValues(row.text)(columnTypes)
          } catch {
            case e: ValueParseException =>
              throw CommandException("COPY", e, sql, rows,
                List(s"[DATA #${row.num}] '$row'",
                  s"[COLUMN,TYPE] ${schema.columns(tableName).map(_.toString).mkString(" ")}"))
          }

          try {
            values.foreach(_.apply(statement))
            statement.executeUpdate()
          } catch {
            case e: SQLException =>
              val vals = values.map(_.toString).mkString(", ")
              throw CommandException("COPY", e, sql, rows,
                List(s"[DATA #${row.num}] '$row'", s"[VALUES] '$vals'"))
          }
        }
      }
    }
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/command/CreateIndex.scala
================================================
package com.github.caiiiycuk.pg2sqlite.command

import java.sql.SQLException
import scala.annotation.tailrec
import com.github.caiiiycuk.pg2sqlite.Connection
import com.github.caiiiycuk.pg2sqlite.iterator.Line
import com.github.caiiiycuk.pg2sqlite.schema.Schema
import com.github.caiiiycuk.pg2sqlite.dsl.DSL._
import com.github.caiiiycuk.pg2sqlite.Log

/** Handles `CREATE INDEX name ON table (columns);` by issuing a plain sqlite `CREATE INDEX`. */
object CreateIndex extends Command with Log {

  private val INDEX_NAME_POSITION = 2
  private val TABLE_NAME_POSITION = 0

  private val activator = """^(?i)create\s+index""".r

  override def matchHead(head: String): Boolean = {
    activator.findFirstIn(head).isDefined
  }

  override def apply(connection: Connection, iterator: Iterator[Line])(implicit schema: Schema) = {
    val rows = takeUntil(iterator, _.contains(";"))
    // The whole statement is lower-cased before it is taken apart.
    val rawSql = rows.mkString(" ").toLowerCase

    val (tableName, sql, columns) = try {
      // Index name is read from the part before " on ", table name from the part after it.
      val createIndexParts = rawSql.split("""\s+on\s+""")
      val indexName = createIndexParts(0).tokens(INDEX_NAME_POSITION)
      val tableName = createIndexParts(1).tokens(TABLE_NAME_POSITION)
      val columns = rawSql.takeBraces.head.columns.map(column => s"[${column.name}]").mkString(",")
      (tableName, s"CREATE INDEX $indexName ON $tableName ($columns)", columns)
    } catch {
      // Fatal JVM errors are not wrapped; they propagate unchanged.
      case scala.util.control.NonFatal(t) =>
        throw CommandException(s"CREATE INDEX - Unable to find INDEX_NAME or TABLE NAME or COLUMNS in '$rawSql'", t, rawSql, rows)
    }

    if (schema.shouldExcludeTable(tableName) || columns.isEmpty) {
      log.info(s"Skipping '$sql'")
    } else {
      try {
        connection.execute(sql)
      } catch {
        case e: SQLException =>
          throw CommandException("Create Index", e, sql, rows)
      }
    }
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/command/CreateTable.scala
================================================
package com.github.caiiiycuk.pg2sqlite.command

import java.sql.SQLException
import scala.annotation.tailrec
import com.github.caiiiycuk.pg2sqlite.Connection
import com.github.caiiiycuk.pg2sqlite.iterator.Line
import com.github.caiiiycuk.pg2sqlite.schema.Schema
import com.github.caiiiycuk.pg2sqlite.Log
import com.github.caiiiycuk.pg2sqlite.dsl.DSL._

/**
 * Handles `CREATE TABLE name (columns);`: registers the columns in the schema and
 * creates a sqlite table that lists only the column names.
 */
object CreateTable extends Command with Log {

  private final val TABLE_NAME_POSITON = 2

  private final val activator = """^(?i)create\s+table""".r

  override def matchHead(head: String): Boolean = {
    activator.findFirstIn(head).isDefined
  }

  override def apply(connection: Connection, iterator: Iterator[Line])(implicit schema: Schema) = {
    val rows = takeUntil(iterator, _.contains(";"))
    val rawSql = rows.mkString(" ")

    val (tableName, sql) = try {
      val table = rawSql.tokens(TABLE_NAME_POSITON)
      val columns = rawSql.takeBraces.head.columns

      // Columns are registered before the exclusion check below.
      columns.foreach { column =>
        schema.addColumn(table, column)
      }

      (table, s"CREATE TABLE [$table] (${columns.map(column => s"[${column.name}]").mkString(", ")});")
    } catch {
      // Fatal JVM errors are not wrapped; they propagate unchanged.
      case scala.util.control.NonFatal(t) =>
        throw CommandException(s"CREATE TABLE - Unable to find TABLE NAME or COLUMNS in '$rawSql'", t, rawSql, rows)
    }

    if (schema.shouldExcludeTable(tableName)) {
      log.info(s"Skipping '$sql'")
    } else {
      try {
        connection.execute(sql)
      } catch {
        case e: SQLException =>
          throw CommandException("Create Table", e, sql, rows)
      }
    }
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/dsl/DSL.scala
================================================
package com.github.caiiiycuk.pg2sqlite.dsl

import scala.annotation.tailrec
import com.github.caiiiycuk.pg2sqlite.schema.Column

/**
 * Small text helpers used to take SQL statements apart without a real parser.
 *
 * @param line text the helpers operate on
 */
class DSL(line: String) {
  import DSL._

  /** The text with every parenthesised section (and the parentheses) removed. */
  def dropBraces: String =
    dropBraces(line.toIterator)

  /** Contents of each top-level parenthesised section, in order of appearance. */
  def takeBraces: List[String] = {
    takeBraces(line.toIterator)
  }

  /** Splits on commas that are not inside parentheses. Parts are not trimmed. */
  def commaSplitRespectBraces: List[String] = {
    commaSplitRespectBraces(line.toIterator)
  }

  /** Removes quotes, splits on whitespace, ':', ',', '(' and ')' and drops empty parts. */
  def tokens: List[String] = {
    line.replaceAll("\"|'","").split("""\s|:|,|\(|\)""").map(_.trim).filterNot(_.isEmpty).toList
  }

  /**
   * Interprets the text as a comma separated column list.
   * Entries starting with "constraint" are skipped; for `to_tsvector(...)`, `lower(...)`
   * and `upper(...)` expressions a column name is taken from inside the parentheses.
   */
  def columns: List[Column] = {
    val columns = commaSplitRespectBraces(line.toIterator).map(_.trim).filterNot(_.isEmpty)
    columns.map(_.replaceAll("\"|'", "")).flatMap { columnDefenition =>
      // Lower-cased whitespace separated words of a single definition.
      val partials = columnDefenition.split("""\s""")
        .map(_.trim.toLowerCase).filterNot(_.isEmpty).toList

      partials match {
        case head :: _ if head.startsWith("constraint") =>
          None
        case head :: _ if head.startsWith("to_tsvector(") =>
          // The last token inside the parentheses is used as the column name.
          val name = columnDefenition.takeBraces.head.tokens.last
          Some(Column(name, None))
        case head :: _ if head.startsWith("lower(") || head.startsWith("upper(") =>
          // The first token inside the parentheses is used as the column name.
          val name = columnDefenition.takeBraces.head.tokens.head
          Some(Column(name, None))
        case head :: sqlType :: _ =>
          Some(Column(head, Some(sqlType)))
        case head :: Nil =>
          Some(Column(head, None))
        case _ =>
          None
      }
    }
  }

  /**
   * State machine behind [[takeBraces]].
   *
   * @param nesting current parenthesis depth
   * @param acc     text collected for the section being read
   * @param buff    finished sections, most recent first
   */
  @tailrec
  private def takeBraces(line: Iterator[Char], nesting: Int = 0, acc: String = "", buff: List[String] = Nil): List[String] =
    if (line.hasNext) {
      val head = line.next

      // Characters are kept while inside a section; the ')' closing depth 1 is not kept.
      val newAcc = if (nesting > 1 || (nesting > 0 && head != ')')) {
        acc + head
      } else {
        acc
      }

      if (head == '(') {
        takeBraces(line, nesting + 1, newAcc, buff)
      } else if (head == ')' && nesting == 1) {
        // A top-level section is complete.
        takeBraces(line, nesting - 1, "", newAcc :: buff)
      } else if (head == ')') {
        takeBraces(line, nesting - 1, newAcc, buff)
      } else {
        takeBraces(line, nesting, newAcc, buff)
      }
    } else if (acc.nonEmpty) {
      // Input ended inside an unclosed section: keep what was collected.
      (acc :: buff).reverse
    } else {
      buff.reverse
    }

  /** State machine behind [[dropBraces]]: keeps only characters seen at depth 0. */
  @tailrec
  private def dropBraces(line: Iterator[Char], nesting: Int = 0, buff: String = ""): String =
    if (line.hasNext) {
      val head = line.next
      if (head == '(') {
        dropBraces(line, nesting + 1, buff)
      } else if (head == ')') {
        dropBraces(line, nesting - 1, buff)
      } else if (nesting == 0) {
        dropBraces(line, nesting, buff + head)
      } else {
        dropBraces(line, nesting, buff)
      }
    } else {
      buff
    }

  /** State machine behind [[commaSplitRespectBraces]]: a comma splits only at depth 0. */
  @tailrec
  private def commaSplitRespectBraces(line: Iterator[Char], nesting: Int = 0, acc: String = "", buff: List[String] = Nil): List[String] =
    if (line.hasNext) {
      val head = line.next
      if (head == '(') {
        commaSplitRespectBraces(line, nesting + 1, acc + head, buff)
      } else if (head == ')') {
        commaSplitRespectBraces(line, nesting - 1, acc + head, buff)
      } else if (head == ',' && nesting == 0) {
        commaSplitRespectBraces(line, nesting, "", acc :: buff)
      } else {
        commaSplitRespectBraces(line, nesting, acc + head, buff)
      }
    } else if (acc.nonEmpty) {
      (acc :: buff).reverse
    } else {
      buff.reverse
    }
}

object DSL {
  /** Lets any String be used with the helpers of [[DSL]]. */
  implicit def toDSLClass(line: String): DSL = {
    new DSL(line)
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/iterator/Line.scala
================================================
package com.github.caiiiycuk.pg2sqlite.iterator

/**
 * A line of the dump.
 *
 * @param num  line number
 * @param text line content
 */
case class Line(num: Int, text: String) {
  def startsWith(value: String) = text.startsWith(value)

  // Rendering a line yields just its text, without the number.
  override def toString(): String = text
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/iterator/LineIterator.scala
================================================
package com.github.caiiiycuk.pg2sqlite.iterator

import java.io.FileReader
import java.io.BufferedReader
import java.io.Closeable
import java.io.File
import scala.collection.TraversableOnce.flattenTraversableOnce

/** Closeable iterator over dump lines that also reports how much input was read so far. */
trait LineIterator extends Iterator[Line] with Closeable {
  def readed: Long
}

/**
 * Reads a file line by line, always holding the next line in `current`.
 *
 * @param file file to read
 */
class FileOptionStringIterator(file: File) extends Iterator[Option[String]] with Closeable {

  // Number of characters delivered by the underlying reader so far.
  var readed = 0L

  private val reader = new FileReader(file) {
    override def read(buf: Array[Char], off: Int, len: Int) = {
      val count = super.read(buf, off, len)
      // read returns -1 at the end of the stream; that marker must not be added to the counter.
      if (count > 0) {
        readed += count
      }
      count
    }
  }

  private val bufferedReader = new BufferedReader(reader)

  // readLine returns null at the end of the input, which becomes None here.
  private var current = Option(bufferedReader.readLine())

  override def hasNext: Boolean = {
    current.nonEmpty
  }

  override def next(): Option[String] = {
    val value = current
    current = Option(bufferedReader.readLine())
    value
  }

  override def close(): Unit = {
    bufferedReader.close
  }
}
object LineIterator {

  /**
   * Opens `file` and exposes its lines as [[Line]] values numbered from 1.
   * `readed` and `close` are delegated to the underlying file iterator.
   */
  def apply(file: File) = {
    val iterator = new FileOptionStringIterator(file)
    val flatIterator = iterator.flatten.zipWithIndex.map {
      case (text, index) =>
        Line(index + 1, text)
    }

    new LineIterator {
      override def hasNext: Boolean = flatIterator.hasNext
      override def next(): Line = flatIterator.next()
      override def close = iterator.close
      override def readed: Long = iterator.readed
    }
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/schema/Column.scala
================================================
package com.github.caiiiycuk.pg2sqlite.schema

import java.sql.Types

object Column {
  // Checked in order; the first pattern found anywhere in the type name wins,
  // which is why "timestamp" is listed before "time".
  val TYPE_DETECTORS = List(
    ("""boolean""".r -> Types.BOOLEAN),
    ("""int""".r -> Types.BIGINT),
    ("""float""".r -> Types.DOUBLE),
    ("""numeric""".r -> Types.NUMERIC),
    ("""bytea""".r -> Types.BLOB),
    ("""geometry""".r -> Types.BLOB),
    ("""timestamp""".r -> Types.TIMESTAMP),
    ("""time""".r -> Types.TIME),
    ("""date""".r -> Types.DATE),
    ("""char""".r -> Types.VARCHAR),
    ("""text""".r -> Types.VARCHAR))
}

/**
 * A table column.
 *
 * @param name    column name
 * @param sqlType type name as found in the dump, if any
 */
case class Column(name: String, sqlType: Option[String]) {
  import Column._

  /** The `java.sql.Types` constant of the first matching detector, or None. */
  lazy val typeConstant = sqlType.flatMap { sqlType =>
    TYPE_DETECTORS.find {
      case (regex, _) =>
        regex.findFirstIn(sqlType).isDefined
    }.map(_._2)
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/schema/Schema.scala
================================================
package com.github.caiiiycuk.pg2sqlite.schema

import java.sql.Types
import scala.collection.mutable.Map

/**
 * Registry of the tables and columns seen so far. Table and column names are stored lower-cased.
 *
 * @param excludeTables lower-case names of tables that must not be created or filled
 */
class Schema(excludeTables: Set[String] = Set("sqlite_stat")) {

  protected case class Table(columns: Map[String, Column] = Map.empty)

  val tables: Map[String, Table] = Map.empty

  /** Registers `column` for `tableName`, creating the table entry on first use. */
  def addColumn(tableName: String, column: Column) = {
    val table = tables.getOrElseUpdate(tableName.toLowerCase, Table())
    table.columns += ((column.name.toLowerCase, column))
  }

  /** Columns registered for the table, keyed by lower-case name; empty for an unknown table. */
  def columns(tableName: String) = {
    tables.get(tableName.toLowerCase).map(_.columns).getOrElse(Map.empty)
  }

  /**
   * Maps 1-based positions in `columns` to `java.sql.Types` constants.
   * Positions of unknown columns or columns without a detected type are left out.
   */
  def columnsToTypeConstants(tableName: String, columns: List[String]): scala.collection.immutable.Map[Int, Int] = {
    tables.get(tableName.toLowerCase).map { table =>
      columns.zipWithIndex.flatMap {
        case (column, index) =>
          // Keys are stored lower-cased by addColumn, so the lookup must be lower-cased too.
          table.columns.get(column.toLowerCase).flatMap { column =>
            column.typeConstant.map((index + 1, _))
          }
      }.toMap
    }.getOrElse(scala.collection.immutable.Map.empty)
  }

  def shouldExcludeTable(table: String) = {
    excludeTables.contains(table.toLowerCase)
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/values/LineToValues.scala
================================================
package com.github.caiiiycuk.pg2sqlite.values

import java.sql.Types
import java.util.Formatter.DateTime
import java.text.SimpleDateFormat
import java.util.Date

/** Converts a tab separated data row into typed [[Value]]s that can be bound to a statement. */
object LineToValues {

  // Used only when the column type is unknown; an optional leading minus is accepted
  // so that negative numbers are not stored as text.
  val DOUBLE = """^-?\d+\.\d+$""".r
  val INTEGER = """^-?\d+$""".r

  val SIMPLE_TIMESTAMP_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
  val SIMPLE_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd")
  val SIMPLE_TIME_FORMAT = new SimpleDateFormat("HH:mm:ss")

  val FORMATTER = Map(Types.DATE -> SIMPLE_DATE_FORMAT,
    Types.TIME -> SIMPLE_TIME_FORMAT,
    Types.TIMESTAMP -> SIMPLE_TIMESTAMP_FORMAT)

  val NO_HEX_DIGITS = """[^0-9A-Fa-f]""".r

  /**
   * Splits `line` on tabs and converts every field.
   *
   * @param indexToType 1-based column position to `java.sql.Types` constant
   * @return one value per field, in order
   */
  def toValues(line: String)(implicit indexToType: Map[Int, Int]): List[Value] = {
    // Limit -1 keeps trailing empty fields; without it a row ending in empty columns
    // would yield fewer values than the statement has parameters.
    val parts = line.split("\t", -1).map(_.trim)
    parts.zipWithIndex.map {
      case (value, index) =>
        toValue(index + 1, value)
    }.toList
  }

  /**
   * Converts a single field. `\N` becomes a null; a field with a known column type is
   * converted accordingly; otherwise the type is guessed from the text.
   */
  def toValue(index: Int, value: String)(implicit indexToType: Map[Int, Int]) = {
    if (value == """\N""") {
      NullValue(index, indexToType.get(index))
    } else {
      indexToType.get(index).map { sqlType =>
        toValueWithKnownType(index, value, sqlType)
      }.getOrElse {
        value match {
          case DOUBLE(_*) =>
            toDoubleWithStringFallback(index, value)
          case INTEGER(_*) =>
            toIntegerWithDoubleFallback(index, value)
          case _ =>
            StringValue(index, value)
        }
      }
    }
  }

  /**
   * Converts a field according to the given `java.sql.Types` constant.
   * Throws ValueParseException for an unparsable date/time or an unsupported type.
   */
  def toValueWithKnownType(index: Int, value: String, sqlType: Int) = {
    sqlType match {
      case Types.BIGINT =>
        toIntegerWithDoubleFallback(index, value)
      case Types.DOUBLE | Types.NUMERIC =>
        toDoubleWithStringFallback(index, value)
      case Types.VARCHAR =>
        StringValue(index, value)
      case Types.BOOLEAN =>
        // Everything except "f" (any case) is treated as true.
        BooleanValue(index, value.toLowerCase != "f")
      case Types.TIMESTAMP | Types.TIME | Types.DATE =>
        val date = toDate(value, sqlType).getOrElse {
          throw new ValueParseException(s"[COLUMN#${index}] Doesn`t know how to convert string '$value', to timestamp")
        }
        DateValue(index, date, sqlType)
      case Types.BLOB =>
        BlobValue(index, hex2bytes(value))
      case _ =>
        throw new ValueParseException(s"[COLUMN#${index}] Doesn`t know how to convert string '$value', to sql type '$sqlType'")
    }
  }

  /** Parses the leading part of `value`, as long as the format pattern; None on failure. */
  private def toDate(value: String, sqlType: Int): Option[Date] = {
    val formatter = FORMATTER(sqlType)
    try {
      Some(formatter.parse(value.take(formatter.toPattern().length)))
    } catch {
      case t: Throwable =>
        None
    }
  }

  private def toIntegerWithDoubleFallback(index: Int, value: String) = {
    try {
      IntegerValue(index, value.toLong)
    } catch {
      case e: NumberFormatException =>
        toDoubleWithStringFallback(index, value)
    }
  }

  private def toDoubleWithStringFallback(index: Int, value: String) = {
    try {
      RealValue(index, value.toDouble)
    } catch {
      case e: NumberFormatException =>
        StringValue(index, value)
    }
  }

  /**
   * Decodes an even-length string of hex digits into bytes; any other text is
   * returned as the bytes of the string itself.
   */
  private def hex2bytes(value: String): Array[Byte] = {
    if (value.length % 2 != 0 || NO_HEX_DIGITS.findFirstIn(value).isDefined) {
      value.getBytes
    } else {
      // Decoded by hand: javax.xml.bind.DatatypeConverter is not available on JDK 11+.
      value.grouped(2).map(pair => Integer.parseInt(pair, 16).toByte).toArray
    }
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/values/Value.scala
================================================
package com.github.caiiiycuk.pg2sqlite.values

import java.sql.PreparedStatement
import java.sql.Types
import java.util.Date

/**
 * A value bound to a fixed parameter position of a prepared statement.
 *
 * @param index 1-based parameter position
 */
abstract class Value(index: Int) {
  /** Binds this value to its parameter of `statement`. */
  def apply(statement: PreparedStatement): Unit
}

case class NullValue(index: Int, sqlType: Option[Int]) extends Value(index) {
  def apply(statement: PreparedStatement): Unit = {
    // BIGINT is used when the column type is unknown.
    statement.setNull(index, sqlType.getOrElse(Types.BIGINT))
  }
}

case class BooleanValue(index: Int, value: Boolean) extends Value(index) {
  def apply(statement: PreparedStatement): Unit = {
    statement.setBoolean(index, value)
  }
}

case class RealValue(index: Int, value: Double) extends Value(index) {
  def apply(statement: PreparedStatement): Unit = {
    statement.setDouble(index, value)
  }
}

case class IntegerValue(index: Int, value: Long) extends Value(index) {
  def apply(statement: PreparedStatement): Unit = {
    statement.setLong(index, value)
  }
}

case class StringValue(index: Int, value: String) extends Value(index) {
  def apply(statement: PreparedStatement): Unit = {
    statement.setString(index, value)
  }
}

case class BlobValue(index: Int, value: Array[Byte]) extends Value(index) {
  def apply(statement: PreparedStatement): Unit = {
    statement.setBytes(index, value)
  }
}

/**
 * @param dateType one of Types.DATE, Types.TIME; anything else is bound as a timestamp
 */
case class DateValue(index: Int, value: Date, dateType: Int) extends Value(index) {
  def apply(statement: PreparedStatement): Unit = {
    dateType match {
      case Types.DATE =>
        statement.setDate(index, new java.sql.Date(value.getTime))
      case Types.TIME =>
        statement.setTime(index, new java.sql.Time(value.getTime))
      case _ =>
        statement.setTimestamp(index, new java.sql.Timestamp(value.getTime))
    }
  }
}
================================================
FILE: src/main/scala/com/github/caiiiycuk/pg2sqlite/values/ValueParseException.scala
================================================
package com.github.caiiiycuk.pg2sqlite.values

/** Raised when a field of a data row cannot be converted to the type of its column. */
class ValueParseException(message: String) extends Exception(message)
================================================
FILE: src/test/scala/com/github/caiiiycuk/pg2sqlite/dsl/DSLTest.scala
================================================
package com.github.caiiiycuk.pg2sqlite.dsl

import org.scalatest.FlatSpec
import org.scalatest.Matchers
import com.github.caiiiycuk.pg2sqlite.dsl.DSL._
import com.github.caiiiycuk.pg2sqlite.schema.Column

class
DslTest extends FlatSpec with Matchers { "DSL" should "drop braces from line" in { val TEST_STRING = """ id bigint DEFAULT nextval('hibernate_sequence'::regclass) NOT NULL, url text, ident character varying(20) DEFAULT "substring"(upper(md5((((999999999)::double precision * random()))::text)), 1, 8) NOT NULL, created_at timestamp without time zone DEFAULT now() """ TEST_STRING.dropBraces should equal(""" id bigint DEFAULT nextval NOT NULL, url text, ident character varying DEFAULT "substring" NOT NULL, created_at timestamp without time zone DEFAULT now """) } "DSL" should "take columns parts" in { val TEST_STRING = """ insert into some(a, b, c) values ("a", 2, true); """ TEST_STRING.takeBraces should equal(List( "a, b, c", """"a", 2, true""")) } "DSL" should "extract tokens" in { val TEST_STRING = """ insert(strange text) into(some buffer) table """ TEST_STRING.tokens should equal(List("insert", "strange", "text", "into", "some", "buffer", "table")) } "DSL" should "extract columns with type" in { val COLUMNS = """ id bigint DEFAULT nextval('hibernate_sequence'::regclass) NOT NULL, url text, ident character varying(20) DEFAULT "substring"(upper(md5((((999999999)::double precision * random()))::text)), 1, 8) NOT NULL, created_at timestamp without time zone DEFAULT now() """ COLUMNS.columns should equal( List(Column("id", Some("bigint")), Column("url", Some("text")), Column("ident", Some("character")), Column("created_at", Some("timestamp")))) } "DSL" should "exclude keywords (CONSTRAINTS, etc.) 
from columns list" in { val COLUMNS = """ id integer DEFAULT nextval('hibernate_sequence'::regclass) NOT NULL, location geometry, owner_geoobject_id bigint, CONSTRAINT enforce_dims_location CHECK ((st_ndims(location) = 2)), CONSTRAINT enforce_geotype_location CHECK (((geometrytype(location) = 'POLYGON'::text) OR (location IS NULL))), CONSTRAINT enforce_srid_location CHECK ((st_srid(location) = 3395)) """ COLUMNS.columns should equal( List(Column("id", Some("integer")), Column("location", Some("geometry")), Column("owner_geoobject_id", Some("bigint")))) } "DSL" should "get column name from to_tsvector function call" in { val COLUMNS = "to_tsvector('libstemmer_serb_lat_no_diacrit'::regconfig, content)" COLUMNS.columns should equal( List(Column("content", None))) } "DSL" should "get column name from lower/upper function call" in { val COLUMNS = "lower((email)::text),upper((email_up)::text)" COLUMNS.columns should equal( List(Column("email", None), Column("email_up", None))) } "DSL" should "split by comma respect braces" in { val TEST_STRING = """ id bigint DEFAULT nextval('hibernate_sequence'::regclass) NOT NULL, url text, ident character varying(20) DEFAULT "substring"(upper(md5((((999999999)::double precision * random()))::text)), 1, 8) NOT NULL, created_at timestamp without time zone DEFAULT now() """.replaceAll("\n", "") val parts = TEST_STRING.commaSplitRespectBraces parts.length should equal(4) parts(0) should equal("id bigint DEFAULT nextval('hibernate_sequence'::regclass) NOT NULL") parts(1) should equal("url text") parts(2) should equal("ident character varying(20) DEFAULT \"substring\"(upper(md5((((999999999)::double precision * random()))::text)), 1, 8) NOT NULL") parts(3) should equal("created_at timestamp without time zone DEFAULT now()") } } ================================================ FILE: src/test/scala/com/github/caiiiycuk/pg2sqlite/dsl/DumperTest.scala ================================================ package com.github.caiiiycuk.pg2sqlite.dsl 
import org.scalatest.FlatSpec
import org.scalatest.Matchers
import com.github.caiiiycuk.pg2sqlite.iterator.Line
import com.github.caiiiycuk.pg2sqlite.{Connection, DumpInserter}
import org.scalatest.{BeforeAndAfter, FlatSpec, Matchers}

import java.io.File

/**
 * End-to-end specs: feed a small dump through `DumpInserter` into a
 * throw-away SQLite file (`test.db`) and inspect what was stored.
 */
class DumperTest extends FlatSpec with Matchers with BeforeAndAfter {

  val dbFile = new File("test.db")

  // One dump line per margin marker: the specs split the text on "\n",
  // so every `|` has to start its own source line.
  private final val DATE_DUMP =
    """
      |CREATE TABLE test (
      | current timestamp without time zone NOT NULL
      |);
      |
      |COPY test (current) FROM stdin;
      |2024-05-06 15:14:12
      |\.
      |""".stripMargin

  /** Opens a connection to a fresh database file, removing any leftover one. */
  private def makeConnection(dateClass: String = Connection.DEFAULT_DATE_CLASS) = {
    if (dbFile.exists()) {
      dbFile.delete()
    }

    Connection.sqlite(dbFile, dateClass)
  }

  /** Splits a dump into `Line`s numbered from 0, in dump order. */
  private def toLines(dump: String): Iterator[Line] = {
    dump.split("\n")
      .zipWithIndex
      .map {
        case (text, num) => Line(num, text)
      }
      .iterator
  }

  /**
   * Inserts `DATE_DUMP` using the given date class, then hands the first row
   * of `test` to `check`. The result set and the connection are released
   * even when an assertion fails.
   */
  private def checkStoredDate(dateClass: String)(check: java.sql.ResultSet => Unit): Unit = {
    val connection = makeConnection(dateClass)
    try {
      val inserter = new DumpInserter(connection)
      inserter.insert(toLines(DATE_DUMP))

      connection.withStatement { statment =>
        val rs = statment.executeQuery("SELECT * FROM test")
        try {
          rs.next() should equal(true)
          check(rs)
        } finally {
          rs.close()
        }
      }
    } finally {
      connection.close
    }
  }

  after {
    new File("test.db").delete()
  }

  "dumper" should "generate db from test-case of issue#11" in {
    val connection = makeConnection()
    try {
      val inserter = new DumpInserter(connection)
      val dump =
        """
          |CREATE TYPE product_type AS ENUM (
          | 'Material',
          | 'Digital'
          |);
          |
          |CREATE TABLE product (
          | client_id integer NOT NULL,
          | order_product integer,
          | upper_price integer NOT NULL,
          | lower_price integer NOT NULL,
          | type product_type NOT NULL,
          | product_id integer NOT NULL--,
          | CONSTRAINT product_check CHECK (((lower_price > upper_price) AND (upper_price <= 200))),
          | CONSTRAINT product_order_product_check CHECK ((order_product > 0)),
          | CONSTRAINT product_upper_price_check CHECK ((upper_price >= 0))
          |);
          |""".stripMargin

      inserter.insert(toLines(dump))
    } finally {
      connection.close
    }
  }

  "dumper" should "should respect date class (Default)" in {
    checkStoredDate(Connection.DEFAULT_DATE_CLASS) { rs =>
      (rs.getLong(1) > 0) should equal(true)
    }
  }

  "dumper" should "should respect date class (text)" in {
    checkStoredDate(Connection.TEXT_DATE_CLASS) { rs =>
      rs.getString(1) should equal("2024-05-06 15:14:12.000")
    }
  }

  "dumper" should "should respect date class (real)" in {
    checkStoredDate(Connection.REAL_DATE_CLASS) { rs =>
      (rs.getDouble(1) > 0) should equal(true)
    }
  }
}