From 867b81725a3fddc53b184ea3bd8dc3e62ffd4c9b Mon Sep 17 00:00:00 2001
From: Alexey Kuzin <akudiyar@gmail.com>
Date: Fri, 25 Oct 2024 04:26:14 +0200
Subject: [PATCH] Add workaround for creating test table using driver

---
 .../src/main/resources/docker-compose.yml     |  6 ++--
 .../src/main/resources/docker-compose.yaml    |  6 ++--
 picodata-jdbc-spark-example/README.md         |  6 ++++
 picodata-jdbc-spark-example/build.gradle      | 22 +++++++-----
 picodata-jdbc-spark-example/gradlew           |  0
 .../src/main/resources/docker-compose.yaml    |  9 +++--
 .../main/resources}/log4j2.properties         |  0
 .../src/main/resources/logback.xml            |  2 ++
 .../src/main/resources/logging.properties     |  2 ++
 .../main/scala/PicodataJDBCSparkExample.scala | 34 ++++++++++++++++---
 10 files changed, 64 insertions(+), 23 deletions(-)
 mode change 100644 => 100755 picodata-jdbc-spark-example/gradlew
 rename picodata-jdbc-spark-example/{ => src/main/resources}/log4j2.properties (100%)
 create mode 100644 picodata-jdbc-spark-example/src/main/resources/logging.properties

diff --git a/picodata-java-example/src/main/resources/docker-compose.yml b/picodata-java-example/src/main/resources/docker-compose.yml
index c39cc4e..b83aef3 100644
--- a/picodata-java-example/src/main/resources/docker-compose.yml
+++ b/picodata-java-example/src/main/resources/docker-compose.yml
@@ -3,7 +3,7 @@ version: '3'
 
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.4.1
+    image: docker-public.binary.picodata.io/picodata:24.6.0
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -16,7 +16,7 @@ services:
       - "3301:3301"
 
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.4.1
+    image: docker-public.binary.picodata.io/picodata:24.6.0
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -32,7 +32,7 @@ services:
 
 
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.4.1
+    image: docker-public.binary.picodata.io/picodata:24.6.0
     container_name: picodata-3
     hostname: picodata-3
     depends_on:
diff --git a/picodata-jdbc-example/src/main/resources/docker-compose.yaml b/picodata-jdbc-example/src/main/resources/docker-compose.yaml
index dad0992..367a889 100644
--- a/picodata-jdbc-example/src/main/resources/docker-compose.yaml
+++ b/picodata-jdbc-example/src/main/resources/docker-compose.yaml
@@ -3,7 +3,7 @@ version: '3'
 
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.4.1
+    image: docker-public.binary.picodata.io/picodata:24.6.0
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -19,7 +19,7 @@ services:
       - "5432:5432"
 
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.4.1
+    image: docker-public.binary.picodata.io/picodata:24.6.0
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -35,7 +35,7 @@ services:
 
 
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.4.1
+    image: docker-public.binary.picodata.io/picodata:24.6.0
     container_name: picodata-3
     hostname: picodata-3
     depends_on:
diff --git a/picodata-jdbc-spark-example/README.md b/picodata-jdbc-spark-example/README.md
index 8d888d0..eee7995 100644
--- a/picodata-jdbc-spark-example/README.md
+++ b/picodata-jdbc-spark-example/README.md
@@ -41,6 +41,12 @@ docker-compose exec picodata-1 bash -c "echo -ne \"\\set language sql\nCREATE US
 
 6. Execute the following command in the repository root directory:
 
+```shell
+$ sh ./gradlew build
+```
+
+7. Run the application:
+
 ```shell
 $ sh ./gradlew run
 ```
diff --git a/picodata-jdbc-spark-example/build.gradle b/picodata-jdbc-spark-example/build.gradle
index 6c55406..0f234cb 100644
--- a/picodata-jdbc-spark-example/build.gradle
+++ b/picodata-jdbc-spark-example/build.gradle
@@ -9,21 +9,25 @@ allprojects {
     repositories {
         mavenLocal()
         mavenCentral()
-        jcenter()
         maven {
             url = uri("https://binary.picodata.io/repository/maven-releases/")
         }
     }
 
     dependencies {
-        compile group: 'org.apache.spark', name: 'spark-core_2.13', version: '3.3.2'
-        compile group: 'org.apache.spark', name: 'spark-hive_2.13', version: '3.3.2'
-        compile group: 'org.apache.spark', name: 'spark-sql_2.13', version: '3.3.2'
-        compile group: 'org.slf4j', name: 'slf4j-api', version: '1.7.+'
-        compile group: 'ch.qos.logback', name: 'logback-classic', version: '1.+'
-        compile group: 'io.picodata', name: 'picodata-jdbc', version: '1.0.0'
-
-        testCompile group: 'org.scalatest', name: 'scalatest_2.13', version: '3.2.3'
+        implementation group: 'org.apache.spark', name: 'spark-core_2.13', version: '3.3.2'
+        implementation group: 'org.apache.spark', name: 'spark-hive_2.13', version: '3.3.2'
+        implementation group: 'org.apache.spark', name: 'spark-sql_2.13', version: '3.3.2'
+        implementation group: 'org.slf4j', name: 'slf4j-api', version: '2.0.16'
+        // for Picodata JDBC
+        implementation group: 'org.slf4j', name: 'jul-to-slf4j', version: '2.0.16'
+        // for Spark
+        implementation group: 'org.slf4j', name: 'log4j-over-slf4j', version: '2.0.16'
+        // SLF4J -> Logback
+        implementation group: 'ch.qos.logback', name: 'logback-classic', version: '1.+'
+        implementation group: 'io.picodata', name: 'picodata-jdbc', version: '1.0.0'
+
+        testImplementation group: 'org.scalatest', name: 'scalatest_2.13', version: '3.2.3'
     }
 
     processResources {
diff --git a/picodata-jdbc-spark-example/gradlew b/picodata-jdbc-spark-example/gradlew
old mode 100644
new mode 100755
diff --git a/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml b/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml
index dad0992..4475c98 100644
--- a/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml
+++ b/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml
@@ -3,7 +3,7 @@ version: '3'
 
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.4.1
+    image: docker-public.binary.picodata.io/picodata:24.6.0
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -14,12 +14,13 @@ services:
       PICODATA_PEER: picodata-1:3301
       PICODATA_PG_LISTEN: picodata-1:5432
       PICODATA_PG_SSL: "false"
+      PICODATA_MEMTX_MEMORY: 512000000
     ports:
       - "3301:3301"
       - "5432:5432"
 
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.4.1
+    image: docker-public.binary.picodata.io/picodata:24.6.0
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -30,12 +31,13 @@ services:
       PICODATA_LISTEN: picodata-2:3302
       PICODATA_ADVERTISE: picodata-2:3302
       PICODATA_PEER: picodata-1:3301
+      PICODATA_MEMTX_MEMORY: 512000000
     ports:
       - "3302:3302"
 
 
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.4.1
+    image: docker-public.binary.picodata.io/picodata:24.6.0
     container_name: picodata-3
     hostname: picodata-3
    depends_on:
@@ -46,5 +48,6 @@ services:
       PICODATA_LISTEN: picodata-3:3303
       PICODATA_ADVERTISE: picodata-3:3303
       PICODATA_PEER: picodata-1:3301
+      PICODATA_MEMTX_MEMORY: 512000000
     ports:
       - "3303:3303"
diff --git a/picodata-jdbc-spark-example/log4j2.properties b/picodata-jdbc-spark-example/src/main/resources/log4j2.properties
similarity index 100%
rename from picodata-jdbc-spark-example/log4j2.properties
rename to picodata-jdbc-spark-example/src/main/resources/log4j2.properties
diff --git a/picodata-jdbc-spark-example/src/main/resources/logback.xml b/picodata-jdbc-spark-example/src/main/resources/logback.xml
index 253f60a..ba8189f 100644
--- a/picodata-jdbc-spark-example/src/main/resources/logback.xml
+++ b/picodata-jdbc-spark-example/src/main/resources/logback.xml
@@ -7,6 +7,8 @@
         </encoder>
     </appender>
 
+    <logger name="io.picodata" level="INFO"/>
+
     <root level="${logLevel:-INFO}">
         <appender-ref ref="STDOUT"/>
     </root>
diff --git a/picodata-jdbc-spark-example/src/main/resources/logging.properties b/picodata-jdbc-spark-example/src/main/resources/logging.properties
new file mode 100644
index 0000000..40730ac
--- /dev/null
+++ b/picodata-jdbc-spark-example/src/main/resources/logging.properties
@@ -0,0 +1,2 @@
+handlers = org.slf4j.bridge.SLF4JBridgeHandler
+org.postgresql.level = FINE
diff --git a/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala b/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala
index 09e0127..c450585 100644
--- a/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala
+++ b/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala
@@ -1,6 +1,9 @@
 package io.picodata
 
 import org.apache.spark.sql.{SaveMode, SparkSession}
+import org.apache.spark.sql.jdbc.JdbcDialects
+import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
+import org.apache.spark.sql.functions._
 import org.slf4j.Logger
 import org.slf4j.LoggerFactory
 import scala.reflect.io.Directory
@@ -39,23 +42,44 @@ object PicodataJDBCSparkExample extends App {
 
     // 2. Load the CSV into a DataFrame
     var df = spark.read
       .format("csv")
+      .option("header", "true")
+      .option("inferSchema", "true")
       .load("src/main/resources/onemillion.csv")
+      .select(col("id"), col("unique_key"), col("book_name"), col("author"), col("year"))
 
     logger.info("Loaded 1M rows into memory")
 
-    val jdbcUrl = "jdbc:picodata://localhost:5432/"
+    val jdbcUrl = "jdbc:picodata://localhost:5432/?user=sqluser&password=P@ssw0rd&sslmode=disable"
 
     try {
+      // only needed if the table is not created on Picodata server
+      // basic JDBC connector does not support primary keys
+      val options = Map(
+        ("driver", "io.picodata.jdbc.Driver"),
+        ("url", jdbcUrl),
+        ("dbtable", "test")
+      )
+      val jdbcOptions = new JDBCOptions(options)
+      val connection = JdbcDialects.get(jdbcUrl).createConnectionFactory(jdbcOptions)(-1)
+      var statement = connection.prepareStatement("DROP TABLE test")
+      try {
+        // IF EXISTS will be available in Picodata 24.6.1+
+        statement.executeUpdate()
+      } catch {
+        case e: Exception => if (!e.getMessage.contains("test not found")) throw e
+      }
+      statement = connection.prepareStatement("CREATE TABLE test" +
+        "(id INTEGER PRIMARY KEY, unique_key VARCHAR(1000), book_name VARCHAR(100), author VARCHAR(100), year INTEGER)")
+      statement.executeUpdate()
+      connection.close()
+
       // 3. Write a Dataset to a Picodata table
       df.write
         .format("jdbc")
         .option("driver", "io.picodata.jdbc.Driver")
-        .mode(SaveMode.Overwrite)
+        .mode(SaveMode.Append)
         // Picodata server connection options
         .option("url", jdbcUrl)
-        .option("sslmode", "disable")
-        .option("user", "sqluser")
-        .option("password", "P@ssw0rd")
         // this option is important as it optimizes single INSERT statements into multi-value INSERTs
         .option("reWriteBatchedInserts", "true")
         // this option value can be tuned according to the number of Spark workers you have
--
GitLab
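
A note on the change in PicodataJDBCSparkExample.scala: Spark's generic JDBC writer cannot declare a primary key, which Picodata tables require, so the patch pre-creates the `test` table over a raw JDBC connection and then writes with `SaveMode.Append` instead of `Overwrite` (an overwrite would recreate the table without the key). Below is a minimal standalone sketch of that pre-creation step using plain `java.sql`; the `DriverManager`-based connection and the `CreateTestTable` object name are illustrative assumptions, while the URL and DDL mirror the patch (which obtains its connection through Spark's `JdbcDialects`/`JDBCOptions` factory instead).

```scala
import java.sql.DriverManager

// Hypothetical helper: create the target table with a primary key before the Spark job
// appends into it. Assumes the io.picodata.jdbc driver is on the classpath and the
// cluster from docker-compose.yaml is running locally.
object CreateTestTable extends App {
  val jdbcUrl = "jdbc:picodata://localhost:5432/?user=sqluser&password=P@ssw0rd&sslmode=disable"
  val connection = DriverManager.getConnection(jdbcUrl)
  try {
    val statement = connection.createStatement()
    try {
      // DROP TABLE IF EXISTS is expected in Picodata 24.6.1+; until then, ignore "not found"
      try statement.executeUpdate("DROP TABLE test")
      catch { case e: Exception => if (!e.getMessage.contains("test not found")) throw e }
      statement.executeUpdate(
        "CREATE TABLE test (id INTEGER PRIMARY KEY, unique_key VARCHAR(1000), " +
          "book_name VARCHAR(100), author VARCHAR(100), year INTEGER)")
    } finally statement.close()
  } finally connection.close()
}
```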