From 8e20ca706a829fdff0ae93fdd03f896e06252c27 Mon Sep 17 00:00:00 2001
From: Alexey Kuzin <akudiyar@gmail.com>
Date: Tue, 26 Nov 2024 01:35:52 +0100
Subject: [PATCH] Update JDBC-Spark example with new options for VDBE opcodes
 and virtual table limits

---
 picodata-java-example/README.md                    |  2 +-
 .../src/main/resources/docker-compose.yml          |  6 +--
 picodata-jdbc-example/README.md                    |  2 +-
 .../src/main/resources/docker-compose.yaml         |  6 +--
 picodata-jdbc-spark-example/README.md              |  2 +-
 .../src/main/resources/docker-compose.yaml         |  6 +--
 .../main/scala/PicodataJDBCSparkExample.scala      | 44 ++++++++++++++-----
 7 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/picodata-java-example/README.md b/picodata-java-example/README.md
index 63d88bc..29a1cd8 100644
--- a/picodata-java-example/README.md
+++ b/picodata-java-example/README.md
@@ -26,7 +26,7 @@ docker-compose up -d
 3. Set up driver user authorization for Picodata in the container:
 
 ```shell
-docker-compose exec picodata-1 bash -c "echo -ne \"\\set language sql\nALTER USER \\\"admin\\\" WITH PASSWORD 'P@ssw0rd';\" | picodata admin /home/picouser/picodata-1/admin.sock"
+docker-compose exec picodata-1 bash -c "echo -ne \"ALTER USER \\\"admin\\\" WITH PASSWORD 'P@ssw0rd';\" | picodata admin /var/lib/picodata/picodata-1/admin.sock"
 ```
 
 4. Return to the initial directory `picodata-java-example` and launch the example application.
diff --git a/picodata-java-example/src/main/resources/docker-compose.yml b/picodata-java-example/src/main/resources/docker-compose.yml
index b83aef3..40cfd7b 100644
--- a/picodata-java-example/src/main/resources/docker-compose.yml
+++ b/picodata-java-example/src/main/resources/docker-compose.yml
@@ -3,7 +3,7 @@ version: '3'
 
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -16,7 +16,7 @@ services:
       - "3301:3301"
 
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -32,7 +32,7 @@
 
 
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-3
     hostname: picodata-3
     depends_on:
diff --git a/picodata-jdbc-example/README.md b/picodata-jdbc-example/README.md
index a6e6715..7989869 100644
--- a/picodata-jdbc-example/README.md
+++ b/picodata-jdbc-example/README.md
@@ -26,7 +26,7 @@ docker-compose up -d
 3. Create new Picodata user for JDBC driver in the container:
 
 ```shell
-docker-compose exec picodata-1 bash -c "echo -ne \"\\set language sql\nCREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /home/picouser/picodata-1/admin.sock"
+docker-compose exec picodata-1 bash -c "echo -ne \"CREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /var/lib/picodata/picodata-1/admin.sock"
 ```
 
 4. Return to the initial directory `picodata-jdbc-example` and launch the example application.
diff --git a/picodata-jdbc-example/src/main/resources/docker-compose.yaml b/picodata-jdbc-example/src/main/resources/docker-compose.yaml
index 367a889..ea27a29 100644
--- a/picodata-jdbc-example/src/main/resources/docker-compose.yaml
+++ b/picodata-jdbc-example/src/main/resources/docker-compose.yaml
@@ -3,7 +3,7 @@ version: '3'
 
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -19,7 +19,7 @@
       - "5432:5432"
 
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -35,7 +35,7 @@
 
 
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-3
     hostname: picodata-3
     depends_on:
diff --git a/picodata-jdbc-spark-example/README.md b/picodata-jdbc-spark-example/README.md
index eee7995..10bd774 100644
--- a/picodata-jdbc-spark-example/README.md
+++ b/picodata-jdbc-spark-example/README.md
@@ -36,7 +36,7 @@ docker-compose up -d
 5. Create new Picodata user for JDBC driver in the container:
 
 ```shell
-docker-compose exec picodata-1 bash -c "echo -ne \"\\set language sql\nCREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /home/picouser/picodata-1/admin.sock"
+docker-compose exec picodata-1 bash -c "echo -ne \"CREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /var/lib/picodata/picodata-1/admin.sock"
 ```
 
 6. Execute the following command in the repository root directory:
diff --git a/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml b/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml
index 4475c98..22d8d1a 100644
--- a/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml
+++ b/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml
@@ -3,7 +3,7 @@ version: '3'
 
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -20,7 +20,7 @@
       - "5432:5432"
 
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -37,7 +37,7 @@
 
 
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-3
     hostname: picodata-3
     depends_on:
diff --git a/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala b/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala
index c450585..77789df 100644
--- a/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala
+++ b/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala
@@ -22,7 +22,7 @@ object PicodataJDBCSparkExample extends App {
 
     val spark = use(SparkSession.builder()
       .appName("Test Spark with picodata-jdbc")
-      .master("local")
+      .master("local[*]") // use all available threads
       .config("spark.ui.enabled", false)
       .config("spark.sql.warehouse.dir", warehouseLocationPath)
       .config("hive.metastore.warehouse.dir", warehouseLocationPath)
@@ -49,7 +49,7 @@ object PicodataJDBCSparkExample extends App {
 
     logger.info("Loaded 1M rows into memory")
 
-    val jdbcUrl = "jdbc:picodata://localhost:5432/?user=sqluser&password=P@ssw0rd&sslmode=disable"
+    val jdbcUrl = "jdbc:picodata://localhost:5432/"
 
     try {
       // only needed if the table is not created on Picodata server
@@ -57,19 +57,17 @@ object PicodataJDBCSparkExample extends App {
       val options = Map(
         ("driver", "io.picodata.jdbc.Driver"),
         ("url", jdbcUrl),
+        ("user", "sqluser"),
+        ("password", "P@ssw0rd"),
+        ("sslmode", "disable"),
         ("dbtable", "test")
       )
       val jdbcOptions = new JDBCOptions(options)
       val connection = JdbcDialects.get(jdbcUrl).createConnectionFactory(jdbcOptions)(-1)
-      var statement = connection.prepareStatement("DROP TABLE test")
-      try {
-        // IF EXISTS will be available in Picodata 24.6.1+
-        statement.executeUpdate()
-      } catch {
-        case e: Exception => if (!e.getMessage.contains("test not found")) throw e
-      }
+      var statement = connection.prepareStatement("DROP TABLE IF EXISTS test")
+      statement.executeUpdate()
       statement = connection.prepareStatement("CREATE TABLE test" +
-        "(id INTEGER PRIMARY KEY, unique_key VARCHAR(1000), book_name VARCHAR(100), author VARCHAR(100), year INTEGER)")
+        "(id UNSIGNED PRIMARY KEY, unique_key VARCHAR(1000), book_name VARCHAR(100), author VARCHAR(100), year INTEGER)")
       statement.executeUpdate()
 
       connection.close()
@@ -80,6 +78,9 @@ object PicodataJDBCSparkExample extends App {
        .mode(SaveMode.Append)
        // Picodata server connection options
        .option("url", jdbcUrl)
+        .option("sslmode", "disable")
+        .option("user", "sqluser")
+        .option("password", "P@ssw0rd")
        // this option is important as it optimizes single INSERT statements into multi-value INSERTs
        .option("reWriteBatchedInserts", "true")
        // this option value can be tuned according to the number of Spark workers you have
@@ -99,7 +100,28 @@ object PicodataJDBCSparkExample extends App {
        .option("sslmode", "disable")
        .option("user", "sqluser")
        .option("password", "P@ssw0rd")
-        .option("dbtable", "test")
+        // The next two options are necessary for querying large amounts of data.
+        // They must be set empirically depending on the expected size of the dataset.
+        // If these values are too small, you'll see errors like
+        // "Exceeded maximum number of rows (10000) in virtual table: 41530" or
+        // "Reached a limit on max executed vdbe opcodes. Limit: 1024000"
+        .option("options", "vtable_max_rows=512000,vdbe_max_steps=10240000")
+        // Set the number of partitions empirically depending on the
+        // available amount of CPU and memory resources
+        .option("numPartitions", "8")
+        // The following three options cannot be used together with the "query" option.
+        //
+        // partitionColumn must be a numeric, date, or timestamp column
+        //.option("partitionColumn", "id")
+        // Set the real first and last index values here if you want to process
+        // all the data in the table
+        //.option("lowerBound", "1")
+        //.option("upperBound", "1000000")
+        // Using the "query" option until the bug with integer boundaries is fixed
+        // in Picodata. This query will not return the full dataset.
+        .option("query", "SELECT * FROM \"test\" LIMIT 10")
+        // This option is to be used together with "partitionColumn"
+        //.option("dbtable", "test")
        .load()
     df.printSchema()
     df.limit(3).show()
-- 
GitLab
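
Once the integer-boundaries bug is fixed, the commented-out partitioning options above can replace the "query" option. What follows is a minimal sketch of that partitioned read, not part of the commit: the object name is hypothetical, the connection settings are the ones used in the patch, and the lowerBound/upperBound values are placeholders that must match the real minimum and maximum of the "id" column.

import org.apache.spark.sql.SparkSession

object PartitionedReadSketch extends App {
  val spark = SparkSession.builder()
    .appName("Partitioned read from Picodata")
    .master("local[*]")
    .getOrCreate()

  // Same connection settings as in the patch above
  val df = spark.read
    .format("jdbc")
    .option("driver", "io.picodata.jdbc.Driver")
    .option("url", "jdbc:picodata://localhost:5432/")
    .option("sslmode", "disable")
    .option("user", "sqluser")
    .option("password", "P@ssw0rd")
    // Raised virtual table and VDBE limits, as introduced by the patch
    .option("options", "vtable_max_rows=512000,vdbe_max_steps=10240000")
    // Partitioned read: requires "dbtable" instead of "query"
    .option("dbtable", "test")
    .option("partitionColumn", "id")
    // Placeholder bounds: use the real MIN(id) and MAX(id) of the table
    .option("lowerBound", "1")
    .option("upperBound", "1000000")
    .option("numPartitions", "8")
    .load()

  df.printSchema()
  println(s"Row count: ${df.count()}")

  spark.stop()
}

With these options Spark issues one range-bounded SELECT per partition over "id" instead of a single full scan, which also helps keep each partition's result within the vtable_max_rows limit.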