From 8e20ca706a829fdff0ae93fdd03f896e06252c27 Mon Sep 17 00:00:00 2001
From: Alexey Kuzin <akudiyar@gmail.com>
Date: Tue, 26 Nov 2024 01:35:52 +0100
Subject: [PATCH] Update JDBC-Spark example with new options for VDBE opcode
 and virtual table row limits

---
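Notes:

Besides the new limit options, the patch moves the driver credentials out
of the JDBC URL into separate connection options, switches the example
images to the :master tag, updates the admin socket path to
/var/lib/picodata, and drops the "\set language sql" prefix from the
admin console commands.

The key addition is the "options" connection parameter, which raises
Picodata's per-query execution limits for large reads. A minimal sizing
sketch of how the two limits relate (the heuristic and the names
expectedRows / safetyFactor are illustrative assumptions, not driver API):

    // Assumed heuristic: give the virtual table enough headroom for the
    // expected row count, and the VDBE an opcode budget proportional to it.
    val expectedRows  = 1000000L
    val safetyFactor  = 2L
    val vtableMaxRows = expectedRows * safetyFactor
    val vdbeMaxSteps  = vtableMaxRows * 20L // assumed per-row opcode budget
    val limits        = s"vtable_max_rows=$vtableMaxRows,vdbe_max_steps=$vdbeMaxSteps"
    // Then pass the string to the reader: .option("options", limits)
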
 picodata-java-example/README.md               |  2 +-
 .../src/main/resources/docker-compose.yml     |  6 +--
 picodata-jdbc-example/README.md               |  2 +-
 .../src/main/resources/docker-compose.yaml    |  6 +--
 picodata-jdbc-spark-example/README.md         |  2 +-
 .../src/main/resources/docker-compose.yaml    |  6 +--
 .../main/scala/PicodataJDBCSparkExample.scala | 44 ++++++++++++++-----
 7 files changed, 45 insertions(+), 23 deletions(-)
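
Once the bug with integer partition boundaries is fixed in Picodata, the
commented-out partitioned read in the Scala hunk below can replace the
"query" option. A sketch of the intended configuration, assembled from the
commented-out options (untested until the fix lands; "spark" and "jdbcUrl"
are the values defined in the example, and the bounds match the generated
1M-row dataset):

    val df = spark.read
      .format("jdbc")
      .option("driver", "io.picodata.jdbc.Driver")
      .option("url", jdbcUrl)
      .option("user", "sqluser")
      .option("password", "P@ssw0rd")
      .option("sslmode", "disable")
      .option("options", "vtable_max_rows=512000,vdbe_max_steps=10240000")
      .option("dbtable", "test")       // used instead of "query"
      .option("partitionColumn", "id") // numeric primary key column
      .option("lowerBound", "1")       // first id in the table
      .option("upperBound", "1000000") // last id in the table
      .option("numPartitions", "8")
      .load()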

diff --git a/picodata-java-example/README.md b/picodata-java-example/README.md
index 63d88bc..29a1cd8 100644
--- a/picodata-java-example/README.md
+++ b/picodata-java-example/README.md
@@ -26,7 +26,7 @@ docker-compose up -d
 3. Set up driver user authorization for Picodata in the container:
 
 ```shell
-docker-compose exec picodata-1 bash -c "echo -ne \"\\set language sql\nALTER USER \\\"admin\\\" WITH PASSWORD 'P@ssw0rd';\" | picodata admin /home/picouser/picodata-1/admin.sock"
+docker-compose exec picodata-1 bash -c "echo -ne \"ALTER USER \\\"admin\\\" WITH PASSWORD 'P@ssw0rd';\" | picodata admin /var/lib/picodata/picodata-1/admin.sock"
 ```
 
 4. Return to the initial directory `picodata-java-example` and launch the example application.
diff --git a/picodata-java-example/src/main/resources/docker-compose.yml b/picodata-java-example/src/main/resources/docker-compose.yml
index b83aef3..40cfd7b 100644
--- a/picodata-java-example/src/main/resources/docker-compose.yml
+++ b/picodata-java-example/src/main/resources/docker-compose.yml
@@ -3,7 +3,7 @@ version: '3'
 
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -16,7 +16,7 @@ services:
       - "3301:3301"
 
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -32,7 +32,7 @@ services:
 
 
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-3
     hostname: picodata-3
     depends_on:
diff --git a/picodata-jdbc-example/README.md b/picodata-jdbc-example/README.md
index a6e6715..7989869 100644
--- a/picodata-jdbc-example/README.md
+++ b/picodata-jdbc-example/README.md
@@ -26,7 +26,7 @@ docker-compose up -d
 3. Create new Picodata user for JDBC driver in the container:
 
 ```shell
-docker-compose exec picodata-1 bash -c "echo -ne \"\\set language sql\nCREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /home/picouser/picodata-1/admin.sock"
+docker-compose exec picodata-1 bash -c "echo -ne \"CREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /var/lib/picodata/picodata-1/admin.sock"
 ```
 
 4. Return to the initial directory `picodata-jdbc-example` and launch the example application.
diff --git a/picodata-jdbc-example/src/main/resources/docker-compose.yaml b/picodata-jdbc-example/src/main/resources/docker-compose.yaml
index 367a889..ea27a29 100644
--- a/picodata-jdbc-example/src/main/resources/docker-compose.yaml
+++ b/picodata-jdbc-example/src/main/resources/docker-compose.yaml
@@ -3,7 +3,7 @@ version: '3'
 
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -19,7 +19,7 @@ services:
       - "5432:5432"
 
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -35,7 +35,7 @@ services:
 
 
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-3
     hostname: picodata-3
     depends_on:
diff --git a/picodata-jdbc-spark-example/README.md b/picodata-jdbc-spark-example/README.md
index eee7995..10bd774 100644
--- a/picodata-jdbc-spark-example/README.md
+++ b/picodata-jdbc-spark-example/README.md
@@ -36,7 +36,7 @@ docker-compose up -d
 5. Create new Picodata user for JDBC driver in the container:
 
 ```shell
-docker-compose exec picodata-1 bash -c "echo -ne \"\\set language sql\nCREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /home/picouser/picodata-1/admin.sock"
+docker-compose exec picodata-1 bash -c "echo -ne \"CREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /var/lib/picodata/picodata-1/admin.sock"
 ```
 
 6. Execute the following command in the repository root directory:
diff --git a/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml b/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml
index 4475c98..22d8d1a 100644
--- a/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml
+++ b/picodata-jdbc-spark-example/src/main/resources/docker-compose.yaml
@@ -3,7 +3,7 @@ version: '3'
 
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -20,7 +20,7 @@ services:
       - "5432:5432"
 
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -37,7 +37,7 @@ services:
 
 
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-3
     hostname: picodata-3
     depends_on:
diff --git a/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala b/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala
index c450585..77789df 100644
--- a/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala
+++ b/picodata-jdbc-spark-example/src/main/scala/PicodataJDBCSparkExample.scala
@@ -22,7 +22,7 @@ object PicodataJDBCSparkExample extends App {
 
     val spark = use(SparkSession.builder()
       .appName("Test Spark with picodata-jdbc")
-      .master("local")
+      .master("local[*]") // use all available threads
       .config("spark.ui.enabled", false)
       .config("spark.sql.warehouse.dir", warehouseLocationPath)
       .config("hive.metastore.warehouse.dir", warehouseLocationPath)
@@ -49,7 +49,7 @@ object PicodataJDBCSparkExample extends App {
 
     logger.info("Loaded 1M rows into memory")
 
-    val jdbcUrl = "jdbc:picodata://localhost:5432/?user=sqluser&password=P@ssw0rd&sslmode=disable"
+    val jdbcUrl = "jdbc:picodata://localhost:5432/"
 
     try {
       // only needed if the table is not created on Picodata server
@@ -57,19 +57,17 @@ object PicodataJDBCSparkExample extends App {
       val options = Map(
         ("driver", "io.picodata.jdbc.Driver"),
         ("url", jdbcUrl),
+        ("user", "sqluser"),
+        ("password", "P@ssw0rd"),
+        ("sslmode", "disable"),
         ("dbtable", "test")
       )
       val jdbcOptions = new JDBCOptions(options)
       val connection = JdbcDialects.get(jdbcUrl).createConnectionFactory(jdbcOptions)(-1)
-      var statement = connection.prepareStatement("DROP TABLE test")
-      try {
-        // IF EXISTS will be available in Picodata 24.6.1+
-        statement.executeUpdate()
-      } catch {
-        case e: Exception => if (!e.getMessage.contains("test not found")) throw e
-      }
+      var statement = connection.prepareStatement("DROP TABLE IF EXISTS test")
+      statement.executeUpdate()
       statement = connection.prepareStatement("CREATE TABLE test" +
-        "(id INTEGER PRIMARY KEY, unique_key VARCHAR(1000), book_name VARCHAR(100), author VARCHAR(100), year INTEGER)")
+        "(id UNSIGNED PRIMARY KEY, unique_key VARCHAR(1000), book_name VARCHAR(100), author VARCHAR(100), year INTEGER)")
       statement.executeUpdate()
       connection.close()
 
@@ -80,6 +78,9 @@ object PicodataJDBCSparkExample extends App {
         .mode(SaveMode.Append)
         // Picodata server connection options
         .option("url", jdbcUrl)
+        .option("sslmode", "disable")
+        .option("user", "sqluser")
+        .option("password", "P@ssw0rd")
         // this option is important as it optimizes single INSERT statements into multi-value INSERTs
         .option("reWriteBatchedInserts", "true")
         // this option value can be tuned according to the number of Spark workers you have
@@ -99,7 +100,28 @@ object PicodataJDBCSparkExample extends App {
         .option("sslmode", "disable")
         .option("user", "sqluser")
         .option("password", "P@ssw0rd")
-        .option("dbtable", "test")
+        // The next two options are necessary when querying large amounts of data.
+        // Tune them empirically depending on the expected size of the dataset.
+        // If these values are too small, you'll see errors like
+        // "Exceeded maximum number of rows (10000) in virtual table: 41530" or
+        // "Reached a limit on max executed vdbe opcodes. Limit: 1024000"
+        .option("options", "vtable_max_rows=512000,vdbe_max_steps=10240000")
+        // Set the number of partitions empirically depending on the
+        // available CPU and memory resources
+        .option("numPartitions", "8")
+        // The following three options cannot be combined with the "query" option.
+        //
+        // partitionColumn must be a numeric, date, or timestamp column
+        //.option("partitionColumn", "id")
+        // Set the actual first and last id values here to process all
+        // the data in the table
+        //.option("lowerBound", "1")
+        //.option("upperBound", "1000000")
+        // Use the "query" option until the bug with integer partition boundaries
+        // is fixed in Picodata; this LIMIT query returns only a sample of the data.
+        .option("query", "SELECT * FROM \"test\" LIMIT 10")
+        // Use this option together with "partitionColumn" instead of "query"
+        //.option("dbtable", "test")
         .load()
       df.printSchema()
       df.limit(3).show()
-- 
GitLab