Commit 8e20ca70 authored by Alexey Kuzin

Update JDBC-Spark example with new options for VDBE opcodes and virtual table limits

parent fb1f5390
Merge request !6: Update JDBC-Spark example with new options for VDBE opcodes and virtual table limits
@@ -26,7 +26,7 @@ docker-compose up -d
 3. Set up driver user authorization for Picodata in the container:
 ```shell
-docker-compose exec picodata-1 bash -c "echo -ne \"\\set language sql\nALTER USER \\\"admin\\\" WITH PASSWORD 'P@ssw0rd';\" | picodata admin /home/picouser/picodata-1/admin.sock"
+docker-compose exec picodata-1 bash -c "echo -ne \"ALTER USER \\\"admin\\\" WITH PASSWORD 'P@ssw0rd';\" | picodata admin /var/lib/picodata/picodata-1/admin.sock"
 ```
 4. Return to the initial directory `picodata-java-example` and launch the example application.
......
@@ -3,7 +3,7 @@ version: '3'
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -16,7 +16,7 @@ services:
       - "3301:3301"
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -32,7 +32,7 @@ services:
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-3
     hostname: picodata-3
     depends_on:
......
@@ -26,7 +26,7 @@ docker-compose up -d
 3. Create new Picodata user for JDBC driver in the container:
 ```shell
-docker-compose exec picodata-1 bash -c "echo -ne \"\\set language sql\nCREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /home/picouser/picodata-1/admin.sock"
+docker-compose exec picodata-1 bash -c "echo -ne \"CREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /var/lib/picodata/picodata-1/admin.sock"
 ```
 4. Return to the initial directory `picodata-jdbc-example` and launch the example application.
......
@@ -3,7 +3,7 @@ version: '3'
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -19,7 +19,7 @@ services:
      - "5432:5432"
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -35,7 +35,7 @@ services:
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-3
     hostname: picodata-3
     depends_on:
......
@@ -36,7 +36,7 @@ docker-compose up -d
 5. Create new Picodata user for JDBC driver in the container:
 ```shell
-docker-compose exec picodata-1 bash -c "echo -ne \"\\set language sql\nCREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /home/picouser/picodata-1/admin.sock"
+docker-compose exec picodata-1 bash -c "echo -ne \"CREATE USER \\\"sqluser\\\" WITH PASSWORD 'P@ssw0rd' USING md5;\nGRANT CREATE TABLE TO \\\"sqluser\\\";\" | picodata admin /var/lib/picodata/picodata-1/admin.sock"
 ```
 6. Execute the following command in the repository root directory:
......
@@ -3,7 +3,7 @@ version: '3'
 services:
   picodata-1:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-1
     hostname: picodata-1
     environment:
@@ -20,7 +20,7 @@ services:
      - "5432:5432"
   picodata-2:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-2
     hostname: picodata-2
     depends_on:
@@ -37,7 +37,7 @@ services:
   picodata-3:
-    image: docker-public.binary.picodata.io/picodata:24.6.0
+    image: docker-public.binary.picodata.io/picodata:master
     container_name: picodata-3
     hostname: picodata-3
     depends_on:
......
@@ -22,7 +22,7 @@ object PicodataJDBCSparkExample extends App {
   val spark = use(SparkSession.builder()
     .appName("Test Spark with picodata-jdbc")
-    .master("local")
+    .master("local[*]") // use all available threads
     .config("spark.ui.enabled", false)
     .config("spark.sql.warehouse.dir", warehouseLocationPath)
     .config("hive.metastore.warehouse.dir", warehouseLocationPath)
@@ -49,7 +49,7 @@ object PicodataJDBCSparkExample extends App {
     logger.info("Loaded 1M rows into memory")
-    val jdbcUrl = "jdbc:picodata://localhost:5432/?user=sqluser&password=P@ssw0rd&sslmode=disable"
+    val jdbcUrl = "jdbc:picodata://localhost:5432/"
     try {
       // only needed if the table is not created on Picodata server
@@ -57,19 +57,17 @@ object PicodataJDBCSparkExample extends App {
       val options = Map(
         ("driver", "io.picodata.jdbc.Driver"),
         ("url", jdbcUrl),
-        ("user", "sqluser"),
-        ("password", "P@ssw0rd"),
-        ("sslmode", "disable"),
         ("dbtable", "test")
       )
       val jdbcOptions = new JDBCOptions(options)
       val connection = JdbcDialects.get(jdbcUrl).createConnectionFactory(jdbcOptions)(-1)
-      var statement = connection.prepareStatement("DROP TABLE test")
-      try {
-        // IF EXISTS will be available in Picodata 24.6.1+
-        statement.executeUpdate()
-      } catch {
-        case e: Exception => if (!e.getMessage.contains("test not found")) throw e
-      }
+      var statement = connection.prepareStatement("DROP TABLE IF EXISTS test")
+      statement.executeUpdate()
       statement = connection.prepareStatement("CREATE TABLE test" +
-        "(id INTEGER PRIMARY KEY, unique_key VARCHAR(1000), book_name VARCHAR(100), author VARCHAR(100), year INTEGER)")
+        "(id UNSIGNED PRIMARY KEY, unique_key VARCHAR(1000), book_name VARCHAR(100), author VARCHAR(100), year INTEGER)")
       statement.executeUpdate()
       connection.close()
@@ -80,6 +78,9 @@ object PicodataJDBCSparkExample extends App {
         .mode(SaveMode.Append)
         // Picodata server connection options
         .option("url", jdbcUrl)
+        .option("sslmode", "disable")
+        .option("user", "sqluser")
+        .option("password", "P@ssw0rd")
         // this option is important as it optimizes single INSERT statements into multi-value INSERTs
         .option("reWriteBatchedInserts", "true")
         // this option value can be tuned according to the number of Spark workers you have
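After this hunk the write path no longer carries credentials in the JDBC URL. Assembled, it looks roughly like the following sketch (not the complete example: `booksDF` stands for the DataFrame built earlier in the file, and further tuning options that follow in the file are elided):

```scala
import org.apache.spark.sql.{DataFrame, SaveMode}

// A sketch of the write path with credentials passed as options rather than
// embedded in the URL; option values mirror the diff above.
def writeBooks(booksDF: DataFrame): Unit =
  booksDF.write
    .format("jdbc")
    .mode(SaveMode.Append)
    .option("driver", "io.picodata.jdbc.Driver")
    .option("url", "jdbc:picodata://localhost:5432/")
    // connection credentials now travel as separate options
    .option("sslmode", "disable")
    .option("user", "sqluser")
    .option("password", "P@ssw0rd")
    // rewrites single-row INSERTs into multi-value INSERTs
    .option("reWriteBatchedInserts", "true")
    .option("dbtable", "test")
    .save()
```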
@@ -99,7 +100,28 @@ object PicodataJDBCSparkExample extends App {
         .option("sslmode", "disable")
         .option("user", "sqluser")
         .option("password", "P@ssw0rd")
-        .option("dbtable", "test")
+        // The next two options are necessary for querying large amounts of data.
+        // They must be set empirically depending on the expected size of the dataset.
+        // If these values are too small, you'll see errors like
+        // "Exceeded maximum number of rows (10000) in virtual table: 41530" or
+        // "Reached a limit on max executed vdbe opcodes. Limit: 1024000"
+        .option("options", "vtable_max_rows=512000,vdbe_max_steps=10240000")
+        // Set the number of partitions empirically depending on the
+        // available amount of CPU and memory resources
+        .option("numPartitions", "8")
+        // The following 3 options cannot be used together with the "query" option.
+        //
+        // partitionColumn must be a numeric, date, or timestamp column
+        //.option("partitionColumn", "id")
+        // Set the real first and last index values here if you want to process
+        // all the data in the table
+        //.option("lowerBound", "1")
+        //.option("upperBound", "1000000")
+        // Using the query option until the bug with integer boundaries is fixed
+        // in Picodata. This query will not get us accurate results.
+        .option("query", "SELECT * FROM \"test\" LIMIT 10")
+        // This option is to be used with "partitionColumn"
+        //.option("dbtable", "test")
         .load()
     df.printSchema()
     df.limit(3).show()
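For reference, the partitioned read that the commented-out options describe would look roughly like the sketch below once the integer-boundary issue mentioned in the hunk is fixed (the bounds and partition count are the illustrative values from the comments, and `spark` is the session from the example):

```scala
import org.apache.spark.sql.SparkSession

// A sketch of a partitioned read over the numeric primary key; mutually
// exclusive with the "query" option used in the example above.
def readBooks(spark: SparkSession) =
  spark.read
    .format("jdbc")
    .option("driver", "io.picodata.jdbc.Driver")
    .option("url", "jdbc:picodata://localhost:5432/")
    .option("sslmode", "disable")
    .option("user", "sqluser")
    .option("password", "P@ssw0rd")
    // raise server-side limits for large result sets
    .option("options", "vtable_max_rows=512000,vdbe_max_steps=10240000")
    .option("dbtable", "test")
    // split the scan into 8 ranges of the "id" column between the bounds
    .option("numPartitions", "8")
    .option("partitionColumn", "id")
    .option("lowerBound", "1")
    .option("upperBound", "1000000")
    .load()
```

Spark issues one range query per partition, so the bounds should bracket the real minimum and maximum `id` values to cover the whole table.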
......