From e9dd47007809143426eafe4b546dfc4dcf7ec78f Mon Sep 17 00:00:00 2001
From: Denis Smirnov <sd@picodata.io>
Date: Tue, 16 Aug 2022 12:52:25 +0700
Subject: [PATCH] fix: improve "not in" motion policies

Previously we used a pessimistic approach with a full motion for
"not in" operator. But after some research it seems that we can use
the same logic as we do for equality and "in" operator, i.e. choose
either local, either segment or either full motion policies.
---
 src/ir/transformation/redistribution.rs       |  11 +-
 src/ir/transformation/redistribution/tests.rs |   3 +
 .../redistribution/tests/not_in.rs            | 104 ++++++++++++++++++
 3 files changed, 110 insertions(+), 8 deletions(-)
 create mode 100644 src/ir/transformation/redistribution/tests/not_in.rs

diff --git a/src/ir/transformation/redistribution.rs b/src/ir/transformation/redistribution.rs
index 9c113e4f9f..2bfd9bbb9b 100644
--- a/src/ir/transformation/redistribution.rs
+++ b/src/ir/transformation/redistribution.rs
@@ -280,7 +280,7 @@ impl Plan {
     ) -> Result<MotionPolicy, QueryPlannerError> {
         let outer_dist = self.get_distribution(outer_id)?;
         let inner_dist = self.get_distribution(inner_id)?;
-        if Bool::Eq == *op || Bool::In == *op {
+        if Bool::Eq == *op || Bool::In == *op || Bool::NotEq == *op || Bool::NotIn == *op {
             if let Distribution::Segment {
                 keys: ref keys_outer,
             } = outer_dist
@@ -716,15 +716,10 @@ impl Plan {
                 }
                 (Expression::Row { .. }, Expression::Row { .. }) => {
                     match bool_op.op {
-                        Bool::Eq | Bool::In => {
+                        Bool::Eq | Bool::In | Bool::NotEq | Bool::NotIn => {
                             self.join_policy_for_eq(rel_id, bool_op.left, bool_op.right)?
                         }
-                        Bool::NotEq
-                        | Bool::NotIn
-                        | Bool::Gt
-                        | Bool::GtEq
-                        | Bool::Lt
-                        | Bool::LtEq => MotionPolicy::Full,
+                        Bool::Gt | Bool::GtEq | Bool::Lt | Bool::LtEq => MotionPolicy::Full,
                         Bool::And | Bool::Or => {
                             // "a and 1" or "a or 1" expressions make no sense.
                             return Err(QueryPlannerError::CustomError(
diff --git a/src/ir/transformation/redistribution/tests.rs b/src/ir/transformation/redistribution/tests.rs
index 827c1ede6f..75602f1c7b 100644
--- a/src/ir/transformation/redistribution/tests.rs
+++ b/src/ir/transformation/redistribution/tests.rs
@@ -884,3 +884,6 @@ mod between;
 
 #[cfg(test)]
 mod except;
+
+#[cfg(test)]
+mod not_in;
diff --git a/src/ir/transformation/redistribution/tests/not_in.rs b/src/ir/transformation/redistribution/tests/not_in.rs
new file mode 100644
index 0000000000..3809f20964
--- /dev/null
+++ b/src/ir/transformation/redistribution/tests/not_in.rs
@@ -0,0 +1,104 @@
+use crate::ir::operator::Relational;
+use crate::ir::transformation::helpers::sql_to_ir;
+use crate::ir::transformation::redistribution::tests::get_motion_id;
+use crate::ir::transformation::redistribution::*;
+use pretty_assertions::assert_eq;
+
+#[test]
+fn not_in1() {
+    let query = r#"SELECT 1 FROM "hash_testing" AS "t" WHERE "product_code" NOT IN (
+        SELECT "product_code" FROM "hash_testing_hist")"#;
+
+    let mut plan = sql_to_ir(query, &[]);
+    plan.add_motions().unwrap();
+    let motion_id = *get_motion_id(&plan, 0, 0).unwrap();
+    let motion = plan.get_relation_node(motion_id).unwrap();
+    if let Relational::Motion { policy, .. } = motion {
+        assert_eq!(*policy, MotionPolicy::Full);
+    } else {
+        panic!("Expected a motion node");
+    }
+}
+
+#[test]
+fn not_in2() {
+    let query = r#"SELECT 1 FROM "hash_testing" AS "t" WHERE ("identification_number", "product_code") NOT IN (
+        SELECT "identification_number", "product_code" FROM "hash_testing_hist" AS "t")"#;
+
+    let mut plan = sql_to_ir(query, &[]);
+    plan.add_motions().unwrap();
+    let expected: Option<Vec<Vec<usize>>> = None;
+    assert_eq!(expected, plan.slices);
+}
+
+#[test]
+fn not_in3() {
+    let query = r#"SELECT 1 FROM "hash_testing" AS "t" WHERE ("identification_number", "product_code") NOT IN (
+        SELECT "product_code", 42 FROM "hash_testing_hist")"#;
+
+    let mut plan = sql_to_ir(query, &[]);
+    plan.add_motions().unwrap();
+    let motion_id = *get_motion_id(&plan, 0, 0).unwrap();
+    let motion = plan.get_relation_node(motion_id).unwrap();
+    if let Relational::Motion { policy, .. } = motion {
+        assert_eq!(
+            *policy,
+            MotionPolicy::Segment(
+                (Key {
+                    positions: vec![0, 1]
+                })
+                .into()
+            )
+        );
+    } else {
+        panic!("Expected a motion node");
+    }
+}
+
+#[test]
+fn not_in4() {
+    let query = r#"SELECT 1 FROM "hash_testing" AS "t" WHERE ("product_code", "identification_number") NOT IN (
+        SELECT "product_code", 42 FROM "hash_testing_hist")"#;
+
+    let mut plan = sql_to_ir(query, &[]);
+    plan.add_motions().unwrap();
+    let motion_id = *get_motion_id(&plan, 0, 0).unwrap();
+    let motion = plan.get_relation_node(motion_id).unwrap();
+    if let Relational::Motion { policy, .. } = motion {
+        assert_eq!(
+            *policy,
+            MotionPolicy::Segment(
+                (Key {
+                    positions: vec![1, 0]
+                })
+                .into()
+            )
+        );
+    } else {
+        panic!("Expected a motion node");
+    }
+}
+
+#[test]
+fn not_in5() {
+    let query = r#"SELECT 1 FROM "hash_testing" AS "t" WHERE ("identification_number", "product_code") NOT IN (
+        SELECT 42, 666 FROM "hash_testing_hist" AS "t")"#;
+
+    let mut plan = sql_to_ir(query, &[]);
+    plan.add_motions().unwrap();
+    let motion_id = *get_motion_id(&plan, 0, 0).unwrap();
+    let motion = plan.get_relation_node(motion_id).unwrap();
+    if let Relational::Motion { policy, .. } = motion {
+        assert_eq!(
+            *policy,
+            MotionPolicy::Segment(
+                (Key {
+                    positions: vec![0, 1]
+                })
+                .into()
+            )
+        );
+    } else {
+        panic!("Expected a motion node");
+    }
+}
-- 
GitLab