From e8bec6e1c0908db5e8f16503cfa33ebdaa9a1e21 Mon Sep 17 00:00:00 2001
From: "kai [they]" <coilysiren@gmail.com>
Date: Sat, 21 Oct 2023 17:03:07 -0700
Subject: [PATCH] SQL `CREATE TABLE` in python (#89)

---
 .github/workflows/config.yml |  2 +-
 data/sql_input_1.sql         |  6 +++
 data/sql_output_1.json       |  3 ++
 snippets/python/sql_test.py  | 80 +++++++++++++++++++++++++++++++++--
 src/python/sql_test.py       | 82 ++++++++++++++++++++++++++++++++++--
 tasks.py                     | 18 ++++----
 6 files changed, 174 insertions(+), 17 deletions(-)
 create mode 100644 data/sql_input_1.sql
 create mode 100644 data/sql_output_1.json

diff --git a/.github/workflows/config.yml b/.github/workflows/config.yml
index aa10317..887c634 100644
--- a/.github/workflows/config.yml
+++ b/.github/workflows/config.yml
@@ -28,4 +28,4 @@ jobs:
         uses: actions/checkout@v3
 
       - run: pip install invoke pyyaml
-      - run: invoke test ${{ matrix.language }} any any
+      - run: invoke test ${{ matrix.language }} any any --snippets
diff --git a/data/sql_input_1.sql b/data/sql_input_1.sql
new file mode 100644
index 0000000..bb65967
--- /dev/null
+++ b/data/sql_input_1.sql
@@ -0,0 +1,6 @@
+-- https://cratedb.com/docs/sql-99/en/latest/chapters/01.html
+-- https://www.postgresql.org/docs/16/sql-createtable.html
+-- https://www.postgresql.org/docs/16/sql-select.html
+CREATE TABLE city ();
+CREATE TABLE town ();
+SELECT table_name FROM information_schema.tables WHERE table_schema = 'public';
diff --git a/data/sql_output_1.json b/data/sql_output_1.json
new file mode 100644
index 0000000..86970c5
--- /dev/null
+++ b/data/sql_output_1.json
@@ -0,0 +1,3 @@
+{
+  "table_name": ["city", "town"]
+}
diff --git a/snippets/python/sql_test.py b/snippets/python/sql_test.py
index 1a07517..b9d290e 100644
--- a/snippets/python/sql_test.py
+++ b/snippets/python/sql_test.py
@@ -2,7 +2,81 @@
 import json
 
 
-def run_sql(input_sql: list[str]) -> list[str]:
-    output = {"table_name": ["city"]}
-    return [json.dumps(output)]
+class SQL:
+    data: dict = {}
+
+    def __init__(self) -> None:
+        self.data = {}
+
+    def information_schema_tables(self) -> list[dict]:
+        return [data["metadata"] for data in self.data.values()]
+
+    def create_table(self, *args, table_schema="public") -> dict:
+        table_name = args[2]
+        if not self.data.get(table_name):
+            self.data[table_name] = {
+                "metadata": {
+                    "table_name": table_name,
+                    "table_schema": table_schema,
+                },
+            }
+        return {}
+
+    create_table.sql = "CREATE TABLE"
+
+    def select(self, *args) -> dict:
+        output = {}
+
+        from_index = None
+        where_index = None
+        for i, arg in enumerate(args):
+            if arg == "FROM":
+                from_index = i
+            if arg == "WHERE":
+                where_index = i
+
+        # get select keys by getting the slice of args before FROM
+        select_keys = " ".join(args[1:from_index]).split(",")
+
+        # get the FROM target: the single token immediately after FROM
+        from_value = args[from_index + 1]
+
+        # consider "information_schema.tables" a special case until
+        # we figure out why it's so different from the others
+        if from_value == "information_schema.tables":
+            target = self.information_schema_tables()
+
+        # fmt: off
+        output = {
+            key: [
+                value for data in target
+                for key, value in data.items()
+                if key in select_keys
+            ]
+            for key in select_keys
+        }
+        # fmt: on
+
+        return output
+
+    select.sql = "SELECT"
+
+    sql_map = {
+        create_table.sql: create_table,
+        select.sql: select,
+    }
+
+    def run(self, input_sql: list[str]) -> list[str]:
+        output = {}
+
+        for line in input_sql:
+            if not line.startswith("--"):
+                words = line.split(" ")
+                for i in reversed(range(len(words))):
+                    key = " ".join(words[:i])
+                    if func := self.sql_map.get(key):
+                        output = func(self, *words)
+                        break
+
+        return [json.dumps(output)]
 
diff --git a/src/python/sql_test.py b/src/python/sql_test.py
index 3ac0af7..7f41e13 100644
--- a/src/python/sql_test.py
+++ b/src/python/sql_test.py
@@ -12,9 +12,83 @@
 import json
 
 
-def run_sql(input_sql: list[str]) -> list[str]:
-    output = {"table_name": ["city"]}
-    return [json.dumps(output)]
+class SQL:
+    data: dict = {}
+
+    def __init__(self) -> None:
+        self.data = {}
+
+    def information_schema_tables(self) -> list[dict]:
+        return [data["metadata"] for data in self.data.values()]
+
+    def create_table(self, *args, table_schema="public") -> dict:
+        table_name = args[2]
+        if not self.data.get(table_name):
+            self.data[table_name] = {
+                "metadata": {
+                    "table_name": table_name,
+                    "table_schema": table_schema,
+                },
+            }
+        return {}
+
+    create_table.sql = "CREATE TABLE"
+
+    def select(self, *args) -> dict:
+        output = {}
+
+        from_index = None
+        where_index = None
+        for i, arg in enumerate(args):
+            if arg == "FROM":
+                from_index = i
+            if arg == "WHERE":
+                where_index = i
+
+        # get select keys by getting the slice of args before FROM
+        select_keys = " ".join(args[1:from_index]).split(",")
+
+        # get the FROM target: the single token immediately after FROM
+        from_value = args[from_index + 1]
+
+        # consider "information_schema.tables" a special case until
+        # we figure out why it's so different from the others
+        if from_value == "information_schema.tables":
+            target = self.information_schema_tables()
+
+        # fmt: off
+        output = {
+            key: [
+                value for data in target
+                for key, value in data.items()
+                if key in select_keys
+            ]
+            for key in select_keys
+        }
+        # fmt: on
+
+        return output
+
+    select.sql = "SELECT"
+
+    sql_map = {
+        create_table.sql: create_table,
+        select.sql: select,
+    }
+
+    def run(self, input_sql: list[str]) -> list[str]:
+        output = {}
+
+        for line in input_sql:
+            if not line.startswith("--"):
+                words = line.split(" ")
+                for i in reversed(range(len(words))):
+                    key = " ".join(words[:i])
+                    if func := self.sql_map.get(key):
+                        output = func(self, *words)
+                        break
+
+        return [json.dumps(output)]
 
 
 ######################
@@ -22,4 +96,4 @@ def run_sql(input_sql: list[str]) -> list[str]:
 ######################
 
 if __name__ == "__main__":
-    helpers.run(run_sql)
+    helpers.run(SQL().run)
diff --git a/tasks.py b/tasks.py
index db3215d..f104be7 100644
--- a/tasks.py
+++ b/tasks.py
@@ -1,4 +1,5 @@
 # builtin packages
+import unittest
 import filecmp
 import glob
 import os
@@ -163,6 +164,8 @@ def generate(self, language, config, script_path, input_file_path):
         docker_run_test_list = [
             "docker",
             "run",
+            "--rm",
+            f"--name={language}",
             f"--volume={self.base_directory}:/workdir",
             "-w=/workdir",
         ]
@@ -297,13 +300,9 @@ def run_tests(self, input_script):
                         prepared_file_data = json.load(reader)
                     with open(ctx.script_output_file_path, "r", encoding="utf-8") as reader:
                         script_output_file_data = json.load(reader)
-                    if prepared_file_data == script_output_file_data:
-                        self.set_success_status(True)
-                        print(f"\t🟢 {ctx.script_relative_path} on {ctx.input_file_path} succeeded")
-                    else:
-                        self.set_success_status(False)
-                        print(f"\t🔴 {ctx.script_relative_path} on {ctx.input_file_path} failed, reason:")
-                        print(f"\t\t output file {ctx.script_output_file_name} has does not match the prepared file")
+                    unittest.TestCase().assertDictEqual(prepared_file_data, script_output_file_data)
+                    self.set_success_status(True)
+                    print(f"\t🟢 {ctx.script_relative_path} on {ctx.input_file_path} succeeded")
                     continue
 
                 # check if the output file matches the prepared file
@@ -392,12 +391,13 @@ def show_results(self):
 
 
 @invoke.task
-def test(ctx: invoke.Context, language, input_script, input_data_index):
+def test(ctx: invoke.Context, language, input_script, input_data_index, snippets=False):
     # language is the programming language to run scripts in
     # input_script is the name of a script you want to run
     runner = TestRunner(ctx, language, input_data_index)
     runner.run_tests(input_script)
-    runner.generate_snippets(input_script)
+    if snippets:
+        runner.generate_snippets(input_script)
     runner.show_results()