Skip to content

Commit fb0ee0c

Browse files
committed
Updated the inserted_hours method
1 parent 9a9f3cf commit fb0ee0c

File tree

3 files changed

+54
-5
lines changed

3 files changed

+54
-5
lines changed

check.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -82,9 +82,12 @@
8282

8383
from ingest.lcs import (
8484
load_metadata,
85+
load_metadata_batch,
86+
)
87+
88+
from ingest.lcsV2 import (
8589
load_measurements,
8690
load_measurements_batch,
87-
load_metadata_batch,
8891
)
8992

9093
from ingest.fetch import (

ingest/etl_process_measurements.sql

+23-1
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ FROM staging_measurements;
5353
-- this is a short term fix
5454
-- a long term fix would not allow duplicate source_id's
5555
WITH staged_sensors AS (
56-
-- this first part signficantly speeds it up on slow machines
56+
-- this first part significantly speeds it up on slow machines
5757
SELECT DISTINCT ingest_id
5858
FROM staging_measurements
5959
), ranked_sensors AS (
@@ -377,6 +377,28 @@ SET datetime_last = GREATEST(sensors_rollup.datetime_last, EXCLUDED.datetime_las
377377
SET modified_on = now();
378378

379379

380+
381+
WITH inserted_hours AS (
382+
-- first we group things, adding an hour to make it time-ending after truncating
383+
SELECT datetime + '1h'::interval as datetime
384+
, utc_offset(datetime + '1h'::interval, tz.tzid) as tz_offset
385+
FROM measurements m
386+
JOIN sensors s ON (s.sensors_id = m.sensors_id)
387+
JOIN sensor_systems sy ON (s.sensor_systems_id = sy.sensor_systems_id)
388+
JOIN sensor_nodes sn ON (sy.sensor_nodes_id = sn.sensor_nodes_id)
389+
JOIN timezones tz ON (sn.timezones_id = tz.timezones_id)
390+
WHERE m.added_on > now() - '1h'::interval
391+
GROUP BY 1, 2
392+
)
393+
INSERT INTO hourly_data_queue (datetime, tz_offset)
394+
SELECT as_utc_hour(datetime, tz_offset), tz_offset
395+
FROM inserted_hours
396+
GROUP BY 1, 2
397+
ON CONFLICT (datetime, tz_offset) DO UPDATE
398+
SET modified_on = now();
399+
400+
401+
380402
--Update the export queue/logs to export these records
381403
--wrap it in a block just in case the database does not have this module installed
382404
--we subtract the second because the data is assumed to be time ending

ingest/lcs_meas_ingest.sql

+27-3
Original file line numberDiff line numberDiff line change
@@ -47,17 +47,41 @@ INTO __total_measurements
4747
FROM meas;
4848

4949

50+
-- -- The ranking is to deal with the current possibility
51+
-- -- that duplicate sensors with the same ingest/source id are created
52+
-- -- this is a short term fix
53+
-- -- a long term fix would not allow duplicate source_id's
54+
-- WITH ranked_sensors AS (
55+
-- SELECT s.sensors_id
56+
-- , s.source_id
57+
-- , RANK() OVER (PARTITION BY s.source_id ORDER BY added_on ASC) as rnk
58+
-- FROM sensors s
59+
-- JOIN meas m ON (s.source_id = m.ingest_id)
60+
-- WHERE s.is_active
61+
-- ), active_sensors AS (
62+
-- SELECT source_id
63+
-- , sensors_id
64+
-- FROM ranked_sensors
65+
-- WHERE rnk = 1)
66+
-- UPDATE meas
67+
-- SET sensors_id=s.sensors_id
68+
-- FROM active_sensors s
69+
-- WHERE s.source_id=ingest_id;
70+
5071
-- The ranking is to deal with the current possibility
5172
-- that duplicate sensors with the same ingest/source id are created
5273
-- this is a short term fix
5374
-- a long term fix would not allow duplicate source_id's
54-
WITH ranked_sensors AS (
75+
WITH staged_sensors AS (
76+
-- this first part significantly speeds it up on slow machines
77+
SELECT DISTINCT ingest_id
78+
FROM meas
79+
), ranked_sensors AS (
5580
SELECT s.sensors_id
5681
, s.source_id
5782
, RANK() OVER (PARTITION BY s.source_id ORDER BY added_on ASC) as rnk
5883
FROM sensors s
59-
JOIN meas m ON (s.source_id = m.ingest_id)
60-
WHERE s.is_active
84+
JOIN staged_sensors m ON (s.source_id = m.ingest_id)
6185
), active_sensors AS (
6286
SELECT source_id
6387
, sensors_id

0 commit comments

Comments
 (0)