From e05dfe9d3a88858c2010a421dbc48ddfabbfb638 Mon Sep 17 00:00:00 2001 From: greghutch <4603341+greghutch@users.noreply.github.com> Date: Thu, 30 Jun 2022 16:37:42 -0400 Subject: [PATCH] feat: add some sample queries to fleet-debugger This attempts to get sample bigquery queries into a more easily consumed form than our documentation. For now, the set of queries is sourced from my somewhat unstructured explorations. Once we have a published dataset, we can add per-query documentation and reproducible queries for the user. fix: #136 --- queries/README.md | 35 +++++++++++++++++ queries/lmfs/basics/active_tasks.js | 28 +++++++++++++ queries/lmfs/basics/active_vehicles.js | 28 +++++++++++++ queries/lmfs/basics/created_tasks.js | 28 +++++++++++++ queries/lmfs/basics/created_vehicles.js | 28 +++++++++++++ queries/lmfs/basics/task_outcomes.js | 31 +++++++++++++++ .../offroute_fraction_per_vehicle.js | 39 +++++++++++++++++++ .../location_reliability/magic_numbers.js | 34 ++++++++++++++++ .../lmfs/movement/fleet-distance-traveled.js | 35 +++++++++++++++++ .../movement/vehicle-distance-traveled.js | 36 +++++++++++++++++ queries/lmfs/movement/vehicle-speeds.js | 31 +++++++++++++++ queries/not-a-test.sh | 27 +++++++++++++ queries/util/args.js | 10 +++++ queries/util/query.js | 20 ++++++++++ 14 files changed, 410 insertions(+) create mode 100644 queries/README.md create mode 100755 queries/lmfs/basics/active_tasks.js create mode 100755 queries/lmfs/basics/active_vehicles.js create mode 100755 queries/lmfs/basics/created_tasks.js create mode 100755 queries/lmfs/basics/created_vehicles.js create mode 100755 queries/lmfs/basics/task_outcomes.js create mode 100755 queries/lmfs/deviations/offroute_fraction_per_vehicle.js create mode 100755 queries/lmfs/location_reliability/magic_numbers.js create mode 100755 queries/lmfs/movement/fleet-distance-traveled.js create mode 100755 queries/lmfs/movement/vehicle-distance-traveled.js create mode 100755 queries/lmfs/movement/vehicle-speeds.js 
create mode 100755 queries/not-a-test.sh create mode 100644 queries/util/args.js create mode 100644 queries/util/query.js diff --git a/queries/README.md b/queries/README.md new file mode 100644 index 0000000..245eb24 --- /dev/null +++ b/queries/README.md @@ -0,0 +1,35 @@ +# Sample queries + +This directory includes a number of sample queries encoded in runnable node scripts. All +the sample queries output the query being run so that it can be pasted into bq, the bigquery +console or bigquery geo viz tool. + +The query scripts include a detailed explanation of the query that can be seen +by passing the '--help' option. + +## Dependencies + +Command line queries require the [bq](https://cloud.google.com/bigquery/docs/bq-command-line-tool) bigquery +command line tool to be installed. + +The visualization queries are meant to be pasted into the [BigQuery Geo Viz](https://bigquerygeoviz.appspot.com/) tool and +run from there. + +The queries assume that cloud logging has been enabled and a bigquery log sink has +been [configured.](https://cloud.google.com/logging/docs/export/configure_export_v2) + +## Running + +Example command +``` +node ./lmfs/basics/created_vehicles.js --dataset=<project>.<dataset> + +``` + +### Custom options + +Some queries may have custom options exposed. Use the '--help' command to see the full set of options. 
+ +``` +node ./lmfs/basics/created_vehicles.js --help +``` diff --git a/queries/lmfs/basics/active_tasks.js b/queries/lmfs/basics/active_tasks.js new file mode 100755 index 0000000..c1df408 --- /dev/null +++ b/queries/lmfs/basics/active_tasks.js @@ -0,0 +1,28 @@ +#!/usr/bin/env node +const { query } = require("../../util/query.js"); +const desc = ` +This query prints a daily summary of the number of distinct active tasks +`; +const argv = require("../../util/args.js").processArgs(desc, { + lastNDays: { + describe: "Use this value instead of the default", + default: 30, + }, +}); +const sql = ` +SELECT + * +FROM ( + SELECT + DATE(timestamp) AS date, + COUNT(DISTINCT labels.task_id) AS active_tasks + FROM + \`${argv.dataset}.fleetengine_googleapis_com_update_task\` + WHERE + DATE(timestamp) >= DATE_ADD(CURRENT_DATE(), INTERVAL -${argv.lastNDays} DAY) + GROUP BY + DATE(timestamp)) +ORDER BY + date DESC +`; +query(sql); diff --git a/queries/lmfs/basics/active_vehicles.js b/queries/lmfs/basics/active_vehicles.js new file mode 100755 index 0000000..2ef6ff1 --- /dev/null +++ b/queries/lmfs/basics/active_vehicles.js @@ -0,0 +1,28 @@ +#!/usr/bin/env node +const { query } = require("../../util/query.js"); +const desc = ` +This query prints a daily summary of the number of distinct active vehicles +`; +const argv = require("../../util/args.js").processArgs(desc, { + lastNDays: { + describe: "Use this value instead of the default", + default: 30, + }, +}); +const sql = ` +SELECT + * +FROM ( + SELECT + DATE(timestamp) AS date, + COUNT(DISTINCT labels.delivery_vehicle_id) AS active_vehicles + FROM + \`${argv.dataset}.fleetengine_googleapis_com_update_delivery_vehicle\` + WHERE + DATE(timestamp) >= DATE_ADD(CURRENT_DATE(), INTERVAL -${argv.lastNDays} DAY) + GROUP BY + DATE(timestamp)) +ORDER BY + date DESC +`; +query(sql); diff --git a/queries/lmfs/basics/created_tasks.js b/queries/lmfs/basics/created_tasks.js new file mode 100755 index 0000000..24dd1cd --- /dev/null +++ 
b/queries/lmfs/basics/created_tasks.js @@ -0,0 +1,28 @@ +#!/usr/bin/env node +const { query } = require("../../util/query.js"); +const desc = ` +This query prints a daily summary of the number of created tasks +`; +const argv = require("../../util/args.js").processArgs(desc, { + lastNDays: { + describe: "Use this value instead of the default", + default: 30, + }, +}); +const sql = ` +SELECT + * +FROM ( + SELECT + DATE(timestamp) AS date, + COUNT(DISTINCT labels.task_id) AS created_tasks + FROM + \`${argv.dataset}.fleetengine_googleapis_com_create_task\` + WHERE + DATE(timestamp) >= DATE_ADD(CURRENT_DATE(), INTERVAL -${argv.lastNDays} DAY) + GROUP BY + DATE(timestamp)) +ORDER BY + date DESC +`; +query(sql); diff --git a/queries/lmfs/basics/created_vehicles.js b/queries/lmfs/basics/created_vehicles.js new file mode 100755 index 0000000..2949f11 --- /dev/null +++ b/queries/lmfs/basics/created_vehicles.js @@ -0,0 +1,28 @@ +#!/usr/bin/env node +const { query } = require("../../util/query.js"); +const desc = ` +This query prints a daily summary of the number of created vehicles +`; +const argv = require("../../util/args.js").processArgs(desc, { + lastNDays: { + describe: "Use this value instead of the default", + default: 30, + }, +}); +const sql = ` +SELECT + * +FROM ( + SELECT + DATE(timestamp) AS date, + COUNT(DISTINCT labels.delivery_vehicle_id) AS created_vehicles + FROM + \`${argv.dataset}.fleetengine_googleapis_com_create_delivery_vehicle\` + WHERE + DATE(timestamp) >= DATE_ADD(CURRENT_DATE(), INTERVAL -${argv.lastNDays} DAY) + GROUP BY + DATE(timestamp)) +ORDER BY + date DESC +`; +query(sql); diff --git a/queries/lmfs/basics/task_outcomes.js b/queries/lmfs/basics/task_outcomes.js new file mode 100755 index 0000000..7fc729c --- /dev/null +++ b/queries/lmfs/basics/task_outcomes.js @@ -0,0 +1,31 @@ +#!/usr/bin/env node +const { query } = require("../../util/query.js"); +const desc = ` +This query computes the breakdown of task outcomes specified +in update_task 
calls over the last 30 days. The query doesn't +attempt to filter out duplicates. +`; +const argv = require("../../util/args.js").processArgs(desc, { + lastNDays: { + describe: "Use this value instead of the default", + default: 30, + }, +}); +const sql = ` +SELECT + * +FROM ( + SELECT + DATE(timestamp) AS date, + COUNTIF(jsonpayload_v1_updatetasklog.response.taskoutcome = "TASK_OUTCOME_LOG_SUCCEEDED") AS success_outcomes, + COUNTIF(jsonpayload_v1_updatetasklog.response.taskoutcome = "TASK_OUTCOME_LOG_FAILED") AS fail_outcomes + FROM + \`${argv.dataset}.fleetengine_googleapis_com_update_task\` + WHERE + DATE(timestamp) >= DATE_ADD(CURRENT_DATE(), INTERVAL -${argv.lastNDays} DAY) + GROUP BY + DATE(timestamp)) +ORDER BY + date DESC +`; +query(sql); diff --git a/queries/lmfs/deviations/offroute_fraction_per_vehicle.js b/queries/lmfs/deviations/offroute_fraction_per_vehicle.js new file mode 100755 index 0000000..7dcff8f --- /dev/null +++ b/queries/lmfs/deviations/offroute_fraction_per_vehicle.js @@ -0,0 +1,39 @@ +#!/usr/bin/env node +const { query } = require("../../util/query.js"); +const desc = ` +This query prints out per vehicle the fraction of location updates where the +navigation status was listed as NAVIGATION_STATUS_OFF_ROUTE. Vehicles +with a high fraction of off route updates can indicate a number of problems: + * poor GPS reception (due to bad phone hardware or urban canyons) + * poor route compliance (ie a cyclist given a 4 wheeler route) + * Complicated complex compounds/ parking lots where navigation is + not particularly helpful +`; +const argv = require("../../util/args.js").processArgs(desc, { + date: { + describe: "ISO date string to aggegrate distance traveled. 
ie 2022-06-03", + required: true, + // Default to today + default: new Date().toISOString().slice(0, 10), + }, +}); +const sql = ` +SELECT + *, + offRouteUpdates/totalNavStatusUpdates AS fractionOffRoute +FROM ( + SELECT + labels.delivery_vehicle_id AS vehicle_id, + COUNT(*) AS totalNavStatusUpdates, + COUNTIF(jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryvehicle.navigationstatus = "NAVIGATION_STATUS_OFF_ROUTE") AS offRouteUpdates, + FROM + \`${argv.dataset}.fleetengine_googleapis_com_update_delivery_vehicle\` + WHERE + DATE(timestamp) = "${argv.date}" + AND jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryvehicle.navigationstatus IS NOT NULL + GROUP BY + labels.delivery_vehicle_id ) +ORDER BY + fractionOffRoute DESC +`; +query(sql); diff --git a/queries/lmfs/location_reliability/magic_numbers.js b/queries/lmfs/location_reliability/magic_numbers.js new file mode 100755 index 0000000..e429ae3 --- /dev/null +++ b/queries/lmfs/location_reliability/magic_numbers.js @@ -0,0 +1,34 @@ +#!/usr/bin/env node +const { query } = require("../../util/query.js"); +const desc = ` +This query filters and the aggregates all of the location accuracy +measurements coming from the device that are even integers. Given the +math involved to compute an accuracy a result that is exactly an integer +is unlikely. These numbers probably represent hardcoded values coming from the +directly from the GPS chipset. One phone owned by the author appears to have +artifically capped the worst accuracy value it reports as exactly 15 meters (which +would normally be a quite acceptable value). 
+`; +const argv = require("../../util/args.js").processArgs(desc, {}); +const sql = ` +SELECT + COUNT(*) magicNumberCnt, + locAccuracy, +FROM ( + SELECT + jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryvehicle.lastlocation.rawlocationaccuracy AS locAccuracy, + labels.delivery_vehicle_id, + FROM + \`${argv.dataset}.fleetengine_googleapis_com_update_delivery_vehicle\` + WHERE + CAST(jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryvehicle.lastlocation.rawlocationaccuracy AS string) NOT LIKE "%.%" + ORDER BY + timestamp DESC + LIMIT + 100000 ) +GROUP BY + locAccuracy +ORDER BY + magicNumberCnt DESC +`; +query(sql); diff --git a/queries/lmfs/movement/fleet-distance-traveled.js b/queries/lmfs/movement/fleet-distance-traveled.js new file mode 100755 index 0000000..fc4a29c --- /dev/null +++ b/queries/lmfs/movement/fleet-distance-traveled.js @@ -0,0 +1,35 @@ +#!/usr/bin/env node +const { query } = require("../../util/query.js"); +const desc = ` +This query returns the number of kilometers traveled by the entire +fleet on the specified day. +`; +const argv = require("../../util/args.js").processArgs(desc, { + date: { + describe: "ISO date string to aggegrate distance traveled. 
ie 2022-06-03", + required: true, + // Default to today + default: new Date().toISOString().slice(0, 10), + }, +}); +const sql = ` +SELECT + SUM(st_LENGTH(path))/1000 AS kilometers_traveled, + COUNT(vehicle_id) AS num_vehicles +FROM ( + SELECT + labels.delivery_vehicle_id AS vehicle_id, + ST_makeLine(ARRAY_AGG(st_geogpoint(jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryvehicle.lastlocation.rawlocation.longitude, + jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryvehicle.lastlocation.rawlocation.latitude) + ORDER BY + timestamp)) AS path, + count (*) AS num_updates + FROM + \`${argv.dataset}.fleetengine_googleapis_com_update_delivery_vehicle\` + WHERE + DATE(timestamp) = "${argv.date}" + AND jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryVehicle.lastLocation.rawLocation.longitude IS NOT NULL + GROUP BY + labels.delivery_vehicle_id) +`; +query(sql); diff --git a/queries/lmfs/movement/vehicle-distance-traveled.js b/queries/lmfs/movement/vehicle-distance-traveled.js new file mode 100755 index 0000000..2f41874 --- /dev/null +++ b/queries/lmfs/movement/vehicle-distance-traveled.js @@ -0,0 +1,36 @@ +#!/usr/bin/env node +const { query } = require("../../util/query.js"); +const desc = ` +This query returns the number of kilometers traveled by the specified +vehicle on the specified day. +`; +const argv = require("../../util/args.js").processArgs(desc, { + date: { + describe: "ISO date string to aggegrate distance traveled. 
ie 2022-06-03", + required: true, + // Default to today + default: new Date().toISOString().slice(0, 10), + }, + vehicle: { + describe: "vehicle to inspect", + required: true, + }, +}); +const sql = ` +SELECT + labels.delivery_vehicle_id AS vehicle_id, + ST_LENGTH(ST_makeLine(ARRAY_AGG(st_geogpoint(jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryvehicle.lastlocation.rawlocation.longitude, + jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryvehicle.lastlocation.rawlocation.latitude) + ORDER BY + timestamp)))/1000 AS km_traveled, + count (*) AS num_updates +FROM + \`${argv.dataset}.fleetengine_googleapis_com_update_delivery_vehicle\` +WHERE + DATE(timestamp) = "${argv.date}" + AND jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryVehicle.lastLocation.rawLocation.longitude IS NOT NULL + AND labels.delivery_vehicle_id = "${argv.vehicle}" +GROUP BY + labels.delivery_vehicle_id +`; +query(sql); diff --git a/queries/lmfs/movement/vehicle-speeds.js b/queries/lmfs/movement/vehicle-speeds.js new file mode 100755 index 0000000..a236710 --- /dev/null +++ b/queries/lmfs/movement/vehicle-speeds.js @@ -0,0 +1,31 @@ +#!/usr/bin/env node +const { query } = require("../../util/query.js"); +const desc = ` +This query returns the average and max reported speed (as per device GPS) for all +vehicles on the specified day. +`; +const argv = require("../../util/args.js").processArgs(desc, { + date: { + describe: "ISO date string to aggegrate distance traveled. 
ie 2022-06-03", + required: true, + // Default to today + default: new Date().toISOString().slice(0, 10), + }, +}); +const sql = ` +SELECT + labels.delivery_vehicle_id AS vehicle_id, + AVG(jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryvehicle.lastlocation.speed) AS avgSpeed, + MAX(jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryvehicle.lastlocation.speed) AS maxSpeed, + count (*) AS num_updates +FROM + \`${argv.dataset}.fleetengine_googleapis_com_update_delivery_vehicle\` +WHERE + DATE(timestamp) = "${argv.date}" + AND jsonpayload_v1_updatedeliveryvehiclelog.request.deliveryvehicle.lastlocation.speed IS NOT NULL +GROUP BY + labels.delivery_vehicle_id +ORDER BY + maxSpeed DESC +`; +query(sql); diff --git a/queries/not-a-test.sh b/queries/not-a-test.sh new file mode 100755 index 0000000..2cf7bc3 --- /dev/null +++ b/queries/not-a-test.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# +# Once we have a real, published test this can be made into +# a real test. For now, at least verify all of the commands work. +# +# usage ./not-a-test.sh . + +dataset="$1" +if [ -z "$dataset" ]; then + echo "Must specify dataset as ." 
+ exit 1 +fi + +./lmfs/basics/active_tasks.js --dataset=$dataset || exit 1 +./lmfs/basics/active_vehicles.js --dataset=$dataset || exit 1 +./lmfs/basics/created_tasks.js --dataset=$dataset || exit 1 +./lmfs/basics/task_outcomes.js --dataset=$dataset || exit 1 +./lmfs/basics/created_vehicles.js --dataset=$dataset || exit 1 +./lmfs/movement/fleet-distance-traveled.js --dataset=$dataset --date=2022-06-03|| exit 1 +./lmfs/movement/vehicle-speeds.js --dataset=$dataset || exit 1 +./lmfs/movement/vehicle-distance-traveled.js --dataset=$dataset --date=2022-06-03 --vehicle=protesting_elephant_1654199548000|| exit 1 +./lmfs/deviations/offroute_fraction_per_vehicle.js --dataset=$dataset || exit 1 +./lmfs/location_reliability/magic_numbers.js --dataset=$dataset || exit 1 + +echo "*********************************" +echo "* All not actually tests passed *" +echo "*********************************" diff --git a/queries/util/args.js b/queries/util/args.js new file mode 100644 index 0000000..f90e1a9 --- /dev/null +++ b/queries/util/args.js @@ -0,0 +1,10 @@ +const process = require("process"); +exports.processArgs = function (desc, opts) { + opts.dataset = { + describe: "full . 
dataset path", + default: process.env["FD_DATASET"], + required: true, + }; + return require("yargs/yargs")(process.argv.slice(2)).usage(desc).options(opts) + .argv; +}; diff --git a/queries/util/query.js b/queries/util/query.js new file mode 100644 index 0000000..31a94e1 --- /dev/null +++ b/queries/util/query.js @@ -0,0 +1,20 @@ +const { exec } = require("child_process"); +exports.query = function query(query) { + const cmd = `bq query --nouse_legacy_sql '${query}'`; + console.log(`Running bq command\n ${cmd}`); + exec(cmd, (error, stdout, stderr) => { + if (error) { + console.log(`error:\n${error.message}`); + console.log(`stderr:\n${stderr}`); + process.exit(1); + return 1; + } + if (stderr) { + console.log(`stderr:\n${stderr}`); + process.exit(1); + return 1; + } + console.log(`Result:\n${stdout}`); + return 0; + }); +};