drop influxdb python dependency
We haven't used influxdb in a long time, remove the dead code and unused
dependency.

Some code in experiments/ and kettle/ relied on 'requests' being transitively
pulled in by influxdb, so specify that dependency explicitly.
spiffxp committed Oct 31, 2022
1 parent ba4b706 · commit e825809
Showing 5 changed files with 5 additions and 42 deletions.
3 changes: 1 addition & 2 deletions images/bigquery/Dockerfile
@@ -24,11 +24,10 @@ RUN apt-get update && apt-get install -y \
    rm -rf /var/lib/apt/lists/*

ARG CLOUD_SDK_VERSION=390.0.0
-ARG INFLUXDB_VERSION=5.2.2
ARG BIGQUERY_LIBRARY_VERSION=0.26.0
ARG RUAMEL_VERSION=0.16

-RUN pip3 install --no-cache-dir influxdb==${INFLUXDB_VERSION} google-cloud-bigquery==${BIGQUERY_LIBRARY_VERSION} ruamel.yaml==${RUAMEL_VERSION}
+RUN pip3 install --no-cache-dir google-cloud-bigquery==${BIGQUERY_LIBRARY_VERSION} ruamel.yaml==${RUAMEL_VERSION}

ENV PATH=/google-cloud-sdk/bin:${PATH} \
CLOUDSDK_CORE_DISABLE_PROMPTS=1
2 changes: 1 addition & 1 deletion kettle/Dockerfile
@@ -31,7 +31,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime \
    && rm -rf /var/lib/apt/lists/*

RUN pip3 install --no-cache-dir --upgrade pip && \
-    pip3 install --no-cache-dir requests google-cloud-pubsub==2.3.0 google-cloud-bigquery==2.11.0 influxdb ruamel.yaml==0.16
+    pip3 install --no-cache-dir requests google-cloud-pubsub==2.3.0 google-cloud-bigquery==2.11.0 ruamel.yaml==0.16

RUN curl -fsSL https://downloads.python.org/pypy/pypy3.6-v7.3.1-linux64.tar.bz2 | tar xj -C opt && \
ln -s /opt/pypy*/bin/pypy3 /usr/bin
34 changes: 2 additions & 32 deletions kettle/monitor.py
@@ -20,12 +20,9 @@

import argparse
import json
-import os
import sys
import time

-import influxdb
-
try:
    from google.cloud import bigquery
    import google.cloud.exceptions
@@ -35,7 +32,7 @@
    traceback.print_exc()


-def collect(tables, stale_hours, influx_client):
+def collect(tables, stale_hours):
    lines = []
    stale = False
    for table_spec in tables:
@@ -69,47 +66,20 @@ def collect(tables, stale_hours, influx_client):
                table.table_id, hours_old, stale_hours))
            stale = True

-        lines.append(influxdb.line_protocol.make_lines({
-            'tags': {'db': table.table_id},
-            'points': [{'measurement': 'bigquery', 'fields': fields}]
-        }))

    print('Collected data:')
    print(''.join(lines))

-    if influx_client:
-        influx_client.write_points(lines, time_precision='ms', protocol='line')
-    else:
-        print('Not uploading to influxdb; missing client.')

    return int(stale)


-def make_influx_client():
-    """Make an InfluxDB client from config at path $VELODROME_INFLUXDB_CONFIG"""
-    if 'VELODROME_INFLUXDB_CONFIG' not in os.environ:
-        return None
-
-    with open(os.environ['VELODROME_INFLUXDB_CONFIG']) as config_file:
-        config = json.load(config_file)
-
-    return influxdb.InfluxDBClient(
-        host=config['host'],
-        port=config['port'],
-        username=config['user'],
-        password=config['password'],
-        database='metrics',
-    )


def main(args):
    parser = argparse.ArgumentParser()
    parser.add_argument('--table', nargs='+', required=True,
                        help='List of datasets to return information about.')
    parser.add_argument('--stale', type=int,
                        help='Number of hours to consider stale.')
    opts = parser.parse_args(args)
-    return collect(opts.table, opts.stale, make_influx_client())
+    return collect(opts.table, opts.stale)


if __name__ == '__main__':
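For orientation, here is a rough sketch of what the trimmed-down monitor.py flow looks like once the influxdb pieces are gone, pieced together from the hunks above. It is not the actual file: the BigQuery client setup, the table-age computation, and the exact print format are assumptions for illustration.

```python
# Hedged sketch reconstructed from the diff above; not the actual kettle/monitor.py.
import argparse
import sys
import time

from google.cloud import bigquery


def collect(tables, stale_hours):
    """Print table freshness and return 1 if any table is older than stale_hours."""
    stale = False
    client = bigquery.Client()
    for table_spec in tables:
        # table_spec is assumed to look like 'project.dataset.table'
        table = client.get_table(table_spec)
        hours_old = (time.time() - table.modified.timestamp()) / 3600.0
        print('%s is %.1f hours old. Max allowed: %s hours.' % (
            table.table_id, hours_old, stale_hours))
        if stale_hours and hours_old > stale_hours:
            stale = True
    return int(stale)


def main(args):
    parser = argparse.ArgumentParser()
    parser.add_argument('--table', nargs='+', required=True,
                        help='List of datasets to return information about.')
    parser.add_argument('--stale', type=int,
                        help='Number of hours to consider stale.')
    opts = parser.parse_args(args)
    return collect(opts.table, opts.stale)


if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
```

The exit code (0 fresh, 1 stale) is what makes the script usable as a monitoring probe, which is why `collect` returns `int(stale)` rather than printing alone.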
6 changes: 0 additions & 6 deletions metrics/README.md
@@ -95,12 +95,6 @@ the metric name and persist for a year after their creation. Additionally,
the latest filtered results for a metric are stored in the root of the
k8s-metrics bucket and named with the format `METRICNAME-latest.json`.

-If a config specifies the optional jq filter used to create influxdb timeseries
-data points, then the job will use the filter to generate timeseries points from
-the raw query results.
-
-At one point, these points were uploaded to a system called velodrome, which had an influxdb instance where they can be used to create graphs and tables, but velodrome is no longer in existence. This may be revised in the future.

## Query structure

The `query` is written in `Standard SQL` which is really [BigQuery Standard SQL](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax) that allows for working with arrays/repeated fields. Each sub-query, from the most indented out, will build a subtable that the outer query runs against. Any one of the sub query blocks can be run independently from the BigQuery console or optionally added to a test query config and run via the same `bigquery.py` line above.
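The surviving README text above describes how each metric `query` nests sub-queries that build subtables for the outer query to run against. A minimal sketch of that shape, run with the google-cloud-bigquery client pinned in the Dockerfile above, may help; the project, dataset, table, and column names here are invented for illustration and are not taken from this repo's metric configs.

```python
# Hedged example: a nested Standard SQL query of the kind the README describes.
from google.cloud import bigquery

QUERY = """
SELECT
  job,                      -- the outer query runs against the subtable below
  COUNT(*) AS runs
FROM (
  SELECT
    job,
    result
  FROM `example_project.example_dataset.builds`   -- innermost sub-query
  WHERE started > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
)
GROUP BY job
"""


def main():
    client = bigquery.Client()
    for row in client.query(QUERY).result():
        print(row.job, row.runs)


if __name__ == '__main__':
    main()
```

As the README notes, the inner sub-query can also be pasted into the BigQuery console and run on its own, which is handy when debugging one level of nesting at a time.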
2 changes: 1 addition & 1 deletion requirements3.txt
@@ -2,11 +2,11 @@ astroid==2.3.3
backports.functools_lru_cache==1.6.1
configparser==4.0.2
chardet==4.0.0
-influxdb==5.2.3
isort==4.3.21
pylint==2.4.4
parameterized==0.7.4
PyYAML==5.3
+requests==2.28.1
ruamel.yaml==0.16.5
setuptools==44.0.0
sh==1.12.14
