kettle - upgrade py2 -> py3
Travis Clarke committed Aug 21, 2019
1 parent 3d091d0 commit 1e066c3
Showing 11 changed files with 82 additions and 73 deletions.
16 changes: 8 additions & 8 deletions kettle/BUILD.bazel
@@ -11,12 +11,12 @@ py_test(
# https://github.com/bazelbuild/bazel/issues/1973
# https://github.com/bazelbuild/bazel/issues/2056
local = True,
-python_version = "PY2",
+python_version = "PY3",
deps = [
requirement("certifi"),
requirement("chardet"),
requirement("idna"),
-requirement("PyYAML"),
+requirement("ruamel.yaml"),
requirement("requests"),
requirement("urllib3"),
],
@@ -28,7 +28,7 @@ py_binary(
"make_db.py",
"model.py",
],
-python_version = "PY2",
+python_version = "PY3",
)

# TODO(rmmh): re-enable when Bazel is fixed.
@@ -51,7 +51,7 @@ py_test(
"model_test.py",
":package-srcs",
],
-python_version = "PY2",
+python_version = "PY3",
)

py_test(
@@ -65,12 +65,12 @@ py_test(
"buckets.yaml",
"schema.json",
],
-python_version = "PY2",
+python_version = "PY3",
deps = [
requirement("certifi"),
requirement("chardet"),
requirement("idna"),
-requirement("PyYAML"),
+requirement("ruamel.yaml"),
requirement("requests"),
requirement("urllib3"),
],
@@ -87,8 +87,8 @@ py_test(
data = [":buckets.yaml"],
# idem
local = True,
-python_version = "PY2",
-deps = [requirement("PyYAML")],
+python_version = "PY3",
+deps = [requirement("ruamel.yaml")],
)

filegroup(
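A note on the dependency swap above: PyYAML is replaced with ruamel.yaml in every target, and the sources below import it under the old alias. The sketch that follows is illustrative, not taken from the commit; it assumes the module-level safe_load() still shipped in the ruamel.yaml 0.16 line pinned in the Dockerfile, and that buckets.yaml sits in the working directory.

# Minimal sketch (not from the commit) of the PyYAML-compatible usage the
# kettle sources rely on after this change: the import alias keeps call
# sites unchanged, and safe_load() returns plain dicts/lists like PyYAML's.
import ruamel.yaml as yaml

with open('buckets.yaml') as fp:              # kettle's own config file
    buckets = yaml.safe_load(fp)

for bucket, metadata in buckets.items():      # py3: items() replaces iteritems()
    print(bucket, metadata.get('prefix', ''))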
30 changes: 19 additions & 11 deletions kettle/Dockerfile
@@ -12,29 +12,37 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-FROM ubuntu
+FROM ubuntu:18.04

+ENV KETTLE_DB=/data/build.db
+ENV TZ=America/Los_Angeles

+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \
+echo $TZ > /etc/timezone

RUN apt-get update && apt-get install -y \
+tzdata \
curl \
pv \
time \
sqlite3 \
-python-pip \
-python \
+python3 \
+python3-pip \
&& rm -rf /var/lib/apt/lists/*

-RUN curl -fsSL https://bitbucket.org/squeaky/portable-pypy/downloads/pypy-5.8-1-linux_x86_64-portable.tar.bz2 | tar xj -C opt
-RUN ln -s /opt/pypy*/bin/pypy /usr/bin

-ADD requirements.txt /kettle/
-RUN pip install -r /kettle/requirements.txt
+RUN pip3 install requests google-cloud-pubsub==0.25.0 google-cloud-bigquery==0.24.0 influxdb ruamel.yaml==0.16

-RUN curl -o installer https://sdk.cloud.google.com && bash installer --disable-prompts --install-dir=/ && rm installer && ln -s /google-cloud-sdk/bin/* /bin/
+RUN curl -fsSL https://bitbucket.org/squeaky/portable-pypy/downloads/pypy3.6-7.1.1-beta-linux_x86_64-portable.tar.bz2 | tar xj -C opt && \
+ln -s /opt/pypy*/bin/pypy /usr/bin

-ENV KETTLE_DB=/data/build.db
-ENV TZ=America/Los_Angeles
+RUN curl -o installer https://sdk.cloud.google.com && \
+bash installer --disable-prompts --install-dir=/ && \
+rm installer && \
+ln -s /google-cloud-sdk/bin/* /bin/

ADD *.py schema.json runner.sh buckets.yaml /kettle/

-CMD ["/kettle/runner.sh"]
VOLUME ["/data"]

+CMD ["/kettle/runner.sh"]
29 changes: 15 additions & 14 deletions kettle/make_db.py
@@ -14,7 +14,6 @@

"""Generates a SQLite DB containing test data downloaded from GCS."""

-from __future__ import print_function

import argparse
import logging
@@ -24,13 +23,13 @@
import signal
import sys
import time
-import urllib2
+import urllib.parse
from xml.etree import cElementTree as ET

import multiprocessing
import multiprocessing.pool
import requests
-import yaml
+import ruamel.yaml as yaml

import model

@@ -41,7 +40,7 @@ def pad_numbers(string):

WORKER_CLIENT = None # used for multiprocessing

-class GCSClient(object):
+class GCSClient:
def __init__(self, jobs_dir, metadata=None):
self.jobs_dir = jobs_dir
self.metadata = metadata or {}
@@ -55,15 +54,15 @@ def _request(self, path, params, as_json=True):
"""
url = 'https://www.googleapis.com/storage/v1/b/%s' % path
-for retry in xrange(23):
+for retry in range(23):
try:
resp = self.session.get(url, params=params, stream=False)
if 400 <= resp.status_code < 500 and resp.status_code != 429:
return None
resp.raise_for_status()
if as_json:
return resp.json()
-return resp.content
+return resp.text
except requests.exceptions.RequestException:
logging.exception('request failed %s', url)
time.sleep(random.random() * min(60, 2 ** retry))
@@ -78,7 +77,7 @@ def _parse_uri(path):
def get(self, path, as_json=False):
"""Get an object from GCS."""
bucket, path = self._parse_uri(path)
-return self._request('%s/o/%s' % (bucket, urllib2.quote(path, '')),
+return self._request('%s/o/%s' % (bucket, urllib.parse.quote(path, '')),
{'alt': 'media'}, as_json=as_json)

def ls(self, path, dirs=True, files=True, delim=True, item_field='name'):
@@ -140,7 +139,7 @@ def _get_builds(self, job):
except (ValueError, TypeError):
pass
else:
-return False, (str(n) for n in xrange(latest_build, 0, -1))
+return False, (str(n) for n in range(latest_build, 0, -1))
# Invalid latest-build or bucket is using timestamps
build_paths = self.ls_dirs('%s%s/' % (self.jobs_dir, job))
return True, sorted(
@@ -193,14 +192,16 @@ def mp_init_worker(jobs_dir, metadata, client_class, use_signal=True):
global WORKER_CLIENT # pylint: disable=global-statement
WORKER_CLIENT = client_class(jobs_dir, metadata)

-def get_started_finished((job, build)):
+def get_started_finished(job_info):
+(job, build) = job_info
try:
return WORKER_CLIENT.get_started_finished(job, build)
except:
logging.exception('failed to get tests for %s/%s', job, build)
raise

-def get_junits((build_id, gcs_path)):
+def get_junits(build_info):
+(build_id, gcs_path) = build_info
try:
junits = WORKER_CLIENT.get_junits_from_build(gcs_path)
return build_id, gcs_path, junits
@@ -267,7 +268,7 @@ def remove_system_out(data):
for parent in root.findall('*//system-out/..'):
for child in parent.findall('system-out'):
parent.remove(child)
-return ET.tostring(root)
+return ET.tostring(root, 'unicode')
except ET.ParseError:
pass
return data
@@ -292,7 +293,7 @@ def download_junit(db, threads, client_class):
for n, (build_id, build_path, junits) in enumerate(test_iterator, 1):
print('%d/%d' % (n, len(builds_to_grab)),
build_path, len(junits), len(''.join(junits.values())))
-junits = {k: remove_system_out(v) for k, v in junits.iteritems()}
+junits = {k: remove_system_out(v) for k, v in junits.items()}

db.insert_build_junits(build_id, junits)
if n % 100 == 0:
@@ -307,7 +308,7 @@ def main(db, jobs_dirs, threads, get_junit, client_class=GCSClient):
"""Collect test info in matching jobs."""
get_builds(db, 'gs://kubernetes-jenkins/pr-logs', {'pr': True},
threads, client_class)
-for bucket, metadata in jobs_dirs.iteritems():
+for bucket, metadata in jobs_dirs.items():
if not bucket.endswith('/'):
bucket += '/'
get_builds(db, bucket, metadata, threads, client_class)
@@ -340,6 +341,6 @@ def get_options(argv):
if __name__ == '__main__':
OPTIONS = get_options(sys.argv[1:])
main(model.Database(),
-yaml.load(open(OPTIONS.buckets)),
+yaml.safe_load(open(OPTIONS.buckets)),
OPTIONS.threads,
OPTIONS.junit)
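The least mechanical change in make_db.py is the removal of tuple parameters, which Python 3 no longer allows (PEP 3113): the helpers handed to the multiprocessing pool now take one argument and unpack it themselves. The sketch below is a standalone illustration of that pattern, not the commit's code; the worker body is a placeholder for what WORKER_CLIENT does in kettle.

# Standalone sketch of the PEP 3113 fix: Python 2 allowed
#   def get_junits((build_id, gcs_path)): ...
# Python 3 requires a single parameter that is unpacked explicitly, while the
# pool keeps passing one (build_id, gcs_path) tuple per task.
import multiprocessing.pool

def get_junits(build_info):
    build_id, gcs_path = build_info        # explicit unpacking replaces the tuple parameter
    return build_id, gcs_path.upper()      # placeholder for the real GCS fetch

if __name__ == '__main__':
    pool = multiprocessing.pool.ThreadPool(4)
    for build_id, path in pool.imap_unordered(get_junits, [(1, 'gs://a/1'), (2, 'gs://a/2')]):
        print(build_id, path)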
2 changes: 1 addition & 1 deletion kettle/make_db_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3

# Copyright 2017 The Kubernetes Authors.
#
20 changes: 10 additions & 10 deletions kettle/make_json.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3

# Copyright 2017 The Kubernetes Authors.
#
@@ -81,15 +81,15 @@ def make_result(name, time, failure_text):


def buckets_yaml():
-import yaml # does not support pypy
+import ruamel.yaml as yaml # does not support pypy
with open(os.path.dirname(os.path.abspath(__file__))+'/buckets.yaml') as fp:
-return yaml.load(fp)
+return yaml.safe_load(fp)

# pypy compatibility hack
-def python_buckets_yaml(python='python2'):
+def python_buckets_yaml(python='python3'):
return json.loads(subprocess.check_output(
-[python, '-c', 'import json,yaml; print json.dumps(yaml.load(open("buckets.yaml")))'],
-cwd=os.path.dirname(os.path.abspath(__file__))))
+[python, '-c', 'import json, ruamel.yaml as yaml; print(json.dumps(yaml.safe_load(open("buckets.yaml"))))'],
+cwd=os.path.dirname(os.path.abspath(__file__)), encoding='utf-8'))

for attempt in [python_buckets_yaml, buckets_yaml, lambda: python_buckets_yaml(python='python')]:
try:
@@ -105,7 +105,7 @@ def python_buckets_yaml(python='python2'):

def path_to_job_and_number(path):
assert not path.endswith('/')
-for bucket, meta in BUCKETS.iteritems():
+for bucket, meta in BUCKETS.items():
if path.startswith(bucket):
prefix = meta['prefix']
break
@@ -171,7 +171,7 @@ def get_metadata():
if metadata.get('version') == build_version:
metadata.pop('version')
for key, value in metadata.items():
-if not isinstance(value, basestring):
+if not isinstance(value, str):
# the schema specifies a string value. force it!
metadata[key] = json.dumps(value)
if not metadata:
@@ -247,9 +247,9 @@ def main(db, opts, outfile):

if rows_emitted:
gen = db.insert_emitted(rows_emitted, incremental_table=incremental_table)
-print >>sys.stderr, 'incremental progress gen #%d' % gen
+print('incremental progress gen #%d' % gen, file=sys.stderr)
else:
-print >>sys.stderr, 'no rows emitted'
+print('no rows emitted', file=sys.stderr)
return 0


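The pypy compatibility hack above keeps working in Python 3 because subprocess.check_output(..., encoding='utf-8') returns str instead of bytes, so the child's stdout feeds straight into json.loads. A standalone sketch of that round trip (the inline child program here is illustrative, not the one in the diff):

# Sketch: run a child interpreter, decode its stdout, and parse the result as JSON.
# Without encoding= (or text=True), check_output returns bytes in Python 3.
import json
import subprocess
import sys

out = subprocess.check_output(
    [sys.executable, '-c', 'import json; print(json.dumps({"ok": True}))'],
    encoding='utf-8')
print(json.loads(out))   # -> {'ok': True}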
6 changes: 3 additions & 3 deletions kettle/make_json_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

# Copyright 2017 The Kubernetes Authors.
#
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import cStringIO as StringIO
+import io as StringIO
import json
import time
import unittest
@@ -26,7 +26,7 @@
class ValidateBuckets(unittest.TestCase):
def test_buckets(self):
prefixes = set()
-for name, options in sorted(make_json.BUCKETS.iteritems()):
+for name, options in sorted(make_json.BUCKETS.items()):
if name == 'gs://kubernetes-jenkins/logs/':
continue # only bucket without a prefix
prefix = options.get('prefix', '')
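The cStringIO swap is the usual Python 3 move: io.StringIO buffers text, io.BytesIO buffers bytes, and aliasing the module keeps the test's existing StringIO.StringIO(...) calls unchanged. A short sketch of the distinction (illustrative values only):

# io replaces cStringIO in Python 3; text and byte buffers are separate types.
import io as StringIO                           # alias preserves existing call sites

text_buf = StringIO.StringIO('started')         # str in, str out
byte_buf = StringIO.BytesIO(b'<testsuite/>')    # bytes in, bytes out
print(text_buf.read(), byte_buf.read())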
8 changes: 4 additions & 4 deletions kettle/model.py
@@ -20,7 +20,7 @@
import zlib


-class Database(object):
+class Database:
"""
Store build and test result information, and support incremental updates to results.
"""
@@ -103,9 +103,9 @@ def insert_build_junits(self, build_id, junits):
"""
Insert a junit dictionary {gcs_path: contents} for a given build's rowid.
"""
-for path, data in junits.iteritems():
+for path, data in junits.items():
self.db.execute('replace into file values(?,?)',
-(path, buffer(zlib.compress(data, 9))))
+(path, memoryview(zlib.compress(data.encode('utf-8'), 9))))
self.db.execute('delete from build_junit_missing where build_id=?', (build_id,))

### make_json
@@ -156,7 +156,7 @@ def test_results_for_build(self, path):
for dataz, in self.db.execute(
'select data from file where path between ? and ?',
(path, path + '\x7F')):
-data = zlib.decompress(dataz)
+data = zlib.decompress(dataz).decode('utf-8')
if data:
results.append(data)
return results
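The model.py hunks show where the str/bytes boundary now sits: junit XML is handled as str, so it is encoded before zlib.compress and decoded after zlib.decompress, and memoryview replaces Python 2's buffer for binding the BLOB. A minimal round-trip sketch against an in-memory database (the schema and path below are illustrative, not kettle's exact ones):

# Sketch of the compress -> store -> decompress round trip used for junit files.
import sqlite3
import zlib

db = sqlite3.connect(':memory:')
db.execute('create table file (path text primary key, data blob)')

data = '<testsuite><testcase name="t"/></testsuite>'   # junit XML handled as str
db.execute('replace into file values(?,?)',
           ('gs://bucket/build/artifacts/junit_01.xml',
            memoryview(zlib.compress(data.encode('utf-8'), 9))))   # memoryview, not buffer

blob = db.execute('select data from file').fetchone()[0]
assert zlib.decompress(blob).decode('utf-8') == data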
2 changes: 1 addition & 1 deletion kettle/model_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

# Copyright 2017 The Kubernetes Authors.
#
