kettle - upgrade py2 -> py3
Travis Clarke committed Aug 21, 2019
1 parent 3d091d0 commit 1e066c3
Showing 11 changed files with 82 additions and 73 deletions.
16 changes: 8 additions & 8 deletions kettle/BUILD.bazel
@@ -11,12 +11,12 @@ py_test(
# https://github.com/bazelbuild/bazel/issues/1973
# https://github.com/bazelbuild/bazel/issues/2056
local = True,
-python_version = "PY2",
+python_version = "PY3",
deps = [
requirement("certifi"),
requirement("chardet"),
requirement("idna"),
-requirement("PyYAML"),
+requirement("ruamel.yaml"),
requirement("requests"),
requirement("urllib3"),
],
@@ -28,7 +28,7 @@ py_binary(
"make_db.py",
"model.py",
],
-python_version = "PY2",
+python_version = "PY3",
)

# TODO(rmmh): re-enable when Bazel is fixed.
@@ -51,7 +51,7 @@ py_test(
"model_test.py",
":package-srcs",
],
-python_version = "PY2",
+python_version = "PY3",
)

py_test(
@@ -65,12 +65,12 @@ py_test(
"buckets.yaml",
"schema.json",
],
-python_version = "PY2",
+python_version = "PY3",
deps = [
requirement("certifi"),
requirement("chardet"),
requirement("idna"),
-requirement("PyYAML"),
+requirement("ruamel.yaml"),
requirement("requests"),
requirement("urllib3"),
],
@@ -87,8 +87,8 @@ py_test(
data = [":buckets.yaml"],
# idem
local = True,
-python_version = "PY2",
-deps = [requirement("PyYAML")],
+python_version = "PY3",
+deps = [requirement("ruamel.yaml")],
)

filegroup(
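A note on the dependency swap above: PyYAML is replaced with ruamel.yaml in every target, and the sources below import it under the old alias. The sketch that follows is illustrative, not taken from the commit; it assumes the module-level safe_load() still shipped in the ruamel.yaml 0.16 line pinned in the Dockerfile, and that buckets.yaml sits in the working directory.

# Minimal sketch (not from the commit) of the PyYAML-compatible usage the
# kettle sources rely on after this change: the import alias keeps call
# sites unchanged, and safe_load() returns plain dicts/lists like PyYAML's.
import ruamel.yaml as yaml

with open('buckets.yaml') as fp:              # kettle's own config file
    buckets = yaml.safe_load(fp)

for bucket, metadata in buckets.items():      # py3: items() replaces iteritems()
    print(bucket, metadata.get('prefix', ''))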
30 changes: 19 additions & 11 deletions kettle/Dockerfile
@@ -12,29 +12,37 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-FROM ubuntu
+FROM ubuntu:18.04

+ENV KETTLE_DB=/data/build.db
+ENV TZ=America/Los_Angeles

+RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \
+echo $TZ > /etc/timezone

RUN apt-get update && apt-get install -y \
+tzdata \
curl \
pv \
time \
sqlite3 \
-python-pip \
-python \
+python3 \
+python3-pip \
&& rm -rf /var/lib/apt/lists/*

-RUN curl -fsSL https://bitbucket.org/squeaky/portable-pypy/downloads/pypy-5.8-1-linux_x86_64-portable.tar.bz2 | tar xj -C opt
-RUN ln -s /opt/pypy*/bin/pypy /usr/bin

-ADD requirements.txt /kettle/
-RUN pip install -r /kettle/requirements.txt
+RUN pip3 install requests google-cloud-pubsub==0.25.0 google-cloud-bigquery==0.24.0 influxdb ruamel.yaml==0.16

-RUN curl -o installer https://sdk.cloud.google.com && bash installer --disable-prompts --install-dir=/ && rm installer && ln -s /google-cloud-sdk/bin/* /bin/
+RUN curl -fsSL https://bitbucket.org/squeaky/portable-pypy/downloads/pypy3.6-7.1.1-beta-linux_x86_64-portable.tar.bz2 | tar xj -C opt && \
+ln -s /opt/pypy*/bin/pypy /usr/bin

-ENV KETTLE_DB=/data/build.db
-ENV TZ=America/Los_Angeles
+RUN curl -o installer https://sdk.cloud.google.com && \
+bash installer --disable-prompts --install-dir=/ && \
+rm installer && \
+ln -s /google-cloud-sdk/bin/* /bin/

ADD *.py schema.json runner.sh buckets.yaml /kettle/

-CMD ["/kettle/runner.sh"]
VOLUME ["/data"]

+CMD ["/kettle/runner.sh"]
29 changes: 15 additions & 14 deletions kettle/make_db.py
@@ -14,7 +14,6 @@

"""Generates a SQLite DB containing test data downloaded from GCS."""

-from __future__ import print_function

import argparse
import logging
@@ -24,13 +23,13 @@
import signal
import sys
import time
-import urllib2
+import urllib.parse
from xml.etree import cElementTree as ET

import multiprocessing
import multiprocessing.pool
import requests
-import yaml
+import ruamel.yaml as yaml

import model

@@ -41,7 +40,7 @@ def pad_numbers(string):

WORKER_CLIENT = None # used for multiprocessing

-class GCSClient(object):
+class GCSClient:
def __init__(self, jobs_dir, metadata=None):
self.jobs_dir = jobs_dir
self.metadata = metadata or {}
@@ -55,15 +54,15 @@ def _request(self, path, params, as_json=True):
"""
url = 'https://www.googleapis.com/storage/v1/b/%s' % path
-for retry in xrange(23):
+for retry in range(23):
try:
resp = self.session.get(url, params=params, stream=False)
if 400 <= resp.status_code < 500 and resp.status_code != 429:
return None
resp.raise_for_status()
if as_json:
return resp.json()
-return resp.content
+return resp.text
except requests.exceptions.RequestException:
logging.exception('request failed %s', url)
time.sleep(random.random() * min(60, 2 ** retry))
@@ -78,7 +77,7 @@ def _parse_uri(path):
def get(self, path, as_json=False):
"""Get an object from GCS."""
bucket, path = self._parse_uri(path)
-return self._request('%s/o/%s' % (bucket, urllib2.quote(path, '')),
+return self._request('%s/o/%s' % (bucket, urllib.parse.quote(path, '')),
{'alt': 'media'}, as_json=as_json)

def ls(self, path, dirs=True, files=True, delim=True, item_field='name'):
@@ -140,7 +139,7 @@ def _get_builds(self, job):
except (ValueError, TypeError):
pass
else:
-return False, (str(n) for n in xrange(latest_build, 0, -1))
+return False, (str(n) for n in range(latest_build, 0, -1))
# Invalid latest-build or bucket is using timestamps
build_paths = self.ls_dirs('%s%s/' % (self.jobs_dir, job))
return True, sorted(
@@ -193,14 +192,16 @@ def mp_init_worker(jobs_dir, metadata, client_class, use_signal=True):
global WORKER_CLIENT # pylint: disable=global-statement
WORKER_CLIENT = client_class(jobs_dir, metadata)

-def get_started_finished((job, build)):
+def get_started_finished(job_info):
+(job, build) = job_info
try:
return WORKER_CLIENT.get_started_finished(job, build)
except:
logging.exception('failed to get tests for %s/%s', job, build)
raise

-def get_junits((build_id, gcs_path)):
+def get_junits(build_info):
+(build_id, gcs_path) = build_info
try:
junits = WORKER_CLIENT.get_junits_from_build(gcs_path)
return build_id, gcs_path, junits
@@ -267,7 +268,7 @@ def remove_system_out(data):
for parent in root.findall('*//system-out/..'):
for child in parent.findall('system-out'):
parent.remove(child)
-return ET.tostring(root)
+return ET.tostring(root, 'unicode')
except ET.ParseError:
pass
return data
@@ -292,7 +293,7 @@ def download_junit(db, threads, client_class):
for n, (build_id, build_path, junits) in enumerate(test_iterator, 1):
print('%d/%d' % (n, len(builds_to_grab)),
build_path, len(junits), len(''.join(junits.values())))
-junits = {k: remove_system_out(v) for k, v in junits.iteritems()}
+junits = {k: remove_system_out(v) for k, v in junits.items()}

db.insert_build_junits(build_id, junits)
if n % 100 == 0:
@@ -307,7 +308,7 @@ def main(db, jobs_dirs, threads, get_junit, client_class=GCSClient):
"""Collect test info in matching jobs."""
get_builds(db, 'gs://kubernetes-jenkins/pr-logs', {'pr': True},
threads, client_class)
-for bucket, metadata in jobs_dirs.iteritems():
+for bucket, metadata in jobs_dirs.items():
if not bucket.endswith('/'):
bucket += '/'
get_builds(db, bucket, metadata, threads, client_class)
@@ -340,6 +341,6 @@ def get_options(argv):
if __name__ == '__main__':
OPTIONS = get_options(sys.argv[1:])
main(model.Database(),
-yaml.load(open(OPTIONS.buckets)),
+yaml.safe_load(open(OPTIONS.buckets)),
OPTIONS.threads,
OPTIONS.junit)
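The least mechanical change in make_db.py is the removal of tuple parameters, which Python 3 no longer allows (PEP 3113): the helpers handed to the multiprocessing pool now take one argument and unpack it themselves. The sketch below is a standalone illustration of that pattern, not the commit's code; the worker body is a placeholder for what WORKER_CLIENT does in kettle.

# Standalone sketch of the PEP 3113 fix: Python 2 allowed
#   def get_junits((build_id, gcs_path)): ...
# Python 3 requires a single parameter that is unpacked explicitly, while the
# pool keeps passing one (build_id, gcs_path) tuple per task.
import multiprocessing.pool

def get_junits(build_info):
    build_id, gcs_path = build_info        # explicit unpacking replaces the tuple parameter
    return build_id, gcs_path.upper()      # placeholder for the real GCS fetch

if __name__ == '__main__':
    pool = multiprocessing.pool.ThreadPool(4)
    for build_id, path in pool.imap_unordered(get_junits, [(1, 'gs://a/1'), (2, 'gs://a/2')]):
        print(build_id, path)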
2 changes: 1 addition & 1 deletion kettle/make_db_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3

# Copyright 2017 The Kubernetes Authors.
#
20 changes: 10 additions & 10 deletions kettle/make_json.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3

# Copyright 2017 The Kubernetes Authors.
#
@@ -81,15 +81,15 @@ def make_result(name, time, failure_text):


def buckets_yaml():
-import yaml # does not support pypy
+import ruamel.yaml as yaml # does not support pypy
with open(os.path.dirname(os.path.abspath(__file__))+'/buckets.yaml') as fp:
-return yaml.load(fp)
+return yaml.safe_load(fp)

# pypy compatibility hack
-def python_buckets_yaml(python='python2'):
+def python_buckets_yaml(python='python3'):
return json.loads(subprocess.check_output(
-[python, '-c', 'import json,yaml; print json.dumps(yaml.load(open("buckets.yaml")))'],
-cwd=os.path.dirname(os.path.abspath(__file__))))
+[python, '-c', 'import json, ruamel.yaml as yaml; print(json.dumps(yaml.safe_load(open("buckets.yaml"))))'],
+cwd=os.path.dirname(os.path.abspath(__file__)), encoding='utf-8'))

for attempt in [python_buckets_yaml, buckets_yaml, lambda: python_buckets_yaml(python='python')]:
try:
@@ -105,7 +105,7 @@ def python_buckets_yaml(python='python2'):

def path_to_job_and_number(path):
assert not path.endswith('/')
-for bucket, meta in BUCKETS.iteritems():
+for bucket, meta in BUCKETS.items():
if path.startswith(bucket):
prefix = meta['prefix']
break
@@ -171,7 +171,7 @@ def get_metadata():
if metadata.get('version') == build_version:
metadata.pop('version')
for key, value in metadata.items():
-if not isinstance(value, basestring):
+if not isinstance(value, str):
# the schema specifies a string value. force it!
metadata[key] = json.dumps(value)
if not metadata:
@@ -247,9 +247,9 @@ def main(db, opts, outfile):

if rows_emitted:
gen = db.insert_emitted(rows_emitted, incremental_table=incremental_table)
-print >>sys.stderr, 'incremental progress gen #%d' % gen
+print('incremental progress gen #%d' % gen, file=sys.stderr)
else:
-print >>sys.stderr, 'no rows emitted'
+print('no rows emitted', file=sys.stderr)
return 0


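The pypy compatibility hack above keeps working in Python 3 because subprocess.check_output(..., encoding='utf-8') returns str instead of bytes, so the child's stdout feeds straight into json.loads. A standalone sketch of that round trip (the inline child program here is illustrative, not the one in the diff):

# Sketch: run a child interpreter, decode its stdout, and parse the result as JSON.
# Without encoding= (or text=True), check_output returns bytes in Python 3.
import json
import subprocess
import sys

out = subprocess.check_output(
    [sys.executable, '-c', 'import json; print(json.dumps({"ok": True}))'],
    encoding='utf-8')
print(json.loads(out))   # -> {'ok': True}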
6 changes: 3 additions & 3 deletions kettle/make_json_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

# Copyright 2017 The Kubernetes Authors.
#
@@ -14,7 +14,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import cStringIO as StringIO
+import io as StringIO
import json
import time
import unittest
@@ -26,7 +26,7 @@
class ValidateBuckets(unittest.TestCase):
def test_buckets(self):
prefixes = set()
-for name, options in sorted(make_json.BUCKETS.iteritems()):
+for name, options in sorted(make_json.BUCKETS.items()):
if name == 'gs://kubernetes-jenkins/logs/':
continue # only bucket without a prefix
prefix = options.get('prefix', '')
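The cStringIO swap is the usual Python 3 move: io.StringIO buffers text, io.BytesIO buffers bytes, and aliasing the module keeps the test's existing StringIO.StringIO(...) calls unchanged. A short sketch of the distinction (illustrative values only):

# io replaces cStringIO in Python 3; text and byte buffers are separate types.
import io as StringIO                           # alias preserves existing call sites

text_buf = StringIO.StringIO('started')         # str in, str out
byte_buf = StringIO.BytesIO(b'<testsuite/>')    # bytes in, bytes out
print(text_buf.read(), byte_buf.read())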
8 changes: 4 additions & 4 deletions kettle/model.py
@@ -20,7 +20,7 @@
import zlib


-class Database(object):
+class Database:
"""
Store build and test result information, and support incremental updates to results.
"""
@@ -103,9 +103,9 @@ def insert_build_junits(self, build_id, junits):
"""
Insert a junit dictionary {gcs_path: contents} for a given build's rowid.
"""
-for path, data in junits.iteritems():
+for path, data in junits.items():
self.db.execute('replace into file values(?,?)',
-(path, buffer(zlib.compress(data, 9))))
+(path, memoryview(zlib.compress(data.encode('utf-8'), 9))))
self.db.execute('delete from build_junit_missing where build_id=?', (build_id,))

### make_json
@@ -156,7 +156,7 @@ def test_results_for_build(self, path):
for dataz, in self.db.execute(
'select data from file where path between ? and ?',
(path, path + '\x7F')):
-data = zlib.decompress(dataz)
+data = zlib.decompress(dataz).decode('utf-8')
if data:
results.append(data)
return results
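The model.py hunks show where the str/bytes boundary now sits: junit XML is handled as str, so it is encoded before zlib.compress and decoded after zlib.decompress, and memoryview replaces Python 2's buffer for binding the BLOB. A minimal round-trip sketch against an in-memory database (the schema and path below are illustrative, not kettle's exact ones):

# Sketch of the compress -> store -> decompress round trip used for junit files.
import sqlite3
import zlib

db = sqlite3.connect(':memory:')
db.execute('create table file (path text primary key, data blob)')

data = '<testsuite><testcase name="t"/></testsuite>'   # junit XML handled as str
db.execute('replace into file values(?,?)',
           ('gs://bucket/build/artifacts/junit_01.xml',
            memoryview(zlib.compress(data.encode('utf-8'), 9))))   # memoryview, not buffer

blob = db.execute('select data from file').fetchone()[0]
assert zlib.decompress(blob).decode('utf-8') == data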
2 changes: 1 addition & 1 deletion kettle/model_test.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3

# Copyright 2017 The Kubernetes Authors.
#
