diff --git a/.gitignore b/.gitignore
index c89e3a128..03624c6b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,3 +34,6 @@ apps/jupyter/bind_dir/redirect.html
 .venv
 lib/remote-processors/remote_processors/*pb2*
 poetry.toml
+apps/integration/runs/
+notebooks/data/
+test-output.log
diff --git a/apps/integration/integration/automation/integrate b/apps/integration/integration/automation/integrate
new file mode 100755
index 000000000..6b96d2aec
--- /dev/null
+++ b/apps/integration/integration/automation/integrate
@@ -0,0 +1,350 @@
+#!/bin/bash
+#
+# integrate - build the sycamore container images, run the integration tests
+# against them, and push the images.
+#
+# Run from the sycamore root directory. A step (build / tests / push) only runs
+# when its flag is given; see --help. Logs for a run are written under
+# apps/integration/runs/<timestamp>/ and uploaded to S3 by handle_outputs.
+
+# Map a "uname -m" machine name ($1) to a docker architecture name:
+# "arm64" for arm64/aarch64, "amd64" for anything else.
+archname() {
+    local unamearchname="$1"
+    local arch="amd64"
+    [[ "$unamearchname" = "arm64" || "$unamearchname" = "aarch64" ]] && arch="arm64"
+    echo "${arch}"
+}
+
+# Print the arguments to stderr, prefixed with "ERROR: ".
+error() {
+    echo "ERROR: $*" >&2
+}
+
+# Report an error and exit with status 1. Inside a command substitution this
+# only ends the subshell, so callers must still check the captured output.
+die() {
+    error "$@"
+    exit 1
+}
+
+# Print the command line help to stdout.
+usage() {
+    echo "Utility script for building containers, running integration tests, and pushing images"
+    echo "Make sure to run this from the sycamore root directory."
+    echo "-------------------------------------------------------"
+    echo "Arguments:"
+    echo " --help Display this message"
+    echo " --build Build images"
+    echo " --tests Run integration tests"
+    echo " --push Push images"
+    echo " --clean Remove logs from previous runs before doing anything."
+    echo " --tag [TAG] When building, running, and/or pushing, use this docker tag."
+    echo " Default is 'integration_tests'"
+    echo " --ssh [TARGET] When building and running tests, also build and run on this host."
+    echo " Useful for multi-arch builds and tests, e.g. --ssh my-arm-box"
+}
+
+NOW="$(date +"%Y-%m-%d_%H_%M_%S")"
+ARCH="$(archname "$(uname -m)")"
+
+RUNDIR="apps/integration/runs/${NOW}"
+GIT_LOGFILE="${RUNDIR}/git.log"
+DOCKER_LOGFILE="${RUNDIR}/docker.log"
+POETRY_LOGFILE="${RUNDIR}/poetry.log"
+PYTEST_LOGFILE="${RUNDIR}/pytest.log"
+QUERY_LOGFILE="${RUNDIR}/test_queries.log"
+
+# Parse args. Every step is off unless its flag switches it on.
+DO_BUILD=0
+DO_TESTS=0
+DO_PUSH=0
+DO_CLEAN=0
+TAG="integration_tests"
+declare SSH_TARGET
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        --help|-h)
+            usage
+            exit 0
+            ;;
+        --build)
+            DO_BUILD=1
+            echo "Will build images"
+            shift
+            ;;
+        --tests)
+            DO_TESTS=1
+            echo "Will run integration tests"
+            shift
+            ;;
+        --push)
+            DO_PUSH=1
+            echo "Will push images"
+            shift
+            ;;
+        --tag)
+            [[ -z $2 ]] && die "A tag must be specified when using the --tag arg; e.g. --tag my-tag"
+            # Anchored: an unanchored [a-z]* matches every string, including other flags.
+            [[ $2 =~ ^[a-z] ]] || die "Detected tag was $2. Tags should begin with lowercase letters"
+            TAG="$2"
+            echo "Using tag ${TAG}"
+            shift 2
+            ;;
+        --clean)
+            DO_CLEAN=1
+            echo "Will clean ${RUNDIR} before running anything"
+            shift
+            ;;
+        --ssh)
+            [[ -z $2 ]] && die "A configured ssh target must be specified when using the --ssh arg; e.g. --ssh my-host"
+            [[ $2 =~ ^[a-z] ]] || die "Detected ssh target was $2. ssh targets should begin with lowercase letters"
+            SSH_TARGET="$2"
+            SSH_BUILDX_PORT=18460 # Selected by googling 5d10 and rerolling until small enough
+            echo "Using ssh target ${SSH_TARGET}"
+            shift 2
+            ;;
+        *)
+            # An unmatched argument would never be shifted and the loop would spin forever.
+            die "Unknown argument '$1'; run with --help for usage"
+            ;;
+    esac
+done
+
+# Entry point: refresh the checkout and, when new commits arrived, run the
+# requested steps, then upload the run directory.
+main() {
+    [[ -d ".git" ]] || die "Please run this script from sycamore root!"
+    # NOTE(review): RUNDIR is this run's own timestamped directory, so this does
+    # not remove logs of earlier runs -- confirm what --clean is meant to delete.
+    if (( DO_CLEAN )); then
+        rm -rf -- "${RUNDIR:?}"
+    fi
+    if [[ -n $SSH_TARGET ]] && (( DO_BUILD )); then
+        create-dual-builder
+        trap cleanup-dual-builder 0 1 2 3 6
+    fi
+    mkdir -p "${RUNDIR}"
+    echo "Building/testing tag ${TAG}" >&2
+    echo "Get the newest git commits" >&2
+    if checkout_main_if_new; then
+        echo "Changes detected. Running Tests" >&2
+        poetry install --no-root > "${POETRY_LOGFILE}" 2>&1 \
+            && { (( ! DO_BUILD )) || build_images > "${DOCKER_LOGFILE}" 2>&1; } \
+            && { (( ! DO_TESTS )) || runtests > "${PYTEST_LOGFILE}" 2>&1; } \
+            && touch "${RUNDIR}/passed"
+        if (( DO_PUSH )); then
+            # Never publish images when an earlier step of this run failed.
+            if [[ -f "${RUNDIR}/passed" ]]; then
+                push_images >> "${DOCKER_LOGFILE}" 2>&1
+            else
+                error "Not pushing images: an earlier step failed"
+            fi
+        fi
+        handle_outputs
+    else
+        echo "No changes detected. Skipping integration tests" >&2
+    fi
+}
+
+
+# Fetch origin/main and rebase onto it when it differs from HEAD.
+# Returns 0 when HEAD and the fetched commit differed, 1 when they matched.
+# Dies when git fails or the working tree has uncommitted changes.
+checkout_main_if_new() {
+    local old_sha new_sha
+    old_sha="$(git rev-parse HEAD)"
+    git fetch origin main > "${GIT_LOGFILE}" 2>&1 || die "git fetch failed, see ${GIT_LOGFILE}"
+    new_sha="$(git rev-parse FETCH_HEAD)"
+    if [[ "${old_sha}" != "${new_sha}" ]]; then
+        [[ -z $(git status --porcelain) ]] || die "Working tree not clean"
+        git pull --rebase origin main >> "${GIT_LOGFILE}" 2>&1 || die "git pull failed, see ${GIT_LOGFILE}"
+        echo "==================" >> "${GIT_LOGFILE}"
+        echo "Using git rev ${new_sha}" >> "${GIT_LOGFILE}"
+        return 0
+    else
+        return 1
+    fi
+}
+
+# Build and push (buildx --push) every image; returns 0 only when all succeed.
+build_images() {
+    echo "Building all images" >&2
+    docker-build-hub apps/crawler/crawler/http/Dockerfile \
+        && docker-build-hub apps/crawler/crawler/s3/Dockerfile \
+        && docker-build-hub apps/importer/Dockerfile.buildx \
+        && docker-build-hub apps/opensearch/Dockerfile \
+        && docker-build-hub apps/jupyter/Dockerfile.buildx --build-arg=TAG="${TAG}" \
+        && docker-build-hub apps/demo-ui/Dockerfile.buildx \
+        && docker-build-hub apps/remote-processor-service/Dockerfile.buildx \
+        && return 0
+    return 1
+}
+
+# Move the query log into the run directory, mark a failed run, and copy the
+# run directory to S3.
+handle_outputs() {
+    echo "Handling test outputs" >&2
+    [[ -f test-output.log ]] && mv test-output.log "${QUERY_LOGFILE}"
+    [[ -f "${RUNDIR}/passed" ]] || touch "${RUNDIR}/failed"
+    aws s3 cp --recursive "${RUNDIR}/" "s3://sycamore-ci/${NOW}/${ARCH}"
+}
+
+# docker push every image at ${TAG}. Returns 0 only when all pushes succeed.
+push_images() {
+    echo "Pushing tested images to dockerhub" >&2
+    docker-push-hub apps/crawler/crawler/http/Dockerfile \
+        && docker-push-hub apps/crawler/crawler/s3/Dockerfile \
+        && docker-push-hub apps/importer/Dockerfile.buildx \
+        && docker-push-hub apps/opensearch/Dockerfile \
+        && docker-push-hub apps/jupyter/Dockerfile.buildx \
+        && docker-push-hub apps/demo-ui/Dockerfile.buildx \
+        && docker-push-hub apps/remote-processor-service/Dockerfile.buildx \
+        && return 0
+    return 1
+}
+
+# Reset the docker state and run pytest against images tagged ${TAG}. With
+# --ssh, the same script is started on the target in parallel and waited for.
+# Returns non-zero when pytest fails or the remote run exits non-zero.
+runtests() {
+    local remote_pid="" remote_cmd status
+    if [[ -n $SSH_TARGET ]]; then
+        # %q keeps the tag a single word when the remote shell parses the command.
+        printf -v remote_cmd 'cd sycamore && ./apps/integration/integration/automation/integrate --tests --clean --tag %q' "${TAG}"
+        ssh "${SSH_TARGET}" "${remote_cmd}" &
+        remote_pid=$!
+    fi
+    docker volume rm sycamore_crawl_data sycamore_jupyter_data sycamore_opensearch_data
+    docker network prune -f
+    docker compose up reset
+    # this is a complicated command, so:
+    # -p integration.conftest - load conftest with plugins, to capture the custom command line arg (--docker-tag)
+    # --noconftest - don't load conftest at pytest runtime; it's already loaded
+    # --docker-tag - specify tag of containers to test
+    poetry run pytest apps/integration/ -p integration.conftest --noconftest --docker-tag "${TAG}"
+    status=$?
+    if [[ -n ${remote_pid} ]]; then
+        wait "${remote_pid}" || { error "integrate on ${SSH_TARGET} exited non-zero"; status=1; }
+    fi
+    return "${status}"
+}
+
+# Build the image described by docker file $1 with buildx and push it as
+# <repo>:${TAG}; any further arguments are handed to "docker buildx build".
+# Returns 1 when the repo name cannot be determined or the build fails.
+docker-build-hub() {
+    local docker_file="$1"
+    [[ -n "${docker_file}" ]] || { error "missing docker file argument"; return 1;}
+    local repo_name
+    repo_name="$(_docker-repo-name "${docker_file}")"
+    [[ -n "${repo_name}" ]] || { error "empty repo name"; return 1;}
+    shift
+
+    # _docker-build-args prints several space-separated flags; split them so each
+    # reaches docker as its own argument instead of one quoted blob.
+    local -a build_args
+    read -r -a build_args <<< "$(_docker-build-args)"
+
+    echo
+    echo "Building in sycamore and pushing to docker hub with repo name '${repo_name}'"
+    docker buildx build "${build_args[@]}" -t "${repo_name}:${TAG}" -f "${docker_file}" \
+        --cache-to type=registry,ref="${repo_name}:build-cache",mode=max \
+        --cache-from type=registry,ref="${repo_name}:build-cache" \
+        --platform="$(_docker-platforms)" "$@" --push . \
+        || { error "buildx failed"; return 1;}
+    echo "Successfully built using docker file $docker_file"
+}
+
+# docker push <repo>:${TAG} for the repo named in docker file $1.
+# Returns 1 when the repo name cannot be determined or the push fails.
+docker-push-hub() {
+    local docker_file="$1"
+    [[ -n "${docker_file}" ]] || { error "missing docker file argument"; return 1;}
+    local repo_name
+    repo_name="$(_docker-repo-name "${docker_file}")"
+    [[ -n "${repo_name}" ]] || { error "empty repo name"; return 1;}
+
+    echo
+    echo "Pushing image to docker hub for repo '${repo_name}'"
+    docker push "${repo_name}:${TAG}" || { error "docker push failed"; return 1;}
+    echo "Successfully pushed image previously built from dockerfile ${docker_file}"
+}
+
+# Print the repo name taken from the "# Repo name: <name>" line of docker file $1.
+# Meant to be called in a command substitution: on failure (private repo, or not
+# exactly one name found) it exits the subshell and prints nothing on stdout.
+_docker-repo-name() {
+    local docker_file="$1"
+    echo "Finding repo name in: ${docker_file}" >&2
+    local repo_name
+    repo_name="$(grep '^# Repo name: ' "${docker_file}" | awk '{print $4}')"
+    [[ "${repo_name}" = *private* ]] && die "Private repo ${repo_name} disallowed"
+    if (( $(wc -w <<< "${repo_name}") != 1 )); then
+        echo "Unable to find repo name in ${docker_file}" 1>&2
+        exit 1
+    fi
+    echo "${repo_name}"
+}
+
+# Print the --build-arg flags that record branch, commit and diff state of the
+# checkout. The values contain no whitespace, so callers may word-split the output.
+_docker-build-args() {
+    local branch rev date diff
+    branch="$(git branch --show-current)"
+    rev="$(git rev-parse --short HEAD)"
+    date="$(git show -s --format=%ci HEAD | sed -e 's/ /_/g')"
+    if [[ -z $(git status --porcelain) ]]; then
+        diff=clean
+    else
+        diff="pending_changes_$(git diff HEAD | shasum | awk '{print $1}')"
+    fi
+    echo "--build-arg=GIT_BRANCH=${branch} --build-arg=GIT_COMMIT=${rev}--${date} --build-arg=GIT_DIFF=${diff}"
+}
+
+# Print the --platform value: the local architecture plus, with --ssh, the
+# target's architecture when it differs.
+_docker-platforms() {
+    local remotearch="${ARCH}"
+    if [[ -n $SSH_TARGET ]]; then
+        # Reuse the value create-dual-builder already looked up, if any.
+        remotearch="${REMOTE_ARCH:-$(archname "$(ssh "${SSH_TARGET}" uname -m)")}"
+    fi
+    if [[ "$ARCH" != "$remotearch" ]]; then
+        echo "linux/${ARCH},linux/${remotearch}"
+    else
+        echo "linux/${ARCH}"
+    fi
+}
+
+# Create and select the "dual-builder" buildx builder backed by a local and a
+# remote buildkit container. Sets REMOTE_ARCH and SSH_TUNNEL_PID.
+create-dual-builder() {
+    # Over ssh, start a buildkit container on the target, and use port forwarding
+    # to talk to it. Also start a local buildkit container, and then create a buildx
+    # remote driver that talks to both of them.
+    ssh -N -L "${SSH_BUILDX_PORT}":localhost:"${SSH_BUILDX_PORT}" "${SSH_TARGET}" &
+    SSH_TUNNEL_PID=$!
+    REMOTE_ARCH="$(archname "$(ssh "${SSH_TARGET}" uname -m)")"
+    ssh "${SSH_TARGET}" docker run -d --name=remote-buildkitd --privileged -p "${SSH_BUILDX_PORT}":"${SSH_BUILDX_PORT}" \
+        moby/buildkit:latest --addr "tcp://0.0.0.0:${SSH_BUILDX_PORT}"
+    docker run -d --name=remote-buildkitd --privileged -p "$((SSH_BUILDX_PORT - 1))":"$((SSH_BUILDX_PORT - 1))" \
+        moby/buildkit:latest --addr "tcp://0.0.0.0:$((SSH_BUILDX_PORT - 1))"
+    docker buildx create --name dual-builder --platform "linux/${ARCH}" --driver=remote "tcp://localhost:$((SSH_BUILDX_PORT - 1))"
+    docker buildx create --append --name dual-builder --platform "linux/${REMOTE_ARCH}" --driver=remote "tcp://localhost:${SSH_BUILDX_PORT}"
+    docker buildx use dual-builder
+}
+
+# Undo create-dual-builder: remove the builder, stop and remove both buildkit
+# containers, and end the ssh port forward. Installed as a trap by main.
+cleanup-dual-builder() {
+    docker buildx rm dual-builder
+    ssh "${SSH_TARGET}" "docker stop remote-buildkitd && docker rm remote-buildkitd"
+    docker stop remote-buildkitd && docker rm remote-buildkitd
+    if [[ -n "${SSH_TUNNEL_PID:-}" ]]; then
+        kill "${SSH_TUNNEL_PID}" 2> /dev/null
+        SSH_TUNNEL_PID=""
+    fi
+}
+
+main
diff --git a/apps/integration/integration/automation/runtests.sh b/apps/integration/integration/automation/runtests.sh
deleted file mode 100755
index 34a9ce9a4..000000000
--- a/apps/integration/integration/automation/runtests.sh
+++ /dev/null
@@ -1,52 +0,0 @@
-#!/bin/bash
-
-main() {
-    if [[ ! -d ".git" ]]; then
-        echo "Error: please run this script from sycamore root!" >&2
-        exit 1
-    fi
-    echo "Get the newest git commits" >&2
-    checkout_main_if_new
-    local should_run=$?
-    if [[ $should_run ]]; then
-        echo "Changes detected. Running Tests" >&2
-        poetry install
-        build_containers
-        runtests
-        handle_outputs
-    else
-        echo "No changes detected. Skipping integration tests" >&2
-    fi
-}
-
-checkout_main_if_new() {
-    old_sha="$(git rev-parse HEAD)"
-    git fetch origin main >&2
-    new_sha="$(git rev-parse FETCH_HEAD)"
-    if [[ "${old_sha}" != "${new_sha}" ]]; then
-        git pull origin main >&2
-        return 0
-    else
-        return 1
-    fi
-}
-
-build_containers() {
-    echo "Yep, definitely building containers. That's what this function does" >&2
-}
-
-handle_outputs() {
-    echo "Yep, definitely handling test outputs. That's what this function does" >&2
-}
-
-runtests() {
-    docker system prune -f --volumes
-    docker compose up reset
-    poetry run pytest apps/integration/ -p integration.conftest --noconftest --docker-tag latest_rc
-    # this is a complicated command, so: ^ ^ ^ test against containers tagged latest_rc
-    # | don't load conftest at pytest runtime; it's already loaded
-    # load conftest with plugins, to capture the custom command line arg --docker-tag
-}
-
-
-main