Skip to content

Commit

Permalink
Merge branch 'master' into feature/es-phase-2
Browse files Browse the repository at this point in the history
  • Loading branch information
jennyhliu authored Oct 21, 2024
2 parents e5d5464 + 199cdcd commit 9c341c5
Show file tree
Hide file tree
Showing 31 changed files with 493 additions and 47 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -75,13 +75,20 @@ aws lambda invoke --function-name $PREFIX-ReconciliationReportMigration $OUTFILE
- Added excludeFileRegex configuration to UpdateGranulesCmrMetadataFileLinks
- This is to allow files matching specified regex to be excluded when updating the Related URLs list
- Defaults to the current behavior of excluding no files.
- **CUMULUS-3773**
- Added sftpFastDownload configuration to SyncGranule task.
- Updated `@cumulus/sftp-client` and `@cumulus/ingest/SftpProviderClient` to support both regular and fastDownload.
- Added sftp support to FakeProvider
- Added sftp integration test

### Changed

- **CUMULUS-3838**
- Updated python dependencies to latest:
- cumulus-process-py 1.4.0
- cumulus-message-adapter-python 2.3.0
- **CUMULUS-3906**
  - Bumped example ORCA deployment to version v10.0.1.

### Fixed

Expand Down
8 changes: 2 additions & 6 deletions audit-ci.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@
"expiry": "1 July 2023 11:00"
}
},
"cacheable-request",
"http-cache-semantics",
"lodash.pick",
"semver",
"axios",
"jsonpath-plus"
"jsonpath-plus",
"semver"
]
}
2 changes: 0 additions & 2 deletions bamboo/bootstrap-tf-deployment.sh
Original file line number Diff line number Diff line change
Expand Up @@ -98,5 +98,3 @@ echo "Deploying Cumulus example to $DEPLOYMENT"
-var "pdr_node_name_provider_bucket=$PDR_NODE_NAME_PROVIDER_BUCKET" \
-var "rds_admin_access_secret_arn=$RDS_ADMIN_ACCESS_SECRET_ARN" \
-var "orca_db_user_password=$ORCA_DATABASE_USER_PASSWORD" \
-var "orca_s3_access_key=$AWS_ACCESS_KEY_ID" \
-var "orca_s3_secret_key=$AWS_SECRET_ACCESS_KEY" \
1 change: 1 addition & 0 deletions bamboo/select-stack.js
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ function determineIntegrationTestStackName(cb) {
'Roger Kwarteng': 'rkwarten-ci',
'Paul Pilone': 'ppilone-ci',
'Tim Clark': 'teclark-ci',
'Bryan Wexler': 'bwexler-ci',
};

return git('.').log({ '--max-count': '1' }, (e, r) => {
Expand Down
1 change: 1 addition & 0 deletions bamboo/set-bamboo-env-variables.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ declare -a param_list=(
"bamboo_SECRET_ORCA_DATABASE_USER_PASSWORD"
"bamboo_SECRET_PROVIDER_FTP_PORT"
"bamboo_SECRET_PROVIDER_HTTP_PORT"
"bamboo_SECRET_PROVIDER_SFTP_PORT"
"bamboo_SECRET_RDS_ADMIN_ACCESS_SECRET_ARN"
"bamboo_SECRET_RDS_SECURITY_GROUP"
"bamboo_SECRET_SECURITY_GROUP"
Expand Down
1 change: 1 addition & 0 deletions example/cumulus-tf/ingest_granule_workflow.asl.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"downloadBucket": "{$.cumulus_meta.system_bucket}",
"duplicateHandling": "{$.meta.collection.duplicateHandling}",
"pdr": "{$.meta.pdr}",
"sftpFastDownload": true,
"workflowStartTime": "{$.cumulus_meta.workflow_start_time}",
"cumulus_message": {
"input": "{$.payload}",
Expand Down
8 changes: 4 additions & 4 deletions example/cumulus-tf/orca.tf
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ locals {
# ORCA Module
module "orca" {
aws_region = var.region
source = "https://github.com/nasa/cumulus-orca/releases/download/v9.0.4/cumulus-orca-terraform.zip"
source = "https://github.com/nasa/cumulus-orca/releases/download/v10.0.1/cumulus-orca-terraform.zip"

## --------------------------
## Cumulus Variables
Expand All @@ -27,21 +27,21 @@ module "orca" {
vpc_id = local.vpc_id

## OPTIONAL
tags = var.tags
tags = var.tags
deploy_rds_cluster_role_association = false

## --------------------------
## ORCA Variables
## --------------------------
## REQUIRED
db_admin_password = local.rds_admin_login.password
db_cluster_identifier = local.rds_admin_login.dbClusterIdentifier
db_host_endpoint = local.rds_admin_login.host
db_user_password = var.orca_db_user_password
dlq_subscription_email = var.orca_dlq_subscription_email
orca_default_bucket = var.orca_default_bucket
orca_reports_bucket_name = var.system_bucket
rds_security_group_id = local.rds_security_group
s3_access_key = var.orca_s3_access_key
s3_secret_key = var.orca_s3_secret_key

## OPTIONAL
db_admin_username = local.rds_admin_login.username
Expand Down
12 changes: 0 additions & 12 deletions example/cumulus-tf/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -394,18 +394,6 @@ variable "orca_dlq_subscription_email" {
default = "[email protected]"
}

variable "orca_s3_access_key" {
type = string
description = "Access key for communicating with Orca S3 buckets."
default = ""
}

variable "orca_s3_secret_key" {
type = string
description = "Secret key for communicating with Orca S3 buckets."
default = ""
}

variable "lambda_timeouts" {
description = "Configurable map of timeouts for lambdas"
type = map(number)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@
"sampleFileName": "MOD09GQ.A2017025.h21v00.006.2017034065104.hdf",
"url_path": "{cmrMetadata.Granule.Collection.ShortName}___{cmrMetadata.Granule.Collection.VersionId}/{extractYear(cmrMetadata.Granule.Temporal.RangeDateTime.BeginningDateTime)}/{substring(file.fileName, 0, 3)}"
},
{
"bucket": "protected",
"regex": "^MOD09GQ\\.A[\\d]{7}\\.[\\S]{6}\\.006\\.[\\d]{13}\\.hdf.md5$",
"sampleFileName": "MOD09GQ.A2017025.h21v00.006.2017034065104.hdf.md5",
"checksumFor": "^MOD09GQ\\.A[\\d]{7}\\.[\\S]{6}\\.006\\.[\\d]{13}\\.hdf$"
},
{
"bucket": "private",
"regex": "^MOD09GQ\\.A[\\d]{7}\\.[\\S]{6}\\.006\\.[\\d]{13}\\.hdf\\.met$",
Expand Down
1 change: 1 addition & 0 deletions example/deployments/cumulus/bwexler-ci-tf.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
prefix = "bwexler-ci-tf"
1 change: 1 addition & 0 deletions example/deployments/data-persistence/bwexler-ci-tf.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
prefix = "bwexler-ci-tf"
1 change: 1 addition & 0 deletions example/deployments/db-migration/bwexler-ci-tf.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
prefix = "bwexler-ci-tf"
28 changes: 27 additions & 1 deletion example/fake-provider-cf.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
AWSTemplateFormatVersion: 2010-09-09
Description: Fake http and ftp providers for use with Cumulus integration tests
Description: Fake http, ftp and sftp providers for use with Cumulus integration tests
Parameters:
Prefix:
Type: String
Expand Down Expand Up @@ -185,6 +185,32 @@ Resources:
find /home/testuser -type d -exec chmod 0700 {} \;
find /home/testuser -type f -exec chmod 0600 {} \;

# Configure sftp
yum install -y openssh-server

## Stage sftp test data
mkdir -p /var/sftp/testdata
rsync -a /cumulus/packages/test-data/pdrs /var/sftp/testdata
rsync -a /cumulus/packages/test-data/granules /var/sftp/testdata

        ## use the same account (testuser) for both ftp and sftp
chown testuser:testuser /var/sftp/testdata/granules
chown testuser:testuser /var/sftp/testdata/pdrs

cat >> /etc/ssh/sshd_config <<EOS
Port 22
Match User testuser
ForceCommand internal-sftp
PasswordAuthentication yes
ChrootDirectory /var/sftp/testdata
PermitTunnel no
AllowAgentForwarding no
AllowTcpForwarding no
X11Forwarding no
EOS

service sshd restart

# Cleanup
rm -rf /cumulus

Expand Down
19 changes: 19 additions & 0 deletions example/spec/helpers/Providers.js
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,24 @@ const buildFtpProvider = async (postfix = '') => {
return provider;
};

/**
 * Build an SFTP provider record pointing at the fake test provider.
 *
 * @param {string} [postfix=''] - suffix appended to the provider id so
 *   parallel test runs get unique providers
 * @returns {Promise<Object>} provider record suitable for createProvider;
 *   includes a `port` only when PROVIDER_SFTP_PORT is set in the environment
 */
const buildSftpProvider = async (postfix = '') => {
  const host = await getProviderHost();
  const sftpPort = process.env.PROVIDER_SFTP_PORT;

  return {
    id: `sftp_provider${postfix}`,
    protocol: 'sftp',
    host,
    username: 'testuser',
    password: 'testpass',
    globalConnectionLimit: 10,
    maxDownloadTime: 500,
    // Only pin a port when the CI environment provides one; otherwise the
    // provider falls back to the protocol default.
    ...(sftpPort ? { port: Number(sftpPort) } : {}),
  };
};

const fakeProviderPortMap = {
http: process.env.PROVIDER_HTTP_PORT ? Number(process.env.PROVIDER_HTTP_PORT) : 3030,
https: process.env.PROVIDER_HTTPS_PORT ? Number(process.env.PROVIDER_HTTPS_PORT) : 4040,
Expand Down Expand Up @@ -230,6 +248,7 @@ const deleteProvidersAndAllDependenciesByHost = async (prefix, host) => {
module.exports = {
buildFtpProvider,
buildHttpOrHttpsProvider,
buildSftpProvider,
createProvider,
fetchFakeProviderIp,
fetchFakeS3ProviderBuckets,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"granules": [
{
"granuleId": "MOD09GQ.A2017224.h27v08.006.2017227165029",
"dataType": "MOD09GQ",
"version": "006",
"files": [
{
"path": "/granules",
"size": 1098034,
"type": "data",
"name": "MOD09GQ.A2017224.h27v08.006.2017227165029.hdf"
},
{
"path": "/granules",
"size": 20165,
"type": "metadata",
"name": "MOD09GQ.A2017224.h27v08.006.2017227165029.hdf.met"
},
{
"path": "/granules",
"name": "MOD09GQ.A2017224.h27v08.006.2017227165029.hdf.md5"
}
]
}
],
"pdr": {
"path": "/pdrs",
"size": 596,
"name": "MOD09GQ_1granule_v6.PDR",
"time": 1520873050000
}
}
148 changes: 148 additions & 0 deletions example/spec/parallel/ingestGranule/IngestGranuleSftpSuccessSpec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
'use strict';

const fs = require('fs-extra');
const pMap = require('p-map');
const mime = require('mime-types');

const { headObject } = require('@cumulus/aws-client/S3');
const { randomStringFromRegex } = require('@cumulus/common/test-utils');
const {
addCollections,
api: apiTestUtils,
cleanupCollections,
} = require('@cumulus/integration-tests');
const { deleteExecution, getExecution } = require('@cumulus/api-client/executions');
const { getGranule, deleteGranule } = require('@cumulus/api-client/granules');
const { deleteProvider } = require('@cumulus/api-client/providers');
const { constructCollectionId } = require('@cumulus/message/Collections');
const { buildAndExecuteWorkflow } = require('../../helpers/workflowUtils');
const { loadConfig, createTimestampedTestId, createTestSuffix } = require('../../helpers/testUtils');
const { waitForApiStatus } = require('../../helpers/apiUtils');
const { buildSftpProvider, createProvider } = require('../../helpers/Providers');
// Workflow under test, and the pattern used to mint unique granule IDs for
// each run (matches the MOD09GQ naming convention, e.g.
// MOD09GQ.A2017224.h27v08.006.2017227165029).
const workflowName = 'IngestGranule';
const granuleRegex = '^MOD09GQ\\.A[\\d]{7}\\.[\\w]{6}\\.006\\.[\\d]{13}$';

// End-to-end integration test: ingests a MOD09GQ granule (plus its PDR) from
// the fake SFTP provider through the IngestGranule workflow, then verifies the
// execution status, the API-visible granule record, and the ContentType of the
// files uploaded to S3.
describe('The SFTP Ingest Granules workflow', () => {
  // Fixture listing the granule files and PDR staged on the fake SFTP server.
  const inputPayloadFilename = './spec/parallel/ingestGranule/IngestGranuleSftp.input.payload.json';
  const collectionsDir = './data/collections/s3_MOD09GQ_006';

  let config;
  let inputPayload;
  let pdrFilename;
  let provider;
  let testSuffix;
  let workflowExecution;
  let ingestGranuleExecutionArn;
  // Holds any setup error so each spec can fail() with it, rather than
  // jasmine reporting an opaque beforeAll failure.
  let beforeAllFailed;
  let testGranule;

  beforeAll(async () => {
    try {
      config = await loadConfig();

      // Suffix collection/provider names so parallel test runs don't collide.
      const testId = createTimestampedTestId(config.stackName, 'IngestGranuleSftpSuccess');
      testSuffix = createTestSuffix(testId);
      const collection = { name: `MOD09GQ${testSuffix}`, version: '006' };
      provider = await buildSftpProvider(testSuffix);

      // populate collections, providers and test data
      const promiseResults = await Promise.all([
        addCollections(config.stackName, config.bucket, collectionsDir, testSuffix, testId),
        createProvider(config.stackName, provider),
      ]);

      // createProvider resolves to an API response; the stored record is in body.
      const createdProvider = JSON.parse(promiseResults[1].body).record;

      console.log('\nStarting ingest test');
      inputPayload = JSON.parse(fs.readFileSync(inputPayloadFilename, 'utf8'));
      // Make this run's granule unique: suffix the dataType and generate a
      // fresh granuleId matching the collection's naming convention.
      inputPayload.granules[0].dataType += testSuffix;
      inputPayload.granules[0].granuleId = randomStringFromRegex(granuleRegex);
      pdrFilename = inputPayload.pdr.name;

      console.log(`Granule id is ${inputPayload.granules[0].granuleId}`);

      workflowExecution = await buildAndExecuteWorkflow(
        config.stackName, config.bucket, workflowName, collection, createdProvider, inputPayload
      );

      ingestGranuleExecutionArn = workflowExecution.executionArn;

      // Block until both the execution and the granule record reach
      // 'completed' in the Cumulus API, so the specs below can assert on
      // stable state.
      await waitForApiStatus(
        getExecution,
        {
          prefix: config.stackName,
          arn: ingestGranuleExecutionArn,
        },
        'completed'
      );
      testGranule = await waitForApiStatus(
        getGranule,
        {
          prefix: config.stackName,
          granuleId: inputPayload.granules[0].granuleId,
          collectionId: constructCollectionId(collection.name, collection.version),
        },
        'completed'
      );
    } catch (error) {
      beforeAllFailed = error;
    }
  });

  afterAll(async () => {
    // clean up stack state added by test
    // NOTE(review): if beforeAll failed before setup completed, pdrFilename,
    // ingestGranuleExecutionArn and provider may be undefined and these
    // cleanup calls can themselves throw — confirm that is acceptable here.
    await apiTestUtils.deletePdr({
      prefix: config.stackName,
      pdr: pdrFilename,
    });

    await deleteExecution({ prefix: config.stackName, executionArn: ingestGranuleExecutionArn });

    await Promise.all([
      cleanupCollections(config.stackName, config.bucket, collectionsDir, testSuffix),
      deleteProvider({ prefix: config.stackName, providerId: provider.id }),
    ]);
  });

  describe('the execution', () => {
    afterAll(async () => {
      // clean up granule
      await deleteGranule({
        prefix: config.stackName,
        granuleId: inputPayload.granules[0].granuleId,
        collectionId: testGranule.collectionId,
      });
    });

    it('completes execution with success status', () => {
      if (beforeAllFailed) fail(beforeAllFailed);
      expect(workflowExecution.status).toEqual('completed');
    });

    it('makes the granule available through the Cumulus API', () => {
      if (beforeAllFailed) fail(beforeAllFailed);
      expect(testGranule.granuleId).toEqual(inputPayload.granules[0].granuleId);
    });

    it('uploaded the granules with correct ContentType', async () => {
      if (beforeAllFailed) fail(beforeAllFailed);
      // Each ingested file's S3 ContentType should match the MIME type
      // implied by its extension, falling back to application/octet-stream
      // for unknown extensions.
      const objectTests = await pMap(
        testGranule.files,
        async ({ bucket, key }) => {
          const headObjectResponse = await headObject(
            bucket, key, { retries: 5 }
          );

          return [
            headObjectResponse.ContentType,
            mime.lookup(key) || 'application/octet-stream',
          ];
        }
      );

      objectTests.forEach(
        ([actual, expected]) => expect(actual).toEqual(expected)
      );
    });
  });
});
Loading

0 comments on commit 9c341c5

Please sign in to comment.