-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdataops-start-job.py
executable file
·100 lines (73 loc) · 2.75 KB
/
dataops-start-job.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
#!/usr/bin/env python
'''
This script starts a Job on StreamSets DataOps Platform
Prerequisites:
- Python 3.6+; Python 3.9+ preferred
- StreamSets DataOps Platform SDK for Python v5.1+
See: https://docs.streamsets.com/platform-sdk/latest/learn/installation.html
- DataOps Platform API Credentials for a user with Organization Administrator role
- To avoid including API Credentials in the script, export these two environment variables
prior to running the script:
export CRED_ID=<your CRED_ID>
export CRED_TOKEN=<your CRED_TOKEN>
- Set the variable JOB_ID at the top of the script for the Job to start
- Optionally, set the Job's runtime parameters
'''
import datetime
import os
import sys
import time

from streamsets.sdk import ControlHub
# --- Script configuration ---------------------------------------------------

# ID of the Job that this script starts
JOB_ID = '<your-job-id>'

# Example runtime parameters assigned to the Job before it is started (optional)
RUNTIME_PARAMETERS = {
    'PARAM_1': 'aaa',
    'PARAM_2': 'bbb',
}

# API credentials, read from the environment (None when a variable is unset)
CRED_ID = os.environ.get('CRED_ID')
CRED_TOKEN = os.environ.get('CRED_TOKEN')

# Seconds between successive Job status polls against Control Hub
POLLING_FREQUENCY_SECONDS = 10

# Upper bound, in seconds, on the wait for a started Job to become active.
# NOTE: the spelling of this name is kept as-is because the rest of the
# script references it.
MAX_WAIT_SECONDS_FOR_JOB_TO_BEOME_ACTIVE = 120
def print_message(message):
    '''Write a timestamped message to the console.

    The line printed to stdout has the form ``YYYY-MM-DD HH:MM:SS <message>``,
    using the current local time.

    Args:
        message: The text to print. Non-string values are accepted and are
            rendered with their ``str()`` form.
    '''
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # An f-string (rather than '+' concatenation) avoids a TypeError when the
    # caller passes something that is not already a str.
    print(f'{timestamp} {message}')
# ---------------------------------------------------------------------------
# Main flow: connect to Control Hub, look up the Job, verify it is INACTIVE,
# set its runtime parameters, start it, and wait for it to report ACTIVE.
# ---------------------------------------------------------------------------

# Fail fast with a clear message when the credentials were not exported,
# instead of handing None values to ControlHub.
if not CRED_ID or not CRED_TOKEN:
    sys.exit('Error: CRED_ID and CRED_TOKEN environment variables must be set.')

# Connect to Control Hub
print_message('Connecting to Control Hub')
sch = ControlHub(credential_id=CRED_ID, token=CRED_TOKEN)

# Get the Job
job = None
try:
    job = sch.jobs.get(job_id=JOB_ID)
except Exception:
    # `except Exception` (not a bare `except:`) so that Ctrl-C and SystemExit
    # are not misreported as a missing Job.
    sys.exit('Error: Job with ID \'' + JOB_ID + '\' not found.')
print_message(f"Found Job with name '{job.job_name}'")

# Get the Job status
job.refresh()
job_status = job.status.status
print_message(f"Job status is '{job_status}'")

# Make sure Job has INACTIVE status
if job_status != 'INACTIVE':
    print_message('Error: Job must have status \'INACTIVE\' in order to be started')
    sys.exit(-1)

# Set the Job's Runtime Parameters
print_message('Setting Job parameters...')
job.runtime_parameters = RUNTIME_PARAMETERS
sch.update_job(job)

# Start the Job
print_message('Starting Job...')
sch.start_job(job)

# Wait for the Job to become Active.
# The status is refreshed AFTER each sleep so the loop condition always sees
# the latest status, and the timeout is checked BEFORE sleeping so a Job that
# turns ACTIVE on the final poll is not reported as a timeout.
job.refresh()
wait_seconds = 0
while job.status.status != 'ACTIVE':
    if wait_seconds >= MAX_WAIT_SECONDS_FOR_JOB_TO_BEOME_ACTIVE:
        # Exit if Job did not become ACTIVE within the specified time
        print_message('Error: Timeout waiting for Job to become ACTIVE')
        sys.exit(-1)
    print_message('Waiting for Job to become ACTIVE...')
    time.sleep(POLLING_FREQUENCY_SECONDS)
    wait_seconds += POLLING_FREQUENCY_SECONDS
    job.refresh()

print_message('Job status is ACTIVE')
print_message('Done')