Skip to content

Commit e5ae032

Browse files
authored
Merge pull request #1882 from magda-io/issue/1878
Issue/1878 Make broken link minion per domain request wait time configurable
2 parents 8638250 + f18a781 commit e5ae032

17 files changed

+848
-117
lines changed

.vscode/launch.json

+3-4
Original file line numberDiff line numberDiff line change
@@ -163,14 +163,13 @@
163163
"request": "launch",
164164
"protocol": "inspector",
165165
"name": "minion-broken-link test",
166-
"program":
167-
"${workspaceRoot}/magda-minion-broken-link/node_modules/mocha/bin/_mocha",
166+
"program": "${workspaceRoot}/node_modules/mocha/bin/_mocha",
168167
"cwd": "${workspaceRoot}/magda-minion-broken-link",
169168
"args": [
170169
"--compilers",
171-
"ts:@magda/scripts/node_modules/ts-node/register",
170+
"ts:${workspaceRoot}/node_modules/ts-node/register",
172171
"--require",
173-
"@magda/scripts/node_modules/tsconfig-paths/register",
172+
"${workspaceRoot}/node_modules/tsconfig-paths/register",
174173
"src/test/**/*.spec.ts"
175174
]
176175
},

CHANGES.md

+1
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
- Made access notes show up on distribution page with configurable text
5959
- Added contact point to distribution page, made title configurable
6060
- When `match-part` search strategy is used, a message is shown on UI
61+
- Made broken link minion per domain request wait time configurable
6162
- Fixed mobile menu not showing up properly
6263

6364
## 0.0.49

deploy/helm/magda-dev.yml

+6
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,12 @@ correspondence-api:
9898
smtpHostname: "smtp.mailgun.org"
9999
smtpPort: 2525
100100

101+
minion-broken-link:
102+
domainWaitTimeConfig:
103+
data.csiro.au: 5
104+
data.gov.au: 5
105+
data.act.gov.au: 30
106+
101107
connectors:
102108
config:
103109
- image:

deploy/helm/magda/charts/minion-broken-link/templates/deployment.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ spec:
3636
value: {{ .Values.global.defaultAdminUserId }}
3737
- name: INTERNAL_URL
3838
value: "http://minion-broken-link"
39+
{{- if .Values.domainWaitTimeConfig }}
40+
- name: DOMAIN_WAIT_TIME_CONFIG
41+
value: '{{ toJson .Values.domainWaitTimeConfig | indent 2 }}'
42+
{{- end }}
3943
- name: JWT_SECRET
4044
valueFrom:
4145
secretKeyRef:

magda-minion-broken-link/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@
3333
"jsverify": "^0.8.2",
3434
"mocha": "^3.5.0",
3535
"nock": "^9.0.14",
36-
"sinon": "^3.0.0",
36+
"sinon": "^7.0.0",
3737
"typescript": "~2.5.0"
3838
},
3939
"dependencies": {
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
import { Writable } from "stream";
2+
3+
/**
 * A writable stream that throws away everything written to it,
 * in the spirit of `/dev/null`.
 */
export default class DevNull extends Writable {
    /**
     * Acknowledges the write straight away without looking at the data.
     *
     * @param _chunk data being written; never inspected
     * @param _encoding encoding of the chunk; never inspected
     * @param done completion callback, always invoked without an error
     */
    _write(_chunk: any, _encoding: string, done: (err?: Error) => void): void {
        done();
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
import { CoreOptions } from "request";
2+
import request from "@magda/typescript-common/dist/request";
3+
import * as http from "http";
4+
import DevNull from "./DevNull";
5+
6+
/**
 * Decides from the status code whether a response counts as a success.
 *
 * Any 2xx status, as well as 429 (Too Many Requests), is accepted and
 * returned as-is; every other status is raised as a `BadHttpResponseError`.
 *
 * @param response http.IncomingMessage: the response to inspect
 * @returns the status code of an accepted response
 * @throws BadHttpResponseError when the status is neither 2xx nor 429
 */
function processResponse(response: http.IncomingMessage) {
    const status = response.statusCode;
    const isAccepted = status === 429 || (status >= 200 && status <= 299);

    if (!isAccepted) {
        throw new BadHttpResponseError(
            response.statusMessage,
            response,
            status
        );
    }

    return status;
}
24+
25+
/**
 * Issues a HEAD request for the given URL by delegating to `doRequest`.
 * Received data will be discarded.
 *
 * @param url String: url to be tested
 * @param requestOpts options handed through to `doRequest` unchanged
 * @returns the value `doRequest` resolves with
 */
export async function headRequest(
    url: string,
    requestOpts: CoreOptions = {}
): Promise<number> {
    const statusCode = await doRequest(url, "head", requestOpts);
    return statusCode;
}
36+
37+
/**
 * Send GET request to the URL, asking only for the first bytes of the body
 * (`Range: bytes=0-50`) so that large files are not downloaded in full.
 * Received data will be discarded.
 *
 * Headers supplied through `requestOpts.headers` are kept; only the `Range`
 * header is always set by this function.
 *
 * @param url String: url to be tested
 * @param requestOpts options passed on to `doRequest`, merged with the
 * `Range` header
 * @returns the value `doRequest` resolves with
 */
export async function getRequest(
    url: string,
    requestOpts: CoreOptions = {}
): Promise<number> {
    return doRequest(url, "get", {
        ...requestOpts,
        headers: {
            // keep caller-supplied headers instead of replacing them wholesale
            ...(requestOpts.headers || {}),
            Range: "bytes=0-50"
        }
    });
}
53+
54+
/**
 * Send request to the URL and report the status code of the response.
 * Received data is piped into a `DevNull` sink, i.e. discarded.
 *
 * The returned promise settles only once both of the following happened:
 * the response status was accepted by `processResponse`, and the request
 * stream emitted `end`. An error on the request, on the sink, or a status
 * rejected by `processResponse` rejects it.
 *
 * @param url String: url to be tested
 * @param method which method of the `request` object to call
 * @param requestOpts options passed to the `request` call
 * @returns the status code returned by `processResponse`
 */
export async function doRequest(
    url: string,
    method: "get" | "head",
    requestOpts: CoreOptions = {}
): Promise<number> {
    const sink = new DevNull();
    console.info(`${method} ${url}`);

    const req = request[method](url, requestOpts);

    // Settles with the verdict on the response status.
    const statusPromise = new Promise<number>((resolve, reject) => {
        req.on("error", err => reject(err));
        req.on("response", (response: http.IncomingMessage) => {
            try {
                console.info(
                    `Got ${response.statusCode} from ${method} ${url}`
                );

                resolve(processResponse(response));
            } catch (e) {
                reject(e);
            }
        });
    });

    // Settles once the body has been fully drained into the sink.
    const drainedPromise = new Promise<void>((resolve, reject) => {
        req.on("end", () => {
            resolve();
        });
        req.pipe(sink).on("error", reject);
        req.on("error", reject);
    });

    const [statusCode] = await Promise.all([statusPromise, drainedPromise]);

    return statusCode;
}
106+
107+
/**
 * Error describing an HTTP response whose status code is treated as a failure.
 * Carries the offending response and its status code for the caller.
 */
export class BadHttpResponseError extends Error {
    /** The response that triggered the error. */
    public response: http.IncomingMessage;
    /** Status code of that response. */
    public httpStatusCode: number;

    /**
     * @param message human readable description, used as the error message
     * @param response the response that triggered the error
     * @param httpStatusCode status code of that response
     */
    constructor(
        message?: string,
        response?: http.IncomingMessage,
        httpStatusCode?: number
    ) {
        // `super` records the message and captures a stack trace that starts
        // at the construction site, so neither is overwritten afterwards.
        super(message);
        // When compiled to ES5, subclasses of built-ins lose their prototype
        // chain; restoring it keeps `instanceof BadHttpResponseError` working.
        Object.setPrototypeOf(this, new.target.prototype);
        this.name = "BadHttpResponseError";
        this.response = response;
        this.httpStatusCode = httpStatusCode;
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import * as URI from "urijs";
2+
3+
/**
 * Wait time (in seconds) used for every domain
 * that has no wait time specified for it.
 */
export let defaultDomainWaitTime = 1;

/**
 * Next access time per domain, i.e. no request to that domain
 * can be made before the recorded time.
 * An entry is only created once a domain is accessed.
 */
export let domainAccessTimeStore: any = {};

/**
 * Replaces the default per-domain wait time.
 *
 * @param waitSeconds new default wait time in seconds
 */
export function setDefaultDomainWaitTime(waitSeconds: number): void {
    defaultDomainWaitTime = waitSeconds;
}

/**
 * @returns the current default per-domain wait time in seconds
 */
export function getDefaultDomainWaitTime(): number {
    return defaultDomainWaitTime;
}

/**
 * Forgets every recorded next access time by swapping in an empty store.
 */
export function clearDomainAccessTimeStore(): void {
    domainAccessTimeStore = {};
}
21+
22+
/**
 * Looks up the wait time for a host.
 *
 * @param host host name used as the key into the config
 * @param domainWaitTimeConfig object keyed by host name; may be empty or absent
 * @returns the configured value when it is a number,
 * otherwise the module-level default wait time
 */
export function getHostWaitTime(host: string, domainWaitTimeConfig: any) {
    const configured = domainWaitTimeConfig
        ? domainWaitTimeConfig[host]
        : undefined;

    return typeof configured === "number" ? configured : defaultDomainWaitTime;
}
31+
32+
/**
 * For given url, return the required waitTime (in milliseconds) from now before the request can be sent.
 * This value can be used to set a timer to trigger the request at the later time.
 *
 * Every call also reserves a slot for the url's host: the host's next allowed
 * access time is pushed back by the host's wait time, so consecutive calls for
 * the same host return increasing wait times.
 *
 * @param url String: the url that to be tested
 * @param domainWaitTimeConfig object: domainWaitTimeConfig
 * @returns milliseconds to wait before sending the request; 0 when it can be sent right away
 */
export default function getUrlWaitTime(url: string, domainWaitTimeConfig: any) {
    const host = new URI(url).hostname();
    const hostWaitMs = getHostWaitTime(host, domainWaitTimeConfig) * 1000;
    const now = new Date().getTime();
    const nextAllowedAt = domainAccessTimeStore[host];

    // Only a real number counts as a recorded time. A host named like an
    // inherited object member (e.g. "constructor") would otherwise be read
    // as an existing entry and produce a NaN wait time.
    if (typeof nextAllowedAt !== "number" || !(nextAllowedAt >= now)) {
        // --- first request, or the previous slot has already passed:
        // --- allow the request now & record the next allowed time
        domainAccessTimeStore[host] = now + hostWaitMs;
        return 0; //--- no need to wait
    }

    // --- need to wait until the recorded time
    const waitTime = nextAllowedAt - now;
    // --- extend the recorded time for the next possible request
    domainAccessTimeStore[host] = nextAllowedAt + hostWaitMs;
    return waitTime;
}

magda-minion-broken-link/src/index.ts

+42-7
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,46 @@ import minion from "@magda/minion-framework/dist/index";
22
import onRecordFound from "./onRecordFound";
33
import brokenLinkAspectDef from "./brokenLinkAspectDef";
44
import commonYargs from "@magda/minion-framework/dist/commonYargs";
5+
import { CoreOptions } from "request";
56

67
const ID = "minion-broken-link";
78

9+
/**
 * Builds a coercion function that turns the JSON text of a command line
 * parameter into an object.
 *
 * @param param name of the parameter, used in error messages
 * @returns a function that parses its argument as JSON and returns the
 * resulting object; it throws an `Error` naming the parameter when the value
 * is missing, is not valid JSON, or is not a JSON object (arrays, `null`
 * and primitives are rejected)
 */
const coerceJson = (param: string) => (json?: string) => {
    if (typeof json !== "string") {
        throw new Error(`Invalid "${param}" parameter. A JSON string is required.`);
    }

    let data: any;
    try {
        data = JSON.parse(json);
    } catch (e) {
        // Re-throw with the parameter name so the user can tell which
        // option carried the malformed JSON.
        const reason = e instanceof Error ? e.message : String(e);
        throw new Error(
            `Invalid "${param}" parameter. Not valid JSON: ${reason}`
        );
    }

    if (!data || typeof data !== "object" || Array.isArray(data)) {
        throw new Error(`Invalid "${param}" parameter.`);
    }
    return data;
};
16+
817
const argv = commonYargs(ID, 6111, "http://localhost:6111", argv =>
9-
argv.option("externalRetries", {
10-
describe:
11-
"Number of times to retry external links when checking whether they're broken",
12-
type: "number",
13-
default: 1
14-
})
18+
argv
19+
.option("externalRetries", {
20+
describe:
21+
"Number of times to retry external links when checking whether they're broken",
22+
type: "number",
23+
default: 1
24+
})
25+
.option("domainWaitTimeConfig", {
26+
describe:
27+
"A object that defines wait time for each of domain. " +
28+
"Echo property name of the object would be the domain name and property value is the wait time in seconds",
29+
type: "string",
30+
coerce: coerceJson("domainWaitTimeConfig"),
31+
default: process.env.DOMAIN_WAIT_TIME_CONFIG || JSON.stringify({})
32+
})
33+
.option("requestOpts", {
34+
describe:
35+
"The default options to use for the JS request library when making HTTP HEAD/GET requests",
36+
type: "string",
37+
coerce: coerceJson("requestOpts"),
38+
default:
39+
process.env.REQUEST_OPTS || JSON.stringify({ timeout: 20000 })
40+
})
1541
);
1642

43+
console.log("domainWaitTimeConfig: ", argv.domainWaitTimeConfig);
44+
1745
function sleuthBrokenLinks() {
1846
return minion({
1947
argv,
@@ -23,7 +51,14 @@ function sleuthBrokenLinks() {
2351
async: true,
2452
writeAspectDefs: [brokenLinkAspectDef],
2553
onRecordFound: (record, registry) =>
26-
onRecordFound(record, registry, argv.externalRetries)
54+
onRecordFound(
55+
record,
56+
registry,
57+
argv.externalRetries,
58+
1,
59+
argv.domainWaitTimeConfig,
60+
argv.requestOpts as CoreOptions
61+
)
2762
});
2863
}
2964

0 commit comments

Comments
 (0)