Skip to content

Commit 8355013

Browse files
authored
Merge pull request #1701 from magda-io/1680-make-broken-link-sleuther-do-big-files
Made broken link minion not fail on big files
2 parents 2048e90 + 655db6e commit 8355013

File tree

8 files changed

+95
-81
lines changed

8 files changed

+95
-81
lines changed

.gitlab-ci.yml

+5-5
Original file line number | Diff line number | Diff line change
@@ -433,19 +433,19 @@ dockerize:dockerExtensions:
433433
# Helm upgrade
434434
- helm init --upgrade
435435
script:
436-
- helm upgrade $CI_COMMIT_REF_SLUG deploy/helm/magda --install --recreate-pods --namespace $CI_COMMIT_REF_SLUG -f deploy/helm/preview.yml --set global.image.repository=registry.gitlab.com/magda-data/magda/data61,global.image.tag=$CI_COMMIT_REF_SLUG,ingress.hostname=$CI_COMMIT_REF_SLUG.dev.magda.io --timeout 1200 --wait
436+
- helm upgrade $CI_COMMIT_REF_SLUG deploy/helm/magda --install --recreate-pods --namespace $CI_COMMIT_REF_SLUG -f deploy/helm/preview.yml --set global.image.repository=registry.gitlab.com/magda-data/magda/data61,global.image.tag=$CI_COMMIT_REF_SLUG,ingress.hostname=$CI_COMMIT_REF_SLUG.dev.magda.io --timeout 3600 --wait
437437
- echo "Successfully deployed to https://${CI_COMMIT_REF_SLUG}.dev.magda.io"
438438

439439
(UI) Run As Preview:
440440
<<: *runAsPreview
441441
script:
442-
- helm upgrade $CI_COMMIT_REF_SLUG deploy/helm/magda --install --recreate-pods --namespace $CI_COMMIT_REF_SLUG -f deploy/helm/preview.yml --set global.image.repository=registry.gitlab.com/magda-data/magda/data61,global.image.tag=$CI_COMMIT_REF_SLUG,ingress.hostname=$CI_COMMIT_REF_SLUG.dev.magda.io,ingress.targetService=web,tags.all=false,tags.web-server=true,web-server.baseUrl=https://dev.magda.io --timeout 1200 --wait
442+
- helm upgrade $CI_COMMIT_REF_SLUG deploy/helm/magda --install --recreate-pods --namespace $CI_COMMIT_REF_SLUG -f deploy/helm/preview.yml --set global.image.repository=registry.gitlab.com/magda-data/magda/data61,global.image.tag=$CI_COMMIT_REF_SLUG,ingress.hostname=$CI_COMMIT_REF_SLUG.dev.magda.io,ingress.targetService=web,tags.all=false,tags.web-server=true,web-server.baseUrl=https://dev.magda.io --timeout 3600 --wait
443443
- echo "Successfully deployed to https://${CI_COMMIT_REF_SLUG}.dev.magda.io"
444444

445445
(No Data) Run As Preview:
446446
<<: *runAsPreview
447447
script:
448-
- helm upgrade $CI_COMMIT_REF_SLUG deploy/helm/magda --install --recreate-pods --namespace $CI_COMMIT_REF_SLUG -f deploy/helm/preview.yml --set global.image.repository=registry.gitlab.com/magda-data/magda/data61,global.image.tag=$CI_COMMIT_REF_SLUG,ingress.hostname=$CI_COMMIT_REF_SLUG.dev.magda.io,combined-db.waleBackup.method=NONE,elasticsearch.useGcsSnapshots=false --timeout 1200 --wait
448+
- helm upgrade $CI_COMMIT_REF_SLUG deploy/helm/magda --install --recreate-pods --namespace $CI_COMMIT_REF_SLUG -f deploy/helm/preview.yml --set global.image.repository=registry.gitlab.com/magda-data/magda/data61,global.image.tag=$CI_COMMIT_REF_SLUG,ingress.hostname=$CI_COMMIT_REF_SLUG.dev.magda.io,combined-db.waleBackup.method=NONE,elasticsearch.useGcsSnapshots=false --timeout 3600 --wait
449449
- echo "Successfully deployed to https://${CI_COMMIT_REF_SLUG}.dev.magda.io"
450450

451451
Stop Preview: &stopPreview
@@ -489,7 +489,7 @@ Deploy Master To Dev:
489489
- echo "$KUBECTL_CONFIG" > kubectlconfig.yaml
490490
- export KUBECONFIG=kubectlconfig.yaml
491491
- kubectl create secret docker-registry regcred --namespace default --docker-server=registry.gitlab.com --docker-username=gitlab-ci-token --docker-password=$CI_JOB_TOKEN --docker-email=<redacted-email> --dry-run -o json | kubectl apply --namespace default -f -
492-
- helm upgrade magda deploy/helm/magda --install --recreate-pods -f deploy/helm/magda-dev.yml --set global.image.repository=registry.gitlab.com/magda-data/magda/data61,global.image.tag=master --timeout 1200 --wait
492+
- helm upgrade magda deploy/helm/magda --install --recreate-pods -f deploy/helm/magda-dev.yml --set global.image.repository=registry.gitlab.com/magda-data/magda/data61,global.image.tag=master --timeout 3600 --wait
493493

494494
Release Tags To Docker Hub:
495495
stage: release
@@ -559,7 +559,7 @@ Publish Helm Chart:
559559
# # Get version from lerna json
560560
# - apk add --update jq
561561
# - TAG=$(jq -r ".version" lerna.json)
562-
# - helm upgrade $CI_COMMIT_REF_SLUG deploy/helm/magda --install --recreate-pods --namespace $CI_COMMIT_REF_SLUG -f deploy/helm/preview.yml --set global.image.repository=data61,global.image.tag=$TAG,ingress.hostname=$CI_COMMIT_REF_SLUG.dev.magda.io,global.externalUrl=https://$CI_COMMIT_REF_SLUG.dev.magda.io --timeout 1200 --wait
562+
# - helm upgrade $CI_COMMIT_REF_SLUG deploy/helm/magda --install --recreate-pods --namespace $CI_COMMIT_REF_SLUG -f deploy/helm/preview.yml --set global.image.repository=data61,global.image.tag=$TAG,ingress.hostname=$CI_COMMIT_REF_SLUG.dev.magda.io,global.externalUrl=https://$CI_COMMIT_REF_SLUG.dev.magda.io --timeout 3600 --wait
563563

564564
Stop Staging:
565565
<<: *stopPreview

CHANGES.md

+1
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@
2020
- Take open data connector license from dataset level to distribution level and add basic black box test
2121
- Fix logo vertical alignment and partially hidden issue
2222
- Made header padding even
23+
- Made the broken link minion use `GET` for everything and ignore the data.
2324
- Fixed trim with zero records deleted returning 400
2425

2526
## 0.0.47

deploy/helm/preview.yml

+4
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,10 @@ combined-db:
5959
fileName: db-service-account-private-key.json
6060
data:
6161
storage: 250Gi
62+
resources:
63+
limits:
64+
cpu: 2000m
65+
6266
elasticsearch:
6367
data:
6468
heapSize: 500m

magda-minion-broken-link/src/index.ts

+1-1
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ const argv = commonYargs(ID, 6111, "http://localhost:6111", argv =>
1010
describe:
1111
"Number of times to retry external links when checking whether they're broken",
1212
type: "number",
13-
default: 3
13+
default: 1
1414
})
1515
);
1616

magda-minion-broken-link/src/onRecordFound.ts

+22-35
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,8 @@ import parseUriSafe from "./parseUriSafe";
1313
export default async function onRecordFound(
1414
record: Record,
1515
registry: Registry,
16-
retries: number = 5,
16+
retries: number = 1,
1717
baseRetryDelaySeconds: number = 1,
18-
base429RetryDelaySeconds = 60,
1918
ftpHandler: FTPHandler = new FTPHandler()
2019
) {
2120
const distributions: Record[] =
@@ -126,6 +125,9 @@ type DistributionLinkCheck = {
126125
*
127126
* @param distribution The distribution Record
128127
* @param distStringsAspect The dcat-distributions-strings aspect for this distribution
128+
* @param baseRetryDelay The first amount of time that will be waited between retries - it increases exponentially on subsequent retries
129+
* @param retries Number of retries before giving up
130+
* @param ftpHandler The FTP handler to use for FTP addresses
129131
*/
130132
function checkDistributionLink(
131133
distribution: Record,
@@ -256,49 +258,34 @@ function retrieveHttp(
256258
): Promise<BrokenLinkAspect> {
257259
const operation: () => Promise<number> = () => {
258260
return new Promise((resolve, reject) => {
259-
request.head(url, (err: Error, response: http.IncomingMessage) => {
260-
if (err) {
261+
const thisReq = request
262+
.get(url, {
263+
headers: {
264+
Range: "bytes=0-50"
265+
}
266+
})
267+
.on("error", err => {
268+
thisReq.abort();
261269
reject(err);
262-
} else {
270+
})
271+
.on("response", (response: http.IncomingMessage) => {
272+
thisReq.abort();
263273
if (
264274
(response.statusCode >= 200 &&
265275
response.statusCode <= 299) ||
266276
response.statusCode === 429
267277
) {
268278
resolve(response.statusCode);
269279
} else {
270-
request.get(
271-
{
272-
url,
273-
headers: {
274-
Range: "bytes=0-50"
275-
}
276-
},
277-
(err: Error, response: http.IncomingMessage) => {
278-
if (err) {
279-
reject(err);
280-
} else {
281-
if (
282-
(response.statusCode >= 200 &&
283-
response.statusCode <= 299) ||
284-
response.statusCode === 429
285-
) {
286-
resolve(response.statusCode);
287-
} else {
288-
reject(
289-
new BadHttpResponseError(
290-
response.statusMessage,
291-
response,
292-
response.statusCode
293-
)
294-
);
295-
}
296-
}
297-
}
280+
reject(
281+
new BadHttpResponseError(
282+
response.statusMessage,
283+
response,
284+
response.statusCode
285+
)
298286
);
299287
}
300-
}
301-
});
288+
});
302289
});
303290
};
304291

magda-minion-broken-link/src/test/RandomStream.ts

+37
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
/*
2+
* random-stream
3+
* Ben Postlethwaite
4+
* 2013 MIT
5+
*/
6+
import { Readable } from "stream";
7+
8+
export default function RandomStream(options: any) {
9+
options = options || {};
10+
var stream = new Readable();
11+
12+
var randInt = randIntGenerator(options.min, options.max);
13+
14+
stream._read = function(n) {
15+
var self = this;
16+
setTimeout(function() {
17+
self.push(randChar());
18+
}, randInt());
19+
};
20+
21+
return stream;
22+
}
23+
24+
var randAscii = randIntGenerator(33, 126);
25+
26+
function randChar() {
27+
return String.fromCharCode(randAscii());
28+
}
29+
30+
function randIntGenerator(min: number, max: number) {
31+
if (!min) min = 50;
32+
if (!max) max = 250;
33+
34+
return function() {
35+
return Math.floor(Math.random() * (max - min + 1)) + min;
36+
};
37+
}

magda-minion-broken-link/src/test/arbitraries.ts

+1-6
Original file line number | Diff line number | Diff line change
@@ -41,12 +41,7 @@ export const recordArbWithSuccesses = arbFlatMap(
4141
{} as { [a: string]: CheckResult }
4242
);
4343

44-
// some server configurations will disallow HEAD
45-
// method requests. When that fails, we try
46-
// to make a get request to verify the link
47-
const disallowHead = jsc.bool.generator(0);
48-
49-
return { record, successLookup, disallowHead };
44+
return { record, successLookup };
5045
},
5146
({ record, successLookup }) => {
5247
return getKnownProtocolUrls(record).map(

magda-minion-broken-link/src/test/onRecordFound.spec.ts

+24-34
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,7 @@ import {
3030
import FtpHandler from "../FtpHandler";
3131
import AuthorizedRegistryClient from "@magda/typescript-common/dist/registry/AuthorizedRegistryClient";
3232
import parseUriSafe from "../parseUriSafe";
33+
import RandomStream from "./RandomStream";
3334

3435
describe("onRecordFound", function(this: Mocha.ISuiteCallbackContext) {
3536
this.timeout(20000);
@@ -141,11 +142,16 @@ describe("onRecordFound", function(this: Mocha.ISuiteCallbackContext) {
141142
return jsc.assert(
142143
jsc.forall(recordArbWithSuccesses, function({
143144
record,
144-
successLookup,
145-
disallowHead
145+
successLookup
146146
}) {
147147
beforeEachProperty();
148148

149+
/** Endless stream of nonsense, similar to what you'd get if you tried to download a massive file */
150+
const randomStream = RandomStream({
151+
min: 250, // in milliseconds
152+
max: 1000 // in milliseconds
153+
});
154+
149155
// Tell the FTP server to return success/failure for the various FTP
150156
// paths with this dodgy method. Note that because the FTP server can
151157
// only see paths and not host, we only send it the path of the req.
@@ -176,28 +182,19 @@ describe("onRecordFound", function(this: Mocha.ISuiteCallbackContext) {
176182
}) => {
177183
const scope = nock(url);
178184

179-
const intercept = scope.head(
180-
url.endsWith("/") ? "/" : ""
181-
);
182-
183185
if (success !== "error") {
184-
if (!disallowHead) {
185-
intercept.reply(
186-
success === "success" ? 200 : 404
186+
scope
187+
.get(url.endsWith("/") ? "/" : "")
188+
.reply(
189+
success === "success" ? 200 : 404,
190+
() => {
191+
return randomStream;
192+
}
187193
);
188-
if (success !== "success") {
189-
scope
190-
.get(url.endsWith("/") ? "/" : "")
191-
.reply(404);
192-
}
193-
} else {
194-
intercept.reply(405);
195-
scope
196-
.get(url.endsWith("/") ? "/" : "")
197-
.reply(success === "success" ? 200 : 404);
198-
}
199194
} else {
200-
intercept.replyWithError("fail");
195+
scope
196+
.get(url.endsWith("/") ? "/" : "")
197+
.replyWithError("fail");
201198
}
202199

203200
return scope;
@@ -321,17 +318,19 @@ describe("onRecordFound", function(this: Mocha.ISuiteCallbackContext) {
321318
.reply(201);
322319
});
323320

324-
return onRecordFound(record, registry, 0, 0, 0, fakeFtpHandler)
321+
return onRecordFound(record, registry, 0, 0, fakeFtpHandler)
325322
.then(() => {
326323
distScopes.forEach(scope => scope.done());
327324
registryScope.done();
328325
})
329326
.then(() => {
330327
afterEachProperty();
328+
randomStream.destroy();
331329
return true;
332330
})
333331
.catch(e => {
334332
afterEachProperty();
333+
randomStream.destroy();
335334
throw e;
336335
});
337336
}),
@@ -447,11 +446,6 @@ describe("onRecordFound", function(this: Mocha.ISuiteCallbackContext) {
447446

448447
allResults.forEach((failureCodes, i) => {
449448
failureCodes.forEach(failureCode => {
450-
scope
451-
.head(
452-
url.endsWith("/") ? "/" : ""
453-
)
454-
.reply(failureCode);
455449
scope
456450
.get(
457451
url.endsWith("/") ? "/" : ""
@@ -463,7 +457,7 @@ describe("onRecordFound", function(this: Mocha.ISuiteCallbackContext) {
463457
result === "fail429"
464458
) {
465459
scope
466-
.head(
460+
.get(
467461
url.endsWith("/") ? "/" : ""
468462
)
469463
.reply(429);
@@ -472,7 +466,7 @@ describe("onRecordFound", function(this: Mocha.ISuiteCallbackContext) {
472466

473467
if (result === "success") {
474468
scope
475-
.head(url.endsWith("/") ? "/" : "")
469+
.get(url.endsWith("/") ? "/" : "")
476470
.reply(200);
477471
}
478472

@@ -600,18 +594,14 @@ describe("onRecordFound", function(this: Mocha.ISuiteCallbackContext) {
600594
const scope = scopeLookup[base];
601595

602596
failures.forEach(failureCode => {
603-
scope
604-
.head(uri.path())
605-
.delay(delayMs)
606-
.reply(failureCode);
607597
scope
608598
.get(uri.path())
609599
.delay(delayMs)
610600
.reply(failureCode);
611601
});
612602

613603
scope
614-
.head(uri.path())
604+
.get(uri.path())
615605
.delay(delayMs)
616606
.reply(200);
617607
return scopeLookup;

0 commit comments

Comments
 (0)