Skip to content

Commit

Permalink
Add filesystem cache
Browse files: browse the repository at this point in the history
  • Loading branch information
dontcallmedom committed Oct 23, 2023
1 parent 2f297f5 commit abc96df
Show file tree
Hide file tree
Showing 9 changed files with 1,067 additions and 5,362 deletions.
3 changes: 3 additions & 0 deletions .mocharc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"ignore": "test/setup-cache.js"
}
19 changes: 15 additions & 4 deletions lib/caching-queued-fetch.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
const {END_EVENT, ITEM_EVENT, default:RequestQueue} = require('limited-request-queue');

const fsCacheFetch = require('fetch-filecache-for-crawling');

let ghToken = "";
module.exports.DEFAULT_INTERVAL = 50;
Expand All @@ -13,7 +12,7 @@ class Queue {
maxRetry = 2;
cache = {};
originQueue = {};
async request(url, options, {verbose}, attempts = 0) {
async request(url, options, {verbose, fsCachePath}, attempts = 0) {
if (!this.cache[url]) {
if (attempts > this.maxRetry) {
const error = `HTTP error ${this.cache[url].status} ${this.cache[url].statusText} while fetching ${url} (tried ${attempts} times)`;
Expand All @@ -25,7 +24,12 @@ class Queue {
if (verbose) {
console.log("fetching " + url);
}
const r = await fetch(url, options);
const _f = fsCachePath ? fsCacheFetch : fetch;
if (fsCachePath) {
options.logToConsole = verbose;
options.cacheFolder = fsCachePath;
}
const r = await _f(url, options);
this.cache[url] = { status: r.status, statusText: r.statusText, headers: r.headers, body: await r.text()};
}
if (this.cache[url].status >= 400) {
Expand All @@ -44,6 +48,9 @@ class Queue {
);
}
if (retryAfter) {
if (verbose) {
console.log(`Asked to retry fetching ${url} in ${retryAfter}s`);
}
delete this.cache[url];
await wait(retryAfter);
await this.request(url, options, { verbose }, attempts++);
Expand All @@ -70,6 +77,10 @@ class Queue {
})
.then(async (ret) => {
res(ret);
// no need to wait if we're not hitting the network
if (ret.headers?.get("cache-status")?.match(/fetch-filecache-for-crawling; hit/)) {
return;
}
return wait(queueOptions.interval);
}).catch(rej);
});
Expand Down
2 changes: 1 addition & 1 deletion monitor.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ const authedFetch = (url) => {
// Roughly matching github API rate limit of 5000 requests per hour
interval = 750;
}
return queuedFetch(url, { headers }, { interval, verbose: true });
return queuedFetch(url, { headers }, { interval, verbose: true, fsCachePath: ".cache" });
};


Expand Down
Loading

0 comments on commit abc96df

Please sign in to comment.