Skip to content

Commit

Permalink
Refactor in more testable modules (#49)
Browse files Browse the repository at this point in the history
* Refactor in more testable modules

* Make config optional
  • Loading branch information
dontcallmedom authored Nov 10, 2023
1 parent 1dd21f8 commit 3859d58
Show file tree
Hide file tree
Showing 23 changed files with 744 additions and 179 deletions.
27 changes: 27 additions & 0 deletions lib/authed-fetch.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
const QueuedFetch = require("./caching-queued-fetch");
const { queuedFetch } = QueuedFetch;

// Optional local configuration (config.json at the repo root); the only key
// read in this module is `ghapitoken` (a GitHub API token). When the file is
// absent, fall back to an empty config so fetching still works, just without
// GitHub authentication.
let config;
try {
  config = require("../config.json");
} catch (e) {
  config = {};
}

// Wraps queuedFetch with a fixed User-Agent, GitHub authentication (when a
// token is configured) and per-target request pacing; responses are cached
// on disk under ".cache".
const authedFetch = (url) => {
  const headers = {
    'User-Agent': 'W3C Group dashboard https://github.com/w3c/cg-monitor'
  };
  // this is the value used for the discourse API, and feels like a safe default in general
  let interval = 200;
  const target = new URL(url);
  if (target.href.startsWith("https://api.github.com/") && config.ghapitoken) {
    headers['Authorization'] = 'token ' + config.ghapitoken;
    // Roughly matching github API rate limit of 5000 requests per hour
    interval = 750;
  }
  return queuedFetch(url, { headers }, { interval, verbose: true, fsCachePath: ".cache" });
};


module.exports = authedFetch;
43 changes: 43 additions & 0 deletions lib/fetch-activity.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Curried helper: tags a fetched payload with the service it belongs to,
// producing the {service, data} shape consumed by fetchActivity's callers.
function wrapService(service) {
  return (data) => ({ service, data });
}


// Failure mode:
// Source error:
// - service.link doesn't exist (signal)
// - service.link isn't recognized as a well-known data source (signal)
// - service.link is recognized as a data source we don't parse (warn)
// vs
// Fetch error:
// - errors while fetching data from service.link (warn)

// Dispatches a service description to the fetcher matching its declared type.
// Unrecognized types fall through and return the service object unchanged.
function fetchActivityType(service) {
  const { type, link } = service;
  if (type === "blog") {
    // optimistic approach at getting the RSS feed
    return fetchRSS(link + "feed");
  }
  if (type === "rss") return fetchRSS(link);
  if (type === "lists") return fetchMail(link);
  if (type === "wiki") return fetchWiki(link);
  if (type === "repository") return fetchGithub(link);
  if (type === "forum") return fetchForum(link);
  // TODO: signal we don't parse this kind of service
  return service;
}

// Fetches activity data for a service and wraps the result as {service, data}.
// "blog" services are relabeled as "rss" in the wrapper, since their activity
// is actually fetched through the blog's RSS feed.
module.exports.fetchActivity = async function fetchActivity(service) {
  const serviceWrapper = service.type === "blog" ? {...service, type: "rss"} : service;
  // `await` rather than `.then()`: fetchActivityType returns the plain
  // `service` object (not a promise) for unrecognized service types, and
  // calling .then() on that object would throw a TypeError.
  const data = await fetchActivityType(service);
  return wrapService(serviceWrapper)(data);
  // TODO: deal with errors fetching activity data
};
31 changes: 31 additions & 0 deletions lib/forum-activity.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
const authedFetch = require("./authed-fetch");

// Walks backwards through a discourse forum's posts feed (via ?before=<id>),
// accumulating {created_at, topic_title} records until pagination stops making
// progress. Fetch/parse errors are logged and whatever was collected so far is
// returned rather than failing the whole crawl.
async function recursiveFetchDiscourse(url, before = null, acc = []) {
  const fetchedUrl = url + (before ? '?before=' + before : '');
  try {
    const text = (await authedFetch(fetchedUrl)).body;
    const {latest_posts} = JSON.parse(text);
    // Stop on a missing OR empty page: an empty array passes a plain
    // truthiness check, and Math.min() over no ids yields Infinity, which
    // would trigger a useless extra request with ?before=Infinity.
    if (!latest_posts || latest_posts.length === 0) return acc;
    acc = acc.concat(latest_posts.map(p => { return {created_at: p.created_at, topic_title: p.topic_title}; }));
    const minId = Math.min(...latest_posts.map(p => p.id));
    // Recurse only while the cursor moves strictly backwards, guaranteeing termination.
    if (before === null || before > minId) {
      return recursiveFetchDiscourse(url, minId, acc);
    }
    return acc;
  } catch (e) {
    console.error("Error while fetching " + fetchedUrl);
    console.error(e);
    return acc;
  }
}

// Fetches forum activity; only discourse-based forums are supported, detected
// by a crude URL pattern.
async function fetchForum(url) {
  const looksLikeDiscourse = /discourse/.test(url) || /socialhub\.activitypub\.rocks/.test(url);
  if (!looksLikeDiscourse) return "Did not fetch forum at " + url;
  // TODO: fix case where discourse URL is for a specific category à la
  // https://discourse.wicg.io/c/web-mapping
  // TODO: detect if forum is discourse more reliably?
  const base = url.endsWith("/") ? url.slice(0, -1) : url;
  return {items: await recursiveFetchDiscourse(base + '/posts.json')};
}

module.exports.fetchForum = fetchForum;
59 changes: 59 additions & 0 deletions lib/github-activity.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
const authedFetch = require("./authed-fetch");

const linkParse = require('parse-link-header');

// Pages through a GitHub-style API by following the `link` response header,
// concatenating each page's JSON payload into a single array.
async function recursiveGhFetch(url, acc = []) {
  const { headers, body } = await authedFetch(url);
  const data = JSON.parse(body);
  const collected = acc.concat(data);
  // headers may be absent — treat that as "no further pages"
  const link = headers ? headers.get('link') : undefined;
  if (link) {
    const parsed = linkParse(link);
    if (parsed.next) {
      return recursiveGhFetch(parsed.next.url, collected);
    }
  }
  return collected;
}

// Collects activity items for a single GitHub repository: all issues, plus
// either all pull requests or (when there are none) the repo's commits.
// `size` is the repository size as reported by the GitHub API — presumably
// used so that a size of 0 flags an empty repo; TODO confirm against callers.
// Resolves to a flat array of {html_url, created_at[, commit]} records;
// individual fetch failures degrade to empty arrays rather than rejecting.
function fetchGithubRepo(owner, repo, size) {
  return Promise.all([
    // Issues: try the W3C github-cache mirror first to spare the GitHub rate limit.
    recursiveGhFetch('https://labs.w3.org/github-cache/v3/repos/' + owner + '/' + repo + '/issues?state=all')
      // if the github cache doesn't work, try hitting github directly
      .catch(() =>
        recursiveGhFetch('https://api.github.com/repos/' + owner + '/' + repo + '/issues?state=all&per_page=100&direction=asc'))
      .then(data => data.map(i => { return {html_url: i.html_url, created_at: i.created_at};}))
      // any remaining issue-fetch failure counts as "no issue activity"
      .catch(() => []),
    // Pull requests are fetched from GitHub directly.
    recursiveGhFetch('https://api.github.com/repos/' + owner + '/' + repo + '/pulls?state=all&per_page=100&direction=asc')
      .then(data => data.map(i => { return {html_url: i.html_url, created_at: i.created_at};}))
      .then(pulls => {
        if (pulls.length === 0) {
          // if no pull request, we take a look at commits instead
          // unless the repo is empty
          if (size === 0) return [];
          return recursiveGhFetch('https://labs.w3.org/github-cache/v3/repos/' + owner + '/' + repo + '/commits')
            // if the github cache doesn't work, try hitting github directly
            .catch(() =>
              recursiveGhFetch('https://api.github.com/repos/' + owner + '/' + repo + '/commits?per_page=100&direction=asc'))
            .then(data => data.map(i => { return {html_url: i.html_url, created_at: i.created_at, commit: i.commit}; }));
        }
        return pulls;
        // a pulls/commits failure likewise degrades to no activity
      }).catch(() => [])
  ]).then(data => data.flat());
}


// Fetches GitHub activity for either a single repo URL (github.com/owner/repo)
// or a whole owner/org URL (github.com/owner), in which case all non-fork
// repos are aggregated.
async function fetchGithub(url) {
  const match = url.match(/github\.com\/([^\/]*)(\/([^\/]*)\/?)?$/);
  if (!match) return `Unrecognized repo url ${url}`;
  const owner = match[1];
  const repo = match[3];
  if (repo) {
    return {items: await fetchGithubRepo(owner, repo)};
  }
  // Owner-level URL: enumerate the owner's repos and aggregate their activity.
  const repos = await recursiveGhFetch(`https://api.github.com/users/${owner}/repos?per_page=100&direction=asc`);
  const perRepo = await Promise.all(
    repos.filter(r => !r.fork).map(r => r.owner ? fetchGithubRepo(r.owner.login, r.name, r.size) : [])
  );
  // TODO: this should instead be sent as a collection of services (1 per repo)
  return { items: perRepo.flat() };
}

module.exports.fetchGithub = fetchGithub;
38 changes: 38 additions & 0 deletions lib/mail-activity.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
const authedFetch = require("./authed-fetch");

const jsdom = require("jsdom");
const { JSDOM } = jsdom;

// Upgrades a URL's scheme from http:// to https://; other strings pass through.
const httpToHttps = (str) => str.startsWith("http://") ? "https://" + str.slice(7) : str;

// Scrapes a W3C public mailing-list archive index page and returns a map of
// "YYYY-MM" → message count. Only lists.w3.org/Archives/Public URLs are fetched.
async function fetchMail(url) {
  if (!httpToHttps(url).startsWith('https://lists.w3.org/Archives/Public')) return "Did not fetch " + url;
  const text = (await authedFetch(url)).body;
  const dom = new JSDOM(text);
  const data = {};
  for (const tbody of dom.window.document.querySelectorAll("tbody")) {
    for (const tr of tbody.querySelectorAll("tr")) {
      const label = tr.querySelector("td").textContent;
      const month = new Date(label + " GMT");
      if (!month.toJSON()) {
        // row label didn't parse as a date — no messages to record
        console.log("Empty ml archive at " + url);
        continue;
      }
      const mailCount = parseInt(tr.querySelector("td:last-child").textContent, 10);
      // some archives are per quarter
      // we detect this on the presence of the string " to "
      // as in "January to March"
      if (label.includes(" to ")) {
        // and if so, we divide arbitrarily in 3 for the per-month view
        for (let i = 0; i < 3; i++) {
          data[month.toJSON().slice(0, 7)] = mailCount / 3;
          month.setMonth(month.getMonth() - 1);
        }
      } else {
        data[month.toJSON().slice(0, 7)] = mailCount;
      }
    }
  }
  return data;
}

module.exports.fetchMail = fetchMail;
14 changes: 14 additions & 0 deletions lib/rss-activity.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
const authedFetch = require("./authed-fetch");
const RSSParser = require('rss-parser');
const rssparser = new RSSParser();

// Fetches `url` and parses it as an RSS/Atom feed.
// Resolves to the parsed feed object, or to an error-description string when
// the fetch or the parse fails (callers treat strings as soft failures).
async function fetchRSS(url) {
  try {
    const text = (await authedFetch(url)).body;
    // `return await` so that a parseString() rejection is caught below;
    // with a bare `return`, the promise would escape the try block and the
    // parse error would surface as an unhandled rejection instead of the
    // intended "Error fetching" string.
    return await rssparser.parseString(text);
  } catch (err) {
    return "Error fetching " + url + ": " + err;
  }
}

module.exports.fetchRSS = fetchRSS;
17 changes: 17 additions & 0 deletions lib/w3c-data.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
const authedFetch = require("./authed-fetch");

// Pages through a W3C API (HAL-style) collection, following _links.next and
// accumulating the entries found under `key` in _embedded (or _links as a
// fallback). With no key, the first response's payload is returned as-is,
// without pagination.
async function recursiveW3cFetch(url, key = null, acc = []) {
  if (!url) return [];
  const response = await authedFetch(url);
  const data = JSON.parse(response.body);
  let selected;
  if (!key) {
    selected = data;
  } else if (data._embedded) {
    selected = data._embedded[key];
  } else {
    selected = data._links[key];
  }
  if (!key) {
    return selected; // This assumes when no key, no recursion
  }
  const next = data._links && data._links.next;
  if (next) {
    return recursiveW3cFetch(next.href, key, acc.concat(selected));
  }
  return acc.concat(selected);
}

module.exports.recursiveW3cFetch = recursiveW3cFetch;
13 changes: 13 additions & 0 deletions lib/wiki-activity.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
const {fetchRSS} = require("./rss-activity");

// Fetches recent-changes activity for a wiki. Relative URLs are resolved
// against www.w3.org; GitHub wikis are read through their Atom feed, other
// (MediaWiki) instances through the feedrecentchanges API.
module.exports.fetchWiki = async function fetchWiki(url) {
  const absoluteUrl = url.startsWith('http') ? url : 'https://www.w3.org' + url;
  if (absoluteUrl.startsWith("https://github.com")) {
    // based on https://stackoverflow.com/a/8573941
    return fetchRSS(absoluteUrl + ".atom");
  }
  // TODO: handle case of a single wiki page
  // handle case of Main_Page
  return fetchRSS(absoluteUrl + '/api.php?action=feedrecentchanges&days=1000&limit=1000');
};

3 changes: 3 additions & 0 deletions lib/wrap-service-data.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// TODO(review): empty stub — currently resolves to undefined for any input.
// Presumably intended to wrap a service's fetched data (cf. wrapService in
// fetch-activity.js); confirm the intended contract before implementing.
module.exports.wrapServiceData = async function wrapServiceData(service) {

};
Loading

0 comments on commit 3859d58

Please sign in to comment.