-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactor in more testable modules (#49)
* Refactor in more testable modules
* Make config optional
- Loading branch information
1 parent
1dd21f8
commit 3859d58
Showing
23 changed files
with
744 additions
and
179 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
const QueuedFetch = require("./caching-queued-fetch"); | ||
const { queuedFetch } = QueuedFetch; | ||
|
||
// Optional local configuration (authedFetch reads `config.ghapitoken` from it).
// A missing config.json is expected and silently yields an empty config.
// Any other load failure (e.g. malformed JSON) also falls back to an empty
// config, but is reported so a broken file is not mistaken for an absent one.
let config;
try {
  config = require("../config.json");
} catch (e) {
  if (!e || e.code !== "MODULE_NOT_FOUND") {
    console.warn("Ignoring unusable ../config.json: " + (e && e.message ? e.message : e));
  }
  config = {};
}
|
||
/**
 * Fetches `url` through `queuedFetch`, always sending the dashboard's
 * User-Agent header.
 *
 * For URLs under https://api.github.com/, when `config.ghapitoken` is set,
 * a token `Authorization` header is added and a longer `interval` is used.
 *
 * @param {string} url - absolute URL to fetch (`new URL(url)` throws otherwise)
 * @returns whatever `queuedFetch` returns for that URL
 */
const authedFetch = (url) => {
  const targetsGithubApi = new URL(url).href.startsWith("https://api.github.com/");
  const useGithubToken = targetsGithubApi && config.ghapitoken;

  const headers = {
    'User-Agent': 'W3C Group dashboard https://github.com/w3c/cg-monitor'
  };
  if (useGithubToken) {
    headers['Authorization'] = 'token ' + config.ghapitoken;
  }

  // 200 is the value used for the discourse API, and feels like a safe default in general;
  // 750 roughly matches the github API rate limit of 5000 requests per hour
  const interval = useGithubToken ? 750 : 200;

  const queueOptions = { interval, verbose: true, fsCachePath: ".cache" };
  return queuedFetch(url, { headers }, queueOptions);
};
|
||
|
||
module.exports = authedFetch; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/**
 * Builds a function that pairs a piece of data with the given service.
 *
 * @param {*} service - value stored under the `service` key of every result
 * @returns {function(*): {service: *, data: *}} wrapper producing `{ service, data }`
 */
function wrapService(service) {
  const attachService = (data) => ({ service, data });
  return attachService;
}
|
||
|
||
// Failure mode: | ||
// Source error: | ||
// - service.link doesn't exist (signal) | ||
// - service.link isn't recognized as a well-known data source (signal) | ||
// - service.link is recognized as a data source we don't parse (warn) | ||
// vs | ||
// Fetch error: | ||
// - errors while fetching data from service.link (warn) | ||
|
||
/**
 * Dispatches to the fetcher matching `service.type`.
 *
 * NOTE(review): fetchRSS, fetchMail, fetchWiki, fetchGithub and fetchForum are
 * not brought into scope in the part of this module visible here. Confirm they
 * are required somewhere.
 *
 * @param {{type: string, link: string}} service
 * @returns {Promise<*>} the selected fetcher's result; for a type that is not
 *   handled, a promise resolving to `service` itself
 */
function fetchActivityType(service) {
  switch (service.type) {
    case "blog": {
      // optimistic approach at getting the RSS feed: <blog url>/feed
      // (insert the separating slash when the link does not end with one)
      const link = service.link;
      const needsSlash = typeof link === "string" && !link.endsWith("/");
      return fetchRSS(link + (needsSlash ? "/" : "") + "feed");
    }
    case "rss":
      return fetchRSS(service.link);
    case "lists":
      return fetchMail(service.link);
    case "wiki":
      return fetchWiki(service.link);
    case "repository":
      return fetchGithub(service.link);
    case "forum":
      return fetchForum(service.link);
  }
  // TODO: signal we don't parse this kind of service
  // Wrapped in a promise so every branch is thenable, like the fetchers above.
  return Promise.resolve(service);
}
|
||
/**
 * Fetches the activity data of a service and returns it paired with the service.
 *
 * A "blog" service is reported with its type rewritten to "rss"; every other
 * service is reported unchanged.
 *
 * @param {{type: string, link: string}} service
 * @returns {Promise<{service: Object, data: *}>}
 */
module.exports.fetchActivity = async function fetchActivity(service) {
  const serviceWrapper = service.type === "blog" ? {...service, type: "rss"} : service;
  // Awaiting (rather than chaining .then()) works whether fetchActivityType
  // hands back a promise or a plain value.
  const data = await fetchActivityType(service);
  return wrapService(serviceWrapper)(data);
  // TODO: deal with errors fetching activity data
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
const authedFetch = require("./authed-fetch"); | ||
|
||
/**
 * Collects `{created_at, topic_title}` for the posts listed at `url`, paging
 * backwards by re-requesting with `?before=<smallest post id seen>`.
 *
 * Paging stops when a page has no posts, when the smallest id no longer
 * decreases, or when a request or parse fails. On failure the error is logged
 * and the posts gathered so far are returned.
 *
 * @param {string} url - URL of the posts listing (JSON with a `latest_posts` array)
 * @param {?number} before - id bound for the next page; null for the first page
 * @param {Array} acc - posts gathered so far
 * @returns {Promise<Array<{created_at: *, topic_title: *}>>}
 */
async function recursiveFetchDiscourse(url, before = null, acc = []) {
  const fetchedUrl = url + (before ? '?before=' + before : '');
  try {
    const text = (await authedFetch(fetchedUrl)).body;
    const {latest_posts} = JSON.parse(text);
    // An empty page means there is nothing older to fetch. Without this guard,
    // Math.min() of no ids is Infinity and a "?before=Infinity" request follows.
    if (!latest_posts || latest_posts.length === 0) return acc;
    acc = acc.concat(latest_posts.map(p => ({created_at: p.created_at, topic_title: p.topic_title})));
    const minId = Math.min(...latest_posts.map(p => p.id));
    // Posts without a numeric id give no usable paging cursor.
    if (!Number.isFinite(minId)) return acc;
    if (before === null || before > minId) {
      return recursiveFetchDiscourse(url, minId, acc);
    }
    return acc;
  } catch (e) {
    console.error("Error while fetching " + fetchedUrl);
    console.error(e);
    return acc;
  }
}
|
||
/**
 * Fetches the posts of a forum whose URL contains "discourse" or
 * "socialhub.activitypub.rocks".
 *
 * @param {string} url - forum base URL (one trailing slash is tolerated)
 * @returns {Promise<{items: Array}|string>} `{items}` with the collected posts,
 *   or an explanatory string when the URL is not recognized
 */
async function fetchForum(url) {
  const isKnownForum = url.match(/discourse/) || url.match(/socialhub\.activitypub\.rocks/);
  if (!isKnownForum) {
    return "Did not fetch forum at " + url;
  }
  // TODO: fix case where discourse URL is for a specific category à la
  // https://discourse.wicg.io/c/web-mapping
  // TODO: detect if forum is discourse more reliably?
  const base = url.endsWith("/") ? url.slice(0, -1) : url;
  const items = await recursiveFetchDiscourse(base + '/posts.json');
  return { items };
}
|
||
module.exports.fetchForum = fetchForum; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
const authedFetch = require("./authed-fetch"); | ||
|
||
const linkParse = require('parse-link-header'); | ||
|
||
/**
 * Fetches a JSON resource and follows the `next` relation of the response's
 * `link` header until there is none, concatenating each page's parsed body.
 *
 * @param {string} url - first page to fetch
 * @param {Array} acc - entries to prepend to the result
 * @returns {Promise<Array>} `acc` followed by the data of every page
 */
async function recursiveGhFetch(url, acc = []) {
  let collected = acc;
  let pageUrl = url;
  for (;;) {
    const response = await authedFetch(pageUrl);
    // a response without headers is treated as having no link header
    const linkHeader = (response.headers || new Map()).get('link');
    collected = collected.concat(JSON.parse(response.body));
    const nextPage = linkHeader ? linkParse(linkHeader).next : undefined;
    if (!nextPage) {
      return collected;
    }
    pageUrl = nextPage.url;
  }
}
|
||
/**
 * Gathers activity entries for one repository: its issues, plus its pull
 * requests or, when there are no pull requests, its commits.
 *
 * Issues and commits are requested from the labs.w3.org github cache first and
 * from api.github.com if that fails. If either half fails altogether, it
 * contributes an empty list.
 *
 * @param {string} owner - repository owner login
 * @param {string} repo - repository name
 * @param {number} [size] - repository size; exactly 0 skips the commits lookup
 * @returns {Promise<Array>} flat list of `{html_url, created_at}` entries
 *   (commit entries also carry `commit`)
 */
function fetchGithubRepo(owner, repo, size) {
  const cacheRepoUrl = 'https://labs.w3.org/github-cache/v3/repos/' + owner + '/' + repo;
  const apiRepoUrl = 'https://api.github.com/repos/' + owner + '/' + repo;
  const summarize = i => ({html_url: i.html_url, created_at: i.created_at});

  const loadIssues = async () => {
    try {
      let issues;
      try {
        issues = await recursiveGhFetch(cacheRepoUrl + '/issues?state=all');
      } catch (e) {
        // if the github cache doesn't work, try hitting github directly
        issues = await recursiveGhFetch(apiRepoUrl + '/issues?state=all&per_page=100&direction=asc');
      }
      return issues.map(summarize);
    } catch (e) {
      return [];
    }
  };

  const loadPullsOrCommits = async () => {
    try {
      const pulls = (await recursiveGhFetch(apiRepoUrl + '/pulls?state=all&per_page=100&direction=asc')).map(summarize);
      if (pulls.length > 0) {
        return pulls;
      }
      // if no pull request, we take a look at commits instead
      // unless the repo is empty
      if (size === 0) {
        return [];
      }
      let commits;
      try {
        commits = await recursiveGhFetch(cacheRepoUrl + '/commits');
      } catch (e) {
        // if the github cache doesn't work, try hitting github directly
        commits = await recursiveGhFetch(apiRepoUrl + '/commits?per_page=100&direction=asc');
      }
      return commits.map(i => ({html_url: i.html_url, created_at: i.created_at, commit: i.commit}));
    } catch (e) {
      return [];
    }
  };

  return Promise.all([loadIssues(), loadPullsOrCommits()]).then(parts => parts.flat());
}
|
||
|
||
/**
 * Fetches activity for a github.com URL, which may name a single repository
 * (`github.com/owner/repo`) or only an owner (`github.com/owner`).
 *
 * For an owner, all of its non-fork repositories are fetched and merged.
 *
 * @param {string} url
 * @returns {Promise<{items: Array}|string>} `{items}` with the gathered
 *   entries, or an explanatory string when the URL is not recognized
 */
async function fetchGithub(url) {
  const match = url.match(/github\.com\/([^\/]*)(\/([^\/]*)\/?)?$/);
  if (match === null) {
    return `Unrecognized repo url ${url}`;
  }
  const owner = match[1];
  const repo = match[3];

  if (repo) {
    const items = await fetchGithubRepo(owner, repo);
    return { items };
  }

  const repos = await recursiveGhFetch(`https://api.github.com/users/${owner}/repos?per_page=100&direction=asc`);
  const ownRepos = repos.filter(r => !r.fork);
  const perRepoItems = await Promise.all(
    ownRepos.map(r => (r.owner ? fetchGithubRepo(r.owner.login, r.name, r.size) : []))
  );
  // TODO: this should instead be sent as a collection of services (1 per repo)
  return { items: perRepoItems.flat() };
}
|
||
module.exports.fetchGithub = fetchGithub; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
const authedFetch = require("./authed-fetch"); | ||
|
||
const jsdom = require("jsdom"); | ||
const { JSDOM } = jsdom; | ||
|
||
// Rewrites a leading "http://" into "https://"; any other string is returned unchanged.
const httpToHttps = str => str.replace(/^http:\/\//, "https://");

/**
 * Reads the per-period message counts from a lists.w3.org public archive index.
 *
 * Each table row is expected to hold the period label in its first cell and
 * the message count in its last cell.
 *
 * @param {string} url - archive index URL (http or https)
 * @returns {Promise<Object<string, number>|string>} map from "YYYY-MM" to a
 *   message count, or an explanatory string when the URL is not a
 *   lists.w3.org public archive
 */
async function fetchMail(url) {
  if (!httpToHttps(url).startsWith('https://lists.w3.org/Archives/Public')) return "Did not fetch " + url;
  const text = (await authedFetch(url)).body;
  const dom = new JSDOM(text);
  const data = {};
  for (const tbody of dom.window.document.querySelectorAll("tbody")) {
    for (const tr of tbody.querySelectorAll("tr")) {
      const periodCell = tr.querySelector("td");
      const countCell = tr.querySelector("td:last-child");
      // rows without data cells (e.g. header-only rows) carry no count
      if (!periodCell || !countCell) continue;
      const period = periodCell.textContent;
      const month = new Date(period + " GMT");
      if (month.toJSON()) {
        const mailCount = parseInt(countCell.textContent, 10);
        // some archives are per quarter
        // we detect this on the presence of the string " to "
        // as in "January to March"
        if (period.includes(" to ")) {
          // and if so, we divide arbitrarily in 3 for the per-month view
          for (let i = 0; i < 3; i++) {
            data[month.toJSON().slice(0, 7)] = mailCount / 3;
            // The date was parsed as GMT and toJSON() reports UTC, so step back
            // with the UTC accessors. The local-time ones can skip a month in
            // timezones west of UTC. Day pinned to 1 to avoid end-of-month overflow.
            month.setUTCMonth(month.getUTCMonth() - 1, 1);
          }
        } else {
          data[month.toJSON().slice(0, 7)] = mailCount;
        }
      } else {
        console.log("Empty ml archive at " + url);
      }
    }
  }
  return data;
}
|
||
module.exports.fetchMail = fetchMail; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
const authedFetch = require("./authed-fetch"); | ||
const RSSParser = require('rss-parser'); | ||
const rssparser = new RSSParser(); | ||
|
||
/**
 * Fetches `url` and parses the response body as a feed.
 *
 * @param {string} url - feed URL
 * @returns {Promise<Object|string>} the result of `rssparser.parseString`, or
 *   an "Error fetching …" string when fetching or parsing fails
 */
async function fetchRSS(url) {
  try {
    const text = (await authedFetch(url)).body;
    // `await` is required here: returning the promise un-awaited would let a
    // parse rejection escape this try/catch instead of being reported below.
    return await rssparser.parseString(text);
  } catch (err) {
    return "Error fetching " + url + ": " + err;
  }
}
|
||
module.exports.fetchRSS = fetchRSS; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
const authedFetch = require("./authed-fetch"); | ||
|
||
/**
 * Fetches a JSON resource and, when `key` is given, follows `_links.next`
 * to collect `key`'s entries across all pages.
 *
 * @param {string} url - resource to fetch; when falsy nothing is fetched
 * @param {?string} key - name looked up in `_embedded` (or in `_links` when
 *   the response has no `_embedded`); when falsy the parsed response is
 *   returned as is, without following any paging
 * @param {Array} acc - entries gathered so far
 * @returns {Promise<*>} the parsed response (no key) or the gathered entries
 */
async function recursiveW3cFetch(url, key=null, acc = []) {
  // Nothing (more) to fetch: keep what was gathered so far rather than dropping it.
  if (!url) return acc;
  const text = (await authedFetch(url)).body;
  const data = JSON.parse(text);
  if (!key) {
    return data; // This assumes when no key, no recursion
  }
  const selectedData = data._embedded ? data._embedded[key] : data._links[key];
  const collected = acc.concat(selectedData);
  if (data._links && data._links.next) {
    return recursiveW3cFetch(data._links.next.href, key, collected);
  }
  return collected;
}
|
||
module.exports.recursiveW3cFetch = recursiveW3cFetch; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
const {fetchRSS} = require("./rss-activity"); | ||
|
||
/**
 * Fetches the change feed of a wiki.
 *
 * URLs not starting with "http" are resolved against https://www.w3.org.
 * github.com wikis are read through their ".atom" feed, any other wiki
 * through an `api.php?action=feedrecentchanges` request.
 *
 * @param {string} url - wiki URL, absolute or relative to www.w3.org
 * @returns {Promise<*>} whatever fetchRSS returns for the feed URL
 */
module.exports.fetchWiki = async function fetchWiki(url) {
  if (!url.startsWith('http')) url = 'https://www.w3.org' + url;
  // Drop trailing slashes so the suffixes below do not yield ".../wiki/.atom"
  // or ".../wiki//api.php".
  const base = url.replace(/\/+$/, "");
  if (base.startsWith("https://github.com")) {
    // based on https://stackoverflow.com/a/8573941
    return fetchRSS(base + ".atom");
  }
  // TODO: handle case of a single wiki page
  // handle case of Main_Page
  return fetchRSS(base + '/api.php?action=feedrecentchanges&days=1000&limit=1000');
};
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
// Placeholder: exported but not implemented yet. The body is empty, so the returned promise resolves to undefined and `service` is ignored. | ||
module.exports.wrapServiceData = async function wrapServiceData(service) { | ||
|
||
}; |
Oops, something went wrong.