Commit: overhaul
kevinfiol committed Jul 19, 2023
1 parent 7a1f47a commit 63013cc
Showing 9 changed files with 194 additions and 144 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/build.yml
@@ -4,16 +4,16 @@ on:
branches:
- main
schedule:
# Run this script every 2 hrs
# Run this script every 1 hr
# */m hr day month day_of_week
- cron: '0 */2 * * *'
- cron: '0 */1 * * *'
workflow_dispatch:
inputs:
tags:
description: 'tag'
jobs:
build-and-deploy:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
steps:
- uses: actions/checkout@v3
- uses: actions/setup-node@v3
3 changes: 2 additions & 1 deletion .gitignore
@@ -1,3 +1,4 @@
node_modules/*
output/index.html
src/data.json
cache.json
data.json
21 changes: 21 additions & 0 deletions LICENSE
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2021 George Mandis, kevinfiol

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
25 changes: 21 additions & 4 deletions README.md
@@ -6,10 +6,27 @@ Original blogpost: [Introducing Bubo RSS: An Absurdly Minimalist RSS Feed Reader

Blogpost about my fork: [A minimal RSS Feed Reader](https://kevinfiol.com/blog/a-minimal-rss-feed-reader/)

Some changes I made:
Some enhancements I made:

* Replace `nunjucks` with `yeahjs`
* Replace `node-fetch` with `httpie`
* Replace `nunjucks` with template strings
* Replace `node-fetch` with Node's native `fetch`
* Many styling changes, including using the `:target` CSS selector to switch between groups (inspired by https://john-doe.neocities.org/)
* The build script now sorts the feeds in each group by which one has the latest updates (this greatly improves the experience, imo).
* The build script now sorts the feeds in each group by which one has the latest updates (this greatly improves the experience, imo)
* An "All Articles" view
* Privacy-redirect support via config file
* Dark mode via `@media (prefers-color-scheme: dark)`
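
The markup behind the `:target` trick lives in `src/template.js`, which is not part of this diff; the general idea, assuming each feed group is rendered as a `<section>` whose `id` matches a nav fragment link, is roughly:

```css
/* hide every group by default */
section { display: none; }

/* show only the group whose id matches the current URL fragment, e.g. #blogs */
section:target { display: block; }
```

Group switching then works with plain `<a href="#blogs">` links and no JavaScript; in practice a default group also has to be shown when no fragment is set, but that is a template detail.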

## How to build

Node `>=18.x` required.

```shell
npm install
npm run build
```

## How to host on GitHub Pages

1. Fork this repo!
2. Enable [GitHub Pages](https://pages.github.com/) for your repo (either as a project site, or user site)
3. Configure `.github/workflows/build.yml` to your liking
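
The privacy-redirect support mentioned in the README is configured through `src/config.json`, which the new `readCfg` helper in `src/build.js` loads at build time; the same file also carries the `timezone_offset` used for the build timestamp. A minimal sketch, with the key names taken from the `config.redirects` and `config.timezone_offset` lookups in `src/build.js`, and the values borrowed from the old hard-coded `REDIRECTS` map and `TIMEZONE_OFFSET` constant:

```json
{
  "timezone_offset": -4.0,
  "redirects": {
    "twitter": "unofficialbird.com",
    "medium": "scribe.rip"
  }
}
```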
3 changes: 2 additions & 1 deletion package.json
@@ -8,7 +8,8 @@
},
"scripts": {
"build": "node src/build.js",
"dev": "node src/build.js -d"
"write": "node src/build.js --write",
"cached": "node src/build.js --cached"
},
"author": "kevinfiol",
"license": "ISC",
240 changes: 132 additions & 108 deletions src/build.js
@@ -6,85 +6,97 @@
*
*/

import Parser from 'rss-parser';
import { resolve } from 'node:path';
import { readFileSync, writeFileSync } from 'node:fs';
import Parser from 'rss-parser';
import { template } from './template.js';
import feeds from './feeds.json' assert { type: 'json' };

const DEV = process.argv.includes('-d');
const TIMEZONE_OFFSET = -4.0; // Default to EST

const REDIRECTS = {
'twitter': 'unofficialbird.com',
'medium': 'scribe.rip',
'youtube': 'youtube.com',
'youtu': 'youtu.be'
};
const WRITE = process.argv.includes('--write');
const USE_CACHE = !WRITE && process.argv.includes('--cached');

const FEED_CONTENT_TYPES = [
const CACHE_PATH = './src/cache.json';
const OUTFILE_PATH = './output/index.html';
const CONTENT_TYPES = [
'application/json',
'application/atom+xml',
'application/rss+xml',
'application/xml',
'application/octet-stream',
'text/xml'
];

const parser = new Parser();
const contentFromAllFeeds = {};
const errors = [];
const config = readCfg('./src/config.json');
const feeds = USE_CACHE ? {} : readCfg('./src/feeds.json');
const cache = USE_CACHE ? readCfg(CACHE_PATH) : {};

await build({ config, feeds, cache, writeCache: WRITE });

async function build({ config, feeds, cache, writeCache = false }) {
let allItems = cache.allItems || [];
const parser = new Parser();
const errors = [];
const groupContents = {};

for (const groupName in feeds) {
groupContents[groupName] = [];

const results = await Promise.allSettled(
Object.values(feeds[groupName]).map(url =>
fetch(url, { method: 'GET' })
.then(res => [url, res])
.catch(e => {
throw [url, e];
})
)
);

for (const result of results) {
if (result.status === 'rejected') {
const [url, error] = result.reason;
errors.push(url);
console.error(`Error fetching ${url}:\n`, error);
continue;
}

if (!DEV) {
for (const group in feeds) {
contentFromAllFeeds[group] = [];
const [url, response] = result.value;

for (let index = 0; index < feeds[group].length; index++) {
try {
const url = feeds[group][index];
const response = await fetch(url, { method: 'GET' });
const contentType = response.headers.get('content-type').split(';')[0]; // e.g., `application/xml; charset=utf-8` -> `application/xml`
// e.g., `application/xml; charset=utf-8` -> `application/xml`
const contentType = response.headers.get('content-type').split(';')[0];

if (!FEED_CONTENT_TYPES.includes(contentType)) {
// invalid content type
continue;
}
if (!CONTENT_TYPES.includes(contentType))
throw Error(`Feed at ${url} has invalid content-type.`)

const body = await response.text();
const contents = typeof body === "string" ? await parser.parseString(body) : body;
const isRedditRSS = contents.feedUrl && contents.feedUrl.startsWith("https://www.reddit.com/r/");

if (!contents.items.length) {
errors.push(url);
continue; // don't add feeds without items
}
const contents = typeof body === 'string'
? await parser.parseString(body)
: body;
const isRedditRSS = contents.feedUrl && contents.feedUrl.includes("reddit.com/r/");

contents.feed = feeds[group][index];
contents.title = contents.title ? contents.title : contents.link;
contentFromAllFeeds[group].push(contents);
if (contents.items.length === 0)
throw Error(`Feed at ${url} contains no items.`)

// try to normalize date attribute naming
contents.items.forEach(item => {
const timestamp = new Date(item.pubDate || item.isoDate || item.date || item.published).getTime();
item.timestamp = isNaN(timestamp) ? (item.pubDate || item.isoDate || item.date || item.published) : timestamp;
contents.feed = url;
contents.title = contents.title || contents.link;
groupContents[groupName].push(contents);

const formattedDate = new Date(item.timestamp).toLocaleDateString()
item.timestamp = formattedDate !== 'Invalid Date' ? formattedDate : dateString;

// correct link url if lacks hostname
if (item.link && item.link.split('http').length == 1) {
let newLink;

if (contents.link.slice(-1) == '/' && item.link.slice(0, 1) == '/') {
newLink = contents.link + item.link.slice(1);
} else {
newLink = contents.link + item.link;
}

item.link = newLink;
// item sort & normalization
contents.items.sort(byDateSort);
contents.items.forEach((item) => {
// 1. try to normalize date attribute naming
const dateAttr = item.pubDate || item.isoDate || item.date || item.published;
item.timestamp = new Date(dateAttr).toLocaleDateString();

// 2. correct link url if it lacks the hostname
if (item.link && item.link.split('http').length === 1) {
item.link =
// if the hostname ends with a /, and the item link begins with a /
contents.link.slice(-1) === '/' && item.link.slice(0, 1) === '/'
? contents.link + item.link.slice(1)
: contents.link + item.link;
}

// if it's a link submission, let's parse the link to the content and rewrite item.link with it
// I can tell its a link submission by the beginning of the contentSnippet
// 3. parse subreddit feed comments
if (isRedditRSS && item.contentSnippet && item.contentSnippet.startsWith('submitted by ')) {
// matches anything between double quotes, like `<a href="matches this">foo</a>`
const quotesContentMatch = /(?<=")(?:\\.|[^"\\])*(?=")/g;
@@ -93,80 +93,105 @@ if (!DEV) {
item.comments = commentsLink.match(quotesContentMatch)[0];
}

// privacy redirects
const url = new URL(item.link);
const tokens = url.hostname.split('.');
const host = tokens[tokens.length - 2];
const redirect = REDIRECTS[host];

if (redirect) {
item.link = `https://${redirect}${url.pathname}${url.search}`;
// 4. redirects
if (config.redirects) {
// need to parse hostname methodically due to unreliable feeds
const url = new URL(item.link);
const tokens = url.hostname.split('.');
const host = tokens[tokens.length - 2];
const redirect = config.redirects[host];
if (redirect) item.link = `https://${redirect}${url.pathname}${url.search}`;
}
});

// sort items by date
contents.items.sort(byDateSort);
} catch (error) {
console.error(error);
errors.push(feeds[group][index]);
// add to allItems
allItems = [...allItems, ...contents.items];
} catch (e) {
console.error(e);
errors.push(url)
}
}
}
}

let groups;
const groups = cache.groups || Object.entries(groupContents);

if (DEV) {
const testJson = JSON.parse(readFileSync(resolve('./src/data.json'), { encoding: 'utf8' }));
groups = Object.entries(testJson);
} else {
groups = Object.entries(contentFromAllFeeds);
writeFileSync(resolve('./src/data.json'), JSON.stringify(contentFromAllFeeds), 'utf8');
}
if (writeCache) {
writeFileSync(
resolve(CACHE_PATH),
JSON.stringify({ groups, allItems }),
'utf8'
);
}

// for each group, sort the feeds
// sort the feeds by comparing the isoDate of the first items of each feed
for (let i = 0, len = groups.length; i < len; i++) {
groups[i][1].sort((a, b) => byDateSort(a.items[0], b.items[0]));
}
// for each group, sort the feeds
// sort the feeds by comparing the isoDate of the first items of each feed
groups.forEach(([_groupName, feeds]) => {
feeds.sort((a, b) => byDateSort(a.items[0], b.items[0]));
});

// collect all items for 'all' feed
const allItems = [];
for (let [_groupName, feeds] of groups) {
for (let feed of feeds) {
for (let i = 0, len = feed.items.length; i < len; i++) {
allItems.push({
...feed.items[i],
feedUrl: feed.feedUrl ? new URL(feed.feedUrl).hostname : ''
});
}
}
// sort `all articles` view
allItems.sort((a, b) => byDateSort(a, b));

const now = getNowDate(config.timezone_offset).toString();
const html = template({ allItems, groups, now, errors });

writeFileSync(resolve(OUTFILE_PATH), html, { encoding: 'utf8' });
console.log(`Reader built successfully at: ${OUTFILE_PATH}`);
}

allItems.sort((a, b) => byDateSort(a, b));
/**
* utils
*/
function parseDate(item) {
let date = item
? (item.isoDate || item.pubDate)
: undefined;

const now = getNowDate(TIMEZONE_OFFSET).toString();
const html = template({ allItems, groups, now, errors });
writeFileSync(resolve('./output/index.html'), html, { encoding: 'utf8' });
return date ? new Date(date) : undefined;
}

function byDateSort(dateStrA, dateStrB) {
const [aDate, bDate] = [parseDate(dateStrA), parseDate(dateStrB)];
if (!aDate || !bDate) return 0;
return bDate - aDate;
}

function parseDate(item) {
if (item) {
if (item.isoDate) return new Date(item.isoDate);
else if (item.pubDate) return new Date(item.pubDate);
}

return null;
}

function getNowDate(offset) {
function getNowDate(offset = 0) {
let d = new Date();
const utc = d.getTime() + (d.getTimezoneOffset() * 60000);
d = new Date(utc + (3600000 * offset));
return d;
}

function readCfg(path) {
let contents, json;

try {
contents = readFileSync(resolve(path), { encoding: 'utf8' });
} catch (e) {
console.warn(`Warning: Config at ${path} does not exist`);
return {};
}

try {
json = JSON.parse(contents);
} catch (e) {
console.error('Error: Config is Invalid JSON: ' + path);
process.exit(1);
}

return json;
}
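
For reference, the `feeds` object loaded from `src/feeds.json` (also read through `readCfg`) is expected to map group names to lists of feed URLs, judging from the `for (const groupName in feeds)` loop and the `Object.values(feeds[groupName])` fetch above. The group names and URLs below are placeholders:

```json
{
  "blogs": [
    "https://example.com/feed.xml",
    "https://example.org/rss"
  ],
  "news": [
    "https://example.net/atom.xml"
  ]
}
```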