Skip to content

Commit

Permalink
chore: Import search-assets and refactor data source/push
Browse files Browse the repository at this point in the history
  • Loading branch information
Bouncey authored and raisedadead committed Jun 7, 2019
1 parent 3552fd3 commit 2789824
Show file tree
Hide file tree
Showing 16 changed files with 5,977 additions and 0 deletions.
1 change: 1 addition & 0 deletions lerna.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"client",
"client/plugins/*",
"curriculum",
"search-indexing",
"tools/challenge-md-parser",
"tools/scripts/seed"
],
Expand Down
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"test:curriculum": "cd ./curriculum && npm test && cd ../",
"test:guide-formatting": "node ./tools/scripts/ci/ensure-guide-formatting.js",
"test:lint": "echo 'Warning: TODO - Define Linting tests.'",
"test:search-indexing": "jest ./search-indexing",
"test:server": "cd ./api-server && npm test && cd ../",
"test:tools": "jest ./tools"
},
Expand Down
3 changes: 3 additions & 0 deletions sample.env
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ MONGOHQ_URL='mongodb://localhost:27017/freecodecamp'
ROLLBAR_APP_ID='my-rollbar-app-id'
ROLLBAR_CLIENT_ID='post_client_id from rollbar dashboard'

ALGOLIA_ADMIN_KEY=123abc
ALGOLIA_APP_ID=ACDEFG

AUTH0_CLIENT_ID=stuff
AUTH0_CLIENT_SECRET=stuff
AUTH0_DOMAIN=stuff
Expand Down
9 changes: 9 additions & 0 deletions search-indexing/algolia/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
const algoliasearch = require('algoliasearch');
const path = require('path');

const envPath = path.resolve(__dirname, '../.env');
require('dotenv').config({ path: envPath });

const { ALGOLIA_ADMIN_KEY, ALGOLIA_APP_ID } = process.env;

exports.client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_ADMIN_KEY);
40 changes: 40 additions & 0 deletions search-indexing/data-push/init/challenges.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
const { client } = require('../../algolia');
const debug = require('debug');
const getChallengeData = require('../../data-sources/challenges');

const log = debug('fcc:search:init:challenge');

// Handle to the Algolia index that receives the challenge records.
const index = client.initIndex('challenge');

/*
 * Configure the index as a side effect of loading this module: search runs
 * over title, description and blockName, records are de-duplicated on `id`,
 * and blockName is exposed as a facet.
 */
index.setSettings(
  {
    searchableAttributes: ['title', 'description', 'blockName'],
    distinct: true,
    attributeForDistinct: 'id',
    attributesForFaceting: ['blockName']
  },
  (err, response) => {
    if (err) {
      log(err.message);
      log(err.debugData);
      // Rethrow the original error. Wrapping it in `new Error(err)` would
      // stringify it and drop its stack trace and extra properties.
      throw err instanceof Error ? err : new Error(err);
    }
    log(`setSettings\n\n${JSON.stringify(response, null, 2)}`);
  }
);

exports.insertChallenges = function insertChallenges() {
return getChallengeData().subscribe(
challenges => {
index.addObjects(challenges, err => {
if (err) {
throw new Error(err);
}
});
},
err => {
throw new Error(err);
},
() => log('complete')
);
};
51 changes: 51 additions & 0 deletions search-indexing/data-push/init/guides.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
const { from } = require('rxjs');
const { toArray, switchMap } = require('rxjs/operators');
const _ = require('lodash');
const { client } = require('../../algolia');
const { getGuideArticleData } = require('../../data-sources/guides');
const debug = require('debug');

const log = debug('fcc:search:init:guides');

// Handle to the Algolia index that receives the guide article records.
const index = client.initIndex('guide');

/*
 * Configure the index as a side effect of loading this module: search runs
 * over title, content and category, records are de-duplicated on `id`, and
 * category is exposed as a facet.
 */
index.setSettings(
  {
    searchableAttributes: ['title', 'content', 'category'],
    distinct: true,
    attributeForDistinct: 'id',
    attributesForFaceting: ['category']
  },
  (err, response) => {
    if (err) {
      log(err.message);
      log(err.debugData);
      // Rethrow the original error. Wrapping it in `new Error(err)` would
      // stringify it and drop its stack trace and extra properties.
      throw err instanceof Error ? err : new Error(err);
    }
    log(`setSettings\n${JSON.stringify(response, null, 2)}`);
  }
);

exports.insertGuides = function insertGuides() {
return getGuideArticleData()
.pipe(
toArray(),
switchMap(articles => {
const chunks = _.chunk(articles, 200).map(_.flatten);
return from(chunks);
})
)
.subscribe(
articles => {
index.addObjects(articles, err => {
if (err) {
throw new Error(err);
}
});
},
err => {
throw new Error(err);
},
() => log('complete')
);
};
35 changes: 35 additions & 0 deletions search-indexing/data-push/init/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
const envPath = require('path').resolve(__dirname, '../../../.env');
require('dotenv').config({ path: envPath });

const { zip, timer, from } = require('rxjs');

/*
* The data sources below have been commented out to avoid inadvertent
* ops usage with Algolia
*/

// const { getStoryData } = require('./news');
// const { insertYoutube } = require('./youtube');
// const { insertChallenges } = require('./challenges');
// const { insertGuides } = require('./guides');

// Insert functions that init() calls, in array order. Every entry is currently
// commented out, so the list is empty and running this script calls nothing.
const dataSources = [
// insertGuides,
// insertChallenges
// insertYoutube,
// disable this until the roll out of news
// getStoryData
];

/**
 * Runs every function in `dataSources`, one per timer tick.
 *
 * `timer(0, 5000)` ticks immediately and then every five seconds; zipping it
 * with the data sources pairs each function with one tick, so consecutive
 * imports start five seconds apart.
 *
 * @returns {object} The subscription driving the staggered calls.
 */
function init() {
  // Only the data-source function is needed; the tick value is discarded.
  const staggered = zip(
    timer(0, 5000),
    from(dataSources),
    (tick, insert) => insert
  );

  return staggered.subscribe(
    insert => {
      insert();
    },
    err => {
      // Rethrow the original error rather than stringifying it through
      // `new Error(err)`, which would drop its stack trace.
      throw err instanceof Error ? err : new Error(err);
    }
  );
}

// Start the import as soon as this script is executed.
init();
37 changes: 37 additions & 0 deletions search-indexing/data-push/init/youtube.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
const { client } = require('../../algolia');
const debug = require('debug');
const { getYoutubeData } = require('../../data-sources/youtube');

const log = debug('fcc:search:init:youtube');

// Handle to the Algolia index that receives the YouTube video records.
const index = client.initIndex('youtube');

/*
 * Configure the index as a side effect of loading this module: search runs
 * over title, description and playlistTitle, records are de-duplicated on
 * `id`, and playlistTitle is exposed as a facet.
 */
index.setSettings(
  {
    searchableAttributes: ['title', 'description', 'playlistTitle'],
    distinct: true,
    attributeForDistinct: 'id',
    attributesForFaceting: ['playlistTitle']
  },
  (err, response) => {
    if (err) {
      log(err.message);
      log(err.debugData);
      // Rethrow the original error. Wrapping it in `new Error(err)` would
      // stringify it and drop its stack trace and extra properties.
      throw err instanceof Error ? err : new Error(err);
    }
    log(`setSettings\n\n${JSON.stringify(response, null, 2)}`);
  }
);

exports.insertYoutube = function insertYoutube() {
getYoutubeData().subscribe(
videos =>
index.addObjects(videos, err => {
if (err) {
throw new Error(err);
}
}),
err => log(err, 'red'),
() => log('Complete', 'blue')
);
};
55 changes: 55 additions & 0 deletions search-indexing/data-push/update/guides.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
const { client } = require('../../algolia');
const _ = require('lodash');
const { getGuideArticleData } = require('../../data-sources/guides');
const debug = require('debug');

const log = debug('fcc:search:update:guides');
// NOTE(review): the init script (data-push/init/guides.js) fills an index
// named 'guide' (singular), whereas this module targets 'guides'. Confirm
// which index name is intended.
const index = client.initIndex('guides');

/**
 * Reducer that gathers the `content` of a series of records.
 *
 * The result carries every property of the record being visited, with
 * `content` replaced by the list of contents seen so far plus this record's.
 *
 * @param {{content: Array}} gathered Accumulator holding the contents so far.
 * @param {object} record The record currently being visited.
 * @returns {object} A new accumulator; neither argument is modified.
 */
const concatContents = (gathered, record) => {
  const contents = [...gathered.content, record.content];
  return { ...record, content: contents };
};

exports.updateGuides = async function updateGuides() {
const newDocs = await getGuideArticleData().toPromise();
let hits = [];
const browseAll = index.browseAll();
browseAll.on('result', function onResult(content) {
hits = hits.concat(content.hits);
});
browseAll.on('end', function onEnd() {
log('Finished browsing this index');
log(`We got ${hits.length} records`);
const docIds = _.uniq(hits.map(doc => doc.id));
docIds.map(id => {
const allCurrentForId = hits.filter(doc => doc.id === id);
const allNewForId = newDocs.filter(doc => doc.id === id);
const newForId = allNewForId.reduce(concatContents, { content: [] });
const currentForId = allCurrentForId.reduce(concatContents, {
content: []
});
const isDiff = newForId.content.some(
snippet => !currentForId.content.includes(snippet)
);
if (isDiff) {
log(id, 'cyan');
const objectIDs = allCurrentForId.map(doc => doc.objectID);
index.addObjects(allNewForId, err => {
if (err) {
throw new Error(err);
}
index.deleteObjects(objectIDs, err => {
if (err) {
throw new Error(err);
}
log(`purge of stale data for ${id} complete`, 'magenta');
});
});
}
});
});
browseAll.on('error', function onError(err) {
throw err;
});
};
67 changes: 67 additions & 0 deletions search-indexing/data-sources/challenges/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
const { from, of } = require('rxjs');
const { switchMap, tap } = require('rxjs/operators');
const debug = require('debug');

const { getChallengesForLang } = require('../../../curriculum/getChallenges');
const { chunkDocument, stripHTML, stripURLs } = require('../../utils');

const log = debug('fcc:search:data-source:challenges');

const { LOCALE: lang } = process.env;

module.exports = function getChallenges() {
log('sourcing challenges');
return from(getChallengesForLang(lang)).pipe(
tap(() => log('parsing curriculum')),
switchMap(curriculum => {
const superBlocks = Object.keys(curriculum).filter(
x => x !== 'certificates'
);
return from(superBlocks.map(superBlock => curriculum[superBlock]));
}),
switchMap(superBlock => {
const { blocks } = superBlock;
return from(Object.keys(blocks).map(block => blocks[block]));
}),
switchMap(block => {
const { meta, challenges } = block;
const { dashedName: blockDashedName } = meta;
return of(
challenges.map(challenge => ({ ...challenge, blockDashedName }))
);
}),
switchMap(challenges => {
const formattedChallenges = challenges
.filter(({ isPrivate }) => !isPrivate)
.reduce((acc, current) => {
const {
id,
title,
description,
instructions,
dashedName,
superBlock,
blockDashedName,
block
} = current;
const formattedChallenge = {
blockName: block,
id,
title,
description: stripURLs(stripHTML(description.concat(instructions))),
url: `/${superBlock}/${blockDashedName}/${dashedName}`
};
return [
...acc,
...chunkDocument(
formattedChallenge,
['title', 'id', 'blockName', 'url'],
'description'
)
];
}, []);

return of(formattedChallenges);
})
);
};
57 changes: 57 additions & 0 deletions search-indexing/data-sources/guides/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
const path = require('path');
const fs = require('fs-extra');
const { Observable } = require('rxjs');
const { map, filter } = require('rxjs/operators');
const readdirp = require('readdirp-walk');
const matter = require('gray-matter');

const { chunkDocument, stripHTML, stripURLs } = require('../../utils');

// Locale whose guide articles are indexed; English when LOCALE is not set.
const lang =
  process.env.LOCALE === undefined ? 'english' : process.env.LOCALE;

// Absolute path of that locale's guide directory, resolved relative to this
// module.
const selectedGuideDir = `../../../guide/${lang}`;
const guideRoot = path.resolve(__dirname, selectedGuideDir);

/**
 * Wraps a `readdirp` walk over `root` in an Observable.
 *
 * @param {string} root Directory handed to `readdirp` as the walk root.
 * @returns {Observable} Emits every `*.md` entry whose stat reports a regular
 *   file, errors when the walk errors and completes when the walk ends.
 */
function fileStream(root) {
  return Observable.create(observer => {
    const walker = readdirp({ root, fileFilter: '*.md' });

    walker.on('data', entry => {
      if (entry.stat.isFile()) {
        observer.next(entry);
      }
    });
    walker.on('error', e => observer.error(e));
    walker.on('end', () => observer.complete());

    return walker;
  });
}

/**
 * Reads a walked file from disk (synchronously) and parses its front matter.
 *
 * @param {object} file Entry from the file walk; `fullPath` is read from disk.
 * @returns {object} The entry's own properties merged with the `gray-matter`
 *   result, whose properties win on a name clash.
 */
function parseFile(file) {
  const raw = fs.readFileSync(file.fullPath);
  const parsed = matter(raw);
  return { ...file, ...parsed };
}

/**
 * Turns a parsed guide file into its search records.
 *
 * The article URL is the file path without a trailing `/index.md`, the
 * category is the first non-empty segment of that URL, and the content is
 * stripped of HTML and URLs before being split by `chunkDocument`.
 *
 * @param {object} file Parsed file with `path`, `content` and `data.title`.
 * @returns {*} Whatever `chunkDocument` returns for the article, chunked on
 *   `content`.
 */
function buildArticle(file) {
  const { path: relativePath, content: rawContent, data } = file;
  const { title } = data;

  const url = relativePath.replace(/\/index.md$/, '');
  const [category] = url.split('/').filter(Boolean);

  // NOTE(review): a string pattern replaces the first '/' only, so deeper
  // paths keep their remaining slashes in the id — confirm this is intended.
  const id = url.replace('/', '-');

  const article = {
    content: stripURLs(stripHTML(rawContent)),
    category,
    title,
    url: `/${url}`,
    id
  };
  return chunkDocument(article, ['title', 'url', 'id', 'category'], 'content');
}

/**
 * Tells whether an article should be kept, i.e. whether it is not a stub.
 *
 * @param {Array<{content: string}>} articleChunks Chunks of one article.
 * @returns {boolean} `false` when any chunk contains the stub notice,
 *   `true` otherwise (including for an empty list).
 */
function filterStubs(articleChunks) {
  const stubNotice = 'This is a stub. Help our community expand it';
  return articleChunks.every(({ content }) => !content.includes(stubNotice));
}

exports.getGuideArticleData = () =>
fileStream(guideRoot).pipe(
map(file => parseFile(file)),
map(file => buildArticle(file)),
filter(article => filterStubs(article))
);
Loading

0 comments on commit 2789824

Please sign in to comment.