docs/scripts/build-algolia.js

87 lines
2.4 KiB
JavaScript
Executable File

#! /usr/bin/env node
'use strict';
const jsdom = require("jsdom");
const {
JSDOM
} = jsdom;
const md5 = require('md5');
const fs = require('fs');
const newNodes = [];
const newParagraphs = [];
const rawdata = fs.readFileSync('/output/algolia.json');
const nodes = JSON.parse(rawdata);
nodes.forEach(node => {
const dom = new JSDOM(node.content);
const content = dom.window.document.body; //post content wrapped in a body tag
const contentChildren = content.children; // all the children of the body tag
const paragraphOut = {
anchor: '#',
title: '',
content: '',
postref: node.objectID,
objectID: null,
permalink: node.permalink
};
let childCount = contentChildren.length - 1; // how many children
// loop over the content until the next h2 heading -> this is the paragraph of searchable text
while(childCount >= 0) {
const child = contentChildren[childCount];
if (child.tagName === "H2") {
//this is our header
paragraphOut.anchor = `#${child.id}`;
paragraphOut.title = child.textContent;
let next = child.nextElementSibling;
while(next && next.tagName !== 'H2') {
if (next && next.textContent) {
paragraphOut.content += next.textContent;
}
next = next.nextElementSibling;
}
}
childCount--;
}
// a post without headers
if (paragraphOut.title === '') {
// Set the title to the page title
paragraphOut.title = node.title;
// pass along the content
paragraphOut.content = content.textContent;
}
if (paragraphOut.content) {
// limit the content to 10k so we dont blow up just incase someone decides to make a 40k blog post in one paragraph ¯\_(ツ)_/¯
paragraphOut.content = paragraphOut.content.substr(0, 9000);
// objectID is not quite unique yet so hash the entire object
paragraphOut.objectID = md5(JSON.stringify(paragraphOut));
newParagraphs.push(paragraphOut);
newNodes.push(node);
}
// remove potentially large content (see size limits) and replace with the summary so that we don't get results with zero highlightable results
node.content = node.summary;
// remove summary for dedup
delete node.summary;
});
const merged = [...newParagraphs, ...newNodes];
fs.writeFileSync('/output/final.algolia.json', JSON.stringify(merged));
process.exit(0);