#! /usr/bin/env node
'use strict';

/**
 * Post-processes an Algolia index export.
 *
 * Reads an array of post records from INPUT_PATH, splits each post's HTML
 * `content` into one searchable "paragraph" record per <h2> section, and
 * writes the paragraph records followed by the (slimmed-down) post records
 * to OUTPUT_PATH.
 */

const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const md5 = require('md5');
const fs = require('fs');

const INPUT_PATH = '/output/algolia.json';
const OUTPUT_PATH = '/output/final.algolia.json';

// Upper bound on the text stored per paragraph record, so a single huge
// section cannot blow past the index's record size limit.
const MAX_CONTENT_LENGTH = 9000;

/**
 * Concatenates the text of every element that follows `heading` up to (but
 * not including) the next <h2> sibling.
 *
 * @param {Element} heading - The <h2> element that opens the section.
 * @returns {string} The section text; empty when the section has no text.
 */
function collectSectionText(heading) {
  let text = '';
  for (
    let sibling = heading.nextElementSibling;
    sibling && sibling.tagName !== 'H2';
    sibling = sibling.nextElementSibling
  ) {
    if (sibling.textContent) {
      text += sibling.textContent;
    }
  }
  return text;
}

/**
 * Builds one paragraph record for `node`.
 *
 * The key order of the record is significant: the objectID is an md5 of the
 * serialized record (with objectID still null), so reordering keys would
 * change every generated ID.
 *
 * @param {object} node - The source post record.
 * @param {string} anchor - Fragment identifier of the section ('#' for none).
 * @param {string} title - Section (or page) title.
 * @param {string} content - Untruncated section text.
 * @returns {object} The paragraph record with a content-derived objectID.
 */
function buildParagraph(node, anchor, title, content) {
  const paragraph = {
    anchor,
    title,
    content: content.slice(0, MAX_CONTENT_LENGTH),
    postref: node.objectID,
    objectID: null,
    permalink: node.permalink,
  };
  // postref alone is shared by every section of a post, so hash the whole
  // record to obtain an ID that is unique per section.
  paragraph.objectID = md5(JSON.stringify(paragraph));
  return paragraph;
}

/**
 * Splits a post into paragraph records.
 *
 * A post with <h2> headings yields one record per heading whose section has
 * text (text that precedes the first heading is not indexed). A post without
 * any <h2> yields a single record holding the whole body text under the page
 * title. Records are returned in document order.
 *
 * @param {object} node - Post record with `content`, `title`, `objectID`
 *   and `permalink` properties.
 * @returns {object[]} Zero or more paragraph records.
 */
function extractParagraphs(node) {
  const dom = new JSDOM(node.content);
  const body = dom.window.document.body; // post content wrapped in a body tag
  const headings = [...body.children].filter((child) => child.tagName === 'H2');

  if (headings.length === 0) {
    const text = body.textContent;
    return text ? [buildParagraph(node, '#', node.title, text)] : [];
  }

  const paragraphs = [];
  for (const heading of headings) {
    const text = collectSectionText(heading);
    if (text) {
      // An empty heading falls back to the page title so the record is
      // never stored without a title.
      const title = heading.textContent || node.title;
      paragraphs.push(buildParagraph(node, `#${heading.id}`, title, text));
    }
  }
  return paragraphs;
}

const nodes = JSON.parse(fs.readFileSync(INPUT_PATH, 'utf8'));
if (!Array.isArray(nodes)) {
  throw new TypeError(`${INPUT_PATH} must contain a JSON array of records`);
}

const newNodes = [];
const newParagraphs = [];

for (const node of nodes) {
  const paragraphs = extractParagraphs(node);
  if (paragraphs.length > 0) {
    newParagraphs.push(...paragraphs);
    // Keep the post record itself only when it produced searchable text.
    newNodes.push(node);
  }

  // Remove potentially large content (see size limits) and replace it with
  // the summary so that results still have highlightable text.
  node.content = node.summary;
  // The summary now lives in `content`; drop the duplicate.
  delete node.summary;
}

const merged = [...newParagraphs, ...newNodes];
fs.writeFileSync(OUTPUT_PATH, JSON.stringify(merged));
process.exit(0);