mirror of https://github.com/rancher/docs.git
87 lines
2.4 KiB
JavaScript
Executable File
87 lines
2.4 KiB
JavaScript
Executable File
#! /usr/bin/env node
|
|
|
|
'use strict';

/*
 * Reads the search records in /output/algolia.json, derives additional
 * "paragraph" records from each record's HTML `content`, and writes the
 * paragraph records followed by the (slimmed-down) page records to
 * /output/final.algolia.json.
 *
 * For every page:
 *   - each top-level <h2> in the content becomes its own paragraph record
 *     (anchor = "#<h2 id>", title = heading text, content = text of the
 *     sibling elements up to the next <h2>);
 *   - a page without any <h2> becomes a single paragraph record that uses
 *     the page title and the full body text;
 *   - the page record itself is kept only if at least one paragraph record
 *     was produced, and its `content` is replaced by its `summary`.
 */

const jsdom = require("jsdom");
const { JSDOM } = jsdom;
const md5 = require('md5');
const fs = require('fs');

const INPUT_PATH = '/output/algolia.json';
const OUTPUT_PATH = '/output/final.algolia.json';

// Maximum number of characters of text kept per paragraph record, so a single
// very long section cannot produce an oversized record.
const MAX_CONTENT_LENGTH = 9000;

/**
 * Builds one paragraph record for `node`.
 *
 * The key order (anchor, title, content, postref, objectID, permalink) and the
 * `objectID: null` placeholder are significant: the objectID is the md5 of the
 * JSON serialisation of the record in exactly this shape.
 *
 * @param {object} node    Page record the paragraph belongs to.
 * @param {string} anchor  Fragment identifier ("#..." ) of the section.
 * @param {string} title   Section heading text, or the page title.
 * @param {string} text    Non-empty text of the section (truncated here).
 * @returns {object} The finished paragraph record.
 */
function buildParagraph(node, anchor, title, text) {
  const paragraph = {
    anchor,
    title,
    content: text.slice(0, MAX_CONTENT_LENGTH),
    postref: node.objectID,
    objectID: null,
    permalink: node.permalink
  };

  // postref alone is shared by all paragraphs of a page, so hash the whole
  // record to obtain a distinct objectID.
  paragraph.objectID = md5(JSON.stringify(paragraph));

  return paragraph;
}

/**
 * Collects the text of the element siblings that follow `heading`, stopping
 * at the next <h2> or at the end of the parent.
 *
 * @param {Element} heading  An <h2> element.
 * @returns {string} Concatenated textContent of the section's elements.
 */
function sectionText(heading) {
  let text = '';

  for (let el = heading.nextElementSibling; el && el.tagName !== 'H2'; el = el.nextElementSibling) {
    text += el.textContent;
  }

  return text;
}

/**
 * Produces the paragraph records for one page record.
 *
 * One record is emitted per <h2> section, in document order, so that every
 * record's title and anchor describe exactly the text it contains. Sections
 * (or pages) with no text produce no record.
 *
 * @param {object} node  Page record with an HTML `content` string.
 * @returns {object[]} Zero or more paragraph records.
 */
function paragraphsForNode(node) {
  const dom = new JSDOM(node.content);

  try {
    const body = dom.window.document.body; // post content wrapped in a body tag
    const headings = Array.from(body.children).filter(el => el.tagName === 'H2');

    // A post without headers: one record with the page title and all the text.
    if (headings.length === 0) {
      const pageText = body.textContent;
      return pageText ? [buildParagraph(node, '#', node.title, pageText)] : [];
    }

    const paragraphs = [];

    for (const heading of headings) {
      const text = sectionText(heading);

      if (text) {
        paragraphs.push(buildParagraph(node, `#${heading.id}`, heading.textContent, text));
      }
    }

    return paragraphs;
  } finally {
    // Release the DOM for this page; otherwise every parsed page stays alive
    // until the process exits.
    dom.window.close();
  }
}

const nodes = JSON.parse(fs.readFileSync(INPUT_PATH, 'utf8'));
const newParagraphs = [];
const newNodes = [];

nodes.forEach(node => {
  const paragraphs = paragraphsForNode(node);

  // Pages that yield no searchable text are left out of the final index.
  if (paragraphs.length > 0) {
    newParagraphs.push(...paragraphs);
    newNodes.push(node);
  }

  // Remove potentially large content and replace it with the summary so that
  // the page record still has highlightable text.
  node.content = node.summary;

  // Remove summary for dedup (it now lives in `content`).
  delete node.summary;
});

const merged = [...newParagraphs, ...newNodes];

fs.writeFileSync(OUTPUT_PATH, JSON.stringify(merged));

process.exit(0);