mirror of https://github.com/dapr/docs.git
Upgrade Algolia search to v3 (#3560)
* update dapr publish command
* Split workflow into two steps
* Update upload path
* Add concurrency check
* Add Algolia workflow script and step
* Update Algolia box to v3
* Fix secret name
* Override default search bar in Docsy v3
* Remove temporary comment
* Consolidate build and deploy

Signed-off-by: Hannah Hunter <hannahhunter@microsoft.com>
Signed-off-by: Aaron Crawfis <Aaron.Crawfis@microsoft.com>
Co-authored-by: Hannah Hunter <hannahhunter@microsoft.com>
Co-authored-by: Mark Fussell <markfussell@gmail.com>
parent b9759702d5 · commit 0d0d29ac92
.github/scripts/algolia.py (new file)
@@ -0,0 +1,118 @@

import os
import sys
import json

from bs4 import BeautifulSoup
from algoliasearch.search_client import SearchClient

# Root URL of the published site; used to turn og:url values into relative paths.
url = "docs.dapr.io"
if len(sys.argv) > 1:
    starting_directory = os.path.join(os.getcwd(), str(sys.argv[1]))
else:
    starting_directory = os.getcwd()

ALGOLIA_APP_ID = os.getenv('ALGOLIA_APP_ID')
ALGOLIA_API_KEY = os.getenv('ALGOLIA_API_WRITE_KEY')
ALGOLIA_INDEX_NAME = os.getenv('ALGOLIA_INDEX_NAME')

client = SearchClient.create(ALGOLIA_APP_ID, ALGOLIA_API_KEY)
index = client.init_index(ALGOLIA_INDEX_NAME)

excluded_files = [
    "404.html",
]

excluded_directories = [
    "zh-hans",
]

# Top-level docs sections, ranked so search results surface them in this order.
rankings = {
    "Getting started": 0,
    "Concepts": 100,
    "Developing applications": 200,
    "Operations": 300,
    "Reference": 400,
    "Contributing": 500,
    "Home": 600
}


def scan_directory(directory: str, pages: list):
    # Recursively collect every indexable .html page under `directory`.
    if os.path.basename(directory) in excluded_directories:
        print(f'Skipping directory: {directory}')
        return
    for file in os.listdir(directory):
        path = os.path.join(directory, file)
        if os.path.isfile(path):
            if file.endswith(".html") and file not in excluded_files:
                if '<!-- DISABLE_ALGOLIA -->' not in open(path, encoding="utf8").read():
                    print(f'Indexing: {path}')
                    pages.append(path)
                else:
                    print(f'Skipping hidden page: {path}')
        else:
            scan_directory(path, pages)


def parse_file(path: str):
    # Build one Algolia record from a rendered HTML page.
    data = {}
    data["hierarchy"] = {}
    data["rank"] = 999
    data["subrank"] = 99
    data["type"] = "lvl2"
    data["lvl0"] = ""
    data["lvl1"] = ""
    data["lvl2"] = ""
    data["lvl3"] = ""
    text = ""
    subrank = 0
    with open(path, "r", errors='ignore') as file:
        content = file.read()
        soup = BeautifulSoup(content, "html.parser")
        for meta in soup.find_all("meta"):
            if meta.get("name") == "description":
                data["lvl2"] = meta.get("content")
                data["hierarchy"]["lvl1"] = meta.get("content")
            elif meta.get("property") == "og:title":
                data["lvl0"] = meta.get("content")
                data["hierarchy"]["lvl0"] = meta.get("content")
                data["hierarchy"]["lvl2"] = meta.get("content")
            elif meta.get("property") == "og:url":
                data["url"] = meta.get("content")
                data["path"] = meta.get("content").split(url)[1]
                data["objectID"] = meta.get("content").split(url)[1]
        # Use the breadcrumb depth as a sub-rank within the page's section.
        breadcrumbs = soup.find_all("li", class_="breadcrumb-item")
        try:
            subrank = len(breadcrumbs)
            data["subrank"] = subrank
        except Exception:
            subrank = 99
            data["subrank"] = 99
        for bc in breadcrumbs:
            section = bc.text.strip()
            data["lvl1"] = section
            data["hierarchy"]["lvl0"] = section
            try:
                data["rank"] = rankings[section] + subrank
            except KeyError:
                print(f"Rank not found for section {section}")
                data["rank"] = 998
            break
        # Concatenate all paragraph text as the searchable page body.
        for p in soup.find_all("p"):
            if p.text != "":
                text = text + p.text
        data["text"] = text
    return data


def index_payload(payload):
    # Atomically replace the entire index with the freshly built records.
    res = index.replace_all_objects(payload)
    res.wait()


if __name__ == "__main__":
    pages = []
    payload = []
    scan_directory(starting_directory, pages)
    for page in pages:
        data = parse_file(page)
        if "objectID" in data:
            payload.append(data)
    index_payload(payload)
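For orientation, a record built by parse_file() for a typical docs page has roughly the shape sketched below. The field names come from the script above; the values are illustrative placeholders, not taken from a real page.

# Hypothetical example of one record pushed to Algolia by algolia.py.
example_record = {
    "objectID": "/operations/observability/",          # relative path from og:url, also stored as "path"
    "url": "https://docs.dapr.io/operations/observability/",
    "path": "/operations/observability/",
    "lvl0": "Observability",                            # og:title
    "lvl1": "Operations",                               # breadcrumb section
    "lvl2": "How to monitor applications through tracing, metrics and logs",  # meta description
    "lvl3": "",
    "hierarchy": {
        "lvl0": "Operations",                           # breadcrumb section (overwrites og:title)
        "lvl1": "How to monitor applications through tracing, metrics and logs",
        "lvl2": "Observability",
    },
    "rank": 302,                                        # rankings["Operations"] (300) + breadcrumb depth
    "subrank": 2,                                       # number of breadcrumb items
    "type": "lvl2",
    "text": "Concatenated paragraph text from the page body...",
}
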
@@ -79,3 +79,29 @@ jobs:
         with:
           azure_static_web_apps_api_token: ${{ secrets.AZURE_STATIC_WEB_APPS_API_TOKEN_PROUD_BAY_0E9E0E81E }}
           action: "close"
+
+  algolia_index:
+    name: Index site for Algolia
+    if: github.event_name == 'push'
+    needs: ['build_and_deploy_job']
+    runs-on: ubuntu-latest
+    env:
+      ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }}
+      ALGOLIA_API_WRITE_KEY: ${{ secrets.ALGOLIA_API_WRITE_KEY }}
+      ALGOLIA_INDEX_NAME: daprdocs
+    steps:
+      - name: Checkout docs repo
+        uses: actions/checkout@v2
+        with:
+          submodules: false
+      - name: Download Hugo artifacts
+        uses: actions/download-artifact@v3
+        with:
+          name: hugo_build
+          path: site/
+      - name: Install Python packages
+        run: |
+          pip install --upgrade bs4
+          pip install --upgrade 'algoliasearch>=2.0,<3.0'
+      - name: Index site
+        run: python ./.github/scripts/algolia.py ./site
@@ -1,19 +1,13 @@
-<script src="/js/copy-code-button.js"></script>
-
 {{ with .Site.Params.algolia_docsearch }}
-<script src="https://cdn.jsdelivr.net/npm/docsearch.js@2.6.3/dist/cdn/docsearch.min.js"></script>
-<script>
+<script src="https://cdn.jsdelivr.net/npm/@docsearch/js@3"></script>
+<script type="text/javascript">
   docsearch({
-    // Your apiKey and indexName will be given to you once
-    // we create your config
-    apiKey: '54ae43aa28ce8f00c54c8d5f544d29b9',
-    indexName: 'crawler_dapr',
+    container: '#docsearch',
     appId: 'O0QLQGNF38',
-    // Replace inputSelector with a CSS selector
-    // matching your search input
-    inputSelector: '.td-search-input',
-    // Set debug to true to inspect the dropdown
-    debug: false,
+    apiKey: '54ae43aa28ce8f00c54c8d5f544d29b9',
+    indexName: 'daprdocs',
   });
 </script>
 {{ end }}
+
+<script src="/js/copy-code-button.js"></script>
@@ -1,3 +1,3 @@
 {{ with .Site.Params.algolia_docsearch }}
-<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css" />
+<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@docsearch/css@3" />
 {{ end }}
@@ -0,0 +1,30 @@

{{ if .Site.Params.gcs_engine_id -}}
<input type="search" class="form-control td-search-input" placeholder=" {{ T "ui_search" }}" aria-label="{{ T "ui_search" }}" autocomplete="off">
{{ else if .Site.Params.algolia_docsearch -}}
<div id="docsearch"></div>
{{ else if .Site.Params.offlineSearch -}}
{{ $offlineSearchIndex := resources.Get "json/offline-search-index.json" | resources.ExecuteAsTemplate "offline-search-index.json" . -}}
{{ if hugo.IsProduction -}}
{{/* Use `md5` as the fingerprint hash function to shorten the file name and avoid a `file name too long` error. */ -}}
{{ $offlineSearchIndex = $offlineSearchIndex | fingerprint "md5" -}}
{{ end -}}
{{ $offlineSearchLink := $offlineSearchIndex.RelPermalink -}}

<input
  type="search"
  class="form-control td-search-input"
  placeholder=" {{ T "ui_search" }}"
  aria-label="{{ T "ui_search" }}"
  autocomplete="off"
  {{/*
    The data attribute name of the json file URL must end with `src` since
    Hugo's absurlreplacer requires a `src`, `href`, `action` or `srcset` suffix for the attribute name.
    If the absurlreplacer is not applied, the URL will start with `/`.
    That causes a json file loading error when relativeURLs is enabled.
    https://github.com/google/docsy/issues/181
  */}}
  data-offline-search-index-json-src="{{ $offlineSearchLink }}"
  data-offline-search-base-href="/"
  data-offline-search-max-results="{{ .Site.Params.offlineSearchMaxResults | default 10 }}"
>
{{ end -}}
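Once the workflow has run, one way to confirm that records actually landed in the daprdocs index that the DocSearch frontend above queries is to search it with the same algoliasearch v2 Python client the script uses. This is a hedged sketch: the ALGOLIA_SEARCH_KEY variable name is an assumption (any key with search rights works), and the query string is arbitrary.

import os
from algoliasearch.search_client import SearchClient

# Assumption: ALGOLIA_SEARCH_KEY holds a search-capable API key; the app ID and
# index name match the values used by the workflow and templates above.
client = SearchClient.create(os.environ["ALGOLIA_APP_ID"], os.environ["ALGOLIA_SEARCH_KEY"])
index = client.init_index("daprdocs")

# Query the index and print a few hits with the rank computed by algolia.py.
res = index.search("state management", {"hitsPerPage": 3})
for hit in res["hits"]:
    print(hit["rank"], hit["url"])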