mirror of https://github.com/dapr/docs.git
Upgrade Algolia search to v3 (#3560)
* update dapr publish command
* Split workflow into two steps
* Update upload path
* Add concurrency check
* Add Algolia workflow script and step
* Update Algolia box to v3
* Fix secret name
* Override default search bar in Docsy v3
* Remove temporary comment
* Consolidate build and deploy

Signed-off-by: Hannah Hunter <hannahhunter@microsoft.com>
Signed-off-by: Aaron Crawfis <Aaron.Crawfis@microsoft.com>
Co-authored-by: Hannah Hunter <hannahhunter@microsoft.com>
Co-authored-by: Mark Fussell <markfussell@gmail.com>
This commit is contained in:
parent b9759702d5
commit 0d0d29ac92
.github/scripts/algolia.py (new file)
@@ -0,0 +1,118 @@
import os
import sys
import json
from bs4 import BeautifulSoup
from algoliasearch.search_client import SearchClient

# Base hostname of the published docs site; used to derive relative paths for object IDs.
url = "docs.dapr.io"
if len(sys.argv) > 1:
    starting_directory = os.path.join(os.getcwd(), str(sys.argv[1]))
else:
    starting_directory = os.getcwd()

# Algolia credentials and target index are supplied through environment variables.
ALGOLIA_APP_ID = os.getenv('ALGOLIA_APP_ID')
ALGOLIA_API_KEY = os.getenv('ALGOLIA_API_WRITE_KEY')
ALGOLIA_INDEX_NAME = os.getenv('ALGOLIA_INDEX_NAME')

client = SearchClient.create(ALGOLIA_APP_ID, ALGOLIA_API_KEY)
index = client.init_index(ALGOLIA_INDEX_NAME)

excluded_files = [
    "404.html",
]

excluded_directories = [
    "zh-hans",
]

# Top-level section names mapped to base ranks; lower ranks are boosted in search results.
rankings = {
    "Getting started": 0,
    "Concepts": 100,
    "Developing applications": 200,
    "Operations": 300,
    "Reference": 400,
    "Contributing": 500,
    "Home": 600
}


def scan_directory(directory: str, pages: list):
    """Recursively collect indexable HTML pages under `directory` into `pages`."""
    if os.path.basename(directory) in excluded_directories:
        print(f'Skipping directory: {directory}')
        return
    for file in os.listdir(directory):
        path = os.path.join(directory, file)
        if os.path.isfile(path):
            if file.endswith(".html") and file not in excluded_files:
                with open(path, encoding="utf8") as f:
                    content = f.read()
                if '<!-- DISABLE_ALGOLIA -->' not in content:
                    print(f'Indexing: {path}')
                    pages.append(path)
                else:
                    print(f'Skipping hidden page: {path}')
        else:
            scan_directory(path, pages)


def parse_file(path: str):
    """Extract an Algolia record (title, description, URL, rank, body text) from one HTML page."""
    data = {}
    data["hierarchy"] = {}
    data["rank"] = 999
    data["subrank"] = 99
    data["type"] = "lvl2"
    data["lvl0"] = ""
    data["lvl1"] = ""
    data["lvl2"] = ""
    data["lvl3"] = ""
    text = ""
    subrank = 0
    with open(path, "r", errors='ignore') as file:
        content = file.read()
        soup = BeautifulSoup(content, "html.parser")
        # Title, description, and canonical URL come from the page's meta tags.
        for meta in soup.find_all("meta"):
            if meta.get("name") == "description":
                data["lvl2"] = meta.get("content")
                data["hierarchy"]["lvl1"] = meta.get("content")
            elif meta.get("property") == "og:title":
                data["lvl0"] = meta.get("content")
                data["hierarchy"]["lvl0"] = meta.get("content")
                data["hierarchy"]["lvl2"] = meta.get("content")
            elif meta.get("property") == "og:url":
                data["url"] = meta.get("content")
                data["path"] = meta.get("content").split(url)[1]
                data["objectID"] = meta.get("content").split(url)[1]
        # Deeper pages (more breadcrumbs) get a larger subrank, ranking them below section landing pages.
        breadcrumbs = soup.find_all("li", class_="breadcrumb-item")
        try:
            subrank = len(breadcrumbs)
            data["subrank"] = subrank
        except:
            subrank = 99
            data["subrank"] = 99
        # The first breadcrumb names the top-level section, which determines the base rank.
        for bc in breadcrumbs:
            section = bc.text.strip()
            data["lvl1"] = section
            data["hierarchy"]["lvl0"] = section
            try:
                data["rank"] = rankings[section] + subrank
            except KeyError:
                print(f"Rank not found for section {section}")
                data["rank"] = 998
            break
        # Concatenate all paragraph text as the searchable body.
        for p in soup.find_all("p"):
            if p.text != "":
                text = text + p.text
        data["text"] = text
    return data


def index_payload(payload):
    """Replace every object in the Algolia index with the freshly generated records."""
    res = index.replace_all_objects(payload)
    res.wait()


if __name__ == "__main__":
    pages = []
    payload = []
    scan_directory(starting_directory, pages)
    for page in pages:
        data = parse_file(page)
        if "objectID" in data:
            payload.append(data)
    index_payload(payload)
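For context, each record the script pushes is a plain dictionary whose hierarchy and objectID fields come from the page's meta tags. The following standalone sketch is not part of the commit: it runs the same BeautifulSoup extraction over an invented HTML snippet, and the page title, description, and URL are hypothetical, used only to illustrate the record shape.

from bs4 import BeautifulSoup

# Hypothetical page markup mimicking what Hugo emits for a docs page (not from the repo).
sample_html = """
<html><head>
  <meta name="description" content="Overview of the service invocation building block">
  <meta property="og:title" content="Service invocation overview">
  <meta property="og:url" content="https://docs.dapr.io/developing-applications/building-blocks/service-invocation/">
</head><body>
  <li class="breadcrumb-item">Developing applications</li>
  <p>Dapr provides service invocation.</p>
</body></html>
"""

url = "docs.dapr.io"
soup = BeautifulSoup(sample_html, "html.parser")
record = {"hierarchy": {}}
for meta in soup.find_all("meta"):
    if meta.get("name") == "description":
        record["hierarchy"]["lvl1"] = meta.get("content")
    elif meta.get("property") == "og:title":
        record["hierarchy"]["lvl0"] = meta.get("content")
    elif meta.get("property") == "og:url":
        # Everything after the hostname becomes the objectID, e.g.
        # "/developing-applications/building-blocks/service-invocation/"
        record["objectID"] = meta.get("content").split(url)[1]
print(record)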
@@ -79,3 +79,29 @@ jobs:
        with:
          azure_static_web_apps_api_token: ${{ secrets.AZURE_STATIC_WEB_APPS_API_TOKEN_PROUD_BAY_0E9E0E81E }}
          action: "close"

  algolia_index:
    name: Index site for Algolia
    if: github.event_name == 'push'
    needs: ['build_and_deploy_job']
    runs-on: ubuntu-latest
    env:
      ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }}
      ALGOLIA_API_WRITE_KEY: ${{ secrets.ALGOLIA_API_WRITE_KEY }}
      ALGOLIA_INDEX_NAME: daprdocs
    steps:
      - name: Checkout docs repo
        uses: actions/checkout@v2
        with:
          submodules: false
      - name: Download Hugo artifacts
        uses: actions/download-artifact@v3
        with:
          name: hugo_build
          path: site/
      - name: Install Python packages
        run: |
          pip install --upgrade bs4
          pip install --upgrade 'algoliasearch>=2.0,<3.0'
      - name: Index site
        run: python ./.github/scripts/algolia.py ./site
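The job supplies credentials only through the ALGOLIA_APP_ID, ALGOLIA_API_WRITE_KEY, and ALGOLIA_INDEX_NAME environment variables set above. As a hedged illustration, not something this commit adds, a fail-fast check along these lines could sit at the top of the indexing script so a missing secret surfaces as a clear step failure rather than an Algolia authentication error:

import os
import sys

# Names match the env block of the algolia_index job above.
required = ("ALGOLIA_APP_ID", "ALGOLIA_API_WRITE_KEY", "ALGOLIA_INDEX_NAME")
missing = [name for name in required if not os.getenv(name)]
if missing:
    # Exit non-zero so the workflow step is marked failed with an explicit message.
    sys.exit(f"Missing required environment variables: {', '.join(missing)}")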
@@ -1,19 +1,13 @@
<script src="/js/copy-code-button.js"></script>

{{ with .Site.Params.algolia_docsearch }}
<script src="https://cdn.jsdelivr.net/npm/docsearch.js@2.6.3/dist/cdn/docsearch.min.js"></script>
<script>
<script src="https://cdn.jsdelivr.net/npm/@docsearch/js@3"></script>
<script type="text/javascript">
  docsearch({
    // Your apiKey and indexName will be given to you once
    // we create your config
    apiKey: '54ae43aa28ce8f00c54c8d5f544d29b9',
    indexName: 'crawler_dapr',
    container: '#docsearch',
    appId: 'O0QLQGNF38',
    // Replace inputSelector with a CSS selector
    // matching your search input
    inputSelector: '.td-search-input',
    // Set debug to true to inspect the dropdown
    debug: false,
    apiKey: '54ae43aa28ce8f00c54c8d5f544d29b9',
    indexName: 'daprdocs',
  });
</script>
{{ end }}

<script src="/js/copy-code-button.js"></script>
@@ -1,3 +1,3 @@
{{ with .Site.Params.algolia_docsearch }}
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/docsearch.js@2/dist/cdn/docsearch.min.css" />
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@docsearch/css@3" />
{{ end }}
@@ -0,0 +1,30 @@
{{ if .Site.Params.gcs_engine_id -}}
<input type="search" class="form-control td-search-input" placeholder=" {{ T "ui_search" }}" aria-label="{{ T "ui_search" }}" autocomplete="off">
{{ else if .Site.Params.algolia_docsearch -}}
<div id="docsearch"></div>
{{ else if .Site.Params.offlineSearch -}}
{{ $offlineSearchIndex := resources.Get "json/offline-search-index.json" | resources.ExecuteAsTemplate "offline-search-index.json" . -}}
{{ if hugo.IsProduction -}}
{{/* Use `md5` as the fingerprint hash function to shorten the file name and avoid a `file name too long` error. */ -}}
{{ $offlineSearchIndex = $offlineSearchIndex | fingerprint "md5" -}}
{{ end -}}
{{ $offlineSearchLink := $offlineSearchIndex.RelPermalink -}}

<input
  type="search"
  class="form-control td-search-input"
  placeholder=" {{ T "ui_search" }}"
  aria-label="{{ T "ui_search" }}"
  autocomplete="off"
  {{/*
    The data attribute name of the JSON file URL must end with `src`, since
    Hugo's absurlreplacer requires a `src`, `href`, `action`, or `srcset` suffix for the attribute name.
    If the absurlreplacer is not applied, the URL will start with `/`,
    which causes a JSON file loading error when relativeURLs is enabled.
    https://github.com/google/docsy/issues/181
  */}}
  data-offline-search-index-json-src="{{ $offlineSearchLink }}"
  data-offline-search-base-href="/"
  data-offline-search-max-results="{{ .Site.Params.offlineSearchMaxResults | default 10 }}"
>
{{ end -}}