// Algolia Crawler configuration for the DocuBook documentation site.
// Crawls the site, extracts DocSearch-style records from the article content,
// and publishes them to the `docsearch_docubook` index.
new Crawler({
  appId: "5CCY***", // Replace with your Algolia application ID.
  apiKey: "01469aacecdb****", // Replace with your Crawler API key.
  indexPrefix: "",
  rateLimit: 8,
  startUrls: ["https://www.docubook.pro"], // Replace with your domain.
  renderJavaScript: true,
  maxDepth: 10,
  maxUrls: 8000,
  schedule: "every 1 day at 02:00 am", // A daily re-crawl is recommended.
  sitemaps: [],
  ignoreCanonicalTo: true,
  discoveryPatterns: ["https://www.docubook.pro/**"], // Replace with your domain.
  actions: [
    {
      // Index names are recommended to start with the `docsearch_` prefix.
      // Keep this in sync with the key used in `initialIndexSettings` below.
      indexName: "docsearch_docubook",
      pathsToMatch: ["https://www.docubook.pro/**"], // Replace with your domain.
      /**
       * Builds the search records for one crawled page.
       *
       * @param {object} context
       * @param {Function} context.$ - Cheerio-style selector function for the page.
       * @param {object} context.helpers - Crawler helpers; only `docsearch` is used.
       * @returns {Array<object>} DocSearch records, with layout-only anchors removed.
       */
      recordExtractor: ({ $, helpers }) => {
        // NOTE(review): everything the extractor needs is declared inside it, on
        // the assumption that the crawler evaluates this function in isolation.
        // Confirm before hoisting anything out of this function.

        // Element ids that belong to the page layout, not to the document.
        // This list is the single source of truth for anchor clean-up below.
        const layoutAnchors = ["scroll-container", "main-navbar"];

        // Removes a trailing `#<layout anchor>` from a URL; any other URL
        // (including a missing one) is returned untouched.
        const stripLayoutAnchor = (url) => {
          if (!url) return url;
          const matched = layoutAnchors.find((id) => url.endsWith(`#${id}`));
          return matched ? url.slice(0, -(matched.length + 1)) : url;
        };

        // Prefer the explicit data attribute, fall back to the third breadcrumb
        // item, then to a generic label. `.first()` is applied to both lookups
        // so that several matching elements never get concatenated into one
        // label.
        const lvl0 =
          $("[data-search-lvl0='true']").first().text().trim() ||
          $("nav[aria-label='breadcrumb'] li:nth-child(3)")
            .first()
            .text()
            .trim() ||
          "Docs";

        const records = helpers.docsearch({
          recordProps: {
            // No selector: every record of the page shares the computed label.
            lvl0: {
              selectors: "",
              defaultValue: lvl0,
            },
            // Limit H1 extraction to the document content area.
            // This avoids layout anchors like #scroll-container or #main-navbar.
            lvl1: ["article h1", ".prose h1"],
            content: ["article p, article li", ".prose p, .prose li"],
            lvl2: ["article h2", ".prose h2"],
            lvl3: ["article h3", ".prose h3"],
            lvl4: ["article h4", ".prose h4"],
            lvl5: ["article h5", ".prose h5"],
            lvl6: ["article h6", ".prose h6"],
          },
          aggregateContent: true,
          recordVersion: "v3",
        });

        // Records anchored to a layout element should link to the page itself,
        // so drop the anchor and clean both URL fields. Other records pass
        // through unchanged.
        return records.map((record) => {
          if (!layoutAnchors.includes(record.anchor)) return record;
          return {
            ...record,
            anchor: undefined,
            url: stripLayoutAnchor(record.url),
            url_without_anchor:
              record.url_without_anchor ||
              (record.url ? record.url.split("#")[0] : record.url),
          };
        });
      },
    },
  ],
  // Abort publishing when a crawl would lose more than 10% of the records.
  safetyChecks: { beforeIndexPublishing: { maxLostRecordsPercentage: 10 } },
  initialIndexSettings: {
    // Must be the same index name as `actions[].indexName` above
    // (`docsearch_` prefix recommended).
    "docsearch_docubook": {
      attributesForFaceting: ["type", "lang"],
      attributesToRetrieve: [
        "hierarchy",
        "content",
        "anchor",
        "url",
        "url_without_anchor",
        "type",
      ],
      attributesToHighlight: ["hierarchy", "content"],
      attributesToSnippet: ["content:10"],
      camelCaseAttributes: ["hierarchy", "content"],
      searchableAttributes: [
        "unordered(hierarchy.lvl0)",
        "unordered(hierarchy.lvl1)",
        "unordered(hierarchy.lvl2)",
        "unordered(hierarchy.lvl3)",
        "unordered(hierarchy.lvl4)",
        "unordered(hierarchy.lvl5)",
        "unordered(hierarchy.lvl6)",
        "content",
      ],
      distinct: true,
      attributeForDistinct: "url",
      customRanking: [
        "desc(weight.pageRank)",
        "desc(weight.level)",
        "asc(weight.position)",
      ],
      ranking: [
        "words",
        "filters",
        "typo",
        "attribute",
        "proximity",
        "exact",
        "custom",
      ],
      highlightPreTag: '<span class="algolia-docsearch-suggestion--highlight">',
      highlightPostTag: "</span>",
      minWordSizefor1Typo: 3,
      minWordSizefor2Typos: 7,
      allowTyposOnNumericTokens: false,
      minProximity: 1,
      ignorePlurals: true,
      advancedSyntax: true,
      attributeCriteriaComputedByMinProximity: true,
      removeWordsIfNoResults: "allOptional",
    },
  },
});