422 lines
11 KiB
JavaScript
422 lines
11 KiB
JavaScript
/*
|
|
This is the source for offline web search; it will be embedded
|
|
in a generated search page along with the search index xml.
|
|
|
|
You will almost certainly want to gzip this when delivering it!
|
|
Also be sure it has the proper cache headers to give a remotely
|
|
acceptable performance result. Configure the web server to do
|
|
both these. When storing it for offline usage, you might just
|
|
leave it unzipped though for convenience of use without a web server.
|
|
|
|
Tip to the end user: you might want to open this page and keep it
|
|
open in a reused tab.
|
|
|
|
The file generated should be the skeleton.html with the search
|
|
index in a <script id="search-index-container" type="text/xml">
|
|
right before this, and this file popped in a <script> right before
|
|
</body>.
|
|
|
|
It concatenates them all together so it works from file://, since
|
|
that doesn't allow XHR requests to other files so I can't just
|
|
ajax the search.xml nor does it allow pre-gzipped assets...
|
|
*/
|
|
var searchIndexString = document.getElementById("search-index-container").innerHTML;
|
|
|
|
var parser = new DOMParser();
|
|
var searchDocument = parser.parseFromString(searchIndexString, "text/xml");
|
|
|
|
|
|
// what follows is a port of the D search cgi program. Sort of. It isn't
|
|
// actually ported since I'm not quite happy with the cgi search algorithm
|
|
// yet.
|
|
|
|
function adrdox_search(searchTerm) {
|
|
if(searchTerm.length == 0)
|
|
return;
|
|
|
|
searchTerm = searchTerm.replace(/\++/g, ' ');
|
|
|
|
if(searchTerm == window.currentSearchTerm)
|
|
return;
|
|
|
|
window.currentSearchTerm = searchTerm;
|
|
|
|
location.hash = "#!" + encodeURIComponent(searchTerm);
|
|
|
|
var hitsObj = {};
|
|
|
|
var terms = searchTerm.split(" ");
|
|
for(var i = 0; i < terms.length; i++) {
|
|
var t = terms[i];
|
|
var hitschild = searchDocument.querySelectorAll("adrdox > index > term[value=\""+stemmer(t)+"\"] > result");
|
|
for(var a = 0; a < hitschild.length; a++) {
|
|
if(hitsObj[hitschild[a].getAttribute("decl")])
|
|
hitsObj[hitschild[a].getAttribute("decl")] += Number(hitschild[a].getAttribute("score"));
|
|
else
|
|
hitsObj[hitschild[a].getAttribute("decl")] = Number(hitschild[a].getAttribute("score"));
|
|
}
|
|
|
|
if(stemmer(t) != t) {
|
|
var hitschild = searchDocument.querySelectorAll("adrdox > index > term[value=\""+t+"\"] > result");
|
|
for(var a = 0; a < hitschild.length; a++) {
|
|
if(hitsObj[hitschild[a].getAttribute("decl")])
|
|
hitsObj[hitschild[a].getAttribute("decl")] += Number(hitschild[a].getAttribute("score"));
|
|
else
|
|
hitsObj[hitschild[a].getAttribute("decl")] = Number(hitschild[a].getAttribute("score"));
|
|
}
|
|
}
|
|
}
|
|
|
|
var dotTerms = searchTerm.replace(/ /g, "").split(".");
|
|
for(var i = 0; i < dotTerms.length; i++) {
|
|
var t = dotTerms[i];
|
|
var dna = declByName[t];
|
|
if(dna) {
|
|
dna.forEach(function(dn) {
|
|
if(hitsObj[dn.getAttribute("id")])
|
|
hitsObj[dn.getAttribute("id")] += 1;
|
|
else
|
|
hitsObj[dn.getAttribute("id")] = 1;
|
|
});
|
|
}
|
|
}
|
|
|
|
var hits = [];
|
|
for(name in hitsObj)
|
|
hits.push( { decl: name, score: hitsObj[name] } );
|
|
|
|
hits.sort(function(a, b) {
|
|
var s1 = Number(b.score);
|
|
var s2 = Number(a.score);
|
|
if(s1 == s2)
|
|
return a.decl < b.decl;
|
|
return s1 - s2;
|
|
});
|
|
|
|
var container = document.getElementById("page-content");
|
|
container.innerHTML = "<h2>Search Results</h2>";
|
|
|
|
var resultsElement = document.createElement("dl");
|
|
resultsElement.className = "member-list";
|
|
container.appendChild(resultsElement);
|
|
|
|
var prevFqn;
|
|
|
|
for(var a = 0; a < hits.length; a++) {
|
|
var decl = searchDocument.querySelector("adrdox > listing decl[id=\""+hits[a].decl+"\"]");
|
|
if(!decl) continue;
|
|
|
|
var dt = document.createElement("dt");
|
|
var link = document.createElement("a");
|
|
link.href = decl.querySelector("link").textContent;
|
|
|
|
var fqn = [];
|
|
var par = decl;
|
|
while(par) {
|
|
fqn.push(par.querySelector("name").textContent);
|
|
if(par.getAttribute("type") == "module")
|
|
break;
|
|
par = par.parentNode;
|
|
if(par.tagName != "decl")
|
|
break;
|
|
}
|
|
|
|
var newFqn = fqn.reverse().join(".\u200B");
|
|
|
|
if(newFqn == prevFqn)
|
|
continue;
|
|
prevFqn = newFqn;
|
|
|
|
link.textContent = newFqn;
|
|
|
|
dt.appendChild(link);
|
|
dt.className = "search-result";
|
|
dt.setAttribute("data-score", hits[a].score);
|
|
dt.setAttribute("data-decl", hits[a].decl);
|
|
resultsElement.appendChild(dt);
|
|
|
|
var dd = document.createElement("dd");
|
|
dd.innerHTML = decl.querySelector("desc").textContent;
|
|
|
|
resultsElement.appendChild(dd);
|
|
|
|
if(a > 20)
|
|
break;
|
|
}
|
|
}
|
|
|
|
window.onhashchange = function() {
|
|
adrdox_search(decodeURIComponent(location.hash.substring(2)));
|
|
};
|
|
|
|
var declByName = {};
|
|
|
|
window.onload = function() {
|
|
// index the html first ...
|
|
|
|
searchDocument.querySelectorAll("adrdox > listing decl[id] > name").forEach(function(element) {
|
|
if(!Array.isArray(declByName[element.textContent]))
|
|
declByName[element.textContent] = [];
|
|
declByName[element.textContent].push(element.parentNode);
|
|
|
|
var p = element.parentNode.parentNode;
|
|
while(p.tagName == "decl") {
|
|
var e = p.querySelector("name");
|
|
if(!Array.isArray(declByName[e.textContent]))
|
|
declByName[e.textContent] = [];
|
|
declByName[e.textContent].push(element.parentNode);
|
|
p = p.parentNode;
|
|
}
|
|
});
|
|
//foreach(element; index.querySelectorAll("adrdox > index term[value]"))
|
|
// termByValue[element.attrs.value] = element;
|
|
|
|
|
|
// populate the search from a form, if present
|
|
var searchTerm = location.search.substring("?searchTerm=".length);
|
|
if(searchTerm)
|
|
location.href = location.href.substring(0, location.href.indexOf("?"));
|
|
searchTerm = location.hash.substring(2);
|
|
adrdox_search(decodeURIComponent(searchTerm));
|
|
|
|
var search = document.getElementById("search");
|
|
if(!search)
|
|
return;
|
|
search.onsubmit = function() {
|
|
adrdox_search(search.elements["searchTerm"].value);
|
|
return false;
|
|
};
|
|
};
|
|
|
|
// Following is a copy/paste of a stemmer algorithm...it is BSD licensed.
|
|
// **************
|
|
|
|
// Reference Javascript Porter Stemmer. This code corresponds to the original
|
|
// 1980 paper available here: http://tartarus.org/martin/PorterStemmer/def.txt
|
|
// The latest version of this code is available at https://github.com/kristopolous/Porter-Stemmer
|
|
//
|
|
// Original comment:
|
|
// Porter stemmer in Javascript. Few comments, but it's easy to follow against the rules in the original
|
|
// paper, in
|
|
//
|
|
// Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
|
|
// no. 3, pp 130-137,
|
|
//
|
|
// see also http://www.tartarus.org/~martin/PorterStemmer
|
|
|
|
var stemmer = (function(){
|
|
var step2list = {
|
|
"ational" : "ate",
|
|
"tional" : "tion",
|
|
"enci" : "ence",
|
|
"anci" : "ance",
|
|
"izer" : "ize",
|
|
"bli" : "ble",
|
|
"alli" : "al",
|
|
"entli" : "ent",
|
|
"eli" : "e",
|
|
"ousli" : "ous",
|
|
"ization" : "ize",
|
|
"ation" : "ate",
|
|
"ator" : "ate",
|
|
"alism" : "al",
|
|
"iveness" : "ive",
|
|
"fulness" : "ful",
|
|
"ousness" : "ous",
|
|
"aliti" : "al",
|
|
"iviti" : "ive",
|
|
"biliti" : "ble",
|
|
"logi" : "log"
|
|
},
|
|
|
|
step3list = {
|
|
"icate" : "ic",
|
|
"ative" : "",
|
|
"alize" : "al",
|
|
"iciti" : "ic",
|
|
"ical" : "ic",
|
|
"ful" : "",
|
|
"ness" : ""
|
|
},
|
|
|
|
c = "[^aeiou]", // consonant
|
|
v = "[aeiouy]", // vowel
|
|
C = c + "[^aeiouy]*", // consonant sequence
|
|
V = v + "[aeiou]*", // vowel sequence
|
|
|
|
mgr0 = "^(" + C + ")?" + V + C, // [C]VC... is m>0
|
|
meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$", // [C]VC[V] is m=1
|
|
mgr1 = "^(" + C + ")?" + V + C + V + C, // [C]VCVC... is m>1
|
|
s_v = "^(" + C + ")?" + v; // vowel in stem
|
|
|
|
function dummyDebug() {}
|
|
|
|
function realDebug() {
|
|
console.log(Array.prototype.slice.call(arguments).join(' '));
|
|
}
|
|
|
|
return function (w, debug) {
|
|
var
|
|
stem,
|
|
suffix,
|
|
firstch,
|
|
re,
|
|
re2,
|
|
re3,
|
|
re4,
|
|
debugFunction,
|
|
origword = w;
|
|
|
|
if (debug) {
|
|
debugFunction = realDebug;
|
|
} else {
|
|
debugFunction = dummyDebug;
|
|
}
|
|
|
|
if (w.length < 3) { return w; }
|
|
|
|
firstch = w.substr(0,1);
|
|
if (firstch == "y") {
|
|
w = firstch.toUpperCase() + w.substr(1);
|
|
}
|
|
|
|
// Step 1a
|
|
re = /^(.+?)(ss|i)es$/;
|
|
re2 = /^(.+?)([^s])s$/;
|
|
|
|
if (re.test(w)) {
|
|
w = w.replace(re,"$1$2");
|
|
debugFunction('1a',re, w);
|
|
|
|
} else if (re2.test(w)) {
|
|
w = w.replace(re2,"$1$2");
|
|
debugFunction('1a',re2, w);
|
|
}
|
|
|
|
// Step 1b
|
|
re = /^(.+?)eed$/;
|
|
re2 = /^(.+?)(ed|ing)$/;
|
|
if (re.test(w)) {
|
|
var fp = re.exec(w);
|
|
re = new RegExp(mgr0);
|
|
if (re.test(fp[1])) {
|
|
re = /.$/;
|
|
w = w.replace(re,"");
|
|
debugFunction('1b',re, w);
|
|
}
|
|
} else if (re2.test(w)) {
|
|
var fp = re2.exec(w);
|
|
stem = fp[1];
|
|
re2 = new RegExp(s_v);
|
|
if (re2.test(stem)) {
|
|
w = stem;
|
|
debugFunction('1b', re2, w);
|
|
|
|
re2 = /(at|bl|iz)$/;
|
|
re3 = new RegExp("([^aeiouylsz])\\1$");
|
|
re4 = new RegExp("^" + C + v + "[^aeiouwxy]$");
|
|
|
|
if (re2.test(w)) {
|
|
w = w + "e";
|
|
debugFunction('1b', re2, w);
|
|
|
|
} else if (re3.test(w)) {
|
|
re = /.$/;
|
|
w = w.replace(re,"");
|
|
debugFunction('1b', re3, w);
|
|
|
|
} else if (re4.test(w)) {
|
|
w = w + "e";
|
|
debugFunction('1b', re4, w);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Step 1c
|
|
re = new RegExp("^(.*" + v + ".*)y$");
|
|
if (re.test(w)) {
|
|
var fp = re.exec(w);
|
|
stem = fp[1];
|
|
w = stem + "i";
|
|
debugFunction('1c', re, w);
|
|
}
|
|
|
|
// Step 2
|
|
re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
|
|
if (re.test(w)) {
|
|
var fp = re.exec(w);
|
|
stem = fp[1];
|
|
suffix = fp[2];
|
|
re = new RegExp(mgr0);
|
|
if (re.test(stem)) {
|
|
w = stem + step2list[suffix];
|
|
debugFunction('2', re, w);
|
|
}
|
|
}
|
|
|
|
// Step 3
|
|
re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
|
|
if (re.test(w)) {
|
|
var fp = re.exec(w);
|
|
stem = fp[1];
|
|
suffix = fp[2];
|
|
re = new RegExp(mgr0);
|
|
if (re.test(stem)) {
|
|
w = stem + step3list[suffix];
|
|
debugFunction('3', re, w);
|
|
}
|
|
}
|
|
|
|
// Step 4
|
|
re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
|
|
re2 = /^(.+?)(s|t)(ion)$/;
|
|
if (re.test(w)) {
|
|
var fp = re.exec(w);
|
|
stem = fp[1];
|
|
re = new RegExp(mgr1);
|
|
if (re.test(stem)) {
|
|
w = stem;
|
|
debugFunction('4', re, w);
|
|
}
|
|
} else if (re2.test(w)) {
|
|
var fp = re2.exec(w);
|
|
stem = fp[1] + fp[2];
|
|
re2 = new RegExp(mgr1);
|
|
if (re2.test(stem)) {
|
|
w = stem;
|
|
debugFunction('4', re2, w);
|
|
}
|
|
}
|
|
|
|
// Step 5
|
|
re = /^(.+?)e$/;
|
|
if (re.test(w)) {
|
|
var fp = re.exec(w);
|
|
stem = fp[1];
|
|
re = new RegExp(mgr1);
|
|
re2 = new RegExp(meq1);
|
|
re3 = new RegExp("^" + C + v + "[^aeiouwxy]$");
|
|
if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) {
|
|
w = stem;
|
|
debugFunction('5', re, re2, re3, w);
|
|
}
|
|
}
|
|
|
|
re = /ll$/;
|
|
re2 = new RegExp(mgr1);
|
|
if (re.test(w) && re2.test(w)) {
|
|
re = /.$/;
|
|
w = w.replace(re,"");
|
|
debugFunction('5', re, re2, w);
|
|
}
|
|
|
|
// and turn initial Y back to y
|
|
if (firstch == "y") {
|
|
w = firstch.toLowerCase() + w.substr(1);
|
|
}
|
|
|
|
|
|
return w;
|
|
}
|
|
})();
|