Search full profile content using lunr.js

This commit is contained in:
James Nylen 2020-05-04 09:32:28 +00:00
parent 44abc2a909
commit e983d65e01
8 changed files with 341 additions and 116 deletions

View file

@ -9,7 +9,11 @@ const phin = require( 'phin' );
const rimraf = require( 'rimraf' );
const swig = require( 'swig-templates' );
const { parseFromDirectory, headingPropertyNames } = require( '../lib' );
const {
parseFromDirectory,
headingPropertyNames,
buildSearchData,
} = require( '../lib' );
const contentPath = path.join( __dirname, '..' );
const sitePath = path.join( __dirname, '..', 'site' );
const siteBuildPath = path.join( sitePath, 'build' );
@ -211,7 +215,7 @@ async function buildSite() {
// Set up styles/scripts for specific pages
const indexScripts = [ {
url: '//cdnjs.cloudflare.com/ajax/libs/list.js/1.5.0/list.min.js',
url: '//cdnjs.cloudflare.com/ajax/libs/lunr.js/2.3.7/lunr.min.js',
}, {
url: copyAssetToBuild( 'companies-table.js' ),
} ];
@ -223,6 +227,13 @@ async function buildSite() {
fs.copyFileSync( path.join( faviconPath, f ), path.join( siteBuildPath, f ) );
} );
// Generate search index
console.log( 'Generating search index' );
const searchIndexFilename = copyAssetToBuild(
'search.js',
JSON.stringify( buildSearchData( data ) )
);
// Generate the index.html file from the main README
// TODO: Build this page and its table dynamically; more filters
const readmeTemplate = swig.compileFile(
@ -232,6 +243,9 @@ async function buildSite() {
writePage( 'index.html', readmeTemplate( {
stylesheets,
scripts: scripts.concat( indexScripts ),
inlineScripts: [
'var searchIndexFilename = ' + JSON.stringify( searchIndexFilename ) + ';',
],
pageContent: data.readmeContent,
editUrl: githubEditUrl( 'README.md' ),
} ) );
@ -249,6 +263,7 @@ async function buildSite() {
writePage( path.join( dirname, 'index.html' ), companyTemplate( {
stylesheets,
scripts,
inlineScripts: [],
company,
headingPropertyNames,
missingHeadings,

View file

@ -5,6 +5,7 @@ const path = require( 'path' );
const util = require( 'util' );
const cheerio = require( 'cheerio' );
const lunr = require( 'lunr' );
const marked = require( 'marked' );
@ -75,10 +76,13 @@ exports.stripExtraChars = stripExtraChars;
/**
* Other exports
*/
exports.headingPropertyNames = headingsAll.reduce( ( acc, val ) => {
acc[ toIdentifierCase( val ) ] = val;
return acc;
}, {} );
/**
 * Build a map from identifier-cased heading keys to the original heading
 * text, using every entry of `headingsAll`.
 *
 * A new object is created on each call, so a caller may add its own keys to
 * the result without affecting the exported `headingPropertyNames` object.
 *
 * @returns {Object} Map of `toIdentifierCase( heading )` => `heading`.
 */
function getHeadingPropertyNames() {
	const namesByKey = {};
	headingsAll.forEach( heading => {
		namesByKey[ toIdentifierCase( heading ) ] = heading;
	} );
	return namesByKey;
}
exports.headingPropertyNames = getHeadingPropertyNames();
/**
@ -138,13 +142,14 @@ exports.parseFromDirectory = contentPath => {
let lastCompanyName = null;
$( 'tr' ).each( ( i, tr ) => {
const $tr = $( tr );
if ( i === 0 ) {
// Assign an ID to the table.
$( tr ).closest( 'table' ).attr( 'id', 'companies-table' );
$tr.closest( 'table' ).attr( 'id', 'companies-table' );
// Skip the table header row.
return;
}
const $td = $( tr ).children( 'td' );
const $td = $tr.children( 'td' );
const websiteUrl = $td.eq( 1 ).text();
const websiteText = websiteUrl
@ -162,6 +167,13 @@ exports.parseFromDirectory = contentPath => {
shortRegion: $td.eq( 2 ).text().trim(),
};
if ( ! websiteText ) {
readmeError(
'Missing website for company: %s',
readmeEntry.name
);
}
if ( readmeEntry.name ) {
if ( companyNamesSeen[ readmeEntry.name.toLowerCase() ] ) {
readmeError(
@ -173,7 +185,7 @@ exports.parseFromDirectory = contentPath => {
} else {
readmeError(
'Missing company name: %s',
$( tr ).html().replace( /\n/g, '' )
$tr.html().replace( /\n/g, '' )
);
}
@ -183,14 +195,14 @@ exports.parseFromDirectory = contentPath => {
) {
readmeError(
'Invalid content in Website column: %s',
$( tr ).html().replace( /\n/g, '' )
$tr.html().replace( /\n/g, '' )
);
}
if ( $td.eq( 2 ).children().length > 0 ) {
readmeError(
'Extra content in Region column: %s',
$( tr ).html().replace( /\n/g, '' )
$tr.html().replace( /\n/g, '' )
);
}
@ -244,7 +256,10 @@ exports.parseFromDirectory = contentPath => {
);
}
// Set classes on table cells
// Set identifying attributes on table elements
$tr
.attr( 'class', 'company-row' )
.attr( 'id', 'company-row-' + ( i - 1 ) );
$td.eq( 0 ).attr( 'class', 'company-name' );
$td.eq( 1 ).attr( 'class', 'company-website' );
$td.eq( 2 ).attr( 'class', 'company-region' );
@ -509,3 +524,71 @@ exports.parseFromDirectory = contentPath => {
readmeContent,
};
};
/**
* Build search index data from the result of parseFromDirectory().
*
* Returns an object with { index, textData } keys.
*/
exports.buildSearchData = data => {
const textData = [];
data.companies.forEach( ( company, i ) => {
const thisTextData = {
id: String( i ),
nameText: company.name,
websiteText: company.websiteText,
};
if ( company.shortRegion ) {
thisTextData.shortRegion = company.shortRegion;
}
Object.keys( exports.headingPropertyNames ).forEach( h => {
if ( company.profileContent[ h ] ) {
const text = cheerio.load( company.profileContent[ h ] ).text();
thisTextData[ h ] = text;
}
} );
textData.push( thisTextData );
} );
const index = lunr( function() {
this.field( 'nameText' );
this.field( 'websiteText' );
this.field( 'shortRegion' );
Object.keys( exports.headingPropertyNames ).forEach( h => {
this.field( h );
} );
// https://github.com/olivernn/lunr.js/issues/25#issuecomment-623267494
this.metadataWhitelist = ['position'];
// How to find more stop word candidates:
// cat site/build/assets/search-1588572080743.js | jq -r '.textData | .[] | .[]'
// paste the output into this tool:
// https://www.online-utility.org/text/analyzer.jsp
const extraStopWords = lunr.generateStopWordFilter( [
'career',
'company',
'employees',
'job',
'remote',
'team',
'work',
] );
lunr.Pipeline.registerFunction( extraStopWords, 'extraStopWords' );
this.pipeline.after( lunr.stopWordFilter, extraStopWords );
Object.keys( textData ).forEach( c => this.add( textData[ c ] ) );
} );
const headings = getHeadingPropertyNames();
headings.nameText = 'Name';
headings.websiteText = 'Website';
headings.shortRegion = 'Region';
return { index, textData, headings };
};

5
package-lock.json generated
View file

@ -627,6 +627,11 @@
"resolved": "https://registry.npmjs.org/longest/-/longest-1.0.1.tgz",
"integrity": "sha1-MKCy2jj3N3DoKUoNIuZiXtd9AJc="
},
"lunr": {
"version": "2.3.7",
"resolved": "https://registry.npmjs.org/lunr/-/lunr-2.3.7.tgz",
"integrity": "sha512-HjFSiy0Y0qZoW5OA1I6qBi7OnsDdqQnaUr03jhorh30maQoaP+4lQCKklYE3Nq3WJMSUfuBl6N+bKY5wxCb9hw=="
},
"marked": {
"version": "0.7.0",
"resolved": "https://registry.npmjs.org/marked/-/marked-0.7.0.tgz",

View file

@ -14,6 +14,7 @@
},
"dependencies": {
"cheerio": "^1.0.0-rc.3",
"lunr": "2.3.7",
"marked": "^0.7.0",
"phin": "^3.4.0",
"rimraf": "^3.0.0",

View file

@ -1,52 +1,180 @@
function setupFilters() {
function setupSearch() {
var table = document.querySelector( 'table#companies-table' );
var headerCells = table.querySelectorAll( 'thead tr th' );
headerCells[ 0 ].innerHTML =
'<button class="sort" data-sort="company-name">Name</button>';
headerCells[ 1 ].innerHTML =
'<button class="sort" data-sort="company-website">Website</button>';
headerCells[ 2 ].innerHTML =
'<button class="sort" data-sort="company-region">Region</button>';
var searchInput = document.createElement( 'input' );
searchInput.type = 'text';
searchInput.placeholder = 'Search';
searchInput.id = 'search-input';
var tbody = table.querySelector( 'tbody' );
tbody.setAttribute( 'class', 'list' );
var filterInput = document.createElement( 'input' );
filterInput.type = 'text';
filterInput.placeholder = 'Filter Companies';
filterInput.id = 'company-filter';
filterInput.setAttribute( 'class', 'company-filter' );
var searchStatus = document.createElement( 'span' );
searchStatus.id = 'search-status';
var companiesHeading = document.querySelector( 'h2#companies' );
companiesHeading.appendChild( filterInput );
companiesHeading.appendChild( searchInput );
companiesHeading.appendChild( searchStatus );
var filtersExplanation = document.createElement( 'p' );
filtersExplanation.id = 'filters-explanation';
filtersExplanation.innerHTML = (
'Use the text box above to filter the list of companies, '
+ 'or click a column heading to sort by that column.'
var searchExplanation = document.createElement( 'p' );
searchExplanation.id = 'search-explanation';
searchExplanation.innerHTML = (
'Use the text box above to search all of our company data.'
);
table.parentNode.insertBefore( filtersExplanation, table );
table.parentNode.insertBefore( searchExplanation, table );
window.tableFilter = new List(
'main', // element ID that contains everything
{
valueNames: [
'company-name',
'company-website',
'company-region'
],
searchClass: 'company-filter',
var searchLoading = false;
var searchData = null;
var searchIndex = null;
var updateTimeout = null;
/**
 * Run the current search and update the companies table to match.
 *
 * Reads the query from `searchInput`, updates the `searchStatus` text,
 * shows or hides each `company-row-N` table row, and inserts a
 * `company-match` row under every matching company containing a few words
 * of context around the first match position.
 *
 * Does nothing until the search data has finished loading.
 */
function updateSearch() {
if ( ! searchData || searchLoading ) {
return;
}
// NOTE(review): the next line looks like a leftover from the removed
// `new List( ... )` call rather than part of this function - confirm.
);
var searchValue = searchInput.value.trim();
// An empty (or whitespace-only) query shows every company.
var allMatch = ! searchValue;
// NOTE(review): the untrimmed input value is what gets passed to the
// index. lunr's search() presumably throws on query strings it cannot
// parse, and nothing here catches that - confirm.
var searchResults = searchValue ? searchIndex.search( searchInput.value ) : [];
if ( allMatch ) {
// Keep the status element non-empty when there is nothing to report.
searchStatus.innerHTML = '&nbsp;';
} else if ( searchResults.length === 1 ) {
searchStatus.innerText = searchInput.value + ': 1 result';
} else {
searchStatus.innerText = (
searchInput.value + ': '
+ searchResults.length + ' results'
);
}
// Index the results by numeric ref so that they can be looked up by the
// position of each company in textData.
var searchMatches = {};
searchResults.forEach( function( r ) {
searchMatches[ +r.ref ] = r;
} );
searchData.textData.forEach( function( company, index ) {
var match = searchMatches[ index ];
var row = document.getElementById( 'company-row-' + index );
// Remove the context row left under this company by a previous search.
var rowMatch = row.nextElementSibling;
if ( rowMatch && rowMatch.classList.contains( 'company-match' ) ) {
rowMatch.parentNode.removeChild( rowMatch );
}
row.style.display = ( match || allMatch ? '' : 'none' );
row.classList.remove( 'has-match' );
if ( match ) {
row.classList.add( 'has-match' );
var metadata = match.matchData.metadata;
// Show fewer words of context on narrow viewports.
var contextWords = ( window.innerWidth <= 600 ? 4 : 6 );
// Take the first entry found in the match metadata: k1 is the outer key
// (presumably the matched term), k2 is the name of the field that
// matched, and pos is the first recorded position in that field, used
// below as [ start offset, length ].
var k1, k2, pos;
loop1: for ( k1 in metadata ) {
for ( k2 in metadata[ k1 ] ) {
pos = metadata[ k1 ][ k2 ].position[ 0 ];
break loop1;
}
}
rowMatch = document.createElement( 'tr' );
rowMatch.setAttribute( 'class', 'company-match' );
var rowMatchCell = document.createElement( 'td' );
rowMatchCell.setAttribute( 'colspan', 3 );
var spanBefore = document.createElement( 'span' );
var spanMatch = document.createElement( 'strong' );
var spanAfter = document.createElement( 'span' );
// Full text of the field that matched, for this company.
var text = company[ k2 ];
var words = [];
var currentWord = '';
var i, inWord, c;
// Walk backwards from the character just before the match, collecting
// whitespace-separated words. Stop after contextWords words; if one more
// word exists beyond that, it is replaced by an ellipsis.
for ( i = pos[ 0 ] - 1; i >= 0; i-- ) {
c = text.substring( i, i + 1 );
inWord = /\S/.test( c );
if ( inWord ) {
currentWord = c + currentWord;
}
// A word ends at whitespace or at the start of the text.
if ( ( ! inWord || i === 0 ) && currentWord ) {
words.unshift( currentWord );
currentWord = '';
if ( words.length === contextWords + 1 ) {
words[ 0 ] = '\u2026';
break;
}
}
}
// On wider viewports, prefix the context with the label of the field
// that matched.
spanBefore.innerText = (
( window.innerWidth > 600 ? searchData.headings[ k2 ] + ': ' : '' )
+ words.join( ' ' )
+ ' '
);
spanMatch.innerText = text.substring( pos[ 0 ], pos[ 0 ] + pos[ 1 ] );
words = [];
currentWord = '';
// Walk forwards collecting the words after the match in the same way,
// with the ellipsis placed at the end instead.
// NOTE(review): this starts one character past the end of the match, so
// the character immediately following the match is skipped - presumably
// expected to be a separator; confirm.
for ( i = pos[ 0 ] + pos[ 1 ] + 1; i < text.length; i++ ) {
c = text.substring( i, i + 1 );
inWord = /\S/.test( c );
if ( inWord ) {
currentWord += c;
}
// A word ends at whitespace or at the end of the text.
if ( ( ! inWord || i === text.length - 1 ) && currentWord ) {
words.push( currentWord );
currentWord = '';
if ( words.length === contextWords + 1 ) {
words[ contextWords ] = '\u2026';
break;
}
}
}
spanAfter.innerText = ' ' + words.join( ' ' );
rowMatchCell.appendChild( spanBefore );
rowMatchCell.appendChild( spanMatch );
rowMatchCell.appendChild( spanAfter );
rowMatch.appendChild( rowMatchCell );
// Place the context row directly below the company row.
row.parentNode.insertBefore( rowMatch, row.nextSibling );
}
} );
}
// Load the search data lazily, the first time the search box is focused.
// Any failure resets the loading flag so that focusing the box again retries.
searchInput.addEventListener( 'focus', function() {
	if ( searchData || searchLoading ) {
		return;
	}

	searchLoading = true;
	var searchLoadingText = 'Loading search data...';
	searchStatus.innerHTML = searchLoadingText;

	// Shared failure path: report the error and allow a later retry.
	var loadFailed = function() {
		searchLoading = false;
		searchStatus.innerHTML = 'Error!';
	};

	var xhr = new XMLHttpRequest();
	xhr.open( 'GET', searchIndexFilename );

	xhr.onprogress = function( e ) {
		// When the total size is unknown, e.total is 0 and a percentage
		// cannot be computed; keep showing the plain loading text instead.
		if ( ! e.lengthComputable || ! e.total ) {
			return;
		}
		searchStatus.innerHTML = (
			searchLoadingText
			+ ' '
			+ Math.round( 100 * e.loaded / e.total )
			+ '%'
		);
	};

	xhr.onload = function() {
		if ( xhr.status !== 200 ) {
			loadFailed();
			return;
		}

		// Parse into locals first so that a malformed response does not
		// leave searchData set without a usable searchIndex.
		var loadedData, loadedIndex;
		try {
			loadedData = JSON.parse( xhr.response );
			loadedIndex = lunr.Index.load( loadedData.index );
		} catch ( err ) {
			loadFailed();
			return;
		}

		searchData = loadedData;
		searchIndex = loadedIndex;
		// Must be cleared before updateSearch(), which bails while loading.
		searchLoading = false;
		updateSearch();
	};

	// Without these, a failed request would leave searchLoading stuck at
	// true and the search could never be loaded.
	xhr.onerror = loadFailed;
	xhr.onabort = loadFailed;
	xhr.ontimeout = loadFailed;

	xhr.send();
} );
// Debounce typing: each key release cancels the pending update (if any) and
// schedules a new one, so the search only runs once typing pauses.
function scheduleSearchUpdate() {
	if ( updateTimeout ) {
		clearTimeout( updateTimeout );
	}

	updateTimeout = setTimeout( updateSearch, 450 );
}

searchInput.addEventListener( 'keyup', scheduleSearchUpdate );
document.body.setAttribute(
'class',
document.body.getAttribute( 'class' ) + ' filters-enabled'
document.body.getAttribute( 'class' ) + ' search-enabled'
);
}
document.addEventListener( 'DOMContentLoaded', function( event ) {
setupFilters();
setupSearch();
} );

View file

@ -46,10 +46,10 @@ h1.company-name {
}
/**
* Styles for the companies table and filters on the main page
* Styles for the companies table and search on the main page
*/
#company-filter {
#search-input {
margin: 0 0 0 16px;
padding: 8px;
font-family: "Source Sans Pro", sans-serif;
@ -58,60 +58,46 @@ h1.company-name {
vertical-align: middle;
}
#filters-explanation {
#search-status {
margin-left: 18px;
font-size: 15px;
font-weight: normal;
}
#search-explanation {
font-style: italic;
font-size: 15px;
}
@media screen and (min-width: 50em) {
#filters-explanation {
#search-explanation {
font-size: 16px;
}
}
body.filters-enabled table#companies-table th {
padding: 0;
table#companies-table th {
border-bottom: 2px solid #eee;
line-height: 1;
padding: 9px 6px;
}
table#companies-table button.sort {
width: 100%;
table#companies-table td {
border-width: 0;
border-radius: 0;
padding: 7px 3px 4px;
text-align: left;
font-weight: 700;
color: #666;
outline: none;
}
table#companies-table button.sort:hover,
table#companies-table button.sort:focus {
color: #c61610;
}
table#companies-table button.sort:hover {
background: #f4f4f4;
line-height: 1;
}
/* Sort indicators adapted from http://listjs.com/examples/table/ */
table#companies-table .sort.asc:after,
table#companies-table .sort.desc:after {
width: 0;
height: 0;
border-left: 6px solid transparent;
border-right: 6px solid transparent;
content: '';
display: inline-block;
position: relative;
left: 3px;
top: -2px;
table#companies-table tr.company-row td {
border-top: 1px solid #eee;
padding: 9px 6px;
}
table#companies-table tr.company-row.has-match td {
padding-bottom: 3px;
}
table#companies-table .sort.asc:after {
border-bottom: 6px solid;
}
table#companies-table .sort.desc:after {
border-top: 6px solid;
table#companies-table tr.company-match td {
padding: 0 0 9px 18px;
font-size: 81%;
font-style: italic;
}
/* Column-specific styles */
@ -132,16 +118,21 @@ table#companies-table td.company-region {
/* Mobile-friendly display */
@media screen and (max-width: 600px) {
body.filters-enabled h2#companies {
margin-bottom: 18px;
body.search-enabled h2#companies {
margin-bottom: 12px;
}
#company-filter {
#search-input {
display: block;
width: 100%;
margin: 27px 0 0 0;
}
#search-status {
font-size: 13.5px;
margin-left: 0;
}
table#companies-table,
table#companies-table thead,
table#companies-table tbody,
@ -149,33 +140,31 @@ table#companies-table td.company-region {
display: block;
}
table#companies-table tr {
border-bottom: 1px solid #eee;
padding: 0 0 8px 12px;
table#companies-table tr.company-row {
border-top: 1px solid #eee;
padding: 7.5px 0 6px 12px;
line-height: 1.2;
}
table#companies-table tr.company-row.has-match {
padding-bottom: 3px;
}
table#companies-table button.sort {
width: auto;
padding-left: 6px;
padding-right: 6px;
}
table#companies-table .sort.asc:after,
table#companies-table .sort.desc:after {
margin-right: 2px;
table#companies-table tr.company-match td {
padding-left: 12px;
font-size: 90%;
}
table#companies-table thead tr {
border-bottom-width: 3px;
border-bottom: 2px solid #eee;
padding: 0;
}
table#companies-table th,
table#companies-table td {
table#companies-table tr.company-row td,
table#companies-table tr.company-row.has-match td {
width: auto !important;
padding-left: 0;
padding-right: 0;
border-bottom-width: 0;
padding: 0;
border-width: 0;
}
table#companies-table th {
@ -191,11 +180,12 @@ table#companies-table td.company-region {
display: none;
}
table#companies-table td.company-name {
table#companies-table tr.company-row td.company-name,
table#companies-table tr.company-row.has-match td.company-name {
display: flex;
font-size: 16px;
font-weight: 700;
padding-bottom: 0;
padding-bottom: 3px;
margin-left: -12px;
}

View file

@ -11,6 +11,9 @@
{%- for script in scripts %}
<script type="text/javascript" src="{{ script.url }}"></script>
{%- endfor %}
{%- for src in inlineScripts %}
<script type="text/javascript">{{ src|safe }}</script>
{%- endfor %}
<link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="192x192" href="/favicon-192x192.png">
<link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">

View file

@ -12,37 +12,37 @@ website links.</p>
<th>Region</th>
</tr>
</thead>
<tbody><tr>
<tbody><tr class="company-row" id="company-row-0">
<td class="company-name"><a href="/and-yet/">&amp;yet</a></td>
<td class="company-website"><a href="https://andyet.com" target="_blank" rel="noopener noreferrer">andyet.com</a></td>
<td class="company-region">Worldwide</td>
</tr>
<tr>
<tr class="company-row" id="company-row-1">
<td class="company-name"><a href="/10up/">10up</a></td>
<td class="company-website"><a href="https://10up.com/" target="_blank" rel="noopener noreferrer">10up.com</a></td>
<td class="company-region">Worldwide</td>
</tr>
<tr>
<tr class="company-row" id="company-row-2">
<td class="company-name"><a href="/17hats/">17hats</a></td>
<td class="company-website"><a href="https://www.17hats.com/" target="_blank" rel="noopener noreferrer">17hats.com</a></td>
<td class="company-region">Worldwide</td>
</tr>
<tr>
<tr class="company-row" id="company-row-3">
<td class="company-name"><a href="/18f/">18F</a></td>
<td class="company-website"><a href="https://18f.gsa.gov/" target="_blank" rel="noopener noreferrer">18f.gsa.gov</a></td>
<td class="company-region">USA</td>
</tr>
<tr>
<tr class="company-row" id="company-row-4">
<td class="company-name"><a href="/45royale/">45royale</a> &#x26A0;</td>
<td class="company-website"><a href="http://45royale.com/" target="_blank" rel="noopener noreferrer">45royale.com</a></td>
<td class="company-region"></td>
</tr>
<tr>
<tr class="company-row" id="company-row-5">
<td class="company-name"><a href="/aerolab/">Aerolab</a> &#x26A0;</td>
<td class="company-website"><a href="https://aerolab.co/" target="_blank" rel="noopener noreferrer">aerolab.co</a></td>
<td class="company-region"></td>
</tr>
<tr>
<tr class="company-row" id="company-row-6">
<td class="company-name"><a href="/angularclass/">AngularClass</a> &#x26A0;</td>
<td class="company-website"><a href="http://www.wikihow.com/wikiHow:About-wikiHow" target="_blank" rel="noopener noreferrer">wikihow.com/wikiHow:About-wikiHow</a></td>
<td class="company-region">PST Timezone</td>