remote-jobs/bin/build-site.js

300 lines
8.8 KiB
JavaScript
Raw Normal View History

#!/usr/bin/env node
const fs = require( 'fs' );
const path = require( 'path' );
const util = require( 'util' );
const cheerio = require( 'cheerio' );
const phin = require( 'phin' );
const rimraf = require( 'rimraf' );
const swig = require( 'swig-templates' );
// [Blame annotation captured with this file, dated 2020-05-06 06:42:21 +00:00;
// it is commit history, not part of the program.]
// Add full profile search (#763)
// * Prevent duplicate company names
// * Fix output indentation
// * Search full profile content using lunr.js
// * Remove extra stop words
//   This wasn't really working correctly - the stop word 'work' would leave
//   instances of 'working' and 'works' in the index for example.
// * Change company name description from "Name" to "Company name"
// * Pre-process query:
//   - Search for terms in AND mode, per
//     https://lunrjs.com/guides/searching.html#term-presence
//   - Discard non-alphanumeric characters from the search
//   - Better handling of contractions and searching for stop words
// * Display search query and results in the console
// * Add special search token: _incomplete
// * Add a link to search for incomplete profiles
// * Revert "Add a link to search for incomplete profiles"
//   This reverts commit f6384c90cb1790fdc7492b4e887127812db21f30.
// * Add link to search documentation
// * Improve search explanation appearance when it spans multiple lines
// * Fix searching for contractions
//   Previously, searching for e.g. "don't" wasn't working correctly. After
//   trimming the contraction, "do" is a stop word, so it should be ignored.
// * Improve "empty search" message
// * Prefer matches other than "company name" in search excerpts
// * Move inline scripts before external scripts
//   This probably doesn't matter right now due to the way the scripts are
//   currently structured, but it might matter one day and it's more logical
//   this way.
// * Fix search engine index progress
// * Improve script indentation
// * I got 99 problems and they're all bots
// * Update script exit code
//   When a Node.js error occurs the exit code is probably going to be 1, so
//   we should use a different code.
// * Fix the tests
// * Update documentation
//   This was wrong (out of date), but the correct version is obvious from
//   reading the code.
// * Make download progress work in both Chrome and Firefox
//   See https://stackoverflow.com/a/32799706
const {
parseFromDirectory,
headingPropertyNames,
buildSearchData,
} = require( '../lib' );
// Directory one level above this script; it is handed to parseFromDirectory()
const contentPath = path.join( __dirname, '..' );
// The 'site' directory (templates and assets are read from beneath it)
const sitePath = path.join( contentPath, 'site' );
// Output directory for the generated site; wiped and recreated on every run
const siteBuildPath = path.join( sitePath, 'build' );
// Running this script from within the site build path cannot work: that
// directory is deleted before the site is regenerated, which leaves the
// process without a valid working directory.
// Error message (in node core): path.js:1086 cwd = process.cwd();
// Error: ENOENT: no such file or directory, uv_cwd
//
// Throws an Error when `wd` is the build directory or lies inside it.
function checkPath( wd ) {
	// A trailing separator on both sides makes this a whole-segment prefix
	// test, so a sibling such as 'site/build-old' does not match
	const workingDir = path.resolve( wd ) + path.sep;
	const buildPrefix = siteBuildPath + path.sep;

	if ( ! workingDir.startsWith( buildPrefix ) ) {
		return;
	}

	throw new Error(
		"Please change out of the 'site/build' directory before running this script"
	);
}
// Directories to validate: always the current working directory, plus the
// original working directory when this script was run via npm (INIT_CWD is
// set), because npm barfs in this situation too
const startDirs = [ process.cwd() ];
if ( process.env.INIT_CWD ) {
	startDirs.push( process.env.INIT_CWD );
}
for ( const startDir of startDirs ) {
	checkPath( startDir );
}
// Parse the content from the Markdown files
console.log( 'Parsing content' );
const data = parseFromDirectory( contentPath );

// Stop if there were any errors: print each one as "<filename>: <line>"
// (one output line per line of the message), then exit with status 1
const parseErrors = data.errors || [];
if ( parseErrors.length > 0 ) {
	for ( const { filename, message } of parseErrors ) {
		for ( const line of message.split( '\n' ) ) {
			console.log( '%s: %s', filename, line );
		}
	}
	process.exit( 1 );
}
// Otherwise, OK to continue building the static site

// Timestamp taken once per run; it is embedded in generated asset filenames
const assetCacheBuster = Date.now();

// https://github.com/nodejs/node/issues/17871 :(
// Report any promise rejection nobody handled and exit with status 1
function onUnhandledRejection( reason ) {
	console.error( 'Unhandled promise rejection:', reason );
	process.exit( 1 );
}
process.on( 'unhandledRejection', onUnhandledRejection );
/**
 * Fetch a URL over HTTP and resolve with the response body as a string.
 *
 * The URL is logged before the request is made; URLs longer than 70
 * characters are shortened to 67 characters plus '...' in the log only.
 * Any status code other than 200 results in a rejection.
 */
async function request( url ) {
	let loggedUrl = url;
	if ( url.length > 70 ) {
		loggedUrl = url.substring( 0, 67 ) + '...';
	}
	console.log( 'Requesting URL "%s"', loggedUrl );

	const { statusCode, body } = await phin.promisified( url );
	if ( statusCode === 200 ) {
		return body.toString();
	}

	throw new Error( 'HTTP response code ' + statusCode + ' for URL: ' + url );
}
/**
 * Write a file to site/build/assets/ (from memory or from an existing file in
 * site/assets/) and optionally include a cache buster in the new name.
 *
 * @param {string} filename
 *     Asset filename. When no content is supplied, this is also the name of
 *     the source file read from site/assets/.
 * @param {?(string|Buffer)} [content=null]
 *     Content to write. Only null (or an omitted argument) means "copy the
 *     existing file"; an empty string is written out as an empty file.
 * @param {boolean} [addSuffix=true]
 *     Whether to insert '-<assetCacheBuster>' before the file extension.
 * @returns {string} The URL to the asset file ('/assets/<written filename>').
 * @throws {Error} Whatever fs.readFileSync / fs.writeFileSync throw, e.g. when
 *     the source asset is missing or the destination directory does not exist.
 */
function copyAssetToBuild( filename, content = null, addSuffix = true ) {
	// The regex only matches a trailing ".ext", so names without an
	// extension are left unchanged even when addSuffix is true
	const destFilename = addSuffix
		? filename.replace( /(\.[^.]+)$/, '-' + assetCacheBuster + '$1' )
		: filename;
	const destPath = path.join( siteBuildPath, 'assets', destFilename );

	// Explicit null check (the default turns undefined into null): a
	// truthiness test would mistake legitimately empty content ('') for
	// "no content" and try to read a source file that may not exist
	const output = content === null
		? fs.readFileSync( path.join( sitePath, 'assets', filename ) )
		: content;

	fs.writeFileSync( destPath, output );
	return '/assets/' + destFilename;
}
/**
 * Build the GitHub URL for editing a file on the 'main' branch of the
 * remoteintech/remote-jobs repository. `filename` is appended as-is.
 */
function githubEditUrl( filename ) {
	const editBaseUrl = 'https://github.com/remoteintech/remote-jobs/edit/main/';
	return editBaseUrl + filename;
}
/**
 * Write a page's contents to an HTML file inside the site build directory.
 *
 * @param {string} filename
 *     Path of the page relative to the build directory, for example
 *     'index.html' or 'some-company/index.html'.
 * @param {string|Buffer} pageContent
 *     The content to write.
 * @throws {Error} Whatever fs.mkdirSync / fs.writeFileSync throw.
 */
function writePage( filename, pageContent ) {
	const destPath = path.join( siteBuildPath, filename );

	// `recursive: true` creates every missing ancestor directory and does
	// nothing when the directory already exists, so no separate existence
	// check is needed and pages nested several levels deep work too
	fs.mkdirSync( path.dirname( destPath ), { recursive: true } );

	fs.writeFileSync( destPath, pageContent );
}
/**
 * The main function that prepares the static site.
 *
 * In order, it:
 *  1. downloads the WP.com blog home page and collects its inline and linked
 *     stylesheets plus the emoji settings script;
 *  2. deletes and recreates the site build directory;
 *  3. writes shared and page-specific assets, favicon files and the search
 *     index into the build directory;
 *  4. renders the main page, one page per company and the custom 404 page.
 *
 * Nothing is deleted until the downloads in step 1 have succeeded.
 *
 * @returns {Promise<void>} Resolves once all files are written; rejects when
 *     an HTTP request, a template or a file system operation throws.
 */
async function buildSite() {
	// Load the HTML from the WP.com blog site
	const $ = cheerio.load( await request( 'https://blog.remoteintech.company/' ) );

	// Load stylesheets from the WP.com blog site
	const wpcomStylesheets = $( 'style, link[rel=stylesheet]' ).map( ( i, el ) => {
		const $el = $( el );
		const stylesheet = {
			id: $el.attr( 'id' ) || null,
			media: $el.attr( 'media' ) || null,
		};
		if ( $el.is( 'style' ) ) {
			stylesheet.content = $el.html();
		} else {
			stylesheet.url = $el.attr( 'href' );
			// Protocol-relative URLs ("//host/...") need an explicit scheme
			// before they can be requested
			if ( /^\/\//.test( stylesheet.url ) ) {
				stylesheet.url = 'https:' + stylesheet.url;
			}
		}
		return stylesheet;
	} ).toArray();

	// Fetch the contents of stylesheets included via <link> tags
	await Promise.all(
		wpcomStylesheets.filter( s => !! s.url ).map( stylesheet => {
			return request( stylesheet.url ).then( content => {
				stylesheet.content = content;
			} );
		} )
	);

	// TODO: Most URLs that appear inside these CSS files are broken because
	// they refer to relative URLs against s[012].wp.com
	const wpcomStylesheetContent = wpcomStylesheets
		// A <link> without an href is never fetched above, so its content is
		// still undefined here; skip such entries (and empty stylesheets)
		// instead of crashing on `.trim()`
		.filter( stylesheet => !! ( stylesheet.content && stylesheet.content.trim() ) )
		.map( stylesheet => {
			const lines = [ '/**' ];
			const idString = (
				stylesheet.id ? ' (id="' + stylesheet.id + '")' : ''
			);
			if ( stylesheet.url ) {
				lines.push( ' * WP.com external style' + idString );
				lines.push( ' * ' + stylesheet.url );
			} else {
				lines.push( ' * WP.com inline style' + idString );
			}
			lines.push( ' */' );
			// Preserve a media restriction by wrapping the CSS in @media
			if ( stylesheet.media && stylesheet.media !== 'all' ) {
				lines.push( '@media ' + stylesheet.media + ' {' );
			}
			lines.push( stylesheet.content.trim() );
			if ( stylesheet.media && stylesheet.media !== 'all' ) {
				lines.push( '} /* @media ' + stylesheet.media + ' */' );
			}
			return lines.join( '\n' );
		} ).join( '\n\n' ) + '\n';

	// Use the emoji code from WP.com
	// Most platforms will display emoji natively, but e.g. Linux does not
	let wpcomEmojiScript = null;
	$( 'script' ).each( ( i, el ) => {
		const scriptContents = $( el ).html();
		if ( /\bwindow\._wpemojiSettings\s*=\s*{/.test( scriptContents ) ) {
			wpcomEmojiScript = scriptContents;
		}
	} );

	// Set up the site build directory (start fresh each time)
	rimraf.sync( siteBuildPath );
	fs.mkdirSync( siteBuildPath );
	fs.mkdirSync( path.join( siteBuildPath, 'assets' ) );

	// These two assets are copied without a cache buster in their names
	copyAssetToBuild( 'remoteintech.png', null, false );
	copyAssetToBuild( 'external-link.svg', null, false );

	// Set up styles/scripts to be included on all pages
	const stylesheets = [ {
		url: copyAssetToBuild( 'wpcom-blog-styles.css', wpcomStylesheetContent ),
	}, {
		// NOTE(review): this URL contains the HTML entity '&amp;'; presumably
		// the templates output it without escaping - confirm
		url: '//fonts.googleapis.com/css?family=Source+Sans+Pro:r%7CSource+Sans+Pro:r,i,b,bi&amp;subset=latin,latin-ext,latin,latin-ext',
	}, {
		url: copyAssetToBuild( 'site.css' ),
	} ];
	const scripts = [];
	if ( wpcomEmojiScript ) {
		scripts.push( {
			url: copyAssetToBuild( 'wpcom-emoji.js', wpcomEmojiScript ),
		} );
	}

	// Set up styles/scripts for specific pages
	const indexScripts = [ {
		url: '//cdnjs.cloudflare.com/ajax/libs/lunr.js/2.3.7/lunr.min.js',
	}, {
		url: copyAssetToBuild( 'companies-table.js' ),
	} ];
	const notFoundStyles = [ {
		url: copyAssetToBuild( '404.css' )
	} ];

	// Copy favicon files
	console.log( 'Copying favicon files' );
	const faviconPath = path.join( sitePath, 'assets', 'favicon-package' );
	fs.readdirSync( faviconPath ).forEach( f => {
		fs.copyFileSync( path.join( faviconPath, f ), path.join( siteBuildPath, f ) );
	} );

	// Generate search index
	console.log( 'Generating search index' );
	const searchIndexData = JSON.stringify( buildSearchData( data ) );
	const searchIndexFilename = copyAssetToBuild(
		'search.js',
		searchIndexData
	);

	// Generate the index.html file from the main README
	// TODO: Build this page and its table dynamically; more filters
	const readmeTemplate = swig.compileFile(
		path.join( sitePath, 'templates', 'index.html' )
	);
	console.log( 'Writing main page' );
	writePage( 'index.html', readmeTemplate( {
		stylesheets,
		scripts: scripts.concat( indexScripts ),
		// Passes the search index URL and its length (in string characters)
		// to the page as global variables
		inlineScripts: [
			'\n\t\tvar searchIndexFilename = ' + JSON.stringify( searchIndexFilename ) + ';'
			+ '\n\t\tvar searchIndexSize = ' + JSON.stringify( searchIndexData.length ) + ';'
			+ '\n\t\t',
		],
		pageContent: data.readmeContent,
		editUrl: githubEditUrl( 'README.md' ),
	} ) );

	// Generate the page for each company
	const companyTemplate = swig.compileFile(
		path.join( sitePath, 'templates', 'company.html' )
	);
	process.stdout.write( 'Writing company pages..' );
	data.companies.forEach( ( company, i ) => {
		// Each company page is written to '<profile name without .md>/index.html'
		const dirname = company.linkedFilename.replace( /\.md$/, '' );
		// Headings with no (truthy) content in this company's profile
		const missingHeadings = Object.keys( headingPropertyNames )
			.filter( h => ! company.profileContent[ h ] );

		writePage( path.join( dirname, 'index.html' ), companyTemplate( {
			stylesheets,
			scripts,
			inlineScripts: [],
			company,
			headingPropertyNames,
			missingHeadings,
			editUrl: githubEditUrl( 'company-profiles/' + company.linkedFilename ),
		} ) );

		// Progress indicator: one dot for every tenth company
		if ( i % 10 === 0 ) {
			process.stdout.write( '.' );
		}
	} );

	// Generate custom 404 page
	console.log();
	console.log( 'Writing custom 404 page' );
	const notFoundTemplate = swig.compileFile(
		path.join( sitePath, 'templates', '404.html' )
	);
	writePage( '404.html', notFoundTemplate( {
		notFoundStyles
	} ) );

	console.log();
	console.log( 'Site files are ready in "site/build/"' );
}
// Start the build. The returned promise is intentionally not awaited here: a
// rejection is reported by the process-level 'unhandledRejection' handler
// registered earlier in this file, which logs the error and exits with status 1.
buildSite();