Fix formatting changes previously made

This commit is contained in:
Sandy McFadden 2022-02-04 21:24:39 -04:00
parent 745db9c291
commit cce5c06fae

View file

@ -1,71 +1,78 @@
#!/usr/bin/env node
const fs = require("fs");
const path = require("path");
const util = require("util");
const fs = require( 'fs' );
const path = require( 'path' );
const util = require( 'util' );
const cheerio = require( 'cheerio' );
const lunr = require( 'lunr' );
const marked = require( 'marked' );
const cheerio = require("cheerio");
const lunr = require("lunr");
const marked = require("marked");
/**
* Constants
*/
const headingsRequired = ["Company blurb"];
const headingsRequired = [
'Company blurb',
];
const headingsOptional = [
"Company size",
"Remote status",
"Region",
"Company technologies",
"Office locations",
"How to apply",
'Company size',
'Remote status',
'Region',
'Company technologies',
'Office locations',
'How to apply',
];
const headingsAll = headingsRequired.concat( headingsOptional );
/**
* Utility functions
*/
function companyNameToProfileFilename( companyName ) {
return companyName
.toLowerCase()
.replace(/&/g, " and ")
.replace(/'/g, "")
.replace(/[^a-z0-9]+/gi, "-")
.replace(/^-|-$/g, "");
return companyName.toLowerCase()
.replace( /&/g, ' and ' )
.replace( /'/g, '' )
.replace( /[^a-z0-9]+/gi, '-' )
.replace( /^-|-$/g, '' );
}
exports.companyNameToProfileFilename = companyNameToProfileFilename;
// adapted from https://gist.github.com/RandomEtc/2657669
function jsonStringifyUnicodeEscaped( obj ) {
return JSON.stringify(obj).replace(/[\u007f-\uffff]/g, (c) => {
return "\\u" + ("0000" + c.charCodeAt(0).toString(16)).slice(-4);
return JSON.stringify( obj ).replace( /[\u007f-\uffff]/g, c => {
return '\\u' + ( '0000' + c.charCodeAt( 0 ).toString( 16 ) ).slice( -4 );
} );
}
exports.jsonStringifyUnicodeEscaped = jsonStringifyUnicodeEscaped;
function toIdentifierCase( text ) {
return text
.replace(/'/g, "")
.replace(/[^a-z0-9]+/gi, " ")
.replace( /'/g, '' )
.replace( /[^a-z0-9]+/gi, ' ' )
.trim()
.split( /\s+/ )
.map( ( word, i ) => {
if ( i === 0 ) {
return word.toLowerCase();
}
return word.substr(0, 1).toUpperCase() + word.substr(1).toLowerCase();
return (
word.substr( 0, 1 ).toUpperCase()
+ word.substr( 1 ).toLowerCase()
);
} )
.join("");
.join( '' );
}
exports.toIdentifierCase = toIdentifierCase;
function stripExtraChars( text ) {
return text.replace(/\ufe0f/g, "");
return text.replace( /\ufe0f/g, '' );
}
exports.stripExtraChars = stripExtraChars;
/**
* Other exports
*/
@ -77,13 +84,14 @@ function getHeadingPropertyNames() {
}
exports.headingPropertyNames = getHeadingPropertyNames();
/**
* The main exported function
*
* Start with a directory including a README.md and company-profiles/*.md
* files, and validate and parse the content of the Markdown files.
*/
exports.parseFromDirectory = (contentPath) => {
exports.parseFromDirectory = contentPath => {
const companyNamesSeen = {};
let errors = [];
@ -95,28 +103,29 @@ exports.parseFromDirectory = (contentPath) => {
}
// Build list of Markdown files containing company profiles.
const profilesPath = path.join(contentPath, "company-profiles");
const profilesPath = path.join( contentPath, 'company-profiles' );
const profileFilenames = fs.readdirSync( profilesPath );
// Scan the company table in the readme.
const readmeCompanies = [];
const readmeMarkdown = stripExtraChars(
fs.readFileSync(path.join(contentPath, "README.md"), "utf8")
);
const readmeMarkdown = stripExtraChars( fs.readFileSync(
path.join( contentPath, 'README.md' ),
'utf8'
) );
let inTable = false;
readmeMarkdown.split("\n").forEach((line) => {
readmeMarkdown.split( '\n' ).forEach( line => {
if ( /^\s*-+\s*\|\s*-+\s*\|\s*-+\s*$/.test( line ) ) {
inTable = true;
} else if ( /^\s*$/.test( line ) ) {
inTable = false;
} else if ( inTable ) {
const fields = line.split("|");
const fields = line.split( '|' );
if ( fields.length !== 3 ) {
readmeError(
"Expected 3 table cells but found %d: %s",
'Expected 3 table cells but found %d: %s',
fields.length,
line
);
@ -127,38 +136,34 @@ exports.parseFromDirectory = (contentPath) => {
const $ = cheerio.load( marked.parse( readmeMarkdown ) );
function readmeError( msg, ...params ) {
error("README.md", msg, ...params);
error( 'README.md', msg, ...params );
}
const mainUrl = "remoteintech.company";
const mainUrl = 'remoteintech.company'
function addTargetBlankAndExternalLinkIcons (el) {
if (el.type === "tag") {
const anchorTagElements = el.children.filter(
(element) => element.name === "a"
);
if (el.type === 'tag') {
const anchorTagElements = el.children.filter(element => element.name === 'a')
if (anchorTagElements.length > 0) {
anchorTagElements.forEach((element) => {
const url = element.attribs.href;
const urlInfo = getUrlInfo(url);
anchorTagElements.forEach(element => {
const url = element.attribs.href
const urlInfo = getUrlInfo(url)
if (urlInfo.is_email || urlInfo.is_internal) {
return;
return
}
element.attribs.target = "_blank";
element.attribs.target = '_blank'
$element = $(element);
$element.append(
'<span style="vertical-align: text-top;"> <img src="/assets/external-link.svg" /> </span>'
);
});
$element = $( element )
$element.append('<span style="vertical-align: text-top;"> <img src="/assets/external-link.svg" /> </span>')
})
}
if (el.children && el.children.length) {
el.children.forEach((element) => {
addTargetBlankAndExternalLinkIcons(element);
});
el.children.forEach(element => {
addTargetBlankAndExternalLinkIcons(element)
})
}
}
}
@ -168,25 +173,24 @@ exports.parseFromDirectory = (contentPath) => {
* @param {*} url
*/
function getUrlInfo (url) {
const data = {};
const data = {}
if (url.match(/^mailto:/)) {
// checking url email or not
data.is_email = true;
return data;
if (url.match(/^mailto:/)) { // checking url email or not
data.is_email = true
return data
}
const mainDomainFromGivenUrl = extractMainDomainFromUrl(url);
const mainDomainFromGivenUrl = extractMainDomainFromUrl(url)
// checking url is email or not
if (mainDomainFromGivenUrl !== mainUrl) {
data.is_internal = false;
return data;
data.is_internal = false
return data
} else {
data.is_internal = true;
data.is_internal = true
}
return data;
return data
}
/**
@ -196,47 +200,38 @@ exports.parseFromDirectory = (contentPath) => {
function extractMainDomainFromUrl (url) {
const domainRe = /(https?:\/\/){0,1}((?:[\w\d-]+\.)+[\w\d]{2,})/i; // taken example from https://stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls
const data = domainRe.exec(url);
const data = domainRe.exec(url)
const splittedDomain = data[2].split(".");
const splittedDomain = data[2].split('.')
if (splittedDomain.length === 2) {
// check extra subdomain is present or not
return data[2];
if (splittedDomain.length === 2) { // check extra subdomain is present or not
return data[2]
}
return (
splittedDomain[splittedDomain.length - 2] +
"." +
splittedDomain[splittedDomain.length - 1]
); // return only main domain address
return splittedDomain[splittedDomain.length - 2] + '.' + splittedDomain[splittedDomain.length - 1] // return only main domain address
}
let lastCompanyName = null;
$("tr").each((i, tr) => {
$( 'tr' ).each( ( i, tr ) => {
const $tr = $( tr );
if ( i === 0 ) {
// Assign an ID to the table.
$tr.closest("table").attr("id", "companies-table");
$tr.closest( 'table' ).attr( 'id', 'companies-table' );
// Skip the table header row.
return;
}
const $td = $tr.children("td");
const $td = $tr.children( 'td' );
const websiteUrl = $td.eq( 1 ).text();
const websiteText = websiteUrl
.replace(/^https?:\/\//, "")
.replace(/^www\./, "")
.replace(/\/$/, "");
.replace( /^https?:\/\//, '' )
.replace( /^www\./, '' )
.replace( /\/$/, '' );
const readmeEntry = {
// Strip out warning emoji indicating that this profile is incomplete
name: $td
.eq(0)
.text()
.replace(/\u26a0/, "")
.trim(),
name: $td.eq( 0 ).text().replace( /\u26a0/, '' ).trim(),
// Detect warning emoji next to company name
isIncomplete: /\u26a0/.test( $td.eq( 0 ).text() ),
websiteUrl,
@ -245,32 +240,41 @@ exports.parseFromDirectory = (contentPath) => {
};
if ( ! websiteText ) {
readmeError("Missing website for company: %s", readmeEntry.name);
readmeError(
'Missing website for company: %s',
readmeEntry.name
);
}
if ( readmeEntry.name ) {
if ( companyNamesSeen[ readmeEntry.name.toLowerCase() ] ) {
readmeError("Duplicate company: %s", readmeEntry.name);
readmeError(
'Duplicate company: %s',
readmeEntry.name
);
}
companyNamesSeen[ readmeEntry.name.toLowerCase() ] = true;
} else {
readmeError("Missing company name: %s", $tr.html().replace(/\n/g, ""));
readmeError(
'Missing company name: %s',
$tr.html().replace( /\n/g, '' )
);
}
if (
$td.eq( 1 ).children().length !== 1 ||
!$td.eq(1).children().eq(0).is("a")
! $td.eq( 1 ).children().eq( 0 ).is( 'a' )
) {
readmeError(
"Invalid content in Website column: %s",
$tr.html().replace(/\n/g, "")
'Invalid content in Website column: %s',
$tr.html().replace( /\n/g, '' )
);
}
if ( $td.eq( 2 ).children().length > 0 ) {
readmeError(
"Extra content in Region column: %s",
$tr.html().replace(/\n/g, "")
'Extra content in Region column: %s',
$tr.html().replace( /\n/g, '' )
);
}
@ -286,12 +290,10 @@ exports.parseFromDirectory = (contentPath) => {
}
lastCompanyName = readmeEntry.name;
const $profileLink = $td.eq(0).find("a");
const $profileLink = $td.eq( 0 ).find( 'a' );
if ( $profileLink.length === 1 ) {
const match = $profileLink
.attr("href")
.match(/^\/company-profiles\/(.*\.md)$/);
const match = $profileLink.attr( 'href' ).match( /^\/company-profiles\/(.*\.md)$/ );
if ( match ) {
readmeEntry.linkedFilename = match[ 1 ];
@ -299,14 +301,14 @@ exports.parseFromDirectory = (contentPath) => {
readmeError(
'Missing company profile for "%s", or broken link: "%s"',
readmeEntry.name,
$profileLink.attr("href")
$profileLink.attr( 'href' )
);
}
const nameCheck = $profileLink.text().trim();
if ( nameCheck !== readmeEntry.name ) {
readmeError(
"Extra text in company name: %s, %s",
'Extra text in company name: %s, %s',
jsonStringifyUnicodeEscaped( nameCheck ),
jsonStringifyUnicodeEscaped( readmeEntry.name )
);
@ -315,7 +317,7 @@ exports.parseFromDirectory = (contentPath) => {
readmeError(
'Invalid link to company profile for "%s": "%s"',
readmeEntry.name,
$profileLink.attr("href")
$profileLink.attr( 'href' )
);
}
} else {
@ -327,78 +329,84 @@ exports.parseFromDirectory = (contentPath) => {
}
// Set identifying attributes on table elements
$tr.attr("class", "company-row").attr("id", "company-row-" + (i - 1));
$td.eq(0).attr("class", "company-name");
$td.eq(1).attr("class", "company-website");
$td.eq(2).attr("class", "company-region");
$tr
.attr( 'class', 'company-row' )
.attr( 'id', 'company-row-' + ( i - 1 ) );
$td.eq( 0 ).attr( 'class', 'company-name' );
$td.eq( 1 ).attr( 'class', 'company-website' );
$td.eq( 2 ).attr( 'class', 'company-region' );
// Rewrite company profile link to the correct URL for the static site
if ( $profileLink.length ) {
$profileLink.attr(
"href",
$profileLink
.attr("href")
.replace(/^\/company-profiles\//, "/")
.replace(/\.md$/, "/")
'href',
$profileLink.attr( 'href' )
.replace( /^\/company-profiles\//, '/' )
.replace( /\.md$/, '/' )
);
}
// Rewrite external website link (target="_blank" etc, shorter text)
const $websiteLink = $td.eq( 1 ).children().eq( 0 );
$websiteLink
.attr("target", "_blank")
.attr("rel", "noopener noreferrer")
.attr( 'target', '_blank' )
.attr( 'rel', 'noopener noreferrer' )
.text( websiteText );
readmeCompanies.push( readmeEntry );
} );
const readmeContent = $("body").html();
const readmeContent = $( 'body' ).html();
// Scan the individual Markdown files containing the company profiles.
const allProfileHeadings = {};
profileFilenames.forEach((filename) => {
profileFilenames.forEach( filename => {
function profileError( msg, ...params ) {
error( filename, msg, ...params );
}
const profileMarkdown = stripExtraChars(
fs.readFileSync(path.join(profilesPath, filename), "utf8")
);
const profileMarkdown = stripExtraChars( fs.readFileSync(
path.join( profilesPath, filename ),
'utf8'
) );
const $ = cheerio.load( marked.parse( profileMarkdown ) );
let hasTitleError = false;
if ($("h1").length !== 1) {
if ( $( 'h1' ).length !== 1 ) {
profileError(
"Expected 1 first-level heading but found %d",
$("h1").length
'Expected 1 first-level heading but found %d',
$( 'h1' ).length
);
hasTitleError = true;
}
if (!$("h1").parent().is("body")) {
profileError("The main title is wrapped inside of another element.");
if ( ! $( 'h1' ).parent().is( 'body' ) ) {
profileError(
'The main title is wrapped inside of another element.'
);
}
const companyName = $("h1").text();
const companyName = $( 'h1' ).text();
if ( ! /[a-z]/i.test( companyName ) ) {
profileError('Company name looks wrong: "%s"', companyName);
profileError(
'Company name looks wrong: "%s"',
companyName
);
hasTitleError = true;
}
const filenameBase = filename.replace(/\.md$/, "");
const filenameBase = filename.replace( /\.md$/, '' );
const filenameExpected = companyNameToProfileFilename( companyName );
if (
! hasTitleError &&
filenameBase !== filenameExpected &&
// Some profile files just have shorter names than the company name,
// which is fine.
filenameExpected.substring(0, filenameBase.length + 1) !==
filenameBase + "-"
filenameExpected.substring( 0, filenameBase.length + 1 ) !== filenameBase + '-'
) {
profileError(
'Company title "%s" doesn\'t match filename (expected ~ "%s.md")',
@ -408,21 +416,21 @@ exports.parseFromDirectory = (contentPath) => {
}
const readmeEntry = readmeCompanies.find(
(readmeEntry) => readmeEntry.linkedFilename === filename
readmeEntry => readmeEntry.linkedFilename === filename
);
if (filename !== "example.md" && !readmeEntry) {
profileError("No link to company profile from readme");
if ( filename !== 'example.md' && ! readmeEntry ) {
profileError( 'No link to company profile from readme' );
}
// Build and validate list of headings contained in this Markdown profile.
const profileHeadings = [];
$("h2").each((i, el) => {
$( 'h2' ).each( ( i, el ) => {
const headingName = $( el ).html();
if (!$(el).parent().is("body")) {
if ( ! $( el ).parent().is( 'body' ) ) {
profileError(
'The section heading for "%s" is wrapped inside of another element.',
headingName
@ -430,7 +438,10 @@ exports.parseFromDirectory = (contentPath) => {
}
if ( profileHeadings.indexOf( headingName ) >= 0 ) {
profileError('Duplicate section: "%s".', headingName);
profileError(
'Duplicate section: "%s".',
headingName
);
} else {
// Track headings for this profile
profileHeadings.push( headingName );
@ -451,9 +462,12 @@ exports.parseFromDirectory = (contentPath) => {
}
} );
headingsRequired.forEach((headingName) => {
headingsRequired.forEach( headingName => {
if ( profileHeadings.indexOf( headingName ) === -1 ) {
profileError('Required section "%s" not found.', headingName);
profileError(
'Required section "%s" not found.',
headingName
);
}
} );
@ -465,40 +479,37 @@ exports.parseFromDirectory = (contentPath) => {
}
let currentHeading = null;
$("body")
.children()
.each((i, el) => {
$( 'body' ).children().each( ( i, el ) => {
const $el = $( el );
if ($el.is("h1")) {
if ( $el.is( 'h1' ) ) {
return;
}
if ($el.is("h2")) {
if ( $el.is( 'h2' ) ) {
currentHeading = $el.html();
profileContent[currentHeading] = "";
profileContent[ currentHeading ] = '';
} else if ( currentHeading ) {
// Note: This assumes that the only possible children of the
// 'body' are block-level elements. I think this is correct,
// because from what I've seen, any inline content is wrapped
// in a <p>.
addTargetBlankAndExternalLinkIcons(el);
addTargetBlankAndExternalLinkIcons(el)
profileContent[ currentHeading ] = (
profileContent[currentHeading] +
"\n\n" +
$.html(el)
profileContent[ currentHeading ]
+ '\n\n' + $.html( el )
).trim();
} else {
profileError(
"Content is not part of any section: %s",
$.html(el).replace(/\n/g, "")
'Content is not part of any section: %s',
$.html( el ).replace( /\n/g, '' )
);
}
} );
Object.keys(profileContent).forEach((heading) => {
Object.keys( profileContent ).forEach( heading => {
const sectionText = profileContent[ heading ]
.replace(/<[^>]+>/g, "")
.replace( /<[^>]+>/g, '' )
.trim();
if ( ! sectionText ) {
profileError(
@ -509,7 +520,7 @@ exports.parseFromDirectory = (contentPath) => {
} );
// Rewrite profile content to use more code-friendly heading names.
Object.keys(profileContent).forEach((headingName) => {
Object.keys( profileContent ).forEach( headingName => {
const headingIdentifier = toIdentifierCase( headingName );
profileContent[ headingIdentifier ] = profileContent[ headingName ];
delete profileContent[ headingName ];
@ -520,29 +531,27 @@ exports.parseFromDirectory = (contentPath) => {
// mark was left in the readme, or vice versa.
const isIncomplete = {
readme: readmeEntry.isIncomplete,
sections:
sections: (
profileHeadings.length === 1 &&
profileHeadings[0] === "Company blurb",
profileHeadings[ 0 ] === 'Company blurb'
),
content: /&#x26A0;/.test( profileContent.companyBlurb ),
};
const incompleteCount = Object.values(isIncomplete).reduce(
(sum, v) => sum + (v ? 1 : 0),
0
);
const incompleteCount = Object.values( isIncomplete )
.reduce( ( sum, v ) => sum + ( v ? 1 : 0 ), 0 );
// incompleteCount === 0: Profile is incomplete; all 3 indicators are consistent
// incompleteCount === 3: Profile is "complete"; all 3 indicators are consistent
if ( incompleteCount === 1 ) {
if ( isIncomplete.readme ) {
profileError(
"Profile looks complete, but the main readme contains a warning emoji."
'Profile looks complete, but the main readme contains a warning emoji.'
);
} else if ( isIncomplete.sections ) {
profileError(
'Profile is marked as complete, but it only contains a "Company blurb" heading.'
);
} else {
// isIncomplete.content
)
} else { // isIncomplete.content
profileError(
'Profile looks complete, but the "Company blurb" contains a warning emoji.'
);
@ -550,15 +559,14 @@ exports.parseFromDirectory = (contentPath) => {
} else if ( incompleteCount === 2 ) {
if ( ! isIncomplete.readme ) {
profileError(
"Profile looks incomplete, but the main readme does not contain a warning emoji."
'Profile looks incomplete, but the main readme does not contain a warning emoji.'
);
} else if ( ! isIncomplete.sections ) {
profileError(
"Profile is marked as incomplete, but it contains multiple sections." +
'\nPlease remove the warning emoji from the "Company blurb" section and the main readme.'
);
} else {
// ! isIncomplete.content
'Profile is marked as incomplete, but it contains multiple sections.'
+ '\nPlease remove the warning emoji from the "Company blurb" section and the main readme.'
)
} else { // ! isIncomplete.content
profileError(
'Profile looks incomplete, but the "Company blurb" does not contain a warning emoji.'
);
@ -568,7 +576,7 @@ exports.parseFromDirectory = (contentPath) => {
} );
const profileHeadingCounts = {};
Object.keys(allProfileHeadings).forEach((heading) => {
Object.keys( allProfileHeadings ).forEach( heading => {
profileHeadingCounts[ heading ] = allProfileHeadings[ heading ].length;
} );
@ -578,7 +586,7 @@ exports.parseFromDirectory = (contentPath) => {
errors,
profileFilenames,
profileHeadingCounts,
};
}
}
return {
@ -593,7 +601,7 @@ exports.parseFromDirectory = (contentPath) => {
/**
* Build search index data from the result of parseFromDirectory().
*/
exports.buildSearchData = (data) => {
exports.buildSearchData = data => {
const textData = [];
data.companies.forEach( ( company, i ) => {
@ -607,13 +615,11 @@ exports.buildSearchData = (data) => {
thisTextData.shortRegion = company.shortRegion;
}
Object.keys(exports.headingPropertyNames).forEach((h) => {
Object.keys( exports.headingPropertyNames ).forEach( h => {
if ( company.profileContent[ h ] ) {
const text = cheerio
.load(company.profileContent[h])
.text()
const text = cheerio.load( company.profileContent[ h ] ).text()
// Replace warning emoji with a searchable token
.replace(/\u26a0/, "(_incomplete)");
.replace( /\u26a0/, '(_incomplete)' );
thisTextData[ h ] = text;
}
} );
@ -622,34 +628,34 @@ exports.buildSearchData = (data) => {
} );
const index = lunr( function() {
this.field("nameText");
this.field("websiteText");
this.field("shortRegion");
this.field( 'nameText' );
this.field( 'websiteText' );
this.field( 'shortRegion' );
Object.keys(exports.headingPropertyNames).forEach((h) => {
Object.keys( exports.headingPropertyNames ).forEach( h => {
this.field( h );
} );
// https://github.com/olivernn/lunr.js/issues/25#issuecomment-623267494
this.metadataWhitelist = ["position"];
this.metadataWhitelist = ['position'];
// https://github.com/olivernn/lunr.js/issues/192#issuecomment-172915226
// https://gist.github.com/olivernn/7cd496f8654a0246c53c
function contractionTrimmer( token ) {
return token.update((str) => {
return str.replace(/('m|'ve|n't|'d|'ll|'ve|'s|'re)$/, "");
return token.update( str => {
return str.replace( /('m|'ve|n't|'d|'ll|'ve|'s|'re)$/, '' );
} );
}
lunr.Pipeline.registerFunction(contractionTrimmer, "contractionTrimmer");
lunr.Pipeline.registerFunction( contractionTrimmer, 'contractionTrimmer' );
this.pipeline.after( lunr.trimmer, contractionTrimmer );
Object.keys(textData).forEach((c) => this.add(textData[c]));
Object.keys( textData ).forEach( c => this.add( textData[ c ] ) );
} );
const headings = getHeadingPropertyNames();
headings.nameText = "Company name";
headings.websiteText = "Website";
headings.shortRegion = "Region";
headings.nameText = 'Company name';
headings.websiteText = 'Website';
headings.shortRegion = 'Region';
return { index, textData, headings };
};