mirror of
https://github.com/remoteintech/remote-jobs
synced 2024-12-26 04:13:10 +00:00
Improve URL handling for external links in index.js
Improved URL handling with error checks for malformed links in `getUrlInfo`, refined domain extraction in `extractMainDomainFromUrl`, and ensured external link attributes apply correctly. Enhanced logging for easier debugging. Signed-off-by: Muhammad Azeem <37941410+AzeemSup@users.noreply.github.com>
This commit is contained in:
parent
9f13291a72
commit
35cb998ae5
1 changed files with 60 additions and 56 deletions
116
lib/index.js
116
lib/index.js
|
@ -141,74 +141,78 @@ exports.parseFromDirectory = contentPath => {
|
|||
|
||||
// Main domain of this site. getUrlInfo compares the main domain extracted from
// each link against this value to decide whether the link is internal.
const mainUrl = 'remoteintech.company'
|
||||
|
||||
function addTargetBlankAndExternalLinkIcons (el) {
|
||||
if (el.type === 'tag') {
|
||||
const anchorTagElements = el.children.filter(element => element.name === 'a')
|
||||
if (anchorTagElements.length > 0) {
|
||||
anchorTagElements.forEach(element => {
|
||||
const url = element.attribs.href
|
||||
const urlInfo = getUrlInfo(url)
|
||||
|
||||
if (urlInfo.is_email || urlInfo.is_internal) {
|
||||
return
|
||||
}
|
||||
|
||||
element.attribs.target = '_blank'
|
||||
|
||||
$element = $( element )
|
||||
$element.append('<span style="vertical-align: text-top;"> <img src="/assets/external-link.svg" /> </span>')
|
||||
})
|
||||
}
|
||||
|
||||
if (el.children && el.children.length) {
|
||||
el.children.forEach(element => {
|
||||
addTargetBlankAndExternalLinkIcons(element)
|
||||
})
|
||||
}
|
||||
}
|
||||
/**
 * Walks a parsed DOM node and decorates external anchors.
 *
 * For every direct `<a>` child of a tag node whose href is set and is neither
 * an email link nor an internal link (as reported by getUrlInfo), the anchor
 * gets `target="_blank"`, `rel="noopener noreferrer"` and an external-link
 * icon appended to its content. The same treatment is then applied
 * recursively to every child of the node.
 *
 * @param {*} el - DOM node to process; nodes whose type is not 'tag' are ignored.
 */
function addTargetBlankAndExternalLinkIcons(el) {
  if (el.type !== 'tag') {
    return;
  }

  // First pass: decorate the anchors that are direct children of this node.
  for (const node of el.children) {
    if (node.name !== 'a') {
      continue;
    }

    const href = node.attribs.href;
    const info = getUrlInfo(href);
    const shouldSkip = !href || info.is_email || info.is_internal;
    if (shouldSkip) {
      continue;
    }

    node.attribs.target = '_blank';
    node.attribs.rel = 'noopener noreferrer';
    $(node).append('<span style="vertical-align: text-top;"> <img src="/assets/external-link.svg" /> </span>');
  }

  // Second pass: descend into every child so nested anchors are handled too.
  for (const child of el.children) {
    addTargetBlankAndExternalLinkIcons(child);
  }
}
|
||||
|
||||
/**
|
||||
* Gets info about the URL: whether it is an email (mailto:) link or an internal link.
|
||||
* @param {*} url
|
||||
*/
|
||||
function getUrlInfo (url) {
|
||||
const data = {}
|
||||
|
||||
if (url.match(/^mailto:/)) { // checking url email or not
|
||||
data.is_email = true
|
||||
return data
|
||||
}
|
||||
|
||||
const mainDomainFromGivenUrl = extractMainDomainFromUrl(url)
|
||||
|
||||
// checking url is email or not
|
||||
if (mainDomainFromGivenUrl !== mainUrl) {
|
||||
data.is_internal = false
|
||||
return data
|
||||
} else {
|
||||
data.is_internal = true
|
||||
}
|
||||
|
||||
return data
|
||||
/**
 * Classifies a link target as an email link, an internal link, or neither.
 *
 * @param {*} url - The href value to inspect. Anything that is not a non-empty
 *   string is reported as neither email nor internal.
 * @returns {{is_email: boolean, is_internal: boolean}} Flags describing the
 *   link. Both keys are always present so callers can rely on the shape.
 */
function getUrlInfo(url) {
  const data = { is_email: false, is_internal: false };

  if (!url || typeof url !== 'string') {
    return data;
  }

  const trimmedUrl = url.trim();

  // URI schemes are case-insensitive, so "MAILTO:" must be recognised as well.
  if (/^mailto:/i.test(trimmedUrl)) {
    data.is_email = true;
    return data;
  }

  // Root-relative paths, "./" and "../" paths, fragments and query-only links
  // carry no host and therefore always stay on this site. A leading "//" is
  // protocol-relative (it names a host), so it still goes through the domain
  // check below.
  if (/^(?:\/(?!\/)|[#?]|\.{1,2}\/)/.test(trimmedUrl)) {
    data.is_internal = true;
    return data;
  }

  try {
    const mainDomainFromGivenUrl = extractMainDomainFromUrl(trimmedUrl);
    // Host names are case-insensitive; normalise both sides before comparing.
    data.is_internal = mainDomainFromGivenUrl.toLowerCase() === mainUrl.toLowerCase();
  } catch (error) {
    // Best effort: a link that cannot be analysed is treated as external.
    console.error("Error processing URL:", url, error);
    data.is_internal = false;
  }

  return data;
}
|
||||
|
||||
/**
|
||||
* Extracting main domain from the url
|
||||
* @param {*} url
|
||||
*/
|
||||
function extractMainDomainFromUrl (url) {
|
||||
const domainRe = /(https?:\/\/){0,1}((?:[\w\d-]+\.)+[\w\d]{2,})/i; // taken example from https://stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls
|
||||
|
||||
const data = domainRe.exec(url)
|
||||
|
||||
const splittedDomain = data[2].split('.')
|
||||
|
||||
if (splittedDomain.length === 2) { // check extra subdomain is present or not
|
||||
return data[2]
|
||||
}
|
||||
|
||||
return splittedDomain[splittedDomain.length - 2] + '.' + splittedDomain[splittedDomain.length - 1] // return only main domain address
|
||||
/**
 * Extracts the main domain (the last two host labels) from a URL.
 *
 * The host is only read from the authority position at the very start of the
 * string: after an optional "scheme://" or protocol-relative "//" prefix and
 * optional "user:pass@" credentials. Domain-looking text that appears later,
 * in a path or query string, is therefore never mistaken for the host.
 *
 * NOTE: keeping only the last two labels is a heuristic. Hosts under a
 * multi-part public suffix (e.g. "example.co.uk") collapse to the suffix
 * ("co.uk").
 *
 * @param {*} url - URL to inspect, with or without a scheme.
 * @returns {string} Lower-cased main domain (e.g. "example.com"), or an empty
 *   string when no host can be extracted.
 */
function extractMainDomainFromUrl(url) {
  if (typeof url !== 'string') {
    console.warn("Invalid URL format:", url);
    return ''; // Return empty if domain extraction fails
  }

  // Anchored variant of the pattern from
  // https://stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls
  // Group 1 is the full host name.
  const hostRe = /^(?:(?:[a-z][a-z\d+.-]*:)?\/\/)?(?:[^\/?#@]*@)?((?:[\w-]+\.)+\w{2,})/i;
  const match = hostRe.exec(url.trim());

  if (!match) {
    console.warn("Invalid URL format:", url);
    return ''; // Return empty if domain extraction fails
  }

  // Host names are case-insensitive; normalise so callers can compare with ===.
  return match[1].toLowerCase().split('.').slice(-2).join('.');
}
|
||||
|
||||
let lastCompanyName = null;
|
||||
|
|
Loading…
Reference in a new issue