Improve URL handling for external links in index.js

Improved URL handling with error checks for malformed links in `getUrlInfo`, refined domain extraction in `extractMainDomainFromUrl`, and ensured external link attributes apply correctly. Enhanced logging for easier debugging.

Signed-off-by: Muhammad Azeem <37941410+AzeemSup@users.noreply.github.com>
This commit is contained in:
Muhammad Azeem 2024-10-31 15:48:27 +05:00 committed by GitHub
parent 9f13291a72
commit 35cb998ae5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -141,74 +141,78 @@ exports.parseFromDirectory = contentPath => {
// Main domain of this site. getUrlInfo compares the domain extracted from each
// link against this value to decide whether the link is internal.
const mainUrl = 'remoteintech.company'
function addTargetBlankAndExternalLinkIcons (el) {
if (el.type === 'tag') {
const anchorTagElements = el.children.filter(element => element.name === 'a')
if (anchorTagElements.length > 0) {
anchorTagElements.forEach(element => {
const url = element.attribs.href
const urlInfo = getUrlInfo(url)
if (urlInfo.is_email || urlInfo.is_internal) {
return
}
element.attribs.target = '_blank'
$element = $( element )
$element.append('<span style="vertical-align: text-top;"> <img src="/assets/external-link.svg" /> </span>')
})
}
if (el.children && el.children.length) {
el.children.forEach(element => {
addTargetBlankAndExternalLinkIcons(element)
})
}
}
/**
 * Recursively walks a parsed element tree and decorates external links.
 *
 * For every direct `<a>` child of a tag node whose href is present and is
 * neither a mailto link nor an internal link (as reported by getUrlInfo), the
 * anchor is set to open in a new tab with `rel="noopener noreferrer"` and an
 * external-link icon is appended to it. Non-tag nodes are ignored.
 *
 * @param {*} el node to process; its `children` are visited recursively
 */
function addTargetBlankAndExternalLinkIcons(el) {
  if (el.type !== 'tag') {
    return;
  }

  // First pass: decorate the anchors that are direct children of this node.
  const links = el.children.filter(node => node.name === 'a');
  for (const link of links) {
    const href = link.attribs.href;
    const info = getUrlInfo(href);
    const isExternal = Boolean(href) && !info.is_email && !info.is_internal;
    if (!isExternal) {
      continue;
    }
    link.attribs.target = '_blank';
    link.attribs.rel = 'noopener noreferrer';
    $(link).append('<span style="vertical-align: text-top;"> <img src="/assets/external-link.svg" /> </span>');
  }

  // Second pass: descend into every child so nested anchors are handled too.
  for (const node of el.children) {
    addTargetBlankAndExternalLinkIcons(node);
  }
}
/**
* Gets info about the URL: whether it is an email (mailto) link (`is_email`) or an internal link (`is_internal`).
* @param {*} url
*/
function getUrlInfo (url) {
const data = {}
if (url.match(/^mailto:/)) { // checking url email or not
data.is_email = true
return data
}
const mainDomainFromGivenUrl = extractMainDomainFromUrl(url)
// checking url is email or not
if (mainDomainFromGivenUrl !== mainUrl) {
data.is_internal = false
return data
} else {
data.is_internal = true
}
return data
/**
 * Classifies a link target as an email link, an internal link, or neither.
 *
 * The returned object always carries both flags:
 *  - `is_email`    true when the href uses the `mailto:` scheme (any letter case)
 *  - `is_internal` true when the href points at this site: either a relative
 *                  reference (`/path`, `./x`, `../x`, `#fragment`, `?query`) or
 *                  a URL whose main domain equals `mainUrl`
 *
 * Missing, empty or non-string input yields both flags false.
 *
 * @param {*} url href value taken from an anchor element
 * @returns {{is_email: boolean, is_internal: boolean}}
 */
function getUrlInfo(url) {
  const data = { is_email: false, is_internal: false };
  if (typeof url !== 'string' || url === '') {
    return data;
  }

  // Browsers ignore surrounding whitespace in href values; do the same here.
  const href = url.trim();

  // URI schemes are case-insensitive, so `MAILTO:` is still an email link.
  if (/^mailto:/i.test(href)) {
    data.is_email = true;
    return data;
  }

  // Relative references have no host and therefore always resolve against the
  // current site. Without this check the domain lookup below would either find
  // nothing or mistake a file name such as `foo.md` for a domain, and the link
  // would be treated as external. `//host/...` (protocol-relative) is excluded
  // on purpose because it does name a host.
  if (/^(?:#|\?|\.{1,2}\/|\/(?!\/))/.test(href)) {
    data.is_internal = true;
    return data;
  }

  try {
    data.is_internal = extractMainDomainFromUrl(href) === mainUrl;
  } catch (error) {
    // Best effort: a link we cannot parse is left classified as non-internal.
    console.error("Error processing URL:", url, error);
    data.is_internal = false;
  }
  return data;
}
/**
* Extracting main domain from the url
* @param {*} url
*/
function extractMainDomainFromUrl (url) {
const domainRe = /(https?:\/\/){0,1}((?:[\w\d-]+\.)+[\w\d]{2,})/i; // taken example from https://stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls
const data = domainRe.exec(url)
const splittedDomain = data[2].split('.')
if (splittedDomain.length === 2) { // check extra subdomain is present or not
return data[2]
}
return splittedDomain[splittedDomain.length - 2] + '.' + splittedDomain[splittedDomain.length - 1] // return only main domain address
/**
 * Extracts the main domain (last two labels of the host, lower-cased) from a URL.
 *
 * Accepted forms: `scheme://host/...`, protocol-relative `//host/...`, and a
 * bare `host/...` with no scheme. An optional `user:pass@` prefix and a port
 * are skipped. Only the host position is inspected, so domain-looking text in
 * the path, query or userinfo is never returned.
 *
 * Known limitation: multi-part public suffixes are not recognised, so
 * `shop.example.co.uk` yields `co.uk`.
 *
 * @param {*} url URL or href to inspect
 * @returns {string} main domain such as `example.com`, or `''` when the input
 *   is not a string or has no dotted host name in host position
 */
function extractMainDomainFromUrl(url) {
  // Anchored at the start so only the authority part can match:
  //   optional scheme + `//`, optional userinfo ending in `@`, then the host.
  // The lookahead requires the host to be followed by a port, path, query,
  // fragment or end of input, which stops a partial match inside userinfo.
  const hostRe = /^(?:(?:[a-z][a-z\d+.-]*:)?\/\/)?(?:[^/?#@]*@)?((?:[\w-]+\.)+\w{2,})\.?(?=[:/?#]|$)/i;
  const match = typeof url === 'string' ? hostRe.exec(url.trim()) : null;
  if (!match) {
    console.warn("Invalid URL format:", url);
    return ''; // Return empty if domain extraction fails
  }
  // Host names are case-insensitive; normalise so callers can compare with ===.
  return match[1].toLowerCase().split('.').slice(-2).join('.');
}
let lastCompanyName = null;