From 35cb998ae5fb19653c562eaa2185925b120161fc Mon Sep 17 00:00:00 2001 From: Muhammad Azeem <37941410+AzeemSup@users.noreply.github.com> Date: Thu, 31 Oct 2024 15:48:27 +0500 Subject: [PATCH] Improve URL handling for external links in index.js Improved URL handling with error checks for malformed links in `getUrlInfo`, refined domain extraction in `extractMainDomainFromUrl`, and ensured external link attributes apply correctly. Enhanced logging for easier debugging. Signed-off-by: Muhammad Azeem <37941410+AzeemSup@users.noreply.github.com> --- lib/index.js | 116 ++++++++++++++++++++++++++------------------------- 1 file changed, 60 insertions(+), 56 deletions(-) diff --git a/lib/index.js b/lib/index.js index 4faee7a6..814e6faa 100755 --- a/lib/index.js +++ b/lib/index.js @@ -141,74 +141,78 @@ exports.parseFromDirectory = contentPath => { const mainUrl = 'remoteintech.company' - function addTargetBlankAndExternalLinkIcons (el) { - if (el.type === 'tag') { - const anchorTagElements = el.children.filter(element => element.name === 'a') - if (anchorTagElements.length > 0) { - anchorTagElements.forEach(element => { - const url = element.attribs.href - const urlInfo = getUrlInfo(url) - - if (urlInfo.is_email || urlInfo.is_internal) { - return - } - - element.attribs.target = '_blank' - - $element = $( element ) - $element.append(' ') - }) - } - - if (el.children && el.children.length) { - el.children.forEach(element => { - addTargetBlankAndExternalLinkIcons(element) - }) - } - } + function addTargetBlankAndExternalLinkIcons(el) { + if (el.type === 'tag') { + const anchorTagElements = el.children.filter(element => element.name === 'a'); + + anchorTagElements.forEach(element => { + const url = element.attribs.href; + const urlInfo = getUrlInfo(url); + + if (url && !urlInfo.is_email && !urlInfo.is_internal) { + element.attribs.target = '_blank'; + element.attribs.rel = 'noopener noreferrer'; + + const $element = $(element); + $element.append(' '); + } + }); + + // Recursively process child elements + el.children.forEach(child => addTargetBlankAndExternalLinkIcons(child)); + } } /** * Getting info about the url. It includes checking isEmail of isInternal * @param {*} url */ - function getUrlInfo (url) { - const data = {} - - if (url.match(/^mailto:/)) { // checking url email or not - data.is_email = true - return data - } - - const mainDomainFromGivenUrl = extractMainDomainFromUrl(url) - - // checking url is email or not - if (mainDomainFromGivenUrl !== mainUrl) { - data.is_internal = false - return data - } else { - data.is_internal = true - } - - return data + function getUrlInfo(url) { + const data = {}; + + if (!url || typeof url !== 'string') { + data.is_email = false; + data.is_internal = false; + return data; + } + + if (url.startsWith('mailto:')) { + data.is_email = true; + return data; + } + + try { + const mainDomainFromGivenUrl = extractMainDomainFromUrl(url); + data.is_internal = mainDomainFromGivenUrl === mainUrl; + } catch (error) { + console.error("Error processing URL:", url, error); + data.is_internal = false; + } + + return data; } /** * Extracting main domain from the url * @param {*} url */ - function extractMainDomainFromUrl (url) { - const domainRe = /(https?:\/\/){0,1}((?:[\w\d-]+\.)+[\w\d]{2,})/i; // taken example from https://stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls - - const data = domainRe.exec(url) - - const splittedDomain = data[2].split('.') - - if (splittedDomain.length === 2) { // check extra subdomain is present or not - return data[2] - } - - return splittedDomain[splittedDomain.length - 2] + '.' + splittedDomain[splittedDomain.length - 1] // return only main domain address + function extractMainDomainFromUrl(url) { + try { + const domainRe = /(https?:\/\/)?(([\w\d-]+\.)+[\w\d]{2,})/i; // taken example from https://stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls + const data = domainRe.exec(url); + + if (!data || !data[2]) { + console.warn("Invalid URL format:", url); + return ''; // Return empty if domain extraction fails + } + + const domainParts = data[2].split('.'); + return domainParts.length === 2 ? data[2] : + domainParts.slice(-2).join('.'); + } catch (error) { + console.error("Error extracting main domain:", url, error); + return ''; // Safe fallback for unexpected input + } } let lastCompanyName = null;