Add basic support for pronoun pages

Closes #7, #35
2024-11-22 03:23:02 +00:00 · 2023-07-10 22:32:16 +02:00 · 2023-07-10 22:32:16 +02:00 · f032dcca3f
commit f032dcca3f
parent 4820fb243e
2 changed files with 52 additions and 8 deletions
--- a/src/libs/pronouns.js
+++ b/src/libs/pronouns.js
@ -1,6 +1,11 @@
 import sanitizeHtml from "sanitize-html";

 const fieldMatchers = [/pro.*nouns?/i, "pronomen"];
+const knownPronounUrls = [
+	/pronouns\.page\/([\w/]+)/,
+	/pronouns\.within\.lgbt\/([\w/]+)/,
+	/pronouns\.cc\/pronouns\/([\w/]+)/,
+];

 /**
 * Tries to extract the pronouns for the given status.
@ -16,22 +21,27 @@ export function extractFromStatus(status) {
 	const account = status.account;
 	const fields = account.fields;

-	let pronouns;
+	let pronounsRaw;
 	for (const field of fields) {
 		// TODO: add ranking of fields
-		if (pronouns) break;
+		if (pronounsRaw) break;

 		for (const matcher of fieldMatchers) {
 			if (typeof matcher === "string" && field.name.toLowerCase().includes(matcher)) {
-				pronouns = field.value;
+				pronounsRaw = field.value;
 			} else if (field.name.match(matcher)) {
-				pronouns = field.value;
+				pronounsRaw = field.value;
 			}
 		}
 	}
-	if (!pronouns) return null;
-	pronouns = sanitizeHtml(pronouns, { allowedTags: [], allowedAttributes: {} });
+	if (!pronounsRaw) return null;
+	let text = sanitizeHtml(pronounsRaw, { allowedTags: [], allowedAttributes: {} });
+	// If one of the known pronoun URLs matches, the pronouns captured from it replace the sanitized text.
+	for (const knownUrlRe of knownPronounUrls) {
+		if (!knownUrlRe.test(pronounsRaw)) continue;
+		text = pronounsRaw.match(knownUrlRe)[1];
+	}

-	if (!pronouns) return null;
-	return pronouns;
+	if (!text) return null;
+	return text;
 }
--- a/tests/extractPronouns.spec.js
+++ b/tests/extractPronouns.spec.js
@ -24,3 +24,37 @@ for (const field of validFields) {
 }

 extract.run();
+
+const valueExtractionSuite = suite("value extraction");
+valueExtractionSuite.before(() => {
+	global.window = {
+		// @ts-ignore
+		navigator: {
+			language: "en",
+		},
+	};
+});
+valueExtractionSuite.after(() => {
+	global.window = undefined;
+});
+const valueExtractionTests = [
+	["she/her", "she/her"], // exact match
+	["they and them", "they and them"], // exact match with multiple words
+	["they/them (https://pronouns.page/they/them)", "they/them"], // plain-text "URL" with additional text
+	["https://en.pronouns.page/they/them", "they/them"], // plain-text "URLs"
+	["pronouns.page/they/them", "they/them"], // plain-text "URLs" without scheme
+	[`<a href="https://en.pronouns.page/they/them"></a>`, "they/them"], // HTML-formatted URLs
+	[`<a href="https://en.pronouns.page/@Vipra"></a>`, null], // pronoun pages with usernames
+];
+for (const [input, expects] of valueExtractionTests) {
+	valueExtractionSuite(input, async () => {
+		const result = await pronouns.extractFromStatus({
+			account: {
+				fields: [{ name: "pronouns", value: input }],
+			},
+		});
+		assert.equal(result, expects);
+	});
+}
+
+valueExtractionSuite.run();