Add basic support for pronoun pages

Closes #7, #35
This commit is contained in:
nachtjasmin 2023-07-10 22:32:16 +02:00
parent 4820fb243e
commit f032dcca3f
No known key found for this signature in database
2 changed files with 52 additions and 8 deletions

View file

@ -1,6 +1,11 @@
import sanitizeHtml from "sanitize-html";
// Matchers used to recognise which profile field holds the pronouns.
// String entries are looked up as substrings of the lowercased field name;
// RegExp entries are matched against the field name as-is.
const fieldMatchers = [/pro.*nouns?/i, "pronomen"];
// URL patterns of known pronoun pages, tested against the raw field value.
// Capture group 1 is the part after the matched prefix and is used as the
// extracted pronoun text. It only accepts word characters and slashes, so a
// link whose path starts with another character (e.g. "@user") does not match.
const knownPronounUrls = [
/pronouns\.page\/([\w/]+)/,
/pronouns\.within\.lgbt\/([\w/]+)/,
/pronouns\.cc\/pronouns\/([\w/]+)/,
];
/**
* Tries to extract the pronouns for the given status.
@ -16,22 +21,27 @@ export function extractFromStatus(status) {
const account = status.account;
const fields = account.fields;
let pronouns;
let pronounsRaw;
for (const field of fields) {
// TODO: add ranking of fields
if (pronouns) break;
if (pronounsRaw) break;
for (const matcher of fieldMatchers) {
if (typeof matcher === "string" && field.name.toLowerCase().includes(matcher)) {
pronouns = field.value;
pronounsRaw = field.value;
} else if (field.name.match(matcher)) {
pronouns = field.value;
pronounsRaw = field.value;
}
}
}
if (!pronouns) return null;
pronouns = sanitizeHtml(pronouns, { allowedTags: [], allowedAttributes: {} });
if (!pronounsRaw) return null;
let text = sanitizeHtml(pronounsRaw, { allowedTags: [], allowedAttributes: {} });
// If one of pronoun URLs matches, overwrite the current known value.
for (const knownUrlRe of knownPronounUrls) {
if (!knownUrlRe.test(pronounsRaw)) continue;
text = pronounsRaw.match(knownUrlRe)[1];
}
if (!pronouns) return null;
return pronouns;
if (!text) return null;
return text;
}

View file

@ -24,3 +24,37 @@ for (const field of validFields) {
}
extract.run();
// Suite checking which text is extracted from the raw value of a pronoun field.
const valueExtractionSuite = suite("value extraction");

// Install a minimal `window` stub (English navigator language) for the duration
// of the suite and reset it afterwards.
// NOTE(review): presumably the code under test reads `window.navigator.language`;
// that code is not visible here — confirm.
valueExtractionSuite.before(() => {
	global.window = {
		// @ts-ignore
		navigator: { language: "en" },
	};
});

valueExtractionSuite.after(() => {
	global.window = undefined;
});

// Table of cases: [raw field value, expected result of the extraction].
const valueExtractionTests = [
	// exact match
	["she/her", "she/her"],
	// exact match with multiple words
	["they and them", "they and them"],
	// plain-text "URL" with additional text
	["they/them (https://pronouns.page/they/them)", "they/them"],
	// plain-text "URLs"
	["https://en.pronouns.page/they/them", "they/them"],
	// plain-text "URLs" without scheme
	["pronouns.page/they/them", "they/them"],
	// HTML-formatted URLs
	[`<a href="https://en.pronouns.page/they/them"></a>`, "they/them"],
	// pronoun pages with usernames
	[`<a href="https://en.pronouns.page/@Vipra"></a>`, null],
];

// Register one test per table row; the raw input doubles as the test name.
valueExtractionTests.forEach(([input, expects]) => {
	valueExtractionSuite(input, async () => {
		// Minimal status: an account with a single field named "pronouns".
		const status = {
			account: {
				fields: [{ name: "pronouns", value: input }],
			},
		};
		const result = await pronouns.extractFromStatus(status);
		assert.equal(result, expects);
	});
});

valueExtractionSuite.run();