mirror of
https://github.com/ItsVipra/ProToots
synced 2024-11-22 03:23:02 +00:00
parent
bee683036d
commit
854a597e56
3 changed files with 159 additions and 13 deletions
|
@ -63,7 +63,6 @@ export async function fetchPronouns(dataID, accountName, type) {
|
|||
let pronouns = await extractFromStatus(status);
|
||||
if (!pronouns) {
|
||||
pronouns = "null";
|
||||
//TODO: if no field check bio
|
||||
info(`no pronouns found for ${accountName}, cached null`);
|
||||
}
|
||||
await cachePronouns(accountName, pronouns);
|
||||
|
|
|
@ -19,21 +19,39 @@ const knownPronounUrls = [
|
|||
export async function extractFromStatus(status) {
	// Looks up the pronouns of the status' author: profile fields are checked
	// first (in the order given), the bio/note is only used as a fallback.
	// Resolves to the pronoun string, or null when nothing was found.
	const { fields, note } = status.account;

	if (fields) {
		for (const field of fields) {
			// Awaited one at a time on purpose: the first field that yields
			// pronouns wins and later fields are never evaluated.
			const pronouns = await extractFromField(field);
			if (pronouns) return pronouns;
		}
	}

	// No usable field; fall back to the free-text bio when there is one.
	if (note) return extractFromBio(note) ?? null;

	return null;
}
|
||||
|
||||
/**
|
||||
* @param {{name: string, value: string}} field The field value
|
||||
* @returns {Promise<string|null>} The pronouns or null.
|
||||
*/
|
||||
async function extractFromField(field) {
|
||||
let pronounsRaw;
|
||||
for (const matcher of fieldMatchers) {
|
||||
if (typeof matcher === "string" && field.name.toLowerCase().includes(matcher)) {
|
||||
pronounsRaw = field.value;
|
||||
break;
|
||||
} else if (field.name.match(matcher)) {
|
||||
pronounsRaw = field.value;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!pronounsRaw) return null;
|
||||
let text = sanitizeHtml(pronounsRaw, { allowedTags: [], allowedAttributes: {} });
|
||||
// If one of pronoun URLs matches, overwrite the current known value.
|
||||
|
@ -101,3 +119,114 @@ function sanitizePronounPageValue(val) {
|
|||
if (val === "no-pronouns") val = "no pronouns";
|
||||
return val;
|
||||
}
|
||||
|
||||
// Allowlist used by extractFromBio for the "x/y" pattern. All entries are lowercase.
const knownPronouns = [
	"ae",
	"aer",
	"aers",
	"aerself",
	"co",
	"co's",
	"cos",
	"coself",
	"e",
	"eir",
	"eirs",
	"em",
	"ems",
	"emself",
	"es",
	"ey",
	"fae",
	"faer",
	"faers",
	"faerself",
	"he",
	"her",
	"hers",
	"herself",
	"him",
	"himself",
	"hir",
	"hirs",
	"hirself",
	"his",
	"hu",
	"hum",
	"hus",
	"huself",
	"it",
	"its",
	"itself",
	"ne",
	"nem",
	"nemself",
	"nir",
	"nirs",
	"nirself",
	"one",
	"one's",
	"oneself",
	"per",
	"pers",
	"perself",
	"s/he",
	"she",
	"their",
	"theirs",
	"them",
	"themself",
	"themselves",
	"they",
	"thon",
	"thon's",
	"thons",
	"thonself",
	"ve",
	"ver",
	"vers",
	"verself",
	"vi",
	"vim",
	"vims",
	"vimself",
	"vir",
	"virs",
	"virself",
	"vis",
	"xe",
	"xem",
	"xemself",
	"xyr",
	"xyrs",
	"ze",
	"zhe",
	"zher",
	"zhers",
	"zherself",
	"zir",
	"zirs",
	"zirself",
];

/**
 * Tries to extract pronouns from the bio/note. Two patterns are tried, in this order:
 *
 * 1. The first `word/word` pair whose two halves are both "known" pronouns. The comparison
 *    ignores case, the text is returned exactly as written in the bio.
 * 2. Whatever follows the first `pronoun:` / `pronouns:` label. This one is NOT checked
 *    against the allowlist.
 *
 * Only returning "known" pronouns for the first pattern is a compromise for the pattern
 * matching. At no point do we want to limit the pronouns used by persons.
 *
 * NOTE(review): the bio is matched as-is; if callers pass HTML markup, tags are not
 * stripped here — confirm whether it should be sanitized first.
 *
 * @param {string} bio The bio
 * @returns {string|null} The result or null
 */
function extractFromBio(bio) {
	// The allowlist is lowercase, so normalize before the lookup ("She/Her" must match too).
	const isKnown = (word) => knownPronouns.includes(word.toLowerCase());

	for (const [match, subjective, objective] of bio.matchAll(/(\w+)\/(\w+)/g)) {
		if (isKnown(subjective) && isKnown(objective)) {
			return match;
		}
	}

	// Only the first labelled occurrence matters, so a single exec is enough.
	const followedByColon = /pronouns?:\W+([\w/+]+)/i.exec(bio);
	return followedByColon ? followedByColon[1] : null;
}
|
||||
|
|
|
@ -71,3 +71,21 @@ for (const [input, expects] of valueExtractionTests) {
|
|||
}
|
||||
|
||||
valueExtractionSuite.run();
|
||||
|
||||
// Table-driven checks for the bio fallback: each row is [bio text, expected result].
const bioExtractSuite = suite("bio extraction");
const bioExtractTests = [
	// a known pair anywhere in the text is picked up
	["I'm cute and my pronouns are she/her", "she/her"],
	// a pair that is not on the allowlist is ignored
	["my pronouns are helicopter/joke", null],
	// anything after a "pronouns:" label is accepted as-is
	["pronouns: uwu/owo", "uwu/owo"],
	// the label pattern also works for a single word
	["pronouns: any", "any"],
];

bioExtractTests.forEach(([bio, expected]) => {
	// The bio text doubles as the test name.
	bioExtractSuite(bio, async () => {
		const status = { account: { note: bio } };
		const extracted = await pronouns.extractFromStatus(status);
		assert.equal(extracted, expected);
	});
});

bioExtractSuite.run();
|
||||
|
|
Loading…
Reference in a new issue