mirror of
https://github.com/gchq/CyberChef
synced 2025-03-13 21:36:56 +00:00
Added word count functionality
This commit is contained in:
parent
c4e7c41a6e
commit
1caa79303b
4 changed files with 236 additions and 1 deletions
|
@ -302,7 +302,8 @@
|
|||
"Unescape string",
|
||||
"Pseudo-Random Number Generator",
|
||||
"Sleep",
|
||||
"File Tree"
|
||||
"File Tree",
|
||||
"Word Count"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
116
src/core/operations/WordCount.mjs
Normal file
116
src/core/operations/WordCount.mjs
Normal file
|
@ -0,0 +1,116 @@
|
|||
/**
|
||||
* @author sw5678
|
||||
* @copyright Crown Copyright 2016
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
|
||||
import Operation from "../Operation.mjs";
|
||||
import Utils from "../Utils.mjs";
|
||||
import {LETTER_DELIM_OPTIONS} from "../lib/Delim.mjs";
|
||||
import {caseInsensitiveSort} from "../lib/Sort.mjs";
|
||||
|
||||
|
||||
/**
 * Word Count operation
 *
 * Splits the input on a chosen delimiter (line breaks always act as
 * delimiters too), lower-cases and strips punctuation from each token, and
 * reports how many times each resulting word occurs as CSV-style text.
 */
class WordCount extends Operation {

    /**
     * Word Count constructor
     */
    constructor() {
        super();

        this.name = "Word Count";
        this.module = "Default";
        this.description = "Provides a count of each word in a given text";
        this.inputType = "string";
        this.outputType = "string";
        this.args = [
            {
                name: "Delimiter",
                type: "option",
                value: LETTER_DELIM_OPTIONS
            },
            {
                "name": "Include Total",
                "type": "boolean",
                "value": true
            },
            {
                "name": "Order",
                "type": "option",
                "value": ["Alphabetical", "Count"]
            }
        ];
    }

    /**
     * Counts the words in the input.
     *
     * @param {string} input - Text to analyse.
     * @param {Object[]} args - [delimiter option name, include total (boolean),
     *     order ("Alphabetical" or "Count")].
     * @returns {string} A "WORD,COUNT" header followed by one "word,count" line
     *     per distinct word and, when requested, a final "TOTAL,n" line
     *     (which has no trailing newline).
     */
    run(input, args) {
        const [delimiterName, includeTotal, sortOrder] = args;

        // Resolve the selected delimiter option (e.g. "Space") to the string used for splitting
        const delimiter = Utils.charRep(delimiterName);

        // Every punctuation character except the apostrophe, which may be part
        // of a word (e.g. "don't") and is handled separately below.
        const punctuation = /[!"#$%&()*+,\-.\/:;<=>?@\[\\\]^_`{|}~£]/g;

        // Line breaks always separate words, whatever delimiter was chosen.
        // A replacer function is used so the delimiter is inserted literally.
        const tokens = input
            .replace(/(?:\r\n|\r|\n)/g, () => delimiter)
            .toLowerCase()
            .split(delimiter);

        // A Map is used rather than a plain object so that words matching
        // inherited Object.prototype property names (e.g. "constructor") are
        // counted correctly instead of producing NaN.
        const counts = new Map();
        let total = 0;
        for (const token of tokens) {
            // Strip punctuation, then surrounding whitespace and quote marks.
            // Apostrophes inside a word are kept so contractions still count.
            const word = token
                .replace(punctuation, "")
                .trim()
                .replace(/^'+|'+$/g, "")
                .trim();

            // Nothing left once punctuation and quote marks are removed
            if (word === "") {
                continue;
            }

            counts.set(word, (counts.get(word) || 0) + 1);
            total++;
        }

        // Sort results. Words with equal counts keep first-seen order; any
        // order other than "Count" falls back to alphabetical rather than
        // failing on an undefined list.
        const words = [...counts.keys()];
        if (sortOrder === "Count") {
            words.sort((first, second) => counts.get(second) - counts.get(first));
        } else {
            words.sort(caseInsensitiveSort);
        }

        // Process output to string
        let output = "WORD,COUNT\n" + words.map((word) => `${word},${counts.get(word)}\n`).join("");

        // Add total counter at the bottom
        if (includeTotal) {
            output += "TOTAL," + total;
        }

        return output;
    }
}
|
||||
|
||||
export default WordCount;
|
|
@ -142,6 +142,7 @@ import "./tests/TranslateDateTimeFormat.mjs";
|
|||
import "./tests/Typex.mjs";
|
||||
import "./tests/UnescapeString.mjs";
|
||||
import "./tests/Unicode.mjs";
|
||||
import "./tests/WordCount.mjs";
|
||||
import "./tests/YARA.mjs";
|
||||
|
||||
const testStatus = {
|
||||
|
|
117
tests/operations/tests/WordCount.mjs
Normal file
117
tests/operations/tests/WordCount.mjs
Normal file
|
@ -0,0 +1,117 @@
|
|||
/**
|
||||
* @author sw5678
|
||||
* @copyright Crown Copyright 2023
|
||||
* @license Apache-2.0
|
||||
*/
|
||||
import TestRegister from "../../lib/TestRegister.mjs";
|
||||
|
||||
// Shared input for the counting tests: mixed case, repeated words, blank
// lines and a run of punctuation that must not be counted as a word.
const PUNCTUATED_INPUT = "Hello world. Hello. \n\n World, ''!@£$%^&*()_+=-[]{};'|:/.,<>? world";

/**
 * Builds a single-operation "Word Count" test case.
 *
 * @param {string} name - Test name.
 * @param {string} input - Text passed to the operation.
 * @param {string} expectedOutput - Exact output expected from the recipe.
 * @param {Array} args - [delimiter option, include total, order].
 * @returns {Object} Test definition for TestRegister.addTests.
 */
function wordCountTest(name, input, expectedOutput, args) {
    return {
        "name": name,
        "input": input,
        "expectedOutput": expectedOutput,
        "recipeConfig": [
            {
                "op": "Word Count",
                "args": args,
            },
        ],
    };
}

TestRegister.addTests([
    // Empty input: only the header, plus the total line when requested
    wordCountTest("Word Count: Empty test 1", "", "WORD,COUNT\nTOTAL,0", ["Space", true, "Alphabetical"]),
    wordCountTest("Word Count: Empty test 2", "", "WORD,COUNT\nTOTAL,0", ["Space", true, "Count"]),
    wordCountTest("Word Count: Empty test 3", "", "WORD,COUNT\n", ["Space", false, "Alphabetical"]),
    wordCountTest("Word Count: Empty test 4", "", "WORD,COUNT\n", ["Space", false, "Count"]),

    // Counting with both orderings, with and without the total line
    wordCountTest("Word Count: Count test 1", PUNCTUATED_INPUT, "WORD,COUNT\nhello,2\nworld,3\nTOTAL,5", ["Space", true, "Alphabetical"]),
    wordCountTest("Word Count: Count test 2", PUNCTUATED_INPUT, "WORD,COUNT\nworld,3\nhello,2\nTOTAL,5", ["Space", true, "Count"]),
    wordCountTest("Word Count: Count test 3", PUNCTUATED_INPUT, "WORD,COUNT\nhello,2\nworld,3\n", ["Space", false, "Alphabetical"]),
    wordCountTest("Word Count: Count test 4", PUNCTUATED_INPUT, "WORD,COUNT\nworld,3\nhello,2\n", ["Space", false, "Count"]),

    // Comma-delimited input spread over several lines
    wordCountTest(
        "Word Count: Different delimiter test",
        "Hello, World\nhello, world \n''!@£$%^&*()_+=-[]{};'|:/.,<>? world",
        "WORD,COUNT\nworld,3\nhello,2\n",
        ["Comma", false, "Count"]
    )
]);
|
Loading…
Add table
Reference in a new issue