Simplified 'Shuffle' operation to work in the same way as 'Sort' and 'Unique'

This commit is contained in:
n1474335 2022-11-25 11:50:27 +00:00
parent 9a5d62c4c3
commit 59fe8d1c4b
2 changed files with 18 additions and 135 deletions

View file

@ -5,6 +5,8 @@
*/ */
import Operation from "../Operation.mjs"; import Operation from "../Operation.mjs";
import Utils from "../Utils.mjs";
import {INPUT_DELIM_OPTIONS} from "../lib/Delim.mjs";
/** /**
* Shuffle operation * Shuffle operation
@ -21,34 +23,25 @@ class Shuffle extends Operation {
this.module = "Default"; this.module = "Default";
this.description = "Randomly reorders input elements."; this.description = "Randomly reorders input elements.";
this.infoURL = "https://wikipedia.org/wiki/Shuffling"; this.infoURL = "https://wikipedia.org/wiki/Shuffling";
this.inputType = "ArrayBuffer"; this.inputType = "string";
this.outputType = "ArrayBuffer"; this.outputType = "string";
this.args = [ this.args = [
{ {
"name": "By", name: "Delimiter",
"type": "option", type: "option",
"value": ["Byte", "Character", "Line"], value: INPUT_DELIM_OPTIONS
"defaultIndex": 1
} }
]; ];
} }
/** /**
* @param {ArrayBuffer} input * @param {string} input
* @param {Object[]} args * @param {Object[]} args
* @returns {ArrayBuffer} * @returns {string}
*/ */
run(input, args) { run(input, args) {
const type = args[0]; const delim = Utils.charRep(args[0]);
if (input.byteLength === 0) return input; if (input.length === 0) return input;
if (ArrayBuffer.isView(input)) {
if (input.byteOffset === 0 && input.byteLength === input.buffer.byteLength) {
input = input.buffer;
} else {
input = input.buffer.slice(input.byteOffset, input.byteOffset + input.byteLength);
}
}
const inputBytes = new Uint8Array(input);
// return a random number in [0, 1) // return a random number in [0, 1)
const rng = (typeof crypto) !== "undefined" && crypto.getRandomValues ? (function() { const rng = (typeof crypto) !== "undefined" && crypto.getRandomValues ? (function() {
@ -66,71 +59,10 @@ class Shuffle extends Operation {
return Math.floor(rng() * max); return Math.floor(rng() * max);
}; };
const toShuffle = []; // Split input into shuffleable sections
let addLastNewLine = false; const toShuffle = input.split(delim);
switch (type) {
case "Character":
// split input into UTF-8 code points
for (let i = 0; i < inputBytes.length;) {
const charLength = (function() {
if (inputBytes[i] < 0xc0) return 1;
if (inputBytes[i] < 0xe0) return 2;
if (inputBytes[i] < 0xf0) return 3;
if (inputBytes[i] < 0xf8) return 4;
return 1;
})();
if (i + charLength <= inputBytes.length) {
let elementLength = charLength;
for (let j = 1; j < charLength; j++) {
if ((inputBytes[i + j] & 0xc0) !== 0x80) {
elementLength = 1;
break;
}
}
toShuffle.push([i, elementLength]);
i += elementLength;
} else {
toShuffle.push([i, 1]);
i++;
}
}
break;
case "Line":
{
// split input by newline characters
let lineBegin = 0;
for (let i = 0; i < inputBytes.length; i++) {
if (inputBytes[i] === 0xd || inputBytes[i] === 0xa) {
if (i + 1 < inputBytes.length && inputBytes[i] === 0xd && inputBytes[i + 1] === 0xa) {
i++;
}
toShuffle.push([lineBegin, i - lineBegin + 1]);
lineBegin = i + 1;
}
}
if (lineBegin < inputBytes.length) {
toShuffle.push([lineBegin, inputBytes.length - lineBegin]);
addLastNewLine = true;
}
}
break;
default:
{
// Creating element information for each bytes looks very wasteful.
// Therefore, directly shuffle here.
const outputBytes = new Uint8Array(inputBytes);
for (let i = outputBytes.length - 1; i > 0; i--) {
const idx = randint(i + 1);
const tmp = outputBytes[idx];
outputBytes[idx] = outputBytes[i];
outputBytes[i] = tmp;
}
return outputBytes.buffer;
}
}
// shuffle elements // shuffle elements
const lastStart = toShuffle[toShuffle.length - 1][0];
for (let i = toShuffle.length - 1; i > 0; i--) { for (let i = toShuffle.length - 1; i > 0; i--) {
const idx = randint(i + 1); const idx = randint(i + 1);
const tmp = toShuffle[idx]; const tmp = toShuffle[idx];
@ -138,18 +70,7 @@ class Shuffle extends Operation {
toShuffle[i] = tmp; toShuffle[i] = tmp;
} }
// place shuffled elements return toShuffle.join(delim);
const outputBytes = new Uint8Array(inputBytes.length + (addLastNewLine ? 1 : 0));
let outputPos = 0;
for (let i = 0; i < toShuffle.length; i++) {
outputBytes.set(new Uint8Array(input, toShuffle[i][0], toShuffle[i][1]), outputPos);
outputPos += toShuffle[i][1];
if (addLastNewLine && toShuffle[i][0] === lastStart) {
outputBytes[outputPos] = 0xa;
outputPos++;
}
}
return outputBytes.buffer;
} }
} }

View file

@ -13,7 +13,7 @@ TestRegister.addTests([
"recipeConfig": [ "recipeConfig": [
{ {
"op": "Shuffle", "op": "Shuffle",
"args": ["Character"] "args": ["Comma"]
} }
] ]
}, },
@ -24,7 +24,7 @@ TestRegister.addTests([
"recipeConfig": [ "recipeConfig": [
{ {
"op": "Shuffle", "op": "Shuffle",
"args": ["Byte"] "args": ["Nothing (separate chars)"]
}, },
{ {
"op": "To Hex", "op": "To Hex",
@ -36,29 +36,6 @@ TestRegister.addTests([
} }
] ]
}, },
{
"name": "Shuffle characters",
"input": "1234\uff15\uff16\uff17\uff18",
"expectedOutput": " 0031 0032 0033 0034 FF15 FF16 FF17 FF18",
"recipeConfig": [
{
"op": "Shuffle",
"args": ["Character"]
},
{
"op": "Escape Unicode Characters",
"args": ["%u", true, 4, true]
},
{
"op": "Split",
"args": ["%u", " "]
},
{
"op": "Sort",
"args": ["Space", false, "Alphabetical (case sensitive)"]
}
]
},
{ {
"name": "Shuffle lines", "name": "Shuffle lines",
"input": "1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf\n", "input": "1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf\n",
@ -66,27 +43,12 @@ TestRegister.addTests([
"recipeConfig": [ "recipeConfig": [
{ {
"op": "Shuffle", "op": "Shuffle",
"args": ["Line"] "args": ["Line feed"]
}, },
{ {
"op": "Sort", "op": "Sort",
"args": ["Line feed", false, "Alphabetical (case sensitive)"] "args": ["Line feed", false, "Alphabetical (case sensitive)"]
} }
] ]
}, }
{
"name": "Shuffle lines (last character is not newline)",
"input": "1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf",
"expectedOutput": "\n1\n2\n3\n4\n5\n6\n7\n8\n9\na\nb\nc\nd\ne\nf",
"recipeConfig": [
{
"op": "Shuffle",
"args": ["Line"]
},
{
"op": "Sort",
"args": ["Line feed", false, "Alphabetical (case sensitive)"]
}
]
},
]); ]);