Dish._translate now uses ArrayBuffer as its intermediate type instead of byteArray. This should speed up operations on large files.

This commit is contained in:
n1474335 2019-04-02 15:34:30 +01:00
parent 27677adbe8
commit 7d03be3a77
2 changed files with 100 additions and 27 deletions

View file

@ -149,78 +149,75 @@ class Dish {
*/
async _translate(toType, notUTF8=false) {
log.debug(`Translating Dish from ${Dish.enumLookup(this.type)} to ${Dish.enumLookup(toType)}`);
const byteArrayToStr = notUTF8 ? Utils.byteArrayToChars : Utils.byteArrayToUtf8;
// Convert data to intermediate byteArray type
// Convert data to intermediate ArrayBuffer type
try {
switch (this.type) {
case Dish.STRING:
this.value = this.value ? Utils.strToByteArray(this.value) : [];
this.value = this.value ? Utils.strToArrayBuffer(this.value) : new ArrayBuffer;
break;
case Dish.NUMBER:
this.value = typeof this.value === "number" ? Utils.strToByteArray(this.value.toString()) : [];
this.value = typeof this.value === "number" ? Utils.strToArrayBuffer(this.value.toString()) : new ArrayBuffer;
break;
case Dish.HTML:
this.value = this.value ? Utils.strToByteArray(Utils.unescapeHtml(Utils.stripHtmlTags(this.value, true))) : [];
this.value = this.value ? Utils.strToArrayBuffer(Utils.unescapeHtml(Utils.stripHtmlTags(this.value, true))) : new ArrayBuffer;
break;
case Dish.ARRAY_BUFFER:
// Array.from() would be nicer here, but it's slightly slower
this.value = Array.prototype.slice.call(new Uint8Array(this.value));
case Dish.BYTE_ARRAY:
this.value = new Uint8Array(this.value).buffer;
break;
case Dish.BIG_NUMBER:
this.value = BigNumber.isBigNumber(this.value) ? Utils.strToByteArray(this.value.toFixed()) : [];
this.value = BigNumber.isBigNumber(this.value) ? Utils.strToArrayBuffer(this.value.toFixed()) : new ArrayBuffer;
break;
case Dish.JSON:
this.value = this.value ? Utils.strToByteArray(JSON.stringify(this.value, null, 4)) : [];
this.value = this.value ? Utils.strToArrayBuffer(JSON.stringify(this.value, null, 4)) : new ArrayBuffer;
break;
case Dish.FILE:
this.value = await Utils.readFile(this.value);
this.value = Array.prototype.slice.call(this.value);
this.value = (await Utils.readFile(this.value)).buffer;
break;
case Dish.LIST_FILE:
this.value = await Promise.all(this.value.map(async f => Utils.readFile(f)));
this.value = this.value.map(b => Array.prototype.slice.call(b));
this.value = [].concat.apply([], this.value);
this.value = concatenateTypedArrays(...this.value).buffer;
break;
default:
break;
}
} catch (err) {
throw new DishError(`Error translating from ${Dish.enumLookup(this.type)} to byteArray: ${err}`);
throw new DishError(`Error translating from ${Dish.enumLookup(this.type)} to ArrayBuffer: ${err}`);
}
this.type = Dish.BYTE_ARRAY;
this.type = Dish.ARRAY_BUFFER;
// Convert from byteArray to toType
// Convert from ArrayBuffer to toType
try {
switch (toType) {
case Dish.STRING:
case Dish.HTML:
this.value = this.value ? byteArrayToStr(this.value) : "";
this.value = this.value ? Utils.arrayBufferToStr(this.value, !notUTF8) : "";
this.type = Dish.STRING;
break;
case Dish.NUMBER:
this.value = this.value ? parseFloat(byteArrayToStr(this.value)) : 0;
this.value = this.value ? parseFloat(Utils.arrayBufferToStr(this.value, !notUTF8)) : 0;
this.type = Dish.NUMBER;
break;
case Dish.ARRAY_BUFFER:
this.value = new Uint8Array(this.value).buffer;
case Dish.BYTE_ARRAY:
this.value = Array.prototype.slice.call(new Uint8Array(this.value));
this.type = Dish.ARRAY_BUFFER;
break;
case Dish.BIG_NUMBER:
try {
this.value = new BigNumber(byteArrayToStr(this.value));
this.value = new BigNumber(Utils.arrayBufferToStr(this.value, !notUTF8));
} catch (err) {
this.value = new BigNumber(NaN);
}
this.type = Dish.BIG_NUMBER;
break;
case Dish.JSON:
this.value = JSON.parse(byteArrayToStr(this.value));
this.value = JSON.parse(Utils.arrayBufferToStr(this.value, !notUTF8));
this.type = Dish.JSON;
break;
case Dish.FILE:
this.value = new File(this.value, "unknown");
this.type = Dish.FILE;
break;
case Dish.LIST_FILE:
this.value = [new File(this.value, "unknown")];
@ -230,7 +227,7 @@ class Dish {
break;
}
} catch (err) {
throw new DishError(`Error translating from byteArray to ${Dish.enumLookup(toType)}: ${err}`);
throw new DishError(`Error translating from ArrayBuffer to ${Dish.enumLookup(toType)}: ${err}`);
}
}
@ -374,6 +371,26 @@ class Dish {
}
/**
* Concatenates a list of Uint8Arrays together
*
* @param {Uint8Array[]} arrays
* @returns {Uint8Array}
*/
function concatenateTypedArrays(...arrays) {
let totalLength = 0;
for (const arr of arrays) {
totalLength += arr.length;
}
const result = new Uint8Array(totalLength);
let offset = 0;
for (const arr of arrays) {
result.set(arr, offset);
offset += arr.length;
}
return result;
}
/**
* Dish data type enum for byte arrays.

View file

@ -367,6 +367,61 @@ class Utils {
}
/**
* Converts a string to an ArrayBuffer.
* Treats the string as UTF-8 if any values are over 255.
*
* @param {string} str
* @returns {ArrayBuffer}
*
* @example
* // returns [72,101,108,108,111]
* Utils.strToArrayBuffer("Hello");
*
* // returns [228,189,160,229,165,189]
* Utils.strToArrayBuffer("你好");
*/
static strToArrayBuffer(str) {
const arr = new Uint8Array(str.length);
let i = str.length, b;
while (i--) {
b = str.charCodeAt(i);
arr[i] = b;
// If any of the bytes are over 255, read as UTF-8
if (b > 255) return Utils.strToUtf8ArrayBuffer(str);
}
return arr.buffer;
}
/**
* Converts a string to a UTF-8 ArrayBuffer.
*
* @param {string} str
* @returns {ArrayBuffer}
*
* @example
* // returns [72,101,108,108,111]
* Utils.strToUtf8ArrayBuffer("Hello");
*
* // returns [228,189,160,229,165,189]
* Utils.strToUtf8ArrayBuffer("你好");
*/
static strToUtf8ArrayBuffer(str) {
const utf8Str = utf8.encode(str);
if (str.length !== utf8Str.length) {
if (ENVIRONMENT_IS_WORKER()) {
self.setOption("attemptHighlight", false);
} else if (ENVIRONMENT_IS_WEB()) {
window.app.options.attemptHighlight = false;
}
}
return Utils.strToArrayBuffer(utf8Str);
}
/**
* Converts a string to a byte array.
* Treats the string as UTF-8 if any values are over 255.
@ -459,7 +514,7 @@ class Utils {
/**
* Attempts to convert a byte array to a UTF-8 string.
*
* @param {byteArray} byteArray
* @param {byteArray|Uint8Array} byteArray
* @returns {string}
*
* @example
@ -505,6 +560,7 @@ class Utils {
static byteArrayToChars(byteArray) {
if (!byteArray) return "";
let str = "";
// String concatenation appears to be faster than an array join
for (let i = 0; i < byteArray.length;) {
str += String.fromCharCode(byteArray[i++]);
}
@ -524,8 +580,8 @@ class Utils {
* Utils.arrayBufferToStr(Uint8Array.from([104,101,108,108,111]).buffer);
*/
static arrayBufferToStr(arrayBuffer, utf8=true) {
const byteArray = Array.prototype.slice.call(new Uint8Array(arrayBuffer));
return utf8 ? Utils.byteArrayToUtf8(byteArray) : Utils.byteArrayToChars(byteArray);
const arr = new Uint8Array(arrayBuffer);
return utf8 ? Utils.byteArrayToUtf8(arr) : Utils.byteArrayToChars(arr);
}