From 7d03be3a77d7aa66e0e32d986b9230023444a2de Mon Sep 17 00:00:00 2001 From: n1474335 Date: Tue, 2 Apr 2019 15:34:30 +0100 Subject: [PATCH] Dish._translate now uses ArrayBuffer as its intermediate type instead of byteArray. This should speed up operations on large files. --- src/core/Dish.mjs | 67 +++++++++++++++++++++++++++++----------------- src/core/Utils.mjs | 62 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 101 insertions(+), 28 deletions(-) diff --git a/src/core/Dish.mjs b/src/core/Dish.mjs index 0635eb75..38247381 100755 --- a/src/core/Dish.mjs +++ b/src/core/Dish.mjs @@ -149,78 +149,75 @@ class Dish { */ async _translate(toType, notUTF8=false) { log.debug(`Translating Dish from ${Dish.enumLookup(this.type)} to ${Dish.enumLookup(toType)}`); - const byteArrayToStr = notUTF8 ? Utils.byteArrayToChars : Utils.byteArrayToUtf8; - // Convert data to intermediate byteArray type + // Convert data to intermediate ArrayBuffer type try { switch (this.type) { case Dish.STRING: - this.value = this.value ? Utils.strToByteArray(this.value) : []; + this.value = this.value ? Utils.strToArrayBuffer(this.value) : new ArrayBuffer; break; case Dish.NUMBER: - this.value = typeof this.value === "number" ? Utils.strToByteArray(this.value.toString()) : []; + this.value = typeof this.value === "number" ? Utils.strToArrayBuffer(this.value.toString()) : new ArrayBuffer; break; case Dish.HTML: - this.value = this.value ? Utils.strToByteArray(Utils.unescapeHtml(Utils.stripHtmlTags(this.value, true))) : []; + this.value = this.value ? Utils.strToArrayBuffer(Utils.unescapeHtml(Utils.stripHtmlTags(this.value, true))) : new ArrayBuffer; break; - case Dish.ARRAY_BUFFER: - // Array.from() would be nicer here, but it's slightly slower - this.value = Array.prototype.slice.call(new Uint8Array(this.value)); + case Dish.BYTE_ARRAY: + this.value = new Uint8Array(this.value).buffer; break; case Dish.BIG_NUMBER: - this.value = BigNumber.isBigNumber(this.value) ? 
Utils.strToByteArray(this.value.toFixed()) : []; + this.value = BigNumber.isBigNumber(this.value) ? Utils.strToArrayBuffer(this.value.toFixed()) : new ArrayBuffer; break; case Dish.JSON: - this.value = this.value ? Utils.strToByteArray(JSON.stringify(this.value, null, 4)) : []; + this.value = this.value ? Utils.strToArrayBuffer(JSON.stringify(this.value, null, 4)) : new ArrayBuffer; break; case Dish.FILE: - this.value = await Utils.readFile(this.value); - this.value = Array.prototype.slice.call(this.value); + this.value = (await Utils.readFile(this.value)).buffer; break; case Dish.LIST_FILE: this.value = await Promise.all(this.value.map(async f => Utils.readFile(f))); - this.value = this.value.map(b => Array.prototype.slice.call(b)); - this.value = [].concat.apply([], this.value); + this.value = concatenateTypedArrays(...this.value).buffer; break; default: break; } } catch (err) { - throw new DishError(`Error translating from ${Dish.enumLookup(this.type)} to byteArray: ${err}`); + throw new DishError(`Error translating from ${Dish.enumLookup(this.type)} to ArrayBuffer: ${err}`); } - this.type = Dish.BYTE_ARRAY; + this.type = Dish.ARRAY_BUFFER; - // Convert from byteArray to toType + // Convert from ArrayBuffer to toType try { switch (toType) { case Dish.STRING: case Dish.HTML: - this.value = this.value ? byteArrayToStr(this.value) : ""; + this.value = this.value ? Utils.arrayBufferToStr(this.value, !notUTF8) : ""; this.type = Dish.STRING; break; case Dish.NUMBER: - this.value = this.value ? 
parseFloat(Utils.arrayBufferToStr(this.value, !notUTF8)) : 0; this.type = Dish.NUMBER; break; - case Dish.ARRAY_BUFFER: - this.value = new Uint8Array(this.value).buffer; - this.type = Dish.ARRAY_BUFFER; + case Dish.BYTE_ARRAY: + this.value = Array.prototype.slice.call(new Uint8Array(this.value)); + this.type = Dish.BYTE_ARRAY; break; case Dish.BIG_NUMBER: try { - this.value = new BigNumber(byteArrayToStr(this.value)); + this.value = new BigNumber(Utils.arrayBufferToStr(this.value, !notUTF8)); } catch (err) { this.value = new BigNumber(NaN); } this.type = Dish.BIG_NUMBER; break; case Dish.JSON: - this.value = JSON.parse(byteArrayToStr(this.value)); + this.value = JSON.parse(Utils.arrayBufferToStr(this.value, !notUTF8)); this.type = Dish.JSON; break; case Dish.FILE: this.value = new File(this.value, "unknown"); + this.type = Dish.FILE; break; case Dish.LIST_FILE: this.value = [new File(this.value, "unknown")]; @@ -230,7 +227,7 @@ class Dish { break; } } catch (err) { - throw new DishError(`Error translating from byteArray to ${Dish.enumLookup(toType)}: ${err}`); + throw new DishError(`Error translating from ArrayBuffer to ${Dish.enumLookup(toType)}: ${err}`); } } @@ -374,6 +371,26 @@ class Dish { } +/** + * Concatenates a list of Uint8Arrays together + * + * @param {Uint8Array[]} arrays + * @returns {Uint8Array} + */ +function concatenateTypedArrays(...arrays) { + let totalLength = 0; + for (const arr of arrays) { + totalLength += arr.length; + } + const result = new Uint8Array(totalLength); + let offset = 0; + for (const arr of arrays) { + result.set(arr, offset); + offset += arr.length; + } + return result; +} + /** * Dish data type enum for byte arrays. diff --git a/src/core/Utils.mjs b/src/core/Utils.mjs index 185100ab..59af74fb 100755 --- a/src/core/Utils.mjs +++ b/src/core/Utils.mjs @@ -367,6 +367,61 @@ class Utils { } + /** + * Converts a string to an ArrayBuffer. + * Treats the string as UTF-8 if any values are over 255. 
+ * + * @param {string} str + * @returns {ArrayBuffer} + * + * @example + * // returns [72,101,108,108,111] + * Utils.strToArrayBuffer("Hello"); + * + * // returns [228,189,160,229,165,189] + * Utils.strToArrayBuffer("你好"); + */ + static strToArrayBuffer(str) { + const arr = new Uint8Array(str.length); + let i = str.length, b; + while (i--) { + b = str.charCodeAt(i); + arr[i] = b; + // If any of the bytes are over 255, read as UTF-8 + if (b > 255) return Utils.strToUtf8ArrayBuffer(str); + } + return arr.buffer; + } + + + /** + * Converts a string to a UTF-8 ArrayBuffer. + * + * @param {string} str + * @returns {ArrayBuffer} + * + * @example + * // returns [72,101,108,108,111] + * Utils.strToUtf8ArrayBuffer("Hello"); + * + * // returns [228,189,160,229,165,189] + * Utils.strToUtf8ArrayBuffer("你好"); + */ + static strToUtf8ArrayBuffer(str) { + const utf8Str = utf8.encode(str); + + if (str.length !== utf8Str.length) { + if (ENVIRONMENT_IS_WORKER()) { + self.setOption("attemptHighlight", false); + } else if (ENVIRONMENT_IS_WEB()) { + window.app.options.attemptHighlight = false; + } + } + + return Utils.strToArrayBuffer(utf8Str); + } + + /** * Converts a string to a byte array. * Treats the string as UTF-8 if any values are over 255. @@ -459,7 +514,7 @@ class Utils { /** * Attempts to convert a byte array to a UTF-8 string. * - * @param {byteArray} byteArray + * @param {byteArray|Uint8Array} byteArray * @returns {string} * * @example @@ -505,6 +560,7 @@ class Utils { static byteArrayToChars(byteArray) { if (!byteArray) return ""; let str = ""; + // String concatenation appears to be faster than an array join for (let i = 0; i < byteArray.length;) { str += String.fromCharCode(byteArray[i++]); } @@ -524,8 +580,8 @@ class Utils { * Utils.arrayBufferToStr(Uint8Array.from([104,101,108,108,111]).buffer); */ static arrayBufferToStr(arrayBuffer, utf8=true) { - const byteArray = Array.prototype.slice.call(new Uint8Array(arrayBuffer)); - return utf8 ? 
Utils.byteArrayToUtf8(byteArray) : Utils.byteArrayToChars(byteArray); + const arr = new Uint8Array(arrayBuffer); + return utf8 ? Utils.byteArrayToUtf8(arr) : Utils.byteArrayToChars(arr); }