diff --git a/src/core/lib/ChrEnc.mjs b/src/core/lib/ChrEnc.mjs
index 6879d736..55fe3761 100644
--- a/src/core/lib/ChrEnc.mjs
+++ b/src/core/lib/ChrEnc.mjs
@@ -224,8 +224,85 @@ export function chrEncWidth(page) {
* @copyright Crown Copyright 2019
* @license Apache-2.0
*/
+export const UNICODE_NORMALISATION_FORMS = ["NFD", "NFC", "NFKD", "NFKC"];
+
/**
- * Character encoding format mappings.
+ * Detects whether the input buffer is UTF8 text (DEL and control bytes other than tab, LF and CR are rejected).
+ *
+ * @param {ArrayBuffer} data - raw bytes to inspect
+ * @returns {number} - 0 = not UTF8, 1 = ASCII only (also returned for empty input), 2 = UTF8 with multi-byte sequences
*/
-export const UNICODE_NORMALISATION_FORMS = ["NFD", "NFC", "NFKD", "NFKC"];
+export function isUTF8(data) {
+ const bytes = new Uint8Array(data);
+ let i = 0;
+ let onlyASCII = true;
+ while (i < bytes.length) {
+ if (( // ASCII: tab, LF, CR or printable 0x20-0x7E
+ bytes[i] === 0x09 ||
+ bytes[i] === 0x0A ||
+ bytes[i] === 0x0D ||
+ (0x20 <= bytes[i] && bytes[i] <= 0x7E)
+ )) {
+ i += 1;
+ continue;
+ }
+
+ onlyASCII = false; // current byte is outside the accepted ASCII set
+
+ if (( // non-overlong 2-byte: leads 0xC2-0xDF
+ (0xC2 <= bytes[i] && bytes[i] <= 0xDF) &&
+ (0x80 <= bytes[i+1] && bytes[i+1] <= 0xBF)
+ )) {
+ i += 2;
+ continue;
+ }
+
+ if (( // 3-byte, lead 0xE0: second byte >= 0xA0 excludes overlongs
+ bytes[i] === 0xE0 &&
+ (0xA0 <= bytes[i + 1] && bytes[i + 1] <= 0xBF) &&
+ (0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF)
+ ) ||
+ ( // straight 3-byte: leads 0xE1-0xEC, 0xEE, 0xEF
+ ((0xE1 <= bytes[i] && bytes[i] <= 0xEC) ||
+ bytes[i] === 0xEE ||
+ bytes[i] === 0xEF) &&
+ (0x80 <= bytes[i + 1] && bytes[i+1] <= 0xBF) &&
+ (0x80 <= bytes[i+2] && bytes[i+2] <= 0xBF)
+ ) ||
+ ( // 3-byte, lead 0xED: second byte <= 0x9F excludes surrogates
+ bytes[i] === 0xED &&
+ (0x80 <= bytes[i+1] && bytes[i+1] <= 0x9F) &&
+ (0x80 <= bytes[i+2] && bytes[i+2] <= 0xBF)
+ )) {
+ i += 3;
+ continue;
+ }
+
+ if (( // planes 1-3, lead 0xF0: second byte >= 0x90 excludes overlongs
+ bytes[i] === 0xF0 &&
+ (0x90 <= bytes[i + 1] && bytes[i + 1] <= 0xBF) &&
+ (0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF) &&
+ (0x80 <= bytes[i + 3] && bytes[i + 3] <= 0xBF)
+ ) ||
+ ( // planes 4-15: leads 0xF1-0xF3
+ (0xF1 <= bytes[i] && bytes[i] <= 0xF3) &&
+ (0x80 <= bytes[i + 1] && bytes[i + 1] <= 0xBF) &&
+ (0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF) &&
+ (0x80 <= bytes[i + 3] && bytes[i + 3] <= 0xBF)
+ ) ||
+ ( // plane 16, lead 0xF4: second byte <= 0x8F stays within U+10FFFF
+ bytes[i] === 0xF4 &&
+ (0x80 <= bytes[i + 1] && bytes[i + 1] <= 0x8F) &&
+ (0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF) &&
+ (0x80 <= bytes[i + 3] && bytes[i + 3] <= 0xBF)
+ )) {
+ i += 4;
+ continue;
+ }
+
+ return 0; // no pattern matched: disallowed byte, bad continuation, or sequence truncated at end of buffer
+ }
+
+ return onlyASCII ? 1 : 2; // loop never runs for empty input, which therefore yields 1
+}
diff --git a/src/core/lib/Magic.mjs b/src/core/lib/Magic.mjs
index 921fc3f6..14111ec7 100644
--- a/src/core/lib/Magic.mjs
+++ b/src/core/lib/Magic.mjs
@@ -3,6 +3,7 @@ import Utils, { isWorkerEnvironment } from "../Utils.mjs";
import Recipe from "../Recipe.mjs";
import Dish from "../Dish.mjs";
import {detectFileType, isType} from "./FileType.mjs";
+import {isUTF8} from "./ChrEnc.mjs";
import chiSquared from "chi-squared";
/**
@@ -111,82 +112,6 @@ class Magic {
};
}
- /**
- * Detects whether the input buffer is valid UTF8.
- *
- * @returns {boolean}
- */
- isUTF8() {
- const bytes = new Uint8Array(this.inputBuffer);
- let i = 0;
- while (i < bytes.length) {
- if (( // ASCII
- bytes[i] === 0x09 ||
- bytes[i] === 0x0A ||
- bytes[i] === 0x0D ||
- (0x20 <= bytes[i] && bytes[i] <= 0x7E)
- )) {
- i += 1;
- continue;
- }
-
- if (( // non-overlong 2-byte
- (0xC2 <= bytes[i] && bytes[i] <= 0xDF) &&
- (0x80 <= bytes[i+1] && bytes[i+1] <= 0xBF)
- )) {
- i += 2;
- continue;
- }
-
- if (( // excluding overlongs
- bytes[i] === 0xE0 &&
- (0xA0 <= bytes[i + 1] && bytes[i + 1] <= 0xBF) &&
- (0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF)
- ) ||
- ( // straight 3-byte
- ((0xE1 <= bytes[i] && bytes[i] <= 0xEC) ||
- bytes[i] === 0xEE ||
- bytes[i] === 0xEF) &&
- (0x80 <= bytes[i + 1] && bytes[i+1] <= 0xBF) &&
- (0x80 <= bytes[i+2] && bytes[i+2] <= 0xBF)
- ) ||
- ( // excluding surrogates
- bytes[i] === 0xED &&
- (0x80 <= bytes[i+1] && bytes[i+1] <= 0x9F) &&
- (0x80 <= bytes[i+2] && bytes[i+2] <= 0xBF)
- )) {
- i += 3;
- continue;
- }
-
- if (( // planes 1-3
- bytes[i] === 0xF0 &&
- (0x90 <= bytes[i + 1] && bytes[i + 1] <= 0xBF) &&
- (0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF) &&
- (0x80 <= bytes[i + 3] && bytes[i + 3] <= 0xBF)
- ) ||
- ( // planes 4-15
- (0xF1 <= bytes[i] && bytes[i] <= 0xF3) &&
- (0x80 <= bytes[i + 1] && bytes[i + 1] <= 0xBF) &&
- (0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF) &&
- (0x80 <= bytes[i + 3] && bytes[i + 3] <= 0xBF)
- ) ||
- ( // plane 16
- bytes[i] === 0xF4 &&
- (0x80 <= bytes[i + 1] && bytes[i + 1] <= 0x8F) &&
- (0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF) &&
- (0x80 <= bytes[i + 3] && bytes[i + 3] <= 0xBF)
- )) {
- i += 4;
- continue;
- }
-
- return false;
- }
-
- return true;
- }
-
/**
* Calculates the Shannon entropy of the input data.
*
@@ -336,7 +261,7 @@ class Magic {
data: this.inputStr.slice(0, 100),
languageScores: this.detectLanguage(extLang),
fileType: this.detectFileType(),
- isUTF8: this.isUTF8(),
+ isUTF8: !!isUTF8(this.inputBuffer),
entropy: this.calcEntropy(),
matchingOps: matchingOps,
useful: useful,
diff --git a/src/web/App.mjs b/src/web/App.mjs
index cce91b1e..eeae264f 100755
--- a/src/web/App.mjs
+++ b/src/web/App.mjs
@@ -500,22 +500,22 @@ class App {
// Input Character Encoding
// Must be set before the input is loaded
if (this.uriParams.ienc) {
- this.manager.input.chrEncChange(parseInt(this.uriParams.ienc, 10));
+ this.manager.input.chrEncChange(parseInt(this.uriParams.ienc, 10), true);
}
// Output Character Encoding
if (this.uriParams.oenc) {
- this.manager.output.chrEncChange(parseInt(this.uriParams.oenc, 10));
+ this.manager.output.chrEncChange(parseInt(this.uriParams.oenc, 10), true);
}
// Input EOL sequence
if (this.uriParams.ieol) {
- this.manager.input.eolChange(this.uriParams.ieol);
+ this.manager.input.eolChange(this.uriParams.ieol, true);
}
// Output EOL sequence
if (this.uriParams.oeol) {
- this.manager.output.eolChange(this.uriParams.oeol);
+ this.manager.output.eolChange(this.uriParams.oeol, true);
}
// Read in input data from URI params
diff --git a/src/web/stylesheets/components/_operation.css b/src/web/stylesheets/components/_operation.css
index 685a368a..a97fed70 100755
--- a/src/web/stylesheets/components/_operation.css
+++ b/src/web/stylesheets/components/_operation.css
@@ -69,6 +69,10 @@ select.arg {
min-width: 100px;
}
+select.arg.form-control:not([size]):not([multiple]), select.custom-file-control:not([size]):not([multiple]) {
+ height: 100% !important;
+}
+
textarea.arg {
min-height: 74px;
resize: vertical;
@@ -80,7 +84,7 @@ div.toggle-string {
input.toggle-string {
border-top-right-radius: 0 !important;
- height: 42px !important;
+ height: 100%;
}
.operation [class^='bmd-label'],
diff --git a/src/web/utils/statusBar.mjs b/src/web/utils/statusBar.mjs
index 69c4dd51..1adcd5be 100644
--- a/src/web/utils/statusBar.mjs
+++ b/src/web/utils/statusBar.mjs
@@ -24,6 +24,8 @@ class StatusBarPanel {
this.eolHandler = opts.eolHandler;
this.chrEncHandler = opts.chrEncHandler;
this.chrEncGetter = opts.chrEncGetter;
+ this.getEncodingState = opts.getEncodingState;
+ this.getEOLState = opts.getEOLState;
this.htmlOutput = opts.htmlOutput;
this.eolVal = null;
@@ -115,7 +117,7 @@ class StatusBarPanel {
if (isNaN(chrEncVal)) return;
- this.chrEncHandler(chrEncVal);
+ this.chrEncHandler(chrEncVal, true);
this.updateCharEnc(chrEncVal);
hideElement(e.target.closest(".cm-status-bar-select-content"));
}
@@ -212,12 +214,31 @@ class StatusBarPanel {
* @param {EditorState} state
*/
updateEOL(state) {
- if (state.lineBreak === this.eolVal) return;
+ if (this.getEOLState() < 2 && state.lineBreak === this.eolVal) return;
const val = this.dom.querySelector(".eol-value");
const button = val.closest(".cm-status-bar-select-btn");
- const eolCode = eolSeqToCode[state.lineBreak];
- const eolName = eolCodeToName[eolCode];
+ let eolCode = eolSeqToCode[state.lineBreak];
+ let eolName = eolCodeToName[eolCode];
+
+ switch (this.getEOLState()) {
+ case 1: // Detected
+ val.classList.add("font-italic");
+ eolCode += " (detected)";
+ eolName += " (detected)";
+ // Pulse
+ val.classList.add("pulse");
+ setTimeout(() => {
+ val.classList.remove("pulse");
+ }, 2000);
+ break;
+ case 0: // Unset
+ case 2: // Manually set
+ default:
+ val.classList.remove("font-italic");
+ break;
+ }
+
val.textContent = eolCode;
button.setAttribute("title", `End of line sequence:
${eolName}`);
button.setAttribute("data-original-title", `End of line sequence:
${eolName}`);
@@ -230,12 +251,30 @@ class StatusBarPanel {
*/
updateCharEnc() {
const chrEncVal = this.chrEncGetter();
- if (chrEncVal === this.chrEncVal) return;
+ if (this.getEncodingState() < 2 && chrEncVal === this.chrEncVal) return;
- const name = CHR_ENC_SIMPLE_REVERSE_LOOKUP[chrEncVal] ? CHR_ENC_SIMPLE_REVERSE_LOOKUP[chrEncVal] : "Raw Bytes";
+ let name = CHR_ENC_SIMPLE_REVERSE_LOOKUP[chrEncVal] ? CHR_ENC_SIMPLE_REVERSE_LOOKUP[chrEncVal] : "Raw Bytes";
const val = this.dom.querySelector(".chr-enc-value");
const button = val.closest(".cm-status-bar-select-btn");
+
+ switch (this.getEncodingState()) {
+ case 1: // Detected
+ val.classList.add("font-italic");
+ name += " (detected)";
+ // Pulse
+ val.classList.add("pulse");
+ setTimeout(() => {
+ val.classList.remove("pulse");
+ }, 2000);
+ break;
+ case 0: // Unset
+ case 2: // Manually set
+ default:
+ val.classList.remove("font-italic");
+ break;
+ }
+
val.textContent = name;
button.setAttribute("title", `${this.label} character encoding:
${name}`);
button.setAttribute("data-original-title", `${this.label} character encoding:
${name}`);
diff --git a/src/web/waiters/InputWaiter.mjs b/src/web/waiters/InputWaiter.mjs
index ad8eb38c..bffca98c 100644
--- a/src/web/waiters/InputWaiter.mjs
+++ b/src/web/waiters/InputWaiter.mjs
@@ -62,7 +62,8 @@ class InputWaiter {
this.inputTextEl = document.getElementById("input-text");
this.inputChrEnc = 0;
- this.eolSetManually = false;
+ this.eolState = 0; // 0 = unset, 1 = detected, 2 = manual
+ this.encodingState = 0; // 0 = unset, 1 = detected, 2 = manual
this.initEditor();
this.inputWorker = null;
@@ -116,7 +117,9 @@ class InputWaiter {
label: "Input",
eolHandler: this.eolChange.bind(this),
chrEncHandler: this.chrEncChange.bind(this),
- chrEncGetter: this.getChrEnc.bind(this)
+ chrEncGetter: this.getChrEnc.bind(this),
+ getEncodingState: this.getEncodingState.bind(this),
+ getEOLState: this.getEOLState.bind(this)
}),
// Mutable state
@@ -156,6 +159,8 @@ class InputWaiter {
]
});
+
+ if (this.inputEditorView) this.inputEditorView.destroy();
this.inputEditorView = new EditorView({
state: initialState,
parent: this.inputTextEl
@@ -166,30 +171,18 @@ class InputWaiter {
* Handler for EOL change events
* Sets the line separator
* @param {string} eol
- * @param {boolean} manual - a flag for whether this was set by the user or automatically
+ * @param {boolean} [manual=false]
*/
eolChange(eol, manual=false) {
const eolVal = eolCodeToSeq[eol];
if (eolVal === undefined) return;
- const eolBtn = document.querySelector("#input-text .eol-value");
- if (manual) {
- this.eolSetManually = true;
- eolBtn.classList.remove("font-italic");
- } else {
- eolBtn.classList.add("font-italic");
- }
+ this.eolState = manual ? 2 : this.eolState;
+ if (this.eolState < 2 && eolVal === this.getEOLSeq()) return;
- if (eolVal === this.getEOLSeq()) return;
-
- if (!manual) {
- // Pulse
- eolBtn.classList.add("pulse");
- setTimeout(() => {
- eolBtn.classList.remove("pulse");
- }, 2000);
+ if (this.eolState === 1) {
// Alert
- this.app.alert(`Input EOL separator has been changed to ${eolCodeToName[eol]}`, 5000);
+ this.app.alert(`Input end of line separator has been detected and changed to ${eolCodeToName[eol]}`, 5000);
}
// Update the EOL value
@@ -210,14 +203,24 @@ class InputWaiter {
return this.inputEditorView.state.lineBreak;
}
+ /**
+ * Returns how the input EOL sequence was determined: not set, detected automatically, or set manually
+ * @returns {number} - 0 = unset, 1 = detected, 2 = manual
+ */
+ getEOLState() {
+ return this.eolState;
+ }
+
/**
* Handler for Chr Enc change events
* Sets the input character encoding
* @param {number} chrEncVal
+ * @param {boolean} [manual=false] - when true, the encoding state is marked as manually set (2)
*/
- chrEncChange(chrEncVal) {
+ chrEncChange(chrEncVal, manual=false) {
if (typeof chrEncVal !== "number") return;
this.inputChrEnc = chrEncVal;
+ this.encodingState = manual ? 2 : this.encodingState; // non-manual calls leave the state as it was
this.inputChange();
}
@@ -229,6 +232,14 @@ class InputWaiter {
return this.inputChrEnc;
}
+ /**
+ * Returns how the input character encoding was determined: not set, detected automatically, or set manually
+ * @returns {number} - 0 = unset, 1 = detected, 2 = manual
+ */
+ getEncodingState() {
+ return this.encodingState;
+ }
+
/**
* Sets word wrap on the input editor
* @param {boolean} wrap
@@ -908,7 +919,7 @@ class InputWaiter {
*/
afterPaste(e) {
// If EOL has been fixed, skip this.
- if (this.eolSetManually) return;
+ if (this.eolState > 1) return;
const inputText = this.getInput();
@@ -930,17 +941,23 @@ class InputWaiter {
}, 0);
if (total === 0) return;
- // If CRLF not zero and more than half the highest alternative, choose CRLF
+ // Find most prevalent line ending sequence
const highest = Object.entries(eolCharCounts).reduce((acc, curr) => {
return curr[1] > acc[1] ? curr : acc;
}, ["LF", 0]);
+ let choice = highest[0];
+
+ // If CRLF not zero and more than half the highest alternative, choose CRLF
if ((eolCharCounts.CRLF * 2) > highest[1]) {
- this.eolChange("CRLF");
- return;
+ choice = "CRLF";
}
- // Else choose max
- this.eolChange(highest[0]);
+ const eolVal = eolCodeToSeq[choice];
+ if (eolVal === this.getEOLSeq()) return;
+
+ // Setting automatically
+ this.eolState = 1;
+ this.eolChange(choice);
}
/**
@@ -1276,8 +1293,13 @@ class InputWaiter {
this.manager.output.removeAllOutputs();
this.manager.output.terminateZipWorker();
- this.eolSetManually = false;
- this.manager.output.eolSetManually = false;
+ this.eolState = 0;
+ this.encodingState = 0;
+ this.manager.output.eolState = 0;
+ this.manager.output.encodingState = 0;
+
+ this.initEditor();
+ this.manager.output.initEditor();
const tabsList = document.getElementById("input-tabs");
const tabsListChildren = tabsList.children;
diff --git a/src/web/waiters/OutputWaiter.mjs b/src/web/waiters/OutputWaiter.mjs
index 6acd6752..190d2ad9 100755
--- a/src/web/waiters/OutputWaiter.mjs
+++ b/src/web/waiters/OutputWaiter.mjs
@@ -7,6 +7,7 @@
import Utils, {debounce} from "../../core/Utils.mjs";
import Dish from "../../core/Dish.mjs";
+import {isUTF8, CHR_ENC_SIMPLE_REVERSE_LOOKUP} from "../../core/lib/ChrEnc.mjs";
import {detectFileType} from "../../core/lib/FileType.mjs";
import FileSaver from "file-saver";
import ZipWorker from "worker-loader?inline=no-fallback!../workers/ZipWorker.mjs";
@@ -70,7 +71,8 @@ class OutputWaiter {
this.zipWorker = null;
this.maxTabs = this.manager.tabs.calcMaxTabs();
this.tabTimeout = null;
- this.eolSetManually = false;
+ this.eolState = 0; // 0 = unset, 1 = detected, 2 = manual
+ this.encodingState = 0; // 0 = unset, 1 = detected, 2 = manual
}
/**
@@ -110,6 +112,8 @@ class OutputWaiter {
eolHandler: this.eolChange.bind(this),
chrEncHandler: this.chrEncChange.bind(this),
chrEncGetter: this.getChrEnc.bind(this),
+ getEncodingState: this.getEncodingState.bind(this),
+ getEOLState: this.getEOLState.bind(this),
htmlOutput: this.htmlOutput
}),
htmlPlugin(this.htmlOutput),
@@ -138,6 +142,7 @@ class OutputWaiter {
]
});
+ if (this.outputEditorView) this.outputEditorView.destroy();
this.outputEditorView = new EditorView({
state: initialState,
parent: this.outputTextEl
@@ -148,30 +153,18 @@ class OutputWaiter {
* Handler for EOL change events
* Sets the line separator
* @param {string} eol
- * @param {boolean} manual - a flag for whether this was set by the user or automatically
+ * @param {boolean} [manual=false]
*/
async eolChange(eol, manual=false) {
const eolVal = eolCodeToSeq[eol];
if (eolVal === undefined) return;
- const eolBtn = document.querySelector("#output-text .eol-value");
- if (manual) {
- this.eolSetManually = true;
- eolBtn.classList.remove("font-italic");
- } else {
- eolBtn.classList.add("font-italic");
- }
+ this.eolState = manual ? 2 : this.eolState;
+ if (this.eolState < 2 && eolVal === this.getEOLSeq()) return;
- if (eolVal === this.getEOLSeq()) return;
-
- if (!manual) {
- // Pulse
- eolBtn.classList.add("pulse");
- setTimeout(() => {
- eolBtn.classList.remove("pulse");
- }, 2000);
+ if (this.eolState === 1) {
// Alert
- this.app.alert(`Output EOL separator has been changed to ${eolCodeToName[eol]}`, 5000);
+ this.app.alert(`Output end of line separator has been detected and changed to ${eolCodeToName[eol]}`, 5000);
}
const currentTabNum = this.manager.tabs.getActiveTab("output");
@@ -205,13 +198,23 @@ class OutputWaiter {
return this.outputs[currentTabNum].eolSequence;
}
+ /**
+ * Returns how the output EOL sequence was determined: not set, detected automatically, or set manually
+ * @returns {number} - 0 = unset, 1 = detected, 2 = manual
+ */
+ getEOLState() {
+ return this.eolState;
+ }
+
/**
* Handler for Chr Enc change events
* Sets the output character encoding
* @param {number} chrEncVal
+ * @param {boolean} [manual=false]
*/
- async chrEncChange(chrEncVal) {
+ async chrEncChange(chrEncVal, manual=false) {
if (typeof chrEncVal !== "number") return;
+ const currentEnc = this.getChrEnc();
const currentTabNum = this.manager.tabs.getActiveTab("output");
if (currentTabNum >= 0) {
@@ -220,10 +223,17 @@ class OutputWaiter {
throw new Error(`Cannot change output ${currentTabNum} chrEnc to ${chrEncVal}`);
}
- // Reset the output, forcing it to re-decode the data with the new character encoding
- await this.setOutput(this.currentOutputCache, true);
- // Update the URL manually since we aren't firing a statechange event
- this.app.updateURL(true);
+ this.encodingState = manual ? 2 : this.encodingState;
+
+ if (this.encodingState > 1) {
+ // Reset the output, forcing it to re-decode the data with the new character encoding
+ await this.setOutput(this.currentOutputCache, true);
+ // Update the URL manually since we aren't firing a statechange event
+ this.app.updateURL(true);
+ } else if (currentEnc !== chrEncVal) {
+ // Alert
+ this.app.alert(`Output character encoding has been detected and changed to ${CHR_ENC_SIMPLE_REVERSE_LOOKUP[chrEncVal] || "Raw Bytes"}`, 5000);
+ }
}
/**
@@ -238,6 +248,14 @@ class OutputWaiter {
return this.outputs[currentTabNum].encoding;
}
+ /**
+ * Returns how the output character encoding was determined: not set, detected automatically, or set manually
+ * @returns {number} - 0 = unset, 1 = detected, 2 = manual
+ */
+ getEncodingState() {
+ return this.encodingState;
+ }
+
/**
* Sets word wrap on the output editor
* @param {boolean} wrap
@@ -273,6 +291,7 @@ class OutputWaiter {
const tabNum = this.manager.tabs.getActiveTab("output");
this.manager.timing.recordTime("outputDecodingStart", tabNum);
if (data instanceof ArrayBuffer) {
+ await this.detectEncoding(data);
data = await this.bufferToStr(data);
}
this.manager.timing.recordTime("outputDecodingEnd", tabNum);
@@ -380,7 +399,7 @@ class OutputWaiter {
*/
detectEOLSequence(data) {
// If EOL has been fixed, skip this.
- if (this.eolSetManually) return;
+ if (this.eolState > 1) return;
// If data is too long, skip this.
if (data.length > 1000000) return;
@@ -402,17 +421,54 @@ class OutputWaiter {
}, 0);
if (total === 0) return;
- // If CRLF not zero and more than half the highest alternative, choose CRLF
+ // Find most prevalent line ending sequence
const highest = Object.entries(eolCharCounts).reduce((acc, curr) => {
return curr[1] > acc[1] ? curr : acc;
}, ["LF", 0]);
+ let choice = highest[0];
+
+ // If CRLF not zero and more than half the highest alternative, choose CRLF
if ((eolCharCounts.CRLF * 2) > highest[1]) {
- this.eolChange("CRLF");
- return;
+ choice = "CRLF";
}
- // Else choose max
- this.eolChange(highest[0]);
+ const eolVal = eolCodeToSeq[choice];
+ if (eolVal === this.getEOLSeq()) return;
+
+ // Setting automatically
+ this.eolState = 1;
+ this.eolChange(choice);
+ }
+
+ /**
+ * Runs isUTF8 over the raw output data and switches the output character encoding to match, unless it was set manually.
+ *
+ * @param {ArrayBuffer} data
+ */
+ async detectEncoding(data) {
+ // If encoding has been fixed, skip this.
+ if (this.encodingState > 1) return;
+ // If data is too long, skip this.
+ if (data.byteLength > 1000000) return;
+
+ const enc = isUTF8(data); // 0 = not UTF8, 1 = ASCII, 2 = UTF8
+
+ switch (enc) {
+ case 0: // not UTF8
+ // Set to Raw Bytes
+ this.encodingState = 1; // 1 = detected; set even if the encoding value ends up unchanged
+ await this.chrEncChange(0, false);
+ break;
+ case 2: // UTF8
+ // Set to UTF8
+ this.encodingState = 1; // 1 = detected; set even if the encoding value ends up unchanged
+ await this.chrEncChange(65001, false);
+ break;
+ case 1: // ASCII
+ default:
+ // Ignore - leave the current encoding and state untouched
+ break;
+ }
+ }
/**