mirror of
https://github.com/gchq/CyberChef
synced 2025-01-06 01:28:48 +00:00
Automatically detect UTF8 character encoding in output
This commit is contained in:
parent
16dfb3fac6
commit
65ffd8d65d
7 changed files with 270 additions and 147 deletions
|
@ -224,8 +224,85 @@ export function chrEncWidth(page) {
|
||||||
* @copyright Crown Copyright 2019
|
* @copyright Crown Copyright 2019
|
||||||
* @license Apache-2.0
|
* @license Apache-2.0
|
||||||
*/
|
*/
|
||||||
|
export const UNICODE_NORMALISATION_FORMS = ["NFD", "NFC", "NFKD", "NFKC"];
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Character encoding format mappings.
|
* Detects whether the input buffer is valid UTF8.
|
||||||
|
*
|
||||||
|
* @param {ArrayBuffer} data
|
||||||
|
* @returns {number} - 0 = not UTF8, 1 = ASCII, 2 = UTF8
|
||||||
*/
|
*/
|
||||||
export const UNICODE_NORMALISATION_FORMS = ["NFD", "NFC", "NFKD", "NFKC"];
|
/**
 * Detects whether the input buffer is valid UTF8.
 *
 * Only printable ASCII (0x20-0x7E) plus tab, line feed and carriage return
 * are accepted as single-byte characters; any other single byte (for example
 * NUL or DEL) makes the whole input "not UTF8". Multi-byte sequences must be
 * well formed: overlong encodings, UTF-16 surrogates, code points above
 * U+10FFFF and sequences cut short by the end of the data are all rejected.
 * Empty input contains no non-ASCII byte and is therefore reported as ASCII.
 *
 * @param {ArrayBuffer|ArrayBufferView} data - Raw bytes to inspect. Typed
 *     arrays and DataViews are read in place over their own byte range.
 * @returns {number} - 0 = not UTF8, 1 = ASCII, 2 = UTF8
 */
export function isUTF8(data) {
    // A view must be re-wrapped over its own slice of the underlying buffer:
    // new Uint8Array(view) copies *elements* (truncating wide element types)
    // and yields an empty array for a DataView.
    const bytes = ArrayBuffer.isView(data) ?
        new Uint8Array(data.buffer, data.byteOffset, data.byteLength) :
        new Uint8Array(data);

    let onlyASCII = true;
    let i = 0;

    while (i < bytes.length) {
        const b = bytes[i];

        // Tab, LF, CR and printable ASCII
        if (b === 0x09 || b === 0x0A || b === 0x0D || (0x20 <= b && b <= 0x7E)) {
            i += 1;
            continue;
        }

        onlyASCII = false;

        const seqLength = utf8MultiByteLength(bytes, i);
        if (seqLength === 0) return 0;
        i += seqLength;
    }

    return onlyASCII ? 1 : 2;
}

/**
 * Returns the length (2, 3 or 4) of the well-formed multi-byte UTF8 sequence
 * starting at the given offset, or 0 if there is none.
 *
 * Reads past the end of the array produce undefined, which fails every range
 * comparison, so truncated sequences are rejected without explicit bounds checks.
 *
 * @param {Uint8Array} bytes
 * @param {number} offset
 * @returns {number}
 */
function utf8MultiByteLength(bytes, offset) {
    const inRange = (value, low, high) => low <= value && value <= high;
    const isContinuation = value => inRange(value, 0x80, 0xBF);

    const lead = bytes[offset];
    const second = bytes[offset + 1];

    // Two bytes; 0xC0 and 0xC1 could only start overlong encodings
    if (inRange(lead, 0xC2, 0xDF)) {
        return isContinuation(second) ? 2 : 0;
    }

    // Three bytes; the second byte is narrowed after 0xE0 (no overlongs)
    // and after 0xED (no surrogates U+D800-U+DFFF)
    if (inRange(lead, 0xE0, 0xEF)) {
        const low = lead === 0xE0 ? 0xA0 : 0x80;
        const high = lead === 0xED ? 0x9F : 0xBF;
        return inRange(second, low, high) &&
            isContinuation(bytes[offset + 2]) ? 3 : 0;
    }

    // Four bytes; the second byte is narrowed after 0xF0 (no overlongs)
    // and after 0xF4 (nothing above U+10FFFF)
    if (inRange(lead, 0xF0, 0xF4)) {
        const low = lead === 0xF0 ? 0x90 : 0x80;
        const high = lead === 0xF4 ? 0x8F : 0xBF;
        return inRange(second, low, high) &&
            isContinuation(bytes[offset + 2]) &&
            isContinuation(bytes[offset + 3]) ? 4 : 0;
    }

    // Stray continuation byte, invalid lead byte or disallowed control character
    return 0;
}
|
||||||
|
|
|
@ -3,6 +3,7 @@ import Utils, { isWorkerEnvironment } from "../Utils.mjs";
|
||||||
import Recipe from "../Recipe.mjs";
|
import Recipe from "../Recipe.mjs";
|
||||||
import Dish from "../Dish.mjs";
|
import Dish from "../Dish.mjs";
|
||||||
import {detectFileType, isType} from "./FileType.mjs";
|
import {detectFileType, isType} from "./FileType.mjs";
|
||||||
|
import {isUTF8} from "./ChrEnc.mjs";
|
||||||
import chiSquared from "chi-squared";
|
import chiSquared from "chi-squared";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -111,82 +112,6 @@ class Magic {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Detects whether the input buffer is valid UTF8.
|
|
||||||
*
|
|
||||||
* @returns {boolean}
|
|
||||||
*/
|
|
||||||
isUTF8() {
|
|
||||||
const bytes = new Uint8Array(this.inputBuffer);
|
|
||||||
let i = 0;
|
|
||||||
while (i < bytes.length) {
|
|
||||||
if (( // ASCII
|
|
||||||
bytes[i] === 0x09 ||
|
|
||||||
bytes[i] === 0x0A ||
|
|
||||||
bytes[i] === 0x0D ||
|
|
||||||
(0x20 <= bytes[i] && bytes[i] <= 0x7E)
|
|
||||||
)) {
|
|
||||||
i += 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (( // non-overlong 2-byte
|
|
||||||
(0xC2 <= bytes[i] && bytes[i] <= 0xDF) &&
|
|
||||||
(0x80 <= bytes[i+1] && bytes[i+1] <= 0xBF)
|
|
||||||
)) {
|
|
||||||
i += 2;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (( // excluding overlongs
|
|
||||||
bytes[i] === 0xE0 &&
|
|
||||||
(0xA0 <= bytes[i + 1] && bytes[i + 1] <= 0xBF) &&
|
|
||||||
(0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF)
|
|
||||||
) ||
|
|
||||||
( // straight 3-byte
|
|
||||||
((0xE1 <= bytes[i] && bytes[i] <= 0xEC) ||
|
|
||||||
bytes[i] === 0xEE ||
|
|
||||||
bytes[i] === 0xEF) &&
|
|
||||||
(0x80 <= bytes[i + 1] && bytes[i+1] <= 0xBF) &&
|
|
||||||
(0x80 <= bytes[i+2] && bytes[i+2] <= 0xBF)
|
|
||||||
) ||
|
|
||||||
( // excluding surrogates
|
|
||||||
bytes[i] === 0xED &&
|
|
||||||
(0x80 <= bytes[i+1] && bytes[i+1] <= 0x9F) &&
|
|
||||||
(0x80 <= bytes[i+2] && bytes[i+2] <= 0xBF)
|
|
||||||
)) {
|
|
||||||
i += 3;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (( // planes 1-3
|
|
||||||
bytes[i] === 0xF0 &&
|
|
||||||
(0x90 <= bytes[i + 1] && bytes[i + 1] <= 0xBF) &&
|
|
||||||
(0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF) &&
|
|
||||||
(0x80 <= bytes[i + 3] && bytes[i + 3] <= 0xBF)
|
|
||||||
) ||
|
|
||||||
( // planes 4-15
|
|
||||||
(0xF1 <= bytes[i] && bytes[i] <= 0xF3) &&
|
|
||||||
(0x80 <= bytes[i + 1] && bytes[i + 1] <= 0xBF) &&
|
|
||||||
(0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF) &&
|
|
||||||
(0x80 <= bytes[i + 3] && bytes[i + 3] <= 0xBF)
|
|
||||||
) ||
|
|
||||||
( // plane 16
|
|
||||||
bytes[i] === 0xF4 &&
|
|
||||||
(0x80 <= bytes[i + 1] && bytes[i + 1] <= 0x8F) &&
|
|
||||||
(0x80 <= bytes[i + 2] && bytes[i + 2] <= 0xBF) &&
|
|
||||||
(0x80 <= bytes[i + 3] && bytes[i + 3] <= 0xBF)
|
|
||||||
)) {
|
|
||||||
i += 4;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculates the Shannon entropy of the input data.
|
* Calculates the Shannon entropy of the input data.
|
||||||
*
|
*
|
||||||
|
@ -336,7 +261,7 @@ class Magic {
|
||||||
data: this.inputStr.slice(0, 100),
|
data: this.inputStr.slice(0, 100),
|
||||||
languageScores: this.detectLanguage(extLang),
|
languageScores: this.detectLanguage(extLang),
|
||||||
fileType: this.detectFileType(),
|
fileType: this.detectFileType(),
|
||||||
isUTF8: this.isUTF8(),
|
isUTF8: !!isUTF8(this.inputBuffer),
|
||||||
entropy: this.calcEntropy(),
|
entropy: this.calcEntropy(),
|
||||||
matchingOps: matchingOps,
|
matchingOps: matchingOps,
|
||||||
useful: useful,
|
useful: useful,
|
||||||
|
|
|
@ -500,22 +500,22 @@ class App {
|
||||||
// Input Character Encoding
|
// Input Character Encoding
|
||||||
// Must be set before the input is loaded
|
// Must be set before the input is loaded
|
||||||
if (this.uriParams.ienc) {
|
if (this.uriParams.ienc) {
|
||||||
this.manager.input.chrEncChange(parseInt(this.uriParams.ienc, 10));
|
this.manager.input.chrEncChange(parseInt(this.uriParams.ienc, 10), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Output Character Encoding
|
// Output Character Encoding
|
||||||
if (this.uriParams.oenc) {
|
if (this.uriParams.oenc) {
|
||||||
this.manager.output.chrEncChange(parseInt(this.uriParams.oenc, 10));
|
this.manager.output.chrEncChange(parseInt(this.uriParams.oenc, 10), true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Input EOL sequence
|
// Input EOL sequence
|
||||||
if (this.uriParams.ieol) {
|
if (this.uriParams.ieol) {
|
||||||
this.manager.input.eolChange(this.uriParams.ieol);
|
this.manager.input.eolChange(this.uriParams.ieol, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Output EOL sequence
|
// Output EOL sequence
|
||||||
if (this.uriParams.oeol) {
|
if (this.uriParams.oeol) {
|
||||||
this.manager.output.eolChange(this.uriParams.oeol);
|
this.manager.output.eolChange(this.uriParams.oeol, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read in input data from URI params
|
// Read in input data from URI params
|
||||||
|
|
|
@ -69,6 +69,10 @@ select.arg {
|
||||||
min-width: 100px;
|
min-width: 100px;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Single-value selects (no size / multiple attribute) take 100% height; !important overrides competing height rules */
select.arg.form-control:not([size]):not([multiple]),
select.custom-file-control:not([size]):not([multiple]) {
    height: 100% !important;
}
|
||||||
|
|
||||||
textarea.arg {
|
textarea.arg {
|
||||||
min-height: 74px;
|
min-height: 74px;
|
||||||
resize: vertical;
|
resize: vertical;
|
||||||
|
@ -80,7 +84,7 @@ div.toggle-string {
|
||||||
|
|
||||||
input.toggle-string {
|
input.toggle-string {
|
||||||
border-top-right-radius: 0 !important;
|
border-top-right-radius: 0 !important;
|
||||||
height: 42px !important;
|
height: 100%;
|
||||||
}
|
}
|
||||||
|
|
||||||
.operation [class^='bmd-label'],
|
.operation [class^='bmd-label'],
|
||||||
|
|
|
@ -24,6 +24,8 @@ class StatusBarPanel {
|
||||||
this.eolHandler = opts.eolHandler;
|
this.eolHandler = opts.eolHandler;
|
||||||
this.chrEncHandler = opts.chrEncHandler;
|
this.chrEncHandler = opts.chrEncHandler;
|
||||||
this.chrEncGetter = opts.chrEncGetter;
|
this.chrEncGetter = opts.chrEncGetter;
|
||||||
|
this.getEncodingState = opts.getEncodingState;
|
||||||
|
this.getEOLState = opts.getEOLState;
|
||||||
this.htmlOutput = opts.htmlOutput;
|
this.htmlOutput = opts.htmlOutput;
|
||||||
|
|
||||||
this.eolVal = null;
|
this.eolVal = null;
|
||||||
|
@ -115,7 +117,7 @@ class StatusBarPanel {
|
||||||
|
|
||||||
if (isNaN(chrEncVal)) return;
|
if (isNaN(chrEncVal)) return;
|
||||||
|
|
||||||
this.chrEncHandler(chrEncVal);
|
this.chrEncHandler(chrEncVal, true);
|
||||||
this.updateCharEnc(chrEncVal);
|
this.updateCharEnc(chrEncVal);
|
||||||
hideElement(e.target.closest(".cm-status-bar-select-content"));
|
hideElement(e.target.closest(".cm-status-bar-select-content"));
|
||||||
}
|
}
|
||||||
|
@ -212,12 +214,31 @@ class StatusBarPanel {
|
||||||
* @param {EditorState} state
|
* @param {EditorState} state
|
||||||
*/
|
*/
|
||||||
updateEOL(state) {
|
updateEOL(state) {
|
||||||
if (state.lineBreak === this.eolVal) return;
|
if (this.getEOLState() < 2 && state.lineBreak === this.eolVal) return;
|
||||||
|
|
||||||
const val = this.dom.querySelector(".eol-value");
|
const val = this.dom.querySelector(".eol-value");
|
||||||
const button = val.closest(".cm-status-bar-select-btn");
|
const button = val.closest(".cm-status-bar-select-btn");
|
||||||
const eolCode = eolSeqToCode[state.lineBreak];
|
let eolCode = eolSeqToCode[state.lineBreak];
|
||||||
const eolName = eolCodeToName[eolCode];
|
let eolName = eolCodeToName[eolCode];
|
||||||
|
|
||||||
|
switch (this.getEOLState()) {
|
||||||
|
case 1: // Detected
|
||||||
|
val.classList.add("font-italic");
|
||||||
|
eolCode += " (detected)";
|
||||||
|
eolName += " (detected)";
|
||||||
|
// Pulse
|
||||||
|
val.classList.add("pulse");
|
||||||
|
setTimeout(() => {
|
||||||
|
val.classList.remove("pulse");
|
||||||
|
}, 2000);
|
||||||
|
break;
|
||||||
|
case 0: // Unset
|
||||||
|
case 2: // Manually set
|
||||||
|
default:
|
||||||
|
val.classList.remove("font-italic");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
val.textContent = eolCode;
|
val.textContent = eolCode;
|
||||||
button.setAttribute("title", `End of line sequence:<br>${eolName}`);
|
button.setAttribute("title", `End of line sequence:<br>${eolName}`);
|
||||||
button.setAttribute("data-original-title", `End of line sequence:<br>${eolName}`);
|
button.setAttribute("data-original-title", `End of line sequence:<br>${eolName}`);
|
||||||
|
@ -230,12 +251,30 @@ class StatusBarPanel {
|
||||||
*/
|
*/
|
||||||
updateCharEnc() {
|
updateCharEnc() {
|
||||||
const chrEncVal = this.chrEncGetter();
|
const chrEncVal = this.chrEncGetter();
|
||||||
if (chrEncVal === this.chrEncVal) return;
|
if (this.getEncodingState() < 2 && chrEncVal === this.chrEncVal) return;
|
||||||
|
|
||||||
const name = CHR_ENC_SIMPLE_REVERSE_LOOKUP[chrEncVal] ? CHR_ENC_SIMPLE_REVERSE_LOOKUP[chrEncVal] : "Raw Bytes";
|
let name = CHR_ENC_SIMPLE_REVERSE_LOOKUP[chrEncVal] ? CHR_ENC_SIMPLE_REVERSE_LOOKUP[chrEncVal] : "Raw Bytes";
|
||||||
|
|
||||||
const val = this.dom.querySelector(".chr-enc-value");
|
const val = this.dom.querySelector(".chr-enc-value");
|
||||||
const button = val.closest(".cm-status-bar-select-btn");
|
const button = val.closest(".cm-status-bar-select-btn");
|
||||||
|
|
||||||
|
switch (this.getEncodingState()) {
|
||||||
|
case 1: // Detected
|
||||||
|
val.classList.add("font-italic");
|
||||||
|
name += " (detected)";
|
||||||
|
// Pulse
|
||||||
|
val.classList.add("pulse");
|
||||||
|
setTimeout(() => {
|
||||||
|
val.classList.remove("pulse");
|
||||||
|
}, 2000);
|
||||||
|
break;
|
||||||
|
case 0: // Unset
|
||||||
|
case 2: // Manually set
|
||||||
|
default:
|
||||||
|
val.classList.remove("font-italic");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
val.textContent = name;
|
val.textContent = name;
|
||||||
button.setAttribute("title", `${this.label} character encoding:<br>${name}`);
|
button.setAttribute("title", `${this.label} character encoding:<br>${name}`);
|
||||||
button.setAttribute("data-original-title", `${this.label} character encoding:<br>${name}`);
|
button.setAttribute("data-original-title", `${this.label} character encoding:<br>${name}`);
|
||||||
|
|
|
@ -62,7 +62,8 @@ class InputWaiter {
|
||||||
|
|
||||||
this.inputTextEl = document.getElementById("input-text");
|
this.inputTextEl = document.getElementById("input-text");
|
||||||
this.inputChrEnc = 0;
|
this.inputChrEnc = 0;
|
||||||
this.eolSetManually = false;
|
this.eolState = 0; // 0 = unset, 1 = detected, 2 = manual
|
||||||
|
this.encodingState = 0; // 0 = unset, 1 = detected, 2 = manual
|
||||||
this.initEditor();
|
this.initEditor();
|
||||||
|
|
||||||
this.inputWorker = null;
|
this.inputWorker = null;
|
||||||
|
@ -116,7 +117,9 @@ class InputWaiter {
|
||||||
label: "Input",
|
label: "Input",
|
||||||
eolHandler: this.eolChange.bind(this),
|
eolHandler: this.eolChange.bind(this),
|
||||||
chrEncHandler: this.chrEncChange.bind(this),
|
chrEncHandler: this.chrEncChange.bind(this),
|
||||||
chrEncGetter: this.getChrEnc.bind(this)
|
chrEncGetter: this.getChrEnc.bind(this),
|
||||||
|
getEncodingState: this.getEncodingState.bind(this),
|
||||||
|
getEOLState: this.getEOLState.bind(this)
|
||||||
}),
|
}),
|
||||||
|
|
||||||
// Mutable state
|
// Mutable state
|
||||||
|
@ -156,6 +159,8 @@ class InputWaiter {
|
||||||
]
|
]
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
if (this.inputEditorView) this.inputEditorView.destroy();
|
||||||
this.inputEditorView = new EditorView({
|
this.inputEditorView = new EditorView({
|
||||||
state: initialState,
|
state: initialState,
|
||||||
parent: this.inputTextEl
|
parent: this.inputTextEl
|
||||||
|
@ -166,30 +171,18 @@ class InputWaiter {
|
||||||
* Handler for EOL change events
|
* Handler for EOL change events
|
||||||
* Sets the line separator
|
* Sets the line separator
|
||||||
* @param {string} eol
|
* @param {string} eol
|
||||||
* @param {boolean} manual - a flag for whether this was set by the user or automatically
|
* @param {boolean} [manual=false]
|
||||||
*/
|
*/
|
||||||
eolChange(eol, manual=false) {
|
eolChange(eol, manual=false) {
|
||||||
const eolVal = eolCodeToSeq[eol];
|
const eolVal = eolCodeToSeq[eol];
|
||||||
if (eolVal === undefined) return;
|
if (eolVal === undefined) return;
|
||||||
|
|
||||||
const eolBtn = document.querySelector("#input-text .eol-value");
|
this.eolState = manual ? 2 : this.eolState;
|
||||||
if (manual) {
|
if (this.eolState < 2 && eolVal === this.getEOLSeq()) return;
|
||||||
this.eolSetManually = true;
|
|
||||||
eolBtn.classList.remove("font-italic");
|
|
||||||
} else {
|
|
||||||
eolBtn.classList.add("font-italic");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (eolVal === this.getEOLSeq()) return;
|
if (this.eolState === 1) {
|
||||||
|
|
||||||
if (!manual) {
|
|
||||||
// Pulse
|
|
||||||
eolBtn.classList.add("pulse");
|
|
||||||
setTimeout(() => {
|
|
||||||
eolBtn.classList.remove("pulse");
|
|
||||||
}, 2000);
|
|
||||||
// Alert
|
// Alert
|
||||||
this.app.alert(`Input EOL separator has been changed to ${eolCodeToName[eol]}`, 5000);
|
this.app.alert(`Input end of line separator has been detected and changed to ${eolCodeToName[eol]}`, 5000);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the EOL value
|
// Update the EOL value
|
||||||
|
@ -210,14 +203,24 @@ class InputWaiter {
|
||||||
return this.inputEditorView.state.lineBreak;
|
return this.inputEditorView.state.lineBreak;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether the input EOL sequence was set manually or has been detected automatically
|
||||||
|
* @returns {number} - 0 = unset, 1 = detected, 2 = manual
|
||||||
|
*/
|
||||||
|
getEOLState() {
|
||||||
|
return this.eolState;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handler for Chr Enc change events
|
* Handler for Chr Enc change events
|
||||||
* Sets the input character encoding
|
* Sets the input character encoding
|
||||||
* @param {number} chrEncVal
|
* @param {number} chrEncVal
|
||||||
|
* @param {boolean} [manual=false]
|
||||||
*/
|
*/
|
||||||
chrEncChange(chrEncVal) {
|
chrEncChange(chrEncVal, manual=false) {
|
||||||
if (typeof chrEncVal !== "number") return;
|
if (typeof chrEncVal !== "number") return;
|
||||||
this.inputChrEnc = chrEncVal;
|
this.inputChrEnc = chrEncVal;
|
||||||
|
this.encodingState = manual ? 2 : this.encodingState;
|
||||||
this.inputChange();
|
this.inputChange();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -229,6 +232,14 @@ class InputWaiter {
|
||||||
return this.inputChrEnc;
|
return this.inputChrEnc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether the input character encoding was set manually or has been detected automatically
|
||||||
|
* @returns {number} - 0 = unset, 1 = detected, 2 = manual
|
||||||
|
*/
|
||||||
|
getEncodingState() {
|
||||||
|
return this.encodingState;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets word wrap on the input editor
|
* Sets word wrap on the input editor
|
||||||
* @param {boolean} wrap
|
* @param {boolean} wrap
|
||||||
|
@ -908,7 +919,7 @@ class InputWaiter {
|
||||||
*/
|
*/
|
||||||
afterPaste(e) {
|
afterPaste(e) {
|
||||||
// If EOL has been fixed, skip this.
|
// If EOL has been fixed, skip this.
|
||||||
if (this.eolSetManually) return;
|
if (this.eolState > 1) return;
|
||||||
|
|
||||||
const inputText = this.getInput();
|
const inputText = this.getInput();
|
||||||
|
|
||||||
|
@ -930,17 +941,23 @@ class InputWaiter {
|
||||||
}, 0);
|
}, 0);
|
||||||
if (total === 0) return;
|
if (total === 0) return;
|
||||||
|
|
||||||
// If CRLF not zero and more than half the highest alternative, choose CRLF
|
// Find most prevalent line ending sequence
|
||||||
const highest = Object.entries(eolCharCounts).reduce((acc, curr) => {
|
const highest = Object.entries(eolCharCounts).reduce((acc, curr) => {
|
||||||
return curr[1] > acc[1] ? curr : acc;
|
return curr[1] > acc[1] ? curr : acc;
|
||||||
}, ["LF", 0]);
|
}, ["LF", 0]);
|
||||||
|
let choice = highest[0];
|
||||||
|
|
||||||
|
// If CRLF not zero and more than half the highest alternative, choose CRLF
|
||||||
if ((eolCharCounts.CRLF * 2) > highest[1]) {
|
if ((eolCharCounts.CRLF * 2) > highest[1]) {
|
||||||
this.eolChange("CRLF");
|
choice = "CRLF";
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Else choose max
|
const eolVal = eolCodeToSeq[choice];
|
||||||
this.eolChange(highest[0]);
|
if (eolVal === this.getEOLSeq()) return;
|
||||||
|
|
||||||
|
// Setting automatically
|
||||||
|
this.eolState = 1;
|
||||||
|
this.eolChange(choice);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1276,8 +1293,13 @@ class InputWaiter {
|
||||||
this.manager.output.removeAllOutputs();
|
this.manager.output.removeAllOutputs();
|
||||||
this.manager.output.terminateZipWorker();
|
this.manager.output.terminateZipWorker();
|
||||||
|
|
||||||
this.eolSetManually = false;
|
this.eolState = 0;
|
||||||
this.manager.output.eolSetManually = false;
|
this.encodingState = 0;
|
||||||
|
this.manager.output.eolState = 0;
|
||||||
|
this.manager.output.encodingState = 0;
|
||||||
|
|
||||||
|
this.initEditor();
|
||||||
|
this.manager.output.initEditor();
|
||||||
|
|
||||||
const tabsList = document.getElementById("input-tabs");
|
const tabsList = document.getElementById("input-tabs");
|
||||||
const tabsListChildren = tabsList.children;
|
const tabsListChildren = tabsList.children;
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
|
|
||||||
import Utils, {debounce} from "../../core/Utils.mjs";
|
import Utils, {debounce} from "../../core/Utils.mjs";
|
||||||
import Dish from "../../core/Dish.mjs";
|
import Dish from "../../core/Dish.mjs";
|
||||||
|
import {isUTF8, CHR_ENC_SIMPLE_REVERSE_LOOKUP} from "../../core/lib/ChrEnc.mjs";
|
||||||
import {detectFileType} from "../../core/lib/FileType.mjs";
|
import {detectFileType} from "../../core/lib/FileType.mjs";
|
||||||
import FileSaver from "file-saver";
|
import FileSaver from "file-saver";
|
||||||
import ZipWorker from "worker-loader?inline=no-fallback!../workers/ZipWorker.mjs";
|
import ZipWorker from "worker-loader?inline=no-fallback!../workers/ZipWorker.mjs";
|
||||||
|
@ -70,7 +71,8 @@ class OutputWaiter {
|
||||||
this.zipWorker = null;
|
this.zipWorker = null;
|
||||||
this.maxTabs = this.manager.tabs.calcMaxTabs();
|
this.maxTabs = this.manager.tabs.calcMaxTabs();
|
||||||
this.tabTimeout = null;
|
this.tabTimeout = null;
|
||||||
this.eolSetManually = false;
|
this.eolState = 0; // 0 = unset, 1 = detected, 2 = manual
|
||||||
|
this.encodingState = 0; // 0 = unset, 1 = detected, 2 = manual
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -110,6 +112,8 @@ class OutputWaiter {
|
||||||
eolHandler: this.eolChange.bind(this),
|
eolHandler: this.eolChange.bind(this),
|
||||||
chrEncHandler: this.chrEncChange.bind(this),
|
chrEncHandler: this.chrEncChange.bind(this),
|
||||||
chrEncGetter: this.getChrEnc.bind(this),
|
chrEncGetter: this.getChrEnc.bind(this),
|
||||||
|
getEncodingState: this.getEncodingState.bind(this),
|
||||||
|
getEOLState: this.getEOLState.bind(this),
|
||||||
htmlOutput: this.htmlOutput
|
htmlOutput: this.htmlOutput
|
||||||
}),
|
}),
|
||||||
htmlPlugin(this.htmlOutput),
|
htmlPlugin(this.htmlOutput),
|
||||||
|
@ -138,6 +142,7 @@ class OutputWaiter {
|
||||||
]
|
]
|
||||||
});
|
});
|
||||||
|
|
||||||
|
if (this.outputEditorView) this.outputEditorView.destroy();
|
||||||
this.outputEditorView = new EditorView({
|
this.outputEditorView = new EditorView({
|
||||||
state: initialState,
|
state: initialState,
|
||||||
parent: this.outputTextEl
|
parent: this.outputTextEl
|
||||||
|
@ -148,30 +153,18 @@ class OutputWaiter {
|
||||||
* Handler for EOL change events
|
* Handler for EOL change events
|
||||||
* Sets the line separator
|
* Sets the line separator
|
||||||
* @param {string} eol
|
* @param {string} eol
|
||||||
* @param {boolean} manual - a flag for whether this was set by the user or automatically
|
* @param {boolean} [manual=false]
|
||||||
*/
|
*/
|
||||||
async eolChange(eol, manual=false) {
|
async eolChange(eol, manual=false) {
|
||||||
const eolVal = eolCodeToSeq[eol];
|
const eolVal = eolCodeToSeq[eol];
|
||||||
if (eolVal === undefined) return;
|
if (eolVal === undefined) return;
|
||||||
|
|
||||||
const eolBtn = document.querySelector("#output-text .eol-value");
|
this.eolState = manual ? 2 : this.eolState;
|
||||||
if (manual) {
|
if (this.eolState < 2 && eolVal === this.getEOLSeq()) return;
|
||||||
this.eolSetManually = true;
|
|
||||||
eolBtn.classList.remove("font-italic");
|
|
||||||
} else {
|
|
||||||
eolBtn.classList.add("font-italic");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (eolVal === this.getEOLSeq()) return;
|
if (this.eolState === 1) {
|
||||||
|
|
||||||
if (!manual) {
|
|
||||||
// Pulse
|
|
||||||
eolBtn.classList.add("pulse");
|
|
||||||
setTimeout(() => {
|
|
||||||
eolBtn.classList.remove("pulse");
|
|
||||||
}, 2000);
|
|
||||||
// Alert
|
// Alert
|
||||||
this.app.alert(`Output EOL separator has been changed to ${eolCodeToName[eol]}`, 5000);
|
this.app.alert(`Output end of line separator has been detected and changed to ${eolCodeToName[eol]}`, 5000);
|
||||||
}
|
}
|
||||||
|
|
||||||
const currentTabNum = this.manager.tabs.getActiveTab("output");
|
const currentTabNum = this.manager.tabs.getActiveTab("output");
|
||||||
|
@ -205,13 +198,23 @@ class OutputWaiter {
|
||||||
return this.outputs[currentTabNum].eolSequence;
|
return this.outputs[currentTabNum].eolSequence;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether the output EOL sequence was set manually or has been detected automatically
|
||||||
|
* @returns {number} - 0 = unset, 1 = detected, 2 = manual
|
||||||
|
*/
|
||||||
|
getEOLState() {
|
||||||
|
return this.eolState;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Handler for Chr Enc change events
|
* Handler for Chr Enc change events
|
||||||
* Sets the output character encoding
|
* Sets the output character encoding
|
||||||
* @param {number} chrEncVal
|
* @param {number} chrEncVal
|
||||||
|
* @param {boolean} [manual=false]
|
||||||
*/
|
*/
|
||||||
async chrEncChange(chrEncVal) {
|
async chrEncChange(chrEncVal, manual=false) {
|
||||||
if (typeof chrEncVal !== "number") return;
|
if (typeof chrEncVal !== "number") return;
|
||||||
|
const currentEnc = this.getChrEnc();
|
||||||
|
|
||||||
const currentTabNum = this.manager.tabs.getActiveTab("output");
|
const currentTabNum = this.manager.tabs.getActiveTab("output");
|
||||||
if (currentTabNum >= 0) {
|
if (currentTabNum >= 0) {
|
||||||
|
@ -220,10 +223,17 @@ class OutputWaiter {
|
||||||
throw new Error(`Cannot change output ${currentTabNum} chrEnc to ${chrEncVal}`);
|
throw new Error(`Cannot change output ${currentTabNum} chrEnc to ${chrEncVal}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reset the output, forcing it to re-decode the data with the new character encoding
|
this.encodingState = manual ? 2 : this.encodingState;
|
||||||
await this.setOutput(this.currentOutputCache, true);
|
|
||||||
// Update the URL manually since we aren't firing a statechange event
|
if (this.encodingState > 1) {
|
||||||
this.app.updateURL(true);
|
// Reset the output, forcing it to re-decode the data with the new character encoding
|
||||||
|
await this.setOutput(this.currentOutputCache, true);
|
||||||
|
// Update the URL manually since we aren't firing a statechange event
|
||||||
|
this.app.updateURL(true);
|
||||||
|
} else if (currentEnc !== chrEncVal) {
|
||||||
|
// Alert
|
||||||
|
this.app.alert(`Output character encoding has been detected and changed to ${CHR_ENC_SIMPLE_REVERSE_LOOKUP[chrEncVal] || "Raw Bytes"}`, 5000);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -238,6 +248,14 @@ class OutputWaiter {
|
||||||
return this.outputs[currentTabNum].encoding;
|
return this.outputs[currentTabNum].encoding;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns whether the output character encoding was set manually or has been detected automatically
|
||||||
|
* @returns {number} - 0 = unset, 1 = detected, 2 = manual
|
||||||
|
*/
|
||||||
|
getEncodingState() {
|
||||||
|
return this.encodingState;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets word wrap on the output editor
|
* Sets word wrap on the output editor
|
||||||
* @param {boolean} wrap
|
* @param {boolean} wrap
|
||||||
|
@ -273,6 +291,7 @@ class OutputWaiter {
|
||||||
const tabNum = this.manager.tabs.getActiveTab("output");
|
const tabNum = this.manager.tabs.getActiveTab("output");
|
||||||
this.manager.timing.recordTime("outputDecodingStart", tabNum);
|
this.manager.timing.recordTime("outputDecodingStart", tabNum);
|
||||||
if (data instanceof ArrayBuffer) {
|
if (data instanceof ArrayBuffer) {
|
||||||
|
await this.detectEncoding(data);
|
||||||
data = await this.bufferToStr(data);
|
data = await this.bufferToStr(data);
|
||||||
}
|
}
|
||||||
this.manager.timing.recordTime("outputDecodingEnd", tabNum);
|
this.manager.timing.recordTime("outputDecodingEnd", tabNum);
|
||||||
|
@ -380,7 +399,7 @@ class OutputWaiter {
|
||||||
*/
|
*/
|
||||||
detectEOLSequence(data) {
|
detectEOLSequence(data) {
|
||||||
// If EOL has been fixed, skip this.
|
// If EOL has been fixed, skip this.
|
||||||
if (this.eolSetManually) return;
|
if (this.eolState > 1) return;
|
||||||
// If data is too long, skip this.
|
// If data is too long, skip this.
|
||||||
if (data.length > 1000000) return;
|
if (data.length > 1000000) return;
|
||||||
|
|
||||||
|
@ -402,17 +421,54 @@ class OutputWaiter {
|
||||||
}, 0);
|
}, 0);
|
||||||
if (total === 0) return;
|
if (total === 0) return;
|
||||||
|
|
||||||
// If CRLF not zero and more than half the highest alternative, choose CRLF
|
// Find most prevalent line ending sequence
|
||||||
const highest = Object.entries(eolCharCounts).reduce((acc, curr) => {
|
const highest = Object.entries(eolCharCounts).reduce((acc, curr) => {
|
||||||
return curr[1] > acc[1] ? curr : acc;
|
return curr[1] > acc[1] ? curr : acc;
|
||||||
}, ["LF", 0]);
|
}, ["LF", 0]);
|
||||||
|
let choice = highest[0];
|
||||||
|
|
||||||
|
// If CRLF not zero and more than half the highest alternative, choose CRLF
|
||||||
if ((eolCharCounts.CRLF * 2) > highest[1]) {
|
if ((eolCharCounts.CRLF * 2) > highest[1]) {
|
||||||
this.eolChange("CRLF");
|
choice = "CRLF";
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Else choose max
|
const eolVal = eolCodeToSeq[choice];
|
||||||
this.eolChange(highest[0]);
|
if (eolVal === this.getEOLSeq()) return;
|
||||||
|
|
||||||
|
// Setting automatically
|
||||||
|
this.eolState = 1;
|
||||||
|
this.eolChange(choice);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks whether the character encoding should be updated.
|
||||||
|
*
|
||||||
|
* @param {ArrayBuffer} data
|
||||||
|
*/
|
||||||
|
async detectEncoding(data) {
// Picks the output character encoding from the raw bytes unless the user has
// chosen one: Raw Bytes (0) when the data is not valid UTF8, UTF8 (65001)
// when it contains multi-byte UTF8. ASCII-only data changes nothing.
|
||||||
|
// If encoding has been fixed, skip this.
// (encodingState: 0 = unset, 1 = detected, 2 = manual)
|
||||||
|
if (this.encodingState > 1) return;
|
||||||
|
// If data is too long, skip this.
|
||||||
|
if (data.byteLength > 1000000) return;
|
||||||
|
|
||||||
|
const enc = isUTF8(data); // 0 = not UTF8, 1 = ASCII, 2 = UTF8
|
||||||
|
|
||||||
|
switch (enc) {
|
||||||
|
case 0: // not UTF8
|
||||||
|
// Set to Raw Bytes
|
||||||
|
// The state is marked as detected first; chrEncChange is then called with
|
||||||
// manual=false so that it does not promote the state to 2 (manual).
this.encodingState = 1;
|
||||||
|
await this.chrEncChange(0, false);
|
||||||
|
break;
|
||||||
|
case 2: // UTF8
|
||||||
|
// Set to UTF8
|
||||||
|
// Same pattern as above: flag as detected, then apply non-manually.
this.encodingState = 1;
|
||||||
|
await this.chrEncChange(65001, false);
|
||||||
|
break;
|
||||||
|
case 1: // ASCII
|
||||||
|
default:
|
||||||
|
// Ignore
// ASCII-only data: both the encoding and encodingState are left as they are.
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in a new issue