mirror of
https://github.com/gchq/CyberChef
synced 2024-12-29 05:53:11 +00:00
OCR operation now relies on local files
This commit is contained in:
parent
3539e065fa
commit
70346bce35
8 changed files with 259 additions and 3 deletions
196
package-lock.json
generated
196
package-lock.json
generated
|
@ -3497,6 +3497,170 @@
|
|||
"integrity": "sha1-Z29us8OZl8LuGsOpJP1hJHSPV40=",
|
||||
"dev": true
|
||||
},
|
||||
"copy-webpack-plugin": {
|
||||
"version": "5.0.4",
|
||||
"resolved": "https://registry.npmjs.org/copy-webpack-plugin/-/copy-webpack-plugin-5.0.4.tgz",
|
||||
"integrity": "sha512-YBuYGpSzoCHSSDGyHy6VJ7SHojKp6WHT4D7ItcQFNAYx2hrwkMe56e97xfVR0/ovDuMTrMffXUiltvQljtAGeg==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"cacache": "^11.3.3",
|
||||
"find-cache-dir": "^2.1.0",
|
||||
"glob-parent": "^3.1.0",
|
||||
"globby": "^7.1.1",
|
||||
"is-glob": "^4.0.1",
|
||||
"loader-utils": "^1.2.3",
|
||||
"minimatch": "^3.0.4",
|
||||
"normalize-path": "^3.0.0",
|
||||
"p-limit": "^2.2.0",
|
||||
"schema-utils": "^1.0.0",
|
||||
"serialize-javascript": "^1.7.0",
|
||||
"webpack-log": "^2.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"big.js": {
|
||||
"version": "5.2.2",
|
||||
"resolved": "https://registry.npmjs.org/big.js/-/big.js-5.2.2.tgz",
|
||||
"integrity": "sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==",
|
||||
"dev": true
|
||||
},
|
||||
"cacache": {
|
||||
"version": "11.3.3",
|
||||
"resolved": "https://registry.npmjs.org/cacache/-/cacache-11.3.3.tgz",
|
||||
"integrity": "sha512-p8WcneCytvzPxhDvYp31PD039vi77I12W+/KfR9S8AZbaiARFBCpsPJS+9uhWfeBfeAtW7o/4vt3MUqLkbY6nA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"bluebird": "^3.5.5",
|
||||
"chownr": "^1.1.1",
|
||||
"figgy-pudding": "^3.5.1",
|
||||
"glob": "^7.1.4",
|
||||
"graceful-fs": "^4.1.15",
|
||||
"lru-cache": "^5.1.1",
|
||||
"mississippi": "^3.0.0",
|
||||
"mkdirp": "^0.5.1",
|
||||
"move-concurrently": "^1.0.1",
|
||||
"promise-inflight": "^1.0.1",
|
||||
"rimraf": "^2.6.3",
|
||||
"ssri": "^6.0.1",
|
||||
"unique-filename": "^1.1.1",
|
||||
"y18n": "^4.0.0"
|
||||
}
|
||||
},
|
||||
"glob-parent": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz",
|
||||
"integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"is-glob": "^3.1.0",
|
||||
"path-dirname": "^1.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"is-glob": {
|
||||
"version": "3.1.0",
|
||||
"resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz",
|
||||
"integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"is-extglob": "^2.1.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"globby": {
|
||||
"version": "7.1.1",
|
||||
"resolved": "https://registry.npmjs.org/globby/-/globby-7.1.1.tgz",
|
||||
"integrity": "sha1-+yzP+UAfhgCUXfral0QMypcrhoA=",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"array-union": "^1.0.1",
|
||||
"dir-glob": "^2.0.0",
|
||||
"glob": "^7.1.2",
|
||||
"ignore": "^3.3.5",
|
||||
"pify": "^3.0.0",
|
||||
"slash": "^1.0.0"
|
||||
}
|
||||
},
|
||||
"ignore": {
|
||||
"version": "3.3.10",
|
||||
"resolved": "https://registry.npmjs.org/ignore/-/ignore-3.3.10.tgz",
|
||||
"integrity": "sha512-Pgs951kaMm5GXP7MOvxERINe3gsaVjUWFm+UZPSq9xYriQAksyhg0csnS0KXSNRD5NmNdapXEpjxG49+AKh/ug==",
|
||||
"dev": true
|
||||
},
|
||||
"json5": {
|
||||
"version": "1.0.1",
|
||||
"resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz",
|
||||
"integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"minimist": "^1.2.0"
|
||||
}
|
||||
},
|
||||
"loader-utils": {
|
||||
"version": "1.2.3",
|
||||
"resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-1.2.3.tgz",
|
||||
"integrity": "sha512-fkpz8ejdnEMG3s37wGL07iSBDg99O9D5yflE9RGNH3hRdx9SOwYfnGYdZOUIZitN8E+E2vkq3MUMYMvPYl5ZZA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"big.js": "^5.2.2",
|
||||
"emojis-list": "^2.0.0",
|
||||
"json5": "^1.0.1"
|
||||
}
|
||||
},
|
||||
"lru-cache": {
|
||||
"version": "5.1.1",
|
||||
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz",
|
||||
"integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"yallist": "^3.0.2"
|
||||
}
|
||||
},
|
||||
"normalize-path": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz",
|
||||
"integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==",
|
||||
"dev": true
|
||||
},
|
||||
"pify": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz",
|
||||
"integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY=",
|
||||
"dev": true
|
||||
},
|
||||
"rimraf": {
|
||||
"version": "2.7.1",
|
||||
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.7.1.tgz",
|
||||
"integrity": "sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"glob": "^7.1.3"
|
||||
}
|
||||
},
|
||||
"schema-utils": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-1.0.0.tgz",
|
||||
"integrity": "sha512-i27Mic4KovM/lnGsy8whRCHhc7VicJajAjTrYg11K9zfZXnYIt4k5F+kZkwjnrhKzLic/HLU4j11mjsz2G/75g==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"ajv": "^6.1.0",
|
||||
"ajv-errors": "^1.0.0",
|
||||
"ajv-keywords": "^3.1.0"
|
||||
}
|
||||
},
|
||||
"y18n": {
|
||||
"version": "4.0.0",
|
||||
"resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.0.tgz",
|
||||
"integrity": "sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==",
|
||||
"dev": true
|
||||
},
|
||||
"yallist": {
|
||||
"version": "3.0.3",
|
||||
"resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz",
|
||||
"integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"core-js": {
|
||||
"version": "3.2.1",
|
||||
"resolved": "https://registry.npmjs.org/core-js/-/core-js-3.2.1.tgz",
|
||||
|
@ -4358,6 +4522,32 @@
|
|||
"randombytes": "^2.0.0"
|
||||
}
|
||||
},
|
||||
"dir-glob": {
|
||||
"version": "2.2.2",
|
||||
"resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-2.2.2.tgz",
|
||||
"integrity": "sha512-f9LBi5QWzIW3I6e//uxZoLBlUt9kcp66qo0sSCxL6YZKc75R1c4MFCoe/LaZiBGmgujvQdxc5Bn3QhfyvK5Hsw==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"path-type": "^3.0.0"
|
||||
},
|
||||
"dependencies": {
|
||||
"path-type": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/path-type/-/path-type-3.0.0.tgz",
|
||||
"integrity": "sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"pify": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"pify": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz",
|
||||
"integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY=",
|
||||
"dev": true
|
||||
}
|
||||
}
|
||||
},
|
||||
"dns-equal": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/dns-equal/-/dns-equal-1.0.0.tgz",
|
||||
|
@ -11682,6 +11872,12 @@
|
|||
"xmlbuilder": "^13.0.0"
|
||||
}
|
||||
},
|
||||
"slash": {
|
||||
"version": "1.0.0",
|
||||
"resolved": "https://registry.npmjs.org/slash/-/slash-1.0.0.tgz",
|
||||
"integrity": "sha1-xB8vbDn8FtHNF61LXYlhFK5HDVU=",
|
||||
"dev": true
|
||||
},
|
||||
"slice-ansi": {
|
||||
"version": "2.1.0",
|
||||
"resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-2.1.0.tgz",
|
||||
|
|
|
@ -45,6 +45,7 @@
|
|||
"babel-plugin-dynamic-import-node": "^2.3.0",
|
||||
"chromedriver": "^76.0.1",
|
||||
"colors": "^1.3.3",
|
||||
"copy-webpack-plugin": "^5.0.4",
|
||||
"css-loader": "^3.2.0",
|
||||
"eslint": "^6.2.2",
|
||||
"exports-loader": "^0.7.0",
|
||||
|
|
|
@ -14,6 +14,8 @@ import { isWorkerEnvironment } from "../Utils.mjs";
|
|||
import Tesseract from "tesseract.js";
|
||||
const { TesseractWorker } = Tesseract;
|
||||
|
||||
import process from "process";
|
||||
|
||||
/**
|
||||
* Optical Character Recognition operation
|
||||
*/
|
||||
|
@ -26,7 +28,7 @@ class OpticalCharacterRecognition extends Operation {
|
|||
super();
|
||||
|
||||
this.name = "Optical Character Recognition";
|
||||
this.module = "Image";
|
||||
this.module = "OCR";
|
||||
this.description = "Optical character recognition or optical character reader (OCR) is the mechanical or electronic conversion of images of typed, handwritten or printed text into machine-encoded text.<br><br>Supported image formats: png, jpg, bmp, pbm.";
|
||||
this.infoURL = "https://wikipedia.org/wiki/Optical_character_recognition";
|
||||
this.inputType = "ArrayBuffer";
|
||||
|
@ -48,14 +50,22 @@ class OpticalCharacterRecognition extends Operation {
|
|||
async run(input, args) {
|
||||
const [showConfidence] = args;
|
||||
|
||||
if (!isWorkerEnvironment()) throw OperationError("This operation only works in a browser");
|
||||
|
||||
const type = isImage(input);
|
||||
if (!type) {
|
||||
throw new OperationError("Invalid File Type");
|
||||
}
|
||||
|
||||
const assetDir = isWorkerEnvironment() ? `${self.docURL}/assets/` : `${process.cwd()}/src/core/vendor/`;
|
||||
|
||||
try {
|
||||
const image = `data:${type};base64,${toBase64(input)}`;
|
||||
const worker = new TesseractWorker();
|
||||
const worker = new TesseractWorker({
|
||||
workerPath: `${assetDir}/tesseract/worker.min.js`,
|
||||
langPath: `${assetDir}/tesseract/lang-data/`,
|
||||
corePath: `${assetDir}/tesseract/tesseract-core.wasm.js`,
|
||||
});
|
||||
const result = await worker.recognize(image)
|
||||
.progress(progress => {
|
||||
if (isWorkerEnvironment()) {
|
||||
|
|
BIN
src/core/vendor/tesseract/lang-data/eng.traineddata.gz
vendored
Normal file
BIN
src/core/vendor/tesseract/lang-data/eng.traineddata.gz
vendored
Normal file
Binary file not shown.
24
src/core/vendor/tesseract/tesseract-core.wasm.js
vendored
Normal file
24
src/core/vendor/tesseract/tesseract-core.wasm.js
vendored
Normal file
File diff suppressed because one or more lines are too long
17
src/core/vendor/tesseract/worker.min.js
vendored
Normal file
17
src/core/vendor/tesseract/worker.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
|
@ -248,7 +248,7 @@ TestRegister.addTests([
|
|||
}
|
||||
]
|
||||
},
|
||||
/*{ Commented out as it takes a while to run and drops a file to disk (eng.traineddata)
|
||||
/*{ This operation only works in a browser
|
||||
name: "Optical Character Recognition",
|
||||
input: "iVBORw0KGgoAAAANSUhEUgAAAUAAAAC0CAIAAABqhmJGAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAASuSURBVHhe7dftVdswAIbRzsVAzMM0XabDUCOUxLYsWW4Jp+/pvf9w9GH76CHw4x2IJWAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAI9p8G/PbyY8rL2686g8t+vnqHTyfgIYfvz/26veTXn/UKX8+f0EU9bHrtu/6KfAN/AwEXAj7lFf2TBFw4nae8on+SgIvJ01n/KLzpDK+L3bT/Ap4O+HC+V12mTH+M3gzcLbIY/EO6HfxYp13k09nb6r3UqcdnjoCL3ll72J26h+35Oxy2XvZ0wOLaXq9v2+F1UC+7RZtMZ/DnfX1lwDOPzwUCLo7O2trtDK8H3M/iqoc6bj1subT68XTA/F7bGJooyzKbhTvLPHY8eJLHlbNX1DqYUVfdXbqwJjsCLsans37aNNJM6w68OR0wv9f9ymKw3k67yn2ZZpHlg3a3zis60s6oV+ZvlzMCLoanc3Dsdt9TdWT/lM8OmNjr5KY72jmzq1zfrbvXtVtmRMDF8HTWcgaaqIrD1U4G/MFewxrW262s5jS/Fzpmdts6mnHy+Fwl4GJ0OjsNrG1P/y7CNo3+gEt7jW56MVprNed7A/5w+n6YJ+BieDpnj/jO6pweTz0acGWvmZveL9XOmd3x6wKuTt8PEwRczLRw4eje1XX7c/cDruw1uuneOu2c4aOvzI57mJhRh1xZlQ0BF+Oz9vcF96fuB1zYa7R2b5mD6/XSwdfg8snj4q21+W/L02dfzIxhQMDFyTm6Hd7m+JYP7rPKT5sRuzhOBywm91rUkYc3fV9ltchtr8VmzuGOdfDB9N1tFYefNfdXLmyGjNZkhoCLUQufVqd/7z7rUcLW/XieDvg0s9difNOdRV5ePibt5vTuazusWbF9rs2E5v4mH58LBFyMW7g5OID7s9cMuTygmt9rcNPb5MrAz0lHc3Z9Ht7XZsxqxO36ZtLR/c0+PpMEzLOc/4LhrwmYZ6lfywJ+JgHzJPr9DgLmi23/zdXvcwmYL7YKWL1PJ2AIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmCI9f7+G6yFxVg/GyYwAAAAAElFTkSuQmCC",
|
||||
expectedOutput: "Tesseract.js\n",
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
const webpack = require("webpack");
|
||||
const MiniCssExtractPlugin = require("mini-css-extract-plugin");
|
||||
const CopyWebpackPlugin = require("copy-webpack-plugin");
|
||||
const path = require("path");
|
||||
|
||||
/**
|
||||
|
@ -50,6 +51,13 @@ module.exports = {
|
|||
new MiniCssExtractPlugin({
|
||||
filename: "assets/[name].css"
|
||||
}),
|
||||
new CopyWebpackPlugin([
|
||||
{
|
||||
context: "src/core/vendor/",
|
||||
from: "tesseract/**/*",
|
||||
to: "assets/"
|
||||
}
|
||||
])
|
||||
],
|
||||
resolve: {
|
||||
alias: {
|
||||
|
|
Loading…
Reference in a new issue