OCR operation now relies on local files

This commit is contained in:
n1474335 2019-09-13 17:40:20 +01:00
parent 3539e065fa
commit 70346bce35
8 changed files with 259 additions and 3 deletions

196
package-lock.json generated
View file

@ -3497,6 +3497,170 @@
"integrity": "sha1-Z29us8OZl8LuGsOpJP1hJHSPV40=",
"dev": true
},
"copy-webpack-plugin": {
"version": "5.0.4",
"resolved": "https://registry.npmjs.org/copy-webpack-plugin/-/copy-webpack-plugin-5.0.4.tgz",
"integrity": "sha512-YBuYGpSzoCHSSDGyHy6VJ7SHojKp6WHT4D7ItcQFNAYx2hrwkMe56e97xfVR0/ovDuMTrMffXUiltvQljtAGeg==",
"dev": true,
"requires": {
"cacache": "^11.3.3",
"find-cache-dir": "^2.1.0",
"glob-parent": "^3.1.0",
"globby": "^7.1.1",
"is-glob": "^4.0.1",
"loader-utils": "^1.2.3",
"minimatch": "^3.0.4",
"normalize-path": "^3.0.0",
"p-limit": "^2.2.0",
"schema-utils": "^1.0.0",
"serialize-javascript": "^1.7.0",
"webpack-log": "^2.0.0"
},
"dependencies": {
"big.js": {
"version": "5.2.2",
"resolved": "https://registry.npmjs.org/big.js/-/big.js-5.2.2.tgz",
"integrity": "sha512-vyL2OymJxmarO8gxMr0mhChsO9QGwhynfuu4+MHTAW6czfq9humCB7rKpUjDd9YUiDPU4mzpyupFSvOClAwbmQ==",
"dev": true
},
"cacache": {
"version": "11.3.3",
"resolved": "https://registry.npmjs.org/cacache/-/cacache-11.3.3.tgz",
"integrity": "sha512-p8WcneCytvzPxhDvYp31PD039vi77I12W+/KfR9S8AZbaiARFBCpsPJS+9uhWfeBfeAtW7o/4vt3MUqLkbY6nA==",
"dev": true,
"requires": {
"bluebird": "^3.5.5",
"chownr": "^1.1.1",
"figgy-pudding": "^3.5.1",
"glob": "^7.1.4",
"graceful-fs": "^4.1.15",
"lru-cache": "^5.1.1",
"mississippi": "^3.0.0",
"mkdirp": "^0.5.1",
"move-concurrently": "^1.0.1",
"promise-inflight": "^1.0.1",
"rimraf": "^2.6.3",
"ssri": "^6.0.1",
"unique-filename": "^1.1.1",
"y18n": "^4.0.0"
}
},
"glob-parent": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-3.1.0.tgz",
"integrity": "sha1-nmr2KZ2NO9K9QEMIMr0RPfkGxa4=",
"dev": true,
"requires": {
"is-glob": "^3.1.0",
"path-dirname": "^1.0.0"
},
"dependencies": {
"is-glob": {
"version": "3.1.0",
"resolved": "https://registry.npmjs.org/is-glob/-/is-glob-3.1.0.tgz",
"integrity": "sha1-e6WuJCF4BKxwcHuWkiVnSGzD6Eo=",
"dev": true,
"requires": {
"is-extglob": "^2.1.0"
}
}
}
},
"globby": {
"version": "7.1.1",
"resolved": "https://registry.npmjs.org/globby/-/globby-7.1.1.tgz",
"integrity": "sha1-+yzP+UAfhgCUXfral0QMypcrhoA=",
"dev": true,
"requires": {
"array-union": "^1.0.1",
"dir-glob": "^2.0.0",
"glob": "^7.1.2",
"ignore": "^3.3.5",
"pify": "^3.0.0",
"slash": "^1.0.0"
}
},
"ignore": {
"version": "3.3.10",
"resolved": "https://registry.npmjs.org/ignore/-/ignore-3.3.10.tgz",
"integrity": "sha512-Pgs951kaMm5GXP7MOvxERINe3gsaVjUWFm+UZPSq9xYriQAksyhg0csnS0KXSNRD5NmNdapXEpjxG49+AKh/ug==",
"dev": true
},
"json5": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/json5/-/json5-1.0.1.tgz",
"integrity": "sha512-aKS4WQjPenRxiQsC93MNfjx+nbF4PAdYzmd/1JIj8HYzqfbu86beTuNgXDzPknWk0n0uARlyewZo4s++ES36Ow==",
"dev": true,
"requires": {
"minimist": "^1.2.0"
}
},
"loader-utils": {
"version": "1.2.3",
"resolved": "https://registry.npmjs.org/loader-utils/-/loader-utils-1.2.3.tgz",
"integrity": "sha512-fkpz8ejdnEMG3s37wGL07iSBDg99O9D5yflE9RGNH3hRdx9SOwYfnGYdZOUIZitN8E+E2vkq3MUMYMvPYl5ZZA==",
"dev": true,
"requires": {
"big.js": "^5.2.2",
"emojis-list": "^2.0.0",
"json5": "^1.0.1"
}
},
"lru-cache": {
"version": "5.1.1",
"resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz",
"integrity": "sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==",
"dev": true,
"requires": {
"yallist": "^3.0.2"
}
},
"normalize-path": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/normalize-path/-/normalize-path-3.0.0.tgz",
"integrity": "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==",
"dev": true
},
"pify": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz",
"integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY=",
"dev": true
},
"rimraf": {
"version": "2.7.1",
"resolved": "https://registry.npmjs.org/rimraf/-/rimraf-2.7.1.tgz",
"integrity": "sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==",
"dev": true,
"requires": {
"glob": "^7.1.3"
}
},
"schema-utils": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-1.0.0.tgz",
"integrity": "sha512-i27Mic4KovM/lnGsy8whRCHhc7VicJajAjTrYg11K9zfZXnYIt4k5F+kZkwjnrhKzLic/HLU4j11mjsz2G/75g==",
"dev": true,
"requires": {
"ajv": "^6.1.0",
"ajv-errors": "^1.0.0",
"ajv-keywords": "^3.1.0"
}
},
"y18n": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/y18n/-/y18n-4.0.0.tgz",
"integrity": "sha512-r9S/ZyXu/Xu9q1tYlpsLIsa3EeLXXk0VwlxqTcFRfg9EhMW+17kbt9G0NrgCmhGb5vT2hyhJZLfDGx+7+5Uj/w==",
"dev": true
},
"yallist": {
"version": "3.0.3",
"resolved": "https://registry.npmjs.org/yallist/-/yallist-3.0.3.tgz",
"integrity": "sha512-S+Zk8DEWE6oKpV+vI3qWkaK+jSbIK86pCwe2IF/xwIpQ8jEuxpw9NyaGjmp9+BoJv5FV2piqCDcoCtStppiq2A==",
"dev": true
}
}
},
"core-js": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/core-js/-/core-js-3.2.1.tgz",
@ -4358,6 +4522,32 @@
"randombytes": "^2.0.0"
}
},
"dir-glob": {
"version": "2.2.2",
"resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-2.2.2.tgz",
"integrity": "sha512-f9LBi5QWzIW3I6e//uxZoLBlUt9kcp66qo0sSCxL6YZKc75R1c4MFCoe/LaZiBGmgujvQdxc5Bn3QhfyvK5Hsw==",
"dev": true,
"requires": {
"path-type": "^3.0.0"
},
"dependencies": {
"path-type": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/path-type/-/path-type-3.0.0.tgz",
"integrity": "sha512-T2ZUsdZFHgA3u4e5PfPbjd7HDDpxPnQb5jN0SrDsjNSuVXHJqtwTnWqG0B1jZrgmJ/7lj1EmVIByWt1gxGkWvg==",
"dev": true,
"requires": {
"pify": "^3.0.0"
}
},
"pify": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/pify/-/pify-3.0.0.tgz",
"integrity": "sha1-5aSs0sEB/fPZpNB/DbxNtJ3SgXY=",
"dev": true
}
}
},
"dns-equal": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/dns-equal/-/dns-equal-1.0.0.tgz",
@ -11682,6 +11872,12 @@
"xmlbuilder": "^13.0.0"
}
},
"slash": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/slash/-/slash-1.0.0.tgz",
"integrity": "sha1-xB8vbDn8FtHNF61LXYlhFK5HDVU=",
"dev": true
},
"slice-ansi": {
"version": "2.1.0",
"resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-2.1.0.tgz",

View file

@ -45,6 +45,7 @@
"babel-plugin-dynamic-import-node": "^2.3.0",
"chromedriver": "^76.0.1",
"colors": "^1.3.3",
"copy-webpack-plugin": "^5.0.4",
"css-loader": "^3.2.0",
"eslint": "^6.2.2",
"exports-loader": "^0.7.0",

View file

@ -14,6 +14,8 @@ import { isWorkerEnvironment } from "../Utils.mjs";
import Tesseract from "tesseract.js";
const { TesseractWorker } = Tesseract;
import process from "process";
/**
* Optical Character Recognition operation
*/
@ -26,7 +28,7 @@ class OpticalCharacterRecognition extends Operation {
super();
this.name = "Optical Character Recognition";
this.module = "Image";
this.module = "OCR";
this.description = "Optical character recognition or optical character reader (OCR) is the mechanical or electronic conversion of images of typed, handwritten or printed text into machine-encoded text.<br><br>Supported image formats: png, jpg, bmp, pbm.";
this.infoURL = "https://wikipedia.org/wiki/Optical_character_recognition";
this.inputType = "ArrayBuffer";
@ -48,14 +50,22 @@ class OpticalCharacterRecognition extends Operation {
async run(input, args) {
const [showConfidence] = args;
if (!isWorkerEnvironment()) throw OperationError("This operation only works in a browser");
const type = isImage(input);
if (!type) {
throw new OperationError("Invalid File Type");
}
const assetDir = isWorkerEnvironment() ? `${self.docURL}/assets/` : `${process.cwd()}/src/core/vendor/`;
try {
const image = `data:${type};base64,${toBase64(input)}`;
const worker = new TesseractWorker();
const worker = new TesseractWorker({
workerPath: `${assetDir}/tesseract/worker.min.js`,
langPath: `${assetDir}/tesseract/lang-data/`,
corePath: `${assetDir}/tesseract/tesseract-core.wasm.js`,
});
const result = await worker.recognize(image)
.progress(progress => {
if (isWorkerEnvironment()) {

Binary file not shown.

File diff suppressed because one or more lines are too long

17
src/core/vendor/tesseract/worker.min.js vendored Normal file

File diff suppressed because one or more lines are too long

View file

@ -248,7 +248,7 @@ TestRegister.addTests([
}
]
},
/*{ Commented out as it takes a while to run and drops a file to disk (eng.traineddata)
/*{ This operation only works in a browser
name: "Optical Character Recognition",
input: "iVBORw0KGgoAAAANSUhEUgAAAUAAAAC0CAIAAABqhmJGAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAASuSURBVHhe7dftVdswAIbRzsVAzMM0XabDUCOUxLYsWW4Jp+/pvf9w9GH76CHw4x2IJWAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAI9p8G/PbyY8rL2686g8t+vnqHTyfgIYfvz/26veTXn/UKX8+f0EU9bHrtu/6KfAN/AwEXAj7lFf2TBFw4nae8on+SgIvJ01n/KLzpDK+L3bT/Ap4O+HC+V12mTH+M3gzcLbIY/EO6HfxYp13k09nb6r3UqcdnjoCL3ll72J26h+35Oxy2XvZ0wOLaXq9v2+F1UC+7RZtMZ/DnfX1lwDOPzwUCLo7O2trtDK8H3M/iqoc6bj1subT68XTA/F7bGJooyzKbhTvLPHY8eJLHlbNX1DqYUVfdXbqwJjsCLsans37aNNJM6w68OR0wv9f9ymKw3k67yn2ZZpHlg3a3zis60s6oV+ZvlzMCLoanc3Dsdt9TdWT/lM8OmNjr5KY72jmzq1zfrbvXtVtmRMDF8HTWcgaaqIrD1U4G/MFewxrW262s5jS/Fzpmdts6mnHy+Fwl4GJ0OjsNrG1P/y7CNo3+gEt7jW56MVprNed7A/5w+n6YJ+BieDpnj/jO6pweTz0acGWvmZveL9XOmd3x6wKuTt8PEwRczLRw4eje1XX7c/cDruw1uuneOu2c4aOvzI57mJhRh1xZlQ0BF+Oz9vcF96fuB1zYa7R2b5mD6/XSwdfg8snj4q21+W/L02dfzIxhQMDFyTm6Hd7m+JYP7rPKT5sRuzhOBywm91rUkYc3fV9ltchtr8VmzuGOdfDB9N1tFYefNfdXLmyGjNZkhoCLUQufVqd/7z7rUcLW/XieDvg0s9difNOdRV5ePibt5vTuazusWbF9rs2E5v4mH58LBFyMW7g5OID7s9cMuTygmt9rcNPb5MrAz0lHc3Z9Ht7XZsxqxO36ZtLR/c0+PpMEzLOc/4LhrwmYZ6lfywJ+JgHzJPr9DgLmi23/zdXvcwmYL7YKWL1PJ2AIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmAIJmCI9f7+G6yFxVg/GyYwAAAAAElFTkSuQmCC",
expectedOutput: "Tesseract.js\n",

View file

@ -1,5 +1,6 @@
const webpack = require("webpack");
const MiniCssExtractPlugin = require("mini-css-extract-plugin");
const CopyWebpackPlugin = require("copy-webpack-plugin");
const path = require("path");
/**
@ -50,6 +51,13 @@ module.exports = {
new MiniCssExtractPlugin({
filename: "assets/[name].css"
}),
new CopyWebpackPlugin([
{
context: "src/core/vendor/",
from: "tesseract/**/*",
to: "assets/"
}
])
],
resolve: {
alias: {