Refactored file type detection engine

This commit is contained in:
n1474335 2018-12-18 17:44:42 +00:00
parent d02124550b
commit e6fb0be1d0
12 changed files with 867 additions and 727 deletions

0
src/core/lib/BCD.mjs Executable file → Normal file
View file

0
src/core/lib/Base58.mjs Executable file → Normal file
View file

0
src/core/lib/Base64.mjs Executable file → Normal file
View file

0
src/core/lib/CanvasComponents.mjs Executable file → Normal file
View file

View file

@ -1,231 +0,0 @@
/**
* File extraction functions
*
* @author n1474335 [n1474335@gmail.com]
* @copyright Crown Copyright 2018
* @license Apache-2.0
*
*/
import Stream from "./Stream";
/**
* Attempts to extract a file from a data stream given its mime type and offset.
*
* @param {Uint8Array} bytes
* @param {Object} fileDetail
* @param {string} fileDetail.mime
* @param {string} fileDetail.ext
* @param {number} fileDetail.offset
* @returns {File}
*/
export function extractFile(bytes, fileDetail) {
let fileData;
switch (fileDetail.mime) {
case "image/jpeg":
fileData = extractJPEG(bytes, fileDetail.offset);
break;
case "application/x-msdownload":
fileData = extractMZPE(bytes, fileDetail.offset);
break;
case "application/pdf":
fileData = extractPDF(bytes, fileDetail.offset);
break;
case "application/zip":
fileData = extractZIP(bytes, fileDetail.offset);
break;
default:
throw new Error(`No extraction algorithm available for "${fileDetail.mime}" files`);
}
return new File([fileData], `extracted_at_0x${fileDetail.offset.toString(16)}.${fileDetail.ext}`);
}
/**
* JPEG extractor.
*
* @param {Uint8Array} bytes
* @param {number} offset
* @returns {Uint8Array}
*/
export function extractJPEG(bytes, offset) {
const stream = new Stream(bytes.slice(offset));
while (stream.hasMore()) {
const marker = stream.getBytes(2);
if (marker[0] !== 0xff) throw new Error("Invalid JPEG marker: " + marker);
let segmentSize = 0;
switch (marker[1]) {
// No length
case 0xd8: // Start of Image
case 0x01: // For temporary use in arithmetic coding
break;
case 0xd9: // End found
return stream.carve();
// Variable size segment
case 0xc0: // Start of frame (Baseline DCT)
case 0xc1: // Start of frame (Extended sequential DCT)
case 0xc2: // Start of frame (Progressive DCT)
case 0xc3: // Start of frame (Lossless sequential)
case 0xc4: // Define Huffman Table
case 0xc5: // Start of frame (Differential sequential DCT)
case 0xc6: // Start of frame (Differential progressive DCT)
case 0xc7: // Start of frame (Differential lossless)
case 0xc8: // Reserved for JPEG extensions
case 0xc9: // Start of frame (Extended sequential DCT)
case 0xca: // Start of frame (Progressive DCT)
case 0xcb: // Start of frame (Lossless sequential)
case 0xcc: // Define arithmetic conditioning table
case 0xcd: // Start of frame (Differential sequential DCT)
case 0xce: // Start of frame (Differential progressive DCT)
case 0xcf: // Start of frame (Differential lossless)
case 0xdb: // Define Quantization Table
case 0xde: // Define hierarchical progression
case 0xe0: // Application-specific
case 0xe1: // Application-specific
case 0xe2: // Application-specific
case 0xe3: // Application-specific
case 0xe4: // Application-specific
case 0xe5: // Application-specific
case 0xe6: // Application-specific
case 0xe7: // Application-specific
case 0xe8: // Application-specific
case 0xe9: // Application-specific
case 0xea: // Application-specific
case 0xeb: // Application-specific
case 0xec: // Application-specific
case 0xed: // Application-specific
case 0xee: // Application-specific
case 0xef: // Application-specific
case 0xfe: // Comment
segmentSize = stream.readInt(2, "be");
stream.position += segmentSize - 2;
break;
// 1 byte
case 0xdf: // Expand reference image
stream.position++;
break;
// 2 bytes
case 0xdc: // Define number of lines
case 0xdd: // Define restart interval
stream.position += 2;
break;
// Start scan
case 0xda: // Start of scan
segmentSize = stream.readInt(2, "be");
stream.position += segmentSize - 2;
stream.continueUntil(0xff);
break;
// Continue through encoded data
case 0x00: // Byte stuffing
case 0xd0: // Restart
case 0xd1: // Restart
case 0xd2: // Restart
case 0xd3: // Restart
case 0xd4: // Restart
case 0xd5: // Restart
case 0xd6: // Restart
case 0xd7: // Restart
stream.continueUntil(0xff);
break;
default:
stream.continueUntil(0xff);
break;
}
}
throw new Error("Unable to parse JPEG successfully");
}
/**
* Portable executable extractor.
* Assumes that the offset refers to an MZ header.
*
* @param {Uint8Array} bytes
* @param {number} offset
* @returns {Uint8Array}
*/
export function extractMZPE(bytes, offset) {
const stream = new Stream(bytes.slice(offset));
// Move to PE header pointer
stream.moveTo(0x3c);
const peAddress = stream.readInt(4, "le");
// Move to PE header
stream.moveTo(peAddress);
// Get number of sections
stream.moveForwardsBy(6);
const numSections = stream.readInt(2, "le");
// Get optional header size
stream.moveForwardsBy(12);
const optionalHeaderSize = stream.readInt(2, "le");
// Move past optional header to section header
stream.moveForwardsBy(2 + optionalHeaderSize);
// Move to final section header
stream.moveForwardsBy((numSections - 1) * 0x28);
// Get raw data info
stream.moveForwardsBy(16);
const rawDataSize = stream.readInt(4, "le");
const rawDataAddress = stream.readInt(4, "le");
// Move to end of final section
stream.moveTo(rawDataAddress + rawDataSize);
return stream.carve();
}
/**
* PDF extractor.
*
* @param {Uint8Array} bytes
* @param {number} offset
* @returns {Uint8Array}
*/
export function extractPDF(bytes, offset) {
const stream = new Stream(bytes.slice(offset));
// Find end-of-file marker (%%EOF)
stream.continueUntil([0x25, 0x25, 0x45, 0x4f, 0x46]);
stream.moveForwardsBy(5);
stream.consumeIf(0x0d);
stream.consumeIf(0x0a);
return stream.carve();
}
/**
* ZIP extractor.
*
* @param {Uint8Array} bytes
* @param {number} offset
* @returns {Uint8Array}
*/
export function extractZIP(bytes, offset) {
const stream = new Stream(bytes.slice(offset));
// Find End of central directory record
stream.continueUntil([0x50, 0x4b, 0x05, 0x06]);
// Get comment length and consume
stream.moveForwardsBy(20);
const commentLength = stream.readInt(2, "le");
stream.moveForwardsBy(commentLength);
return stream.carve();
}

808
src/core/lib/FileType.mjs Normal file
View file

@ -0,0 +1,808 @@
/**
* File type functions
*
* @author n1474335 [n1474335@gmail.com]
* @copyright Crown Copyright 2018
* @license Apache-2.0
*
*/
import Stream from "./Stream";
/**
* A categorised table of file types, including signatures to identifying them and functions
* to extract them where possible.
*/
const FILE_SIGNATURES = {
"Pictures": [
{
name: "JPEG Image",
extension: "jpg",
mime: "image/jpeg",
description: "",
signature: {
0: 0xff,
1: 0xd8,
2: 0xff
},
extractor: extractJPEG
},
{
name: "GIF Image",
extension: "gif",
mime: "image/gif",
description: "",
signature: {
0: 0x47,
1: 0x49,
2: 0x46
},
extractor: null
},
{
name: "PNG Image",
extension: "png",
mime: "image/png",
description: "",
signature: {
0: 0x89,
1: 0x50,
2: 0x4e,
3: 0x47
},
extractor: null
},
],
"Documents": [
{
name: "Portable Document Format",
extension: "pdf",
mime: "application/pdf",
description: "",
signature: {
0: 0x25,
1: 0x50,
2: 0x44,
3: 0x46
},
extractor: extractPDF
},
],
"Applications": [
{
name: "Windows Portable Executable",
extension: "exe",
mime: "application/x-msdownload",
description: "",
signature: {
0: 0x4d,
1: 0x5a
},
extractor: extractMZPE
},
],
"Archives": [
{
name: "ZIP",
extension: "zip",
mime: "application/zip",
description: "",
signature: {
0: 0x50,
1: 0x4b,
2: [0x3, 0x5, 0x7],
3: [0x4, 0x6, 0x8]
},
extractor: extractZIP
},
],
};
/**
* Checks whether a signature matches a buffer.
*
* @param {Object} sig - A dictionary of offsets with values assigned to them. These
* values can be numbers for static checks, arrays of potential valid matches, or
* bespoke functions to check the validity of the buffer value at that offset.
* @param {Uint8Array} buf
* @returns {boolean}
*/
function signatureMatches(sig, buf) {
for (const offset in sig) {
switch (typeof sig[offset]) {
case "number": // Static check
if (buf[offset] !== sig[offset])
return false;
break;
case "object": // Array of options
if (sig[offset].indexOf(buf[offset]) < 0)
return false;
break;
case "function": // More complex calculation
if (!sig[offset](buf[offset]))
return false;
break;
default:
throw new Error(`Unrecognised signature type at offset ${offset}`);
}
}
return true;
}
/**
* Given a buffer, detects magic byte sequences at specific positions and returns the
* extension and mime type.
*
* @param {Uint8Array} buf
* @returns {Object[]} type
* @returns {string} type.ext - File extension
* @returns {string} type.mime - Mime type
* @returns {string} [type.desc] - Description
*/
export function detectFileType(buf) {
if (!(buf && buf.length > 1)) {
return [];
}
const matchingFiles = [];
// TODO allow user to select which categories to check
for (const cat in FILE_SIGNATURES) {
const category = FILE_SIGNATURES[cat];
category.forEach(filetype => {
if (signatureMatches(filetype.signature, buf)) {
matchingFiles.push(filetype);
}
});
}
return matchingFiles;
// Delete all below this line once implemented in FILE_SIGNATURES above.
/*
if (buf[0] === 0xFF && buf[1] === 0xD8 && buf[2] === 0xFF) {
return {
ext: "jpg",
mime: "image/jpeg"
};
}
if (buf[0] === 0x89 && buf[1] === 0x50 && buf[2] === 0x4E && buf[3] === 0x47) {
return {
ext: "png",
mime: "image/png"
};
}
if (buf[0] === 0x47 && buf[1] === 0x49 && buf[2] === 0x46) {
return {
ext: "gif",
mime: "image/gif"
};
}
if (buf[8] === 0x57 && buf[9] === 0x45 && buf[10] === 0x42 && buf[11] === 0x50) {
return {
ext: "webp",
mime: "image/webp"
};
}
// needs to be before `tif` check
if (((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) && buf[8] === 0x43 && buf[9] === 0x52) {
return {
ext: "cr2",
mime: "image/x-canon-cr2"
};
}
if ((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) {
return {
ext: "tif",
mime: "image/tiff"
};
}
if (buf[0] === 0x42 && buf[1] === 0x4D) {
return {
ext: "bmp",
mime: "image/bmp"
};
}
if (buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0xBC) {
return {
ext: "jxr",
mime: "image/vnd.ms-photo"
};
}
if (buf[0] === 0x38 && buf[1] === 0x42 && buf[2] === 0x50 && buf[3] === 0x53) {
return {
ext: "psd",
mime: "image/vnd.adobe.photoshop"
};
}
// needs to be before `zip` check
if (buf[0] === 0x50 && buf[1] === 0x4B && buf[2] === 0x3 && buf[3] === 0x4 && buf[30] === 0x6D && buf[31] === 0x69 && buf[32] === 0x6D && buf[33] === 0x65 && buf[34] === 0x74 && buf[35] === 0x79 && buf[36] === 0x70 && buf[37] === 0x65 && buf[38] === 0x61 && buf[39] === 0x70 && buf[40] === 0x70 && buf[41] === 0x6C && buf[42] === 0x69 && buf[43] === 0x63 && buf[44] === 0x61 && buf[45] === 0x74 && buf[46] === 0x69 && buf[47] === 0x6F && buf[48] === 0x6E && buf[49] === 0x2F && buf[50] === 0x65 && buf[51] === 0x70 && buf[52] === 0x75 && buf[53] === 0x62 && buf[54] === 0x2B && buf[55] === 0x7A && buf[56] === 0x69 && buf[57] === 0x70) {
return {
ext: "epub",
mime: "application/epub+zip"
};
}
if (buf[0] === 0x50 && buf[1] === 0x4B && (buf[2] === 0x3 || buf[2] === 0x5 || buf[2] === 0x7) && (buf[3] === 0x4 || buf[3] === 0x6 || buf[3] === 0x8)) {
return {
ext: "zip",
mime: "application/zip"
};
}
if (buf[257] === 0x75 && buf[258] === 0x73 && buf[259] === 0x74 && buf[260] === 0x61 && buf[261] === 0x72) {
return {
ext: "tar",
mime: "application/x-tar"
};
}
if (buf[0] === 0x52 && buf[1] === 0x61 && buf[2] === 0x72 && buf[3] === 0x21 && buf[4] === 0x1A && buf[5] === 0x7 && (buf[6] === 0x0 || buf[6] === 0x1)) {
return {
ext: "rar",
mime: "application/x-rar-compressed"
};
}
if (buf[0] === 0x1F && buf[1] === 0x8B && buf[2] === 0x8) {
return {
ext: "gz",
mime: "application/gzip"
};
}
if (buf[0] === 0x42 && buf[1] === 0x5A && buf[2] === 0x68) {
return {
ext: "bz2",
mime: "application/x-bzip2"
};
}
if (buf[0] === 0x37 && buf[1] === 0x7A && buf[2] === 0xBC && buf[3] === 0xAF && buf[4] === 0x27 && buf[5] === 0x1C) {
return {
ext: "7z",
mime: "application/x-7z-compressed"
};
}
if (buf[0] === 0x78 && buf[1] === 0x01) {
return {
ext: "dmg, zlib",
mime: "application/x-apple-diskimage, application/x-deflate"
};
}
if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && (buf[3] === 0x18 || buf[3] === 0x20) && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) || (buf[0] === 0x33 && buf[1] === 0x67 && buf[2] === 0x70 && buf[3] === 0x35) || (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x6D && buf[9] === 0x70 && buf[10] === 0x34 && buf[11] === 0x32 && buf[16] === 0x6D && buf[17] === 0x70 && buf[18] === 0x34 && buf[19] === 0x31 && buf[20] === 0x6D && buf[21] === 0x70 && buf[22] === 0x34 && buf[23] === 0x32 && buf[24] === 0x69 && buf[25] === 0x73 && buf[26] === 0x6F && buf[27] === 0x6D)) {
return {
ext: "mp4",
mime: "video/mp4"
};
}
if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x56)) {
return {
ext: "m4v",
mime: "video/x-m4v"
};
}
if (buf[0] === 0x4D && buf[1] === 0x54 && buf[2] === 0x68 && buf[3] === 0x64) {
return {
ext: "mid",
mime: "audio/midi"
};
}
// needs to be before the `webm` check
if (buf[31] === 0x6D && buf[32] === 0x61 && buf[33] === 0x74 && buf[34] === 0x72 && buf[35] === 0x6f && buf[36] === 0x73 && buf[37] === 0x6B && buf[38] === 0x61) {
return {
ext: "mkv",
mime: "video/x-matroska"
};
}
if (buf[0] === 0x1A && buf[1] === 0x45 && buf[2] === 0xDF && buf[3] === 0xA3) {
return {
ext: "webm",
mime: "video/webm"
};
}
if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x14 && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) {
return {
ext: "mov",
mime: "video/quicktime"
};
}
if (buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46 && buf[8] === 0x41 && buf[9] === 0x56 && buf[10] === 0x49) {
return {
ext: "avi",
mime: "video/x-msvideo"
};
}
if (buf[0] === 0x30 && buf[1] === 0x26 && buf[2] === 0xB2 && buf[3] === 0x75 && buf[4] === 0x8E && buf[5] === 0x66 && buf[6] === 0xCF && buf[7] === 0x11 && buf[8] === 0xA6 && buf[9] === 0xD9) {
return {
ext: "wmv",
mime: "video/x-ms-wmv"
};
}
if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x1 && buf[3].toString(16)[0] === "b") {
return {
ext: "mpg",
mime: "video/mpeg"
};
}
if ((buf[0] === 0x49 && buf[1] === 0x44 && buf[2] === 0x33) || (buf[0] === 0xFF && buf[1] === 0xfb)) {
return {
ext: "mp3",
mime: "audio/mpeg"
};
}
if ((buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x41) || (buf[0] === 0x4D && buf[1] === 0x34 && buf[2] === 0x41 && buf[3] === 0x20)) {
return {
ext: "m4a",
mime: "audio/m4a"
};
}
if (buf[0] === 0x4F && buf[1] === 0x67 && buf[2] === 0x67 && buf[3] === 0x53) {
return {
ext: "ogg",
mime: "audio/ogg"
};
}
if (buf[0] === 0x66 && buf[1] === 0x4C && buf[2] === 0x61 && buf[3] === 0x43) {
return {
ext: "flac",
mime: "audio/x-flac"
};
}
if (buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46 && buf[8] === 0x57 && buf[9] === 0x41 && buf[10] === 0x56 && buf[11] === 0x45) {
return {
ext: "wav",
mime: "audio/x-wav"
};
}
if (buf[0] === 0x23 && buf[1] === 0x21 && buf[2] === 0x41 && buf[3] === 0x4D && buf[4] === 0x52 && buf[5] === 0x0A) {
return {
ext: "amr",
mime: "audio/amr"
};
}
if (buf[0] === 0x25 && buf[1] === 0x50 && buf[2] === 0x44 && buf[3] === 0x46) {
return {
ext: "pdf",
mime: "application/pdf"
};
}
if (buf[0] === 0x4D && buf[1] === 0x5A) {
return {
ext: "exe",
mime: "application/x-msdownload"
};
}
if ((buf[0] === 0x43 || buf[0] === 0x46) && buf[1] === 0x57 && buf[2] === 0x53) {
return {
ext: "swf",
mime: "application/x-shockwave-flash"
};
}
if (buf[0] === 0x7B && buf[1] === 0x5C && buf[2] === 0x72 && buf[3] === 0x74 && buf[4] === 0x66) {
return {
ext: "rtf",
mime: "application/rtf"
};
}
if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x46 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) {
return {
ext: "woff",
mime: "application/font-woff"
};
}
if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x32 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) {
return {
ext: "woff2",
mime: "application/font-woff"
};
}
if (buf[34] === 0x4C && buf[35] === 0x50 && ((buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x01) || (buf[8] === 0x01 && buf[9] === 0x00 && buf[10] === 0x00) || (buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x02))) {
return {
ext: "eot",
mime: "application/octet-stream"
};
}
if (buf[0] === 0x00 && buf[1] === 0x01 && buf[2] === 0x00 && buf[3] === 0x00 && buf[4] === 0x00) {
return {
ext: "ttf",
mime: "application/font-sfnt"
};
}
if (buf[0] === 0x4F && buf[1] === 0x54 && buf[2] === 0x54 && buf[3] === 0x4F && buf[4] === 0x00) {
return {
ext: "otf",
mime: "application/font-sfnt"
};
}
if (buf[0] === 0x00 && buf[1] === 0x00 && buf[2] === 0x01 && buf[3] === 0x00) {
return {
ext: "ico",
mime: "image/x-icon"
};
}
if (buf[0] === 0x46 && buf[1] === 0x4C && buf[2] === 0x56 && buf[3] === 0x01) {
return {
ext: "flv",
mime: "video/x-flv"
};
}
if (buf[0] === 0x25 && buf[1] === 0x21) {
return {
ext: "ps",
mime: "application/postscript"
};
}
if (buf[0] === 0xFD && buf[1] === 0x37 && buf[2] === 0x7A && buf[3] === 0x58 && buf[4] === 0x5A && buf[5] === 0x00) {
return {
ext: "xz",
mime: "application/x-xz"
};
}
if (buf[0] === 0x53 && buf[1] === 0x51 && buf[2] === 0x4C && buf[3] === 0x69) {
return {
ext: "sqlite",
mime: "application/x-sqlite3"
};
}
*/
/**
*
* Added by n1474335 [n1474335@gmail.com] from here on
*
*/
/*
if ((buf[0] === 0x1F && buf[1] === 0x9D) || (buf[0] === 0x1F && buf[1] === 0xA0)) {
return {
ext: "z, tar.z",
mime: "application/x-gtar"
};
}
if (buf[0] === 0x7F && buf[1] === 0x45 && buf[2] === 0x4C && buf[3] === 0x46) {
return {
ext: "none, axf, bin, elf, o, prx, puff, so",
mime: "application/x-executable",
desc: "Executable and Linkable Format file. No standard file extension."
};
}
if (buf[0] === 0xCA && buf[1] === 0xFE && buf[2] === 0xBA && buf[3] === 0xBE) {
return {
ext: "class",
mime: "application/java-vm"
};
}
if (buf[0] === 0xEF && buf[1] === 0xBB && buf[2] === 0xBF) {
return {
ext: "txt",
mime: "text/plain",
desc: "UTF-8 encoded Unicode byte order mark detected, commonly but not exclusively seen in text files."
};
}
// Must be before Little-endian UTF-16 BOM
if (buf[0] === 0xFF && buf[1] === 0xFE && buf[2] === 0x00 && buf[3] === 0x00) {
return {
ext: "UTF32LE",
mime: "charset/utf32le",
desc: "Little-endian UTF-32 encoded Unicode byte order mark detected."
};
}
if (buf[0] === 0xFF && buf[1] === 0xFE) {
return {
ext: "UTF16LE",
mime: "charset/utf16le",
desc: "Little-endian UTF-16 encoded Unicode byte order mark detected."
};
}
if ((buf[0x8001] === 0x43 && buf[0x8002] === 0x44 && buf[0x8003] === 0x30 && buf[0x8004] === 0x30 && buf[0x8005] === 0x31) ||
(buf[0x8801] === 0x43 && buf[0x8802] === 0x44 && buf[0x8803] === 0x30 && buf[0x8804] === 0x30 && buf[0x8805] === 0x31) ||
(buf[0x9001] === 0x43 && buf[0x9002] === 0x44 && buf[0x9003] === 0x30 && buf[0x9004] === 0x30 && buf[0x9005] === 0x31)) {
return {
ext: "iso",
mime: "application/octet-stream",
desc: "ISO 9660 CD/DVD image file"
};
}
if (buf[0] === 0xD0 && buf[1] === 0xCF && buf[2] === 0x11 && buf[3] === 0xE0 && buf[4] === 0xA1 && buf[5] === 0xB1 && buf[6] === 0x1A && buf[7] === 0xE1) {
return {
ext: "doc, xls, ppt",
mime: "application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint",
desc: "Microsoft Office documents"
};
}
if (buf[0] === 0x64 && buf[1] === 0x65 && buf[2] === 0x78 && buf[3] === 0x0A && buf[4] === 0x30 && buf[5] === 0x33 && buf[6] === 0x35 && buf[7] === 0x00) {
return {
ext: "dex",
mime: "application/octet-stream",
desc: "Dalvik Executable (Android)"
};
}
if (buf[0] === 0x4B && buf[1] === 0x44 && buf[2] === 0x4D) {
return {
ext: "vmdk",
mime: "application/vmdk, application/x-virtualbox-vmdk"
};
}
if (buf[0] === 0x43 && buf[1] === 0x72 && buf[2] === 0x32 && buf[3] === 0x34) {
return {
ext: "crx",
mime: "application/crx",
desc: "Google Chrome extension or packaged app"
};
}
if (buf[0] === 0x78 && (buf[1] === 0x01 || buf[1] === 0x9C || buf[1] === 0xDA || buf[1] === 0x5e)) {
return {
ext: "zlib",
mime: "application/x-deflate"
};
}
return null;
*/
}
/**
* Attempts to extract a file from a data stream given its offset and extractor function.
*
* @param {Uint8Array} bytes
* @param {Object} fileDetail
* @param {string} fileDetail.mime
* @param {string} fileDetail.extension
* @param {Function} fileDetail.extractor
* @param {number} offset
* @returns {File}
*/
export function extractFile(bytes, fileDetail, offset) {
if (fileDetail.extractor) {
const fileData = fileDetail.extractor(bytes, offset);
return new File([fileData], `extracted_at_0x${offset.toString(16)}.${fileDetail.extension}`);
}
throw new Error(`No extraction algorithm available for "${fileDetail.mime}" files`);
}
/**
* JPEG extractor.
*
* @param {Uint8Array} bytes
* @param {number} offset
* @returns {Uint8Array}
*/
export function extractJPEG(bytes, offset) {
const stream = new Stream(bytes.slice(offset));
while (stream.hasMore()) {
const marker = stream.getBytes(2);
if (marker[0] !== 0xff) throw new Error("Invalid JPEG marker: " + marker);
let segmentSize = 0;
switch (marker[1]) {
// No length
case 0xd8: // Start of Image
case 0x01: // For temporary use in arithmetic coding
break;
case 0xd9: // End found
return stream.carve();
// Variable size segment
case 0xc0: // Start of frame (Baseline DCT)
case 0xc1: // Start of frame (Extended sequential DCT)
case 0xc2: // Start of frame (Progressive DCT)
case 0xc3: // Start of frame (Lossless sequential)
case 0xc4: // Define Huffman Table
case 0xc5: // Start of frame (Differential sequential DCT)
case 0xc6: // Start of frame (Differential progressive DCT)
case 0xc7: // Start of frame (Differential lossless)
case 0xc8: // Reserved for JPEG extensions
case 0xc9: // Start of frame (Extended sequential DCT)
case 0xca: // Start of frame (Progressive DCT)
case 0xcb: // Start of frame (Lossless sequential)
case 0xcc: // Define arithmetic conditioning table
case 0xcd: // Start of frame (Differential sequential DCT)
case 0xce: // Start of frame (Differential progressive DCT)
case 0xcf: // Start of frame (Differential lossless)
case 0xdb: // Define Quantization Table
case 0xde: // Define hierarchical progression
case 0xe0: // Application-specific
case 0xe1: // Application-specific
case 0xe2: // Application-specific
case 0xe3: // Application-specific
case 0xe4: // Application-specific
case 0xe5: // Application-specific
case 0xe6: // Application-specific
case 0xe7: // Application-specific
case 0xe8: // Application-specific
case 0xe9: // Application-specific
case 0xea: // Application-specific
case 0xeb: // Application-specific
case 0xec: // Application-specific
case 0xed: // Application-specific
case 0xee: // Application-specific
case 0xef: // Application-specific
case 0xfe: // Comment
segmentSize = stream.readInt(2, "be");
stream.position += segmentSize - 2;
break;
// 1 byte
case 0xdf: // Expand reference image
stream.position++;
break;
// 2 bytes
case 0xdc: // Define number of lines
case 0xdd: // Define restart interval
stream.position += 2;
break;
// Start scan
case 0xda: // Start of scan
segmentSize = stream.readInt(2, "be");
stream.position += segmentSize - 2;
stream.continueUntil(0xff);
break;
// Continue through encoded data
case 0x00: // Byte stuffing
case 0xd0: // Restart
case 0xd1: // Restart
case 0xd2: // Restart
case 0xd3: // Restart
case 0xd4: // Restart
case 0xd5: // Restart
case 0xd6: // Restart
case 0xd7: // Restart
stream.continueUntil(0xff);
break;
default:
stream.continueUntil(0xff);
break;
}
}
throw new Error("Unable to parse JPEG successfully");
}
/**
* Portable executable extractor.
* Assumes that the offset refers to an MZ header.
*
* @param {Uint8Array} bytes
* @param {number} offset
* @returns {Uint8Array}
*/
export function extractMZPE(bytes, offset) {
const stream = new Stream(bytes.slice(offset));
// Move to PE header pointer
stream.moveTo(0x3c);
const peAddress = stream.readInt(4, "le");
// Move to PE header
stream.moveTo(peAddress);
// Get number of sections
stream.moveForwardsBy(6);
const numSections = stream.readInt(2, "le");
// Get optional header size
stream.moveForwardsBy(12);
const optionalHeaderSize = stream.readInt(2, "le");
// Move past optional header to section header
stream.moveForwardsBy(2 + optionalHeaderSize);
// Move to final section header
stream.moveForwardsBy((numSections - 1) * 0x28);
// Get raw data info
stream.moveForwardsBy(16);
const rawDataSize = stream.readInt(4, "le");
const rawDataAddress = stream.readInt(4, "le");
// Move to end of final section
stream.moveTo(rawDataAddress + rawDataSize);
return stream.carve();
}
/**
* PDF extractor.
*
* @param {Uint8Array} bytes
* @param {number} offset
* @returns {Uint8Array}
*/
export function extractPDF(bytes, offset) {
const stream = new Stream(bytes.slice(offset));
// Find end-of-file marker (%%EOF)
stream.continueUntil([0x25, 0x25, 0x45, 0x4f, 0x46]);
stream.moveForwardsBy(5);
stream.consumeIf(0x0d);
stream.consumeIf(0x0a);
return stream.carve();
}
/**
* ZIP extractor.
*
* @param {Uint8Array} bytes
* @param {number} offset
* @returns {Uint8Array}
*/
export function extractZIP(bytes, offset) {
const stream = new Stream(bytes.slice(offset));
// Find End of central directory record
stream.continueUntil([0x50, 0x4b, 0x05, 0x06]);
// Get comment length and consume
stream.moveForwardsBy(20);
const commentLength = stream.readInt(2, "le");
stream.moveForwardsBy(commentLength);
return stream.carve();
}

View file

@ -2,6 +2,7 @@ import OperationConfig from "../config/OperationConfig.json";
import Utils from "../Utils";
import Recipe from "../Recipe";
import Dish from "../Dish";
import {detectFileType} from "./FileType";
import chiSquared from "chi-squared";
/**
@ -92,7 +93,14 @@ class Magic {
* @returns {string} [type.desc] - Description
*/
detectFileType() {
return Magic.magicFileType(this.inputBuffer);
const fileType = detectFileType(this.inputBuffer);
if (!fileType.length) return null;
return {
ext: fileType[0].extension,
mime: fileType[0].mime,
desc: fileType[0].description
};
}
/**
@ -784,452 +792,9 @@ class Magic {
}[code];
}
/**
* Given a buffer, detects magic byte sequences at specific positions and returns the
* extension and mime type.
*
* @param {Uint8Array} buf
* @returns {Object} type
* @returns {string} type.ext - File extension
* @returns {string} type.mime - Mime type
* @returns {string} [type.desc] - Description
*/
static magicFileType(buf) {
if (!(buf && buf.length > 1)) {
return null;
}
if (buf[0] === 0xFF && buf[1] === 0xD8 && buf[2] === 0xFF) {
return {
ext: "jpg",
mime: "image/jpeg"
};
}
if (buf[0] === 0x89 && buf[1] === 0x50 && buf[2] === 0x4E && buf[3] === 0x47) {
return {
ext: "png",
mime: "image/png"
};
}
if (buf[0] === 0x47 && buf[1] === 0x49 && buf[2] === 0x46) {
return {
ext: "gif",
mime: "image/gif"
};
}
if (buf[8] === 0x57 && buf[9] === 0x45 && buf[10] === 0x42 && buf[11] === 0x50) {
return {
ext: "webp",
mime: "image/webp"
};
}
// needs to be before `tif` check
if (((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) && buf[8] === 0x43 && buf[9] === 0x52) {
return {
ext: "cr2",
mime: "image/x-canon-cr2"
};
}
if ((buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0x2A && buf[3] === 0x0) || (buf[0] === 0x4D && buf[1] === 0x4D && buf[2] === 0x0 && buf[3] === 0x2A)) {
return {
ext: "tif",
mime: "image/tiff"
};
}
if (buf[0] === 0x42 && buf[1] === 0x4D) {
return {
ext: "bmp",
mime: "image/bmp"
};
}
if (buf[0] === 0x49 && buf[1] === 0x49 && buf[2] === 0xBC) {
return {
ext: "jxr",
mime: "image/vnd.ms-photo"
};
}
if (buf[0] === 0x38 && buf[1] === 0x42 && buf[2] === 0x50 && buf[3] === 0x53) {
return {
ext: "psd",
mime: "image/vnd.adobe.photoshop"
};
}
// needs to be before `zip` check
if (buf[0] === 0x50 && buf[1] === 0x4B && buf[2] === 0x3 && buf[3] === 0x4 && buf[30] === 0x6D && buf[31] === 0x69 && buf[32] === 0x6D && buf[33] === 0x65 && buf[34] === 0x74 && buf[35] === 0x79 && buf[36] === 0x70 && buf[37] === 0x65 && buf[38] === 0x61 && buf[39] === 0x70 && buf[40] === 0x70 && buf[41] === 0x6C && buf[42] === 0x69 && buf[43] === 0x63 && buf[44] === 0x61 && buf[45] === 0x74 && buf[46] === 0x69 && buf[47] === 0x6F && buf[48] === 0x6E && buf[49] === 0x2F && buf[50] === 0x65 && buf[51] === 0x70 && buf[52] === 0x75 && buf[53] === 0x62 && buf[54] === 0x2B && buf[55] === 0x7A && buf[56] === 0x69 && buf[57] === 0x70) {
return {
ext: "epub",
mime: "application/epub+zip"
};
}
if (buf[0] === 0x50 && buf[1] === 0x4B && (buf[2] === 0x3 || buf[2] === 0x5 || buf[2] === 0x7) && (buf[3] === 0x4 || buf[3] === 0x6 || buf[3] === 0x8)) {
return {
ext: "zip",
mime: "application/zip"
};
}
if (buf[257] === 0x75 && buf[258] === 0x73 && buf[259] === 0x74 && buf[260] === 0x61 && buf[261] === 0x72) {
return {
ext: "tar",
mime: "application/x-tar"
};
}
if (buf[0] === 0x52 && buf[1] === 0x61 && buf[2] === 0x72 && buf[3] === 0x21 && buf[4] === 0x1A && buf[5] === 0x7 && (buf[6] === 0x0 || buf[6] === 0x1)) {
return {
ext: "rar",
mime: "application/x-rar-compressed"
};
}
if (buf[0] === 0x1F && buf[1] === 0x8B && buf[2] === 0x8) {
return {
ext: "gz",
mime: "application/gzip"
};
}
if (buf[0] === 0x42 && buf[1] === 0x5A && buf[2] === 0x68) {
return {
ext: "bz2",
mime: "application/x-bzip2"
};
}
if (buf[0] === 0x37 && buf[1] === 0x7A && buf[2] === 0xBC && buf[3] === 0xAF && buf[4] === 0x27 && buf[5] === 0x1C) {
return {
ext: "7z",
mime: "application/x-7z-compressed"
};
}
if (buf[0] === 0x78 && buf[1] === 0x01) {
return {
ext: "dmg, zlib",
mime: "application/x-apple-diskimage, application/x-deflate"
};
}
if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && (buf[3] === 0x18 || buf[3] === 0x20) && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) || (buf[0] === 0x33 && buf[1] === 0x67 && buf[2] === 0x70 && buf[3] === 0x35) || (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x6D && buf[9] === 0x70 && buf[10] === 0x34 && buf[11] === 0x32 && buf[16] === 0x6D && buf[17] === 0x70 && buf[18] === 0x34 && buf[19] === 0x31 && buf[20] === 0x6D && buf[21] === 0x70 && buf[22] === 0x34 && buf[23] === 0x32 && buf[24] === 0x69 && buf[25] === 0x73 && buf[26] === 0x6F && buf[27] === 0x6D)) {
return {
ext: "mp4",
mime: "video/mp4"
};
}
if ((buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x1C && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x56)) {
return {
ext: "m4v",
mime: "video/x-m4v"
};
}
if (buf[0] === 0x4D && buf[1] === 0x54 && buf[2] === 0x68 && buf[3] === 0x64) {
return {
ext: "mid",
mime: "audio/midi"
};
}
// needs to be before the `webm` check
if (buf[31] === 0x6D && buf[32] === 0x61 && buf[33] === 0x74 && buf[34] === 0x72 && buf[35] === 0x6f && buf[36] === 0x73 && buf[37] === 0x6B && buf[38] === 0x61) {
return {
ext: "mkv",
mime: "video/x-matroska"
};
}
if (buf[0] === 0x1A && buf[1] === 0x45 && buf[2] === 0xDF && buf[3] === 0xA3) {
return {
ext: "webm",
mime: "video/webm"
};
}
if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x0 && buf[3] === 0x14 && buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70) {
return {
ext: "mov",
mime: "video/quicktime"
};
}
if (buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46 && buf[8] === 0x41 && buf[9] === 0x56 && buf[10] === 0x49) {
return {
ext: "avi",
mime: "video/x-msvideo"
};
}
if (buf[0] === 0x30 && buf[1] === 0x26 && buf[2] === 0xB2 && buf[3] === 0x75 && buf[4] === 0x8E && buf[5] === 0x66 && buf[6] === 0xCF && buf[7] === 0x11 && buf[8] === 0xA6 && buf[9] === 0xD9) {
return {
ext: "wmv",
mime: "video/x-ms-wmv"
};
}
if (buf[0] === 0x0 && buf[1] === 0x0 && buf[2] === 0x1 && buf[3].toString(16)[0] === "b") {
return {
ext: "mpg",
mime: "video/mpeg"
};
}
if ((buf[0] === 0x49 && buf[1] === 0x44 && buf[2] === 0x33) || (buf[0] === 0xFF && buf[1] === 0xfb)) {
return {
ext: "mp3",
mime: "audio/mpeg"
};
}
if ((buf[4] === 0x66 && buf[5] === 0x74 && buf[6] === 0x79 && buf[7] === 0x70 && buf[8] === 0x4D && buf[9] === 0x34 && buf[10] === 0x41) || (buf[0] === 0x4D && buf[1] === 0x34 && buf[2] === 0x41 && buf[3] === 0x20)) {
return {
ext: "m4a",
mime: "audio/m4a"
};
}
if (buf[0] === 0x4F && buf[1] === 0x67 && buf[2] === 0x67 && buf[3] === 0x53) {
return {
ext: "ogg",
mime: "audio/ogg"
};
}
if (buf[0] === 0x66 && buf[1] === 0x4C && buf[2] === 0x61 && buf[3] === 0x43) {
return {
ext: "flac",
mime: "audio/x-flac"
};
}
if (buf[0] === 0x52 && buf[1] === 0x49 && buf[2] === 0x46 && buf[3] === 0x46 && buf[8] === 0x57 && buf[9] === 0x41 && buf[10] === 0x56 && buf[11] === 0x45) {
return {
ext: "wav",
mime: "audio/x-wav"
};
}
if (buf[0] === 0x23 && buf[1] === 0x21 && buf[2] === 0x41 && buf[3] === 0x4D && buf[4] === 0x52 && buf[5] === 0x0A) {
return {
ext: "amr",
mime: "audio/amr"
};
}
if (buf[0] === 0x25 && buf[1] === 0x50 && buf[2] === 0x44 && buf[3] === 0x46) {
return {
ext: "pdf",
mime: "application/pdf"
};
}
if (buf[0] === 0x4D && buf[1] === 0x5A) {
return {
ext: "exe",
mime: "application/x-msdownload"
};
}
if ((buf[0] === 0x43 || buf[0] === 0x46) && buf[1] === 0x57 && buf[2] === 0x53) {
return {
ext: "swf",
mime: "application/x-shockwave-flash"
};
}
if (buf[0] === 0x7B && buf[1] === 0x5C && buf[2] === 0x72 && buf[3] === 0x74 && buf[4] === 0x66) {
return {
ext: "rtf",
mime: "application/rtf"
};
}
if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x46 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) {
return {
ext: "woff",
mime: "application/font-woff"
};
}
if (buf[0] === 0x77 && buf[1] === 0x4F && buf[2] === 0x46 && buf[3] === 0x32 && buf[4] === 0x00 && buf[5] === 0x01 && buf[6] === 0x00 && buf[7] === 0x00) {
return {
ext: "woff2",
mime: "application/font-woff"
};
}
if (buf[34] === 0x4C && buf[35] === 0x50 && ((buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x01) || (buf[8] === 0x01 && buf[9] === 0x00 && buf[10] === 0x00) || (buf[8] === 0x02 && buf[9] === 0x00 && buf[10] === 0x02))) {
return {
ext: "eot",
mime: "application/octet-stream"
};
}
if (buf[0] === 0x00 && buf[1] === 0x01 && buf[2] === 0x00 && buf[3] === 0x00 && buf[4] === 0x00) {
return {
ext: "ttf",
mime: "application/font-sfnt"
};
}
if (buf[0] === 0x4F && buf[1] === 0x54 && buf[2] === 0x54 && buf[3] === 0x4F && buf[4] === 0x00) {
return {
ext: "otf",
mime: "application/font-sfnt"
};
}
if (buf[0] === 0x00 && buf[1] === 0x00 && buf[2] === 0x01 && buf[3] === 0x00) {
return {
ext: "ico",
mime: "image/x-icon"
};
}
if (buf[0] === 0x46 && buf[1] === 0x4C && buf[2] === 0x56 && buf[3] === 0x01) {
return {
ext: "flv",
mime: "video/x-flv"
};
}
if (buf[0] === 0x25 && buf[1] === 0x21) {
return {
ext: "ps",
mime: "application/postscript"
};
}
if (buf[0] === 0xFD && buf[1] === 0x37 && buf[2] === 0x7A && buf[3] === 0x58 && buf[4] === 0x5A && buf[5] === 0x00) {
return {
ext: "xz",
mime: "application/x-xz"
};
}
if (buf[0] === 0x53 && buf[1] === 0x51 && buf[2] === 0x4C && buf[3] === 0x69) {
return {
ext: "sqlite",
mime: "application/x-sqlite3"
};
}
/**
*
* Added by n1474335 [n1474335@gmail.com] from here on
*
*/
if ((buf[0] === 0x1F && buf[1] === 0x9D) || (buf[0] === 0x1F && buf[1] === 0xA0)) {
return {
ext: "z, tar.z",
mime: "application/x-gtar"
};
}
if (buf[0] === 0x7F && buf[1] === 0x45 && buf[2] === 0x4C && buf[3] === 0x46) {
return {
ext: "none, axf, bin, elf, o, prx, puff, so",
mime: "application/x-executable",
desc: "Executable and Linkable Format file. No standard file extension."
};
}
if (buf[0] === 0xCA && buf[1] === 0xFE && buf[2] === 0xBA && buf[3] === 0xBE) {
return {
ext: "class",
mime: "application/java-vm"
};
}
if (buf[0] === 0xEF && buf[1] === 0xBB && buf[2] === 0xBF) {
return {
ext: "txt",
mime: "text/plain",
desc: "UTF-8 encoded Unicode byte order mark detected, commonly but not exclusively seen in text files."
};
}
// Must be before Little-endian UTF-16 BOM
if (buf[0] === 0xFF && buf[1] === 0xFE && buf[2] === 0x00 && buf[3] === 0x00) {
return {
ext: "UTF32LE",
mime: "charset/utf32le",
desc: "Little-endian UTF-32 encoded Unicode byte order mark detected."
};
}
if (buf[0] === 0xFF && buf[1] === 0xFE) {
return {
ext: "UTF16LE",
mime: "charset/utf16le",
desc: "Little-endian UTF-16 encoded Unicode byte order mark detected."
};
}
if ((buf[0x8001] === 0x43 && buf[0x8002] === 0x44 && buf[0x8003] === 0x30 && buf[0x8004] === 0x30 && buf[0x8005] === 0x31) ||
(buf[0x8801] === 0x43 && buf[0x8802] === 0x44 && buf[0x8803] === 0x30 && buf[0x8804] === 0x30 && buf[0x8805] === 0x31) ||
(buf[0x9001] === 0x43 && buf[0x9002] === 0x44 && buf[0x9003] === 0x30 && buf[0x9004] === 0x30 && buf[0x9005] === 0x31)) {
return {
ext: "iso",
mime: "application/octet-stream",
desc: "ISO 9660 CD/DVD image file"
};
}
if (buf[0] === 0xD0 && buf[1] === 0xCF && buf[2] === 0x11 && buf[3] === 0xE0 && buf[4] === 0xA1 && buf[5] === 0xB1 && buf[6] === 0x1A && buf[7] === 0xE1) {
return {
ext: "doc, xls, ppt",
mime: "application/msword, application/vnd.ms-excel, application/vnd.ms-powerpoint",
desc: "Microsoft Office documents"
};
}
if (buf[0] === 0x64 && buf[1] === 0x65 && buf[2] === 0x78 && buf[3] === 0x0A && buf[4] === 0x30 && buf[5] === 0x33 && buf[6] === 0x35 && buf[7] === 0x00) {
return {
ext: "dex",
mime: "application/octet-stream",
desc: "Dalvik Executable (Android)"
};
}
if (buf[0] === 0x4B && buf[1] === 0x44 && buf[2] === 0x4D) {
return {
ext: "vmdk",
mime: "application/vmdk, application/x-virtualbox-vmdk"
};
}
if (buf[0] === 0x43 && buf[1] === 0x72 && buf[2] === 0x32 && buf[3] === 0x34) {
return {
ext: "crx",
mime: "application/crx",
desc: "Google Chrome extension or packaged app"
};
}
if (buf[0] === 0x78 && (buf[1] === 0x01 || buf[1] === 0x9C || buf[1] === 0xDA || buf[1] === 0x5e)) {
return {
ext: "zlib",
mime: "application/x-deflate"
};
}
return null;
}
}
/**
* Byte frequencies of various languages generated from Wikipedia dumps taken in late 2017 and early 2018.
* The Chi-Squared test cannot accept expected values of 0, so 0.0001 has been used to account for bytes

View file

@ -11,13 +11,6 @@
/**
* A Stream can be used to traverse a binary blob, interpreting sections of it
* as various data types.
*
* @param {Uint8Array} bytes
* @param {Object} fileDetail
* @param {string} fileDetail.mime
* @param {string} fileDetail.ext
* @param {number} fileDetail.offset
* @returns {File}
*/
export default class Stream {

View file

@ -5,7 +5,7 @@
*/
import Operation from "../Operation";
import Magic from "../lib/Magic";
import {detectFileType} from "../lib/FileType";
/**
* Detect File Type operation
@ -34,17 +34,21 @@ class DetectFileType extends Operation {
*/
run(input, args) {
const data = new Uint8Array(input),
type = Magic.magicFileType(data);
types = detectFileType(data);
if (!type) {
if (!types.length) {
return "Unknown file type. Have you tried checking the entropy of this data to determine whether it might be encrypted or compressed?";
} else {
let output = "File extension: " + type.ext + "\n" +
"MIME type: " + type.mime;
let output;
if (type.desc && type.desc.length) {
output += "\nDescription: " + type.desc;
}
types.forEach(type => {
output += "File extension: " + type.extension + "\n" +
"MIME type: " + type.mime + "\n";
if (type.description && type.description.length) {
output += "\nDescription: " + type.description + "\n";
}
});
return output;
}

View file

@ -6,9 +6,8 @@
import Operation from "../Operation";
// import OperationError from "../errors/OperationError";
import Magic from "../lib/Magic";
import Utils from "../Utils";
import {extractFile} from "../lib/FileExtraction";
import {detectFileType, extractFile} from "../lib/FileType";
/**
* Extract Files operation
@ -40,13 +39,13 @@ class ExtractFiles extends Operation {
const bytes = new Uint8Array(input);
// Scan for embedded files
const fileDetails = scanForEmbeddedFiles(bytes);
const detectedFiles = scanForEmbeddedFiles(bytes);
// Extract each file that we support
const files = [];
fileDetails.forEach(fileDetail => {
detectedFiles.forEach(detectedFile => {
try {
files.push(extractFile(bytes, fileDetail));
files.push(extractFile(bytes, detectedFile.fileDetails, detectedFile.offset));
} catch (err) {}
});
@ -70,22 +69,21 @@ class ExtractFiles extends Operation {
* @param data
*/
function scanForEmbeddedFiles(data) {
let type;
const types = [];
const detectedFiles = [];
for (let i = 0; i < data.length; i++) {
type = Magic.magicFileType(data.slice(i));
if (type) {
types.push({
offset: i,
ext: type.ext,
mime: type.mime,
desc: type.desc
const fileDetails = detectFileType(data.slice(i));
if (fileDetails.length) {
fileDetails.forEach(match => {
detectedFiles.push({
offset: i,
fileDetails: match,
});
});
}
}
return types;
return detectedFiles;
}
export default ExtractFiles;

View file

@ -9,7 +9,7 @@ import { fromHex } from "../lib/Hex";
import Operation from "../Operation";
import OperationError from "../errors/OperationError";
import Utils from "../Utils";
import Magic from "../lib/Magic";
import {detectFileType} from "../lib/FileType";
/**
* Render Image operation
@ -72,8 +72,8 @@ class RenderImage extends Operation {
}
// Determine file type
const type = Magic.magicFileType(input);
if (!(type && type.mime.indexOf("image") === 0)) {
const types = detectFileType(input);
if (!(types.length && types[0].mime.indexOf("image") === 0)) {
throw new OperationError("Invalid file type");
}
@ -92,9 +92,9 @@ class RenderImage extends Operation {
let dataURI = "data:";
// Determine file type
const type = Magic.magicFileType(data);
if (type && type.mime.indexOf("image") === 0) {
dataURI += type.mime + ";";
const types = detectFileType(data);
if (types.length && types[0].mime.indexOf("image") === 0) {
dataURI += types[0].mime + ";";
} else {
throw new OperationError("Invalid file type");
}

View file

@ -6,7 +6,7 @@
import Operation from "../Operation";
import Utils from "../Utils";
import Magic from "../lib/Magic";
import {detectFileType} from "../lib/FileType";
/**
* Scan for Embedded Files operation
@ -41,7 +41,7 @@ class ScanForEmbeddedFiles extends Operation {
*/
run(input, args) {
let output = "Scanning data for 'magic bytes' which may indicate embedded files. The following results may be false positives and should not be treat as reliable. Any suffiently long file is likely to contain these magic bytes coincidentally.\n",
type,
types,
numFound = 0,
numCommonFound = 0;
const ignoreCommon = args[0],
@ -49,20 +49,23 @@ class ScanForEmbeddedFiles extends Operation {
data = new Uint8Array(input);
for (let i = 0; i < data.length; i++) {
type = Magic.magicFileType(data.slice(i));
if (type) {
if (ignoreCommon && commonExts.indexOf(type.ext) > -1) {
numCommonFound++;
continue;
}
numFound++;
output += "\nOffset " + i + " (0x" + Utils.hex(i) + "):\n" +
" File extension: " + type.ext + "\n" +
" MIME type: " + type.mime + "\n";
types = detectFileType(data.slice(i));
if (types.length) {
types.forEach(type => {
if (ignoreCommon && commonExts.indexOf(type.extension) > -1) {
numCommonFound++;
return;
}
if (type.desc && type.desc.length) {
output += " Description: " + type.desc + "\n";
}
numFound++;
output += "\nOffset " + i + " (0x" + Utils.hex(i) + "):\n" +
" File extension: " + type.extension + "\n" +
" MIME type: " + type.mime + "\n";
if (type.description && type.description.length) {
output += " Description: " + type.description + "\n";
}
});
}
}