2017-03-23 17:52:20 +00:00
import Utils from "../Utils.js" ;
2017-03-06 12:45:51 +00:00
2016-11-28 10:42:58 +00:00
/**
 * Entropy operations.
 *
 * @author n1474335 [n1474335@gmail.com]
 * @copyright Crown Copyright 2016
 * @license Apache-2.0
 *
 * @namespace
 */
2017-03-23 17:52:20 +00:00
const Entropy = {

    /**
     * Chunk size constant. It is not referenced inside this object;
     * presumably it is the default for the chunk-size argument of
     * runEntropy - confirm against the operation configuration.
     *
     * @constant
     * @default
     */
    CHUNK_SIZE: 1000,

    /**
     * Entropy operation.
     *
     * Reports the Shannon entropy of the whole input, emits a script that
     * draws a scale bar on the 'chart-area' canvas, and then lists the
     * entropy of each consecutive chunk of the input.
     *
     * @param {byteArray} input
     * @param {Object[]} args - args[0] is the chunk size, in bytes
     * @returns {html}
     */
    runEntropy(input, args) {
        // Coerce once so a numeric string cannot turn `start += chunkSize`
        // into string concatenation.
        const chunkSize = Number(args[0]);
        const entropy = Entropy._calcEntropy(input);

        // Script body is kept on a single line; the pieces are joined with
        // spaces, so each one must end at a token boundary.
        const chartScript = [
            "var canvas = document.getElementById('chart-area'),",
            "parentRect = canvas.parentNode.getBoundingClientRect(),",
            "entropy = " + entropy + ",",
            "height = parentRect.height * 0.25;",
            "canvas.width = parentRect.width * 0.95;",
            "canvas.height = height > 150 ? 150 : height;",
            "CanvasComponents.drawScaleBar(canvas, entropy, 8, [",
            "{ label: 'English text', min: 3.5, max: 5 },",
            "{ label: 'Encrypted/compressed', min: 7.5, max: 8 }",
            "]);"
        ].join(" ");

        let output = "Shannon entropy: " + entropy + "\n" +
            "<br><canvas id='chart-area'></canvas><br>\n" +
            "- 0 represents no randomness (i.e. all the bytes in the data have the same value) whereas 8, the maximum, represents a completely random string.\n" +
            "- Standard English text usually falls somewhere between 3.5 and 5.\n" +
            "- Properly encrypted or compressed data of a reasonable length should have an entropy of over 7.5.\n\n" +
            "The following results show the entropy of chunks of the input data. Chunks with particularly high entropy could suggest encrypted or compressed sections.\n\n" +
            "<br><script>" + chartScript + "</script>";

        if (chunkSize > 0) {
            for (let start = 0; start < input.length; start += chunkSize) {
                // Cap the end so the final (possibly short) chunk is not
                // labelled past the end of the input.
                const end = Math.min(start + chunkSize, input.length);
                const chunkEntropy = Entropy._calcEntropy(input.slice(start, end));
                output += `Bytes ${start} to ${end}: ${chunkEntropy}\n`;
            }
        } else if (chunkSize === 0) {
            output += "Chunk size cannot be 0.";
        } else {
            // Negative or NaN: a negative step would never terminate the loop.
            output += "Chunk size must be a positive number.";
        }

        return output;
    },

    /**
     * Boolean constant. It is not referenced inside this object; presumably
     * it is the default for the "show zeroes" argument of runFreqDistrib -
     * confirm against the operation configuration.
     *
     * @constant
     * @default
     */
    FREQ_ZEROS: false,

    /**
     * Frequency distribution operation.
     *
     * Counts how often each byte value (0-255) occurs, emits a script that
     * draws a bar chart on the 'chart-area' canvas, and lists one text row
     * per byte value with its percentage and a bar of '|' characters.
     *
     * @param {ArrayBuffer} input
     * @param {Object[]} args - args[0]: when truthy, byte values that never
     *     occur are listed as well
     * @returns {html}
     */
    runFreqDistrib(input, args) {
        const data = new Uint8Array(input);
        const len = data.length;
        if (!len) return "No data";

        const showZeroes = args[0];

        // Count bytes
        const distrib = new Array(256).fill(0);
        for (const byte of data) {
            distrib[byte]++;
        }

        // Calculate percentages
        const percentages = distrib.map((count) => count / len * 100);
        const repr = distrib.filter((count) => count > 0).length;

        const chartScript = [
            "var canvas = document.getElementById('chart-area'),",
            "parentRect = canvas.parentNode.getBoundingClientRect(),",
            "scores = " + JSON.stringify(percentages) + ";",
            "canvas.width = parentRect.width * 0.95;",
            "canvas.height = parentRect.height * 0.9;",
            "CanvasComponents.drawBarChart(canvas, scores, 'Byte', 'Frequency %', 16, 6);"
        ].join(" ");

        // Print
        let output = "<canvas id='chart-area'></canvas><br>" +
            "Total data length: " + len +
            "\nNumber of bytes represented: " + repr +
            "\nNumber of bytes not represented: " + (256 - repr) +
            "\n\nByte Percentage\n" +
            "<script>" + chartScript + "</script>";

        for (let i = 0; i < 256; i++) {
            if (!distrib[i] && !showZeroes) continue;

            // Whole percentages are shown without the ".00" suffix.
            const pct = (percentages[i].toFixed(2).replace(".00", "") + "%)").padEnd(8, " ");
            // One bar character per started percentage point.
            const bar = "|".repeat(Math.ceil(percentages[i]));
            output += " " + Utils.hex(i, 2) + " (" + pct + bar + "\n";
        }

        return output;
    },

    /**
     * Chi Square operation.
     *
     * Sums (observed - expected)^2 / expected over the byte values that
     * occur at least once in the input, where expected = length / 256.
     * Byte values that never occur contribute nothing to the sum.
     *
     * @param {ArrayBuffer} input
     * @param {Object[]} args - not used
     * @returns {number}
     */
    runChiSq(input, args) {
        const data = new Uint8Array(input);
        const expected = data.length / 256;
        const distArray = new Array(256).fill(0);

        for (const byte of data) {
            distArray[byte]++;
        }

        let total = 0;
        for (const observed of distArray) {
            // Skipping empty bins also avoids 0/0 when the input is empty.
            if (observed > 0) {
                total += Math.pow(observed - expected, 2) / expected;
            }
        }
        return total;
    },

    /**
     * Calculates the Shannon entropy for a given chunk of data.
     *
     * Builds a histogram of the values in a single pass and returns
     * -sum(p * log2(p)) over the distinct values, where p is the share of
     * elements equal to that value. Empty input yields 0.
     *
     * @private
     * @param {byteArray} data - any indexable sequence with a length
     * @returns {number}
     */
    _calcEntropy(data) {
        const len = data.length;
        if (!len) return 0;

        const counts = new Map();
        for (let i = 0; i < len; i++) {
            counts.set(data[i], (counts.get(data[i]) || 0) + 1);
        }

        let entropy = 0;
        counts.forEach((count) => {
            const p = count / len;
            entropy -= p * Math.log2(p);
        });

        return entropy;
    },
};
2017-03-23 17:52:20 +00:00
export default Entropy ;