roadie/Roadie.Api.Library/Imaging/ImageHasher.cs

160 lines
6.5 KiB
C#
Raw Normal View History

2018-11-03 21:21:36 +00:00
using SixLabors.ImageSharp;
2018-11-04 20:33:37 +00:00
using SixLabors.ImageSharp.Advanced;
2018-11-04 15:16:52 +00:00
using SixLabors.ImageSharp.Processing;
2018-11-03 21:21:36 +00:00
using System.IO;
namespace Roadie.Library.Imaging
{
/// <summary>
///     Contains a variety of methods useful in generating image hashes for image comparison
///     and recognition.
///     Credit for the AverageHash implementation to David Oftedal of the University of Oslo.
/// </summary>
2018-11-04 15:16:52 +00:00
public static class ImageHasher
2018-11-03 21:21:36 +00:00
{
#region Private constants and utility methods
/// <summary>
///     Lookup table used by the BitCount method (used in Similarity comparisons).
///     Entry <c>i</c> holds the number of set bits in the byte value <c>i</c> (0..255),
///     so a 64-bit value can be counted one byte at a time.
///     Credit goes to David Oftedal of the University of Oslo, Norway for this.
///     http://folk.uio.no/davidjo/computing.php
/// </summary>
private static readonly byte[] bitCounts =
{
    // One row per high nibble: row r covers the indices 0x(r)0 .. 0x(r)F.
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, // 0x00 - 0x0F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x10 - 0x1F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x20 - 0x2F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x30 - 0x3F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x40 - 0x4F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x50 - 0x5F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x60 - 0x6F
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0x70 - 0x7F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, // 0x80 - 0x8F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0x90 - 0x9F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xA0 - 0xAF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xB0 - 0xBF
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, // 0xC0 - 0xCF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xD0 - 0xDF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, // 0xE0 - 0xEF
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8  // 0xF0 - 0xFF
};
/// <summary>
///     Counts the set bits in a 64-bit value. Utility function for similarity.
///     The value is consumed one byte at a time, lowest byte first; each byte's bit count
///     is read from the bitCounts lookup table and added to the running total.
///     Taken from Oftedal's implementation.
/// </summary>
/// <param name="num">The hash we are counting.</param>
/// <returns>The total bit count.</returns>
private static uint BitCount(ulong num)
{
    uint count = 0;
    // Stop as soon as no set bits remain; the remaining high bytes would add nothing.
    while (num > 0)
    {
        count += bitCounts[num & 0xff];
        num >>= 8;
    }

    return count;
}
#endregion Private constants and utility methods
#region Public interface methods
/// <summary>
///     Generate a hash for the image to be able to find like/matching images.
///     The image is resized to 8x8 and converted to grayscale; each of the 64 pixels then
///     contributes one bit of the hash, set when the pixel's reduced gray value is greater
///     than or equal to the average over all 64 pixels. The pixel at (x, y) maps to bit
///     <c>63 - (x + y * 8)</c>, so the first pixel of the first row is the most significant bit.
/// </summary>
/// <param name="bytes">Image bytes</param>
/// <returns>Hash of Image</returns>
public static ulong AverageHash(byte[] bytes)
{
    using (var image = Image.Load(bytes))
    {
        image.Mutate(ctx => ctx.Resize(8, 8).Grayscale());

        var grayscale = new byte[64];
        uint averageValue = 0;
        for (var y = 0; y < 8; y++)
        {
            var pixelRowSpan = image.GetPixelRowSpan(y);
            for (var x = 0; x < 8; x++)
            {
                // Add up the three low-order bytes of the packed pixel value (at most 765)
                // and divide by 12, leaving a value in the range 0..63. The high-order
                // byte of the packed value is not used.
                var pixel = pixelRowSpan[x].PackedValue;
                var gray = (pixel & 0x00ff0000) >> 16;
                gray += (pixel & 0x0000ff00) >> 8;
                gray += pixel & 0x000000ff;
                gray /= 12;

                grayscale[x + y * 8] = (byte)gray;
                averageValue += gray;
            }
        }

        // Turn the accumulated sum into the (integer) mean of the 64 gray values.
        averageValue /= 64;

        ulong hash = 0;
        for (var i = 0; i < 64; i++)
        {
            if (grayscale[i] >= averageValue)
            {
                hash |= 1UL << (63 - i);
            }
        }

        return hash;
    }
}
2018-11-03 21:21:36 +00:00
2018-11-04 20:33:37 +00:00
/// <summary>
///     Computes the average hash of the image content in the given file.
///     The whole file is read into memory and passed to the byte-array overload.
/// </summary>
/// <param name="path">Path to the input file.</param>
/// <returns>The hash of the input file's image content.</returns>
public static ulong AverageHash(string path)
{
    var imageBytes = File.ReadAllBytes(path);
    return AverageHash(imageBytes);
}
2018-11-04 20:33:37 +00:00
/// <summary>
///     Determines whether the image contents of the two given files hash identically,
///     i.e. whether their similarity is exactly 100 percent.
/// </summary>
/// <param name="path1">Path to the first image file.</param>
/// <param name="path2">Path to the second image file.</param>
/// <returns>True when the similarity of the two images is 100; otherwise false.</returns>
public static bool ImagesAreSame(string path1, string path2)
{
    var similarity = Similarity(path1, path2);
    return similarity == 100;
}
2018-11-03 21:21:36 +00:00
2018-11-04 20:33:37 +00:00
/// <summary>
///     Determines whether the two given images (supplied as raw bytes) hash identically,
///     i.e. whether their similarity is exactly 100 percent.
/// </summary>
/// <param name="image1">The first image bytes.</param>
/// <param name="image2">The second image bytes.</param>
/// <returns>True when the similarity of the two images is 100; otherwise false.</returns>
public static bool ImagesAreSame(byte[] image1, byte[] image2)
{
    var similarity = Similarity(image1, image2);
    return similarity == 100;
}
2018-11-03 21:21:36 +00:00
/// <summary>
///     Returns a percentage-based similarity value between the two given hashes. The higher
///     the percentage, the closer the hashes are to being identical.
///     The value is the share of the 64 bit positions in which both hashes agree:
///     identical hashes give 100, hashes differing in every bit give 0.
/// </summary>
/// <param name="hash1">The first hash.</param>
/// <param name="hash2">The second hash.</param>
/// <returns>The similarity percentage.</returns>
public static double Similarity(ulong hash1, ulong hash2)
{
    // XOR leaves a set bit exactly where the two hashes disagree.
    var differingBits = BitCount(hash1 ^ hash2);
    var matchingBits = 64 - differingBits;
    return matchingBits * 100 / 64.0;
}
2018-11-04 20:33:37 +00:00
/// <summary>
///     Returns a percentage-based similarity value between the image content of the two given
///     files. The higher the percentage, the closer the image contents are to being identical.
///     Each file is hashed with AverageHash and the two hashes are then compared.
/// </summary>
/// <param name="path1">Path to the first image file.</param>
/// <param name="path2">Path to the second image file.</param>
/// <returns>The similarity percentage.</returns>
public static double Similarity(string path1, string path2)
{
    var hash1 = AverageHash(path1);
    var hash2 = AverageHash(path2);
    return Similarity(hash1, hash2);
}
2018-11-04 15:16:52 +00:00
2018-11-04 20:33:37 +00:00
/// <summary>
///     Returns a percentage-based similarity value between the two given images, supplied as
///     raw image bytes. The higher the percentage, the closer the image contents are to
///     being identical.
///     Each image is hashed with AverageHash and the two hashes are then compared.
/// </summary>
/// <param name="image1">The first image bytes.</param>
/// <param name="image2">The second image bytes.</param>
/// <returns>The similarity percentage.</returns>
public static double Similarity(byte[] image1, byte[] image2)
{
    var hash1 = AverageHash(image1);
    var hash2 = AverageHash(image2);
    return Similarity(hash1, hash2);
}
2018-11-03 21:21:36 +00:00
#endregion Public interface methods
}
2018-11-04 20:33:37 +00:00
}