Skip to content

Commit e610b72

Browse files
authored
Merge pull request #17 from dferreyra/ImproveHashPerformance
Improve performance by improving the handling of hashes
2 parents 630fced + e600eae commit e610b72

File tree

10 files changed

+188
-414
lines changed

10 files changed

+188
-414
lines changed

ProbabilisticDataStructures/CuckooBloomFilter.cs

Lines changed: 3 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -384,38 +384,23 @@ private bool Insert(uint i1, uint i2, byte[] f)
384384
/// fingerprint for the given data</returns>
385385
private Components GetComponents(byte[] data)
386386
{
387-
var hash = this.ComputeHash(data);
387+
var hash = Hash.ComputeHash(data);
388388
var f = hash.Take((int)this.F).ToArray();
389389
var i1 = this.ComputeHashSum32(hash);
390390
var i2 = this.ComputeHashSum32(f);
391391

392392
return Components.Create(f, i1, i2);
393393
}
394394

395-
/// <summary>
396-
/// Returns a 32-bit hash value for the given data.
397-
/// </summary>
398-
/// <param name="data">Data</param>
399-
/// <returns>32-bit hash value</returns>
400-
private byte[] ComputeHash(byte[] data)
401-
{
402-
var hash = new Hash(this.Hash);
403-
hash.ComputeHash(data);
404-
var sum = hash.Sum();
405-
return sum;
406-
}
407-
408395
/// <summary>
409396
/// Returns the sum of the hash.
410397
/// </summary>
411398
/// <param name="data">Data</param>
412399
/// <returns>32-bit hash value</returns>
413400
private uint ComputeHashSum32(byte[] data)
414401
{
415-
var hash = new Hash(this.Hash);
416-
hash.ComputeHash(data);
417-
var sum = hash.Sum();
418-
return Utils.ToBigEndianUInt32(sum);
402+
var sum = Hash.ComputeHash(data);
403+
return Utils.HashBytesToUInt32(sum);
419404
}
420405

421406
/// <summary>

ProbabilisticDataStructures/Hash.cs

Lines changed: 0 additions & 78 deletions
This file was deleted.

ProbabilisticDataStructures/Hash128.cs

Lines changed: 0 additions & 100 deletions
This file was deleted.

ProbabilisticDataStructures/HyperLogLog.cs

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -208,10 +208,8 @@ public void SetHash(HashAlgorithm h)
208208
/// <returns>32-bit hash value</returns>
209209
private uint CalculateHash(byte[] data)
210210
{
211-
var hash = new Hash(this.Hash);
212-
hash.ComputeHash(data);
213-
var sum = hash.Sum();
214-
return Utils.ToBigEndianUInt32(sum);
211+
var sum = Hash.ComputeHash(data);
212+
return Utils.HashBytesToUInt32(sum);
215213
}
216214

217215
/// <summary>

ProbabilisticDataStructures/InverseBloomFilter.cs

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -161,10 +161,8 @@ private uint Index(byte[] data)
161161
/// <returns>32-bit hash value</returns>
162162
private uint ComputeHashSum32(byte[] data)
163163
{
164-
var hash = new Hash(this.Hash);
165-
hash.ComputeHash(data);
166-
var sum = hash.Sum();
167-
return Utils.ToBigEndianUInt32(sum);
164+
var sum = Hash.ComputeHash(data);
165+
return Utils.HashBytesToUInt32(sum);
168166
}
169167

170168
/// <summary>

ProbabilisticDataStructures/Utils.cs

Lines changed: 78 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
1-
using System;
1+
using System;
22
using System.Linq;
33
using System.Security.Cryptography;
4+
using System.Text;
45

56
namespace ProbabilisticDataStructures
67
{
@@ -48,19 +49,32 @@ public static uint OptimalK(double fpRate)
4849

4950
/// <summary>
5051
/// Returns the upper and lower base hash values from which the k hashes are
51-
/// derived.
52+
/// derived. The result will be the same regardless of the endianness of the
53+
/// architecture.
5254
/// </summary>
5355
/// <param name="data">The data bytes to hash.</param>
5456
/// <param name="algorithm">The hashing algorithm to use.</param>
5557
/// <returns>A HashKernel</returns>
5658
public static HashKernelReturnValue HashKernel(byte[] data, HashAlgorithm algorithm)
5759
{
58-
var hash = new Hash(algorithm);
59-
hash.ComputeHash(data);
60-
var sum = hash.Sum();
60+
var sum = algorithm.ComputeHash(data);
61+
return HashKernelFromHashBytes(sum);
62+
}
63+
64+
/// <summary>
65+
/// Returns the upper and lower base hash values from which the k hashes are
66+
/// derived using the given hash bytes directly. The result will be the
67+
/// same regardless of the endianness of the architecture. Used by a unit
68+
/// test to confirm the calculation is compatible with the HashKernel from
69+
/// https://github.com/tylertreat/BoomFilters running in Go.
70+
/// </summary>
71+
/// <param name="hashBytes">The hash bytes.</param>
72+
/// <returns>A HashKernel</returns>
73+
public static HashKernelReturnValue HashKernelFromHashBytes(byte[] hashBytes)
74+
{
6175
return HashKernelReturnValue.Create(
62-
ToBigEndianUInt32(sum.Skip(4).Take(4).ToArray()),
63-
ToBigEndianUInt32(sum.Take(4).ToArray())
76+
HashBytesToUInt32(hashBytes, 0),
77+
HashBytesToUInt32(hashBytes, 4)
6478
);
6579
}
6680

@@ -73,30 +87,73 @@ public static HashKernelReturnValue HashKernel(byte[] data, HashAlgorithm algori
7387
/// <returns>A HashKernel</returns>
7488
public static HashKernel128ReturnValue HashKernel128(byte[] data, HashAlgorithm algorithm)
7589
{
76-
var hash = new Hash128(algorithm);
77-
var sum = hash.ComputeHashAndSum(data);
90+
var sum = algorithm.ComputeHash(data);
7891
return HashKernel128ReturnValue.Create(
79-
ToBigEndianUInt64(sum, 8),
80-
ToBigEndianUInt64(sum, 0)
92+
HashBytesToUInt64(sum, 0),
93+
HashBytesToUInt64(sum, 8)
8194
);
8295
}
8396

84-
public static uint ToBigEndianUInt32(byte[] bytes)
97+
/// <summary>
98+
/// Returns the uint represented by the given hash bytes, starting at
99+
/// byte <paramref name="offset"/>. The result will be the same
100+
/// regardless of the endianness of the architecture.
101+
/// </summary>
102+
/// <param name="hashBytes"></param>
103+
/// <param name="offset"></param>
104+
/// <returns></returns>
105+
public static uint HashBytesToUInt32(byte[] hashBytes, int offset = 0)
85106
{
86-
if (BitConverter.IsLittleEndian)
87-
Array.Reverse(bytes);
107+
return
108+
((uint)hashBytes[offset]) |
109+
((uint)hashBytes[offset + 1]) << 8 |
110+
((uint)hashBytes[offset + 2]) << 16 |
111+
((uint)hashBytes[offset + 3]) << 24;
112+
}
88113

89-
uint i = BitConverter.ToUInt32(bytes, 0);
90-
return i;
114+
/// <summary>
115+
/// Returns the ulong represented by the given hash bytes, starting at
116+
/// byte <paramref name="offset"/>. The result will be the same
117+
/// regardless of the endianness of the architecture.
118+
/// </summary>
119+
/// <param name="hashBytes"></param>
120+
/// <param name="offset"></param>
121+
/// <returns></returns>
122+
public static ulong HashBytesToUInt64(byte[] hashBytes, int offset = 0)
123+
{
124+
return
125+
((ulong)hashBytes[offset]) |
126+
((ulong)hashBytes[offset + 1]) << 8 |
127+
((ulong)hashBytes[offset + 2]) << 16 |
128+
((ulong)hashBytes[offset + 3]) << 24 |
129+
((ulong)hashBytes[offset + 4]) << 32 |
130+
((ulong)hashBytes[offset + 5]) << 40 |
131+
((ulong)hashBytes[offset + 6]) << 48 |
132+
((ulong)hashBytes[offset + 7]) << 56;
91133
}
92134

93-
public static ulong ToBigEndianUInt64(byte[] bytes, int offset)
135+
/// <summary>
136+
/// Compute the hash for the provided bytes.
137+
/// </summary>
138+
/// <param name="inputBytes">The bytes to hash.</param>
139+
/// <returns>The hash string of the bytes.</returns>
140+
public static string ComputeHashAsString(byte[] inputBytes, HashAlgorithm hashAlgorithm)
94141
{
95-
if (BitConverter.IsLittleEndian)
96-
Array.Reverse(bytes, offset, 8);
142+
// Compute the hash of the input byte array.
143+
byte[] data = hashAlgorithm.ComputeHash(inputBytes);
144+
145+
// Create a new StringBuilder to collect the bytes and create a string.
146+
StringBuilder sb = new StringBuilder();
147+
148+
// Loop through each byte of the hashed data and format each one as a
149+
// hexadecimal string.
150+
for (int i = 0; i < data.Length; i++)
151+
{
152+
sb.Append(data[i].ToString("X2"));
153+
}
97154

98-
ulong i = BitConverter.ToUInt64(bytes, offset);
99-
return i;
155+
// Return the hexadecimal string.
156+
return sb.ToString();
100157
}
101158
}
102159

0 commit comments

Comments
 (0)