TheAlgorithms/C++ 1.0.0
All the algorithms implemented in C++
Loading...
Searching...
No Matches
md5.cpp
Go to the documentation of this file.
1
41#include <algorithm>
42#include <array>
43#include <cassert>
44#include <cstdint>
45#include <cstring>
46#include <iostream>
47#include <string>
48#include <vector>
49
54namespace hashing {
60namespace md5 {
67uint32_t leftRotate32bits(uint32_t n, std::size_t rotate) {
68 return (n << rotate) | (n >> (32 - rotate));
69}
78 union {
79 uint32_t i;
80 std::array<char, 4> c;
81 } bint = {0x01020304};
82
83 return bint.c[0] == 1;
84}
90uint32_t toLittleEndian32(uint32_t n) {
91 if (!isBigEndian()) {
92 return ((n << 24) & 0xFF000000) | ((n << 8) & 0x00FF0000) |
93 ((n >> 8) & 0x0000FF00) | ((n >> 24) & 0x000000FF);
94 }
95 // Machine works on little endian, no need to change anything
96 return n;
97}
103uint64_t toLittleEndian64(uint64_t n) {
104 if (!isBigEndian()) {
105 return ((n << 56) & 0xFF00000000000000) |
106 ((n << 40) & 0x00FF000000000000) |
107 ((n << 24) & 0x0000FF0000000000) |
108 ((n << 8) & 0x000000FF00000000) |
109 ((n >> 8) & 0x00000000FF000000) |
110 ((n >> 24) & 0x0000000000FF0000) |
111 ((n >> 40) & 0x000000000000FF00) |
112 ((n >> 56) & 0x00000000000000FF);
113 ;
114 }
115 // Machine works on little endian, no need to change anything
116 return n;
117}
123std::string sig2hex(void* sig) {
124 const char* hexChars = "0123456789abcdef";
125 auto* intsig = static_cast<uint8_t*>(sig);
126 std::string hex = "";
127 for (uint8_t i = 0; i < 16; i++) {
128 hex.push_back(hexChars[(intsig[i] >> 4) & 0xF]);
129 hex.push_back(hexChars[(intsig[i]) & 0xF]);
130 }
131 return hex;
132}
139void* hash_bs(const void* input_bs, uint64_t input_size) {
140 auto* input = static_cast<const uint8_t*>(input_bs);
141
142 // Step 0: Initial Data (Those are decided in the MD5 protocol)
143 // s is the shift used in the leftrotate each round
144 std::array<uint32_t, 64> s = {
145 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
146 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20,
147 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
148 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
149 // K is pseudo-random values used each round
150 // The values can be obtained by the following python code:
151
162 std::array<uint32_t, 64> K = {
163 3614090360, 3905402710, 606105819, 3250441966, 4118548399, 1200080426,
164 2821735955, 4249261313, 1770035416, 2336552879, 4294925233, 2304563134,
165 1804603682, 4254626195, 2792965006, 1236535329, 4129170786, 3225465664,
166 643717713, 3921069994, 3593408605, 38016083, 3634488961, 3889429448,
167 568446438, 3275163606, 4107603335, 1163531501, 2850285829, 4243563512,
168 1735328473, 2368359562, 4294588738, 2272392833, 1839030562, 4259657740,
169 2763975236, 1272893353, 4139469664, 3200236656, 681279174, 3936430074,
170 3572445317, 76029189, 3654602809, 3873151461, 530742520, 3299628645,
171 4096336452, 1126891415, 2878612391, 4237533241, 1700485571, 2399980690,
172 4293915773, 2240044497, 1873313359, 4264355552, 2734768916, 1309151649,
173 4149444226, 3174756917, 718787259, 3951481745};
174
175 // The initial 128-bit state
176 uint32_t a0 = 0x67452301, A = 0;
177 uint32_t b0 = 0xefcdab89, B = 0;
178 uint32_t c0 = 0x98badcfe, C = 0;
179 uint32_t d0 = 0x10325476, D = 0;
180
181 // Step 1: Processing the bytestring
182
183 // First compute the size the padded message will have
184 // so it is possible to allocate the right amount of memory
185 uint64_t padded_message_size = 0;
186 if (input_size % 64 < 56) {
187 padded_message_size = input_size + 64 - (input_size % 64);
188 } else {
189 padded_message_size = input_size + 128 - (input_size % 64);
190 }
191
192 std::vector<uint8_t> padded_message(padded_message_size);
193
194 // Beginning of the padded message is the original message
195 std::copy(input, input + input_size, padded_message.begin());
196
197 // Afterwards comes a single 1 bit and then only zeroes
198 padded_message[input_size] = 1 << 7; // 10000000
199 for (uint64_t i = input_size; i % 64 != 56; i++) {
200 if (i == input_size) {
201 continue; // pass first iteration
202 }
203 padded_message[i] = 0;
204 }
205
206 // We then have to add the 64-bit size of the message at the end
207 // When there is a conversion from int to bytestring or vice-versa
208 // We always need to make sure it is little endian
209 uint64_t input_bitsize_le = toLittleEndian64(input_size * 8);
210 for (uint8_t i = 0; i < 8; i++) {
211 padded_message[padded_message_size - 8 + i] =
212 (input_bitsize_le >> (56 - 8 * i)) & 0xFF;
213 }
214
215 // Already allocate memory for blocks
216 std::array<uint32_t, 16> blocks{};
217
218 // Rounds
219 for (uint64_t chunk = 0; chunk * 64 < padded_message_size; chunk++) {
220 // First, build the 16 32-bits blocks from the chunk
221 for (uint8_t bid = 0; bid < 16; bid++) {
222 blocks[bid] = 0;
223
224 // Having to build a 32-bit word from 4-bit words
225 // Add each and shift them to the left
226 for (uint8_t cid = 0; cid < 4; cid++) {
227 blocks[bid] = (blocks[bid] << 8) +
228 padded_message[chunk * 64 + bid * 4 + cid];
229 }
230 }
231
232 A = a0;
233 B = b0;
234 C = c0;
235 D = d0;
236
237 // Main "hashing" loop
238 for (uint8_t i = 0; i < 64; i++) {
239 uint32_t F = 0, g = 0;
240 if (i < 16) {
241 F = (B & C) | ((~B) & D);
242 g = i;
243 } else if (i < 32) {
244 F = (D & B) | ((~D) & C);
245 g = (5 * i + 1) % 16;
246 } else if (i < 48) {
247 F = B ^ C ^ D;
248 g = (3 * i + 5) % 16;
249 } else {
250 F = C ^ (B | (~D));
251 g = (7 * i) % 16;
252 }
253
254 // Update the accumulators
255 F += A + K[i] + toLittleEndian32(blocks[g]);
256
257 A = D;
258 D = C;
259 C = B;
260 B += leftRotate32bits(F, s[i]);
261 }
262 // Update the state with this chunk's hash
263 a0 += A;
264 b0 += B;
265 c0 += C;
266 d0 += D;
267 }
268
269 // Build signature from state
270 // Note, any type could be used for the signature
271 // uint8_t was used to make the 16 bytes obvious
272 // The sig needs to be little endian
273 auto* sig = new uint8_t[16];
274 for (uint8_t i = 0; i < 4; i++) {
275 sig[i] = (a0 >> (8 * i)) & 0xFF;
276 sig[i + 4] = (b0 >> (8 * i)) & 0xFF;
277 sig[i + 8] = (c0 >> (8 * i)) & 0xFF;
278 sig[i + 12] = (d0 >> (8 * i)) & 0xFF;
279 }
280
281 return sig;
282}
288void* hash(const std::string& message) {
289 return hash_bs(&message[0], message.size());
290}
291} // namespace md5
292} // namespace hashing
293
298static void test() {
299 // Hashes empty string and stores signature
300 void* sig = hashing::md5::hash("");
301 std::cout << "Hashing empty string" << std::endl;
302 // Prints signature hex representation
303 std::cout << hashing::md5::sig2hex(sig) << std::endl << std::endl;
304 // Test with cassert whether sig is correct from the expected value
305 assert(hashing::md5::sig2hex(sig).compare(
306 "d41d8cd98f00b204e9800998ecf8427e") == 0);
307
308 // Hashes "The quick brown fox jumps over the lazy dog" and stores signature
309 void* sig2 =
310 hashing::md5::hash("The quick brown fox jumps over the lazy dog");
311 std::cout << "Hashing The quick brown fox jumps over the lazy dog"
312 << std::endl;
313 // Prints signature hex representation
314 std::cout << hashing::md5::sig2hex(sig2) << std::endl << std::endl;
315 // Test with cassert whether sig is correct from the expected value
316 assert(hashing::md5::sig2hex(sig2).compare(
317 "9e107d9d372bb6826bd81d3542a419d6") == 0);
318
319 // Hashes "The quick brown fox jumps over the lazy dog." (notice the
320 // additional period) and stores signature
321 void* sig3 =
322 hashing::md5::hash("The quick brown fox jumps over the lazy dog.");
323 std::cout << "Hashing "
324 "The quick brown fox jumps over the lazy dog."
325 << std::endl;
326 // Prints signature hex representation
327 std::cout << hashing::md5::sig2hex(sig3) << std::endl << std::endl;
328 // Test with cassert whether sig is correct from the expected value
329 assert(hashing::md5::sig2hex(sig3).compare(
330 "e4d909c290d0fb1ca068ffaddf22cbd0") == 0);
331
332 // Hashes "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
333 // and stores signature
334 void* sig4 = hashing::md5::hash(
335 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789");
336 std::cout
337 << "Hashing "
338 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"
339 << std::endl;
340 // Prints signature hex representation
341 std::cout << hashing::md5::sig2hex(sig4) << std::endl << std::endl;
342 // Test with cassert whether sig is correct from the expected value
343 assert(hashing::md5::sig2hex(sig4).compare(
344 "d174ab98d277d9f5a5611c2c9f419d9f") == 0);
345}
346
352static void interactive() {
353 while (true) {
354 std::string input;
355 std::cout << "Enter a message to be hashed (Ctrl-C to exit): "
356 << std::endl;
357 std::getline(std::cin, input);
358 void* sig = hashing::md5::hash(input);
359 std::cout << "Hash is: " << hashing::md5::sig2hex(sig) << std::endl;
360
361 while (true) {
362 std::cout << "Want to enter another message? (y/n) ";
363 std::getline(std::cin, input);
364 if (input.compare("y") == 0) {
365 break;
366 } else if (input.compare("n") == 0) {
367 return;
368 }
369 }
370 }
371}
372
377int main() {
378 test(); // run self-test implementations
379
380 // Launch interactive mode where user can input messages and see
381 // their hash
382 interactive();
383 return 0;
384}
void * hash_bs(const void *input_bs, uint64_t input_size)
The MD5 algorithm itself, taking in a bytestring.
Definition md5.cpp:139
uint32_t toLittleEndian32(uint32_t n)
Sets 32-bit integer to little-endian if needed.
Definition md5.cpp:90
uint64_t toLittleEndian64(uint64_t n)
Sets 64-bit integer to little-endian if needed.
Definition md5.cpp:103
void * hash(const std::string &message)
Converts the string to bytestring and calls the main algorithm.
Definition md5.cpp:288
static void test()
Self-test implementations of well-known MD5 hashes.
Definition md5.cpp:298
std::string sig2hex(void *sig)
Transforms the 128-bit MD5 signature into a 32 char hex string.
Definition md5.cpp:123
static void interactive()
Puts user in a loop where inputs can be given and MD5 hash will be computed and printed.
Definition md5.cpp:352
uint32_t leftRotate32bits(uint32_t n, std::size_t rotate)
Rotates the bits of a 32-bit unsigned integer.
Definition md5.cpp:67
int main()
Main function.
Definition md5.cpp:377
bool isBigEndian()
Checks whether integers are stored as big endian or not.
Definition md5.cpp:77
Used for assert.
int compare(const void *a, const void *b)