siphasher/
sip.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An implementation of SipHash.
12
13use core::cmp;
14use core::hash;
15use core::hash::Hasher as _;
16use core::marker::PhantomData;
17use core::mem;
18use core::ptr;
19use core::u64;
20
21/// An implementation of SipHash 1-3.
22///
23/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
24#[derive(Debug, Clone, Copy, Default)]
25#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
26pub struct SipHasher13 {
27    hasher: Hasher<Sip13Rounds>,
28}
29
30/// An implementation of SipHash 2-4.
31///
32/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
33#[derive(Debug, Clone, Copy, Default)]
34#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
35pub struct SipHasher24 {
36    hasher: Hasher<Sip24Rounds>,
37}
38
39/// An implementation of SipHash 2-4.
40///
41/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
42///
43/// SipHash is a general-purpose hashing function: it runs at a good
44/// speed (competitive with Spooky and City) and permits strong _keyed_
45/// hashing. This lets you key your hashtables from a strong RNG, such as
46/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
47///
48/// Although the SipHash algorithm is considered to be generally strong,
49/// it is not intended for cryptographic purposes. As such, all
50/// cryptographic uses of this implementation are _strongly discouraged_.
51#[derive(Debug, Clone, Copy, Default)]
52#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
53pub struct SipHasher(SipHasher24);
54
55#[derive(Debug, Clone, Copy)]
56#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
57struct Hasher<S: Sip> {
58    k0: u64,
59    k1: u64,
60    length: usize, // how many bytes we've processed
61    state: State,  // hash State
62    tail: u64,     // unprocessed bytes le
63    ntail: usize,  // how many bytes in tail are valid
64    _marker: PhantomData<S>,
65}
66
67#[derive(Debug, Clone, Copy)]
68#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
69struct State {
70    // v0, v2 and v1, v3 show up in pairs in the algorithm,
71    // and simd implementations of SipHash will use vectors
72    // of v02 and v13. By placing them in this order in the struct,
73    // the compiler can pick up on just a few simd optimizations by itself.
74    v0: u64,
75    v2: u64,
76    v1: u64,
77    v3: u64,
78}
79
80macro_rules! compress {
81    ($state:expr) => {{
82        compress!($state.v0, $state.v1, $state.v2, $state.v3)
83    }};
84    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
85        $v0 = $v0.wrapping_add($v1);
86        $v1 = $v1.rotate_left(13);
87        $v1 ^= $v0;
88        $v0 = $v0.rotate_left(32);
89        $v2 = $v2.wrapping_add($v3);
90        $v3 = $v3.rotate_left(16);
91        $v3 ^= $v2;
92        $v0 = $v0.wrapping_add($v3);
93        $v3 = $v3.rotate_left(21);
94        $v3 ^= $v0;
95        $v2 = $v2.wrapping_add($v1);
96        $v1 = $v1.rotate_left(17);
97        $v1 ^= $v2;
98        $v2 = $v2.rotate_left(32);
99    }};
100}
101
102/// Loads an integer of the desired type from a byte stream, in LE order. Uses
103/// `copy_nonoverlapping` to let the compiler generate the most efficient way
104/// to load it from a possibly unaligned address.
105///
106/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
107macro_rules! load_int_le {
108    ($buf:expr, $i:expr, $int_ty:ident) => {{
109        debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
110        let mut data = 0 as $int_ty;
111        ptr::copy_nonoverlapping(
112            $buf.as_ptr().add($i),
113            &mut data as *mut _ as *mut u8,
114            mem::size_of::<$int_ty>(),
115        );
116        data.to_le()
117    }};
118}
119
120/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
121/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
122/// sizes and avoid calling `memcpy`, which is good for speed.
123///
124/// Unsafe because: unchecked indexing at start..start+len
125#[inline]
126unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
127    debug_assert!(len < 8);
128    let mut i = 0; // current byte index (from LSB) in the output u64
129    let mut out = 0;
130    if i + 3 < len {
131        out = load_int_le!(buf, start + i, u32) as u64;
132        i += 4;
133    }
134    if i + 1 < len {
135        out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
136        i += 2
137    }
138    if i < len {
139        out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
140        i += 1;
141    }
142    debug_assert_eq!(i, len);
143    out
144}
145
146impl SipHasher {
147    /// Creates a new `SipHasher` with the two initial keys set to 0.
148    #[inline]
149    pub fn new() -> SipHasher {
150        SipHasher::new_with_keys(0, 0)
151    }
152
153    /// Creates a `SipHasher` that is keyed off the provided keys.
154    #[inline]
155    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
156        SipHasher(SipHasher24::new_with_keys(key0, key1))
157    }
158
159    /// Creates a `SipHasher` from a 16 byte key.
160    pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
161        let mut b0 = [0u8; 8];
162        let mut b1 = [0u8; 8];
163        b0.copy_from_slice(&key[0..8]);
164        b1.copy_from_slice(&key[8..16]);
165        let key0 = u64::from_le_bytes(b0);
166        let key1 = u64::from_le_bytes(b1);
167        Self::new_with_keys(key0, key1)
168    }
169
170    /// Get the keys used by this hasher
171    pub fn keys(&self) -> (u64, u64) {
172        (self.0.hasher.k0, self.0.hasher.k1)
173    }
174
175    /// Get the key used by this hasher as a 16 byte vector
176    pub fn key(&self) -> [u8; 16] {
177        let mut bytes = [0u8; 16];
178        bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
179        bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
180        bytes
181    }
182
183    /// Hash a byte array - This is the easiest and safest way to use SipHash.
184    #[inline]
185    pub fn hash(&self, bytes: &[u8]) -> u64 {
186        let mut hasher = self.0.hasher;
187        hasher.write(bytes);
188        hasher.finish()
189    }
190}
191
192impl SipHasher13 {
193    /// Creates a new `SipHasher13` with the two initial keys set to 0.
194    #[inline]
195    pub fn new() -> SipHasher13 {
196        SipHasher13::new_with_keys(0, 0)
197    }
198
199    /// Creates a `SipHasher13` that is keyed off the provided keys.
200    #[inline]
201    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
202        SipHasher13 {
203            hasher: Hasher::new_with_keys(key0, key1),
204        }
205    }
206
207    /// Creates a `SipHasher13` from a 16 byte key.
208    pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
209        let mut b0 = [0u8; 8];
210        let mut b1 = [0u8; 8];
211        b0.copy_from_slice(&key[0..8]);
212        b1.copy_from_slice(&key[8..16]);
213        let key0 = u64::from_le_bytes(b0);
214        let key1 = u64::from_le_bytes(b1);
215        Self::new_with_keys(key0, key1)
216    }
217
218    /// Get the keys used by this hasher
219    pub fn keys(&self) -> (u64, u64) {
220        (self.hasher.k0, self.hasher.k1)
221    }
222
223    /// Get the key used by this hasher as a 16 byte vector
224    pub fn key(&self) -> [u8; 16] {
225        let mut bytes = [0u8; 16];
226        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
227        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
228        bytes
229    }
230
231    /// Hash a byte array - This is the easiest and safest way to use SipHash.
232    #[inline]
233    pub fn hash(&self, bytes: &[u8]) -> u64 {
234        let mut hasher = self.hasher;
235        hasher.write(bytes);
236        hasher.finish()
237    }
238}
239
240impl SipHasher24 {
241    /// Creates a new `SipHasher24` with the two initial keys set to 0.
242    #[inline]
243    pub fn new() -> SipHasher24 {
244        SipHasher24::new_with_keys(0, 0)
245    }
246
247    /// Creates a `SipHasher24` that is keyed off the provided keys.
248    #[inline]
249    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
250        SipHasher24 {
251            hasher: Hasher::new_with_keys(key0, key1),
252        }
253    }
254
255    /// Creates a `SipHasher24` from a 16 byte key.
256    pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
257        let mut b0 = [0u8; 8];
258        let mut b1 = [0u8; 8];
259        b0.copy_from_slice(&key[0..8]);
260        b1.copy_from_slice(&key[8..16]);
261        let key0 = u64::from_le_bytes(b0);
262        let key1 = u64::from_le_bytes(b1);
263        Self::new_with_keys(key0, key1)
264    }
265
266    /// Get the keys used by this hasher
267    pub fn keys(&self) -> (u64, u64) {
268        (self.hasher.k0, self.hasher.k1)
269    }
270
271    /// Get the key used by this hasher as a 16 byte vector
272    pub fn key(&self) -> [u8; 16] {
273        let mut bytes = [0u8; 16];
274        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
275        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
276        bytes
277    }
278
279    /// Hash a byte array - This is the easiest and safest way to use SipHash.
280    #[inline]
281    pub fn hash(&self, bytes: &[u8]) -> u64 {
282        let mut hasher = self.hasher;
283        hasher.write(bytes);
284        hasher.finish()
285    }
286}
287
288impl<S: Sip> Hasher<S> {
289    #[inline]
290    fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
291        let mut state = Hasher {
292            k0: key0,
293            k1: key1,
294            length: 0,
295            state: State {
296                v0: 0,
297                v1: 0,
298                v2: 0,
299                v3: 0,
300            },
301            tail: 0,
302            ntail: 0,
303            _marker: PhantomData,
304        };
305        state.reset();
306        state
307    }
308
309    #[inline]
310    fn reset(&mut self) {
311        self.length = 0;
312        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
313        self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
314        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
315        self.state.v3 = self.k1 ^ 0x7465646279746573;
316        self.ntail = 0;
317    }
318
319    // A specialized write function for values with size <= 8.
320    //
321    // The hashing of multi-byte integers depends on endianness. E.g.:
322    // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
323    // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
324    //
325    // This function does the right thing for little-endian hardware. On
326    // big-endian hardware `x` must be byte-swapped first to give the right
327    // behaviour. After any byte-swapping, the input must be zero-extended to
328    // 64-bits. The caller is responsible for the byte-swapping and
329    // zero-extension.
330    #[inline]
331    fn short_write<T>(&mut self, _x: T, x: u64) {
332        let size = mem::size_of::<T>();
333        self.length += size;
334
335        // The original number must be zero-extended, not sign-extended.
336        debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
337
338        // The number of bytes needed to fill `self.tail`.
339        let needed = 8 - self.ntail;
340
341        self.tail |= x << (8 * self.ntail);
342        if size < needed {
343            self.ntail += size;
344            return;
345        }
346
347        // `self.tail` is full, process it.
348        self.state.v3 ^= self.tail;
349        S::c_rounds(&mut self.state);
350        self.state.v0 ^= self.tail;
351
352        self.ntail = size - needed;
353        self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
354    }
355}
356
357impl hash::Hasher for SipHasher {
358    #[inline]
359    fn write(&mut self, msg: &[u8]) {
360        self.0.write(msg)
361    }
362
363    #[inline]
364    fn finish(&self) -> u64 {
365        self.0.finish()
366    }
367
368    #[inline]
369    fn write_usize(&mut self, i: usize) {
370        self.0.write_usize(i);
371    }
372
373    #[inline]
374    fn write_u8(&mut self, i: u8) {
375        self.0.write_u8(i);
376    }
377
378    #[inline]
379    fn write_u16(&mut self, i: u16) {
380        self.0.write_u16(i);
381    }
382
383    #[inline]
384    fn write_u32(&mut self, i: u32) {
385        self.0.write_u32(i);
386    }
387
388    #[inline]
389    fn write_u64(&mut self, i: u64) {
390        self.0.write_u64(i);
391    }
392}
393
394impl hash::Hasher for SipHasher13 {
395    #[inline]
396    fn write(&mut self, msg: &[u8]) {
397        self.hasher.write(msg)
398    }
399
400    #[inline]
401    fn finish(&self) -> u64 {
402        self.hasher.finish()
403    }
404
405    #[inline]
406    fn write_usize(&mut self, i: usize) {
407        self.hasher.write_usize(i);
408    }
409
410    #[inline]
411    fn write_u8(&mut self, i: u8) {
412        self.hasher.write_u8(i);
413    }
414
415    #[inline]
416    fn write_u16(&mut self, i: u16) {
417        self.hasher.write_u16(i);
418    }
419
420    #[inline]
421    fn write_u32(&mut self, i: u32) {
422        self.hasher.write_u32(i);
423    }
424
425    #[inline]
426    fn write_u64(&mut self, i: u64) {
427        self.hasher.write_u64(i);
428    }
429}
430
431impl hash::Hasher for SipHasher24 {
432    #[inline]
433    fn write(&mut self, msg: &[u8]) {
434        self.hasher.write(msg)
435    }
436
437    #[inline]
438    fn finish(&self) -> u64 {
439        self.hasher.finish()
440    }
441
442    #[inline]
443    fn write_usize(&mut self, i: usize) {
444        self.hasher.write_usize(i);
445    }
446
447    #[inline]
448    fn write_u8(&mut self, i: u8) {
449        self.hasher.write_u8(i);
450    }
451
452    #[inline]
453    fn write_u16(&mut self, i: u16) {
454        self.hasher.write_u16(i);
455    }
456
457    #[inline]
458    fn write_u32(&mut self, i: u32) {
459        self.hasher.write_u32(i);
460    }
461
462    #[inline]
463    fn write_u64(&mut self, i: u64) {
464        self.hasher.write_u64(i);
465    }
466}
467
468impl<S: Sip> hash::Hasher for Hasher<S> {
469    #[inline]
470    fn write_usize(&mut self, i: usize) {
471        self.short_write(i, i.to_le() as u64);
472    }
473
474    #[inline]
475    fn write_u8(&mut self, i: u8) {
476        self.short_write(i, i as u64);
477    }
478
479    #[inline]
480    fn write_u32(&mut self, i: u32) {
481        self.short_write(i, i.to_le() as u64);
482    }
483
484    #[inline]
485    fn write_u64(&mut self, i: u64) {
486        self.short_write(i, i.to_le());
487    }
488
489    #[inline]
490    fn write(&mut self, msg: &[u8]) {
491        let length = msg.len();
492        self.length += length;
493
494        let mut needed = 0;
495
496        if self.ntail != 0 {
497            needed = 8 - self.ntail;
498            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
499            if length < needed {
500                self.ntail += length;
501                return;
502            } else {
503                self.state.v3 ^= self.tail;
504                S::c_rounds(&mut self.state);
505                self.state.v0 ^= self.tail;
506                self.ntail = 0;
507            }
508        }
509
510        // Buffered tail is now flushed, process new input.
511        let len = length - needed;
512        let left = len & 0x7;
513
514        let mut i = needed;
515        while i < len - left {
516            let mi = unsafe { load_int_le!(msg, i, u64) };
517
518            self.state.v3 ^= mi;
519            S::c_rounds(&mut self.state);
520            self.state.v0 ^= mi;
521
522            i += 8;
523        }
524
525        self.tail = unsafe { u8to64_le(msg, i, left) };
526        self.ntail = left;
527    }
528
529    #[inline]
530    fn finish(&self) -> u64 {
531        let mut state = self.state;
532
533        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
534
535        state.v3 ^= b;
536        S::c_rounds(&mut state);
537        state.v0 ^= b;
538
539        state.v2 ^= 0xff;
540        S::d_rounds(&mut state);
541
542        state.v0 ^ state.v1 ^ state.v2 ^ state.v3
543    }
544}
545
546impl<S: Sip> Default for Hasher<S> {
547    /// Creates a `Hasher<S>` with the two initial keys set to 0.
548    #[inline]
549    fn default() -> Hasher<S> {
550        Hasher::new_with_keys(0, 0)
551    }
552}
553
554#[doc(hidden)]
555trait Sip {
556    fn c_rounds(_: &mut State);
557    fn d_rounds(_: &mut State);
558}
559
560#[derive(Debug, Clone, Copy, Default)]
561struct Sip13Rounds;
562
563impl Sip for Sip13Rounds {
564    #[inline]
565    fn c_rounds(state: &mut State) {
566        compress!(state);
567    }
568
569    #[inline]
570    fn d_rounds(state: &mut State) {
571        compress!(state);
572        compress!(state);
573        compress!(state);
574    }
575}
576
577#[derive(Debug, Clone, Copy, Default)]
578struct Sip24Rounds;
579
580impl Sip for Sip24Rounds {
581    #[inline]
582    fn c_rounds(state: &mut State) {
583        compress!(state);
584        compress!(state);
585    }
586
587    #[inline]
588    fn d_rounds(state: &mut State) {
589        compress!(state);
590        compress!(state);
591        compress!(state);
592        compress!(state);
593    }
594}