hash32/murmur3.rs

use core::slice;
use core::mem::MaybeUninit;

use byteorder::{ByteOrder, LE};

use crate::Hasher as _;

/// 32-bit MurmurHash3 hasher
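///
/// Usage sketch (an illustration, not part of the original file; it assumes the
/// crate re-exports this type, e.g. as `Murmur3Hasher`, next to the
/// `hash32::Hasher` trait):
///
/// ```ignore
/// use core::hash::Hasher as _;
/// use hash32::Hasher as _;
///
/// let mut hasher = hash32::Murmur3Hasher::default();
/// hasher.write(b"hello");
/// let hash: u32 = hasher.finish32();
/// ```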
pub struct Hasher {
    buf: Buffer,
    index: Index,
    processed: u32,
    state: State,
}

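/// Running 32-bit hash state (`h1` in the reference implementation)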
struct State(u32);

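/// 4-byte staging area for input that does not yet complete a block
///
/// The buffer is 4-byte aligned so a completed block can be processed in place
/// as a little-endian `u32`.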
#[derive(Clone, Copy)]
#[repr(align(4))]
struct Buffer {
    bytes: MaybeUninit<[u8; 4]>,
}

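/// Number of bytes currently held in `Buffer` (always in the range `0..=3`)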
#[derive(Clone, Copy, PartialEq)]
enum Index {
    _0,
    _1,
    _2,
    _3,
}

impl Index {
    fn usize(&self) -> usize {
        match *self {
            Index::_0 => 0,
            Index::_1 => 1,
            Index::_2 => 2,
            Index::_3 => 3,
        }
    }
}

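// Maps a byte count to an `Index`, wrapping modulo 4 (a full buffer wraps back
// to index 0).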
impl From<usize> for Index {
    fn from(x: usize) -> Self {
        match x % 4 {
            0 => Index::_0,
            1 => Index::_1,
            2 => Index::_2,
            3 => Index::_3,
            _ => unreachable!(),
        }
    }
}

impl Hasher {
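    /// Appends `buf` (at most 3 bytes) to the staging buffer and advances
    /// `index` accordingly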
    fn push(&mut self, buf: &[u8]) {
        let start = self.index.usize();
        let len = buf.len();
        // NOTE(unsafe) avoid calling `memcpy` on a 0-3 byte copy
        // self.buf.bytes[start..start+len].copy_from(buf);
        for i in 0..len {
            unsafe {
                *self.buf.bytes.assume_init_mut().get_unchecked_mut(start + i) =
                    *buf.get_unchecked(i);
            }
        }
        self.index = Index::from(start + len);
    }
}

impl Default for Hasher {
    #[allow(deprecated)]
    fn default() -> Self {
        Hasher {
            buf: Buffer { bytes: MaybeUninit::uninit() },
            index: Index::_0,
            processed: 0,
            state: State(0),
        }
    }
}

impl crate::Hasher for Hasher {
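    // Mixes any buffered tail bytes into the state, then applies the standard
    // MurmurHash3 finalizer (length XOR followed by fmix32).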
    fn finish32(&self) -> u32 {
        // tail
        let mut state = match self.index {
            Index::_3 => {
                let mut block = 0;
                unsafe {
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[2]) << 16;
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[1]) << 8;
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[0]);
                }
                self.state.0 ^ pre_mix(block)
            }
            Index::_2 => {
                let mut block = 0;
                unsafe {
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[1]) << 8;
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[0]);
                }
                self.state.0 ^ pre_mix(block)
            }
            Index::_1 => {
                let mut block = 0;
                unsafe {
                    block ^= u32::from(self.buf.bytes.assume_init_ref()[0]);
                }
                self.state.0 ^ pre_mix(block)
            }
            Index::_0 => self.state.0,
        };

        // finalization mix
        state ^= self.processed;
        state ^= state >> 16;
        state = state.wrapping_mul(0x85ebca6b);
        state ^= state >> 13;
        state = state.wrapping_mul(0xc2b2ae35);
        state ^= state >> 16;

        state
    }
}

impl core::hash::Hasher for Hasher {
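    // First completes a partially filled buffer (if any) into a full block,
    // then hashes full 4-byte blocks straight out of `bytes`, buffering the
    // remaining 0-3 tail bytes for a later `write` or `finish32`.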
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        let len = bytes.len();
        self.processed += len as u32;

        let body = if self.index == Index::_0 {
            bytes
        } else {
            let index = self.index.usize();
            if len + index >= 4 {
                // we can complete a block using the data left in the buffer
                // NOTE(unsafe) avoid panicking branch (`slice_index_len_fail`)
                // let (head, body) = bytes.split_at(4 - index);
                let mid = 4 - index;
                let head = unsafe { slice::from_raw_parts(bytes.as_ptr(), mid) };
                let body = unsafe {
                    slice::from_raw_parts(bytes.as_ptr().add(mid), len - mid)
                };

                // NOTE(unsafe) avoid calling `memcpy` on a 0-3 byte copy
                // self.buf.bytes[index..].copy_from_slice(head);
                for i in 0..4 - index {
                    unsafe {
                        *self.buf.bytes.assume_init_mut().get_unchecked_mut(index + i) =
                            *head.get_unchecked(i);
                    }
                }

                self.index = Index::_0;

                self.state.process_block(&self.buf.bytes);

                body
            } else {
                bytes
            }
        };

        for block in body.chunks(4) {
            if block.len() == 4 {
                self.state
                    .process_block(unsafe { &*(block.as_ptr() as *const _) });
            } else {
                self.push(block);
            }
        }

        // XXX is this faster?
        // for block in body.exact_chunks(4) {
        //     self.state
        //         .process_block(unsafe { &*(block.as_ptr() as *const _) });
        // }

        // let tail = body.split_at(body.len() / 4 * 4).1;

        // self.push(tail);
    }

    #[inline]
    fn finish(&self) -> u64 {
        self.finish32().into()
    }
}

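// MurmurHash3_x86_32 block constants: every 4-byte block is multiplied by `C1`,
// rotated left by `R1` bits and multiplied by `C2` (see `pre_mix`) before it is
// mixed into the state.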
const C1: u32 = 0xcc9e2d51;
const C2: u32 = 0x1b873593;
const R1: u32 = 15;

impl State {
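    /// Mixes one full little-endian block into the running state (the "body"
    /// step of MurmurHash3_x86_32)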
    fn process_block(&mut self, block: &MaybeUninit<[u8; 4]>) {
        self.0 ^= pre_mix(LE::read_u32(unsafe { block.assume_init_ref() }));
        self.0 = self.0.rotate_left(13);
        self.0 = 5u32.wrapping_mul(self.0).wrapping_add(0xe6546b64);
    }
}

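/// Per-block scramble shared by the body and tail steps: multiply by `C1`,
/// rotate left by `R1`, multiply by `C2`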
fn pre_mix(mut block: u32) -> u32 {
    block = block.wrapping_mul(C1);
    block = block.rotate_left(R1);
    block = block.wrapping_mul(C2);
    block
}
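
// A minimal sanity-check sketch, not part of the original file: streaming the
// same input through `write` in different splits must produce the same hash,
// which is exactly the invariant the buffering logic above maintains.
#[cfg(test)]
mod tests {
    use core::hash::Hasher as _;

    use crate::Hasher as _;

    #[test]
    fn split_writes_match_single_write() {
        let data = b"hello, murmur3";

        // hash the input in one shot
        let mut one = super::Hasher::default();
        one.write(data);

        // hash the same input split across two writes (5 + 9 bytes)
        let mut two = super::Hasher::default();
        two.write(&data[..5]);
        two.write(&data[5..]);

        assert_eq!(one.finish32(), two.finish32());
    }
}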