phf_shared/
lib.rs

1//! See [the `phf` crate's documentation][phf] for details.
2//!
3//! [phf]: https://docs.rs/phf
4
5#![doc(html_root_url = "https://docs.rs/phf_shared/0.11")]
6#![cfg_attr(not(feature = "std"), no_std)]
7
8#[cfg(feature = "std")]
9extern crate std as core;
10
11use core::fmt;
12use core::hash::{Hash, Hasher};
13use core::num::Wrapping;
14use siphasher::sip128::{Hash128, Hasher128, SipHasher13};
15
16#[non_exhaustive]
17pub struct Hashes {
18    pub g: u32,
19    pub f1: u32,
20    pub f2: u32,
21}
22
23/// A central typedef for hash keys
24///
25/// Makes experimentation easier by only needing to be updated here.
26pub type HashKey = u64;
27
28#[inline]
29pub fn displace(f1: u32, f2: u32, d1: u32, d2: u32) -> u32 {
30    (Wrapping(d2) + Wrapping(f1) * Wrapping(d1) + Wrapping(f2)).0
31}
32
33/// `key` is from `phf_generator::HashState`.
34#[inline]
35pub fn hash<T: ?Sized + PhfHash>(x: &T, key: &HashKey) -> Hashes {
36    let mut hasher = SipHasher13::new_with_keys(0, *key);
37    x.phf_hash(&mut hasher);
38
39    let Hash128 {
40        h1: lower,
41        h2: upper,
42    } = hasher.finish128();
43
44    Hashes {
45        g: (lower >> 32) as u32,
46        f1: lower as u32,
47        f2: upper as u32,
48    }
49}
50
51/// Return an index into `phf_generator::HashState::map`.
52///
53/// * `hash` is from `hash()` in this crate.
54/// * `disps` is from `phf_generator::HashState::disps`.
55/// * `len` is the length of `phf_generator::HashState::map`.
56#[inline]
57pub fn get_index(hashes: &Hashes, disps: &[(u32, u32)], len: usize) -> u32 {
58    let (d1, d2) = disps[(hashes.g % (disps.len() as u32)) as usize];
59    displace(hashes.f1, hashes.f2, d1, d2) % (len as u32)
60}
61
62/// A trait implemented by types which can be used in PHF data structures.
63///
64/// This differs from the standard library's `Hash` trait in that `PhfHash`'s
65/// results must be architecture independent so that hashes will be consistent
66/// between the host and target when cross compiling.
67pub trait PhfHash {
68    /// Feeds the value into the state given, updating the hasher as necessary.
69    fn phf_hash<H: Hasher>(&self, state: &mut H);
70
71    /// Feeds a slice of this type into the state provided.
72    fn phf_hash_slice<H: Hasher>(data: &[Self], state: &mut H)
73    where
74        Self: Sized,
75    {
76        for piece in data {
77            piece.phf_hash(state);
78        }
79    }
80}
81
82/// Trait for printing types with `const` constructors, used by `phf_codegen` and `phf_macros`.
83pub trait FmtConst {
84    /// Print a `const` expression representing this value.
85    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result;
86}
87
88/// Identical to `std::borrow::Borrow` except omitting blanket impls to facilitate other
89/// borrowing patterns.
90///
91/// The same semantic requirements apply:
92///
93/// > In particular `Eq`, `Ord` and `Hash` must be equivalent for borrowed and owned values:
94/// `x.borrow() == y.borrow()` should give the same result as `x == y`.
95///
96/// (This crate's API only requires `Eq` and `PhfHash`, however.)
97///
98/// ### Motivation
99/// The conventional signature for lookup methods on collections looks something like this:
100///
101/// ```ignore
102/// impl<K, V> Map<K, V> where K: PhfHash + Eq {
103///     fn get<T: ?Sized>(&self, key: &T) -> Option<&V> where T: PhfHash + Eq, K: Borrow<T> {
104///         ...
105///     }
106/// }
107/// ```
108///
109/// This allows the key type used for lookup to be different than the key stored in the map so for
110/// example you can use `&str` to look up a value in a `Map<String, _>`. However, this runs into
111/// a problem in the case where `T` and `K` are both a `Foo<_>` type constructor but
112/// the contained type is different (even being the same type with different lifetimes).
113///
114/// The main issue for this crate's API is that, with this method signature, you cannot perform a
115/// lookup on a `Map<UniCase<&'static str>, _>` with a `UniCase<&'a str>` where `'a` is not
116/// `'static`; there is no impl of `Borrow` that resolves to
117/// `impl Borrow<UniCase<'a>> for UniCase<&'static str>` and one cannot be added either because of
118/// all the blanket impls.
119///
120/// Instead, this trait is implemented conservatively, without blanket impls, so that impls like
121/// this may be added. This is feasible since the set of types that implement `PhfHash` is
122/// intentionally small.
123///
124/// This likely won't be fixable with specialization alone but will require full support for lattice
125/// impls since we technically want to add overlapping blanket impls.
126pub trait PhfBorrow<B: ?Sized> {
127    /// Convert a reference to `self` to a reference to the borrowed type.
128    fn borrow(&self) -> &B;
129}
130
131/// Create an impl of `FmtConst` delegating to `fmt::Debug` for types that can deal with it.
132///
133/// Ideally with specialization this could be just one default impl and then specialized where
134/// it doesn't apply.
135macro_rules! delegate_debug (
136    ($ty:ty) => {
137        impl FmtConst for $ty {
138            fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
139                write!(f, "{:?}", self)
140            }
141        }
142    }
143);
144
145delegate_debug!(str);
146delegate_debug!(char);
147delegate_debug!(u8);
148delegate_debug!(i8);
149delegate_debug!(u16);
150delegate_debug!(i16);
151delegate_debug!(u32);
152delegate_debug!(i32);
153delegate_debug!(u64);
154delegate_debug!(i64);
155delegate_debug!(usize);
156delegate_debug!(isize);
157delegate_debug!(u128);
158delegate_debug!(i128);
159delegate_debug!(bool);
160
161/// `impl PhfBorrow<T> for T`
162macro_rules! impl_reflexive(
163    ($($t:ty),*) => (
164        $(impl PhfBorrow<$t> for $t {
165            fn borrow(&self) -> &$t {
166                self
167            }
168        })*
169    )
170);
171
172impl_reflexive!(
173    str,
174    char,
175    u8,
176    i8,
177    u16,
178    i16,
179    u32,
180    i32,
181    u64,
182    i64,
183    usize,
184    isize,
185    u128,
186    i128,
187    bool,
188    [u8]
189);
190
191#[cfg(feature = "std")]
192impl PhfBorrow<str> for String {
193    fn borrow(&self) -> &str {
194        self
195    }
196}
197
198#[cfg(feature = "std")]
199impl PhfBorrow<[u8]> for Vec<u8> {
200    fn borrow(&self) -> &[u8] {
201        self
202    }
203}
204
205#[cfg(feature = "std")]
206delegate_debug!(String);
207
208#[cfg(feature = "std")]
209impl PhfHash for String {
210    #[inline]
211    fn phf_hash<H: Hasher>(&self, state: &mut H) {
212        (**self).phf_hash(state)
213    }
214}
215
216#[cfg(feature = "std")]
217impl PhfHash for Vec<u8> {
218    #[inline]
219    fn phf_hash<H: Hasher>(&self, state: &mut H) {
220        (**self).phf_hash(state)
221    }
222}
223
224impl<'a, T: 'a + PhfHash + ?Sized> PhfHash for &'a T {
225    fn phf_hash<H: Hasher>(&self, state: &mut H) {
226        (*self).phf_hash(state)
227    }
228}
229
230impl<'a, T: 'a + FmtConst + ?Sized> FmtConst for &'a T {
231    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
232        (*self).fmt_const(f)
233    }
234}
235
236impl<'a> PhfBorrow<str> for &'a str {
237    fn borrow(&self) -> &str {
238        self
239    }
240}
241
242impl<'a> PhfBorrow<[u8]> for &'a [u8] {
243    fn borrow(&self) -> &[u8] {
244        self
245    }
246}
247
248impl<'a, const N: usize> PhfBorrow<[u8; N]> for &'a [u8; N] {
249    fn borrow(&self) -> &[u8; N] {
250        self
251    }
252}
253
254impl PhfHash for str {
255    #[inline]
256    fn phf_hash<H: Hasher>(&self, state: &mut H) {
257        self.as_bytes().phf_hash(state)
258    }
259}
260
261impl PhfHash for [u8] {
262    #[inline]
263    fn phf_hash<H: Hasher>(&self, state: &mut H) {
264        state.write(self);
265    }
266}
267
268impl FmtConst for [u8] {
269    #[inline]
270    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
271        // slices need a leading reference
272        write!(f, "&{:?}", self)
273    }
274}
275
276#[cfg(feature = "unicase")]
277impl<S> PhfHash for unicase::UniCase<S>
278where
279    unicase::UniCase<S>: Hash,
280{
281    #[inline]
282    fn phf_hash<H: Hasher>(&self, state: &mut H) {
283        self.hash(state)
284    }
285}
286
287#[cfg(feature = "unicase")]
288impl<S> FmtConst for unicase::UniCase<S>
289where
290    S: AsRef<str>,
291{
292    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
293        if self.is_ascii() {
294            f.write_str("UniCase::ascii(")?;
295        } else {
296            f.write_str("UniCase::unicode(")?;
297        }
298
299        self.as_ref().fmt_const(f)?;
300        f.write_str(")")
301    }
302}
303
304#[cfg(feature = "unicase")]
305impl<'b, 'a: 'b, S: ?Sized + 'a> PhfBorrow<unicase::UniCase<&'b S>> for unicase::UniCase<&'a S> {
306    fn borrow(&self) -> &unicase::UniCase<&'b S> {
307        self
308    }
309}
310
311#[cfg(feature = "uncased")]
312impl PhfHash for uncased::UncasedStr {
313    #[inline]
314    fn phf_hash<H: Hasher>(&self, state: &mut H) {
315        self.hash(state)
316    }
317}
318
319#[cfg(feature = "uncased")]
320impl FmtConst for uncased::UncasedStr {
321    fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
322        f.write_str("UncasedStr::new(")?;
323        self.as_str().fmt_const(f)?;
324        f.write_str(")")
325    }
326}
327
328#[cfg(feature = "uncased")]
329impl PhfBorrow<uncased::UncasedStr> for &uncased::UncasedStr {
330    fn borrow(&self) -> &uncased::UncasedStr {
331        self
332    }
333}
334
335macro_rules! sip_impl (
336    (le $t:ty) => (
337        impl PhfHash for $t {
338            #[inline]
339            fn phf_hash<H: Hasher>(&self, state: &mut H) {
340                self.to_le().hash(state);
341            }
342        }
343    );
344    ($t:ty) => (
345        impl PhfHash for $t {
346            #[inline]
347            fn phf_hash<H: Hasher>(&self, state: &mut H) {
348                self.hash(state);
349            }
350        }
351    )
352);
353
354sip_impl!(u8);
355sip_impl!(i8);
356sip_impl!(le u16);
357sip_impl!(le i16);
358sip_impl!(le u32);
359sip_impl!(le i32);
360sip_impl!(le u64);
361sip_impl!(le i64);
362sip_impl!(le usize);
363sip_impl!(le isize);
364sip_impl!(le u128);
365sip_impl!(le i128);
366sip_impl!(bool);
367
368impl PhfHash for char {
369    #[inline]
370    fn phf_hash<H: Hasher>(&self, state: &mut H) {
371        (*self as u32).phf_hash(state)
372    }
373}
374
375// minimize duplicated code since formatting drags in quite a bit
376fn fmt_array<T: core::fmt::Debug>(array: &[T], f: &mut fmt::Formatter<'_>) -> fmt::Result {
377    write!(f, "{:?}", array)
378}
379
380macro_rules! array_impl (
381    ($t:ty) => (
382        impl<const N: usize> PhfHash for [$t; N] {
383            #[inline]
384            fn phf_hash<H: Hasher>(&self, state: &mut H) {
385                for v in &self[..] {
386                    v.phf_hash(state);
387                }
388            }
389        }
390
391        impl<const N: usize> FmtConst for [$t; N] {
392            fn fmt_const(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
393                fmt_array(self, f)
394            }
395        }
396
397        impl<const N: usize> PhfBorrow<[$t]> for [$t; N] {
398            fn borrow(&self) -> &[$t] {
399                self
400            }
401        }
402    )
403);
404
405array_impl!(u8);
406array_impl!(i8);
407array_impl!(u16);
408array_impl!(i16);
409array_impl!(u32);
410array_impl!(i32);
411array_impl!(u64);
412array_impl!(i64);
413array_impl!(usize);
414array_impl!(isize);
415array_impl!(u128);
416array_impl!(i128);
417array_impl!(bool);
418array_impl!(char);
419
420macro_rules! slice_impl (
421    ($t:ty) => {
422        impl PhfHash for [$t] {
423            #[inline]
424            fn phf_hash<H: Hasher>(&self, state: &mut H) {
425                for v in self {
426                    v.phf_hash(state);
427                }
428            }
429        }
430    };
431);
432
433slice_impl!(i8);
434slice_impl!(u16);
435slice_impl!(i16);
436slice_impl!(u32);
437slice_impl!(i32);
438slice_impl!(u64);
439slice_impl!(i64);
440slice_impl!(usize);
441slice_impl!(isize);
442slice_impl!(u128);
443slice_impl!(i128);
444slice_impl!(bool);
445slice_impl!(char);