ppv_lite86/
soft.rs

//! Implement 256- and 512-bit vectors in terms of 128-bit vectors, for machines without native wide SIMD.
2
3use crate::types::*;
4use crate::{vec128_storage, vec256_storage, vec512_storage};
5use core::marker::PhantomData;
6use core::ops::*;
7
// `x2<W, G>` holds two `W` lanes in a `[W; 2]` array together with a
// zero-sized `PhantomData<G>` tag. `G` is never stored; it only takes part in
// the type (presumably to tell apart different groupings of the same lanes —
// TODO confirm against the users of this type).
// The struct is declared through `zerocopy::cryptocorrosion_derive_traits!`,
// which receives the definition below as its input.
zerocopy::cryptocorrosion_derive_traits! {
    #[repr(transparent)]
    #[derive(Copy, Clone, Default)]
    #[allow(non_camel_case_types)]
    pub struct x2<W, G>(pub [W; 2], PhantomData<G>);
}
14
15impl<W, G> x2<W, G> {
16    #[inline(always)]
17    pub fn new(xs: [W; 2]) -> Self {
18        x2(xs, PhantomData)
19    }
20}
// Implements the binary operator trait `$trait` (method `$fn`) for `x2<W, G>`
// by applying the same operator to lane 0 and then lane 1 of both operands.
// The result is an `x2` whose lanes have type `W::Output`.
macro_rules! fwd_binop_x2 {
    ($trait:ident, $fn:ident) => {
        impl<W, G> $trait for x2<W, G>
        where
            W: $trait + Copy,
        {
            type Output = x2<W::Output, G>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                let [lhs0, lhs1] = self.0;
                let [rhs0, rhs1] = rhs.0;
                x2::new([lhs0.$fn(rhs0), lhs1.$fn(rhs1)])
            }
        }
    };
}
// Implements the compound-assignment trait `$trait` (method `$fn_assign`) for
// `x2<W, G>`: each lane of `self` is updated in place with the matching lane
// of `rhs`, lane 0 first.
macro_rules! fwd_binop_assign_x2 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W, G> $trait for x2<W, G>
        where
            W: $trait + Copy,
        {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                let [rhs0, rhs1] = rhs.0;
                self.0[0].$fn_assign(rhs0);
                self.0[1].$fn_assign(rhs1);
            }
        }
    };
}
// Expands to a method `$fn(self) -> Self` that calls `$fn` on each of the two
// lanes and repacks the results. Intended for use inside a trait `impl` for
// `x2` whose lane type provides the same method.
macro_rules! fwd_unop_x2 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            let [w0, w1] = self.0;
            Self::new([w0.$fn(), w1.$fn()])
        }
    };
}
51impl<W, G> RotateEachWord32 for x2<W, G>
52where
53    W: Copy + RotateEachWord32,
54{
55    fwd_unop_x2!(rotate_each_word_right7);
56    fwd_unop_x2!(rotate_each_word_right8);
57    fwd_unop_x2!(rotate_each_word_right11);
58    fwd_unop_x2!(rotate_each_word_right12);
59    fwd_unop_x2!(rotate_each_word_right16);
60    fwd_unop_x2!(rotate_each_word_right20);
61    fwd_unop_x2!(rotate_each_word_right24);
62    fwd_unop_x2!(rotate_each_word_right25);
63}
64impl<W, G> RotateEachWord64 for x2<W, G>
65where
66    W: Copy + RotateEachWord64,
67{
68    fwd_unop_x2!(rotate_each_word_right32);
69}
70impl<W, G> RotateEachWord128 for x2<W, G> where W: RotateEachWord128 {}
71impl<W, G> BitOps0 for x2<W, G>
72where
73    W: BitOps0,
74    G: Copy,
75{
76}
77impl<W, G> BitOps32 for x2<W, G>
78where
79    W: BitOps32 + BitOps0,
80    G: Copy,
81{
82}
83impl<W, G> BitOps64 for x2<W, G>
84where
85    W: BitOps64 + BitOps0,
86    G: Copy,
87{
88}
89impl<W, G> BitOps128 for x2<W, G>
90where
91    W: BitOps128 + BitOps0,
92    G: Copy,
93{
94}
// Lane-wise bitwise operators for `x2`, generated by `fwd_binop_x2!`:
// each call implements the named trait by applying its method to both lanes.
fwd_binop_x2!(BitAnd, bitand);
fwd_binop_x2!(BitOr, bitor);
fwd_binop_x2!(BitXor, bitxor);
fwd_binop_x2!(AndNot, andnot);
// In-place counterparts of `BitAnd`, `BitOr` and `BitXor`, generated by
// `fwd_binop_assign_x2!`.
fwd_binop_assign_x2!(BitAndAssign, bitand_assign);
fwd_binop_assign_x2!(BitOrAssign, bitor_assign);
fwd_binop_assign_x2!(BitXorAssign, bitxor_assign);
102impl<W, G> ArithOps for x2<W, G>
103where
104    W: ArithOps,
105    G: Copy,
106{
107}
// Lane-wise `Add` and `AddAssign` for `x2`, generated by the forwarding macros.
fwd_binop_x2!(Add, add);
fwd_binop_assign_x2!(AddAssign, add_assign);
110impl<W: Not + Copy, G> Not for x2<W, G> {
111    type Output = x2<W::Output, G>;
112    #[inline(always)]
113    fn not(self) -> Self::Output {
114        x2::new([self.0[0].not(), self.0[1].not()])
115    }
116}
117impl<W, G> UnsafeFrom<[W; 2]> for x2<W, G> {
118    #[inline(always)]
119    unsafe fn unsafe_from(xs: [W; 2]) -> Self {
120        x2::new(xs)
121    }
122}
123impl<W: Copy, G> Vec2<W> for x2<W, G> {
124    #[inline(always)]
125    fn extract(self, i: u32) -> W {
126        self.0[i as usize]
127    }
128    #[inline(always)]
129    fn insert(mut self, w: W, i: u32) -> Self {
130        self.0[i as usize] = w;
131        self
132    }
133}
134impl<W: Copy + Store<vec128_storage>, G> Store<vec256_storage> for x2<W, G> {
135    #[inline(always)]
136    unsafe fn unpack(p: vec256_storage) -> Self {
137        let p = p.split128();
138        x2::new([W::unpack(p[0]), W::unpack(p[1])])
139    }
140}
141impl<W, G> From<x2<W, G>> for vec256_storage
142where
143    W: Copy,
144    vec128_storage: From<W>,
145{
146    #[inline(always)]
147    fn from(x: x2<W, G>) -> Self {
148        vec256_storage::new128([x.0[0].into(), x.0[1].into()])
149    }
150}
151impl<W, G> Swap64 for x2<W, G>
152where
153    W: Swap64 + Copy,
154{
155    fwd_unop_x2!(swap1);
156    fwd_unop_x2!(swap2);
157    fwd_unop_x2!(swap4);
158    fwd_unop_x2!(swap8);
159    fwd_unop_x2!(swap16);
160    fwd_unop_x2!(swap32);
161    fwd_unop_x2!(swap64);
162}
163impl<W: Copy, G> MultiLane<[W; 2]> for x2<W, G> {
164    #[inline(always)]
165    fn to_lanes(self) -> [W; 2] {
166        self.0
167    }
168    #[inline(always)]
169    fn from_lanes(lanes: [W; 2]) -> Self {
170        x2::new(lanes)
171    }
172}
173impl<W: BSwap + Copy, G> BSwap for x2<W, G> {
174    #[inline(always)]
175    fn bswap(self) -> Self {
176        x2::new([self.0[0].bswap(), self.0[1].bswap()])
177    }
178}
179impl<W: StoreBytes + BSwap + Copy, G> StoreBytes for x2<W, G> {
180    #[inline(always)]
181    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
182        let input = input.split_at(input.len() / 2);
183        x2::new([W::unsafe_read_le(input.0), W::unsafe_read_le(input.1)])
184    }
185    #[inline(always)]
186    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
187        let input = input.split_at(input.len() / 2);
188        x2::new([W::unsafe_read_be(input.0), W::unsafe_read_be(input.1)])
189    }
190    #[inline(always)]
191    fn write_le(self, out: &mut [u8]) {
192        let out = out.split_at_mut(out.len() / 2);
193        self.0[0].write_le(out.0);
194        self.0[1].write_le(out.1);
195    }
196    #[inline(always)]
197    fn write_be(self, out: &mut [u8]) {
198        let out = out.split_at_mut(out.len() / 2);
199        self.0[0].write_be(out.0);
200        self.0[1].write_be(out.1);
201    }
202}
203impl<W: Copy + LaneWords4, G: Copy> LaneWords4 for x2<W, G> {
204    #[inline(always)]
205    fn shuffle_lane_words2301(self) -> Self {
206        Self::new([
207            self.0[0].shuffle_lane_words2301(),
208            self.0[1].shuffle_lane_words2301(),
209        ])
210    }
211    #[inline(always)]
212    fn shuffle_lane_words1230(self) -> Self {
213        Self::new([
214            self.0[0].shuffle_lane_words1230(),
215            self.0[1].shuffle_lane_words1230(),
216        ])
217    }
218    #[inline(always)]
219    fn shuffle_lane_words3012(self) -> Self {
220        Self::new([
221            self.0[0].shuffle_lane_words3012(),
222            self.0[1].shuffle_lane_words3012(),
223        ])
224    }
225}
226
// `x4<W>` holds four `W` lanes in a `[W; 4]` array. Unlike `x2` it carries no
// extra tag parameter.
// The struct is declared through `zerocopy::cryptocorrosion_derive_traits!`,
// which receives the definition below as its input.
zerocopy::cryptocorrosion_derive_traits! {
    #[repr(transparent)]
    #[derive(Copy, Clone, Default)]
    #[allow(non_camel_case_types)]
    pub struct x4<W>(pub [W; 4]);
}
233
234impl<W> x4<W> {
235    #[inline(always)]
236    pub fn new(xs: [W; 4]) -> Self {
237        x4(xs)
238    }
239}
// Implements the binary operator trait `$trait` (method `$fn`) for `x4<W>` by
// applying the same operator to lanes 0 through 3 of both operands, in order.
// The result is an `x4` whose lanes have type `W::Output`.
macro_rules! fwd_binop_x4 {
    ($trait:ident, $fn:ident) => {
        impl<W> $trait for x4<W>
        where
            W: $trait + Copy,
        {
            type Output = x4<W::Output>;
            #[inline(always)]
            fn $fn(self, rhs: Self) -> Self::Output {
                let [lhs0, lhs1, lhs2, lhs3] = self.0;
                let [rhs0, rhs1, rhs2, rhs3] = rhs.0;
                x4::new([
                    lhs0.$fn(rhs0),
                    lhs1.$fn(rhs1),
                    lhs2.$fn(rhs2),
                    lhs3.$fn(rhs3),
                ])
            }
        }
    };
}
// Implements the compound-assignment trait `$trait` (method `$fn_assign`) for
// `x4<W>`: each lane of `self` is updated in place with the matching lane of
// `rhs`, lanes 0 through 3 in order.
macro_rules! fwd_binop_assign_x4 {
    ($trait:ident, $fn_assign:ident) => {
        impl<W> $trait for x4<W>
        where
            W: $trait + Copy,
        {
            #[inline(always)]
            fn $fn_assign(&mut self, rhs: Self) {
                let [rhs0, rhs1, rhs2, rhs3] = rhs.0;
                self.0[0].$fn_assign(rhs0);
                self.0[1].$fn_assign(rhs1);
                self.0[2].$fn_assign(rhs2);
                self.0[3].$fn_assign(rhs3);
            }
        }
    };
}
// Expands to a method `$fn(self) -> Self` that calls `$fn` on each of the four
// lanes and repacks the results. Intended for use inside a trait `impl` for
// `x4` whose lane type provides the same method.
macro_rules! fwd_unop_x4 {
    ($fn:ident) => {
        #[inline(always)]
        fn $fn(self) -> Self {
            let [w0, w1, w2, w3] = self.0;
            Self::new([w0.$fn(), w1.$fn(), w2.$fn(), w3.$fn()])
        }
    };
}
282impl<W> RotateEachWord32 for x4<W>
283where
284    W: Copy + RotateEachWord32,
285{
286    fwd_unop_x4!(rotate_each_word_right7);
287    fwd_unop_x4!(rotate_each_word_right8);
288    fwd_unop_x4!(rotate_each_word_right11);
289    fwd_unop_x4!(rotate_each_word_right12);
290    fwd_unop_x4!(rotate_each_word_right16);
291    fwd_unop_x4!(rotate_each_word_right20);
292    fwd_unop_x4!(rotate_each_word_right24);
293    fwd_unop_x4!(rotate_each_word_right25);
294}
295impl<W> RotateEachWord64 for x4<W>
296where
297    W: Copy + RotateEachWord64,
298{
299    fwd_unop_x4!(rotate_each_word_right32);
300}
301impl<W> RotateEachWord128 for x4<W> where W: RotateEachWord128 {}
302impl<W> BitOps0 for x4<W> where W: BitOps0 {}
303impl<W> BitOps32 for x4<W> where W: BitOps32 + BitOps0 {}
304impl<W> BitOps64 for x4<W> where W: BitOps64 + BitOps0 {}
305impl<W> BitOps128 for x4<W> where W: BitOps128 + BitOps0 {}
// Lane-wise bitwise operators for `x4`, generated by `fwd_binop_x4!`:
// each call implements the named trait by applying its method to all four lanes.
fwd_binop_x4!(BitAnd, bitand);
fwd_binop_x4!(BitOr, bitor);
fwd_binop_x4!(BitXor, bitxor);
fwd_binop_x4!(AndNot, andnot);
// In-place counterparts of `BitAnd`, `BitOr` and `BitXor`, generated by
// `fwd_binop_assign_x4!`.
fwd_binop_assign_x4!(BitAndAssign, bitand_assign);
fwd_binop_assign_x4!(BitOrAssign, bitor_assign);
fwd_binop_assign_x4!(BitXorAssign, bitxor_assign);
313impl<W> ArithOps for x4<W> where W: ArithOps {}
// Lane-wise `Add` and `AddAssign` for `x4`, generated by the forwarding macros.
fwd_binop_x4!(Add, add);
fwd_binop_assign_x4!(AddAssign, add_assign);
316impl<W: Not + Copy> Not for x4<W> {
317    type Output = x4<W::Output>;
318    #[inline(always)]
319    fn not(self) -> Self::Output {
320        x4([
321            self.0[0].not(),
322            self.0[1].not(),
323            self.0[2].not(),
324            self.0[3].not(),
325        ])
326    }
327}
328impl<W> UnsafeFrom<[W; 4]> for x4<W> {
329    #[inline(always)]
330    unsafe fn unsafe_from(xs: [W; 4]) -> Self {
331        x4(xs)
332    }
333}
334impl<W: Copy> Vec4<W> for x4<W> {
335    #[inline(always)]
336    fn extract(self, i: u32) -> W {
337        self.0[i as usize]
338    }
339    #[inline(always)]
340    fn insert(mut self, w: W, i: u32) -> Self {
341        self.0[i as usize] = w;
342        self
343    }
344}
345impl<W: Copy> Vec4Ext<W> for x4<W> {
346    #[inline(always)]
347    fn transpose4(a: Self, b: Self, c: Self, d: Self) -> (Self, Self, Self, Self)
348    where
349        Self: Sized,
350    {
351        (
352            x4([a.0[0], b.0[0], c.0[0], d.0[0]]),
353            x4([a.0[1], b.0[1], c.0[1], d.0[1]]),
354            x4([a.0[2], b.0[2], c.0[2], d.0[2]]),
355            x4([a.0[3], b.0[3], c.0[3], d.0[3]]),
356        )
357    }
358}
359impl<W: Copy + Store<vec128_storage>> Store<vec512_storage> for x4<W> {
360    #[inline(always)]
361    unsafe fn unpack(p: vec512_storage) -> Self {
362        let p = p.split128();
363        x4([
364            W::unpack(p[0]),
365            W::unpack(p[1]),
366            W::unpack(p[2]),
367            W::unpack(p[3]),
368        ])
369    }
370}
371impl<W> From<x4<W>> for vec512_storage
372where
373    W: Copy,
374    vec128_storage: From<W>,
375{
376    #[inline(always)]
377    fn from(x: x4<W>) -> Self {
378        vec512_storage::new128([x.0[0].into(), x.0[1].into(), x.0[2].into(), x.0[3].into()])
379    }
380}
381impl<W> Swap64 for x4<W>
382where
383    W: Swap64 + Copy,
384{
385    fwd_unop_x4!(swap1);
386    fwd_unop_x4!(swap2);
387    fwd_unop_x4!(swap4);
388    fwd_unop_x4!(swap8);
389    fwd_unop_x4!(swap16);
390    fwd_unop_x4!(swap32);
391    fwd_unop_x4!(swap64);
392}
393impl<W: Copy> MultiLane<[W; 4]> for x4<W> {
394    #[inline(always)]
395    fn to_lanes(self) -> [W; 4] {
396        self.0
397    }
398    #[inline(always)]
399    fn from_lanes(lanes: [W; 4]) -> Self {
400        x4(lanes)
401    }
402}
403impl<W: BSwap + Copy> BSwap for x4<W> {
404    #[inline(always)]
405    fn bswap(self) -> Self {
406        x4([
407            self.0[0].bswap(),
408            self.0[1].bswap(),
409            self.0[2].bswap(),
410            self.0[3].bswap(),
411        ])
412    }
413}
414impl<W: StoreBytes + BSwap + Copy> StoreBytes for x4<W> {
415    #[inline(always)]
416    unsafe fn unsafe_read_le(input: &[u8]) -> Self {
417        let n = input.len() / 4;
418        x4([
419            W::unsafe_read_le(&input[..n]),
420            W::unsafe_read_le(&input[n..n * 2]),
421            W::unsafe_read_le(&input[n * 2..n * 3]),
422            W::unsafe_read_le(&input[n * 3..]),
423        ])
424    }
425    #[inline(always)]
426    unsafe fn unsafe_read_be(input: &[u8]) -> Self {
427        let n = input.len() / 4;
428        x4([
429            W::unsafe_read_be(&input[..n]),
430            W::unsafe_read_be(&input[n..n * 2]),
431            W::unsafe_read_be(&input[n * 2..n * 3]),
432            W::unsafe_read_be(&input[n * 3..]),
433        ])
434    }
435    #[inline(always)]
436    fn write_le(self, out: &mut [u8]) {
437        let n = out.len() / 4;
438        self.0[0].write_le(&mut out[..n]);
439        self.0[1].write_le(&mut out[n..n * 2]);
440        self.0[2].write_le(&mut out[n * 2..n * 3]);
441        self.0[3].write_le(&mut out[n * 3..]);
442    }
443    #[inline(always)]
444    fn write_be(self, out: &mut [u8]) {
445        let n = out.len() / 4;
446        self.0[0].write_be(&mut out[..n]);
447        self.0[1].write_be(&mut out[n..n * 2]);
448        self.0[2].write_be(&mut out[n * 2..n * 3]);
449        self.0[3].write_be(&mut out[n * 3..]);
450    }
451}
452impl<W: Copy + LaneWords4> LaneWords4 for x4<W> {
453    #[inline(always)]
454    fn shuffle_lane_words2301(self) -> Self {
455        x4([
456            self.0[0].shuffle_lane_words2301(),
457            self.0[1].shuffle_lane_words2301(),
458            self.0[2].shuffle_lane_words2301(),
459            self.0[3].shuffle_lane_words2301(),
460        ])
461    }
462    #[inline(always)]
463    fn shuffle_lane_words1230(self) -> Self {
464        x4([
465            self.0[0].shuffle_lane_words1230(),
466            self.0[1].shuffle_lane_words1230(),
467            self.0[2].shuffle_lane_words1230(),
468            self.0[3].shuffle_lane_words1230(),
469        ])
470    }
471    #[inline(always)]
472    fn shuffle_lane_words3012(self) -> Self {
473        x4([
474            self.0[0].shuffle_lane_words3012(),
475            self.0[1].shuffle_lane_words3012(),
476            self.0[2].shuffle_lane_words3012(),
477            self.0[3].shuffle_lane_words3012(),
478        ])
479    }
480}