compiler_builtins/mem/impls.rs

// In C and Rust it is UB to read or write to usize::MAX because if an allocation extends to the
// last byte of address space (there must be an allocation to do the read or write), in C computing
// its one-past-the-end pointer would be equal to NULL and in Rust computing the address of a
// trailing ZST member with a safe place projection would wrap (place projection address computation
// is non-wrapping).
//
// However, some embedded systems have special memory at usize::MAX, and need to access that
// memory. If they do that with the intrinsics provided by compiler-builtins (such as memcpy!), the
// ptr::add in these loops will wrap. And if compiler-builtins is compiled with cfg(ub_checks),
// this will fail a UB check at runtime.
//
// Since this scenario is UB, we are within our rights to hit this check and halt execution...
// But we are also within our rights to try to make it work.
// We use wrapping_add/wrapping_sub for pointer arithmetic in this module in an attempt to support
// this use. Of course this is not a guarantee that such use will work, it just means that this
// crate doing wrapping pointer arithmetic with a method that must not wrap won't be the problem if
// something does go wrong at runtime.
use core::intrinsics::likely;

const WORD_SIZE: usize = core::mem::size_of::<usize>();
const WORD_MASK: usize = WORD_SIZE - 1;

// If the number of bytes involved exceeds this threshold, we opt for word-wise copy.
// The value selected here is max(2 * WORD_SIZE, 16):
// * We need at least 2 * WORD_SIZE bytes to guarantee that at least 1 word will be copied through
//   word-wise copy.
// * The word-wise copy logic needs to perform some checks, so it has some small fixed overhead.
//   Requiring at least 16 bytes ensures that even on 32-bit platforms we copy at least 8 bytes
//   through word-wise copy, so the savings of word-wise copy outweigh the fixed overhead.
const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 {
    2 * WORD_SIZE
} else {
    16
};

#[cfg(feature = "mem-unaligned")]
unsafe fn read_usize_unaligned(x: *const usize) -> usize {
    // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which
    // is translated to memcpy in LLVM.
    let x_read = (x as *const [u8; core::mem::size_of::<usize>()]).read();
    usize::from_ne_bytes(x_read)
}

/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
/// chunk size if a load happened.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn load_chunk_aligned<T: Copy>(
    src: *const usize,
    dst: *mut usize,
    load_sz: usize,
    offset: usize,
) -> usize {
    let chunk_sz = core::mem::size_of::<T>();
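    // `chunk_sz` is a power of two and `load_sz < WORD_SIZE`, so this bit test walks the binary
    // representation of `load_sz`: each set bit corresponds to exactly one chunk of that size.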
    if (load_sz & chunk_sz) != 0 {
        *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
        offset | chunk_sz
    } else {
        offset
    }
}

/// Load `load_sz` many bytes from `src`, which must be `usize`-aligned. Acts as if we did a `usize`
/// read with the out-of-bounds part filled with 0s.
/// `load_sz` must be strictly less than `WORD_SIZE`.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
    debug_assert!(load_sz < WORD_SIZE);
    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
    // (since `load_sz < WORD_SIZE`).
    const { assert!(WORD_SIZE <= 8) };

    let mut i = 0;
    let mut out = 0usize;
    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
    debug_assert!(i == load_sz);
    out
}

/// Load `load_sz` many bytes from `src.wrapping_byte_add(WORD_SIZE - load_sz)`. `src` must be
/// `usize`-aligned. The bytes are returned as the *last* bytes of the return value, i.e., this acts
/// as if we had done a `usize` read from `src`, with the out-of-bounds part filled with 0s.
/// `load_sz` must be strictly less than `WORD_SIZE`.
#[cfg(not(feature = "mem-unaligned"))]
#[inline(always)]
unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
    debug_assert!(load_sz < WORD_SIZE);
    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
    // (since `load_sz < WORD_SIZE`).
    const { assert!(WORD_SIZE <= 8) };

    let mut i = 0;
    let mut out = 0usize;
    // Obtain pointers pointing to the beginning of the range we want to load.
    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
    debug_assert!(i == load_sz);
    out
}

#[inline(always)]
pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
    #[inline(always)]
    unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
        let dest_end = dest.wrapping_add(n);
        while dest < dest_end {
            *dest = *src;
            dest = dest.wrapping_add(1);
            src = src.wrapping_add(1);
        }
    }

    #[inline(always)]
    unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
        let mut dest_usize = dest as *mut usize;
        let mut src_usize = src as *mut usize;
        let dest_end = dest.wrapping_add(n) as *mut usize;

        while dest_usize < dest_end {
            *dest_usize = *src_usize;
            dest_usize = dest_usize.wrapping_add(1);
            src_usize = src_usize.wrapping_add(1);
        }
    }

    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
    /// `src` *must not* be `usize`-aligned.
    #[cfg(not(feature = "mem-unaligned"))]
    #[inline(always)]
    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
        debug_assert!(n > 0 && n % WORD_SIZE == 0);
        debug_assert!(src.addr() % WORD_SIZE != 0);

        let mut dest_usize = dest as *mut usize;
        let dest_end = dest.wrapping_add(n) as *mut usize;

        // Calculate the misalignment offset and shift needed to reassemble value.
        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
        let offset = src as usize & WORD_MASK;
        let shift = offset * 8;

        // Realign src
        let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
        let mut prev_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);

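        // Each destination word is reassembled from two neighboring aligned `src` words:
        // the tail of `prev_word` and the head of the freshly loaded `cur_word`.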
        while dest_usize.wrapping_add(1) < dest_end {
            src_aligned = src_aligned.wrapping_add(1);
            let cur_word = *src_aligned;
            let reassembled = if cfg!(target_endian = "little") {
                prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
            } else {
                prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
            };
            prev_word = cur_word;

            *dest_usize = reassembled;
            dest_usize = dest_usize.wrapping_add(1);
        }

        // There's one more element left to go, and we can't use the loop for that because on the
        // `src` side, it is partially out-of-bounds.
        src_aligned = src_aligned.wrapping_add(1);
        let cur_word = load_aligned_partial(src_aligned, offset);
        let reassembled = if cfg!(target_endian = "little") {
            prev_word >> shift | cur_word << (WORD_SIZE * 8 - shift)
        } else {
            prev_word << shift | cur_word >> (WORD_SIZE * 8 - shift)
        };
        // prev_word does not matter any more

        *dest_usize = reassembled;
        // dest_usize does not matter any more
    }

    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
    /// `src` *must not* be `usize`-aligned.
    #[cfg(feature = "mem-unaligned")]
    #[inline(always)]
    unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
        let mut dest_usize = dest as *mut usize;
        let mut src_usize = src as *mut usize;
        let dest_end = dest.wrapping_add(n) as *mut usize;

        while dest_usize < dest_end {
            *dest_usize = read_usize_unaligned(src_usize);
            dest_usize = dest_usize.wrapping_add(1);
            src_usize = src_usize.wrapping_add(1);
        }
    }

    if n >= WORD_COPY_THRESHOLD {
        // Align dest
        // Because of n >= 2 * WORD_SIZE, dest_misalignment < n
        let dest_misalignment = (dest as usize).wrapping_neg() & WORD_MASK;
        copy_forward_bytes(dest, src, dest_misalignment);
        dest = dest.wrapping_add(dest_misalignment);
        src = src.wrapping_add(dest_misalignment);
        n -= dest_misalignment;

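        // Copy as many whole words as possible; the remaining tail (fewer than WORD_SIZE bytes)
        // is finished off byte-wise after this block.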
        let n_words = n & !WORD_MASK;
        let src_misalignment = src as usize & WORD_MASK;
        if likely(src_misalignment == 0) {
            copy_forward_aligned_words(dest, src, n_words);
        } else {
            copy_forward_misaligned_words(dest, src, n_words);
        }
        dest = dest.wrapping_add(n_words);
        src = src.wrapping_add(n_words);
        n -= n_words;
    }
    copy_forward_bytes(dest, src, n);
}

#[inline(always)]
pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
    // The following backward copy helper functions use pointers past the end
    // as their inputs instead of pointers to the start!
    #[inline(always)]
    unsafe fn copy_backward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
        let dest_start = dest.wrapping_sub(n);
        while dest_start < dest {
            dest = dest.wrapping_sub(1);
            src = src.wrapping_sub(1);
            *dest = *src;
        }
    }

    #[inline(always)]
    unsafe fn copy_backward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
        let mut dest_usize = dest as *mut usize;
        let mut src_usize = src as *mut usize;
        let dest_start = dest.wrapping_sub(n) as *mut usize;

        while dest_start < dest_usize {
            dest_usize = dest_usize.wrapping_sub(1);
            src_usize = src_usize.wrapping_sub(1);
            *dest_usize = *src_usize;
        }
    }

    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
    /// `src` *must not* be `usize`-aligned.
    #[cfg(not(feature = "mem-unaligned"))]
    #[inline(always)]
    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
        debug_assert!(n > 0 && n % WORD_SIZE == 0);
        debug_assert!(src.addr() % WORD_SIZE != 0);

        let mut dest_usize = dest as *mut usize;
        let dest_start = dest.wrapping_sub(n) as *mut usize; // we're moving towards the start

        // Calculate the misalignment offset and shift needed to reassemble value.
        // Since `src` is definitely not aligned, `offset` is in the range 1..WORD_SIZE.
        let offset = src as usize & WORD_MASK;
        let shift = offset * 8;

        // Realign src
        let mut src_aligned = src.wrapping_byte_sub(offset) as *mut usize;
        let mut prev_word = load_aligned_partial(src_aligned, offset);

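        // Mirror of the forward case: walking towards the start, each destination word is
        // reassembled from the freshly loaded aligned word (`cur_word`) and the one loaded on
        // the previous iteration (`prev_word`).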
        while dest_start.wrapping_add(1) < dest_usize {
            src_aligned = src_aligned.wrapping_sub(1);
            let cur_word = *src_aligned;
            let reassembled = if cfg!(target_endian = "little") {
                prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
            } else {
                prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
            };
            prev_word = cur_word;

            dest_usize = dest_usize.wrapping_sub(1);
            *dest_usize = reassembled;
        }

        // There's one more element left to go, and we can't use the loop for that because on the
        // `src` side, it is partially out-of-bounds.
        src_aligned = src_aligned.wrapping_sub(1);
        let cur_word = load_aligned_end_partial(src_aligned, WORD_SIZE - offset);
        let reassembled = if cfg!(target_endian = "little") {
            prev_word << (WORD_SIZE * 8 - shift) | cur_word >> shift
        } else {
            prev_word >> (WORD_SIZE * 8 - shift) | cur_word << shift
        };
        // prev_word does not matter any more

        dest_usize = dest_usize.wrapping_sub(1);
        *dest_usize = reassembled;
    }

    /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
    /// `src` *must not* be `usize`-aligned.
    #[cfg(feature = "mem-unaligned")]
    #[inline(always)]
    unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
        let mut dest_usize = dest as *mut usize;
        let mut src_usize = src as *mut usize;
        let dest_start = dest.wrapping_sub(n) as *mut usize;

        while dest_start < dest_usize {
            dest_usize = dest_usize.wrapping_sub(1);
            src_usize = src_usize.wrapping_sub(1);
            *dest_usize = read_usize_unaligned(src_usize);
        }
    }

    let mut dest = dest.wrapping_add(n);
    let mut src = src.wrapping_add(n);

    if n >= WORD_COPY_THRESHOLD {
        // Align dest
        // Because of n >= 2 * WORD_SIZE, dest_misalignment < n
        let dest_misalignment = dest as usize & WORD_MASK;
        copy_backward_bytes(dest, src, dest_misalignment);
        dest = dest.wrapping_sub(dest_misalignment);
        src = src.wrapping_sub(dest_misalignment);
        n -= dest_misalignment;

        let n_words = n & !WORD_MASK;
        let src_misalignment = src as usize & WORD_MASK;
        if likely(src_misalignment == 0) {
            copy_backward_aligned_words(dest, src, n_words);
        } else {
            copy_backward_misaligned_words(dest, src, n_words);
        }
        dest = dest.wrapping_sub(n_words);
        src = src.wrapping_sub(n_words);
        n -= n_words;
    }
    copy_backward_bytes(dest, src, n);
}

#[inline(always)]
pub unsafe fn set_bytes(mut s: *mut u8, c: u8, mut n: usize) {
    #[inline(always)]
    pub unsafe fn set_bytes_bytes(mut s: *mut u8, c: u8, n: usize) {
        let end = s.wrapping_add(n);
        while s < end {
            *s = c;
            s = s.wrapping_add(1);
        }
    }

    #[inline(always)]
    pub unsafe fn set_bytes_words(s: *mut u8, c: u8, n: usize) {
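        // Broadcast the fill byte into every byte of a word by repeatedly doubling the
        // replicated width: 8 bits -> 16 bits -> 32 bits -> ... up to WORD_SIZE * 8.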
        let mut broadcast = c as usize;
        let mut bits = 8;
        while bits < WORD_SIZE * 8 {
            broadcast |= broadcast << bits;
            bits *= 2;
        }

        let mut s_usize = s as *mut usize;
        let end = s.wrapping_add(n) as *mut usize;

        while s_usize < end {
            *s_usize = broadcast;
            s_usize = s_usize.wrapping_add(1);
        }
    }

    if likely(n >= WORD_COPY_THRESHOLD) {
        // Align s
        // Because of n >= 2 * WORD_SIZE, misalignment < n
        let misalignment = (s as usize).wrapping_neg() & WORD_MASK;
        set_bytes_bytes(s, c, misalignment);
        s = s.wrapping_add(misalignment);
        n -= misalignment;

        let n_words = n & !WORD_MASK;
        set_bytes_words(s, c, n_words);
        s = s.wrapping_add(n_words);
        n -= n_words;
    }
    set_bytes_bytes(s, c, n);
}

#[inline(always)]
pub unsafe fn compare_bytes(s1: *const u8, s2: *const u8, n: usize) -> i32 {
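    // Simple byte-wise memcmp: return the difference of the first pair of bytes that differ,
    // or 0 if the two ranges are equal.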
    let mut i = 0;
    while i < n {
        let a = *s1.wrapping_add(i);
        let b = *s2.wrapping_add(i);
        if a != b {
            return a as i32 - b as i32;
        }
        i += 1;
    }
    0
}

#[inline(always)]
pub unsafe fn c_string_length(mut s: *const core::ffi::c_char) -> usize {
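    // Byte-at-a-time strlen: count characters until the NUL terminator is reached.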
    let mut n = 0;
    while *s != 0 {
        n += 1;
        s = s.wrapping_add(1);
    }
    n
}