Software APIs
hardened_memory.c
1// Copyright lowRISC contributors (OpenTitan project).
2// Licensed under the Apache License, Version 2.0, see LICENSE for details.
3// SPDX-License-Identifier: Apache-2.0
4
6
10
11// NOTE: The three hardened_mem* functions have similar contents, but the parts
12// that are shared between them are commented only in `memcpy()`.
13status_t hardened_memcpy(uint32_t *restrict dest, const uint32_t *restrict src,
14 size_t word_len) {
15 random_order_t order;
16 random_order_init(&order, word_len);
17
18 size_t count = 0;
19
20 // Immediately convert `src` and `dest` to addresses, which erases their
21 // provenance and causes their addresses to be exposed (in the provenance
22 // sense).
23 uintptr_t src_addr = (uintptr_t)src;
24 uintptr_t dest_addr = (uintptr_t)dest;
25
26 // We need to launder `count`, so that the SW.LOOP-COMPLETION check is not
27 // deleted by the compiler.
28 for (; launderw(count) < word_len; count = launderw(count) + 1) {
29 // The order values themselves are in units of words, but we need `byte_idx`
30 // to be in units of bytes.
31 //
32 // The value obtained from `advance()` is laundered, to prevent
33 // implementation details from leaking across procedures.
34 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
35
36 // Prevent the compiler from reordering the loop; this ensures a
37 // happens-before among indices consistent with `order`.
38 barrierw(byte_idx);
39
40 // Calculate pointers.
41 void *src = (void *)launderw(src_addr + byte_idx);
42 void *dest = (void *)launderw(dest_addr + byte_idx);
43
44 // Perform the copy, without performing a typed dereference operation.
45 write_32(read_32(src), dest);
46 }
48 HARDENED_CHECK_EQ(count, word_len);
49
50 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
51}
52
53status_t hardened_memshred(uint32_t *dest, size_t word_len) {
54 random_order_t order;
55 random_order_init(&order, word_len);
56
57 size_t count = 0;
58
59 uintptr_t data_addr = (uintptr_t)dest;
60
61 for (; count < word_len; count = launderw(count) + 1) {
62 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
63 barrierw(byte_idx);
64
65 // Calculate pointer.
66 void *data = (void *)launderw(data_addr + byte_idx);
67
68 // Write a freshly-generated random word to `*data`.
69 write_32(hardened_memshred_random_word(), data);
70 }
72
73 HARDENED_CHECK_EQ(count, word_len);
74
75 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
76}
77
78hardened_bool_t hardened_memeq(const uint32_t *lhs, const uint32_t *rhs,
79 size_t word_len) {
80 random_order_t order;
81 random_order_init(&order, word_len);
82
83 size_t count = 0;
84
85 uintptr_t lhs_addr = (uintptr_t)lhs;
86 uintptr_t rhs_addr = (uintptr_t)rhs;
87
88 uint32_t zeros = 0;
89 uint32_t ones = UINT32_MAX;
90
91 // The loop is almost token-for-token the one above, but the copy is
92 // replaced with something else.
93 for (; count < word_len; count = launderw(count) + 1) {
94 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
95 barrierw(byte_idx);
96
97 // Calculate pointers.
98 void *av = (void *)launderw(lhs_addr + byte_idx);
99 void *bv = (void *)launderw(rhs_addr + byte_idx);
100
101 uint32_t a = read_32(av);
102 uint32_t b = read_32(bv);
103
104 // Launder one of the operands, so that the compiler cannot cache the result
105 // of the xor for use in the next operation.
106 //
107 // We launder `zeroes` so that compiler cannot learn that `zeroes` has
108 // strictly more bits set at the end of the loop.
109 zeros = launder32(zeros) | (launder32(a) ^ b);
110
111 // Same as above. The compiler can cache the value of `a[offset]`, but it
112 // has no chance to strength-reduce this operation.
113 ones = launder32(ones) & (launder32(a) ^ ~b);
114 }
116
117 HARDENED_CHECK_EQ(count, word_len);
118 if (launder32(zeros) == 0) {
119 HARDENED_CHECK_EQ(ones, UINT32_MAX);
120 return kHardenedBoolTrue;
121 }
122
123 HARDENED_CHECK_NE(ones, UINT32_MAX);
124 return kHardenedBoolFalse;
125}
126
127hardened_bool_t consttime_memeq_byte(const void *lhs, const void *rhs,
128 size_t len) {
129 uint32_t zeros = 0;
130 uint32_t ones = UINT32_MAX;
131
132 random_order_t order;
133 random_order_init(&order, len);
134
135 size_t count = 0;
136
137 uintptr_t lhs_addr = (uintptr_t)lhs;
138 uintptr_t rhs_addr = (uintptr_t)rhs;
139
140 for (; launderw(count) < len; count = launderw(count) + 1) {
141 size_t byte_idx = launderw(random_order_advance(&order));
142 barrierw(byte_idx);
143
144 uint8_t *a = (uint8_t *)launderw(lhs_addr + byte_idx);
145 uint8_t *b = (uint8_t *)launderw(rhs_addr + byte_idx);
146
147 // Launder one of the operands, so that the compiler cannot cache the result
148 // of the xor for use in the next operation.
149 //
150 // We launder `zeroes` so that compiler cannot learn that `zeroes` has
151 // strictly more bits set at the end of the loop.
152 zeros = launder32(zeros) | (launder32((uint32_t)*a) ^ *b);
153
154 // Same as above. The compiler can cache the value of `a[offset]`, but it
155 // has no chance to strength-reduce this operation.
156 ones = launder32(ones) & (launder32((uint32_t)*a) ^ ~(uint32_t)*b);
157 }
158
159 HARDENED_CHECK_EQ(count, len);
160
161 if (launder32(zeros) == 0) {
162 HARDENED_CHECK_EQ(ones, UINT32_MAX);
163 return kHardenedBoolTrue;
164 }
165
166 HARDENED_CHECK_NE(ones, UINT32_MAX);
167 return kHardenedBoolFalse;
168}
169
170status_t hardened_xor(const uint32_t *restrict x, const uint32_t *restrict y,
171 size_t word_len, uint32_t *restrict dest) {
172 // Randomize the content of the output buffer before writing to it.
173 hardened_memshred(dest, word_len);
174
175 // Create a random variable rand.
176 uint32_t rand[word_len];
177 hardened_memshred(rand, word_len);
178
179 // Cast pointers to `uintptr_t` to erase their provenance.
180 uintptr_t x_addr = (uintptr_t)x;
181 uintptr_t y_addr = (uintptr_t)y;
182 uintptr_t dest_addr = (uintptr_t)dest;
183 uintptr_t rand_addr = (uintptr_t)&rand;
184
185 // Generate a random ordering.
186 random_order_t order;
187 random_order_init(&order, word_len);
188 size_t count = 0;
189
190 // XOR the mask with the first share. This loop is modelled off the one in
191 // `hardened_memcpy`; see the comments there for more details.
192 for (; launderw(count) < word_len; count = launderw(count) + 1) {
193 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
194
195 // Prevent the compiler from re-ordering the loop.
196 barrierw(byte_idx);
197
198 // Calculate pointers.
199 uintptr_t xp = x_addr + byte_idx;
200 uintptr_t yp = y_addr + byte_idx;
201 uintptr_t destp = dest_addr + byte_idx;
202 uintptr_t randp = rand_addr + byte_idx;
203
204 // Set the pointers.
205 void *xv = (void *)launderw(xp);
206 void *yv = (void *)launderw(yp);
207 void *destv = (void *)launderw(destp);
208 void *randv = (void *)launderw(randp);
209
210 // Perform the XORs: dest = ((x ^ rand) ^ y) ^ rand
211 write_32(read_32(xv) ^ read_32(randv), destv);
212 write_32(read_32(destv) ^ read_32(yv), destv);
213 write_32(read_32(destv) ^ read_32(randv), destv);
214 }
216 HARDENED_CHECK_EQ(count, word_len);
217
218 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
219}
220
221status_t hardened_xor_in_place(uint32_t *restrict x, const uint32_t *restrict y,
222 size_t word_len) {
223 // Generate a random ordering.
224 random_order_t order;
225 random_order_init(&order, word_len);
226 size_t count = 0;
227
228 // Cast pointers to `uintptr_t` to erase their provenance.
229 uintptr_t x_addr = (uintptr_t)x;
230 uintptr_t y_addr = (uintptr_t)y;
231
232 // XOR the mask with the first share. This loop is modelled off the one in
233 // `hardened_memcpy`; see the comments there for more details.
234 for (; launderw(count) < word_len; count = launderw(count) + 1) {
235 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
236
237 // Prevent the compiler from re-ordering the loop.
238 barrierw(byte_idx);
239
240 // Calculate pointers.
241 void *xv = (void *)launderw(x_addr + byte_idx);
242 void *yv = (void *)launderw(y_addr + byte_idx);
243
244 // Perform an XOR in the array.
245 write_32(read_32(xv) ^ read_32(yv), xv);
246 }
248 HARDENED_CHECK_EQ(count, word_len);
249
250 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
251}
252
253status_t randomized_bytecopy(void *restrict dest, const void *restrict src,
254 size_t byte_len) {
255 random_order_t order;
256 random_order_init(&order, byte_len);
257
258 size_t count = 0;
259
260 uintptr_t src_addr = (uintptr_t)src;
261 uintptr_t dest_addr = (uintptr_t)dest;
262
263 for (; launderw(count) < byte_len; count = launderw(count) + 1) {
264 size_t byte_idx = launderw(random_order_advance(&order));
265 barrierw(byte_idx);
266
267 uint8_t *src_byte_idx = (uint8_t *)launderw(src_addr + byte_idx);
268 uint8_t *dest_byte_idx = (uint8_t *)launderw(dest_addr + byte_idx);
269
270 *(dest_byte_idx) = *(src_byte_idx);
271 }
273 HARDENED_CHECK_EQ(count, byte_len);
274
275 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
276}
277
278status_t randomized_bytexor_in_place(void *restrict x, const void *restrict y,
279 size_t byte_len) {
280 random_order_t order;
281 random_order_init(&order, byte_len);
282
283 size_t count = 0;
284
285 uintptr_t x_addr = (uintptr_t)x;
286 uintptr_t y_addr = (uintptr_t)y;
287
288 for (; launderw(count) < byte_len; count = launderw(count) + 1) {
289 size_t byte_idx = launderw(random_order_advance(&order));
290 barrierw(byte_idx);
291
292 // TODO(#8815) byte writes vs. word-wise integrity
293 uint8_t *x_byte_idx = (uint8_t *)launderw(x_addr + byte_idx);
294 uint8_t *y_byte_idx = (uint8_t *)launderw(y_addr + byte_idx);
295
296 *(x_byte_idx) = *(x_byte_idx) ^ *(y_byte_idx);
297 }
299 HARDENED_CHECK_EQ(count, byte_len);
300
301 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
302}
303
304#ifdef OT_PLATFORM_RV32
305/**
306 * Call the RISC-V addition with carry.
307 *
308 * @param x First input of the addition.
309 * @param y Second input of the addition.
310 * @param carry The carry-in, updates to the carry-out.
311 * @return The addition of x, y, and carry.
312 */
313static inline uint32_t rv32_addc(uint32_t x, uint32_t y, uint32_t *carry) {
314 uint32_t res, next_carry, c1, c2;
315 __asm__ __volatile__(
316 "add %[res], %[x], %[c_in]\n\t"
317 "sltu %[c1], %[res], %[c_in]\n\t"
318 "add %[res], %[res], %[y]\n\t"
319 "sltu %[c2], %[res], %[y]\n\t"
320 "or %[next_c], %[c1], %[c2]"
321 : [res] "=&r"(res), [next_c] "=&r"(next_carry), [c1] "=&r"(c1),
322 [c2] "=&r"(c2)
323 : [x] "r"(x), [y] "r"(y), [c_in] "r"(*carry));
324 *carry = next_carry;
325 return res;
326}
327
328/**
329 * Call the RISC-V subtraction with borrow.
330 *
331 * @param x First input of the subtraction.
332 * @param y Second input of the subtraction.
333 * @param borrow The borrow-in, updates to the borrow-out.
334 * @return The subtraction of x, y, and borrow.
335 */
336static inline uint32_t rv32_subc(uint32_t x, uint32_t y, uint32_t *borrow) {
337 uint32_t res, next_borrow, b1, b2;
338 __asm__ __volatile__(
339 "sltu %[b1], %[x], %[b_in]\n\t"
340 "sub %[res], %[x], %[b_in]\n\t"
341 "sltu %[b2], %[res], %[y]\n\t"
342 "sub %[res], %[res], %[y]\n\t"
343 "or %[next_b], %[b1], %[b2]"
344 : [res] "=&r"(res), [next_b] "=&r"(next_borrow), [b1] "=&r"(b1),
345 [b2] "=&r"(b2)
346 : [x] "r"(x), [y] "r"(y), [b_in] "r"(*borrow));
347 *borrow = next_borrow;
348 return res;
349}
350
351/**
352 * Call the RISC-V select.
353 *
354 *
355 * @param a First input of the select.
356 * @param b Second input of the select.
357 * @param cond The condition to select.
358 * @return `a` if `cond == 1`, or `b` if `cond == 0`.
359 */
360static inline uint32_t rv32_sel(uint32_t cond, uint32_t a, uint32_t b) {
361 uint32_t res, mask, tmp;
362 __asm__ __volatile__(
363 "neg %[mask], %[cond]\n\t" // mask = 0 - cond (0xFFFFFFFF if 1,
364 // 0x00000000 if 0)
365 "xor %[tmp], %[a], %[b]\n\t" // tmp = a ^ b
366 "and %[tmp], %[tmp], %[mask]\n\t" // tmp = (a ^ b) & mask
367 "xor %[res], %[b], %[tmp]" // out = b ^ ((a ^ b) & mask)
368 : [res] "=r"(res), [mask] "=&r"(mask), [tmp] "=&r"(tmp)
369 : [cond] "r"(cond), [a] "r"(a), [b] "r"(b));
370 return res;
371}
372#endif
373
374status_t hardened_add(const uint32_t *restrict x, const uint32_t *restrict y,
375 size_t word_len, uint32_t *restrict dest) {
376 // Randomize the content of the output buffer before writing to it.
377 hardened_memshred(dest, word_len);
378
379 uint32_t carry = 0;
380 size_t count = 0;
381
382 for (; launderw(count) < word_len; count = launderw(count) + 1) {
383#ifdef OT_PLATFORM_RV32
384 dest[count] = rv32_addc(x[count], y[count], &carry);
385#else
386 uint32_t x_val = x[count];
387 uint32_t y_val = y[count];
388
389 uint32_t res = x_val + carry;
390 uint32_t next_carry = (res < carry);
391
392 res += y_val;
393 next_carry += (res < y_val);
394
395 dest[count] = res;
396 carry = next_carry;
397#endif
398 }
399 HARDENED_CHECK_EQ(count, word_len);
400
401 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
402}
403
404status_t hardened_sub(const uint32_t *restrict x, const uint32_t *restrict y,
405 size_t word_len, uint32_t *restrict dest) {
406 // Randomize the content of the output buffer before writing to it.
407 hardened_memshred(dest, word_len);
408
409 uint32_t borrow = 0;
410 size_t count = 0;
411
412 for (; launderw(count) < word_len; count = launderw(count) + 1) {
413#ifdef OT_PLATFORM_RV32
414 dest[count] = rv32_subc(x[count], y[count], &borrow);
415#else
416 uint32_t x_val = x[count];
417 uint32_t y_val = y[count];
418
419 uint32_t res = x_val - borrow;
420
421 uint32_t next_borrow = (x_val < borrow);
422
423 next_borrow += (res < y_val);
424 res -= y_val;
425
426 dest[count] = res;
427 borrow = next_borrow;
428#endif
429 }
430 HARDENED_CHECK_EQ(count, word_len);
431
432 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
433}
434
435status_t hardened_sub_mod(const uint32_t *restrict x,
436 const uint32_t *restrict y,
437 const uint32_t *restrict n, size_t word_len,
438 uint32_t *restrict dest) {
439 // Randomize the content of the output buffer before writing to it.
440 hardened_memshred(dest, word_len);
441
442 // temp_sub = x - y
443 uint32_t temp_sub[word_len];
444 uint32_t borrow = 0;
445 size_t count = 0;
446 for (; launderw(count) < word_len; count = launderw(count) + 1) {
447#ifdef OT_PLATFORM_RV32
448 temp_sub[count] = rv32_subc(x[count], y[count], &borrow);
449#else
450 uint32_t x_val = x[count];
451 uint32_t y_val = y[count];
452 uint32_t res = x_val - borrow;
453 uint32_t next_borrow = (x_val < borrow);
454 next_borrow += (res < y_val);
455 res -= y_val;
456 temp_sub[count] = res;
457 borrow = next_borrow;
458#endif
459 }
460 HARDENED_CHECK_EQ(count, word_len);
461
462 // temp_add = temp_sub + n
463 uint32_t temp_add[word_len];
464 uint32_t carry = 0;
465 count = 0;
466 for (; launderw(count) < word_len; count = launderw(count) + 1) {
467#ifdef OT_PLATFORM_RV32
468 temp_add[count] = rv32_addc(temp_sub[count], n[count], &carry);
469#else
470 uint32_t x_val = temp_sub[count];
471 uint32_t y_val = n[count];
472 uint32_t res = x_val + carry;
473 uint32_t next_carry = (res < carry);
474 res += y_val;
475 next_carry += (res < y_val);
476 temp_add[count] = res;
477 carry = next_carry;
478#endif
479 }
480 HARDENED_CHECK_EQ(count, word_len);
481
482 // If borrow is 1, choose temp_add, otherwise choose temp_sub.
483 uint32_t is_borrow = launder32(borrow);
484
485 count = 0;
486 for (; launderw(count) < word_len; count = launderw(count) + 1) {
487#ifdef OT_PLATFORM_RV32
488 dest[count] = rv32_sel(is_borrow, temp_add[count], temp_sub[count]);
489#else
490 // The mask is all 1s if borrow is 1, and all 0s if borrow is 0.
491 uint32_t mask = ~(is_borrow - 1);
492 // Prevent optimizations of mask.
493 mask = launder32(mask);
494 dest[count] = (temp_add[count] & launder32(mask)) |
495 (temp_sub[count] & launder32(~mask));
496#endif
497 }
498 HARDENED_CHECK_EQ(count, word_len);
499
500 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
501}
502
503status_t hardened_add_mod(const uint32_t *restrict x,
504 const uint32_t *restrict y,
505 const uint32_t *restrict n, size_t word_len,
506 uint32_t *restrict dest) {
507 // Randomize the content of the output buffer before writing to it.
508 hardened_memshred(dest, word_len);
509
510 // temp_add = x + y
511 uint32_t temp_add[word_len];
512 uint32_t carry = 0;
513 size_t count = 0;
514 for (; launderw(count) < word_len; count = launderw(count) + 1) {
515#ifdef OT_PLATFORM_RV32
516 temp_add[count] = rv32_addc(x[count], y[count], &carry);
517#else
518 uint32_t x_val = x[count];
519 uint32_t y_val = y[count];
520 uint32_t res = x_val + carry;
521 uint32_t next_carry = (res < carry);
522 res += y_val;
523 next_carry += (res < y_val);
524 temp_add[count] = res;
525 carry = next_carry;
526#endif
527 }
528 HARDENED_CHECK_EQ(count, word_len);
529
530 // temp_sub = temp_add - n
531 uint32_t temp_sub[word_len];
532 uint32_t borrow = 0;
533 count = 0;
534 for (; launderw(count) < word_len; count = launderw(count) + 1) {
535#ifdef OT_PLATFORM_RV32
536 temp_sub[count] = rv32_subc(temp_add[count], n[count], &borrow);
537#else
538 uint32_t x_val = temp_add[count];
539 uint32_t y_val = n[count];
540 uint32_t res = x_val - borrow;
541 uint32_t next_borrow = (x_val < borrow);
542 next_borrow += (res < y_val);
543 res -= y_val;
544 temp_sub[count] = res;
545 borrow = next_borrow;
546#endif
547 }
548 HARDENED_CHECK_EQ(count, word_len);
549
550 uint32_t is_ge = launder32(carry) | (1 - launder32(borrow));
551
552 count = 0;
553 for (; launderw(count) < word_len; count = launderw(count) + 1) {
554#ifdef OT_PLATFORM_RV32
555 dest[count] = rv32_sel(is_ge, temp_sub[count], temp_add[count]);
556#else
557 uint32_t mask = ~(is_ge - 1);
558 // Prevent optimizations of mask
559 mask = launder32(mask);
560 dest[count] = (temp_sub[count] & launder32(mask)) |
561 (temp_add[count] & launder32(~mask));
562#endif
563 }
564 HARDENED_CHECK_EQ(count, word_len);
565
566 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
567}
568
569status_t hardened_range_check(const uint32_t *value, const uint32_t *N,
570 size_t word_len) {
571 uint32_t borrow = 0;
572 uint32_t is_zero_acc = 0;
573 size_t count = 0;
574
575 for (; launderw(count) < word_len; count = launderw(count) + 1) {
576 uint32_t val_word = value[count];
577 uint32_t n_word = N[count];
578
579 // Accumulate bits to check if value is zero.
580 is_zero_acc = launder32(is_zero_acc) | launder32(val_word);
581
582#ifdef OT_PLATFORM_RV32
583 (void)rv32_subc(val_word, n_word, &borrow);
584#else
585 // Compute borrow to check if value < N.
586 uint32_t res = val_word - borrow;
587 uint32_t next_borrow = (val_word < borrow);
588 next_borrow += (res < n_word);
589
590 borrow = next_borrow;
591#endif
592 }
593 HARDENED_CHECK_EQ(count, word_len);
594
595 uint32_t is_zero = (launder32(is_zero_acc) == 0);
596 uint32_t is_greater_or_eq = (launder32(borrow) == 0);
597 uint32_t is_bad = launder32(is_zero) | launder32(is_greater_or_eq);
598
599 if (launder32(is_bad) != 0) {
600 return (status_t){
601 .value = (int32_t)launder32((uint32_t)OTCRYPTO_BAD_ARGS.value)};
602 }
603 HARDENED_CHECK_EQ(is_bad, 0);
604
605 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
606}
607
608status_t hardened_mod_reduce(const uint32_t *value, const uint32_t *n,
609 size_t word_len, uint32_t *result) {
610 // This function computes modular reduction (value % n).
611 // It implements a constant-time shift-and-subtract division. It iterates
612 // through the bits of the dividend (`value`) from MSB to LSB, shifting them
613 // into a remainder `r`, and subtracting the divisor `n` in constant time.
614
615 // Remainder, twice the size of the modulus to handle the left shift.
616 uint32_t r[2 * word_len];
617 // Intermediate storing of (r - n).
618 uint32_t r_sub[2 * word_len];
619
620 size_t i = 0;
621
622 // Initialize remainder arrays to zero.
623 for (; launderw(i) < 2 * word_len; i = launderw(i) + 1) {
624 r[i] = 0;
625 r_sub[i] = 0;
626 }
627 HARDENED_CHECK_EQ(i, 2 * word_len);
628
629 // Process each bit of `value` from Most Significant Bit (MSB) down to LSB.
630 i = word_len * 32;
631 for (; launderw(i) > 0; i = launderw(i) - 1) {
632 size_t bit_idx = i - 1;
633 size_t word_idx = bit_idx >> 5;
634 size_t bit_in_word = bit_idx % 32;
635
636 // Shift the current remainder `r` left by 1 bit.
637 uint32_t carry = 0;
638 size_t j = 0;
639 for (; launderw(j) < 2 * word_len; j = launderw(j) + 1) {
640 uint32_t next_carry = (r[j] >> 31);
641 r[j] = (r[j] << 1) | carry;
642 carry = next_carry;
643 }
644 HARDENED_CHECK_EQ(j, 2 * word_len);
645
646 // Inject the current top bit of `value` into the LSB of `r`.
647 uint32_t bit = (value[word_idx] >> bit_in_word) & 1;
648 r[0] = r[0] ^ ((r[0] ^ bit) & 1);
649
650 // Compute `r_sub = r - n`.
651 uint32_t borrow = 0;
652 j = 0;
653 for (; launderw(j) < word_len; j = launderw(j) + 1) {
654#ifdef OT_PLATFORM_RV32
655 r_sub[j] = rv32_subc(r[j], n[j], &borrow);
656#else
657 uint32_t r_word = r[j];
658 uint32_t n_word = n[j];
659 uint32_t res = r_word - borrow;
660 uint32_t next_borrow = (r_word < borrow);
661 next_borrow |= (res < n_word);
662 res -= n_word;
663 r_sub[j] = res;
664 borrow = next_borrow;
665#endif
666 }
667 HARDENED_CHECK_EQ(j, word_len);
668
669 // Propagate the borrow through the upper half of r
670 j = word_len;
671 for (; launderw(j) < 2 * word_len; j = launderw(j) + 1) {
672#ifdef OT_PLATFORM_RV32
673 r_sub[j] = rv32_subc(r[j], 0, &borrow);
674#else
675 uint32_t res = r[j] - borrow;
676 borrow = (r[j] < borrow);
677 r_sub[j] = res;
678#endif
679 }
680 HARDENED_CHECK_EQ(j, 2 * word_len);
681
682 // Conditional swap.
683 // If r < n, the final borrow is 1. If r >= n, the final borrow is 0.
684#ifdef OT_PLATFORM_RV32
685 uint32_t cond = launder32(1 - launder32(borrow));
686#else
687 uint32_t mask = borrow - 1;
688 // Prevent compiler optimizations of the mask.
689 mask = launder32(mask);
690#endif
691
692 j = 0;
693 for (; launderw(j) < 2 * word_len; j = launderw(j) + 1) {
694#ifdef OT_PLATFORM_RV32
695 r[j] = rv32_sel(cond, r_sub[j], r[j]);
696#else
697 r[j] = (r_sub[j] & launder32(mask)) | (r[j] & launder32(~mask));
698#endif
699 }
700 HARDENED_CHECK_EQ(j, 2 * word_len);
701 }
702 HARDENED_CHECK_EQ(i, 0);
703
704 // Copy the lower word_len elements of the final remainder into the result
705 // array.
706 TRY(hardened_memcpy(result, r, word_len));
707
708 return (status_t){.value = (int32_t)launder32((uint32_t)OTCRYPTO_OK.value)};
709}