// hardened.h — Software APIs (documentation-site navigation text; the C
// source proper begins below).
1// Copyright lowRISC contributors (OpenTitan project).
2// Licensed under the Apache License, Version 2.0, see LICENSE for details.
3// SPDX-License-Identifier: Apache-2.0
4
5#ifndef OPENTITAN_SW_DEVICE_LIB_BASE_HARDENED_H_
6#define OPENTITAN_SW_DEVICE_LIB_BASE_HARDENED_H_
7
#include <stdint.h>

#include "sw/device/lib/base/hardened_asm.h"
#include "sw/device/lib/base/macros.h"
13
14/**
15 * @file
16 * @brief Data Types for use in Hardened Code.
17 */
18
19#ifdef __cplusplus
20extern "C" {
21#endif // __cplusplus
22
23/**
24 * This is a boolean type for use in hardened contexts.
25 *
26 * The intention is that this is used instead of `<stdbool.h>`'s #bool, where a
27 * higher hamming distance is required between the truthy and the falsey value.
28 *
29 * The values below were chosen at random, with some specific restrictions. They
30 * have a Hamming Distance of 8, and they are 11-bit values so they can be
31 * materialized with a single instruction on RISC-V. They are also specifically
32 * not the complement of each other.
33 */
34typedef enum hardened_bool {
35 /**
36 * The truthy value, expected to be used like #true.
37 */
38 kHardenedBoolTrue = HARDENED_BOOL_TRUE,
39 /**
40 * The falsey value, expected to be used like #false.
41 */
42 kHardenedBoolFalse = HARDENED_BOOL_FALSE,
44
45/**
46 * A byte-sized hardened boolean.
47 *
48 * This type is intended for cases where a byte-sized hardened boolean is
49 * required.
50 *
51 * The values below were chosen to ensure that the hamming difference between
52 * them is greater than 5 and they are not bitwise complements of each other.
53 */
54typedef enum hardened_byte_bool {
55 /**
56 * The truthy value.
57 */
58 kHardenedByteBoolTrue = HARDENED_BYTE_BOOL_TRUE,
59 /**
60 * The falsy value.
61 */
62 kHardenedByteBoolFalse = HARDENED_BYTE_BOOL_FALSE,
64
/**
66 * Launders the 32-bit value `val`.
67 *
68 * `launder32()` returns a value identical to the input, while introducing an
69 * optimization barrier that prevents the compiler from learning new information
70 * about the original value, based on operations on the laundered value. This
71 * does not work the other way around: some information that the compiler has
72 * already learned about the value may make it through; this last caveat is
73 * explained below.
74 *
75 * In some circumstances, it is desirable to introduce a redundant (from the
76 * compiler's perspective) operation into the instruction stream to make it less
77 * likely that hardware faults will result in critical operations being
78 * bypassed. Laundering makes it possible to insert such duplicate operations,
79 * while blocking the compiler's attempts to delete them through dead code
80 * elimination.
81 *
82 * Suppose we have a pure-C `CHECK()` macro that does a runtime-assert.
83 * For example, in the following code, a compiler would fold `CHECK()` away,
84 * since dataflow analysis could tell it that `x` is always zero, allowing it to
85 * deduce that `x == 0` -> `0 == 0` -> `true`. It then deletes the `CHECK(true)`
86 * as dead code.
87 * ```
88 * if (x == 0) {
89 * CHECK(x == 0);
90 * }
91 * ```
92 *
93 * If we believe that an attacker can glitch the chip to skip the first branch,
94 * this assumption no longer holds. We can use laundering to prevent LLVM from
95 * learning that `x` is zero inside the block:
96 * ```
97 * if (launder32(x) == 0) {
98 * CHECK(x == 0);
99 * }
100 * ```
101 *
102 * Note that this operation is order sensitive: while we can stop the compiler
103 * from learning new information, it is very hard to make it forget in some
104 * cases. If we had instead written
105 * ```
106 * if (x == 0) {
107 * CHECK(launder32(x) == 0);
108 * }
109 * ```
110 * then it would be entitled to rewrite this into `launder32(0) == 0`.
111 * Although it can't make the deduction that this is identically true, because
112 * `launder32()` is the identity function, this behaves like `0 == 0` at
113 * runtime. For example, a completely valid lowering of this code is
114 * ```
115 * bnez a0, else
116 * mv a0, zero
117 * bnez a0, shutdown
118 * // ...
119 * ```
120 *
121 * Even pulling the expression out of the branch as `uint32_t y = launder32(x);`
122 * doesn't work, because the compiler may chose to move the operation into the
123 * branch body. Thus, we need to stop it from learning that `x` is zero in the
124 * first place.
125 *
126 * Note that, in spite of `HARDENED_CHECK` being written in assembly, it is
127 * still vulnerable to certain inimical compiler optimizations. For example,
128 * `HARDENED_CHECK_EQ(x, 0)` can be written to `HARDENED_CHECK_EQ(0, 0)`.
129 * Although the compiler cannot delete this code, it will emit the nonsensical
130 * ```
131 * beqz zero, continue
132 * unimp
133 * continue:
134 * ```
135 * effectively negating the doubled condition.
136 *
137 * ---
138 *
139 * In general, this intrinsic should *only* be used on values that the compiler
140 * doesn't already know anything interesting about. The precise meaning of this
141 * can be subtle, since inlining can easily foil pessimization. Uses of this
142 * function must be carefully reviewed, and commented with justification and an
143 * explanation of what information it is preventing access to, and what ambient
144 * invariants it relies on.
145 *
146 * This function makes no guarantees: it is only intended to help harden code.
147 * It does not remove the need to carefully verify that the correct
148 * instruction-level invariants are upheld in the final release.
149 *
150 * This operation *does not* introduce a sequence point: a compiler may move it
151 * anywhere between where its input is last modified or its output is first
152 * moved. This can include moving through different nodes in the control flow
153 * graph. The location of the laundering operation must be determined
154 * *exclusively* by its position in the expression dependency graph.
155 *
156 * Other examples of laundering use-cases should be listed here as they become
157 * apparent.
158 *
159 * * Obscuring loop completion. It may be useful to prevent a compiler from
160 * learning that a particular loop is guaranteed to complete. The most
161 * straightforward way to do this is to launder the loop increment: replace
162 * `++i` with `i = launder32(i) + 1`. This is helpful for preventing the
163 * compiler from learning that the loop executes in a particular order, and
164 * foiling unroll/unroll-and-jam optimizations. It also prevents the compiler
165 * from learning that the loop counter was saturated. However, if the exit
166 * condition is `i < len`, the compiler will learn that `i >= len` outside the
167 * loop.
168 *
169 * Laundering just the exit comparison, `launder32(i) < len`, will still allow
170 * the compiler to deduce that the loop is traversed in linear order, but it
171 * will not learn that `i >= len` outside the loop. Laundering both loop
172 * control expressions may be necessary in some cases.
173 *
174 * * Assigning a literal to a value without the compiler learning about
175 * that literal value. `x = launder32(0);` zeros `x`, but the compiler
176 * cannot replace all uses of it with a constant `0`. Note that
177 * `x = launder32(x);` does not prevent knowledge the compiler already has
178 * about `x` from replacing it with a constant in `launder32()`.
179 *
180 * * Preventing operations from being re-associated. The compiler is entitled
181 * to rewrite `x ^ (y ^ z)` into `(x ^ y) ^ z`, but cannot do the same with
182 * `x ^ launder32(y ^ z)`. No operations commute through `launder32()`.
183 *
184 * * Preventing dead-code elimination. The compiler cannot delete the
185 * unreachable expression `if (launder32(false)) { foo(); }`. However, the
186 * branch will never be executed in an untampered instruction stream.
187 *
188 * @param val A 32-bit integer to launder.
189 * @return A 32-bit integer which will *happen* to have the same value as `val`
190 * at runtime.
191 */
inline uint32_t launder32(uint32_t val) {
  // NOTE: This implementation is LLVM-specific, and should be considered to be
  // a no-op in every other compiler. For example, GCC has in the past peered
  // into the insides of assembly blocks.
  //
  // LLVM cannot see through assembly blocks, and must treat this as a black
  // box. Similar tricks are employed in other security-sensitive code-bases,
  // such as https://boringssl-review.googlesource.com/c/boringssl/+/36484.
  //
  // # "To volatile or not to volatile, that is the question."
  // Naively, this snippet would not be marked volatile, since we do not care
  // about reordering. However, there are rare optimizations that can result
  // in "miscompilation" of this primitive. For example, if the argument is
  // a complex expression, we get something like the following Godbolt:
  // https://godbolt.org/z/c7M7Yr7Yo.
  //
  // This is not actually a miscompilation: LLVM is treating the asm
  // statement as behaving like a random oracle determined entirely by its
  // arguments; therefore, it is entitled to deduplicate both occurrences of
  // `LaunderPure(y)` (in this particular case, from bisecting the passes,
  // it appears to happen during register allocation).
  //
  // We work around this by marking the inline asm volatile.
  //
  // Alternatively, we could add a "nonce" to the inline asm that has been
  // tainted by a volatile asm operation. This has the benefit that the input
  // does not need to happen-before the volatile operation, and can be
  // arbitrarily reordered, while ensuring that no call of launder32() is
  // "pure" in the deduplication sense. I.e.:
  //
  //   uint32_t nonce;
  //   asm volatile("" : "=r"(nonce));
  //   asm("" : "+r"(val) : "r"(nonce));
  //
  // At the time of writing, it seems preferable to have something we know is
  // correct rather than being overly clever; this is recorded here in case
  // the current implementation is unsuitable and we need something more
  // carefully tuned.
  //
  // This comment was formerly present to justify the lack of volatile. It is
  // left behind as a warning to not try to remove it without further careful
  // analysis.
  // > It is *not* marked as volatile, since reordering is not desired; marking
  // > it volatile does make some laundering operations suddenly start working
  // > spuriously, which is entirely dependent on how excited LLVM is about
  // > reordering things. To avoid this trap, we do not mark as volatile and
  // > instead require that reordering be prevented through careful sequencing
  // > of statements.

  // When we're building for static analysis, reduce false positives by
  // short-circuiting the inline assembly block.
#if OT_BUILD_FOR_STATIC_ANALYZER || OT_DISABLE_HARDENING
  return val;
#endif

  // The +r constraint tells the compiler that this is an "inout" parameter: it
  // means that not only does the black box depend on `val`, but it also mutates
  // it in an unspecified way.
  asm volatile("" : "+r"(val));
  return val;
}
254
/**
 * Launders the pointer-sized value `val`.
 *
 * See `launder32()` for detailed semantics.
 *
 * @param val A pointer-sized integer to launder.
 * @return A pointer-sized integer which will happen to have the same value as
 * `val` at runtime.
 */
inline uintptr_t launderw(uintptr_t val) {
#if OT_BUILD_FOR_STATIC_ANALYZER || OT_DISABLE_HARDENING
  return val;
#endif
  // As in `launder32()`: the volatile asm with a "+r" (inout) constraint is an
  // opaque identity the compiler can neither fold away nor deduplicate.
  asm volatile("" : "+r"(val));
  return val;
}
272
273/**
274 * Creates a reordering barrier for `val`.
275 *
276 * `barrier32()` takes an argument and discards it, while introducing an
277 * "impure" dependency on that value. This forces the compiler to schedule
278 * instructions such that the intermediate `val` actually appears in a
279 * register. Because it is impure, `barrier32()` operations will not be
280 * reordered past each other or MMIO operations, although they can be reordered
281 * past functions if LTO inlines them.
282 *
283 * Most compilers will try to reorder arithmetic operations in such a way
284 * that they form large basic blocks, since modern microarchitectures can
285 * take advantage of instruction-level parallelism. Unfortunately, this means
286 * that instructions that we want to interleave with other instructions are
287 * likely to get separated; this includes static interleavings,
288 * loop-invariant code motion, and some kinds of unroll-and-jam.
289 *
290 * For example, given
291 *
292 * ```
293 * uint32_t product = 5;
294 *
295 * foo();
296 * product *= product;
297 * foo();
298 * product *= product;
299 * foo();
300 * product *= product;
301 * ```
302 *
303 * A compiler will likely reorder this as
304 *
305 * ```
306 * uint32_t product = 5;
307 *
308 * foo();
309 * foo();
310 * foo();
311 * product *= product;
312 * product *= product;
313 * product *= product;
314 * ```
315 *
316 * The compiler is further entitled to constant-propagate `product`.
317 * `barrier32()` can be used to avoid this:
318 *
319 * ```
320 * // NB: the initial value of `product` is laundered to prevent
321 * // constant propagation, but only because it is a compile-time
322 * // constant. Laundering the intermediates would also work.
323 * uint32_t product = launder32(5);
324 * barrier32(product);
325 *
326 * barrier32(foo());
327 * product *= product;
328 * barrier32(product);
329 *
330 * barrier32(foo());
331 * product *= product;
332 * barrier32(product);
333 *
334 * barrier32(foo());
335 * product *= product;
336 * barrier32(product);
337 * ```
338 *
339 * Note that we must place barriers on the result of the function calls,
340 * too, so that the compiler believes that there is a potential dependency
341 * between the return value of the function, and the followup value of
342 * `product`.
343 *
344 * This is also useful for avoiding loop reordering:
345 *
346 * ```
347 * for (int i = 0; i != n - 1; i = (i + kPrime) % n) {
348 * barrier32(i);
349 *
350 * // Stuff.
351 * }
352 * ```
353 *
 * A sufficiently intelligent compiler might notice that it can linearize this
 * loop; however, the barriers in each loop iteration force a particular order
 * to be observed.
357 *
358 * @param val A value to create a barrier for.
359 */
// A bare "r" input operand with no outputs makes this an opaque consumer of
// `val`, forcing it to actually exist in a register at this program point;
// `volatile` keeps the statement from being deleted as dead code.
inline void barrier32(uint32_t val) { asm volatile("" ::"r"(val)); }
361
362/**
363 * Creates a reordering barrier for `val`.
364 *
365 * See `barrier32()` for detailed semantics.
366 *
367 * @param val A value to create a barrier for.
368 */
// Pointer-sized analogue of `barrier32()`: an opaque, volatile asm consumer of
// `val`.
inline void barrierw(uintptr_t val) { asm volatile("" ::"r"(val)); }
370
/**
 * A constant-time, 32-bit boolean value.
 *
 * Values of this type MUST be either all zero bits or all one bits,
 * representing `false` and `true` respectively.
 *
 * Although it is possible to convert an existing `bool` into a `ct_bool32_t` by
 * writing `-((ct_bool32_t) my_bool)`, we recommend against it; prefer producing
 * these values with the constant-time comparison functions below.
 */
typedef uint32_t ct_bool32_t;
381
382// The formulae below are taken from Hacker's Delight, Chapter 2.
383// Although the book does not define them as being constant-time, they are
384// branchless; branchless code is always constant-time.
385//
386// Proofs and references to HD are provided only in the 32-bit versions.
387//
388// Warren Jr., Henry S. (2013). Hacker's Delight (2 ed.).
389// Addison Wesley - Pearson Education, Inc. ISBN 978-0-321-84268-8.
390
/**
 * Performs constant-time signed comparison to zero.
 *
 * Returns whether `a < 0`, as a constant-time boolean.
 * In other words, this checks if `a` is negative, i.e., its sign bit is set.
 *
 * @param a The signed value to test.
 * @return `a < 0`.
 */
inline ct_bool32_t ct_sltz32(int32_t a) {
  // Proof. `a` is negative iff its MSB is set;
  // arithmetic-right-shifting by bits(a)-1 smears the sign bit across all
  // of `a`.
  //
  // NOTE(review): right-shifting a negative value is implementation-defined in
  // C; this relies on the toolchain emitting an arithmetic shift — confirm for
  // any new compiler target.
  return OT_UNSIGNED(a >> (sizeof(a) * 8 - 1));
}
406
/**
 * Performs constant-time unsigned ascending comparison.
 *
 * Returns `a < b` as a constant-time boolean.
 *
 * @param a The left-hand operand.
 * @param b The right-hand operand.
 * @return `a < b`.
 */
inline ct_bool32_t ct_sltu32(uint32_t a, uint32_t b) {
  // Proof. See Hacker's Delight page 23.
  return ct_sltz32(OT_SIGNED(((a & ~b) | ((a ^ ~b) & (a - b)))));
}
419
/**
 * Performs constant-time zero equality.
 *
 * Returns `a == 0` as a constant-time boolean.
 *
 * @param a The value to test.
 * @return `a == 0`.
 */
inline ct_bool32_t ct_seqz32(uint32_t a) {
  // Proof. See Hacker's Delight page 23.
  // HD gives this formula: `a == b := ~(a-b | b-a)`.
  //
  // Setting `b` to zero gives us
  //   ~(a | -a) -> ~a & ~-a -> ~a & (a - 1)
  // via identities on page 16.
  //
  // This formula is also given on page 11 for a different purpose.
  return ct_sltz32(OT_SIGNED(~a & (a - 1)));
}
439
/**
 * Performs constant-time equality.
 *
 * Returns `a == b` as a constant-time boolean.
 *
 * @param a The left-hand operand.
 * @param b The right-hand operand.
 * @return `a == b`.
 */
inline ct_bool32_t ct_seq32(uint32_t a, uint32_t b) {
  // Proof. a ^ b == 0 -> a ^ a ^ b == a ^ 0 -> b == a.
  return ct_seqz32(a ^ b);
}
452
/**
 * Performs a constant-time select.
 *
 * Returns `a` if `c` is true; otherwise, returns `b`.
 *
 * This function should be used with one of the comparison functions above; do
 * NOT create `c` using an `if` or `?:` operation.
 *
 * @param c The condition to test; MUST be all-zeros or all-ones, per the
 * `ct_bool32_t` contract.
 * @param a The value to return on true.
 * @param b The value to return on false.
 * @return `c ? a : b`.
 */
inline uint32_t ct_cmov32(ct_bool32_t c, uint32_t a, uint32_t b) {
  // Proof. See Hacker's Delight page 46. HD gives this as a branchless swap;
  // branchless select is a special case of that.

  // `c` must be laundered because LLVM has a strength reduction pass for this
  // exact pattern, but lacking a cmov instruction, it will almost certainly
  // select a branch instruction here.
  return (launder32(c) & a) | (launder32(~c) & b);
}
476
/**
 * A constant-time, pointer-sized boolean value.
 *
 * Values of this type MUST be either all zero bits or all one bits,
 * representing `false` and `true` respectively (see `ct_bool32_t`).
 */
typedef uintptr_t ct_boolw_t;
483
/**
 * Performs constant-time signed comparison to zero.
 *
 * Returns whether `a < 0`, as a constant-time boolean.
 * In other words, this checks if `a` is negative, i.e., its sign bit is set.
 *
 * @param a The signed value to test.
 * @return `a < 0`.
 */
inline ct_boolw_t ct_sltzw(intptr_t a) {
  // See `ct_sltz32()` for the proof.
  return OT_UNSIGNED(a >> (sizeof(a) * 8 - 1));
}
496
/**
 * Performs constant-time unsigned ascending comparison.
 *
 * Returns `a < b` as a constant-time boolean.
 *
 * @param a The left-hand operand.
 * @param b The right-hand operand.
 * @return `a < b`.
 */
inline ct_boolw_t ct_sltuw(uintptr_t a, uintptr_t b) {
  // See `ct_sltu32()` for the proof.
  return ct_sltzw(OT_SIGNED((a & ~b) | ((a ^ ~b) & (a - b))));
}
508
/**
 * Performs constant-time zero equality.
 *
 * Returns `a == 0` as a constant-time boolean.
 *
 * @param a The value to test.
 * @return `a == 0`.
 */
inline ct_boolw_t ct_seqzw(uintptr_t a) {
  // See `ct_seqz32()` for the proof.
  return ct_sltzw(OT_SIGNED(~a & (a - 1)));
}
520
/**
 * Performs constant-time equality.
 *
 * Returns `a == b` as a constant-time boolean.
 *
 * @param a The left-hand operand.
 * @param b The right-hand operand.
 * @return `a == b`.
 */
inline ct_boolw_t ct_seqw(uintptr_t a, uintptr_t b) { return ct_seqzw(a ^ b); }
530
/**
 * Performs a constant-time select.
 *
 * Returns `a` if `c` is true; otherwise, returns `b`.
 *
 * This function should be used with one of the comparison functions above; do
 * NOT create `c` using an `if` or `?:` operation.
 *
 * @param c The condition to test; MUST be all-zeros or all-ones, per the
 * `ct_boolw_t` contract.
 * @param a The value to return on true.
 * @param b The value to return on false.
 * @return `c ? a : b`.
 */
inline uintptr_t ct_cmovw(ct_boolw_t c, uintptr_t a, uintptr_t b) {
  // See `ct_cmov32()` for the proof and the laundering rationale.
  return (launderw(c) & a) | (launderw(~c) & b);
}
548
// Implementation details shared across shutdown macros.
#ifdef OT_PLATFORM_RV32
// This string can be tuned to be longer or shorter as desired, for
// fault-hardening purposes.
#define HARDENED_UNIMP_SEQUENCE_() "unimp; unimp; unimp;"

// RISC-V branch mnemonics that test each comparison directly.
#define HARDENED_CHECK_OP_EQ_ "beq"
#define HARDENED_CHECK_OP_NE_ "bne"
#define HARDENED_CHECK_OP_LT_ "bltu"
#define HARDENED_CHECK_OP_GT_ "bgtu"
#define HARDENED_CHECK_OP_LE_ "bleu"
#define HARDENED_CHECK_OP_GE_ "bgeu"

// The inverse opcodes test the opposite condition of their name (e.g. EQ checks
// for not equal, etc).
#define HARDENED_CHECK_INV_EQ_ "bne"
#define HARDENED_CHECK_INV_NE_ "beq"
#define HARDENED_CHECK_INV_LT_ "bgeu"
#define HARDENED_CHECK_INV_GT_ "bleu"
#define HARDENED_CHECK_INV_LE_ "bgtu"
#define HARDENED_CHECK_INV_GE_ "bltu"

#ifndef OT_DISABLE_HARDENING
// The check is doubled: the first branch skips over the `unimp` when the
// condition holds, and the inverse branch then re-tests, jumping back to the
// `unimp` on failure. A fault that skips one branch still hits the other, so
// both must be glitched to bypass the trap.
// clang-format off
#define HARDENED_CHECK_(op_, a_, b_) \
  asm volatile( \
      OT_CAT(HARDENED_CHECK_OP_, op_) " %0, %1, .L_HARDENED_OK_%=;" \
      ".L_HARDENED_BAD_%=:;" \
      "unimp;" \
      ".L_HARDENED_OK_%=:;" \
      OT_CAT(HARDENED_CHECK_INV_, op_) " %0, %1, .L_HARDENED_BAD_%=;" \
      ::"r"(a_), "r"(b_))
// clang-format on

#define HARDENED_TRAP_() \
  do { \
    asm volatile(HARDENED_UNIMP_SEQUENCE_()); \
  } while (false)

#else  // OT_DISABLE_HARDENING
// We allow disabling hardening to measure the impact of the hardened sequences
// on code size.
#define HARDENED_CHECK_(op_, a_, b_) \
  do { \
    (void)(a_); \
    (void)(b_); \
  } while (0)
#define HARDENED_TRAP_() \
  do { \
  } while (0)
#endif  // OT_DISABLE_HARDENING
#else   // OT_PLATFORM_RV32
#include <assert.h>

// Off-target (host) builds fall back to a plain `assert` using the matching C
// comparison operator.
#define HARDENED_CHECK_OP_EQ_ ==
#define HARDENED_CHECK_OP_NE_ !=
#define HARDENED_CHECK_OP_LT_ <
#define HARDENED_CHECK_OP_GT_ >
#define HARDENED_CHECK_OP_LE_ <=
#define HARDENED_CHECK_OP_GE_ >=

#define HARDENED_CHECK_(op_, a_, b_) \
  assert((uint64_t)(a_)OT_CAT(HARDENED_CHECK_OP_, op_)(uint64_t)(b_))

#define HARDENED_TRAP_() __builtin_trap()
#endif  // OT_PLATFORM_RV32
615
/**
 * If the following code is (unexpectedly) reached a trap instruction will be
 * executed.
 */
#define HARDENED_TRAP() HARDENED_TRAP_()

/**
 * Compare two values in a way that is *manifestly* true: that is, under normal
 * program execution, the comparison is a tautology.
 *
 * If the comparison fails, we can deduce the device is under attack, so an
 * illegal instruction will be executed. The manner in which the comparison is
 * done is carefully constructed in assembly to minimize the possibility of
 * adversarial faults. This macro also implicitly launders both arguments since
 * the compiler doesn't see the comparison and can't learn any new information
 * from it.
 *
 * There are six variants of this macro: one for each of the six comparison
 * operations.
 *
 * This macro is intended to be used along with `launder32()` to handle detected
 * faults when implementing redundant checks, i.e. in places where the compiler
 * could otherwise prove them statically unreachable. For example:
 * ```
 * if (launder32(x) == 0) {
 *   HARDENED_CHECK_EQ(x, 0);
 * }
 * ```
 * See `launder32()` for more details.
 */
#define HARDENED_CHECK_EQ(a_, b_) HARDENED_CHECK_(EQ_, a_, b_)
#define HARDENED_CHECK_NE(a_, b_) HARDENED_CHECK_(NE_, a_, b_)
#define HARDENED_CHECK_LT(a_, b_) HARDENED_CHECK_(LT_, a_, b_)
#define HARDENED_CHECK_GT(a_, b_) HARDENED_CHECK_(GT_, a_, b_)
#define HARDENED_CHECK_LE(a_, b_) HARDENED_CHECK_(LE_, a_, b_)
#define HARDENED_CHECK_GE(a_, b_) HARDENED_CHECK_(GE_, a_, b_)
652
653#ifdef __cplusplus
654}
655#endif // __cplusplus
656
657#endif // OPENTITAN_SW_DEVICE_LIB_BASE_HARDENED_H_