// Software APIs — hardened_memory.c
// Copyright lowRISC contributors (OpenTitan project).
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0
4 
// NOTE(review): the original #include block was lost in extraction here —
// presumably the module's own header plus the hardened/laundering and
// random-order helpers; restore from upstream before building.
// NOTE: The three hardened_mem* functions have similar contents, but the parts
// that are shared between them are commented only in `memcpy()`.

/**
 * Copies `word_len` 32-bit words from `src` to `dest`.
 *
 * The words are visited in a randomized order with interleaved decoy
 * accesses (noise an attacker must filter out, e.g. via side-channel
 * analysis), and loop completion is redundantly checked at the end as a
 * fault-injection countermeasure.
 */
void hardened_memcpy(uint32_t *restrict dest, const uint32_t *restrict src,
                     size_t word_len) {
  random_order_t order;
  random_order_init(&order, word_len);

  size_t count = 0;
  size_t expected_count = random_order_len(&order);

  // Immediately convert `src` and `dest` to addresses, which erases their
  // provenance and causes their addresses to be exposed (in the provenance
  // sense).
  uintptr_t src_addr = (uintptr_t)src;
  uintptr_t dest_addr = (uintptr_t)dest;

  // `decoys` is a small stack array that is filled with uninitialized memory.
  // It is scratch space for us to do "extra" operations, when the number of
  // iteration indices in the chosen random order is different from `word_len`.
  //
  // These extra operations also introduce noise that an attacker must do work
  // to filter, such as by applying side-channel analysis to obtain an address
  // trace.
  uint32_t decoys[8];
  uintptr_t decoy_addr = (uintptr_t)&decoys;

  // We need to launder `count`, so that the SW.LOOP-COMPLETION check is not
  // deleted by the compiler.
  size_t byte_len = word_len * sizeof(uint32_t);
  for (; launderw(count) < expected_count; count = launderw(count) + 1) {
    // The order values themselves are in units of words, but we need `byte_idx`
    // to be in units of bytes.
    //
    // The value obtained from `advance()` is laundered, to prevent
    // implementation details from leaking across procedures.
    size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);

    // Prevent the compiler from reordering the loop; this ensures a
    // happens-before among indices consistent with `order`.
    barrierw(byte_idx);

    // Compute putative offsets into `src`, `dest`, and `decoys`. Some of these
    // may go off the end of `src` and `dest`, but they will not be cast to
    // pointers in that case. (Note that casting out-of-range addresses to
    // pointers is UB.)
    uintptr_t srcp = src_addr + byte_idx;
    uintptr_t destp = dest_addr + byte_idx;
    uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
    uintptr_t decoy2 =
        decoy_addr + ((byte_idx + sizeof(decoys) / 2) % sizeof(decoys));

    // Branchlessly select whether to do a "real" copy or a decoy copy,
    // depending on whether we've gone off the end of the array or not.
    //
    // Pretty much everything needs to be laundered: we need to launder
    // `byte_idx` for obvious reasons, and we need to launder the result of the
    // select, so that the compiler cannot delete the resulting loads and
    // stores. This is similar to having used `volatile uint32_t *`.
    void *src = (void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), srcp, decoy1));
    void *dest = (void *)launderw(
        ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), destp, decoy2));

    // Perform the copy, without performing a typed dereference operation.
    write_32(read_32(src), dest);
  }

  HARDENED_CHECK_EQ(count, expected_count);
}
80 
81 // The source of randomness for shred, which may be replaced at link-time.
82 OT_WEAK
83 uint32_t hardened_memshred_random_word(void) { return 0xcaffe17e; }
84 
85 void hardened_memshred(uint32_t *dest, size_t word_len) {
86  random_order_t order;
87  random_order_init(&order, word_len);
88 
89  size_t count = 0;
90  size_t expected_count = random_order_len(&order);
91 
92  uintptr_t data_addr = (uintptr_t)dest;
93 
94  uint32_t decoys[8];
95  uintptr_t decoy_addr = (uintptr_t)&decoys;
96 
97  size_t byte_len = word_len * sizeof(uint32_t);
98  for (; count < expected_count; count = launderw(count) + 1) {
99  size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
100  barrierw(byte_idx);
101 
102  uintptr_t datap = data_addr + byte_idx;
103  uintptr_t decoy = decoy_addr + (byte_idx % sizeof(decoys));
104 
105  void *data = (void *)launderw(
106  ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), datap, decoy));
107 
108  // Write a freshly-generated random word to `*data`.
109  write_32(hardened_memshred_random_word(), data);
110  }
111 
112  HARDENED_CHECK_EQ(count, expected_count);
113 }
114 
115 hardened_bool_t hardened_memeq(const uint32_t *lhs, const uint32_t *rhs,
116  size_t word_len) {
117  random_order_t order;
118  random_order_init(&order, word_len);
119 
120  size_t count = 0;
121  size_t expected_count = random_order_len(&order);
122 
123  uintptr_t lhs_addr = (uintptr_t)lhs;
124  uintptr_t rhs_addr = (uintptr_t)rhs;
125 
126  // `decoys` needs to be filled with equal values this time around. It
127  // should be filled with values with a Hamming weight of around 16, which is
128  // the most common hamming weight among 32-bit words.
129  uint32_t decoys[8] = {
130  0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
131  0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
132  };
133  uintptr_t decoy_addr = (uintptr_t)&decoys;
134 
135  uint32_t zeros = 0;
136  uint32_t ones = UINT32_MAX;
137 
138  // The loop is almost token-for-token the one above, but the copy is
139  // replaced with something else.
140  size_t byte_len = word_len * sizeof(uint32_t);
141  for (; count < expected_count; count = launderw(count) + 1) {
142  size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
143  barrierw(byte_idx);
144 
145  uintptr_t ap = lhs_addr + byte_idx;
146  uintptr_t bp = rhs_addr + byte_idx;
147  uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
148  uintptr_t decoy2 =
149  decoy_addr + ((byte_idx + sizeof(decoys) / 2) % sizeof(decoys));
150 
151  void *av = (void *)launderw(
152  ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), ap, decoy1));
153  void *bv = (void *)launderw(
154  ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), bp, decoy2));
155 
156  uint32_t a = read_32(av);
157  uint32_t b = read_32(bv);
158 
159  // Launder one of the operands, so that the compiler cannot cache the result
160  // of the xor for use in the next operation.
161  //
162  // We launder `zeroes` so that compiler cannot learn that `zeroes` has
163  // strictly more bits set at the end of the loop.
164  zeros = launder32(zeros) | (launder32(a) ^ b);
165 
166  // Same as above. The compiler can cache the value of `a[offset]`, but it
167  // has no chance to strength-reduce this operation.
168  ones = launder32(ones) & (launder32(a) ^ ~b);
169  }
170 
171  HARDENED_CHECK_EQ(count, expected_count);
172  if (launder32(zeros) == 0) {
173  HARDENED_CHECK_EQ(ones, UINT32_MAX);
174  return kHardenedBoolTrue;
175  }
176 
177  HARDENED_CHECK_NE(ones, UINT32_MAX);
178  return kHardenedBoolFalse;
179 }