// Software APIs: hardened_memory.c
// Copyright lowRISC contributors (OpenTitan project).
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

// NOTE(review): the original `#include` block (original lines 5-9) was lost in
// extraction — restore the includes for this file's own header
// (hardened_memory.h) and its dependencies (hardened.h, random_order.h,
// memory.h) before building.
11// NOTE: The three hardened_mem* functions have similar contents, but the parts
12// that are shared between them are commented only in `memcpy()`.
13void hardened_memcpy(uint32_t *restrict dest, const uint32_t *restrict src,
14 size_t word_len) {
15 random_order_t order;
16 random_order_init(&order, word_len);
17
18 size_t count = 0;
19 size_t expected_count = random_order_len(&order);
20
21 // Immediately convert `src` and `dest` to addresses, which erases their
22 // provenance and causes their addresses to be exposed (in the provenance
23 // sense).
24 uintptr_t src_addr = (uintptr_t)src;
25 uintptr_t dest_addr = (uintptr_t)dest;
26
27 // `decoys` is a small stack array that is filled with uninitialized memory.
28 // It is scratch space for us to do "extra" operations, when the number of
29 // iteration indices the chosen random order is different from `word_len`.
30 //
31 // These extra operations also introduce noise that an attacker must do work
32 // to filter, such as by applying side-channel analysis to obtain an address
33 // trace.
34 uint32_t decoys[8];
35 uintptr_t decoy_addr = (uintptr_t)&decoys;
36
37 // We need to launder `count`, so that the SW.LOOP-COMPLETION check is not
38 // deleted by the compiler.
39 size_t byte_len = word_len * sizeof(uint32_t);
40 for (; launderw(count) < expected_count; count = launderw(count) + 1) {
41 // The order values themselves are in units of words, but we need `byte_idx`
42 // to be in units of bytes.
43 //
44 // The value obtained from `advance()` is laundered, to prevent
45 // implementation details from leaking across procedures.
46 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
47
48 // Prevent the compiler from reordering the loop; this ensures a
49 // happens-before among indices consistent with `order`.
50 barrierw(byte_idx);
51
52 // Compute putative offsets into `src`, `dest`, and `decoys`. Some of these
53 // may go off the end of `src` and `dest`, but they will not be cast to
54 // pointers in that case. (Note that casting out-of-range addresses to
55 // pointers is UB.)
56 uintptr_t srcp = src_addr + byte_idx;
57 uintptr_t destp = dest_addr + byte_idx;
58 uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
59 uintptr_t decoy2 =
60 decoy_addr +
61 ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));
62
63 // Branchlessly select whether to do a "real" copy or a decoy copy,
64 // depending on whether we've gone off the end of the array or not.
65 //
66 // Pretty much everything needs to be laundered: we need to launder
67 // `byte_idx` for obvious reasons, and we need to launder the result of the
68 // select, so that the compiler cannot delete the resulting loads and
69 // stores. This is similar to having used `volatile uint32_t *`.
70 void *src = (void *)launderw(
71 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), srcp, decoy1));
72 void *dest = (void *)launderw(
73 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), destp, decoy2));
74
75 // Perform the copy, without performing a typed dereference operation.
76 write_32(read_32(src), dest);
77 }
79 HARDENED_CHECK_EQ(count, expected_count);
80}
82void hardened_memshred(uint32_t *dest, size_t word_len) {
83 random_order_t order;
84 random_order_init(&order, word_len);
85
86 size_t count = 0;
87 size_t expected_count = random_order_len(&order);
88
89 uintptr_t data_addr = (uintptr_t)dest;
90
91 uint32_t decoys[8];
92 uintptr_t decoy_addr = (uintptr_t)&decoys;
93
94 size_t byte_len = word_len * sizeof(uint32_t);
95 for (; count < expected_count; count = launderw(count) + 1) {
96 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
97 barrierw(byte_idx);
98
99 uintptr_t datap = data_addr + byte_idx;
100 uintptr_t decoy = decoy_addr + (byte_idx % sizeof(decoys));
101
102 void *data = (void *)launderw(
103 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), datap, decoy));
104
105 // Write a freshly-generated random word to `*data`.
106 write_32(hardened_memshred_random_word(), data);
107 }
109
110 HARDENED_CHECK_EQ(count, expected_count);
111}
113hardened_bool_t hardened_memeq(const uint32_t *lhs, const uint32_t *rhs,
114 size_t word_len) {
115 random_order_t order;
116 random_order_init(&order, word_len);
117
118 size_t count = 0;
119 size_t expected_count = random_order_len(&order);
120
121 uintptr_t lhs_addr = (uintptr_t)lhs;
122 uintptr_t rhs_addr = (uintptr_t)rhs;
123
124 // `decoys` needs to be filled with equal values this time around. It
125 // should be filled with values with a Hamming weight of around 16, which is
126 // the most common hamming weight among 32-bit words.
127 uint32_t decoys[8] = {
128 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
129 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa, 0xaaaaaaaa,
130 };
131 uintptr_t decoy_addr = (uintptr_t)&decoys;
132
133 uint32_t zeros = 0;
134 uint32_t ones = UINT32_MAX;
135
136 // The loop is almost token-for-token the one above, but the copy is
137 // replaced with something else.
138 size_t byte_len = word_len * sizeof(uint32_t);
139 for (; count < expected_count; count = launderw(count) + 1) {
140 size_t byte_idx = launderw(random_order_advance(&order)) * sizeof(uint32_t);
141 barrierw(byte_idx);
142
143 uintptr_t ap = lhs_addr + byte_idx;
144 uintptr_t bp = rhs_addr + byte_idx;
145 uintptr_t decoy1 = decoy_addr + (byte_idx % sizeof(decoys));
146 uintptr_t decoy2 =
147 decoy_addr +
148 ((byte_idx + (sizeof(decoys) / 2) + sizeof(uint32_t)) % sizeof(decoys));
149
150 void *av = (void *)launderw(
151 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), ap, decoy1));
152 void *bv = (void *)launderw(
153 ct_cmovw(ct_sltuw(launderw(byte_idx), byte_len), bp, decoy2));
154
155 uint32_t a = read_32(av);
156 uint32_t b = read_32(bv);
157
158 // Launder one of the operands, so that the compiler cannot cache the result
159 // of the xor for use in the next operation.
160 //
161 // We launder `zeroes` so that compiler cannot learn that `zeroes` has
162 // strictly more bits set at the end of the loop.
163 zeros = launder32(zeros) | (launder32(a) ^ b);
164
165 // Same as above. The compiler can cache the value of `a[offset]`, but it
166 // has no chance to strength-reduce this operation.
167 ones = launder32(ones) & (launder32(a) ^ ~b);
168 }
170
171 HARDENED_CHECK_EQ(count, expected_count);
172 if (launder32(zeros) == 0) {
173 HARDENED_CHECK_EQ(ones, UINT32_MAX);
174 return kHardenedBoolTrue;
175 }
176
177 HARDENED_CHECK_NE(ones, UINT32_MAX);
178 return kHardenedBoolFalse;
179}