hardened.h
// Copyright lowRISC contributors.
// Licensed under the Apache License, Version 2.0, see LICENSE for details.
// SPDX-License-Identifier: Apache-2.0

#ifndef OPENTITAN_SW_DEVICE_LIB_BASE_HARDENED_H_
#define OPENTITAN_SW_DEVICE_LIB_BASE_HARDENED_H_

#include <stdint.h>

/**
 * @file
 * @brief Data Types for use in Hardened Code.
 */

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

/**
 * This is a boolean type for use in hardened contexts.
 *
 * The intention is that this is used instead of `<stdbool.h>`'s #bool, where a
 * higher Hamming distance is required between the truthy and the falsey
 * values.
 *
 * The values below were chosen at random, subject to some specific
 * restrictions: they have a Hamming distance of 8, they are 11-bit values so
 * they can be materialized with a single instruction on RISC-V, and they are
 * specifically not the complement of each other.
 */
typedef enum hardened_bool {
  /**
   * The truthy value, expected to be used like #true.
   */
  kHardenedBoolTrue = 0x739u,
  /**
   * The falsey value, expected to be used like #false.
   */
  kHardenedBoolFalse = 0x1d4u,
} hardened_bool_t;

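// Illustrative usage sketch (not part of the upstream header): hardened
// booleans are typically consumed by comparing against the named constants
// explicitly, pairing a laundered branch with a hardened check (both defined
// later in this file), e.g.
//
//   if (launder32(flag) != kHardenedBoolTrue) {
//     return kErrorUnauthorized;  // `kErrorUnauthorized` is hypothetical.
//   }
//   HARDENED_CHECK_EQ(flag, kHardenedBoolTrue);
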
/**
 * A byte-sized hardened boolean.
 *
 * This type is intended for cases where a byte-sized hardened boolean is
 * required, e.g. for the entries of the `CREATOR_SW_CFG_KEY_IS_VALID` OTP item.
 *
 * The values below were chosen to ensure that the Hamming distance between
 * them is greater than 5 and that they are not bitwise complements of each
 * other.
 */
typedef enum hardened_byte_bool {
  /**
   * The truthy value.
   */
  kHardenedByteBoolTrue = 0xa5,
  /**
   * The falsy value.
   */
  kHardenedByteBoolFalse = 0x4b,
} hardened_byte_bool_t;

/**
 * Launders the 32-bit value `val`.
 *
 * `launder32()` returns a value identical to its input, while introducing an
 * optimization barrier that prevents the compiler from learning new
 * information about the original value based on operations on the laundered
 * value. This does not work the other way around: some information that the
 * compiler has already learned about the value may make it through; this
 * last caveat is explained below.
 *
 * In some circumstances, it is desirable to introduce a redundant (from the
 * compiler's perspective) operation into the instruction stream to make it
 * less likely that hardware faults will result in critical operations being
 * bypassed. Laundering makes it possible to insert such duplicate operations,
 * while blocking the compiler's attempts to delete them through dead code
 * elimination.
 *
 * Suppose we have a pure-C `CHECK()` macro that performs a runtime assert.
 * In the following code, a compiler would fold the `CHECK()` away, since
 * dataflow analysis can tell it that `x` is always zero inside the block,
 * allowing it to deduce that `x == 0` -> `0 == 0` -> `true`. It then deletes
 * the `CHECK(true)` as dead code.
 * ```
 * if (x == 0) {
 *   CHECK(x == 0);
 * }
 * ```
 *
 * If we believe that an attacker can glitch the chip to skip the first branch,
 * this assumption no longer holds. We can use laundering to prevent LLVM from
 * learning that `x` is zero inside the block:
 * ```
 * if (launder32(x) == 0) {
 *   CHECK(x == 0);
 * }
 * ```
 *
 * Note that this operation is order sensitive: while we can stop the compiler
 * from learning new information, it is very hard to make it forget
 * information in some cases. If we had instead written
 * ```
 * if (x == 0) {
 *   CHECK(launder32(x) == 0);
 * }
 * ```
 * then the compiler would be entitled to rewrite this into
 * `launder32(0) == 0`. Although it cannot deduce that this is identically
 * true, `launder32()` is the identity function at runtime, so the check still
 * behaves like `0 == 0`. For example, a completely valid lowering of this
 * code is
 * ```
 *   bnez a0, else
 *   mv a0, zero
 *   bnez a0, shutdown
 *   // ...
 * ```
 *
 * Even pulling the expression out of the branch as
 * `uint32_t y = launder32(x);` doesn't work, because the compiler may choose
 * to move the operation into the branch body. Thus, we need to stop it from
 * learning that `x` is zero in the first place.
 *
 * Note that, in spite of `HARDENED_CHECK` being written in assembly, it is
 * still vulnerable to certain inimical compiler optimizations. For example,
 * `HARDENED_CHECK_EQ(x, 0)` can be rewritten as `HARDENED_CHECK_EQ(0, 0)`.
 * Although the compiler cannot delete this code, it will emit the nonsensical
 * ```
 *   beqz zero, continue
 *   unimp
 * continue:
 * ```
 * effectively negating the doubled condition.
 *
 * ---
 *
 * In general, this intrinsic should *only* be used on values that the
 * compiler doesn't already know anything interesting about. The precise
 * meaning of this can be subtle, since inlining can easily foil
 * pessimization. Uses of this function must be carefully reviewed, and
 * commented with justification and an explanation of what information it is
 * preventing access to, and what ambient invariants it relies on.
 *
 * This function makes no guarantees: it is only intended to help harden code.
 * It does not remove the need to carefully verify that the correct
 * instruction-level invariants are upheld in the final release.
 *
 * This operation *does not* introduce a sequence point: a compiler may move
 * it anywhere between the point where its input is last modified and the
 * point where its output is first used. This can include moving through
 * different nodes in the control flow graph. The location of the laundering
 * operation must be determined *exclusively* by its position in the
 * expression dependency graph.
 *
 * Other examples of laundering use-cases should be listed here as they become
 * apparent.
 *
 * * Obscuring loop completion. It may be useful to prevent a compiler from
 *   learning that a particular loop is guaranteed to complete. The most
 *   straightforward way to do this is to launder the loop increment: replace
 *   `++i` with `i = launder32(i) + 1`. This is helpful for preventing the
 *   compiler from learning that the loop executes in a particular order, and
 *   for foiling unroll/unroll-and-jam optimizations. It also prevents the
 *   compiler from learning that the loop counter was saturated. However, if
 *   the exit condition is `i < len`, the compiler will still learn that
 *   `i >= len` outside the loop.
 *
 *   Laundering just the exit comparison, `launder32(i) < len`, will still
 *   allow the compiler to deduce that the loop is traversed in linear order,
 *   but it will not learn that `i >= len` outside the loop. Laundering both
 *   loop control expressions may be necessary in some cases.
 *
 * * Assigning a literal to a value without the compiler learning about that
 *   literal value. `x = launder32(0);` zeros `x`, but the compiler cannot
 *   replace all uses of `x` with the constant `0`. Note that
 *   `x = launder32(x);` does not prevent knowledge the compiler already has
 *   about `x` from being used to substitute a constant inside `launder32()`.
 *
 * * Preventing operations from being re-associated. The compiler is entitled
 *   to rewrite `x ^ (y ^ z)` into `(x ^ y) ^ z`, but it cannot do the same
 *   with `x ^ launder32(y ^ z)`. No operations commute through `launder32()`.
 *
 * * Preventing dead-code elimination. The compiler cannot delete the
 *   unreachable expression `if (launder32(false)) { foo(); }`. However, the
 *   branch will never be executed in an untampered instruction stream.
 *
 * @param val A 32-bit integer to launder.
 * @return A 32-bit integer which will *happen* to have the same value as
 *         `val` at runtime.
 */
inline uint32_t launder32(uint32_t val) {
  // NOTE: This implementation is LLVM-specific, and should be considered to
  // be a no-op in every other compiler. For example, GCC has in the past
  // peered into the insides of assembly blocks.
  //
  // LLVM cannot see through assembly blocks, and must treat this as a black
  // box. Similar tricks are employed in other security-sensitive code-bases,
  // such as https://boringssl-review.googlesource.com/c/boringssl/+/36484.
  //
  // It is *not* marked as volatile: although that would also suppress
  // reordering, it makes some laundering operations suddenly start working
  // only spuriously, entirely dependent on how excited LLVM is about
  // reordering things. To avoid this trap, we do not mark it as volatile and
  // instead require that reordering be prevented through careful sequencing
  // of statements.
  //
  // The +r constraint tells the compiler that this is an "inout" parameter:
  // it means that not only does the black box depend on `val`, but it also
  // mutates it in an unspecified way. This ensures that the laundering
  // operation occurs in the right place in the dependency ordering.
  asm("" : "+r"(val));
  return val;
}
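
// Illustrative sketch (not part of the upstream header): laundering a loop
// counter as described above, so the compiler cannot prove how many times the
// body executes, making it harder to unroll or elide the loop. The function
// name `example_zero_buffer` and its parameters are hypothetical.
inline void example_zero_buffer(uint32_t *buf, uint32_t len) {
  // `launder32(i)` hides the counter's value from dataflow analysis; both the
  // exit comparison and the increment consume the laundered value.
  for (uint32_t i = 0; launder32(i) < len; i = launder32(i) + 1) {
    buf[i] = 0;
  }
}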

/**
 * Launders the pointer-sized value `val`.
 *
 * See `launder32()` for detailed semantics.
 *
 * @param val A pointer-sized integer to launder.
 * @return A pointer-sized integer which will happen to have the same value as
 *         `val` at runtime.
 */
inline uintptr_t launderw(uintptr_t val) {
  asm("" : "+r"(val));
  return val;
}

/**
 * Creates a reordering barrier for `val`.
 *
 * `barrier32()` takes an argument and discards it, while introducing an
 * "impure" dependency on that value. This forces the compiler to schedule
 * instructions such that the intermediate `val` actually appears in a
 * register. Because it is impure, `barrier32()` operations will not be
 * reordered past each other or past MMIO operations, although they can be
 * reordered past function calls if LTO inlines them.
 *
 * Most compilers will try to reorder arithmetic operations in such a way
 * that they form large basic blocks, since modern microarchitectures can
 * take advantage of instruction-level parallelism. Unfortunately, this means
 * that instructions that we want to interleave with other instructions are
 * likely to get separated; this includes static interleavings,
 * loop-invariant code motion, and some kinds of unroll-and-jam.
 *
 * For example, given
 *
 * ```
 * uint32_t product = 5;
 *
 * foo();
 * product *= product;
 * foo();
 * product *= product;
 * foo();
 * product *= product;
 * ```
 *
 * a compiler will likely reorder this as
 *
 * ```
 * uint32_t product = 5;
 *
 * foo();
 * foo();
 * foo();
 * product *= product;
 * product *= product;
 * product *= product;
 * ```
 *
 * The compiler is further entitled to constant-propagate `product`.
 * `barrier32()` can be used to avoid this:
 *
 * ```
 * // NB: the initial value of `product` is laundered to prevent
 * // constant propagation, but only because it is a compile-time
 * // constant. Laundering the intermediates would also work.
 * uint32_t product = launder32(5);
 * barrier32(product);
 *
 * barrier32(foo());
 * product *= product;
 * barrier32(product);
 *
 * barrier32(foo());
 * product *= product;
 * barrier32(product);
 *
 * barrier32(foo());
 * product *= product;
 * barrier32(product);
 * ```
 *
 * Note that we must place barriers on the results of the function calls,
 * too, so that the compiler believes that there is a potential dependency
 * between the return value of each function and the follow-up value of
 * `product`.
 *
 * This is also useful for avoiding loop reordering:
 *
 * ```
 * for (int i = 0; i != n - 1; i = (i + kPrime) % n) {
 *   barrier32(i);
 *
 *   // Stuff.
 * }
 * ```
 *
 * A sufficiently intelligent compiler might notice that it can linearize this
 * loop; however, the barriers in each loop iteration force a particular order
 * to be observed.
 *
 * @param val A value to create a barrier for.
 */
inline void barrier32(uint32_t val) { asm volatile("" ::"r"(val)); }

/**
 * Creates a reordering barrier for `val`.
 *
 * See `barrier32()` for detailed semantics.
 *
 * @param val A value to create a barrier for.
 */
inline void barrierw(uintptr_t val) { asm volatile("" ::"r"(val)); }

/**
 * A constant-time, 32-bit boolean value.
 *
 * Values of this type MUST be either all zero bits or all one bits,
 * representing `false` and `true` respectively.
 *
 * Although it is possible to convert an existing `bool` into a `ct_bool32_t`
 * by writing `-((ct_bool32_t) my_bool)`, we recommend against it.
 */
typedef uint32_t ct_bool32_t;

// The formulae below are taken from Hacker's Delight, Chapter 2.
// Although the book does not define them as being constant-time, they are
// branchless; branchless code is always constant-time.
//
// Proofs and references to HD are provided only in the 32-bit versions.
//
// Warren Jr., Henry S. (2013). Hacker's Delight (2 ed.).
// Addison Wesley - Pearson Education, Inc. ISBN 978-0-321-84268-8.

/**
 * Performs constant-time signed comparison to zero.
 *
 * Returns whether `a < 0`, as a constant-time boolean.
 * In other words, this checks if `a` is negative, i.e., its sign bit is set.
 *
 * @return `a < 0`.
 */
inline ct_bool32_t ct_sltz32(int32_t a) {
  // Proof. `a` is negative iff its MSB is set; arithmetic-right-shifting by
  // bits(a)-1 smears the sign bit across all of `a`.
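  //
  // Worked example (illustrative): for a = -2 (0xFFFFFFFE), the shift yields
  // 0xFFFFFFFF (true); for a = 2, it yields 0x00000000 (false).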
  return (uint32_t)(a >> (sizeof(a) * 8 - 1));
}

/**
 * Performs constant-time unsigned ascending comparison.
 *
 * Returns `a < b` as a constant-time boolean.
 *
 * @return `a < b`.
 */
inline ct_bool32_t ct_sltu32(uint32_t a, uint32_t b) {
  // Proof. See Hacker's Delight page 23.
  return ct_sltz32((a & ~b) | ((a ^ ~b) & (a - b)));
}

/**
 * Performs constant-time zero equality.
 *
 * Returns `a == 0` as a constant-time boolean.
 *
 * @return `a == 0`.
 */
inline ct_bool32_t ct_seqz32(uint32_t a) {
  // Proof. See Hacker's Delight page 23.
  // HD gives this formula: `a == b := ~(a-b | b-a)`.
  //
  // Setting `b` to zero gives us
  //   ~(a | -a) -> ~a & ~-a -> ~a & (a - 1)
  // via identities on page 16.
  //
  // This formula is also given on page 11 for a different purpose.
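  //
  // Worked check (illustrative): for a = 0, ~a & (a - 1) is
  // 0xFFFFFFFF & 0xFFFFFFFF = 0xFFFFFFFF, whose sign bit is set (true); for
  // any nonzero a, at least one of `~a` and `a - 1` has a clear sign bit, so
  // the result is non-negative (false).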
  return ct_sltz32(~a & (a - 1));
}

/**
 * Performs constant-time equality.
 *
 * Returns `a == b` as a constant-time boolean.
 *
 * @return `a == b`.
 */
inline ct_bool32_t ct_seq32(uint32_t a, uint32_t b) {
  // Proof. a ^ b == 0 -> a ^ a ^ b == a ^ 0 -> b == a.
  return ct_seqz32(a ^ b);
}

/**
 * Performs a constant-time select.
 *
 * Returns `a` if `c` is true; otherwise, returns `b`.
 *
 * This function should be used with one of the comparison functions above; do
 * NOT create `c` using an `if` or `?:` operation.
 *
 * @param c The condition to test.
 * @param a The value to return on true.
 * @param b The value to return on false.
 * @return `c ? a : b`.
 */
inline uint32_t ct_cmov32(ct_bool32_t c, uint32_t a, uint32_t b) {
  // Proof. See Hacker's Delight page 46. HD gives this as a branchless swap;
  // branchless select is a special case of that.

  // `c` must be laundered because LLVM has a strength reduction pass for this
  // exact pattern, but lacking a cmov instruction, it will almost certainly
  // select a branch instruction here.
  return (launder32(c) & a) | (launder32(~c) & b);
}
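
// Illustrative sketch (not part of the upstream header): composing the
// constant-time primitives above into a conditional copy that never branches
// on the comparison result. The name `example_ct_select_eq32` is hypothetical.
inline uint32_t example_ct_select_eq32(uint32_t key, uint32_t expected,
                                       uint32_t on_match, uint32_t otherwise) {
  // `ct_seq32()` yields all ones when `key == expected` and all zeros
  // otherwise, which is exactly the mask `ct_cmov32()` expects.
  return ct_cmov32(ct_seq32(key, expected), on_match, otherwise);
}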
/**
 * A constant-time, pointer-sized boolean value.
 *
 * Values of this type MUST be either all zero bits or all one bits.
 */
typedef uintptr_t ct_boolw_t;

/**
 * Performs constant-time signed comparison to zero.
 *
 * Returns whether `a < 0`, as a constant-time boolean.
 * In other words, this checks if `a` is negative, i.e., its sign bit is set.
 *
 * @return `a < 0`.
 */
inline ct_boolw_t ct_sltzw(intptr_t a) {
  return (uintptr_t)(a >> (sizeof(a) * 8 - 1));
}

/**
 * Performs constant-time unsigned ascending comparison.
 *
 * Returns `a < b` as a constant-time boolean.
 *
 * @return `a < b`.
 */
inline ct_boolw_t ct_sltuw(uintptr_t a, uintptr_t b) {
  return ct_sltzw((a & ~b) | ((a ^ ~b) & (a - b)));
}

/**
 * Performs constant-time zero equality.
 *
 * Returns `a == 0` as a constant-time boolean.
 *
 * @return `a == 0`.
 */
inline ct_boolw_t ct_seqzw(uintptr_t a) { return ct_sltzw(~a & (a - 1)); }

/**
 * Performs constant-time equality.
 *
 * Returns `a == b` as a constant-time boolean.
 *
 * @return `a == b`.
 */
inline ct_boolw_t ct_seqw(uintptr_t a, uintptr_t b) { return ct_seqzw(a ^ b); }

/**
 * Performs a constant-time select.
 *
 * Returns `a` if `c` is true; otherwise, returns `b`.
 *
 * This function should be used with one of the comparison functions above; do
 * NOT create `c` using an `if` or `?:` operation.
 *
 * @param c The condition to test.
 * @param a The value to return on true.
 * @param b The value to return on false.
 * @return `c ? a : b`.
 */
inline uintptr_t ct_cmovw(ct_boolw_t c, uintptr_t a, uintptr_t b) {
  return (launderw(c) & a) | (launderw(~c) & b);
}

// Implementation details shared across shutdown macros.
#ifndef OT_OFF_TARGET_TEST
// This string can be tuned to be longer or shorter as desired, for
// fault-hardening purposes.
#define HARDENED_UNIMP_SEQUENCE_() "unimp; unimp; unimp; unimp;"

#define HARDENED_CHECK_OP_EQ_ "beq"
#define HARDENED_CHECK_OP_NE_ "bne"
#define HARDENED_CHECK_OP_LT_ "blt"
#define HARDENED_CHECK_OP_GT_ "bgt"
#define HARDENED_CHECK_OP_LE_ "ble"
#define HARDENED_CHECK_OP_GE_ "bge"

// clang-format off
#define HARDENED_CHECK_(op_, a_, b_)      \
  asm volatile(                           \
      op_ " %0, %1, .L_HARDENED_%=;"      \
      HARDENED_UNIMP_SEQUENCE_()          \
      ".L_HARDENED_%=:;"                  \
      ::"r"(a_), "r"(b_))
// clang-format on

#define HARDENED_UNREACHABLE_()               \
  do {                                        \
    asm volatile(HARDENED_UNIMP_SEQUENCE_()); \
    __builtin_unreachable();                  \
  } while (false)
#else // OT_OFF_TARGET_TEST
#include <assert.h>

#define HARDENED_CHECK_OP_EQ_ ==
#define HARDENED_CHECK_OP_NE_ !=
#define HARDENED_CHECK_OP_LT_ <
#define HARDENED_CHECK_OP_GT_ >
#define HARDENED_CHECK_OP_LE_ <=
#define HARDENED_CHECK_OP_GE_ >=

#define HARDENED_CHECK_(op_, a_, b_) assert((a_)op_(b_))

#define HARDENED_UNREACHABLE_() assert(false)
#endif // OT_OFF_TARGET_TEST

/**
 * Indicates that the following code is unreachable; it should never be
 * reached at runtime.
 *
 * If it is reached anyway, an illegal instruction will be executed.
 */
#define HARDENED_UNREACHABLE() HARDENED_UNREACHABLE_()

/**
 * Compare two values in a way that is *manifestly* true: that is, under normal
 * program execution, the comparison is a tautology.
 *
 * If the comparison fails, we can deduce the device is under attack, so an
 * illegal instruction will be executed. The manner in which the comparison is
 * done is carefully constructed in assembly to minimize the possibility of
 * adversarial faults.
 *
 * There are six variants of this macro: one for each of the six comparison
 * operations.
 *
 * This macro is intended to be used along with `launder32()` to handle
 * detected faults when implementing redundant checks, i.e. checks in places
 * that the compiler could otherwise prove are statically unreachable. For
 * example:
 * ```
 * if (launder32(x) == 0) {
 *   HARDENED_CHECK_EQ(launder32(x), 0);
 * }
 * ```
 * See `launder32()` for more details.
 */
#define HARDENED_CHECK_EQ(a_, b_) HARDENED_CHECK_(HARDENED_CHECK_OP_EQ_, a_, b_)
#define HARDENED_CHECK_NE(a_, b_) HARDENED_CHECK_(HARDENED_CHECK_OP_NE_, a_, b_)
#define HARDENED_CHECK_LT(a_, b_) HARDENED_CHECK_(HARDENED_CHECK_OP_LT_, a_, b_)
#define HARDENED_CHECK_GT(a_, b_) HARDENED_CHECK_(HARDENED_CHECK_OP_GT_, a_, b_)
#define HARDENED_CHECK_LE(a_, b_) HARDENED_CHECK_(HARDENED_CHECK_OP_LE_, a_, b_)
#define HARDENED_CHECK_GE(a_, b_) HARDENED_CHECK_(HARDENED_CHECK_OP_GE_, a_, b_)
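
// Illustrative sketch (not part of the upstream header): the macros above are
// typically paired with laundered loop control so that neither the checks nor
// the loop bound can be optimized away. The name `example_check_all_zero` is
// hypothetical.
inline void example_check_all_zero(const uint32_t *buf, uint32_t len) {
  uint32_t i = 0;
  for (; launder32(i) < len; i = launder32(i) + 1) {
    // Executes an illegal instruction if `buf[i]` is ever nonzero.
    HARDENED_CHECK_EQ(buf[i], 0);
  }
  // Redundantly confirm that the loop actually ran to completion.
  HARDENED_CHECK_EQ(i, len);
}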

#ifdef __cplusplus
}
#endif // __cplusplus

#endif // OPENTITAN_SW_DEVICE_LIB_BASE_HARDENED_H_