diff --git a/Makefile b/Makefile index 6c62948..2139967 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,7 @@ AR = /usr/bin/ar CC = /usr/bin/gcc CFLAGS = -Wall -O3 +# CFLAGS = -Wall -g all: hss_lib.a \ hss_lib_thread.a \ @@ -12,7 +13,7 @@ hss_lib.a: hss.o hss_alloc.o hss_aux.o hss_common.o \ hss_compute.o hss_generate.o hss_keygen.o hss_param.o hss_reserve.o \ hss_sign.o hss_sign_inc.o hss_thread_single.o \ hss_verify.o hss_verify_inc.o hss_derive.o \ - hss_derive.o hss_zeroize.o lm_common.o \ + hss_derive.o hss_zeroize.o hss_malloc.o lm_common.o \ lm_ots_common.o lm_ots_sign.o lm_ots_verify.o lm_verify.o endian.o \ hash.o sha256.o $(AR) rcs $@ $^ @@ -21,7 +22,7 @@ hss_lib_thread.a: hss.o hss_alloc.o hss_aux.o hss_common.o \ hss_compute.o hss_generate.o hss_keygen.o hss_param.o hss_reserve.o \ hss_sign.o hss_sign_inc.o hss_thread_pthread.o \ hss_verify.o hss_verify_inc.o \ - hss_derive.o hss_zeroize.o lm_common.o \ + hss_derive.o hss_zeroize.o hss_malloc.o lm_common.o \ lm_ots_common.o lm_ots_sign.o lm_ots_verify.o lm_verify.o endian.o \ hash.o sha256.o $(AR) rcs $@ $^ @@ -37,79 +38,82 @@ demo: demo.c hss_lib_thread.a test_1: test_1.c lm_ots_common.o lm_ots_sign.o lm_ots_verify.o endian.o hash.o sha256.o hss_zeroize.o $(CC) $(CFLAGS) -o test_1 test_1.c lm_ots_common.o lm_ots_sign.o lm_ots_verify.o endian.o hash.o sha256.o hss_zeroize.o -lcrypto -test_hss: test_hss.c test_hss.h test_testvector.c test_stat.c test_keygen.c test_load.c test_sign.c test_sign_inc.c test_verify.c test_verify_inc.c test_keyload.c test_reserve.c test_thread.c test_h25.c hss.h hss_lib_thread.a - $(CC) $(CFLAGS) test_hss.c test_testvector.c test_stat.c test_keygen.c test_sign.c test_sign_inc.c test_load.c test_verify.c test_verify_inc.c test_keyload.c test_reserve.c test_thread.c test_h25.c hss_lib_thread.a -lcrypto -lpthread -o test_hss +test_hss: test_hss.c test_hss.h test_testvector.c test_stat.c test_keygen.c test_load.c test_sign.c test_sign_inc.c test_verify.c test_verify_inc.c 
test_keyload.c test_reserve.c test_thread.c test_h25.c test_fault.c test_update.c hss.h hss_lib_thread.a + $(CC) $(CFLAGS) test_hss.c test_testvector.c test_stat.c test_keygen.c test_sign.c test_sign_inc.c test_load.c test_verify.c test_verify_inc.c test_keyload.c test_reserve.c test_thread.c test_h25.c test_fault.c test_update.c hss_lib_thread.a -lcrypto -lpthread -o test_hss -hss.o: hss.c hss.h common_defs.h hash.h endian.h hss_internal.h hss_aux.h hss_derive.h +hss.o: hss.c hss.h common_defs.h hash.h endian.h hss_internal.h hss_aux.h hss_derive.h config.h $(CC) $(CFLAGS) -c hss.c -o $@ -hss_alloc.o: hss_alloc.c hss.h hss_internal.h lm_common.h +hss_alloc.o: hss_alloc.c hss.h hss_internal.h lm_common.h config.h hss_malloc.h $(CC) $(CFLAGS) -c hss_alloc.c -o $@ -hss_aux.o: hss_aux.c hss_aux.h hss_internal.h common_defs.h lm_common.h endian.h hash.h +hss_aux.o: hss_aux.c hss_aux.h hss_internal.h common_defs.h lm_common.h endian.h hash.h config.h $(CC) $(CFLAGS) -c hss_aux.c -o $@ -hss_common.o: hss_common.c common_defs.h hss_common.h lm_common.h +hss_common.o: hss_common.c common_defs.h hss_common.h lm_common.h config.h $(CC) $(CFLAGS) -c hss_common.c -o $@ -hss_compute.o: hss_compute.c hss_internal.h hash.h hss_thread.h lm_ots_common.h lm_ots.h endian.h hss_derive.h +hss_compute.o: hss_compute.c hss_internal.h hash.h hss_thread.h lm_ots_common.h lm_ots.h endian.h hss_derive.h config.h $(CC) $(CFLAGS) -c hss_compute.c -o $@ -hss_derive.o: hss_derive.c hss_derive.h hss_internal.h hash.h endian.h +hss_derive.o: hss_derive.c hss_derive.h hss_internal.h hash.h endian.h config.h $(CC) $(CFLAGS) -c hss_derive.c -o $@ -hss_generate.o: hss_generate.c hss.h hss_internal.h hss_aux.h hash.h hss_thread.h hss_reserve.h lm_ots_common.h endian.h +hss_generate.o: hss_generate.c hss.h hss_internal.h hss_aux.h hash.h hss_thread.h hss_reserve.h lm_ots_common.h endian.h config.h $(CC) $(CFLAGS) -c hss_generate.c -o $@ -hss_keygen.o: hss_keygen.c hss.h common_defs.h hss_internal.h 
hss_aux.h endian.h hash.h hss_thread.h lm_common.h lm_ots_common.h +hss_keygen.o: hss_keygen.c hss.h common_defs.h hss_internal.h hss_aux.h endian.h hash.h hss_thread.h lm_common.h lm_ots_common.h config.h $(CC) $(CFLAGS) -c hss_keygen.c -o $@ -hss_param.o: hss_param.c hss.h hss_internal.h endian.h hss_zeroize.h +hss_malloc.o: hss_malloc.c hss.h config.h common_defs.h hss_malloc.h + $(CC) $(CFLAGS) -c hss_malloc.c -o $@ + +hss_param.o: hss_param.c hss.h hss_internal.h endian.h hss_zeroize.h config.h $(CC) $(CFLAGS) -c hss_param.c -o $@ -hss_reserve.o: hss_reserve.c common_defs.h hss_internal.h hss_reserve.h endian.h +hss_reserve.o: hss_reserve.c hss.h common_defs.h hss_internal.h hss_reserve.h endian.h config.h $(CC) $(CFLAGS) -c hss_reserve.c -o $@ -hss_sign.o: hss_sign.c common_defs.h hss.h hash.h endian.h hss_internal.h hss_aux.h hss_thread.h hss_reserve.h lm_ots.h lm_ots_common.h hss_derive.h +hss_sign.o: hss_sign.c common_defs.h hss.h hash.h endian.h hss_internal.h hss_aux.h hss_thread.h hss_reserve.h lm_ots.h lm_ots_common.h hss_derive.h config.h $(CC) $(CFLAGS) -c hss_sign.c -o $@ -hss_sign_inc.o: hss_sign_inc.c hss.h common_defs.h hss.h hash.h endian.h hss_internal.h hss_aux.h hss_reserve.h hss_derive.h lm_ots.h lm_ots_common.h hss_sign_inc.h +hss_sign_inc.o: hss_sign_inc.c hss.h common_defs.h hss.h hash.h endian.h hss_internal.h hss_aux.h hss_reserve.h hss_derive.h lm_ots.h lm_ots_common.h hss_sign_inc.h config.h $(CC) $(CFLAGS) -c hss_sign_inc.c -o $@ -hss_thread_single.o: hss_thread_single.c hss_thread.h +hss_thread_single.o: hss_thread_single.c hss_thread.h config.h $(CC) $(CFLAGS) -c hss_thread_single.c -o $@ -hss_thread_pthread.o: hss_thread_pthread.c hss_thread.h +hss_thread_pthread.o: hss_thread_pthread.c hss_thread.h config.h $(CC) $(CFLAGS) -c hss_thread_pthread.c -o $@ -hss_verify.o: hss_verify.c hss_verify.h common_defs.h lm_verify.h lm_common.h lm_ots_verify.h hash.h endian.h hss_thread.h +hss_verify.o: hss_verify.c hss.h hss_verify.h 
common_defs.h lm_verify.h lm_common.h lm_ots_verify.h hash.h endian.h hss_thread.h config.h $(CC) $(CFLAGS) -c hss_verify.c -o $@ -hss_verify_inc.o: hss_verify_inc.c hss_verify_inc.h common_defs.h lm_verify.h lm_common.h lm_ots_verify.h hash.h endian.h hss_thread.h +hss_verify_inc.o: hss_verify_inc.c hss.h hss_verify_inc.h common_defs.h lm_verify.h lm_common.h lm_ots_verify.h hash.h endian.h hss_thread.h config.h $(CC) $(CFLAGS) -c hss_verify_inc.c -o $@ hss_zeroize.o: hss_zeroize.c hss_zeroize.h $(CC) $(CFLAGS) -c hss_zeroize.c -o $@ -lm_common.o: lm_common.c lm_common.h hash.h common_defs.h lm_ots_common.h +lm_common.o: lm_common.c lm_common.h hash.h common_defs.h lm_ots_common.h config.h $(CC) $(CFLAGS) -c lm_common.c -o $@ -lm_ots_common.o: lm_ots_common.c common_defs.h hash.h +lm_ots_common.o: lm_ots_common.c common_defs.h hash.h config.h $(CC) $(CFLAGS) -c lm_ots_common.c -o $@ -lm_ots_sign.o: lm_ots_sign.c common_defs.h lm_ots.h lm_ots_common.h hash.h endian.h hss_zeroize.h hss_derive.h +lm_ots_sign.o: lm_ots_sign.c common_defs.h lm_ots.h lm_ots_common.h hash.h endian.h hss_zeroize.h hss_derive.h config.h $(CC) $(CFLAGS) -c lm_ots_sign.c -o $@ -lm_ots_verify.o: lm_ots_verify.c lm_ots_verify.h lm_ots_common.h hash.h endian.h common_defs.h +lm_ots_verify.o: lm_ots_verify.c lm_ots_verify.h lm_ots_common.h hash.h endian.h common_defs.h config.h $(CC) $(CFLAGS) -c lm_ots_verify.c -o $@ -lm_verify.o: lm_verify.c lm_verify.h lm_common.h lm_ots_common.h lm_ots_verify.h hash.h endian.h common_defs.h +lm_verify.o: lm_verify.c lm_verify.h lm_common.h lm_ots_common.h lm_ots_verify.h hash.h endian.h common_defs.h config.h $(CC) $(CFLAGS) -c lm_verify.c -o $@ endian.o: endian.c endian.h $(CC) $(CFLAGS) -c endian.c -o $@ -hash.o: hash.c hash.h sha256.h hss_zeroize.h +hash.o: hash.c hash.h sha256.h hss_zeroize.h config.h $(CC) $(CFLAGS) -c hash.c -o $@ sha256.o: sha256.c sha256.h endian.h diff --git a/README b/README index 36b2692..594a1fd 100644 --- a/README +++ b/README 
@@ -1,7 +1,18 @@ This code attempts to be a usable implementation of the LMS Hash Based Signature Scheme from RFC 8554. -See read.me for documentation how to use it. +This specific branch attempts to implement optional fault tolerance; +that is, no secret keying data is leaked on a miscompute. + +This appears to be complete and tested. + +Changes: +- Expanded the size of the private key to 64 bytes (both to add a + checksum to detect NVRAM errors, and to include a 'max count' value + which may be useful in key sharing) +- Added a FAULT_TOLERANCE flag that, when enabled, adds checking logic + to the public key computations; this is at some cost to both time + and memory +- Added a fault regression test, which tests out the above protection +- Also added a test that looks for memory leaks -This is the ACVP branch - designed to be (optionally) compatible with the -public ACVP server diff --git a/config.h b/config.h index bf4b132..00158af 100644 --- a/config.h +++ b/config.h @@ -9,6 +9,63 @@ * the operating environment needs */ +/* + * These control how we do threading; these apply only if we have the + * threading library installed + * + * This is the maximum number of threads we'll try to create; we won't + * exceed this number no matter what the application tells us + */ +#define MAX_THREAD 16 /* Never try to create more than 16 threads */ + +/* + * This is the number of threads we'll try to create if the application + * doesn't specify otherwise (i.e. passes in 0) + */ +#define DEFAULT_THREAD 16 /* Go with 16 threads by default */ + +/* + * We provide two different methods to be resilient against fault attacks. 
+ * Both these methods have costs (but very different costs); if fault attacks + * are a concern for your implementation, you should enable one (or both of + * them if you're feeling especially paranoid, they are mutually compatible) + */ + +/* + * Method 1 for fault tolerance: when we initially compute the signature for + * an internal root node, we store it (actually, the hash of the signed + * public key) in the private key. Then, if we ever need to compute that + * signature again, we compare hashes; if they're different, then a fault + * that could have leaked the private key has occurred + * 0 -> We don't. + * 1 -> We do. This has the cost of expanding the size of the private key + * by 7*FAULT_CACHE_LEN bytes; it also can cause us to update the private + * key more often than expected (if you use reservations) + */ +#define FAULT_CACHE_SIG 1 + +/* + * If we cache hashes of signatures (FAULT_CACHE_SIG), then this determines + * the length of the hash we use; if FAULT_CACHE_LEN < 32, we truncate the + * hash. This is here because we generally don't need to store the entire + * hash (unless we assume that the attacker can generate a precise fault at a + * specific spot in the computation, and he has enough computational resources + * to do a second preimage attack on a truncated hash), and shortening the + * hash reduces the space used by a private key. + */ +#define FAULT_CACHE_LEN 8 + +/* + * Method 2 for fault tolerance: compute hashes twice, and compare the results + * Note that the goal of this is to prevent errors that would cause us + * to leak information that would allow forgeries; errors that only cause us + * to produce invalid signatures are not of concern. + * 0 -> We don't. + * 1 -> We do. 
This has the extra cost of increasing load and signature + generation times, and increased memory consumption + */ +#define FAULT_RECOMPUTE 0 + /* * This modifies which seed generation logic we use * Note that changing these parameters will change the mapping @@ -20,7 +77,7 @@ * seed in more than a defined number of distinct hashes * 2 -> We generate seeds and secrets in a way which is compatible with ACVP */ -#define SECRET_METHOD 2 +#define SECRET_METHOD 0 /* * If we're using the side channel resistant method, this defines the max @@ -31,4 +88,35 @@ */ #define SECRET_MAX 4 /* Never use a seed more than 16 times */ +/* + * This determines whether we use the OpenSSL implementation of SHA-256 + * or we use our own + * 1 -> We use the OpenSSL implementation; it's faster (and can use the + * Intel SHA256 instructions for even more speed) + * 0 -> We use a portable C implementation; it's slower, but it does + * allow for some of the below instrumentation logic + */ +#define USE_OPENSSL 1 /* We use the OpenSSL implementation for SHA-256 */ + +/* + * This determines whether we will print out the internal hash inputs and + * outputs if the global hss_verbose is set. Obviously, this is not great + * for security; however it can be useful to track down those truly hard + * bugs. It is also quite chatty, and if you do use this, you probably + * want to shut off multithreading + * This works only if USE_OPENSSL == 0 + * 0 -> Omit debugging code + * 1 -> Include debugging code + */ +#define ALLOW_VERBOSE 0 /* Don't do instrumentation */ + +/* + * This determines whether we'll include some test instrumentation into + * the code. 
This is never appropriate for a real application; this does + * allow the testing code to run some additional tests + * 0 -> Omit instrumentation + * 1 -> Include instrumentation + */ +#define TEST_INSTRUMENTATION 0 /* Test mode off */ + #endif /* CONFIG_H_ */ diff --git a/demo.c b/demo.c index 2aeb877..1912569 100644 --- a/demo.c +++ b/demo.c @@ -457,8 +457,8 @@ static int sign(const char *keyname, char **files) { printf( "Loading private key\n" ); fflush(stdout); struct hss_working_key *w = hss_load_private_key( - read_private_key, private_key_filename, /* How to load the */ - /* private key */ + read_private_key, update_private_key, /* How to load and */ + private_key_filename, /* update the private key */ 0, /* Use minimal memory */ aux_data, len_aux_data, /* The auxiliary data */ 0); /* Use the defaults for extra info */ @@ -517,8 +517,6 @@ static int sign(const char *keyname, char **files) { (void)hss_sign_init( &ctx, /* Incremental signing context */ w, /* Working key */ - update_private_key, /* Routine to update the */ - private_key_filename, /* private key */ sig, sig_len, /* Where to place the signature */ 0); /* Use the defaults for extra info */ @@ -712,8 +710,8 @@ static int advance(const char *keyname, const char *text_advance) { printf( "Loading private key\n" ); fflush(stdout); struct hss_working_key *w = hss_load_private_key( - read_private_key, private_key_filename, /* How to load the */ - /* private key */ + read_private_key, update_private_key, /* How to load and */ + private_key_filename, /* update the private key */ 0, /* Use minimal memory */ aux_data, len_aux_data, /* The auxiliary data */ 0); /* Use the defaults for extra info */ @@ -729,9 +727,7 @@ static int advance(const char *keyname, const char *text_advance) { /* Now that we've loaded the private key, we fast-forward it */ /* We do this by reserving N signatures (which updates the private */ /* key to reflect that we've generated those signatures) */ - bool success = 
hss_reserve_signature( w, - update_private_key, private_key_filename, - advance, 0 ); + bool success = hss_reserve_signature( w, advance, 0 ); if (!success) { printf( "Error advancing\n" ); } diff --git a/hash.c b/hash.c index ea5b376..8d17b17 100644 --- a/hash.c +++ b/hash.c @@ -2,10 +2,7 @@ #include "hash.h" #include "sha256.h" #include "hss_zeroize.h" - -#define ALLOW_VERBOSE 0 /* 1 -> we allow the dumping of intermediate */ - /* states. Useful for debugging; horrid */ - /* for security */ +#include "config.h" /* * This is the file that implements the hashing APIs we use internally. @@ -25,6 +22,63 @@ bool hss_verbose = false; #endif +#if TEST_INSTRUMENTATION +#include "hss_fault.h" + +/* + * These globals are the way we communicate with the fault testing logic + * (test_fault.c); when it decides that it wants to inject a fault, that + * code sets these globals, and we then inject a fault accordingly + */ +int hash_fault_enabled = 0; /* Is hash fault injected enabled? */ + /* 0 -> no */ + /* 1 -> yes for the specific hash listed below */ + /* 2 -> always */ +int hash_fault_level; /* Where we inject the fault; which LMS level */ + /* in the HSS hierarchy are we attempting to */ + /* target; 0 -> root LMS tree */ +int hash_fault_reason; /* Where we inject the fault; which reason */ + /* we perform the hash are we attempting to */ + /* fault */ +long hash_fault_count; /* Decrements when we get a match on both level */ + /* and reason. 
When this count hits zero, we fault */ + +static int current_level; /* The LMS level that the code has told us that */ + /* we're computing at */ +void hss_set_level(int level) { + if (hash_fault_enabled) { + current_level = level; + } +} + +static enum hash_reason current_reason; /* The reason that the code told us */ + /* that we're computing the next hash */ +void hss_set_hash_reason(enum hash_reason reason) { + if (hash_fault_enabled) { + current_reason = reason; + } +} + +/* + * This checks whether it's time to miscompute a hash + */ +static bool do_fault(void) { + switch (hash_fault_enabled) { + default: + return false; + case 1: + if (current_level == hash_fault_level && + current_reason == hash_fault_reason) { + hash_fault_count -= 1; + return hash_fault_count == 0; + } + return false; + case 2: + return true; + } +} +#endif + /* * This will hash the message, given the hash type. It assumes that the result * buffer is large enough for the hash @@ -40,6 +94,11 @@ void hss_hash_ctx(void *result, int hash_type, union hash_context *ctx, switch (hash_type) { case HASH_SHA256: { SHA256_Init(&ctx->sha256); +#if TEST_INSTRUMENTATION + if (do_fault()) { + SHA256_Update(&ctx->sha256, "", 1); /* Miscompute the hash */ + } +#endif SHA256_Update(&ctx->sha256, message, message_len); SHA256_Final(result, &ctx->sha256); #if ALLOW_VERBOSE @@ -70,6 +129,11 @@ void hss_init_hash_context(int h, union hash_context *ctx) { switch (h) { case HASH_SHA256: SHA256_Init( &ctx->sha256 ); +#if TEST_INSTRUMENTATION + if (do_fault()) { + SHA256_Update(&ctx->sha256, "", 1); /* Miscompute the hash */ + } +#endif break; } } diff --git a/hss.c b/hss.c index 67de5f1..b607900 100644 --- a/hss.c +++ b/hss.c @@ -13,6 +13,7 @@ #include "hss_internal.h" #include "hss_aux.h" #include "hss_derive.h" +#include "hss_fault.h" #include "config.h" #include "lm_ots_common.h" @@ -22,6 +23,8 @@ struct hss_working_key *hss_load_private_key( bool (*read_private_key)(unsigned char *private_key, size_t 
len_private_key, void *context), + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), void *context, size_t memory_target, const unsigned char *aux_data, size_t len_aux_data, @@ -31,7 +34,8 @@ struct hss_working_key *hss_load_private_key( unsigned levels; param_set_t lm[ MAX_HSS_LEVELS ]; param_set_t ots[ MAX_HSS_LEVELS ]; - if (!hss_get_parameter_set( &levels, lm, ots, read_private_key, context)) { + if (!hss_get_parameter_set( &levels, lm, ots, read_private_key, context, + info)) { /* Can't read private key, or private key invalid */ return 0; } @@ -46,7 +50,9 @@ struct hss_working_key *hss_load_private_key( } /* Step 3: load the ephemeral key */ - if (! hss_generate_working_key( read_private_key, context, + if (! hss_generate_working_key( read_private_key, + update_private_key, + context, aux_data, len_aux_data, w, info )) { /* About the only thing I can see failing here is perhaps */ /* attempting to reread the private key failed the second time; */ @@ -59,6 +65,172 @@ struct hss_working_key *hss_load_private_key( return w; } +/* + * Routines to read/update the private key + */ + +/* + * This computes the checksum that appears in the private key + * It is here to detect write errors that might accidentally send us + * backwards. 
It is unkeyed, because we have no good place to get the + * key from (if we assume the attacker can modify the private key, well, + * we're out of luck) + */ +static void compute_private_key_checksum( + unsigned char checksum[PRIVATE_KEY_CHECKSUM_LEN], + const unsigned char *private_key ) { + union hash_context ctx; + unsigned char hash[MAX_HASH]; + unsigned levels = private_key[ PRIVATE_KEY_FORMAT_NUM_LEVEL ]; + if (levels > MAX_HSS_LEVELS) levels = MAX_HSS_LEVELS; /* Actually, */ + /* an error */ + + /* Hash everything except the checksum */ + hss_set_level(0); + hss_set_hash_reason(h_reason_priv_checksum); + hss_init_hash_context( HASH_SHA256, &ctx ); + hss_update_hash_context( HASH_SHA256, &ctx, + private_key, PRIVATE_KEY_CHECKSUM ); + hss_update_hash_context( HASH_SHA256, &ctx, + private_key + PRIVATE_KEY_CHECKSUM + PRIVATE_KEY_CHECKSUM_LEN, + PRIVATE_KEY_LEN(levels) - + (PRIVATE_KEY_CHECKSUM + PRIVATE_KEY_CHECKSUM_LEN )); + hss_finalize_hash_context( HASH_SHA256, &ctx, + hash ); + + /* The first 8 bytes of the hash is the checksum */ + memcpy( checksum, hash, PRIVATE_KEY_CHECKSUM_LEN ); + + hss_zeroize( &ctx, sizeof ctx ); + hss_zeroize( hash, sizeof hash ); +} + +static const unsigned char expected_format[ PRIVATE_KEY_FORMAT_LEN ] = { + 0x01, /* Current format version */ + SECRET_METHOD ? SECRET_MAX : 0xff, /* Secret method marker */ + FAULT_CACHE_SIG ? 
FAULT_CACHE_LEN : 0, /* Do we store hashed sigs */ + /* in the private key (and if so, how long are they) */ + 0 /* Number of tree levels goes here */ +}; + +void hss_set_private_key_format(unsigned char *private_key, int levels) { + memcpy( private_key + PRIVATE_KEY_FORMAT, expected_format, + PRIVATE_KEY_FORMAT_LEN ); + private_key[PRIVATE_KEY_FORMAT_NUM_LEVEL] = levels; +} + +bool hss_check_private_key(const unsigned char *private_key) { + /* If the key isn't in the format we expect, it's a bad key (or, at */ + /* least, it's unusable by us) */ + if (0 != memcmp( private_key + PRIVATE_KEY_FORMAT, expected_format, + PRIVATE_KEY_FORMAT_LEN - 1 )) { + return false; + } + + /* Check the checksum on the key */ + unsigned char checksum[ PRIVATE_KEY_CHECKSUM_LEN ]; + compute_private_key_checksum( checksum, private_key ); + bool success = (0 == memcmp( checksum, &private_key[PRIVATE_KEY_CHECKSUM], + PRIVATE_KEY_CHECKSUM_LEN )); + hss_zeroize( checksum, sizeof checksum ); + return success; +} + +enum hss_error_code hss_read_private_key(unsigned char *private_key, + struct hss_working_key *w) { + int levels = w->levels; + if (levels < 1 || levels > MAX_HSS_LEVELS) { + return hss_error_internal; + } + if (w->read_private_key) { + unsigned char temp[ HSS_MAX_PRIVATE_KEY_LEN ]; + if (!w->read_private_key( temp, + PRIVATE_KEY_LEN(levels), w->context)) { + hss_zeroize(temp, sizeof temp ); + hss_zeroize( private_key, PRIVATE_KEY_LEN(levels) ); + return hss_error_private_key_read_failed; + } + memcpy( private_key, temp, PRIVATE_KEY_LEN(levels) ); + hss_zeroize(temp, sizeof temp); + } else { + memcpy( private_key, w->context, PRIVATE_KEY_LEN(levels) ); + } + if (private_key[PRIVATE_KEY_FORMAT_NUM_LEVEL] != levels) { + hss_zeroize( private_key, PRIVATE_KEY_LEN(levels) ); + return hss_error_incompatible_param_set; + } + if (!hss_check_private_key(private_key)) { + hss_zeroize( private_key, PRIVATE_KEY_LEN(levels) ); + return hss_error_bad_private_key; + } + return hss_error_none; +} 
+ +/* + * This assumes that the private key is already set up, and so only updates + * the counter and the checksum + */ +enum hss_error_code hss_write_private_key(unsigned char *private_key, + struct hss_working_key *w, int num_cache_sig) { + int extra = 0; +#if FAULT_CACHE_SIG + /* If we're also saving cached signatures, extend the area we write */ + /* to include the updated signatures */ + extra = num_cache_sig * FAULT_CACHE_LEN; +#endif + return hss_write_private_key_no_w( private_key, + PRIVATE_KEY_CHECKSUM + PRIVATE_KEY_CHECKSUM_LEN + extra, + w->read_private_key, w->update_private_key, w->context ); +} + +enum hss_error_code hss_write_private_key_no_w( + unsigned char *private_key, size_t len, + bool (*read_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context) { + /* Update the checksum */ + compute_private_key_checksum( private_key + PRIVATE_KEY_CHECKSUM, + private_key ); + + /* Write it out */ + if (update_private_key) { + if (!update_private_key( private_key, len, context )) { + return hss_error_private_key_write_failed; + } +#if FAULT_RECOMPUTE + /* Double check that the write went through */ + /* Note: read_private_key is null only during the initial write */ + /* during key generation; errors there don't break security */ + /* Q: this is relatively cheap; should we do this even if */ + /* !FAULT_RECOMPUTE && !FAULT_CACHE_SIG ??? 
*/ + if (read_private_key) { + int levels = private_key[PRIVATE_KEY_FORMAT_NUM_LEVEL]; + if (levels < 1 || levels > MAX_HSS_LEVELS) { + return hss_error_internal; + } + unsigned char private_key_check[HSS_MAX_PRIVATE_KEY_LEN]; + if (!read_private_key( private_key_check, PRIVATE_KEY_LEN(levels), + context )) { + hss_zeroize( private_key_check, sizeof private_key_check ); + return hss_error_private_key_read_failed; + } + int cmp = memcmp( private_key, private_key_check, + PRIVATE_KEY_LEN(levels) ); + hss_zeroize( private_key_check, sizeof private_key_check ); + if (cmp != 0) { + return hss_error_bad_private_key; + } + } +#endif + } else { + memcpy( context, private_key, len ); + } + + return hss_error_none; +} + /* * Internal function to generate the root seed and I value (based on the * private seed). We do this (rather than select seed, I at random) so that @@ -95,6 +267,9 @@ bool hss_generate_root_seed_I_value(unsigned char *seed, unsigned char *I, #endif union hash_context ctx; + hss_set_level(0); + hss_set_hash_reason(h_reason_other); + hss_hash_ctx(hash_postimage, HASH_SHA256, &ctx, hash_preimage, TOPSEED_LEN ); memcpy( hash_preimage + TOPSEED_SEED, hash_postimage, SEED_LEN ); @@ -127,7 +302,8 @@ bool hss_generate_child_seed_I_value( unsigned char *seed, unsigned char *I, const unsigned char *parent_seed, const unsigned char *parent_I, merkle_index_t index, - param_set_t lm, param_set_t ots) { + param_set_t lm, param_set_t ots, int child_level) { + hss_set_level(child_level); struct seed_derive derive; if (!hss_seed_derive_init( &derive, lm, ots, parent_I, parent_seed )) { return false; @@ -168,3 +344,54 @@ enum hss_error_code hss_extra_info_test_error_code( struct hss_extra_info *p ) { if (!p) return hss_error_got_null; return p->error_code; } + +/* + * This is here to allow the regression tests to make inquiries to part of + * the config; what tests run (and how they run) depend, at times, on the + * config + */ +int hss_is_fault_hardening_on(int type) { + 
switch (type) { + case 0: /* 0 -> is fault hardening on? */ + return FAULT_RECOMPUTE || FAULT_CACHE_SIG; + case 1: /* 1 -> are with caching sigs (and if so, what's the hash */ + /* length that we're using) */ + return FAULT_CACHE_SIG ? FAULT_CACHE_LEN : 0; + default: return 0; + } +} + +#if FAULT_CACHE_SIG +/* Check if a buffer is all-zeros. Used only if we're storing hashes of */ +/* signatures in the private key */ +bool hss_all_zero( unsigned char *s, size_t len) { + while (len--) { + if (*s++ != 0) + return false; + } + return true; +} + +/* This hashes a signature (which signs an internal root) into a value that */ +/* is stored in the private key */ +/* The data we're hashing is public; hence we don't bother zeroizing */ +bool hss_compute_hash_for_cache( unsigned char *hash_output, + const unsigned char *sig, size_t sig_len ) { + unsigned char hash[ MAX_HASH ]; + union hash_context ctx; + + /* Compute the hash. Since this hash is not externally exposed, we */ + /* can use a fixed SHA-256 hash */ + hss_set_hash_reason(h_reason_sig_hash); + hss_hash_ctx( hash, HASH_SHA256, &ctx, sig, sig_len ); + + /* We use the 'all-zero' value to mean 'this hash hasn't been computed */ + /* yet'. If the hash just happens to be that, set one of the bits */ + if (hss_all_zero( hash, FAULT_CACHE_LEN )) { + hash[0] = 0x01; + } + + memcpy( hash_output, hash, FAULT_CACHE_LEN ); + return true; +} +#endif diff --git a/hss.h b/hss.h index cda5b34..481ff01 100644 --- a/hss.h +++ b/hss.h @@ -134,6 +134,17 @@ bool hss_generate_private_key( * This assumes that the key has already been generated by * hss_generate_private_key * + * The update_private_key function will be called when the private key is + * updated; it is expected to write the private key to secure storage (and the + * context pointer is a value that is passed to the update_private_key + * function; it can be used to tell the update_private_key function where + * in the secure storage to place the key). 
And, if it is NULL, the context + * is expected to point to a copy of the private_key in RAM. + * One distinction is that, on an update, len_private_key will be 8; + * the update_private_key can choose to update only the first 8 bytes + * of the private key (the rest will be unchanged), or write all + * 48 bytes (private_key will point to the full 48 byte value) + * * memory_target is a value which gives a goal for the amount of memory (in * bytes) that this structure should take up. There are a number of * time/memory trade-offs possible; the function uses this parameter as a @@ -156,6 +167,8 @@ struct hss_working_key; struct hss_working_key *hss_load_private_key( bool (*read_private_key)(unsigned char *private_key, size_t len_private_key, void *context), + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), void *context, size_t memory_target, const unsigned char *aux_data, size_t len_aux_data, /* Optional */ @@ -172,17 +185,6 @@ void hss_free_working_key( struct hss_working_key * ); * working_key is the key that has been allocated by allocate_working_key and * initialied by hss_generate_working_key * - * The update_private_key function will be called when the private key is - * updated; it is expected to write the private key to secure storage (and the - * context pointer is a value that is passed to the update_private_key - * function; it can be used to tell the update_private_key function where - * in the secure storage to place the key). And, if it is NULL, the context - * is expected to point to a copy of the private_key in RAM. 
- * One distinction is that, on an update, len_private_key will be 8; - * the update_private_key can choose to update only the first 8 bytes - * of the private key (the rest will be unchanged), or write all - * 48 bytes (private_key will point to the full 48 byte value) - * * message, message_len are the message being signed * * signature is where the signature will be written, with signature_len being @@ -192,9 +194,6 @@ void hss_free_working_key( struct hss_working_key * ); */ bool hss_generate_signature( struct hss_working_key *working_key, - bool (*update_private_key)(unsigned char *private_key, - size_t len_private_key, void *context), - void *context, const void *message, size_t message_len, unsigned char *signature, size_t signature_len, struct hss_extra_info *info); @@ -242,6 +241,10 @@ struct hss_working_key *allocate_working_key( * storage, with context being a value passed to that function. * If NULL, we assume that the context pointer points to the private key * + * The update_private_key is a function to write the private key to secure + * storage, with context being a value passed to that function. + * If NULL, we assume that the context pointer points to the private key + * * aux_data points to a buffer containing the auxiliary data generated * during the key generation process, with len_aux_data being the length * of the buffer. 
Passing it a NULL means that we're not providing that @@ -252,6 +255,8 @@ struct hss_working_key *allocate_working_key( bool hss_generate_working_key( bool (*read_private_key)(unsigned char *private_key, size_t len_private_key, void *context), + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), void *context, const unsigned char *aux_data, size_t len_aux_data, /* Optional */ struct hss_working_key *working_key, @@ -269,9 +274,6 @@ bool hss_generate_working_key( */ bool hss_reserve_signature( struct hss_working_key *w, - bool (*update_private_key)(unsigned char *private_key, - size_t len_private_key, void *context), - void *context, unsigned sigs_to_reserve, struct hss_extra_info *info); @@ -297,7 +299,12 @@ bool hss_set_autoreserve( size_t hss_get_private_key_len(unsigned levels, const param_set_t *lm_type, const param_set_t *lm_ots_type); -#define HSS_MAX_PRIVATE_KEY_LEN (8 + 8 + SEED_LEN + 16) + +/* + * We guarantee that the private key length will never exceed this + * This is a conservative value, which is true for any config.h setting + */ +#define HSS_MAX_PRIVATE_KEY_LEN (4 + 8 + 8 + 7*32 + 8 + 8 + SEED_LEN + 16) /* * This include file has the functions that contains the lengths of the other @@ -345,13 +352,14 @@ bool hss_get_parameter_set( unsigned *levels, param_set_t lm_ots_type[ MAX_HSS_LEVELS ], bool (*read_private_key)(unsigned char *private_key, size_t len_private_key, void *context), - void *context); + void *context, + struct hss_extra_info *info); enum hss_error_code { hss_error_none = 0, /* I don't know nothing about any error */ - hss_range_normal_failures, /* There errors happen during normal use */ - /* of the signature scheme */ + hss_range_normal_failures = 100, /* These errors happen during normal */ + /* use of the signature scheme */ hss_error_bad_signature, /* Invalid signature */ hss_error_private_key_expired, /* This private key has generated all */ /* the signatures it is allowed */ @@ 
-359,7 +367,7 @@ enum hss_error_code { /* because the key couldn't do that many */ /* signatures */ - hss_range_bad_parameters, /* These errors are cause by the */ + hss_range_bad_parameters = 200, /* These errors are cause by the */ /* application passing in a bad parameter */ hss_error_no_randomness, /* No RNG supplied */ hss_error_bad_param_set, /* Application asked for an illegal parmaeter */ @@ -369,6 +377,8 @@ enum hss_error_code { hss_error_bad_aux, /* Error with provided aux buffer */ hss_error_no_private_buffer, /* Application didn't provide a place */ /* to put the private key */ + hss_error_incompatible_functions, /* Wrote set of read/update function */ + /* pointers */ hss_error_incompatible_param_set, /* The parameter set of the working */ /* set didn't agree with the private key */ hss_error_key_uninitialized, /* The working key used had never been */ @@ -379,8 +389,9 @@ enum hss_error_code { /* properly */ hss_error_ctx_already_used, /* The ctx has already been used */ hss_error_bad_public_key, /* Somehow, we got an invalid public key */ + hss_error_bad_private_key, /* Somehow, we got an invalid private key */ - hss_range_processing_error, /* These errors are cause by an */ + hss_range_processing_error = 300, /* These errors are cause by an */ /* error while processing */ hss_error_bad_randomness, /* The RNG claimed failure */ hss_error_private_key_write_failed, /* The write of the private key */ @@ -389,10 +400,12 @@ enum hss_error_code { /* from NVRAM failed */ hss_error_out_of_memory, /* A malloc failure caused us to fail */ - hss_range_my_problem, /* These are caused by internal errors */ + hss_range_my_problem = 400, /* These are caused by internal errors */ /* within the HSS implementation */ hss_error_internal, /* Some internal assertion failed (should */ /* never happen) */ + hss_error_fault_detected, /* Internal doubling checking detected that */ + /* a fault has occurred */ }; /* @@ -414,4 +427,18 @@ void hss_extra_info_set_threads( struct 
hss_extra_info *, int ); bool hss_extra_info_test_last_signature( struct hss_extra_info * ); enum hss_error_code hss_extra_info_test_error_code( struct hss_extra_info * ); +/* + * Allow applications (mainly, the fault regression test) check on the + * fault hardening flag. + * If the parameter is 0, this is checking on any fault hardening + * If the paraemter is 1, this is specfically checking on CACHE_SIG + */ +int hss_is_fault_hardening_on(int); + +/* + * Do a report on any detected memory leaks; will be ignored if + * instrumentation is disabled. Will return TRUE if none were detected + */ +bool hss_report_memory_leak(void); + #endif /* HSS_H_ */ diff --git a/hss_alloc.c b/hss_alloc.c index e85e79c..02641c6 100644 --- a/hss_alloc.c +++ b/hss_alloc.c @@ -8,6 +8,7 @@ #include "hss.h" #include "hss_internal.h" #include "lm_common.h" +#include "hss_malloc.h" #define MALLOC_OVERHEAD 8 /* Our simplistic model about the overhead */ /* that malloc takes up is that it adds 8 */ @@ -170,7 +171,7 @@ struct hss_working_key *allocate_working_key( signed long initial_mem_target = mem_target; /* DEBUG HACK */ #endif - struct hss_working_key *w = malloc( sizeof *w ); + struct hss_working_key *w = hss_malloc( sizeof *w, mu_working_key ); if (!w) { info->error_code = hss_error_out_of_memory; return NULL; @@ -189,8 +190,12 @@ signed long initial_mem_target = mem_target; /* DEBUG HACK */ w->signed_pk[i] = NULL; } for (i=0; itree[i] = NULL; + int redux; + for (redux = 0; redux <= FAULT_RECOMPUTE; redux++) { + w->tree[redux][i] = NULL; + } } + w->stack = NULL; /* Allocate all the memory for the level signatures */ @@ -211,7 +216,7 @@ signed long initial_mem_target = mem_target; /* DEBUG HACK */ w->signed_pk_len[i] = w->siglen[i-1] + pklen; - w->signed_pk[i] = malloc( w->signed_pk_len[i] ); + w->signed_pk[i] = hss_malloc( w->signed_pk_len[i], mu_signed_pk ); if (!w->signed_pk[i]) { hss_free_working_key(w); info->error_code = hss_error_out_of_memory; @@ -234,7 +239,6 @@ signed long 
initial_mem_target = mem_target; /* DEBUG HACK */ unsigned level_hash[MAX_HSS_LEVELS]; unsigned level_height[MAX_HSS_LEVELS]; unsigned hash_size[MAX_HSS_LEVELS]; - unsigned total_height = 0; /* Parse the parameter sets */ for (i=0; ierror_code = hss_error_bad_param_set; return 0; } - - total_height += level_height[i]; /* Also track the number of */ - /* signatures we can generate with this parm set */ } /* @@ -301,6 +302,13 @@ signed long initial_mem_target = mem_target; /* DEBUG HACK */ size_t mem = compute_level_memory_usage(i, subtree, level_height[i], hash_size[i], &subtree_levels[i], &stack_used ); +#if FAULT_RECOMPUTE + if (i > 0) { + /* Non-top levels are replicated; hence double the cost */ + stack_used *= 2; + mem *= 2; + } +#endif mem_target -= mem; stack_usage += stack_used; @@ -361,6 +369,14 @@ signed long initial_mem_target = mem_target; /* DEBUG HACK */ signed long mem = compute_level_memory_usage(i, j, level_height[i], hash_size[i], &subtree_levels[i], &stack_used ); +#if FAULT_RECOMPUTE + if (levels > 1) { + /* If we use more than one level, this bottom level */ + /* is replicated */ + stack_used *= 2; + mem *= 2; + } +#endif /* # of sublevels this would have */ unsigned sub_levels = (level_height[i] + j - 1) / j; @@ -426,7 +442,7 @@ printf( "Allocation = %ld\n", initial_mem_target - mem_target + best_mem ); /* D stack = NULL; /* Hey! 
No stack required */ /* Avoid the malloc, as malloc(0) is allowed to fail */ } else { - stack = malloc(stack_usage); + stack = hss_malloc(stack_usage, mu_stack); if (!stack) { hss_free_working_key(w); info->error_code = hss_error_out_of_memory; @@ -441,69 +457,82 @@ printf( "Allocation = %ld\n", initial_mem_target - mem_target + best_mem ); /* D * allocations */ for (i = 0; ierror_code = hss_error_out_of_memory; - return 0; - } - unsigned h0 = level_height[i]; - tree->level = h0; - tree->h = level_hash[i]; - tree->hash_size = hash_size[i]; - tree->lm_type = lm_type[i]; - tree->lm_ots_type = lm_ots_type[i]; - /* We'll initialize current_index from the private key */ - tree->max_index = (1L << tree->level) - 1; - tree->sublevels = subtree_levels[i]; - tree->subtree_size = subtree_size[i]; - unsigned top_subtree_size = h0 - (subtree_levels[i]-1)*subtree_size[i]; - tree->top_subtree_size = top_subtree_size; - - unsigned j, k; - for (j=0; jsubtree[j][k] = NULL; - w->tree[i] = tree; - - unsigned subtree_level = 0; - unsigned levels_below = h0; - for (j=0; jerror_code = hss_error_out_of_memory; - return 0; - } - - s->level = subtree_level; - s->levels_below = levels_below; - tree->subtree[j][k] = s; - if (k == ACTIVE_TREE) { - /* Active trees don't need no stack */ - s->stack = NULL; - } else if (levels_below == 0) { - /* Bottom level subtrees don't need no stack */ - s->stack = NULL; - } else { - s->stack = &stack[stack_index]; - stack_index += hash_size[i] * levels_below; + int redux; + for (redux = 0; redux <= FAULT_RECOMPUTE; redux++) { +#if FAULT_RECOMPUTE + if (i == 0 && redux == 1) { + /* Special case; we don't have a redundent tree for the */ + /* top level */ + w->tree[1][0] = w->tree[0][0]; + continue; + } +#endif + struct merkle_level *tree = hss_malloc( sizeof *tree, mu_tree ); + if (!tree) { + hss_free_working_key(w); + info->error_code = hss_error_out_of_memory; + return 0; + } + unsigned h0 = level_height[i]; + tree->level = h0; + tree->h = level_hash[i]; + 
tree->hash_size = hash_size[i]; + tree->lm_type = lm_type[i]; + tree->lm_ots_type = lm_ots_type[i]; + /* We'll initialize current_index from the private key */ + tree->max_index = (1L << tree->level) - 1; + tree->sublevels = subtree_levels[i]; + tree->subtree_size = subtree_size[i]; + unsigned top_subtree_size = + h0 - (subtree_levels[i]-1)*subtree_size[i]; + tree->top_subtree_size = top_subtree_size; + + unsigned j, k; + for (j=0; jsubtree[j][k] = NULL; + w->tree[redux][i] = tree; + + unsigned subtree_level = 0; + unsigned levels_below = h0; + for (j=0; jerror_code = hss_error_out_of_memory; + return 0; + } + + s->level = subtree_level; + s->levels_below = levels_below; + tree->subtree[j][k] = s; + if (k == ACTIVE_TREE) { + /* Active trees don't need no stack */ + s->stack = NULL; + } else if (levels_below == 0) { + /* Bottom level subtrees don't need no stack */ + s->stack = NULL; + } else { + s->stack = &stack[stack_index]; + stack_index += hash_size[i] * levels_below; + } } + + subtree_level += height; } - - subtree_level += height; } } @@ -515,38 +544,35 @@ printf( "Allocation = %ld\n", initial_mem_target - mem_target + best_mem ); /* D } /* SANITY CHECK */ - /* Compute the max number of signatures we can generate */ - if (total_height > 64) total_height = 64; /* (bounded by 2**64) */ - w->max_count = ((sequence_t)2 << (total_height-1)) - 1; /* height-1 so */ - /* we don't try to shift by 64, and hit undefined behavior */ - - /* We use the count 0xffff..ffff to signify 'we've used up all our */ - /* signatures'. 
Make sure that is above max_count, even for */ - /* parameter sets that can literally generate 2**64 signatures (by */ - /* letting them generate only 2**64-1) */ - if (total_height == 64) w->max_count--; - return w; } +static void free_tree(struct merkle_level *tree) { + if (tree) { + unsigned j, k; + for (j=0; jsubtree[j][k]); + hss_zeroize( tree, sizeof *tree ); /* We have seeds here */ + } + hss_free(tree); +} + void hss_free_working_key(struct hss_working_key *w) { int i; if (!w) return; for (i=0; itree[i]; - if (tree) { - unsigned j, k; - for (j=0; jsubtree[j][k]); - hss_zeroize( tree, sizeof *tree ); /* We have seeds here */ - } - free(tree); + free_tree(w->tree[0][i]); } +#if FAULT_RECOMPUTE + for (i=1; itree[1][i]); + } +#endif for (i=0; isigned_pk[i]); + hss_free(w->signed_pk[i]); } - free(w->stack); + hss_free(w->stack); hss_zeroize( w, sizeof *w ); /* We have secret information here */ - free(w); + hss_free(w); } diff --git a/hss_aux.c b/hss_aux.c index 03807d7..40b1053 100644 --- a/hss_aux.c +++ b/hss_aux.c @@ -11,6 +11,7 @@ #include "endian.h" #include "hash.h" #include "hss_zeroize.h" +#include "hss_fault.h" /* * The structure of aux data @@ -143,9 +144,9 @@ struct expanded_aux_data *hss_expand_aux_data( const unsigned char *aux_data, /* Now, MAC the entire aux file */ union hash_context ctx; unsigned char key[ MAX_HASH ]; - compute_seed_derive( key, w->tree[0]->h, w->working_key_seed, &ctx ); + compute_seed_derive( key, w->tree[0][0]->h, w->working_key_seed, &ctx ); unsigned char expected_mac[ MAX_HASH ]; - compute_hmac( expected_mac, w->tree[0]->h, size_hash, &ctx, key, + compute_hmac( expected_mac, w->tree[0][0]->h, size_hash, &ctx, key, orig_aux_data, aux_data - orig_aux_data ); hss_zeroize( key, size_hash ); hss_zeroize( &ctx, sizeof ctx ); @@ -214,6 +215,8 @@ void hss_save_aux_data( struct expanded_aux_data *data, unsigned level, */ static void compute_seed_derive( unsigned char *result, unsigned hash, const unsigned char *seed, union 
hash_context *ctx) { + hss_set_level(0); + hss_set_hash_reason(h_reason_other); hss_init_hash_context( hash, ctx ); unsigned char prefix[ DAUX_PREFIX_LEN ]; memset( prefix, 0, DAUX_D ); @@ -250,6 +253,8 @@ static void compute_hmac( unsigned char *dest, unsigned block_size = hss_hash_blocksize(hash); /* Step 1: first phase of the HMAC */ + hss_set_level(0); + hss_set_hash_reason(h_reason_other); hss_init_hash_context( hash, ctx ); xor_key( key, IPAD, size_hash ); hss_update_hash_context( hash, ctx, key, size_hash ); @@ -326,7 +331,7 @@ bool hss_extract_aux_data(const struct expanded_aux_data *aux, unsigned level, if (!aux) return false; /* No aux data */ if (!aux->data[level]) return false; /* We don't have that specific */ /* level saved */ - unsigned hash_size = w->tree[0]->hash_size; + unsigned hash_size = w->tree[0][0]->hash_size; /* We do have the data; copy it to the destination */ memcpy( dest, diff --git a/hss_compute.c b/hss_compute.c index 553dd21..e378171 100644 --- a/hss_compute.c +++ b/hss_compute.c @@ -12,6 +12,7 @@ #include "lm_ots.h" #include "endian.h" #include "hss_derive.h" +#include "hss_fault.h" /* Count the number of 1 bits at the end (lsbits) of the integer */ /* Do it in the obvious way; straightline code may be faster (no */ @@ -87,6 +88,7 @@ static enum hss_error_code hss_compute_internal_node( unsigned char *dest, /* Hash it to form the leaf node */ put_bigendian( pub_key + LEAF_R, r, 4); union hash_context ctx; + hss_set_hash_reason(h_reason_merkle); hss_hash_ctx( current_buf, h, &ctx, pub_key, LEAF_LEN(hash_size) ); /* Work up the stack, combining right nodes with the left nodes */ @@ -134,6 +136,7 @@ void hss_combine_internal_nodes( unsigned char *dest, memcpy( hash_val + INTR_PK, left_node, hash_size ); memcpy( hash_val + INTR_PK + hash_size, right_node, hash_size ); union hash_context ctx; + hss_set_hash_reason(h_reason_merkle); hss_hash_ctx( dest, h, &ctx, hash_val, INTR_LEN(hash_size) ); } @@ -147,6 +150,8 @@ void 
hss_gen_intermediate_tree(const void *data, unsigned hash_len = hss_hash_length(d->h); unsigned i; + hss_set_level(d->level); + for (i=0; inode_count; i++) { unsigned char result[ MAX_HASH ]; enum hss_error_code status = hss_compute_internal_node( result, diff --git a/hss_derive.c b/hss_derive.c index 89a2757..38b1a0c 100644 --- a/hss_derive.c +++ b/hss_derive.c @@ -22,6 +22,7 @@ #include "hss_internal.h" #include "hash.h" #include "endian.h" +#include "hss_fault.h" #include "config.h" #if SECRET_METHOD == 2 @@ -93,7 +94,15 @@ void hss_seed_derive( unsigned char *seed, struct seed_derive *derive, #else int hash = HASH; /* Use our standard one */ #endif - + /* Declare the reason we're doing this (for the fault instrumentation */ + switch (derive->j) { + default: + hss_set_hash_reason(h_reason_derive); break; + case SEED_CHILD_SEED: case SEED_CHILD_I: + hss_set_hash_reason(h_reason_derive_iseed); break; + case SEED_RANDOMIZER_INDEX: + hss_set_hash_reason(h_reason_derive_c); break; + } hss_hash( seed, hash, buffer, PRG_LEN(SEED_LEN) ); hss_zeroize( buffer, PRG_LEN(SEED_LEN) ); @@ -173,6 +182,7 @@ void hss_seed_derive_set_q( struct seed_derive *derive, merkle_index_t q ) { derive->q = q; unsigned bits_change = my_log2(change); unsigned q_levels = derive->q_levels; + hss_set_hash_reason(h_reason_derive); /* levels_change will be the number of levels of the q-tree we'll */ /* need to recompute */ @@ -241,6 +251,16 @@ void hss_seed_derive_set_j( struct seed_derive *derive, unsigned j ) { unsigned j_levels = derive->j_levels; unsigned shift = SECRET_MAX * j_levels; + /* Declare the reason we're doing this (for the fault instrumentation */ + switch (j) { + default: + hss_set_hash_reason(h_reason_derive); break; + case SEED_CHILD_SEED: case SEED_CHILD_I: + hss_set_hash_reason(h_reason_derive_iseed); break; + case SEED_RANDOMIZER_INDEX: + hss_set_hash_reason(h_reason_derive_c); break; + } + unsigned j_mask = derive->j_mask; j &= j_mask-1; /* Set the high-order bit; clear any 
bits above that */ j |= j_mask; /* This ensures that when we do the hashes, that the */ @@ -266,11 +286,18 @@ void hss_seed_derive_set_j( struct seed_derive *derive, unsigned j ) { /* (which means incrementally computing that path) */ void hss_seed_derive( unsigned char *seed, struct seed_derive *derive, bool increment_j ) { + memcpy( seed, derive->j_seed[ derive->j_levels - 1], SEED_LEN ); if (increment_j) { int i; + /* Declare the reason we're doing this */ + /* Distinguishing between OTS key generation (the 99% case) */ + /* and stepping to the I value (the 1% case) is annoying */ + /* so don't bother - the fault test doesn't really need this */ + hss_set_hash_reason(h_reason_derive); + /* Update the j_values, and figure out which hashes we'll need */ /* to recompute */ for (i = derive->j_levels-1;; i--) { diff --git a/hss_fault.h b/hss_fault.h new file mode 100644 index 0000000..cca1c1f --- /dev/null +++ b/hss_fault.h @@ -0,0 +1,76 @@ +#if !defined( HSS_FAULT_H_ ) +#define HSS_FAULT_H_ + +/* + * This file defines the interface that we use to tell the hash + * instrumentation why we're doing the hash. The entire reason the + * hash code cares is when we need to do fault testing + * + * That is, it is possible that a hash miscomputation might cause us + * to sign two different messages with the same OTS; the whole point + * of this exercise is to make sure, with FAULT_RECOMPUTE or FAULT_CACHE_SIG + * on, this cannot happen + * + * This instrumentation code is able to introduce errors at fairly + * precise places (e.g. 
the next level 1 OTS public key generation) + * These are routines called by the LMS logic to tell the instrumentation + * where we are + * + * Note: if this instrumentation is on, we probably don't want to use + * threading (as this uses globals to communicate with the error injection + * code) + */ + +#include "config.h" + +#if TEST_INSTRUMENTATION + +/* + * This informs the instrumentation that the next set of hashes will be done + * on the given Merkle level (where 0 == top-most). For those hashes outside + * the hypertree, we just pass a 0 + */ +void hss_set_level(int); + +/* + * These are the various reasons we do a hash. Note that these categories + * are assigned with the fault testing logic in mind; hashes that will give + * the same basic result (e.g. initial message hash/ots signature generation + * and summarization) are all in the same bin + * ots_pkgen and ots_sign are in separate bins because we deliberately want + * to trigger them separately + */ +enum hash_reason { + h_reason_ots_pkgen, /* Generating an OTS public key */ + h_reason_ots_sign, /* Signing a message with an OTS private key */ + /* also used for initial hash of the message */ + /* and signature verification */ + h_reason_merkle, /* Performing hashes within the Merkle tree */ + h_reason_derive, /* Deriving OTS private keys */ + h_reason_derive_iseed, /* Deriving I and seed values */ + h_reason_derive_c, /* Deriving message randomizer */ + h_reason_priv_checksum, /* Computing a private key checksum */ + h_reason_sig_hash, /* Computing a hash of a signature (for the */ + /* CACHE_SIG logic) */ + h_reason_other /* The miscellaneous category; aux file */ + /* computations, root seed/I generation */ + /* These are ones where a fault is unlikely */ + /* to allow a forgery */ +}; + +/* + * This informs the instrumentation of the reason for the next set of hashes + */ +void hss_set_hash_reason(enum hash_reason); + +#else + +/* + * If we aren't doing instrumentation, then null out the test calls + 
*/ +#define hss_set_level(x) ((void)0) +#define hss_set_hash_reason(x) ((void)0) + +#endif + +#endif /* HSS_FAULT_H_ */ diff --git a/hss_generate.c b/hss_generate.c index ca96b8e..17f5a64 100644 --- a/hss_generate.c +++ b/hss_generate.c @@ -7,7 +7,7 @@ * The code is made considerably more complex because we try to take * advantage of parallelism. To do this, we explicitly list the parts * of the subtrees we need to build (which is most of the computation), and - * have different worker threads build the various parts, + * have different worker threads build the various parts. * * However, it turns out that this is sometimes insufficient; sometimes, * the work consists of one or two expensive nodes (perhaps the top level @@ -20,7 +20,7 @@ * levels below (and have the main thread do the final computation when * all the threads are completed). * - * This works out pretty good; however man does add complexity :-( + * This works out pretty good; however man does it add complexity :-( */ #include #include @@ -32,6 +32,8 @@ #include "hss_reserve.h" #include "lm_ots_common.h" #include "endian.h" +#include "hss_fault.h" +#include "hss_malloc.h" #define DO_FLOATING_POINT 1 /* If clear, we avoid floating point operations */ /* You can turn this off for two reasons: */ @@ -173,6 +175,8 @@ struct init_order { /* threads do do anything */ /* We may still need to build the */ /* interiors of the subtrees, of course */ + unsigned char tree_level; /* What tree level within the hypertree */ + /* we are working on; 0-7 */ #if DO_FLOATING_POINT float cost; /* Approximate number of hash compression */ /* operations per node */ @@ -234,6 +238,8 @@ static unsigned my_log2(float f) { bool hss_generate_working_key( bool (*read_private_key)(unsigned char *private_key, size_t len_private_key, void *context), + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), void *context, const unsigned char *aux_data, size_t len_aux_data, /* Optional */ 
struct hss_working_key *w, @@ -248,20 +254,36 @@ bool hss_generate_working_key( w->status = hss_error_key_uninitialized; /* In case we detect an */ /* error midway */ - if (!read_private_key && !context) { - info->error_code = hss_error_no_private_buffer; - return false; + /* Error checking */ + if (!read_private_key) { + if (update_private_key) { /* If we don't have a read routine, we */ + /* must not have a write */ + info->error_code = hss_error_incompatible_functions; + return false; + + } + if (!context) { /* If we have neither, we have to have a buffer */ + info->error_code = hss_error_no_private_buffer; + return false; + } + } else { + if (!update_private_key) { /* If we have a read routine, we must */ + /* have a write */ + info->error_code = hss_error_incompatible_functions; + return false; + + } } + w->read_private_key = read_private_key; + w->update_private_key = update_private_key; + w->context = context; /* Read the private key */ - unsigned char private_key[ PRIVATE_KEY_LEN ]; - if (read_private_key) { - if (!read_private_key( private_key, PRIVATE_KEY_LEN, context)) { - info->error_code = hss_error_private_key_read_failed; - goto failed; - } - } else { - memcpy( private_key, context, PRIVATE_KEY_LEN ); + unsigned char private_key[ PRIVATE_KEY_LEN(MAX_HSS_LEVELS) ]; + enum hss_error_code e = hss_read_private_key( private_key, w ); + if (e != hss_error_none) { + info->error_code = e; + goto failed; } /* @@ -274,12 +296,19 @@ bool hss_generate_working_key( info->error_code = hss_error_internal; goto failed; } - unsigned char compressed[PRIVATE_KEY_PARAM_SET_LEN]; + unsigned char compressed[PRIVATE_KEY_PARAM_SET_LEN(MAX_HSS_LEVELS)]; param_set_t lm_type[MAX_HSS_LEVELS], lm_ots_type[MAX_HSS_LEVELS]; int i; for (i=0; ilevels; i++) { - lm_type[i] = w->tree[i]->lm_type; - lm_ots_type[i] = w->tree[i]->lm_ots_type; + lm_type[i] = w->tree[0][i]->lm_type; + lm_ots_type[i] = w->tree[0][i]->lm_ots_type; +#if FAULT_RECOMPUTE + if (lm_type[i] != 
w->tree[1][i]->lm_type || + lm_ots_type[i] != w->tree[1][i]->lm_ots_type) { + info->error_code = hss_error_internal; + goto failed; + } +#endif } if (!hss_compress_param_set( compressed, w->levels, @@ -289,12 +318,33 @@ bool hss_generate_working_key( info->error_code = hss_error_internal; goto failed; } - if (0 != memcmp( private_key + PRIVATE_KEY_PARAM_SET, compressed, - PRIVATE_KEY_PARAM_SET_LEN )) { + if (0 != memcmp( private_key + PRIVATE_KEY_PARAM_SET(w->levels), + compressed, PRIVATE_KEY_PARAM_SET_LEN(w->levels) )) { /* The working set was initiallized with a different parmset */ info->error_code = hss_error_incompatible_param_set; goto failed; } + + /* Get the maximum count, both from the parameter set */ + /* and the private key */ + sequence_t max_count_parm_set = hss_get_max_seqno( w->levels, + lm_type ); + if (max_count_parm_set == 0) { + /* We're passed an unsupported param set */ + info->error_code = hss_error_internal; + goto failed; + } + + /* Get the maximum count allowed by the private key */ + sequence_t max_count_key = get_bigendian( + private_key + PRIVATE_KEY_MAX(w->levels), + PRIVATE_KEY_MAX_LEN ); + if (max_count_key > max_count_parm_set) { + /* The max from the key cannot exceed the parm set */ + info->error_code = hss_error_bad_private_key; + goto failed; + } + w->max_count = max_count_key; } sequence_t current_count = get_bigendian( @@ -305,7 +355,7 @@ bool hss_generate_working_key( } hss_set_reserve_count(w, current_count); - memcpy( w->private_key, private_key, PRIVATE_KEY_LEN ); + memcpy( w->private_key, private_key, PRIVATE_KEY_LEN(w->levels) ); /* Initialize all the levels of the tree */ @@ -313,60 +363,86 @@ bool hss_generate_working_key( int i; sequence_t count = current_count; for (i = w->levels - 1; i >= 0 ; i--) { - struct merkle_level *tree = w->tree[i]; + struct merkle_level *tree = w->tree[0][i]; unsigned index = count & tree->max_index; count >>= tree->level; tree->current_index = index; +#if FAULT_RECOMPUTE + struct 
merkle_level *tree_redux = w->tree[1][i]; + if (tree_redux->max_index != tree->max_index || + tree_redux->level != tree->level) { + info->error_code = hss_error_internal; + goto failed; + } + tree_redux->current_index = index; +#endif } /* Initialize the I values */ for (i = 0; i < w->levels; i++) { - struct merkle_level *tree = w->tree[i]; - - /* Initialize the I, I_next elements */ - if (i == 0) { - /* The root seed, I value is derived from the secret key */ - if (!hss_generate_root_seed_I_value( tree->seed, tree->I, - private_key+PRIVATE_KEY_SEED, - tree->lm_type, tree->lm_ots_type )) { - info->error_code = hss_error_internal; - goto failed; - } - /* We don't use the I_next value */ - } else { - /* The seed, I is derived from the parent's values */ - - /* Where we are in the Merkle tree */ - struct merkle_level *parent = w->tree[i-1]; - merkle_index_t index = parent->current_index; + int redux; + for (redux = 0; redux <= FAULT_RECOMPUTE; redux++) { + if (i == 0 && redux == 1) continue; - if (!hss_generate_child_seed_I_value( tree->seed, tree->I, - parent->seed, parent->I, - index, parent->lm_type, - parent->lm_ots_type )) { - info->error_code = hss_error_internal; - goto failed; - } - /* The next seed, I is derived from either the parent's I */ - /* or the parent's next value */ - if (index == tree->max_index) { - if (!hss_generate_child_seed_I_value( - tree->seed_next, tree->I_next, - parent->seed_next, parent->I_next, - 0, parent->lm_type, - parent->lm_ots_type)) { + struct merkle_level *tree = w->tree[redux][i]; + + /* Initialize the I, I_next elements */ + if (i == 0) { + /* The root seed, I value is derived from the secret key */ + if (!hss_generate_root_seed_I_value( tree->seed, tree->I, + private_key+PRIVATE_KEY_SEED(w->levels), + tree->lm_type, tree->lm_ots_type)) { info->error_code = hss_error_internal; goto failed; } - } else { - if (!hss_generate_child_seed_I_value( - tree->seed_next, tree->I_next, - parent->seed, parent->I, - index+1, 
parent->lm_type, - parent->lm_ots_type)) { - info->error_code = hss_error_internal; + /* We don't use the I_next value */ +#if FAULT_RECOMPUTE + /* + * Double-check the values we just computed + * Note that a failure here won't actually allow a forgery; + * however it does trigger our fault tests, so we check for + * it anyways; failing here on a fault is harmless + */ + unsigned char I_redux[I_LEN]; + unsigned char seed_redux[SEED_LEN]; + hss_generate_root_seed_I_value( seed_redux, I_redux, + private_key+PRIVATE_KEY_SEED(w->levels) ); + int same = (0 == memcmp(tree->I, I_redux, I_LEN ) && + 0 == memcmp(tree->seed, seed_redux, SEED_LEN)); + hss_zeroize( seed_redux, sizeof seed_redux ); + if (!same) { + hss_zeroize( seed_redux, sizeof seed_redux ); + info->error_code = hss_error_fault_detected; goto failed; } +#endif + } else { + /* The seed, I is derived from the parent's values */ + + /* Where we are in the Merkle tree */ + struct merkle_level *parent = w->tree[redux][i-1]; + merkle_index_t index = parent->current_index; + + hss_generate_child_seed_I_value( tree->seed, tree->I, + parent->seed, parent->I, + index, parent->lm_type, + parent->lm_ots_type, i ); + /* The next seed, I is derived from either the parent's I */ + /* or the parent's next value */ + if (index == tree->max_index) { + hss_generate_child_seed_I_value( tree->seed_next, + tree->I_next, + parent->seed_next, + parent->I_next, + 0, parent->lm_type, + parent->lm_ots_type, i); + } else { + hss_generate_child_seed_I_value( tree->seed_next, + tree->I_next, + parent->seed, parent->I, + index+1, parent->lm_type, + parent->lm_ots_type, i); + } } } } @@ -375,7 +451,7 @@ bool hss_generate_working_key( /* viable aux structure */ struct expanded_aux_data *expanded_aux, temp_aux; expanded_aux = hss_expand_aux_data( aux_data, len_aux_data, &temp_aux, - w->tree[0]->hash_size, w ); + w->tree[0][0]->hash_size, w ); /* * Now, build all the subtrees within the tree @@ -386,199 +462,225 @@ bool 
hss_generate_working_key( */ /* There are enough structures in this array to handle the maximum */ /* number of orders we'll ever see */ - struct init_order order[MAX_HSS_LEVELS * MAX_SUBLEVELS * NUM_SUBTREE]; + struct init_order order[MAX_HSS_LEVELS * MAX_SUBLEVELS * NUM_SUBTREE * + (1 + FAULT_RECOMPUTE) ]; struct init_order *p_order = order; int count_order = 0; /* Step through the levels, and for each Merkle tree, compile a list of */ /* the orders to initialize the bottoms of the subtrees that we'll need */ for (i = w->levels - 1; i >= 0 ; i--) { - struct merkle_level *tree = w->tree[i]; - unsigned hash_size = tree->hash_size; - /* The current count within this tree */ - merkle_index_t tree_count = tree->current_index; - /* The index of the leaf we're on */ - merkle_index_t leaf_index = tree_count; - - /* Generate the active subtrees */ - int j; - int bot_level_subtree = tree->level; /* The level of the bottom of */ - /* the subtree */ - unsigned char *active_prev_node = 0; - unsigned char *next_prev_node = 0; - for (j=tree->sublevels-1; j>=0; j--) { - /* The height of this subtree */ - int h_subtree = (j == 0) ? tree->top_subtree_size : - tree->subtree_size; - - /* Initialize the active tree */ - struct subtree *active = tree->subtree[j][ACTIVE_TREE]; - - /* Total number of leaf nodes below this subtree */ - merkle_index_t size_subtree = (merkle_index_t)1 << - (h_subtree + active->levels_below); - /* Fill in the leaf index that's on the left side of this subtree */ - /* This is the index of the leaf that we did when we first */ - /* entered the active subtree */ - merkle_index_t left_leaf = leaf_index & ~(size_subtree - 1); - /* This is the number of leaves we've done in this subtree */ - merkle_index_t subtree_count = leaf_index - left_leaf; - /* If we're not in the bottom tree, it's possible that the */ - /* update process will miss the very first update before we */ - /* need to sign. 
To account for that, generate one more */ - /* node than what our current count would suggest */ - if (i != w->levels - 1) { - subtree_count++; - } - active->current_index = 0; - active->left_leaf = left_leaf; - merkle_index_t num_bottom_nodes = (merkle_index_t)1 << h_subtree; - - /* Check if we have aux data at this level */ - int already_computed_lower = 0; - if (i == 0) { - merkle_index_t lower_index = num_bottom_nodes-1; - merkle_index_t node_offset = active->left_leaf>>active->levels_below; - if (hss_extract_aux_data(expanded_aux, active->level+h_subtree, - w, &active->nodes[ hash_size * lower_index ], - node_offset, num_bottom_nodes)) { - /* We do have it precomputed in our aux data */ - already_computed_lower = 1; + int redux; + for (redux = 0; redux <= FAULT_RECOMPUTE; redux++) { + if (i == 0 && redux == 1) continue; + struct merkle_level *tree = w->tree[redux][i]; + unsigned hash_size = tree->hash_size; + /* The current count within this tree */ + merkle_index_t tree_count = tree->current_index; + /* The index of the leaf we're on */ + merkle_index_t leaf_index = tree_count; + + /* Generate the active subtrees */ + int j; + int bot_level_subtree = tree->level; /* The level of the */ + /* bottom of the subtree */ + unsigned char *active_prev_node = 0; + unsigned char *next_prev_node = 0; + for (j=tree->sublevels-1; j>=0; j--) { + /* The height of this subtree */ + int h_subtree = (j == 0) ? 
tree->top_subtree_size : + tree->subtree_size; + + /* Initialize the active tree */ + struct subtree *active = tree->subtree[j][ACTIVE_TREE]; + + /* Total number of leaf nodes below this subtree */ + merkle_index_t size_subtree = (merkle_index_t)1 << + (h_subtree + active->levels_below); + /* Fill in the leaf index that's on the left side of this */ + /* subtree */ + /* This is the index of the leaf that we did when we */ + /* first entered the active subtree */ + merkle_index_t left_leaf = leaf_index & ~(size_subtree - 1); + /* This is the number of leaves we've done in this */ + /* subtree */ + merkle_index_t subtree_count = leaf_index - left_leaf; + /* If we're not in the bottom tree, it's possible that */ + /* the update process will miss the very first update */ + /* before we need to sign. To account for that, */ + /* generate one more node than what our current count */ + /* would suggest */ + if (i != w->levels - 1) { + subtree_count++; } - } - /* No aux data at this level; schedule the bottom row to be computed */ - /* Schedule the creation of the entire active tree */ - p_order->tree = tree; - p_order->subtree = active; - p_order->count_nodes = (merkle_index_t)1 << h_subtree; /* All */ - /* the nodes in this subtree */ - p_order->next_tree = 0; - /* Mark the root we inherented from the subtree just below us */ - p_order->prev_node = already_computed_lower ? 
NULL : active_prev_node; - p_order->prev_index = (tree->current_index >> active->levels_below) & (num_bottom_nodes-1); - - p_order->already_computed_lower = already_computed_lower; - p_order++; count_order++; - - /* For the next subtree, here's where our root will be */ - active_prev_node = &active->nodes[0]; - - /* And initialize the building tree, assuming there is one, and */ - /* assuming that the active subtree isn't at the right edge of */ - /* the Merkle tree */ - if (j > 0 && (leaf_index + size_subtree <= tree->max_index )) { - struct subtree *building = tree->subtree[j][BUILDING_TREE]; - - /* The number of leaves that make up one bottom node */ - /* of this subtree */ - merkle_index_t size_below_tree = (merkle_index_t)1 << building->levels_below; - /* We need to initialize the building tree current index */ - /* to a value at least as large as subtree_count */ - /* We'd prefer not to have to specificallly initialize */ - /* the stack, and so we round up to the next place the */ - /* stack is empty */ - merkle_index_t building_count = - (subtree_count + size_below_tree - 1) & - ~(size_below_tree - 1); - /* # of bottom level nodes we've building right now */ - merkle_index_t num_nodes = building_count >> building->levels_below; - building->left_leaf = left_leaf + size_subtree; - building->current_index = building_count; - - /* Check if this is already in the aux data */ - already_computed_lower = 0; + active->current_index = 0; + active->left_leaf = left_leaf; + merkle_index_t num_bottom_nodes = + (merkle_index_t)1 << h_subtree; + + /* Check if we have aux data at this level */ + int already_computed_lower = 0; if (i == 0) { merkle_index_t lower_index = num_bottom_nodes-1; - merkle_index_t node_offset = building->left_leaf>>building->levels_below; - if (hss_extract_aux_data(expanded_aux, building->level+h_subtree, - w, &building->nodes[ hash_size * lower_index ], - node_offset, num_nodes)) { + merkle_index_t node_offset = + 
active->left_leaf>>active->levels_below; + if (hss_extract_aux_data(expanded_aux, + active->level+h_subtree, + w, &active->nodes[ hash_size * lower_index ], + node_offset, num_bottom_nodes)) { /* We do have it precomputed in our aux data */ already_computed_lower = 1; } } - - /* Schedule the creation of the subset of the building tree */ + /* No aux data at this level; schedule the bottom row to be */ + /* computed. */ + /* Schedule the creation of the entire active tree */ p_order->tree = tree; - p_order->subtree = building; - /* # of nodes to construct */ - p_order->count_nodes = num_nodes; + p_order->subtree = active; + p_order->tree_level = i; + p_order->count_nodes = (merkle_index_t)1 << h_subtree; + /* All *the nodes in this subtree */ p_order->next_tree = 0; - /* We generally can't use the prev_node optimization */ - p_order->prev_node = NULL; - p_order->prev_index = 0; + /* Mark the root we inhereted from the subtree just below us */ + p_order->prev_node = already_computed_lower ? NULL : active_prev_node; + p_order->prev_index = (tree->current_index >> + active->levels_below) & (num_bottom_nodes-1); + p_order->already_computed_lower = already_computed_lower; p_order++; count_order++; - } else if (j > 0) { - tree->subtree[j][BUILDING_TREE]->current_index = 0; - } - - /* And the NEXT_TREE (which is always left-aligned) */ - if (i > 0) { - struct subtree *next = tree->subtree[j][NEXT_TREE]; - next->left_leaf = 0; - merkle_index_t leaf_size = - (merkle_index_t)1 << next->levels_below; - - merkle_index_t next_index = tree_count; - /* If we're not in the bottom tree, it's possible that the */ - /* update process will miss the very first update before we */ - /* need to sign. 
To account for that, potetially generate */ - /* one more node than what our current count would suggest */ - if (i != w->levels - 1) { - next_index++; - } - - /* Make next_index the # of leaves we'll need to process to */ - /* forward this NEXT subtree to this state */ - next_index = (next_index + leaf_size - 1)/leaf_size; - - /* This is set if we have a previous subtree */ - merkle_index_t prev_subtree = (next->levels_below ? 1 : 0); - merkle_index_t num_nodes; - unsigned char *next_next_node = 0; - - /* If next_index == 1, then if we're on a nonbottom subtree */ - /* the previous subtree is still building (and so we */ - /* needn't do anything). The exception is if we're on the */ - /* bottom level, then there is no subtree, and so we still */ - /* need to build the initial left leaf */ - if (next_index <= prev_subtree) { - /* We're not started on this subtree yet */ - next->current_index = 0; - num_nodes = 0; - } else if (next_index < num_bottom_nodes) { - /* We're in the middle of building this tree */ - next->current_index = next_index << next->levels_below; - num_nodes = next_index; - } else { - /* We've completed building this tree */ - /* How we note "we've generated this entire subtree" */ - next->current_index = MAX_SUBINDEX; - num_nodes = num_bottom_nodes; - /* We've generated this entire tree; allow it to */ - /* be inhereited for the next one */ - next_next_node = &next->nodes[0]; - } - if (num_nodes > 0) { - /* Schedule the creation of these nodes */ + + /* For the next subtree, here's where our root will be */ + active_prev_node = &active->nodes[0]; + + /* And initialize the building tree, assuming there is one, */ + /* and assuming that the active subtree isn't at the right */ + /* edge of the Merkle tree */ + if (j > 0 && (leaf_index + size_subtree <= tree->max_index )) { + struct subtree *building = tree->subtree[j][BUILDING_TREE]; + + /* The number of leaves that make up one bottom node */ + /* of this subtree */ + merkle_index_t size_below_tree 
= + (merkle_index_t)1 << building->levels_below; + /* We need to initialize the building tree current */ + /* index to a value at least as large as */ + /* subtree_count */ + /* We'd prefer not to have to specificallly */ + /* initialize the stack, and so we round up to the */ + /* next place the stack is empty */ + merkle_index_t building_count = + (subtree_count + size_below_tree - 1) & + ~(size_below_tree - 1); + /* # of bottom level nodes we've building right now */ + merkle_index_t num_nodes = + building_count >> building->levels_below; + building->left_leaf = left_leaf + size_subtree; + building->current_index = building_count; + + /* Check if this is already in the aux data */ + already_computed_lower = 0; + if (i == 0) { + merkle_index_t lower_index = num_bottom_nodes-1; + merkle_index_t node_offset = + building->left_leaf>>building->levels_below; + if (hss_extract_aux_data(expanded_aux, + building->level+h_subtree, + w, &building->nodes[ hash_size * lower_index ], + node_offset, num_nodes)) { + /* We do have it precomputed in our aux data */ + already_computed_lower = 1; + } + } + + /* Schedule the creation of the subset of the building */ + /* tree */ p_order->tree = tree; - p_order->subtree = next; + p_order->subtree = building; + p_order->tree_level = i; /* # of nodes to construct */ p_order->count_nodes = num_nodes; - p_order->next_tree = 1; - p_order->prev_node = next_prev_node; + p_order->next_tree = 0; + /* We generally can't use the prev_node optimization */ + p_order->prev_node = NULL; p_order->prev_index = 0; - - p_order->already_computed_lower = 0; + + p_order->already_computed_lower = already_computed_lower; p_order++; count_order++; + } else if (j > 0) { + tree->subtree[j][BUILDING_TREE]->current_index = 0; } - next_prev_node = next_next_node; - } + + /* And the NEXT_TREE (which is always left-aligned) */ + if (i > 0) { + struct subtree *next = tree->subtree[j][NEXT_TREE]; + next->left_leaf = 0; + merkle_index_t leaf_size = + (merkle_index_t)1 
<< next->levels_below; + + merkle_index_t next_index = tree_count; + /* If we're not in the bottom tree, it's possible that */ + /* the update process will miss the very first update */ + /* before we need to sign. To account for that, */ + /* potentially generate one more node than what our */ + /* current count would suggest */ + if (i != w->levels - 1) { + next_index++; + } + + /* Make next_index the # of leaves we'll need to */ + /* process to forward this NEXT subtree to this state */ + next_index = (next_index + leaf_size - 1)/leaf_size; + + /* This is set if we have a previous subtree */ + merkle_index_t prev_subtree = (next->levels_below ? 1 : 0); + merkle_index_t num_nodes; + unsigned char *next_next_node = 0; + + /* If next_index == 1, then if we're on a nonbottom */ + /* subtree the previous subtree is still building (and */ + /* so we needn't do anything). The exception is if */ + /* we're on the bottom level, then there is no */ + /* subtree, and so we still need to build the initial */ + /* left leaf */ + if (next_index <= prev_subtree) { + /* We're not started on this subtree yet */ + next->current_index = 0; + num_nodes = 0; + } else if (next_index < num_bottom_nodes) { + /* We're in the middle of building this tree */ + next->current_index = next_index << next->levels_below; + num_nodes = next_index; + } else { + /* We've completed building this tree */ - bot_level_subtree -= h_subtree; - } + /* How we note "we've generated this entire subtree" */ + next->current_index = MAX_SUBINDEX; + num_nodes = num_bottom_nodes; + /* We've generated this entire tree; allow it to */ + /* be inhereited for the next one */ + next_next_node = &next->nodes[0]; + } + if (num_nodes > 0) { + /* Schedule the creation of these nodes */ + p_order->tree = tree; + p_order->subtree = next; + p_order->tree_level = i; + /* # of nodes to construct */ + p_order->count_nodes = num_nodes; + p_order->next_tree = 1; + p_order->prev_node = next_prev_node; + p_order->prev_index = 0; + 
+ p_order->already_computed_lower = 0; + p_order++; count_order++; + } + next_prev_node = next_next_node; + } + + bot_level_subtree -= h_subtree; + } + } } #if DO_FLOATING_POINT @@ -668,7 +770,8 @@ bool hss_generate_working_key( size_t total_hash = (hash_len * count_nodes) << subdiv; unsigned h_subtree = (subtree->level == 0) ? tree->top_subtree_size : tree->subtree_size; - struct sub_order *sub = malloc( sizeof *sub + total_hash ); + struct sub_order *sub = hss_malloc( sizeof *sub + total_hash, + mu_suborder ); if (!sub) continue; /* On malloc failure, don't bother trying */ /* to subdivide */ @@ -746,6 +849,7 @@ bool hss_generate_working_key( detail.tree_height = tree->level; detail.I = (p_order->next_tree ? tree->I_next : tree->I); detail.got_error = &got_error; + detail.level = p_order->tree_level; #if DO_FLOATING_POINT /* Check if we're actually doing a suborder */ @@ -807,7 +911,7 @@ bool hss_generate_working_key( #if DO_FLOATING_POINT /* Don't leak suborders on an intermediate error */ for (i=0; ierror_code = got_error; @@ -831,6 +935,7 @@ bool hss_generate_working_key( unsigned h_subtree = (subtree->level == 0) ? tree->top_subtree_size : tree->subtree_size; merkle_index_t lower_index = ((merkle_index_t)1 << h_subtree) - 1; + hss_set_level(p_order->tree_level); int n; for (n = 0; n < p_order->count_nodes; n++ ) { @@ -842,7 +947,7 @@ bool hss_generate_working_key( hash_size, tree->h, I); } - free( sub ); + hss_free( sub ); p_order->sub = 0; } #endif @@ -861,6 +966,7 @@ bool hss_generate_working_key( const struct merkle_level *tree = p_order->tree; const unsigned char *I = (p_order->next_tree ? 
tree->I_next : tree->I); struct subtree *subtree = p_order->subtree; + hss_set_level(p_order->tree_level); if (p_order->prev_node) { /* This subtree did have a bottom node that was the root node */ @@ -889,13 +995,63 @@ bool hss_generate_working_key( * Again, we could parallelize this; it's also fast enough not to be worth * the complexity */ +#if FAULT_CACHE_SIG + int num_updated_caches = 0; +#endif for (i = 1; i < w->levels; i++) { + hss_set_level(i-1); if (!hss_create_signed_public_key( w->signed_pk[i], w->siglen[i-1], - w->tree[i], w->tree[i-1], w )) { + w->tree[0][i], w->tree[0][i-1], w )) { info->error_code = hss_error_internal; /* Really shouldn't */ /* happen */ goto failed; } +#if FAULT_RECOMPUTE + /* Now double check the signature we just made */ + if (!hss_doublecheck_signed_public_key( w->signed_pk[i], + w->siglen[i-1], + w->tree[1][i], w->tree[1][i-1], w )) { + info->error_code = hss_error_fault_detected; + goto failed; + } +#endif +#if FAULT_CACHE_SIG + /* Check if the signature is the same as what we generated last time */ + { + int sig_index = (w->levels - i - 1); + unsigned char *sig_cache = &w->private_key[ + PRIVATE_KEY_SIG_CACHE + sig_index * FAULT_CACHE_LEN ]; + unsigned char sig_hash[ MAX_HASH ]; + hss_set_level(i-1); + if (!hss_compute_hash_for_cache( sig_hash, w->signed_pk[i], + w->siglen[i-1] )) { + info->error_code = hss_error_internal; /* Really shouldn't */ + /* happen */ + goto failed; + } + if (hss_all_zero( sig_cache, FAULT_CACHE_LEN )) { + /* We've never computed this signature before; store it */ + memcpy( sig_cache, sig_hash, FAULT_CACHE_LEN ); + /* Remember to update the NVRAM */ + if (num_updated_caches < sig_index+1) { + num_updated_caches = sig_index + 1; + } + } else if (0 != memcmp( sig_cache, sig_hash, FAULT_CACHE_LEN )) { + /* The siganture does't match what we did before - error */ + info->error_code = hss_error_fault_detected; + goto failed; + } + } +#endif + /* We generated a signature, mark it from the parent */ + 
hss_step_tree( w->tree[0][i-1] ); + +#if FAULT_RECOMPUTE + /* Also mark it from the redundent tree (if it's not top-level) */ + if (i > 1) { + hss_step_tree( w->tree[1][i-1] ); + } +#endif } hss_zeroize( private_key, sizeof private_key ); @@ -904,8 +1060,27 @@ bool hss_generate_working_key( * initialized them as already having the first update) */ for (i = 0; i < w->levels - 1; i++) { - w->tree[i]->update_count = UPDATE_DONE; + int redux; + for (redux = 0; redux <= FAULT_RECOMPUTE; redux++) { + w->tree[redux][i]->update_count = UPDATE_DONE; + } + } + +#if FAULT_CACHE_SIG + /* + * Check if we computed any signatures for the first time; if so, then + * we'll need to save those in NVRAM (so they'll be available the next + * time we compute them) + */ + if (num_updated_caches > 0) { + enum hss_error_code e = hss_write_private_key( + w->private_key, w, num_updated_caches ); + if (e != hss_error_none) { + info->error_code = e; + goto failed; + } } +#endif w->status = hss_error_none; /* This working key has been officially */ /* initialized, and now can be used */ @@ -913,6 +1088,19 @@ bool hss_generate_working_key( failed: hss_zeroize( private_key, sizeof private_key ); + + /* Clear out any seeds we may have placed in the Merkle trees */ + for (i = 0; i < MAX_HSS_LEVELS; i++) { + int redux; + for (redux = 0; redux <= FAULT_RECOMPUTE; redux++) { + struct merkle_level *tree = w->tree[redux][i]; + if (tree) { + hss_zeroize(tree->seed, sizeof tree->seed); + hss_zeroize(tree->seed_next, sizeof tree->seed_next); + } + } + } + return false; } diff --git a/hss_internal.h b/hss_internal.h index 6efc1ba..34e8ed3 100644 --- a/hss_internal.h +++ b/hss_internal.h @@ -15,26 +15,59 @@ /* lm_type and the lm_ots type for a */ /* single level into 1 byte */ -#define PARM_SET_END 0xff /* We set this marker in the parameter set */ - /* when fewer than the maximum levels are used */ - - /* * The internal structure of a private key */ -#define PRIVATE_KEY_INDEX 0 +#define PRIVATE_KEY_FORMAT 0 
/* 4 byte description of the key format */ +#define PRIVATE_KEY_FORMAT_NUM_LEVEL 3 +#define PRIVATE_KEY_FORMAT_LEN 4 +#define PRIVATE_KEY_INDEX (PRIVATE_KEY_FORMAT + PRIVATE_KEY_FORMAT_LEN) #define PRIVATE_KEY_INDEX_LEN 8 /* 2**64 signatures should be enough for */ /* everyone */ -#define PRIVATE_KEY_PARAM_SET (PRIVATE_KEY_INDEX + PRIVATE_KEY_INDEX_LEN) -#define PRIVATE_KEY_PARAM_SET_LEN (PARAM_SET_COMPRESS_LEN * MAX_HSS_LEVELS) -#define PRIVATE_KEY_SEED (PRIVATE_KEY_PARAM_SET + PRIVATE_KEY_PARAM_SET_LEN) +#define PRIVATE_KEY_CHECKSUM (PRIVATE_KEY_INDEX + PRIVATE_KEY_INDEX_LEN) +#define PRIVATE_KEY_CHECKSUM_LEN 8 +#if FAULT_CACHE_SIG +#define PRIVATE_KEY_SIG_CACHE (PRIVATE_KEY_CHECKSUM + PRIVATE_KEY_CHECKSUM_LEN) +#define PRIVATE_KEY_SIG_CACHE_LEN(levels) ((levels-1) * FAULT_CACHE_LEN) +#define PRIVATE_KEY_END_WRITABLE(levels) (PRIVATE_KEY_SIG_CACHE + \ + PRIVATE_KEY_SIG_CACHE_LEN(levels)) +#else +#define PRIVATE_KEY_END_WRITABLE(levels) (PRIVATE_KEY_CHECKSUM + \ + PRIVATE_KEY_CHECKSUM_LEN ) +#endif +/* PRIVATE_KEY_END_WRITABLE is the end of the part of the private key */ +/* that is dynamically written as the key is used */ +#define PRIVATE_KEY_MAX(levels) PRIVATE_KEY_END_WRITABLE(levels) +#define PRIVATE_KEY_MAX_LEN 8 +#define PRIVATE_KEY_PARAM_SET(levels) (PRIVATE_KEY_MAX(levels) + \ + PRIVATE_KEY_MAX_LEN) +#define PRIVATE_KEY_PARAM_SET_LEN(levels) (PARAM_SET_COMPRESS_LEN * levels) +#define PRIVATE_KEY_SEED(levels) (PRIVATE_KEY_PARAM_SET(levels) + \ + PRIVATE_KEY_PARAM_SET_LEN(levels)) #if SECRET_METHOD == 2 #define PRIVATE_KEY_SEED_LEN (SEED_LEN + I_LEN) #else #define PRIVATE_KEY_SEED_LEN SEED_LEN #endif -#define PRIVATE_KEY_LEN (PRIVATE_KEY_SEED + PRIVATE_KEY_SEED_LEN) /* That's */ - /* 48 bytes */ +#define PRIVATE_KEY_LEN(levels) (PRIVATE_KEY_SEED(levels) + \ + PRIVATE_KEY_SEED_LEN) /* That's 60 bytes, plus */ + /* FAULT_CACHE_LEN+1 per level */ +/* + * Routines to read/update the private key + */ +enum hss_error_code hss_read_private_key(unsigned char 
*private_key, + struct hss_working_key *w); +enum hss_error_code hss_write_private_key(unsigned char *private_key, + struct hss_working_key *w, int num_cache_sig); +enum hss_error_code hss_write_private_key_no_w( + unsigned char *private_key, size_t len, + bool (*read_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + bool (*update_private_key)(unsigned char *private_key, + size_t len_private_key, void *context), + void *context); +bool hss_check_private_key(const unsigned char *private_key); +void hss_set_private_key_format(unsigned char *private_key, int levels); struct merkle_level; struct hss_working_key { @@ -47,19 +80,28 @@ struct hss_working_key { /* Will be higher than the 'current count' */ /* if some signaures are 'reserved' */ sequence_t max_count; /* The maximum count we can ever have */ + /* (from the parameter set) */ unsigned autoreserve; /* How many signatures to attempt to */ /* reserve if the signing process hits */ /* the end of the current reservation */ + bool (*read_private_key)( /* Function to read the private key */ + unsigned char *private_key, + size_t len_private_key, void *context); + bool (*update_private_key)( /* Function to write the private key */ + unsigned char *private_key, + size_t len_private_key, void *context); + void *context; /* Context pointer for the above two */ + size_t signature_len; /* The length of the HSS signature */ unsigned char *stack; /* The stack memory used by the subtrees */ /* The private key (in its entirety) */ - unsigned char private_key[PRIVATE_KEY_LEN]; + unsigned char private_key[PRIVATE_KEY_LEN(MAX_HSS_LEVELS)]; /* The pointer to the seed (contained within the private key) */ /* Warning: nonsyntaxic macro; need to be careful how we use this */ -#define working_key_seed private_key + PRIVATE_KEY_SEED +#define working_key_seed private_key + PRIVATE_KEY_SEED(w->levels) size_t siglen[MAX_HSS_LEVELS]; /* The lengths of the signatures */ /* generated by the various levels 
*/ @@ -71,8 +113,13 @@ struct hss_working_key { /* current root value, signed by the */ /* previous level. Unused for the */ /* topmost level */ - struct merkle_level *tree[MAX_HSS_LEVELS]; /* The structures that manage */ - /* each individual level */ + struct merkle_level *tree[FAULT_RECOMPUTE+1][MAX_HSS_LEVELS]; /* The */ + /* structures that manage each individual */ + /* level. The [1] versions are redundant */ + /* copies used to double check */ + /* Note: tree[1][0] == tree[0][0] */ + /* Because errors in the top level tree */ + /* don't allow forgeries */ }; #define MIN_SUBTREE 2 /* All subtrees (other than the root subtree) have */ @@ -172,6 +219,9 @@ bool hss_compress_param_set( unsigned char *compressed, const param_set_t *lm_ots_type, size_t len_compressed ); +/* Internal function to compute the maximum number of seqno for a parameter set */ +sequence_t hss_get_max_seqno( int levels, const param_set_t *lm_type ); + /* Internal function to generate the root seed, I value (based on the */ /* private seed). 
We do this (rather than selecting them at random) so */ /* that we don't need to store them in our private key; we can recompute */ @@ -185,7 +235,8 @@ bool hss_generate_root_seed_I_value(unsigned char *seed, unsigned char *I, bool hss_generate_child_seed_I_value( unsigned char *seed, unsigned char *I, const unsigned char *parent_seed, const unsigned char *parent_I, merkle_index_t index, - param_set_t parent_lm, param_set_t parent_ots ); + param_set_t parent_lm, param_set_t parent_ots, + int child_level ); /* Combine two internal nodes */ void hss_combine_internal_nodes( unsigned char *dest, @@ -198,6 +249,15 @@ bool hss_create_signed_public_key(unsigned char *signed_key, struct merkle_level *tree, struct merkle_level *parent, struct hss_working_key *w); +#if FAULT_RECOMPUTE +bool hss_doublecheck_signed_public_key(const unsigned char *signed_key, + size_t len_signature, + struct merkle_level *tree, + struct merkle_level *parent, + struct hss_working_key *w); +#endif +/* This needs to be called after we've generated a signature */ +void hss_step_tree(struct merkle_level *tree); /* Used to generate the bottom nodes of a subtree in parallel */ struct intermed_tree_detail { @@ -211,6 +271,7 @@ struct intermed_tree_detail { const unsigned char *I; unsigned node_count; enum hss_error_code *got_error; + int level; /* Which Merkle tree within the hypertree */ }; struct thread_collection; void hss_gen_intermediate_tree(const void *data, @@ -233,6 +294,7 @@ struct verify_detail { size_t message_len; const unsigned char *signature; size_t signature_len; + int tree_level; }; void validate_internal_sig(const void *data, struct thread_collection *col); @@ -241,4 +303,8 @@ struct seed_derive; void lm_ots_generate_randomizer(unsigned char *c, unsigned n, struct seed_derive *seed); +bool hss_all_zero( unsigned char *s, size_t len); +bool hss_compute_hash_for_cache( unsigned char *hash_output, + const unsigned char *sig, size_t sig_len ); + #endif /* HSS_INTERNAL_H_ */ diff --git 
a/hss_keygen.c b/hss_keygen.c index d36f7dd..bb09be4 100644 --- a/hss_keygen.c +++ b/hss_keygen.c @@ -9,6 +9,7 @@ #include "hss_thread.h" #include "lm_common.h" #include "lm_ots_common.h" +#include "hss_fault.h" /* Count the number of 1 bits at the end (lsbits) of the integer */ /* Do it in the obvious way; straightline code may be faster (no */ @@ -107,39 +108,52 @@ bool hss_generate_private_key( return false; } - unsigned char private_key[ PRIVATE_KEY_LEN ]; + unsigned char private_key[ PRIVATE_KEY_LEN(MAX_HSS_LEVELS) ]; /* First step: format the private key */ + hss_set_private_key_format( private_key, levels ); put_bigendian( private_key + PRIVATE_KEY_INDEX, 0, PRIVATE_KEY_INDEX_LEN ); - if (!hss_compress_param_set( private_key + PRIVATE_KEY_PARAM_SET, +#if FAULT_CACHE_SIG + /* Mark all signatures as "not computed yet" */ + memset( private_key + PRIVATE_KEY_SIG_CACHE, 0, + PRIVATE_KEY_SIG_CACHE_LEN(levels) ); +#endif + if (!hss_compress_param_set( private_key + PRIVATE_KEY_PARAM_SET(levels), levels, lm_type, lm_ots_type, - PRIVATE_KEY_PARAM_SET_LEN )) { + PRIVATE_KEY_PARAM_SET_LEN(levels) )) { info->error_code = hss_error_bad_param_set; return false; } - if (!(*generate_random)( private_key + PRIVATE_KEY_SEED, + /* Fill in the maximum seqno */ + sequence_t max_seqno = hss_get_max_seqno( levels, lm_type ); + if (max_seqno == 0) { + info->error_code = hss_error_bad_param_set; + return false; + } + put_bigendian( private_key + PRIVATE_KEY_MAX(levels), max_seqno, + PRIVATE_KEY_MAX_LEN ); + + /* Pick the random seed */ + if (!(*generate_random)( private_key + PRIVATE_KEY_SEED(levels), PRIVATE_KEY_SEED_LEN )) { info->error_code = hss_error_bad_randomness; return false; } /* Now make sure that the private key is written to NVRAM */ - if (update_private_key) { - if (!(*update_private_key)( private_key, PRIVATE_KEY_LEN, context)) { - /* initial write of private key didn't take */ - info->error_code = hss_error_private_key_write_failed; - hss_zeroize( private_key, sizeof 
private_key ); - return false; - } - } else { - if (context == 0) { - /* We weren't given anywhere to place the private key */ - info->error_code = hss_error_no_private_buffer; - hss_zeroize( private_key, sizeof private_key ); - return false; - } - memcpy( context, private_key, PRIVATE_KEY_LEN ); + if (!update_private_key && !context) { + /* We weren't given anywhere to place the private key */ + info->error_code = hss_error_no_private_buffer; + hss_zeroize( private_key, sizeof private_key ); + return false; + } + enum hss_error_code e = hss_write_private_key_no_w( private_key, + PRIVATE_KEY_LEN(levels), 0, update_private_key, context ); + if (e != hss_error_none) { + info->error_code = e; + hss_zeroize( private_key, sizeof private_key ); + return false; } /* Figure out what would be the best trade-off for the aux level */ @@ -156,7 +170,7 @@ bool hss_generate_private_key( unsigned char I[I_LEN]; unsigned char seed[SEED_LEN]; - if (!hss_generate_root_seed_I_value( seed, I, private_key+PRIVATE_KEY_SEED, + if (!hss_generate_root_seed_I_value( seed, I, private_key+PRIVATE_KEY_SEED(levels), lm_type[0], lm_ots_type[0])) { info->error_code = hss_error_internal; hss_zeroize( private_key, sizeof private_key ); @@ -171,39 +185,30 @@ bool hss_generate_private_key( /* appears in the aux data, and 4*log2 of the number of core we have */ unsigned num_cores = hss_thread_num_tracks(info->num_threads); unsigned level; - unsigned char *dest = 0; /* The area we actually write to */ - void *temp_buffer = 0; /* The buffer we need to free when done */ - for (level = h0-1; level > 2; level--) { + for (level = h0-1; level > 0; level--) { /* If our bottom-most aux data is at this level, we want it */ - if (expanded_aux_data && expanded_aux_data->data[level]) { - /* Write directly into the aux area */ - dest = expanded_aux_data->data[level]; - break; - } + if (expanded_aux_data && expanded_aux_data->data[level]) break; /* If going to a higher levels would mean that we wouldn't */ /* 
effectively use all the cores we have, use this level */ - if ((1<data[level]) { + /* We're going directly into the aux data */ + dest = expanded_aux_data->data[level]; + temp_buffer_size = 1; /* We're not using the temp buffer */ + } else { + /* We're going into the temp buffer */ + dest = 0; + temp_buffer_size = (size_t)size_hash << level; } + unsigned char temp_buffer[ temp_buffer_size ]; + if (!dest) dest = temp_buffer; /* * Now, issue all the work items to generate the intermediate hashes @@ -215,6 +220,7 @@ bool hss_generate_private_key( struct intermed_tree_detail details; /* Set the values in the details structure that are constant */ + details.level = 0; details.seed = seed; details.lm_type = lm_type[0]; details.lm_ots_type = lm_ots_type[0]; @@ -272,11 +278,11 @@ bool hss_generate_private_key( info->error_code = got_error; hss_zeroize( private_key, sizeof private_key ); if (update_private_key) { - (void)(*update_private_key)(private_key, PRIVATE_KEY_LEN, context); + (void)(*update_private_key)(private_key, PRIVATE_KEY_LEN(levels), + context); } else { - hss_zeroize( context, PRIVATE_KEY_LEN ); + hss_zeroize( context, PRIVATE_KEY_LEN(levels) ); } - free(temp_buffer); return false; } @@ -288,6 +294,7 @@ bool hss_generate_private_key( /* Generate the top levels of the tree, ending with the root node */ merkle_index_t r, leaf_node; + hss_set_level(0); for (r=level_nodes, leaf_node = 0; leaf_node < level_nodes; r++, leaf_node++) { /* Walk up the stack, combining the current node with what's on */ @@ -333,7 +340,7 @@ bool hss_generate_private_key( /* Complete the computation of the aux data */ hss_finalize_aux_data( expanded_aux_data, size_hash, h, - private_key+PRIVATE_KEY_SEED ); + private_key+PRIVATE_KEY_SEED(levels) ); /* We have the root value; now format the public key */ put_bigendian( public_key, levels, 4 ); @@ -350,7 +357,6 @@ bool hss_generate_private_key( /* Hey, what do you know -- it all worked! 
*/ hss_zeroize( private_key, sizeof private_key ); /* Zeroize local copy of */ /* the private key */ - free(temp_buffer); return true; } @@ -362,5 +368,5 @@ size_t hss_get_private_key_len(unsigned levels, const param_set_t *lm_ots_type) { /* A private key is a 'public object'? Yes, in the sense that we */ /* export it outside this module */ - return PRIVATE_KEY_LEN; + return PRIVATE_KEY_LEN(levels); } diff --git a/hss_malloc.c b/hss_malloc.c new file mode 100644 index 0000000..bde2c32 --- /dev/null +++ b/hss_malloc.c @@ -0,0 +1,208 @@ +/* + * This is the instrumented malloc implementation; used for testing the + * HSS code for malloc issues (memory leaks, resilence against malloc + * failures, etc + * + * This is sort of electric-fence-light, however it's a lot easier in our + * case: + * - We won't have *that* many malloc's outstanding (hence a linked list + * is a reasonable database) + * - The malloc's are free'd in mostly LIFO order (which a linked list really + * likes + * - Even though we're multithreaded, only the main thread calls us (and + * so we can just ignore the issue) + */ +#include +#include +#include +#include +#include "hss.h" +#include "config.h" +#include "common_defs.h" +#include "hss_malloc.h" + +#if TEST_INSTRUMENTATION + +#include + +/* + * We put one of these both before and after the user buffer; it is used + * to detect potential overwrite/underwrites + */ +union fence { + sequence_t align1; + void *align2; + double align3; + void (*align4)(void); + unsigned char buffer[1]; +}; + +static union fence start_fence, end_fence; /* These are the expected */ + /* images we write before and after the buffer */ + +static unsigned char my_rand(void) { + static uint_fast32_t n = 0; + n += (n*n) | 5; + return n >> 24; +} + +static void set_random_fence_value( union fence *fence ) { + size_t i; + + for (i=0; i 250) continue; + fence->buffer[i++] = c; + } +} + +struct malloc_chain { + struct malloc_chain *next; + size_t length; + enum malloc_usage 
usage; + union fence start_fence; + /* We assume there is no padding here (which isn't, strictly */ + /* speaking, guarranteed by C99, however *any* same compiler will */ + /* do it). In any case, if there is padding, that just means that */ + /* our underwrite checking is less effective */ + unsigned char buffer[1]; /* We give this buffer to the application */ + /* There's an end_fence at the end of the buffer (after length bytes) */ +}; + +static struct malloc_chain *chain = 0; /* The list of outstanding malloc's */ + + +void *hss_malloc( size_t length, enum malloc_usage usage ) { + if (length == 0) { + /* While C99 allows mallocing 0 length buffers, the behavior */ + /* is implementation-defined; we error it out */ + fprintf( stderr, "Error: zero length malloc detected: usage = %d\n", + usage ); + exit(EXIT_FAILURE); + } + + /* The actual ammount we allocate */ + size_t real_length = sizeof (struct malloc_chain) + + length + sizeof(union fence); + struct malloc_chain *p = malloc( real_length ); + if (p == 0) { + /* The malloc we're using is supposed to have enough memory */ + fprintf( stderr, "Error: real malloc failure: " + "length = %u usage = %d\n", (unsigned)length, usage ); + exit(EXIT_FAILURE); + } + + /* If we're doing a first malloc (or if we've free'ed everything, and */ + /* then malloc'ing), select random start_fence, end_fence values */ + if (chain == 0) { + set_random_fence_value( &start_fence ); + set_random_fence_value( &end_fence ); + } + + /* Put the malloc on the chain */ + p->next = chain; + chain = p; + + /* Fill in the malloc length and reason */ + p->length = length; + p->usage = usage; + + /* Set the guard that goes in front of the data */ + p->start_fence = start_fence; + + /* Fill the buffer with random data; this will trip up the code if */ + /* it implicitly expects a zeroized buffer */ + size_t i; + for (i=0; ibuffer[i] = my_rand(); + } + + /* Fill in the end fence */ + memcpy( &p->buffer[i], &end_fence, sizeof end_fence ); + + + 
return p->buffer; +} + +void hss_free(void *buffer) { + if (buffer == 0) return; /* free(NULL) does nothing */ + + struct malloc_chain **p, *q = 0; + /* + * Search for the buffer on the chain + */ + for (p = &chain; *p; p = &(*p)->next ) { + q = *p; + + if (q->buffer == buffer) { + /* Found it! */ + break; + } + } + + if (!*p) { + fprintf( stderr, "Error: attempt to free unallocated buffer\n" ); + exit(EXIT_FAILURE); + } + + /* Check the fences to see if they're still intact */ + if (0 != memcmp( &q->start_fence, &start_fence, sizeof start_fence )) { + fprintf( stderr, "Error: buffer underwrite detected: usage = %d\n", + q->usage ); + exit(EXIT_FAILURE); + } + size_t length = q->length; + if (0 != memcmp( &q->buffer[ length ], &end_fence, sizeof end_fence )) { + fprintf( stderr, "Error: buffer overwrite detected: usage = %d\n", + q->usage ); + exit(EXIT_FAILURE); + } + + /* Optionally, we could scan the buffer for potential secrets */ + + /* Scrub the buffer (so that if the code tries to access it again, */ + /* it'll get random data) */ + size_t i; + for (i=0; ibuffer[ i ] = my_rand(); + } + + /* Everything looks good; remove the element from the chain */ + *p = q->next; + free(q); +} + +/* + * Report if we've seen any leaks + */ +bool hss_report_memory_leak(void) { + if (!chain) { + printf( "No memory leaks detected\n" ); /* Hurray! */ + return true; + } + printf( "Memory leaks detected:\n" ); /* Grumble... 
*/ + int i; + struct malloc_chain *p; + /* Summarize what we've seen */ + for (p = chain, i = 0; p && i < 20; p = p->next, i++) { + printf( " Buffer usage %d: length %u\n", p->usage, + (unsigned)p->length ); + } + if (p) { + printf( " And more not listed...\n" ); + } + return false; +} + +#else +/* + * Instrumentation is turned off; don't report about any memory leaks + * (as we haven't been tracking it) + */ +bool hss_report_memory_leak(void) { + return true; +} +#endif diff --git a/hss_malloc.h b/hss_malloc.h new file mode 100644 index 0000000..f02757a --- /dev/null +++ b/hss_malloc.h @@ -0,0 +1,38 @@ +#if !defined( HSS_MALLOC_H_ ) +#define HSS_MALLOC_H_ + +#include + +#if TEST_INSTRUMENTATION + +/* These are the various reasons we malloc things */ +enum malloc_usage { + mu_working_key = 1, + mu_signed_pk, + mu_stack, + mu_tree, + mu_subtree, + mu_suborder, + mu_thread_collection, + mu_work_item, + mu_max /* Last item */ +}; + +/* + * Our special instrumented malloc/free routines + */ +void *hss_malloc( size_t length, enum malloc_usage usage ); +void hss_free( void *buffer ); + +#else + +/* + * Instrumentation is turned off; go directly to the C library with malloc + * and free requests + */ +#define hss_malloc(length, usage) malloc(length) +#define hss_free(buffer) free(buffer) + +#endif + +#endif /* HSS_MALLOC_H_ */ diff --git a/hss_param.c b/hss_param.c index 0788370..b59b2d7 100644 --- a/hss_param.c +++ b/hss_param.c @@ -3,6 +3,7 @@ #include "hss_internal.h" #include "endian.h" #include "hss_zeroize.h" +#include "lm_common.h" /* * Convert a parameter set into the compressed version we use within a private @@ -16,8 +17,9 @@ bool hss_compress_param_set( unsigned char *compressed, size_t len_compressed ) { int i; + if (levels > len_compressed) return false; + for (i=0; ierror_code; + } else { + error = &temp_error; + } + unsigned char private_key[ HSS_MAX_PRIVATE_KEY_LEN ]; bool success = false; + unsigned levels; if (read_private_key) { - if (!read_private_key( 
private_key, PRIVATE_KEY_SEED, context )) { + if (!read_private_key( private_key, + PRIVATE_KEY_FORMAT + PRIVATE_KEY_FORMAT_LEN, context) || + (levels = private_key[PRIVATE_KEY_FORMAT_NUM_LEVEL]) < 1 || + levels > MAX_HSS_LEVELS || + !read_private_key( private_key, + PRIVATE_KEY_LEN(levels), context)) { + *error = hss_error_private_key_read_failed; goto failed; } } else { - if (!context) return false; - memcpy( private_key, context, PRIVATE_KEY_SEED ); + if (!context) { + *error = hss_error_no_private_buffer; + return false; + } + levels = ((unsigned char*)context)[PRIVATE_KEY_FORMAT_NUM_LEVEL]; + if (levels < 1 || levels > MAX_HSS_LEVELS) { + *error = hss_error_bad_private_key; + goto failed; + } + memcpy( private_key, context, PRIVATE_KEY_LEN(levels) ); + } + if (!hss_check_private_key(private_key)) { + *error = hss_error_bad_private_key; + goto failed; } /* Scan through the private key to recover the parameter sets */ unsigned total_height = 0; unsigned level; - for (level=0; level < MAX_HSS_LEVELS; level++) { - unsigned char c = private_key[PRIVATE_KEY_PARAM_SET + level]; - if (c == PARM_SET_END) break; + for (level=0; level < levels; level++) { + unsigned char c = private_key[PRIVATE_KEY_PARAM_SET(levels) + level]; /* Decode this level's parameter set */ param_set_t lm = (c >> 4); param_set_t ots = (c & 0x0f); @@ -103,30 +125,25 @@ bool hss_get_parameter_set( unsigned *levels, case LMS_SHA256_N32_H15: total_height += 15; break; case LMS_SHA256_N32_H20: total_height += 20; break; case LMS_SHA256_N32_H25: total_height += 25; break; - default: goto failed; + default: + *error = hss_error_bad_private_key; + goto failed; } switch (ots) { case LMOTS_SHA256_N32_W1: case LMOTS_SHA256_N32_W2: case LMOTS_SHA256_N32_W4: case LMOTS_SHA256_N32_W8: - break; - default: goto failed; + break; + default: + *error = hss_error_bad_private_key; + goto failed; } lm_type[level] = lm; lm_ots_type[level] = ots; } - if (level < MIN_HSS_LEVELS || level > MAX_HSS_LEVELS) goto failed; - - 
*levels = level; - - /* Make sure that the rest of the private key has PARM_SET_END */ - unsigned i; - for (i = level+1; i max_count) goto failed; /* Private key expired */ + if (current_count > max_count) { + *error = hss_error_private_key_expired; + goto failed; + } success = true; /* It worked! */ failed: @@ -151,3 +171,32 @@ bool hss_get_parameter_set( unsigned *levels, hss_zeroize( private_key, sizeof private_key ); return success; } + +/* Compute the max number of signatures we can generate */ +sequence_t hss_get_max_seqno( int levels, const param_set_t *lm_type ) { + int total_height = 0; + int i; + + for (i=0; i 64) total_height = 64; /* (bounded by 2**64) */ + + sequence_t max_seqno = ((sequence_t)2 << (total_height-1)) - 1; + /* height-1 so we don't try to shift by 64, and hit undefined */ + /* behavior */ + + /* We use the count 0xffff..ffff to signify 'we've used up all our */ + /* signatures'. Make sure that is above max_count, even for */ + /* parameter sets that can literally generate 2**64 signatures (by */ + /* letting them generate only 2**64-1) */ + if (total_height == 64) max_seqno--; + + return max_seqno; +} + diff --git a/hss_reserve.c b/hss_reserve.c index 986d6f9..660c7ab 100644 --- a/hss_reserve.c +++ b/hss_reserve.c @@ -3,6 +3,7 @@ #include "hss_internal.h" #include "hss_reserve.h" #include "endian.h" +#include "hss_fault.h" /* * Initialize the reservation count to the given value @@ -31,20 +32,15 @@ bool hss_set_autoreserve(struct hss_working_key *w, } /* - * This is called when we generate a signature; it checks if we need - * to write out a new private key (and advance the reservation); if it - * decides it needs to write out a new private key, it also decides how - * far it needs to advance it + * This is called when we generate a signature; it checks if we hit the + * end of the current key. 
*/ -bool hss_advance_count(struct hss_working_key *w, sequence_t cur_count, - bool (*update_private_key)(unsigned char *private_key, - size_t len_private_key, void *context), - void *context, +bool hss_check_end_key(struct hss_working_key *w, sequence_t cur_count, struct hss_extra_info *info, bool *trash_private_key) { if (cur_count == w->max_count) { - /* We hit the end of the root; this will be the last signature */ - /* this private key can do */ + /* We hit the end of what we're allowed to do with this private key */ + /* This will be the last signature this private key can do */ w->status = hss_error_private_key_expired; /* Fail if they try to */ /* sign any more */ info->last_signature = true; @@ -52,47 +48,153 @@ bool hss_advance_count(struct hss_working_key *w, sequence_t cur_count, *trash_private_key = true; /* We can't trash our copy of the */ /* private key until after we've generated the signature */ /* We can trash the copy in secure storage, though */ - if (update_private_key) { - unsigned char private_key[PRIVATE_KEY_LEN]; - memset( private_key, PARM_SET_END, PRIVATE_KEY_LEN ); - if (!update_private_key(private_key, PRIVATE_KEY_LEN, context)) { + if (w->update_private_key) { + unsigned char private_key[PRIVATE_KEY_LEN(MAX_HSS_LEVELS)]; + memset( private_key, 0xff, PRIVATE_KEY_LEN(w->levels) ); + if (!w->update_private_key(private_key, PRIVATE_KEY_LEN(w->levels), + w->context)) { info->error_code = hss_error_private_key_write_failed; return false; } } else { - memset( context, PARM_SET_END, PRIVATE_KEY_LEN ); + memset( w->context, 0xff, PRIVATE_KEY_LEN(w->levels) ); } - return true; } - sequence_t new_count = cur_count + 1; + return true; +} - if (new_count > w->reserve_count) { - /* We need to advance the reservation */ +#if FAULT_CACHE_SIG +/* + * This is called when we advance the reservation; we assume that the hashes + * currently reflect the state old_count, and we want to update the hashes to + * reflect new_count. 
This will mark any hashes as 'uncomputed' if we haven't + * computed them yet (in the new_count state). + * This will return the number of hashes we'll need to write to NVRAM + */ +static int update_cached_sigs_to_reflect_new_count( struct hss_working_key *w, + sequence_t old_count, sequence_t new_count ) { + int num_cache_to_update = 0; + int i, slot; + sequence_t diff = old_count ^ new_count; + for (i = w->levels-1, slot=0; i>=0; i--, slot++) { + struct merkle_level *tree = w->tree[0][i]; + diff >>= tree->level; + if (diff == 0) break; /* We use the same sigs from here */ - /* Check if we have enough space to do the entire autoreservation */ - if (w->max_count - new_count > w->autoreserve) { - new_count += w->autoreserve; - } else { - /* If we don't have enough space, reserve what we can */ - new_count = w->max_count; - } + /* When we switch to the new_count, we'll be using a different */ + /* singature at this level. We don't know what that is yet, so */ + /* just mark it as TBD */ + memset( w->private_key + PRIVATE_KEY_SIG_CACHE + + slot*FAULT_CACHE_LEN, + 0, FAULT_CACHE_LEN ); + num_cache_to_update = slot + 1; /* Remember to write it to NVRAM */ + } + return num_cache_to_update; +} +#endif - put_bigendian( w->private_key + PRIVATE_KEY_INDEX, new_count, - PRIVATE_KEY_INDEX_LEN ); - if (update_private_key) { - if (!update_private_key(w->private_key, PRIVATE_KEY_INDEX_LEN, - context)) { - /* Oops, we couldn't write the private key; undo the */ - /* reservation advance (and return an error) */ - info->error_code = hss_error_private_key_write_failed; - put_bigendian( w->private_key + PRIVATE_KEY_INDEX, - w->reserve_count, PRIVATE_KEY_INDEX_LEN ); +/* + * This is called when we generate a signature; it updates the private + * key in nvram (if needed), and advances the reservation (again, if needed) + * If it decides it needs to write out a new private key, it also decides how + * far it needs to advance it + */ +bool hss_advance_count(struct hss_working_key *w, 
sequence_t cur_count, + struct hss_extra_info *info, int num_sigs_updated) { + int sigs_to_write = 0; +#if FAULT_CACHE_SIG + /* Check to see if we've updated a sig that we need to write to NVRAM */ + { + /* If set, we'll update all the new hashes we have */ + bool force_update = (cur_count > w->reserve_count); + /* This tells us which hashes the new count uses (as compared to */ + /* the reservation state */ + sequence_t diff = cur_count ^ w->reserve_count; + int slot; + for (slot=0; slotlevels - 1 - slot; + struct merkle_level *tree = w->tree[0][i]; + diff >>= tree->level; + if (!force_update && diff != 0) { + continue; /* Nope; at the reservation point, we use a */ + /* different signature; don't update it */ + } + /* The cur_count has this new signature, while the current */ + /* reservation state has a previous signature (or none) */ + /* We'll need to update the signature in the private key */ + /* so we can check it later */ + unsigned char *sig_hash = w->private_key + PRIVATE_KEY_SIG_CACHE + + slot * FAULT_CACHE_LEN; + hss_set_level(i-1); + if (!hss_compute_hash_for_cache( sig_hash, w->signed_pk[i], + w->siglen[i-1] )) { return false; } + sigs_to_write = slot+1; /* Make sure we write it */ + } + } + /* At this point, the signatures within the private key reflect the */ + /* state at cur_count. 
And, if that differs from the signature state */ + /* at w->reserve_count, then we'll have sigs_to_write > 0 */ +#endif + + /* + * We need to update the NVRAM if either we've gone past what has been + * previously reserved, or we need to update one of the hashed signatures + * stored in the NVRAM copy of the private key + */ + if (cur_count > w->reserve_count || sigs_to_write > 0) { + /* We need to update the NVRAM */ + sequence_t res_count; /* The state that we'll write into NVRAM */ + + /* Figure out what the new reservation (that is, what we should */ + /* write to NVRAM) should be */ + if (w->max_count - cur_count <= w->autoreserve) { + /* The autoreservation would go past the end of where we're */ + /* allowed to go - just reserve everything */ + res_count = w->max_count; + } else if (w->reserve_count < w->autoreserve || + cur_count > w->reserve_count - w->autoreserve) { + /* The autoreservation based on the current count would go */ + /* past the current reservation */ + res_count = cur_count + w->autoreserve; } else { - put_bigendian( context, new_count, PRIVATE_KEY_INDEX_LEN ); + /* We're updating the signature hashes we store in the */ + /* private key, but keeping the reservation the same */ + res_count = w->reserve_count; + } + +#if FAULT_CACHE_SIG + /* + * The hashed sigs now reflect the state at cur_count; because of + * autoreservation, we may have advanced things past that. 
Update + * the hashed sigs to reflect the new res_count + */ + int more_sigs_to_write = update_cached_sigs_to_reflect_new_count( w, + cur_count, res_count ); + if (more_sigs_to_write > sigs_to_write) { + /* This second update may cause us to rewrite more hashed sigs */ + /* than the original update */ + sigs_to_write = more_sigs_to_write; + } +#endif + + put_bigendian( w->private_key + PRIVATE_KEY_INDEX, res_count, + PRIVATE_KEY_INDEX_LEN ); + enum hss_error_code e = hss_write_private_key( w->private_key, w, + sigs_to_write ); + if (e != hss_error_none) { + /* Oops, we couldn't write the private key; undo the */ + /* reservation advance (and return an error) */ + info->error_code = e; + /* The state of the NVRAM is out of sync with the in-memory */ + /* version. Instead of trying to fix tihs, throw up our hands */ + /* and mark the entire working state as 'unusable' */ + w->status = e; + + return false; } - w->reserve_count = new_count; + w->reserve_count = res_count; } return true; @@ -110,9 +212,6 @@ bool hss_advance_count(struct hss_working_key *w, sequence_t cur_count, */ bool hss_reserve_signature( struct hss_working_key *w, - bool (*update_private_key)(unsigned char *private_key, - size_t len_private_key, void *context), - void *context, unsigned sigs_to_reserve, struct hss_extra_info *info) { struct hss_extra_info temp_info = { 0 }; @@ -138,8 +237,9 @@ bool hss_reserve_signature( * a raw private key (which is cheap to update), however there's no * reason we shouldn't support it */ - if (!update_private_key) { - if (0 != memcmp( context, w->private_key, PRIVATE_KEY_LEN)) { + if (!w->update_private_key) { + if (0 != memcmp( w->context, w->private_key, + PRIVATE_KEY_LEN(w->levels))) { info->error_code = hss_error_key_mismatch; return false; /* Private key mismatch */ } @@ -149,11 +249,18 @@ bool hss_reserve_signature( sequence_t current_count = 0; int i; for (i = 0; ilevels; i++) { - struct merkle_level *tree = w->tree[i]; + struct merkle_level *tree = 
w->tree[0][i]; /* -1 because the current_index counts the signatures to the */ /* current next level */ current_count = (current_count << tree->level) + tree->current_index - 1; +#if FAULT_RECOMPUTE + struct merkle_level *tree_redux = w->tree[1][i]; + if (tree->level != tree_redux->level || + tree->current_index != tree_redux->current_index) { + return false; /* Mismatch between primage and redundant trees */ + } +#endif } current_count += 1; /* The bottom-most tree isn't advanced */ @@ -172,21 +279,27 @@ bool hss_reserve_signature( return true; } + int num_cache_to_update = 0; +#if FAULT_CACHE_SIG + num_cache_to_update = update_cached_sigs_to_reflect_new_count(w, + w->reserve_count, new_reserve_count); +#endif + /* Attempt to update the count in the private key */ put_bigendian( w->private_key + PRIVATE_KEY_INDEX, new_reserve_count, PRIVATE_KEY_INDEX_LEN ); /* Update the copy in NV storage */ - if (update_private_key) { - if (!update_private_key(w->private_key, PRIVATE_KEY_INDEX_LEN, - context)) { - /* Oops, couldn't update it */ - put_bigendian( w->private_key + PRIVATE_KEY_INDEX, - w->reserve_count, PRIVATE_KEY_INDEX_LEN ); - info->error_code = hss_error_private_key_write_failed; - return false; - } - } else { - memcpy( context, w->private_key, PRIVATE_KEY_INDEX_LEN ); + enum hss_error_code e = hss_write_private_key(w->private_key, w, + num_cache_to_update); + if (e != hss_error_none) { + /* Oops, couldn't update it */ + info->error_code = e; + /* The state of the NVRAM is out of sync with the in-memory */ + /* version. 
Instead of trying to fix tihs, throw up our hands */ + /* and mark the entire working state as 'unusable' */ + w->status = e; + + return false; } w->reserve_count = new_reserve_count; diff --git a/hss_reserve.h b/hss_reserve.h index 3b101c1..0e65d46 100644 --- a/hss_reserve.h +++ b/hss_reserve.h @@ -12,10 +12,10 @@ struct hss_working_key; void hss_set_reserve_count(struct hss_working_key *w, sequence_t count); -bool hss_advance_count(struct hss_working_key *w, sequence_t new_count, - bool (*update_private_key)(unsigned char *private_key, - size_t len_private_key, void *context), - void *context, +bool hss_check_end_key(struct hss_working_key *w, sequence_t new_count, struct hss_extra_info *info, bool *trash_private_key); +bool hss_advance_count(struct hss_working_key *w, sequence_t new_count, + struct hss_extra_info *info, int num_sigs_updated); + #endif /* HSS_RESERVE_H_ */ diff --git a/hss_sign.c b/hss_sign.c index 05d4159..5936888 100644 --- a/hss_sign.c +++ b/hss_sign.c @@ -15,6 +15,7 @@ #include "lm_ots.h" #include "lm_ots_common.h" #include "hss_derive.h" +#include "hss_fault.h" /* * This adds one leaf to the building and next subtree. @@ -221,14 +222,78 @@ static int generate_merkle_signature( index >>= height; } - /* Mark that we've generated a signature */ - tree->current_index = current_index + 1; - return 1; } +#if FAULT_RECOMPUTE +/* + * Verify that the Merkle signature is what we are supposed to generate + * Hmmmm, do we really need to double-check anythhing other than the OTS + * signature? 
+ */ +static bool doublecheck_merkle_signature( + const unsigned char *signature, unsigned signature_len, + struct merkle_level *tree, + const struct hss_working_key *w, + const void *message, size_t message_len) { + /* First off, check the index value */ + if (signature_len < 4) return false; + merkle_index_t current_index = tree->current_index; + if (current_index != get_bigendian( signature, 4 )) return false; + signature += 4; signature_len -= 4; + + /* Verify the OTS signature */ + size_t ots_sig_size = lm_ots_get_signature_len( tree->lm_ots_type ); + if (ots_sig_size == 0 || ots_sig_size > signature_len) return 0; + { + struct seed_derive derive; + if (!hss_seed_derive_init( &derive, + tree->lm_type, tree->lm_ots_type, + tree->I, tree->seed )) return false; + hss_seed_derive_set_q(&derive, current_index); + bool success = lm_ots_doublecheck_signature( tree->lm_ots_type, + tree->I, + current_index, &derive, + message, message_len, + signature, ots_sig_size); + hss_seed_derive_done(&derive); + if (!success) return false; + } + signature += ots_sig_size; signature_len -= ots_sig_size; + + /* Verify the LM parameter set */ + if (signature_len < 4) return 0; + if (tree->lm_type != get_bigendian( signature, 4 )) return false; + signature += 4; signature_len -= 4; + + /* Now, doublecheck the authentication path */ + int i, j; + merkle_index_t index = current_index; + unsigned n = tree->hash_size; + for (i = tree->sublevels-1; i>=0; i--) { + int height = (i == 0) ? 
tree->top_subtree_size : tree->subtree_size; + struct subtree *subtree = tree->subtree[i][ACTIVE_TREE]; + merkle_index_t subtree_index = (index & + (((merkle_index_t)1 << height) - 1)) + + ((merkle_index_t)1 << height); + for (j = height-1; j>=0; j--) { + if (signature_len < n) return 0; + if (0 != memcmp( signature, + subtree->nodes + n * ((subtree_index^1) - 1), n )) { + return false; + } + signature += n; signature_len -= n; + subtree_index >>= 1; + } + index >>= height; + } + + return true; +} +#endif + /* - * This signed the root of tree with the parent; it places both the signature + * This signs the root of tree with the parent; it places both the signature * and the public key into signed_key */ bool hss_create_signed_public_key(unsigned char *signed_key, @@ -263,6 +328,57 @@ bool hss_create_signed_public_key(unsigned char *signed_key, return true; } +/* This marks the signature as having been generated */ +void hss_step_tree(struct merkle_level *tree) { + tree->current_index += 1; +} + +#if FAULT_RECOMPUTE +/* + * This checks the siganture of the root of the tree against what was + * previously signed, making sure that we signed what we expect. 
+ * Note that we don't actually check the signature; the goal is to make sure + * that we don't accidentally sign two different messages with the same index + */ +bool hss_doublecheck_signed_public_key(const unsigned char *signed_key, + size_t len_signature, + struct merkle_level *tree, + struct merkle_level *parent, + struct hss_working_key *w) { + /* Where we place the public key */ + const unsigned char *public_key = signed_key + len_signature; + + /* Place the public key there */ + if (tree->lm_type != get_bigendian( public_key + 0, 4 ) || + tree->lm_ots_type != get_bigendian( public_key + 4, 4 ) || + 0 != memcmp( public_key + 8, tree->I, I_LEN )) { + return false; + } + + unsigned hash_size = tree->hash_size; + /* This is where the root hash is */ + if (0 != memcmp( public_key + 8 + I_LEN, + tree->subtree[0][ACTIVE_TREE]->nodes, + hash_size )) { + return false; + } + + unsigned len_public_key = 8 + I_LEN + hash_size; + + /* Now, check the signature */ + if (!doublecheck_merkle_signature( signed_key, len_signature, + parent, w, public_key, len_public_key)) { + return false; + } + + parent->update_count = UPDATE_NEXT; /* The parent has doublechecked a */ + /* signature; it's now eligible for another */ + /* round of updates */ + + return true; +} +#endif + struct gen_sig_detail { unsigned char *signature; size_t signature_len; @@ -301,10 +417,20 @@ static void do_gen_sig( const void *detail, struct thread_collection *col) { const unsigned char *message = d->message; size_t message_len = d->message_len; + hss_set_level(levels - 1); if (!generate_merkle_signature(signature, signature_len, - w->tree[ levels-1 ], w, message, message_len)) { + w->tree[0][ levels-1 ], w, message, message_len)) { goto failed; } + hss_step_tree(w->tree[0][ levels-1 ]); +#if FAULT_RECOMPUTE + if (levels > 1) { + hss_step_tree(w->tree[1][ levels-1 ]); + } +#endif + + /* Note: this is the bottommost signature; it doesn't need to be */ + /* double-checked */ /* Success! 
*/ return; @@ -328,6 +454,7 @@ static void do_step_next( const void *detail, struct thread_collection *col) { struct hss_working_key *w = d->w; struct merkle_level *tree = d->tree; + hss_set_level( w->levels - 1 ); if (!hss_step_next_tree( tree, w, col )) { /* Report failure */ hss_thread_before_write(col); @@ -339,6 +466,7 @@ static void do_step_next( const void *detail, struct thread_collection *col) { struct step_building_detail { struct merkle_level *tree; struct subtree *subtree; + int tree_level; enum hss_error_code *got_error; }; /* This steps the building tree */ @@ -348,6 +476,7 @@ static void do_step_building( const void *detail, const struct step_building_detail *d = detail; struct merkle_level *tree = d->tree; struct subtree *subtree = d->subtree; + hss_set_level( d->tree_level ); switch (subtree_add_next_node( subtree, tree, 0, col )) { case subtree_got_error: default: @@ -366,6 +495,7 @@ static void do_step_building( const void *detail, struct update_parent_detail { struct hss_working_key *w; enum hss_error_code *got_error; + int redux; }; /* * This gives an update to the parent (non-bottom Merkle trees) @@ -375,10 +505,14 @@ static void do_update_parent( const void *detail, const struct update_parent_detail *d = detail; struct hss_working_key *w = d->w; unsigned levels = w->levels; + int redux = d->redux; unsigned current_level = levels - 2; /* We start with the first */ /* non-bottom level */ for (;;) { - struct merkle_level *tree = w->tree[current_level]; + if (redux == 1 && current_level == 0) return; + hss_set_level(current_level); + + struct merkle_level *tree = w->tree[redux][current_level]; switch (tree->update_count) { case UPDATE_DONE: return; /* No more updates needed */ case UPDATE_NEXT: /* Our job is to update the next tree */ @@ -442,9 +576,6 @@ static void do_update_parent( const void *detail, */ bool hss_generate_signature( struct hss_working_key *w, - bool (*update_private_key)(unsigned char *private_key, - size_t len_private_key, 
void *context), - void *context, const void *message, size_t message_len, unsigned char *signature, size_t signature_buf_len, struct hss_extra_info *info) { @@ -466,8 +597,9 @@ bool hss_generate_signature( /* If we're given a raw private key, make sure it's the one we're */ /* thinking of */ - if (!update_private_key) { - if (0 != memcmp( context, w->private_key, PRIVATE_KEY_LEN)) { + if (!w->update_private_key) { + if (0 != memcmp( w->context, w->private_key, + PRIVATE_KEY_LEN(w->levels))) { info->error_code = hss_error_key_mismatch; return false; /* Private key mismatch */ } @@ -486,18 +618,25 @@ bool hss_generate_signature( */ sequence_t current_count = 0; for (i=0; i < levels; i++) { - struct merkle_level *tree = w->tree[i]; + struct merkle_level *tree = w->tree[0][i]; current_count <<= tree->level; /* We subtract 1 because the nonbottom trees are already advanced */ current_count += (sequence_t)tree->current_index - 1; +#if FAULT_RECOMPUTE + struct merkle_level *tree_redux = w->tree[1][i]; + if (tree->level != tree_redux->level || + tree->current_index != tree_redux->current_index) { + /* We're inconsistent */ + info->error_code = hss_error_internal; + goto failed; + } +#endif } current_count += 1; /* Bottom most tree isn't already advanced */ - /* Ok, try to advance the private key */ - if (!hss_advance_count(w, current_count, - update_private_key, context, info, - &trash_private_key)) { - /* hss_advance_count fills in the error reason */ + /* Ok, check if we hit the end of the private key */ + if (!hss_check_end_key(w, current_count, info, &trash_private_key)) { + /* hss_check_end_key fills in the error reason */ goto failed; } @@ -520,21 +659,33 @@ bool hss_generate_signature( hss_thread_issue_work(col, do_gen_sig, &gen_detail, sizeof gen_detail); } + /* If this is the last signature, we needn't bother to update the */ + /* various Merkle trees (as we'll just throw them away) */ + /* In addition, we sometimes get an error trying to derive a */ + /* seed 
past the end */ + if (trash_private_key) goto dont_bother_updating_trees; + /* Update the bottom level next tree */ if (levels > 1) { - struct step_next_detail step_detail; - step_detail.w = w; - step_detail.tree = w->tree[levels-1]; - step_detail.got_error = &got_error; + int redux; + for (redux = 0; redux <= FAULT_RECOMPUTE; redux++) { + struct step_next_detail step_detail; + step_detail.w = w; + step_detail.tree = w->tree[redux][levels-1]; + step_detail.got_error = &got_error; - hss_thread_issue_work(col, do_step_next, &step_detail, sizeof step_detail); + hss_thread_issue_work(col, do_step_next, &step_detail, + sizeof step_detail); + } } - /* Issue orders to step each of the building subtrees in the bottom tree */ - int skipped_a_level = 0; /* Set if the below issued didn't issue an */ - /* order for at least one level */ - { - struct merkle_level *tree = w->tree[levels-1]; + int redux; + for (redux = 0; redux <= FAULT_RECOMPUTE; redux++) { + if (redux == 1 && levels == 1) continue; + + int skipped_a_level = 0; /* Set if the below issued didn't issue */ + /* an order for at least one level */ + struct merkle_level *tree = w->tree[redux][levels-1]; merkle_index_t updates_before_end = tree->max_index - tree->current_index + 1; int h_subtree = tree->subtree_size; int i; @@ -551,26 +702,31 @@ bool hss_generate_signature( step_detail.tree = tree; step_detail.subtree = subtree; step_detail.got_error = &got_error; + step_detail.tree_level = levels-1; hss_thread_issue_work(col, do_step_building, &step_detail, sizeof step_detail); } /* If there's only one sublevel, act as if we always skipped a sublevel */ if (tree->sublevels == 1) skipped_a_level = 1; - } - /* - * And, if we're allowed to give the parent a chance to update, and - * there's a parent with some updating that needs to be done, schedule - * that to be done - */ - if (skipped_a_level && - levels > 1 && w->tree[levels-2]->update_count != UPDATE_DONE) { - struct update_parent_detail detail; - detail.w = w; - 
detail.got_error = &got_error; - hss_thread_issue_work(col, do_update_parent, &detail, sizeof detail); + /* + * And, if we're allowed to give the parent a chance to update, and + * there's a parent with some updating that needs to be done, schedule + * that to be done + */ + if (skipped_a_level && + levels > 1 && w->tree[0][levels-2]->update_count != UPDATE_DONE) { + if (redux && levels <= 2) continue; /* The previous iteration */ + /* already took care of the parent */ + struct update_parent_detail detail; + detail.w = w; + detail.got_error = &got_error; + detail.redux = redux; + hss_thread_issue_work(col, do_update_parent, &detail, sizeof detail); + } } +dont_bother_updating_trees: /* Wait for all of them to finish */ hss_thread_done(col); @@ -588,124 +744,156 @@ bool hss_generate_signature( * Now, we scan to see if we exhausted a Merkle tree, and need to update it * At the same time, we check to see if we need to advance the subtrees */ - sequence_t cur_count = current_count; - unsigned merkle_levels_below = 0; - int switch_merkle = w->levels; - struct merkle_level *tree; - for (i = w->levels-1; i>=0; i--, merkle_levels_below += tree->level) { - tree = w->tree[i]; - - if (0 == (cur_count & (((sequence_t)1 << (merkle_levels_below + tree->level))-1))) { - /* We exhausted this tree */ - if (i == 0) { - /* We've run out of signatures; we've already caught this */ - /* above; just make *sure* we've marked the key as */ - /* unusable, and give up */ - w->status = hss_error_private_key_expired; - break; + if (trash_private_key) goto dont_update_these_either; + int num_sig_updated = 0; + for (redux = 0; redux <= FAULT_RECOMPUTE; redux++) { + struct merkle_level *tree; + unsigned merkle_levels_below = 0; + int switch_merkle = w->levels; + for (i = w->levels-1; i>=0; i--, merkle_levels_below += tree->level) { + if (redux == 1 && i == 0) break; + tree = w->tree[redux][i]; + + if (0 == (current_count & (((sequence_t)1 << (merkle_levels_below + tree->level))-1))) { + /* We 
exhausted this tree */ + if (i == 0) { + /* We've run out of signatures; we've already caught */ + /* this above; just make *sure* we've marked the key as */ + /* unusable, and give up */ + w->status = hss_error_private_key_expired; + break; + } + + /* Remember we'll need to switch to the NEXT_TREE */ + switch_merkle = i; + continue; } - - /* Remember we'll need to switch to the NEXT_TREE */ - switch_merkle = i; - continue; - } - - /* Check if we need to advance any of the subtrees */ - unsigned subtree_levels_below = 0; - int j; - for (j = tree->sublevels-1; j>0; j--) { - subtree_levels_below += tree->subtree_size; - if (0 != (cur_count & (((sequence_t)1 << (merkle_levels_below + subtree_levels_below))-1))) { - /* We're in the middle of this subtree */ - goto done_advancing; + + /* Check if we need to advance any of the subtrees */ + unsigned subtree_levels_below = 0; + int j; + for (j = tree->sublevels-1; j>0; j--) { + subtree_levels_below += tree->subtree_size; + if (0 != (current_count & (((sequence_t)1 << (merkle_levels_below + subtree_levels_below))-1))) { + /* We're in the middle of this subtree */ + goto done_advancing; + } + + /* Switch to the building subtree */ + struct subtree *next = tree->subtree[j][BUILDING_TREE]; + struct subtree *prev = tree->subtree[j][ACTIVE_TREE]; + unsigned char *stack = next->stack; /* Stack stays with */ + /* building tree */ + tree->subtree[j][ACTIVE_TREE] = next; + /* We need to reset the parameters on the new building */ + /* subtree */ + prev->current_index = 0; + prev->left_leaf += (merkle_index_t)2 << subtree_levels_below; + tree->subtree[j][BUILDING_TREE] = prev; + next->stack = NULL; + prev->stack = stack; } - - /* Switch to the building subtree */ - struct subtree *next = tree->subtree[j][BUILDING_TREE]; - struct subtree *prev = tree->subtree[j][ACTIVE_TREE]; - unsigned char *stack = next->stack; /* Stack stays with */ - /* building tree */ - tree->subtree[j][ACTIVE_TREE] = next; - /* We need to reset the parameters 
on the new building subtree */ - prev->current_index = 0; - prev->left_leaf += (merkle_index_t)2 << subtree_levels_below; - tree->subtree[j][BUILDING_TREE] = prev; - next->stack = NULL; - prev->stack = stack; } - } done_advancing: - /* Check if we used up any Merkle trees; if we have, switch to the */ - /* NEXT_TREE (which we've built in our spare time) */ - for (i = switch_merkle; i < w->levels; i++) { - struct merkle_level *tree = w->tree[i]; - struct merkle_level *parent = w->tree[i-1]; - int j; - - /* Rearrange the subtrees */ - for (j=0; jsublevels; j++) { - /* Make the NEXT_TREE active; replace it with the current active */ - struct subtree *active = tree->subtree[j][NEXT_TREE]; - struct subtree *next = tree->subtree[j][ACTIVE_TREE]; - unsigned char *stack = active->stack; /* Stack stays with */ - /* next tree */ - - active->left_leaf = 0; - next->current_index = 0; - next->left_leaf = 0; - tree->subtree[j][ACTIVE_TREE] = active; - tree->subtree[j][NEXT_TREE] = next; - active->stack = NULL; - next->stack = stack; - if (j > 0) { - /* Also reset the building tree */ - struct subtree *building = tree->subtree[j][BUILDING_TREE]; - building->current_index = 0; - merkle_index_t size_subtree = (merkle_index_t)1 << - (tree->subtree_size + building->levels_below); - building->left_leaf = size_subtree; - } - } - /* Copy in the value of seed, I we'll use for the new tree */ - memcpy( tree->seed, tree->seed_next, SEED_LEN ); - memcpy( tree->I, tree->I_next, I_LEN ); - - /* Compute the new next I, which is derived from either the parent's */ - /* I or the parent's I_next value */ - merkle_index_t index = parent->current_index; - if (index == parent->max_index) { - if (!hss_generate_child_seed_I_value(tree->seed_next, tree->I_next, - parent->seed_next, parent->I_next, 0, - parent->lm_type, - parent->lm_ots_type)) { - info->error_code = hss_error_internal; - goto failed; + /* Check if we used up any Merkle trees; if we have, switch to the */ + /* NEXT_TREE (which we've 
built in our spare time) */ + for (i = switch_merkle; i < w->levels; i++) { + struct merkle_level *tree = w->tree[redux][i]; + struct merkle_level *parent = w->tree[redux][i-1]; + int j; + + /* Rearrange the subtrees */ + for (j=0; jsublevels; j++) { + /* Make the NEXT_TREE active; replace it with the current active */ + struct subtree *active = tree->subtree[j][NEXT_TREE]; + struct subtree *next = tree->subtree[j][ACTIVE_TREE]; + unsigned char *stack = active->stack; /* Stack stays with */ + /* next tree */ + + active->left_leaf = 0; + next->current_index = 0; + next->left_leaf = 0; + tree->subtree[j][ACTIVE_TREE] = active; + tree->subtree[j][NEXT_TREE] = next; + active->stack = NULL; + next->stack = stack; + if (j > 0) { + /* Also reset the building tree */ + struct subtree *building = tree->subtree[j][BUILDING_TREE]; + building->current_index = 0; + merkle_index_t size_subtree = (merkle_index_t)1 << + (tree->subtree_size + building->levels_below); + building->left_leaf = size_subtree; + } } - } else { - if (!hss_generate_child_seed_I_value(tree->seed_next, tree->I_next, - parent->seed, parent->I, index+1, - parent->lm_type, - parent->lm_ots_type)) { - info->error_code = hss_error_internal; - goto failed; + + /* Copy in the value of seed, I we'll use for the new tree */ + memcpy( tree->seed, tree->seed_next, SEED_LEN ); + memcpy( tree->I, tree->I_next, I_LEN ); + + /* Compute the new next I, which is derived from either the parent's */ + /* I or the parent's I_next value */ + merkle_index_t index = parent->current_index; + hss_set_level(i); + if (index == parent->max_index) { + hss_generate_child_seed_I_value(tree->seed_next, tree->I_next, + parent->seed_next, parent->I_next, 0, + parent->lm_type, + parent->lm_ots_type, i); + } else { + hss_generate_child_seed_I_value( tree->seed_next, tree->I_next, + parent->seed, parent->I, index+1, + parent->lm_type, + parent->lm_ots_type, i); + } + + tree->current_index = 0; /* We're starting this from scratch */ + + /* 
Generate the signature of the new level */ + hss_set_level(i-1); /* Now we'll work with the parent */ + /* tree hashes */ +#if FAULT_RECOMPUTE + /* Double check the signature we make last iteration */ + if (redux) { + if (!hss_doublecheck_signed_public_key( w->signed_pk[i], w->siglen[i-1], + tree, parent, w )) { + info->error_code = hss_error_fault_detected; + goto failed; + } + hss_step_tree( parent ); + } else +#endif + { + if (!hss_create_signed_public_key( w->signed_pk[i], w->siglen[i-1], + tree, parent, w )) { + info->error_code = hss_error_internal; + goto failed; + } + num_sig_updated += 1; + + /* Mark that we've generated a signature, UNLESS we'll be */ + /* using the tree with the redux == 1 iteration. In that */ + /* case, we'll step the tree then */ + if (!FAULT_RECOMPUTE || i > 1) { + hss_step_tree( parent ); + } } - } - - tree->current_index = 0; /* We're starting this from scratch */ - - /* Generate the signature of the new level */ - if (!hss_create_signed_public_key( w->signed_pk[i], w->siglen[i-1], - tree, parent, w )) { - info->error_code = hss_error_internal; - goto failed; } } + /* Now, update the NVRAM private key */ + if (!hss_advance_count(w, current_count, info, num_sig_updated)) { + /* hss_check_end fills in the error reason */ + goto failed; + } + +dont_update_these_either: + /* And we've set things up for the next signature... 
*/ if (trash_private_key) { - memset( w->private_key, PARM_SET_END, PRIVATE_KEY_LEN ); + memset( w->private_key, 0xff, sizeof w->private_key ); } return true; @@ -713,7 +901,7 @@ bool hss_generate_signature( failed: if (trash_private_key) { - memset( w->private_key, PARM_SET_END, PRIVATE_KEY_LEN ); + memset( w->private_key, 0xff, sizeof w->private_key ); } /* On failure, make sure that we don't return anything that might be */ @@ -733,8 +921,8 @@ size_t hss_get_signature_len_from_working_key(struct hss_working_key *w) { param_set_t lm[MAX_HSS_LEVELS], ots[MAX_HSS_LEVELS]; int i; for (i=0; itree[i]->lm_type; - ots[i] = w->tree[i]->lm_ots_type; + lm[i] = w->tree[0][i]->lm_type; + ots[i] = w->tree[0][i]->lm_ots_type; } return hss_get_signature_len(levels, lm, ots); diff --git a/hss_sign_inc.c b/hss_sign_inc.c index 34e94f8..8e1e3c4 100644 --- a/hss_sign_inc.c +++ b/hss_sign_inc.c @@ -15,13 +15,12 @@ #include "hss_internal.h" #include "hss_sign_inc.h" #include "hss_derive.h" +#include "hss_fault.h" /* * Start the process of creating an HSS signature incrementally. 
Parameters: * ctx - The state we'll use to track the incremental signature * working_key - the in-memory version of the in-memory private key - * update_private_key - function to call to update the master private key - * context - context pointer for above * siganture - the buffer to hold the signature * signature_len - the length of the buffer * this_is_the_last_signature - if non-NULL, this will be set if this @@ -30,9 +29,6 @@ bool hss_sign_init( struct hss_sign_inc *ctx, struct hss_working_key *w, - bool (*update_private_key)(unsigned char *private_key, - size_t len_private_key, void *context), - void *context, unsigned char *signature, size_t signature_len, struct hss_extra_info *info) { struct hss_extra_info temp_info = { 0 };; @@ -54,7 +50,15 @@ bool hss_sign_init( return false; } - struct merkle_level *bottom = w->tree[ w->levels - 1 ]; + struct merkle_level *bottom = w->tree[0][ w->levels - 1 ]; +#if FAULT_RECOMPUTE + struct merkle_level *bottom_redux = w->tree[1][ w->levels - 1 ]; + if (bottom->current_index != bottom_redux->current_index) { + info->error_code = hss_error_internal; + return false; + } +#endif + unsigned char I[I_LEN]; memcpy( I, bottom->I, I_LEN ); @@ -65,6 +69,7 @@ bool hss_sign_init( int h = bottom->h; ctx->h = h; + hss_set_level( w->levels-1 ); struct seed_derive derive; if (!hss_seed_derive_init( &derive, bottom->lm_type, bottom->lm_ots_type, bottom->I, bottom->seed )) return false; @@ -77,7 +82,6 @@ bool hss_sign_init( * the bottom level OTS signature */ bool success = hss_generate_signature( w, - update_private_key, context, NULL, 0, /* <--- we don't have the message yet */ signature, signature_len, info ); if (!success) { @@ -119,7 +123,8 @@ bool hss_sign_update( return true; } -/* We've added all the pieces of the messages, now do the validation */ +/* We've added all the pieces of the messages, now complete the */ +/* signature genration */ bool hss_sign_finalize( struct hss_sign_inc *ctx, const struct hss_working_key 
*working_key, @@ -144,8 +149,8 @@ bool hss_sign_finalize( /* Step through the signature, looking for the place to put the OTS */ /* signature, and (while we're at it) recovering the I and seed values */ - const unsigned char *I = working_key->tree[0]->I; - const unsigned char *seed = working_key->tree[0]->seed; + const unsigned char *I = working_key->tree[0][0]->I; + const unsigned char *seed = working_key->tree[0][0]->seed; /* Note: we alternate buffers during generation in case */ /* hss_generate_child_seed_I_value doesn't allow new values to */ /* overwrite old ones */ @@ -160,14 +165,15 @@ bool hss_sign_finalize( int i; for (i=0; i working_key->tree[i]->max_index) { + if (q > working_key->tree[0][i]->max_index) { hss_zeroize( seed_buff, sizeof seed_buff ); return 0; } if (!hss_generate_child_seed_I_value( seed_buff[i&1], I_buff[i&1], seed, I, q, - working_key->tree[i]->lm_type, - working_key->tree[i]->lm_ots_type )) { + working_key->tree[0][i]->lm_type, + working_key->tree[0][i]->lm_ots_type, + i )) { hss_zeroize( seed_buff, sizeof seed_buff ); info->error_code = hss_error_internal; return false; @@ -177,9 +183,9 @@ bool hss_sign_finalize( I = I_buff[i&1]; /* Step to the end of this signed key */ - signature += lm_get_signature_len( working_key->tree[i]->lm_type, - working_key->tree[i]->lm_ots_type); - signature += lm_get_public_key_len(working_key->tree[i+1]->lm_type); + signature += lm_get_signature_len( working_key->tree[0][i]->lm_type, + working_key->tree[0][i]->lm_ots_type); + signature += lm_get_public_key_len(working_key->tree[0][i+1]->lm_type); } /* Now, signature points to where the bottom LMS signature should go */ @@ -196,8 +202,9 @@ bool hss_sign_finalize( hss_finalize_hash_context( ctx->h, &ctx->hash_ctx, hash ); /* And the final OTS signature based on that hash */ - param_set_t lm_type = working_key->tree[i]->lm_type; - param_set_t ots_type = working_key->tree[i]->lm_ots_type; + param_set_t lm_type = working_key->tree[0][i]->lm_type; + param_set_t 
ots_type = working_key->tree[0][i]->lm_ots_type; + hss_set_level( i ); struct seed_derive derive; bool success = hss_seed_derive_init( &derive, lm_type, ots_type, I, seed ); diff --git a/hss_sign_inc.h b/hss_sign_inc.h index 6dbd022..f6d25b9 100644 --- a/hss_sign_inc.h +++ b/hss_sign_inc.h @@ -14,7 +14,6 @@ * Usage: * struct hss_sign_inc ctx; * bool success = hss_sign_init( &ctx, working_key, - * update_private_key, private_key_context, * signature, signature_buffer_len, * &lsat_signature ); * hss_sign_update( &ctx, message_part_1, len_1 ); @@ -55,9 +54,6 @@ struct hss_extra_info; bool hss_sign_init( struct hss_sign_inc *ctx, struct hss_working_key *working_key, - bool (*update_private_key)(unsigned char *private_key, - size_t len_private_key, void *context), - void *context, unsigned char *signature, size_t signature_len, struct hss_extra_info *info); diff --git a/hss_thread_pthread.c b/hss_thread_pthread.c index be8a60e..6501fc3 100644 --- a/hss_thread_pthread.c +++ b/hss_thread_pthread.c @@ -2,6 +2,7 @@ #include #include +#include "hss_malloc.h" /* * This is an implementation of our threaded abstraction using the @@ -15,11 +16,7 @@ * implementation handy to test it */ -#define MAX_THREAD 16 /* Number try to create more than 16 threads, no */ - /* matter what the application tries to tell us */ -#define DEFAULT_THREAD 16 /* The number of threads to run if the */ - /* application doesn't tell us otherwise (e.g. 
*/ - /* passes in 0) */ +#include "config.h" #define MIN_DETAIL 16 /* So the alignment kludge we do doesn't waste space */ @@ -84,19 +81,20 @@ struct thread_collection *hss_thread_init(int num_thread) { /* single threaded */ if (num_thread > MAX_THREAD) num_thread = MAX_THREAD; - struct thread_collection *col = malloc( sizeof *col ); + struct thread_collection *col = hss_malloc( sizeof *col, + mu_thread_collection ); if (!col) return 0; /* On malloc failure, run single threaded */ col->num_thread = num_thread; if (0 != pthread_mutex_init( &col->lock, 0 )) { - free(col); + hss_free(col); return 0; } if (0 != pthread_mutex_init( &col->write_lock, 0 )) { pthread_mutex_destroy( &col->lock ); - free(col); + hss_free(col); return 0; } @@ -125,7 +123,7 @@ static void *worker_thread( void *arg ) { (w->function)(w->x.detail, col); /* Ok, we did that */ - free(w); + hss_free(w); /* Check if there's anything else to do */ pthread_mutex_lock( &col->lock ); @@ -171,7 +169,7 @@ void hss_thread_issue_work(struct thread_collection *col, size_t extra_space; if (size_detail_structure < MIN_DETAIL) extra_space = 0; else extra_space = size_detail_structure - MIN_DETAIL; - struct work_item *w = malloc(sizeof *w + extra_space); + struct work_item *w = hss_malloc(sizeof *w + extra_space, mu_work_item); if (!w) { /* Can't allocate the work structure; fall back to single-threaded */ @@ -218,7 +216,7 @@ void hss_thread_issue_work(struct thread_collection *col, /* Hmmm, couldn't spawn it; fall back */ default: /* On error condition */ pthread_mutex_unlock( &col->lock ); - free(w); + hss_free(w); function( detail, col ); return; } @@ -276,7 +274,7 @@ void hss_thread_done(struct thread_collection *col) { pthread_mutex_destroy( &col->lock ); pthread_mutex_destroy( &col->write_lock ); - free(col); + hss_free(col); } void hss_thread_before_write(struct thread_collection *col) { diff --git a/hss_verify.c b/hss_verify.c index d86015a..4ed17d3 100644 --- a/hss_verify.c +++ b/hss_verify.c @@ -13,6 
+13,7 @@ #include "hss_thread.h" #include "hss_internal.h" #include "hss.h" +#include "hss_fault.h" /* The HSS public key consists of: */ /* Number of levels (1-8) (4 bytes) */ @@ -30,6 +31,8 @@ void validate_internal_sig(const void *data, struct thread_collection *col) { const struct verify_detail *d = data; + hss_set_level( d->tree_level ); + bool success = lm_validate_signature(d->public_key, d->message, d->message_len, false, d->signature, d->signature_len); @@ -88,7 +91,11 @@ bool hss_validate_signature( /* key to use to validate the top level signature */ public_key += 4; - struct thread_collection *col = hss_thread_init(info->num_threads); + /* Note: if we're verifying a 1-level tree, there really is no point */ + /* to multithreading */ + int num_threads = (levels == 1) ? 1 : info->num_threads; + + struct thread_collection *col = hss_thread_init(num_threads); enum hss_error_code got_error = hss_error_none; struct verify_detail detail; detail.got_error = &got_error; @@ -153,6 +160,7 @@ bool hss_validate_signature( detail.message_len = l_pubkeylen; detail.signature = l_sig; /* Signature A */ detail.signature_len = l_siglen; + detail.tree_level = i; hss_thread_issue_work( col, validate_internal_sig, &detail, sizeof detail ); @@ -179,6 +187,7 @@ bool hss_validate_signature( detail.message_len = message_len; /* validation */ detail.signature = signature; /* Bottom level LMS signature */ detail.signature_len = signature_len; + detail.tree_level = levels-1; hss_thread_issue_work( col, validate_internal_sig, &detail, sizeof detail ); diff --git a/hss_verify_inc.c b/hss_verify_inc.c index d067c31..a8a2ca1 100644 --- a/hss_verify_inc.c +++ b/hss_verify_inc.c @@ -94,6 +94,7 @@ bool hss_validate_signature_init( detail.message_len = l_pubkeylen; detail.signature = l_sig; detail.signature_len = l_siglen; + detail.tree_level = i; hss_thread_issue_work( col, validate_internal_sig, &detail, sizeof detail ); diff --git a/lm_ots.h b/lm_ots.h index 4fcf690..a919084 100644 --- 
a/lm_ots.h +++ b/lm_ots.h @@ -55,6 +55,18 @@ bool lm_ots_generate_signature( const void *message, size_t message_len, bool prehashed, unsigned char *signature, size_t signature_len); +/* + * Same, but instead of writing the signature, we verify that we're written + * is precisely when we expect + */ +bool lm_ots_doublecheck_signature( + param_set_t lm_ots_type, + const unsigned char *I, + merkle_index_t q, + struct seed_derive *seed, + const void *message, size_t message_len, + const unsigned char *signature, size_t signature_len); + /* The include file for the verification routine */ #include "lm_ots_verify.h" diff --git a/lm_ots_sign.c b/lm_ots_sign.c index 52639bd..3674f8f 100644 --- a/lm_ots_sign.c +++ b/lm_ots_sign.c @@ -11,6 +11,7 @@ #include "hss_zeroize.h" #include "hss_derive.h" #include "hss_internal.h" +#include "hss_fault.h" bool lm_ots_generate_public_key( param_set_t lm_ots_type, @@ -25,6 +26,7 @@ bool lm_ots_generate_public_key( return false; /* Start the hash that computes the final value */ + hss_set_hash_reason(h_reason_ots_pkgen); union hash_context public_ctx; hss_init_hash_context(h, &public_ctx); { @@ -54,6 +56,7 @@ bool lm_ots_generate_public_key( hss_seed_derive( buf + ITER_PREV, seed, i < p-1 ); put_bigendian( buf + ITER_K, i, 2 ); /* We'll place j in the buffer below */ + hss_set_hash_reason(h_reason_ots_pkgen); for (j=0; j < (1< Use the algorthm defined in Appendix A of the draft + 1 -> Use a side channel resisteant process which limits the number + of distint hashes a secret value is used in. + SECRET_MAX If SECRET_METHOD==1, this we limit the number of hashes a secret + is sed with to 2**SECRET_MAX + USE_OPENSSL This defines whether we attempt to link to OpenSSL (only the + SHA256 implementation), or whether we use a slower portable C + version + ALLOW_VERBOSE If set, we insert debugging code that, if the global + hss_verbose is also set, we print out hash inputs and outputs. + This works only if USE_OPEN_SSL==0. 
+ Obviously, this is not something you want to do on delivered code, + and is *really* chatty (and I'd recommend you turn off + multithreading during the test). + TEST_INSTRUMENTATION If set, we insert instrumentation code. Currently, + the only instrumentation is to test mallocs and to delibberately + introduce faults into the hash functions (to test out the fault + protection code). Obviously, the latter is not something you want + to do on delivered code, + - Merkle trees, subtrees The core of this system is the signer, and the working key. To sign a message, we generate the OTS signature for that message, and generate @@ -103,6 +156,8 @@ sources. operations than expected (e.g. one of its ACTIVE_TREEs is on the right side, and so there's no need for us to update the BUILDING_TREE), and so the caller doesn't see any unexpected expense at all. + A third difference (if fault recomputation is on) is that we don't keep a + redundant copy of the top level LMS tree. When we load a private key in memory, the bulk of the work is initializing the subtrees to be what we'd expect them to hold, based on what the current count is. Actually, we advance the building and next trees to be slightly @@ -167,22 +222,52 @@ sources. this subsystem, and what they really mean private key This is what the raw private key looks like. It's not a formal C - structure; instead, it is a byte array, currently 48 bytes long, - split up into: + structure; instead, it is a byte array, split up into: + - 3 bytes of key format; this is to allow us to modify the key format + in the future, without causing disasters if someone tries to load + a previous key format. 
The 3 bytes are: + 0x01 - The current format + SECRET_MAX - The SECRET_MAX this key was generated with; 0xff + if SECRET_METHOD == 0 + FAULT_CACHE_LEN - The FAULT_CACHE_LEN this key uses; 0x00 if + FAULT_CACHE_SIG == 0 + - 1 byte of L; the number of tree levels in the HSS structure; between + 1 and 8 - 8 bytes of count; this is the state that gets updated with every signature, and consists of a bigendian count of the number of signatures so far. By convention, a setting of 0xffffffffffffffff - means 'we're used up all our signatures' - - 8 bytes of parameter set, in a compressed format. This is here so + means 'we're used up all our signatures'. + - 8 bytes of checksum; this is the first 8 bytes of the SHA-256 of + the private key (skipping the checksum). This is here to detect + when the private key is corrupted. It can't detect rollbacks or + malicious modifications of the private key; it will detect + accidental ones + - (L-1) * FAULT_CACHE_LEN of hashed signed public keys; when we + generate a signed public key (and it is possible that, on a + key reload, we'll recreate that signed public key), we'll hash + the signed public key, and place the first FAULT_CACHE_LEN bytes + here (with the lowest level public key being the first). The + public keys here are from the count listed in the count field + above (which might be in front of the count in the working key + if there is a reservation active). Not present if + FAULT_CACHE_SIG == 0 + - 8 bytes of max count; this is the maximum value that we'll allow + for the count. This is here so that, in the future, we can issue + private keys that are of limited range (the deligated subkey idea) + - L bytes of parameter set, in a compressed format. This is here so that the application needn't tell us what the parmaeter set when loading a key (and can't get it wrong) - 32 bytes of random seed; this is where all the security comes from. It is 32 bytes (256 bits) so Grover's algorthm can't recover it. 
This is a flat set of bytes, because we hand it to read_private_key and update_private_key routines, which are expected to read/write - them to long term storage. - Random musing: should we have included a version parameter (so we - could change the format without breaking things???) + them to long term storage. This key is a total of + 52 + L*(FAULT_CACHE_LEN+1) bytes long. + We put things in this order to place the frequently modified fields + up front. Hence, if we need to update the hashed signatures, we're + able to ask the NV write routine to update a minimal number of bytes + (of course, the NV write routine may decide just to update the entire + key; we're just giving the NV write the option). struct hss_working_key This structure holds all the current state of a loaded private key. It contains a copy of the private key (so we can write it out as @@ -202,6 +287,11 @@ sources. And, when we swap the active subtree with a building/next, we move the stack pointer from the old building/next subtree to the new (as the new active one doesn't need it). + In addition, the tree[redux][level] points to the tree level + data structure; for redux == 1, this points to the redundant + tree level (for fault hardening). We also special case + tree[0][0] == tree[1][0], as the top level tree needn't be + redundant (as we never sign its public key). struct merkle_level Actually, this is not a Merkle tree (even though the code typically names variables of this type 'tree'). Instead, it stands for a @@ -328,9 +418,10 @@ sources. 
the environment (rng failure, nvread/write failure, malloc failure) - hss_range_my_problem; these are errors caused by something internal - to this package; currently, they're all dubbed hss_error_internal, - and are caused by either something scribbling over our memory - or a bug somewhere + to this package; currently, they're either hss_error_internal, + caused by either something scribbling over our memory + or a bug somewhere; or hss_error_fault_detected (the fault + detection logic detected a miscompute). struct seed_derive This is the structure we use to derive keys in a potentially side channel resistant manner. There are two different versions of this @@ -357,7 +448,7 @@ sources. The programmer can modify the SECRET_METHOD/SECRET_MAX settings to change the efficiency/side channel resistance mix; however any such change modifies the mapping between seeds and private LM-OTS - values (that is, your private keys no longer work). + values (that is, your existing private keys no longer work). - Side channel resistance and key derivation. We are inherently resistant to timing and cache-based side channel attacks @@ -380,6 +471,49 @@ sources. children. Decreasing SECRET_MAX takes a bit more time (as the tree becomes deeper), however even SECERET_MAX==1 (smallest allowed value) isn't that expensive. + +- Protection against Fault attacks + One concern is that if we were to miscompute a hash (and hence miscompute + an LMS public key), we might end up signing one public key with the parent + OTS key, and then at a different time, compute it correctly, and thus sign + a different public key with the same OTS key. This issue was first raised + by https://eprint.iacr.org/2018/674.pdf in the context of Sphincs; the same + concern is valid here. + This package implements optional protection against this fault attack, + enabled by the FAULT_RECOMPUTE or the FAULT_CACHE_SIG flags. We actually + implement two separate protections; the user can choose to use either or + both. 
BTW: why did we bother with two separate methods? Well, I first + implemented the FAULT_RECOMPUTE method; after that work was done, I realized + that the FAULT_CACHE_SIG method would also work (and be cheaper), and so I + implemented that (and had no good reason to back out the FAULT_RECOMPUTE + code). + If the FAULT_CACHE_SIG flag is set, then the first time we generate a + signed public key (to insert into an HSS signature), we store a hash of that + signed public key in the private key; if we ever regenerate it (because of a + reload), we'll compare the new hash with what's in the private key (and fail + if they differ). One exception, if we generate a signed public key and find + that we'll never recreate it (because the current reservation covers the + entire use of that public key), we won't write it. + If the FAULT_RECOMPUTE flag is set, we will redundantly recompute the LMS + public keys (other than the top one), and if they get out of sync, fail. + Because a fault attack works by having two different public keys signed by + the same OTS private key, either of these methods protects against this. + You enable this protection by setting either the FAULT_RECOMPUTE or the + FAULT_CACHE_SIG in config.h to 1 and recompiling (or you could turn on + both, if you are extra paranoid, and don't mind both costs). + The costs of FAULT_CACHE_SIG: this increases the size of the private key. + In addition, if you're using the autoreserve feature (to decrease the + number of writes to NVRAM), this may increase it somewhat (because sometimes + we'll need to update the hashes in the private key, even when our + reservation isn't quite up yet). However, the additional computational costs + are minimal. 
+ The costs of FAULT_RECOMPUTE: we increase both the signature generation + time (although multithreading helps here; we place the additional + computation into additional threads) and key load time (multithreading + doesn't help - we already max out the number of threads), and we + increase our memory footprint somewhat. Note that if you use L=1, then + fault attacks aren't actually a concern (and you also don't get charged + the additional computational costs). - Threading architecture We support doing operations on multiple threads, however we'd rather not @@ -440,8 +574,9 @@ Also, while the key generation and loading can take advantage of as many threads as they can get, the signature generation and verification logic can't. The signature generation logic is limited by the number of subtree - levels in the bottom merkle tree (plus one); the signature verification - logic is limited to the number of merkle levels. + levels in the bottom merkle tree (possibly times 2 if you're doing fault + hardening), plus one; the signature verification logic is limited to the + number of merkle levels. - Use of malloc If you go through the code, you'll see an occasional call to malloc. In @@ -458,10 +593,10 @@ of memory, hence we feel we don't need a plan B there. - Use of Variable Length Arrays - Now, we used to use VLAs (a C99 language feature) at places. However, - someone's compiler couldn't handle them (even though they implemented the - rest of the C99 features we used), and so we went and reworked the code to - remove them. In any case, removing the VLAs might make this code a bit more + We used to use VLAs (a C99 language feature) at places. However, someone's + compiler couldn't handle them (even though it implemented the rest of the + C99 features we used), and so we went and reworked the code to remove them. 
+ In any case, removing the VLAs might make this code a bit more small-end-device friendly (as those small devices tend not to have huge stacks). @@ -475,39 +610,78 @@ sources. someone to cause us to misbehave by modifying the aux data). - Use of globals - There are no globals (other than the optional debugging flag hss_verbose). - All memory is either a buffer provided by the calling application, - dynamically allocated (malloc), or automatic (stack). Globals are evil, - reentrancy is good. The regression code does have globals (for things like - coordinating with the randomness generator; no normal program has any need - for that); the regression code isn't intended for use for other programs... + There are no globals (other than the optional debugging flag hss_verbose + and instrumentation code, such as hash_fault_*). All memory is either a + buffer provided by the calling application, dynamically allocated (malloc), + or automatic (stack). Globals are evil, reentrancy is good. The regression + code does have globals (for things like coordinating with the randomness + generator; no normal program has any need for that); the regression code + isn't intended for use for other programs... - Use of floating point - Crypto code hardly ever uses floating point. However, we're an exception; - in the hss_generate.c function, we do actually do some float point - computations; we do this to figure out a reasonable way to split the - building task between threads (and for this task, the imprecision inherent - in floating point is not a problem; if two ways of splitting the task are - so close in cost that the rounding error actually makes a difference, it - doesn't really matter which way we go). Now, we include a macro - (DO_FLOATING_POINT) which disables the use of floating point; a platform - that does not support floating point can set it to 0, and that code is - commented out. 
Now, if you use threading, you really want DO_FLOATING_POINT - If you don't, it doesn't matter for performance, and actually, turing it off - comments out quite a bit of code that you doesn't actually buy you anything; - it doesn't matter how we divide tasks between threads if the same thread - will end up performing them all anyways... + Crypto code hardly ever uses floating point (except perhaps to speed up + integer multiplication). However, we're an exception; in the hss_generate.c + function, we do actually do some float point computations; we do this to + figure out a reasonable way to split the building task between threads (and + for this task, the imprecision inherent in floating point is not a problem; + if two ways of splitting the task are so close in cost that the rounding + error actually makes a difference, it doesn't really matter which way we + go). Now, we include a macro (DO_FLOATING_POINT) which disables the use of + floating point; a platform that does not support floating point can set it + to 0, and that code is commented out. Now, if you use threading, you really + want DO_FLOATING_POINT If you don't, it doesn't matter for performance, and + actually, turing it off comments out quite a bit of code that you doesn't + actually buy you anything; it doesn't matter how we divide tasks between + threads if the same thread will end up performing them all anyways... We also use floating point in the regression code; to figure out when to update the displayed % completed. - Debugging Good luck... +- Instrumentation + config.h has a TEST_INSTRUMENTATION flag; if turned on, this enables some + additional test infrastructure that allows us to test various error + conditions. This instrumentation should never be enabled in release code; + it does allow the regression tests to check out some corner cases. + Currently, the instrumentation code enables two things. 
+ - It enables malloc checking; it does some simple checks of the malloc's + and free's (making sure we don't assume a zero malloc buffer, checking + for free'ing correctness (e.g. no double free's), buffer + overwrites/underwrites, and memory leaks. The API we use internally + is in hss_malloc.h; if TEST_INSTRUMENTATION is off, hss_malloc and + hss_free calls are directly translated into malloc and free. + - It allows us to inject hash failures; that is, at a controllable time, our + code can miscompute a hash. Obviously this need not be in a release + version; it is useful if we want to double-check whether our + fault-hardening code is working as advertised. The API used is in + hss_fault.h, along with the global hss_fault_enabled. The + hss_fault_enabled is the overall mode; with 0 being "never fault", 1 being + "fault at the specified time", and 2 being "always fault". In the case of + hash_fault_enabled == 1, we characterize the hashes based on level (which + is the Merkle tree level within the HSS hierarchy, with 0 being the + topmost; 0 is used for hashes computed outside the HSS hierarchy), and + hash_fault_reason being the reason we're doing the hash (and hss_fault.h + lists 9 different categories). We characterize hashes this way so that + test_fault.c can target specific hashes (and test failures across the + hierarchy). In the code, we'll call hss_set_level(int) when we know we'll + be working at a specific HSS level (and mostly we don't pass the level to + the low level routines, so a higher level routine generally does the + call); when we know which hash we're doing, we'll call + hss_set_hash_reason. + - Regression tests This package includes the test_hss executable, which is meant to be a set of regression tests for this package. It ought to be run early and often (with "test_hss all" being a good default), if not in -full mode, it's relatively quick.
+ We've included the fault and the malloc tests; both run only if the + instrumentation code is enabled (as both rely on additional testing code not + in the release version); in addition, the fault test runs only if fault + protection is enabled (the test is designed to test the effectiveness of + fault protection, and it'll fail if it's not present). + If the instrumentation is on, we will (at the end of the tests) check to + see if there were any memory leaks (and if so, announce that). The usage is: test_hss [-f] [-q] [-full] test_1 test_2 test_3 @@ -530,14 +704,16 @@ sources. On my test machine, 'test_hss all' currently takes about 70 seconds. Now, there are things that the regression tests currently don't test: - - Do we assume malloc gives us an initiallized buffer? - Do we handle malloc failures as designed? - How about thread spawn failures? We're supposed to handle those transparently - - Do we have any memory leaks? - We're supposed to be able to limit the number of times we hash any specific secret; do we actually abide by that? - Testing those would require more infrastructure than we have right now. + - We're supposed to zeroize anything that would allow a forgery before + freeing it; do we do so? + Testing those would require more infrastructure than we have right now + (although inserting those based on the TEST_INSTRUMENTATION flag would + be the obvious place to start). Also, it might not be that bad of an idea to run a code-coverage tool to check out how much of the code the regression tests actually tests.
hss_derive.[ch] This is the structure that does key derivation. It allows a trade-off between efficiency, and side channel resistance. + hss_fault.h This contains the interface to the fault error + injection logic; that is, routines that various + functions use to declare why they're doing a hash. + This information allows the error injection logic + to target a specific hash location (rather than making + a random hash fail). The actual code to do fault + error injection is in hash.c hss_generate.c This is the routine that takes an allocated working key (hss_alloc.c), and loads a private key into it. Sound simple? Well, if you go through this, you'll find out @@ -597,6 +782,9 @@ sources. to this subsystem, but shouldn't be used outside of it. hss_keygen.c This is the routine that generates a public/private keypair. + hss_malloc.[ch] These are routines that do our instrumented checking + of malloc/free. If instrumentation is turned off, calls + are directly routed to the standard malloc/free. hss_param.c These are routines that deal with parameter sets. hss_reserve.[ch] These are routines that deal with reservations, and updating the sequence number in a private key. diff --git a/read.me b/read.me index d334055..13b0c11 100644 --- a/read.me +++ b/read.me @@ -102,7 +102,7 @@ Statefulness; we do several things to mitigate this issue: generation, signature generation or reservation (see below)), we have the application provide a function that is supposed to write the new state to secure storage; the signature is not released to the application - (actually, not even generated) unless that function claims success. + unless that function claims success. - There's quite a lot of state involved with this package. However, instead of writing all that to secure storage, what we do is write a summary. On a program reload, we read ("load") that summary back into memory, @@ -116,7 +116,7 @@ Statefulness; we do several things to mitigate this issue: update. 
Hence, we can move much of the work to time where we would have been idle. Signature size; we can't actually do anything about the signature format -(that's fixed in the draft), however what we can do is try to make selecting +(that's fixed in the RFC), however what we can do is try to make selecting a parameter set with a short signature reasonable: - Efficiency; we try to make it efficient; efficiency allows you to use more aggressive parameter sets (chiefly, ones with W=8, and relatively few @@ -161,12 +161,10 @@ feature-centric order, rather than a problem-centric one: private key into three parts: - A short section which is meant to be saved in secure storage (and this is the part which is referred to as the 'private key'; this includes the - number of signatures generated so far. It's currently 48 bytes long - (with the part that needs to be actually updated only 8 bytes), we assume - that it is updated atomically. These 48 bytes consist of an 8 byte count - of the number of sigantures generated so far, 8 bytes of a compressed - version of the parameter set, and 32 bytes of 'seed' (a random value that - is used to derive every secret) + number of signatures generated so far. It's currently 70 bytes long (for + a two level HSS tree with the default configuration). The part that needs + to be updated dynamically is only 20-28 bytes; we assume that it is + updated atomically. - A longer section (the "working key") that resides in memory; we assume that this is private, but need not be saved into long term (nonvolatile) storage. This section include the Merkle tree contents (actually, a @@ -234,13 +232,14 @@ We also have an optional third section to the private key: full-speed-ahead' mode doesn't use that much memory (unless the bottom level Merkle tree is huge). Yes, there's a delta, but not a big one. 
-- For any function that can update the private key, we have the application - (the program that is using the package to sign messages) pass a function - ("update_private_key") and a context pointer. If this is non-NULL, then - when we update the private key, we call this function, what this function is - expected to do is write the private key into secure storage (and pass - nonzero on success); if you pass a NULL function pointer, then we assume - that passed context pointer is actually the pointer to the private key. +- When we load the working key into memory, we have the application (the + program that is using the package to sign messages) pass two functions + ("read_private_key" and "update_private_key") and a context pointer. If + these are non-NULL, then to read the private key or to update it, we call + one of these functions. What these functions are expected to do is + read/write the private key into secure storage (and pass nonzero on + success); if you pass NULL function pointers, then we assume that passed + context pointer is actually the pointer to the private key. - Explicit implementation of the reservation primitive. Right now, we have an API that states "make sure that we have N signatures reserved (so that @@ -272,7 +271,7 @@ real applications really ought not do. 
General notes: - Advice about parameter sets: while this supports all the HSS parameter sets - currently allowed in draft-mcgrew-hash-sigs-08, some work better than others + currently allowed in RFC 8554, some work better than others The general recommendation is to use a few large trees (that is, with lots of levels), rather than using a number of levels of smaller trees; this package tries to make large trees not that costly (and reducing the number @@ -348,17 +347,8 @@ General notes: - Portability of the private keys: I believe that the private keys are portable to different CPU architectures (that is, picking up a private - key on one CPU and moving it to another will work) - However, I don't currently promise that private keys generated by this - version of the package will work on future versions; we reserve the right - to make changes that break current created private keys. Eventually, we'll - need to preserve this (possibly by including a version flag in the private - key); however, I don't believe that this package is mature enough yet. - After all, essentially everything involved with signing (with the sole - exception of the bottom C signature randomizer) must be derived in the exact - same way from the top level seed; once we start doing versioning, that means - we must forever support that way of deriving internal values from the secret - seed. + key on one CPU and moving it to another will work); however that hasn't been + tested. - Same thing with the API; eventually, we'll need to stabilize the API; however, I don't believe we're there yet (although with the extra_info @@ -486,11 +476,9 @@ likely not to implement it: be that the master does a fresh hss_generate_working_key; there are more efficient ways to do it, but there are lots of corner cases that would require testing to make sure we've covered everything. 
The subkey we - send to the client would be the private key plus one parameter (the - allowed maximum); we might want to modify our private key format to - include this maximum if we're serious about this (so that the secondary - signer could run the standard HSS code). There are likely a number of - gotcha's involved; however less than the below idea. + send to the client would be the private key with the maximum sequence + number set to X+N-1. There are likely a number of gotcha's involved; + however, much of the work has already been done. 2. We'd also have a master holder, but instead of giving the entire secret key to the secondary signer (and tell him "please only use sequence numbers from X to X+N-1"), we instead gave him an entire subtree (without @@ -585,13 +573,24 @@ likely not to implement it: be seen in an uninitialized buffer. Should we also include a magic value as well, for some simple validity checking? Something like that certainly wouldn't be fool-proof; however it might catch some simple mistakes. - -- Right now, we pass the update_private_key to every hss_generate_signature - operation. Would it make more sense to pass it instead when we - hss_load_private_key, and just store it in the working_key? Would it - make sense to use the same context for read_private_key and - update_private_key? - Right now, hss_extra_info has a last_signature flag. Would it be more useful to have a 'signatures_remaining' count instead (and when it hits 0, that'd mean we just wrote the last signature)? + +- Private key versioning; we currently do have a version flag. At some + point, we may modify the private key format (which would involve + incrementing the version flag - it may be possible to import previous + private keys, however only if all the other parameters (e.g. 
SECRET_METHOD) + also lined up (which may mean that it might not be a great idea to rely + on that) + + Two ideas aimed to reducing the amount of RAM used: + +- We've figured out how to store both the ACTIVE_TREE and the BUILDING_TREE + into the same data structure (saving some memory); should we try to + take advantage of that? That may make the generate logic more complicated + +- For the non-bottom Merkle trees, the BDS logic (rather than fractal) + would make sense (takes more updates, but we don't care about that). That + would save a bit of memory - should we try? diff --git a/sha256.h b/sha256.h index 152419a..09dfcf8 100644 --- a/sha256.h +++ b/sha256.h @@ -1,9 +1,7 @@ #if !defined(SHA256_H_) #define SHA256_H_ -#define USE_OPENSSL 1 /* We use the OpenSSL implementation for SHA-256 */ - /* (which is quite a bit faster than our portable */ - /* C version) */ +#include "config.h" /* Length of a SHA256 hash */ #define SHA256_LEN 32 diff --git a/test_fault.c b/test_fault.c new file mode 100644 index 0000000..ec575f5 --- /dev/null +++ b/test_fault.c @@ -0,0 +1,640 @@ +/* + * This will test out the protection against fault attacks. This LMS + * implementation recomputes the same hash multiple times (both between + * reloads and within a single reload cycle). + * One concern is that if we were to miscompute such a hash one of those + * times (and so miscompute the public key), we might end up signing one + * public key with an OTS key, and then later signing a different public + * key with the same OTS key, This concern was first raised by + * https://eprint.iacr.org/2018/674.pdf in the context of Sphincs; the + * same concern is valid here. + * + * This package implements optional protection against this fault attack + * (enabled by either the FAULT_RECOMPUTE or FAULT_CACHE_SIG flags); this + * regression test tries to hammer at it to make sure that it gives as much + * protection as we would hope. 
+ * + * Here's how this test works; if TEST_INSTRUMENTATION is enabled, then + * we can selectively inject hash faults (currently, we have 19-21 different + * categories of fault locations); when triggered, the corresponding + * SHA256 hash is wrong. What we do is generate a number of HSS signatures + * (periodically reloading the key; a fault during a key reload needs to + * be checked), and for each such signature, we parse it into the LMS + * signatures, and record for each signature, what was the I value + * (public key id), and the J index; and the value that was signed (and + * the C randomizer). If we ever see the same OTS private key (I and J + * values) signing two different messages (and C randomizer), we declare + * failure. + */ +#include +#include +#include "hss.h" +#include "test_hss.h" +#include "hash.h" +#include "lm_common.h" + +/* These 4 match the globals from the instrumentation in hash.c */ +int hash_fault_enabled; /* 0 -> act normally */ + /* 1 -> miscompute a hash, as specified below */ + /* 2 -> miscompute all hashes */ + /* These two specify which hashes this miscomputation logic applies to */ + /* if hash_fault_enabled == 1 */ +int hash_fault_level; /* Specify the level of hashes we're interested in */ + /* 0 -> root LMS level */ + /* 1 -> the LMS tree below the root, etc */ +int hash_fault_reason; /* The reason the hash is done */ + /* See hss_fault.h for the list */ +long hash_fault_count; /* Which hash to get wrong (count) */ + /* 1 -> the very next hash */ + /* Also is decremented on every matching hash */ + /* computation */ + +/* + * This will do an initial check if allowing this test to run makes sense + * - If the hash function has not been instrumented, we can't inject + * failures (and so there's no point to this test) + * - If the code doesn't have fault hardening enabled, then it won't have + * any defenses against faults, and so there's no point in checking + */ +bool check_fault_enabled(bool fast_flag) { + bool
allow_test_to_go_through = true; + + /* Check whether the LMS implementation claims to be hardened */ + /* against fault attacks. Note: to test this test to see if it'll */ + /* detect faults, comment out the below lines */ + if (!hss_is_fault_hardening_on( 0 )) { + printf( " Fault hardening is not enabled - test of fault hardening logic skipped\n" ); + allow_test_to_go_through = false; + /* Lets not bother the user with whether instrumentation was */ + /* turned on */ + return allow_test_to_go_through; + } + + /* Check if we can inject a hash fault */ + unsigned char actual[MAX_HASH]; + hash_fault_enabled = 2; /* Miscompute all hashes */ + hss_hash( actual, HASH_SHA256, "abc", 3 ); + hash_fault_enabled = 0; /* Shut off fault testing */ + + /* This is SHA256("abc"), assuming that we did compute it correctly */ + static unsigned char good_hash[32] = { + 0xba,0x78,0x16,0xbf,0x8f,0x01,0xcf,0xea, + 0x41,0x41,0x40,0xde,0x5d,0xae,0x22,0x23, + 0xb0,0x03,0x61,0xa3,0x96,0x17,0x7a,0x9c, + 0xb4,0x10,0xff,0x61,0xf2,0x00,0x15,0xad, + }; + + if (0 == memcmp( actual, good_hash, 32 )) { + printf( " Test instrumentation logic not enabled - test skipped\n" ); + printf( " Turn on TEST_INSTRUMENTATION flag to enable\n" ); + allow_test_to_go_through = false; + } + + return allow_test_to_go_through; +} + +/* + * This is the exemplar private key we'll use. 
+ */ +static unsigned char private_key[HSS_MAX_PRIVATE_KEY_LEN]; +static unsigned char aux_data[1000]; + +static bool rand_1(void *output, size_t len) { + /* We really don't care what this is */ + memset( output, 0x03, len ); + return true; +} + +static bool gen_private_key(unsigned num_level, + const param_set_t *lm, const param_set_t *ots, + unsigned long initial_count) { + unsigned char public_key[HSS_MAX_PUBLIC_KEY_LEN]; + if (!hss_generate_private_key( rand_1, + num_level, lm, ots, + 0, private_key, + public_key, sizeof public_key, + aux_data, sizeof aux_data, + 0 )) { + return false; + } + /* If requested, advance the key to the specified location */ + if (initial_count > 0) { + /* Step the tree to the requested location */ + struct hss_working_key *w = hss_load_private_key( 0, 0, private_key, + 0, aux_data, sizeof aux_data, 0 ); + if (!w) return false; + bool success = hss_reserve_signature( w, initial_count, 0 ); + hss_free_working_key(w); + if (!success) return false; + } + + return true; +} + +#define FIRST_ITER 69 /* For the first iteration, generate 69 */ + /* signatures between reloads. This is sized */ + /* so that second reload cycle starts in one */ + /* bottom tree, and ends in another */ +#define SECOND_ITER 1024 /* For the second iteration, generate 1024 */ + /* signatures between reloads. 
This is sized */ + /* so that second reload cycle starts in one */ + /* second level tree, and ends in another */ +/* + * This runs the standard set of key reloads and signature generation + * We do the same sequence each time so that we perform the same hashes + * each time + * If allow_app_to_get_stats is provided, it'll be called at specific points + * while the test is running + * If store_signature is provided, it'll be called with the generated + * signatures + * + * The sequence of operations it performs is: + * - Key load + * - Generate the next N signatures + * (Point 0) + * - Key load + * (Point 1) + * - Generate the next N signatures + * (Point 2) + * - Key load + * - Generate the next N signatures + * + * This will call allow_app_to_get_stats at points 0, 1, 2; that records + * the data that'll allow a later run to target the hash fault either in + * the second key load or the second set of signatures (depending on which + * we are testing). + * + * The idea behind this sequence is to allow the fault logic to target + * a fault either during the second key load (between points 0 and 1), or + * during the second set of signatures (between points 1 and 2). + * That is, if a fault during a key load causes us to corrupt our internal + * structures (without being detected), we'll see an inconsistency between + * the first set of signatures and the second. + * And, if a fault during a signature generation causes us to corrupt our + * internal structures, we'll see an inconsistency between the second set + * of signatures and the third.
+ */ +static bool run_iteration(int iter, bool fail_on_error, + void (*allow_app_to_get_stats)(int when, void *context), + bool (*store_signature)(const unsigned char *signature, int len_sig, + void *context, int num_levels), + int num_levels, + void *context) { + struct hss_extra_info info = { 0 }; + hss_extra_info_set_threads( &info, 1 ); /* Shut off multithreading */ + /* We tell the code 'cause the 7th hash from now to fail'; if the */ + /* hashes are computed in parallel, which one is the 7th becomes */ + /* problematic */ + + /* Create a local copy of the private key */ + /* No, you *REALLY* shouldn't do this in a real application */ + unsigned char my_private_key[HSS_MAX_PRIVATE_KEY_LEN]; + memcpy( my_private_key, private_key, HSS_MAX_PRIVATE_KEY_LEN ); + + int len_signature = 0; + unsigned char *signature = 0; + + int i; + /* + * We go through 3 reload cycles; one good one, one where the fault + * actually happens, and then another good one + */ + for (i=0; i<3; i++) { + if (i == 1 && allow_app_to_get_stats) + allow_app_to_get_stats(0, context); /* Point 0 */ + /* + * Every so often, reload the private key + */ + struct hss_working_key *w = hss_load_private_key( + 0, 0, my_private_key, + 100000, aux_data, sizeof aux_data, + &info); + if (i == 1 && allow_app_to_get_stats) + allow_app_to_get_stats(1, context); /* Point 1 */ + + if (!w) { + if (fail_on_error) { + /* Huh? 
We haven't injected a fault */ + /* This should have worked */ + free(signature); + return false; + } + /* Perhaps we failed because of an internal fault; try again */ + continue; + } + if (!signature) { + len_signature = hss_get_signature_len_from_working_key(w); + signature = malloc(len_signature); + if (!signature) { hss_free_working_key(w); return false; } + } + + /* + * And then generate a series of signatures + * On the second iteration, we go over 1024 signatures (so that + * we start in one top OTS key, go to the next, and then go to + * a third -- more chances to hit a vulnerability) + */ + int j; + for (j=0; jhash_table[i] = 0; +} + +/* + * This inserts a signature into the database (and checks to see if + * we've seen the same i/q value with a different message) + */ +static bool insert_database(struct database *d, + const unsigned char *i, const unsigned char *q, + const unsigned char *message, unsigned len_msg, + const unsigned char *c) { + int h = hash(i, q); + struct seen_hash *p; + for (p = d->hash_table[h]; p; p = p->link) { + if (0 == memcmp( i, p->I, 16 ) && + 0 == memcmp( q, p->q, 4 )) { + /* We've seen this entry before */ + if (p->len_msg == len_msg && + 0 == memcmp( p->message, message, len_msg ) && + 0 == memcmp( p->c, c, 32)) { + /* Exact duplicate; ignore */ + return true; + } + + /* + * We detected the event that would allow a forgery + */ + printf( " Discovered same OTS index signing two different messages\n" ); + return false; + } + } + + /* Not seen before; insert it */ + p = malloc( sizeof *p); if (!p) return false; + p->message = malloc( len_msg ); if (!p->message) { free(p); return false; } + + memcpy( p->I, i, 16 ); + memcpy( p->q, q, 4 ); + memcpy( p->message, message, len_msg ); + memcpy( p->c, c, 32 ); + p->len_msg = len_msg; + p->link = d->hash_table[h]; + d->hash_table[h] = p; + return true; +} + +static void delete_database(struct database *d) { + int i; + for (i=0; i<256; i++) { + while (d->hash_table[i]) { + struct 
seen_hash *p = d->hash_table[i]; + d->hash_table[i] = p->link; + free(p->message); + free(p); + } + } +} + +/* + * This takes an HSS signature, parses it into the component LMS + * sigantures/messages, and inserts those into the databae + */ +static bool store_sigs( const unsigned char *signature, int len_sig, + void *context, int num_levels) { + struct database *d = context; + + signature += 4; len_sig -= 4; /* Skip over the number of levels */ + + int i; + for (i=0; ilen_pk) return false; + memcpy( I, signature+8, 16 ); + signature += d->len_pk; len_sig -= d->len_pk; + } + + /* Get the actual signature */ + const unsigned char *ots_sig = signature; + unsigned len_ots_sig = d->len_sig; + if (len_sig < len_ots_sig) return false; + const unsigned char *c = ots_sig+8; /* Grab the randomizer */ + signature += len_ots_sig; len_sig -= len_ots_sig; + + /* Get the message that was signed */ + const unsigned char *message; + int len_msg; + if (i == num_levels-1) { + message = (void *)"abc"; + len_msg = 3; + } else { + message = signature; + len_msg = d->len_pk; + } + + /* Log it */ + if (!insert_database(d, I, ots_sig, message, len_msg, c )) { + return false; + } + } + return true; +} + +/* + * This records the number of matching hashes we get at the three points + * we've defined (before the second reload, after the second reload and + * after the second sequence of signature generation + */ +static void get_stats( int index, void *p ) { + unsigned long *count = p; + count[index] = -hash_fault_count; +} + +#define NUM_REASON 9 /* Currently, the LMS code defines 9 distinct reasons */ + /* (see hss_fault.h for the current list) */ + +static bool do_test( int num_level, + const param_set_t *lm, const param_set_t *ots, + bool fast_flag, bool quiet_flag, + unsigned long start_location, + float start_percent, float end_percent ) { + /* Create the exemplar private key */ + if (!gen_private_key(num_level, lm, ots, start_location)) return false; + + int iter; + int max_iter = 
(fast_flag ? 1 : 2); + int percent = 0; + for (iter = 0; iter < max_iter; iter++) { + float start_range, stop_range; + + if (fast_flag) { + start_range = 0; stop_range = 1; + } else { + float mid_range = (float)FIRST_ITER / (float)SECOND_ITER; + if (iter == 0) { + start_range = 0; stop_range = mid_range; + } else { + start_range = mid_range; stop_range = 1; + } + } + + /* + * This is the number of hashes done while performing + * the test sequence, listed by hyper tree level and + * hash reason + * Last index: + * 0 -> # of hashes done at the start of the second key reload + * 1 -> # of hashes done at the end of the second key reload + * 2 -> # of hashes done at the end of the second sequnce of + * signature generation + * 3 -> total # of hashes done + */ + unsigned long count_hashes[MAX_HSS_LEVELS][NUM_REASON][4]; + + /* + * Count the number of times we compute each hash reason + * and more particularly, when we would need to time the + * failure so that it happens either during the second rekey reload + * or during the second set of signature generation + */ + int level, reason; + int total_tests = 0; + for (level = 0; level < num_level; level++) { + for (reason = 0; reason < NUM_REASON; reason++) { + hash_fault_enabled = 1; + hash_fault_level = level; + hash_fault_reason = reason; + hash_fault_count = 0; /* By setting count == 0, we */ + /* don't actually miscompute any hashes; however */ + /* hash_fault_count is still decremented every */ + /* time we get a match */ + bool success = run_iteration(iter, true, get_stats, 0, 0, + &count_hashes[level][reason][0]); + hash_fault_enabled = 0; + if (!success) return false; + count_hashes[level][reason][3] = -hash_fault_count; +#if 0 + /* Useful printout if you're curious */ + printf( "%d:%d - %ld %ld %ld %ld\n", level, reason, + count_hashes[level][reason][0], + count_hashes[level][reason][1], + count_hashes[level][reason][2], + count_hashes[level][reason][3] ); +#endif + total_tests += count_hashes[level][reason][1] > 
+ count_hashes[level][reason][0]; + total_tests += count_hashes[level][reason][2] > + count_hashes[level][reason][1]; + } + } + + /* + * For each hash reason that could occur at least once, make one + * of those hashes fail once (about half way through) + */ + int tests_run = 0; + for (level = 0; level < num_level; level++) { + for (reason = 0; reason < NUM_REASON; reason++) { + int z; + for (z = 0; z<2; z++) { + /* If z = 0, we'll trigger the fault during the reload */ + /* If z = 1, we'll trigger the fault during a sig gen */ + if (count_hashes[level][reason][z] == + count_hashes[level][reason][z+1]) { + /* We don't compute that specific hash type then */ + continue; + } + + if (!quiet_flag) { + float new_percent = (float)tests_run / total_tests; + new_percent = (stop_range - start_range) * new_percent; + new_percent = start_percent + (end_percent - start_percent) * new_percent; + if (new_percent >= percent+1) { + percent = (int)new_percent; + printf( " %d%%\r", percent ); + fflush(stdout); + } + } + tests_run++; + + /* In -full mode, try various places for the hash */ + /* function to fail; iterate from the 0% (the very */ + /* first hash call) to the 100% (the very last */ + /* hash call) in 10% increments */ + /* In fast mode, just do the middle hash function */ + int min_decade = fast_flag ? 5 : 0; + int max_decade = fast_flag ? 
5 : 10; + int decade; + long prev_count = -1; + for (decade = min_decade; decade <= max_decade; decade++) { + /* + * Turn on the fault logic; targetting the specific + * hash type, and setting the count so that it'll fail + * at the spot we're testing + */ + hash_fault_enabled = 1; + hash_fault_level = level; + hash_fault_reason = reason; + hash_fault_count = ( + (10-decade) * (count_hashes[level][reason][z]+1) + + decade * count_hashes[level][reason][z+1] + ) / 10 ; + if (hash_fault_count == prev_count) { + /* This iteration would be precisely the same */ + /* as the previous */ + continue; + } + prev_count = hash_fault_count; + + /* Create the table of signatures we've seen */ + struct database seen_sigs; + init_database( &seen_sigs ); + seen_sigs.len_sig = lm_get_signature_len( + LMS_SHA256_N32_H5, LMOTS_SHA256_N32_W2 ); + seen_sigs.len_pk = lm_get_public_key_len( + LMS_SHA256_N32_H5); + +#if 0 + /* Useful printout to let you see what the test is trying */ +printf( "*** RUNNING TESTS with level = %d reason = %d z = %d %d0%%\n", level, reason, z, decade ); +#endif + + /* Run the test (with the specific failure */ + bool success = run_iteration(iter, false, 0, + store_sigs, num_level, &seen_sigs); + + /* Turn off failure testing */ + hash_fault_enabled = 0; + + delete_database( &seen_sigs ); + + /* If we detected a failure, we're done */ + if (!success) return false; + } + } + } + } + + if (iter+1 == max_iter) break; + { + /* Advance the exemplar private key by 511 */ + struct hss_working_key *w = hss_load_private_key( + 0, 0, private_key, 0, aux_data, sizeof aux_data, 0); + if (!w) return false; + bool success = hss_reserve_signature( w, 511, 0 ); + hss_free_working_key(w); + if (!success) return false; + } + } + return true; +} + +bool test_fault(bool fast_flag, bool quiet_flag) { + { + /* Run the test using a three level tree */ + param_set_t lm_array[3] = { LMS_SHA256_N32_H5, LMS_SHA256_N32_H5, + LMS_SHA256_N32_H5 }; + static param_set_t ots_array[3] = { 
LMOTS_SHA256_N32_W2, + LMOTS_SHA256_N32_W2, LMOTS_SHA256_N32_W2 }; + float start = fast_flag ? 10 : 5; /* Percentages for the program */ + float stop = fast_flag ? 100 : 40; /* output */ + if (!do_test( 3, lm_array, ots_array, fast_flag, quiet_flag, + 0UL, start, stop )) { + return false; + } + } + if (!fast_flag) { + /* Run the test using a seven level tree, with us stepping through */ + /* the bottom 6 trees during the test */ + /* This works out some of the key storage with CACHE_SIG */ + param_set_t lm_array[7] = { LMS_SHA256_N32_H5, LMS_SHA256_N32_H5, + LMS_SHA256_N32_H5, LMS_SHA256_N32_H5, + LMS_SHA256_N32_H5, LMS_SHA256_N32_H5, + LMS_SHA256_N32_H5 }; + static param_set_t ots_array[7] = { LMOTS_SHA256_N32_W2, + LMOTS_SHA256_N32_W2, LMOTS_SHA256_N32_W2, + LMOTS_SHA256_N32_W2, LMOTS_SHA256_N32_W2, + LMOTS_SHA256_N32_W2, LMOTS_SHA256_N32_W2 }; + if (!do_test( 7, lm_array, ots_array, fast_flag, quiet_flag, + (1UL<<30) - 100, 50.0, 100.0 )) { + return false; + } + } + + if (!quiet_flag) printf( "\n" ); + + return true; +} diff --git a/test_h25.c b/test_h25.c index bd37a3b..c525d7a 100644 --- a/test_h25.c +++ b/test_h25.c @@ -64,7 +64,7 @@ bool test_h25(bool fast_flag, bool quiet_flag) { printf( " Generated public key\n" ); fflush(stdout); } - struct hss_working_key *w = hss_load_private_key(NULL, privkey, + struct hss_working_key *w = hss_load_private_key(NULL, NULL, privkey, 100000, aux, sizeof aux, 0 ); if (!w) { printf( "Error loading working key\n" ); @@ -81,7 +81,7 @@ bool test_h25(bool fast_flag, bool quiet_flag) { char message[30]; sprintf( message, "Message %ld", i ); unsigned message_len = strlen(message); - bool success = hss_generate_signature( w, NULL, privkey, + bool success = hss_generate_signature( w, message, message_len, sig, sig_size, 0 ); diff --git a/test_hss.c b/test_hss.c index cc7bd96..fe4fd39 100644 --- a/test_hss.c +++ b/test_hss.c @@ -6,6 +6,7 @@ #include #include #include "test_hss.h" +#include "hss.h" /* * This is the list of tests we 
know about @@ -16,7 +17,7 @@ static struct { const char *test_name; /* Extended description */ bool warn_expense; /* Should we warn that this test */ /* will take a while in -full mode */ - bool (*test_enabled)(bool); /* Check if this tests is enabled */ + bool (*test_enabled)(bool); /* Check if this test is enabled */ } test_list[] = { { "testvector", test_testvector, "test vectors from the draft", false }, { "keygen", test_keygen, "key generation function test", false }, @@ -32,6 +33,8 @@ static struct { { "thread", test_thread, "threading logic test", false, check_threading_on }, { "h25", test_h25, "H=25 test", true, check_h25 }, + { "fault", test_fault, "fault test", true, check_fault_enabled }, + { "update", test_update, "NVRAM update test", false }, /* Add more here */ }; @@ -119,5 +122,14 @@ int main( int argc, char **argv ) { exit(EXIT_FAILURE); /* FAILURE == We didn't pass the tests */ } - return run_tests( tests_to_run, force_tests, fast_flag, quiet_flag ); + int success = run_tests( tests_to_run, force_tests, + fast_flag, quiet_flag ); + + /* Now that we've run the tests, check if there were memory leaks */ + if (!hss_report_memory_leak()) { + /* A reported memory leak counts as a failure */ + success = EXIT_FAILURE; + } + + return success; } diff --git a/test_hss.h b/test_hss.h index 2ed8784..3ba861c 100644 --- a/test_hss.h +++ b/test_hss.h @@ -15,8 +15,11 @@ extern bool test_key_load(bool fast_flag, bool quiet_flag); extern bool test_reserve(bool fast_flag, bool quiet_flag); extern bool test_thread(bool fast_flag, bool quiet_flag); extern bool test_h25(bool fast_flag, bool quiet_flag); +extern bool test_fault(bool fast_flag, bool quiet_flag); +extern bool test_update(bool fast_flag, bool quiet_flag); extern bool check_threading_on(bool fast_flag); extern bool check_h25(bool fast_flag); +extern bool check_fault_enabled(bool fast_flag); #endif /* TEST_HSS_H_ */ diff --git a/test_keygen.c b/test_keygen.c index cd06021..d196893 100644 --- a/test_keygen.c 
+++ b/test_keygen.c @@ -37,12 +37,12 @@ static bool ignore_priv_key(unsigned char *priv_key, size_t len_priv_key, void * /* This is a function we use to pull in private keys, and record them */ struct priv_key_reader { size_t length; - unsigned char priv_key[100]; + unsigned char priv_key[HSS_MAX_PRIVATE_KEY_LEN]; }; static bool do_update_priv_key(unsigned char *priv_key, size_t len_priv_key, void *context) { struct priv_key_reader *p = context; p->length = len_priv_key; - if (len_priv_key > 100) return false; + if (len_priv_key > HSS_MAX_PRIVATE_KEY_LEN) return false; memcpy( p->priv_key, priv_key, len_priv_key); return true; } @@ -276,6 +276,8 @@ bool test_keygen(bool fast_flag, bool quiet_flag) { { unsigned char pubkey[HSS_MAX_PUBLIC_KEY_LEN]; unsigned char privkey[HSS_MAX_PRIVATE_KEY_LEN]; + struct hss_extra_info info; + hss_init_extra_info( &info ); /* Make sure that if we get the same priv key, whether we use an */ /* update_private_key function, or if we don't */ @@ -283,8 +285,8 @@ bool test_keygen(bool fast_flag, bool quiet_flag) { if (!hss_generate_private_key( rand_1, default_d, default_lm_type, default_ots_type, NULL, privkey, pubkey, default_pubkey_size, - NULL, 0, 0)) { - printf( "Huh, it worked last time???\n" ); + NULL, 0, &info)) { + printf( "Huh, it worked last time A %d\n", info.error_code ); return false; } /* Writing the private key to an update function */ @@ -292,8 +294,8 @@ bool test_keygen(bool fast_flag, bool quiet_flag) { if (!hss_generate_private_key( rand_1, default_d, default_lm_type, default_ots_type, do_update_priv_key, &reader, pubkey, default_pubkey_size, - NULL, 0, 0)) { - printf( "Huh, it worked last time???\n" ); + NULL, 0, &info)) { + printf( "Huh, it worked last time B %d\n", info.error_code ); return false; } @@ -305,8 +307,6 @@ bool test_keygen(bool fast_flag, bool quiet_flag) { } /* Make sure it fails if the update_priv_key function fails */ - struct hss_extra_info info; - hss_init_extra_info( &info ); if 
(hss_generate_private_key( rand_1, default_d, default_lm_type, default_ots_type, update_privkey_fail, NULL, pubkey, default_pubkey_size, @@ -432,7 +432,7 @@ static bool gen_signature( unsigned char *privkey, unsigned char *sig, size_t sig_len, const unsigned char *pubkey) { /* Step 1: load the working key */ - struct hss_working_key *w = hss_load_private_key(NULL, privkey, + struct hss_working_key *w = hss_load_private_key(NULL, NULL, privkey, 0, aux_data, aux_len, 0 ); if (!w) { printf( "Error loading working key\n" ); @@ -441,7 +441,7 @@ static bool gen_signature( unsigned char *privkey, /* Step 2: use the working key to sign a message */ static const unsigned char message[3] = "bcd"; - bool success = hss_generate_signature( w, NULL, privkey, + bool success = hss_generate_signature( w, message, sizeof message, sig, sig_len, 0 ); if (!success) { diff --git a/test_keyload.c b/test_keyload.c index df278ec..d09a457 100644 --- a/test_keyload.c +++ b/test_keyload.c @@ -44,6 +44,8 @@ static bool generate_random(void *output, size_t length) { return true; } +static void *private_key_pointer; + bool test_key_load(bool fast_flag, bool quiet_flag) { bool success_flag = false; param_set_t parm_set[3] = { PARM_SET, PARM_SET, PARM_SET }; @@ -65,19 +67,31 @@ bool test_key_load(bool fast_flag, bool quiet_flag) { unsigned char aux_data[2000]; /* Generate the master private key that we'll use for everyone */ + private_key_pointer = private_key; + unsigned char master_private_key[ HSS_MAX_PRIVATE_KEY_LEN ]; if (!hss_generate_private_key( generate_random, LEVELS, parm_set, ots_parm_set, - NULL, private_key, + NULL, master_private_key, public_key, len_public_key, aux_data, sizeof aux_data, 0)) { printf( "Public/private key gen failed\n" ); return false; } + private_key_pointer = NULL; int i; struct hss_working_key *w[ MAX_ITER+1 ]; - for (i = 0; i <= MAX_ITER; i++) w[i] = 0; + unsigned char *priv_key[ MAX_ITER+1 ]; + for (i = 0; i <= MAX_ITER; i++) { w[i] = 0; priv_key[i] = 0; } + + 
struct hss_working_key *master_w = hss_load_private_key( + 0, 0, master_private_key, + 0, aux_data, sizeof aux_data, 0); + if (!master_w) { + printf( "Master load failed\n" ); + return false; + } unsigned iter; if (fast_flag) iter = FAST_ITER; else iter = MAX_ITER; @@ -112,11 +126,22 @@ if (len_signature == 0) return false; /* a tree-walking bug */ size_t memory_target = (i % 7 == 5) ? 0 : 30000; - /* Create a fresh working set at the current index*/ - w[i] = hss_load_private_key( NULL, private_key, + /* Create a fresh working set at the current index */ + /* Of course, in practice, we should *never* copy the private keys */ + /* around like this */ + private_key_pointer = private_key; + int private_key_len = hss_get_private_key_len(LEVELS, + parm_set, ots_parm_set); + priv_key[i] = malloc( private_key_len ); + if (!priv_key[i]) { printf( "Out of memory\n" ); goto failed; } + memcpy( priv_key[i], master_private_key, private_key_len ); + + struct hss_extra_info info = { 0 }; + w[i] = hss_load_private_key( 0, 0, priv_key[i], memory_target, - (i % 3 == 1) ? NULL : aux_data, sizeof aux_data, 0 ); - if (!w[i]) { printf( "Out of memory\n" ); goto failed; } + (i % 3 == 1) ? NULL : aux_data, sizeof aux_data, &info ); + private_key_pointer = NULL; + if (!w[i]) { printf( "load error %d at step %d\n", (int)info.error_code, i ); goto failed; } memcpy( orig_private_key, private_key, len_private_key ); @@ -124,12 +149,17 @@ if (len_signature == 0) return false; char text[ 100 ]; unsigned len_text = sprintf( text, "Message #%d", i ); - if (!hss_generate_signature( w[0], NULL, private_key, + /* Generate a signature from the master key. 
This has the side */ + /* effect of incrementing the master private key */ + private_key_pointer = private_key; + if (!hss_generate_signature( master_w, text, len_text, signature, len_signature, 0)) { printf( "\nMaster generate signature failed\n" ); goto failed; } + private_key_pointer = NULL; + /* If we're doing a regression test, we really have to actually */ /* check the signatures, even if it's not the point of the test */ if (!hss_validate_signature( public_key, text, @@ -141,15 +171,17 @@ if (len_signature == 0) return false; /* Now, go through and see if all the other working keys generate */ /* the same signature */ int j; - for (j=1; j<=i; j++) { + for (j=0; j<=i; j++) { memcpy( copy_private_key, orig_private_key, len_private_key ); - if (!hss_generate_signature( w[j], NULL, copy_private_key, + private_key_pointer = copy_private_key; + if (!hss_generate_signature( w[j], text, len_text, copy_signature, len_signature, 0)) { printf( "\nGenerate signature %d failed\n", j ); goto failed; } + private_key_pointer = NULL; /* The signature and the private key should be the same as */ /* the master */ @@ -173,6 +205,9 @@ if (len_signature == 0) return false; free(signature); free(copy_signature); for (i = 0; i <= MAX_ITER; i++) hss_free_working_key(w[i]); + for (i = 0; i <= MAX_ITER; i++) free(priv_key[i]); + hss_free_working_key(master_w); + private_key_pointer = NULL; return success_flag; } diff --git a/test_load.c b/test_load.c index 0ad1ef3..a2c5785 100644 --- a/test_load.c +++ b/test_load.c @@ -9,6 +9,7 @@ #include "hss.h" #include #include +#include static bool rand_1( void *output, size_t len) { unsigned char *p = output; @@ -55,7 +56,7 @@ static bool test_aux( param_set_t lm_setting ) { /* Now, load the working key */ struct hss_working_key *w = hss_load_private_key( - NULL, priv_key, 0, aux_data, aux_size, 0 ); + NULL, NULL, priv_key, 0, aux_data, aux_size, 0 ); if (!w) { printf( "Error loading private key\n" ); free(sig); @@ -64,8 +65,7 @@ static bool 
test_aux( param_set_t lm_setting ) { /* Sign a test message */ static unsigned char test_message[1] = "a"; - if (!hss_generate_signature(w, NULL, priv_key, - test_message, sizeof test_message, + if (!hss_generate_signature(w, test_message, sizeof test_message, sig, len_sig, 0)) { hss_free_working_key(w); printf( "Error generating signature\n" ); @@ -137,6 +137,63 @@ static bool load_key( int *index, unsigned char priv_key[][HSS_MAX_PRIVATE_KEY_L return false; } +/* + * This verifies that we detect a corrupted private key + */ +#define WHICH_GOOD 18 /* Which byte we don't cause an error with */ +static bool test_corrupt_private_key(void) { + unsigned char priv_key[HSS_MAX_PRIVATE_KEY_LEN]; + unsigned char pub_key[HSS_MAX_PUBLIC_KEY_LEN]; + param_set_t lm[1] = { LMS_SHA256_N32_H5 }; + param_set_t ots[1] = { LMOTS_SHA256_N32_W2 }; + unsigned char aux[1000]; + + if (!hss_generate_private_key( rand_1, 1, lm, ots, + 0, priv_key, + pub_key, sizeof pub_key, + aux, sizeof aux, + 0 )) { + return false; + } + size_t len_priv_key = hss_get_private_key_len(1, lm, ots); + if (!len_priv_key) return false; + + size_t i; + for (i=0; i> 8); + + struct hss_extra_info info; + hss_init_extra_info( &info ); + + struct hss_working_key *w = hss_load_private_key( + 0, 0, priv_key_corrupt, + 0, aux, sizeof aux, &info ); + if (w) { + hss_free_working_key(w); + if (i != WHICH_GOOD) { + printf( "Corrupted key not detected\n" ); + return false; + } + } else { + if (i == WHICH_GOOD) { + printf( "Good load failed\n" ); + return false; + } + if (hss_extra_info_test_error_code(&info) != + hss_error_bad_private_key ) { + printf( "Unexpected error type %d\n", + hss_extra_info_test_error_code(&info) ); + return false; + } + } + } + + return true; +} + bool test_load(bool fast_flag, bool quiet_flag) { /* @@ -174,7 +231,7 @@ bool test_load(bool fast_flag, bool quiet_flag) { bool expected_success = (i == j); struct hss_extra_info info = { 0 }; bool success = hss_generate_working_key( - NULL, priv_key[i], 
NULL, 0, + NULL, NULL, priv_key[i], NULL, 0, w[j], &info ); if (success != expected_success) { printf( "Error: for (%d, %d), got success %d\n", i, j, success ); @@ -193,5 +250,7 @@ bool test_load(bool fast_flag, bool quiet_flag) { all_done: for (i = 0; i #include -static int rand_seed; +static unsigned rand_seed; static int my_rand(void) { rand_seed += rand_seed*rand_seed | 5; return rand_seed >> 9; @@ -29,6 +33,7 @@ static unsigned long last_seqno; static bool got_update; static bool got_error; static bool hit_end; +static int max_len_private_key; /* The actual length of the private key */ static bool read_private_key(unsigned char *private_key, size_t len_private_key, void *context) { @@ -39,17 +44,25 @@ static bool read_private_key(unsigned char *private_key, static bool update_private_key(unsigned char *private_key, size_t len_private_key, void *context) { - if (len_private_key > HSS_MAX_PRIVATE_KEY_LEN || len_private_key < 8) return false; + + if (len_private_key > HSS_MAX_PRIVATE_KEY_LEN || len_private_key < 16) return false; memcpy( priv_key, private_key, len_private_key ); + /* Check to see if the update actually reflected everything */ + /* that actually changed in the private key */ + if (0 != memcmp( priv_key, private_key, max_len_private_key )) { + /* Something was wrong - report an error */ + return false; + } + got_update = true; hit_end = false; got_error = false; int i; for (i=0; i<8; i++) { - if (private_key[i] != 0xff) break; + if (private_key[i+4] != 0xff) break; } if (i == 8) { hit_end = true; @@ -59,7 +72,7 @@ static bool update_private_key(unsigned char *private_key, /* Our tests never have seqno's larger than 2**32-1 */ /* If we see any larger claimed, it's an error */ for (i=0; i<4; i++) { - if (private_key[i] != 0x00) { + if (private_key[i+4] != 0x00) { got_error = true; return true; } @@ -68,27 +81,27 @@ static bool update_private_key(unsigned char *private_key, /* Pull out the sequence number from the private key */ last_seqno = 0; for 
(i=4; i<8; i++) { - last_seqno = 256*last_seqno + private_key[i]; + last_seqno = 256*last_seqno + private_key[i+4]; } return true; } -bool test_reserve(bool fast_flag, bool quiet_flag) { +static bool do_test( int default_d, param_set_t *default_lm_type, + param_set_t *default_ots_type, bool fast_flag, + bool verify_sig_index, bool expect_update_on_32) { int reserve, do_manual_res; - /* d=1 makes it esay to extract the sequence number from */ - /* the signature */ - int default_d = 1; - param_set_t default_lm_type[1] = { LMS_SHA256_N32_H10 }; - param_set_t default_ots_type[1] = { LMOTS_SHA256_N32_W2 }; + max_len_private_key = hss_get_private_key_len( default_d, + default_lm_type, default_ots_type ); for (do_manual_res = 0; do_manual_res <= 1; do_manual_res++) { /* In full mode, we also run the tests skipping all manual */ /* reservations; this makes sure that the autoreservations are */ /* tested in all situations */ if (fast_flag && !do_manual_res) continue; - for (reserve = 0; reserve < 40; reserve++) { + int max_reserve = (fast_flag ? 
25 : 50); + for (reserve = 0; reserve < max_reserve; reserve++) { rand_seed = 2*reserve + do_manual_res; unsigned char pub_key[ 200 ]; unsigned char aux_data[ 200 ]; @@ -103,7 +116,7 @@ bool test_reserve(bool fast_flag, bool quiet_flag) { } struct hss_working_key *w = hss_load_private_key( - read_private_key, NULL, 50000, + read_private_key, update_private_key, NULL, 50000, aux_data, sizeof aux_data, NULL ); if (!w) { printf( "Error: unable to load private key\n" ); @@ -119,7 +132,7 @@ bool test_reserve(bool fast_flag, bool quiet_flag) { } unsigned i; - unsigned reserved = 0; /* Our model for how many are reserved */ + int reserved = 0; /* Our model for how many are reserved */ for (i=0; i<=1024; i++) { /* During the manual_res test, we randomly ask for manual */ @@ -127,8 +140,7 @@ bool test_reserve(bool fast_flag, bool quiet_flag) { if (do_manual_res && (my_rand() & 0x1f) == 0x0d) { unsigned manual_res = my_rand() & 0x0f; got_update = false; - if (!hss_reserve_signature(w, update_private_key, NULL, - manual_res, NULL)) { + if (!hss_reserve_signature(w, manual_res, NULL)) { hss_free_working_key(w); printf( "Error: unable to do manual reserve\n" ); return false; @@ -174,8 +186,7 @@ bool test_reserve(bool fast_flag, bool quiet_flag) { got_update = false; struct hss_extra_info info = { 0 }; unsigned char signature[ 16000 ]; - if (!hss_generate_signature(w, update_private_key, NULL, - message, len_message, + if (!hss_generate_signature(w, message, len_message, signature, sizeof signature, &info )) { hss_free_working_key(w); @@ -184,15 +195,18 @@ bool test_reserve(bool fast_flag, bool quiet_flag) { } /* Make sure that the index used in the signature is what we */ - /* expect */ - unsigned long sig_index = (signature[4] << 24UL) + - (signature[5] << 16UL) + - (signature[6] << 8UL) + - (signature[7] ); - if (i != sig_index) { - hss_free_working_key(w); - printf( "Error: unexpected signature index\n" ); - return false; + /* expect. 
It's trickier when using a level > 1 param set */ + /* and doesn't really do any extra testing, so we skip it */ + if (verify_sig_index) { + unsigned long sig_index = (signature[4] << 24UL) + + (signature[5] << 16UL) + + (signature[6] << 8UL) + + (signature[7] ); + if (i != sig_index) { + hss_free_working_key(w); + printf( "Error: unexpected signature index\n" ); + return false; + } } if (got_update && got_error) { @@ -201,10 +215,19 @@ bool test_reserve(bool fast_flag, bool quiet_flag) { "to illegal value\n" ); return false; } - if (reserved > 0 && i < 1023) { + + /* Compute whether we expected an update */ + bool expected_update = (reserved <= 0 || i == 1023); + /* When we are in CACHE_SIG mode, we'll also get updates when */ + /* we step into a tree that is partially reserved */ + if (expect_update_on_32 && (i % 32) == 31 && reserved < 32) { + expected_update = true; + } + + if (!expected_update) { if (got_update) { hss_free_working_key(w); - printf( "Error: siganture unexpectedly set " + printf( "Error: signature unexpectedly set " "private key " ); return false; } @@ -224,6 +247,8 @@ bool test_reserve(bool fast_flag, bool quiet_flag) { } } else { int expected_seqno = i + 1 + reserve; + int expected_seqno_2 = i + reserved; + if (expected_seqno_2 > expected_seqno) expected_seqno = expected_seqno_2; if (expected_seqno >= 1024) expected_seqno = 1023; if (hit_end) { hss_free_working_key(w); @@ -237,7 +262,9 @@ bool test_reserve(bool fast_flag, bool quiet_flag) { "unexpected sequence number\n" ); return false; } - reserved = reserve; + reserved--; + if (reserved < reserve) + reserved = reserve; } } if (hss_extra_info_test_last_signature( &info )) { @@ -250,3 +277,37 @@ bool test_reserve(bool fast_flag, bool quiet_flag) { return true; } + +/* + * This tests if the user has configured FAULT_CACHE_SIG + */ +static bool check_if_cache_sig_is_on(void) { + return hss_is_fault_hardening_on( 1 ); +} + +bool test_reserve(bool fast_flag, bool quiet_flag) { + + { + /* d=1 
makes it esay to extract the sequence number from */ + /* the signature */ + int default_d = 1; + param_set_t default_lm_type[1] = { LMS_SHA256_N32_H10 }; + param_set_t default_ots_type[1] = { LMOTS_SHA256_N32_W2 }; + + if (!do_test( default_d, default_lm_type, default_ots_type, + fast_flag, true, false )) return false; + } + { + /* try it again with a two level tree. We actually do this to */ + /* stress out the FAULT_CACHE_SIG logic, which has some interaction */ + /* with the autoreserve logic */ + int default_d = 2; + param_set_t default_lm_type[2] = { LMS_SHA256_N32_H5, LMS_SHA256_N32_H5 }; + param_set_t default_ots_type[2] = { LMOTS_SHA256_N32_W2, LMOTS_SHA256_N32_W2 }; + + if (!do_test( default_d, default_lm_type, default_ots_type, + fast_flag, false, check_if_cache_sig_is_on() )) return false; + } + + return true; +} diff --git a/test_sign.c b/test_sign.c index 56e22bf..84dd53a 100644 --- a/test_sign.c +++ b/test_sign.c @@ -1,5 +1,6 @@ #include #include +#include #include "hss.h" #include "test_hss.h" @@ -10,8 +11,17 @@ static bool rand_1(void *output, size_t len) { return true; } -static bool update_fail(unsigned char *priv_key, size_t len_priv_key, void *context) { - return false; +static bool read_private_key(unsigned char *priv_key, size_t len_priv_key, void *context) { + memcpy( priv_key, context, len_priv_key ); + return true; +} + +static bool force_fail = false; + +static bool update_private_key(unsigned char *priv_key, size_t len_priv_key, void *context) { + if (force_fail) return false; + memcpy( context, priv_key, len_priv_key ); + return true; } static bool all_zeros(unsigned char *p, size_t len) { @@ -97,15 +107,17 @@ static bool test_parm( int d, long num_sig, ... 
) { if (!sig) return false; unsigned char privkey[HSS_MAX_PRIVATE_KEY_LEN]; + force_fail = false; if (!hss_generate_private_key( rand_1, d, lm_type, ots_type, - NULL, privkey, pubkey, pubkey_size, + update_private_key, privkey, pubkey, pubkey_size, NULL, 0, 0)) { printf( "Pubkey gen failure\n" ); free(sig); return false; } - struct hss_working_key *w = hss_load_private_key(NULL, privkey, + struct hss_working_key *w = hss_load_private_key(read_private_key, + update_private_key, privkey, 0, NULL, 0, 0 ); if (!w) { printf( "Error loading working key\n" ); @@ -120,7 +132,7 @@ static bool test_parm( int d, long num_sig, ... ) { { /* Try to generate a signature with a buffer that's too short */ struct hss_extra_info info = { 0 }; - bool success = hss_generate_signature( w, NULL, privkey, + bool success = hss_generate_signature( w, message, sizeof message, sig, sig_size-1, &info ); if (success) { @@ -135,30 +147,13 @@ static bool test_parm( int d, long num_sig, ... ) { free(sig); return false; } - - /* Try to generate a signature with a buffer when the update fails */ - success = hss_generate_signature( w, update_fail, NULL, - message, sizeof message, - sig, sig_size, &info ); - if (success || !all_zeros(sig, sig_size)) { - printf( "Error: signature succeeded when key update failed\n" ); - hss_free_working_key(w); - free(sig); - return false; - } - if (hss_extra_info_test_error_code(&info) != hss_error_private_key_write_failed) { - printf( "Error: update failure gives wrong error\n" ); - hss_free_working_key(w); - free(sig); - return false; - } } bool retval = true; for (i=0; i<2000; i++) { struct hss_extra_info info; hss_init_extra_info( &info ); - bool success = hss_generate_signature( w, NULL, privkey, + bool success = hss_generate_signature( w, message, sizeof message, sig, sig_size, &info ); @@ -250,6 +245,32 @@ static bool test_parm( int d, long num_sig, ... 
) { if (offset != sig_size) goto failed; } + if (i == 2000) { + struct hss_extra_info info; + hss_init_extra_info( &info ); + + /* Try to generate a signature with a buffer when the update fails */ + /* We do this at the end because it'll advance the current count, */ + /* which would the above test doesn't expect */ + force_fail = true; + bool success = hss_generate_signature( w, + message, sizeof message, + sig, sig_size, &info ); + force_fail = false; + if (success || !all_zeros(sig, sig_size)) { + printf( "Error: signature succeeded when key update failed\n" ); + hss_free_working_key(w); + free(sig); + return false; + } + if (hss_extra_info_test_error_code(&info) != hss_error_private_key_write_failed) { + printf( "Error: update failure gives wrong error\n" ); + hss_free_working_key(w); + free(sig); + return false; + } + } + hss_free_working_key(w); free(sig); return retval; diff --git a/test_sign_inc.c b/test_sign_inc.c index de24302..3474d55 100644 --- a/test_sign_inc.c +++ b/test_sign_inc.c @@ -18,19 +18,9 @@ static bool generate_random(void *output, size_t length) { return true; } -/* We have no reason to write the key updates anywhere */ -static bool ignore_update(unsigned char *private_key, size_t len, void *ctx) { - return true; -} - static bool run_test(int d, param_set_t *lm_array, param_set_t *lm_ots_array, unsigned num_iter, bool at_end) { - size_t len_private_key = hss_get_private_key_len(d, lm_array, lm_ots_array ); - if (len_private_key == 0 || len_private_key > HSS_MAX_PRIVATE_KEY_LEN) { - printf( " Len private key failed\n" ); - return false; - } unsigned char private_key[HSS_MAX_PRIVATE_KEY_LEN]; unsigned len_public_key = hss_get_public_key_len(d, lm_array, lm_ots_array ); @@ -60,13 +50,16 @@ static bool run_test(int d, param_set_t *lm_array, param_set_t *lm_ots_array, return false; } + unsigned char private_key_2[HSS_MAX_PRIVATE_KEY_LEN]; + memcpy( private_key_2, private_key, HSS_MAX_PRIVATE_KEY_LEN ); + /* Load the private key into memory 
(twice!) */ struct hss_working_key *w = hss_load_private_key( - NULL, private_key, + NULL, NULL, private_key, 0, /* Minimal memory */ aux_data, sizeof aux_data, 0 ); struct hss_working_key *w2 = hss_load_private_key( - NULL, private_key, + NULL, NULL, private_key_2, 0, /* Minimal memory */ aux_data, sizeof aux_data, 0 ); if (!w || !w2) { @@ -87,8 +80,7 @@ static bool run_test(int d, param_set_t *lm_array, param_set_t *lm_ots_array, /* Generate a signature using the standard API */ unsigned char message[3] = "ABC"; - if (!hss_generate_signature( w, ignore_update, NULL, - message, sizeof message, + if (!hss_generate_signature( w, message, sizeof message, sig_1, len_sig, 0 )) { printf( " *** failed normal signature\n" ); hss_free_working_key(w); @@ -101,8 +93,7 @@ static bool run_test(int d, param_set_t *lm_array, param_set_t *lm_ots_array, struct hss_sign_inc ctx; struct hss_extra_info info; hss_init_extra_info( &info ); - if (!hss_sign_init(&ctx, w2, ignore_update, NULL, - sig_2, len_sig, &info )) { + if (!hss_sign_init(&ctx, w2, sig_2, len_sig, &info )) { printf( " *** failed signature init\n" ); hss_free_working_key(w); hss_free_working_key(w2); @@ -121,7 +112,6 @@ static bool run_test(int d, param_set_t *lm_array, param_set_t *lm_ots_array, return false; } - if (!hss_sign_update( &ctx, "A", 1) || !hss_sign_update( &ctx, "BC", 2)) { printf( " *** failed signature update\n" ); @@ -154,8 +144,7 @@ static bool run_test(int d, param_set_t *lm_array, param_set_t *lm_ots_array, if (at_end) { struct hss_sign_inc ctx; struct hss_extra_info info = { 0 }; - if (hss_sign_init(&ctx, w2, ignore_update, NULL, - sig_2, len_sig, &info )) { + if (hss_sign_init(&ctx, w2, sig_2, len_sig, &info )) { printf( " *** signinit succeeded when it should have failed\n" ); hss_free_working_key(w); hss_free_working_key(w2); @@ -186,6 +175,7 @@ static bool run_test_2(int d, param_set_t *lm_array, param_set_t *lm_ots_array, return false; } unsigned char private_key[HSS_MAX_PRIVATE_KEY_LEN]; + 
unsigned char private_key_2[HSS_MAX_PRIVATE_KEY_LEN]; unsigned len_public_key = hss_get_public_key_len(d, lm_array, lm_ots_array ); if (len_public_key == 0 || len_public_key > HSS_MAX_PUBLIC_KEY_LEN) { @@ -213,14 +203,15 @@ static bool run_test_2(int d, param_set_t *lm_array, param_set_t *lm_ots_array, printf( " Gen private key failed\n" ); return false; } + memcpy( private_key_2, private_key, HSS_MAX_PRIVATE_KEY_LEN ); /* Load the private key into memory (twice!) */ struct hss_working_key *w = hss_load_private_key( - NULL, private_key, + NULL, NULL, private_key, 0, /* Minimal memory */ aux_data, sizeof aux_data, 0 ); struct hss_working_key *w2 = hss_load_private_key( - NULL, private_key, + NULL, NULL, private_key_2, 0, /* Minimal memory */ aux_data, sizeof aux_data, 0 ); if (!w || !w2) { @@ -244,8 +235,7 @@ static bool run_test_2(int d, param_set_t *lm_array, param_set_t *lm_ots_array, for (i = 0; i +#include +#include +#include +#include "hss.h" +#include "test_hss.h" + +/* + * These two values are constants based on the config settings + */ +static int size; /* The base size of the update */ +static int increment; /* The # of bytes of increase for every signed pk */ + /* we need to update */ + +static unsigned my_rand_word(void) { + static uint_fast32_t n = 0; + n += (n*n) | 5; + return n >> 16; +} + +static bool my_rand(void *output, size_t length) { + size_t i; + unsigned char *out = output; + for (i=0; i HSS_MAX_PRIVATE_KEY_LEN) return false; + memcpy( context->pk, private_key, len ); + context->did_update += 1; + context->len_update = len; + return true; +} + +static bool read_key( unsigned char *private_key, size_t len, void *ctx) { + struct update *context = ctx; + if (len > HSS_MAX_PRIVATE_KEY_LEN) return false; + memcpy( private_key, context->pk, len ); + return true; +} + +/* + * We use H=5 trees everywhere because we're testing what happens if we + * step between trees; short trees make that happen a lot more often + */ +static param_set_t lm_type[] = 
{ + LMS_SHA256_N32_H5, LMS_SHA256_N32_H5, LMS_SHA256_N32_H5, + LMS_SHA256_N32_H5, LMS_SHA256_N32_H5, LMS_SHA256_N32_H5, + LMS_SHA256_N32_H5, LMS_SHA256_N32_H5, +}; + +/* + * W=8??? Doesn't that slow things down? Well, yes, it does, however the + * the tests are so fast that it's still a reasonable speed (10 seconds on + * on my test setup) + */ +static param_set_t ots_type[] = { + LMOTS_SHA256_N32_W8, LMOTS_SHA256_N32_W8, LMOTS_SHA256_N32_W8, + LMOTS_SHA256_N32_W8, LMOTS_SHA256_N32_W8, LMOTS_SHA256_N32_W8, + LMOTS_SHA256_N32_W8, LMOTS_SHA256_N32_W8, +}; + +/* + * This deduces various sizes of the private key internals (what we update + * when we just do the sequence number; how much that increases when we also + * update a signature); these are ultimately based on config.h settings; + * but we can't just read that file + */ +static bool get_size_increment(void) { + /* The increment is easy; we have an explicit API for that */ + /* We will double-check it below */ + increment = hss_is_fault_hardening_on( 1 ); + + /* The size takes a little more work */ + + /* First, create the key */ + struct update context; + memset( &context, 0, sizeof context ); + unsigned char public_key[ HSS_MAX_PUBLIC_KEY_LEN ]; + unsigned char aux_data[ 500 ]; + bool success = hss_generate_private_key( my_rand, 2, lm_type, ots_type, + update_key, &context, + public_key, sizeof public_key, + aux_data, sizeof aux_data, 0); + if (!success) return false; + + /* Now, load the key into memory */ + struct hss_working_key *w = hss_load_private_key( + read_key, update_key, &context, + 0, aux_data, sizeof aux_data, 0 ); + if (!w) return false; + + /* Now, generate a signature (and record the size of the update) */ + size_t sig_size = hss_get_signature_len( 2, lm_type, ots_type ); + if (sig_size == 0) { hss_free_working_key(w); return false; } + void *sig = malloc( sig_size ); + if (!sig) { hss_free_working_key(w); return false; } + + context.did_update = 0; + if (!hss_generate_signature( w, "abc", 3, 
sig, sig_size, 0 )) { + free(sig); + hss_free_working_key(w); + return false; + } + + free(sig); /* Don't need this buffer anymore */ + + if (context.did_update != 1) { + hss_free_working_key(w); + return false; + } + + /* That signature updated the sequene number (and none of the hashes) */ + size = context.len_update; + + /* Now, do a quick double-check, since we're here anyways */ + context.did_update = 0; + /* Reserving 35 signatures will put us into the next bottom tree, */ + /* requiring us to update that hashed sig */ + if (!hss_reserve_signature( w, 35, 0)) { + hss_free_working_key(w); + return false; + } + hss_free_working_key(w); /* Don't need this anymore */ + + if (context.did_update != 1 || context.len_update != size + increment ) { + return false; + } + return true; +} + +/* + * This inspects the context structure (which is updated on an NVRAM write) + * to see if it matches what we're told to expect + */ +static bool check_update_context(const struct update *context, + bool expect_update, int num_hashes_updated) { + if (!expect_update) { + /* No updates expected; pass only if we didn't see one */ + return context->did_update == 0; + } else { + /* Update expected; verify we got one */ + if (context->did_update != 1) return false; + + /* Verify that it was the expected size */ + if (context->len_update != size + num_hashes_updated * increment ) { + return false; + } + + /* Looks good */ + return true; + } +} + +/* This returns the number of places we need to go forward to reach the */ +/* next level L epoch (that is, internal Merkle tree) */ +static unsigned to_next_step( sequence_t current, int L ) { + unsigned max = 1U << (5*L); + return max - (current & (max-1)); +} + +bool test_update(bool fast_flag, bool quiet_flag) { + /* Check on what the update size and increment is */ + bool success = get_size_increment(); + if (!success) { + return false; + } + + int L; + bool failed = false; + /* Step through the possible hss tree levels */ + for (L=1; L<=8; 
L++) { + + /* Create an HSS tree with L levels (each of which is a height */ + /* 5 tree) */ + struct update context; + memset( &context, 0, sizeof context ); + unsigned char public_key[ HSS_MAX_PUBLIC_KEY_LEN ]; + unsigned char aux_data[ 500 ]; + bool success = hss_generate_private_key( my_rand, L, lm_type, ots_type, + update_key, &context, + public_key, sizeof public_key, + aux_data, sizeof aux_data, 0); + if (!success) return false; + if (context.did_update != 1 || context.len_update != + hss_get_private_key_len( L, lm_type, ots_type )) { + return false; + } + /* The length of the full private key */ + int len_private_key = context.len_update; + + /* Now, load the key into memory */ + context.did_update = 0; + struct hss_working_key *w = hss_load_private_key( + read_key, update_key, &context, + 0, aux_data, sizeof aux_data, 0 ); + if (!w) return false; + /* Verify that it did the expected update */ + /* That should have updated if we have SIG_CACHE on *AND* we're */ + /* doing a multilevel tree (single level trees don't have any */ + /* internal signatures) */ + if (!check_update_context( &context, L > 1 && increment > 0, L-1 )) { + hss_free_working_key(w); + return false; + } + + sequence_t current_pos = 0; /* Our model for the private key's */ + /* current position */ + + /* Allocate a buffer to hold signatures */ + size_t sig_size = hss_get_signature_len( L, lm_type, ots_type ); + if (sig_size == 0) { hss_free_working_key(w); return false; } + void *sig = malloc( sig_size ); + if (!sig) { hss_free_working_key(w); return false; } + + int H; + for (H=0; H UINT_MAX) { + /* hss_reserve_signature takes an unsigned; on this */ + /* platform, that's not big enough for this iteration */ + break; + } + /* Step to right before the next level-H transition */ + int skip = 0; + if (H > 0) { + switch (my_rand_word() & 3) { + case 0: skip = 0; break; + case 1: skip = 31; break; + default: skip = my_rand_word() & 0x1f; break; + } + unsigned step = to_next_step( current_pos, 
H ) - (skip+1); + context.did_update = 0; + if (!hss_reserve_signature( w, step, 0 )) { + failed = true; + break; + } + if (!check_update_context( &context, step>0, H-1 )) { + failed = true; + break; + } + current_pos += step; + + context.did_update = 0; + if (!hss_generate_working_key( read_key, update_key, &context, + aux_data, sizeof aux_data, w, 0 )) { + failed = true; + break; + } + if (!check_update_context( &context, H>1, H-1 )) { + failed = true; + break; + } + } + + /* Now, generate 32 signatures; the skip'th one should trigger */ + /* an update of size H */ + int count_sig = 0; + for (count_sig = 0; count_sig < 32; count_sig++) { + context.did_update = 0; + if (!hss_generate_signature( w, "abc", 3, sig, sig_size, 0 )) { + failed = true; + break; + } + if (L == 1 && count_sig == 31) { + /* Special case; if we hit the end of the key, we're */ + /* supposed to overwrite the entire private key */ + if (context.did_update != 1 || + context.len_update != len_private_key) { + failed = true; + break; + } + } else { + int expect_write; + if (H == 0) { + expect_write = (count_sig == 31) ? 1 : 0; + } else { + expect_write = (count_sig==skip) ? H : 0; + } + if (!check_update_context( &context, true, expect_write )) { + failed = true; + break; + } + current_pos += 1; + } + } + if (failed) break; + + if (L == 1) break; /* With L=1, we just used up the */ + /* entire key. 
Now, we could regen the key */ + /* and start over, however that wouldn't test */ + /* that much more, and so we don't bother */ + + /* + * Now, this rather lengthy section tests our behavior if we + * step into a tree that is entirely reserved (and makes sure we + * don't update in that case) + */ + if (H == 0) continue; /* At the bottom, there's no "next tree" */ + + /* + * Now, advance the current_pos so that it is and the end of + * the current tree + */ + unsigned step = to_next_step( current_pos, H ) + (1<<(5*H)) - + ((my_rand_word() & 0xf)+1); + context.did_update = 0; + if (!hss_reserve_signature( w, step, 0 )) { + failed = true; + break; + } + if (!check_update_context( &context, true, H )) { + failed = true; + break; + } + current_pos += step; + + context.did_update = 0; + if (!hss_generate_working_key( read_key, update_key, &context, + aux_data, sizeof aux_data, w, 0 )) { + failed = true; + break; + } + if (!check_update_context( &context, true, H )) { + failed = true; + break; + } + + /* + * Now, advance the current_pos so that it is two trees from now + */ + step = to_next_step( current_pos, H ) + 2*(1<<(5*H)) + + ((my_rand_word() & 0xf)+1); + context.did_update = 0; + if (!hss_reserve_signature( w, step, 0 )) { + failed = true; + break; + } + if (!check_update_context( &context, true, H )) { + failed = true; + break; + } + current_pos += step; + + /* Now, generate 32 signatures; none of them should trigger an */ + /* update (even though we advance to the next tree) */ + for (count_sig = 0; count_sig < 32; count_sig++) { + context.did_update = 0; + if (!hss_generate_signature( w, "abc", 3, sig, sig_size, 0 )) { + failed = true; + break; + } + if (!check_update_context( &context, false, 0 )) { + failed = true; + break; + } + } + + /* Step to the current position (the next iteration will */ + /* expect us to be current) */ + context.did_update = 0; + if (!hss_generate_working_key( read_key, update_key, &context, + aux_data, sizeof aux_data, w, 0 )) { + 
failed = true; + break; + } + if (!check_update_context( &context, true, H )) { + failed = true; + break; + } + } + + free(sig); + hss_free_working_key(w); + if (failed) return false; + } + + return true; +} diff --git a/test_verify.c b/test_verify.c index 789f948..9f40d39 100644 --- a/test_verify.c +++ b/test_verify.c @@ -169,7 +169,7 @@ static bool do_verify( unsigned char *private_key, unsigned char *public_key, /* Step 1: load the private key into memory */ w = hss_load_private_key( - NULL, private_key, + NULL, NULL, private_key, 0, /* Minimal memory */ aux_data, len_aux_data, 0 ); if (!w) { @@ -180,8 +180,7 @@ static bool do_verify( unsigned char *private_key, unsigned char *public_key, /* Step 2: generate a valid signature */ char test_message[3] = "abc"; - if (!hss_generate_signature( w, NULL, private_key, - test_message, sizeof test_message, + if (!hss_generate_signature( w, test_message, sizeof test_message, signature, signature_len, 0 )) { printf( " *** failed signaing test message\n" ); goto failed; diff --git a/test_verify_inc.c b/test_verify_inc.c index c52867e..94d573b 100644 --- a/test_verify_inc.c +++ b/test_verify_inc.c @@ -101,7 +101,7 @@ bool test_verify_inc(bool fast_flag, bool quiet_flag) { /* Load the private key into memory */ struct hss_working_key *w = hss_load_private_key( - NULL, private_key, + NULL, NULL, private_key, 0, /* Minimal memory */ aux_data, sizeof aux_data, 0 ); if (!w) { @@ -126,8 +126,7 @@ bool test_verify_inc(bool fast_flag, bool quiet_flag) { "nor prohibited by it to the States, are reserved to the States " "respectively, or to the people"; - if (!hss_generate_signature( w, NULL, private_key, - test_message, sizeof test_message, + if (!hss_generate_signature( w, test_message, sizeof test_message, signature, len_signature, 0 )) { printf( " *** failed signing test message\n" ); hss_free_working_key(w); @@ -168,8 +167,7 @@ bool test_verify_inc(bool fast_flag, bool quiet_flag) { int i; unsigned char test_message[] = "The 
powers ..."; - if (!hss_generate_signature( w, NULL, private_key, - test_message, sizeof test_message, + if (!hss_generate_signature( w, test_message, sizeof test_message, signature, len_signature, 0 )) { printf( " *** failed signing test message\n" ); hss_free_working_key(w);