From f72e84bd716bacc4593b442a34f89dfe36e31297 Mon Sep 17 00:00:00 2001
From: Benjamin James <bjames@byu.net>
Date: Sat, 1 Aug 2020 10:55:22 -0600
Subject: [PATCH 1/9] Removing dependency on submodule.

---
 .gitmodules             |  3 ---
 build/README.md         | 31 ++++++++++++++++++++++++++++---
 llvm-project            |  1 -
 projects/CMakeLists.txt |  7 +++++--
 4 files changed, 33 insertions(+), 9 deletions(-)
 delete mode 160000 llvm-project

diff --git a/.gitmodules b/.gitmodules
index 26bd48aa2..e69de29bb 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +0,0 @@
-[submodule "llvm-project"]
-	path = llvm-project
-	url = https://github.com/llvm/llvm-project.git
diff --git a/build/README.md b/build/README.md
index b0d892ea6..5c0469cde 100644
--- a/build/README.md
+++ b/build/README.md
@@ -1,4 +1,29 @@
-Build LLVM by using CMAKE to create the Makefile:
+# Install LLVM
+
+This project depends on LLVM and `clang` v7.0.0.
+
+No support is offered for other versions.
+
+## Option 1: System Packages
+
+#### Ubuntu 18.04 and up:
+
+```
+sudo apt install llvm-7
+sudo apt install clang-7
+```
+
+#### Other systems:
+
+Check the package manager for the system you're on.
+
+## Option 2: Official Releases
+
+Check the [Official Releases page](https://github.com/llvm/llvm-project/releases) for downloads.
+
+## Option 3: Build from Source
+
+Build LLVM and `clang` by using CMake to create the Makefile.  In this directory, run:
 
 ```
 cmake -G "Unix Makefiles" -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Debug -DLLVM_ENABLE_ASSERTIONS=On ../llvm-project/llvm/
@@ -6,10 +31,10 @@ cmake -G "Unix Makefiles" -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Debug
 
 To build with the RISC-V backend enabled, add the flag `-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=RISCV`
 
-Then run make.  Make sure to do a parallel build, otherwise it will take a long time:
+Then run `make`.  Make sure to do a parallel build, otherwise it will take a long time:
 
 ```
 make -j4
 ```
 
-Not enabling debug or assertions will make the compile time faster. However, if you are developing passes, having debug enabled is well worth the wait.
+Not enabling debug or assertions will make the compile time faster. However, if you are developing passes, having debug enabled is well worth the wait.  In fact, developing passes is probably the only reason you'd want to build from source.  Otherwise, just use the package.
diff --git a/llvm-project b/llvm-project
deleted file mode 160000
index 4856a9330..000000000
--- a/llvm-project
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 4856a9330ee01d30e9e11b6c2f991662b4c04b07
diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt
index 61e5a54c0..327a52256 100644
--- a/projects/CMakeLists.txt
+++ b/projects/CMakeLists.txt
@@ -1,13 +1,16 @@
 cmake_minimum_required(VERSION 3.5)
+project(coast)
 
 set ( COAST_BASE_DIR ..)
 if(NOT EXISTS ${COAST_BASE_DIR})
 	message ( FATAL_ERROR " COAST_BASE_DIR (${COAST_BASE_DIR}) is invalid ")
 endif ()
 
-set(LLVM_DIR ../build/cmake/modules/CMakeFiles)
+# set(LLVM_DIR ../build/cmake/modules/CMakeFiles)
 
-find_package ( LLVM REQUIRED CONFIG NO_DEFAULT_PATH)
+find_package ( LLVM 7.0 EXACT REQUIRED CONFIG )
+message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
+message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
 
 list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}")
 

From 38519b3cfa2bf0f9e6d8a27d70d8b1d9ec55e929 Mon Sep 17 00:00:00 2001
From: Benjamin James <bjames@byu.net>
Date: Mon, 3 Aug 2020 09:14:40 -0600
Subject: [PATCH 2/9] Updating build instructions.

---
 README.md             |  7 +++++++
 docs/source/index.rst | 10 ++++++----
 docs/source/setup.rst | 44 +++++++++++++++++++++++++++++++++++++------
 3 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/README.md b/README.md
index a34c2c5de..ee87e001a 100644
--- a/README.md
+++ b/README.md
@@ -3,8 +3,15 @@
 Welcome to the repository for COAST (COmpiler-Assisted Software fault Tolerance), BYU's tool for automated software mitigation! To get started, please refer to our [documentation pages](https://coast-compiler.readthedocs.io/en/latest/).
 
 
+## Dependencies
+
+See [the build folder](build/README.md) for instructions on installation and dependencies.
+
+
 ## Cloning
 
+If you plan to build LLVM from source, use the following commands to clone this repository:
+
 ```
 git clone --recursive -j2 git@github.com:byuccl/coast-private.git
 ```
diff --git a/docs/source/index.rst b/docs/source/index.rst
index f52c10024..34e3bcee3 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -20,18 +20,19 @@ COAST
     cfcss
     release_notes
 
+
 Folder guide
 ==============
 
-build
+boards
 ---------
 
-This folder contains instructions on how to build LLVM, and when built will contain the binaries needed to compile source code.
+This folder has support files needed for the various target architectures we have used in testing COAST.
 
-llvm
+build
 ---------
 
-The source code for LLVM and associated tools.
+This folder contains instructions on how to build LLVM, and when built will contain the binaries needed to compile source code.  Note: building LLVM from source is optional.
 
 projects
 ---------
@@ -43,6 +44,7 @@ tests
 
 Benchmarks we use to validate the correct operation of COAST.
 
+
 Results
 ========
 
diff --git a/docs/source/setup.rst b/docs/source/setup.rst
index 7833892ed..d7dc34da5 100644
--- a/docs/source/setup.rst
+++ b/docs/source/setup.rst
@@ -14,22 +14,50 @@ Prerequisites
 
 - Have a version of Linux that has ``cmake`` and ``make`` installed.
 
-For reference, development of this tool has been done on Ubuntu 16.04.
+For reference, development of this tool has been done on Ubuntu 16.04 and 18.04.
 
-Building LLVM
+Installing LLVM
 ================
 
+There are a few different ways that LLVM and Clang can be installed, depending on your system and preferences.  This project uses LLVM v7.0, so make sure you install the correct version.
+
+Option 1 - System Packages
+----------------------------
+
+With Ubuntu 18.04 and higher, use the following commands:
+
+.. code-block:: bash
+
+    sudo apt install llvm-7
+    sudo apt install clang-7
+
+Other Linux distributions may also have packages available.
+
+Option 2 - Precompiled Binaries
+--------------------------------
+
+You can obtain precompiled binaries from the `official GitHub page <https://github.com/llvm/llvm-project/releases>`_ for the LLVM project.
+
+Option 3 - Build from Source
+------------------------------
+
+If the other two options do not work for your system, or if you prefer to have access to the source files for enhanced debugging purposes, you can build LLVM from source.
+
 - Create a folder to house the repository.  It is recommended that the folder containing this repository be in your home directory.  For example, ``~/coast/``.
 
 - Check out the project:
 
- ``git clone https://github.com/byuccl/coast.git ~/coast``
+.. code-block:: bash
+
+    git clone https://github.com/byuccl/coast.git ~/coast
 
 - Change to the "build" directory and configure the Makefiles.  Example invocation:
 
- ``cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DLLVM_ENABLE_ASSERTIONS=On ../llvm/``
+.. code-block:: bash
 
- See the ``README.md`` in the "build" folder for more information on how to further configure LLVM.
+    cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DLLVM_ENABLE_ASSERTIONS=On ../llvm/
+
+See the ``README.md`` in the "build" folder for more information on how to further configure LLVM.
 
 - Run ``make``.  This may take quite a while, up to an hour or more if you do not parallelize the job.  Adding the flag ``-jn`` allows you to parallelize across ``n`` cores.
 
@@ -37,7 +65,10 @@ Building LLVM
 
 If you wish to add the LLVM binaries to your ``PATH`` variable, add the following to the end of your ``.bashrc`` file:
 
-    ``export PATH="/home/$USER/coast/build/bin:$PATH"``
+.. code-block:: bash
+
+    export PATH="/home/$USER/coast/build/bin:$PATH"
+
 
 Building the Passes
 =====================
@@ -45,5 +76,6 @@ Building the Passes
 To build the passes so they can be used to protect your code:
 
 - Go the "projects" directory
+- Make a new subdirectory called "build" and ``cd`` into it
 - Run ``cmake ..``
 - Run ``make``  (with optional ``-jn`` flag as before)

From fe7fa9389321b9c33b442ceb87ecd36b2b22b6f5 Mon Sep 17 00:00:00 2001
From: Benjamin James <bjames@byu.net>
Date: Mon, 3 Aug 2020 11:11:28 -0600
Subject: [PATCH 3/9] Updating COAST passes, tests, and build system.

---
 projects/dataflowProtection/cloning.cpp       | 1865 ++++++++++++++---
 .../dataflowProtection/dataflowProtection.cpp |   80 +-
 .../dataflowProtection/dataflowProtection.h   |  154 +-
 projects/dataflowProtection/inspection.cpp    |  186 ++
 projects/dataflowProtection/interface.cpp     |  745 +++++++
 .../dataflowProtection/synchronization.cpp    | 1074 +++++++---
 projects/dataflowProtection/utils.cpp         | 1107 ++++------
 projects/dataflowProtection/verification.cpp  | 1122 ++++++++++
 tests/COAST.h                                 |   59 +-
 tests/TMRregression/Makefile                  |   36 +-
 tests/TMRregression/unitTestDriver.py         |  342 ++-
 tests/TMRregression/unitTests/annotations.c   |   21 +
 tests/TMRregression/unitTests/argAttrs.c      |   71 +
 tests/TMRregression/unitTests/argSync.c       |   11 +-
 tests/TMRregression/unitTests/arm_locks.c     |   15 +-
 tests/TMRregression/unitTests/atomics.c       |   11 +
 .../TMRregression/unitTests/cloneAfterCall.c  |   96 +
 tests/TMRregression/unitTests/fibonacci.c     |   50 +
 tests/TMRregression/unitTests/funcPtrStruct.c |   65 +
 .../TMRregression/unitTests/globalPointers.c  |  164 ++
 tests/TMRregression/unitTests/halfProtected.c |   98 +
 tests/TMRregression/unitTests/helloWorld.cpp  |    5 +
 tests/TMRregression/unitTests/linkedList.c    |  273 +++
 tests/TMRregression/unitTests/load_store.c    |   21 +-
 tests/TMRregression/unitTests/mallocTest.c    |   40 +-
 tests/TMRregression/unitTests/nestedCalls.c   |  145 ++
 tests/TMRregression/unitTests/protectedLib.c  |   73 +
 tests/TMRregression/unitTests/ptrArith.c      |   21 +-
 tests/TMRregression/unitTests/replReturn.c    |   54 +
 tests/TMRregression/unitTests/segmenting.c    |    6 +-
 .../TMRregression/unitTests/signalHandlers.c  |   55 +
 tests/TMRregression/unitTests/simd.c          |    6 +
 tests/TMRregression/unitTests/stackAttack.c   |  270 +++
 tests/TMRregression/unitTests/stackProtect.c  |   70 +
 tests/TMRregression/unitTests/structCompare.c |   16 +-
 tests/TMRregression/unitTests/testFuncPtrs.c  |   51 +-
 tests/TMRregression/unitTests/time_c.c        |   11 +-
 tests/TMRregression/unitTests/vecTest.cpp     |    6 +-
 tests/TMRregression/unitTests/verifyOptions.c |   31 +-
 tests/TMRregression/unitTests/zeroInit.c      |   17 +-
 tests/chstone/aes/aes.c                       |   60 +-
 tests/makefiles/Makefile.compile              |   20 +-
 tests/makefiles/Makefile.compile.hiFive1      |    2 +-
 tests/makefiles/Makefile.compile.llvmLLI      |    5 +
 tests/makefiles/Makefile.compile.pynq         |   76 +-
 tests/makefiles/Makefile.compile.ultra96      |    6 +-
 tests/makefiles/Makefile.compile.x86          |  119 +-
 tests/makefiles/Makefile.program              |   16 +-
 tests/makefiles/config                        |    1 +
 49 files changed, 7330 insertions(+), 1518 deletions(-)
 create mode 100644 projects/dataflowProtection/inspection.cpp
 create mode 100644 projects/dataflowProtection/interface.cpp
 create mode 100644 projects/dataflowProtection/verification.cpp
 create mode 100644 tests/TMRregression/unitTests/argAttrs.c
 create mode 100644 tests/TMRregression/unitTests/cloneAfterCall.c
 create mode 100644 tests/TMRregression/unitTests/fibonacci.c
 create mode 100644 tests/TMRregression/unitTests/funcPtrStruct.c
 create mode 100644 tests/TMRregression/unitTests/globalPointers.c
 create mode 100644 tests/TMRregression/unitTests/halfProtected.c
 create mode 100644 tests/TMRregression/unitTests/linkedList.c
 create mode 100644 tests/TMRregression/unitTests/nestedCalls.c
 create mode 100644 tests/TMRregression/unitTests/protectedLib.c
 create mode 100644 tests/TMRregression/unitTests/replReturn.c
 create mode 100644 tests/TMRregression/unitTests/signalHandlers.c
 create mode 100644 tests/TMRregression/unitTests/stackAttack.c
 create mode 100644 tests/TMRregression/unitTests/stackProtect.c

diff --git a/projects/dataflowProtection/cloning.cpp b/projects/dataflowProtection/cloning.cpp
index e3cdc5095..f644f7147 100644
--- a/projects/dataflowProtection/cloning.cpp
+++ b/projects/dataflowProtection/cloning.cpp
@@ -1,4 +1,8 @@
-//This file contains the functions necessary for the cloning logic in dataflowProtection
+/*
+ * cloning.cpp
+ *
+ * This file contains the functions necessary for the cloning logic in dataflowProtection
+ */
 
 #include "dataflowProtection.h"
 
@@ -19,35 +23,58 @@
 #include <llvm/Analysis/AliasSetTracker.h>
 #include <llvm-c/Core.h>
 
-//Arrays from function pointer are partially developed
+using namespace llvm;
+
+
+// Arrays of function pointers are partially developed
 #define NO_FN_PTR_ARRAY
 
-//Command line option
+// Command line options
 extern std::list<std::string> clGlobalsToRuntimeInit;
 extern std::list<std::string> ignoreGlbl;
-extern cl::opt<bool> noMemReplicationFlag;
 extern std::list<std::string> skipLibCalls;
 extern std::list<std::string> coarseGrainedUserFunctions;
+extern std::list<std::string> protectedLib;
+extern cl::opt<bool> noMemReplicationFlag;
 extern cl::opt<bool> verboseFlag;
-extern std::list<std::string> unsupportedFunctions;
+extern cl::opt<bool> noCloneOperandsCheckFlag;
 
-using namespace llvm;
+// other shared variables
+extern std::set<StoreInst*> syncGlobalStores;
+extern std::map<Function*, std::set<int> > noXmrArgList;
+
+/* There are some functions that are not supported.
+ * The list is kept here instead of in the config file because we don't want users touching it.
+ * TODO: with recent changes to COAST, it may be possible to support these (cloneAfterCall)
+ */
+std::set<std::string> unsupportedFunctions = {"fscanf", "scanf", "fgets", "gets", "sscanf", "__isoc99_fscanf"};
+
+// for debug info
+static DIBuilder* dBuilder = nullptr;
+
+/*
+ * NOTE: look at Function::hasAddressTaken() as a way to see if uses of functions are calls or not
+ */
 
 //----------------------------------------------------------------------------//
 // Initialization
 //----------------------------------------------------------------------------//
 void dataflowProtection::populateValuesToClone(Module& M) {
-	//why was this here? Makes it impossible to clone local variables with in-code directives
-	//Because some pointers become stale. Therefore, second set of Instructions that is not volatile
-	// contains the instructions marked as such by the annotations.
+	// Some pointers become stale. Therefore, second set of Instructions that is not volatile
+	//  contains the instructions marked as such by the annotations.
 	instsToClone.clear();
 	instsToClone.insert(instsToCloneAnno.begin(), instsToCloneAnno.end());
-	// globalsToClone.clear();
 	constantExprToClone.clear();
 
+	static std::set<Value*> warnValueLater;
+
+	// make sure DIBuilder set up
+	if (dBuilder == nullptr) {
+		dBuilder = new DIBuilder(M);
+	}
+
 	for (auto F : fnsToClone) {
-		if (std::find(coarseGrainedUserFunctions.begin(), coarseGrainedUserFunctions.end(),
-				F->getName()) != coarseGrainedUserFunctions.end()) {
+		if (isCoarseGrainedFunction(F->getName())) {
 //			errs() << F->getName() << " is coarse grained. Not replicating.\n";
 			continue;
 		}
@@ -60,7 +87,7 @@ void dataflowProtection::populateValuesToClone(Module& M) {
 					continue;
 				}
 
-				//If store instructions not cloned, skip them
+				// If store instructions not cloned, skip them
 				if (noMemReplicationFlag) {
 					if (dyn_cast<StoreInst>(&I)) {
 						continue;
@@ -69,11 +96,17 @@ void dataflowProtection::populateValuesToClone(Module& M) {
 
 				if (CallInst * ci = dyn_cast<CallInst>(&I)) {
 
-					//Don't touch/clone inline assembly
-					if (ci->isInlineAsm())
+					// Don't touch/clone inline assembly
+					if (ci->isInlineAsm()) {
+						continue;
+					}
+
+					// Skip special clone after call
+					if (cloneAfterCallArgMap.find(ci) != cloneAfterCallArgMap.end()) {
 						continue;
+					}
 
-					//Clone constants in the function call
+					// Clone constants in the function call
 					for (unsigned int i = 0; i < ci->getNumArgOperands(); i++) {
 						Value * arg = ci->getArgOperand(i);
 						if (ConstantExpr * e = dyn_cast<ConstantExpr>(arg)) {
@@ -84,21 +117,50 @@ void dataflowProtection::populateValuesToClone(Module& M) {
 					// skip bitcasts and print a warning message, because this might skip more than bitcasts
 					if (!isIndirectFunctionCall(ci, "populateValuesToClone", false)) {
 						Function* cF = ci->getCalledFunction();
+
+						// C standard library header stdatomic.h (the atomic_* functions) is not supported
+						if (cF->getName().startswith("atomic_")) {
+							errs() << err_string << " function \"" << cF->getName() << "\" not supported in.\n";
+							errs() << "COAST does not work well with atomic operations.\n";
+
+							std::exit(-1);
+							assert(false && "Atomic instructions not supported");
+						}
+
 						if (std::find(skipLibCalls.begin(), skipLibCalls.end(),
 								cF->getName()) != skipLibCalls.end()) {
 //							errs() << "Skipping the libcall " << cF->getName() << "\n";
 							continue;
 						}
 
-						//Only replicate coarseGrained user functions
+						// Only replicate coarseGrained user functions
 						if ( !(cF->hasExternalLinkage() && cF->isDeclaration()) ) {
-							if (std::find(coarseGrainedUserFunctions.begin(), coarseGrainedUserFunctions.end(),
-									cF->getName()) == coarseGrainedUserFunctions.end()) {
+							if (!isCoarseGrainedFunction(cF->getName())) {
 //								errs() << cF->getName() << " is coarse-grained user function\n";
 								continue;
 							}
 						}
 
+						if (!isCoarseGrainedFunction(cF->getName())) {
+							// If this isn't in the list of function calls to clone,
+							//  and it's a declaration
+							if (cF->isDeclaration()) {
+								// If none of the operands are going to be cloned,
+								//  then don't need to clone the instruction itself
+								bool opsWillBeCloned = false;
+								for (unsigned opNum = 0; opNum < ci->getNumOperands(); opNum++) {
+									auto op = ci->getOperand(opNum);
+									if (willBeCloned(op)) {
+										opsWillBeCloned = true;
+										break;
+									}
+								}
+								if (!opsWillBeCloned) {
+									continue;
+								}
+							}
+						}
+
 						// skip replicating debug function calls, the debugger only knows about the
 						//  original variable names anyway.
 						if (cF->getName().startswith_lower("llvm.dbg.") ||
@@ -109,38 +171,42 @@ void dataflowProtection::populateValuesToClone(Module& M) {
 					} else {	// it is an indirect function call
 
 						Value* calledValue = ci->getCalledValue();
-//						Value* calledValue = ci->getOperand(0);
 
 						if (auto* cexpr = dyn_cast<ConstantExpr>(calledValue)) {
 
-							//then see if we've got a name for a function in there
+							// then see if we've got a name for a function in there
 							if (Function* indirectF = dyn_cast<Function>(calledValue->stripPointerCasts())) {
 								StringRef indirectName = indirectF->getName();
 //								errs() << "The name of the indirect function called is " << indirectName << "\n";
 
-								//perform the same checks as above for the function name
+								// perform the same checks as above for the function name
 								if (std::find(skipLibCalls.begin(), skipLibCalls.end(),
 										indirectF->getName()) != skipLibCalls.end()) {
 									continue;
 								}
 								if ( !(indirectF->hasExternalLinkage() && indirectF->isDeclaration()) ) {
-									if (std::find(coarseGrainedUserFunctions.begin(), coarseGrainedUserFunctions.end(),
-											indirectName) == coarseGrainedUserFunctions.end()) {
+									if (!isCoarseGrainedFunction(indirectName)) {
 										continue;
 									}
 								}
 							}
 
-							//see if we've got a bitcast
+							// see if we've got a bitcast
 							if (cexpr->isCast()) {
+								// TODO
 								errs() << "We have found a bitcast:\n";
 								errs() << "\t" << *calledValue << "\n";
 							}
 
-						} else {		//if not, print some kind of warning message
-							if (verboseFlag) {
-								errs() << warn_string << " unidentified indirect function call is being added to the clone list:\n";
-								errs() << *calledValue << "\n";
+						}
+						// if not, print some kind of warning message
+						else {
+							if (warnValueLater.find(calledValue) == warnValueLater.end()) {
+								if (verboseFlag) {
+									errs() << warn_string << " unidentified indirect function call is being added to the clone list:\n";
+									errs() << *calledValue << "\n";
+								}
+								warnValueLater.insert(calledValue);
 							}
 						}
 
@@ -148,14 +214,13 @@ void dataflowProtection::populateValuesToClone(Module& M) {
 
 				}
 
-				//We don't clone terminators
-				//Invoke is "designed to operate as a standard call instruction in most regards" - don't clone
+				// We don't clone terminators
+				// Invoke is "designed to operate as a standard call instruction in most regards" - don't clone
 				if (I.isTerminator() || isa<InvokeInst>(I)) {
-					//we do need to clone the invokes if the function they call is marked as coarse-grained
+					// we do need to clone the invokes if the function they call is marked as coarse-grained
 					if (InvokeInst* invInst = dyn_cast<InvokeInst>(&I)) {
-						if (std::find(coarseGrainedUserFunctions.begin(), coarseGrainedUserFunctions.end(),
-								invInst->getCalledFunction()->getName()) != coarseGrainedUserFunctions.end()) {
-							;	//add it to the list
+						if (isCoarseGrainedFunction(invInst->getCalledFunction()->getName())) {
+							;	// add it to the list
 						} else {
 							continue;
 						}
@@ -164,7 +229,7 @@ void dataflowProtection::populateValuesToClone(Module& M) {
 					}
 				}
 
-				//Don't clone stores to external globals - assumed to be devices
+				// Don't clone stores to external globals - assumed to be devices
 				if (StoreInst* SI = dyn_cast<StoreInst>(&I)) {
 					if (GlobalVariable* GV = dyn_cast<GlobalVariable>(SI->getPointerOperand())) {
 						assert(GV && "GV?");
@@ -174,12 +239,11 @@ void dataflowProtection::populateValuesToClone(Module& M) {
 					}
 				}
 
-				//don't clone landingpad instructions; there can only be one at the head of a basic block
+				// don't clone landingpad instructions; there can only be one at the head of a basic block
 				if (isa<LandingPadInst>(&I)) {
 					continue;
 				}
 
-//				if (instsToClone.empty())
 				instsToClone.insert(&I);
 			}
 		}
@@ -193,14 +257,17 @@ void dataflowProtection::populateValuesToClone(Module& M) {
 			continue;
 		}
 
-		//Don't clone ISR function pointers
-		//TODO: not a good way of checking for ISRs
-		if (globalName.startswith("__vector") || globalName.startswith("isr_")) {
-//			errs() << "WARNING: not duplicating global value " << g.getName() << ", assuming it is llvm-created\n";
-			continue;
+		// Don't clone ISR function pointers
+		if (g.getType()->isPointerTy() && g.getNumOperands() == 1) {
+			auto gVal = g.getOperand(0);
+			if (auto gFuncVal = dyn_cast<Function>(gVal)) {
+				if (isISR(*gFuncVal)) {
+					continue;
+				}
+			}
 		}
 
-		//Externally available globals without initializer -> external global
+		// Externally available globals without initializer -> external global
 		if (g.hasExternalLinkage() && !g.hasInitializer())
 			continue;
 
@@ -209,6 +276,10 @@ void dataflowProtection::populateValuesToClone(Module& M) {
 			continue;
 		}
 
+		if (std::find(ignoreGlbl.begin(), ignoreGlbl.end(), g.getName().str()) != ignoreGlbl.end()) {
+			continue;
+		}
+
 		if (xMR_default) {
 			globalsToClone.insert(&g);
 		}
@@ -216,36 +287,33 @@ void dataflowProtection::populateValuesToClone(Module& M) {
 
 }
 
+
 //----------------------------------------------------------------------------//
 // Modify functions
 //----------------------------------------------------------------------------//
 void dataflowProtection::populateFnWorklist(Module& M) {
 
-	//Populate a set with all user-defined functions
+	// Populate a set with all user-defined functions
 	std::set<Function*> fnList;
 	for (auto & fn_it : M) {
-		//check for unsupported functions
-		if (std::find(unsupportedFunctions.begin(), unsupportedFunctions.end(),
-				fn_it.getName()) != unsupportedFunctions.end()) {
-			errs() << "ERROR: \n    " << fn_it.getName() << ": function is not supported!\n\n\n";
-			// definitely will quit
-			std::exit(-1);
-			assert(false && "Function is not supported!");
+		// check for unsupported functions
+		if (unsupportedFunctions.find(fn_it.getName()) != unsupportedFunctions.end()) {
+			errs() << err_string << "\n    " << fn_it.getName() << ": function is not supported!\n\n\n";
+			// don't quit, because application writer may have way of dealing with it
 		}
 
-		//Ignore library calls
+		// Ignore library calls
 		if (fn_it.isDeclaration()) {
 			continue;
 		}
 
-		//Don't erase ISRs
+		// Don't erase ISRs
 		if (isISR(fn_it)) {
 			continue;
 		}
 
 		// skip user marked coarse-grained functions
-		if (std::find(coarseGrainedUserFunctions.begin(), coarseGrainedUserFunctions.end(),
-				fn_it.getName())!=coarseGrainedUserFunctions.end()) {
+		if (isCoarseGrainedFunction(fn_it.getName())) {
 			continue;
 		}
 
@@ -253,9 +321,11 @@ void dataflowProtection::populateFnWorklist(Module& M) {
 	}
 
 
-	//Get a list of all the functions that should not be modified because they
-	//are related to fnToSkip
+	// Get a list of all the functions that should not be modified because they
+	//  are related to fnToSkip
 	bool fnsAdded = true;
+	std::set<CallInst*> skippedIndirectCalls;
+
 	while (fnsAdded) {
 		fnsAdded = false;
 		for (auto & F : fnsToSkip) {
@@ -265,13 +335,21 @@ void dataflowProtection::populateFnWorklist(Module& M) {
 						if (CI->isInlineAsm()) {
 							continue;
 						}
-						//Skip any thing that doesn't have a called function, print warnings
-						if (isIndirectFunctionCall(CI, "populateFnWorklist"))
+						// Skip any thing that doesn't have a called function, print warnings
+						if (isIndirectFunctionCall(CI, "populateFnWorklist", false)) {
+							skippedIndirectCalls.insert(CI);
 							continue;
-						if (CI->getCalledFunction()->isDeclaration()) {
+						}
+						Function* calledF = CI->getCalledFunction();
+						if (calledF->isDeclaration()) {
 							continue;
-						} else if(fnsToSkip.find(CI->getCalledFunction())==fnsToSkip.end()) {
-							fnsToSkip.insert(CI->getCalledFunction());
+						} else if (fnsToSkip.find(calledF) == fnsToSkip.end()) {
+							// Add anything that inherits from a function marked to be skipped
+							if (fnsToClone.find(calledF) != fnsToClone.end()) {
+								// unless it is specifically marked to be cloned
+								continue;
+							}
+							fnsToSkip.insert(calledF);
 							fnsAdded = true;
 						}
 					}
@@ -280,19 +358,24 @@ void dataflowProtection::populateFnWorklist(Module& M) {
 		}
 	}
 
-	//Iterate through the fnsToErase list and remove them from the main function list
+	// Iterate through the fnsToErase list and remove them from the main function list
 	for (auto & e : fnsToSkip) {
 		fnList.erase(e);
 	}
 
-	//Get a list of all the functions that should be modified
-	//Start with main, and look at subfunctions
+	// Get a list of all the functions that should be modified
+	// Start with main, and look at subfunctions
 	fnsAdded = true;
 	Function* mainF = M.getFunction("main");
 
 	if (xMR_default) {
-		if (!mainF) { //If we don't have main, insert all
+		// If we don't have a main(), insert all found functions
+		if (!mainF) {
+			fnsToClone = fnList;
+		// or if user said to skip main()
+		} else if (fnsToSkip.find(mainF) != fnsToSkip.end()) {
 			fnsToClone = fnList;
+		// otherwise, visit all descendants of main()
 		} else {
 			fnsToClone.insert(mainF);
 			while (fnsAdded) {
@@ -304,14 +387,15 @@ void dataflowProtection::populateFnWorklist(Module& M) {
 								if (CI->isInlineAsm())
 									continue;
 								// skip any thing that doesn't have a called function and print warning
-								if (isIndirectFunctionCall(CI, "populateFnWorklist"))
+								if (isIndirectFunctionCall(CI, "populateFnWorklist", false)) {
+									skippedIndirectCalls.insert(CI);
 									continue;
+								}
 								if (CI->getCalledFunction()->isDeclaration())
 									continue;
-								else if (std::find(fnsToSkip.begin(), fnsToSkip.end(),
-										CI->getCalledFunction()) != fnsToSkip.end())
+								else if (fnsToSkip.find(CI->getCalledFunction()) != fnsToSkip.end())
 									continue;
-								else if (fnsToClone.find(CI->getCalledFunction())==fnsToClone.end()) {
+								else if (fnsToClone.find(CI->getCalledFunction()) == fnsToClone.end()) {
 									fnsToClone.insert(CI->getCalledFunction());
 									fnsAdded = true;
 								}
@@ -323,52 +407,123 @@ void dataflowProtection::populateFnWorklist(Module& M) {
 		}
 	}
 
-	//Get a list of all functions that are meant to be both cloned and skipped
+	// print warnings
+	if (skippedIndirectCalls.size() > 0) {
+		errs() << warn_string << " skipping indirect function calls in populateFnWorklist:\n";
+		for (auto CI : skippedIndirectCalls) {
+			PRINT_VALUE(CI);
+		}
+	}
+
+	// Get a list of all functions that are meant to be both cloned and skipped
 	for (auto & skip_it: fnsToSkip) {
-		if (fnsToClone.find(skip_it)!=fnsToClone.end())
+		if (fnsToClone.find(skip_it) != fnsToClone.end())
 			fnsToCloneAndSkip.insert(skip_it);
 	}
 
-	//Make sure coarse grained functions aren't modified
+	// Make sure coarse grained functions aren't modified
 	for (auto it : fnsToClone) {
-		if (std::find(coarseGrainedUserFunctions.begin(), coarseGrainedUserFunctions.end(),
-				it->getName())!=coarseGrainedUserFunctions.end()) {
+		if (isCoarseGrainedFunction(it->getName())) {
 			fnsToClone.erase(it);
 		}
 	}
 
 }
 
+
+/*
+ * Copies each parameter attribute kind set on argument orig_idx of `orig` onto argument new_idx of `cloned`
+ * (things like nocapture, readonly, etc.).  Attributes are copied by kind only; Dereferenceable is skipped.
+ */
+static void addArgAttrs(Function* orig, Function* cloned, unsigned int orig_idx, unsigned int new_idx) {
+	Attribute::AttrKind attr_kind;
+	unsigned int endNum = static_cast<unsigned int>(Attribute::AttrKind::EndAttrKinds);
+
+	for (unsigned int idx = 0; idx != endNum; idx++) {
+		attr_kind = static_cast<Attribute::AttrKind>(idx);
+
+		if (orig->hasParamAttribute(orig_idx, attr_kind)) {
+			// according to LLVM's Attributes.cpp (line 1342), there are some kinds we can't copy this way
+			if (attr_kind == Attribute::AttrKind::Dereferenceable) {
+				// TODO: is there another way to add it?
+				continue;
+			}
+			cloned->addParamAttr(new_idx, attr_kind);
+//			errs() << "Adding " << attr_kind << " to '" << cloned->getName() << "' argument " << new_idx << "\n";
+		}
+	}
+}
+
+
+/*
+ * Some lists of instructions are processed later in the pass, after the functions
+ *  to be cloned have been cloned.  Entries recorded against the original body F are
+ *  mirrored here: F and Fnew are walked in lock step (block by block, instruction by
+ *  instruction) and the matching instruction in Fnew is added to the same list.
+ * Currently only syncGlobalStores is updated.
+ * TODO: there may be more lists we need to look at
+ */
+void dataflowProtection::updateInstLists(Function* F, Function* Fnew) {
+	// BasicBlock iterators; the walk stops as soon as either function runs out of blocks
+	auto bbOld = F->begin();		auto oldEnd = F->end();
+	auto bbNew = Fnew->begin();		auto newEnd = Fnew->end();
+	for (; bbOld != oldEnd && bbNew != newEnd; ++bbOld, ++bbNew) {
+		// Instruction iterators; likewise stops at the shorter of the two blocks
+		auto iOld = bbOld->begin();		auto iOldEnd = bbOld->end();
+		auto iNew = bbNew->begin();		auto iNewEnd = bbNew->end();
+		for (; iOld != iOldEnd && iNew != iNewEnd; ++iOld, ++iNew) {
+			if (StoreInst* si = dyn_cast<StoreInst>(&*iOld))  {
+				if (syncGlobalStores.find(si) != syncGlobalStores.end()) {
+					// add the instruction at the same position in Fnew; NOTE(review): assumes it is also a StoreInst, the dyn_cast result is not checked
+					syncGlobalStores.insert(dyn_cast<StoreInst>(&*iNew));
+					// drop the original entry unless F is in fnsToCloneAndSkip
+					if (fnsToCloneAndSkip.find(F) == fnsToCloneAndSkip.end()) {
+						syncGlobalStores.erase(si);
+						// errs() << "removing from syncGlobalStores:\n";
+						// PRINT_VALUE(si);
+					}
+				}
+			}
+		}
+	}
+}
+
+
+// #define DBG_CLN_FN_ARGS
 void dataflowProtection::cloneFunctionArguments(Module & M) {
 	std::vector<Function*> functionsToFix;
 	int warnedFnPtrs = 0;
-
-	//If we aren't replicating everything by default then don't update fn sig
-	//There won't be any clones to pass into it
-	// if(!xMR_default){
-	// 	return;
-	// }
-
-	//a list of aggregates which are users of functions
+	// since the functionality is now broken into 2 parts, we have to
+	//  keep track of some values across the for loops
+	typedef std::tuple< Function*, std::vector<bool> > funcArg_t;
+	std::map<Function*, funcArg_t> newFuncArgsMap;
+
+	// If we aren't replicating everything by default then don't update fn sig
+	// There won't be any clones to pass into it
+	#ifdef DBG_CLN_FN_ARGS
+	int debugFlag = 0;
+	#endif
+
+	// a list of aggregates which are users of functions
 	// these will be used later to skip users of functions that are not CallInsts
 	std::list<ConstantAggregate*> skipAggList;
 
 	for (auto g_it = M.global_begin(); g_it != M.global_end(); g_it++) {
-		//we're looking for a particular global that causes problems
+		// we're looking for a particular global that causes problems
 		if (g_it->getName() == "llvm.global_ctors") {
-			//all the operands of this global
+			// all the operands of this global
 			for (auto op = g_it->op_begin(); op != g_it->op_end(); op++) {
 //				errs() << *(*op) << "\n";
-				//see if it's a ConstantArray
+				// see if it's a ConstantArray
 				if (auto cnst = dyn_cast<ConstantArray>(*op)) {
-					//look at all of its operands
+					// look at all of its operands
 					for (auto op2 = cnst->op_begin(); op2 != cnst->op_end(); op2++) {
 //						errs() << *(*op2) << "\n";
-						//see if the operand of the array is an aggregate type
+						// see if the operand of the array is an aggregate type
 						if (auto agg = dyn_cast<ConstantAggregate>(*op2)) {
-							//look at all the operands of the aggregate
+							// look at all the operands of the aggregate
 							for (auto op3 = agg->op_begin(); op3 != agg->op_end(); op3++) {
-								//if one of these operands is a function, then keep track of the aggregate
+								// if one of these operands is a function, then keep track of the aggregate
 								if (auto opf = dyn_cast<Function>(*op3)) {
 									skipAggList.push_back(agg);
 //									errs() << opf->getName() << "\n";
@@ -390,14 +545,36 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 	for (auto F : functionsToFix) {
 		unsigned int numArgs = F->arg_size();
 
+		#ifdef DBG_CLN_FN_ARGS
+		if (F->getName() == "ff_fprintf") {
+			errs() << "Found function '" << F->getName() << "'\n";
+			debugFlag = 1;
+		} else {
+			debugFlag = 0;
+		}
+		#endif
+
 		if (isISR(*F)) {
 			continue;
 		}
 
-		if (std::find(fnsToSkip.begin(),fnsToSkip.end(),F) != fnsToSkip.end()) {
+		// do not alter the function signatures for ones that will be a "library" call
+		if (protectedLibList.find(F) != protectedLibList.end()) {
 			continue;
 		}
 
+		if (fnsToSkip.find(F) != fnsToSkip.end()) {
+			// it can be in both
+			if (fnsToClone.find(F) == fnsToClone.end()) {
+				#ifdef DBG_CLN_FN_ARGS
+				if (debugFlag) {
+					PRINT_STRING("marked to skip this function");
+				}
+				#endif
+				continue;
+			}
+		}
+
 		if (verboseFlag) {
 			errs() << "Adding clone arguments to function: " << F->getName() << "\n";
 		}
@@ -407,7 +584,7 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 		// See if what is passed in has a clone
 		for (auto u : F->users()) {
 
-			//Ignore global annotations - globals containing bitcasts
+			// Ignore global annotations - globals containing bitcasts
 			if (auto ce = dyn_cast<ConstantExpr>(u)) {
 				if (ce->isCast()) {
 //					errs() << "WARNING: In cloneFnArgs in cloning.cpp\n";
@@ -416,13 +593,13 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 				}
 			}
 
-			//see if it's used in an aggregate type constructor global variable
+			// see if it's used in an aggregate type constructor global variable
 			if (std::find(skipAggList.begin(), skipAggList.end(), u) != skipAggList.end()) {
 //				errs() << info_string << " Skipping " << *u << "\n";
 				continue;
 			}
 
-			//check for aliases and skip them
+			// check for aliases and skip them
 			if (isa<GlobalAlias>(u)) {
 				if (verboseFlag) {
 					errs() << info_string << " Skipping global alias in cloneFunctionArguments()\n";
@@ -430,7 +607,7 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 				continue;
 			}
 
-			//check for invoke instructions
+			// check for invoke instructions
 			if (InvokeInst* invInst = dyn_cast<InvokeInst>(u)) {
 				if (verboseFlag) {
 					errs() << info_string << " Synchronizing on an InvokeInst\n";
@@ -453,7 +630,7 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 				continue;
 			}
 
-			//Handle arrays of function pointers by marking what should be modified
+			// Handle arrays of function pointers by marking what should be modified
 			if (ConstantArray* ca = dyn_cast<ConstantArray>(u)) {
 				#ifndef NO_FN_PTR_ARRAY
 				for(int i=0; i<ca->getNumOperands(); i++){
@@ -482,11 +659,17 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 				}
 				continue;
 			}
+			assert(callInst && "User is not a call instruction");
 
 			// It's possible that the function user is not actually a call to the function, but a call
 			//  to some other function that passes this one as a parameter.
 			Function* CF = callInst->getCalledFunction();
 			if (CF != F) {
+				#ifdef DBG_CLN_FN_ARGS
+				if (debugFlag) {
+					errs() << " > " << *u << "\n";
+				}
+				#endif
 				continue;
 			}
 
@@ -494,12 +677,17 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 				if (willBeCloned(callInst->getArgOperand(i))) {
 					cloneArg[i] = true;
 				}
+				// override: user directives could force certain arguments to
+				//  *not* be cloned
+				if (noXmrArgList.find(F) != noXmrArgList.end()) {
+					if (noXmrArgList[F].find(i) != noXmrArgList[F].end()) {
+						cloneArg[i] = false;
+					}
+				}
 			}
 		}
 		warnedFnPtrs = 0;
 
-		// TODO: some of the arguments could be specifically requested to NOT clone them
-
 		// Check if any parameters need clones
 		bool needClones = false;
 		for (auto b : cloneArg) {
@@ -507,6 +695,11 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 		}
 
 		if (!needClones) {
+			#ifdef DBG_CLN_FN_ARGS
+			if (debugFlag) {
+				PRINT_STRING("Doesn't need clones!");
+			}
+			#endif
 			continue;
 		}
 
@@ -527,8 +720,9 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 
 		ArrayRef<Type*> paramArray(params);
 
-		FunctionType * Ftype = FunctionType::get(
-				F->getFunctionType()->getReturnType(), paramArray, false);
+		FunctionType* oldFtype = F->getFunctionType();
+		FunctionType* Ftype = FunctionType::get(
+				oldFtype->getReturnType(), paramArray, oldFtype->isVarArg());
 
 		std::string Fname;
 		if (!TMR)
@@ -538,7 +732,6 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 		Constant * c = M.getOrInsertFunction(Fname, Ftype);
 		Function * Fnew = dyn_cast<Function>(c);
 		assert(Fnew && "New function is non-void");
-		newFunctions.push_back(Fnew);
 
 		unsigned int i = 0;
 
@@ -573,6 +766,13 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 			i++;
 		}
 
+//		errs() << "Function arguments: \n";
+//		for(auto p = Fnew->arg_begin(); p != Fnew->arg_end(); p++){
+//			errs() << *p << "\n";
+//		}
+//		errs() << "\n";
+
+
 		SmallVector<ReturnInst*, 8> returns;
 		CloneFunctionInto(Fnew, F, paramMap, true, returns);
 		origFunctions.push_back(F);
@@ -580,30 +780,71 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 		fnsToClone.erase(F);
 //		errs() << "\nReplacing " << F->getName() << " with " << Fnew->getName() << "\n";
 
+		// if there's an entry already for this, it's from cloneFunctionReturnVals
+		if (functionMap.find(F) != functionMap.end()) {
+			functionMap[Fnew] = functionMap[F];
+		}
 		functionMap[F] = Fnew;
 
-		//This is needed because we clone functions into new functions while updating references
-		//Occasionally, functions had been cloned but instsToClone hadn't been updated,
-		// so nothing in the new function was listed in instsToClone
-		//This led to the pass refusing to replace the cloned arguments in calls when
-		// the call lived in a new function, because none of the insts in it were in instsToClone
+		// also need to update the replReturn set
+		if (replReturn.find(F) != replReturn.end()) {
+			replReturn.insert(Fnew);
+			replReturn.erase(F);
+			// We need to remove the old one from the list here
+			// Because there should only be one version of it in the list
+		}
+
+		/*
+		 * This is needed because we clone functions into new functions while updating references.
+		 * Occasionally, functions had been cloned but instsToClone hadn't been updated,
+		 *  so nothing in the new function was listed in instsToClone.
+		 * This led to the pass refusing to replace the cloned arguments in calls when
+		 *  the call lived in a new function, because none of the insts in it were in instsToClone.
+		 */
 		populateValuesToClone(M);
+		// there are also some special lists that may need to be updated
+		updateInstLists(F, Fnew);
+
+		// TODO: might want to break up this whole function right here into 2 parts
+		//  so that replacing calls all takes place after the function clones have
+		//  been created
+		newFuncArgsMap[F] = funcArg_t(Fnew, cloneArg);
+	}
+
+	for (auto F : functionsToFix) {
+		// only do this if it's in the map (right?)
+		if (newFuncArgsMap.find(F) == newFuncArgsMap.end())
+			continue;
+
+		// set up values
+		unsigned int numArgs = F->arg_size();
+		funcArg_t funcArgs = newFuncArgsMap[F];
+		Function* Fnew = std::get<0>(funcArgs);
+		std::vector<bool> cloneArg = std::get<1>(funcArgs);
 
 //		errs() << "Function: " << F->getName() << "\n";
 		// Replace all function calls
 		for (auto u : F->users()) {
-			//Check for bitcasts in case of annotations
+			// Check for bitcasts in case of annotations
 			if (auto ce = dyn_cast<ConstantExpr>(u)) {
 				if (ce->isCast()) {
 					continue;
 				}
 			}
 
+			#ifdef DBG_CLN_FN_ARGS
+			if (F->getName() == "ff_fprintf") {
+				debugFlag = 1;
+			} else {
+				debugFlag = 0;
+			}
+			#endif
+
 //			errs() << "original function call: " << *u << "\n";
 
 			std::vector<Value*> args;
 
-			//Account for arrays of fn pointers
+			// Account for arrays of fn pointers
 			unsigned int j = 0;
 			if (ConstantArray* ca = dyn_cast<ConstantArray>(u)) {
 				for (int i=0; i<ca->getNumOperands(); i++) {
@@ -634,12 +875,21 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 				assert(callInst && "Replacing function calls in cloneFnArgs");
 
 				Function* parentFn = callInst->getParent()->getParent();
+				// this is the right check, because original functions were removed from this set,
+				//  and their clones added to it
 				if (fnsToClone.find(parentFn) == fnsToClone.end()) {
 					continue;
 				}
+				#ifdef DBG_CLN_FN_ARGS
+				if (debugFlag) {
+					errs() << " > " << *callInst << " in '" << parentFn->getName() << "'\n";
+				}
+				#endif
 
-				// if the use of the function is actually a function pointer *in* the call,
-				//  then need to skip doing anything to this one
+				// If the use of the function is actually a function pointer *in* the call,
+				//  then need to skip doing anything to this one.
+				// NOTE: possible error if calling a function passes
+				//  the called function as a parameter as well (unlikely)
 				if (callInst->getCalledFunction() != F) {
 					continue;
 				}
@@ -660,21 +910,42 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 					}
 				}
 
+				/*
+				 * Special check for calls to variadic functions.
+				 * Make sure to add the extra arguments to the new function call.
+				 */
+				if (F->isVarArg() && (callInst->getNumArgOperands() > numArgs)) {
+					#ifdef DBG_CLN_FN_ARGS
+					if (debugFlag) {
+						errs() << " - orig call has " << callInst->getNumArgOperands() << " arguments\n";
+						errs() << " - new var arg? " << Fnew->isVarArg() << "\n";
+					}
+					#endif
+					// add the rest
+					for (unsigned int i = numArgs; i < callInst->getNumArgOperands(); i++) {
+						Value* extraArg = callInst->getArgOperand(i);
+						args.push_back(extraArg);
+					}
+				}
+
 				ArrayRef<Value*>* callArgs;
 				callArgs = new ArrayRef<Value*>(args);
 				CallInst* newCallInst;
 
-				//turns out that void returning function calls have no name, so have to be careful here
-				if (Fnew->getReturnType() == Type::getVoidTy(M.getContext())) {
+				// Turns out that void returning function calls have no name, so have to be careful here.
+				// There may be other cases where it doesn't have a name; check those too.
+				if ( (Fnew->getReturnType() == Type::getVoidTy(M.getContext())) ||
+					 (!callInst->hasName()) )
+				{
 					newCallInst = CallInst::Create((Value*) Fnew, *callArgs);
 					newCallInst->insertBefore(callInst);
 				} else {
-					//The casting here is to stop from complaining that the Create call doesn't have the right types
+					// The casting here is to stop Eclipse from complaining that the Create call doesn't have the right types
 					newCallInst = CallInst::Create((Value*) Fnew, *callArgs,
 							Twine(callInst->getName()), (Instruction*) callInst);
 				}
 
-				//Deal with function calls inside function args when casted - not recognized as callInsts
+				// Deal with function calls inside function args when casted - not recognized as callInsts
 				for (auto ops : newCallInst->operand_values()) {
 					if (auto ce = dyn_cast<ConstantExpr>(ops)) {
 						if (ce->isCast()) {
@@ -687,7 +958,7 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 					}
 				}
 
-				//if there's debug information for the call, preserve it
+				// if there's debug information for the call, preserve it
 				if (auto dbgLoc = callInst->getDebugLoc()) {
 					newCallInst->setDebugLoc(dbgLoc);
 				}
@@ -709,7 +980,7 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 				assert(invInst && "Replacing function calls in cloneFnArgs");
 
 				Function* parentFn = invInst->getParent()->getParent();
-				if (fnsToClone.find(parentFn)==fnsToClone.end()) {
+				if (fnsToClone.find(parentFn) == fnsToClone.end()) {
 					continue;
 				}
 
@@ -732,11 +1003,11 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 				ArrayRef<Value*>* callArgs;
 				callArgs = new ArrayRef<Value*>(args);
 
-				//The casting here is to stop from complaining that the Create call doesn't have the right types
+				// The casting here is to stop Eclipse from complaining that the Create call doesn't have the right types
 				InvokeInst* newInvInst = InvokeInst::Create((Value*) Fnew, invInst->getNormalDest(),
 						invInst->getUnwindDest(), *callArgs, Twine(invInst->getName()), (Instruction*) invInst);
 
-				//Deal with function calls inside function args when casted - not recognized as callInsts
+				// Deal with function calls inside function args when casted - not recognized as callInsts
 				for (auto ops : newInvInst->operand_values()) {
 					if (auto ce = dyn_cast<ConstantExpr>(ops)) {
 						if (ce->isCast()) {
@@ -749,6 +1020,11 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 					}
 				}
 
+				// preserve debug information
+				if (auto dbgLoc = invInst->getDebugLoc()) {
+					newInvInst->setDebugLoc(dbgLoc);
+				}
+
 				// Replace all uses of the original call instruction with the new one
 				invInst->replaceAllUsesWith(newInvInst);
 				invInst->eraseFromParent();
@@ -759,14 +1035,32 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 
 		}
 
-		//update the metadata describing the new function
+		// update the metadata describing the new function
 		cloneMetadata(M, Fnew);
 
+		// copy all of the function attributes to the cloned attributes
+		unsigned int i = 0;
+		unsigned int j = 0;
+
+		while (i < numArgs) {
+			if (cloneArg[i]) {
+				j++;
+				addArgAttrs(F, Fnew, i, j);
+
+				if (TMR) {
+					j++;
+					addArgAttrs(F, Fnew, i, j);
+				}
+			}
+			i++;
+			j++;
+		}
+
 	}
 
 	#ifndef NO_FN_PTR_ARRAY
-	//Update any arrays of function pointers
-	//They are stored as global arrays
+	// Update any arrays of function pointers
+	// They are stored as global arrays
 	for(GlobalVariable & g : M.getGlobalList()){
 		if(g.hasPrivateLinkage() && g.hasAtLeastLocalUnnamedAddr()){
 			if(g.getNumOperands() == 1){
@@ -812,18 +1106,588 @@ void dataflowProtection::cloneFunctionArguments(Module & M) {
 	 * When you fix this, feel free to get rid of the NO_FN_PTR_ARRAY flag.
 	 * To test this, run with -dumpModule.
 	 * It will print out the entire module, which you can paste into an *.ll file and run with lli.
+	 * NOTE: Function->hasAddressTaken() could be useful
 	 */
 	 #endif
 
 }
 
-//#define DBG_UPDATE_CALLS
+
+/*
+ * There is a command line argument called "cloneReturn" that specifies functions which
+ *  should have their return values replicated.
+ * We will implement this by changing the function signature to include new pointer
+ *  arguments at the end (1, or 2 with TMR) where the extra return values can be stored.
+ * The call sites will be changed to allocate this space.
+ * NOTE: should we do anything differently based on the calling convention?
+ *
+ * The reason this is called before cloneInsns() is because we want the functions to
+ *  exist so that the clones will actually be in the cloneMap for later.
+ * There is another function that will finish up the functionality for this.
+ */
+void dataflowProtection::cloneFunctionReturnVals(Module& M) {
+	for (Function* F : replReturn) {
+		// things about the current function
+		Type* retType = F->getReturnType();
+		if (retType->isVoidTy()) {
+			// skip void functions
+			errs() << warn_string << " cannot replicate return values of function '"
+				   << F->getName() << "' because it is a void type\n";
+			continue;
+		}
+		FunctionType* fType = F->getFunctionType();
+
+		/*
+		 * Add the new arguments to the parameter list: 1 pointer, or 2 if TMR is set.
+		 * These will go at the end of the current list of parameters.
+		 * Even if this is a variadic function, extra variables aren't in the function signature;
+		 *  they show up in the function call, if at all.
+		 * Since these values will only be used on entrance/exit of the function, it will reduce
+		 *  register spilling by keeping these arguments in the stack instead of something that
+		 *  is used more frequently.
+		 */
+		Type* newRetType = retType->getPointerTo();
+		std::vector<Type*> newParams;
+		// add old args
+		for (auto oldArg : fType->params()) {
+			newParams.push_back(oldArg);
+		}
+		// add new return value args
+		newParams.push_back(newRetType);
+		if (TMR)
+			newParams.push_back(newRetType);
+
+		// new function type: same return type, extended argument list
+		FunctionType* newFuncType = FunctionType::get(retType,				/* return type */
+													  newParams,			/* arguments */
+													  fType->isVarArg());	/* variadic */
+
+		// create a new function
+		Function* newFunc = Function::Create(newFuncType,			/* function type */
+											 F->getLinkage(),		/* linkage */
+											 F->getName() + ".RR",	/* name */
+											 &M);					/* parent module */
+
+		// set up stuff for copying
+		ValueToValueMapTy paramMap;
+		SmallVector<ReturnInst*, 8> returns;
+		unsigned int i = 0;
+		unsigned int numArgs = F->arg_size();
+		auto argIt = F->arg_begin();
+		auto argItNew = newFunc->arg_begin();
+
+		while (i < numArgs) {
+			// similar to in cloneFunctionArguments, but 1:1 mapping
+			argItNew->setName(argIt->getName());
+
+			Argument* argNew = &*argItNew;
+			Argument* arg = &*argIt;
+			paramMap[arg] = argNew;
+
+			// see if it's in the clone map
+			// NOTE(review): the mapping is 1:1, so (argItNew + 1) is the next parameter - confirm intent
+			if (isCloned(arg)) {
+				// the second clone stays null unless TMR is set
+				Value* v1 = &*(argItNew + 1);
+				Value* v2 = nullptr;
+				if (TMR)
+					v2 = &*(argItNew + 2);
+				cloneMap[argNew] = ValuePair(v1, v2);
+			}
+
+			argIt++;
+			argItNew++;
+			i++;
+		}
+
+		// add the new return pointers
+		argItNew->setName("__retVal.DWC");
+		argItNew++;
+		if (TMR) {
+			argItNew->setName("__retVal.TMR");
+			argItNew++;
+		}
+
+		// copy the body of the function
+		CloneFunctionInto(newFunc, F, paramMap, true, returns);
+
+		// remember the cloned returns and the new function; updateRRFuncs() reads both maps
+		replRetMap[F] = returns;
+		functionMap[F] = newFunc;
+
+		// record these things
+		fnsToClone.insert(newFunc);
+		if (verboseFlag) {
+			errs() << info_string << " Created new function named '"
+				   << newFunc->getName() << "'\n";
+		}
+	}
+}
+
+/*
+ * If there are calls that are nested, and the called functions both show up in the replReturn list,
+ *  then we need to save un-fixed use checking until after all the updates have run.
+ * This includes removing unused functions, because we expect the original function
+ *  to be removed.
+ * Filled in updateRRFuncs() and checked in validateRRFuncs().
+ */
+static std::set<Instruction*> checkUsesLater;
+
+// #define DEBUG_CHANGE_RR_CALLS
+/*
+ * Finish updating the functions that are marked to replicate return values,
+ *  as well as the associated call sites.
+ * Probably isn't a case where we want the original as well as the changed function
+ */
+void dataflowProtection::updateRRFuncs(Module& M) {
+
+	for (auto& kv : replRetMap) {
+
+        ////////////////////// unpack information //////////////////////
+
+		Function* F = kv.first;
+		Function* rrFunc = functionMap[F];
+		SmallVector<ReturnInst*, 8> returns = kv.second;
+
+		Type* newRetType = rrFunc->getReturnType()->getPointerTo();
+		BasicBlock& entryBB = rrFunc->getEntryBlock();
+
+		// get a handle on the first of the trailing return-pointer arguments
+		auto argIt = rrFunc->arg_end();
+		argIt--;
+		if (TMR) argIt--;
+
+        //////////////////////// allocate addr /////////////////////////
+
+		// we can now insert the alloca's, because they won't get xMR'd here
+		AllocaInst* alloc1 = nullptr, * alloc2 = nullptr;
+		Constant* one = ConstantInt::get(
+			IntegerType::getInt32Ty(M.getContext()), 1, false);
+		unsigned int addrSpace = M.getDataLayout().getAllocaAddrSpace();
+		unsigned int alignNum = M.getDataLayout().getPrefTypeAlignment(newRetType);
+		alloc1 = new AllocaInst(
+			newRetType,		/* Type */
+			addrSpace,		/* AddrSpace */
+			one,			/* Value* ArraySize */
+			alignNum,		/* Align */
+			Twine((argIt)->getName() + ".addr")
+		);
+		alloc1->insertBefore(&*entryBB.getFirstInsertionPt());
+
+		if (TMR) {
+			alloc2 = new AllocaInst(
+				newRetType,		/* Type */
+				addrSpace,		/* AddrSpace */
+				one,			/* Value* ArraySize */
+				alignNum,		/* Align */
+				Twine((argIt+1)->getName() + ".addr")
+			);
+			alloc2->insertAfter(alloc1);
+		}
+
+        //////////////////////// store ptr addr ////////////////////////
+
+		// store the addresses
+		StoreInst* storeRetAddr1, * storeRetAddr2;
+		storeRetAddr1 = new StoreInst(&*argIt, alloc1);
+		storeRetAddr1->insertAfter(alloc1);
+		storeRetAddr1->setAlignment(alignNum);
+		if (TMR) {
+			storeRetAddr2 = new StoreInst(&*(argIt+1), alloc2);
+			storeRetAddr2->insertAfter(alloc2);
+			storeRetAddr2->setAlignment(alignNum);
+		}
+
+        ///////////////////// change return points /////////////////////
+
+		for (auto ret : returns) {
+			Value* retVal = ret->getReturnValue();
+			ValuePair retClones = getClone(retVal);
+
+			// load the argument
+			LoadInst* loadRet = new LoadInst(alloc1, "loadRet", ret);
+			// store the copy
+			StoreInst* storeRet = new StoreInst(
+				retClones.first,	/* value to store */
+				loadRet,			/* pointer where to store */
+				ret					/* InsertBefore */
+			);
+			StoreInst* storeRet2 = nullptr;
+			if (TMR) {
+				LoadInst* loadRet2 = new LoadInst(alloc2, "loadRet2", ret);
+				storeRet2 = new StoreInst(
+					retClones.second,	/* value to store */
+					loadRet2,			/* pointer where to store */
+					ret					/* InsertBefore */
+				);
+			}
+
+			// change how it's registered as part of synchronization logic
+			//  so segmenting works
+			auto retIt = startOfSyncLogic.find(ret);
+			if (retIt == startOfSyncLogic.end()) {
+				syncPoints.push_back(ret);
+				// if not specific spot already, make it the load
+				startOfSyncLogic[ret] = loadRet;
+			} else if (retIt->second == ret) {
+				startOfSyncLogic[ret] = loadRet;
+			}
+
+			// also register as clones
+			cloneMap[ret] = ValuePair(storeRet, storeRet2);
+		}
+
+        ////////////////////// change call sites ///////////////////////
+
+		// change the call sites - based on code in cloneFunctionArguments
+		// Work on a de-duplicated snapshot of the users: erasing an old call below
+		//  removes it from the use list of F, which would invalidate a live iterator.
+		std::vector<User*> fnUsers;
+		for (auto u : F->users()) {
+			if (std::find(fnUsers.begin(), fnUsers.end(), u) == fnUsers.end())
+				fnUsers.push_back(u);
+		}
+		for (auto u : fnUsers) {
+			// Check for bitcasts in case of annotations
+			if (auto ce = dyn_cast<ConstantExpr>(u)) {
+				if (ce->isCast()) {
+					continue;
+				}
+			}
+
+			// we won't deal with arrays of function pointers here just yet
+			CallInst* callInst = dyn_cast<CallInst>(u);
+			InvokeInst* invInst = dyn_cast<InvokeInst>(u);
+
+			if (callInst || invInst) {
+
+                ///////////////// ones to skip /////////////////
+
+				// skip ones that aren't being cloned (callInst is null for an invoke)
+				Function* parentFn = (callInst ? callInst->getParent() : invInst->getParent())->getParent();
+				if (fnsToClone.find(parentFn) == fnsToClone.end()) {
+					continue;
+				}
+
+				// If the use of the function is actually a function pointer *in*
+				//  the call, then need to skip doing anything to this call/invoke.
+				if ((callInst ? callInst->getCalledFunction() : invInst->getCalledFunction()) != F) {
+					continue;
+				}
+
+				// get entry point of current function
+				StringRef callName;
+				BasicBlock* callEntryBB;
+				Instruction* oldInst;
+				if (callInst) {
+					callEntryBB = &callInst->getParent()->getParent()->getEntryBlock();
+					callName = callInst->getName();
+					oldInst = callInst;
+				} else {
+					callEntryBB = &invInst->getParent()->getParent()->getEntryBlock();
+					callName = invInst->getName();
+					oldInst = invInst;
+				}
+
+                /////////////// call site alloca ///////////////
+				Type* normalRetType = rrFunc->getReturnType();
+				// these slots hold return *values*, so align for the value type, not the pointer type
+				unsigned int callAlignNum = M.getDataLayout().getPrefTypeAlignment(normalRetType);
+
+				// allocate some space for the new return value pointers
+				AllocaInst* callAlloca1 = nullptr, * callAlloca2 = nullptr;
+				callAlloca1 = new AllocaInst(
+					normalRetType,	/* Type */
+					addrSpace,		/* AddrSpace */
+					one,			/* Value* ArraySize */
+					callAlignNum,	/* Align */
+					Twine(callName + ".DWC.addr")
+				);
+				callAlloca1->insertBefore(&*(*callEntryBB).getFirstInsertionPt());
+				if (TMR) {
+					callAlloca2 = new AllocaInst(
+						normalRetType,	/* Type */
+						addrSpace,		/* AddrSpace */
+						one,			/* Value* ArraySize */
+						callAlignNum,	/* Align */
+						Twine(callName + ".TMR.addr")
+					);
+					callAlloca2->insertAfter(callAlloca1);
+				}
+
+                ///////////////// create call //////////////////
+
+				Instruction* newInst;
+				if (callInst) {
+					// create call arg list
+					std::vector<Value*> args;
+					// add existing arguments
+					for (unsigned i = 0; i < callInst->getNumArgOperands(); i++) {
+						args.push_back(callInst->getArgOperand(i));
+					}
+					// finish arg list
+					args.push_back(callAlloca1);
+					if (TMR) {
+						args.push_back(callAlloca2);
+					}
+					ArrayRef<Value*> callArgs(args);
+
+					// make the new call instruction
+					CallInst* newCallInst;
+					if (!callInst->hasName()) {
+						// In some strange cases, the call may not have a name, even though it's not a void function.
+						newCallInst = CallInst::Create(
+							rrFunc,							/* Function to call */
+							callArgs						/* argument list */
+						);
+						newCallInst->insertBefore(callInst);
+					} else {
+						newCallInst = CallInst::Create(
+							rrFunc,							/* Function to call */
+							callArgs,						/* argument list */
+							Twine(callInst->getName()),		/* name */
+							callInst						/* InsertBefore */
+						);
+					}
+
+					// do we need to worry about line 767?
+
+					newInst = newCallInst;
+				}
+				else {		/* invInst */
+					// create call arg list
+					std::vector<Value*> args;
+					// add existing arguments
+					for (unsigned i = 0; i < invInst->getNumArgOperands(); i++) {
+						args.push_back(invInst->getArgOperand(i));
+					}
+					// finish arg list
+					args.push_back(callAlloca1);
+					if (TMR) {
+						args.push_back(callAlloca2);
+					}
+					ArrayRef<Value*> callArgs(args);
+
+					InvokeInst* newInvInst = InvokeInst::Create(
+						rrFunc,							/* Function to call */
+						invInst->getNormalDest(),		/* IfNormal */
+						invInst->getUnwindDest(),		/* IfException */
+						callArgs,						/* argument list */
+						Twine(invInst->getName()),		/* name */
+						invInst							/* InsertBefore */
+					);
+
+					// do we need to worry about line 829?
+
+					newInst = newInvInst;
+				}
+
+				// copy debug info
+				if (auto dbgLoc = oldInst->getDebugLoc()) {
+					newInst->setDebugLoc(dbgLoc);
+				}
+
+                ////////////// load return clones //////////////
+
+				LoadInst* loadRet1 = new LoadInst(
+						callAlloca1, newInst->getName() + ".DWC");
+				loadRet1->insertAfter(newInst);
+				LoadInst* loadRet2 = nullptr;
+				if (TMR) {
+					loadRet2 = new LoadInst(
+							callAlloca2, newInst->getName() + ".TMR");
+					loadRet2->insertAfter(loadRet1);
+				}
+				// register them as clones
+				cloneMap[newInst] = ValuePair(loadRet1, loadRet2);
+
+                ///////////////// replace uses /////////////////
+
+				#ifdef DEBUG_CHANGE_RR_CALLS
+				int debugThis = false;
+				if (callName == "call1" && callInst && callInst->getCalledFunction()->getName() == "xQueueGenericCreate_TMR") {
+					debugThis = true;
+					// PRINT_VALUE(oldInst->getParent()->getParent());
+					errs() << "In function '" << oldInst->getParent()->getParent()->getName() << "'\n";
+				}
+				#endif
+				/*
+				 * This is trickier than just doing "replaceAllUsesWith()", because we
+				 *  have to replace the uses that are clones with the correct value.
+				 *
+				 * Somehow the original instruction is sometimes not in the user list, but the clones are.
+				 * I don't know why that would happen, but we'll have to work around that, as stupid
+				 *  as that is.
+				 * The call to getCloneOrig() should help detect if the original is missing
+				 *  from the user list.
+				 * First, make a list (set) of all Values to look at.
+				 * Then replace the operands accordingly.
+				 */
+				std::set<User*> checkTheseUses;
+				std::set<Instruction*> callUses;
+
+				for (auto use : oldInst->users()) {
+					// normal lookup
+					if (isCloned(use)) {
+						checkTheseUses.insert(use);
+					} else {
+						// inverse lookup
+						Value* origUse = getCloneOrig(use);
+						if (auto U = dyn_cast_or_null<User>(origUse)) {
+							checkTheseUses.insert(U);
+						}
+						// Also check for call instructions (and presumably invoke as well)
+						//  for which the operands are the oldInst
+						else if (auto callUse = dyn_cast<CallInst>(use)) {
+							// errs() << " &> Found call use:\n" << *callUse << "\n";
+							callUses.insert(callUse);
+						}
+						else if (auto invokeUse = dyn_cast<InvokeInst>(use)) {
+							callUses.insert(invokeUse);
+						}
+						#ifdef DEBUG_CHANGE_RR_CALLS
+						else if (debugThis) {
+							errs() << "leftover: " << *use << "\n";
+						}
+						#endif
+					}
+				}
+
+				// specially handle call uses
+				for (auto instUse : callUses) {
+					// either call or invoke - because can't instantiate CallBase
+					auto callUse = dyn_cast<CallInst>(instUse);
+					auto invokeUse = dyn_cast<InvokeInst>(instUse);
+					// get called function
+					Function* Fcalled;
+					if (callUse)
+						Fcalled = callUse->getCalledFunction();
+					else
+						Fcalled = invokeUse->getCalledFunction();
+					// iterate over operands
+					for (unsigned opNum = 0; opNum < instUse->getNumOperands(); opNum++) {
+						Value* op = instUse->getOperand(opNum);
+						if (op == oldInst) {
+							// at least replace the old one
+							instUse->setOperand(opNum, newInst);
+							// now check if the args themselves are cloned
+							if (Fcalled) {
+								auto argsCloned = argNumsCloned[Fcalled];
+								// if the vector contains opNum, then change the next 1/2 args
+								if (std::count(argsCloned.begin(), argsCloned.end(), opNum)) {
+									auto clones = getClone(newInst);
+									instUse->setOperand(opNum+1, clones.first);
+									if (TMR) {
+										instUse->setOperand(opNum+2, clones.second);
+									}
+								}
+							}
+						}
+					}
+				}
+
+				// Now check all of these uses and replace with the new instruction
+				for (auto use : checkTheseUses) {
+					ValuePair clones = getClone(use);
+					std::vector<unsigned> replaceIdxs;
+
+					// replace original uses and record indices
+					for (unsigned opNum = 0; opNum < use->getNumOperands(); opNum++) {
+						Value* op = use->getOperand(opNum);
+						if (op == oldInst) {
+							use->setOperand(opNum, newInst);
+							replaceIdxs.push_back(opNum);
+						}
+					}
+					#ifdef DEBUG_CHANGE_RR_CALLS
+					if (debugThis && replaceIdxs.size() < 1) {
+						errs() << "size too small!\n";
+					}
+					#endif
+
+					// unpack clones (the second one is only used with TMR)
+					Instruction* c1 = nullptr, * c2 = nullptr;
+					c1 = dyn_cast<Instruction>(clones.first);
+					if (TMR) {
+						c2 = dyn_cast<Instruction>(clones.second);
+						assert(c2 && "clone exists");
+					}
+
+					// replace the clones using marked indices
+					for (unsigned opNum : replaceIdxs) {
+						c1->setOperand(opNum, loadRet1);
+						#ifdef DEBUG_CHANGE_RR_CALLS
+						if (debugThis) {
+							errs() << "setting op " << opNum << " of " << *c1 << "\n";
+						}
+						#endif
+						if (TMR) {
+							c2->setOperand(opNum, loadRet2);
+						}
+					}
+				}
+
+				// remove old call - try now
+				if (oldInst->use_empty()) {
+					oldInst->eraseFromParent();
+				} else {
+					// if it doesn't work, try again later
+					checkUsesLater.insert(oldInst);
+				}
+			}
+		}
+	}
+}
+
+
+/*
+ * Checks to make sure old calls to functions that have had their
+ *  return values replicated have been removed successfully.
+ */
+void dataflowProtection::validateRRFuncs(void) {
+	bool foundProblem = false;
+	// Now we can remove the old instructions
+	for (auto oldInst : checkUsesLater) {
+		if (!oldInst) continue;		// nothing to remove or report for a null entry
+		// the parent block/function may already have been removed
+		BasicBlock* parentBB = oldInst->getParent();
+		Function* parentF = parentBB ? parentBB->getParent() : nullptr;
+		if (oldInst->use_empty()) {
+			// only erase when it is still attached to a function
+			if (parentF) oldInst->eraseFromParent();
+			continue;
+		}
+		errs() << "Still have uses for " << *oldInst << "\n";
+		for (auto U : oldInst->users()) {
+			PRINT_VALUE(U);
+		}
+		if (parentF) {
+			errs() << "in " << (parentF->getName()) << "\n";
+			for (auto use : parentF->users()) {
+				errs() << "  - " << *use << "\n";
+			}
+		}
+		foundProblem = true;
+	}
+	// erased instructions leave stale pointers behind; don't keep them in the static set
+	checkUsesLater.clear();
+
+	assert(!foundProblem && "must remove the original call!");
+	// If your code hits this assertion, please contact the maintainers
+}
+
+
+// #define DBG_UPDATE_CALLS
 void dataflowProtection::updateCallInsns(Module & M) {
 
+#ifdef DBG_UPDATE_CALLS
+	bool debugFlag = 0;
+#endif
+
 	for (auto &F : M) {
-		//If we are skipping the function, don't update the call instructions
-		if (fnsToCloneAndSkip.find(&F)!=fnsToCloneAndSkip.end()) {
-			if (fnsToClone.find(&F)==fnsToClone.end()) {
+		// If we are skipping the function, don't update the call instructions
+		if (fnsToCloneAndSkip.find(&F) != fnsToCloneAndSkip.end()) {
+			if (fnsToClone.find(&F) == fnsToClone.end()) {
 				continue;
 			}
 		}
@@ -833,10 +1697,92 @@ void dataflowProtection::updateCallInsns(Module & M) {
 				if (CallInst * CI = dyn_cast<CallInst>(&I)) {
 					Function * Fcalled = CI->getCalledFunction();
 
-					if (argNumsCloned.find(Fcalled) != argNumsCloned.end()) {
+					if (cloneAfterFnCall.find(Fcalled) != cloneAfterFnCall.end()) {
+						// Callee is in cloneAfterFnCall: after the call, copy every
+						//  argument's pointee from the original into its clone(s).
+						unsigned int numArgs = CI->getNumArgOperands();
+						for (unsigned int argNum = 0; argNum < numArgs; ++argNum) {
+							// get the clones
+							Value* op = CI->getArgOperand(argNum);
+							ValuePair clonePair = getClone(op);
+							Value* clone1 = clonePair.first;
+							assert(clone1 && "value is cloned!");
+
+							// load the original
+							LoadInst* loadOrig = new LoadInst(op, "loadOrig");
+							loadOrig->insertAfter(CI);
+
+							// store to the copy
+							StoreInst* storeCopy = new StoreInst(
+								loadOrig,			/* value to store */
+								clone1				/* pointer where to store */
+							);
+							storeCopy->insertAfter(loadOrig);
+
+							if (TMR) {
+								// one more store instruction for TMR copy
+								Value* clone2 = clonePair.second;
+								assert(clone2 && "valid 2nd clone with TMR");
+								StoreInst* storeCopy2 = new StoreInst(
+										loadOrig, clone2);
+								storeCopy2->insertAfter(storeCopy);
+							}
+						}
+						// PRINT_VALUE(CI->getParent());
+					}
+
+					else if (cloneAfterCallArgMap.find(CI) != cloneAfterCallArgMap.end()) {
+						// This handles cases where the application writer specifies
+						//  certain arguments to clone-after-call.
+						unsigned int numArgs = CI->getNumArgOperands();
+						// iterate through the ones specified
+						for (auto argNum : cloneAfterCallArgMap[CI]) {
+							// check bounds: valid argument indices are [0, numArgs)
+							if (argNum >= numArgs) {
+								continue;
+							}
+
+							// get clone
+							Value* op = CI->getArgOperand(argNum);
+							ValuePair clonePair = getClone(op);
+							Value* clone1 = clonePair.first;
+							assert(clone1 && "value is cloned!");
+
+							// load original
+							LoadInst* loadOrig = new LoadInst(op, "loadOrig");
+							loadOrig->insertAfter(CI);
+
+							// store to copy
+							StoreInst* storeCopy = new StoreInst(loadOrig, clone1);
+							storeCopy->insertAfter(loadOrig);
+
+							if (TMR) {
+								// once more for TMR copy
+								Value* clone2 = clonePair.second;
+								assert(clone2 && "valid 2nd clone with TMR");
+								StoreInst* storeCopy2 = new StoreInst(
+										loadOrig, clone2);
+								storeCopy2->insertAfter(storeCopy);
+							}
+						}
+					}
+
+					else if (argNumsCloned.find(Fcalled) != argNumsCloned.end()) {
 						auto argsCloned = argNumsCloned[Fcalled];
 
+						#ifdef DBG_UPDATE_CALLS
+						if (Fcalled && (Fcalled->getName() == "ff_fprintf_TMR") ) {
+							debugFlag = 1;
+							errs() << " # " << *CI << "\n";
+						}
+						#endif
+
 						for (auto argNum : argsCloned) {
+							#ifdef DBG_UPDATE_CALLS
+							if (debugFlag) {
+								errs() << "arg " << argNum << "\n";
+							}
+							#endif
 							Value* op = CI->getArgOperand(argNum);
 							if (isCloned(op)) {
 								Value* clone1 = getClone(op).first;
@@ -850,30 +1796,33 @@ void dataflowProtection::updateCallInsns(Module & M) {
 							}
 						}
 					}
+					#ifdef DBG_UPDATE_CALLS
+					debugFlag = 0;
+					#endif
 				}
 			}
 		}
 	}
-	errs() << "\n";
 }
 
+
 void dataflowProtection::updateInvokeInsns(Module & M) {
 
 	for (auto &F : M) {
-		//If we are skipping the function, don't update the call instructions
-		if (fnsToCloneAndSkip.find(&F)!=fnsToCloneAndSkip.end()) {
-			if (fnsToClone.find(&F)==fnsToClone.end()) {
+		// If we are skipping the function, don't update the call instructions
+		if (fnsToCloneAndSkip.find(&F) != fnsToCloneAndSkip.end()) {
+			if (fnsToClone.find(&F) == fnsToClone.end()) {
 				continue;
 			}
 		}
 
 		for (auto & bb : F) {
 			for (auto & I : bb) {
-				//also need to update Invoke instructions
+				// also need to update Invoke instructions
 				if (InvokeInst* invInst = dyn_cast<InvokeInst>(&I)) {
 					Function* Fcalled = invInst->getCalledFunction();
 
-					//clone the arguments
+					// clone the arguments
 					if (argNumsCloned.find(Fcalled) != argNumsCloned.end()) {
 						auto argsCloned = argNumsCloned[Fcalled];
 
@@ -884,7 +1833,7 @@ void dataflowProtection::updateInvokeInsns(Module & M) {
 								invInst->setArgOperand(argNum + 1, clone1);
 
 								Value* clone2;
-								if(TMR){
+								if (TMR) {
 									clone2 = getClone(op).second;
 									invInst->setArgOperand(argNum + 2, clone2);
 								}
@@ -897,15 +1846,272 @@ void dataflowProtection::updateInvokeInsns(Module & M) {
 	}
 }
 
-//#define DEBUGGING_MEMSET
+
+/*
+ * Helper function for cloneInsns().
+ * Handle changing references in complicated ConstantExpr's.
+ *
+ * ce    - the ConstantExpr expected at operand i of the cloned instruction(s)
+ * clone - the copies of the instruction; clone.second is only touched when TMR
+ * i     - index of the operand to rewrite in each copy
+ *
+ * Returns without changing anything when the operand needs no rewrite.
+ */
+void dataflowProtection::cloneConstantExprOperands(ConstantExpr* ce, InstructionPair clone, unsigned i) {
+	// Don't need to update references to constant ints
+	assert(ce && "Null ConstantExpr ce");
+	if (isa<ConstantInt>(ce->getOperand(0))) {
+		return;
+	}
+
+	// Don't mess with loads with inline GEPs
+	if (noMemReplicationFlag) {
+		if (ce->isGEPWithNoNotionalOverIndexing()) {
+			return;
+		}
+	}
+
+	/*
+	 * check if it's an inline bitcast
+	 * This can occur if the source code has a global array that ends with a series of 0 values
+	 *  Clang will compile the code to use the 'zeroinitializer' directive, which changes the
+	 *  type of the variable. Instead of having something like
+	 *     @array1 = dso_local constant [64 x i8]
+	 *  it will output
+	 *     @array2 = dso_local constant <{ [32 x i8], [32 x i8] }>
+	 * Then the call to accessing an element of this array will look like
+	 * 	   %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* bitcast (<{ [32 x i8], [32 x i8] }>* @array2 to [64 x i8]*), i64 0, i64 %idxprom
+	 * This only is a problem when using the noMemReplication flag, therefore it's OK to skip changing
+	 *  the instruction arguments, since it would all be the same argument anyway.
+	 *
+	 * might be an inline reference to a global variable. example:
+	 * %0 = load <4 x i32>, <4 x i32>* bitcast ([2 x [8 x i32]]* @matrix to <4 x i32>*), align 16, !tbaa !2
+	 *
+	 * More tricky stuff:
+	 * call void @llvm.memset.p0i8.i64(i8* align 4 bitcast (i32* getelementptr inbounds (%struct.block_s, %struct.block_s* @globalBlock, i64 0, i32 2, i64 0) to i8*), i8 0, i64 64, i1 false)
+	 * store %struct.xMINI_LIST_ITEM* getelementptr inbounds ([4 x %struct.xLIST], [4 x %struct.xLIST]* @pxReadyTasksLists, i64 0, i64 0, i32 2), %struct.xMINI_LIST_ITEM** bitcast (%struct.xLIST_ITEM** getelementptr inbounds ([4 x %struct.xLIST], [4 x %struct.xLIST]* @pxReadyTasksLists, i64 0, i64 0, i32 1) to %struct.xMINI_LIST_ITEM**), align 8, !tbaa !9
+	 * Or the worst:
+	 * store <2 x %struct.xMINI_LIST_ITEM*> <%struct.xMINI_LIST_ITEM* getelementptr inbounds ([4 x %struct.xLIST], [4 x %struct.xLIST]* @pxReadyTasksLists, i64 0, i64 0, i32 2), %struct.xMINI_LIST_ITEM* getelementptr inbounds ([4 x %struct.xLIST], [4 x %struct.xLIST]* @pxReadyTasksLists, i64 0, i64 0, i32 2)>, <2 x %struct.xMINI_LIST_ITEM*>* bitcast (%struct.xLIST_ITEM** getelementptr inbounds ([4 x %struct.xLIST], [4 x %struct.xLIST]* @pxReadyTasksLists, i64 0, i64 0, i32 2, i32 1) to <2 x %struct.xMINI_LIST_ITEM*>*), align 8, !tbaa !11
+	 */
+
+	// in the following code segment, the leading underscores in names represent levels of indirection
+	if (ce->isCast()) {
+
+		if (noMemReplicationFlag)
+			return;
+
+		Value* _op = ce->getOperand(0);
+		if (isCloned(_op)) {
+//						errs() << *_op << "\n";
+			ConstantExpr* ce1 = cast<ConstantExpr>(clone.first->getOperand(i));
+			Value* _op1 = cloneMap[_op].first;
+			assert(_op1 && "valid clone");
+//						errs() << *_op1 << "\n";
+			Constant* _nop1 = cast<Constant>(_op1);
+			Constant* nce1 = ce1->getWithOperandReplaced(0, _nop1);
+//						errs() << *nce1 << "\n";
+			clone.first->setOperand(i, nce1);
+			if (TMR) {
+				ConstantExpr* ce2 = cast<ConstantExpr>(clone.second->getOperand(i));
+				Value* _op2 = cloneMap[_op].second;
+				assert(_op2 && "valid second clone");
+				Constant* _nop2 = cast<Constant>(_op2);
+				Constant* nce2 = ce2->getWithOperandReplaced(0, _nop2);
+				clone.second->setOperand(i, nce2);
+			}
+			return;
+		}
+		// could be something ugly like:
+		//%2 = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([2 x [8 x i32]], [2 x [8 x i32]]* @matrix, i64 0, i64 0, i64 4) to <4 x i32>*), align 16, !tbaa !2
+		ConstantExpr* innerGEPclone1 = dyn_cast<ConstantExpr>(_op);
+		if (innerGEPclone1 && innerGEPclone1->isGEPWithNoNotionalOverIndexing()) {
+
+			// innerGEPclone1 (operand 0 of the cast) is the place to update;
+			//  no need to look it up a second time
+//						errs() << " - " << *innerGEPclone1 << "\n";
+
+			// this next thing is what has the clone(s)
+			Value* GEPvalOrig = innerGEPclone1->getOperand(0);
+
+			// have to check if it's been cloned
+			if (isCloned(GEPvalOrig)) {
+				// get the clone
+				Value* GEPvalClone1 = cloneMap[GEPvalOrig].first;
+				assert(GEPvalClone1 && "valid clone");
+
+				// replace uses
+				Constant* newGEPclone1 = innerGEPclone1->getWithOperandReplaced(
+						0, cast<Constant>(GEPvalClone1));
+				Constant* newCE = ConstantExpr::getCast(
+						ce->getOpcode(), newGEPclone1, ce->getType());
+				clone.first->setOperand(i, newCE);
+//				errs() << " - " << *ce << "\n";
+//				errs() << " - " << *clone.first << "\n";
+
+				if (TMR) {
+					ConstantExpr* ce2 = cast<ConstantExpr>(clone.second->getOperand(i));
+					ConstantExpr* innerGEPclone2 = cast<ConstantExpr>(ce2->getOperand(0));
+					Value* GEPvalClone2 = cloneMap[GEPvalOrig].second;
+					assert(GEPvalClone2 && "valid second clone");
+					Constant* newGEPclone2 = innerGEPclone2->getWithOperandReplaced(
+							0, cast<Constant>(GEPvalClone2));
+					Constant* newCE2 = ConstantExpr::getCast(
+							ce2->getOpcode(), newGEPclone2, ce2->getType());
+					clone.second->setOperand(i, newCE2);
+				}
+				return;
+			}
+		}
+		// otherwise, throw an error
+		else if (verboseFlag) {
+			errs() << warn_string << " In cloneInsns() skipping processing cloned ConstantExpr:\n";
+			errs() << " " << *ce << "\n";
+		}
+		return;
+	}
+	// (this check is kept this late so the cast handling above is not filtered out too early)
+	if (!willBeCloned(ce->getOperand(0))) {
+		return;
+	}
+
+	/*
+	 * Error checking here for things missing in the cloneMap.
+	 * If this is NULL, then that means we just inserted the operand
+	 *  into the map, and therefore it wasn't in there before.
+	 *
+	 * Trying to dereference 0 is a bad idea
+	 * How did this get in the list, but not in the map?
+	 */
+	Value* v_temp = cloneMap[ce->getOperand(0)].first;
+	if (v_temp == nullptr) {
+		errs() << err_string << " in cloneInsns!\n";
+		errs() << *ce << "\n";
+	}
+	assert(v_temp && "ConstantExpr is in cloneMap");
+
+	Constant* newOp1 = dyn_cast<Constant>(v_temp);
+	assert(newOp1 && "Null Constant newOp1");
+	Constant* c1 = ce->getWithOperandReplaced(0, newOp1);
+	ConstantExpr* eNew1 = dyn_cast<ConstantExpr>(c1);
+	assert(eNew1 && "Null ConstantExpr eNew1");
+	clone.first->setOperand(i, eNew1);
+
+	if (TMR) {
+		Constant* newOp2 = dyn_cast<Constant>(cloneMap[ce->getOperand(0)].second);
+		assert(newOp2 && "Null Constant newOp2");
+		Constant* c2 = ce->getWithOperandReplaced(0, newOp2);
+		ConstantExpr* eNew2 = dyn_cast<ConstantExpr>(c2);
+		assert(eNew2 && "Null ConstantExpr eNew2");
+		clone.second->setOperand(i, eNew2);
+	}
+}
+
+/*
+ * Helper function to clone the operands of ConstantVector's.
+ * Example:
+ *  <2 x %struct.xMINI_LIST_ITEM*>
+ *  <
+ *    %struct.xMINI_LIST_ITEM* getelementptr inbounds
+ *      ([4 x %struct.xLIST], [4 x %struct.xLIST]* @pxReadyTasksLists, i64 0, i64 3, i32 2),
+ *    %struct.xMINI_LIST_ITEM* getelementptr inbounds
+ *      ([4 x %struct.xLIST], [4 x %struct.xLIST]* @pxReadyTasksLists, i64 0, i64 3, i32 2)
+ *  >
+ *
+ * NOTE: if this ever updates to LLVM 10, we will need to change how we're getting the number of elements.
+ */
+void dataflowProtection::cloneConstantVectorOperands(ConstantVector* constVec, InstructionPair clone, unsigned i) {
+
+	// how many elements in the vector (a ConstantVector always has a vector type)
+	VectorType* vType = cast<VectorType>(constVec->getType());
+	unsigned elemCount = vType->getVectorNumElements();
+//	errs() << " * " << *constVec << "\n";
+//	errs() << "constant vector type with " << elemCount << " elements:\n";
+
+	// initialize some vectors which will later be converted into new constants
+	std::vector<Constant*> newVecArray_DWC(elemCount, nullptr);
+	std::vector<Constant*> newVecArray_TMR(elemCount, nullptr);
+
+	// look at all the elements in the vector
+	for (unsigned int k = 0; k < elemCount; k++) {
+		Constant* vc = constVec->getAggregateElement(k);
+
+		if (auto vc_const = dyn_cast<ConstantExpr>(vc)) {
+			if (vc_const->isGEPWithNoNotionalOverIndexing()) {
+				Value* _op = vc_const->getOperand(0);
+
+				if (isCloned(_op)) {
+//					errs() << *_op << "\n";
+					Value* _op1 = cloneMap[_op].first;
+					assert(_op1 && "valid clone");
+					Constant* _nop1 = cast<Constant>(_op1);
+//					errs() << *_nop1 << "\n";
+
+					// clone of the vector
+					ConstantVector* constVec_clone = cast<ConstantVector>(clone.first->getOperand(i));
+//					errs() << *constVec_clone << "\n";
+					// get vector element
+					Constant* vc_clone = constVec_clone->getAggregateElement(k);
+					ConstantExpr* vc_clone_expr = cast<ConstantExpr>(vc_clone);
+
+					// get a new constant with the operand replaced with correct cloned value
+					Constant* vc_clone_new = vc_clone_expr->getWithOperandReplaced(0, _nop1);
+//					errs() << *vc_clone_new << "\n";
+
+					// here's our new constant GEP that goes inside a vector
+					newVecArray_DWC[k] = vc_clone_new;
+//					errs() << *constVec_clone << "\n";
+
+					if (TMR) {
+						Value* _op2 = cloneMap[_op].second;
+						assert(_op2 && "valid clone");
+						Constant* _nop2 = cast<Constant>(_op2);
+
+						ConstantVector* constVec_clone2 = cast<ConstantVector>(clone.second->getOperand(i));
+						ConstantExpr* vc_clone_expr2 = cast<ConstantExpr>(constVec_clone2->getAggregateElement(k));
+
+						Constant* vc_clone_new2 = vc_clone_expr2->getWithOperandReplaced(0, _nop2);
+						newVecArray_TMR[k] = vc_clone_new2;
+					}
+				}
+			}
+		}
+
+		// Anything that didn't have a reference changed, keep the same.
+		if (newVecArray_DWC[k] == nullptr) {
+			newVecArray_DWC[k] = vc;
+		}
+		if (TMR) {
+			if (newVecArray_TMR[k] == nullptr) {
+				newVecArray_TMR[k] = vc;
+			}
+		}
+
+	}
+
+	// Build the new constants and change the clone operands; keep the result as a plain Constant*, since ConstantVector::get is not declared to return a ConstantVector
+	ArrayRef<Constant*> newVecArrayRef_DWC = ArrayRef<Constant*>(newVecArray_DWC);
+	Constant* newVec_DWC = ConstantVector::get(newVecArrayRef_DWC);
+	clone.first->setOperand(i, newVec_DWC);
+
+	if (TMR) {
+		ArrayRef<Constant*> newVecArrayRef_TMR = ArrayRef<Constant*>(newVecArray_TMR);
+		Constant* newVec_TMR = ConstantVector::get(newVecArrayRef_TMR);
+		clone.second->setOperand(i, newVec_TMR);
+	}
+}
+
+extern bool comesFromSingleCall(Instruction* storeUse);
+
+
+// #define DEBUGGING_CLONE_INSNS
 //----------------------------------------------------------------------------//
 // Fine-grained cloning of instructions
 //----------------------------------------------------------------------------//
 bool dataflowProtection::cloneInsns() {
 	std::deque<Instruction*> cloneList;
-	std::vector<std::pair<Instruction*,Instruction*>> instsCloned;
+	std::vector<InstructionPair> instsCloned;
 
-	//Populate the clone list
+	// Populate the clone list
 	for (auto I : instsToClone) {
 		Instruction* newI1;
 		Instruction* newI2;
@@ -915,7 +2121,7 @@ bool dataflowProtection::cloneInsns() {
 			}
 			Function* Fparent = invInst->getParent()->getParent();
 
-			//we need to create a new basic block to branch to on success
+			// we need to create a new basic block to branch to on success
 			BasicBlock* beforeBlock = invInst->getParent();
 			BasicBlock* afterBlock = invInst->getNormalDest();
 			BasicBlock* landingBlock = invInst->getUnwindDest();
@@ -925,21 +2131,21 @@ bool dataflowProtection::cloneInsns() {
 					blockName1, Fparent, afterBlock);
 			afterBlock = invInst->getNormalDest();
 
-			//set original invoke to have new normal destination
+			// set original invoke to have new normal destination
 			invInst->setNormalDest(newBlock1);
 
-			//make a dummy instruction so we have somewhere to put the invoke
+			// make a dummy instruction so we have somewhere to put the invoke
 			ConstantInt* nothing = ConstantInt::get(IntegerType::getInt16Ty(Fparent->getContext()), 1, false);
 			BinaryOperator* dummy1 = BinaryOperator::CreateNeg(nothing, "dummy1", newBlock1);
 
-			//that contains a copy of the same invoke instruction
+			// that contains a copy of the same invoke instruction
 			InvokeInst* newInv1 = dyn_cast<InvokeInst>(invInst->clone());
 			InvokeInst* newInv2;
 			newInv1->setName(invInst->getName() + ".DWC");
 			newInv1->insertAfter(dummy1);
 			dummy1->eraseFromParent();
 
-			//the new one will have the same unwind location
+			// the new one will have the same unwind location
 			newInv1->setUnwindDest(landingBlock);
 
 			if (TMR) {
@@ -967,10 +2173,19 @@ bool dataflowProtection::cloneInsns() {
 				newInv1->setNormalDest(afterBlock);
 //				errs() << " - new basic block:\n" << *newBlock1 << "\n";
 			}
-			//for the map
+			// for the map
 			newI1 = dyn_cast<Instruction>(newInv1);
 
-		} else {	//everything else besides InvokeInst
+			// debug stuff
+			if (invInst->getDebugLoc()) {
+				newInv1->setDebugLoc(invInst->getDebugLoc());
+				if (TMR) {
+					newInv2->setDebugLoc(invInst->getDebugLoc());
+				}
+			}
+
+		} else {	// everything else besides InvokeInst
+			// TODO: for alloca, copy/fix debug info
 			newI1 = I->clone();
 
 			if (!I->getType()->isVoidTy()) {
@@ -989,194 +2204,178 @@ bool dataflowProtection::cloneInsns() {
 			}
 		}
 
-		instsCloned.push_back(std::pair<Instruction*,Instruction*>(newI1, newI2));
+		instsCloned.push_back(std::make_pair(newI1, newI2));
 		cloneMap[I] = ValuePair(newI1, newI2);
 	}
 
-	//Iterate over the clone list and change references
+	// Iterate over the clone list and change references
 	for (auto clone : instsCloned) {
-		//Iterate over the operands in the instruction
+		// Iterate over the operands in the instruction
+		#ifdef DEBUGGING_CLONE_INSNS
+		bool debugPrint = false;
+		if (auto* debug_inst = dyn_cast<StoreInst>(clone.first)) {
+			Function* parentF = debug_inst->getParent()->getParent();
+			if (parentF->getName() == "core_list_mergesort_TMR.RR") {
+				if (debug_inst->getParent()->getName() == "entry") {
+					PRINT_VALUE(debug_inst);
+					debugPrint = true;
+				}
+				// if (ai->getName().startswith_lower("res.addr")) {
+				// 	PRINT_VALUE(debug_inst);
+				// 	debugPrint = true;
+				// }
+			}
+		}
+		#endif
 
 		for (unsigned i = 0; i < clone.first->getNumOperands(); i++) {
-			//If the operand is found in the map change the reference
+			// If the operand is found in the map change the reference
 			Value* op = clone.first->getOperand(i);
 
-			//skip changing basic block references on the invoke instructions,
+			// skip changing basic block references on the invoke instructions,
 			// we already set them up correctly above
 			if (isa<InvokeInst>(clone.first) && isa<BasicBlock>(op)) {
 				continue;
 			}
 
-			if (cloneMap.find(op) != cloneMap.end()) { 	//If we found it
-				if (noMemReplicationFlag) { 			//Not replicating memory
-					//If we aren't replicating memory then we should not change the load inst. address
-					if (dyn_cast<LoadInst>(clone.first)) { //Don't change load instructions
+			if (isCloned(op)) { 		// If we found it
+				#ifdef DEBUGGING_CLONE_INSNS
+				if (debugPrint) {
+					PRINT_VALUE(op);
+				}
+				#endif
+				if (noMemReplicationFlag) { 				// Not replicating memory
+					// If we aren't replicating memory then we should not change the load inst. address
+					if (dyn_cast<LoadInst>(clone.first)) { 	// Don't change load instructions
 						assert(clone.first && "Clone exists when updating operand");
 						clone.first->setOperand(i, op);
 						if (TMR) {
 							assert(clone.second && "Clone exists when updating operand");
 							clone.second->setOperand(i, op);
 						}
-					} else { //Else update as normal
+					} else { 								// Else update as normal
 						clone.first->setOperand(i, cloneMap[op].first);
 						if (TMR) {
 							clone.second->setOperand(i, cloneMap[op].second);
 						}
 					}
-				} else { //Replicating memory
-					clone.first->setOperand(i, cloneMap[op].first);
-					if (TMR) {
-						clone.second->setOperand(i, cloneMap[op].second);
+				} else { 									// Replicating memory
+					// GEPs nested inside bitcasts, and other tricky things
+					if (ConstantExpr* ce = dyn_cast<ConstantExpr>(op)) {
+						cloneConstantExprOperands(ce, clone, i);
+					}
+					// otherwise, it's simple to handle
+					else {
+						clone.first->setOperand(i, cloneMap[op].first);
+						if (TMR) {
+							clone.second->setOperand(i, cloneMap[op].second);
+						}
 					}
 				}
+
 			} else if (ConstantExpr* ce = dyn_cast<ConstantExpr>(op)) {
-				//Don't need to update references to constant ints
-				assert(ce && "Null ConstantExpr ce");
-				if (isa<ConstantInt>(ce->getOperand(0))) {
-					continue;
+				/*
+				 * Broken out into its own function.
+				 */
+				cloneConstantExprOperands(ce, clone, i);
+			/*
+			 * Sometimes there are packed vector instructions with inline GEPs to initialize them.
+			 */
+			} else if (op->getType()->isVectorTy()) {
+				if (ConstantVector* constVec = dyn_cast<ConstantVector>(op)) {
+					cloneConstantVectorOperands(constVec, clone, i);
 				}
 
-				if (!willBeCloned(ce->getOperand(0))) {
-					continue;
+			} else {
+				clone.first->setOperand(i, op);
+				if (TMR) {
+					assert(clone.second && "Clone exists to set operand");
+					clone.second->setOperand(i, op);
 				}
+			}
+		}
+		#ifdef DEBUGGING_CLONE_INSNS
+		debugPrint = false;
+		#endif
+	}
 
-				//Don't mess with loads with inline GEPs
-				if (noMemReplicationFlag) {
-					if (ce->isGEPWithNoNotionalOverIndexing()) {
+	return (instsToClone.size() > 0);
+}
+
+void dataflowProtection::verifyCloningSuccess() {
+	if (!noMemReplicationFlag) {
+		bool uhOhFlag = false;
+		/*
+		 * Sanity check: are any of the pointer/vector/ConstantExpr operands of
+		 *  the first clone equal to the operands of the original instruction?
+		 */
+		for (auto entry : cloneMap) {
+			Value* v0 = entry.first;
+			if (Instruction* i0 = dyn_cast<Instruction>(v0)) {
+
+				// Exception: comes from a single function call
+				if (auto si = dyn_cast<StoreInst>(i0)) {
+					if (comesFromSingleCall(si)) {
 						continue;
 					}
 				}
 
-				/*
-				 * check if it's an inline bitcast
-				 * This can occur if the source code has a global array that ends with a series of 0 values
-				 *  Clang will compile the code to use the 'zeroinitializer' directive, which changes the
-				 *  type of the variable. Instead of having something like
-				 *     @array1 = dso_local constant [64 x i8]
-				 *  it will output
-				 *     @array2 = dso_local constant <{ [32 x i8], [32 x i8] }>
-				 * Then the call to accessing an element of this array will look like
-				 * 	   %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* bitcast (<{ [32 x i8], [32 x i8] }>* @array2 to [64 x i8]*), i64 0, i64 %idxprom
-				 * This only is a problem when the noMemReplication flag, therefore it's OK to skip changing
-				 *  the instruction arguments, since it would all be the same argument anyway.
-				 */
-				//might be an inline reference to a global variable. example:
-				//%0 = load <4 x i32>, <4 x i32>* bitcast ([2 x [8 x i32]]* @matrix to <4 x i32>*), align 16, !tbaa !2
-				//in the following code segment, the leading underscores in names represent levels of indirection
-				if (ce->isCast()) {
-					if (noMemReplicationFlag)
-						continue;
-
-					Value* _op = ce->getOperand(0);
-					if (isCloned(_op)) {
-//						errs() << *_op << "\n";
-						ConstantExpr* ce1 = dyn_cast<ConstantExpr>(clone.first->getOperand(i));
-						Value* _op1 = cloneMap[_op].first;
-						assert(_op1 && "valid clone");
-//						errs() << *_op1 << "\n";
-						Constant* _nop1 = dyn_cast<Constant>(_op1);
-						Constant* nce1 = ce1->getWithOperandReplaced(0, _nop1);
-//						errs() << *nce1 << "\n";
-						clone.first->setOperand(i, nce1);
-						if (TMR) {
-							ConstantExpr* ce2 = dyn_cast<ConstantExpr>(clone.second->getOperand(i));
-							Value* _op2 = cloneMap[_op].second;
-							assert(_op2 && "valid second clone");
-							Constant* _nop2 = dyn_cast<Constant>(_op2);
-							Constant* nce2 = ce2->getWithOperandReplaced(0, _nop2);
-							clone.second->setOperand(i, nce2);
+				// Exception: use immediately following function argument (that isn't cloned). NOTE(review): the 'continue' below sits in the inner use loop, so it only advances that loop and skips no instruction - confirm intent
+				Function* parentF = i0->getParent()->getParent();
+				for (auto argIter = parentF->arg_begin(); argIter != parentF->arg_end(); argIter++) {
+					for (auto useIter = argIter->use_begin(); useIter != argIter->use_end(); useIter++) {
+						if (cast<Value>(&*useIter) == cast<Value>(i0)) {
+							continue;
 						}
-						continue;
 					}
-					//could be something ugly like:
-					//%2 = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([2 x [8 x i32]], [2 x [8 x i32]]* @matrix, i64 0, i64 0, i64 4) to <4 x i32>*), align 16, !tbaa !2
-					ConstantExpr* innerGEPclone1 = dyn_cast<ConstantExpr>(_op);
-					if (innerGEPclone1 && innerGEPclone1->isGEPWithNoNotionalOverIndexing()) {
+				}
 
-						//get the place to update
-						ConstantExpr* innerGEPclone1 = dyn_cast<ConstantExpr>(ce->getOperand(0));
-//						errs() << " - " << *innerGEPclone1 << "\n";
+				Instruction* i1 = dyn_cast<Instruction>(entry.second.first);
 
-						//this next thing is what has the clone(s)
-						Value* GEPvalOrig = innerGEPclone1->getOperand(0);
-//						errs() << " - " << *GEPvalOrig << "\n";
-						//get the clone
-						Value* GEPvalClone1 = cloneMap[GEPvalOrig].first;
-						assert(GEPvalClone1 && "valid clone");
-
-						//replace uses
-						Constant* newGEPclone1 = innerGEPclone1->getWithOperandReplaced(
-								0, dyn_cast<Constant>(GEPvalClone1));
-						Constant* newCE = ConstantExpr::getCast(
-								ce->getOpcode(), newGEPclone1, ce->getType());
-						clone.first->setOperand(i, newCE);
-//						errs() << " - " << *ce << "\n";
-//						errs() << " - " << *clone.first << "\n";
+				// Compare each operand of the original with the same operand of the first clone
+				for (unsigned i = 0; i < i0->getNumOperands(); i++) {
+					Value* op0 = i0->getOperand(i);
+					Value* op1 = i1->getOperand(i);
+					Type* opType = op0->getType();
 
-						if (TMR) {
-							ConstantExpr* ce2 = dyn_cast<ConstantExpr>(clone.second->getOperand(i));
-							ConstantExpr* innerGEPclone2 = dyn_cast<ConstantExpr>(ce2->getOperand(0));
-							Value* GEPvalClone2 = cloneMap[GEPvalOrig].second;
-							assert(GEPvalClone2 && "valid second clone");
-							Constant* newGEPclone2 = innerGEPclone2->getWithOperandReplaced(
-									0, dyn_cast<Constant>(GEPvalClone2));
-							Constant* newCE2 = ConstantExpr::getCast(
-									ce2->getOpcode(), newGEPclone2, ce2->getType());
-							clone.second->setOperand(i, newCE2);
-						}
+					// Exception: instruction uses the same constant number for each operand, casted to pointer
+					if (i0->isCast()) {
+						opType = i0->stripPointerCasts()->getType();
 					}
-					//otherwise, throw an error
-					else if (verboseFlag) {
-						errs() << warn_string << " In cloneInsns() skipping processing cloned ConstantExpr:\n";
-						errs() << " " << *ce << "\n";
-					}
-					continue;
-				}
 
-				if (!willBeCloned(ce->getOperand(0))) {
-					continue;
-				}
-
-				// error checking here for things missing in the cloneMap
-				//  if this is NULL, then that means we just inserted the operand
-				//	into the map, and therefore it wasn't in there before
-				// trying to dereference 0 is a bad idea
-				// how did this get in the list, but not in the map?
-				Value* v_temp = cloneMap[ce->getOperand(0)].first;
-				if (v_temp == nullptr) {
-					errs() << err_string << " in cloneInsns!\n";
-					errs() << *ce << "\n";
-				}
-				assert(v_temp && "ConstantExpr is in cloneMap");
-
-				Constant* newOp1 = dyn_cast<Constant>(v_temp);
-				assert(newOp1 && "Null Constant newOp1");
-				Constant* c1 = ce->getWithOperandReplaced(0, newOp1);
-				ConstantExpr* eNew1 = dyn_cast<ConstantExpr>(c1);
-				assert(eNew1 && "Null ConstantExpr eNew1");
-				clone.first->setOperand(i, eNew1);
+					// special treatment for nested constant expressions: compare their first operands instead
+					if (ConstantExpr* ce = dyn_cast<ConstantExpr>(op0)) {
+						op0 = ce->getOperand(0);
+						ConstantExpr* ce1 = dyn_cast<ConstantExpr>(op1);
+						op1 = ce1->getOperand(0);
+					}
 
-				if (TMR) {
-					Constant* newOp2 = dyn_cast<Constant>(cloneMap[ce->getOperand(0)].second);
-					assert(newOp2 && "Null Constant newOp2");
-					Constant* c2 = ce->getWithOperandReplaced(0, newOp2);
-					ConstantExpr* eNew2 = dyn_cast<ConstantExpr>(c2);
-					assert(eNew2 && "Null ConstantExpr eNew2");
-					clone.second->setOperand(i, eNew2);
-				}
-			} else {
-				clone.first->setOperand(i, op);
-				if (TMR) {
-					assert(clone.second && "Clone exists to set operand");
-					clone.second->setOperand(i, op);
+					if (opType->isPointerTy() || opType->isVectorTy() || isa<ConstantExpr>(op0)) {
+						if (isa<Function>(op0)) {
+							continue;
+						}
+						// See if the operands are the same as the clone
+						if (op0 == op1) {
+							uhOhFlag = true;
+							errs() << err_string << " operands are the same for each copy of instruction\n" << *i0 << "\n";
+							// don't need to report more than once per instruction
+							break;
+						}
+					}
 				}
 			}
 		}
-	}
 
-	return (instsToClone.size() > 0);
+		if (uhOhFlag && !noCloneOperandsCheckFlag) {
+			// a problem was found and noCloneOperandsCheckFlag is not set: report and exit
+			errs() << info_string << " COAST is having a hard time replicating the operands of these instructions.\n";
+			errs() << "Please attempt to make the expression this comes from less complex, or contact the maintainers.\n\n";
+			std::exit(-1);
+		}
+	}
 }
 
+
 //----------------------------------------------------------------------------//
 // Cloning of constants
 //----------------------------------------------------------------------------//
@@ -1194,7 +2393,7 @@ void dataflowProtection::cloneConstantExpr() {
 			assert(e1);
 
 			ConstantExpr* e2;
-			if(TMR){
+			if (TMR) {
 				Constant* constantOp2 = dyn_cast<Constant>(clones.second);
 				assert(constantOp2);
 				Constant* c2 = e->getWithOperandReplaced(0, constantOp2);
@@ -1202,8 +2401,8 @@ void dataflowProtection::cloneConstantExpr() {
 				assert(e2);
 			}
 
-			//assert(eNew->isGEPWithNoNotionalOverIndexing());
-			cloneMap[e] = ValuePair(e1,e2);
+			// assert(eNew->isGEPWithNoNotionalOverIndexing());
+			cloneMap[e] = ValuePair(e1, e2);
 		} else {
 //			TODO: what could cause this to fail?
 			assert(false && "Constant expr to clone not matching expected form");
@@ -1211,6 +2410,7 @@ void dataflowProtection::cloneConstantExpr() {
 	}
 }
 
+
 //----------------------------------------------------------------------------//
 // Cloning of globals
 //----------------------------------------------------------------------------//
@@ -1233,45 +2433,102 @@ void dataflowProtection::cloneGlobals(Module & M) {
 	}
 
 	for (auto g : globalsToClone) {
-		//Skip specified globals
+		// Skip specified globals
 		if (std::find(ignoreGlbl.begin(), ignoreGlbl.end(), g->getName().str()) != ignoreGlbl.end()) {
 			if (verboseFlag) errs() << "Not replicating " << g->getName() << "\n";
 			continue;
 		}
 
-		GlobalVariable* gNew = copyGlobal(M, g, "_DWC");
+		GlobalVariable* gNew = copyGlobal(M, g, g->getName().str() + "_DWC");
 
 		GlobalVariable* gNew2;
 		if (TMR) {
-			gNew2 = copyGlobal(M, g, "_TMR");
+			gNew2 = copyGlobal(M, g, g->getName().str() + "_TMR");
 		}
 
-		cloneMap[g] = ValuePair(gNew,gNew2);
+		cloneMap[g] = ValuePair(gNew, gNew2);
+		/*
+		 * One thing that's slightly annoying, is the ordering that these globals
+		 *  end up in.  The constructor for GlobalVariable requires a parameter
+		 *  `InsertBefore`.  This means the copies will be inserted in the address
+		 *  space _before_ the original.
+		 * TODO: why doesn't GDB know the global type? In the LLVM IR, it's the same
+		 *  metadata info for the type.
+		 * Perhaps one way to get around the weird address space issue would be to
+		 * 	use iterators from `M.global_begin()` or something like that.
+		 */
 	}
 
 }
 
-GlobalVariable * dataflowProtection::copyGlobal(Module & M, GlobalVariable * g, std::string suffix) {
+/*
+ * Creates a new global of the same type as `copyFrom`, but with name `newName` instead,
+ *  and inserts the new global before `copyFrom`.
+ */
+GlobalVariable * dataflowProtection::copyGlobal(Module & M, GlobalVariable* copyFrom, std::string newName) {
 
 	Constant * initializer;
 
-	if (globalsToRuntimeInit.find(g) == globalsToRuntimeInit.end()) {
-		initializer = g->getInitializer();
+	if (globalsToRuntimeInit.find(copyFrom) == globalsToRuntimeInit.end()) {
+		initializer = copyFrom->getInitializer();
 	} else {
-		Type * initType = g->getInitializer()->getType();
+		Type * initType = copyFrom->getInitializer()->getType();
 
 		// for now we only support runtime initialization of arrays
 		assert(isa<ArrayType>(initType));
 
 		initializer = ConstantAggregateZero::get(initType);
 
-		if(verboseFlag)	errs() << "Using zero initializer for global " << g->getName() + suffix << "\n";
+		if (verboseFlag)	errs() << "Using zero initializer for global " << newName << "\n";
 
 	}
 
-	GlobalVariable* gNew = new GlobalVariable(M, g->getValueType(), g->isConstant(), g->getLinkage(),
-			initializer, g->getName() + suffix, g);
-	gNew->setUnnamedAddr(g->getUnnamedAddr());
+	// create new global
+	GlobalVariable* gNew = new GlobalVariable(
+		M,							/* Module */
+		copyFrom->getValueType(), 	/* Type */
+		copyFrom->isConstant(), 	/* isConstant */
+		copyFrom->getLinkage(),		/* Linkage */
+		initializer,				/* Initializer */
+		newName,					/* Name */
+		copyFrom					/* InsertBefore */
+	);
+
+	// copy the other attributes
+	gNew->setUnnamedAddr(copyFrom->getUnnamedAddr());
+	gNew->copyAttributesFrom(copyFrom);
+
+	// copy the debug information
+	SmallVector<DIGlobalVariableExpression*, 4> debugInfo;
+	copyFrom->getDebugInfo(debugInfo);
+	for (auto dbg : debugInfo) {
+		// we need to make a new entry for the variable name
+		auto dbgVar = dbg->getVariable();
+
+		// first, get the variable type
+		DIType* varType = dyn_cast<DIType>(dbgVar->getRawType());
+		// use debug info builder
+		DIGlobalVariableExpression* newDbgInfo =
+				dBuilder->createGlobalVariableExpression(
+					dbgVar->getScope(), 		/* DIScope* Context */
+					gNew->getName(), 			/* StringRef Name */
+					dbgVar->getLinkageName(), 	/* StringRef LinkageName */
+					dbgVar->getFile(), 			/* DIFile* File */
+					dbgVar->getLine(), 			/* unsigned LineNo */
+					varType, 					/* DIType* Ty */
+					dbgVar->isLocalToUnit(), 	/* bool isLocalToUnit */
+					/* bool isDefined - not in the version we're using */
+					dbg->getExpression() 		/* DIExpression* Expr */
+					/* MDNode* Decl=nullptr */
+					/* MDTuple* TemplateParams=nullptr
+					    - not in the version we're using */
+					/* uint32_t AlignInBits=0 */
+				);
+		// errs() << *newDbgInfo << "\n"
+		// 	   << *(newDbgInfo->getVariable()) << "\n";
+
+		gNew->addDebugInfo(newDbgInfo);
+	}
 
 	if (verboseFlag)
 		errs() << "New duplicate global: " << gNew->getName() << "\n";
@@ -1279,8 +2536,10 @@ GlobalVariable * dataflowProtection::copyGlobal(Module & M, GlobalVariable * g,
 	return gNew;
 }
 
-// For all globals that need to be initialized at runtime, insert memcpy calls
-// at the start of main
+
+/*
+ * For all globals that need to be initialized at runtime, insert memcpy calls at the start of main
+ */
 void dataflowProtection::addGlobalRuntimeInit(Module & M) {
 	for (auto g : globalsToRuntimeInit) {
 		ArrayType * arrayType = dyn_cast<ArrayType>(g->getType()->getContainedType(0));
@@ -1328,6 +2587,10 @@ void dataflowProtection::addGlobalRuntimeInit(Module & M) {
 	}
 }
 
+
+//----------------------------------------------------------------------------//
+// Cloning debug information
+//----------------------------------------------------------------------------//
 /*
  * The debug information automatically generated for the new function is nearly identical to the old,
  *  except that it correctly changes the retainedNodes entry to match the local variables in the body
@@ -1373,7 +2636,7 @@ void dataflowProtection::cloneMetadata(Module& M, Function* Fnew) {
 	} */
 #endif
 
-	//have to make new types, based on signature of new function
+	// have to make new types, based on signature of new function
 	DISubroutineType* dtype = autoSp->getType();
 //	errs() << dtype << "\n";
 	DITypeRefArray dtypeArray = dtype->getTypeArray();
diff --git a/projects/dataflowProtection/dataflowProtection.cpp b/projects/dataflowProtection/dataflowProtection.cpp
index 0be5726a3..84c3f9db6 100644
--- a/projects/dataflowProtection/dataflowProtection.cpp
+++ b/projects/dataflowProtection/dataflowProtection.cpp
@@ -10,30 +10,42 @@ using namespace llvm;
 //--------------------------------------------------------------------------//
 // Command line options for the pass
 //--------------------------------------------------------------------------//
-//Replication rules
+// Replication rules
 cl::opt<bool> noMemReplicationFlag ("noMemReplication", cl::desc("Do not duplicate variables in memory"));
 cl::opt<bool> noLoadSyncFlag ("noLoadSync", cl::desc("Do not synchronize on data loads"));
 cl::opt<bool> noStoreDataSyncFlag ("noStoreDataSync", cl::desc("Do not synchronize data on data stores"));
 cl::opt<bool> noStoreAddrSyncFlag ("noStoreAddrSync", cl::desc("Do not synchronize address on data stores"));
 cl::opt<bool> storeDataSyncFlag ("storeDataSync", cl::desc("Force synchronize data on data stores (not default)"));
 
-//Replication scope
-//note: any changes to list names must also be changed at the top of utils.cpp
-cl::list<std::string> skipFnCl("ignoreFns", cl::desc("Specify function to not protect. Defaults to none."), cl::CommaSeparated, cl::ZeroOrMore);
-cl::list<std::string> ignoreGlblCl("ignoreGlbls", cl::desc("Specify global variables to not protect. Defaults to none."), cl::CommaSeparated, cl::ZeroOrMore);
-cl::list<std::string> skipLibCallsCl("skipLibCalls", cl::desc("Specify library calls to not clone. Defaults to none."), cl::CommaSeparated, cl::ZeroOrMore);
-cl::list<std::string> replicateUserFunctionsCallCl("replicateFnCalls", cl::desc("Specify user calls where the call, not the function body, should be triplicated. Defaults to none."), cl::CommaSeparated, cl::ZeroOrMore);
-cl::opt<std::string> configFileLocation("configFile", cl::desc("Location of configuration file"));
-
-//Other options
+// Replication scope
+// note: any changes to list names must also be changed at the top of interface.cpp
+cl::list<std::string> skipFnCl ("ignoreFns", cl::desc("Specify function to not protect. Defaults to none."), cl::CommaSeparated, cl::ZeroOrMore);
+cl::list<std::string> ignoreGlblCl ("ignoreGlbls", cl::desc("Specify global variables to not protect. Defaults to none."), cl::CommaSeparated, cl::ZeroOrMore);
+cl::list<std::string> skipLibCallsCl ("skipLibCalls", cl::desc("Specify library calls to not clone. Defaults to none."), cl::CommaSeparated, cl::ZeroOrMore);
+cl::list<std::string> replicateUserFunctionsCallCl ("replicateFnCalls", cl::desc("Specify user calls where the call, not the function body, should be triplicated. Defaults to none."), cl::CommaSeparated, cl::ZeroOrMore);
+cl::list<std::string> isrFunctionListCl ("isrFunctions", cl::desc("These functions are considered Interrupt Service Handlers and will be treated differently."), cl::CommaSeparated, cl::ZeroOrMore);
+// should also be able to specify functions/globals to clone from command line
+cl::list<std::string> cloneFnCl ("cloneFns", cl::desc("Specify function(s) to protect. Defaults to none."), cl::CommaSeparated, cl::ZeroOrMore);
+cl::list<std::string> cloneGlblCl ("cloneGlbls", cl::desc("Specify global(s) to protect. Defaults to none."), cl::CommaSeparated, cl::ZeroOrMore);
+// specify function names which should return multiple values
+cl::list<std::string> replReturnCl ("cloneReturn", cl::desc("Specify function(s) which should return multiple values. Defaults to none."), cl::CommaSeparated, cl::ZeroOrMore);
+cl::list<std::string> cloneAfterCallCl ("cloneAfterCall", cl::desc("Specify function(s) of which the argument(s) should be cloned after the function is called once (ie. scanf)"), cl::CommaSeparated, cl::ZeroOrMore);
+cl::list<std::string> protectedLibCl ("protectedLibFn", cl::desc("Specify function(s) which should be treated as protected library functions."), cl::CommaSeparated, cl::ZeroOrMore);
+
+// Other options
+cl::opt<std::string> configFileLocation ("configFile", cl::desc("Location of configuration file"));
 cl::opt<bool> ReportErrorsFlag ("countErrors", cl::desc("Instrument TMR'd code so it counts the number of corrections"), cl::value_desc("TMR error counting"));
 cl::opt<bool> OriginalReportErrorsFlag ("reportErrors", cl::desc("Instrument TMR'd code so it reports if TMR corrected an error (deprecated)"), cl::value_desc("TMR error signaling (deprecated)"));
 cl::opt<bool> InterleaveFlag ("i", cl::desc("Interleave instructions, rather than segmenting within a basic block. Default behavior."));
 cl::opt<bool> SegmentFlag ("s", cl::desc("Segment instructions, rather than interleaving within a basic block"));
-cl::list<std::string> globalsToRuntimeInitCl("runtimeInitGlobals", cl::CommaSeparated, cl::ZeroOrMore);
+cl::list<std::string> globalsToRuntimeInitCl ("runtimeInitGlobals", cl::CommaSeparated, cl::ZeroOrMore);
 cl::opt<bool> dumpModuleFlag ("dumpModule", cl::desc("Print out the module immediately before pass concludes. Option is for pass debugging."));
 cl::opt<bool> verboseFlag ("verbose", cl::desc("Increase the amount of output"));
 cl::opt<bool> noMainFlag ("noMain", cl::desc("There is no 'main' function in this module"));
+cl::opt<bool> noCloneOperandsCheckFlag ("noCloneOpsCheck", cl::desc("Continue compilation even if instruction operands weren't correctly cloned."));
+cl::opt<bool> countSyncsFlag ("countSyncs", cl::desc("Dynamic count of synchronization points"));
+cl::opt<bool> protectStackFlag ("protectStack", cl::desc("Vote on values of return address and frame pointer before returning from function call."));
+
 
 //--------------------------------------------------------------------------//
 // Top level behavior
@@ -51,19 +63,21 @@ bool dataflowProtection::runOnModule(Module &M) {
 bool dataflowProtection::run(Module &M, int numClones) {
 	// Remove user functions that are never called in the module to reduce code size, processing time
 	// These are mainly inlined by prior optimizations
-	//removeUnusedFunctions(M);
+	if (verboseFlag)
+		PRINT_STRING("The following functions are unused, removing them:");
+	removeUnusedFunctions(M);
 
 	// Process user commands inside of the source code
 	// Must happen before processCommandLine to make sure we don't clone things if not needed
 	processAnnotations(M);
 
-	//Remove annotations here so they aren't cloned
+	// Remove annotations here so they aren't cloned
 	removeAnnotations(M);
 
 	// Make sure that the command line options are correct
 	processCommandLine(M, numClones);
 
-	//Populate the list of functions to touch
+	// Populate the list of functions to touch
 	populateFnWorklist(M);
 
 	// First figure out which instructions are going to be cloned
@@ -77,11 +91,16 @@ bool dataflowProtection::run(Module &M, int numClones) {
 	// populateValuesToClone has to be called before this so we know which
 	// instructions are cloned, and thus when functions need to have extra arguments
 	cloneFunctionArguments(M);
-	removeOrigFunctions();
+	cloneFunctionReturnVals(M);
 
 	// deal with function wrappers
 	updateFnWrappers(M);
 
+	// Parse the annotations on local variables within functions so that
+	//  list of values to clone is up to date
+	processLocalAnnotations(M);
+	removeLocalAnnotations(M);
+
 	// Once again figure out which instructions are going to be cloned
 	// This need to be re-run after creating the new functions as the old
 	// pointers will be stale
@@ -96,7 +115,7 @@ bool dataflowProtection::run(Module &M, int numClones) {
 	updateCallInsns(M);
 	updateInvokeInsns(M);
 
-	//Insert error detection/handling
+	// Insert error detection/handling
 	insertErrorFunction(M, numClones);
 	createErrorBlocks(M, numClones);
 
@@ -104,28 +123,47 @@ bool dataflowProtection::run(Module &M, int numClones) {
 	populateSyncPoints(M);
 
 	// Insert synchronization statements
-	processSyncPoints(M,numClones);
+	processSyncPoints(M, numClones);
 
 	// Global runtime initialization
 	addGlobalRuntimeInit(M);
+	updateRRFuncs(M);
+
+	// stack protection
+	insertStackProtection(M);
 
 	// Clean up
 	removeUnusedErrorBlocks(M);
 	checkForUnusedClones(M);
+	removeOrigFunctions();
 	removeUnusedGlobals(M);
 
 	// This is executed if code is segmented instead of interleaved
 	moveClonesToEndIfSegmented(M);
 
-//	removeUnusedFunctions(M);
-
-	//Option executed when -dumpModule is passed in
+	if (verboseFlag)
+		PRINT_STRING("Removing unused functions...");
+	/*
+	 * Final check for unused functions.
+	 * It's possible that there are circular dependencies here.
+	 * For example, not removing a function because it's still used in a call,
+	 *  but then removing the function that had that call in it right after.
+	 * Keep calling until nothing new is removed.
+	 */
+	int numRemoved = 0;
+	do {
+		numRemoved = removeUnusedFunctions(M);
+	} while (numRemoved > 0);
+	// Make sure old calls to functions with replicated return values are removed
+	validateRRFuncs();
+
+	// Option executed when -dumpModule is passed in
 	dumpModule(M);
 
 	return true;
 }
 
-//set pass dependencies
+// set pass dependencies
 void dataflowProtection::getAnalysisUsage(AnalysisUsage& AU) const {
 	ModulePass::getAnalysisUsage(AU);
 }
diff --git a/projects/dataflowProtection/dataflowProtection.h b/projects/dataflowProtection/dataflowProtection.h
index 8c2c083cb..bfb2ee468 100644
--- a/projects/dataflowProtection/dataflowProtection.h
+++ b/projects/dataflowProtection/dataflowProtection.h
@@ -11,12 +11,44 @@
 #include <llvm/IR/Constants.h>
 #include <llvm/IR/Instructions.h>
 
-#define FIX_STORE_SEGMENTING
-
 using namespace llvm;
 
+
+// useful debugging helps
+#define PRINT_VALUE(v) errs() << *(v) << "\n"
+#define PRINT_STRING(s) errs() << (s) << "\n"
+
+
+//----------------------------------------------------------------------------//
+// Verification Types
+//----------------------------------------------------------------------------//
+typedef std::pair< Function*, Instruction* > FuncInstPair;
+// Orders FuncInstPair by Function* only; the Instruction* (debug information) is ignored
+// https://stackoverflow.com/questions/27893968/stdset-with-stdpair-how-to-write-comparator-for-elements
+struct Comparator
+{
+    bool operator() (const FuncInstPair& lhs, const FuncInstPair& rhs) const
+    {
+        return lhs.first < rhs.first;
+    }
+};
+// set of (function, instruction) records, ordered and uniqued by function only (see Comparator)
+// type which maps a global variable to any functions in which it is used (improperly)
+typedef std::set< FuncInstPair, Comparator > FunctionDebugSet;
+typedef std::map< GlobalVariable*, FunctionDebugSet > GlobalFunctionSetMap;
+
+// types for cloning
 typedef std::pair<Value*, Value*> ValuePair;
+typedef std::pair<Instruction*, Instruction*> InstructionPair;
+
+// types for verification
+typedef std::tuple< Value*, GlobalVariable*, Function* > LoadRecordType;
+typedef std::tuple< StoreInst*, GlobalVariable*, Function* > StoreRecordType;
+typedef std::tuple< CallInst*, GlobalVariable*, Function* , long > CallRecordType;
 
+//----------------------------------------------------------------------------//
+// Class definition
+//----------------------------------------------------------------------------//
 class dataflowProtection : public ModulePass {
 public:
   static char ID;
@@ -41,16 +73,27 @@ class dataflowProtection : public ModulePass {
   const std::string default_xMR    = "set_xMR_default";
   const std::string default_no_xMR = "set_no_xMR_default";
   const std::string default_global = "__xMR_DEFAULT_BEHAVIOR__";
-  const std::string coast_volatile = "coast_volatile";
+  const std::string repl_ret_anno  = "repl_return_val";
+  const std::string isr_anno	     = "isr_function";
+  const std::string prot_lib_anno  = "protected_lib";
+  const std::string cloneAfterCallAnno = "clone-after-call-";
 
   //----------------------------------------------------------------------------//
   // Constant strings for fancy printing
   //----------------------------------------------------------------------------//
+  #ifdef NO_COLOR_PRINTING
+  const std::string err_string		= "ERROR:";
+  const std::string warn_string		= "WARNING:";
+  const std::string info_string		= "INFO:";
+  const std::string blue_string		= "";
+  const std::string no_color_string	= "";
+  #else
   const std::string err_string		= "\033[0;31mERROR:\033[0m";
   const std::string warn_string		= "\033[0;33mWARNING:\033[0m";
   const std::string info_string		= "\033[0;35mINFO:\033[0m";
   const std::string blue_string		= "\033[0;34m";
   const std::string no_color_string	= "\033[0m";
+  #endif
 
   //----------------------------------------------------------------------------//
   // Internal variables to keep track of the different mappings
@@ -63,34 +106,38 @@ class dataflowProtection : public ModulePass {
   std::set<GlobalVariable*> globalsToClone;
   std::set<GlobalVariable*> globalsToSkip;
   std::set<GlobalVariable*> volatileGlobals;
-  std::set<Function*> usedFunctions; 	/* marked with __attribute__((used)) */
+  std::set<Function*> usedFunctions; 	    /* marked with __attribute__((used)) */
+  std::set<Function*> isrFunctions;		    /* marked with directive as ISR */
+  std::set<Function*> replReturn; 		    /* marked to replicate return values */
+  std::set<Function*> cloneAfterFnCall;   /* marked to only call once */
+  std::set<Function*> protectedLibList;   /* marked to protect w/o changing signature */
   std::set<GlobalVariable*> globalsToRuntimeInit;
   std::set<ConstantExpr*> constantExprToClone;
-  std::set<Function*> fnsUsedIndirectly;
-  std::set<Type*> indirectFnSignatures;
 
   std::set<Instruction*> instsToCloneAnno;
   std::set<Instruction*> wrapperInsts;
+  std::map<CallInst*, std::vector<int> > cloneAfterCallArgMap;
 
   std::vector<Instruction*> syncPoints;
-  std::vector<Instruction*> newSyncPoints;		//added while processing old ones
+  std::vector<Instruction*> newSyncPoints;		// added while processing old ones
   std::map<Value*,ValuePair> cloneMap;
   std::map<Function*, BasicBlock*> errBlockMap;
   std::map<Function*, Function*> functionMap;
+  std::map<Function*, SmallVector<ReturnInst*, 8>> replRetMap;
 
+  // vector probably actually is faster in this case, since no find() being called
   std::vector<Function*> origFunctions;
-  std::vector<Function*> newFunctions;
 
   std::map<Function*, std::vector<unsigned int>> argNumsCloned;
 
-  //For moving clones to end
-  //Store the cmp instruction inserted when the blocks split
+  // For moving clones to end
+  // Store the cmp instruction inserted when the blocks split
   std::map<BasicBlock*, Instruction*> syncCheckMap;
-  //Map the above cmp to the logic it relies on
+  // Map the above cmp to the logic it relies on
   std::map<BasicBlock*, std::vector<Instruction*> > syncHelperMap;
-  //For TMR, map the sync instruction to the start of the logic chain
+  // For TMR, map the sync instruction to the start of the logic chain
   std::map<Instruction*, Instruction*> startOfSyncLogic;
-  //in the case of SIMD instructions, need special support for compare logic
+  // in the case of SIMD instructions, need special support for compare logic
   std::map<Instruction*, std::tuple<Instruction*, Instruction*, Instruction*> > simdMap;
 
   //----------------------------------------------------------------------------//
@@ -101,16 +148,26 @@ class dataflowProtection : public ModulePass {
   // Modify functions
   void populateFnWorklist(Module& M);
   void cloneFunctionArguments(Module& M);
+  void cloneFunctionReturnVals(Module& M);
+  void updateRRFuncs(Module& M);
+  void validateRRFuncs(void);
   void updateCallInsns(Module& M);
   void updateInvokeInsns(Module& M);
   // Clone instructions
   bool cloneInsns();
+  void cloneConstantExprOperands(ConstantExpr* ce, std::pair<Instruction *,Instruction *> clone, unsigned i);
+  void cloneConstantVectorOperands(ConstantVector* constVec, InstructionPair clone, unsigned i);
+  void verifyCloningSuccess();
   // Clone constants
   void cloneConstantExpr();
   // Clone globals
   void cloneGlobals(Module& M);
-  GlobalVariable* copyGlobal(Module& M, GlobalVariable * g, std::string suffix);
+  GlobalVariable* copyGlobal(Module& M, GlobalVariable* copyFrom, std::string newName);
   void addGlobalRuntimeInit(Module& M);
+  // cloning debug information
+  void cloneMetadata(Module& M, Function* Fnew);
+  // fix instruction lists
+  void updateInstLists(Function* F, Function* Fnew);
 
   //----------------------------------------------------------------------------//
   // synchronization.cpp
@@ -120,58 +177,81 @@ class dataflowProtection : public ModulePass {
   // Insert synchronization logic
   void processSyncPoints(Module& M, int numClones);
   bool syncGEP(GetElementPtrInst* currGEP, GlobalVariable* TMRErrorDetected);
-  void syncStoreInst(StoreInst* currStoreInst, GlobalVariable* TMRErrorDetected);
+  void syncStoreInst(StoreInst* currStoreInst, GlobalVariable* TMRErrorDetected, bool forceFlag = false);
   void processCallSync(CallInst* currCallInst, GlobalVariable* TMRErrorDetected);
   void syncTerminator(TerminatorInst* currTerminator, GlobalVariable* TMRErrorDetected);
-  void splitBlocks(Instruction* I, BasicBlock* errBlock);
+  Instruction* splitBlocks(Instruction* I, BasicBlock* errBlock);
   // DWC error handling
   void insertErrorFunction(Module& M, int numClones);
   void createErrorBlocks(Module& M, int numClones);
   // TMR error detection
-  void insertTMRCorrectionCount(Instruction* cmpInst, GlobalVariable* TMRErrorDetected, bool updateSyncPoint = false);
   void insertTMRDetectionFlag(Instruction* cmpInst, GlobalVariable* TMRErrorDetected);
+  void insertTMRCorrectionCount(Instruction* cmpInst, GlobalVariable* TMRErrorDetected, bool updateSyncPoint = false);
   void insertVectorTMRCorrectionCount(Instruction* cmpInst, Instruction* cmpInst2, GlobalVariable* TMRErrorDetected);
+  // stack protection
+  void insertStackProtection(Module& M);
 
   //----------------------------------------------------------------------------//
   // utils.cpp
   //----------------------------------------------------------------------------//
-  // Initialization
-  void removeUnusedFunctions(Module& M);
-  void processCommandLine(Module& M, int numClones);
-  void processAnnotations(Module& M);
-  void verifyOptions(Module& M);
-  // Cleanup
-  void removeAnnotations(Module& M);
+  // Cleanup unused things
+  int removeUnusedFunctions(Module& M);
   void removeOrigFunctions();
   void removeUnusedErrorBlocks(Module& M);
   void removeUnusedGlobals(Module& M);
   void checkForUnusedClones(Module& M);
-  // Cloning utilities
-  bool willBeSkipped(Instruction* I);
-  bool willBeCloned(Value* v);
-  bool isCloned(Value* v);
-  ValuePair getClone(Value* I);
+  // Synchronization utilities
   void moveClonesToEndIfSegmented(Module& M);
+  GlobalVariable* createGlobalVariable(Module& M, std::string name, unsigned int byteSz);
+  // Run-time initialization of globals
   int getArrayTypeSize(Module& M, ArrayType * arrayType);
   int getArrayTypeElementBitWidth(Module& M, ArrayType * arrayType);
   void recursivelyVisitCalls(Module& M, Function* F, std::set<Function*> &functionList);
-  bool isISR(Function& F);
+  // Miscellaneous
   void walkInstructionUses(Instruction* I, bool xMR);
-  void cloneMetadata(Module& M, Function* Fnew);
+  void updateFnWrappers(Module& M);
+  std::string getRandomString(std::size_t len);
+  void dumpModule(Module& M);
+
+  //----------------------------------------------------------------------------//
+  // verification.cpp
+  //----------------------------------------------------------------------------//
+  bool comesFromSingleCall(Instruction* storeUse);
+  void walkUnPtStores(StoreRecordType &record);
+  void verifyOptions(Module& M);
+  void printGlobalScopeErrorMessage(GlobalFunctionSetMap &globalMap,
+  		bool globalPt, std::string directionMessage);
+  
+  //----------------------------------------------------------------------------//
+  // inspection.cpp
+  //----------------------------------------------------------------------------//
+  // Cloning utilities
+  bool willBeSkipped(Instruction* I);
+  bool willBeCloned(Value* v);
+  bool isCloned(Value* v);
+  ValuePair getClone(Value* I);
+  Value* getCloneOrig(Value* v);
+  bool isCoarseGrainedFunction(StringRef fnName);
   // Synchronization utilities
   bool isSyncPoint(Instruction* I);
-#ifdef FIX_STORE_SEGMENTING
   bool isStoreMovePoint(StoreInst* SI);
-#endif
   bool isCallMovePoint(CallInst* ci);
   bool checkCoarseSync(StoreInst* inst);
   // Miscellaneous
   bool isIndirectFunctionCall(CallInst* CI, std::string errMsg, bool print=true);
-  std::string getRandomString(std::size_t len);
-  int getFunctionsFromConfig();
+  bool isISR(Function& F);
+
+  //----------------------------------------------------------------------------//
+  // interface.cpp
+  //----------------------------------------------------------------------------//
   void getFunctionsFromCL();
-  void dumpModule(Module& M);
-  void updateFnWrappers(Module& M);
+  int getFunctionsFromConfig();
+  void processCommandLine(Module& M, int numClones);
+  void processAnnotations(Module& M);
+  void processLocalAnnotations(Module& M);
+  // cleanup
+  void removeAnnotations(Module& M);
+  void removeLocalAnnotations(Module& M);
 
 };
 
diff --git a/projects/dataflowProtection/inspection.cpp b/projects/dataflowProtection/inspection.cpp
new file mode 100644
index 000000000..b92c5deaa
--- /dev/null
+++ b/projects/dataflowProtection/inspection.cpp
@@ -0,0 +1,186 @@
+/*
+ * inspection.cpp
+ *
+ * This file has small functions which will give information about short queries.
+ */
+
+#include "dataflowProtection.h"
+
+// standard library includes
+#include <algorithm>
+#include <string>
+#include <list>
+
+
+using namespace llvm;
+
+
+// command line options
+extern cl::opt<bool> verboseFlag;
+
+// shared variables
+extern std::list<std::string> coarseGrainedUserFunctions;
+
+
+//----------------------------------------------------------------------------//
+// Cloning utilities
+//----------------------------------------------------------------------------//
+bool dataflowProtection::willBeSkipped(Instruction* I) {
+	return instsToSkip.count(I) != 0;	// membership test on the skip set
+}
+
+
+bool dataflowProtection::willBeCloned(Value* v) {
+	// dispatch on the kind of value; each kind is tracked in its own container
+	if (auto* inst = dyn_cast<Instruction>(v)) {
+		return instsToClone.count(inst) != 0;
+	}
+
+	// global variables
+	if (auto* global = dyn_cast<GlobalVariable>(v)) {
+		return globalsToClone.count(global) != 0;
+	}
+
+	// constant expressions
+	if (auto* expr = dyn_cast<ConstantExpr>(v)) {
+		return constantExprToClone.count(expr) != 0;
+	}
+
+	// an argument follows the function that owns it
+	if (auto* arg = dyn_cast<Argument>(v)) {
+		return fnsToClone.count(arg->getParent()) != 0;
+	}
+
+	return false;
+}
+
+
+bool dataflowProtection::isCloned(Value * v) {
+	return cloneMap.count(v) != 0;	// true when v has an entry in cloneMap
+}
+
+
+ValuePair dataflowProtection::getClone(Value* I) {
+	// a value with no cloneMap entry is returned paired with itself
+	auto entry = cloneMap.find(I);
+	if (entry != cloneMap.end())
+		return entry->second;
+	return ValuePair(I,I);
+}
+
+/*
+ * Reverse lookup of getClone: finds the original value that `v` was cloned from.
+ * This is a linear scan over cloneMap, which is keyed by the original value.
+ * Returns nullptr if the input value isn't a clone of anything.
+ */
+Value* dataflowProtection::getCloneOrig(Value* v) {
+	// bind by const reference so the map entries aren't copied on each iteration
+	for (const auto& entry : cloneMap) {
+		const ValuePair& clones = entry.second;
+		// a match against either clone identifies the original
+		if ( (clones.first == v) || (clones.second == v) ) {
+			return entry.first;
+		}
+	}
+	return nullptr;
+}
+
+
+bool dataflowProtection::isCoarseGrainedFunction(StringRef fnName) {
+	// coarseGrainedUserFunctions stores std::string names, so convert the
+	//  StringRef and search the list for an exact match
+	const std::string name = fnName.str();
+	auto listEnd = coarseGrainedUserFunctions.end();
+	auto found = std::find(coarseGrainedUserFunctions.begin(), listEnd, name);
+
+	return found != listEnd;
+}
+
+
+//----------------------------------------------------------------------------//
+// Synchronization utilities
+//----------------------------------------------------------------------------//
+bool dataflowProtection::isSyncPoint(Instruction* I) {
+	// anything other than a store, call, terminator, or GEP is never reported as a sync point
+	if ( !(isa<StoreInst>(I) || isa<CallInst>(I) || isa<TerminatorInst>(I) || isa<GetElementPtrInst>(I)) )
+		return false;
+	return std::find(syncPoints.begin(), syncPoints.end(), I) != syncPoints.end();
+}
+
+
+bool dataflowProtection::isStoreMovePoint(StoreInst* SI) {
+	Value* storedVal = SI->getOperand(0);	// the value being written by the store
+	if ( 	(getClone(SI).first == SI) ||				/* Doesn't have a clone */
+			(storedVal->getType()->isPointerTy()) ||	/* Storing a pointer type */
+			(isa<PtrToIntInst>(storedVal)) ) 			/* Casted pointer (type test only, so isa<>) */
+	{
+		return false;
+	}
+	// otherwise, we need to segment them together
+	return true;
+}
+
+
+bool dataflowProtection::isCallMovePoint(CallInst* ci) {
+	// getClone() hands back the input itself when no clone is recorded,
+	//  so a differing first element means this call has a clone
+	ValuePair callClones = getClone(ci);
+	bool hasClone = (callClones.first != ci);
+	return hasClone;
+}
+
+
+/*
+ * Reports whether the value being stored is the direct result of a call or
+ *  invoke of a coarse-grained function.
+ * Syncing on such return values should be avoided for things like the
+ *  case of malloc().
+ *  true  -> this will try to sync on a coarse-grained function return value
+ *  false -> it's OK to sync on the value
+ */
+bool dataflowProtection::checkCoarseSync(StoreInst* inst) {
+	Value* storedVal = inst->getOperand(0);
+
+	// find the callee, if the stored value comes straight from a call or invoke
+	Function* callee = nullptr;
+	if (CallInst* callSrc = dyn_cast<CallInst>(storedVal)) {
+		callee = callSrc->getCalledFunction();
+	} else if (InvokeInst* invokeSrc = dyn_cast<InvokeInst>(storedVal)) {
+		callee = invokeSrc->getCalledFunction();
+	}
+
+	// no direct callee, or an unnamed one, can't match a name in the list
+	if (!callee || !callee->hasName()) {
+		return false;
+	}
+
+	// otherwise the answer is whether the name is listed as coarse-grained
+	return isCoarseGrainedFunction(callee->getName());
+}
+
+
+//----------------------------------------------------------------------------//
+// Miscellaneous
+//----------------------------------------------------------------------------//
+bool dataflowProtection::isIndirectFunctionCall(CallInst* CI, std::string errMsg, bool print) {
+	// a resolvable callee means this is a direct call
+	if (CI->getCalledFunction() != nullptr) {
+		return false;
+	}
+	// No direct callee: this partially handles bitcasts and other inline LLVM functions.
+	// Skipped inline assembly isn't worth reporting, which keeps the output clean.
+	if ( print && (!CI->isInlineAsm()) ) {
+		errs() << warn_string << " in " << errMsg << " skipping:\n\t" << *CI << "\n";
+	}
+	return true;
+}
+
+
+/*
+ * Membership test against the isrFunctions set.
+ * A function will only be treated as an ISR if it's marked as such by the user;
+ *  this can be done with in-code directives or a command-line option.
+ */
+bool dataflowProtection::isISR(Function& F) {
+	return isrFunctions.count(&F) != 0;
+}
diff --git a/projects/dataflowProtection/interface.cpp b/projects/dataflowProtection/interface.cpp
new file mode 100644
index 000000000..758d6f534
--- /dev/null
+++ b/projects/dataflowProtection/interface.cpp
@@ -0,0 +1,745 @@
+/*
+ * interface.cpp
+ *
+ * This file contains functions that deal with the interface into COAST.
+ * This includes command line options and in-code directives.
+ */
+
+#include "dataflowProtection.h"
+
+// standard library includes
+#include <string>
+#include <list>
+#include <fstream>
+#include <sstream>
+
+// LLVM includes
+#include <llvm/Option/Option.h>
+#include <llvm/IR/Module.h>
+#include "llvm/Support/CommandLine.h"
+#include <llvm/IR/Constants.h>
+#include <llvm/IR/IRBuilder.h>
+
+using namespace llvm;
+
+
+// Shared variables
+extern cl::list<std::string> skipFnCl;
+extern cl::list<std::string> skipLibCallsCl;
+extern cl::list<std::string> replicateUserFunctionsCallCl;
+extern cl::list<std::string> ignoreGlblCl;
+extern cl::list<std::string> globalsToRuntimeInitCl;
+extern cl::list<std::string> isrFunctionListCl;
+extern cl::list<std::string> cloneFnCl;
+extern cl::list<std::string> cloneGlblCl;
+extern cl::list<std::string> replReturnCl;
+extern cl::list<std::string> cloneAfterCallCl;
+extern cl::list<std::string> protectedLibCl;
+
+extern cl::opt<std::string> configFileLocation;
+extern cl::opt<bool> SegmentFlag;
+extern cl::opt<bool> storeDataSyncFlag;
+extern cl::opt<bool> noStoreDataSyncFlag;
+extern cl::opt<bool> InterleaveFlag;
+extern cl::opt<bool> noMemReplicationFlag;
+extern cl::opt<bool> verboseFlag;
+
+extern std::string tmr_global_count_name;
+
+
+// New global lists to be used to track function names
+std::list<std::string> skipFn;
+std::list<std::string> skipLibCalls;
+std::list<std::string> coarseGrainedUserFunctions;
+std::list<std::string> ignoreGlbl;
+std::list<std::string> clGlobalsToRuntimeInit;
+std::list<std::string> isrFuncNameList;
+std::list<std::string> tempCloneFnList;
+std::list<std::string> tempCloneGlblList;
+std::list<std::string> tempReplReturnList;
+std::list<std::string> cloneAfterCallList;
+std::list<std::string> tempProtectedLibList;
+std::map<Function*, std::set<int> > noXmrArgList;
+// see removeAnnotations()
+std::set<ConstantExpr*> annotationExpressions;
+
+
+// These are the names of the above CL lists.
+// Any changes to these must also be changed at the head of dataflowProtection.cpp
+const std::string skipFnName = "ignoreFns";
+const std::string ignoreGlblName = "ignoreGlbls";
+const std::string skipLibCallsName = "skipLibCalls";
+const std::string coarseFnsName = "replicateFnCalls";
+const std::string runtimeGlblInitName = "runtimeInitGlobals";
+const std::string isrFuncListString = "isrFunctions";
+const std::string cloneAfterCallString = "cloneAfterCall";
+
+// track, for each global, the functions in which invalid SOR crossings should be ignored
+extern std::map<GlobalVariable*, std::set<Function*> > globalCrossMap;
+
+
+// Copy all (fixed) things from the command line to the internal, editable lists
+void dataflowProtection::getFunctionsFromCL() {
+	// The lists are handled in roughly reverse priority order, which
+	//  matters when the same name shows up in more than one list
+
+	// calls that should not be replicated
+	for (const std::string& name : skipLibCallsCl) {
+		if (verboseFlag) errs() << "CL: do not replicate calls to function '" << name << "'\n";
+		skipLibCalls.push_back(name);
+	}
+
+	// functions that should not be cloned
+	for (const std::string& name : skipFnCl) {
+		if (verboseFlag) errs() << "CL: do not clone function '" << name << "'\n";
+		skipFn.push_back(name);
+	}
+
+	// calls that should be replicated
+	for (const std::string& name : replicateUserFunctionsCallCl) {
+		if (verboseFlag) errs() << "CL: replicate calls to function '" << name << "'\n";
+		coarseGrainedUserFunctions.push_back(name);
+		// Take the name back out of the matching skipping list.
+		// std::list::remove() erases every equal entry and does nothing
+		//  when there is none, so there is no need to search first.
+		skipLibCalls.remove(name);
+	}
+
+	// global variables that should not be cloned
+	for (const std::string& name : ignoreGlblCl) {
+		if (verboseFlag) errs() << "CL: do not clone global variable '" << name << "'\n";
+		ignoreGlbl.push_back(name);
+	}
+
+	for (const std::string& name : globalsToRuntimeInitCl) {
+		clGlobalsToRuntimeInit.push_back(name);
+	}
+
+	// functions that are interrupt service routines
+	for (const std::string& name : isrFunctionListCl) {
+		if (verboseFlag) errs() << "CL: function '" << name << "' is an ISR\n";
+		isrFuncNameList.push_back(name);
+	}
+
+	// functions that should be cloned
+	for (const std::string& name : cloneFnCl) {
+		if (verboseFlag) errs() << "CL: clone function '" << name << "'\n";
+		tempCloneFnList.push_back(name);
+		// Take the name back out of the matching skipping list.
+		// std::list::remove() erases every equal entry and does nothing
+		//  when there is none, so there is no need to search first.
+		skipFn.remove(name);
+	}
+
+	// global variables that should be cloned
+	for (const std::string& name : cloneGlblCl) {
+		if (verboseFlag) errs() << "CL: clone global '" << name << "'\n";
+		tempCloneGlblList.push_back(name);
+		// Take the name back out of the matching skipping list.
+		// std::list::remove() erases every equal entry and does nothing
+		//  when there is none, so there is no need to search first.
+		ignoreGlbl.remove(name);
+	}
+
+	// functions whose return value should be cloned
+	for (const std::string& name : replReturnCl) {
+		if (verboseFlag) errs() << "CL: clone function '" << name << "' return value\n";
+		tempReplReturnList.push_back(name);
+	}
+
+	// functions whose arguments should be cloned after the call
+	for (const std::string& name : cloneAfterCallCl) {
+		if (verboseFlag) errs() << "CL: clone function '" << name << "' args after call\n";
+		cloneAfterCallList.push_back(name);
+		// also, don't touch the insides, or make more than one call
+		skipLibCalls.push_back(name);
+		skipFn.push_back(name);
+	}
+
+	// functions that should be treated as protected libraries
+	for (const std::string& name : protectedLibCl) {
+		if (verboseFlag) errs() << "CL: treat function '" << name << "' as a protected library\n";
+		tempProtectedLibList.push_back(name);
+	}
+}
+
+
+/*
+ * This function extracts function names from the configuration file
+ * The lists it writes to are the global lists defined near the top of this file.
+ * Returns 0 on success, -1 if the file can't be opened, 1 on an unrecognized option.
+ */
+int dataflowProtection::getFunctionsFromConfig() {
+	std::string filename;
+	if (configFileLocation != "") {
+		filename = configFileLocation;
+	} else {
+		char* coast = std::getenv("COAST_ROOT");
+		if (coast) {
+			filename = std::string(coast) + "/projects/dataflowProtection/functions.config";
+		} else {
+			// just look in the current directory
+			filename = "functions.config";
+		}
+	}
+	std::ifstream ifs(filename, std::ifstream::in);
+
+	if (!ifs.is_open()) {
+		errs() << "ERROR: No configuration file found at '" << filename << "'\n";
+		errs() << "         Please pass one in using -configFile\n";
+		return -1;
+	}
+
+	std::list<std::string>* lptr = nullptr;
+	std::string line;
+	while (getline(ifs, line)) {
+		// Remove all whitespace before looking at the line: spaces, tabs, and the
+		//  '\r' left behind by CRLF line endings.  Doing this first means that
+		//  indented comments and lines of only whitespace are skipped below.
+		line.erase(std::remove_if(line.begin(), line.end(),
+				[](char c) { return c == ' ' || c == '\t' || c == '\r'; }), line.end());
+
+		// Skip blank lines and comments (# is the comment symbol)
+		if (line.empty() || line[0] == '#') {
+			continue;
+		}
+
+		std::istringstream iss(line);
+		std::string substr;
+		getline(iss, substr, '=');
+
+		// Find the option we're using
+		if (substr == skipLibCallsName) {
+			lptr = &skipLibCalls;
+		} else if (substr == skipFnName) {
+			lptr = &skipFn;
+		} else if (substr == coarseFnsName) {
+			lptr = &coarseGrainedUserFunctions;
+		} else if (substr == ignoreGlblName) {
+			lptr = &ignoreGlbl;
+		} else if (substr == runtimeGlblInitName) {
+			lptr = &clGlobalsToRuntimeInit;
+		} else if (substr == isrFuncListString) {
+			lptr = &isrFuncNameList;
+		} else {
+			errs() << "ERROR: unrecognized option '" << substr;
+			errs() << "' in configuration file '" << filename << "'\n\n";
+			return 1;
+		}
+
+		// Insert all of the comma-separated names into the selected list
+		while (iss.good()) {
+			getline(iss, substr, ',');
+			if (substr.length() == 0)
+				continue;
+			lptr->push_back(substr);
+		}
+
+	}
+	ifs.close();
+	return 0;
+}
+
+
+void dataflowProtection::processCommandLine(Module& M, int numClones) {
+	// Turns the flags, the config file, and the name lists into the sets used by the pass; exits on bad input
+	if (InterleaveFlag == SegmentFlag) {
+		SegmentFlag = true;
+	}
+	TMR = (numClones==3);
+
+	if (noMemReplicationFlag && noStoreDataSyncFlag) {
+		errs() << warn_string << " noMemDuplication and noStoreDataSync set simultaneously. Recommend not setting the two together.\n";
+	}
+
+	if (noStoreDataSyncFlag && storeDataSyncFlag) {
+		errs() << err_string << " conflicting flags for store and noStore!\n";
+		exit(-1);
+	}
+
+	// Parse information from config file; it prints its own message when it fails
+	if (getFunctionsFromConfig()) {
+		exit(-1);	// an assert would be compiled out of release builds
+	}
+
+	// Copy command line lists to internal lists
+	// This should be able to override config file
+	getFunctionsFromCL();
+
+	// convert function names to actual pointers
+	for (Function & F : M) {
+		if (std::find(isrFuncNameList.begin(), isrFuncNameList.end(), F.getName()) != isrFuncNameList.end()) {
+			isrFunctions.insert(&F);
+		}
+		else if (std::find(tempCloneFnList.begin(), tempCloneFnList.end(), F.getName()) != tempCloneFnList.end()) {
+			fnsToClone.insert(&F);
+		}
+		else if (std::find(tempReplReturnList.begin(), tempReplReturnList.end(), F.getName()) != tempReplReturnList.end()) {
+			replReturn.insert(&F);
+		}
+		else if (std::find(cloneAfterCallList.begin(), cloneAfterCallList.end(), F.getName()) != cloneAfterCallList.end()) {
+			cloneAfterFnCall.insert(&F);
+		}
+		else if (std::find(tempProtectedLibList.begin(), tempProtectedLibList.end(), F.getName()) != tempProtectedLibList.end()) {
+			protectedLibList.insert(&F);
+			// it needs to be added to clone list as well
+			fnsToClone.insert(&F);
+		}
+	}
+
+	// more useful missing function information
+	std::set<std::string> missingFuncNames;
+
+	if (skipFn.size() == 0) {
+		for (auto & fn_it : M) {
+			// Ignore library calls
+			if (fn_it.isDeclaration()) {
+				continue;
+			}
+
+			// Don't erase ISRs
+			if (isISR(fn_it)) {
+				continue;
+			}
+
+			if (xMR_default) {
+				if (fnsToSkip.find(&fn_it) == fnsToSkip.end()) {
+					// This should yield to the fnsToSkip list, as it
+					//  should be fully populated by now
+					fnsToClone.insert(&fn_it);
+				}
+			}
+		}
+	} else {
+		for (auto fcn : skipFn) {
+			Function* f = M.getFunction(StringRef(fcn));
+			if (!f) {
+				// If the name doesn't exist, stick it in a list for later
+				// This way, we can report missing ones all at once
+				missingFuncNames.insert(fcn);
+				continue;
+			}
+			fnsToSkip.insert(f);
+		}
+	}
+
+	// Report missing function names from command line
+	if (missingFuncNames.size() > 0) {
+		errs() << "\n" << err_string << " The following function names do not exist!\n";
+		for (auto fcn : missingFuncNames) {
+			errs() << "  '" << fcn << "'\n";
+		}
+		errs() << "Check the spelling, check if the optimizer inlined it, or if the name was mangled\n\n";
+		exit(-1);
+	}
+
+	// more useful missing global information
+	std::set<std::string> missingGlblNames;
+
+	// convert global names to references
+	for (auto glblName : tempCloneGlblList) {
+		// allowInternal = true; this can then detect static variables
+		GlobalVariable* glbl = M.getGlobalVariable(StringRef(glblName), true);
+		if (!glbl) {
+			missingGlblNames.insert(glblName);
+		} else {
+			globalsToClone.insert(glbl);
+		}
+	}
+
+	if (missingGlblNames.size() > 0) {
+		errs() << "\n" << err_string
+			   << " The following global variable names do not exist!\n";
+		for (auto glblName : missingGlblNames) {
+			errs() << "  '" << glblName << "'\n";
+		}
+		errs() << "Check the spelling, or if the name was mangled\n\n";
+		exit(-1);
+	}
+
+	// special case
+	ignoreGlbl.push_back(tmr_global_count_name);
+}
+
+void dataflowProtection::processAnnotations(Module& M) {
+	// Applies the in-code directives found in llvm.global.annotations, then records what llvm.used lists
+	// Inspired by http://bholt.org/posts/llvm-quick-tricks.html
+	auto global_annos = M.getNamedGlobal("llvm.global.annotations");
+	if (global_annos) {
+		auto a = dyn_cast<ConstantArray>(global_annos->getOperand(0));
+		// check that it is the right type
+		if (a) {
+			for (unsigned i=0; i < a->getNumOperands(); i++) {
+				auto e = cast<ConstantStruct>(a->getOperand(i));
+
+				// extract data
+				auto anno = cast<ConstantDataArray>(cast<GlobalVariable>(e->getOperand(1)->getOperand(0))->getOperand(0))->getAsCString();
+
+				// Function annotations
+				if (auto fn = dyn_cast<Function>(e->getOperand(0)->getOperand(0))) {
+					if (anno == no_xMR_anno) {
+						if (verboseFlag) errs() << "Directive: do not clone function '" << fn->getName() << "'\n";
+						fnsToSkip.insert(fn);
+						if (fnsToClone.find(fn) != fnsToClone.end()) {
+							fnsToClone.erase(fn);
+						}
+					} else if (anno == xMR_anno) {
+						if (verboseFlag) errs() << "Directive: clone function '" << fn->getName() << "'\n";
+						fnsToClone.insert(fn);
+					} else if (anno == xMR_call_anno) {
+						if (verboseFlag) errs() << "Directive: replicate calls to function '" << fn->getName() << "'\n";
+						coarseGrainedUserFunctions.push_back(fn->getName());
+					} else if (anno == skip_call_anno) {
+						if (verboseFlag) errs() << "Directive: do not clone calls to function '"  << fn->getName() << "'\n";
+						skipLibCalls.push_back(fn->getName());
+						// TODO: do we need to worry about duplicates? - make it a set instead
+					} else if (anno.startswith("no-verify-")) {
+						StringRef global_name = anno.substr(10, anno.size() - 10);
+
+						GlobalValue* glbl = M.getNamedValue(global_name);
+						if (glbl) {
+							GlobalVariable* glblVar = dyn_cast<GlobalVariable>(glbl);
+							if (glblVar) {
+								// make sure the set exists already
+								if (globalCrossMap.find(glblVar) == globalCrossMap.end()) {
+									std::set<Function*> tempSet;
+									globalCrossMap[glblVar] = tempSet;
+								}
+								globalCrossMap[glblVar].insert(fn);
+								if (verboseFlag) {
+									errs() << "Directive: ignoring global '" << global_name
+										   << "' being used in function '" << fn->getName() << "'\n";
+								}
+							}
+						} else {
+							errs() << warn_string << " global '" << global_name << "' doesn't exist\n";
+						}
+
+					} else if (anno.startswith("no_xMR_arg-")) {
+						StringRef argNumStr = anno.substr(11, anno.size() - 11);
+						int argNum = std::stoi(argNumStr.str());
+						// NOTE: std::stoi throws if the text after the prefix is not a number
+
+						if (argNum >= fn->getFunctionType()->params().size()) {
+							errs() << warn_string << " index '" << argNum
+								   << "' is greater than the number of operands in function '"
+								   << fn->getName() << "'\n";
+							// only warn about it, don't exit
+						} else {
+							// create set if first one
+							if (noXmrArgList.find(fn) == noXmrArgList.end()) {
+								std::set<int> tempSet;
+								noXmrArgList[fn] = tempSet;
+							}
+							// add to set of function arguments indices to skip
+							noXmrArgList[fn].insert(argNum);
+							if (verboseFlag) {
+								errs() << "Directive: do not clone argument "
+									   << argNum << " in function '"
+									   << fn->getName() << "'\n";
+							}
+						}
+
+					} else if (anno.startswith(cloneAfterCallAnno)) {
+						if (verboseFlag) errs() << "Directive: replicate function '" << fn->getName() << "' arguments after the call\n";
+						if (anno.size() == cloneAfterCallAnno.size()) {
+							// clone all the args
+							cloneAfterFnCall.insert(fn);
+							// also, don't touch the insides, or make more than one call
+							skipFn.push_back(fn->getName());
+							skipLibCalls.push_back(fn->getName());
+						} else {
+							// it's a list of indices to clone
+							// (the list would be whatever follows the cloneAfterCallAnno prefix)
+							errs() << err_string << " this feature is not yet supported as a directive!\n";
+							errs() << anno << "\n";
+							exit(-1);
+						}
+
+					} else if (anno == isr_anno) {
+						if (verboseFlag) errs() << "Directive: function '" << fn->getName() << "' is an ISR\n";
+						isrFunctions.insert(fn);
+					} else if (anno == repl_ret_anno) {
+						if (verboseFlag) errs() << "Directive: clone function '" << fn->getName() << "' return value\n";
+						replReturn.insert(fn);
+					} else if (anno == prot_lib_anno) {
+						if (verboseFlag) errs() << "Directive: treat function '" << fn->getName() << "' as a protected library\n";
+						protectedLibList.insert(fn);
+						// it needs to be added to clone list as well
+						fnsToClone.insert(fn);
+					} else {
+						assert(false && "Invalid option on function");
+					}
+
+				}
+				// Global annotations
+				else if (auto gv = dyn_cast<GlobalVariable>(e->getOperand(0)->getOperand(0))) {
+					if (anno == no_xMR_anno) {
+						if (verboseFlag) errs() << "Directive: do not clone global variable '" << gv->getName() << "'\n";
+						globalsToSkip.insert(gv);
+					} else if (anno == xMR_anno) {
+						if (verboseFlag) errs() << "Directive: clone global variable '" << gv->getName() << "'\n";
+						globalsToClone.insert(gv);
+					} else if (anno == default_xMR) {
+						if (verboseFlag) errs() << "Directive: set xMR as default\n";
+					} else if (anno == default_no_xMR) {
+						if (verboseFlag) errs() << "Directive: set no xMR as default\n";
+						xMR_default = false;
+					} else {
+						if (verboseFlag) errs() << "Directive: " << anno << "\n";
+						assert(false && "Invalid option on global value");
+					}
+				}
+				else {
+					assert(false && "Non-function annotation");
+				}
+			}
+		} else {
+			errs() << warn_string << " global annotations of wrong type!\n" << *global_annos << "\n";
+		}
+	}
+
+	/*
+	 * get the data from the list of "used" globals, and add it to volatileGlobals
+	 * For example, in the FreeRTOS kernel, the list looks like this:
+	 * @llvm.used = appending global [4 x i8*] [i8* bitcast (i32* @ulICCEOIR to i8*),
+	 * 		i8* bitcast (i32* @ulICCIAR to i8*), i8* bitcast (i32* @ulICCPMR to i8*),
+	 * 		i8* bitcast (i32* @ulMaxAPIPriorityMask to i8*)], section "llvm.metadata"
+	 * If the global type is already a i8*, then we can detect that. Won't be that for functions.
+	 */
+	auto used_annos = M.getNamedGlobal("llvm.used");
+	if (used_annos) {
+		auto ua = dyn_cast<ConstantArray>(used_annos->getOperand(0));
+		if (ua) {
+			for (unsigned i=0; i < ua->getNumOperands(); i++) {
+				auto element = ua->getOperand(i);
+				if (BitCastOperator* bc = dyn_cast<BitCastOperator>(element)) {
+					if (GlobalVariable* gv = dyn_cast<GlobalVariable>(bc->getOperand(0))) {
+						// found a global marked as "used"
+						volatileGlobals.insert(gv);
+						if (verboseFlag) errs() << "Directive: don't remove '" << gv->getName() << "'\n";
+					} else if (Function* fn = dyn_cast<Function>(bc->getOperand(0))) {
+						// found a function marked as "used"
+						usedFunctions.insert(fn);
+					}
+				} else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(element)) {
+					if (verboseFlag) errs() << "Directive: don't remove '" << gv->getName() << "'\n";
+					volatileGlobals.insert(gv);
+				}
+			}
+		}
+	}
+}
+
+
+void dataflowProtection::processLocalAnnotations(Module& M) {
+	// Applies the directives attached to local variables (calls to llvm.var.annotation)
+	// Indirect calls are skipped; they are collected here so they can be reported together at the end
+	std::set<CallInst*> skippedIndirectCalls;
+	for (auto &F : M) {
+		for (auto &bb : F) {
+			for (auto &I : bb) {
+				if ( auto CI = dyn_cast<CallInst>(&I) ) {
+					// have to skip any bitcasts in function calls because they aren't actually a function
+					if (isIndirectFunctionCall(CI, "processAnnotations", false)) {
+						if (!CI->isInlineAsm()) {
+							skippedIndirectCalls.insert(CI);
+						}
+						continue;
+					}
+					if (CI->getCalledFunction()->getName() == "llvm.var.annotation") {
+						// Get variable
+						auto adr = dyn_cast<BitCastInst>(CI->getOperand(0));
+						AllocaInst* var;
+						if (!adr) {
+							// there could be no bitcast if the alloca is already of type i8
+							var = dyn_cast<AllocaInst>(CI->getOperand(0));
+						} else {
+							var = dyn_cast<AllocaInst>(adr->getOperand(0));
+						}
+						assert(var && "valid alloca");
+
+						// cast<> (not dyn_cast<>) because both results are used without a null check
+						auto ce = cast<ConstantExpr>(CI->getOperand(1));
+						auto gv  = cast<GlobalVariable>(ce->getOperand(0));
+						auto init = dyn_cast<ConstantDataArray>(gv->getInitializer());
+
+						if (init) {
+							auto anno = init->getAsCString();
+							if (anno == no_xMR_anno) {
+								if (verboseFlag) errs() << "Directive: do not clone local variable '" << *var << "'\n";
+								instsToSkip.insert(var);
+								walkInstructionUses(var, false);
+							} else if (anno == xMR_anno) {
+								if (verboseFlag) errs() << "Directive: clone local variable '" << *var << "'\n";
+								instsToCloneAnno.insert(var);
+								// if this is all we do, it will only clone the `alloca` instruction, but
+								//  we want it to clone all instructions that use the same variable
+								walkInstructionUses(var, true);
+								// how do we get the syncpoints to happen?
+								// have to add them manually
+							} else {
+								errs() << anno << "\n";
+								assert(false && "Unrecognized variable annotation");
+							}
+						} else {
+							// the annotation's initializer is not a constant data array, so there is no string to read
+							errs() << "Annotation string is not a constant data array:\n";
+							PRINT_VALUE(CI);
+							assert(false && "Annotation string is not a constant data array");
+						}
+					}
+				}
+			}
+		}
+	}
+	// print warnings
+	if (verboseFlag && skippedIndirectCalls.size() > 0) {
+		errs() << warn_string << " skipping indirect function calls in processLocalAnnotations:\n";
+		for (auto CI : skippedIndirectCalls) {
+			PRINT_VALUE(CI);
+		}
+	}
+}
+
+
+//----------------------------------------------------------------------------//
+// Cleanup
+//----------------------------------------------------------------------------//
+
+// shared variables
+static std::set<GlobalVariable*> anno_strings;
+static std::set<BitCastOperator*> anno_casts;
+
+void dataflowProtection::removeAnnotations(Module& M) {
+	// Collects what llvm.global.annotations refers to, and removes the parts that are already unused
+	auto global_annos = M.getNamedGlobal("llvm.global.annotations");
+	if (!global_annos)
+		return;
+	auto a = dyn_cast<ConstantArray>(global_annos->getOperand(0));
+	if (!a)
+		return;
+
+	// Populate a list of global strings that are only used in annotations
+	// There are also function "uses" which only show up here; remove them later
+	for (unsigned i=0; i < a->getNumOperands(); i++) {
+		auto e = cast<ConstantStruct>(a->getOperand(i)); 	// This is part of global_anno
+
+		for (unsigned j=0; j < e->getNumOperands(); j++) {
+			auto op = e->getOperand(j);
+			if (op->getNumOperands() >= 1) {
+				// constant string
+				if (auto cs = dyn_cast<GlobalVariable>(e->getOperand(j)->getOperand(0))) {
+					if (cs->getSection() == "llvm.metadata") {
+						anno_strings.insert(cs);
+					}
+				}
+				// remove constant expressions so they don't count as users later
+				else if (auto ce = dyn_cast<ConstantExpr>(op)) {
+					// Can't delete them here, so keep a list for later
+					annotationExpressions.insert(ce);
+				}
+			}
+			// They will always be casts because it is to i8*, which is not a valid function type by itself
+			if (auto bCast = dyn_cast<BitCastOperator>(op)) {
+				anno_casts.insert(bCast);
+			}
+		}
+	}
+
+	// Remove global annotations
+	// An erased string also leaves anno_strings, because removeLocalAnnotations() erases everything in it
+	for (auto it = anno_strings.begin(); it != anno_strings.end(); ) {
+		GlobalVariable* a_s = *it;
+		if (a_s->getNumUses() < 1) {
+			a_s->eraseFromParent();
+			it = anno_strings.erase(it);
+		} else {
+			globalsToSkip.insert(a_s);
+			++it;
+		}
+	}
+	// Remove all the bitcasts that were inside that global annotations because they can cause problems
+	for (auto annoBitCast : anno_casts) {
+		if (annoBitCast->getNumUses() == 0) {
+			annoBitCast->dropAllReferences();
+		}
+	}
+
+	// Remove the global that defines the default behavior of COAST (if exists)
+	if (auto default_behavior = M.getNamedGlobal(default_global)) {
+		if (default_behavior->getNumUses() < 1) {
+			default_behavior->eraseFromParent();
+		}
+	}
+}
+
+
+void dataflowProtection::removeLocalAnnotations(Module& M) {
+	// Remove llvm.var.annotation calls, then whatever is left of the global annotations
+	std::set<Instruction*> toRemove;
+	Function* lva = nullptr;
+
+	for (auto &F : M) {
+		for (auto & bb : F) {
+			for (auto & I : bb) {
+				if (auto CI = dyn_cast<CallInst>(&I)) {
+					auto called = CI->getCalledFunction();
+					if ( (called != nullptr) && (called->getName() == "llvm.var.annotation") ) {
+						lva = called;
+						toRemove.insert(CI);
+					}
+				}
+			}
+		}
+	}
+
+	for (auto rm : toRemove) {
+		auto op0 = dyn_cast<Instruction>(rm->getOperand(0));
+		if (rm->getNumUses() < 1) {
+			if (rm->getParent()) {
+				rm->eraseFromParent();
+			}
+		}
+		// Do this 2nd so that the one possible user is removed first
+		if (op0 && op0->getNumUses() < 1) {
+			// We probably added this (which is probably a bitcast) to the list of instructions to clone
+			// Take it out of that list before it is deleted, so the pointer isn't used afterwards
+			instsToCloneAnno.erase(op0);
+			if (op0->getParent()) {
+				op0->eraseFromParent();
+			}
+		}
+	}
+
+	if (lva) {
+		lva->removeFromParent();
+	}
+
+	// Remove global annotations
+	auto global_annos = M.getNamedGlobal("llvm.global.annotations");
+	if (global_annos) {
+		M.getGlobalList().erase(global_annos);
+	}
+	// Remove strings it used
+	for (auto a_s : anno_strings) {
+		a_s->eraseFromParent();
+	}
+	// Remove all the bitcasts that were inside that global annotations because they can cause problems
+	for (auto annoBitCast : anno_casts) {
+		if (annoBitCast->getNumUses() == 0) {
+			annoBitCast->dropAllReferences();
+		}
+	}
+	// The shared sets are static, so they outlive this call.  Empty them, because
+	//  the strings were just deleted and must never be erased a second time.
+	anno_strings.clear();
+	anno_casts.clear();
+
+	// Try again: Remove the global that defines the default behavior of COAST (if exists)
+	if (auto default_behavior = M.getNamedGlobal(default_global)) {
+		assert( (default_behavior->getNumUses() < 1) && "no more uses for global default");
+		default_behavior->eraseFromParent();
+	}
+
+	return;
+}
diff --git a/projects/dataflowProtection/synchronization.cpp b/projects/dataflowProtection/synchronization.cpp
index 78cf97ab5..6b179f5ea 100644
--- a/projects/dataflowProtection/synchronization.cpp
+++ b/projects/dataflowProtection/synchronization.cpp
@@ -3,6 +3,7 @@
 #include "dataflowProtection.h"
 
 #include <deque>
+#include <list>
 
 #include <llvm/IR/Module.h>
 #include "llvm/Support/CommandLine.h"
@@ -11,6 +12,9 @@
 #include <llvm/Analysis/LoopInfo.h>
 #include <llvm/IR/IRBuilder.h>
 
+using namespace llvm;
+
+
 // Command line options
 extern cl::opt<bool> OriginalReportErrorsFlag;
 extern cl::opt<bool> ReportErrorsFlag;
@@ -21,47 +25,150 @@ extern cl::opt<bool> noMemReplicationFlag;
 extern cl::opt<bool> storeDataSyncFlag;
 extern cl::opt<bool> verboseFlag;
 extern cl::opt<bool> noMainFlag;
+extern cl::opt<bool> countSyncsFlag;
+extern cl::opt<bool> protectStackFlag;
 
-using namespace llvm;
+// another set of sync points from boundary crossings
+// see verifyOptions()
+extern std::set<StoreInst*> syncGlobalStores;
 
 // commonly used strings
 std::string fault_function_name = "FAULT_DETECTED_DWC";
 std::string tmr_vote_inst_name = "vote";
 std::string tmr_global_count_name = "TMR_ERROR_CNT";
 
+// comparison names
+std::string gep_cmp_name = "gcmp";
+std::string call_cmp_name = "ccmp";
+std::string store_cmp_name = "scmp";
+std::string terminator_cmp_name = "tcmp";
+
+// dynamically count the number of times we synchronize
+std::string dynCountName = "__SYNC_COUNT";
+GlobalVariable* dynamicSyncCount = nullptr;
+
+/* commonly used comparison predicates
+ * An "ordered" floating point comparison can only be true if neither operand is
+ * NaN; for vector types, this applies to each pair of elements.  Since we don't want
+ * any NaNs as a result of xMR comparisons, we'll use the "ordered" type of comparisons.
+ * This only applies to floating point types.  For integer types, simple equality
+ * doesn't depend on signed or unsigned types.
+ */
+static Instruction::OtherOps fpCmpType = Instruction::OtherOps::FCmp;
+static Instruction::OtherOps intCmpType = Instruction::OtherOps::ICmp;
+static CmpInst::Predicate fpCmpEqual = CmpInst::FCMP_OEQ;
+static CmpInst::Predicate intCmpEqual = CmpInst::ICMP_EQ;
+static CmpInst::Predicate fpCmpNotEqual = CmpInst::FCMP_ONE;
+static CmpInst::Predicate intCmpNotEqual = CmpInst::ICMP_NE;
+
+
+//----------------------------------------------------------------------------//
+// Helper functions
+//----------------------------------------------------------------------------//
+/*
+ * Helper function for getting the correct comparison type
+ */
+Instruction::OtherOps getComparisonType(Type* opType) {
+	// floating point operands (scalar or vector) use the floating point
+	//  comparison type; everything else uses the integer one
+	const bool isFloatingPoint = opType->isFPOrFPVectorTy();
+	Instruction::OtherOps cmpType = isFloatingPoint ? fpCmpType : intCmpType;
+	return cmpType;
+}
+
+/*
+ * Helper function for getting the correct comparison operation
+ */
+CmpInst::Predicate getComparisonPredicate(Type* opType) {
+	// floating point operands (scalar or vector) use the floating point
+	//  equality predicate; everything else uses the integer one
+	const bool isFloatingPoint = opType->isFPOrFPVectorTy();
+	CmpInst::Predicate equalPred = isFloatingPoint ? fpCmpEqual : intCmpEqual;
+	return equalPred;
+}
+
+
 //----------------------------------------------------------------------------//
 // Obtain synchronization points
 //----------------------------------------------------------------------------//
+// #define DBG_POP_SYNC_PTS
 void dataflowProtection::populateSyncPoints(Module& M) {
+	#ifdef DBG_POP_SYNC_PTS
+	int debugFlag = 0;
+	#endif
+
+	/*
+	 * Create counter that will count the number of times a syncpoint is reached
+	 */
+	if (countSyncsFlag) {
+		dynamicSyncCount = M.getGlobalVariable(dynCountName);
+		if (!dynamicSyncCount) {
+			dynamicSyncCount = cast<GlobalVariable>(M.getOrInsertGlobal(dynCountName,
+																	IntegerType::getInt64Ty(M.getContext())));
+			// if there is no main in this module, keep this global as extern
+			if (noMainFlag) {
+				dynamicSyncCount->setExternallyInitialized(true);
+				dynamicSyncCount->setLinkage(GlobalValue::LinkageTypes::ExternalLinkage);
+			} else {
+				// otherwise, will be initialized to 0
+				dynamicSyncCount->setConstant(false);
+				dynamicSyncCount->setInitializer(ConstantInt::getNullValue(IntegerType::getInt64Ty(M.getContext())));
+				dynamicSyncCount->setUnnamedAddr( GlobalValue::UnnamedAddr() );
+				dynamicSyncCount->setAlignment(8);
+			}
+			globalsToSkip.insert(dynamicSyncCount);
+		}
+	}
+
+	// delay printing error messages
+	std::set<CallInst*> skippedIndirectCalls;
+
 	for (auto F : fnsToClone) {
-		if (F->getName().startswith("FAULT_DETECTED")) //Don't sync in err handler
+		// Don't sync in error handler
+		if (F->getName() == fault_function_name)
 			continue;
+		#ifdef DBG_POP_SYNC_PTS
+		if (F->getName() == "xTimerCreate.RR") {
+			debugFlag = 1;
+			// PRINT_VALUE(F);
+		}
+		#endif
 
 		for (auto & bb : *F) {
+			#ifdef DBG_POP_SYNC_PTS
+			if (debugFlag)
+				errs() << bb.size() << " instructions\n";
+			#endif
+
 			for (auto & I : bb) {
 
-				//Sync before branches
+				// Sync before branches
 				if (I.isTerminator()) {
-					//skip syncing on unreachable instructions
+					// skip syncing on unreachable instructions
 					if (UnreachableInst* unreach = dyn_cast<UnreachableInst>(&I))
 						continue;
+					#ifdef DBG_POP_SYNC_PTS
+					if (debugFlag)
+						PRINT_VALUE(&I);
+					#endif
 					syncPoints.push_back(&I);
 				}
 
-				//Sync at external function calls - they're only declared, not defined
+				// Sync at external function calls - they're only declared, not defined
 				if (CallInst* CI = dyn_cast<CallInst>(&I)) {
-					//Calling linkage on inline assembly causes errors, make this check first
-					if(CI->isInlineAsm())
+					// Calling linkage on inline assembly causes errors, make this check first
+					if (CI->isInlineAsm())
 						continue;
 
-					//Skip any thing that doesn't have a called function and print warning
-					if(isIndirectFunctionCall(CI, "populateSyncPoints")){
+					// Skip any thing that doesn't have a called function and print warning
+					if (isIndirectFunctionCall(CI, "populateSyncPoints", false)) {
+						skippedIndirectCalls.insert(CI);
 						continue;
 					}
 
 					Function* calledF = CI->getCalledFunction();
 
-					//skip debug function calls
+					// skip debug function calls
 					if (calledF->hasName()) {
 						if (calledF->getName().startswith_lower("llvm.dbg.") ||
 								calledF->getName().startswith_lower("llvm.lifetime."))
@@ -76,19 +183,24 @@ void dataflowProtection::populateSyncPoints(Module& M) {
 						continue;
 					}
 
+					// sync before function declarations and calls to external functions
 					if (calledF->hasExternalLinkage() && calledF->isDeclaration()) {
 						syncPoints.push_back(&I);
 //						errs() << "Adding " << CI->getCalledFunction()->getName() << " to syncpoints\n";
 					}
+					#ifdef DBG_POP_SYNC_PTS
+					if (debugFlag)
+						PRINT_VALUE(&I);
+					#endif
 				}
 
-				//Sync data on all stores unless explicitly instructed not to
+				// Sync data on all stores unless explicitly instructed not to
 				if (StoreInst* SI = dyn_cast<StoreInst>(&I)) {
-					//Don't sync pointers, they will be different
+					// Don't sync pointers, they will be different
 					if (SI->getOperand(0)->getType()->isPointerTy()) {
 						continue;
-					} else if(dyn_cast<PtrToIntInst>(SI->getOperand(0))) {
-						//Likewise, don't check casted pointers
+					} else if (dyn_cast<PtrToIntInst>(SI->getOperand(0))) {
+						// Likewise, don't check casted pointers
 						continue;
 					}
 					// if this is not a cloned instruction
@@ -96,29 +208,57 @@ void dataflowProtection::populateSyncPoints(Module& M) {
 							   !noMemReplicationFlag ) {
 						continue;
 					}
-					//by default, we don't sync on stores, unless specifically told to
-					//have to sync on stores, data and addr, if no mem replication
+					// By default, we don't sync on stores, unless specifically told to
+					// Have to sync on stores, data and addr, if no mem replication
 					else if (!noMemReplicationFlag && !storeDataSyncFlag) {
 						continue;
 					}
-					//otherwise, go ahead and add it to the list of sync-points
+					// Otherwise, go ahead and add it to the list of sync-points
 					else {
 						syncPoints.push_back(&I);
+						#ifdef DBG_POP_SYNC_PTS
+						if (debugFlag)
+							PRINT_VALUE(&I);
+						#endif
 					}
 				}
 
-				//Sync offsets of GEPs
-				if(GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(&I)){
-					if(willBeCloned(GEP) || isCloned(GEP)){
+				// Sync offsets of GEPs
+				if (GetElementPtrInst* GEP = dyn_cast<GetElementPtrInst>(&I)) {
+					if (willBeCloned(GEP) || isCloned(GEP)) {
+						#ifdef DBG_POP_SYNC_PTS
+						if (debugFlag)
+							PRINT_VALUE(&I);
+						#endif
 						syncPoints.push_back(&I);
 					}
 				}
 
 			}
 		}
+		#ifdef DBG_POP_SYNC_PTS
+		if (debugFlag)
+			debugFlag = 0;
+		#endif
+	}
+
+	// print warnings
+	if (skippedIndirectCalls.size() > 0) {
+		errs() << warn_string
+			   << " skipping indirect function calls in populateSyncPoints:\n";
+		for (auto CI : skippedIndirectCalls) {
+			PRINT_VALUE(CI);
+		}
+	}
+
+	// add the global stores found earlier (verifyOptions())
+	for (auto si : syncGlobalStores) {
+//		errs() << "sync global store: " << *si << "\n";
+		syncPoints.push_back(si);
 	}
 }
 
+
 //----------------------------------------------------------------------------//
 // Insert synchronization logic
 //----------------------------------------------------------------------------//
@@ -128,11 +268,12 @@ void dataflowProtection::processSyncPoints(Module & M, int numClones) {
 
 	GlobalVariable* TMRErrorDetected = M.getGlobalVariable(tmr_global_count_name);
 
-	//Look for the variable first. If it doesn't exist, make one
-	//If it is unneeded, it is erased at the end of this function
+	// Look for the variable first. If it doesn't exist, make one
+	// If it is unneeded, it is erased at the end of this function
 	if (!TMRErrorDetected) {
-		if(TMR && ReportErrorsFlag && verboseFlag)
-			errs() << "Could not find " << tmr_global_count_name << " flag! Creating one...\n";
+		if (TMR && ReportErrorsFlag && verboseFlag) {
+			errs() << info_string << " Could not find '" << tmr_global_count_name << "' flag! Creating one...\n";
+		}
 
 		TMRErrorDetected = cast<GlobalVariable>(M.getOrInsertGlobal(tmr_global_count_name,
 														IntegerType::getInt32Ty(M.getContext())));
@@ -147,11 +288,12 @@ void dataflowProtection::processSyncPoints(Module & M, int numClones) {
 			TMRErrorDetected->setUnnamedAddr( GlobalValue::UnnamedAddr() );
 			TMRErrorDetected->setAlignment(4);
 		}
-		globalsToSkip.insert(TMRErrorDetected);
 	}
 	assert(TMRErrorDetected != nullptr);
+	// make sure to skip this - I think this check is too late
+	globalsToSkip.insert(TMRErrorDetected);
 
-	// Some of the syncpoints may be invalidated during this process, but we can't remove them
+	// Some of the syncpoints may be invalidated during this next process, but we can't remove them
 	//  from this list we're iterating over.  Make a list to delete them later.
 	std::vector<Instruction*> deleteItLater;
 
@@ -160,16 +302,35 @@ void dataflowProtection::processSyncPoints(Module & M, int numClones) {
 		assert(I && "How did a null pointer get into syncpoints?");
 
 		if (StoreInst* currStoreInst = dyn_cast<StoreInst>(I)) {
-			if(!noStoreDataSyncFlag){
+			/* Sync here if it's a special global store across SoR */
+			if (syncGlobalStores.find(currStoreInst) != syncGlobalStores.end()) {
+				syncStoreInst(currStoreInst, TMRErrorDetected, true);
+//				errs() << *currStoreInst << "\n";
+
+				/* If it is a special store, then we can also remove the clones of the StoreInst */
+				ValuePair clones = getClone(currStoreInst);
+				if (clones.first != currStoreInst) {
+					Instruction* firstClone = dyn_cast<Instruction>(clones.first);
+					firstClone->eraseFromParent();
+					if (TMR) {
+						Instruction* secondClone = dyn_cast<Instruction>(clones.second);
+						secondClone->eraseFromParent();
+					}
+					/* Now we have to clean up the map to avoid stale pointers */
+					cloneMap.erase(currStoreInst);
+				}
+			}
+			/* Sync here if the flag is set */
+			else if (!noStoreDataSyncFlag) {
 				syncStoreInst(currStoreInst, TMRErrorDetected);
 			}
 		} else if (CallInst* currCallInst = dyn_cast<CallInst>(I)) {
 			processCallSync(currCallInst, TMRErrorDetected);
 
-		} else if(TerminatorInst* currTerminator = dyn_cast<TerminatorInst>(I)) { //is a terminator
+		} else if (TerminatorInst* currTerminator = dyn_cast<TerminatorInst>(I)) { // is a terminator
 			syncTerminator(currTerminator, TMRErrorDetected);
 
-		} else if(GetElementPtrInst* currGEP = dyn_cast<GetElementPtrInst>(I)) {
+		} else if (GetElementPtrInst* currGEP = dyn_cast<GetElementPtrInst>(I)) {
 
 			// default is DON'T sync on addresses, can only do that when there is no second
 			//  copy in memory
@@ -178,11 +339,11 @@ void dataflowProtection::processSyncPoints(Module & M, int numClones) {
 			}
 
 			if (noLoadSyncFlag) {
-				//Don't sync address of loads
+				// Don't sync address of loads
 				if ( dyn_cast<LoadInst>(currGEP->user_back()) ) {
 					continue;
 				} else if (GetElementPtrInst* nextGEP = dyn_cast<GetElementPtrInst>(currGEP->user_back())) {
-					//Don't want to sync GEPs that feed GEPs of load inst
+					// Don't want to sync GEPs that feed GEPs of load inst
 					if (nextGEP->getNumUses() == 1) {
 						if ( dyn_cast<LoadInst>(nextGEP->user_back()) ) {
 							continue;
@@ -192,11 +353,11 @@ void dataflowProtection::processSyncPoints(Module & M, int numClones) {
 			}
 
 			if (noStoreAddrSyncFlag) {
-				//Don't address of stores
+				// Don't sync address of stores
 				if ( dyn_cast<StoreInst>(currGEP->user_back()) ) {
 					continue;
 				} else if (GetElementPtrInst* nextGEP = dyn_cast<GetElementPtrInst>(currGEP->user_back())) {
-					//Don't want to sync GEPs that feed GEPs of store inst
+					// Don't want to sync GEPs that feed GEPs of store inst
 					if (nextGEP->getNumUses() == 1) {
 						if ( dyn_cast<StoreInst>(nextGEP->user_back()) ) {
 							continue;
@@ -205,11 +366,12 @@ void dataflowProtection::processSyncPoints(Module & M, int numClones) {
 				}
 			}
 
-			//else there is noMemReplication
+			// else there is noMemReplication
 			if (syncGEP(currGEP, TMRErrorDetected)) {
 				deleteItLater.push_back(I);
 			}
 		} else {
+			assert(isa<Instruction>(I) && "non-instruction value in syncpoints");
 			// more detailed information about the failure
 			if (BasicBlock* wrongBB = dyn_cast<BasicBlock>(I)) {
 				errs() << "Something is wrong here...\n";
@@ -238,17 +400,19 @@ void dataflowProtection::processSyncPoints(Module & M, int numClones) {
 		syncPoints.push_back(ns);
 	}
 
-	if(!TMR && TMRErrorDetected->getNumUses() < 1)
+	// remove the TMR counter if it wasn't used
+	if (!TMR && TMRErrorDetected->getNumUses() < 1)
 		TMRErrorDetected->eraseFromParent();
 }
 
+
 /*
  * Returns true if it invalidates the pointer to currGEP.  The calling function is responsible
  *  for handling this.
  */
 bool dataflowProtection::syncGEP(GetElementPtrInst* currGEP, GlobalVariable* TMRErrorDetected) {
-	//2 forms of GEP with different number of arguments
-	//Offset is the last argument
+	// 2 forms of GEP with different number of arguments
+	// Offset is the last argument
 	std::vector<Instruction*> syncInsts;
 	Value* orig = currGEP->getOperand(currGEP->getNumOperands()-1);
 
@@ -266,17 +430,13 @@ bool dataflowProtection::syncGEP(GetElementPtrInst* currGEP, GlobalVariable* TMR
 
 	Value* clone1 = getClone(orig).first;
 	assert(clone1 && "Cloned value exists");
-	Instruction::OtherOps cmp_op;
-	CmpInst::Predicate cmp_eq;
-	if (orig->getType()->isFPOrFPVectorTy()) {
-		cmp_op = Instruction::OtherOps::FCmp;
-		cmp_eq = CmpInst::FCMP_UEQ; //Should this be OEQ instead?
-	} else {
-		cmp_op = Instruction::OtherOps::ICmp;
-		cmp_eq = CmpInst::ICMP_EQ;
-	}
 
-	Instruction* cmp = CmpInst::Create(cmp_op, cmp_eq, orig, clone1, "cmp", currGEP);
+	// get the correct comparison type for this instruction
+	Type* opType = orig->getType();
+	Instruction::OtherOps cmp_op = getComparisonType(opType);
+	CmpInst::Predicate cmp_eq = getComparisonPredicate(opType);
+
+	Instruction* cmp = CmpInst::Create(cmp_op, cmp_eq, orig, clone1, gep_cmp_name, currGEP);
 	cmp->removeFromParent();
 	cmp->insertBefore(currGEP);
 
@@ -297,37 +457,40 @@ bool dataflowProtection::syncGEP(GetElementPtrInst* currGEP, GlobalVariable* TMR
 		currGEPClone1->setOperand(currGEPClone1->getNumOperands()-1,sel);
 		currGEPClone2->setOperand(currGEPClone2->getNumOperands()-1,sel);
 
-		//Too many cases to account for this, so assertion is removed for now
+		// Too many cases to account for this, so assertion is removed for now
 //		if(!isa<PHINode>(orig)){
 //			assert(numUses == 2 &&  "Instruction only used in GEP synchronization");
 //		}
 
 		insertTMRCorrectionCount(cmp, TMRErrorDetected);
-	} else {		//DWC
+	} else {		// DWC
 		Function* currFn = currGEP->getParent()->getParent();
 		splitBlocks(cmp, errBlockMap[currFn]);
-		//fix invalidated pointer - see note in processCallSync()
+		// fix invalidated pointer - see note in processCallSync()
 		startOfSyncLogic[currGEP] = currGEP;
 	}
 
 	return false;
 }
 
-void dataflowProtection::syncStoreInst(StoreInst* currStoreInst, GlobalVariable* TMRErrorDetected) {
-	//Keep track of the inserted instructions
+void dataflowProtection::syncStoreInst(StoreInst* currStoreInst, GlobalVariable* TMRErrorDetected, bool forceFlag) {
+	// Keep track of the inserted instructions
 	std::vector<Instruction*> syncInsts;
 
 	// Sync the value of the store instruction
 	// If memory is cloned we shouldn't sync the address, as they will be different
 	Value* orig = currStoreInst->getOperand(0);
 
+	if (forceFlag) {
+		; // going to sync no matter what
+	}
 	// No need to sync if value is not cloned
-	//Additionally, makes sure we don't sync on copies, unless we are forced to sync here
-	if (!isCloned(orig) && !noMemReplicationFlag) {
+	// Additionally, makes sure we don't sync on copies, unless we are forced to sync here
+	else if (!isCloned(orig) && !noMemReplicationFlag) {
 		return;
 	}
 	else if (noMemReplicationFlag) {
-		//Make sure we don't sync on single return points when memory isn't duplicated
+		// Make sure we don't sync on single return points when memory isn't duplicated
 		if (!dyn_cast<StoreInst>(orig) && !isCloned(orig)) {
 			return;
 		}
@@ -336,22 +499,17 @@ void dataflowProtection::syncStoreInst(StoreInst* currStoreInst, GlobalVariable*
 	Value* clone1 = getClone(orig).first;
 	assert(clone1 && "Cloned value exists");
 
-	//Disabling synchronization on constant store
+	// Disabling synchronization on constant store
 	if (dyn_cast<ConstantInt>(orig)) {
 		return;
 	}
 
-	Instruction::OtherOps cmp_op;
-	CmpInst::Predicate cmp_eq;
-	if (currStoreInst->getOperand(0)->getType()->isFPOrFPVectorTy()) {
-		cmp_op = Instruction::OtherOps::FCmp;
-		cmp_eq = CmpInst::FCMP_UEQ; //Should this be OEQ instead?
-	} else {
-		cmp_op = Instruction::OtherOps::ICmp;
-		cmp_eq = CmpInst::ICMP_EQ;
-	}
+	// get the correct comparison type for this instruction
+	Type* opType = currStoreInst->getOperand(0)->getType();
+	Instruction::OtherOps cmp_op = getComparisonType(opType);
+	CmpInst::Predicate cmp_eq = getComparisonPredicate(opType);
 
-	Instruction* cmp = CmpInst::Create(cmp_op, cmp_eq, orig, clone1, "cmp", currStoreInst);
+	Instruction* cmp = CmpInst::Create(cmp_op, cmp_eq, orig, clone1, store_cmp_name, currStoreInst);
 	cmp->removeFromParent();
 	cmp->insertBefore(currStoreInst);
 
@@ -366,25 +524,25 @@ void dataflowProtection::syncStoreInst(StoreInst* currStoreInst, GlobalVariable*
 
 		assert(getClone(currStoreInst).first && "Store instruction has a clone");
 
-		currStoreInst->setOperand(0,sel);
-		dyn_cast<StoreInst>(getClone(currStoreInst).first)->setOperand(0,sel);
-		dyn_cast<StoreInst>(getClone(currStoreInst).second)->setOperand(0,sel);
+		currStoreInst->setOperand(0, sel);
+		dyn_cast<StoreInst>(getClone(currStoreInst).first)->setOperand(0, sel);
+		dyn_cast<StoreInst>(getClone(currStoreInst).second)->setOperand(0, sel);
 
-		//Make sure that the voted value is propagated downstream
+		// Make sure that the voted value is propagated downstream
 		if (orig->getNumUses() != 2) {
 			if (Instruction* origInst = dyn_cast<Instruction>(orig)) {
 				DominatorTree DT = DominatorTree(*origInst->getParent()->getParent());
 				for (auto u : origInst->users()) {
-					//Find any and all instructions that were not updated
-					if (std::find(syncInsts.begin(),syncInsts.end(),u) == syncInsts.end()) {
-						//Get all operands that should be updated
+					// Find any and all instructions that were not updated
+					if (std::find(syncInsts.begin() ,syncInsts.end(), u) == syncInsts.end()) {
+						// Get all operands that should be updated
 						for (unsigned int opNum=0; opNum < u->getNumOperands(); opNum++) {
-							//Update if and only if the instruction is dominated by sel
-							if (u->getOperand(opNum) == orig && DT.dominates(sel,dyn_cast<Instruction>(u))) {
-								u->setOperand(opNum,sel);
+							// Update if and only if the instruction is dominated by sel
+							if (u->getOperand(opNum) == orig && DT.dominates(sel, dyn_cast<Instruction>(u))) {
+								u->setOperand(opNum, sel);
 								if (isCloned(u)) {
-									dyn_cast<Instruction>(getClone(u).first)->setOperand(opNum,sel);
-									dyn_cast<Instruction>(getClone(u).second)->setOperand(opNum,sel);
+									dyn_cast<Instruction>(getClone(u).first)->setOperand(opNum, sel);
+									dyn_cast<Instruction>(getClone(u).second)->setOperand(opNum, sel);
 								}
 							}
 						}
@@ -394,17 +552,17 @@ void dataflowProtection::syncStoreInst(StoreInst* currStoreInst, GlobalVariable*
 		}
 
 		insertTMRCorrectionCount(cmp, TMRErrorDetected);
-	} else {		//DWC
+	} else {		// DWC
 		Function* currFn = currStoreInst->getParent()->getParent();
 		splitBlocks(cmp, errBlockMap[currFn]);
-		//fix invalidated pointer - see note in processCallSync()
+		// fix invalidated pointer - see note in processCallSync()
 		startOfSyncLogic[currStoreInst] = currStoreInst;
 	}
 }
 
 void dataflowProtection::processCallSync(CallInst* currCallInst, GlobalVariable* TMRErrorDetected) {
-	//Get a list of the non-constant/GEP arguments, no purpose in checking them
-	//Don't compare pointer values either
+	// Get a list of the non-constant/GEP arguments, no purpose in checking them
+	// Don't compare pointer values either
 	std::vector<Instruction*> syncInsts;
 
 	/* We need to check if any of the parameters in the call instruction are actually arguments
@@ -424,7 +582,7 @@ void dataflowProtection::processCallSync(CallInst* currCallInst, GlobalVariable*
 		if (isa<Constant>(currCallInst->getArgOperand(it))
 				|| isa<GetElementPtrInst>(currCallInst->getArgOperand(it)))
 			continue;
-		if(isa<PointerType>(currCallInst->getArgOperand(it)->getType()))
+		if (isa<PointerType>(currCallInst->getArgOperand(it)->getType()))
 			continue;
 		cloneableOperandsList.push_back(currCallInst->getArgOperand(it));
 	}
@@ -433,7 +591,7 @@ void dataflowProtection::processCallSync(CallInst* currCallInst, GlobalVariable*
 		return;
 	}
 
-	//We now have a list of (an unknown number of) operands, insert comparisons for all of them
+	// We now have a list of (an unknown number of) operands, insert comparisons for all of them
 	std::deque<Value*> cmpInstList;
 	std::vector<Instruction*> syncHelperList;
 	BasicBlock* currBB = currCallInst->getParent();
@@ -448,21 +606,32 @@ void dataflowProtection::processCallSync(CallInst* currCallInst, GlobalVariable*
 		}
 		ValuePair clones = getClone(orig);
 
-		//Make sure we're inserting the right type of comparison
-		Instruction::OtherOps cmp_op;
-		CmpInst::Predicate cmp_eq;
-		if (orig->getType()->isFPOrFPVectorTy()) {
-			cmp_op = Instruction::OtherOps::FCmp;
-			cmp_eq = CmpInst::FCMP_UEQ; //Should this be OEQ instead?
-		} else {
-			cmp_op = Instruction::OtherOps::ICmp;
-			cmp_eq = CmpInst::ICMP_EQ;
+		Type* opType = orig->getType();
+		// also need to skip syncing on array types
+		if (opType->isArrayTy()) {
+			continue;
 		}
+		// Make sure we're inserting the right type of comparison
+		Instruction::OtherOps cmp_op = getComparisonType(opType);
+		CmpInst::Predicate cmp_eq = getComparisonPredicate(opType);
 
-		Instruction* cmp = CmpInst::Create(cmp_op, cmp_eq, orig, clones.first,"cmp", currCallInst);
+		/*
+		 * NOTE: this can fail if `orig` is the wrong type
+		 * include/llvm/IR/Instructions.h:1121:
+		 * void llvm::ICmpInst::AssertOK():
+		 * Assertion `(getOperand(0)->getType()->isIntOrIntVectorTy() || getOperand(0)->getType()->isPtrOrPtrVectorTy()) && "Invalid operand types for ICmp instruction"' failed
+		 * TODO: figure out how to fix this besides just skipping syncing on array types
+		 */
+		if ( (cmp_op == intCmpType) && !(orig->getType()->isIntOrIntVectorTy() || orig->getType()->isPtrOrPtrVectorTy()) ) {
+			// debug
+			PRINT_VALUE(currCallInst);
+			PRINT_VALUE(orig);
+			assert(!orig->getType()->isArrayTy() && "array type not allowed here");
+		}
+		Instruction* cmp = CmpInst::Create(cmp_op, cmp_eq, orig, clones.first, call_cmp_name, currCallInst);
 		cmp->removeFromParent();
 		cmp->insertBefore(currCallInst);
-		if(firstIteration){
+		if (firstIteration) {
 			startOfSyncLogic[currCallInst] = cmp;
 			firstIteration = false;
 		}
@@ -473,15 +642,17 @@ void dataflowProtection::processCallSync(CallInst* currCallInst, GlobalVariable*
 			SelectInst* sel = SelectInst::Create(cmp, orig, clones.second, tmr_vote_inst_name, currCallInst);
 			syncInsts.push_back(sel);
 
-			currCallInst->replaceUsesOfWith(orig,sel);
+			currCallInst->replaceUsesOfWith(orig, sel);
 			dyn_cast<CallInst>(getClone(currCallInst).first)->replaceUsesOfWith(clones.first, sel);
 			dyn_cast<CallInst>(getClone(currCallInst).second)->replaceUsesOfWith(clones.second, sel);
 
-			//If something fails this assertion, it means that it is used after the call synchronization
-			//Might have to change it later in case we find a case where this is ok
-			//But extensive tests haven't found a case where this is necessary
-			// update: The condition does NOT hold if the operand is one that is passed in by an argument,
-			// and it hasn't been alloca'd; then every reference is to the original argument.
+			/*
+			 * If something fails this check for useCount, it means that it is used after the call synchronization
+			 * Might have to change it later in case we find a case where this is ok
+			 * But extensive tests haven't found a case where this is necessary
+			 *  update: The condition does NOT hold if the operand is one that is passed in by an argument,
+			 *  and it hasn't been alloca'd; then every reference is to the original argument.
+			 */
 			int useCount = orig->getNumUses();
 			if (useCount != 2) {
 				if (Instruction* origInst = dyn_cast<Instruction>(orig)) {
@@ -491,14 +662,14 @@ void dataflowProtection::processCallSync(CallInst* currCallInst, GlobalVariable*
 						uses.push_back(dyn_cast<Instruction>(uu));
 					}
 					for (auto u : uses) {
-						//Find any and all instructions that were not updated
+						// Find any and all instructions that were not updated
 						if (std::find(syncInsts.begin(),syncInsts.end(),u) == syncInsts.end()) {
 							if (!DT.dominates(sel, dyn_cast<Instruction>(u))) {
 								useCount--;
-							//Get all operands that should be updated
+							// Get all operands that should be updated
 							} else {
 								for (unsigned int opNum=0; opNum < u->getNumOperands(); opNum++) {
-									//Update if and only if the instruction is dominated by sel
+									// Update if and only if the instruction is dominated by sel
 									if (u->getOperand(opNum) == orig && DT.dominates(sel, dyn_cast<Instruction>(u))) {
 										u->setOperand(opNum, sel);
 										if (isCloned(u)) {
@@ -524,7 +695,7 @@ void dataflowProtection::processCallSync(CallInst* currCallInst, GlobalVariable*
 				// TODO: examine what could cause this to fail
 			}
 			insertTMRCorrectionCount(cmp, TMRErrorDetected);
-		} else {		//DWC
+		} else {		// DWC
 			cmpInstList.push_back(cmp);
 			syncHelperMap[currBB].push_back(cmp);
 		}
@@ -535,7 +706,7 @@ void dataflowProtection::processCallSync(CallInst* currCallInst, GlobalVariable*
 			return;
 		}
 
-		//Reduce the comparisons to a single instruction
+		// Reduce the comparisons to a single instruction
 		while (cmpInstList.size() > 1) {
 			Value* cmp0 = cmpInstList[0];
 			Value* cmp1 = cmpInstList[1];
@@ -566,123 +737,133 @@ void dataflowProtection::processCallSync(CallInst* currCallInst, GlobalVariable*
 	}
 }
 
+
 void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVariable* TMRErrorDetected) {
 	assert(currTerminator);
 
-	//Only sync if there are arguments to duplicate
+	// Only sync if there are arguments to duplicate
 	if (isa<BranchInst>(currTerminator)) {
-		if (currTerminator->getNumSuccessors() < 2){ //1 successor, or none = unconditional
+		// 1 successor, or none = unconditional
+		if (currTerminator->getNumSuccessors() < 2) {
 			startOfSyncLogic[currTerminator] = currTerminator;
 			return;
 		}
 	} else if (isa<ResumeInst>(currTerminator)) {
-		//Resume is only for exception handlers, sync always
+		;	// Resume is only for exception handlers, sync always
 	} else if (isa<InvokeInst>(currTerminator)) {
-		//anything special need to go here?
+		;	// anything special need to go here?
 	} else if (isa<ReturnInst>(currTerminator)) {
-		if (currTerminator->getNumOperands() == 0){ //Returning nothing
+		// Returns nothing
+		if (currTerminator->getNumOperands() == 0) {
 			startOfSyncLogic[currTerminator] = currTerminator;
 			return;
 		}
 	} else if (isa<SwitchInst>(currTerminator)) {
-		if (currTerminator->getNumSuccessors() == 1){ // Don't check unconditional branches
+		// Don't check unconditional branches in switch statements
+		if (currTerminator->getNumSuccessors() == 1) {
 			startOfSyncLogic[currTerminator] = currTerminator;
 			return;
 		}
-	} else { //indirectbr, invoke, catchswitch, catchret, unreachable, cleanupret
-		//Do nothing, the other branch types don't have arguments to clone
+	} else { // indirectbr, catchswitch, catchret, unreachable, cleanupret
+		// Do nothing, the other branch types don't have arguments to clone
 		startOfSyncLogic[currTerminator] = currTerminator;
 		return;
 	}
 
 	if (TMR) {
-		std::vector<Instruction*> syncInsts;
 		Value* op = currTerminator->getOperand(0);
 
 		if (!isCloned(op))
 			return;
 
-		Instruction* clone1 = dyn_cast<Instruction>(getClone(op).first);
-		Instruction* clone2 = dyn_cast<Instruction>(getClone(op).second);
+		Value* clone1 = dyn_cast<Instruction>(getClone(op).first);
+		Value* clone2 = dyn_cast<Instruction>(getClone(op).second);
+		// Also need to check if the operand is a function argument
+		if (!clone1) {
+			clone1 = dyn_cast<Argument>(getClone(op).first);
+			clone2 = dyn_cast<Argument>(getClone(op).second);
+		}
 		assert(clone1 && clone2 && "Instruction has clones");
 		// TODO: examine what could cause this assertion to fail
 
-		//Make sure we're inserting the right type of comparison
+		// Make sure we're inserting the right type of comparison
 		Instruction::OtherOps cmp_op;
 		CmpInst::Predicate cmp_eq;
 		Type* opType = op->getType();
 
-		//if it's a pointer type, is it ever safe to compare return values?
-		// could have been allocated with malloc()
-		// you would have to dereference the pointer to compare the insides of it
+		// If it's a pointer type, is it ever safe to compare return values?
+		// It could have been allocated with malloc()
+		// You would have to dereference the pointer to compare the insides of it
 		if (opType->isPointerTy()) {
 			if (verboseFlag) {
 				errs() << warn_string << " skipping synchronizing on return instruction of pointer type:\n";
-				errs() << " in " << currTerminator->getParent()->getName() << "\n";
+				errs() << " in '" << currTerminator->getParent()->getName()
+					   << "' of function '"
+					   << currTerminator->getParent()->getParent()->getName()
+					   << "'\n";
 			}
 			startOfSyncLogic[currTerminator] = currTerminator;
 			return;
 		}
-		//seems to be a problem with the "this" pointer
+		// seems to be a problem with the "this" pointer
 
 		if (opType->isFPOrFPVectorTy()) {
-			cmp_op = Instruction::OtherOps::FCmp;
-			cmp_eq = CmpInst::FCMP_UEQ; //Should this be OEQ instead?
-		} else if(opType->isIntOrIntVectorTy()) {
-			cmp_op = Instruction::OtherOps::ICmp;
-			cmp_eq = CmpInst::ICMP_EQ;
+			cmp_op = fpCmpType;
+			cmp_eq = fpCmpEqual;
+		} else if (opType->isIntOrIntVectorTy()) {
+			cmp_op = intCmpType;
+			cmp_eq = intCmpEqual;
 		} else if (opType->isStructTy()) {
 
-//			cmp_op = Instruction::OtherOps::FCmp;
-			//get the size of the struct
+			// get the size of the struct
 			StructType* sType = dyn_cast<StructType>(opType);
 			uint64_t nTypes = sType->getStructNumElements();
-			//load each of the inner values and get their types
-			//need to get the actual struct value to operate on
+			// load each of the inner values and get their types
+			// need to get the actual struct value to operate on
 			Value* op0 = currTerminator->getOperand(0);
-			//NOTE: will there ever be more than one operand to worry about?
+			// TODO: will there ever be more than one operand to worry about?
+			// Yes, perhaps nested struct types. Hmm...
 			Value* op1 = cloneMap[op0].first;
 			Value* op2 = cloneMap[op0].second;
 //			errs() << *op << "\n" << *op2 << "\n" << *op3 << "\n";
 			unsigned arr[] = {0};
 
-			//we'll need these later
+			// we'll need these later
 			SelectInst* eSel[nTypes];
 			int firstTime = 1;
 
 			for (int i = 0; i < nTypes; i+=1) {
-				//type to compare
+				// type to compare
 				auto eType = sType->getStructElementType(i);
 //				errs() << " Type " << i << ": " << *eType << "\n";
 
-				//index to extract
+				// index to extract
 				arr[0] = i;
 				auto extractIdx = ArrayRef<unsigned>(arr);
 
-				//names of instructions
+				// names of instructions
 				std::string extractName    = "getToCompare." + std::to_string(i);
 				std::string extractNameDWC = extractName + ".DWC";
 				std::string extractNameTMR = extractName + ".TMR";
-				std::string cmpName = "cmpElement." + std::to_string(i);
-				std::string selName = "selElement." + std::to_string(i);
+				std::string cmpName 	   = "cmpElement." + std::to_string(i);
+				std::string selName 	   = "selElement." + std::to_string(i);
 
-				//create the ExtractValueInst's
+				// create the ExtractValueInst's
 				ExtractValueInst* extract0 = ExtractValueInst::Create(op0, extractIdx, extractName);
 				ExtractValueInst* extract1 = ExtractValueInst::Create(op1, extractIdx, extractNameDWC);
 				ExtractValueInst* extract2 = ExtractValueInst::Create(op2, extractIdx, extractNameTMR);
 
-				//create the compare instructions
+				// create the compare instructions
 				if (eType->isFPOrFPVectorTy()) {
-					cmp_op = Instruction::OtherOps::FCmp;
-					cmp_eq = CmpInst::FCMP_UEQ;
-					//TODO: what's with the "ordered" and "unordered" stuff?
+					cmp_op = fpCmpType;
+					cmp_eq = fpCmpEqual;
 				} else if (eType->isIntOrIntVectorTy()) {
-					cmp_op = Instruction::OtherOps::ICmp;
-					cmp_eq = CmpInst::ICMP_EQ;
-					//compare equal - returns true (1) if equal
+					cmp_op = intCmpType;
+					cmp_eq = intCmpEqual;
+					// compare equal - returns true (1) if equal
 				} else if (eType->isPointerTy()) {
-					//we'll have to skip syncing on this value
-					//delete the extra instructions that aren't being used
+					// we'll have to skip syncing on this value
+					// delete the extra instructions that aren't being used
 					extract0->deleteValue();
 					extract1->deleteValue();
 					extract2->deleteValue();
@@ -692,7 +873,7 @@ void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVa
 					assert(cmp_op && "valid comparison type assigned");
 				}
 
-				//only set the logic point if it's still valid
+				// only set the logic point if it's still valid
 				if (firstTime) {
 					firstTime = 0;
 					startOfSyncLogic[currTerminator] = extract0;
@@ -700,11 +881,11 @@ void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVa
 				Instruction* eCmp = CmpInst::Create(cmp_op, cmp_eq, extract0, extract1, cmpName);
 				eSel[i] = SelectInst::Create(eCmp, extract0, extract2, selName);
 
-				//debug
+				// debug
 //				errs() << *extract0 << "\n" << *extract1 << "\n" << *extract2 << "\n";
 //				errs() << *eCmp << "\n" << *eSel[i] << "\n";
 
-				//insert the instructions into the basic block
+				// insert the instructions into the basic block
 				extract0->insertBefore(currTerminator);
 				extract1->insertAfter(extract0);
 				extract2->insertAfter(extract1);
@@ -712,14 +893,14 @@ void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVa
 				eSel[i]->insertAfter(eCmp);
 			}
 
-			//we use the results of the SelectInst's to populate the final return value
+			// we use the results of the SelectInst's to populate the final return value
 			// which we will just use the existing first copy of the struct
 			for (int i = 0; i < nTypes; i+=1) {
-				//only sync if these are non-pointer values
+				// only sync if these are non-pointer values
 				if (eSel[i] == nullptr) {
 					continue;
 				}
-				//insert the select values into the first copy of the struct
+				// insert the select values into the first copy of the struct
 				arr[0] = i;
 				auto insertIdx = ArrayRef<unsigned>(arr);
 				std::string insertName = "voter.insert." + std::to_string(i);
@@ -727,9 +908,8 @@ void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVa
 				insVal->insertBefore(currTerminator);
 //				errs() << *insVal << "\n";
 			}
-			//don't even have to change the Terminator!
-			//this part is so different from the other kinds that we skip all the rest of the stuff below
-			//TODO: what's up the with syncInsts list?
+			// don't even have to change the Terminator!
+			// this part is so different from the other kinds that we skip all the rest of the stuff below
 			return;
 
 		} else if (opType->isAggregateType()) {
@@ -751,7 +931,7 @@ void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVa
 		}
 		assert(cmp_op && "return type not supported!");
 
-		Instruction* cmp = CmpInst::Create(cmp_op, cmp_eq, op, clone1, "vcmp", currTerminator);
+		Instruction* cmp = CmpInst::Create(cmp_op, cmp_eq, op, clone1, terminator_cmp_name, currTerminator);
 
 		startOfSyncLogic[currTerminator] = cmp;
 
@@ -759,9 +939,6 @@ void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVa
 
 		currTerminator->replaceUsesOfWith(op, sel);
 
-		syncInsts.push_back(cmp);
-		syncInsts.push_back(sel);
-
 		// Too many cases to account for each possibility, this is removed
 		// assert(numUses == 2 && "Instruction only used in terminator synchronization");
 
@@ -770,7 +947,7 @@ void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVa
 		//  TMR error count instructions.
 		insertTMRCorrectionCount(cmp, TMRErrorDetected, true);
 
-	} else {		//DWC
+	} else {		// DWC
 		if (!isCloned(currTerminator->getOperand(0))) {
 			return;
 		}
@@ -782,65 +959,67 @@ void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVa
 		CmpInst::Predicate cmp_eq;
 		auto opType = currTerminator->getOperand(0)->getType();
 
-		//see comments in TMR section about synchronizing on pointer values
+		// see comments in TMR section about synchronizing on pointer values
 		if (opType->isPointerTy()) {
 			if (verboseFlag) {
 				errs() << warn_string << " skipping synchronizing on return instruction of pointer type:\n";
-				errs() << " in " << currTerminator->getParent()->getName() << "\n";
+				errs() << " in '" << currTerminator->getParent()->getName()
+					   << "' of function '"
+					   << currTerminator->getParent()->getParent()->getName()
+					   << "'\n";
 			}
 			return;
 		}
 
 		if (opType->isFPOrFPVectorTy()) {
-			cmp_op = Instruction::OtherOps::FCmp;
-			cmp_eq = CmpInst::FCMP_UEQ; //Should this be OEQ instead?
+			cmp_op = fpCmpType;
+			cmp_eq = fpCmpEqual;
 		} else if (opType->isIntOrIntVectorTy()) {
-			cmp_op = Instruction::OtherOps::ICmp;
-			cmp_eq = CmpInst::ICMP_EQ;
+			cmp_op = intCmpType;
+			cmp_eq = intCmpEqual;
 		} else if (opType->isStructTy()) {
-			//get the size of the struct
+			// get the size of the struct
 			StructType* sType = dyn_cast<StructType>(opType);
 			uint64_t nTypes = sType->getStructNumElements();
-			//load each of the inner values and get their types
+			// load each of the inner values and get their types
 			Value* op0 = currTerminator->getOperand(0);
 			Value* op1 = cloneMap[op0].first;
 
-			//we'll need these later
+			// we'll need these later
 			unsigned arr[] = {0};
-//			CmpInst* eCmp[nTypes];
 			std::vector<CmpInst*> eCmp;
 			int firstTime = 1;
 			Instruction* syncPointLater;
 
 			for (int i = 0; i < nTypes; i+=1) {
-				//type to compare
+				// type to compare
 				auto eType = sType->getStructElementType(i);
 
-				//index to extract
+				// index to extract
 				arr[0] = i;
 				auto extractIdx = ArrayRef<unsigned>(arr);
 
-				//names of instructions
+				// names of instructions
 				std::string extractName    = "getToCompare." + std::to_string(i);
 				std::string extractNameDWC = extractName + ".DWC";
 				std::string cmpName = "cmpElement." + std::to_string(i);
 
-				//create the ExtractValueInst's
+				// create the ExtractValueInst's
 				ExtractValueInst* extract0 = ExtractValueInst::Create(op0, extractIdx, extractName);
 				ExtractValueInst* extract1 = ExtractValueInst::Create(op1, extractIdx, extractNameDWC);
 
-				//create the compare instructions
+				// create the compare instructions
 				if (eType->isFPOrFPVectorTy()) {
-					cmp_op = Instruction::OtherOps::FCmp;
-					cmp_eq = CmpInst::FCMP_UNE;
+					cmp_op = fpCmpType;
+					cmp_eq = fpCmpNotEqual;
+					// see the note in syncTerminator() about comparison predicate types
 				} else if (eType->isIntOrIntVectorTy()) {
-					cmp_op = Instruction::OtherOps::ICmp;
-					cmp_eq = CmpInst::ICMP_NE;
-					//compare not equal - returns true (1) if not equal, so expect all to be false (0)
+					cmp_op = intCmpType;
+					cmp_eq = intCmpNotEqual;
+					// compare not equal - returns true (1) if not equal, so expect all to be false (0)
 				} else if (eType->isPointerTy()) {
-					//we'll have to skip syncing on this value
-					//do the extract instructions just disappear if we don't insert them anywhere?
-					//nope!
+					// we'll have to skip syncing on this value
+					// delete the unused extract instructions
 					extract0->deleteValue();
 					extract1->deleteValue();
 					continue;
@@ -849,33 +1028,33 @@ void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVa
 					assert(cmp_op && "valid comparison type assigned");
 				}
 
-				//only set the logic point if it's still valid
+				// only set the logic point if it's still valid
 				if (firstTime) {
 					firstTime = 0;
 					syncPointLater = extract0;
 				}
 				eCmp.push_back(CmpInst::Create(cmp_op, cmp_eq, extract0, extract1, cmpName));
 
-				//debug
+				// debug
 //				errs() << *extract0 << "\n" << *extract1 << "\n" << *eCmp[i] << "\n";
 
-				//insert the instructions into the basic block
+				// insert the instructions into the basic block
 				extract0->insertBefore(currTerminator);
 				extract1->insertAfter(extract0);
 				eCmp.back()->insertAfter(extract1);
 			}
 
-			//this doesn't help with the below anymore, but still a good check
+			// this doesn't help with the below anymore, but still a good check
 			assert(nTypes > 1);
 
 			Instruction* cmpInst;
-			cmp_op = Instruction::OtherOps::ICmp;
-			cmp_eq = CmpInst::ICMP_EQ;
+			cmp_op = intCmpType;
+			cmp_eq = intCmpEqual;
 
 			if (eCmp.size() > 2) {
-				//final reduce & compare
-				//use OR because if any one of them is 1, it will get set to 1 and stay that way
-				//there will be at least 2, so OR them together first
+				// final reduce & compare
+				// use OR because if any one of them is 1, it will get set to 1 and stay that way
+				// there will be at least 2, so OR them together first
 				BinaryOperator* acc;
 				int i = 0;
 				do {
@@ -885,26 +1064,26 @@ void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVa
 					i+=1;
 //					errs() << *acc << "\n";
 				} while (i < eCmp.size()-1);
-				//compare against 0
+				// compare against 0
 				ConstantInt* compareAgainst =
 						ConstantInt::get(dyn_cast<IntegerType>(acc->getType()), 0, false);
 				cmpInst = CmpInst::Create(cmp_op, cmp_eq, acc, compareAgainst);
 				cmpInst->insertBefore(currTerminator);
 //				errs() << *cmpInst << "\n";		errs() << *cmpInst->getParent() << "\n";
 			} else if (eCmp.size() == 1) {
-				//only one element - then just compare the
+				// only one element - then just compare that single result against 0
 				ConstantInt* compareAgainst =
 						ConstantInt::get(dyn_cast<IntegerType>(eCmp.at(0)->getType()), 0, false);
 				cmpInst = CmpInst::Create(cmp_op, cmp_eq, eCmp.at(0), compareAgainst);
 				cmpInst->insertBefore(currTerminator);
 			} else {
-				//nothing compared because they're all pointers
-				//so there's no synchronization necessary (?)
+				// nothing compared because they're all pointers
+				// so there's no synchronization necessary (?)
 				syncPoints.push_back(currTerminator);
 				return;
 			}
 
-			//split the block
+			// split the block
 			Function* currFn = currTerminator->getParent()->getParent();
 			Instruction* lookAtLater = cmpInst->getPrevNode();
 			assert(lookAtLater);
@@ -933,31 +1112,33 @@ void dataflowProtection::syncTerminator(TerminatorInst* currTerminator, GlobalVa
 	}
 }
 
-void dataflowProtection::splitBlocks(Instruction* I, BasicBlock* errBlock) {
-	//Split at I, return a pointer to the new error block
 
-	//Create a copy of tmpCmpInst that will reside in the current basic block
+//#define DEBUG_SIMD_SYNCING
+Instruction* dataflowProtection::splitBlocks(Instruction* I, BasicBlock* errBlock) {
+	// Split at I; I is erased, so return a pointer to the clone that replaces it
+
+	// Create a copy of tmpCmpInst that will reside in the current basic block
 	Instruction* newCmpInst = I->clone();
 	newCmpInst->setName("syncCheck.");
 	newCmpInst->insertBefore(I);
 
-	//Create the continuation of the current basic block
+	// Create the continuation of the current basic block
 	BasicBlock* originalBlock = I->getParent();
 	const Twine& name = originalBlock->getParent()->getName() + ".cont";
 	BasicBlock* newBlock = originalBlock->splitBasicBlock(I, name);
 
-	//The compare instruction is copied to the new basicBlock by calling split, so we remove it
+	// The compare instruction is copied to the new basicBlock by calling split, so we remove it
 	I->eraseFromParent();
 
-	//Delete originalBlock's terminator
+	// Delete originalBlock's terminator
 	originalBlock->getTerminator()->eraseFromParent();
-	//create conditional branch
-	//there are some times it will try to branch on a vector value.
-	//Instead need to insert additional compare logic. Only necessary with DWC.
+	// create conditional branch
+	// there are some times it will try to branch on a vector value.
+	// Instead need to insert additional compare logic. Only necessary with DWC.
 	if (!newCmpInst->getType()->isIntegerTy(1) && !TMR) {
-		//it is possible that the value being compared is a vector type instead of a basic type
+		// it is possible that the value being compared is a vector type instead of a basic type
 
-		//need to sign extend the boolean vector
+		// need to sign extend the boolean vector
 		int numElements = newCmpInst->getType()->getVectorNumElements();
 		Type* newVecType = VectorType::get(IntegerType::getInt16Ty(originalBlock->getContext()), numElements);
 
@@ -965,27 +1146,33 @@ void dataflowProtection::splitBlocks(Instruction* I, BasicBlock* errBlock) {
 		signExt->setName("syncExt");
 		signExt->insertAfter(newCmpInst);
 
-		//what size should the new type be? 16 bits * the number of elements
+		// what size should the new type be? 16 bits * the number of elements
+		// TODO: why 16 bits?
 		int vecSize = numElements * 16;
-		//get something to compare against
+		// get something to compare against
 		IntegerType* intType = IntegerType::get(originalBlock->getContext(), vecSize);
 		Constant* newIntVec = Constant::getAllOnesValue(intType);
 
-		//bitcast the vector to a scalar value
+		// bitcast the vector to a scalar value
 		BitCastInst* vecToScalar = new BitCastInst(signExt, intType);
 		vecToScalar->setName("b_cast");
 		vecToScalar->insertAfter(signExt);
 
-		//create one more compare instruction
-		CmpInst* nextCmpInst = CmpInst::Create(Instruction::OtherOps::ICmp,CmpInst::ICMP_EQ,vecToScalar,newIntVec);
+#ifdef DEBUG_SIMD_SYNCING
+		errs() << "SExt: " << *signExt << "\n";
+		errs() << "Bcast: " << *vecToScalar << "\n";
+#endif
+
+		// create one more compare instruction
+		CmpInst* nextCmpInst = CmpInst::Create(intCmpType, intCmpEqual, vecToScalar, newIntVec);
 		nextCmpInst->setName("simdSync");
 		nextCmpInst->insertAfter(vecToScalar);
 
-		//create terminator
+		// create terminator
 		BranchInst* newTerm;
 		newTerm = BranchInst::Create(newBlock, errBlock, nextCmpInst, originalBlock);
 		startOfSyncLogic[newTerm] = newCmpInst;
-		//this map will help with moving things later if the code is segmented
+		// this map will help with moving things later if the code is segmented
 		simdMap[newCmpInst] = std::make_tuple(signExt, vecToScalar, nextCmpInst);
 
 	} else {
@@ -994,66 +1181,113 @@ void dataflowProtection::splitBlocks(Instruction* I, BasicBlock* errBlock) {
 		startOfSyncLogic[newTerm] = newCmpInst;
 	}
 
+	// if the original block is already in the map, replace the entry with
+	//  the new block
+	if (syncCheckMap.find(originalBlock) != syncCheckMap.end()) {
+		syncCheckMap[newBlock] = syncCheckMap[originalBlock];
+	}
+
 	syncCheckMap[originalBlock] = newCmpInst;
+	return newCmpInst;
 }
 
+
 //----------------------------------------------------------------------------//
 // DWC error handling function/blocks
 //----------------------------------------------------------------------------//
 void dataflowProtection::insertErrorFunction(Module &M, int numClones) {
 	Type* t_void = Type::getVoidTy(M.getContext());
+	/*
+	 * There are 3 scenarios for inserting an error function:
+	 * 1) it already exists, defined by application programmer
+	 * 2) it does not exist, but -noMain flag is passed
+	 * 3) it does not exist
+	 */
 
-	//have to update fault detection block name so it's unique to this module
-	// that way the output code can be included in a library file
-//	TODO: check if it exists. If it does, use it, otherwise, make random
-//	std::string random_suffix = getRandomString(12);
-//	fault_function_name += random_suffix;
-
+	// Will be created if either 1) DWC or 2) Stack Protection
 	Constant* c;
-	if(numClones==2)
+	if ( (numClones == 2) || (protectStackFlag) ) {
 		c = M.getOrInsertFunction(fault_function_name, t_void, NULL);
-	else
+	} else {
 		return;
+	}
 
 	Function* errFn = dyn_cast<Function>(c);
 	assert(errFn && "Fault detection function is non-void");
+	// TODO: not sure if adding this attribute is necessary, but check some aggressive optimizations
 	errFn->addFnAttr(Attribute::get(M.getContext(), "noinline"));
 
-	//If the user has declared their own error handler, use that
-	if( errFn->getBasicBlockList().size() != 0){
+	/*
+	 * Scenario 1:
+	 * The user has declared their own error handler, use that.
+	 */
+	if ( errFn->getBasicBlockList().size() != 0) {
 		if (verboseFlag) errs() << info_string << " Found existing DWC error handler function\n";
 		return;
 	}
 
-	//If not, create our own
+	/*
+	 * Scenario 2:
+	 * Error handler will be added later.
+	 * We are to mark the function as "extern" and return.
+	 */
+	if (noMainFlag) {
+		errFn->setLinkage(GlobalValue::LinkageTypes::ExternalLinkage);
+		return;
+	}
+
+	/*
+	 * Scenario 3:
+	 * Error handler does not exist, so we need to create one.
+	 * Change the fault detection block name so it's unique to this module,
+	 *  in case someone tries to link this against another object file later.
+	 * Randomize the name, that way the output code can be included in a library file.
+	 */
+
+	// Rename the declaration in place rather than unlinking it and inserting a
+	//  new one: removeFromParent() does not delete the Function (it would leak)
+	//  and any existing uses would still point at the unlinked declaration.
+	// Have to change global name because is used elsewhere
+	std::string random_suffix = getRandomString(12);
+	errFn->setName(fault_function_name + random_suffix);
+	// setName() may uniquify on a collision, so read back the name actually used
+	fault_function_name = errFn->getName().str();
+
+	// reference to "abort" call
 	Constant* abortC = M.getOrInsertFunction("abort", t_void, NULL);
 	Function* abortF = dyn_cast<Function>(abortC);
 	assert(abortF && "Abort function detected");
+	// TODO: on what platform would an "abort" function not exist?
+	// in this case, the application programmer would have to provide their own error function
 
-	//Create a basic block that calls abort
+	// Create a basic block that calls abort
 	BasicBlock* bb = BasicBlock::Create(M.getContext(), Twine("entry"), errFn, NULL);
 	CallInst* new_abort = CallInst::Create(abortF, "", bb);
 	UnreachableInst* term = new UnreachableInst(M.getContext(), bb);
 }
 
+
 void dataflowProtection::createErrorBlocks(Module &M, int numClones) {
 	Type* t_void = Type::getVoidTy(M.getContext());
 
-	//Create an error handler block for each function - they can't share one
+	// Create an error handler block for each function - they can't share one
+	// Will be created if either 1) DWC or 2) Stack Protection
 	Constant* c;
-	if(numClones == 2)
+	if ( (numClones == 2) || (protectStackFlag) ) {
 		c = M.getOrInsertFunction(fault_function_name, t_void, NULL);
-	else
+	} else {
 		return;
+	}
 
 	Function* errFn = dyn_cast<Function>(c);
 	assert(errFn && "error function exists");
 
+	// TODO: should this iterate over fnsToClone instead?
 	for (auto & F : M) {
 		if (F.getBasicBlockList().size() == 0)
 			continue;
 
-		if(isISR(F))
+		if (isISR(F))
 			continue;
 
 		BasicBlock* originalBlock = &(F.back());
@@ -1080,9 +1314,9 @@ void dataflowProtection::createErrorBlocks(Module &M, int numClones) {
 
 		errBlockMap[&F] = errBlock;
 	}
-
 }
 
+
 //----------------------------------------------------------------------------//
 // TMR error detection
 //----------------------------------------------------------------------------//
@@ -1098,20 +1332,16 @@ void dataflowProtection::insertTMRDetectionFlag(Instruction* cmpInst, GlobalVari
 	Value* clone1 = getClone(orig).first;
 	Value* clone2 = getClone(orig).second;
 
-	//Insert additional OR operations
-	Instruction::OtherOps cmp_op;
-	CmpInst::Predicate cmp_eq;
-	if (orig->getType()->isFPOrFPVectorTy()) {
-		cmp_op = Instruction::OtherOps::FCmp;
-		cmp_eq = CmpInst::FCMP_UEQ; //Should this be OEQ instead?
-	} else {
-		cmp_op = Instruction::OtherOps::ICmp;
-		cmp_eq = CmpInst::ICMP_EQ;
-	}
+	// get the correct comparison type for this instruction
+	Type* opType = orig->getType();
+	Instruction::OtherOps cmp_op = getComparisonType(opType);
+	CmpInst::Predicate cmp_eq = getComparisonPredicate(opType);
+
+	// Insert additional OR operations
 	Instruction* cmpInst2 = CmpInst::Create(cmp_op, cmp_eq, orig, clone2, "cmp", nextInst);
 	BinaryOperator* andCmps = BinaryOperator::CreateAnd(cmpInst, cmpInst2, "cmpReduction", nextInst);
 
-	//Insert a load, or after the sel inst
+	// Insert a load, or after the sel inst
 	LoadInst* LI = new LoadInst(TMRErrorDetected, "errFlagLoad", nextInst);
 	CastInst* castedCmp = CastInst::CreateZExtOrBitCast(andCmps, LI->getType(), "extendedCmp", LI);
 
@@ -1119,6 +1349,8 @@ void dataflowProtection::insertTMRDetectionFlag(Instruction* cmpInst, GlobalVari
 	StoreInst* SI = new StoreInst(BI, TMRErrorDetected, nextInst);
 }
 
+
+//#define DEBUG_INSERT_TMR_COUNT
 void dataflowProtection::insertTMRCorrectionCount(Instruction* cmpInst, GlobalVariable* TMRErrorDetected, bool updateSyncPoint) {
 	assert(cmpInst && "valid compare instruction");
 	assert(TMRErrorDetected && "valid TMR count global");
@@ -1130,6 +1362,17 @@ void dataflowProtection::insertTMRCorrectionCount(Instruction* cmpInst, GlobalVa
 		return;
 	}
 
+#ifdef DEBUG_INSERT_TMR_COUNT
+	int flag = 0;
+	if (cmpInst->getParent()->getName() == "if.then.i3" &&
+			cmpInst->getParent()->getParent()->getName() == "uxQueueSpacesAvailable")
+	{
+		flag = 1;
+		errs() << "Inserting TMR correction counters into BB:\n";
+		errs() << *cmpInst->getParent() << "\n";
+	}
+#endif
+
 	Instruction* nextInst = cmpInst->getNextNode();
 	// value being synchronized on
 	Value* orig = dyn_cast<Value>(cmpInst->getOperand(0));
@@ -1138,16 +1381,12 @@ void dataflowProtection::insertTMRCorrectionCount(Instruction* cmpInst, GlobalVa
 	Value* clone1 = getClone(orig).first;
 	Value* clone2 = getClone(orig).second;
 
-	//Insert additional OR operations
-	Instruction::OtherOps cmp_op;
-	CmpInst::Predicate cmp_eq;
-	if (orig->getType()->isFPOrFPVectorTy()) {
-		cmp_op = Instruction::OtherOps::FCmp;
-		cmp_eq = CmpInst::FCMP_UEQ; //Should this be OEQ instead?
-	} else {
-		cmp_op = Instruction::OtherOps::ICmp;
-		cmp_eq = CmpInst::ICMP_EQ;
-	}
+	// get the correct comparison type for this instruction
+	Type* opType = orig->getType();
+	Instruction::OtherOps cmp_op = getComparisonType(opType);
+	CmpInst::Predicate cmp_eq = getComparisonPredicate(opType);
+
+	// Insert additional OR operations
 	// compare the original with the 2nd clone
 	Instruction* cmpInst2 = CmpInst::Create(cmp_op, cmp_eq, orig, clone2, "cmp", nextInst);
 
@@ -1173,13 +1412,13 @@ void dataflowProtection::insertTMRCorrectionCount(Instruction* cmpInst, GlobalVa
 			"errorHandler." + Twine(originalBlock->getParent()->getName()),
 			originalBlock->getParent(), originalBlock);
 
-	//Populate new block -- load global counter, increment, store
+	// Populate new block -- load global counter, increment, store
 	LoadInst* LI = new LoadInst(TMRErrorDetected, "errFlagLoad", errBlock);
 	Constant* one = ConstantInt::get(LI->getType(), 1, false);
 	BinaryOperator* BI = BinaryOperator::CreateAdd(LI, one, "errFlagAdd", errBlock);
 	StoreInst* SI = new StoreInst(BI, TMRErrorDetected, errBlock);
 
-	//Split blocks, deal with terminators
+	// Split blocks, deal with terminators
 	const Twine& name = originalBlock->getParent()->getName() + ".cont";
 	// the "vote" instruction is the first one in the new BB
 	BasicBlock* originalBlockContinued = originalBlock->splitBasicBlock(nextInst, name);
@@ -1197,7 +1436,13 @@ void dataflowProtection::insertTMRCorrectionCount(Instruction* cmpInst, GlobalVa
 		newSyncPoints.push_back(condGoToErrBlock);
 	}
 
-	//Update how to divide up blocks
+#ifdef DEBUG_INSERT_TMR_COUNT
+	if (flag) {
+		flag = 0;
+	}
+#endif
+
+	// Update how to divide up blocks
 	std::vector<Instruction*> syncHelperList;
 	syncHelperMap[originalBlock] = syncHelperList;
 	syncHelperMap[originalBlock].push_back(cmpInst);
@@ -1207,25 +1452,27 @@ void dataflowProtection::insertTMRCorrectionCount(Instruction* cmpInst, GlobalVa
 	startOfSyncLogic[condGoToErrBlock] = cmpInst;
 }
 
+
 // invalidates the first two arguments
 void dataflowProtection::insertVectorTMRCorrectionCount(Instruction* cmpInst, Instruction* cmpInst2, GlobalVariable* TMRErrorDetected) {
-	//don't support pointers (yet)
+	// don't support pointers (yet)
 	if (cmpInst->getType()->isPtrOrPtrVectorTy()) {
 		assert(false && "not supporting TMR detector with vectors of pointers");
 	}
 
-	//change the comparisons to be NotEqual so we can add the results for a total error count
+	// change the comparisons to be NotEqual so we can add the results for a total error count
 	Instruction::OtherOps cmp_op;
 	CmpInst::Predicate cmp_neq;
 	Type* vType = cmpInst->getOperand(0)->getType();
 	if (vType->isIntOrIntVectorTy()) {
-		//integer type
-		cmp_op = Instruction::OtherOps::ICmp;
-		cmp_neq = CmpInst::ICMP_NE;
+		// integer type
+		cmp_op = intCmpType;
+		cmp_neq = intCmpNotEqual;
 	} else if (vType->isFPOrFPVectorTy()) {
-		//floating point type
-		cmp_op = Instruction::OtherOps::FCmp;
-		cmp_neq = CmpInst::FCMP_UNE;
+		// floating point type
+		cmp_op = fpCmpType;
+		cmp_neq = fpCmpNotEqual;
+		// see the note in syncTerminator() about comparison predicate types
 	} else {
 		assert(false && "unsupported vector type");
 	}
@@ -1238,7 +1485,7 @@ void dataflowProtection::insertVectorTMRCorrectionCount(Instruction* cmpInst, In
 	cmpInst2->replaceAllUsesWith(newCmpInst2);
 	cmpInst2->eraseFromParent();
 
-	//need to OR the two cmp's first
+	// need to OR the two cmp's first
 	BinaryOperator* cmpOr = BinaryOperator::Create(Instruction::BinaryOps::Or, \
 			newCmpInst, newCmpInst2, "reduceOr");
 	cmpOr->insertAfter(newCmpInst2);
@@ -1247,21 +1494,21 @@ void dataflowProtection::insertVectorTMRCorrectionCount(Instruction* cmpInst, In
 	IRBuilder<> builder(thisBlock);
 
 	VectorType* typ = dyn_cast<VectorType>(newCmpInst->getType());
-	//have to extract each element
+	// have to extract each element
 	uint64_t nTypes = typ->getNumElements();
 	VectorType* newVType = VectorType::get(TMRErrorDetected->getValueType(), nTypes);
 
-	//zero-extend the cmpOr result to be the same size as the TMR error counter
+	// zero-extend the cmpOr result to be the same size as the TMR error counter
 	CastInst* zext = CastInst::Create(Instruction::CastOps::ZExt, cmpOr, newVType);
 	zext->insertAfter(cmpOr);
 
-	//the alternative to this is to implement a similar approach as is found in how
+	// the alternative to this is to implement a similar approach as is found in how
 	// syncTerminator() deals with struct types
 	CallInst* redAdd = builder.CreateAddReduce(zext);
 	redAdd->moveAfter(zext);
 
-	//add this to the global
-	//if there were no errors, then it's just adding 0
+	// add this to the global
+	// if there were no errors, then it's just adding 0
 	LoadInst* LI = new LoadInst(TMRErrorDetected, "errFlagLoad", thisBlock);
 	BinaryOperator* BI = BinaryOperator::CreateAdd(LI, redAdd, "errFlagAdd", thisBlock);
 	StoreInst* SI = new StoreInst(BI, TMRErrorDetected, thisBlock);
@@ -1270,3 +1517,284 @@ void dataflowProtection::insertVectorTMRCorrectionCount(Instruction* cmpInst, In
 	return;
 }
 
+
+//----------------------------------------------------------------------------//
+// Stack Protection
+//----------------------------------------------------------------------------//
+/*
+ * Helper function that builds the name "__frm_<FunctionName>_<type>" for a saved-frame value.
+ */
+std::string getSavedFrameGlobalName(StringRef FunctionName, std::string type) {
+	std::string fName = FunctionName.str();
+	std::string tempName = "__frm_" + fName + "_" + type;
+	return tempName;
+}
+
+/*
+ * Helper function for insertStackProtection.
+ * Emits a call to toCall with commonArgs, then a ptrtoint cast of the result to glblType.
+ * Both are inserted before insertBefore; returns a reference to the cast instruction.
+ */
+PtrToIntInst* helperCallStackFunc(Function* toCall,	Instruction* insertBefore,
+		ArrayRef<Value*>* commonArgs, Type* glblType, std::string nameMod)
+{
+	CallInst* callInst = CallInst::Create(
+		toCall,						/* FunctionCallee */
+		*commonArgs,				/* Args */
+		Twine("call" + nameMod)		/* name */
+	);
+	callInst->insertBefore(insertBefore);
+	// have to cast to int first
+	PtrToIntInst* castCall = new PtrToIntInst(
+		callInst,					/* Value to cast */
+		glblType,		 			/* new type */
+		Twine("cast" + nameMod)		/* name */
+	);
+	castCall->insertBefore(insertBefore);
+	return castCall;
+}
+
+
+#define PROTECT_RETURN_ADDRESS
+#define ADDR_OF_RET_ADDR
+/*
+ * This function will insert instructions that:
+ * - Save the return address in a stack slot at the beginning of a function
+ * - Check (DWC) or vote on (TMR) the saved and actual RA at the end of a function
+ * Saving the values also on the stack has the benefit of the saved value
+ *  acting somewhat like a canary.
+ */
+void dataflowProtection::insertStackProtection(Module& M) {
+	if (!protectStackFlag) {
+		return;
+	}
+
+	// query the module to see how big the pointers are for the target
+	// http://llvm.org/docs/LangRef.html#data-layout
+	const DataLayout& layout = M.getDataLayout();
+	unsigned int ptrSz = layout.getPointerSize();
+	unsigned int addrSpace = layout.getAllocaAddrSpace();
+	// get target triple to see if we can support addressofreturnaddress
+	const std::string targetTriple = M.getTargetTriple();
+	// extract target architecture
+	std::string delimiter = "-";
+	std::string targetArch = targetTriple.substr(0, targetTriple.find(delimiter));
+	if (verboseFlag) {
+		errs() << "Target arch is " << targetArch << "\n";
+	}
+	// does this target support getting the address of the return address?
+	bool supportsAddrRetAddr = false;
+	// Supposedly supports x86_64 and aarch64, but I guess not all aarch64,
+	//  because didn't work for ultra96 board.
+	if ( (targetArch == "x86_64") ) {
+		supportsAddrRetAddr = true;
+	}
+
+	// types needed
+	Type* voidPtrType = PointerType::get(
+			IntegerType::get(M.getContext(), 8), addrSpace);
+	Type* int32Type = Type::getInt32Ty(M.getContext());
+	std::vector<Type*> bit32Type = std::vector<Type*> (1, int32Type);
+	Type* glblType = IntegerType::get(M.getContext(), ptrSz * 8);
+	FunctionType* voidPtrFuncRetType = FunctionType::get(
+		voidPtrType,		/* result type */
+		bit32Type,			/* params types */
+		false				/* isVarArg */
+	);
+
+	// common argument lists for function calls
+	// ArrayRef is a non-owning view, so keep the views on the stack instead of
+	//  heap-allocating (and leaking) them; argVec owns the storage behind commonArgs
+	ArrayRef<Type*> noArgsType;
+	ArrayRef<Value*> noArgs;
+	Value* zeroVal = dyn_cast<Value>(ConstantInt::getNullValue(int32Type));
+	std::vector<Value*> argVec = std::vector<Value*> (1, zeroVal);
+	ArrayRef<Value*> commonArgs(argVec);
+
+	#ifdef PROTECT_RETURN_ADDRESS
+	// make a reference to the functions that get the values
+	Constant* constRetAddrFunc = M.getOrInsertFunction(
+			"llvm.returnaddress", voidPtrFuncRetType);
+	Function* getRetAddrFunc = dyn_cast<Function>(constRetAddrFunc);
+	assert(getRetAddrFunc && "return address function defined");
+	#endif
+
+	#ifdef ADDR_OF_RET_ADDR
+	Function* addrOfRetAddrFunc = nullptr;
+	if ( supportsAddrRetAddr && TMR ) {
+		addrOfRetAddrFunc = Intrinsic::getDeclaration(
+				&M, Intrinsic::addressofreturnaddress, noArgsType);
+	}
+	#endif
+
+	// Iterate over the list of protected functions
+	for (auto F : fnsToClone) {
+		// skip some things
+		if (isCoarseGrainedFunction(F->getName())) {
+			continue;
+		}
+
+		// get first block
+		BasicBlock* entryBB = &F->getEntryBlock();
+		assert(entryBB && "no entry block");
+		// get first instruction point
+		Instruction* firstSpot = entryBB->getFirstNonPHIOrDbgOrLifetime();
+
+		// create local variables to store things in this function
+		#ifdef PROTECT_RETURN_ADDRESS
+		// get the return address
+		std::string retAddrName = getSavedFrameGlobalName(
+				F->getName(), "retAddr");
+		AllocaInst* retAddrLcl = new AllocaInst(
+			glblType,			/* type */
+			addrSpace,			/* address space */
+			retAddrName,		/* name */
+			firstSpot			/* InsertBefore */
+		);
+		// call the function that gets the return address
+		PtrToIntInst* castRetVal = helperCallStackFunc(getRetAddrFunc,
+				firstSpot, &commonArgs, glblType, "RetVal");
+		// save the current return address to the local stack slot
+		StoreInst* storeRetAddr = new StoreInst(
+			castRetVal,			/* Value to store */
+			retAddrLcl,			/* Addr to store in */
+			firstSpot			/* InsertBefore */
+		);
+		AllocaInst* retAddrLcl_TMR = nullptr;
+		if ( supportsAddrRetAddr && TMR ) {
+			// 2nd copy
+			retAddrLcl_TMR = new AllocaInst(glblType, addrSpace,
+					retAddrName + "_TMR", firstSpot);
+			// TODO: do we need to call it more than once?
+			PtrToIntInst* castRetVal_TMR = helperCallStackFunc(getRetAddrFunc,
+					firstSpot, &commonArgs, glblType, "RetVal_TMR");
+			StoreInst* storeRetAddr = new StoreInst(castRetVal_TMR,
+					retAddrLcl_TMR, firstSpot);
+		}
+		#endif
+
+		// need to find all of the return points
+		std::vector<Instruction*> returns;
+		for (auto & bb : *F) {
+			auto term = bb.getTerminator();
+			if (ReturnInst* retTerm = dyn_cast<ReturnInst>(term)) {
+				if (startOfSyncLogic.find(term) != startOfSyncLogic.end()) {
+					returns.push_back(startOfSyncLogic[term]);
+				} else {
+					returns.push_back(retTerm);
+				}
+			}
+		}
+
+		// Make sure an error block exists for this function
+		// Should have been created during createErrorBlocks() call
+		BasicBlock* errBlock = errBlockMap[F];
+		assert(errBlock && "error block exists");
+		// TODO: might not always need these
+
+		// At all of the return points, check return address
+		for (auto ret : returns) {
+			// Call the function again to get return address from stack
+			#ifdef PROTECT_RETURN_ADDRESS
+			PtrToIntInst* castRetValAgain = helperCallStackFunc(getRetAddrFunc,
+				ret, &commonArgs, glblType, "RetVal");
+
+			// load at the end
+			LoadInst* loadRet = new LoadInst(
+				glblType, 			/* type */
+				retAddrLcl,			/* value */
+				"loadRetAddr"		/* name */
+			);
+			// for some reason, have to do this separate from the constructor
+			loadRet->insertBefore(ret);
+
+			// compare
+			CmpInst* cmp0 = CmpInst::Create(
+				intCmpType,			/* compare type */
+				intCmpEqual,		/* opcode */
+				castRetValAgain,	/* val0 */
+				loadRet,			/* val1 */
+				"cmpRet",			/* name */
+				ret					/* InsertBefore */
+			);
+
+			#ifdef ADDR_OF_RET_ADDR
+			// Can vote and store
+			if (supportsAddrRetAddr && TMR) {
+				// Load the 2nd saved copy of the return address
+				LoadInst* loadRet2 = new LoadInst(glblType,
+						retAddrLcl_TMR, "loadRetAddr_TMR");
+				loadRet2->insertBefore(ret);
+				// majority wins
+				SelectInst* sel = SelectInst::Create(cmp0, castRetValAgain,
+						loadRet2, tmr_vote_inst_name, ret);
+				// Get the address of the return address
+				CallInst* callAddrRetAddr = CallInst::Create(
+					addrOfRetAddrFunc,			/* FunctionCallee */
+					noArgs,						/* Args */
+					Twine("callAddrRetVal")		/* name */
+				);
+				callAddrRetAddr->insertBefore(ret);
+				// Bitcast to the right size
+				BitCastInst* castAddrRetAddr = new BitCastInst(
+					callAddrRetAddr,			/* value */
+					glblType->getPointerTo(),	/* type */
+					"castAddrRetVal",			/* name */
+					ret							/* InsertBefore */
+				);
+				// Store at the spot in the stack
+				StoreInst* storeAddrRetAddr = new StoreInst(
+						sel, castAddrRetAddr, ret);
+				// fix sync point stuff
+				// since the current instruction isn't guaranteed to be a terminator
+				TerminatorInst* curTerminator = ret->getParent()->getTerminator();
+				Instruction* callRetAgain = castRetValAgain->getPrevNode();
+				startOfSyncLogic[curTerminator] = callRetAgain;
+				syncPoints.push_back(curTerminator);
+			}
+
+			else {
+			#endif /* ADDR_OF_RET_ADDR */
+			// compare and abort if error
+			Instruction* newCmp0 = splitBlocks(cmp0, errBlock);
+			// We have to mark the terminator of the new block so that
+			//  instructions don't get moved to the wrong spot
+			BasicBlock* newBlock0 = newCmp0->getParent();
+			TerminatorInst* newTerm0 = newBlock0->getTerminator();
+			Instruction* callRetAgain = castRetValAgain->getPrevNode();
+			startOfSyncLogic[newTerm0] = callRetAgain;
+			syncPoints.push_back(newTerm0);
+			#ifdef ADDR_OF_RET_ADDR
+			}
+			#endif /* ADDR_OF_RET_ADDR */
+			#endif /* PROTECT_RETURN_ADDRESS */
+		}
+	}
+	/*
+	 * NOTES:
+	 *
+	 * https://llvm.org/docs/LangRef.html#llvm-addressofreturnaddress-intrinsic
+	 * @llvm.addressofreturnaddress
+	 * gives pointer to the place in the stack frame where the return address is stored
+	 * unfortunately, only implemented for x86 and aarch64
+	 *
+	 * https://llvm.org/docs/LangRef.html#llvm-sponentry-intrinsic
+	 * @llvm.sponentry
+	 * the value of the stack pointer at the entry of the current function
+	 * not available in LLVM 7.0, first in 8.0
+	 * https://github.com/llvm/llvm-project/blob/release/8.x/llvm/include/llvm/IR/Intrinsics.td
+	 *
+	 * https://llvm.org/docs/LangRef.html#llvm-frameaddress-intrinsic
+	 * @llvm.frameaddress
+	 * the value of the frame pointer of the specified stack frame
+	 * pretty much don't use it except for the current frame
+	 *
+	 * https://llvm.org/docs/LangRef.html#llvm-read-register-and-llvm-write-register-intrinsics
+	 * @llvm.read_register, @llvm.write_register
+	 * this doesn't seem portable enough to be included in COAST, but it would be really useful
+	 *
+	 * https://llvm.org/docs/LangRef.html#llvm-stackprotector-intrinsic
+	 * https://llvm.org/docs/LangRef.html#llvm-stackguard-intrinsic
+	 * Looks like LLVM supports intrinsics which implement some of the stack protection passes like StackProtect and StackGuard.
+	 */
+}
diff --git a/projects/dataflowProtection/utils.cpp b/projects/dataflowProtection/utils.cpp
index 9ba381917..215d79c63 100644
--- a/projects/dataflowProtection/utils.cpp
+++ b/projects/dataflowProtection/utils.cpp
@@ -1,9 +1,15 @@
-// This file contains utilities that help set up or clean up after the pass
+/*
+ * utils.cpp
+ *
+ * This file contains utilities that help set up or clean up after the pass.
+ */
 
 #include "dataflowProtection.h"
 
+// standard library includes
 #include <queue>
 #include <list>
+#include <set>
 #include <iostream>
 #include <fstream>
 #include <sstream>
@@ -11,6 +17,8 @@
 #include <ctime>
 #include <cstdlib>
 
+// LLVM includes
+#include <llvm/Option/Option.h>
 #include <llvm/IR/Module.h>
 #include "llvm/Support/CommandLine.h"
 #include <llvm/Support/Debug.h>
@@ -19,536 +27,163 @@
 #include <llvm/IR/IRBuilder.h>
 #include "llvm/ADT/StringRef.h"
 
+using namespace llvm;
+
+
 // Command line options
 extern cl::opt<bool> InterleaveFlag;
-extern cl::opt<bool> SegmentFlag;
-extern cl::list<std::string> skipFnCl;
-extern cl::list<std::string> skipLibCallsCl;
-extern cl::list<std::string> replicateUserFunctionsCallCl;
-extern cl::list<std::string> ignoreGlblCl;
-extern cl::list<std::string> globalsToRuntimeInitCl;
 extern cl::opt<bool> noMemReplicationFlag;
-extern cl::opt<bool> noStoreDataSyncFlag;
-extern cl::opt<bool> storeDataSyncFlag;
 extern cl::opt<bool> ReportErrorsFlag;
-extern cl::opt<std::string> configFileLocation;
 extern cl::opt<bool> dumpModuleFlag;
 extern cl::opt<bool> verboseFlag;
+extern std::set<ConstantExpr*> annotationExpressions;
 
-//these are the names of the above CL lists
-//any changes to these must also be changed at the head of dataflowProtection.cpp
-const std::string skipFnName = "ignoreFns";
-const std::string ignoreGlblName = "ignoreGlbls";
-const std::string skipLibCallsName = "skipLibCalls";
-const std::string coarseFnsName = "replicateFnCalls";
-const std::string runtimeGlblInitName = "runtimeInitGlobals";
-
-//new global lists to be used to track function names
-std::list<std::string> skipFn;
-std::list<std::string> skipLibCalls;
-std::list<std::string> coarseGrainedUserFunctions;
-std::list<std::string> ignoreGlbl;
-std::list<std::string> clGlobalsToRuntimeInit;
-
-//track functions that we should ignore invalid SOR crossings
-std::map<GlobalVariable*, Function*> globalCrossMap;
-
-//also, there are some functions that are not supported
-//it is in here instead of the config file because we don't want users touching it
-std::list<std::string> unsupportedFunctions = {"fscanf", "scanf", "fgets", "gets", "sscanf", "__isoc99_fscanf"};
-
-using namespace llvm;
-
-//----------------------------------------------------------------------------//
-// Miscellaneous
-//----------------------------------------------------------------------------//
-bool dataflowProtection::isIndirectFunctionCall(CallInst* CI, std::string errMsg, bool print) {
-	//This partially handles bitcasts and other inline LLVM functions
-	if (CI->getCalledFunction() == nullptr) {
-		// probably don't want to hear about skipping inline assembly, clean up output
-		if( (print || verboseFlag) && !CI->isInlineAsm())
-			errs() << warn_string << " in " << errMsg << " skipping:\n\t" << *CI << "\n";
-		return true;
-	} else {
-		return false;
-	}
-}
-
-// returns a string of random characters of the requested size
-// used to name-mangle the DWC error handler block (under construction)
-std::string dataflowProtection::getRandomString(std::size_t len) {
-	//init rand
-	std::srand(time(0));
-
-	const char chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
-	int charLen = sizeof(chars) - 1;
-	std::string result = "";
-
-	for (size_t i = 0; i < len; i+=1) {
-		result += chars[rand() % charLen];
-	}
-
-	return result;
-}
-
-void dataflowProtection::getFunctionsFromCL() {
-	//copy all (fixed) things from the command line to the internal, editable lists
-	for(auto x : skipLibCallsCl){
-		skipLibCalls.push_back(x);
-	}
-
-	for(auto x : skipFnCl){
-		skipFn.push_back(x);
-	}
-
-	for(auto x : replicateUserFunctionsCallCl){
-		coarseGrainedUserFunctions.push_back(x);
-	}
-
-	for(auto x : ignoreGlblCl){
-		ignoreGlbl.push_back(x);
-	}
-
-	for(auto x : globalsToRuntimeInitCl){
-		clGlobalsToRuntimeInit.push_back(x);
-	}
-}
-
-//function to extract function names from the configuration file
-//lists already exist, created in dataflowProtection.cpp
-//return value indicates success or failure
-int dataflowProtection::getFunctionsFromConfig() {
-	std::string filename;
-	if (configFileLocation!="") {
-		filename = configFileLocation;
-	} else {
-		char* coast = std::getenv("COAST_ROOT");
-		if (coast) {
-			filename = std::string(coast) + "/projects/dataflowProtection/functions.config";
-		} else {
-			// just look in the current directory
-			filename = "functions.config";
-		}
-	}
-	std::ifstream ifs(filename, std::ifstream::in);
-
-	if (!ifs.is_open()) {
-		errs() << "ERROR: No configuration file found at " << filename << '\n';
-		errs() << "         Please pass one in using -configFile\n";
-		return -1;
-	}
-
-	std::list<std::string>* lptr;
-	std::string line;
-	while (getline(ifs, line)) {
-		if (line.length() == 0) {  //Blank line
-			continue;
-		}
-
-		if (line[0] == '#') { //# is the comment symbol
-			continue;
-		}
-
-		//remove all whitespace
-		line.erase(remove (line.begin(), line.end(), ' '), line.end());
-
-		std::istringstream iss(line);
-		std::string substr;
-		getline(iss, substr, '=');
-
-		//Find the option we're using
-		if(substr == skipLibCallsName)
-			lptr = &skipLibCalls;
-		else if(substr == skipFnName)
-			lptr = &skipFn;
-		else if (substr == coarseFnsName)
-			lptr = &coarseGrainedUserFunctions;
-		else if (substr == ignoreGlblName)
-			lptr = &ignoreGlbl;
-		else if (substr == runtimeGlblInitName)
-			lptr = &clGlobalsToRuntimeInit;
-		else{
-			errs() << "ERROR: unrecognized option " << substr;
-			errs() <<" in configuration file " << filename << "\n\n";
-			return 1;
-		}
-
-		//insert all options into vector
-		while (iss.good()) {
-			getline(iss, substr, ',');
-			if (substr.length() == 0)
-				continue;
-			lptr->push_back(substr);
-		}
-
-	}
-	ifs.close();
-	return 0;
-}
-
-//If -dumpModule is passed in, then print the entire module out
-//This is helpful when the pass crashes on cleanup
-//It is in a format that can be pasted into an *.ll file and run
-void dataflowProtection::dumpModule(Module& M) {
-	if (!dumpModuleFlag)
-		return;
-
-	for (GlobalVariable& g : M.getGlobalList()) {
-		errs() << g << "\n";
-	}
-	errs() << "\n";
-	for (auto &f : M) {
-		errs() << f << "\n";
-	}
-}
 
 //----------------------------------------------------------------------------//
-// Initialization code
+// Cleanup unused things
 //----------------------------------------------------------------------------//
-void dataflowProtection::removeUnusedFunctions(Module& M) {
+// #define DEBUG_DELETE_FUNCTIONS
+// returns the number of functions removed
+int dataflowProtection::removeUnusedFunctions(Module& M) {
+	int numRemoved = 0;
 
-	//get reference to main() function
+	// get reference to main() function
 	Function* mainFunction = M.getFunction("main");
-	//If we don't have a main, don't remove any functions
+	// If we don't have a main, don't remove any functions
 	if (!mainFunction) {
-		return;
+		return 0;
 	}
 
-	//Populate a list of all functions in the module
+	// Populate a list of all functions in the module
 	std::set<Function*> functionList;
 	for (auto & F : M) {
-		//Ignore external function declarations
+		// Ignore external function declarations
 		if (F.hasExternalLinkage() && F.isDeclaration()) {
 			continue;
 		}
 
-		//Don't erase fault handlers
+		// Don't erase fault handlers
 		if (F.getName().startswith("FAULT_DETECTED_")) {
 			continue;
 		}
 
-		//Don't erase ISRs
-		if(isISR(F))
+		// Don't erase ISRs
+		if (isISR(F))
 			continue;
 
-		if(F.getNumUses() != 0)
+		#ifdef DEBUG_DELETE_FUNCTIONS
+		bool debugDeleteFlag = false;
+		if (F.hasName() && (F.getName() == "vStdioWithCWDTest")) {
+			errs() << " == " << F.getName() << " == \n";
+			debugDeleteFlag = true;
+		}
+		#endif
+
+		// Have to detect unused recursive functions
+		bool noSkip = false;
+		size_t userCount = 0, userRecurse = 0;
+		for (User* U : F.users()) {
+			userCount++;
+			// calls for which the called function is the same as the parent function are recursive
+			if (auto callUse = dyn_cast<CallInst>(U)) {
+				Function* calledF = callUse->getCalledFunction();
+				Function* parentF = callUse->getParent()->getParent();
+				// if it calls the function that it's in
+				if (calledF == parentF) {
+					userRecurse++;
+				}
+				#ifdef DEBUG_DELETE_FUNCTIONS
+				else if (debugDeleteFlag) {
+					errs() << " call is in " << parentF->getName() << "\n";
+				}
+				#endif
+			}
+			else if (auto funcUse = dyn_cast<Function>(U)) {
+				// strange issues with functions reported as using themselves, but not actually anywhere
+				if (funcUse == &F) {
+					userRecurse++;
+				}
+				else if (funcUse->getName() == F.getName()) {
+					errs() << "why do pointers not match?\n"
+						   << funcUse << " != " << &F << " ?\n";
+					assert(false && "multiple copies of the same function?");
+				}
+			}
+			#ifdef DEBUG_DELETE_FUNCTIONS
+			// any other users?
+			if (debugDeleteFlag) {
+				if (auto funcUse = dyn_cast<Function>(U)) {
+					PRINT_STRING(funcUse->getName());
+				} else {
+					PRINT_VALUE(U);
+				}
+			}
+			#endif
+		}
+		#ifdef DEBUG_DELETE_FUNCTIONS
+		// report how many uses were found
+		if (debugDeleteFlag) {
+			errs() << "Found " << userCount << " users\n";
+		}
+		#endif
+		if (userCount != userRecurse) {
 			continue;
+		}
 
+		// functions that we are told by the application programmer are used
 		if (usedFunctions.find(&F) != usedFunctions.end())
 			continue;
 
+		// everything else may be considered unused
 		functionList.insert(&F);
 	}
 
 	recursivelyVisitCalls(M, mainFunction, functionList);
 
 	if (functionList.size() == 0) {
-		return;
+		return 0;
 	}
 
-	// TODO: fix assertion - it's possible for a xMR'd function to be in the list of no uses,
-	//  if it's used as a function pointer only
-	if(functionList.size() > 0)
-		if(verboseFlag) errs() << "The following functions are unused, removing them: \n";
+	// It's possible for a xMR'd function to be in the list of no uses,
+	//  if it's used as a function pointer only.
+	// The other reason would be if the function was replicated by default, but it is used as
+	//  a function pointer, in which case the code would still just use the original version.
 	for (auto q : functionList) {
-		if (fnsToClone.find(q) != fnsToClone.end()) {
-			errs() << "Failed removing function '" << q->getName() << "'\n";
-		}
-		assert( (fnsToClone.find(q) == fnsToClone.end()) && "The specified function is not called, so is being removed");
-		if(verboseFlag) errs() << "    " << q->getName() << "\n";
+		if (verboseFlag) errs() << "    " << q->getName() << "\n";
 		q->eraseFromParent();
+		numRemoved++;
 	}
 
+	return numRemoved;
 }
 
-void dataflowProtection::processCommandLine(Module& M, int numClones) {
-	if (InterleaveFlag == SegmentFlag) {
-		SegmentFlag = true;
-	}
-	TMR = (numClones==3);
-
-	if (noMemReplicationFlag && noStoreDataSyncFlag) {
-		errs() << warn_string << " noMemDuplication and noStoreDataSync set simultaneously. Recommend not setting the two together.\n";
-	}
-
-	if (noStoreDataSyncFlag && storeDataSyncFlag) {
-		errs() << err_string << " conflicting flags for store and noStore!\n";
-		exit(-1);
-	}
-
-	//copy command line lists to internal lists
-	getFunctionsFromCL();
-
-	if (getFunctionsFromConfig()) {
-		assert("Configuration file error!" && false);
-	}
-
-	if (skipFn.size() == 0) {
-		for (auto & fn_it : M) {
-
-			if (fn_it.isDeclaration()) { //Ignore library calls
-				continue;
-			}
-
-			if (isISR(fn_it)) { //Don't erase ISRs
-				continue;
-			}
-
-			if (xMR_default)
-				fnsToClone.insert(&fn_it);
-		}
-	} else {
-		for (auto fcn : skipFn) {
-			Function* f = M.getFunction(StringRef(fcn));
-			if (!f) {
-				errs() << "\n" << err_string << "Specified function " << fcn << " does not exist!\n";
-				errs() << "Check the spelling, check if the optimizer inlined it, of if name was mangled\n\n";
-				assert(f);
-			}
-			fnsToSkip.insert(f);
-		}
-	}
-
-}
-
-void dataflowProtection::processAnnotations(Module& M) {
-	//Inspired by http://bholt.org/posts/llvm-quick-tricks.html
-	auto global_annos = M.getNamedGlobal("llvm.global.annotations");
-	if (global_annos) {
-		auto a = cast<ConstantArray>(global_annos->getOperand(0));
-		//check that it is the right type
-		if (a) {
-			for (int i=0; i < a->getNumOperands(); i++) {
-				auto e = cast<ConstantStruct>(a->getOperand(i));
-
-				//extract data
-				auto anno = cast<ConstantDataArray>(cast<GlobalVariable>(e->getOperand(1)->getOperand(0))->getOperand(0))->getAsCString();
-
-				//Function annotations
-				if (auto fn = dyn_cast<Function>(e->getOperand(0)->getOperand(0))) {
-					if (anno == no_xMR_anno) {
-						if(verboseFlag) errs() << "Directive: do not clone function '" << fn->getName() << "'\n";
-						fnsToSkip.insert(fn);
-						if (fnsToClone.find(fn)!=fnsToClone.end())
-							fnsToClone.erase(fn);
-					} else if (anno == xMR_anno) {
-						if(verboseFlag) errs() << "Directive: clone function '" << fn->getName() << "'\n";
-						fnsToClone.insert(fn);
-					} else if (anno == xMR_call_anno) {
-						if(verboseFlag) errs() << "Directive: replicate calls to function '" << fn->getName() << "'\n";
-						coarseGrainedUserFunctions.push_back(fn->getName());
-					} else if (anno == skip_call_anno) {
-						if(verboseFlag) errs() << "Directive: do not clone calls to function '"  << fn->getName() << "'\n";
-						skipLibCalls.push_back(fn->getName());
-						//TODO: do we need to worry about duplicates?
-					} else if (anno.startswith("no-verify-")) {
-						StringRef global_name = anno.substr(10, anno.size() - 10);
-
-						GlobalValue* glbl = M.getNamedValue(global_name);
-						if (glbl) {
-							GlobalVariable* glblVar = dyn_cast<GlobalVariable>(glbl);
-							if (glblVar) {
-								globalCrossMap[glblVar] = fn;
-								errs() << "Directive: ignoring global \"" << global_name
-										<< "\" being used in unprotected function \"" << fn->getName() << "\"\n";
-							}
-						} else {
-							errs() << warn_string << " global " << global_name << " doesn't exist\n";
-						}
-
-					} else {
-						assert(false && "Invalid option on function");
-					}
-
-				}
-				//Global annotations
-				else if (auto gv = dyn_cast<GlobalVariable>(e->getOperand(0)->getOperand(0))) {
-					if (anno == no_xMR_anno) {
-						if(verboseFlag) errs() << "Directive: do not clone global variable '" << gv->getName() << "'\n";
-						globalsToSkip.insert(gv);
-					} else if (anno == xMR_anno) {
-						if(verboseFlag) errs() << "Directive: clone global variable '" << gv->getName() << "'\n";
-						globalsToClone.insert(gv);
-					} else if (anno == default_xMR) {
-						if(verboseFlag) errs() << "Directive: set xMR as default\n";
-					} else if (anno == default_no_xMR) {
-						if(verboseFlag) errs() << "Directive: set no xMR as default\n";
-						xMR_default = false;
-					} else if (anno == coast_volatile) {
-						if(verboseFlag) errs() << "Directive: don't remove '" << gv->getName() << "'\n";
-						volatileGlobals.insert(gv);
-					} else {
-						if(verboseFlag) errs() << "Directive: " << anno << "\n";
-						assert(false && "Invalid option on global value");
-					}
-				}
-				else {
-					assert(false && "Non-function annotation");
-				}
-			}
-		} else {
-			errs() << warn_string << " global annotations of wrong type!\n" << *global_annos << "\n";
-		}
-	}
-
-	// get the data from the list of "used" globals, and add it to volatileGlobals
-	auto used_annos = M.getNamedGlobal("llvm.used");
-	if (used_annos) {
-		auto ua = cast<ConstantArray>(used_annos->getOperand(0));
-		if (ua) {
-			for (int i=0; i < ua->getNumOperands(); i++) {
-				auto element = ua->getOperand(i);
-				if (BitCastOperator* bc = dyn_cast<BitCastOperator>(element)) {
-					if (GlobalVariable* gv = dyn_cast<GlobalVariable>(bc->getOperand(0))) {
-						volatileGlobals.insert(gv);
-					} else if (Function* fn = dyn_cast<Function>(bc->getOperand(0))) {
-						usedFunctions.insert(fn);
-					}
-				}	// TODO: what if it doesn't have to be bit-casted?
-			}
-		}
-	}
-
-	//Local variables
-	for(auto &F : M){
-		for(auto &bb : F){
-			for(auto &I : bb){
-				if( auto CI = dyn_cast<CallInst>(&I) ){
-					// have to skip any bitcasts in function calls because they aren't actually a function
-					if(isIndirectFunctionCall(CI, "processAnnotations", false))
-						continue;
-					if(CI->getCalledFunction()->getName() == "llvm.var.annotation"){
-						//Get variable
-						auto adr = dyn_cast<BitCastInst>(CI->getOperand(0));
-						AllocaInst* var;
-						if (!adr) {
-							//there could be no bitcast if the alloca is already of type i8
-							var = dyn_cast<AllocaInst>(CI->getOperand(0));
-						} else {
-							var = dyn_cast<AllocaInst>(adr->getOperand(0));
-						}
-						assert(var && "valid alloca");
-
-						auto ce = dyn_cast<ConstantExpr>(CI->getOperand(1));
-						auto gv  = dyn_cast<GlobalVariable>(ce->getOperand(0));
-						auto anno = dyn_cast<ConstantDataArray>(gv->getInitializer())->getAsCString();
-
-						if(var){
-							if(anno == no_xMR_anno){
-								if(verboseFlag) errs() << "Directive: do not clone local variable '" << *var << "'\n";
-								instsToSkip.insert(var);
-								walkInstructionUses(var, false);
-							} else if(anno == xMR_anno){
-								if(verboseFlag) errs() << "Directive: clone local variable '" << *var << "'\n";
-								instsToCloneAnno.insert(var);
-								//if this is all we do, it will only clone the `alloca` instruction, but
-								// we want it to clone all instructions that use the same variable
-								walkInstructionUses(var, true);
-								//how do we get the syncpoints to happen?
-								//have to add them manually
-							} else{
-								errs() << anno << "\n";
-								assert(false && "Unrecognized variable annotation");
-							}
-						} else{
-							assert(false && "Local variable not alloca");
-						}
-					}
-				}
-			}
-		}
-	}
-}
-
-//----------------------------------------------------------------------------//
-// Cleanup
-//----------------------------------------------------------------------------//
-void dataflowProtection::removeAnnotations(Module& M) {
-	auto global_annos = M.getNamedGlobal("llvm.global.annotations");
-	if (!global_annos)
-		return;
-	auto a = cast<ConstantArray>(global_annos->getOperand(0));
-	if (!a)
-		return;
-
-	std::set<GlobalVariable*> anno_strings;
-
-	//Populate a list of global strings that are only used in annotations
-	for (int i=0; i < a->getNumOperands(); i++) {
-		auto e = cast<ConstantStruct>(a->getOperand(i)); //This is part of global_anno
-		auto anno = cast<GlobalVariable>(e->getOperand(1)->getOperand(0)); //This is the global string
-
-		for (int j=0; j < e->getNumOperands(); j++) {
-			if (e->getOperand(j)->getNumOperands() >= 1) {
-				if (auto cs = dyn_cast<GlobalVariable>(e->getOperand(j)->getOperand(0))) {
-					if (cs->getSection() == "llvm.metadata") {
-						anno_strings.insert(cs);
-					}
-				}
-			}
-		}
-	}
-
-	//Remove llvm.var.annotation calls
-	std::set<Instruction*> toRemove;
-	Function* lva = NULL;
-	for (auto &F : M) {
-		for (auto & bb : F) {
-			for (auto & I : bb) {
-				if (auto CI = dyn_cast<CallInst>(&I)) {
-					auto called = CI->getCalledFunction();
-					if ( (called != nullptr) && (called->getName() == "llvm.var.annotation") ) {
-						lva = called;
-						toRemove.insert(CI);
-					}
-				}
-			}
-		}
-	}
-
-	for (auto rm : toRemove) {
-		auto op0 = dyn_cast<Instruction>(rm->getOperand(0));
-		if (rm->getNumUses() < 1) {
-			if (rm->getParent()) {
-				rm->eraseFromParent();
-			}
-		}
-		//do this 2nd so that the one possible user is removed first
-		if (op0 && op0->getNumUses() < 1) {
-			if (op0->getParent()) {
-				op0->eraseFromParent();
-			}
-			//we probably added this (which is probably a bitcast) to the list of instructions to clone
-			if (std::find(instsToCloneAnno.begin(), instsToCloneAnno.end(), op0) != instsToCloneAnno.end()) {
-				instsToCloneAnno.erase(op0);
-			}
-		}
-	}
-
-	if (lva) {
-		lva->removeFromParent();
-	}
-
-	//Remove global annotations
-	M.getGlobalList().erase(global_annos);
-	for (auto a_s : anno_strings) {
-		a_s->eraseFromParent();
-	}
-
-	if (auto default_behavior = M.getNamedGlobal(default_global)) {
-		default_behavior->eraseFromParent();
-	}
-}
 
 void dataflowProtection::removeOrigFunctions() {
+	if (verboseFlag)
+		PRINT_STRING("Removing original & unused functions:");
 	for (auto F : origFunctions) {
-		if (fnsToCloneAndSkip.find(F)==fnsToCloneAndSkip.end()) {
+		// TODO: why is this not just fnsToClone?
+		if (fnsToCloneAndSkip.find(F) == fnsToCloneAndSkip.end()) {
 			/*
 			 * If not all of the uses are gone, then this function likely is called from within
-			 * and without the Scope Of Replication (SOR). We'll keep it around in that case
+			 * and without the Scope Of Replication (SOR). We'll keep it around in that case.
 			 */
 			if (F->use_empty()) {
+				if (verboseFlag && F->hasName()) {
+					errs() << "    " << F->getName() << "\n";
+				}
 				F->eraseFromParent();
+			} else if (F->hasOneUse()) {
+				// also remove if the only user was in llvm.global.annotations
+				// that global was removed, but some of its expressions stuck around
+				auto F_use = *(F->user_begin());
+				if (ConstantExpr* ce = dyn_cast<ConstantExpr>(F_use)) {
+					if (annotationExpressions.find(ce)
+							!= annotationExpressions.end())
+					{
+						F->eraseFromParent();
+					}
+				}
 			}
 		}
 	}
@@ -571,28 +206,31 @@ void dataflowProtection::removeUnusedGlobals(Module& M) {
 
 	for (GlobalVariable & g : M.getGlobalList()) {
 		if (volatileGlobals.find(&g) != volatileGlobals.end()) {
-			// skip removing globals marked as volatile
-			// it's possible the same feature could be implemented by marking variables with
-			//  the attribute "used", instead of an annotation
+			/* Skip removing globals marked as volatile.
+			 * COAST uses the GCC attribute "used", meaning to not remove the variable.
+			 */
 			continue;
 		} else if (g.getNumUses() == 0) {
 			StringRef gName = g.getName();
-			//Don't touch ISR related variables
-			if (!(gName.startswith("llvm") || gName.startswith("__vector") || gName.startswith("isr_"))) {
+			// Don't touch LLVM-reserved globals (names starting with "llvm")
+			if (!(gName.startswith("llvm")) ) {
 				unusedGlobals.push_back(&g);
 			}
 		} else if (g.getNumUses() == 1) {
 			for (auto u : g.users()) {
 				if (Instruction* UI = dyn_cast<Instruction>(u)) {
-					//If it's in a function marked as __attribute__((used)), then skip this
-					Function* parentF = UI->getParent()->getParent();
-					if (usedFunctions.find(parentF) != usedFunctions.end()) {
-						continue;
+					// If it's in a function marked as __attribute__((used)), then skip this
+					BasicBlock* UIparentBB = UI->getParent();
+					if (UIparentBB) {
+						Function* parentF = UIparentBB->getParent();
+						if (usedFunctions.find(parentF) != usedFunctions.end()) {
+							continue;
+						}
 					}
 				}
-				//Account for instructions that will be cleaned up at the end of the pass
-				//it could also be a call instruction to a library function that has side effects, but
-				// we ignore the return value
+				// Account for instructions that will be cleaned up at the end of the pass
+				// it could also be a call instruction to a library function that has side effects, but
+				//  we ignore the return value
 				if ( (u->getNumUses() == 0) && !isa<StoreInst>(u) && !isa<CallInst>(u) && !isa<InvokeInst>(u)) {
 					unusedGlobals.push_back(&g);
 				}
@@ -600,9 +238,12 @@ void dataflowProtection::removeUnusedGlobals(Module& M) {
 		}
 	}
 
+	if (verboseFlag && (unusedGlobals.size() > 0)) {
+		PRINT_STRING("Removing unused globals:");
+	}
 	for (auto ug : unusedGlobals) {
 		if (verboseFlag) {
-			errs() << "Removing unused global: " << ug->getName() << "\n";
+			errs() << "    " << ug->getName() << "\n";
 		}
 		if (ug->getParent()) {
 			ug->eraseFromParent();
@@ -616,16 +257,17 @@ void dataflowProtection::checkForUnusedClones(Module & M) {
 	for (auto cloneM : cloneMap) {
 		Value* orig = cloneM.first;
 		Value* clone = cloneM.second.first;
+
 		if (clone->getNumUses() == 0) {
-			//Store instructions aren't cloned
+			// Store instructions aren't cloned
 			if (isa<StoreInst>(clone))
 				continue;
 
-			//If the original isn't used, the clone will not be either
+			// If the original isn't used, the clone will not be either
 			if (orig->getNumUses() == 0)
 				continue;
 
-			//Used only in a single external function call, eg printf
+			// Used only in a single external function call, eg printf
 			if (orig->hasOneUse() && isa<CallInst>(orig->user_back())) {
 				if (CallInst* CI = dyn_cast<CallInst>(orig->user_back())) {
 					if (isIndirectFunctionCall(CI, "checkForUnusedClones"))
@@ -635,13 +277,13 @@ void dataflowProtection::checkForUnusedClones(Module & M) {
 				}
 			}
 
-			//If original is only used in external function calls
+			// If original is only used in external function calls
 			if (Instruction* inst = dyn_cast<Instruction>(orig)) {
-				//accumulator - proof by contradiction
+				// accumulator - proof by contradiction
 				bool allExternal = true;
 				for (auto u : inst->users()) {
 					if (CallInst* ci = dyn_cast<CallInst>(u)) {
-						//make sure we're not calling a function on a null pointer
+						// make sure we're not calling a function on a null pointer
 						if (isIndirectFunctionCall(ci, "checkForUnusedClones"))
 							continue;
 						else if (ci->getCalledFunction()->hasExternalLinkage())
@@ -652,10 +294,10 @@ void dataflowProtection::checkForUnusedClones(Module & M) {
 						}
 					}
 				}
-				if(allExternal) continue;
+				if (allExternal) continue;
 
 				// sometimes clones are erroneously created when the instructions were supposed to be skipped
-				if (instsToSkip.find(inst) != instsToSkip.end()) {
+				if (willBeSkipped(inst)) {
 					if (verboseFlag) errs() << "Removing unused local variable: " << *inst << "\n";
 					inst->eraseFromParent();
 
@@ -666,12 +308,12 @@ void dataflowProtection::checkForUnusedClones(Module & M) {
 					}
 				}
 
-				//TODO: add here, also when function calls are supposed to be skipped
+				// TODO: add here, also when function calls are supposed to be skipped
 			}
 
-			//Global duplicated strings aren't used in uncloned printfs. Remove the unused clones
+			// Global duplicated strings aren't used in uncloned printfs. Remove the unused clones
 			if (ConstantExpr* ce = dyn_cast<ConstantExpr>(clone)) {
-				if(verboseFlag) errs() << "Removing unused global string: " << *ce << "\n";
+				if (verboseFlag) errs() << "Removing unused global string: " << *ce << "\n";
 				ce->destroyConstant();
 				if (TMR) {
 					ConstantExpr* ce2 = dyn_cast<ConstantExpr>(cloneM.second.second);
@@ -685,7 +327,7 @@ void dataflowProtection::checkForUnusedClones(Module & M) {
 				continue;
 			}
 
-			//If using noMemDuplicationFlag then don't worry about unused arguments
+			// If using noMemDuplicationFlag then don't worry about unused arguments
 			if (noMemReplicationFlag) {
 				if (dyn_cast<Argument>(orig)) {
 					continue;
@@ -720,51 +362,12 @@ void dataflowProtection::checkForUnusedClones(Module & M) {
 	}
 }
 
+
 //----------------------------------------------------------------------------//
-// Cloning utilities
+// Synchronization utilities
 //----------------------------------------------------------------------------//
-bool dataflowProtection::willBeSkipped(Instruction* I){
-	return instsToSkip.find(I) != instsToSkip.end();
-}
-
-bool dataflowProtection::willBeCloned(Value* v) {
-	Instruction* I = dyn_cast<Instruction>(v);
-	if (I) {
-		return instsToClone.find(I) != instsToClone.end();
-	}
-
-	GlobalVariable* g = dyn_cast<GlobalVariable>(v);
-	if (g) {
-		return globalsToClone.find(g) != globalsToClone.end();
-	}
-
-	ConstantExpr* e = dyn_cast<ConstantExpr>(v);
-	if (e) {
-		return constantExprToClone.find(e) != constantExprToClone.end();
-	}
-
-	if (Argument* a = dyn_cast<Argument>(v)) {
-		Function * f = a->getParent();
-		return fnsToClone.find(f) != fnsToClone.end();
-	}	
-
-	return false;
-}
-
-bool dataflowProtection::isCloned(Value * v) {
-	return cloneMap.find(v) != cloneMap.end();
-}
-
-ValuePair dataflowProtection::getClone(Value* I) {
-	if (cloneMap.find(I) == cloneMap.end()){
-		return ValuePair(I,I);
-	} else
-		return cloneMap[I];
-}
-
-//helper function
-//#define DEBUG_INST_MOVING
-void dataflowProtection::moveClonesToEndIfSegmented(Module & M){
+// #define DEBUG_INST_MOVING
+void dataflowProtection::moveClonesToEndIfSegmented(Module & M) {
 	if (InterleaveFlag)
 		return;
 
@@ -775,17 +378,17 @@ void dataflowProtection::moveClonesToEndIfSegmented(Module & M){
 		for (auto & bb : *F) {
 
 #ifdef DEBUG_INST_MOVING
-			if (bb.getName() == "entry" && F->getName() == "main") {
+			if (bb.getName() == "entry" && F->getName() == "returnTest.RR") {
 				flag = 1;
 			}
 
 			if (flag) {
 				errs() << F->getName() << "\n";
-				errs() << bb << "\n";
+				errs() << "bb: " << bb << "\n";
 			}
 #endif
 
-			//Populate list of things to move before
+			// Populate list of things to move before
 			std::queue<Instruction*> movePoints;
 			for (auto &I : bb) {
 				if (CallInst* CI = dyn_cast<CallInst>(&I)) {
@@ -799,14 +402,16 @@ void dataflowProtection::moveClonesToEndIfSegmented(Module & M){
 						movePoints.push(startOfSyncLogic[&I]);
 					}
 					else if (CI->getCalledFunction() != nullptr && CI->getCalledFunction()->isIntrinsic()) {
-						;	//don't add intrinsics, because they will be expanded underneath (in assembly)
-							// to be a series of inline instructions, not an actual call
+						;	// don't add intrinsics, because they will be expanded underneath (in assembly)
+							//  to be a series of inline instructions, not an actual call
+						// TODO: might want to look at getIntrinsicID() instead, because
+						//  then we can compare enum ranges instead of just names
 					}
 					else {
 //						errs() << "    Move point at CI " << I << "\n";
 						movePoints.push(&I);
 					}
-				} else if(TerminatorInst* TI = dyn_cast<TerminatorInst>(&I)) {
+				} else if (TerminatorInst* TI = dyn_cast<TerminatorInst>(&I)) {
 					if (isSyncPoint(TI)) {
 //						errs() << "    Move point at TI sync " << *startOfSyncLogic[&I] << "\n";
 						movePoints.push(startOfSyncLogic[&I]);
@@ -814,7 +419,7 @@ void dataflowProtection::moveClonesToEndIfSegmented(Module & M){
 //						errs() << "    Move point at TI" << I << "\n";
 						movePoints.push(&I);
 					}
-				} else if(StoreInst* SI = dyn_cast<StoreInst>(&I)) {
+				} else if (StoreInst* SI = dyn_cast<StoreInst>(&I)) {
 					if (isSyncPoint(SI)) {
 						/*
 						 * One problem we saw was when a basic block was split, the instruction which
@@ -832,7 +437,6 @@ void dataflowProtection::moveClonesToEndIfSegmented(Module & M){
 //							errs() << "    Move point at SI: " << *SI << "\n";
 						}
 					}
-#ifdef FIX_STORE_SEGMENTING
 					/* There is a case where we need to keep the stores next to each other, as in the
 					 * load-increment-store pattern.  For StoreInst's which aren't syncpoints, this would
 					 * cause the variable to be incremented twice.  Check for if it has a clone and if
@@ -840,10 +444,9 @@ void dataflowProtection::moveClonesToEndIfSegmented(Module & M){
 					else if (isStoreMovePoint(SI)) {
 						movePoints.push(&I);
 					}
-#endif
-				} else if(GetElementPtrInst* GI = dyn_cast<GetElementPtrInst>(&I)) {
+				} else if (GetElementPtrInst* GI = dyn_cast<GetElementPtrInst>(&I)) {
 					if (isSyncPoint(GI)) {
-						//not all GEP syncpoints have a corresponding entry in the map
+						// not all GEP syncpoints have a corresponding entry in the map
 						if ( (startOfSyncLogic.find(&I) != startOfSyncLogic.end() ) &&
 							 (startOfSyncLogic[&I]->getParent() == I.getParent()) ) {
 							movePoints.push(startOfSyncLogic[&I]);
@@ -857,25 +460,28 @@ void dataflowProtection::moveClonesToEndIfSegmented(Module & M){
 			std::vector<Instruction*> listI1;
 			std::vector<Instruction*> listI2;
 
-			//Move all clones before the sync points
-			for(auto & I : bb) {
+			// Move all clones before the sync points
+			for (auto & I : bb) {
 #ifdef DEBUG_INST_MOVING
 				if (flag) {
-					errs() << I << "\n";
+					// errs() << I << "\n";
 				}
 #endif
-				//see if it's a clone
+				// see if it's a clone
 				if (PHINode* PN = dyn_cast<PHINode>(&I)) {
-					//don't move it, phi nodes must be at the start
+					// don't move it, phi nodes must be at the start
 				} else if ( (getClone(&I).first != &I) && !(isSyncPoint(&I))
-#ifdef FIX_STORE_SEGMENTING
-						&& !(isStoreMovePoint(dyn_cast<StoreInst>(&I)))
-#endif
-						&& !(isCallMovePoint(dyn_cast<CallInst>(&I)))
+							&& !(isStoreMovePoint(dyn_cast<StoreInst>(&I)))
+							&& !(isCallMovePoint(dyn_cast<CallInst>(&I)))
 						/* could also check if it's the head of the list */
 				) {
 					Instruction* cloneI1 = dyn_cast<Instruction>(getClone(&I).first);
 					listI1.push_back(cloneI1);
+					#ifdef DEBUG_INST_MOVING
+					if (flag) {
+						errs() << "to move: " << I << "\n";
+					}
+					#endif
 					if (TMR) {
 						Instruction* cloneI2 = dyn_cast<Instruction>(getClone(&I).second);
 						listI2.push_back(cloneI2);
@@ -895,33 +501,37 @@ void dataflowProtection::moveClonesToEndIfSegmented(Module & M){
 					}
 					listI2.clear();
 
-					movePoints.pop();
-
 #ifdef DEBUG_INST_MOVING
 					if (flag) {
-						errs() << bb << "\n";
+						errs() << "moved before " << *movePoints.front() << "\n";
+						errs() << "now bb: " << bb << "\n";
 					}
 #endif
+					movePoints.pop();
+
 				}
 			}
 
 
-			//Move all sync logic to before the branch
+			// Move all sync logic to before the branch
 			if (!TMR || ReportErrorsFlag) {
-				if (syncCheckMap.find(&bb) != syncCheckMap.end()) { //If block has been split
+				// If block has been split
+				if (syncCheckMap.find(&bb) != syncCheckMap.end()) {
 
-					Instruction* cmpInst = syncCheckMap[&bb]; //Get instruction block split on
+					// Get instruction that the block was split on
+					Instruction* cmpInst = syncCheckMap[&bb];
 					assert(cmpInst && "Block split and the cmpInst stuck around");
 					cmpInst->moveBefore(cmpInst->getParent()->getTerminator());
 
-					if (syncHelperMap.find(&bb) != syncHelperMap.end()) { //Move logic before it
+					// Move logic before it
+					if (syncHelperMap.find(&bb) != syncHelperMap.end()) {
 						for (auto I : syncHelperMap[&bb]) {
 							assert(I && "Moving valid instructions\n");
 							I->moveBefore(cmpInst);
 						}
 					}
 
-					//if there are SIMD instructions, need to move the special compare operators
+					// if there are SIMD instructions, need to move the special compare operators
 					if (simdMap.find(cmpInst) != simdMap.end()) {
 						std::get<0>(simdMap[cmpInst])->moveBefore(cmpInst->getParent()->getTerminator());
 						std::get<1>(simdMap[cmpInst])->moveBefore(cmpInst->getParent()->getTerminator());
@@ -939,6 +549,40 @@ void dataflowProtection::moveClonesToEndIfSegmented(Module & M){
 	}
 }
 
+
+/*
+ * Gets or creates a global variable.
+ */
+GlobalVariable* dataflowProtection::createGlobalVariable(Module& M,
+		std::string name, unsigned int byteSz)
+{
+	StringRef srName = StringRef(name);
+	// see if it already exists
+	GlobalVariable* newGV = M.getGlobalVariable(srName);
+	if (newGV) {
+		return newGV;
+	}
+
+	// Get a type of the right size
+	Type* newGVtype = IntegerType::get(M.getContext(), byteSz * 8);
+	// Insert as constant first
+	Constant* newConstGV = M.getOrInsertGlobal(srName, newGVtype);
+	// Cast to correct type
+	newGV = cast<GlobalVariable>(newConstGV);
+
+	// Set the properties
+	newGV->setConstant(false);
+	newGV->setInitializer(ConstantInt::getNullValue(newGVtype));
+	newGV->setUnnamedAddr(GlobalValue::UnnamedAddr());
+	newGV->setAlignment(byteSz);
+
+	return newGV;
+}
+
+
+//----------------------------------------------------------------------------//
+// Run-time initialization of globals
+//----------------------------------------------------------------------------//
 // Find the total size in bytes of a 1+ dimension array
 int dataflowProtection::getArrayTypeSize(Module & M, ArrayType * arrayType) {
 	Type * containedType = arrayType->getContainedType(0);
@@ -966,8 +610,8 @@ int dataflowProtection::getArrayTypeElementBitWidth(Module & M, ArrayType * arra
 }
 
 void dataflowProtection::recursivelyVisitCalls(Module& M, Function* F, std::set<Function*> &functionList) {
-	//If we've already deleted this function from the list
-	if (functionList.find(F)==functionList.end())
+	// If we've already deleted this function from the list
+	if (functionList.find(F) == functionList.end())
 		return;
 
 	functionList.erase(F);
@@ -982,74 +626,14 @@ void dataflowProtection::recursivelyVisitCalls(Module& M, Function* F, std::set<
 
 }
 
-//TODO: this is not sound logic
-bool dataflowProtection::isISR(Function& F) {
-	bool ans = F.getName().endswith("ISR") || F.getName().endswith("isr");
-	return ans;
-}
 
 //----------------------------------------------------------------------------//
-// Synchronization utilities
+// Miscellaneous
 //----------------------------------------------------------------------------//
-bool dataflowProtection::isSyncPoint(Instruction* I) {
-	if (isa<StoreInst>(I) || isa<CallInst>(I) || isa<TerminatorInst>(I) || isa<GetElementPtrInst>(I))
-		return std::find(syncPoints.begin(), syncPoints.end(), I) != syncPoints.end();
-	else
-		return false;
-}
-
-#ifdef FIX_STORE_SEGMENTING
-bool dataflowProtection::isStoreMovePoint(StoreInst* SI) {
-	if ( 	(getClone(SI).first == SI) ||						/* Doesn't have a clone */
-			(SI->getOperand(0)->getType()->isPointerTy()) ||	/* Storing a pointer type */
-			(dyn_cast<PtrToIntInst>(SI->getOperand(0))) ) 		/* Casted pointer */
-	{
-		return false;
-	}
-	// otherwise, we need to segment them together
-	else
-		return true;
-}
-#endif
-
-bool dataflowProtection::isCallMovePoint(CallInst* ci) {
-	if ( (getClone(ci)).first == ci) {
-		return false;
-	} else {
-		return true;
-	}
-}
-
-/*
- * returns true if this will try to sync on a coarse-grained function return value
- * these should be avoided for things like the case of malloc()
- * if returns false, then it's OK to sync on the value
- */
-bool dataflowProtection::checkCoarseSync(StoreInst* inst) {
-	//need to check for if this value came from a replicated function call
-	Value* op0 = inst->getOperand(0);
-	if (CallInst* CI = dyn_cast<CallInst>(op0)) {
-		Function* calledF = CI->getCalledFunction();
-		if (calledF && (std::find(coarseGrainedUserFunctions.begin(), coarseGrainedUserFunctions.end(),
-				calledF->getName()) != coarseGrainedUserFunctions.end()) ) {
-			//then we've got a coarse-grained value
-			return true;
-		}
-	} else if (InvokeInst* II = dyn_cast<InvokeInst>(op0)) {
-		Function* calledF = II->getCalledFunction();
-		if (calledF && (std::find(coarseGrainedUserFunctions.begin(), coarseGrainedUserFunctions.end(),
-				calledF->getName()) != coarseGrainedUserFunctions.end()) ) {
-			//again
-			return true;
-		}
-	}
-	return false;
-}
-
-//visit all uses of an instruction and see if they are also instructions to add to clone list
+// visit all uses of an instruction and see if they are also instructions to add to clone list
 void dataflowProtection::walkInstructionUses(Instruction* I, bool xMR) {
 
-	//add it to clone or skip list, depending on annotation, passed through argument xMR
+	// add it to clone or skip list, depending on annotation, passed through argument xMR
 	std::set<Instruction*> * addSet;
 	if (xMR) {
 		addSet = &instsToCloneAnno;
@@ -1063,29 +647,44 @@ void dataflowProtection::walkInstructionUses(Instruction* I, bool xMR) {
 			StoreInst* SI = dyn_cast<StoreInst>(instUse);
 			PHINode* phiInst = dyn_cast<PHINode>(instUse);
 
-			//should we add it to the list?
+			// should we add it to the list?
 			if (phiInst) {
 				;
 			} else if (CI) {
-				//skip all call instructions for now
+				// skip all call instructions for now
 				;
 			} else if (TerminatorInst* TI = dyn_cast<TerminatorInst>(instUse)) {
-				//this should become a syncpoint
-				// really? needs more testing
+				// this should become a syncpoint
+				//  really? needs more testing
 //				if (xMR) syncPoints.push_back(instUse);
 			} else if (SI && (noMemReplicationFlag) ) {
-				//don't replicate store instructions if flags
-				//also, this will become a syncpoint
+				// don't replicate store instructions if flags
+				// also, this will become a syncpoint
 //				if (xMR) syncPoints.push_back(instUse);
 			} else {
-				addSet->insert(instUse);
-//				errs() << *instUse << "\n";
+				// Check if any of the operands will be cloned
+				bool safeToInsert = true;
+				for (unsigned opNum = 0; opNum < I->getNumOperands(); opNum++) {
+					auto op = I->getOperand(opNum);
+					if ( (!xMR) && willBeCloned(op)) {
+						// If they are, don't skip this instruction
+						safeToInsert = false;
+						break;
+					}
+				}
+				if (safeToInsert) {
+					addSet->insert(instUse);
+					// errs() << *instUse << "\n";
+				} else {
+					// if not safe, don't bother following this one
+					continue;
+				}
 			}
 
-			//should we visit its uses?
-			//as long as it has more than 1 uses
+			// should we visit its uses?
+			//  as long as it has more than 1 uses
 			if ( (instUse->getNumUses() > 0) && !phiInst) {
-				//recursive call
+				// recursive call
 				walkInstructionUses(instUse, xMR);
 			}
 
@@ -1093,63 +692,34 @@ void dataflowProtection::walkInstructionUses(Instruction* I, bool xMR) {
 	}
 }
 
+
 /*
- * verify that all of the options used to configure COAST for this pass are safe to follow
+ * Helper function which splits string on delimiter
+ * https://www.fluentcpp.com/2017/04/21/how-to-split-a-string-in-c/
+ * Modified to return ints
  */
-void dataflowProtection::verifyOptions(Module& M) {
-	std::map< GlobalVariable*, std::set<Function*> > glblFnMap;
-
-	// check that the globals being cloned are only used in protected functions
-	for (auto g : globalsToClone) {
-		// get all the users
-		for (auto u : g->users()) {
-			// is it an instruction?
-			if (Instruction* UI = dyn_cast<Instruction>(u)) {
-				Function* parentF = UI->getParent()->getParent();
-
-				// have we been asked to skip it?
-				if (globalCrossMap.find(g) != globalCrossMap.end()) {
-					if (globalCrossMap[g] == parentF) {
-						// skip if it's the marked function
-						continue;
-					}
-				}
-
-				// is the instruction in a protected function?
-				if (fnsToClone.find(parentF) == fnsToClone.end()) {
-					if (glblFnMap.find(g) == glblFnMap.end()) {
-						std::set<Function*> tempSet;
-						glblFnMap[g] = tempSet;
-					}
-
-					glblFnMap[g].insert(parentF);
-				}
-
-			}
-		}
-	}
-
-	// print warning messages
-	for (auto item : glblFnMap) {
-		errs() << err_string << " global \"" << item.first->getName() << "\"\n\tused in functions: ";
-		for (auto fns : item.second) {
-			errs() << "\"" << fns->getName() << "\", ";
-		}
-		errs() << "\nwhich are not protected\n";
-	}
-
-	if (glblFnMap.size() > 0) {
-		std::exit(-1);
-	}
-
+std::vector<int> splitOnDelim(const std::string& s, char delimiter) {
+	std::vector<int> tokens;
+	std::string token;
+	std::istringstream tokenStream(s);
+	while (std::getline(tokenStream, token, delimiter)) {
+		if (!token.empty()) tokens.push_back(std::stoi(token));	// stoi("") would throw
+	}
+	return tokens;
 }
 
 
+/*
+ * Some function calls must be handled with specific attributes.
+ * This is where we make those modifications and registrations.
+ */
 void dataflowProtection::updateFnWrappers(Module& M) {
 	std::string wrapperFnEnding = "_COAST_WRAPPER";
+	std::string cloneAfterName = "_CLONE_AFTER_CALL_";
 	// have to create a map and edit afterwards; editing users while iterating over them is a bad idea
 	std::map<Function*, Function*> wrapperMap;
 	std::set<Function*> wrapperFns;
+	std::map<Function*, std::vector<int> > tempCloneAfterCallArgMap;
 
 	// update fn replication wrappers
 	for (auto &fn : M) {
@@ -1173,14 +743,33 @@ void dataflowProtection::updateFnWrappers(Module& M) {
 			Function* normalFn = dyn_cast<Function>(fnC);
 			wrapperMap[&fn] = normalFn;
 
-			// find all CallInsts with target of fnName function
-//			for (auto u : fn.users()) {
-//				if (CallInst* uc = dyn_cast<CallInst>(u)) {
-//					wrapperMap[uc] = normalFn;
-//				} else if (BitCastInst* bc = dyn_cast<BitCastInst>(u)) {
-//					wrapperMap[uc] = normalFn;
-//				}
-//			}
+		}
+		else if (fnName.contains(cloneAfterName)) {
+			wrapperFns.insert(&fn);
+
+			// extract the name
+			// where is the expected string?
+			size_t firstCharIdx = fnName.find(cloneAfterName);
+			StringRef normalFnName = fnName.substr(0, firstCharIdx);
+
+			// extract the argument numbers
+			std::string argNumStr = fnName.substr(
+					firstCharIdx + cloneAfterName.size(), fnName.size()).str();
+			std::vector<int> argNums = splitOnDelim(argNumStr, '_');
+			tempCloneAfterCallArgMap[&fn] = argNums;
+
+			Constant* fnC = M.getOrInsertFunction(normalFnName, fn.getFunctionType());
+			if (!fnC) {
+				errs() << "Matching function call to '" << normalFnName << "' doesn't exist!\n";
+				exit(-1);
+			}
+			else {
+				if (verboseFlag)
+					errs() << info_string << " Found wrapper match: '" << normalFnName << "'\n";
+			}
+
+			Function* normalFn = dyn_cast<Function>(fnC);
+			wrapperMap[&fn] = normalFn;
 		}
 	}
 
@@ -1188,7 +777,7 @@ void dataflowProtection::updateFnWrappers(Module& M) {
 		for (auto &bb: fn) {
 			for (auto &I : bb) {
 
-				//look for call instructions
+				// look for call instructions
 				if (CallInst* ci = dyn_cast<CallInst>(&I)) {
 
 					auto op0 = ci->getOperand(0);
@@ -1206,7 +795,14 @@ void dataflowProtection::updateFnWrappers(Module& M) {
 //							errs() << " -" << *ci << "\n";
 							// duplicate this call, but only if it's in the list of functions to clone
 							if (fnsToClone.find(&fn) != fnsToClone.end()) {
-								instsToCloneAnno.insert(ci);
+								// see if user has specified certain args to be cloned after call
+								auto foundArgClone = tempCloneAfterCallArgMap.find(calledF);
+								if (foundArgClone != tempCloneAfterCallArgMap.end()) {
+									// These ones shouldn't be replicated
+									cloneAfterCallArgMap[ci] = tempCloneAfterCallArgMap[calledF];
+								} else {
+									instsToCloneAnno.insert(ci);
+								}
 								wrapperInsts.insert(ci);
 							}
 						} else if (BitCastOperator* bco = dyn_cast<BitCastOperator>(v)) {
@@ -1232,3 +828,102 @@ void dataflowProtection::updateFnWrappers(Module& M) {
 		fn->eraseFromParent();
 	}
 }
+
+
+// returns a string of `len` random alphanumeric characters
+// used to name-mangle the DWC error handler block
+std::string dataflowProtection::getRandomString(std::size_t len) {
+	// seed only once: re-seeding with time(0) on every call would hand back
+	//  the same string to every caller within the same second
+	static const bool seeded = (std::srand(time(0)), true);
+	(void) seeded;
+	const char chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
+	int charLen = sizeof(chars) - 1;
+	std::string result = "";
+	for (size_t i = 0; i < len; i+=1) {
+		result += chars[rand() % charLen];
+	}
+
+	return result;
+}
+
+
+// helper function for dumpModule - not yet implemented
+std::set<MDNode*> mdnSet;
+
+void createMDSlot(MDNode* N) {
+	// add to set; a node that is already present has been walked before, and
+	//  stopping here keeps cyclic metadata from recursing without end
+	if (!mdnSet.insert(N).second) return;
+	// call on all operands
+	for (unsigned i = 0, e = N->getNumOperands(); i != e; i++) {
+		if (MDNode* op = dyn_cast_or_null<MDNode>(N->getOperand(i))) {
+			createMDSlot(op);
+		}
+	}
+}
+
+// helper function for dumpModule - not yet implemented
+void getAllMDNFunc(Function& F) {
+	SmallVector< std::pair<unsigned, MDNode*>, 4 > MDForInst;
+
+	// iterate over basic blocks in function
+	for (auto BB = F.begin(), E = F.end(); BB != E; BB++) {
+		// iterate over instructions in basic block
+		for (auto I = BB->begin(), E2 = BB->end(); I != E2; I++) {
+
+			/*// first get metadata from intrinsic functions
+			if (CallInst* CI = dyn_cast<CallInst>(I)) {
+				if (Function* calledF = CI->getCalledFunction()) {
+					if (F.hasName() && F.getName().startswith("llvm.")) {
+						for (unsigned i = 0, e = I->getNumOperands(); i != e; i++) {
+							auto op = I->getOperand(i);
+							if (op) {
+								if (MDNode* N = dyn_cast<MDNode>(op)) {
+									createMDSlot(N);
+								}
+							}
+//							if (MDNode* N = dyn_cast_or_null<MDNode>(I->getOperand(i))) {
+//								createMDSlot(N);
+//							}
+						}
+					}
+				}
+			}
+*/
+			// then look at instructions normally
+			I->getAllMetadata(MDForInst);
+			for (unsigned i = 0, e = MDForInst.size(); i != e; i++) {
+				createMDSlot(MDForInst[i].second);
+			}
+			MDForInst.clear();
+		}
+	}
+}
+
+/*
+ * If -dumpModule is passed in, then print the entire module out
+ * This is helpful when the pass crashes on cleanup
+ * It is in a format that can be pasted into an *.ll file and run
+ */
+void dataflowProtection::dumpModule(Module& M) {
+	if (!dumpModuleFlag)
+		return;
+
+	for (GlobalVariable& g : M.getGlobalList()) {
+		errs() << g << "\n";
+	}
+	errs() << "\n";
+	for (auto &f : M) {
+		errs() << f << "\n";
+	}
+
+	// print all the debug metadata
+	for (auto md = M.named_metadata_begin(); md != M.named_metadata_end(); md++) {
+		md->print(errs());
+	}
+	for (auto n : mdnSet) {
+		errs() << *n << "\n";
+	}
+	errs() << "\n";
+}
diff --git a/projects/dataflowProtection/verification.cpp b/projects/dataflowProtection/verification.cpp
new file mode 100644
index 000000000..02038f39f
--- /dev/null
+++ b/projects/dataflowProtection/verification.cpp
@@ -0,0 +1,1122 @@
+/*
+ * verification.cpp
+ *
+ * This file contains the functions used by the dataflowProtection pass to verify
+ *  that the configuration options are consistent with each other.
+ */
+
+#include "dataflowProtection.h"
+
+// standard library includes
+#include <map>
+#include <set>
+#include <list>
+
+// LLVM includes
+#include <llvm/IR/Module.h>
+#include <llvm/IR/DIBuilder.h>
+#include <llvm/IR/Intrinsics.h>
+
+using namespace llvm;
+
+
+// all of the stores to globals that should become sync points
+std::set<StoreInst*> syncGlobalStores;
+// crossings that are marked to be skipped
+std::map<GlobalVariable*, std::set<Function*> > globalCrossMap;
+
+// shared variables
+extern std::list<std::string> skipLibCalls;
+extern cl::opt<bool> noMemReplicationFlag;
+extern cl::opt<bool> verboseFlag;
+
+
+// maps that describe different invalid use cases
+static GlobalFunctionSetMap unPtWritesToPtGlbls;		/* Unprotected writes to protected globals */
+static GlobalFunctionSetMap unPtReadsFromPtGlbls;		/* Unprotected reads from protected globals */
+static GlobalFunctionSetMap ptWritesToUnPtGlbls;		/* Protected writes to unprotected globals */
+
+static std::list< CallRecordType > ptCallsList;			/* Walk calls from protected functions that use unprotected globals */
+static std::list< CallRecordType > unPtCallsList;		/* Walk calls from unprotected functions that use protected globals */
+
+static GlobalFunctionSetMap ptCallsWithUnPtGlbls;		/* Protected function calls with unprotected globals as arguments */
+static GlobalFunctionSetMap unPtCallsWithPtGlbls;		/* Unprotected function calls with protected globals as arguments */
+
+static const std::set<Function*>* fnsToClone_ptr;
+
+static bool verifyDebug = false;
+
+/*
+ * Helper function that looks for stores that inherit from loads.
+ * Essentially verifying if a given memory reference is read-only or not.
+ * Returns nullptr if it is SURE it is read-only,
+ *  StoreInst that is in the use chain if not.
+ * Call instructions will be tracked later, but still return them.
+ *
+ * Edited to allow looking at Values instead of just Instructions.
+ * This lets us track CallInst Arguments.
+ */
+Instruction* hasStoreUsage(Value* i) {
+	if (!i) {
+		return nullptr;
+	} else if (i->getNumUses() == 0) {
+		return nullptr;
+	}
+	static std::set<PHINode*> seenPhiSet;
+
+	// walk the users
+	for (auto use : i->users()) {
+		// we only care about the instructions
+		if (auto instUse = dyn_cast<Instruction>(use)) {
+//			if (verifyDebug) errs() << *instUse << "\n";
+
+			// PHI nodes break the recursion, otherwise infinite loop
+			if (PHINode* phiUse = dyn_cast<PHINode>(instUse)) {
+				// if we haven't seen it yet, mark it as seen and fall through
+				if (seenPhiSet.find(phiUse) == seenPhiSet.end()) {
+					seenPhiSet.insert(phiUse);
+				} else {
+					// skip the ones we've seen already
+					continue;
+				}
+			}
+
+			// if we are loading a scalar value from this, then skip it
+			if (LoadInst* loadUse = dyn_cast<LoadInst>(instUse)) {
+				Type* loadType = loadUse->getType();
+				if (!loadType->isPtrOrPtrVectorTy()) {
+					continue;
+				}
+			}
+
+			// if the user is a compare instruction, we don't have to keep tracking it
+			//  because it fundamentally changes the type of data
+			if (CmpInst* cmpUse = dyn_cast<CmpInst>(instUse)) {
+				continue;
+			}
+
+			// a store/call ends the search; a dead-end chain moves on to the next user
+			if (StoreInst* storeUse = dyn_cast<StoreInst>(instUse)) {
+				return instUse;
+			} else if (CallInst* callUse = dyn_cast<CallInst>(instUse)) {
+				return instUse;
+			} else if (Instruction* found = hasStoreUsage(instUse)) {
+				return found;
+			}
+		}
+	}
+	// every user has been checked and none leads to a store or call
+	return nullptr;
+}
+
+/*
+ * Helper function that looks to see if a local pointer is used in stores, GEPs or calls.
+ * 'ignoreThis' means it's the original store, so we don't want to detect it again.
+ * Returns the first store/GEP/call reached, or nullptr if none is reached.
+ * NOTE(review): only the first non-ignored instruction user is followed at each level.
+ */
+Instruction* isDereferenced(Instruction* i, Instruction* ignoreThis) {
+	// walk the users
+	for (auto use: i->users()) {
+		if (auto instUse = dyn_cast<Instruction>(use)) {
+			if (instUse == ignoreThis) {
+				continue;
+			}
+
+			// stores, GEPs and calls all count as a use; anything else is followed
+			if (StoreInst* storeUse = dyn_cast<StoreInst>(instUse)) {
+				return instUse;
+			} else if (GetElementPtrInst* gepUse = dyn_cast<GetElementPtrInst>(instUse)) {
+				return instUse;
+			} else if (CallInst* callUse = dyn_cast<CallInst>(instUse)) {
+				return instUse;
+			} else {
+				return isDereferenced(instUse, ignoreThis);
+			}
+		}
+	}
+	return nullptr;
+}
+
+
+/*
+ * Helper function to see if name matches certain pattern that we think
+ *  indicates the global is actually static inside a function.
+ *
+ * TODO: if the name of the variable is the original function which has been
+ *  inlined, then the conditions don't match.  Can we look at debug info?
+ */
+bool globalIsStaticToFunction(GlobalVariable* gv, Function* parentF, Instruction* spot) {
+	if (gv->getName().str().find(parentF->getName().str()) != std::string::npos) {
+		return true;
+	}
+	// is it possible to get the original function name if it has been inlined?
+	// DebugLoc dbgLoc = spot->getDebugLoc();
+
+	return false;
+}
+
+
+/*
+ * Helper function to check if it's in the cross map.
+ * Before this was only a map of GlobalVariable -> Function.  Which meant that
+ *  we could only mark one function per GV.  Now it's a set, so that's fixed.
+ */
+bool shouldSkipGlobalUsage(GlobalVariable* gv, Function* parentF) {
+	// no entry for this global means nothing was marked to be skipped
+	auto entry = globalCrossMap.find(gv);
+	if (entry == globalCrossMap.end()) {
+		return false;
+	}
+
+	// entry->second is the set of functions marked for this global
+	const std::set<Function*>& markedFns = entry->second;
+	return markedFns.find(parentF) != markedFns.end();
+}
+
+
+/*
+ * Helper function to make it easier to put a new value in one of the maps in the below function
+ */
+void writeToGlobalMap(GlobalFunctionSetMap &globalMap, GlobalVariable* gv, Function* parentF, Instruction* spot) {
+
+	/* We want to skip "globals" that are actually just static
+	 * variables inside functions.  For example, '_sbrk.heap' is a variable
+	 * in the function '_sbrk' named 'heap', marked as 'static'.
+	 * This should be skipped.
+	 * We know this by:
+	 * 1) internal linkage
+	 * 2) unnamed_addr
+	 * 3) name is parentF.varName (might only be true for C)
+	 *
+	 * Workaround: even though it's not a global, treat it as such and
+	 *  add it to the correct list for the desired behavior.
+	 */
+	if ( (gv->hasInternalLinkage()) &&
+		 (gv->hasGlobalUnnamedAddr()) &&
+		 (gv->getName().str().find(".") != std::string::npos) &&
+		 (globalIsStaticToFunction(gv, parentF, spot)) )
+	{
+		// errs() << "Found static global(?)\n\t" << *gv << "\n";
+		// don't add to list
+		return;
+	}
+
+	// have we been asked to skip it?
+	if (shouldSkipGlobalUsage(gv, parentF)) {
+		return;
+	}
+
+	if (globalMap.find(gv) == globalMap.end()) {
+		FunctionDebugSet tempSet;
+		globalMap[gv] = tempSet;
+	}
+	// debug
+	// errs() << "Inserting '" << gv->getName() << "' from '"
+	// 	   << parentF->getName() <<"'\n";
+	globalMap[gv].insert(std::make_pair(parentF, spot));
+}
+
+/*
+ * Helper function to walk backwards through the operands to find an AllocaInst.
+ * Follows only the first Instruction operand at each level; nullptr if none is found.
+ */
+AllocaInst* findAllocaInst(Instruction* inst) {
+	for (unsigned i = 0; i < inst->getNumOperands(); i++) {
+		Value* nextVal = inst->getOperand(i);
+		if (AllocaInst* ai = dyn_cast<AllocaInst>(nextVal)) {
+			return ai;
+		} else if (Instruction* nextInst = dyn_cast<Instruction>(nextVal)) {
+			return findAllocaInst(nextInst);
+		}
+	}
+	return nullptr;
+}
+
+/*
+ * Helper function to see if the call instructions calls a function that is marked
+ *  to not be called more than once (skipLibCalls)
+ */
+bool fnToBeSkipped(Function* f) {
+	// a null or unnamed function cannot match any entry in the list
+	if ((f == nullptr) || !f->hasName()) {
+		return false;
+	}
+	// linear search of skipLibCalls for the function's name
+	auto match = std::find(skipLibCalls.begin(), skipLibCalls.end(), f->getName());
+	return match != skipLibCalls.end();
+}
+
+
+bool fnToBeCloned(Function* f) {
+	// a null or unnamed function is never reported as cloned
+	if ((f == nullptr) || !f->hasName()) {
+		return false;
+	}
+	// membership test in the set that fnsToClone_ptr points at
+	auto match = fnsToClone_ptr->find(f);
+	return match != fnsToClone_ptr->end();
+}
+
+/*
+ * Helper function to find the next store (if any) from a load
+ *  that isn't storing to a local variable (comes from an AllocaInst).
+ * Return value may be nullptr.
+ */
+Instruction* getNextNonAllocaStore(StoreInst* storeUse) {
+	/*
+	 * When this function starts, we have the first store instruction that inherits
+	 *  from a load of a global.  We need to find out
+	 *   1) is this a store to a local variable
+	 *   2) when is the next store (if any) that's NOT to a local variable
+	 */
+	Instruction* storeSpot = dyn_cast<Instruction>(storeUse->getOperand(1));
+	if (!storeSpot) {
+//		errs() << "Couldn't get 2nd operand of " << *storeUse << "\n";
+		// probably an inline GEP
+		return storeUse;
+	}
+
+	/* If it isn't then this store is the first non-alloca. */
+	if (! isa<AllocaInst>(storeSpot)) {
+		return storeUse;
+	}
+
+	/* Init things */
+	Instruction* toIgnore = dyn_cast<Instruction>(storeUse);
+	Instruction* nextStore = nullptr;
+
+	/* Otherwise, we need to look through all the uses until we find one.
+	   But don't run forever, it has a limit. */
+	for (int i = 0; i < 10; i++) {
+		nextStore = isDereferenced(storeSpot, toIgnore);
+
+		/* This is one exit condition, in that we didn't find any more uses. */
+		if (nextStore == nullptr) {
+			return nullptr;
+		}
+
+		if (nextStore) {
+			if (isa<StoreInst>(nextStore)) {
+				storeSpot = dyn_cast<Instruction>(nextStore->getOperand(1));
+//				if (verifyDebug) errs() << " - nextStore     = " << *nextStore << "\n";
+//				if (verifyDebug) errs() << " - nextStoreSpot = " << *storeSpot << "\n";
+				// isa<> must not see nullptr: a non-Instruction pointer operand ends the search
+				if (!storeSpot) return nextStore;
+				if (! isa<AllocaInst>(storeSpot)) return storeSpot;
+			}
+			/* otherwise it's a GEP or CallInst, so definitely return it */
+			else {
+				return nextStore;
+			}
+		}
+		toIgnore = nextStore;
+	}
+	return storeUse;
+}
+
+/*
+ * Helper function that walks backwards to see if a stored value inherits from a single
+ *  call to an unprotected function (skipLibCalls).
+ * TODO: is there anything that could corrupt the value, even if it did inherit from here?
+ */
+bool dataflowProtection::comesFromSingleCall(Instruction* storeUse) {
+	// default is failed
+	bool returnVal = false;
+	static std::set<PHINode*> seenPhiSet;
+
+	for (unsigned i = 0; i < storeUse->getNumOperands(); i++) {
+		Value* nextVal = storeUse->getOperand(i);
+		if (CallInst* ci = dyn_cast<CallInst>(nextVal)) {
+			Function* calledF = ci->getCalledFunction();
+			// need to handle intrinsic functions here (calledF is null for indirect calls)
+			if (calledF && calledF->getIntrinsicID() != Intrinsic::ID::not_intrinsic) {
+				if (willBeCloned(ci)) {
+					returnVal = false;
+				} else {
+					returnVal = true;
+				}
+				break;
+			}
+
+			// check that the function being called will only be called once
+			if (calledF && fnToBeSkipped(calledF)) {
+				returnVal = true;
+				break;
+			} else if (calledF && !fnToBeCloned(calledF)) {
+				returnVal = true;
+				break;
+			} else {
+				break;
+			}
+		}
+		// the recursion is broken if we get into a PHI node loop
+		else if (PHINode* nextPhi = dyn_cast<PHINode>(nextVal)) {
+			// if we haven't seen it before, go ahead and follow
+			if (seenPhiSet.find(nextPhi) == seenPhiSet.end()) {
+				seenPhiSet.insert(nextPhi);
+				return comesFromSingleCall(nextPhi);
+			}
+			// otherwise, problems, need to stop now
+			else {
+				break;
+			}
+		}
+		// normal instructions can be followed
+		else if (Instruction* nextInst = dyn_cast<Instruction>(nextVal)) {
+//			if (verifyDebug) errs() << *nextInst << "\n";
+			return comesFromSingleCall(nextInst);
+		}
+	}
+
+	seenPhiSet.clear();
+	return returnVal;
+}
+
+/*
+ * Uses the first place a global is stored to determine which argument index
+ *  in the call instruction inherits from the global load.
+ * Although parameters are marked with 'unsigned int' type, use long for the
+ *  return type here, so we can use negative error codes and still represent
+ *  the entire range of integer values in 'unsigned int'.
+ */
+long getCallArgIndex(Instruction* instUse, CallInst* callUse) {
+	static std::set<PHINode*> seenPhiSet;
+
+	// because a StoreInst has no users (no return value), look at the users of the 2nd operand
+	if (isa<StoreInst>(instUse)) {
+		instUse = dyn_cast_or_null<Instruction>(instUse->getOperand(1));
+		if (!instUse) {
+			return -1;
+		}
+	}
+//	if (verifyDebug) errs() << "nextStore:" << *instUse << "\n";
+
+	/* What if the instUse is directly an operand of callUse? */
+	for (unsigned int idx = 0; idx < callUse->getNumOperands(); idx += 1) {
+		Value* nextOp = callUse->getOperand(idx);
+		if (nextOp == instUse) {
+			return idx;
+		}
+	}
+
+	// look at all the users of the instruction
+	for (auto use: instUse->users()) {
+		// does it match any of the operands of the call?
+		if (Instruction* nextUse = dyn_cast<Instruction>(use)) {
+//			if (verifyDebug) errs () << "      use:" << *nextUse << "\n";
+			for (unsigned int idx = 0; idx < callUse->getNumOperands(); idx += 1) {
+				Value* nextOp = callUse->getOperand(idx);
+//				if (verifyDebug && !isa<Function>(nextOp)) errs() << "   callop:" << *nextOp << "\n";
+				if (use == nextOp) {
+					return idx;
+				}
+			}
+		}
+	}
+
+	// no direct match: recurse into each instruction user and take the first index found
+	for (auto op: instUse->users()) {
+		if (Instruction* instNext = dyn_cast<Instruction>(op)) {
+			// skip seen PHI nodes (NOTE(review): the static set is never cleared in this function)
+			if (PHINode* nextPhi = dyn_cast<PHINode>(instNext)) {
+				// if we haven't seen it before, go ahead and follow
+				if (seenPhiSet.find(nextPhi) == seenPhiSet.end()) {
+					// but mark as seen
+					seenPhiSet.insert(nextPhi);
+				} else {
+					// skip if we've seen it before
+					continue;
+				}
+			}
+
+//			if (verifyDebug) errs() << "    useop:" << *op << "\n";
+			long nextIdx = getCallArgIndex(instNext, callUse);
+			if (nextIdx >= 0)
+				return nextIdx;
+		}
+	}
+	// if we can't find anything, error code
+	return -1;
+}
+
+
+/*
+ * Walks the uses of a load instruction to see if it's read-only,
+ *  or if it's ever used in a store instruction.
+ * Loading a protected global inside an unprotected function.
+ *
+ * Updated to also look at function arguments.
+ */
+void walkUnPtLoads(LoadRecordType &record) {
+	Value* v = std::get<0>(record);
+	LoadInst* li = dyn_cast<LoadInst>(v);
+
+	GlobalVariable* gv = std::get<1>(record);
+	Function* parentF = std::get<2>(record);
+
+	// debug
+	if (parentF->getName() == "xTimerGenericCommand" && gv->getName() == "xCtrlTimer") {
+		// verifyDebug = true;
+	}
+
+	/* Have to walk the uses to see if it's ever used to store */
+	Instruction* instUse = hasStoreUsage(v);
+	StoreInst* storeUse = dyn_cast_or_null<StoreInst>(instUse);
+	CallInst* callUse = dyn_cast_or_null<CallInst>(instUse);
+
+	if (storeUse) {
+		// get the address/register where the global is being put
+		// TODO: make sure this is best way to tell if it's locally stored
+		Instruction* storeSpot = getNextNonAllocaStore(storeUse);
+		if (verifyDebug) errs() << "init inst:  " << *v << "\n";
+		if (verifyDebug) errs() << "store use:  " << *storeUse << '\n';
+//		if (verifyDebug && storeSpot) errs() << "store spot: " << *storeSpot << '\n';
+
+		if (CallInst* callUse2 = dyn_cast_or_null<CallInst>(storeSpot)) {
+			Type* storeType = storeUse->getOperand(0)->getType();
+
+			if (storeType->isPtrOrPtrVectorTy()) {
+
+				long argIdx = getCallArgIndex(storeUse, callUse2);
+				CallRecordType newRecord = std::make_tuple(callUse2, gv, parentF, argIdx);
+				unPtCallsList.push_back(newRecord);
+			} else {
+				;   // sync global stores?
+			}
+		}
+
+		/* Otherwise, we know this is being used to write to */
+		else if (storeSpot) {
+			/* if the load type is a scalar value, it's fine */
+
+			Type* loadType = v->getType();
+			if (loadType->isPtrOrPtrVectorTy()) {
+				if (li)
+					writeToGlobalMap(unPtReadsFromPtGlbls, gv, parentF, li);
+				else
+					writeToGlobalMap(unPtReadsFromPtGlbls, gv, parentF, storeSpot);
+			} else {
+				;	// it's fine, can't be dereferenced
+			}
+		}
+	}
+
+	else if (callUse) {
+		if (li) {
+//			errs() << *li << "\n";
+			long argIdx = getCallArgIndex(li, callUse);
+			CallRecordType newRecord = std::make_tuple(callUse, gv, parentF, argIdx);
+			unPtCallsList.push_back(newRecord);
+		} else {
+			writeToGlobalMap(unPtCallsWithPtGlbls, gv, parentF, callUse);
+		}
+	}
+
+	verifyDebug = false;
+}
+
+
+/*
+ * Same as above, but for loading unprotected globals by protected functions.
+ */
+// #define DBG_WALK_PT_LOADS
+void walkPtLoads(LoadRecordType &record) {
+	Value* v = std::get<0>(record);
+	LoadInst* li = dyn_cast<LoadInst>(v);
+
+	GlobalVariable* gv = std::get<1>(record);
+	Function* parentF = std::get<2>(record);
+
+	/* If we're loading a scalar, it's fine */
+	Type* loadType = v->getType();
+	if (!loadType->isPtrOrPtrVectorTy()) {
+		return;
+	}
+
+	#ifdef DBG_WALK_PT_LOADS
+	// debug
+	if (parentF->getName() == "protectedPtrArrayWrite" && gv->getName() == "globalPtr") {
+		verifyDebug = true;
+	}
+	#endif
+
+	Instruction* instUse = hasStoreUsage(v);
+	StoreInst* storeUse = dyn_cast_or_null<StoreInst>(instUse);
+	CallInst* callUse = dyn_cast_or_null<CallInst>(instUse);
+
+	if (storeUse) {
+		Instruction* storeSpot = getNextNonAllocaStore(storeUse);
+		#ifdef DBG_WALK_PT_LOADS
+		// breaks for Argument* 's
+		// if (verifyDebug) errs() << "loadUse:  " << *li << "\n";
+		if (verifyDebug) errs() << "storeUse: " << *storeUse << "\n";
+		if (verifyDebug && storeSpot) errs() << "storeSpot:" << *storeSpot << "\n\n";
+		#endif
+
+		if (CallInst* callUse2 = dyn_cast_or_null<CallInst>(storeSpot)) {
+			Function* calledF = callUse2->getCalledFunction();
+
+			/* Skip it if it's to a function which is only called once (skipLibCalls) */
+			if (fnToBeSkipped(calledF)) {
+				return;
+			}
+
+			/* Check the type of the argument being used */
+			Type* argType = storeUse->getOperand(0)->getType();
+
+			if (argType->isPtrOrPtrVectorTy()) {
+				long argIdx = getCallArgIndex(storeUse, callUse2);
+				CallRecordType newRecord = std::make_tuple(callUse2, gv, parentF, argIdx);
+				ptCallsList.push_back(newRecord);
+			} else {
+				;   // sync global stores?
+			}
+		}
+
+		else if (storeSpot) {
+			Type* storeType = storeUse->getOperand(0)->getType();
+
+			if (storeType->isPtrOrPtrVectorTy()) {
+				if (li) {
+					writeToGlobalMap(ptWritesToUnPtGlbls, gv, parentF, li);
+				} else {
+					writeToGlobalMap(ptWritesToUnPtGlbls, gv, parentF, storeUse);
+				}
+			} else {
+				syncGlobalStores.insert(storeUse);
+				storeUse->getDebugLoc();
+			}
+		}
+	}
+
+	else if (callUse) {
+		Function* calledF = callUse->getCalledFunction();
+
+		/* Skip it if it's to a function which is only called once (skipLibCalls) */
+		if (fnToBeSkipped(calledF)) {
+			return;
+		}
+
+		/* Add this to the list to report */
+//		if (verifyDebug)
+//			errs() << "call use: " << *callUse << "\n";
+//		if (verifyDebug && li)
+//			errs() << "load use: " << *li << "\n";
+		if (li) {
+			long argIdx = getCallArgIndex(li, callUse);
+			CallRecordType newRecord = std::make_tuple(callUse, gv, parentF, argIdx);
+			ptCallsList.push_back(newRecord);
+		} else {
+			writeToGlobalMap(ptCallsWithUnPtGlbls, gv, parentF, callUse);
+		}
+	}
+
+	#ifdef DBG_WALK_PT_LOADS
+	verifyDebug = false;
+	#endif
+}
+
+
+/*
+ * Storing to unprotected globals from protected functions.
+ */
+// #define DBG_WALK_UNPT_STORES
+void dataflowProtection::walkUnPtStores(StoreRecordType &record) {
+	StoreInst* si = std::get<0>(record);
+	GlobalVariable* gv = std::get<1>(record);
+	Function* parentF = std::get<2>(record);
+	Type* storeType = si->getOperand(0)->getType();
+
+	// have we been asked to skip it?
+	if (shouldSkipGlobalUsage(gv, parentF)) {
+		return;
+	}
+
+	#ifdef DBG_WALK_UNPT_STORES
+	// debug
+	if (parentF->getName() == "scalarMultiply" && gv->getName() == "matrix0") {
+		verifyDebug = true;
+		errs() << *si << "\n";
+	}
+	#endif
+
+	/* There are some functions which are only called once (skipLibCalls).
+	 * Writing from these return values is not an issue. */
+	if (comesFromSingleCall(si)) {
+		#ifdef DBG_WALK_UNPT_STORES
+		if (verifyDebug) errs() << "but comes from single call\n";
+		#endif
+		return;
+	}
+
+	if (storeType->isPtrOrPtrVectorTy()) {
+		/* This is actually OK if the thing being pointed to is const
+		 * but LLVM doesn't have a nice way of checking this.
+		 * Walk the uses and see if the pointer it's stored in is dereferenced */
+//		if (verifyDebug) errs() << "storeRecord: " << *si << "\n";
+
+		AllocaInst* ai = findAllocaInst(si);
+		if (ai) {
+			Instruction* laterUse = isDereferenced(ai, si);
+
+			if (laterUse) {
+				writeToGlobalMap(ptWritesToUnPtGlbls, gv, parentF, si);
+			} else {
+				;    // it is read-only
+			}
+		} else {
+			writeToGlobalMap(ptWritesToUnPtGlbls, gv, parentF, si);
+		}
+
+	}
+
+	/* We can vote on these values before storing */
+	else {
+		syncGlobalStores.insert(si);
+	}
+
+	#ifdef DBG_WALK_UNPT_STORES
+	verifyDebug = false;
+	#endif
+}
+
+
+/*
+ * Verify that all of the options used to configure COAST for this pass are safe to follow.
+ *
+ * Here are the rules:
+ *
+ * +---------+-------------------------+-------------------------------------+
+ * |  ====================  Protected -> Not Protected  ===================  |
+ * +---------+-------------------------+-------------------------------------+
+ * |         | Reading                 | Writing                             |
+ * +---------+-------------------------+-------------------------------------+
+ * | Value   | OK                      | OK                                  |
+ * |         |                         | Vote first to preserve protection   |
+ * +---------+-------------------------+-------------------------------------+
+ * |         |                         | A pointer can only be stored        |
+ * | Pointer | Only if it's never used | if the value it points to is const. |
+ * |         | to write (const)        | No voting is allowed                |
+ * |         |                         | and non-consts are not allowed.     |
+ * +---------+-------------------------+-------------------------------------+
+ * |  ====================  Not Protected -> Protected  ===================  |
+ * +---------+-------------------------+-------------------------------------+
+ * |         | Reading                 | Writing                             |
+ * +---------+-------------------------+-------------------------------------+
+ * | Value   | OK                      | Not OK                              |
+ * +---------+-------------------------+-------------------------------------+
+ * | Pointer | OK (?)                  | Not OK                              |
+ * +---------+-------------------------+-------------------------------------+
+ *
+ * Since the LLVM IR does not contain information about a variable's
+ *  const-ness, our pass looks for these usages itself.
+ * The load of any protected pointer must be followed to the end of the
+ *  use chain to make sure no attempts are made to write to this address.
+ *
+ * TODO: track pointers across function calls
+ */
+void dataflowProtection::verifyOptions(Module& M) {
+	fnsToClone_ptr = &fnsToClone;
+
+    // catalog all the loads across the replication boundary
+    std::list< LoadRecordType > unPtLoadRecords;
+    std::list< LoadRecordType > ptLoadRecords;
+    std::list< StoreRecordType > unPtStoreRecords;
+
+    // look through the protected global variables
+    for (auto g : globalsToClone) {
+		// get all the users
+		for (auto u : g->users()) {
+			// is it an instruction?
+			if (Instruction* UI = dyn_cast<Instruction>(u)) {
+				Function* parentF = UI->getParent()->getParent();
+
+                // is the instruction in a protected function?
+				if (fnsToClone.find(parentF) == fnsToClone.end()) {
+
+                    /* Any stores in here are not allowed (non-protected function to protected global) */
+					if (StoreInst* si = dyn_cast<StoreInst>(UI)) {
+						// add it to the list of infractions
+						writeToGlobalMap(unPtWritesToPtGlbls, g, parentF, si);
+					}
+                    
+                    /* note any load instructions to track later */
+                    else if (LoadInst* li = dyn_cast<LoadInst>(UI)) {
+                        LoadRecordType newRecord = std::make_tuple(li, g, parentF);
+                        unPtLoadRecords.push_back(newRecord);
+                    }
+                }
+
+            }
+			/* end instruction use */
+            
+            /* GEPs are often inline */
+			else if (ConstantExpr* CE = dyn_cast<ConstantExpr>(u)) {
+                if (CE->isGEPWithNoNotionalOverIndexing()) {
+                    for (auto cu : CE->users()) {
+						if (LoadInst* li = dyn_cast<LoadInst>(cu)) {
+                            Function* parentF = li->getParent()->getParent();
+
+                            // is the instruction in a protected function?
+                            if (fnsToClone.find(parentF) == fnsToClone.end()) {
+                            	LoadRecordType newRecord = std::make_tuple(li, g, parentF);
+                            	unPtLoadRecords.push_back(newRecord);
+                            }
+                        }
+                    }
+                }
+            }
+			/* end GEP use */
+        }
+    }
+
+    // Make sure that all unprotected globals are not used in protected functions
+	auto moduleEnd = M.global_end();
+	for (auto g = M.global_begin(); g != moduleEnd; g++) {
+		GlobalVariable* gv = &(*g);
+		if (willBeCloned(gv)) {
+			continue;
+		}
+
+		/* Skip any globals that are constant, because we can't change them anyway. */
+		else if (gv->isConstant()) {
+			continue;
+		}
+
+        /* Now it's either in globalsToSkip, or not marked at all.
+         * Either way, shouldn't be used in a protected function. */
+        else {
+			for (auto u : gv->users()) {
+				// is it an instruction?
+				if (Instruction* UI = dyn_cast<Instruction>(u)) {
+					Function* parentF = UI->getParent()->getParent();
+
+					// debug
+					if (gv->getName() == "vTaskSwitchContext" && parentF->getName() == "pxCurrentTCB") {
+						// verifyDebug = true;
+					}
+
+                    // is the instruction in a protected function?
+					if (fnsToClone.find(parentF) != fnsToClone.end()) {
+                        /* Stores to unprotected globals from protected functions are not allowed */
+						if (fnsToSkip.find(parentF) != fnsToSkip.end()) {
+							continue;
+						}
+						if (StoreInst* si = dyn_cast<StoreInst>(UI)) {
+                            StoreRecordType newRecord = std::make_tuple(si, gv, parentF);
+                            unPtStoreRecords.push_back(newRecord);
+//                            if (verifyDebug) errs() << *si << '\n';
+                        }
+
+                        /* We want to walk the load uses here too to find any later stores */
+						else if (LoadInst* li = dyn_cast<LoadInst>(UI)) {
+                            LoadRecordType newRecord = std::make_tuple(li, gv, parentF);
+                            ptLoadRecords.push_back(newRecord);
+                        }
+                    }
+                }
+				/* end instruction use */
+
+                /* GEPs are often inline */
+				else if (ConstantExpr* CE = dyn_cast<ConstantExpr>(u)) {
+					if (CE->isGEPWithNoNotionalOverIndexing()) {
+
+                        for (auto cu : CE->users()) {
+							/* GEPs used by stores are what we're looking for */
+							if (StoreInst* si = dyn_cast<StoreInst>(cu)) {
+                                Function* parentF = si->getParent()->getParent();
+
+                                // is the instruction in a protected function?
+                                if (fnsToClone.find(parentF) != fnsToClone.end()) {
+                                	StoreRecordType newRecord = std::make_tuple(si, gv, parentF);
+                                	unPtStoreRecords.push_back(newRecord);
+                                }
+                            }
+							else if (ConstantExpr* CE2 = dyn_cast<ConstantExpr>(cu)) {
+								/* more casts hiding inside of things
+								 * possible ugly things we have to deal with:
+								 * store <4 x i32> %1, <4 x i32>* bitcast (i32* getelementptr inbounds ([2 x [8 x i32]], [2 x [8 x i32]]* @matrix0, i64 0, i64 0, i64 4) to <4 x i32>*)
+								 */
+								if (CE2->isCast()) {
+									if (noMemReplicationFlag)
+										continue;
+									for (auto user : CE2->users()) {
+										if (StoreInst* si = dyn_cast<StoreInst>(user)) {
+											Function* parentF = si->getParent()->getParent();
+
+											// is the instruction in a protected function?
+											if (fnsToClone.find(parentF) != fnsToClone.end()) {
+												StoreRecordType newRecord = std::make_tuple(si, gv, parentF);
+												unPtStoreRecords.push_back(newRecord);
+											}
+										}
+									}
+								}
+							}
+                        }
+                    }
+					/* end GEP use */
+					else if (CE->isCast()) {
+						/* casts hiding inside things -
+						 * see cloneConstantExprOperands in cloning.cpp */
+						if (noMemReplicationFlag)
+							continue;
+
+						// have to see if any of its users are instructions
+						for (auto user : CE->users()) {
+							// specifically Store instructions
+							if (StoreInst* si = dyn_cast<StoreInst>(user)) {
+								Function* parentF = si->getParent()->getParent();
+
+								// is the instruction in a protected function?
+								if (fnsToClone.find(parentF) != fnsToClone.end()) {
+									StoreRecordType newRecord = std::make_tuple(si, gv, parentF);
+									unPtStoreRecords.push_back(newRecord);
+								}
+							}
+						}
+					}
+					/* end other ConstantExpr use */
+                }
+
+				else {
+					PRINT_STRING("-- unidentified global user:");
+					PRINT_VALUE(u);
+				}
+				verifyDebug = false;
+            }
+        }
+    }
+
+	/* Repeat these as long as the sizes keep increasing */
+	while (true) {
+		/* Each unprotected load should be traced to make sure it's read-only. */
+		for (auto record : unPtLoadRecords) {
+			walkUnPtLoads(record);
+		}
+		unPtLoadRecords.clear();
+
+		for (auto record : ptLoadRecords) {
+			walkPtLoads(record);
+		}
+		ptLoadRecords.clear();
+		/* Clear afterwards for later checking. */
+
+		/* Now we have to walk all of the calls */
+		// protected functions using unprotected globals
+//		if (ptCallsList.size() > 0)
+//			errs() << "\nprotected functions using unprotected globals in calls:\n";
+		for (auto record : ptCallsList) {
+			CallInst* ci = std::get<0>(record);
+			GlobalVariable* gv = std::get<1>(record);
+			Function* parentF = std::get<2>(record);
+			long argIdx = std::get<3>(record);
+
+			/* Being used by protected or unprotected function is fine, as long as it's read-only.
+			 * Track each argument use as if it was a load instruction of the global. */
+
+			Function* calledFunction = ci->getCalledFunction();
+			if (!calledFunction) {
+				/* If this is null, then this is an indirect function call, which we can't track.
+				 * Conservatively report it in the same map as the other infractions of this loop. */
+				writeToGlobalMap(ptCallsWithUnPtGlbls, gv, parentF, ci);
+				continue;
+			}
+
+			/* If the function being called is not protected, this is fine.
+			 * Unless the callee then calls a protected function with the argument
+			 *  and it's not read-only? */
+			if (fnsToClone.find(calledFunction) == fnsToClone.end()) {
+				continue;	// TODO: above comment
+			}
+
+			/* If it's a call to a replFnCalls function, then that's not allowed */
+			if (isCoarseGrainedFunction(calledFunction->getName())) {
+				writeToGlobalMap(ptCallsWithUnPtGlbls, gv, parentF, ci);
+				continue;
+			}
+
+			if (argIdx < 0 || static_cast<size_t>(argIdx) >= calledFunction->arg_size()) {
+				/* Either no relationship was found, or the index is past the formal parameters
+				 * (e.g. a variadic argument). Can't map it to an Argument; conservatively reject. */
+				errs() << info_string << " Couldn't find argument index for call:\n" << *ci << "\n";
+				errs() << "  (using unprotected global '" << gv->getName() << "' in basic block '"
+					   << ci->getParent()->getName() << "' of function '" << parentF->getName() << "')\n";
+				writeToGlobalMap(ptCallsWithUnPtGlbls, gv, parentF, ci);
+				continue;
+			}
+
+			auto argIter = calledFunction->arg_begin() + argIdx;	// in range: checked above
+			LoadRecordType newRecord = std::make_tuple(&(*argIter), gv, calledFunction);
+			ptLoadRecords.push_back(newRecord);
+		}
+		ptCallsList.clear();
+
+//		if (unPtCallsList.size() > 0)
+//			errs() << "\nunprotected functions using protected globals in calls:\n";
+		// unprotected functions using protected globals
+		for (auto record : unPtCallsList) {
+			CallInst* ci = std::get<0>(record);
+			GlobalVariable* gv = std::get<1>(record);
+			Function* parentF = std::get<2>(record);
+			long argIdx = std::get<3>(record);
+
+			if (argIdx < 0) {
+				/* Then we couldn't find the relationship. Conservatively add to the list
+				 * of things that are not allowed. */
+				errs() << info_string << " Couldn't find argument index for call:\n" << *ci << "\n";
+				errs() << "  (using protected global '" << gv->getName() << "' in basic block '"
+					   << ci->getParent()->getName() << "' of function '" << parentF->getName() << "')\n";
+				writeToGlobalMap(unPtCallsWithPtGlbls, gv, parentF, ci);
+				continue;
+			}
+//			errs() << "CallInst: " << *ci << "\n";
+
+			/* Being used by protected or unprotected function is fine, as long as it's read-only.
+			 * Track each argument use as if it was a load instruction of the global. */
+
+			Function* calledFunction = ci->getCalledFunction();
+			if (!calledFunction) {
+				/* If this is null, then this is an indirect function call, which we can't track.
+				 * Conservatively add this to the list of things that are not allowed. */
+				writeToGlobalMap(unPtCallsWithPtGlbls, gv, parentF, ci);
+				continue;
+			} else if (calledFunction->isVarArg()) {
+				/* We also can't track functions that are variadic, because of
+				 * the way LLVM does function argument lists.
+				 */
+				writeToGlobalMap(unPtCallsWithPtGlbls, gv, parentF, ci);
+				continue;
+			}
+//			errs() << "Inside function '" << parentF->getName() << "'\n";
+
+			/* Check the index before forming the iterator: stepping past arg_end()
+			 * and dereferencing it would be undefined behavior. */
+			if (static_cast<size_t>(argIdx) >= calledFunction->arg_size()) {
+				errs() << err_string
+					   << " function doesn't have that many arguments! (0 indexed)\n"
+					   << "  " << calledFunction->getName()
+					   << " (" << argIdx << " >= "
+					   << calledFunction->arg_size() << ")\n";
+				writeToGlobalMap(unPtCallsWithPtGlbls, gv, parentF, ci);
+				continue;
+			}
+
+			auto argIter = calledFunction->arg_begin() + argIdx;
+			// TODO: might want to use ->getArg() instead, but that function
+			//  isn't available until LLVM version 10
+
+// 			errs() << "operand number " << argIdx << " of function '" << calledFunction->getName() << "': ";
+//			errs() << *argIter; // << "\n Of type: " << *argIter->getType();
+			LoadRecordType newRecord = std::make_tuple(&(*argIter), gv, calledFunction);
+			// put it in the right list
+			if (fnsToClone.find(calledFunction) == fnsToClone.end()) {
+				// not protected function
+				unPtLoadRecords.push_back(newRecord);
+			} else {
+				// protected function
+				ptLoadRecords.push_back(newRecord);
+			}
+		}
+//		errs() << "\n";
+		unPtCallsList.clear();
+
+		/* Leaving conditions */
+		if (unPtLoadRecords.size() == 0 && ptLoadRecords.size() == 0) {
+			break;
+		}
+	}
+
+	/* This is only done once, so outside the loop */
+    for (auto record : unPtStoreRecords) {
+    	walkUnPtStores(record);
+    }
+
+    /* Print scope crossing warning messages */
+	// referencing protected globals from unprotected functions
+	printGlobalScopeErrorMessage(unPtWritesToPtGlbls, true, "written in");
+	printGlobalScopeErrorMessage(unPtReadsFromPtGlbls, true, "read in");
+	if (unPtReadsFromPtGlbls.size() > 0) {
+		errs() << " -- Please verify that these kinds of reads are read-only --\n";
+	}
+
+	// referencing unprotected globals from protected functions
+	printGlobalScopeErrorMessage(ptWritesToUnPtGlbls, false, "read from and written to inside");
+
+	// using globals across scope boundaries in function calls
+	printGlobalScopeErrorMessage(ptCallsWithUnPtGlbls, false, "used in a function call in");
+	printGlobalScopeErrorMessage(unPtCallsWithPtGlbls, true, "used in a function call in");
+	if ( (ptCallsWithUnPtGlbls.size() > 0) || (unPtCallsWithPtGlbls.size() > 0) ) {
+		errs() << " -- COAST currently does not support tracking global pointer crossings across function calls --\n";
+	}
+
+	// kill the compilation if we saw any of these errors
+	if ( 	(unPtWritesToPtGlbls.size()  > 0)  ||
+			(unPtReadsFromPtGlbls.size() > 0)  ||
+			(ptWritesToUnPtGlbls.size()  > 0)  ||
+			(ptCallsWithUnPtGlbls.size() > 0)  ||
+			(unPtCallsWithPtGlbls.size() > 0)  )
+	{
+		errs() << "\nExiting...\n";
+		// good place for debug
+		dumpModule(M);
+		std::exit(-1);
+	}
+
+	// print some more stats
+	if (verboseFlag && syncGlobalStores.size() > 0) {
+		errs() << info_string << " syncing before store\n";
+		for (auto si : syncGlobalStores) {
+			errs() << *si << "\n  in function '"
+				   << si->getParent()->getParent()->getName() << "'\n";
+		}
+	}
+
+	return;
+}
+
+
+void dataflowProtection::printGlobalScopeErrorMessage(GlobalFunctionSetMap &globalMap,
+		bool globalPt, std::string directionMessage)
+{
+	// short circuit
+	if (globalMap.size() == 0) {
+		return;
+	}
+
+	// printing specific to this set map
+	std::string firstMessage;
+	std::string secondMessage;
+	if (globalPt) {
+		firstMessage = err_string + " protected global \"";
+		secondMessage = "\" is being " + directionMessage + " unprotected functions:\n";
+	} else {
+		firstMessage = err_string + " unprotected global \"";
+		secondMessage = "\" is being " + directionMessage + " protected functions:\n";
+	}
+
+	// look at all the items
+	for (auto item : globalMap) {
+		errs() << firstMessage << item.first->getName() << secondMessage;
+		for (auto fnSet : item.second) {
+			Function* f = fnSet.first;
+			Instruction* i = fnSet.second;
+			auto dbgInfo = i->getDebugLoc();
+			errs() << "\t\"" << f->getName() << "\"";
+			if (CallInst* callUse = dyn_cast<CallInst>(i)) {
+				Function* calledF = callUse->getCalledFunction();
+				if (calledF && calledF->hasName()) {
+					errs() << " in call to \"" << calledF->getName() << "\"";
+				}
+			}
+			errs() << " at ";
+			if (dbgInfo) {
+				dbgInfo.print(errs());
+				errs() << ",\n";
+			} else {
+				errs() << "  " << *i << ",\n";
+			}
+		}
+	}
+}
diff --git a/tests/COAST.h b/tests/COAST.h
index 448f51969..90c0e7c8c 100644
--- a/tests/COAST.h
+++ b/tests/COAST.h
@@ -1,33 +1,56 @@
-#ifndef __TROPIC_MACROS__
-#define __TROPIC_MACROS__
+#ifndef __COAST_MACROS__
+#define __COAST_MACROS__
 
-//This file contains the macros for the TROPIC pass.
-//The annotations need to match those in dataflowProtection.h
+/*
+ * This file contains the macros for the COAST pass.
+ * The annotations need to match those in dataflowProtection.h
+ * See documentation for how to use these properly.
+ */
 
-//Macros for variables, functions
+// Macros for variables, functions
 #define __NO_xMR __attribute__((annotate("no_xMR")))
 #define __xMR __attribute__((annotate("xMR")))
 
-//Macro for function calls - same as replicateFnCalls
+// Macro for function calls - same as replicateFnCalls
 #define __xMR_FN_CALL __attribute__((annotate("xMR_call")))
-//same as skipLibCalls
+// same as skipLibCalls
 #define __SKIP_FN_CALL __attribute__((annotate("coast_call_once")))
 
-//Macros to set the default behavior of the code
+// Macros to set the default behavior of the code
 #define __DEFAULT_xMR int __attribute__((annotate("set_xMR_default"))) __xMR_DEFAULT_BEHAVIOR__;
 #define __DEFAULT_NO_xMR int __attribute__((annotate("set_no_xMR_default"))) __xMR_DEFAULT_BEHAVIOR__;
 
-//The variable should not be optimized away
-#define __COAST_VOLATILE __attribute__((annotate("coast_volatile")))
+// The variable should not be optimized away
+// Formerly a separate annotation, now use GCC "used" annotation
+#define __COAST_VOLATILE __attribute__((used))
 
-//register a function as one which wraps malloc()
-#define MALLOC_WRAPPER_REGISTER(fname) void* fname##_COAST_WRAPPER(size_t size);
+// This function is an Interrupt Service Routine (ISR)
+#define __ISR_FUNC __attribute__((annotate("isr_function")))
+
+// Replicate the return values of this function
+#define __xMR_RET_VAL __attribute__((annotate("repl_return_val")))
+
+// This function will be a protected library function (don't change signature)
+#define __xMR_PROT_LIB __attribute__((annotate("protected_lib")))
+
+// Clone function arguments *after* the call (i.e. for scanf)
+// There is a version which clones all of the args for every function call
+#define __xMR_ALL_AFTER_CALL __attribute__((annotate("clone-after-call-")))
+// And another version which can specify argument numbers for each call
+// Specify the arg numbers as (name, 1_2_3)
+// Linters might not like the underscores, but it's needed for valid function names
+// They must also be registered, similar to below, to make it through the compiler
+#define __xMR_AFTER_CALL(fname, x) fname##_CLONE_AFTER_CALL_##x
+
+// Register a function as one which wraps malloc()
+#define MALLOC_WRAPPER_REGISTER(fname) void* fname##_COAST_WRAPPER(size_t size)
 #define MALLOC_WRAPPER_CALL(fname, x) fname##_COAST_WRAPPER((x))
 
-// also one which wraps printf, or something like it
-#define PRINTF_WRAPPER_REGISTER(fname) int fname##_COAST_WRAPPER(const char* format, ...);
+// Also one which wraps printf, or something like it
+#define PRINTF_WRAPPER_REGISTER(fname) int fname##_COAST_WRAPPER(const char* format, ...)
 #define PRINTF_WRAPPER_CALL(fname, fmt, ...) fname##_COAST_WRAPPER(fmt, __VA_ARGS__)
 
+// A generic macro for any kind of wrapper you want to use
 #define GENERIC_COAST_WRAPPER(fname) fname##_COAST_WRAPPER
 
 // COAST normally checks that a replicated global is used only in
@@ -35,4 +58,12 @@
 //  a function, with the name of the global to ignore boundary crossing
 #define __COAST_IGNORE_GLOBAL(name) __attribute__((annotate("no-verify-"#name)))
 
+// This directive is used to tell COAST that the argument [num] should not be replicated.
+// If multiple arguments need to be marked this way, this directive should be placed
+//  on the function multiple times.
+#define __NO_xMR_ARG(num) __attribute__((annotate("no_xMR_arg-"#num)))
+
+// convenience for no-inlining functions
+#define __COAST_NO_INLINE __attribute__((noinline))
+
 #endif
diff --git a/tests/TMRregression/Makefile b/tests/TMRregression/Makefile
index bda6a71a7..92364e58c 100644
--- a/tests/TMRregression/Makefile
+++ b/tests/TMRregression/Makefile
@@ -24,23 +24,22 @@ CPPFLAGS := -stdlib=libc++ -I/usr/include/c++/5.4.0/ -I/usr/include/x86_64-linux
 # CPPSTD - C++ standard to use when compiling files, default is c++11
 ###########################################################
 
-LLVMROOT = $(HOME)/coast
-CLANG = $(LLVMROOT)/build/bin/clang
-CLANG++ = $(LLVMROOT)/build/bin/clang++
-LLVMDIS = $(LLVMROOT)/build/bin/llvm-dis
-LLVMOPT = $(LLVMROOT)/build/bin/opt
-LLVMLLC	= $(LLVMROOT)/build/bin/llc
-LLVMLINK= $(LLVMROOT)/build/bin/llvm-link
+CLANG = clang-7
+CLANG++ = clang++-7
+LLVMDIS = llvm-dis-7
+LLVMOPT = opt-7
+LLVMLLC	= llc-7
+LLVMLINK= llvm-link-7
 
-export COAST_ROOT = $(LLVMROOT)
+export COAST_ROOT := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))/../../
 
 CFLAGS = -I$(SRCFOLDER)
 
 ## Recursive wildcard
 rwildcard=$(wildcard $1$2)$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2))
-PROJECT_BUILD_DIR = $(LLVMROOT)/projects/build/
+PROJECT_BUILD_DIR = $(COAST_ROOT)/projects/build/
 PROJECT_LIBS := $(call rwildcard,$(PROJECT_BUILD_DIR),*.so)
-OPT_LIBS_LOAD = -load $(LLVMROOT)/projects/build/errorBlocks/ErrorBlocks.so -load $(LLVMROOT)/projects/build/dataflowProtection/DataflowProtection.so $(foreach lib, $(PROJECT_LIBS) , -load $(lib))
+OPT_LIBS_LOAD = -load $(COAST_ROOT)/projects/build/errorBlocks/ErrorBlocks.so -load $(COAST_ROOT)/projects/build/dataflowProtection/DataflowProtection.so $(foreach lib, $(PROJECT_LIBS) , -load $(lib))
 
 CFILES = $(filter %.c,$(SRCFILES))
 CPPFILES = $(filter %.cpp,$(SRCFILES))
@@ -53,6 +52,15 @@ ifndef $(BCPPFILES)
 BCPPFILES = $(CPPFILES:.cpp=.bcpp)
 endif
 
+# ANSI escape color codes
+COLOR_BLUE			:= '\033[0;34m'
+COLOR_MAGENTA		:= '\033[0;35m'
+COLOR_YELLOW		:= '\033[0;33m'
+COLOR_GREEN			:= '\033[0;32m'
+COLOR_BLACK			:= '\033[0;30m'
+NO_COLOR			:= '\033[0m'
+HIGHLIGHT			:= '\033[0;7m'
+
 .PHONY: debug
 
 run: $(TARGET)
@@ -74,7 +82,7 @@ $(TARGET): $(TARGET).s
 ifeq ($(CPPFILES),)
 	$(CLANG) $(TARGET).s $(XLFLAGS) -o $@
 else
-	$(CLANG++) $(TARGET).s $(XLFLAGS) -o $@
+	$(CLANG++) -stdlib=libc++ $(TARGET).s $(XLFLAGS) -o $@
 endif
 
 # rule for .s files
@@ -83,12 +91,12 @@ $(TARGET).s: $(TARGET).opt.bc
 
 # rule for opt.bc files
 $(TARGET).opt.bc: $(TARGET).clang.bc
-	$(LLVMDIS) -f $<
+	@$(LLVMDIS) -f $<
 	$(LLVMOPT) $(OPT_FLAGS) $(OPT_LIBS_LOAD) $(OPT_PASSES) -o $@ $<
-	$(LLVMDIS) -f $@
+	@$(LLVMDIS) -f $@
 
 $(TARGET).clang.bc: $(BCFILES) $(BCPPFILES)
-	$(LLVMLINK) $(BCFILES) $(BCPPFILES) -o $(TARGET).clang.bc
+	@$(LLVMLINK) $(BCFILES) $(BCPPFILES) -o $(TARGET).clang.bc
 
 # rule for .bc files
 %.bc: $(SRCFOLDER)/%.c
diff --git a/tests/TMRregression/unitTestDriver.py b/tests/TMRregression/unitTestDriver.py
index 2e731236b..69bbef957 100755
--- a/tests/TMRregression/unitTestDriver.py
+++ b/tests/TMRregression/unitTestDriver.py
@@ -7,82 +7,338 @@
 #  before your list of arguments
 
 import os
+import re
 import sys
+import time
+import shlex
+import signal
+import pathlib
 import argparse
 import subprocess
-import shlex
 
+
+# globals
 singleFlag = False
 verboseFlag = False
+this_dir = pathlib.Path(__file__).resolve().parents[0]
+coast_root = this_dir.parents[1]
+makefile_path = coast_root / "unittest" / "makefile.customFile"
+code_path = this_dir / "unitTests"
+user_incs = code_path.parents[1]
+qemuWaitTime = 2
+
+# discover GCC version (each component may have several digits, e.g. 10.2.0)
+gcc_proc = subprocess.Popen(['gcc', '--version'], stdout=subprocess.PIPE)
+gcc_output = gcc_proc.communicate()[0].decode()
+gcc_version_re = re.compile(r"gcc \((.*)\) (\d+\.\d+\.\d+)")
+gcc_match = gcc_version_re.search(gcc_output)
+gcc_version_num = gcc_match.group(2) if gcc_match else None  # None: banner not recognized
+
+# long regexes
+ptrArithRegex = re.compile(r"""\
+ 1\s+2\s+0\s+4\s+0\s+-1\s+7\s+5\s+
+ 0xFA, 0x55
+ 2\s+2\s+7\s+4\s+5\s+6\s+7\s+8\s+
+ 3
+ 1 @ 0x[A-Fa-f0-9]+
+ 2 @ 0x[A-Fa-f0-9]+
+ 3 @ 0x[A-Fa-f0-9]+
+ 4 @ 0x[A-Fa-f0-9]+
+0x[A-Fa-f0-9]+, 0x[A-Fa-f0-9]+
+ 1 @ 0x[A-Fa-f0-9]+
+ 8 @ 0x[A-Fa-f0-9]+
+42 @ 0x[A-Fa-f0-9]+
+ 4 @ 0x[A-Fa-f0-9]+
+ 0x00AB
+ 4\s+7\s+16\s+32\s+
+ [579]
+Success!""", re.MULTILINE)
+# TODO: last integer
+timeCRegex = re.compile(r"""\
+Using time and ctime: [A-Za-z]{3,5} [A-Za-z]{3}\s+(0?[1-9]|[12]\d|3[01]) ([01]\d|2[0-3]):?([0-5]\d):?([0-5]\d) \d+
+Using localtime and asctime: [A-Za-z]{3,5} [A-Za-z]{3}\s+(0?[1-9]|[12]\d|3[01]) ([01]\d|2[0-3]):?([0-5]\d):?([0-5]\d) \d+
+Using difftime and mktime: \d+\.\d+ seconds since today started
+Using gmtime and strftime: GMT - ([01]\d|2[0-3]):?([0-5]\d)
+Using clock: \d+ clicks to run \(\d\.\d+ seconds\)
+""")
+whetstoneRegex = re.compile(r"""\
+Loops: [1-9][0-9]+, Iterations: [1-9][0-9]*, Duration: [0-9]+ sec.
+C Converted Double Precision Whetstones: [0-9]+\.[0-9]+ MIPS
+""")
+
 
 # class that represents a configuration
 class runConfig(object):
     """docstring for runConfig."""
-    def __init__(self, f, ef=None, xc=None, op=None, nm=None):
+    def __init__(self, f, ef=None, xc=None, op=None, nm=None, cf=False, hk=False, sn=False, xl=None, xlc=None, qtm=None, rgx=None, brd=None):
         self.fname = f
-        self.extraFiles = ef
-        self.xcFlg = xc
-        self.optFlg = op
-        self.noMemFlg = nm
+        self.extraFiles = ef    # other files to use in compilation
+        self.xcFlg = xc         # additional flags in clang compile step
+        self.optFlg = op        # additional flags in optimizer step
+        self.xlFlg = xl         # additional flags in link step
+        self.xlcFlg = xlc       # additional flags in assembler step
+        self.noMemFlg = nm      # flags to add to opt when "noMemReplication" is used
+        self.compileFail = cf   # supposed to fail in compile
+        self.hardKill = hk      # must be killed by ctrl-c
+        self.skipNormal = sn    # don't run without COAST
+        self.qemuTime = qtm     # how long to wait before terminating QEMU
+        self.outRegx = rgx      # regex for validating output printing
+        self.board = brd        # specify default test target
 
 # keep this up to date manually
 # dictionary of specific flags for each unitTest
 customConfigs = [
     runConfig("annotations.c"),
+    runConfig("argAttrs.c"),
     runConfig("argSync.c", xc="-O3"),
-    runConfig("atomics.c"),
+    runConfig("arm_locks.c", brd="pynq", hk=True),
+    runConfig("atomics.c", nm="__SKIP_THIS",
+        rgx=re.compile(r"counter = [2-4]")),
     runConfig("basicIR.c"),
     runConfig("bsearch_strcmp.c"),
     runConfig("classTest.cpp"),
+    runConfig("cloneAfterCall.c", sn=True,
+        rgx=re.compile(r"Bob \(16\): 3.7[0-9]*\nSuccess!\n", re.MULTILINE)),
     runConfig("exceptions.cpp", \
         op="-replicateFnCalls=_ZNSt12_Vector_baseIiSaIiEE11_M_allocateEm,_ZSt27__uninitialized_default_n_aIPimiET_S1_T0_RSaIT1_E",  \
         nm="-ignoreFns=_ZNSt12_Vector_baseIiSaIiEE13_M_deallocateEPim"),
+    runConfig("fibonacci.c", sn=True),
     runConfig("fSigTypes.c", \
         ef="fSigTypes_ext.c"),
+    runConfig("funcPtrStruct.c",
+        rgx=re.compile(r"100 150\n(1 2 3\n){1,3}Finished", re.MULTILINE)),
+    runConfig("globalPointers.c", \
+        xc="-g3", cf=True, sn=True),
+    runConfig("halfProtected.c", op="-skipLibCalls=malloc"),
     runConfig("helloWorld.cpp"),
     runConfig("inlining.c", \
         xc="-O2"),
+    runConfig("linkedList.c", xc="-g3", cf=True, sn=True),
     runConfig("load_store.c"),
-    runConfig("mallocTest.c", \
-        nm="-skipLibCalls=free"),
-    runConfig("nestedCalls.c", \
+    runConfig("mallocTest.c", sn=True,
+        rgx=re.compile(r"^Finished", re.MULTILINE)),
+    runConfig("nestedCalls.c", xc="-O2",\
         op="-replicateFnCalls=memset"),
-    runConfig("ptrArith.c"),
+    runConfig("ptrArith.c", rgx=ptrArithRegex),
+    runConfig("protectedLib.c", op="-protectedLibFn=sharedFunc"),
+    runConfig("replReturn.c", sn=True, nm="__SKIP_THIS",
+        op="-cloneReturn=returnTest -replicateFnCalls=malloc -cloneFns=testWrapper",
+        rgx=re.compile(r"(0x[0-9A-Fa-f]+\n){2,3}Success!\n", re.MULTILINE)),
     runConfig("returnPointer.c"),
     runConfig("segmenting.c"),
+    runConfig("signalHandlers.c", hk=True,
+        op="-skipLibCalls=__sysv_signal,signal"),
     runConfig("simd.c", \
         xc="-O3"),
+    runConfig("stackAttack.c", xc="-g3"),
+    runConfig("stackProtect.c", qtm=1, xc="-g3", op="-protectStack"),
     runConfig("structCompare.c"),
     runConfig("testFuncPtrs.c"),
-    runConfig("time_c.c"),
-    runConfig("vecTest.cpp", \
-        op="-replicateFnCalls=_ZNSt12_Vector_baseIiSaIiEE11_M_allocateEm,_ZSt34__uninitialized_move_if_noexcept_aIPiS0_SaIiEET0_T_S3_S2_RT1_", \
-        nm="-ignoreFns=_ZNSt12_Vector_baseIiSaIiEE13_M_deallocateEPim"),
-    runConfig("verifyOptions.c"),
-    runConfig("whetstone.c"),
+    runConfig("time_c.c", op="-skipLibCalls=clock -cloneAfterCall=time",
+        rgx=timeCRegex),
+# The Travis Docker has GCC v7.5.0, Ubuntu 18.04. vecTest.cpp was tested on GCC v5.4.0, Ubuntu 16.04.
+# Between compiler versions, there were apparently significant changes to how vectors work,
+#  and these flags actually now break the test instead of fixing it.
+# Add them in when on the old system.
+    runConfig("vecTest.cpp",
+        op="-replicateFnCalls=_ZNSt12_Vector_baseIiSaIiEE11_M_allocateEm,_ZSt34__uninitialized_move_if_noexcept_aIPiS0_SaIiEET0_T_S3_S2_RT1_" if gcc_version_num == "5.4.0" else None,
+        nm="-ignoreFns=_ZNSt12_Vector_baseIiSaIiEE13_M_deallocateEPim" if gcc_version_num == "5.4.0" else None),
+    runConfig("verifyOptions.c", cf=True, sn=True),
+    runConfig("whetstone.c", xl="-lm", rgx=whetstoneRegex),
     runConfig("zeroInit.c"),
 ]
 
-def run(cfg, config, dir_path):
+
+def run(cfg, config, dir_path, board=None, no_clean=False):
+    """run a single test with the given configuration.
+
+    cfg - a runConfig object
+    config - the COAST command line configuration
+    dir_path - path to the test to run
+    board - argument to the Make variable `BOARD`
+    no_clean - keep the build files even if the run is successful
+    returns 0 on success or when the test is skipped, a nonzero/truthy value on failure
+    """
+    # skip if no COAST applied
+    if cfg.skipNormal and (not config):
+        return 0
+
+    # corner case skip
+    if ("noMemReplication" in config) and (cfg.noMemFlg == "__SKIP_THIS"):
+        return 0
+
+    if (board is None) and (cfg.board is not None):
+        board = cfg.board
+
     # first clean before compiling
-    clean = subprocess.Popen(['make', '-C', dir_path, 'small_clean'])
+    target_name = os.path.splitext(cfg.fname)[0]
+    clean_cmd = "make --no-print-directory --file={mkfl} -C {dir} TARGET={tgt} clean".format(
+        mkfl=makefile_path,
+        dir=dir_path,
+        tgt=target_name
+    )
+    if board is not None:
+        clean_cmd += " 'BOARD={}'".format(board)
+    clean = subprocess.Popen(shlex.split(clean_cmd))
     clean.wait()
+
     # now build the test
-    cmd = "make -C {} SRCFOLDER=./unitTests 'SRCFILES={}' 'XCFLAGS={}' 'OPT_PASSES={}'"
+    cmd = "make --no-print-directory --file={mkfl} -C {srcdir} 'PROJECT_SRC={codedir}' '{flspec}={srcfiles}' 'USER_CFLAGS={xcfl}' '{inc_name}={incs}' 'XLFLAGS={xlfl}' 'XLLCFLAGS={xllc}' 'OPT_PASSES={opt}' 'TARGET={tgt}'"
     fls = cfg.fname + " " + cfg.extraFiles if cfg.extraFiles else cfg.fname
     xcf = cfg.xcFlg if cfg.xcFlg else ""
+    xlf = cfg.xlFlg if cfg.xlFlg else ""
+    xllc = cfg.xlcFlg if cfg.xlcFlg else ""
     ps = config + " " + cfg.optFlg \
             if cfg.optFlg else config
     ps = ps + " " + cfg.noMemFlg \
             if (cfg.noMemFlg and "noMemReplication" in config) \
             else ps
-    command = cmd.format(dir_path, fls, xcf, ps)
+
+    # default is x86 for automated testing, but support other boards
+    #  for manual testing purposes
+    if board == "x86":
+        fileSpecName = "SRCFILES"
+        runProgName = "program"
+        incName = "USER_INCS"
+    elif board == "pynq":
+        fileSpecName = "CSRCS"
+        runProgName = "qemu"
+        incName = "INC_DIRS"
+    elif board == "ultra96":
+        fileSpecName = "CSRCS"
+        runProgName = "program"
+        incName = "USER_INCS"
+        # TODO: cool to get QEMU working with this target too
+    else:
+        # default - TODO: check correctness
+        fileSpecName = "SRCFILES"
+        runProgName = "program"
+        incName = "USER_INCS"
+
+    command = cmd.format(
+        mkfl=makefile_path,
+        srcdir=dir_path,
+        codedir=code_path,
+        flspec=fileSpecName,
+        srcfiles=fls,
+        xcfl=xcf,
+        inc_name=incName,
+        incs=user_incs,
+        xlfl=xlf,
+        xllc=xllc,
+        opt=ps,
+        tgt=target_name
+    )
+    if board is not None:
+        command += " 'BOARD={}'".format(board)
+    if board == 'pynq':
+        command += " 'BUILD_FOR_SIMULATOR=1'"
     if singleFlag or verboseFlag:
         print(command)
-    p = subprocess.Popen(shlex.split(command))
-    p.wait()
+    try:
+        p = subprocess.Popen(shlex.split(command + " exe"))
+        p.wait()
+    except KeyboardInterrupt as ki:
+        if cfg.hardKill:
+            # success
+            return 0
+        else:
+            raise ki
+    # other exceptions are not handled
+    except Exception as e:
+        raise e
     # print(" --- return code: {}".format(p.returncode))
-    return p.returncode
+
+    if cfg.compileFail:
+        # this shouldn't be tested for success, because it's
+        #  supposed to fail
+        print(" (Expected compilation failure)")
+        return not p.returncode
+    elif p.returncode:
+        return p.returncode
+
+    # now run it
+    try:
+        runCmd = command + " {}".format(runProgName)
+        # QEMU must be killed here, because it otherwise will hang the whole test
+        if board == 'pynq':
+            # https://stackoverflow.com/a/4791612/12940429
+            p = subprocess.Popen(shlex.split(runCmd), preexec_fn=os.setsid,
+                    stdout=subprocess.PIPE)
+            # wait for process to run
+            if cfg.qemuTime is not None:
+                # allow override sleep time
+                time.sleep(cfg.qemuTime)
+            else:
+                # these are short running things
+                time.sleep(qemuWaitTime)
+            # sends signal to whole process group
+            os.killpg(os.getpgid(p.pid), signal.SIGTERM)
+            # fix the terminal echo
+            os.system("stty echo")
+        else:
+            p = subprocess.Popen(shlex.split(runCmd),
+                    stdout=subprocess.PIPE)
+        # communicate() drains stdout and waits for exit; wait() first can deadlock on a full pipe
+        output, errorMsg = p.communicate()
+        output = output.decode()
+        print(output.rstrip())
+    except KeyboardInterrupt as ki:
+        if cfg.hardKill:
+            return 0
+        else:
+            raise ki
+
+    # return value: None means "not decided yet"; any nonzero value is a failure
+    returnVal = None
+
+    # some may have output regexes
+    if cfg.outRegx is not None:
+        outputMatch = re.search(cfg.outRegx, output)
+        if outputMatch:
+            # success
+            returnVal = 0
+        else:
+            print("Didn't match!")
+            returnVal = -1
+
+    # now check return code
+    if p.returncode:
+        returnVal = p.returncode
+    elif (returnVal is None) and (p.returncode == 0):
+        # success
+        returnVal = 0
+
+    # clean at end also, if succeeded
+    if (not returnVal) and (not no_clean):
+        clean2 = subprocess.Popen(shlex.split(clean_cmd))
+        clean2.wait()
+    # now we're done
+    return returnVal
+
+
+def addMoreFlags(runCfg, args):
+    """Merge extra command-line flags into a runConfig (mutated in place).
+
+    runCfg - the runConfig object to update
+    args - parsed argparse namespace holding the `extra_*_flags` options
+
+    Extra flags are appended after any flags the config already has,
+    separated by a space so that adjacent flags do not fuse into one token.
+    Returns the same runConfig object.
+    """
+    # (runConfig attribute name, extra flags given on the command line)
+    flagPairs = (
+        ("xcFlg", args.extra_clang_flags),
+        ("optFlg", args.extra_opt_flags),
+        ("xlFlg", args.extra_link_flags),
+        ("xlcFlg", args.extra_lc_flags),
+    )
+    for attrName, extra in flagPairs:
+        if extra:
+            current = getattr(runCfg, attrName)
+            setattr(runCfg, attrName, extra if current is None else current + " " + extra)
+    return runCfg
 
 
 def main():
@@ -92,36 +348,58 @@ def main():
     parser.add_argument('config', help='configuration, without any file-specific flags')
     parser.add_argument('--single-run', '-s', help='Run only one file')
     parser.add_argument('--verbose', '-v', help='extra output', action='store_true')
+    parser.add_argument('--board', '-b', type=str, help='specify board, if other than x86 (default)')
+    parser.add_argument('--extra-clang-flags', '-c', type=str, help='Extra flags to pass to `clang`')
+    parser.add_argument('--extra-opt-flags', '-o', type=str, help='Extra flags to pass to `opt`')
+    parser.add_argument('--extra-link-flags', '-l', type=str, help='Extra flags to pass to linker')
+    parser.add_argument('--extra-lc-flags', '-a', type=str, help='Extra flags to pass to `llc`, the assembler')
     args = parser.parse_args()
+    returnVal = 0
 
+    # process some args
     if args.verbose:
         verboseFlag = True
+    if args.board:
+        boardFlag = args.board
+    else:
+        boardFlag = None
+    coast_config = args.config.lstrip()
 
     # test dir
     dir_path = os.path.dirname(os.path.realpath(__file__))
 
+    # only run 1 test
     if args.single_run:
+        # validate the test name
         single = [x for x in customConfigs if x.fname == args.single_run]
         returnVal = 0
         if len(single) > 0:
             singleFlag = True
-            returnVal = run(single[0], args.config.lstrip(), dir_path)
+            # add extra flags
+            singleCfg = addMoreFlags(single[0], args)
+            # run it!
+            returnVal = run(singleCfg, coast_config, dir_path, board=boardFlag, no_clean=True)
         else:
             print("File name not found!")
         return returnVal
+
+    # run all the tests
     else:
         for cfg in customConfigs:
-            returnVal = run(cfg, args.config.lstrip(), dir_path)
+            if cfg.hardKill:
+                # don't test infinite loops automatically
+                # TODO: make it so it kills it itself
+                continue
+            cfg = addMoreFlags(cfg, args)
+            returnVal = run(cfg, coast_config, dir_path, board=boardFlag)
             if returnVal != 0:
-                if cfg.fname == "verifyOptions.c":
-                    # this shouldn't be tested for success, because it's
-                    #  supposed to fail
-                    continue
-                else:
-                    return returnVal
+                return returnVal
     # clean one more time if we did all of them
     clean = subprocess.Popen(['make', '-C', dir_path, 'clean'])
     clean.wait()
+
+    if returnVal == 0:
+        print("Success!")
     return returnVal
 
 
diff --git a/tests/TMRregression/unitTests/annotations.c b/tests/TMRregression/unitTests/annotations.c
index bc5a290e9..915f5e8fb 100644
--- a/tests/TMRregression/unitTests/annotations.c
+++ b/tests/TMRregression/unitTests/annotations.c
@@ -5,6 +5,7 @@
  * The most important requirement is that it actually syncs on the values.
  * We add in some dynamically allocated structs as examples of things we
  *  wouldn't want to xMR
+ * Also test if we can turn on/off the protection of specific function arguments.
  */
 
 #include <stdio.h>
@@ -35,6 +36,23 @@ int moreMath(int a, int b) __xMR {
     return (a * p) + (b << q);
 }
 
+void halfProtected(int* a, int* __NO_xMR b) __xMR {
+    int* local_a = a;
+    int* local_b = b;
+}
+
+void halfNotProtected(int* a, int* __xMR b) {
+    int* local_a = a;
+    int* local_b = b;
+}
+
+void callHalfFunctions() __xMR {
+    int a = 2;
+    int b = 3;
+    halfProtected(&a, &b);
+    halfNotProtected(&a, &b);
+}
+
 
 int main() {
     int __xMR result;
@@ -60,5 +78,8 @@ int main() {
         status |= -1;
     }
 
+    // this doesn't do anything
+    callHalfFunctions();
+
     return status;
 }
diff --git a/tests/TMRregression/unitTests/argAttrs.c b/tests/TMRregression/unitTests/argAttrs.c
new file mode 100644
index 000000000..b29acf62c
--- /dev/null
+++ b/tests/TMRregression/unitTests/argAttrs.c
@@ -0,0 +1,71 @@
+/*
+ * argAttrs.c
+ * This unit test created to make sure COAST respects attributes
+ * given to function arguments.
+ * 
+ * This guy says they must come after the argument type and name:
+ * http://unixwiz.net/techtips/gnu-c-attributes.html
+ * However, if we look at an example attribute like nonnull
+ * https://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Function-Attributes.html#:~:text=nonnull%(arg
+ * We see that the attribute actually requires the application programmer
+ *  to specify the argument you want to be marked as such.
+ * So our implementation will do something similar.
+ * 
+ * See the last section of processAnnotations() in interface.cpp
+ * And the TODO on line 662 of cloning.cpp
+ * 
+ * The reason this unit test was created is when trying to protect only
+ *  the kernel of a FreeRTOS application, both a TMR'd and normal version
+ *  of the function xQueueReceive (and others) was being called.
+ * This caused certain kernel globals to be used in- and out-side the scope
+ *  of replication.
+ * We un-protected the globals (like xTimerQueue), but COAST was still
+ *  replicating the function args of xQueueReceive.  We wished to be able
+ *  to mark the function args to not be replicated, but COAST ignored
+ *  putting the macro `__NO_xMR` onto the argument.
+ * Previously, this was only marking the "alloca" instruction to not xMR,
+ *  but this would not affect the actual arguments being passed in.
+ * The new implementation uses function attributes which specify arguments.
+ *
+ * This test is to make sure that what we changed fixed that error.
+ * Make sure it also still removes the unused original function (if possible).
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include "COAST.h"
+
+
+/*
+ * This function takes in a pointer argument that should not be replicated.
+ */
+// this way doesn't work:
+// uint32_t passPointer(uint32_t* ptr __NO_xMR, uint32_t val)
+// have to use the function annotation instead
+__NO_xMR_ARG(0)
+uint32_t passPointer(uint32_t* ptr, uint32_t val) {
+    uint32_t temp = val + (*ptr);
+    return temp;
+}
+
+
+uint32_t callWrapper(uint32_t* ptr) {
+    uint32_t b = 3;
+    return passPointer(ptr, b);
+}
+
+
+int main() {
+    uint32_t __NO_xMR a = 4;
+
+    // temporary fix
+    uint32_t __NO_xMR result = callWrapper(&a);
+    if (result == 7) {
+        printf("Success!\n");
+    } else {
+        printf("Failure: %d\n", result);
+    }
+
+    return 0;
+}
diff --git a/tests/TMRregression/unitTests/argSync.c b/tests/TMRregression/unitTests/argSync.c
index 7ebf00a49..07bf3d42b 100644
--- a/tests/TMRregression/unitTests/argSync.c
+++ b/tests/TMRregression/unitTests/argSync.c
@@ -40,5 +40,14 @@ int runTest(int a) __xMR {
 
 int main() {
     int x = runTest(32);
-    printf("%d\n", x);
+    // expected value: 5
+
+    if (x == 5) {
+        printf("Success!\n");
+    } else {
+        printf("Error: %d\n", x);
+        return x;
+    }
+
+    return 0;
 }
diff --git a/tests/TMRregression/unitTests/arm_locks.c b/tests/TMRregression/unitTests/arm_locks.c
index 6f41a0016..fcc7ded75 100644
--- a/tests/TMRregression/unitTests/arm_locks.c
+++ b/tests/TMRregression/unitTests/arm_locks.c
@@ -2,8 +2,19 @@
  * arm_locks.c
  *
  * This is to test the synchronization primitives from the ARM ISA.
+ * TODO: add more primitives
+ *
+ * __swp
+ * "swap data between registers and memory"
+ * https://developer.arm.com/documentation/dui0472/m/compiler-specific-features/--swp-intrinsic
  */
 
+#ifndef __arm
+#error This unit test only works with ARM targets
+#endif
+
+
+/********************************** Includes **********************************/
 #include <stdio.h>
 #include <arm_acle.h>
 
@@ -13,11 +24,11 @@
 __DEFAULT_NO_xMR
 
 
-#ifdef __arm
+/********************************* Functions **********************************/
+// wrap intrinsic
 void swap(unsigned int* a, unsigned int* b) __xMR {
     *a = __swp(*a, (unsigned int*)b);
 }
-#endif
 
 
 int main() {
diff --git a/tests/TMRregression/unitTests/atomics.c b/tests/TMRregression/unitTests/atomics.c
index 2824c4d7d..b95e2fc16 100644
--- a/tests/TMRregression/unitTests/atomics.c
+++ b/tests/TMRregression/unitTests/atomics.c
@@ -3,6 +3,9 @@
  *
  * Test atomic operations and the effect of COAST SoR crossings on them.
  * requires -std=c11
+ *
+ * Does not work with -noMemReplication flag, because it tries to sync on
+ *  the atomics, but the values are after each atomic call.
  */
 
 #include <stdio.h>
@@ -17,10 +20,18 @@ void incAtomic(atomic_uint* at) __xMR {
 }
 
 int main() {
+    // initialize an atomic counter to have value = 1
     atomic_uint counter;
     atomic_init(&counter, 1);
 
+    // add 1 to the number
     incAtomic(&counter);
 
+    // normal, expect result to be = 2
+    // with DWC, expect counter = 3
+    // with TMR, expect counter = 4
+
     printf("counter = %d\n", counter);
+
+    return 0;
 }
diff --git a/tests/TMRregression/unitTests/cloneAfterCall.c b/tests/TMRregression/unitTests/cloneAfterCall.c
new file mode 100644
index 000000000..cf4439849
--- /dev/null
+++ b/tests/TMRregression/unitTests/cloneAfterCall.c
@@ -0,0 +1,96 @@
+/*
+ * cloneAfterCall.c
+ * This benchmark was created to test replicating arguments to functions
+ *  which are modified by the function, and are replicated afterwards,
+ *  because we can only call the function once.
+ * An example of this would be `scanf`.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "COAST.h"
+
+
+// more difficult test that exercises parameter-specific annotations
+#define SCANF_TEST
+
+
+// This function has similar behavior to scanf, but has only 1 argument
+//  to worry about
+__xMR_ALL_AFTER_CALL
+void simpleVoidFunc(uint32_t* modPtr) {
+    *modPtr = 42;
+}
+
+
+int simpleTest() {
+    uint32_t modifyMe = 0;
+    simpleVoidFunc(&modifyMe);
+
+    if (modifyMe != 42) {
+        printf("Wrong modify value %d!\n", modifyMe);
+        return 1;
+    }
+
+    return 0;
+}
+
+
+#ifdef SCANF_TEST
+// specifically clone-after-call arguments 2, 3, and 4 (0 indexed)
+int __xMR_AFTER_CALL(sscanf, 2_3_4)(const char * s, const char * format, ...);
+
+int scanfTest() {
+    int ret = 0;
+    int expected = 3;   // number of fields sscanf should convert
+
+    // name, age, grade
+    const char* inputStr = "Bob 16 3.7";
+
+    char nameBuf[16];
+    uint32_t age;
+    float grade;
+
+    // read input; %15s bounds the write to nameBuf, %u matches the unsigned age
+    ret = __xMR_AFTER_CALL(sscanf, 2_3_4)(inputStr, "%15s %u %f", nameBuf, &age, &grade);
+
+    if (ret != expected) {
+        printf("Error, return value %d\n", ret);
+        return ret;
+    }
+
+    printf("%s (%u): %f\n", nameBuf, age, grade);
+    return 0;
+}
+#endif
+
+
+int main() {
+    int ret = 0;
+
+    // simple test
+    ret |= simpleTest();
+
+    #ifdef SCANF_TEST
+    // more complex test
+    ret |= scanfTest();
+    #endif
+
+    // validate
+    if (ret) {
+        printf("Error: %d\n", ret);
+    } else {
+        printf("Success!\n");
+    }
+
+    return ret;
+}
+
+
+// custom error handler
+void FAULT_DETECTED_DWC() {
+    printf("Error, fault detected!\n");
+    exit(1);
+}
diff --git a/tests/TMRregression/unitTests/fibonacci.c b/tests/TMRregression/unitTests/fibonacci.c
new file mode 100644
index 000000000..9f5e29645
--- /dev/null
+++ b/tests/TMRregression/unitTests/fibonacci.c
@@ -0,0 +1,50 @@
+/*
+ * fibonacci.c
+ *
+ * Implementation of the fibonacci sequence using recursion.
+ * This should allow for some interesting tests.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// COAST configuration
+#include "../../COAST.h"
+
+__DEFAULT_NO_xMR
+
+PRINTF_WRAPPER_REGISTER(printf);
+
+
+int fib(int n) __xMR {
+    if (n <= 1) {
+        return n;
+    } else {
+        return fib(n-1) + fib(n-2);
+    }
+}
+
+int testWrapper() __xMR {
+    // decide size
+    size_t sz = 10;
+
+    // calculate
+    int res = fib(sz);
+
+    PRINTF_WRAPPER_CALL(printf, "%d\n", res);
+
+    return !(res == 55);
+}
+
+
+int main() {
+    int status = testWrapper();
+
+    if (status) {
+        printf("Failure!\n");
+        return -1;
+    } else {
+        printf("Success!\n");
+        return 0;
+    }
+}
diff --git a/tests/TMRregression/unitTests/funcPtrStruct.c b/tests/TMRregression/unitTests/funcPtrStruct.c
new file mode 100644
index 000000000..8e6a98af4
--- /dev/null
+++ b/tests/TMRregression/unitTests/funcPtrStruct.c
@@ -0,0 +1,65 @@
+/*
+ * funcPtrStruct.c
+ *
+ * This unit test is designed to ensure that COAST properly detects function
+ *  pointers being used as elements in a struct
+ * Essentially, detect when a function is used as an input to an assignment
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "../../COAST.h"
+
+
+                        //////// type definitions ////////
+//function signature we will be using
+typedef void (*myFuncSignature) (void *CallBackRef, uint32_t StatusEvent);
+
+//this struct will have a function pointer in it
+typedef struct _test_struct {
+    int x;
+    int y;
+    myFuncSignature StatusHandler;  /* Event handler function */
+} test_struct;
+
+//this struct will just have data
+typedef struct _data_struct {
+    int a;
+    int b;
+} data_struct;
+
+                        //////// function definitions ////////
+// this function will be assigned as a value
+static void StubHandler(void *CallBackRef, uint32_t StatusEvent) {
+    // cast
+    data_struct* dst = (data_struct*)CallBackRef;
+    // print values
+    printf("%d %d %d\n", dst->a, dst->b, StatusEvent);
+}
+
+// this function allocates a struct
+test_struct* __xMR_FN_CALL alloc_struct(){
+    test_struct* st = (test_struct*) malloc(sizeof(test_struct));
+    return st;
+}
+
+int main(){
+    // set up the structs
+    test_struct* st = alloc_struct();
+    data_struct dst = {1, 2};
+
+    st->x = 100;
+    st->y = 150;
+    st->StatusHandler = StubHandler;
+
+    // print some data about the structs
+    printf("%d %d\n", st->x, st->y);
+    st->StatusHandler(&dst, 3);
+    // COAST will replicate these calls even though they are indirect.
+    // If you want it only called once, have to create an intermediate wrapper function.
+
+    // cleanup
+    free(st);
+    printf("Finished\n");
+}
diff --git a/tests/TMRregression/unitTests/globalPointers.c b/tests/TMRregression/unitTests/globalPointers.c
new file mode 100644
index 000000000..fd0af78ec
--- /dev/null
+++ b/tests/TMRregression/unitTests/globalPointers.c
@@ -0,0 +1,164 @@
+/*
+ * globalPointers.c
+ * 
+ * This unit test is designed to show one of the difficulties
+ *  encountered when protecting the FreeRTOS kernel.
+ * How do global pointers crossing the Sphere of Replication
+ *  cause incorrect execution results?
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// COAST configuration
+#include "../../COAST.h"
+__DEFAULT_NO_xMR
+
+
+// global value that exists as a pointer
+uint32_t* glblPtr;
+
+
+/****************************** Helper Functions ******************************/
+
+/*
+ * Function for printing out the value of a pointer and its clones.
+ */
+void printPtrVal(uint32_t* ptr) __xMR_FN_CALL {
+    printf("%14p: %d\n", ptr, *ptr);
+}
+
+
+/************************** Replicate Function Calls **************************/
+
+/* 
+ * Dereferences a pointer and increments the value.
+ * By the time any pointer gets here, we don't know
+ *  if it was a local or global.
+ * This function is called multiple times.
+ */
+void incPtrValCoarse(uint32_t* ptr)  __xMR_FN_CALL {
+    *ptr += 1;
+}
+
+
+void ptrCmp0() __xMR {
+    // create a local variable that will be xMR'd
+    uint32_t localVar = 0;
+
+    // increment each pointer
+    incPtrValCoarse(&localVar);
+    incPtrValCoarse(glblPtr);
+
+    // print out what the values are
+    printPtrVal(&localVar);
+    printPtrVal(glblPtr);
+
+    return;
+}
+
+
+/***************************** Change Signatures ******************************/
+
+void incPtrValFine(uint32_t* ptr) __xMR {
+    *ptr += 1;
+}
+
+/*
+ * This way of accessing a global pointer works fine.
+ * However, this is after we implemented store segmenting.  This makes sure all
+ *  3 versions of the arithmetic data streams are operating on the same
+ *  initial value of the data from the pointer.
+ * Maybe that wasn't the right solution.
+ */
+void ptrCmp1() __xMR {
+    // create a local variable that will be xMR'd
+    uint32_t localVar = 0;
+
+    // we pass both the local and global into the function, 
+    //  parameters replicated
+    incPtrValFine(&localVar);
+    incPtrValFine(glblPtr);
+
+    // print out what the values are
+    printPtrVal(&localVar);
+    printPtrVal(glblPtr);
+
+    return;
+}
+
+/******************************* Global Struct ********************************/
+
+typedef struct test_s {
+    uint32_t x;
+    uint32_t* y;
+} test_t;
+
+test_t globalStruct;
+
+/*
+ * Uses the address of a global struct pointer
+ */
+void ptrCmp2() __xMR {
+    // get pointer to global
+    uint32_t* ptr = &(globalStruct.x);
+    printPtrVal(ptr);
+
+    // init local struct
+    test_t localStruct;
+    localStruct.x = 0;
+    localStruct.y = NULL;
+}
+
+
+/******************************* Global uint32 ********************************/
+
+uint32_t globalInteger;
+
+/*
+ * Uses the address of a global struct pointer
+ */
+void ptrCmp3() __xMR {
+    // get pointer to global
+    uint32_t val = globalInteger;
+    val += 3;
+
+    uint32_t* localPtr = &globalInteger;
+    *localPtr += 1;
+
+    printPtrVal(&val);
+    printPtrVal(&globalInteger);
+}
+
+
+/************************************ Main ************************************/
+
+int main() {
+    // init the global pointer
+    glblPtr = (uint32_t*) malloc(sizeof(uint32_t));
+    *glblPtr = 0;
+
+    // ptrCmp0();
+    // ptrCmp1();
+
+    memset(&globalStruct, 0, sizeof(test_t));
+    // ptrCmp2();
+
+    globalInteger = 42;
+    ptrCmp3();
+
+    return 0;
+}
+
+
+/*
+ * Expected output (fails in compilation):
+ * ERROR: unprotected global "globalInteger" is being read from and written to inside protected functions:
+ *  "ptrCmp3" at unitTests/globalPointers.c:128:15,
+ * ERROR: unprotected global "glblPtr" is being read from and written to inside protected functions:
+ *  "incPtrValFine" at     store i32* %ptr, i32** %ptr.addr, align 8,
+ * ERROR: unprotected global "globalStruct" is being read from and written to inside protected functions:
+ *  "ptrCmp2" at unitTests/globalPointers.c:106:15,
+ */
diff --git a/tests/TMRregression/unitTests/halfProtected.c b/tests/TMRregression/unitTests/halfProtected.c
new file mode 100644
index 000000000..6744abbe4
--- /dev/null
+++ b/tests/TMRregression/unitTests/halfProtected.c
@@ -0,0 +1,98 @@
+/*
+ * halfProtected.c
+ *
+ * This test is designed to show what it's like when protecting a top layer
+ *  of functions, leaving some bottom "system calls" alone.
+ * One of the problems is in properly getting the return values from
+ *  function calls.
+ *
+ * Need a call to function with one of the args specified NO_xMR,
+ *  and another with just strings or constant args.
+ * 
+ * NOTE: these functions definitely aren't completely safe for all possible
+ *  arguments. Doesn't matter, just need one call.
+ */
+
+
+/********************************** Includes **********************************/
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "COAST.h"
+
+
+/******************************** Definitions *********************************/
+#define BUF_SIZE 16
+
+#define FAKE_FILE_SIZE 64
+char __NO_xMR fakeFile[FAKE_FILE_SIZE];
+
+
+/********************************* Functions **********************************/
+// writes bytesToWrite to file
+// returns number of bytes written
+__NO_xMR
+int fakeFileWrite(char* buffer, int bytesToWrite) {
+    int numWritten;
+
+    for (numWritten = 0;
+         (numWritten < bytesToWrite) && (numWritten < FAKE_FILE_SIZE);
+         numWritten++)
+    {
+        fakeFile[numWritten] = buffer[numWritten];
+    }
+
+    return numWritten;
+}
+
+
+__NO_xMR
+int doSomeMath(const char* mathStr, int addNum) {
+    int x = atoi(mathStr);
+    return x + addNum;
+}
+
+
+// This function checks that calls to library functions without
+//  any replicated args will not be replicated.
+void checkCloneFnCall() {
+    char __NO_xMR tempBuff[BUF_SIZE];
+    memset(tempBuff, 0, BUF_SIZE);
+    printf("tempBuf[4] = %hhu\n", tempBuff[4]);
+    return;
+}
+
+
+int main() {
+    // declare variables
+    int numRet, status;
+    // create a buffer that isn't protected
+    char* __NO_xMR buffer;
+
+    // set up string buffer
+    buffer = malloc(BUF_SIZE);
+    snprintf(buffer, BUF_SIZE, "hello there");
+
+    // use it in call
+    numRet = fakeFileWrite(buffer, strlen(buffer));
+    // expected: 11
+    if (numRet != 11) {
+        printf("ERROR: num bytes written = %d\n", numRet);
+        return -1;
+    }
+
+    // do math call
+    numRet = doSomeMath("4", 5);
+    // expected: 9
+    if (numRet != 9) {
+        printf("ERROR: result = %d\n", numRet);
+        return -1;
+    }
+
+    // this one isn't self-checking
+    checkCloneFnCall();
+
+    printf("Success!\n");
+    return 0;
+}
diff --git a/tests/TMRregression/unitTests/helloWorld.cpp b/tests/TMRregression/unitTests/helloWorld.cpp
index f67742cf7..619c589c1 100644
--- a/tests/TMRregression/unitTests/helloWorld.cpp
+++ b/tests/TMRregression/unitTests/helloWorld.cpp
@@ -1,3 +1,8 @@
+/*
+ * helloWorld.cpp
+ * Tests to see if C++ can be compiled with COAST
+ */
+
 #include <iostream>         // for cout and cin
 
 int main() {
diff --git a/tests/TMRregression/unitTests/linkedList.c b/tests/TMRregression/unitTests/linkedList.c
new file mode 100644
index 000000000..2d6d00b87
--- /dev/null
+++ b/tests/TMRregression/unitTests/linkedList.c
@@ -0,0 +1,273 @@
+/*
+ * linkedList.c
+ * 
+ * This unit test is designed to show one of the difficulties
+ *  encountered when protecting the FreeRTOS kernel.
+ * Linked list items should be kept inside or outside the
+ *  Sphere of Replication (SoR), because crossing that boundary
+ *  with pointers can cause wonderfully interesting problems.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// COAST configuration
+#include "../../COAST.h"
+__DEFAULT_NO_xMR
+
+
+// we define a type that represents a linked list.
+typedef struct node_s node_t;
+
+// each node has a value, and a previous and next pointer
+struct node_s {
+    uint32_t val;
+    node_t* prev;
+    node_t* next;
+};
+
+
+/****************************** Verbose printing ******************************/
+typedef struct buf_s {
+    char buf[256];
+    int idx;
+} buf_t;
+
+
+#define PRINTBUF(b, i, fmt, ...) \
+    b[i].idx += sprintf(b[i].buf+b[i].idx, fmt, __VA_ARGS__)
+#define SIZE 7
+void printListVerbose(node_t* list) __xMR_FN_CALL {
+    // allocate some buffers
+    buf_t b[SIZE];
+    int i;
+    for (i = 0; i < SIZE; i+=1) {
+        memset(&b[i], 0, sizeof(buf_t));
+    }
+    // filler string
+    char* filler = "───────────────────────────────";
+    int fillLen = 36;
+
+    // fill with nicely formatted data
+    while (list != NULL) {
+        // usually prints 12 characters
+        PRINTBUF(b, 1, "│ %10p │  ", list);
+        PRINTBUF(b, 3, "│ %10p │⇢ ", list->next);
+        PRINTBUF(b, 4, "│ %10p │⇠ ", list->prev);
+        PRINTBUF(b, 5, "│ %10d │  ", list->val);
+        PRINTBUF(b, 0, "┌%*.*s┐  ", fillLen, fillLen, filler);
+        PRINTBUF(b, 2, "├%*.*s┤  ", fillLen, fillLen, filler);
+        PRINTBUF(b, 6, "└%*.*s┘  ", fillLen, fillLen, filler);
+        list = list->next;
+    }
+
+    // print it out
+    for (i = 0; i < SIZE; i+=1) {
+        printf("%s\n", b[i].buf);
+    }
+    printf("\n");
+}
+
+/******************************************************************************/
+
+
+// inserts a new node at a spot in the list
+void listInsertAfter(node_t* listSpot, node_t* node) __xMR_FN_CALL {
+    node_t* temp;
+
+    // save current next pointer
+    temp = listSpot->next;
+    
+    // give node the right pointers
+    node->next = temp;
+    node->prev = listSpot;
+
+    // update the ones around it
+    listSpot->next = node;
+    if (temp) {
+        temp->prev = node;
+    }
+
+    return;
+}
+
+
+// this version will be TMR'd
+void listInsertAfter3(node_t* listSpot, node_t* node) __xMR {
+    node_t* temp;
+
+    // save current next pointer
+    temp = listSpot->next;
+    
+    // give node the right pointers
+    node->next = temp;
+    node->prev = listSpot;
+
+    // update the ones around it
+    listSpot->next = node;
+    if (temp) {
+        temp->prev = node;
+    }
+
+    return;
+}
+
+
+// remove this item from the list
+void listDeleteNode(node_t* node) __xMR_FN_CALL {
+    if (!node) {
+        printf("Error, node is NULL!\n");
+        return;
+    }
+    // unlink from whichever neighbors exist (a head/tail node has a NULL side)
+    if (node->next) { node->next->prev = node->prev; }
+    if (node->prev) { node->prev->next = node->next; }
+    
+    // because the pointer is being orphaned, we're going 
+    //  to free the memory
+    // free(node);
+    // leave out for functionality sake
+    // yes, I know there will be memory leaks
+
+    return;
+}
+
+
+// helper function to create a new node, unconnected
+node_t* createNode() __xMR_FN_CALL {
+    // allocate the memory
+    node_t* n = (node_t*) malloc(sizeof(node_t));
+
+    // set the links to NULL
+    n->prev = NULL;
+    n->next = NULL;
+
+    return n;
+}
+
+
+// textual representation of a list
+void printList(node_t* list) __xMR_FN_CALL {
+    while (list != NULL) {
+        printf("%d, ", list->val);
+        list = list->next;
+    }
+    printf("\n");
+}
+
+
+void normalUsage(void) {
+    int i;
+    int size = 4;
+
+    // create a base
+    node_t* list = createNode();
+    list->val = 0;
+
+    // let's add some nodes
+    for (i = size; i > 1; i-=1) {
+        node_t* next = createNode();
+        next->val = i-1;
+        listInsertAfter(list, next);
+    }
+
+    printListVerbose(list);
+    listDeleteNode(list->next);
+    listDeleteNode(list->next);
+    printListVerbose(list);
+
+    /*
+     * expected list contents (values in node order):
+     * 0, 1, 2, 3,
+     * 0, 3,
+     */
+}
+
+
+void tmrUsage(void) __xMR {
+    int i;
+    int size = 4;
+
+    // create a base
+    node_t* list = createNode();
+    list->val = 0;
+
+    // let's add some nodes
+    for (i = size; i > 1; i-=1) {
+        node_t* next = createNode();
+        next->val = i-1;
+        listInsertAfter(list, next);
+    }
+
+    // call these for each of the clones
+    printListVerbose(list);
+    listDeleteNode(list->next);
+    listDeleteNode(list->next);
+    printListVerbose(list);
+    /*
+     * expected output:
+     * 0, 1, 2, 3,
+     * 0, 1, 2, 3,
+     * 0, 1, 2, 3,
+     * 0, 3,
+     * 0, 3,
+     * 0, 3,
+     */
+}
+
+
+// this has to be a global
+static node_t* specialNode;
+
+void crossBoundaryAdd() __xMR {
+    // create a list
+    node_t* list = createNode();
+    list->val = 0;
+    node_t* last = createNode();
+    last->val = 42;
+
+    // add a global
+    listInsertAfter3(list, last);
+    printListVerbose(list);
+    listInsertAfter3(list, specialNode);
+    printf("address of specialNode: %p\n\n", specialNode);
+    printListVerbose(list);
+    listDeleteNode(list->next);
+    printListVerbose(list);
+}
+
+
+/*
+ * This will have 3 copies of a list created in the SoR,
+ *  but try to add and then remove a value from outside the SoR.
+ */
+void crossBoundaryUsage(void) {
+    specialNode = createNode();
+    specialNode->val = 1234;
+    crossBoundaryAdd();
+}
+
+
+int main() {
+    
+    printf("\nNormal usage:\n");
+    normalUsage();
+
+    printf("\nTMR usage:\n");
+    tmrUsage();
+
+    printf("\nCross boundary usage:\n");
+    crossBoundaryUsage();
+    printf("\n");
+
+    return 0;
+}
+
+
+/*
+ * Expected output (fails in compilation):
+ * ERROR: unprotected global "specialNode" is being read from and written to inside protected functions:
+ *  "listInsertAfter3" at     store %struct.node_s* %node, %struct.node_s** %node.addr, align 8,
+ */
diff --git a/tests/TMRregression/unitTests/load_store.c b/tests/TMRregression/unitTests/load_store.c
index 68dbc9fe4..cc277f636 100644
--- a/tests/TMRregression/unitTests/load_store.c
+++ b/tests/TMRregression/unitTests/load_store.c
@@ -27,22 +27,29 @@ struct myStruct
 };
 
 
+// Modify struct value by reference
 void touchStruct(struct myStruct* ms) FUNCTION_TAG {
     (ms->x)++;
-
-    if ( (ms->x) == 1) {
-        printf("ms == 1\r\n");
-    } else {
-        printf("ms == %d\r\n", ms->x);
-    }
-
     return;
 }
 
 
 int main() {
+    // setup struct
     struct myStruct ms;
     ms.x = 0;
     ms.y = 0;
+
+    // modify struct
     touchStruct(&ms);
+
+    // check value
+    if ( (ms.x) == 1) {
+        printf("Success!\n");
+    } else {
+        printf("Error: %d\n", ms.x);
+        return ms.x;
+    }
+
+    return 0;
 }
diff --git a/tests/TMRregression/unitTests/mallocTest.c b/tests/TMRregression/unitTests/mallocTest.c
index bad462544..bc53c9bc5 100644
--- a/tests/TMRregression/unitTests/mallocTest.c
+++ b/tests/TMRregression/unitTests/mallocTest.c
@@ -1,10 +1,27 @@
-// This unit test is to make sure malloc works with TMR
-// malloc() wrappers may need special treatment
-//  must include `-replicateFnCalls=alloc_struct ` after -TMR,
-//  or include the annotations as shown below
+/*
+ * mallocTest.c
+ *
+ * This unit test is to make sure malloc works with TMR
+ * malloc() wrappers may need special treatment
+ *  must include `-replicateFnCalls=alloc_struct ` after -TMR,
+ *  or include the annotations as shown below
+ *
+ * Also, to make sure that all pointers are free'd, wrap up
+ *  `free` with the directive.
+ */
+
+/********************************** Includes **********************************/
 #include <stdio.h>
 #include <stdlib.h>
 
+
+/**************************** COAST configuration *****************************/
+#include "COAST.h"
+
+void GENERIC_COAST_WRAPPER(free)(void* ptr);
+
+
+/******************************** Definitions *********************************/
 #define ARRAY_SIZE  4
 #define ELEMENT_SIZE    10
 
@@ -20,17 +37,22 @@ typedef struct {
     inner_struct z;
 } outer_struct;
 
-outer_struct* __attribute__((annotate("xMR_call"))) alloc_struct() {
+
+/********************************* Functions **********************************/
+
+__xMR_FN_CALL
+outer_struct* alloc_struct(void) {
     outer_struct* st = (outer_struct*) malloc(sizeof(outer_struct));
     return st;
 }
 
 int main() {
-    //don't even need to do anything with the struct, just create it and destroy it
+    // don't even need to do anything with the struct, just create it and destroy it
     outer_struct* st = alloc_struct();
-    free(st);
+    GENERIC_COAST_WRAPPER(free)(st);
+
     printf("Finished\n");
-    //this unit test considered to have succeeded
-    // if there are no memory leaks (double free corruption, etc)
+    // This unit test considered to have succeeded
+    //  if there are no memory leaks (double free corruption, etc)
     return 0;
 }
diff --git a/tests/TMRregression/unitTests/nestedCalls.c b/tests/TMRregression/unitTests/nestedCalls.c
new file mode 100644
index 000000000..7be59e110
--- /dev/null
+++ b/tests/TMRregression/unitTests/nestedCalls.c
@@ -0,0 +1,145 @@
+/*
+ * nestedCalls.c
+ * 
+ * This unit test was created to explore issues with COAST that arise
+ *  when there is a bitcast *and* GEP inside the same call instruction.
+ * This seems to affect correctly changing the parameters of the 
+ *  cloned function call in cases like memset, when it is a function
+ *  whose call is replicated.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+
+#include "../../COAST.h"
+
+
+/******************************** Definitions *********************************/
+
+// define a struct with a block inside of it
+typedef struct block_s {
+    int a;
+    int b;
+    int block[16];
+} block_t;
+
+block_t globalBlock;
+
+// define a struct with aliasing pointer and value types
+// borrowed from FreeRTOS list.h
+typedef uint32_t UBaseType_t;
+struct xLIST_ITEM
+{
+	UBaseType_t xItemValue;
+	struct xLIST_ITEM * pxNext;
+	struct xLIST_ITEM * pxPrevious;
+	void * pvOwner;
+	void * pvContainer;
+};
+typedef struct xLIST_ITEM ListItem_t;
+
+struct xMINI_LIST_ITEM
+{
+	UBaseType_t xItemValue;
+	struct xLIST_ITEM * pxNext;
+	struct xLIST_ITEM * pxPrevious;
+};
+typedef struct xMINI_LIST_ITEM MiniListItem_t;
+
+typedef struct xLIST {
+    UBaseType_t uxNumberOfItems;
+    ListItem_t * pxIndex;
+    MiniListItem_t xListEnd;
+} List_t;
+
+static List_t pxReadyTasksLists[ 4 ];
+
+
+/********************************* Functions **********************************/
+
+__attribute__((noinline))
+void memset_test() {
+    block_t myBlock;
+    
+    memset(myBlock.block, 0, sizeof(myBlock.block));
+    memset(globalBlock.block, 0, sizeof(myBlock.block));
+
+    // do something with it so it doesn't get optimized out
+    myBlock.a = 42;
+
+    // This will be checked later
+    globalBlock.block[2] = myBlock.a + globalBlock.block[0];
+
+    return;
+}
+
+
+void vListInitialise( List_t * const pxList )
+{
+	pxList->pxIndex = ( ListItem_t * ) &( pxList->xListEnd );
+
+	pxList->xListEnd.xItemValue = ( UBaseType_t ) 0xffffffffUL;
+
+	pxList->xListEnd.pxNext = ( ListItem_t * ) &( pxList->xListEnd );
+	pxList->xListEnd.pxPrevious = ( ListItem_t * ) &( pxList->xListEnd );
+
+	pxList->uxNumberOfItems = ( UBaseType_t ) 0U;
+}
+
+__attribute__((noinline))
+void vListPrintInfo( List_t * const pxList, UBaseType_t uxPriority) __xMR_FN_CALL 
+{   // UBaseType_t is uint32_t, so use %u; %p expects a void* argument
+    printf("List priority %u: \n", uxPriority);
+    printf("  uxNumberOfItems: %u\n", pxList->uxNumberOfItems);
+    printf("  pxIndex @%p\n", (void *) pxList->pxIndex);
+    printf("  xListEnd @%p\n", (void *) &pxList->xListEnd);
+    printf("    xItemValue = %u\n", pxList->xListEnd.xItemValue);
+}
+
+/* Compiling this with -O2 causes the compiler to unroll the loop and inline
+ * the initialization function.  Then each store will be all one LLVM IR
+ * instruction, with a GEP inside of a bitcast, the thing that is tricky
+ * to clone correctly. */
+__attribute__((noinline))
+void struct_test() {
+    UBaseType_t uxPriority;
+    for( uxPriority = 0U; uxPriority < 4; uxPriority++ )
+	{
+		vListInitialise( &( pxReadyTasksLists[ uxPriority ] ) );
+	}
+
+    // print them out
+    // for ( uxPriority = 0U; uxPriority < 4; uxPriority++ ) {
+    //     vListPrintInfo(&pxReadyTasksLists[uxPriority], uxPriority);
+    // }
+    int q = 57;
+    pxReadyTasksLists[0].pxIndex->xItemValue = q;
+}
+
+
+int main() {
+    int status = 0;
+
+    memset_test();
+    // expect globalBlock.block[2] = 42
+    if (globalBlock.block[2] != 42) {
+        status += 1;
+    }
+
+    struct_test();
+
+    // expect pxReadyTasksLists[0].pxIndex->xItemValue = 57
+    if (pxReadyTasksLists[0].pxIndex->xItemValue != 57) {
+        status += 1;
+    }
+
+    if (status) {
+        printf("Error: %d\n", status);
+    } else {
+        printf("Success!\n");
+    }
+
+    return status;
+}
diff --git a/tests/TMRregression/unitTests/protectedLib.c b/tests/TMRregression/unitTests/protectedLib.c
new file mode 100644
index 000000000..8ed33ac04
--- /dev/null
+++ b/tests/TMRregression/unitTests/protectedLib.c
@@ -0,0 +1,73 @@
+/*
+ * protectedLib.c
+ *
+ * This unit test was created to show what can happen when a "library" style
+ *  function is used inside and outside the scope of replication, especially
+ *  when it also accesses a protected global variable.
+ *
+ * Is it possible to make all the calls to this function have the same signature?
+ * Then it would be easier to call in- and outside the scope of replication.
+ *
+ * Without changing the COAST code, this would only be possible with 2 separate
+ *  compilation units, and the first one run with -noMain flag
+ *
+ * We have succeeded:
+ * Run with the command line parameter -protectedLibFn=sharedFunc
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "COAST.h"
+
+
+// protected global
+static uint32_t __xMR protectedUInt = 0;
+
+
+/*
+ * This function called from in- and out-side the scope of replication.
+ * It modifies a global variable.
+ */
+void sharedFunc(uint32_t* ptr, uint32_t val) {
+    protectedUInt += val;
+    *ptr = protectedUInt;
+}
+
+
+int protectedWrapper() __xMR {
+    int status = 0;
+    // can't protect this because modified by reference
+    // might be able to do it with cloneAfterCall
+    uint32_t __NO_xMR a = 2;
+    uint32_t b = 3;
+
+    sharedFunc(&a, b);
+    // expected result: a = protectedUInt+3 = 5+3 = 8
+
+    status = (a != 8);
+    return status;
+}
+
+
+int main() __NO_xMR {
+    int status = 0;
+    uint32_t a = 4;
+    uint32_t b = 5;
+
+    sharedFunc(&a, b);
+    // expected output: a = 5
+    status |= (a != 5);
+
+    // call the protected one 2nd
+    status |= protectedWrapper();
+
+    if (status) {
+        printf("Error: %d\n", status);
+        return status;
+    } else {
+        printf("Success!\n");
+    }
+
+    return 0;
+}
\ No newline at end of file
diff --git a/tests/TMRregression/unitTests/ptrArith.c b/tests/TMRregression/unitTests/ptrArith.c
index 7586318b0..6a3a04ce1 100644
--- a/tests/TMRregression/unitTests/ptrArith.c
+++ b/tests/TMRregression/unitTests/ptrArith.c
@@ -108,6 +108,8 @@ void callFnPtr(int* x, MyFnType fnPtr) __xMR {
 /************************************ main ************************************/
 #define ASIZE 8
 int main() {
+    int status = 0;
+
     ///////////////////////////// mutate array /////////////////////////////
     int a1[ASIZE];
     int a2[ASIZE] = {1, 2, 0, 4, 0, -1, 7, 5};   //golden
@@ -119,6 +121,7 @@ int main() {
 
     if (memcmp(a1, a2, ASIZE * sizeof(int))) {
         printf(" !! Error !!\n");
+        status += 1;
     }
 
     /////////////////////////////// xor swap ///////////////////////////////
@@ -141,6 +144,7 @@ int main() {
     printf(" %d\n", incThis);
     if (incThis != 3) {
         puts("Error!");
+        status += 1;
     }
 
     /////////////////////////// double pointers ////////////////////////////
@@ -161,6 +165,9 @@ int main() {
     doubleCross(&val);
     //expected 0x00AB
     printf(" 0x%04X\n", val);
+    if (val != 0x00AB) {
+        status += 1;
+    }
 
     /////////////////////////// storing pointers ///////////////////////////
     int sp0[4] = {4, 8, 16, 32};
@@ -172,6 +179,18 @@ int main() {
     int fnX = 3;
     callFnPtr(&fnX, &intMath);
     printf("%2d\n", fnX);
+    //expected 5
+    // TODO: call gets replicated
+    // if (fnX != 5) {
+    //     status += 1;
+    // }
+
+    ///////////////////////////// Status Check /////////////////////////////
+    if (status) {
+        printf("Error: %d\n", status);
+    } else {
+        printf("Success!\n");
+    }
 
-    return 0;
+    return status;
 }
diff --git a/tests/TMRregression/unitTests/replReturn.c b/tests/TMRregression/unitTests/replReturn.c
new file mode 100644
index 000000000..7c4c66037
--- /dev/null
+++ b/tests/TMRregression/unitTests/replReturn.c
@@ -0,0 +1,54 @@
+/*
+ * replReturn.c
+ *
+ * This unit test is intended to test COAST support for returning
+ *  multiple values from a function call.
+ * Invocation should include
+ *  `-cloneReturn=returnTest -replicateFnCalls=malloc -cloneFns=testWrapper`
+ *
+ * Don't run with -noMemReplication because that defeats the point of the unit test.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// COAST configuration
+#include "../../COAST.h"
+// __DEFAULT_NO_xMR
+
+PRINTF_WRAPPER_REGISTER(printf);
+
+// TODO: make a test that already returns an aggregate type
+
+
+int* returnTest(size_t sz) {
+    int* myPointer = (int*) malloc(sizeof(int) * sz);
+    return myPointer;
+}
+
+int testWrapper() {
+    // decide size
+    size_t sz = 4;
+
+    // create a pointer
+    int* newPointer = returnTest(sz);
+    PRINTF_WRAPPER_CALL(printf, "%p\n", newPointer);
+
+    // use pointer
+    newPointer[1] = 42;
+
+    return !(newPointer[1] == 42);
+}
+
+
+int main() __NO_xMR {
+    int status = testWrapper();
+
+    if (status) {
+        printf("Failure!\n");
+        return -1;
+    } else {
+        printf("Success!\n");
+        return 0;
+    }
+}
diff --git a/tests/TMRregression/unitTests/segmenting.c b/tests/TMRregression/unitTests/segmenting.c
index c8917ad81..207ba425c 100644
--- a/tests/TMRregression/unitTests/segmenting.c
+++ b/tests/TMRregression/unitTests/segmenting.c
@@ -10,7 +10,7 @@
 #include "../../COAST.h"
 
 
-int simpleMath(int x, int y) {
+int simpleMath(int x, int y) __xMR_FN_CALL {
     return x + y;
 }
 
@@ -23,7 +23,9 @@ int main() {
 
     if (result != 30) {
         printf("Error! %d\n", result);
+        return -1;
     } else {
         printf("Success!\n");
+        return 0;
     }
-}
\ No newline at end of file
+}
diff --git a/tests/TMRregression/unitTests/signalHandlers.c b/tests/TMRregression/unitTests/signalHandlers.c
new file mode 100644
index 000000000..087761803
--- /dev/null
+++ b/tests/TMRregression/unitTests/signalHandlers.c
@@ -0,0 +1,55 @@
+/*
+ * signalHandlers.c
+ * 
+ * This unit test is designed to test
+ *  1) how COAST works with signal handlers
+ *  2) if we can mark functions as an ISR and COAST will leave them alone
+ *
+ * If the program doesn't stop with Ctrl+C, then find the PID and send the
+ *  signal directly to it, like
+ * `kill -s SIGINT 12345`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+
+// COAST configuration
+#include "../../COAST.h"
+__DEFAULT_xMR
+
+
+// print something and exit
+// this shouldn't be touched by COAST at all
+void SIGINT_handler(int sig_num) __ISR_FUNC {
+    // Can only call functions which are safe for signal handlers
+    // https://stackoverflow.com/a/12902707/12940429
+    char msg[] = "\nCaught Ctrl-C\nExiting gracefully...\n";
+    write(STDOUT_FILENO, msg, sizeof(msg) - 1);  // omit the trailing NUL
+    _exit(1);
+}
+
+
+// this function will never return, just do math operations forever
+void doMath(void) __xMR {
+    int x = 0, y = 1, z = -1;
+
+    while (1) {
+        // this should be fun
+        x += y;
+        y ^= z;
+        z *= x;
+    }
+}
+
+
+int main(void) {
+    // register handler
+    signal(SIGINT, SIGINT_handler);
+
+    // do some math forever
+    doMath();
+
+    return 0;
+}
\ No newline at end of file
diff --git a/tests/TMRregression/unitTests/simd.c b/tests/TMRregression/unitTests/simd.c
index 6ef12631a..aa0b474f8 100644
--- a/tests/TMRregression/unitTests/simd.c
+++ b/tests/TMRregression/unitTests/simd.c
@@ -4,6 +4,10 @@
  * This unit test is to see how the LLVM IR represents SIMD instructions
  * All this does is double all of the values in a matrix
  * have to make sure the flag XCFLAGS="-O3"
+ *
+ * SoR notes:
+ * matrix0 is not protected, but they are just scalar values, so COAST
+ *  synchronizes the values before storing to it.
  */
 
 #include <stdio.h>
@@ -50,6 +54,8 @@ test_t golden0[ROW_SIZE][COL_SIZE] = {
 
 // don't inline the matrix multiply call so it can be xMR'd correctly
 __attribute__((noinline))
+// TODO: why do we need to explicitly mark this?
+__COAST_IGNORE_GLOBAL(matrix0)
 #ifdef WITH_INTRINSICS
 #ifdef __x86_64
 // hand optimized for x86_64 architecture with the SSE2 extension
diff --git a/tests/TMRregression/unitTests/stackAttack.c b/tests/TMRregression/unitTests/stackAttack.c
new file mode 100644
index 000000000..5ab56665c
--- /dev/null
+++ b/tests/TMRregression/unitTests/stackAttack.c
@@ -0,0 +1,270 @@
+/*
+ * stackAttack.c
+ * This unit test specifically formulated to have a predictable
+ *  call stack so we can try some techniques to protect against
+ *  SDCs in stack frames.
+ * Mark all the functions so they are not inlined.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+
+
+// print some messages
+#if defined(__has_feature)
+# if __has_feature(shadow_call_stack)
+/***************************** Shadow Call Stack ******************************/
+#   pragma message ("Using Shadow Call Stack protection")
+
+    #ifdef __x86_64__
+    ///////////////////////////// x86 run-time /////////////////////////////
+    // No run-time is provided for the x86 implementation.
+    #include <asm/prctl.h>
+    #include <sys/prctl.h>
+    // for some reason, compiler still complains about implicit declaration
+    extern int arch_prctl(int code, unsigned long *addr);
+    
+    // Set aside space for the gs register to save return addresses in.
+    #define SHADOW_STACK_SIZE 512
+    uint64_t shadowStack[SHADOW_STACK_SIZE];
+
+    // Run some code before main() starts
+    // https://stackoverflow.com/a/8713662/12940429
+    void premain(void) __attribute__ ((constructor))
+            __attribute__((no_sanitize("shadow-call-stack")));
+
+    void premain(void) {
+        // Set `gs` to the array base; the shadow stack grows upward from gs:0.
+        // https://stackoverflow.com/a/59282564/12940429
+        arch_prctl(ARCH_SET_GS, &shadowStack[0]);
+    }
+    #endif
+
+# endif
+
+# if (__SSP_ALL__ == 3) || (__SSP_STRONG__ == 2) || (__SSP__ == 1)
+/************************** Stack Protector (Canary) **************************/
+    #ifdef __ARM_EABI__
+    //////////////////////////// ARM32 run-time ////////////////////////////
+    // automatic for x86, not for ARM clang baremetal
+    // https://embeddedartistry.com/blog/2020/05/18/implementing-stack-smashing-protection-for-microcontrollers-and-embedded-artistrys-libc/
+    // also this, but it's not as good
+    // https://antoinealb.net/programming/2016/06/01/stack-smashing-protector-on-microcontrollers.html
+
+    // did the user set it already? (guard the same macro that is defined below)
+    #ifndef STACK_CHK_GUARD
+        // stack guard value - 32 or 64 bit?
+        #if (UINTPTR_MAX == UINT32_MAX)
+        #define STACK_CHK_GUARD 0xdeadbeef
+        #else
+        #define STACK_CHK_GUARD 0xdeadbeef8badf00d
+        #endif
+    #endif
+
+    // this is the canary value
+    uintptr_t __attribute__((weak)) __stack_chk_guard = 0;
+
+    // randomly generated canary value?
+    // https://github.com/gcc-mirror/gcc/blob/master/libssp/ssp.c
+
+    // callback can be overwritten by user
+    __attribute__((weak)) uintptr_t __stack_chk_guard_init(void) {
+        return STACK_CHK_GUARD;
+    }
+
+    // pre-main initialization of stack guard value
+    static void __attribute__((constructor,no_stack_protector)) __construct_stk_chk_guard()
+    {
+        if (__stack_chk_guard == 0) {
+            __stack_chk_guard = __stack_chk_guard_init();
+        }
+    }
+
+    #include <stdlib.h>
+    // this gets called at a mismatch
+    void __stack_chk_fail(void) __attribute__((weak, noreturn));
+    void __stack_chk_fail(void) {
+        printf("Stack smashed! Aborting...\n");
+        abort();
+    }
+    #endif
+# endif
+
+#endif
+
+
+/********************************* Functions **********************************/
+// simple functions to test nested stack frames
+__attribute__((noinline))
+int func3(uint32_t arg3) {
+    return arg3 + 3;
+}
+
+__attribute__((noinline))
+int func2(uint32_t arg2) {
+    return func3(arg2) + 2;
+}
+
+__attribute__((noinline))
+int func1(uint32_t arg1) {
+    return func2(arg1) + 1;
+}
+
+
+#ifdef FORTIFY_SOURCE
+/*
+ * Function that tries to overwrite the stack return address.
+ * Simple mistake to make, forgetting to count the null terminator.
+ */
+void unsafeCopy(void) {
+    char buf[12];
+    strcpy(buf, "Hello there!");
+    printf("%s\n", buf);
+    return;
+}
+#endif
+
+
+int main() {
+    int ret = 0;
+    uint32_t x = 42;
+    // expected result: (((42 + 3) + 2) + 1) = 48
+    uint32_t result = func1(x);
+
+    #ifdef FORTIFY_SOURCE
+    // buffer test
+    unsafeCopy();
+    #endif
+
+    if (result != 48) {
+        printf("Error, got %u, expected 48!\n", result);
+        ret = -1;
+    } else {
+        printf("Success!\n");
+    }
+
+    #ifdef __QEMU_SIM
+    // have to spin forever instead of actually returning
+    while (1);
+    #endif
+    return ret;
+}
+
+
+/********************************* x86 notes **********************************/
+/*
+ * normal disassembly of func2:
+ * 0x00000000004004f0 <+0>:	    push   %rbp
+ * 0x00000000004004f1 <+1>:	    mov    %rsp,%rbp
+ * 0x00000000004004f4 <+4>:	    sub    $0x10,%rsp
+ * 0x00000000004004f8 <+8>:	    mov    %edi,-0x4(%rbp)
+ * 0x00000000004004fb <+11>:	mov    -0x4(%rbp),%edi
+ * 0x00000000004004fe <+14>:	callq  0x4004e0 <func3>
+ * 0x0000000000400503 <+19>:	add    $0x2,%eax
+ * 0x0000000000400506 <+22>:	add    $0x10,%rsp
+ * 0x000000000040050a <+26>:	pop    %rbp
+ * 0x000000000040050b <+27>:	retq   
+ */
+
+/*
+ * with -fsanitize=shadow-call-stack
+ * (removed in LLVM 9.0, for future reference)
+ * 0000000000400570 <func2>:
+ *   400570:       4c 8b 14 24             mov    (%rsp),%r10
+ *   400574:       4d 31 db                xor    %r11,%r11
+ *   400577:       65 49 83 03 08          addq   $0x8,%gs:(%r11)
+ *   40057c:       65 4d 8b 1b             mov    %gs:(%r11),%r11
+ *   400580:       65 4d 89 13             mov    %r10,%gs:(%r11)
+ *   400584:       55                      push   %rbp
+ *   400585:       48 89 e5                mov    %rsp,%rbp
+ *   400588:       48 83 ec 10             sub    $0x10,%rsp
+ *   40058c:       89 7d fc                mov    %edi,-0x4(%rbp)
+ *   40058f:       8b 7d fc                mov    -0x4(%rbp),%edi
+ *   400592:       e8 b9 ff ff ff          callq  400550 <func3>
+ *   400597:       83 c0 02                add    $0x2,%eax
+ *   40059a:       48 83 c4 10             add    $0x10,%rsp
+ *   40059e:       5d                      pop    %rbp
+ *   40059f:       4d 31 db                xor    %r11,%r11
+ *   4005a2:       65 4d 8b 13             mov    %gs:(%r11),%r10
+ *   4005a6:       65 4d 8b 12             mov    %gs:(%r10),%r10
+ *   4005aa:       65 49 83 2b 08          subq   $0x8,%gs:(%r11)
+ *   4005af:       4c 39 14 24             cmp    %r10,(%rsp)
+ *   4005b3:       75 01                   jne    4005b6 <func2+0x46>
+ *   4005b5:       c3                      retq
+ *   4005b6:       0f 0b                   ud2
+ *   4005b8:       0f 1f 84 00 00 00 00    nopl   0x0(%rax,%rax,1)
+ *   4005bf:       00
+ * see https://releases.llvm.org/7.0.1/tools/clang/docs/ShadowCallStack.html
+ */
+
+/*
+ * with -fstack-protector-all
+ * 0000000000400580 <func2>:
+ *   400580:       55                      push   %rbp
+ *   400581:       48 89 e5                mov    %rsp,%rbp
+ *   400584:       48 83 ec 10             sub    $0x10,%rsp
+ *   400588:       64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
+ *   40058f:       00 00
+ *   400591:       48 89 45 f8             mov    %rax,-0x8(%rbp)
+ *   400595:       89 7d f4                mov    %edi,-0xc(%rbp)
+ *   400598:       8b 7d f4                mov    -0xc(%rbp),%edi
+ *   40059b:       e8 a0 ff ff ff          callq  400540 <func3>
+ *   4005a0:       83 c0 02                add    $0x2,%eax
+ *   4005a3:       64 48 8b 0c 25 28 00    mov    %fs:0x28,%rcx
+ *   4005aa:       00 00
+ *   4005ac:       48 8b 55 f8             mov    -0x8(%rbp),%rdx
+ *   4005b0:       48 39 d1                cmp    %rdx,%rcx
+ *   4005b3:       75 06                   jne    4005bb <func2+0x3b>
+ *   4005b5:       48 83 c4 10             add    $0x10,%rsp
+ *   4005b9:       5d                      pop    %rbp
+ *   4005ba:       c3                      retq
+ *   4005bb:       e8 70 fe ff ff          callq  400430 <__stack_chk_fail@plt>
+ */
+
+
+/********************************* ARM notes **********************************/
+/*
+ * normal disassembly of func2:
+ * 0x001004cc <+0>:	    push	{r11, lr}
+ * 0x001004d0 <+4>:	    mov	r11, sp
+ * 0x001004d4 <+8>:	    sub	sp, sp, #8
+ * 0x001004d8 <+12>:	str	r0, [sp, #4]
+ * 0x001004dc <+16>:	ldr	r0, [sp, #4]
+ * 0x001004e0 <+20>:	bl	0x1004b4 <func3>
+ * 0x001004e4 <+24>:	add	r0, r0, #2
+ * 0x001004e8 <+28>:	mov	sp, r11
+ * 0x001004ec <+32>:	pop	{r11, pc}
+ */
+
+/*
+ * ShadowCallStack doesn't support 32-bit ARM, only aarch64.
+ * We would need a 64-bit ARM simulator to test this.
+ */
+
+/*
+ * with -fstack-protector-all
+ * 001005ac <func2>:
+ *   1005ac:       e92d4800        push    {fp, lr}
+ *   1005b0:       e1a0b00d        mov     fp, sp
+ *   1005b4:       e24dd008        sub     sp, sp, #8
+ *   1005b8:       e3041034        movw    r1, #16436      ; 0x4034
+ *   1005bc:       e3401011        movt    r1, #17
+ *   1005c0:       e5911000        ldr     r1, [r1]
+ *   1005c4:       e58d1004        str     r1, [sp, #4]
+ *   1005c8:       e58d0000        str     r0, [sp]
+ *   1005cc:       e59d0000        ldr     r0, [sp]
+ *   1005d0:       ebffffe0        bl      100558 <func3>
+ *   1005d4:       e2800002        add     r0, r0, #2
+ *   1005d8:       e3041034        movw    r1, #16436      ; 0x4034
+ *   1005dc:       e3401011        movt    r1, #17
+ *   1005e0:       e5911000        ldr     r1, [r1]
+ *   1005e4:       e59d2004        ldr     r2, [sp, #4]
+ *   1005e8:       e0411002        sub     r1, r1, r2
+ *   1005ec:       e3510000        cmp     r1, #0
+ *   1005f0:       1a000000        bne     1005f8 <func2+0x4c>
+ *   1005f4:       ea000000        b       1005fc <func2+0x50>
+ *   1005f8:       ebffffcf        bl      10053c <__stack_chk_fail>
+ *   1005fc:       e1a0d00b        mov     sp, fp
+ *   100600:       e8bd8800        pop     {fp, pc}
+ */
diff --git a/tests/TMRregression/unitTests/stackProtect.c b/tests/TMRregression/unitTests/stackProtect.c
new file mode 100644
index 000000000..3ba06253b
--- /dev/null
+++ b/tests/TMRregression/unitTests/stackProtect.c
@@ -0,0 +1,70 @@
+/*
+ * stackProtect.c
+ * This unit test was created to test the new COAST ability to protect the
+ *  stack from corruption of the return address and previous stack pointer value.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// using this file to figure out the correct signatures for the
+//  builtin functions
+// #define FIGURE_OUT_FUNCTION_INFO
+#ifdef FIGURE_OUT_FUNCTION_INFO
+
+// stringification macros
+// https://stackoverflow.com/a/47346160/12940429
+#define __STRINGIFY(x) #x
+#define STRINGIFY(x) __STRINGIFY(x)
+
+// silence annoying compiler warnings about length specifiers on printing pointers
+#ifdef __x86_64__
+#define PTR_FMT lX
+#else
+#define PTR_FMT X
+#endif
+
+// globals
+uintptr_t glblRet = 0;
+uintptr_t glblFrmPtr = 0;
+#endif
+
+
+int call1(int b) {
+    return b + 1;
+}
+
+
+int call0(int a) {
+    return call1(a);
+}
+
+
+int main() {
+    #ifdef FIGURE_OUT_FUNCTION_INFO
+    uintptr_t ret = (uintptr_t)__builtin_return_address(0);
+    uintptr_t fp  = (uintptr_t)__builtin_frame_address(0);
+    glblRet = ret;
+    glblFrmPtr = fp;
+
+    printf("ret: 0x%08" STRINGIFY(PTR_FMT) ", fp: 0x%08" STRINGIFY(PTR_FMT) "\n",
+            ret, fp);
+    #endif
+
+    int res = call0(2);
+    // expected result: res = 3
+    if (res != 3) {
+        printf("Error! %d\n", res);
+        return 3;
+    } else {
+        printf("Success!\n");
+    }
+
+    return 0;
+}
+
+void FAULT_DETECTED_DWC() {
+    printf("corrupted return address\r\n");
+    abort();
+}
diff --git a/tests/TMRregression/unitTests/structCompare.c b/tests/TMRregression/unitTests/structCompare.c
index 36e24953e..cd80881a1 100644
--- a/tests/TMRregression/unitTests/structCompare.c
+++ b/tests/TMRregression/unitTests/structCompare.c
@@ -1,12 +1,12 @@
 /*
-    Unit test designed to exercise returning struct types.
-    Can change the data types to see what happens.
-
-    Notes:
-    if the data types can fit within a normal wordsize of the target system,
-    such as how 2 ints can fit into a 64-bit register, then it's not a problem.
-    float returns a vector type, which is interesting.
-*/
+ *  Unit test designed to exercise returning struct types.
+ *  Can change the data types to see what happens.
+ *
+ *  Notes:
+ *  if the data types can fit within a normal wordsize of the target system,
+ *  such as how 2 ints can fit into a 64-bit register, then it's not a problem.
+ *  float returns a vector type, which is interesting.
+ */
 
 #include <stdio.h>
 
diff --git a/tests/TMRregression/unitTests/testFuncPtrs.c b/tests/TMRregression/unitTests/testFuncPtrs.c
index d49bf9144..449ee1405 100644
--- a/tests/TMRregression/unitTests/testFuncPtrs.c
+++ b/tests/TMRregression/unitTests/testFuncPtrs.c
@@ -1,8 +1,35 @@
-// This was added specifically to make sure TMR works with function pointers
-// Original source:
-// https://gist.github.com/robstewart57/b11353feb69dc1a6dc30
+/*
+ * testFuncPtrs.c
+ *
+ * This was added specifically to make sure TMR works with function pointers
+ * Original source:
+ * https://gist.github.com/robstewart57/b11353feb69dc1a6dc30
+ *
+ * Also add a global function pointer to make sure it isn't cloned.
+ * Test to see if COAST treats ISR function pointers correctly.
+ */
+
+
+/********************************** Includes **********************************/
 #include <stdio.h>
 
+/**************************** Function Prototypes *****************************/
+void fakeISRfunc(int x);
+
+/*********************************** Values ***********************************/
+// global pointer to fake ISR function
+void (*fakeISRptr) (int x) = fakeISRfunc;
+
+
+/********************************* Functions **********************************/
+
+void fakeISRfunc(int x) {
+    x += 1;
+    return;
+}
+
+
+// Some math functions
 int add(int i, int j)
 {
    return (i + j);
@@ -13,11 +40,13 @@ int sub(int i, int j)
    return (i - j);
 }
 
+
+// Print results of math function from ptr call
 int print(int x, int y, int (*func)())
 {
     int val = (*func)(x, y);
-    printf("value is : %d\n", val);
-    //check values
+    // printf("value is : %d\n", val);
+    // check values
     if ( (val == 300) || (val == -100) ) {
         return 0;
     } else {
@@ -25,6 +54,7 @@ int print(int x, int y, int (*func)())
     }
 }
 
+
 int main()
 {
     int returnVal = 0;
@@ -33,6 +63,7 @@ int main()
     // test with calling function pointers
     returnVal |= print(x, y, add);     // expected output: 300
     returnVal |= print(x, y, sub);     // expected output: -100
+    // NOTE: COAST replicates the calls themselves
 
     // see if we can create arrays of function pointers
     int (* pBitCntFunc[2])(int, int) = {
@@ -40,5 +71,15 @@ int main()
         sub
     };
 
+    // test global function pointer
+    fakeISRptr(0);
+
+    // Check values
+    if (returnVal) {
+        printf("Error: %d\n", returnVal);
+    } else {
+        printf("Success!\n");
+    }
+
     return returnVal;
 }
diff --git a/tests/TMRregression/unitTests/time_c.c b/tests/TMRregression/unitTests/time_c.c
index a749bda95..8bb9fdcab 100644
--- a/tests/TMRregression/unitTests/time_c.c
+++ b/tests/TMRregression/unitTests/time_c.c
@@ -7,6 +7,10 @@
  * examples for the various functions
  *
  * make sure to add clock() to -skipLibCalls
+ *
+ * This is also a good example of functions that can only be called once,
+ *  but we still want to have multiple copies of the variables.
+ * So `time` needs to be marked with `cloneAfterCall`.
  */
 
 #include <stdio.h>
@@ -25,14 +29,14 @@ int main() {
     timeInfo = localtime(&rawTime);
     printf ( "Using localtime and asctime: %s", asctime (timeInfo) );
 
-    //set the time to the beginning of the day
+    // set the time to the beginning of the day
     timeInfo->tm_hour = 0; timeInfo->tm_min = 0; timeInfo->tm_sec = 0;
     double seconds = difftime(rawTime, mktime(timeInfo));
 
     printf("Using difftime and mktime: ");
     printf("%f seconds since today started\n",seconds);
 
-    //gmtime and strftime
+    // gmtime and strftime
     struct tm * ptm;
     ptm = gmtime(&rawTime);
     char buffer [80];
@@ -46,5 +50,6 @@ int main() {
     printf("Using clock: %ld clicks to run (%f seconds)\n", dur, ((float)dur)/CLOCKS_PER_SEC);
 
     return 0;
-    //is there a good way to make this unit test self-checking?
+    // TODO: is there a good way to make this unit test self-checking?
+    // For now, there is a regex in unitTestDriver.py that will check the output
 }
diff --git a/tests/TMRregression/unitTests/vecTest.cpp b/tests/TMRregression/unitTests/vecTest.cpp
index c5fd7d7ca..e72420f5c 100644
--- a/tests/TMRregression/unitTests/vecTest.cpp
+++ b/tests/TMRregression/unitTests/vecTest.cpp
@@ -32,13 +32,15 @@ int main() {
         vec.push_back(i);
     }
 
+    // query size
     std::size_t vsize = vec.size();
-    std::cout << "vector size: " << vsize << "\n";
 
-    //check
+    // check
     if (vsize == SIZE) {
+        std::cout << "Success!\n";
         return 0;
     } else {
+        std::cout << "vector size: " << vsize << "\n";
         return -1;
     }
 
diff --git a/tests/TMRregression/unitTests/verifyOptions.c b/tests/TMRregression/unitTests/verifyOptions.c
index 18ebf3ba0..dcff6e6b1 100644
--- a/tests/TMRregression/unitTests/verifyOptions.c
+++ b/tests/TMRregression/unitTests/verifyOptions.c
@@ -1,8 +1,10 @@
 /*
  * verifyOptions.c
- * 
+ *
  * Unit test to see if COAST can detect when replication rules
  * are being violated.
+ *
+ * Also if COAST is treating "used" variables correctly.
  */
 
 #include <stdio.h>
@@ -10,30 +12,41 @@
 __DEFAULT_NO_xMR
 
 
-int __xMR myGlobal = 0;
+// some things that are never used
+int __attribute__((used)) unusedInt = 4;
+char* __COAST_VOLATILE unusedString = "Hello there!";
+char __attribute__((used)) unusedChar = 'c';
+
+
+// globals used in and out of scope
+int __xMR protectedGlobal = 0;
+int normalGlobal = 0;
 
 
 void incGlbl(void) __xMR {
-    myGlobal++;
+    protectedGlobal++;
+    normalGlobal++;
 }
 
 void decGlbl(void) __NO_xMR {
-    myGlobal--;
+    protectedGlobal--;
+    normalGlobal--;
 }
 
-__COAST_IGNORE_GLOBAL(myGlobal)
 void mulGlbl(void) {
-    myGlobal *= 2;
+    protectedGlobal *= 2;
+    normalGlobal *= 2;
 }
 
 
+__COAST_IGNORE_GLOBAL(protectedGlobal)
 int main() {
 
-    printf("%d, ", myGlobal);
+    printf("%d, ", protectedGlobal);
     incGlbl();
-    printf("%d, ", myGlobal);
+    printf("%d, ", protectedGlobal);
     decGlbl();
-    printf("%d\n", myGlobal);
+    printf("%d\n", protectedGlobal);
 
     return 0;
 }
diff --git a/tests/TMRregression/unitTests/zeroInit.c b/tests/TMRregression/unitTests/zeroInit.c
index 22e1171ad..261ba0d2b 100644
--- a/tests/TMRregression/unitTests/zeroInit.c
+++ b/tests/TMRregression/unitTests/zeroInit.c
@@ -1,4 +1,11 @@
-//Pulled these constants from the support includes for the AES test
+/*
+ * zeroInit.c
+ *
+ * This unit test was created to make sure that COAST correctly supports the
+ *  `zeroinitializer` LLVM IR command.
+ *
+ * Pulled these constants from the support includes for the AES test
+ */
 
 #include <stdio.h>
 
@@ -28,13 +35,13 @@ int main() {
 		acc1 += zeroStartArray[i];
 	}
 
-	printf("Calculated: %d, %d\n", acc1, acc2);
-	printf("  Expected: %d, %d\n", 4240, 4220);
-
-	//check
+	// check
 	if ( (acc1 == 4240) && (acc2 == 4220) ) {
+		printf("Success!\n");
 		return 0;
 	} else {
+		printf("Calculated: %d, %d\n", acc1, acc2);
+		printf("  Expected: %d, %d\n", 4240, 4220);
 		return -1;
 	}
 }
diff --git a/tests/chstone/aes/aes.c b/tests/chstone/aes/aes.c
index a684d18af..d5b7d112d 100755
--- a/tests/chstone/aes/aes.c
+++ b/tests/chstone/aes/aes.c
@@ -29,8 +29,8 @@
  * And if you want to contact us, send an email to Kimitake Wakayama
  * (wakayama@elcom.nitech.ac.jp)
  *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
  *
  * 1. Redistributions of source code must retain the above copyright notice,
  *    this list of conditions and the following disclaimer.
@@ -42,12 +42,14 @@
  * 3. All advertising materials mentioning features or use of this software must
  *    display the following acknowledgment:
  *    "This product includes software developed by Akira Iwata Laboratory,
- *    Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
+ *    Nagoya Institute of Technology in Japan
+ * (http://mars.elcom.nitech.ac.jp/)."
  *
  * 4. Redistributions of any form whatsoever must retain the following
  *    acknowledgment:
  *    "This product includes software developed by Akira Iwata Laboratory,
- *     Nagoya Institute of Technology in Japan (http://mars.elcom.nitech.ac.jp/)."
+ *     Nagoya Institute of Technology in Japan
+ * (http://mars.elcom.nitech.ac.jp/)."
  *
  *   THIS SOFTWARE IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY.
  *   AKIRA IWATA LABORATORY DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS
@@ -61,14 +63,10 @@
  */
 #include <stdio.h>
 
-
 int main_result;
 
-//To meet fiji testing standard:
-__attribute__((noinline))
-void generateGolden(){
-    ;
-}
+// To meet fiji testing standard:
+__attribute__((noinline)) void generateGolden() { ; }
 int golden;
 
 #include "aes.h"
@@ -84,17 +82,14 @@ int key[32];
 int statemt[32];
 int word[4][120];
 
-
 /* ***************** main **************************** */
-int
-aes_main (void)
-{
-/*
-+--------------------------------------------------------------------------+
-| * Test Vectors (added for CHStone)                                       |
-|     statemt, key : input data                                            |
-+--------------------------------------------------------------------------+
-*/
+int aes_main(void) {
+  /*
+  +--------------------------------------------------------------------------+
+  | * Test Vectors (added for CHStone)                                       |
+  |     statemt, key : input data                                            |
+  +--------------------------------------------------------------------------+
+  */
   statemt[0] = 50;
   statemt[1] = 67;
   statemt[2] = 246;
@@ -129,18 +124,21 @@ aes_main (void)
   key[14] = 79;
   key[15] = 60;
 
-  encrypt (statemt, key, 128128);
-  decrypt (statemt, key, 128128);
+  encrypt(statemt, key, 128128);
+  decrypt(statemt, key, 128128);
   return 0;
 }
 
-int
-main ()
-{
-      main_result = 0;
-      generateGolden();
+int main() {
+  main_result = 0;
+  generateGolden();
 
-      aes_main ();
-      printf ("\n%d\n", main_result);
-      return main_result;
-    }
+  aes_main();
+  if (main_result == 0) {
+    printf("RESULT: PASS\n");
+    return 0;
+  } else {
+    printf("RESULT: FAIL\n");
+    return 1;
+  }
+}
diff --git a/tests/makefiles/Makefile.compile b/tests/makefiles/Makefile.compile
index e2b0e770b..bc4f27b48 100644
--- a/tests/makefiles/Makefile.compile
+++ b/tests/makefiles/Makefile.compile
@@ -1,13 +1,13 @@
 LLVM_BIN = $(LLVM_BUILD_DIR)/bin
 
-CLANG 		:= $(LLVM_BIN)/clang
-CLANG++ 	:= $(LLVM_BIN)/clang++
-LLVM_DIS 	:= $(LLVM_BIN)/llvm-dis
-LLVM_OPT 	:= $(LLVM_BIN)/opt
-LLVM_LLI 	:= $(LLVM_BIN)/lli
-LLVM_LLC 	:= $(LLVM_BIN)/llc
-LLVM_LINK	:= $(LLVM_BIN)/llvm-link
-LLVM_MC	  	:= $(LLVM_BIN)/llvm-mc
+CLANG 		:= clang-7
+CLANG++ 	:= clang++-7
+LLVM_DIS 	:= llvm-dis-7
+LLVM_OPT 	:= opt-7
+LLVM_LLI 	:= lli-7
+LLVM_LLC 	:= llc-7
+LLVM_LINK	:= llvm-link-7
+LLVM_MC	  	:= llvm-mc-7
 
 PROJECT_BUILD_DIR = $(COAST_ROOT)/projects/build/
 rwildcard=$(wildcard $1$2)$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2))
@@ -34,6 +34,10 @@ else ifeq ($(BOARD), $(BOARD_PYNQ))
 
 include $(LEVEL)/makefiles/Makefile.compile.pynq
 
+else ifeq ($(BOARD), $(BOARD_PYNQ_RTOS))
+
+include $(LEVEL)/makefiles/Makefile.compile.pynqrtos
+
 else ifeq ($(BOARD), $(BOARD_ULTRA96))
 
 include $(LEVEL)/makefiles/Makefile.compile.ultra96
diff --git a/tests/makefiles/Makefile.compile.hiFive1 b/tests/makefiles/Makefile.compile.hiFive1
index b54c5fb47..2fa5e4960 100644
--- a/tests/makefiles/Makefile.compile.hiFive1
+++ b/tests/makefiles/Makefile.compile.hiFive1
@@ -35,4 +35,4 @@ $(TARGET): $(TARGET).o $(FREEDOM_BSP_LIBS)
 	--specs=nano.specs
 
 clean:
-	rm -f $(TARGET) *.o *.ll
+	rm -f $(TARGET) *.o *.ll *.bc
diff --git a/tests/makefiles/Makefile.compile.llvmLLI b/tests/makefiles/Makefile.compile.llvmLLI
index c97b335d2..98cd56eff 100644
--- a/tests/makefiles/Makefile.compile.llvmLLI
+++ b/tests/makefiles/Makefile.compile.llvmLLI
@@ -40,9 +40,14 @@ $(TARGET).lbc: $(BCFILES) $(BCPPFILES)
 	@echo '  'flags = $(CLANG_FLAGS)
 	$(CLANG++) $(INCS) $(CLANG_FLAGS) -emit-llvm $< -c -o $@
 
+.PHONY: clean clean_opt cfg
+
 clean:
 	@$(RM) -rf *.bc *.bcpp *.lbc *.llvm.bc *.ll ./cfg
 
+clean_opt:
+	@$(RM) $(TARGET).lbc
+
 cfg: $(TARGET).opt.ll $(TARGET).clang.ll
 	@rm -rf cfg
 	@mkdir cfg
diff --git a/tests/makefiles/Makefile.compile.pynq b/tests/makefiles/Makefile.compile.pynq
index fd6b86eae..c8807bd27 100644
--- a/tests/makefiles/Makefile.compile.pynq
+++ b/tests/makefiles/Makefile.compile.pynq
@@ -1,7 +1,7 @@
 ################################################################################
 # Sources
 PROJECT_SRC := ${CURDIR} $(SRC_DIRS)
-CSRCS 		:= $(foreach dir,$(PROJECT_SRC),$(wildcard $(dir)/*.c))
+CSRCS 		?= $(foreach dir,$(PROJECT_SRC),$(wildcard $(dir)/*.c))
 BUILD_DIR	?= ./build
 INC_DIRS	?=
 BC_FILES	:= $(patsubst %.c,$(BUILD_DIR)/%.bc,$(notdir $(CSRCS)))
@@ -16,12 +16,25 @@ BSP_INC 	:= $(BSP_DIR)/include
 
 ################################################################################
 # Flags
-LIB_INCS	:= -I"$(SDK_TRIPLE)/lib/gcc/arm-none-eabi/7.2.1/include/"  -I"$(SDK_TRIPLE)/lib/gcc/arm-none-eabi/7.2.1/include-fixed/" -I"$(SDK_TRIPLE)/arm-none-eabi/include/" -I"$(SDK_TRIPLE)/arm-none-eabi/libc/usr/include/"
+LIB_INCS	:= -I"$(COAST_ROOT)/llvm-project/clang/lib/Headers/" -I"$(SDK_TRIPLE)/lib/gcc/arm-none-eabi/7.2.1/include/"  -I"$(SDK_TRIPLE)/lib/gcc/arm-none-eabi/7.2.1/include-fixed/" -I"$(SDK_TRIPLE)/arm-none-eabi/include/" -I"$(SDK_TRIPLE)/arm-none-eabi/libc/usr/include/"
 NEW_INCS	:= $(addprefix -I,$(INC_DIRS))
 SRC_INCS	:= -nostdinc -I"$(BSP_INC)/" $(LIB_INCS) $(NEW_INCS)
 LIB_DIR		?= $(BSP_DIR)
 LIBS		:= -lxil -lgcc -lc
 LIBS 		+= $(PROJ_LIBS)
+# user link-time flags
+XLFLAGS		?=
+XLLCFLAGS	?=
+
+ifneq ($(BUILD_FOR_SIMULATOR),)
+# how to use ARM semihosting interface
+SEMIHOSTING_PATH := /usr/lib/arm-none-eabi/newlib/fpu
+SEMIHOSTING_LIB  := rdimon
+LIB_DIR			 := $(BSP_DIR)/qemu_bsp
+QEMU_DIR 		 := $(COAST_ROOT)/simulation/qemu-ccl
+QEMU_INSTALL_DIR := $(QEMU_DIR)/install/bin
+QEMU_PLUGIN_DIR  := $(QEMU_DIR)/build/tests/plugin
+endif
 
 # NEON support
 ifneq ($(ARM_NEON),)
@@ -47,7 +60,7 @@ CLANG_FLAGS += -mfloat-abi=softfp -mfpu=neon-fp-armv8 -D"__ARM_NEON"
 endif
 CLANG_FLAGS += $(addprefix -D,$(USER_DEFS))
 
-LLC_FLAGS 	:= -asm-verbose -filetype=asm -march=arm -mcpu=cortex-a9 -mattr=+vfp3 -float-abi=hard
+LLC_FLAGS 	:= -asm-verbose -filetype=asm -march=arm -mcpu=cortex-a9 -mattr=+vfp3 -float-abi=hard $(XLLCFLAGS)
 
 MC_FLAGS 	:= -triple=armv7-none-gnueabi -mcpu=cortex-a9 -target-abi=hard -filetype=obj
 
@@ -92,12 +105,18 @@ $(BUILD_DIR)/$(NEW_LINK_F): $(LNK_SCRIPT)
 ################################################################################
 LD_FLAGS	:= -fdiagnostics-color -fshort-enums -mcpu=cortex-a9 -mfpu=$(FPU_NAME) -mfloat-abi=hard -mhard-float -Wl,--build-id=none -specs=$(SPEC_SRC) -Wl,-T -Wl,$(BOARD_SW)/lscript.ld -Wl,-Map,$(BUILD_DIR)/$(TARGET).map
 LD_LIBS		:= -Wl,-L$(BUILD_DIR),-L$(LIB_DIR) -Wl,--start-group,$(LIBS),--end-group
+ifneq ($(BUILD_FOR_SIMULATOR),)
+LD_FLAGS    := -Wl,-L$(SEMIHOSTING_PATH) -specs=rdimon.specs $(LD_FLAGS)
+LD_LIBS     := -Wl,-l$(SEMIHOSTING_LIB) $(LD_LIBS) -Wl,--defsym=end=_end
+LD 			:= arm-none-eabi-gcc
+# comes from package `libnewlib-arm-none-eabi`
+endif
 
 $(BUILD_DIR)/$(TARGET).elf: $(BSP_LIB) $(BUILD_DIR)/$(TARGET).o | $(BUILD_DIR)/$(NEW_LINK_F)
 	@echo -e $(COLOR_MAGENTA)linking with libraries $(NO_COLOR)
-	@echo -e '  'flags = $(LD_FLAGS)
+	@echo -e '  'flags = $(LD_FLAGS) $(XLFLAGS)
 	@echo -e '  'libs = $(LD_LIBS)
-	@$(LD) -g $(LD_FLAGS) $(BUILD_DIR)/$(TARGET).o -o $@ $(LD_LIBS)
+	@$(LD) -g $(LD_FLAGS) $(XLFLAGS) $(BUILD_DIR)/$(TARGET).o -o $@ $(LD_LIBS)
 
 $(BUILD_DIR)/$(TARGET)_%.elf: $(BUILD_DIR)/$(TARGET)_%.o $(BSP_LIB) | $(BUILD_DIR)/$(NEW_LINK_F)
 	@echo -e $(COLOR_MAGENTA)linking with libraries $(NO_COLOR)
@@ -207,6 +226,32 @@ endef
 $(foreach dir,$(PROJECT_SRC),$(eval $(call bc_file_compile,$(dir))))
 
 
+################################################################################
+# Rules for running the executable with QEMU                                   #
+################################################################################
+
+.PHONY: qemu qemu-dbg qemu-cache print-qemu
+
+SEMIHOST_FLAGS  := -semihosting --semihosting-config enable=on,target=native
+QEMU_DISP_FLAGS := -nographic -serial mon:stdio
+QEMU_MACH_FLAGS := -machine xilinx-zynq-a9 -m 512M -cpu cortex-a9
+QEMU_ALL_FLAGS  := $(SEMIHOST_FLAGS) $(QEMU_DISP_FLAGS) $(QEMU_MACH_FLAGS)
+QEMU_DBG_FLAGS  := -gdb tcp::12345 -S
+
+qemu:
+	@$(QEMU_INSTALL_DIR)/qemu-system-arm $(QEMU_ALL_FLAGS) -kernel $(BUILD_DIR)/$(TARGET).elf
+
+qemu-dbg:
+	@$(QEMU_INSTALL_DIR)/qemu-system-arm $(QEMU_ALL_FLAGS) $(QEMU_DBG_FLAGS) -kernel $(BUILD_DIR)/$(TARGET).elf
+
+# TODO: plugin args
+qemu-cache:
+	$(QEMU_INSTALL_DIR)/qemu-system-arm $(QEMU_ALL_FLAGS) -kernel $(BUILD_DIR)/$(TARGET).elf -plugin $(QEMU_PLUGIN_DIR)/libcache.so,arg=0x100000,arg=0x105D30,arg=10000,arg=2,arg=1,arg=48,arg=icache -d plugin
+
+print-qemu:
+	@echo $(QEMU_INSTALL_DIR)
+
+
 ################################################################################
 # Rules for building the BSP		                                           #
 ################################################################################
@@ -216,18 +261,16 @@ $(foreach dir,$(PROJECT_SRC),$(eval $(call bc_file_compile,$(dir))))
 BSP_DIRS	:= $(shell find ${BSP_DIR}/ps7_cortexa9_0/libsrc/*/src/ -type d -print)
 # all the dirs with source files for compiling BSP
 BSP_SRCS	:= $(filter %/src/,$(BSP_DIRS))
-BSP_FRTOS_SRC := $(filter %/freertos10_xilinx_v1_1/src/,$(BSP_SRCS))
-BSP_STALN_SRC := $(filter %/standalone_v6_7/src/,$(BSP_SRCS))
 
 # debug
 # BSP_SRCS 	:= $(word 1, $(BSP_SRCS))
-BSP_LIBX_NAME	:= libxil.a
+BSP_LIBX_NAME	?= libxil.a
 BSP_SFLAGS		:= "SHELL=/bin/sh"
 BSP_BUILD		:= "BUILD_DIR=$(abspath $(BUILD_DIR))"
 BSP_LEVEL		:= "LEVEL=$(abspath $(LEVEL))"
 BSP_LIB			:= $(BSP_DIR)/$(BSP_LIBX_NAME)
 BSP_LIB_PATH	:= "LIB=$(abspath $(BSP_LIB))"
-BSP_MAKE		:= $(CURDIR)/makefile.bspsrc
+BSP_MAKE		?= $(CURDIR)/makefile.bspsrc
 BSP_ADD_DEF		:= "XDEFS=$(BSP_XDEFS)"
 BSP_ALL_FLAGS	:= $(BSP_SFLAGS) $(BSP_LEVEL) $(BSP_BUILD) $(BSP_ADD_DEF) $(BSP_LIB_PATH)
 
@@ -255,6 +298,13 @@ define bsp_lib_clean =
 
 endef
 
+# debug printing in each
+define bsp_lib_test_print = 
+@make -C $(1) --file=$(BSP_MAKE) --no-print-directory -s test_print $(BSP_ALL_FLAGS) -j4 2>&1 | sed '/: warning: /d'
+
+endef
+
+
 bsp_lib:
 	@echo -e $(COLOR_YELLOW)compiling bsp $(NO_COLOR)
 	$(foreach dir,$(BSP_SRCS),$(call bsp_lib_comp,$(abspath $(dir))))
@@ -263,6 +313,9 @@ bsp_include:
 	@echo -e $(COLOR_YELLOW)including bsp $(NO_COLOR)
 	$(foreach dir,$(BSP_SRCS),$(call bsp_inc_comp,$(dir)))
 
+bsp_test_print:
+	$(foreach dir,$(BSP_SRCS),$(call bsp_lib_test_print,$(dir)))
+
 bsp: $(BSP_LIB)
 
 $(BSP_LIB): | $(BUILD_DIR)/
@@ -291,6 +344,9 @@ clean_lib:
 clean_bsp:
 	$(foreach dir,$(BSP_SRCS),$(call bsp_lib_clean, $(dir)))
 
+clean_bsp_tiny:
+	@rm -f $(BUILD_DIR)/boot.o $(BUILD_DIR)/xscugic_selftest.bc $(BUILD_DIR)/xil-crt0.o $(BUILD_DIR)/xuartps_hw.bc
+
 clean_opt:
 	@rm -f $(BUILD_DIR)/$(TARGET).opt.bc
 
@@ -298,10 +354,10 @@ print:
 	@echo $(CURDIR)
 	@echo $(PROJECT_SRC)
 	@echo $(CSRCS)
+	@echo $(LIB_DIR)
 
 print_bsp:
 	@echo $(BSP_SRCS)
 	@echo $(words $(BSP_SRCS))
-	@echo $(BSP_FRTOS_SRC)
 	@echo $(BSP_LIB)
 	@echo $(BSP_ADD_DEF)
diff --git a/tests/makefiles/Makefile.compile.ultra96 b/tests/makefiles/Makefile.compile.ultra96
index c90b9ff58..0ad17e0d4 100644
--- a/tests/makefiles/Makefile.compile.ultra96
+++ b/tests/makefiles/Makefile.compile.ultra96
@@ -17,8 +17,10 @@ CORE_INC 	:= $(BOARD_SW)/psu_coretexa53_0/include
 
 ################################################################################
 # Flags
+USER_INCS   ?=
+NEW_INCS	:= $(addprefix -I,$(USER_INCS))
 LIB_INCS	:= -I"$(TOOLCHAIN)/lib/gcc/$(TRIPLE)/$(GCC_VERSION)/include/" -I"$(TOOLCHAIN)/lib/gcc/$(TRIPLE)/$(GCC_VERSION)/include-fixed/" -I"$(TOOLCHAIN)/include/" -I"$(TOOLCHAIN)/$(TRIPLE)/libc/usr/include/"
-SRC_INCS	:= -nostdinc -I$(CORE_INC) $(LIB_INCS)
+SRC_INCS	:= -nostdinc -I$(CORE_INC) $(LIB_INCS) $(NEW_INCS)
 LIB_DIR		:= $(BSP_DIR)
 LIBS		:= -lxil,-lgcc,-lc
 
@@ -159,7 +161,7 @@ $(BUILD_DIR)/$(TARGET)_3_linked.bc: $(patsubst %.c,$(BUILD_DIR)/%.bc.3,$(notdir
 # Rules for .bc compilation			                                           #
 ################################################################################
 
-$(BUILD_DIR)/%.bc: $(PROJECT_SRC)/%.c $(MAKEFILES) | $(BUILD_DIR)/
+$(BUILD_DIR)/%.bc: $(PROJECT_SRC)/%.c | $(BUILD_DIR)/
 	@echo -e $(COLOR_BLUE)building $(notdir $@)$(NO_COLOR)
 	@echo -e '  'flags  = $(CLANG_FLAGS)
 	@$(CLANG) -emit-llvm $(CLANG_FLAGS) $(SRC_INCS) $< -c -o $@
diff --git a/tests/makefiles/Makefile.compile.x86 b/tests/makefiles/Makefile.compile.x86
index 137b17071..510e583df 100644
--- a/tests/makefiles/Makefile.compile.x86
+++ b/tests/makefiles/Makefile.compile.x86
@@ -6,30 +6,86 @@
 # but no definitive answer is given
 # https://stackoverflow.com/questions/3509215/llvm-jit-and-native
 
-PROJECT_SRC := ${CURDIR}
-C_SRCS		= $(wildcard $(PROJECT_SRC)/*.c)
-CPP_SRCS	= $(wildcard $(PROJECT_SRC)/*.cpp)
+PROJECT_SRC ?= ${CURDIR}
+SRCFILES    ?= $(wildcard $(PROJECT_SRC)/*.c*)
+C_SRCS 		= $(filter %.c,$(SRCFILES))
+CPP_SRCS 	= $(filter %.cpp,$(SRCFILES))
 BCFILES		= $(patsubst %.c,%.clang.bc,$(notdir $(C_SRCS)))
 BCPPFILES	= $(patsubst %.cpp,%.clang.bcpp,$(notdir $(CPP_SRCS)))
+USER_INCS   ?=
 
 CLANG_FLAGS := -fcolor-diagnostics $(USER_CFLAGS)
+# user link-time flags
 XLFLAGS 	?= -lm
+XLLCFLAGS   ?=
+PROF_FLAGS  := -L"$(HOME)/tools/gperftools-2.7/lib-install/lib" -lprofiler
+# set up includes
 INCS		:=-I$(LEVEL)
-
+ifneq ($(USER_INCS),)
+USR_INC_PFX	:= $(addprefix -I,$(USER_INCS))
+INCS		+= $(USR_INC_PFX)
+endif
 
 exe: $(TARGET).out
 
+############# CPU Profiling #############
+# build executable linked against the gperftools CPU profiler library
+build_prof: $(TARGET).prof.out
+
+$(TARGET).prof.out: $(TARGET).s
+ifeq ($(CPP_SRCS),)
+	$(CLANG) $< $(XLFLAGS) $(PROF_FLAGS) -o $@
+else
+	$(CLANG++) $< $(XLFLAGS) $(PROF_FLAGS) -o $@
+endif
+
+# run and collect profile
+profile:
+	env CPUPROFILE=$(TARGET).prof ./$(TARGET).prof.out
+
+# display profiling results with gv
+gv_prof:
+	pprof --gv ./$(TARGET).prof.out ./$(TARGET).prof
+
+pdf_prof:
+	pprof --pdf ./$(TARGET).prof.out ./$(TARGET).prof > $(TARGET).prof.pdf
+
+############ Cache Profiling ############
+# all sizes in bytes
+ICACHE_SIZE  		:= 32768
+DCACHE_SIZE  		:= 32768
+LLCACHE_SIZE 		:= 524288
+ICACHE_ASSOC  		:= 4
+DCACHE_ASSOC  		:= 4
+LLCACHE_ASSOC 		:= 8
+ICACHE_LINE_SIZE  	:= 32
+DCACHE_LINE_SIZE  	:= 32
+LLCACHE_LINE_SIZE 	:= 32
+
+ICACHE_PARAMS := --I1=$(ICACHE_SIZE),$(ICACHE_ASSOC),$(ICACHE_LINE_SIZE)
+DCACHE_PARAMS := --D1=$(DCACHE_SIZE),$(DCACHE_ASSOC),$(DCACHE_LINE_SIZE)
+LLCACHE_PARAMS := --LL=$(LLCACHE_SIZE),$(LLCACHE_ASSOC),$(LLCACHE_LINE_SIZE)
+CACHEGRIND_PARAMS := --tool=cachegrind --cache-sim=yes
+CACHEGRIND_PARAMS += $(ICACHE_PARAMS) $(DCACHE_PARAMS) $(LLCACHE_PARAMS)
+CACHEGRIND_PARAMS += --cachegrind-out-file=$(TARGET).cache_prof.log
+
+cache_prof:
+	valgrind $(CACHEGRIND_PARAMS) ./$(TARGET).out
+
+cache_view:
+	kcachegrind $(TARGET).cache_prof.log 2> /dev/null
+
 ############### Link step ###############
 $(TARGET).out: $(TARGET).s
 ifeq ($(CPP_SRCS),)
-	$(CLANG) $(TARGET).s $(XLFLAGS) -o $@
+	@$(CLANG) $(TARGET).s $(XLFLAGS) -o $@
 else
-	$(CLANG++) $(TARGET).s $(XLFLAGS) -o $@
+	@$(CLANG++) $(TARGET).s $(XLFLAGS) -o $@
 endif
 
 ################## LLC ##################
 %.s: %.opt.bc
-	$(LLVM_LLC) -asm-verbose $< -o=$@
+	@$(LLVM_LLC) -asm-verbose $(XLLCFLAGS) $< -o=$@
 
 $(TARGET).bc: $(TARGET).opt.bc
 	@cp $< $@
@@ -39,7 +95,8 @@ $(TARGET).bc: $(TARGET).opt.bc
 $(TARGET).opt.bc: $(TARGET).lbc
 	@echo -e $(COLOR_BLUE)Running through optimizer $(NO_COLOR)
 	@echo "  flags = $(OPT_FLAGS) $(OPT_PASSES)"
-	$(LLVM_OPT) $(OPT_FLAGS) $(OPT_LIBS_LOAD) $(OPT_PASSES) -o $@ $<
+	@$(LLVM_OPT) $(OPT_FLAGS) $(OPT_LIBS_LOAD) $(OPT_PASSES) -o $@ $<
+	@$(LLVM_DIS) -f $@
 
 %.ll: %.bc
 	@$(LLVM_DIS) -f $<
@@ -47,21 +104,59 @@ $(TARGET).opt.bc: $(TARGET).lbc
 ############### LLVM-LINK ###############
 $(TARGET).lbc: $(BCFILES) $(BCPPFILES)
 	@echo -e $(COLOR_MAGENTA)Linking source files $(NO_COLOR)
-	$(LLVM_LINK) $^ -o $(TARGET).lbc
+	@$(LLVM_LINK) $^ -o $(TARGET).lbc
 
 ################# CLANG #################
 # Lowest level target should depend on the Makefiles
-%.clang.bc: %.c
+%.clang.bc: $(PROJECT_SRC)/%.c
 	@echo -e $(COLOR_BLUE) Building $@ $(NO_COLOR)
 	@echo '  'flags = $(CLANG_FLAGS)
 	@$(CLANG) $(INCS) $(CLANG_FLAGS) -emit-llvm $< -c -o $@
 
 ################ CLANG++ ################
 # Lowest level target should depend on the Makefiles
-%.clang.bcpp: %.cpp $(MAKEFILES)
+%.clang.bcpp: $(PROJECT_SRC)/%.cpp
 	@echo -e $(COLOR_BLUE) Building $@ $(NO_COLOR)
 	@echo '  'flags = $(CLANG_FLAGS)
-	$(CLANG++) $(INCS) $(CLANG_FLAGS) -emit-llvm $< -c -o $@
+	@$(CLANG++) $(INCS) $(CLANG_FLAGS) -emit-llvm $< -c -o $@
+
+
+#########################################
+# support sources in sub-directories
+
+# C files
+define bc_file_compile =
+%.clang.bc: $(1)/%.c
+	@echo -e $$(COLOR_BLUE)Building $$(notdir $$@)$$(NO_COLOR)
+	@$$(CLANG) $$(INCS) $$(CLANG_FLAGS) -emit-llvm $$< -c -o $$@
+
+endef
+
+# C++ files
+define bcpp_file_compile =
+%.clang.bcpp: $(1)/%.cpp
+	@echo -e $$(COLOR_BLUE)Building $$(notdir $$@)$$(NO_COLOR)
+	@$$(CLANG++) $$(INCS) $$(CLANG_FLAGS) -emit-llvm $$< -c -o $$@
+
+endef
+
+$(foreach dir,$(PROJECT_SUB_DIRS),$(eval $(call bc_file_compile,$(dir))))
+$(foreach dir,$(PROJECT_SUB_DIRS),$(eval $(call bcpp_file_compile,$(dir))))
+
+
+############### cleaning ################
+.PHONY: build_prof profile gv_prof pdf_prof cache_prof cache_view clean clean_opt debug_print
 
 clean:
 	@$(RM) -rf *.bc *.bcpp *.lbc *.llvm.bc *.ll *.s $(TARGET).out
+
+clean_opt:
+	@$(RM) $(TARGET).lbc
+
+# used to verify correct Macro definition
+debug_print:
+	@echo SRCFILES = $(SRCFILES)
+	@echo C_SRCS = $(C_SRCS)
+	@echo CPP_SRCS = $(CPP_SRCS)
+	@echo BCFILES = $(BCFILES)
+	@echo BCPPFILES = $(BCPPFILES)
diff --git a/tests/makefiles/Makefile.program b/tests/makefiles/Makefile.program
index 85de82295..304357c3f 100644
--- a/tests/makefiles/Makefile.program
+++ b/tests/makefiles/Makefile.program
@@ -35,7 +35,7 @@ ifeq ($(BOARD), $(BOARD_HIFIVE1))
 	-ex "monitor resume" \
 	-ex "monitor shutdown" \
 	-ex "quit" \
-	&& echo "Successfully uploaded 'hello' to freedom-e300-hifive1."
+	&& echo "Successfully uploaded '$(TARGET)' to freedom-e300-hifive1."
 
 else ifeq ($(BOARD), $(BOARD_X86))
 	@echo -e $(COLOR_YELLOW)Executing$(NO_COLOR) $(TARGET).out
@@ -46,7 +46,12 @@ else ifeq ($(BOARD), $(BOARD_LLI))
 	@$(LLVM_LLI) $(LLI_ARGS) $(TARGET).ll
 
 else ifeq ($(BOARD), $(BOARD_PYNQ))
-	ELF_FILE=./build/$(TARGET) \
+	ELF_FILE=./$(BUILD_DIR)/$(TARGET) \
+	JTAG_CABLE_FILTER="Xilinx PYNQ-Z1 *" \
+	$(XILINX_SDK)/bin/xsdk -batch  $(BOARD_DIR)/sw/run_elf.tcl | $(SKIP_SECTION)
+
+else ifeq ($(BOARD), $(BOARD_PYNQ_RTOS))
+	ELF_FILE=./$(BUILD_DIR)/$(TARGET) \
 	JTAG_CABLE_FILTER="Xilinx PYNQ-Z1 *" \
 	$(XILINX_SDK)/bin/xsdk -batch  $(BOARD_DIR)/sw/run_elf.tcl | $(SKIP_SECTION)
 
@@ -87,6 +92,13 @@ else ifeq ($(BOARD), $(BOARD_PYNQ))
 	JTAG_DEVICE_INDEX=1 \
 	JTAG_REGISTER_DONE_BIT=REGISTER.CONFIG_STATUS.BIT14_DONE_PIN \
 	$(VIVADO)/bin/vivado -mode batch -source $(BOARDS)/program_bit.tcl -notrace
+
+else ifeq ($(BOARD), $(BOARD_PYNQ_RTOS))
+	BITSTREAM=$(BOARD_DIR)/hw/system_wrapper.bit \
+	JTAG_BOARD_FILTER="localhost:3121/xilinx_tcf/Digilent/*" \
+	JTAG_DEVICE_INDEX=1 \
+	JTAG_REGISTER_DONE_BIT=REGISTER.CONFIG_STATUS.BIT14_DONE_PIN \
+	$(VIVADO)/bin/vivado -mode batch -source $(BOARDS)/program_bit.tcl -notrace
 else
 	@echo "Invalid board";
 endif
diff --git a/tests/makefiles/config b/tests/makefiles/config
index 6dc81518c..d79560e23 100644
--- a/tests/makefiles/config
+++ b/tests/makefiles/config
@@ -3,6 +3,7 @@ BOARD_LLI = lli
 BOARD_X86 = x86
 BOARD_ULTRA96 = ultra96
 BOARD_PYNQ = pynq
+BOARD_PYNQ_RTOS = pynqrtos
 BOARD_TMS1224 = tms1224
 BOARD_TMS4357 = tms4357
 BOARD_MSP432  = msp432

From 661f237401298f0689837d836f7d99cf1c04ad82 Mon Sep 17 00:00:00 2001
From: Benjamin James <bjames@byu.net>
Date: Mon, 3 Aug 2020 15:56:42 -0600
Subject: [PATCH 4/9] Updating documentation with the latest features.

---
 docs/.gitignore                               |   1 +
 docs/source/conf.py                           |   2 +-
 docs/source/eclipse.rst                       |  34 +++++-
 docs/source/index.rst                         |   5 +-
 docs/source/passes.rst                        | 113 +++++++++++++++++-
 docs/source/release_notes.rst                 |  53 +++++++-
 docs/source/repl_scope.rst                    |  26 ++++
 .../dataflowProtection/synchronization.cpp    |  12 ++
 8 files changed, 228 insertions(+), 18 deletions(-)
 create mode 100644 docs/source/repl_scope.rst

diff --git a/docs/.gitignore b/docs/.gitignore
index a7747886c..3aaf9ee1d 100644
--- a/docs/.gitignore
+++ b/docs/.gitignore
@@ -1,2 +1,3 @@
 /build
+.vscode
 
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 67684e9d7..c01a1c783 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -18,7 +18,7 @@
 # -- Project information -----------------------------------------------------
 
 project = 'COAST'
-copyright = '2019, Jeffrey Goeders, Benjamin James'
+copyright = '2020, Jeffrey Goeders, Benjamin James'
 author = 'Jeffrey Goeders, Benjamin James, Matthew Bohman'
 
 
diff --git a/docs/source/eclipse.rst b/docs/source/eclipse.rst
index 5db85e59a..5befb60c4 100644
--- a/docs/source/eclipse.rst
+++ b/docs/source/eclipse.rst
@@ -1,12 +1,17 @@
 .. guide to using Eclipse with LLVM
 
+Using an IDE to aid LLVM development
+*************************************
+
+We have used both Eclipse and Visual Studio Code in the development of COAST.  This is very helpful because it allows code completion hints that inform you what methods are available for specific classes.
+
 Using Eclipse with LLVM
-************************
+=========================
 
-You can write your code in a plain text editor, or you can use Eclipse to help you manage all of the classes and methods. This guide was written for Eclipse 4.10.0 using the CDT.
+This guide was written for Eclipse 4.10.0 using the CDT.
 
 Setting up the project
-=========================
+-------------------------
 
 1. Select "File -> New -> Makefile Project with Existing Code".
 2. Enter ``projects`` as the project name.
@@ -19,7 +24,7 @@ Setting up the project
 9. When you click on the "Build" button the projects will be compiled.
 
 Building the projects
-==========================
+-------------------------
 
 1. Right click on the ``projects/build`` subdirectory, then "Make Targets -> Create".
 2. Call the target name ``all`` and click OK.
@@ -28,7 +33,7 @@ Building the projects
 
 
 Fixing the CDT settings
-========================
+-------------------------
 
 The default settings of the project are not sufficient to allow the Eclipse CDT indexer to work correctly.  While not necessary to fix the CDT settings, it allows you to use the auotcomplete functionality of Eclipse.
 
@@ -49,3 +54,22 @@ The default settings of the project are not sufficient to allow the Eclipse CDT
 #. Under the "Discovery" tab select "CDT GCC Built-in Compiler Settings"
 #. Edit the "Command to get compiler specs" the same as before
 #. Select "Apply and Close"
+
+
+Using VS Code with LLVM
+=========================
+
+1. Open VS Code
+#. File -> Open Folder
+#. Select the directory that contains the files for the pass you want to develop
+#. On the bottom ribbon at the right there will be a button next to the language configuration (ours says "Linux")
+#. Hovering over this button says "C/C++ Configuration". Click on it
+#. You will be taken to a page that allows you to set up a specific configuration for this directory.
+#. Click the button "Add Configuration" and give it a name
+#. Add the path to the LLVM include files in the section "Include path"
+#. For example, because I built LLVM from source, I added the following:
+
+.. code-block:: bash
+
+    /home/$USER/coast/llvm-project/llvm/include
+    /home/$USER/coast/build/include
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 34e3bcee3..4720041e1 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -14,11 +14,12 @@ COAST
 
     setup
     make_system
-    eclipse
     passes
+    repl_scope
     troubleshooting
-    cfcss
     release_notes
+    eclipse
+    cfcss
 
 
 Folder guide
diff --git a/docs/source/passes.rst b/docs/source/passes.rst
index e0ffaa901..22d47cbfb 100644
--- a/docs/source/passes.rst
+++ b/docs/source/passes.rst
@@ -14,6 +14,7 @@ Description
 - **DWC**\ : This pass implements duplication with compare (DWC) as a form of data flow protection. DWC is also known as dual modular redundancy (DMR). It is based on EDDI [#f2]_. Behind the scenes, this pass simply calls the dataflowProtection pass with the proper arguments.
 - **exitMarker**\ : For software fault injection we found it helpful to have known breakpoints at the different places that ``main()`` can return. This pass places a function call to a dummy function, ``EXIT MARKER``, immediately before these return statements. Breakpoints placed at this function allow debuggers to access the final processor state.
 - **TMR**\ : This pass implements triple modular redundancy (TMR) as a form of data flow protection. It is based on SWIFT-R [#f3]_ and Trikaya [#f4]_. Behind the scenes, this pass simply calls the dataflowProtection pass with the proper arguments.
+- **smallProfile**\ : This pass can be used to collect dynamic function call counts.
 
 Configuration Options
 ======================
@@ -28,6 +29,8 @@ Command Line Parameters
 
 These options are only applicable to the ``-DWC`` and ``-TMR`` passes.
 
+The details for each of these options can be found in the :ref:`repl_details` section.
+
 .. table::
     :widths: 25 40
 
@@ -94,6 +97,40 @@ These options are only applicable to the ``-DWC`` and ``-TMR`` passes.
 Note: Replication rules defined by Chielle et al. [#f5]_\ .
 
 
+.. versionadded:: 1.4
+
+.. table::
+    :widths: 25 40
+
+    +-------------------------+-------------------------------------------+
+    |  ``-isrFunctions=<X>``  | <X> is a comma separated list of the      |
+    |                         | function names that should be treated     |
+    |                         | as Interrupt Service Routines (ISRs).     |
+    +-------------------------+-------------------------------------------+
+    |   ``-cloneReturn=<X>``  | <X> is a comma separated list of the      |
+    |                         | function names that should have their     |
+    |                         | return values cloned.                     |
+    +-------------------------+-------------------------------------------+
+    | ``-cloneAfterCall=<X>`` | <X> is a comma separated list of the      |
+    |                         | function names that will have their       |
+    |                         | arguments cloned after the call.          |
+    +-------------------------+-------------------------------------------+
+    | ``-protectedLibFn=<X>`` | <X> is a comma separated list of the      |
+    |                         | function names that should be protected   |
+    |                         | without having their signatures changed.  |
+    +-------------------------+-------------------------------------------+
+    |     ``-countSyncs``     | Instructs COAST to keep track of the      |
+    |                         | dynamic number of synchronization checks. |
+    |                         | Requires ``-countErrors``.                |
+    +-------------------------+-------------------------------------------+
+    |    ``-protectStack``    | Enable experimental stack protection.     |
+    +-------------------------+-------------------------------------------+
+    |   ``-noCloneOpsCheck``  | Disable exiting on failure of check       |
+    |                         | ``verifyCloningSuccess``.                 |
+    +-------------------------+-------------------------------------------+
+
+
+
 .. _in_code_directives:
 
 In-code Directives
@@ -131,7 +168,7 @@ In-code Directives
     |                      | instead of modifying the function body.               |
     +----------------------+-------------------------------------------------------+
 
-.. versionadded:: Oct2019
+.. versionadded:: 1.2
 
 .. table::
     :widths: 25 40
@@ -172,6 +209,42 @@ In-code Directives
     |                                    | calling the function.                           |
     +------------------------------------+-------------------------------------------------+
 
+.. versionadded:: 1.4
+
+.. table::
+    :widths: 25 40
+
+    +--------------------------------+---------------------------------------+
+    |         ``__ISR_FUNC``         | Used to mark functions that should be |
+    |                                | treated as Interrupt Service Routines |
+    |                                | (ISRs).                               |
+    +--------------------------------+---------------------------------------+
+    |        ``__xMR_RET_VAL``       | Used to mark functions that should    |
+    |                                | have their return values cloned.      |
+    +--------------------------------+---------------------------------------+
+    |       ``__xMR_PROT_LIB``       | Used to mark functions that should    |
+    |                                | be protected without having their     |
+    |                                | signatures changed.                   |
+    +--------------------------------+---------------------------------------+
+    |    ``__xMR_ALL_AFTER_CALL``    | Used to mark functions that should    |
+    |                                | have their arguments cloned after     |
+    |                                | the call.                             |
+    +--------------------------------+---------------------------------------+
+    | ``__xMR_AFTER_CALL(fname, x)`` | Specific version of the above macro.  |
+    |                                | Specify the arg numbers as            |
+    |                                | ``(name, 1_2_3)``.                    |
+    |                                | Must be registered, similar to        |
+    |                                | ``GENERIC_COAST_WRAPPER(fname)``      |
+    +--------------------------------+---------------------------------------+
+    |      ``__NO_xMR_ARG(num)``     | The argument [num] should not be      |
+    |                                | replicated. If multiple arguments     |
+    |                                | need to be marked, this directive     |
+    |                                | should be placed on the function      |
+    |                                | multiple times.                       |
+    +--------------------------------+---------------------------------------+
+    |      ``__COAST_NO_INLINE``     | Convenience for no-inlining functions |
+    +--------------------------------+---------------------------------------+
+
 
 See the file COAST.h_
 
@@ -227,8 +300,17 @@ When to use replication command line options
       - Library
       - ``-skipLibCalls=<X>``
       - Whenever the call should not be repeated, such as calls interfacing with I/O.
+    * - Protect without changing signature
+      - User
+      - ``-protectedLibFn=<X>``
+      - Library functions you have the source code for.
+    * -
+      - Library
+      - N/A
+      - Can't protect it if you don't have the source code.
 
 
+.. _repl_details:
 
 Details
 =========
@@ -242,7 +324,7 @@ The first option, ``-noMemReplication``, should be used whenever memory has a se
 
 The option ``-noStoreAddrSync`` corresponds to C5. In EDDI, memory was simply duplicated and each duplicate was offset from the original value by a constant. However, COAST runs before the linker, and thus has no notion of an address space. We implement rules C3 and C5, checking addresses before stores and loads, for data structures such as arrays and structs that have an offset from a base address. These offsets, instead of the base addresses, are compared in the synchronization logic.
 
-.. versionchanged:: Oct2019
+.. versionchanged:: 1.2
 
 As of the October 2019 release, COAST no longer syncs before storing data.  Test data indicated that, in many cases, the number of synchronization points generated by this rule limited the effective protection that the replication of variables afforded.  This behavior can be overridden using the ``-storeDataSync`` flag.
 
@@ -253,9 +335,28 @@ Replication Scope
 
 The user can specify any functions and global variables that should not be protected using ``-ignoreFns`` and ``-ignoreGlbls``. At minimum, these options should be used to exclude code that interacts with hardware devices (GPIO, UART) from the SoR. Replicating this code is likely to lead to errors. The option ``-replicateFnCalls`` causes user functions to be called in a coarse grained way, meaning the call is replicated instead of fine-grained instruction replication within the function body. Library function calls can also be excluded from replication via the flag ``-skipLibCalls``, which causes those calls to only be executed once. These two options should be used when multiple independent copies of a return value should be generated, instead of a single return value propagating through all replicated instructions. Changing the scope of replication can cause problems across function calls.
 
-.. versionadded:: Oct2019
+.. versionadded:: 1.2
+
+Before processing the IR code, COAST begins by checking to make sure the replication scope rules it was given are consistent.  It checks to make sure all cloned globals are only used in functions that are also protected.  If they are not, the compilation will fail, with an error message informing the user which global is used in which function.  The user has the option to ignore these checks if they feel that it is safe.  This is done using the ``__COAST_IGNORE_GLOBAL`` macro mentioned above.
+
+.. versionadded:: 1.4
+
+There are also some options that have been added that allow more fine-grained control over how different functions and values are protected.  The first of these is the command line argument ``-cloneReturn``, or directive ``__xMR_RET_VAL``.  This instructs COAST that the return value of the function should be cloned.  This has been implemented by adding extra arguments to the end of the parameter list that are pointer types of the normal return value.  This prevents the values from passing through a bottleneck.  This is particularly useful for functions that return addresses to memory spaces that have been dynamically allocated.
+
+Another recently added option is the ability to mark functions as "protected library functions" (``-protectedLibFn=<X>``, ``__xMR_PROT_LIB``).  The idea behind this is that there are some functions that should not have their signatures changed, but should still have their bodies protected.
+
+Another interesting feature added in this version is the ability to copy the value of the original variable into its clone(s) *after* the function call has been completed.  An example of when this might be useful is the function `sscanf <http://www.cplusplus.com/reference/cstdio/sscanf/>`_.  This function will read values from a string based on a format specifier and put the values into the pointers provided.
+
+.. code-block:: c
+
+  sscanf (sentence,"%s %*s %d",str,&i);
+
+Copying the values after the call allows the copies of the variables to stay in sync with each other, even when calling a library function that can only be called once and that modifies a variable by reference.
+
+We have introduced a way to mark functions as Interrupt Service Routines (ISRs), which means they will not be changed in any way, nor removed if they don't appear to have any uses.
+
+COAST now has much better support for changing the protection of variables that are local to protected functions.  They can be excluded from the Scope of Replication using the macro ``__NO_xMR``.  Even function arguments can be excluded using the macro ``__NO_xMR_ARG(num)``.
 
-Before processing the IR code, COAST begins by checking to make sure the replication scope rules it was given are consistent.  It checks to make sure all cloned globals are only used in functions that are also protected.  If they are not, the compilation will fail, with an error message informating the user which global is used in which function.  The user has the option to ignore these checks if they feel that it is safe.  This is done using the ``__COAST_IGNORE_GLOBAL`` macro mentioned above.
 
 Other Options
 ----------------
@@ -295,7 +396,7 @@ Debug Statements
 
 By default, the Debug Statements pass will add code to the beginning of every basic block that prints out the function name followed by the name of the basic block.  For example, you would expect the first message to be ``main->entry``.  This can produce 100s of MegaBytes of data, so it is important to redirect this output to a file, as shown in the example above.  This verbose output represents a complete call graph of the execution, although trawling through all of this data can be quite difficult.
 
-.. versionadded:: Oct2019
+.. versionadded:: 1.2
 
 There is an option to only add print statements to certain functions.  Pass ``-fnPrintList=`` with a comma-separated list of function names that will be instrumented with the print statements.  This will allow examining smaller parts of the execution at a time.
 
@@ -303,7 +404,7 @@ There is an option to only add print statements to certain functions.  Pass ``-f
 Small Profiler
 -----------------
 
-.. versionadded:: Oct2019
+.. versionadded:: 1.2
 
 The Small Profiler is a pass which simply counts the number of calls to each function in the module.  It creates global variables that correspond to each function in the module.  Each time a function is called, the corresponding global variable is incremented.  The pass adds a call to a function named ``PRINT_PROFILE_STATS`` immediately before the ``main`` function exits.  If the program does not terminate, calls to this function may be inserted manually by the programmer.
 
diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index 4fe07fae1..ea9f3c738 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -4,14 +4,59 @@ Release Notes
 **************
 
 
-November 2019
-==============
+v1.4 - August 2020
+=====================
+
+Features
+---------
+
+- Support for cloning function return values
+
+- New unit tests
+
+- Better copying of debug info
+
+- Experimental stack protection
+
+- 7 new command line arguments
+
+See :ref:`coast_cli_params` for more information.
+
+
+Directives
+------------
+
+7 new directives
+
+- ``__ISR_FUNC``
+- ``__xMR_RET_VAL``
+- ``__xMR_PROT_LIB``
+- ``__xMR_ALL_AFTER_CALL``
+- ``__xMR_AFTER_CALL``
+- ``__NO_xMR_ARG``
+- ``__COAST_NO_INLINE``
+
+See :ref:`in_code_directives` for more information.
+
+
+Bug Fixes
+-------------
+
+- Correct support for variadic functions
+- Fix up debug info for global variables so it works better with GDB
+- Better removal of unused functions
+- Official way of marking ISR functions instead of function name text matching
+
+
+
+v1.3 - November 2019
+=====================
 
 Changed the source of the LLVM project files from SVN (`deprecated <https://llvm.org/docs/Proposals/GitHubMove.html>`_) to the Git mono-repo, `version 7.1.0 <https://github.com/llvm/llvm-project/tree/llvmorg-7.1.0>`_.
 
 
-October 2019
-==============
+v1.2 - October 2019
+====================
 
 
 Features
diff --git a/docs/source/repl_scope.rst b/docs/source/repl_scope.rst
new file mode 100644
index 000000000..d866a3f7d
--- /dev/null
+++ b/docs/source/repl_scope.rst
@@ -0,0 +1,26 @@
+.. This file describes what the Scope of Replication is and why its integrity must be maintained
+
+.. _scope_of_replication:
+
+Scope of Replication
+**********************
+
+We use the term Scope of Replication (SoR), also referred to as the Sphere of Replication, to indicate which portions of the source code are to be protected.  In large applications, it may be too much overhead to have the entire program protected by COAST, so there is a way to configure COAST to only protect certain functions, using macros found in the header file `COAST.h <https://github.com/byuccl/coast/blob/master/tests/COAST.h>`_.
+
+
+Configuration
+===============
+
+COAST allows for very detailed control over what belongs inside or outside of the Scope of Replication.  There are numerous :ref:`coast_cli_params` and :ref:`in_code_directives` which allow for projects to be configured very precisely.  COAST even includes a verification step that tries to ensure all SoR rules are self-consistent.  It can detect if protected global variables are used inside unprotected functions, or vice-versa.  However, this system is not perfect, and so the application writer must be aware of the potential pitfalls that could be encountered when using specific replication rules.
+
+
+Pointer Crossings
+==================
+
+One of the most common problems to be aware of is pointers which cross the SoR boundaries.  Many applications use dynamically allocated memory.  If the function that allocates this memory is inside the SoR, then *all* references to these addresses must also be within the SoR.  It is true that read-only access would not cause errors, as in the case of using ``printf`` to view the value of such a pointer.  But no writes can happen outside the SoR, otherwise the addresses will get out of sync.
+
+
+Example
+==========
+
+The unit test `linkedList.c <https://github.com/byuccl/coast/blob/prepRelease_1_4/tests/TMRregression/unitTests/linkedList.c>`_ shows exactly how SoR crossings can go wrong by looking at a possible implementation of a linked list.
diff --git a/projects/dataflowProtection/synchronization.cpp b/projects/dataflowProtection/synchronization.cpp
index 6b179f5ea..0e78c9835 100644
--- a/projects/dataflowProtection/synchronization.cpp
+++ b/projects/dataflowProtection/synchronization.cpp
@@ -1412,6 +1412,18 @@ void dataflowProtection::insertTMRCorrectionCount(Instruction* cmpInst, GlobalVa
 			"errorHandler." + Twine(originalBlock->getParent()->getName()),
 			originalBlock->getParent(), originalBlock);
 
+	if (countSyncsFlag) {
+		/*
+		 * Increment global sync counter
+		 */
+		// Inserted before cmpInst -- load global sync counter, increment, store
+		LoadInst* loadSyncCounter = new LoadInst(dynamicSyncCount, "ldSyncCnt", cmpInst);
+		Constant* one = ConstantInt::get(loadSyncCounter->getType(), 1, false);
+		BinaryOperator* incSyncCounter = BinaryOperator::CreateAdd(
+										loadSyncCounter, one, "incSyncCnt", cmpInst);
+		StoreInst* SI = new StoreInst(incSyncCounter, dynamicSyncCount, cmpInst);
+	}
+
 	// Populate new block -- load global counter, increment, store
 	LoadInst* LI = new LoadInst(TMRErrorDetected, "errFlagLoad", errBlock);
 	Constant* one = ConstantInt::get(LI->getType(), 1, false);

From 0f7718196e69a7999aa51847cc3e32c5c1fac5a2 Mon Sep 17 00:00:00 2001
From: Benjamin James <bjames@byu.net>
Date: Mon, 3 Aug 2020 16:05:33 -0600
Subject: [PATCH 5/9] Small fix to docs.

---
 docs/source/release_notes.rst | 5 ++---
 docs/source/repl_scope.rst    | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/docs/source/release_notes.rst b/docs/source/release_notes.rst
index ea9f3c738..d7393aba4 100644
--- a/docs/source/release_notes.rst
+++ b/docs/source/release_notes.rst
@@ -18,9 +18,8 @@ Features
 
 - Experimental stack protection
 
-- 7 new command line arguments
-
-See :ref:`coast_cli_params` for more information.
+- | 7 new command line arguments
+  | See :ref:`coast_cli_params` for more information.
 
 
 Directives
diff --git a/docs/source/repl_scope.rst b/docs/source/repl_scope.rst
index d866a3f7d..2f93dca3d 100644
--- a/docs/source/repl_scope.rst
+++ b/docs/source/repl_scope.rst
@@ -23,4 +23,4 @@ One of the most common problems to be aware of is pointers which cross the SoR b
 Example
 ==========
 
-The unit test `linkedList.c <https://github.com/byuccl/coast/blob/prepRelease_1_4/tests/TMRregression/unitTests/linkedList.c>`_ shows exactly how SoR crossings can go wrong by looking at a possible implementation of a linked list.
+The unit test `linkedList.c <https://github.com/byuccl/coast/blob/master/tests/TMRregression/unitTests/linkedList.c>`_ shows exactly how SoR crossings can go wrong by looking at a possible implementation of a linked list.

From efdae825735a1f72c4fad912f684ca784440558b Mon Sep 17 00:00:00 2001
From: Benjamin James <bjames@byu.net>
Date: Wed, 5 Aug 2020 08:49:44 -0600
Subject: [PATCH 6/9] Doc fix, adding Travis files.

---
 .github/.travis/build.sh     |   3 +
 .travis.yml                  |  41 ++++++++++
 Makefile                     |  18 +++++
 docs/source/setup.rst        |   4 +-
 unittest/cfg/fast.yml        |   7 ++
 unittest/cfg/full.yml        |  36 +++++++++
 unittest/cfg/full_tmr.yml    |  19 +++++
 unittest/cfg/regression.yml  |  11 +++
 unittest/llvm-stress.py      |  83 ++++++++++++++++++++
 unittest/makefile.customFile |  16 ++++
 unittest/pyDriver.py         |  88 +++++++++++++++++++++
 unittest/unittest.py         | 144 +++++++++++++++++++++++++++++++++++
 12 files changed, 469 insertions(+), 1 deletion(-)
 create mode 100644 .github/.travis/build.sh
 create mode 100644 .travis.yml
 create mode 100644 Makefile
 create mode 100644 unittest/cfg/fast.yml
 create mode 100644 unittest/cfg/full.yml
 create mode 100644 unittest/cfg/full_tmr.yml
 create mode 100644 unittest/cfg/regression.yml
 create mode 100644 unittest/llvm-stress.py
 create mode 100644 unittest/makefile.customFile
 create mode 100644 unittest/pyDriver.py
 create mode 100644 unittest/unittest.py

diff --git a/.github/.travis/build.sh b/.github/.travis/build.sh
new file mode 100755
index 000000000..79bd8f22d
--- /dev/null
+++ b/.github/.travis/build.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+cd projects/build && cmake .. && make
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 000000000..4de1a6045
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,41 @@
+language: c
+os: linux
+
+dist: bionic
+addons:
+  apt:
+    packages:
+      - clang-7
+      - llvm-7
+      - libc++-dev
+      - libc++abi-dev
+      - gcc-arm-none-eabi
+      - libnewlib-arm-none-eabi
+
+git:
+  submodules: false
+
+jobs:
+  include:
+    - stage: build
+      name: build
+      script:
+        - .github/.travis/build.sh
+
+    - stage: test
+      name: test_fast
+      script:
+        - .github/.travis/build.sh
+        - make test_fast
+
+    - stage: test
+      name: test_full
+      script:
+        - .github/.travis/build.sh
+        - make test_full
+    
+    - stage: test
+      name: regression_test
+      script:
+        - .github/.travis/build.sh
+        - make test_regression
diff --git a/Makefile b/Makefile
new file mode 100644
index 000000000..0da588b79
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,18 @@
+
+build:
+	cd projects/build && cmake .. && make
+
+test_fast:
+	python3 unittest/unittest.py unittest/cfg/fast.yml
+
+test_full_tmr:
+	python3 unittest/unittest.py unittest/cfg/full_tmr.yml
+
+test_full:
+	python3 unittest/unittest.py unittest/cfg/full.yml
+
+# runs COAST on the unit tests
+test_regression:
+	python3 unittest/pyDriver.py unittest/cfg/regression.yml
+# `build` is also a directory name, and no test target creates a file, so mark them all phony
+.PHONY: build test_fast test_full_tmr test_full test_regression
diff --git a/docs/source/setup.rst b/docs/source/setup.rst
index d7dc34da5..d5ae47b29 100644
--- a/docs/source/setup.rst
+++ b/docs/source/setup.rst
@@ -55,7 +55,9 @@ If the other two options do not work for your system, or if you prefer to have a
 
 .. code-block:: bash
 
-    cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DLLVM_ENABLE_ASSERTIONS=On ../llvm/
+    cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Debug -DLLVM_ENABLE_ASSERTIONS=On ../llvm-project/llvm/
+
+To enable support for RISCV targets, add ``-DLLVM_EXPERIMENTAL_TARGETS_TO_BUILD=RISCV`` to the ``cmake`` invocation.
 
 See the ``README.md`` in the "build" folder for more information on how to further configure LLVM.
 
diff --git a/unittest/cfg/fast.yml b/unittest/cfg/fast.yml
new file mode 100644
index 000000000..4df43543f
--- /dev/null
+++ b/unittest/cfg/fast.yml
@@ -0,0 +1,7 @@
+benchmarks:
+  - path: matrixMultiply
+
+OPT_PASSES:
+  - ""
+  - "-DWC"
+  - "-TMR"
\ No newline at end of file
diff --git a/unittest/cfg/full.yml b/unittest/cfg/full.yml
new file mode 100644
index 000000000..8424b9ac0
--- /dev/null
+++ b/unittest/cfg/full.yml
@@ -0,0 +1,36 @@
+benchmarks:
+  - path: matrixMultiply
+    re: "Number of errors: 0"
+  
+  - path: chstone
+    re: "RESULT: PASS"
+  
+  - path: crazyCF
+  
+  - path: crc16
+
+  - path: aes
+
+  - path: cache_test
+
+  # - path: TMRregression
+
+OPT_PASSES:
+  - ""
+  - "-DWC"
+  - "-TMR"   
+  - "-TMR -countErrors"
+  - "-DWC -noMemReplication"
+  - "-TMR -noMemReplication"
+  - "-DWC -noLoadSync"
+  - "-TMR -noLoadSync"
+  - "-DWC -noStoreDataSync"
+  - "-TMR -noStoreDataSync"
+  - "-DWC -noStoreAddrSync"
+  - "-TMR -noStoreAddrSync"
+  - "-DWC -noMemReplication -noLoadSync"
+  - "-TMR -noMemReplication -noLoadSync"
+  - "-DWC -noMemReplication -noStoreDataSync"
+  - "-TMR -noMemReplication -noStoreDataSync"
+  - "-DWC -noMemReplication -noStoreAddrSync"
+  - "-TMR -noMemReplication -noStoreAddrSync"
\ No newline at end of file
diff --git a/unittest/cfg/full_tmr.yml b/unittest/cfg/full_tmr.yml
new file mode 100644
index 000000000..b87172381
--- /dev/null
+++ b/unittest/cfg/full_tmr.yml
@@ -0,0 +1,19 @@
+benchmarks:
+  - path: matrixMultiply
+    re: "Number of errors: 0"
+  
+  - path: chstone
+    re: "RESULT: PASS"
+  
+  - path: crazyCF
+  
+  - path: crc16
+
+  - path: aes
+
+  - path: cache_test
+
+  # - path: TMRregression
+
+OPT_PASSES:
+  - "-TMR"
diff --git a/unittest/cfg/regression.yml b/unittest/cfg/regression.yml
new file mode 100644
index 000000000..f1438b28b
--- /dev/null
+++ b/unittest/cfg/regression.yml
@@ -0,0 +1,11 @@
+drivers:
+  - path: tests/TMRregression/unitTestDriver.py
+    re: "Success!"
+
+  - path: unittest/llvm-stress.py
+    re: "Success!"
+
+OPT_PASSES:
+  - ""
+  - " -DWC"
+  - " -TMR"
diff --git a/unittest/llvm-stress.py b/unittest/llvm-stress.py
new file mode 100644
index 000000000..1cd66a4b5
--- /dev/null
+++ b/unittest/llvm-stress.py
@@ -0,0 +1,83 @@
+###########################################################
+# driver for running llvm-stress
+###########################################################
+
+
+import os
+import time
+import shlex
+import pathlib
+import argparse
+import tempfile
+import subprocess as sp
+
+this_dir = pathlib.Path(__file__).resolve().parent
+makefile_path = this_dir / "makefile.customFile"
+
+
+def setUpArgs():
+    parser = argparse.ArgumentParser(description="Run randomly generated .ll files (using llvm-stress) through DWC and TMR passes")
+    parser.add_argument('passes', type=str, help='opt passes to run on generated IR')
+    parser.add_argument('--tests', '-n', help='how many times to run the stress test (default 10)', type=int, default=10)
+    parser.add_argument('--size', '-s', help='size will be passed to llvm-stress. indicates number of lines to generate', type=int, default=150)
+    return parser.parse_args()
+
+
+def createRandomIRFile(tempFile, size):
+    # Run llvm-stress-7 to write random IR of the requested size to tempFile.name; returns the tool's exit code
+    path = tempFile.name
+    cmd = "llvm-stress-7 -size={} -o {}".format(size, str(path))
+    # only stdout is piped; communicate() blocks until the tool has exited
+    proc = sp.Popen(shlex.split(cmd), stdout=sp.PIPE)
+    output = proc.communicate()[0]
+    # poll every 0.5 s until the output path exists on disk
+    while not os.path.exists(str(path)):
+        time.sleep(0.5)
+    # on a non-zero exit code, show whatever the tool printed to stdout
+    if proc.returncode:
+        print(output.decode())
+    return proc.returncode
+
+def runOpt(srcDir, targetPath, passes):
+    # Build `<targetPath>.s` via make with makefile.customFile: run file through optimizer and compile to assembly, but do not link, since there is no `main`. Returns make's exit code.
+    cmd = "make --file={mk} 'PROJECT_SRC={dir}' 'TARGET={tgt}' 'OPT_PASSES={ps}' {tgt}.s"
+    cmd = cmd.format(
+        mk=makefile_path,
+        dir=srcDir,
+        tgt=targetPath,
+        ps=passes)
+    # make is executed with srcDir as its working directory; only its stdout is captured
+    proc = sp.Popen(shlex.split(cmd), cwd=srcDir, stdout=sp.PIPE)
+    output = proc.communicate()[0]
+    # on a non-zero exit code, show make's captured stdout
+    if proc.returncode:
+        print(output.decode())
+    return proc.returncode
+
+
+def main():
+    args = setUpArgs()
+    llSuffix = ".lbc"
+
+    with tempfile.TemporaryDirectory(dir=str(this_dir)) as td:
+        for _ in range(args.tests):
+            # create random .ll file
+            with tempfile.NamedTemporaryFile(mode='w', dir=str(td), suffix=llSuffix) as llFile:
+                rc0 = createRandomIRFile(llFile, args.size)
+                if rc0:
+                    print("Error creating IR file of size {}".format(args.size))
+                    return rc0
+                # run through optimizer (can directly take .ll files)
+                rawFileName = llFile.name.replace(llSuffix, "")
+                rawFileName = os.path.basename(rawFileName)
+                rc1 = runOpt(str(td), rawFileName, args.passes)
+                # don't need to remove opt file, because temp directory will be deleted
+                if rc1:
+                    print("Error running configuration {}".format(args.passes))
+    
+    # if success
+    print("Success!")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/unittest/makefile.customFile b/unittest/makefile.customFile
new file mode 100644
index 000000000..218d1d96d
--- /dev/null
+++ b/unittest/makefile.customFile
@@ -0,0 +1,16 @@
+# level is one more than normal because we intend to use it in one directory down
+LEVEL = ../../tests
+TARGET ?=
+OPT_PASSES ?=
+OPT_FLAGS ?=
+
+BOARD=x86
+
+# this is for compatibility with building for ARM QEMU
+ifneq ($(BUILD_FOR_SIMULATOR),)
+USER_DEFS	+= __FOR_SIM=1 __QEMU_SIM=1
+USER_CFLAGS += -g3
+INC_DIRS    := $(LEVEL)/../simulation
+endif
+
+include $(LEVEL)/makefiles/Makefile.common
diff --git a/unittest/pyDriver.py b/unittest/pyDriver.py
new file mode 100644
index 000000000..9edc752a3
--- /dev/null
+++ b/unittest/pyDriver.py
@@ -0,0 +1,88 @@
+###########################################################
+# top level driver for running specialized Python driver
+#  code in other directories
+###########################################################
+
+import re
+import sys
+import yaml
+import shlex
+import pathlib
+import argparse
+import subprocess as sp
+
+from unittest import COAST_dir, error, bcolors
+
+
+class Driver:
+    def __init__(self, path):
+        self.path = path
+        self.relpath = self.path.relative_to(COAST_dir)
+        self.target = None
+        self.re = None
+
+    def run(self, passes):
+        # run the driver script from its own directory, passing the opt args as one argument
+        runPath = str(self.path)
+        runDir = str(self.path.parents[0])
+        # argv is a list so a path containing spaces is not re-split; the leading space on the
+        # passes argument is kept as-is (presumably so "-pass" is not parsed as a flag - confirm)
+        cmd = ["python3", runPath, " " + " ".join(passes.split())]
+        # these drivers should be self-cleaning
+        s = sp.Popen(cmd, cwd=runDir, stdout=sp.PIPE, stderr=sp.STDOUT)
+        stdout = s.communicate()[0].decode()
+        # check the return value
+        if s.returncode:
+            print(stdout)
+            error("Could not run", runPath)
+
+        if self.re is not None:
+            m = re.search(self.re, stdout)
+            if not m:
+                print(stdout)
+                error("Could not match stdout of", runPath,
+                    "using re expression:", self.re)
+
+
+def main():
+    # Load command-line arguments
+    parser = argparse.ArgumentParser()
+    parser.add_argument('config_yml')
+    args = parser.parse_args()
+
+    # Ensure yaml config file exists, then open and read it
+    cfg_path = pathlib.Path(args.config_yml)
+    if not cfg_path.is_file():
+        error("Config file", cfg_path, "does not exist.")
+    # pathlib doesn't work with open in 3.5-
+    if sys.version_info.minor <= 5:
+        cfg_path = str(cfg_path)
+    with open(cfg_path, 'r') as stream:
+        try:
+            cfg = yaml.safe_load(stream)
+        except yaml.YAMLError as exc:
+            error("Could not parse config file", cfg_path, "-", exc)
+
+    # get the correct driver paths
+    drivers = []
+    for d in cfg["drivers"]:
+        d_path = COAST_dir / d["path"]
+        if not d_path.is_file():
+            error("Driver file", d_path, "does not exist.")
+        tmp_driver = Driver(d_path)
+        # get the regex to match output
+        if "re" in d:
+            tmp_driver.re = d["re"]
+        drivers.append(tmp_driver)
+
+    # run over all the configuration options, running each driver
+    for opt_pass in cfg["OPT_PASSES"]:
+        print(bcolors.HEADER + "OPT_PASSES:", opt_pass, bcolors.ENDC)
+        for driver in drivers:
+            print("  " + bcolors.OKBLUE + str(driver.relpath), bcolors.ENDC)
+            print("    Running")
+            driver.run(opt_pass)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/unittest/unittest.py b/unittest/unittest.py
new file mode 100644
index 000000000..a889917e4
--- /dev/null
+++ b/unittest/unittest.py
@@ -0,0 +1,144 @@
+import sys
+import argparse
+import pathlib
+import yaml
+import subprocess
+import re
+
+COAST_dir = pathlib.Path(__file__).resolve().parent.parent
+tests_dir = COAST_dir / "tests"
+
+class bcolors:
+    HEADER = '\033[95m'
+    OKBLUE = '\033[94m'
+    OKGREEN = '\033[92m'
+    WARNING = '\033[93m'
+    FAIL = '\033[91m'
+    ENDC = '\033[0m'
+    BOLD = '\033[1m'
+    UNDERLINE = '\033[4m'
+
+class NoDesignInPath(Exception):
+    pass
+
+def error(*msg, return_code=-1):
+    print("!!!! ERROR:", " ".join(str(item) for item in msg))
+    sys.exit(return_code)
+
+class Benchmark:
+    def __init__(self, path):
+        self.path = path
+        self.relpath = self.path.relative_to(tests_dir)
+        self.target = None
+        self.re = None
+
+        # Check if directory contains a valid design.
+        # The current method checks if it is a design by seeing if it contains
+        # a Makefile with a 'TARGET = ' line.
+
+        makefile_path = self.path / 'Makefile'
+        if not makefile_path.is_file():
+            raise NoDesignInPath
+
+        # read via a context manager so the file handle is closed promptly
+        with open(str(makefile_path), 'r') as makefile:
+            makefile_text = makefile.read()
+        m = re.search(r"^\s*TARGET\s*(:|\+)?=\s*(.*?)\s*$", makefile_text, re.M)
+        if m is None:
+            raise NoDesignInPath
+        self.target = m.group(2)
+
+    # Compile the benchmark for x86 using provided opt_passes
+    def compile(self, opt_passes):
+        # Clean design dir
+        cmd = ["make", "clean"]
+        s = subprocess.run(cmd, cwd=str(self.path), stdout=subprocess.DEVNULL,
+                        stderr=subprocess.DEVNULL)
+        if s.returncode:
+            error("Could not clean", self.path)
+
+        # Compile design
+        cmd = ["make", "exe", "BOARD=x86", "OPT_PASSES=" + opt_passes]
+        s = subprocess.Popen(
+            cmd, cwd=str(self.path), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        stdout = s.communicate()[0].decode()
+        if s.returncode:
+            print(stdout)
+            error("Could not compile", self.path)
+
+    # Run the x86 compiled benchmark (must call compile first)
+    def run(self):
+        design_exe_path = str(self.path / (self.target + ".out"))
+        cmd = [design_exe_path, ]
+        s = subprocess.Popen(
+            cmd, cwd=str(self.path), stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+        stdout = s.communicate()[0].decode()
+        if s.returncode:
+            print(stdout)
+            error("Could not run", design_exe_path)
+        
+        # If there is a regex matcher, check the stdout against it
+        if self.re is not None:
+            m = re.search(self.re, stdout)
+            if not m:
+                print(stdout)
+                error("Could not match stdout of", design_exe_path,
+                    "using re expression:", self.re)
+
+def find_all_benchmarks_in_path(path):
+    benchmarks = []
+
+    for d in path.glob('**/'):
+        try:
+            benchmark = Benchmark(d)
+        except NoDesignInPath:
+            pass
+        else:
+            benchmarks.append(benchmark)
+
+    return benchmarks
+
+def main():
+    # Load command-line arguments
+    parser = argparse.ArgumentParser()
+    parser.add_argument('config_yml')
+    args = parser.parse_args()
+
+    # Ensure yaml config file exists, then open and read it
+    cfg_path = pathlib.Path(args.config_yml)
+    if not cfg_path.is_file():
+        error("Config file", cfg_path, "does not exist.")
+    with open(str(cfg_path), 'r') as stream:
+        try:
+            cfg = yaml.safe_load(stream)
+        except yaml.YAMLError as exc:
+            error("Could not parse config file", cfg_path, "-", exc)
+
+    # Find all benchmarks to run
+    benchmarks = []
+    for bench_cfg in cfg["benchmarks"]:
+        bench_dir = tests_dir / bench_cfg["path"]
+        benchmarks_on_path = find_all_benchmarks_in_path(bench_dir)
+        if len(benchmarks_on_path) == 0:
+            error("No benchmarks found at", bench_dir)
+        if "re" in bench_cfg:
+            for b in benchmarks_on_path:
+                b.re = bench_cfg["re"]
+        benchmarks.extend(benchmarks_on_path)
+
+    # Loop through all OPT passes, and build/run each benchmark for this pass configuration
+    for opt_pass in cfg["OPT_PASSES"]:
+        print(bcolors.HEADER + "OPT_PASSES:", opt_pass, bcolors.ENDC)
+        for benchmark in benchmarks:
+            print("  " + bcolors.OKBLUE + str(benchmark.relpath), bcolors.ENDC)
+            print("    Compiling")
+            benchmark.compile(opt_pass)
+            if benchmark.re is not None:
+                print("    Running and validating output")
+            else:
+                print("    Running")
+            benchmark.run()
+
+
+if __name__ == "__main__":
+    main()

From 843156440d39b66fe2d4bcf595c38aabfdc0105a Mon Sep 17 00:00:00 2001
From: Benjamin James <bjames@byu.net>
Date: Wed, 5 Aug 2020 08:58:39 -0600
Subject: [PATCH 7/9] Travis file must have execute permissions.

---
 .github/.travis/build.sh | 0
 .gitignore               | 1 +
 2 files changed, 1 insertion(+)
 mode change 100644 => 100755 .github/.travis/build.sh
 create mode 100644 .gitignore

diff --git a/.github/.travis/build.sh b/.github/.travis/build.sh
old mode 100644
new mode 100755
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..bee8a64b7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__

From e600e93e4efe67d6caf293f18519c7ffbe4d3f3b Mon Sep 17 00:00:00 2001
From: Benjamin James <bjames@byu.net>
Date: Wed, 5 Aug 2020 09:27:19 -0600
Subject: [PATCH 8/9] Fixing project build files.

---
 projects/build/README                      | 4 ++++
 projects/dataflowProtection/CMakeLists.txt | 3 +++
 2 files changed, 7 insertions(+)
 create mode 100644 projects/build/README

diff --git a/projects/build/README b/projects/build/README
new file mode 100644
index 000000000..97338d42a
--- /dev/null
+++ b/projects/build/README
@@ -0,0 +1,4 @@
+Run the following in this directory to build the projects:
+
+cmake ..
+make
\ No newline at end of file
diff --git a/projects/dataflowProtection/CMakeLists.txt b/projects/dataflowProtection/CMakeLists.txt
index cd495c3a6..e7dd24707 100644
--- a/projects/dataflowProtection/CMakeLists.txt
+++ b/projects/dataflowProtection/CMakeLists.txt
@@ -6,5 +6,8 @@ add_llvm_loadable_module(DataflowProtection
 	cloning.cpp
 	synchronization.cpp
 	utils.cpp
+    verification.cpp
+    interface.cpp
+    inspection.cpp
 	dataflowProtection.h
 )

From 41c141a23a18fc8f422d5f0d723150e76020bcec Mon Sep 17 00:00:00 2001
From: Benjamin James <bjames@byu.net>
Date: Wed, 5 Aug 2020 09:43:16 -0600
Subject: [PATCH 9/9] Adding cool badges to top-level README.

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index ee87e001a..62ad53b54 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,9 @@
 # Welcome to the COAST Repository
 
+[![Build Status](https://travis-ci.com/byuccl/coast.svg?branch=master)](https://travis-ci.com/byuccl/coast)
+[![Documentation Status](https://readthedocs.org/projects/coast-compiler/badge/?version=latest)](https://coast-compiler.readthedocs.io/en/latest/?badge=latest)
+
+
 Welcome to the repository for COAST (COmpiler-Assisted Software fault Tolerance), BYU's tool for automated software mitigation! To get started, please refer to our [documentation pages](https://coast-compiler.readthedocs.io/en/latest/).