diff --git a/.github/workflows/post-release.yml b/.github/workflows/post-release.yml index da539c35..a44a675d 100644 --- a/.github/workflows/post-release.yml +++ b/.github/workflows/post-release.yml @@ -6,7 +6,7 @@ on: jobs: post-release: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: # trigger post-release in dependency repo, this indirection allows the # dependency repo to be updated often without affecting this repo. At diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a1a1a436..c38b8de6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,7 +7,7 @@ on: jobs: release: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 # need to manually check for a couple things # - tests passed? @@ -73,89 +73,70 @@ jobs: # previous results to compare against? [ -n "$LFS_PREV_VERSION" ] && curl -sS \ "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/` - `status/$LFS_PREV_VERSION" \ + `status/$LFS_PREV_VERSION?per_page=100" \ | jq -re 'select(.sha != env.GITHUB_SHA) | .statuses[]' \ >> prev-results.json \ || true - # unfortunately these each have their own format - [ -e results/code-thumb.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / code").description - | capture("Code size is (?[0-9]+)").result' \ - prev-results.json || echo 0)" - ./scripts/code.py -u results/code-thumb.csv -s | awk ' - NR==2 {printf "Code size,%d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - [ -e results/code-thumb-readonly.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / code (readonly)").description - | capture("Code size is (?[0-9]+)").result' \ - prev-results.json || echo 0)" - ./scripts/code.py -u results/code-thumb-readonly.csv -s | awk ' - NR==2 {printf "Code size
(readonly),%d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - [ -e results/code-thumb-threadsafe.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / code (threadsafe)").description - | capture("Code size is (?[0-9]+)").result' \ - prev-results.json || echo 0)" - ./scripts/code.py -u results/code-thumb-threadsafe.csv -s | awk ' - NR==2 {printf "Code size
(threadsafe),%d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - [ -e results/code-thumb-migrate.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / code (migrate)").description - | capture("Code size is (?[0-9]+)").result' \ - prev-results.json || echo 0)" - ./scripts/code.py -u results/code-thumb-migrate.csv -s | awk ' - NR==2 {printf "Code size
(migrate),%d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - [ -e results/code-thumb-error-asserts.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / code (error-asserts)").description - | capture("Code size is (?[0-9]+)").result' \ - prev-results.json || echo 0)" - ./scripts/code.py -u results/code-thumb-error-asserts.csv -s | awk ' - NR==2 {printf "Code size
(error-asserts),%d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - [ -e results/coverage.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / coverage").description - | capture("Coverage is (?[0-9\\.]+)").result' \ - prev-results.json || echo 0)" - ./scripts/coverage.py -u results/coverage.csv -s | awk -F '[ /%]+' ' - NR==2 {printf "Coverage,%.1f%% of %d lines",$4,$3} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",$4-ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - - # transpose to GitHub table - [ -e results.csv ] || exit 0 - awk -F ',' ' - {label[NR]=$1; value[NR]=$2} - END { - for (r=1; r<=NR; r++) {printf "| %s ",label[r]}; printf "|\n"; - for (r=1; r<=NR; r++) {printf "|:--"}; printf "|\n"; - for (r=1; r<=NR; r++) {printf "| %s ",value[r]}; printf "|\n"}' \ - results.csv > results.txt - echo "RESULTS:" + # build table for GitHub + echo "" >> results.txt + echo "" >> results.txt + echo "" >> results.txt + echo "" >> results.txt + for r in Code Stack Structs Coverage + do + echo "" >> results.txt + done + echo "" >> results.txt + echo "" >> results.txt + + echo "" >> results.txt + for c in "" readonly threadsafe migrate error-asserts + do + echo "" >> results.txt + c_or_default=${c:-default} + echo "" >> results.txt + for r in code stack structs + do + # per-config results + echo "" >> results.txt + done + # coverage results + if [ -z $c ] + then + echo "" >> results.txt + fi + echo "" >> results.txt + done + echo "" >> results.txt + echo "
Configuration$r
${c_or_default^}" >> results.txt + [ -e results/thumb${c:+-$c}.csv ] && ( \ + export PREV="$(jq -re ' + select(.context == "'"results (thumb${c:+, $c}) / $r"'").description + | capture("(?[0-9∞]+)").result' \ + prev-results.json || echo 0)" + ./scripts/summary.py results/thumb${c:+-$c}.csv -f $r -Y | awk ' + NR==2 {printf "%s B",$2} + NR==2 && ENVIRON["PREV"]+0 != 0 { + printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} + NR==2 {printf "\n"}' \ + | sed -e 's/ /\ /g' \ + >> results.txt) + echo "" >> results.txt + [ -e results/coverage.csv ] && ( \ + export PREV="$(jq -re ' + select(.context == "results / coverage").description + | capture("(?[0-9\\.]+)").result' \ + prev-results.json || echo 0)" + ./scripts/coverage.py -u results/coverage.csv -Y | awk -F '[ /%]+' ' + NR==2 {printf "%.1f%% of %d lines",$4,$3} + NR==2 && ENVIRON["PREV"]+0 != 0 { + printf " (%+.1f%%)",$4-ENVIRON["PREV"]} + NR==2 {printf "\n"}' \ + | sed -e 's/ /\ /g' \ + >> results.txt) + echo "
" >> results.txt + cat results.txt # find changes from history diff --git a/.github/workflows/status.yml b/.github/workflows/status.yml index 7bd851a2..d28b17cc 100644 --- a/.github/workflows/status.yml +++ b/.github/workflows/status.yml @@ -6,7 +6,7 @@ on: jobs: status: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: # custom statuses? - uses: dawidd6/action-download-artifact@v2 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6d633f8e..bc8bb0c6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,7 +8,7 @@ env: jobs: # run tests test: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: @@ -18,11 +18,27 @@ jobs: - uses: actions/checkout@v2 - name: install run: | - # need toml, also pip3 isn't installed by default? + # need a few additional tools + # + # note this includes gcc-10, which is required for -fcallgraph-info=su sudo apt-get update -qq - sudo apt-get install -qq python3 python3-pip lcov + sudo apt-get install -qq gcc-10 python3 python3-pip lcov sudo pip3 install toml - gcc --version + echo "CC=gcc-10" >> $GITHUB_ENV + gcc-10 --version + lcov --version + python3 --version + + # need newer lcov version for gcc-10 + #sudo apt-get remove lcov + #wget https://launchpad.net/ubuntu/+archive/primary/+files/lcov_1.15-1_all.deb + #sudo apt install ./lcov_1.15-1_all.deb + #lcov --version + #which lcov + #ls -lha /usr/bin/lcov + wget https://github.com/linux-test-project/lcov/releases/download/v1.15/lcov-1.15.tar.gz + tar xf lcov-1.15.tar.gz + sudo make -C lcov-1.15 install # setup a ram-backed disk to speed up reentrant tests mkdir disks @@ -41,36 +57,36 @@ jobs: if: ${{matrix.arch == 'thumb'}} run: | sudo apt-get install -qq \ - gcc-arm-linux-gnueabi \ + gcc-10-arm-linux-gnueabi \ libc6-dev-armel-cross \ qemu-user - echo "CC=arm-linux-gnueabi-gcc -mthumb --static" >> $GITHUB_ENV + echo "CC=arm-linux-gnueabi-gcc-10 -mthumb --static" >> $GITHUB_ENV echo "EXEC=qemu-arm" >> $GITHUB_ENV - arm-linux-gnueabi-gcc --version + arm-linux-gnueabi-gcc-10 --version qemu-arm -version # cross-compile with MIPS (32-bit, big-endian) - name: install-mips if: ${{matrix.arch == 'mips'}} run: | sudo apt-get install -qq \ - gcc-mips-linux-gnu \ + gcc-10-mips-linux-gnu \ libc6-dev-mips-cross \ qemu-user - echo "CC=mips-linux-gnu-gcc --static" >> $GITHUB_ENV + echo "CC=mips-linux-gnu-gcc-10 --static" >> $GITHUB_ENV echo "EXEC=qemu-mips" >> $GITHUB_ENV - mips-linux-gnu-gcc --version + mips-linux-gnu-gcc-10 --version qemu-mips -version # cross-compile with PowerPC (32-bit, big-endian) - name: install-powerpc if: ${{matrix.arch == 'powerpc'}} run: | sudo apt-get install -qq \ - gcc-powerpc-linux-gnu \ + gcc-10-powerpc-linux-gnu \ libc6-dev-powerpc-cross \ qemu-user - echo "CC=powerpc-linux-gnu-gcc --static" >> $GITHUB_ENV + echo "CC=powerpc-linux-gnu-gcc-10 --static" >> $GITHUB_ENV echo "EXEC=qemu-ppc" >> $GITHUB_ENV - powerpc-linux-gnu-gcc --version + powerpc-linux-gnu-gcc-10 --version qemu-ppc -version # make sure example can at least compile @@ -148,102 +164,108 @@ jobs: retention-days: 1 # update results - - name: results-code + - name: results run: | mkdir -p results make clean - make code \ + make lfs.csv \ CFLAGS+=" \ -DLFS_NO_ASSERT \ -DLFS_NO_DEBUG \ -DLFS_NO_WARN \ - -DLFS_NO_ERROR" \ - CODEFLAGS+="-o results/code-${{matrix.arch}}.csv" - - name: results-code-readonly + -DLFS_NO_ERROR" + cp lfs.csv results/${{matrix.arch}}.csv + ./scripts/summary.py results/${{matrix.arch}}.csv + - name: results-readonly run: | mkdir -p 
results make clean - make code \ + make lfs.csv \ CFLAGS+=" \ -DLFS_NO_ASSERT \ -DLFS_NO_DEBUG \ -DLFS_NO_WARN \ -DLFS_NO_ERROR \ - -DLFS_READONLY" \ - CODEFLAGS+="-o results/code-${{matrix.arch}}-readonly.csv" - - name: results-code-threadsafe + -DLFS_READONLY" + cp lfs.csv results/${{matrix.arch}}-readonly.csv + ./scripts/summary.py results/${{matrix.arch}}-readonly.csv + - name: results-threadsafe run: | mkdir -p results make clean - make code \ + make lfs.csv \ CFLAGS+=" \ -DLFS_NO_ASSERT \ -DLFS_NO_DEBUG \ -DLFS_NO_WARN \ -DLFS_NO_ERROR \ - -DLFS_THREADSAFE" \ - CODEFLAGS+="-o results/code-${{matrix.arch}}-threadsafe.csv" - - name: results-code-migrate + -DLFS_THREADSAFE" + cp lfs.csv results/${{matrix.arch}}-threadsafe.csv + ./scripts/summary.py results/${{matrix.arch}}-threadsafe.csv + - name: results-migrate run: | mkdir -p results make clean - make code \ + make lfs.csv \ CFLAGS+=" \ -DLFS_NO_ASSERT \ -DLFS_NO_DEBUG \ -DLFS_NO_WARN \ -DLFS_NO_ERROR \ - -DLFS_MIGRATE" \ - CODEFLAGS+="-o results/code-${{matrix.arch}}-migrate.csv" - - name: results-code-error-asserts + -DLFS_MIGRATE" + cp lfs.csv results/${{matrix.arch}}-migrate.csv + ./scripts/summary.py results/${{matrix.arch}}-migrate.csv + - name: results-error-asserts run: | mkdir -p results make clean - make code \ + make lfs.csv \ CFLAGS+=" \ -DLFS_NO_DEBUG \ -DLFS_NO_WARN \ -DLFS_NO_ERROR \ - -D'LFS_ASSERT(test)=do {if(!(test)) {return -1;}} while(0)'" \ - CODEFLAGS+="-o results/code-${{matrix.arch}}-error-asserts.csv" + -D'LFS_ASSERT(test)=do {if(!(test)) {return -1;}} while(0)'" + cp lfs.csv results/${{matrix.arch}}-error-asserts.csv + ./scripts/summary.py results/${{matrix.arch}}-error-asserts.csv - name: upload-results uses: actions/upload-artifact@v2 with: name: results path: results - # limit reporting to Thumb, otherwise there would be too many numbers - # flying around for the results to be easily readable + + # create statuses with results - name: collect-status - if: ${{matrix.arch == 'thumb'}} run: | mkdir -p status - for f in $(shopt -s nullglob ; echo results/code*.csv) + for f in $(shopt -s nullglob ; echo results/*.csv) do - export STEP="results-code$( - echo $f | sed -n 's/.*code-.*-\(.*\).csv/-\1/p')" - export CONTEXT="results / code$( - echo $f | sed -n 's/.*code-.*-\(.*\).csv/ (\1)/p')" - export PREV="$(curl -sS \ - "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/status/master" \ - | jq -re 'select(.sha != env.GITHUB_SHA) | .statuses[] - | select(.context == env.CONTEXT).description - | capture("Code size is (?[0-9]+)").result' \ - || echo 0)" - export DESCRIPTION="$(./scripts/code.py -u $f -s | awk ' - NR==2 {printf "Code size is %d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]}')" - jq -n '{ - state: "success", - context: env.CONTEXT, - description: env.DESCRIPTION, - target_job: "${{github.job}} (${{matrix.arch}})", - target_step: env.STEP}' \ - | tee status/code$( - echo $f | sed -n 's/.*code-.*-\(.*\).csv/-\1/p').json + export STEP="results$( + echo $f | sed -n 's/[^-]*-\(.*\).csv/-\1/p')" + for r in code stack structs + do + export CONTEXT="results (${{matrix.arch}}$( + echo $f | sed -n 's/[^-]*-\(.*\).csv/, \1/p')) / $r" + export PREV="$(curl -sS \ + "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/status/master?per_page=100" \ + | jq -re 'select(.sha != env.GITHUB_SHA) | .statuses[] + | select(.context == env.CONTEXT).description + | capture("(?[0-9∞]+)").result' \ + || echo 0)" + export DESCRIPTION="$(./scripts/summary.py $f -f $r -Y | awk ' + NR==2 
{printf "%s B",$2} + NR==2 && ENVIRON["PREV"]+0 != 0 { + printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]}')" + jq -n '{ + state: "success", + context: env.CONTEXT, + description: env.DESCRIPTION, + target_job: "${{github.job}} (${{matrix.arch}})", + target_step: env.STEP}' \ + | tee status/$r-${{matrix.arch}}$( + echo $f | sed -n 's/[^-]*-\(.*\).csv/-\1/p').json + done done - name: upload-status - if: ${{matrix.arch == 'thumb'}} uses: actions/upload-artifact@v2 with: name: status @@ -252,7 +274,7 @@ jobs: # run under Valgrind to check for memory errors valgrind: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: install @@ -272,7 +294,7 @@ jobs: # self-host with littlefs-fuse for a fuzz-like test fuse: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 if: ${{!endsWith(github.ref, '-prefix')}} steps: - uses: actions/checkout@v2 @@ -297,16 +319,18 @@ jobs: # setup disk for littlefs-fuse mkdir mount - sudo chmod a+rw /dev/loop0 + LOOP=$(sudo losetup -f) + sudo chmod a+rw $LOOP dd if=/dev/zero bs=512 count=128K of=disk - losetup /dev/loop0 disk + losetup $LOOP disk + echo "LOOP=$LOOP" >> $GITHUB_ENV - name: test run: | # self-host test make -C littlefs-fuse - littlefs-fuse/lfs --format /dev/loop0 - littlefs-fuse/lfs /dev/loop0 mount + littlefs-fuse/lfs --format $LOOP + littlefs-fuse/lfs $LOOP mount ls mount mkdir mount/littlefs @@ -318,7 +342,7 @@ jobs: # test migration using littlefs-fuse migrate: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 if: ${{!endsWith(github.ref, '-prefix')}} steps: - uses: actions/checkout@v2 @@ -348,9 +372,11 @@ jobs: # setup disk for littlefs-fuse mkdir mount - sudo chmod a+rw /dev/loop0 + LOOP=$(sudo losetup -f) + sudo chmod a+rw $LOOP dd if=/dev/zero bs=512 count=128K of=disk - losetup /dev/loop0 disk + losetup $LOOP disk + echo "LOOP=$LOOP" >> $GITHUB_ENV - name: test run: | # compile v1 and v2 @@ -358,8 +384,8 @@ jobs: make -C v2 # run self-host test with v1 - v1/lfs --format /dev/loop0 - v1/lfs /dev/loop0 mount + v1/lfs --format $LOOP + v1/lfs $LOOP mount ls mount mkdir mount/littlefs @@ -373,8 +399,8 @@ jobs: cd ../.. 
fusermount -u mount - v2/lfs --migrate /dev/loop0 - v2/lfs /dev/loop0 mount + v2/lfs --migrate $LOOP + v2/lfs $LOOP mount # run self-host test with v2 right where we left off ls mount @@ -385,7 +411,7 @@ jobs: # collect coverage info coverage: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 needs: [test] steps: - uses: actions/checkout@v2 @@ -421,14 +447,14 @@ jobs: export STEP="results-coverage" export CONTEXT="results / coverage" export PREV="$(curl -sS \ - "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/status/master" \ + "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/status/master?per_page=100" \ | jq -re 'select(.sha != env.GITHUB_SHA) | .statuses[] | select(.context == env.CONTEXT).description - | capture("Coverage is (?[0-9\\.]+)").result' \ + | capture("(?[0-9\\.]+)").result' \ || echo 0)" export DESCRIPTION="$( - ./scripts/coverage.py -u results/coverage.csv -s | awk -F '[ /%]+' ' - NR==2 {printf "Coverage is %.1f%% of %d lines",$4,$3} + ./scripts/coverage.py -u results/coverage.csv -Y | awk -F '[ /%]+' ' + NR==2 {printf "%.1f%% of %d lines",$4,$3} NR==2 && ENVIRON["PREV"]+0 != 0 { printf " (%+.1f%%)",$4-ENVIRON["PREV"]}')" jq -n '{ diff --git a/.gitignore b/.gitignore index a6ebc4c3..3f7b860e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ *.o *.d *.a +*.ci +*.csv # Testing things blocks/ diff --git a/Makefile b/Makefile index 763a0cee..7cc59f8a 100644 --- a/Makefile +++ b/Makefile @@ -17,44 +17,63 @@ TARGET ?= $(BUILDDIR)lfs.a endif -CC ?= gcc -AR ?= ar -SIZE ?= size -CTAGS ?= ctags -NM ?= nm -LCOV ?= lcov +CC ?= gcc +AR ?= ar +SIZE ?= size +CTAGS ?= ctags +NM ?= nm +OBJDUMP ?= objdump +LCOV ?= lcov SRC ?= $(wildcard *.c) OBJ := $(SRC:%.c=$(BUILDDIR)%.o) DEP := $(SRC:%.c=$(BUILDDIR)%.d) ASM := $(SRC:%.c=$(BUILDDIR)%.s) +CGI := $(SRC:%.c=$(BUILDDIR)%.ci) ifdef DEBUG -override CFLAGS += -O0 -g3 +override CFLAGS += -O0 else override CFLAGS += -Os endif ifdef TRACE override CFLAGS += -DLFS_YES_TRACE endif +override CFLAGS += -g3 override CFLAGS += -I. 
override CFLAGS += -std=c99 -Wall -pedantic override CFLAGS += -Wextra -Wshadow -Wjump-misses-init -Wundef ifdef VERBOSE -override TESTFLAGS += -v -override CODEFLAGS += -v +override TESTFLAGS += -v +override CALLSFLAGS += -v +override CODEFLAGS += -v +override DATAFLAGS += -v +override STACKFLAGS += -v +override STRUCTSFLAGS += -v override COVERAGEFLAGS += -v endif ifdef EXEC override TESTFLAGS += --exec="$(EXEC)" endif +ifdef COVERAGE +override TESTFLAGS += --coverage +endif ifdef BUILDDIR -override TESTFLAGS += --build-dir="$(BUILDDIR:/=)" -override CODEFLAGS += --build-dir="$(BUILDDIR:/=)" +override TESTFLAGS += --build-dir="$(BUILDDIR:/=)" +override CALLSFLAGS += --build-dir="$(BUILDDIR:/=)" +override CODEFLAGS += --build-dir="$(BUILDDIR:/=)" +override DATAFLAGS += --build-dir="$(BUILDDIR:/=)" +override STACKFLAGS += --build-dir="$(BUILDDIR:/=)" +override STRUCTSFLAGS += --build-dir="$(BUILDDIR:/=)" +override COVERAGEFLAGS += --build-dir="$(BUILDDIR:/=)" endif ifneq ($(NM),nm) override CODEFLAGS += --nm-tool="$(NM)" +override DATAFLAGS += --nm-tool="$(NM)" +endif +ifneq ($(OBJDUMP),objdump) +override STRUCTSFLAGS += --objdump-tool="$(OBJDUMP)" endif @@ -73,9 +92,9 @@ size: $(OBJ) tags: $(CTAGS) --totals --c-types=+p $(shell find -H -name '*.h') $(SRC) -.PHONY: code -code: $(OBJ) - ./scripts/code.py $^ $(CODEFLAGS) +.PHONY: calls +calls: $(CGI) + ./scripts/calls.py $^ $(CALLSFLAGS) .PHONY: test test: @@ -84,9 +103,30 @@ test: test%: tests/test$$(firstword $$(subst \#, ,%)).toml ./scripts/test.py $@ $(TESTFLAGS) +.PHONY: code +code: $(OBJ) + ./scripts/code.py $^ -S $(CODEFLAGS) + +.PHONY: data +data: $(OBJ) + ./scripts/data.py $^ -S $(DATAFLAGS) + +.PHONY: stack +stack: $(CGI) + ./scripts/stack.py $^ -S $(STACKFLAGS) + +.PHONY: structs +structs: $(OBJ) + ./scripts/structs.py $^ -S $(STRUCTSFLAGS) + .PHONY: coverage coverage: - ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info $(COVERAGEFLAGS) + ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info -s $(COVERAGEFLAGS) + +.PHONY: summary +summary: $(BUILDDIR)lfs.csv + ./scripts/summary.py -Y $^ $(SUMMARYFLAGS) + # rules -include $(DEP) @@ -95,20 +135,39 @@ coverage: $(BUILDDIR)lfs: $(OBJ) $(CC) $(CFLAGS) $^ $(LFLAGS) -o $@ -$(BUILDDIR)%.a: $(OBJ) +$(BUILDDIR)lfs.a: $(OBJ) $(AR) rcs $@ $^ +$(BUILDDIR)lfs.csv: $(OBJ) $(CGI) + ./scripts/code.py $(OBJ) -q $(CODEFLAGS) -o $@ + ./scripts/data.py $(OBJ) -q -m $@ $(DATAFLAGS) -o $@ + ./scripts/stack.py $(CGI) -q -m $@ $(STACKFLAGS) -o $@ + ./scripts/structs.py $(OBJ) -q -m $@ $(STRUCTSFLAGS) -o $@ + $(if $(COVERAGE),\ + ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info \ + -q -m $@ $(COVERAGEFLAGS) -o $@) + $(BUILDDIR)%.o: %.c $(CC) -c -MMD $(CFLAGS) $< -o $@ $(BUILDDIR)%.s: %.c $(CC) -S $(CFLAGS) $< -o $@ +# gcc depends on the output file for intermediate file names, so +# we can't omit to .o output. 
We also need to serialize with the +# normal .o rule because otherwise we can end up with multiprocess +# problems with two instances of gcc modifying the same .o +$(BUILDDIR)%.ci: %.c | $(BUILDDIR)%.o + $(CC) -c -MMD -fcallgraph-info=su $(CFLAGS) $< -o $| + # clean everything .PHONY: clean clean: - rm -f $(TARGET) + rm -f $(BUILDDIR)lfs + rm -f $(BUILDDIR)lfs.a + rm -f $(BUILDDIR)lfs.csv rm -f $(OBJ) + rm -f $(CGI) rm -f $(DEP) rm -f $(ASM) rm -f $(BUILDDIR)tests/*.toml.* diff --git a/lfs.c b/lfs.c index 4a2f050c..65d2ff4a 100644 --- a/lfs.c +++ b/lfs.c @@ -8,9 +8,24 @@ #include "lfs.h" #include "lfs_util.h" + +// some constants used throughout the code #define LFS_BLOCK_NULL ((lfs_block_t)-1) #define LFS_BLOCK_INLINE ((lfs_block_t)-2) +enum { + LFS_OK_RELOCATED = 1, + LFS_OK_DROPPED = 2, + LFS_OK_ORPHANED = 3, +}; + +enum { + LFS_CMP_EQ = 0, + LFS_CMP_LT = 1, + LFS_CMP_GT = 2, +}; + + /// Caching block device operations /// static inline void lfs_cache_drop(lfs_t *lfs, lfs_cache_t *rcache) { @@ -109,12 +124,6 @@ static int lfs_bd_read(lfs_t *lfs, return 0; } -enum { - LFS_CMP_EQ = 0, - LFS_CMP_LT = 1, - LFS_CMP_GT = 2, -}; - static int lfs_bd_cmp(lfs_t *lfs, const lfs_cache_t *pcache, lfs_cache_t *rcache, lfs_size_t hint, lfs_block_t block, lfs_off_t off, @@ -472,13 +481,15 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, static int lfs_dir_compact(lfs_t *lfs, lfs_mdir_t *dir, const struct lfs_mattr *attrs, int attrcount, lfs_mdir_t *source, uint16_t begin, uint16_t end); - +static lfs_ssize_t lfs_file_flushedwrite(lfs_t *lfs, lfs_file_t *file, + const void *buffer, lfs_size_t size); static lfs_ssize_t lfs_file_rawwrite(lfs_t *lfs, lfs_file_t *file, const void *buffer, lfs_size_t size); static int lfs_file_rawsync(lfs_t *lfs, lfs_file_t *file); static int lfs_file_outline(lfs_t *lfs, lfs_file_t *file); static int lfs_file_flush(lfs_t *lfs, lfs_file_t *file); +static int lfs_fs_deorphan(lfs_t *lfs, bool powerloss); static int lfs_fs_preporphans(lfs_t *lfs, int8_t orphans); static void lfs_fs_prepmove(lfs_t *lfs, uint16_t id, const lfs_block_t pair[2]); @@ -486,8 +497,6 @@ static int lfs_fs_pred(lfs_t *lfs, const lfs_block_t dir[2], lfs_mdir_t *pdir); static lfs_stag_t lfs_fs_parent(lfs_t *lfs, const lfs_block_t dir[2], lfs_mdir_t *parent); -static int lfs_fs_relocate(lfs_t *lfs, - const lfs_block_t oldpair[2], lfs_block_t newpair[2]); static int lfs_fs_forceconsistency(lfs_t *lfs); #endif @@ -498,6 +507,8 @@ static int lfs1_traverse(lfs_t *lfs, static int lfs_dir_rawrewind(lfs_t *lfs, lfs_dir_t *dir); +static lfs_ssize_t lfs_file_flushedread(lfs_t *lfs, lfs_file_t *file, + void *buffer, lfs_size_t size); static lfs_ssize_t lfs_file_rawread(lfs_t *lfs, lfs_file_t *file, void *buffer, lfs_size_t size); static int lfs_file_rawclose(lfs_t *lfs, lfs_file_t *file); @@ -738,6 +749,7 @@ static int lfs_dir_traverse_filter(void *p, (LFS_MKTAG(0x7ff, 0x3ff, 0) & tag) == ( LFS_MKTAG(LFS_TYPE_DELETE, 0, 0) | (LFS_MKTAG(0, 0x3ff, 0) & *filtertag))) { + *filtertag = LFS_MKTAG(LFS_FROM_NOOP, 0, 0); return true; } @@ -752,131 +764,228 @@ static int lfs_dir_traverse_filter(void *p, #endif #ifndef LFS_READONLY +// maximum recursive depth of lfs_dir_traverse, the deepest call: +// +// traverse with commit +// '-> traverse with filter +// '-> traverse with move +// ' traverse with filter +// +#define LFS_DIR_TRAVERSE_DEPTH 4 + +struct lfs_dir_traverse { + const lfs_mdir_t *dir; + lfs_off_t off; + lfs_tag_t ptag; + const struct lfs_mattr *attrs; + int attrcount; + + lfs_tag_t tmask; + lfs_tag_t ttag; 
+ uint16_t begin; + uint16_t end; + int16_t diff; + + int (*cb)(void *data, lfs_tag_t tag, const void *buffer); + void *data; + + lfs_tag_t tag; + const void *buffer; + struct lfs_diskoff disk; +}; + static int lfs_dir_traverse(lfs_t *lfs, const lfs_mdir_t *dir, lfs_off_t off, lfs_tag_t ptag, const struct lfs_mattr *attrs, int attrcount, lfs_tag_t tmask, lfs_tag_t ttag, uint16_t begin, uint16_t end, int16_t diff, int (*cb)(void *data, lfs_tag_t tag, const void *buffer), void *data) { + // This function in inherently recursive, but bounded. To allow tool-based + // analysis without unnecessary code-cost we use an explicit stack + struct lfs_dir_traverse stack[LFS_DIR_TRAVERSE_DEPTH-1]; + unsigned sp = 0; + int res; + // iterate over directory and attrs + lfs_tag_t tag; + const void *buffer; + struct lfs_diskoff disk; while (true) { - lfs_tag_t tag; - const void *buffer; - struct lfs_diskoff disk; - if (off+lfs_tag_dsize(ptag) < dir->off) { - off += lfs_tag_dsize(ptag); - int err = lfs_bd_read(lfs, - NULL, &lfs->rcache, sizeof(tag), - dir->pair[0], off, &tag, sizeof(tag)); - if (err) { - return err; - } - - tag = (lfs_frombe32(tag) ^ ptag) | 0x80000000; - disk.block = dir->pair[0]; - disk.off = off+sizeof(lfs_tag_t); - buffer = &disk; - ptag = tag; - } else if (attrcount > 0) { - tag = attrs[0].tag; - buffer = attrs[0].buffer; - attrs += 1; - attrcount -= 1; - } else { - return 0; - } - - lfs_tag_t mask = LFS_MKTAG(0x7ff, 0, 0); - if ((mask & tmask & tag) != (mask & tmask & ttag)) { - continue; - } + { + if (off+lfs_tag_dsize(ptag) < dir->off) { + off += lfs_tag_dsize(ptag); + int err = lfs_bd_read(lfs, + NULL, &lfs->rcache, sizeof(tag), + dir->pair[0], off, &tag, sizeof(tag)); + if (err) { + return err; + } - // do we need to filter? inlining the filtering logic here allows - // for some minor optimizations - if (lfs_tag_id(tmask) != 0) { - // scan for duplicates and update tag based on creates/deletes - int filter = lfs_dir_traverse(lfs, - dir, off, ptag, attrs, attrcount, - 0, 0, 0, 0, 0, - lfs_dir_traverse_filter, &tag); - if (filter < 0) { - return filter; + tag = (lfs_frombe32(tag) ^ ptag) | 0x80000000; + disk.block = dir->pair[0]; + disk.off = off+sizeof(lfs_tag_t); + buffer = &disk; + ptag = tag; + } else if (attrcount > 0) { + tag = attrs[0].tag; + buffer = attrs[0].buffer; + attrs += 1; + attrcount -= 1; + } else { + // finished traversal, pop from stack? + res = 0; + break; } - if (filter) { + // do we need to filter? + lfs_tag_t mask = LFS_MKTAG(0x7ff, 0, 0); + if ((mask & tmask & tag) != (mask & tmask & ttag)) { continue; } - // in filter range? - if (!(lfs_tag_id(tag) >= begin && lfs_tag_id(tag) < end)) { + if (lfs_tag_id(tmask) != 0) { + LFS_ASSERT(sp < LFS_DIR_TRAVERSE_DEPTH); + // recurse, scan for duplicates, and update tag based on + // creates/deletes + stack[sp] = (struct lfs_dir_traverse){ + .dir = dir, + .off = off, + .ptag = ptag, + .attrs = attrs, + .attrcount = attrcount, + .tmask = tmask, + .ttag = ttag, + .begin = begin, + .end = end, + .diff = diff, + .cb = cb, + .data = data, + .tag = tag, + .buffer = buffer, + .disk = disk, + }; + sp += 1; + + dir = dir; + off = off; + ptag = ptag; + attrs = attrs; + attrcount = attrcount; + tmask = 0; + ttag = 0; + begin = 0; + end = 0; + diff = 0; + cb = lfs_dir_traverse_filter; + data = &stack[sp-1].tag; continue; } } +popped: + // in filter range? 
+ if (lfs_tag_id(tmask) != 0 && + !(lfs_tag_id(tag) >= begin && lfs_tag_id(tag) < end)) { + continue; + } + // handle special cases for mcu-side operations if (lfs_tag_type3(tag) == LFS_FROM_NOOP) { // do nothing } else if (lfs_tag_type3(tag) == LFS_FROM_MOVE) { - uint16_t fromid = lfs_tag_size(tag); - uint16_t toid = lfs_tag_id(tag); - // There is a huge room for simple optimization for the rename case - // where we can see up to 4 levels of lfs_dir_traverse recursions - // when compaction happened (for example): + // Without this condition, lfs_dir_traverse can exhibit an + // extremely expensive O(n^3) of nested loops when renaming. + // This happens because lfs_dir_traverse tries to filter tags by + // the tags in the source directory, triggering a second + // lfs_dir_traverse with its own filter operation. // - // >lfs_dir_compact - // [1] lfs_dir_traverse(cb=lfs_dir_commit_size) - // - do 'duplicates and tag update' - // [2] lfs_dir_traverse(cb=lfs_dir_traverse_filter, data=tag[1]) - // - Reaching a LFS_FROM_MOVE tag (here) - // [3] lfs_dir_traverse(cb=lfs_dir_traverse_filter, - // data=tag[1]) <= on 'from' dir - // - do 'duplicates and tag update' - // [4] lfs_dir_traverse(cb=lfs_dir_traverse_filter, - // data=tag[3]) + // However we don't actually care about filtering the second set of + // tags, since duplicate tags have no effect when filtering. // - // Yet, for LFS_FROM_MOVE when cb == lfs_dir_traverse_filter - // traverse [3] and [4] don't do anything: - // - if [2] is supposed to match 'toid' for duplication, a preceding - // ERASE or CREATE with the same tag id will already have stopped - // the search. - // - if [2] is here to update tag value of CREATE/DELETE attr found - // during the scan, since [3] is looking for LFS_TYPE_STRUCT only - // and call lfs_dir_traverse_filter with LFS_TYPE_STRUCT attr - // wheras lfs_dir_traverse_filter only modify tag on CREATE or - // DELETE. Consequently, cb called from [4] will never stop the - // search from [2]. - // - [4] may call lfs_dir_traverse_filter, but with action on a - // tag[3] pointer completely different from tag[1] + // This check skips this unnecessary recursive filtering explicitly, + // reducing this runtime from O(n^3) to O(n^2). 
if (cb == lfs_dir_traverse_filter) { continue; } - // note: buffer = oldcwd dir - int err = lfs_dir_traverse(lfs, - buffer, 0, 0xffffffff, NULL, 0, - LFS_MKTAG(0x600, 0x3ff, 0), - LFS_MKTAG(LFS_TYPE_STRUCT, 0, 0), - fromid, fromid+1, toid-fromid+diff, - cb, data); - if (err) { - return err; - } + // recurse into move + stack[sp] = (struct lfs_dir_traverse){ + .dir = dir, + .off = off, + .ptag = ptag, + .attrs = attrs, + .attrcount = attrcount, + .tmask = tmask, + .ttag = ttag, + .begin = begin, + .end = end, + .diff = diff, + .cb = cb, + .data = data, + .tag = LFS_MKTAG(LFS_FROM_NOOP, 0, 0), + }; + sp += 1; + + uint16_t fromid = lfs_tag_size(tag); + uint16_t toid = lfs_tag_id(tag); + dir = buffer; + off = 0; + ptag = 0xffffffff; + attrs = NULL; + attrcount = 0; + tmask = LFS_MKTAG(0x600, 0x3ff, 0); + ttag = LFS_MKTAG(LFS_TYPE_STRUCT, 0, 0); + begin = fromid; + end = fromid+1; + diff = toid-fromid+diff; } else if (lfs_tag_type3(tag) == LFS_FROM_USERATTRS) { for (unsigned i = 0; i < lfs_tag_size(tag); i++) { const struct lfs_attr *a = buffer; - int err = cb(data, LFS_MKTAG(LFS_TYPE_USERATTR + a[i].type, + res = cb(data, LFS_MKTAG(LFS_TYPE_USERATTR + a[i].type, lfs_tag_id(tag) + diff, a[i].size), a[i].buffer); - if (err) { - return err; + if (res < 0) { + return res; + } + + if (res) { + break; } } } else { - int err = cb(data, tag + LFS_MKTAG(0, diff, 0), buffer); - if (err) { - return err; + res = cb(data, tag + LFS_MKTAG(0, diff, 0), buffer); + if (res < 0) { + return res; + } + + if (res) { + break; } } } + + if (sp > 0) { + // pop from the stack and return, fortunately all pops share + // a destination + dir = stack[sp-1].dir; + off = stack[sp-1].off; + ptag = stack[sp-1].ptag; + attrs = stack[sp-1].attrs; + attrcount = stack[sp-1].attrcount; + tmask = stack[sp-1].tmask; + ttag = stack[sp-1].ttag; + begin = stack[sp-1].begin; + end = stack[sp-1].end; + diff = stack[sp-1].diff; + cb = stack[sp-1].cb; + data = stack[sp-1].data; + tag = stack[sp-1].tag; + buffer = stack[sp-1].buffer; + disk = stack[sp-1].disk; + sp -= 1; + goto popped; + } else { + return res; + } } #endif @@ -1553,7 +1662,7 @@ static int lfs_dir_drop(lfs_t *lfs, lfs_mdir_t *dir, lfs_mdir_t *tail) { static int lfs_dir_split(lfs_t *lfs, lfs_mdir_t *dir, const struct lfs_mattr *attrs, int attrcount, lfs_mdir_t *source, uint16_t split, uint16_t end) { - // create tail directory + // create tail metadata pair lfs_mdir_t tail; int err = lfs_dir_alloc(lfs, &tail); if (err) { @@ -1564,9 +1673,10 @@ static int lfs_dir_split(lfs_t *lfs, tail.tail[0] = dir->tail[0]; tail.tail[1] = dir->tail[1]; - err = lfs_dir_compact(lfs, &tail, attrs, attrcount, source, split, end); - if (err) { - return err; + // note we don't care about LFS_OK_RELOCATED + int res = lfs_dir_compact(lfs, &tail, attrs, attrcount, source, split, end); + if (res < 0) { + return res; } dir->tail[0] = tail.pair[0]; @@ -1607,107 +1717,45 @@ static int lfs_dir_commit_commit(void *p, lfs_tag_t tag, const void *buffer) { } #endif +#ifndef LFS_READONLY +static bool lfs_dir_needsrelocation(lfs_t *lfs, lfs_mdir_t *dir) { + // If our revision count == n * block_cycles, we should force a relocation, + // this is how littlefs wear-levels at the metadata-pair level. Note that we + // actually use (block_cycles+1)|1, this is to avoid two corner cases: + // 1. block_cycles = 1, which would prevent relocations from terminating + // 2. 
block_cycles = 2n, which, due to aliasing, would only ever relocate + // one metadata block in the pair, effectively making this useless + return (lfs->cfg->block_cycles > 0 + && ((dir->rev + 1) % ((lfs->cfg->block_cycles+1)|1) == 0)); +} +#endif + #ifndef LFS_READONLY static int lfs_dir_compact(lfs_t *lfs, lfs_mdir_t *dir, const struct lfs_mattr *attrs, int attrcount, lfs_mdir_t *source, uint16_t begin, uint16_t end) { // save some state in case block is bad - const lfs_block_t oldpair[2] = {dir->pair[0], dir->pair[1]}; bool relocated = false; - bool tired = false; - - // should we split? - while (end - begin > 1) { - // find size - lfs_size_t size = 0; - int err = lfs_dir_traverse(lfs, - source, 0, 0xffffffff, attrs, attrcount, - LFS_MKTAG(0x400, 0x3ff, 0), - LFS_MKTAG(LFS_TYPE_NAME, 0, 0), - begin, end, -begin, - lfs_dir_commit_size, &size); - if (err) { - return err; - } - - // space is complicated, we need room for tail, crc, gstate, - // cleanup delete, and we cap at half a block to give room - // for metadata updates. - if (end - begin < 0xff && - size <= lfs_min(lfs->cfg->block_size - 36, - lfs_alignup((lfs->cfg->metadata_max ? - lfs->cfg->metadata_max : lfs->cfg->block_size)/2, - lfs->cfg->prog_size))) { - break; - } - - // can't fit, need to split, we should really be finding the - // largest size that fits with a small binary search, but right now - // it's not worth the code size - uint16_t split = (end - begin) / 2; - err = lfs_dir_split(lfs, dir, attrs, attrcount, - source, begin+split, end); - if (err) { - // if we fail to split, we may be able to overcompact, unless - // we're too big for even the full block, in which case our - // only option is to error - if (err == LFS_ERR_NOSPC && size <= lfs->cfg->block_size - 36) { - break; - } - return err; - } - - end = begin + split; - } + bool tired = lfs_dir_needsrelocation(lfs, dir); // increment revision count dir->rev += 1; - // If our revision count == n * block_cycles, we should force a relocation, - // this is how littlefs wear-levels at the metadata-pair level. Note that we - // actually use (block_cycles+1)|1, this is to avoid two corner cases: - // 1. block_cycles = 1, which would prevent relocations from terminating - // 2. block_cycles = 2n, which, due to aliasing, would only ever relocate - // one metadata block in the pair, effectively making this useless - if (lfs->cfg->block_cycles > 0 && - (dir->rev % ((lfs->cfg->block_cycles+1)|1) == 0)) { - if (lfs_pair_cmp(dir->pair, (const lfs_block_t[2]){0, 1}) == 0) { - // oh no! we're writing too much to the superblock, - // should we expand? - lfs_ssize_t res = lfs_fs_rawsize(lfs); - if (res < 0) { - return res; - } - - // do we have extra space? littlefs can't reclaim this space - // by itself, so expand cautiously - if ((lfs_size_t)res < lfs->cfg->block_count/2) { - LFS_DEBUG("Expanding superblock at rev %"PRIu32, dir->rev); - int err = lfs_dir_split(lfs, dir, attrs, attrcount, - source, begin, end); - if (err && err != LFS_ERR_NOSPC) { - return err; - } - // welp, we tried, if we ran out of space there's not much - // we can do, we'll error later if we've become frozen - if (!err) { - end = begin; - } - } + // do not proactively relocate blocks during migrations, this + // can cause a number of failure states such: clobbering the + // v1 superblock if we relocate root, and invalidating directory + // pointers if we relocate the head of a directory. 
On top of + // this, relocations increase the overall complexity of + // lfs_migration, which is already a delicate operation. #ifdef LFS_MIGRATE - } else if (lfs->lfs1) { - // do not proactively relocate blocks during migrations, this - // can cause a number of failure states such: clobbering the - // v1 superblock if we relocate root, and invalidating directory - // pointers if we relocate the head of a directory. On top of - // this, relocations increase the overall complexity of - // lfs_migration, which is already a delicate operation. + if (lfs->lfs1) { + tired = false; + } #endif - } else { - // we're writing too much, time to relocate - tired = true; - goto relocate; - } + + if (tired && lfs_pair_cmp(dir->pair, (const lfs_block_t[2]){0, 1}) != 0) { + // we're writing too much, time to relocate + goto relocate; } // begin loop to commit compaction to blocks until a compact sticks @@ -1851,44 +1899,114 @@ static int lfs_dir_compact(lfs_t *lfs, continue; } - if (relocated) { - // update references if we relocated - LFS_DEBUG("Relocating {0x%"PRIx32", 0x%"PRIx32"} " - "-> {0x%"PRIx32", 0x%"PRIx32"}", - oldpair[0], oldpair[1], dir->pair[0], dir->pair[1]); - int err = lfs_fs_relocate(lfs, oldpair, dir->pair); - if (err) { - return err; - } - } - - return 0; + return relocated ? LFS_OK_RELOCATED : 0; } #endif #ifndef LFS_READONLY -static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, - const struct lfs_mattr *attrs, int attrcount) { - // check for any inline files that aren't RAM backed and - // forcefully evict them, needed for filesystem consistency - for (lfs_file_t *f = (lfs_file_t*)lfs->mlist; f; f = f->next) { - if (dir != &f->m && lfs_pair_cmp(f->m.pair, dir->pair) == 0 && - f->type == LFS_TYPE_REG && (f->flags & LFS_F_INLINE) && - f->ctz.size > lfs->cfg->cache_size) { - int err = lfs_file_outline(lfs, f); +static int lfs_dir_splittingcompact(lfs_t *lfs, lfs_mdir_t *dir, + const struct lfs_mattr *attrs, int attrcount, + lfs_mdir_t *source, uint16_t begin, uint16_t end) { + while (true) { + // find size of first split, we do this by halving the split until + // the metadata is guaranteed to fit + // + // Note that this isn't a true binary search, we never increase the + // split size. This may result in poorly distributed metadata but isn't + // worth the extra code size or performance hit to fix. + lfs_size_t split = begin; + while (end - split > 1) { + lfs_size_t size = 0; + int err = lfs_dir_traverse(lfs, + source, 0, 0xffffffff, attrs, attrcount, + LFS_MKTAG(0x400, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_NAME, 0, 0), + split, end, -split, + lfs_dir_commit_size, &size); if (err) { return err; } - err = lfs_file_flush(lfs, f); - if (err) { + // space is complicated, we need room for tail, crc, gstate, + // cleanup delete, and we cap at half a block to give room + // for metadata updates. + if (end - split < 0xff + && size <= lfs_min(lfs->cfg->block_size - 36, + lfs_alignup( + (lfs->cfg->metadata_max + ? 
lfs->cfg->metadata_max + : lfs->cfg->block_size)/2, + lfs->cfg->prog_size))) { + break; + } + + split = split + ((end - split) / 2); + } + + if (split == begin) { + // no split needed + break; + } + + // split into two metadata pairs and continue + int err = lfs_dir_split(lfs, dir, attrs, attrcount, + source, split, end); + if (err && err != LFS_ERR_NOSPC) { + return err; + } + + if (err) { + // we can't allocate a new block, try to compact with degraded + // performance + LFS_WARN("Unable to split {0x%"PRIx32", 0x%"PRIx32"}", + dir->pair[0], dir->pair[1]); + break; + } else { + end = split; + } + } + + if (lfs_dir_needsrelocation(lfs, dir) + && lfs_pair_cmp(dir->pair, (const lfs_block_t[2]){0, 1}) == 0) { + // oh no! we're writing too much to the superblock, + // should we expand? + lfs_ssize_t size = lfs_fs_rawsize(lfs); + if (size < 0) { + return size; + } + + // do we have extra space? littlefs can't reclaim this space + // by itself, so expand cautiously + if ((lfs_size_t)size < lfs->cfg->block_count/2) { + LFS_DEBUG("Expanding superblock at rev %"PRIu32, dir->rev); + int err = lfs_dir_split(lfs, dir, attrs, attrcount, + source, begin, end); + if (err && err != LFS_ERR_NOSPC) { return err; } + + if (err) { + // welp, we tried, if we ran out of space there's not much + // we can do, we'll error later if we've become frozen + LFS_WARN("Unable to expand superblock"); + } else { + end = begin; + } } } + return lfs_dir_compact(lfs, dir, attrs, attrcount, source, begin, end); +} +#endif + +#ifndef LFS_READONLY +static int lfs_dir_relocatingcommit(lfs_t *lfs, lfs_mdir_t *dir, + const lfs_block_t pair[2], + const struct lfs_mattr *attrs, int attrcount, + lfs_mdir_t *pdir) { + int state = 0; + // calculate changes to the directory - lfs_mdir_t olddir = *dir; bool hasdelete = false; for (int i = 0; i < attrcount; i++) { if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_CREATE) { @@ -1907,23 +2025,19 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, // should we actually drop the directory block? 
if (hasdelete && dir->count == 0) { - lfs_mdir_t pdir; - int err = lfs_fs_pred(lfs, dir->pair, &pdir); + LFS_ASSERT(pdir); + int err = lfs_fs_pred(lfs, dir->pair, pdir); if (err && err != LFS_ERR_NOENT) { - *dir = olddir; return err; } - if (err != LFS_ERR_NOENT && pdir.split) { - err = lfs_dir_drop(lfs, &pdir, dir); - if (err) { - *dir = olddir; - return err; - } + if (err != LFS_ERR_NOENT && pdir->split) { + state = LFS_OK_DROPPED; + goto fixmlist; } } - if (dir->erased || dir->count >= 0xff) { + if (dir->erased) { // try to commit struct lfs_commit commit = { .block = dir->pair[0], @@ -1948,7 +2062,6 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) { goto compact; } - *dir = olddir; return err; } @@ -1961,7 +2074,6 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, if (!lfs_gstate_iszero(&delta)) { err = lfs_dir_getgstate(lfs, dir, &delta); if (err) { - *dir = olddir; return err; } @@ -1973,7 +2085,6 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) { goto compact; } - *dir = olddir; return err; } } @@ -1984,7 +2095,6 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) { goto compact; } - *dir = olddir; return err; } @@ -1995,19 +2105,23 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, // and update gstate lfs->gdisk = lfs->gstate; lfs->gdelta = (lfs_gstate_t){0}; - } else { + + goto fixmlist; + } + compact: - // fall back to compaction - lfs_cache_drop(lfs, &lfs->pcache); + // fall back to compaction + lfs_cache_drop(lfs, &lfs->pcache); - int err = lfs_dir_compact(lfs, dir, attrs, attrcount, - dir, 0, dir->count); - if (err) { - *dir = olddir; - return err; - } + state = lfs_dir_splittingcompact(lfs, dir, attrs, attrcount, + dir, 0, dir->count); + if (state < 0) { + return state; } + goto fixmlist; + +fixmlist:; // this complicated bit of logic is for fixing up any active // metadata-pairs that we may have affected // @@ -2015,33 +2129,32 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, // lfs_dir_commit could also be in this list, and even then // we need to copy the pair so they don't get clobbered if we refetch // our mdir. 
+ lfs_block_t oldpair[2] = {pair[0], pair[1]}; for (struct lfs_mlist *d = lfs->mlist; d; d = d->next) { - if (&d->m != dir && lfs_pair_cmp(d->m.pair, olddir.pair) == 0) { + if (lfs_pair_cmp(d->m.pair, oldpair) == 0) { d->m = *dir; - for (int i = 0; i < attrcount; i++) { - if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_DELETE && - d->id == lfs_tag_id(attrs[i].tag)) { - d->m.pair[0] = LFS_BLOCK_NULL; - d->m.pair[1] = LFS_BLOCK_NULL; - } else if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_DELETE && - d->id > lfs_tag_id(attrs[i].tag)) { - d->id -= 1; - if (d->type == LFS_TYPE_DIR) { - ((lfs_dir_t*)d)->pos -= 1; - } - } else if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_CREATE && - d->id >= lfs_tag_id(attrs[i].tag)) { - d->id += 1; - if (d->type == LFS_TYPE_DIR) { - ((lfs_dir_t*)d)->pos += 1; + if (d->m.pair != pair) { + for (int i = 0; i < attrcount; i++) { + if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_DELETE && + d->id == lfs_tag_id(attrs[i].tag)) { + d->m.pair[0] = LFS_BLOCK_NULL; + d->m.pair[1] = LFS_BLOCK_NULL; + } else if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_DELETE && + d->id > lfs_tag_id(attrs[i].tag)) { + d->id -= 1; + if (d->type == LFS_TYPE_DIR) { + ((lfs_dir_t*)d)->pos -= 1; + } + } else if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_CREATE && + d->id >= lfs_tag_id(attrs[i].tag)) { + d->id += 1; + if (d->type == LFS_TYPE_DIR) { + ((lfs_dir_t*)d)->pos += 1; + } } } } - } - } - for (struct lfs_mlist *d = lfs->mlist; d; d = d->next) { - if (lfs_pair_cmp(d->m.pair, olddir.pair) == 0) { while (d->id >= d->m.count && d->m.split) { // we split and id is on tail now d->id -= d->m.count; @@ -2053,6 +2166,221 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, } } + return state; +} +#endif + +#ifndef LFS_READONLY +static int lfs_dir_orphaningcommit(lfs_t *lfs, lfs_mdir_t *dir, + const struct lfs_mattr *attrs, int attrcount) { + // check for any inline files that aren't RAM backed and + // forcefully evict them, needed for filesystem consistency + for (lfs_file_t *f = (lfs_file_t*)lfs->mlist; f; f = f->next) { + if (dir != &f->m && lfs_pair_cmp(f->m.pair, dir->pair) == 0 && + f->type == LFS_TYPE_REG && (f->flags & LFS_F_INLINE) && + f->ctz.size > lfs->cfg->cache_size) { + int err = lfs_file_outline(lfs, f); + if (err) { + return err; + } + + err = lfs_file_flush(lfs, f); + if (err) { + return err; + } + } + } + + lfs_block_t lpair[2] = {dir->pair[0], dir->pair[1]}; + lfs_mdir_t ldir = *dir; + lfs_mdir_t pdir; + int state = lfs_dir_relocatingcommit(lfs, &ldir, dir->pair, + attrs, attrcount, &pdir); + if (state < 0) { + return state; + } + + // update if we're not in mlist, note we may have already been + // updated if we are in mlist + if (lfs_pair_cmp(dir->pair, lpair) == 0) { + *dir = ldir; + } + + // commit was successful, but may require other changes in the + // filesystem, these would normally be tail recursive, but we have + // flattened them here avoid unbounded stack usage + + // need to drop? + if (state == LFS_OK_DROPPED) { + // steal state + int err = lfs_dir_getgstate(lfs, dir, &lfs->gdelta); + if (err) { + return err; + } + + // steal tail, note that this can't create a recursive drop + lpair[0] = pdir.pair[0]; + lpair[1] = pdir.pair[1]; + lfs_pair_tole32(dir->tail); + state = lfs_dir_relocatingcommit(lfs, &pdir, lpair, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_TAIL + dir->split, 0x3ff, 8), + dir->tail}), + NULL); + lfs_pair_fromle32(dir->tail); + if (state < 0) { + return state; + } + + ldir = pdir; + } + + // need to relocate? 
+ bool orphans = false; + while (state == LFS_OK_RELOCATED) { + LFS_DEBUG("Relocating {0x%"PRIx32", 0x%"PRIx32"} " + "-> {0x%"PRIx32", 0x%"PRIx32"}", + lpair[0], lpair[1], ldir.pair[0], ldir.pair[1]); + state = 0; + + // update internal root + if (lfs_pair_cmp(lpair, lfs->root) == 0) { + lfs->root[0] = ldir.pair[0]; + lfs->root[1] = ldir.pair[1]; + } + + // update internally tracked dirs + for (struct lfs_mlist *d = lfs->mlist; d; d = d->next) { + if (lfs_pair_cmp(lpair, d->m.pair) == 0) { + d->m.pair[0] = ldir.pair[0]; + d->m.pair[1] = ldir.pair[1]; + } + + if (d->type == LFS_TYPE_DIR && + lfs_pair_cmp(lpair, ((lfs_dir_t*)d)->head) == 0) { + ((lfs_dir_t*)d)->head[0] = ldir.pair[0]; + ((lfs_dir_t*)d)->head[1] = ldir.pair[1]; + } + } + + // find parent + lfs_stag_t tag = lfs_fs_parent(lfs, lpair, &pdir); + if (tag < 0 && tag != LFS_ERR_NOENT) { + return tag; + } + + bool hasparent = (tag != LFS_ERR_NOENT); + if (tag != LFS_ERR_NOENT) { + // note that if we have a parent, we must have a pred, so this will + // always create an orphan + int err = lfs_fs_preporphans(lfs, +1); + if (err) { + return err; + } + + // fix pending move in this pair? this looks like an optimization but + // is in fact _required_ since relocating may outdate the move. + uint16_t moveid = 0x3ff; + if (lfs_gstate_hasmovehere(&lfs->gstate, pdir.pair)) { + moveid = lfs_tag_id(lfs->gstate.tag); + LFS_DEBUG("Fixing move while relocating " + "{0x%"PRIx32", 0x%"PRIx32"} 0x%"PRIx16"\n", + pdir.pair[0], pdir.pair[1], moveid); + lfs_fs_prepmove(lfs, 0x3ff, NULL); + if (moveid < lfs_tag_id(tag)) { + tag -= LFS_MKTAG(0, 1, 0); + } + } + + lfs_block_t ppair[2] = {pdir.pair[0], pdir.pair[1]}; + lfs_pair_tole32(ldir.pair); + state = lfs_dir_relocatingcommit(lfs, &pdir, ppair, LFS_MKATTRS( + {LFS_MKTAG_IF(moveid != 0x3ff, + LFS_TYPE_DELETE, moveid, 0), NULL}, + {tag, ldir.pair}), + NULL); + lfs_pair_fromle32(ldir.pair); + if (state < 0) { + return state; + } + + if (state == LFS_OK_RELOCATED) { + lpair[0] = ppair[0]; + lpair[1] = ppair[1]; + ldir = pdir; + orphans = true; + continue; + } + } + + // find pred + int err = lfs_fs_pred(lfs, lpair, &pdir); + if (err && err != LFS_ERR_NOENT) { + return err; + } + LFS_ASSERT(!(hasparent && err == LFS_ERR_NOENT)); + + // if we can't find dir, it must be new + if (err != LFS_ERR_NOENT) { + if (lfs_gstate_hasorphans(&lfs->gstate)) { + // next step, clean up orphans + err = lfs_fs_preporphans(lfs, -hasparent); + if (err) { + return err; + } + } + + // fix pending move in this pair? this looks like an optimization + // but is in fact _required_ since relocating may outdate the move. + uint16_t moveid = 0x3ff; + if (lfs_gstate_hasmovehere(&lfs->gstate, pdir.pair)) { + moveid = lfs_tag_id(lfs->gstate.tag); + LFS_DEBUG("Fixing move while relocating " + "{0x%"PRIx32", 0x%"PRIx32"} 0x%"PRIx16"\n", + pdir.pair[0], pdir.pair[1], moveid); + lfs_fs_prepmove(lfs, 0x3ff, NULL); + } + + // replace bad pair, either we clean up desync, or no desync occured + lpair[0] = pdir.pair[0]; + lpair[1] = pdir.pair[1]; + lfs_pair_tole32(ldir.pair); + state = lfs_dir_relocatingcommit(lfs, &pdir, lpair, LFS_MKATTRS( + {LFS_MKTAG_IF(moveid != 0x3ff, + LFS_TYPE_DELETE, moveid, 0), NULL}, + {LFS_MKTAG(LFS_TYPE_TAIL + pdir.split, 0x3ff, 8), + ldir.pair}), + NULL); + lfs_pair_fromle32(ldir.pair); + if (state < 0) { + return state; + } + + ldir = pdir; + } + } + + return orphans ? 
LFS_OK_ORPHANED : 0; +} +#endif + +#ifndef LFS_READONLY +static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, + const struct lfs_mattr *attrs, int attrcount) { + int orphans = lfs_dir_orphaningcommit(lfs, dir, attrs, attrcount); + if (orphans < 0) { + return orphans; + } + + if (orphans) { + // make sure we've removed all orphans, this is a noop if there + // are none, but if we had nested blocks failures we may have + // created some + int err = lfs_fs_deorphan(lfs, false); + if (err) { + return err; + } + } + return 0; } #endif @@ -2107,7 +2435,7 @@ static int lfs_rawmkdir(lfs_t *lfs, const char *path) { return err; } - // current block end of list? + // current block not end of list? if (cwd.m.split) { // update tails, this creates a desync err = lfs_fs_preporphans(lfs, +1); @@ -2804,12 +3132,12 @@ static int lfs_file_flush(lfs_t *lfs, lfs_file_t *file) { // copy over a byte at a time, leave it up to caching // to make this efficient uint8_t data; - lfs_ssize_t res = lfs_file_rawread(lfs, &orig, &data, 1); + lfs_ssize_t res = lfs_file_flushedread(lfs, &orig, &data, 1); if (res < 0) { return res; } - res = lfs_file_rawwrite(lfs, file, &data, 1); + res = lfs_file_flushedwrite(lfs, file, &data, 1); if (res < 0) { return res; } @@ -2910,23 +3238,11 @@ static int lfs_file_rawsync(lfs_t *lfs, lfs_file_t *file) { } #endif -static lfs_ssize_t lfs_file_rawread(lfs_t *lfs, lfs_file_t *file, +static lfs_ssize_t lfs_file_flushedread(lfs_t *lfs, lfs_file_t *file, void *buffer, lfs_size_t size) { - LFS_ASSERT((file->flags & LFS_O_RDONLY) == LFS_O_RDONLY); - uint8_t *data = buffer; lfs_size_t nsize = size; -#ifndef LFS_READONLY - if (file->flags & LFS_F_WRITING) { - // flush out any writes - int err = lfs_file_flush(lfs, file); - if (err) { - return err; - } - } -#endif - if (file->pos >= file->ctz.size) { // eof if past end return 0; @@ -2983,43 +3299,29 @@ static lfs_ssize_t lfs_file_rawread(lfs_t *lfs, lfs_file_t *file, return size; } -#ifndef LFS_READONLY -static lfs_ssize_t lfs_file_rawwrite(lfs_t *lfs, lfs_file_t *file, - const void *buffer, lfs_size_t size) { - LFS_ASSERT((file->flags & LFS_O_WRONLY) == LFS_O_WRONLY); - - const uint8_t *data = buffer; - lfs_size_t nsize = size; +static lfs_ssize_t lfs_file_rawread(lfs_t *lfs, lfs_file_t *file, + void *buffer, lfs_size_t size) { + LFS_ASSERT((file->flags & LFS_O_RDONLY) == LFS_O_RDONLY); - if (file->flags & LFS_F_READING) { - // drop any reads +#ifndef LFS_READONLY + if (file->flags & LFS_F_WRITING) { + // flush out any writes int err = lfs_file_flush(lfs, file); if (err) { return err; } } +#endif - if ((file->flags & LFS_O_APPEND) && file->pos < file->ctz.size) { - file->pos = file->ctz.size; - } - - if (file->pos + size > lfs->file_max) { - // Larger than file limit? 
- return LFS_ERR_FBIG; - } + return lfs_file_flushedread(lfs, file, buffer, size); +} - if (!(file->flags & LFS_F_WRITING) && file->pos > file->ctz.size) { - // fill with zeros - lfs_off_t pos = file->pos; - file->pos = file->ctz.size; - while (file->pos < pos) { - lfs_ssize_t res = lfs_file_rawwrite(lfs, file, &(uint8_t){0}, 1); - if (res < 0) { - return res; - } - } - } +#ifndef LFS_READONLY +static lfs_ssize_t lfs_file_flushedwrite(lfs_t *lfs, lfs_file_t *file, + const void *buffer, lfs_size_t size) { + const uint8_t *data = buffer; + lfs_size_t nsize = size; if ((file->flags & LFS_F_INLINE) && lfs_max(file->pos+nsize, file->ctz.size) > @@ -3101,9 +3403,51 @@ static lfs_ssize_t lfs_file_rawwrite(lfs_t *lfs, lfs_file_t *file, lfs_alloc_ack(lfs); } - file->flags &= ~LFS_F_ERRED; return size; } + +static lfs_ssize_t lfs_file_rawwrite(lfs_t *lfs, lfs_file_t *file, + const void *buffer, lfs_size_t size) { + LFS_ASSERT((file->flags & LFS_O_WRONLY) == LFS_O_WRONLY); + + if (file->flags & LFS_F_READING) { + // drop any reads + int err = lfs_file_flush(lfs, file); + if (err) { + return err; + } + } + + if ((file->flags & LFS_O_APPEND) && file->pos < file->ctz.size) { + file->pos = file->ctz.size; + } + + if (file->pos + size > lfs->file_max) { + // Larger than file limit? + return LFS_ERR_FBIG; + } + + if (!(file->flags & LFS_F_WRITING) && file->pos > file->ctz.size) { + // fill with zeros + lfs_off_t pos = file->pos; + file->pos = file->ctz.size; + + while (file->pos < pos) { + lfs_ssize_t res = lfs_file_flushedwrite(lfs, file, &(uint8_t){0}, 1); + if (res < 0) { + return res; + } + } + } + + lfs_ssize_t nsize = lfs_file_flushedwrite(lfs, file, buffer, size); + if (nsize < 0) { + return nsize; + } + + file->flags &= ~LFS_F_ERRED; + return nsize; +} #endif static lfs_soff_t lfs_file_rawseek(lfs_t *lfs, lfs_file_t *file, @@ -3456,7 +3800,8 @@ static int lfs_rawrename(lfs_t *lfs, const char *oldpath, const char *newpath) { } lfs->mlist = prevdir.next; - if (prevtag != LFS_ERR_NOENT && lfs_tag_type3(prevtag) == LFS_TYPE_DIR) { + if (prevtag != LFS_ERR_NOENT + && lfs_tag_type3(prevtag) == LFS_TYPE_DIR) { // fix orphan err = lfs_fs_preporphans(lfs, -1); if (err) { @@ -4076,109 +4421,6 @@ static lfs_stag_t lfs_fs_parent(lfs_t *lfs, const lfs_block_t pair[2], } #endif -#ifndef LFS_READONLY -static int lfs_fs_relocate(lfs_t *lfs, - const lfs_block_t oldpair[2], lfs_block_t newpair[2]) { - // update internal root - if (lfs_pair_cmp(oldpair, lfs->root) == 0) { - lfs->root[0] = newpair[0]; - lfs->root[1] = newpair[1]; - } - - // update internally tracked dirs - for (struct lfs_mlist *d = lfs->mlist; d; d = d->next) { - if (lfs_pair_cmp(oldpair, d->m.pair) == 0) { - d->m.pair[0] = newpair[0]; - d->m.pair[1] = newpair[1]; - } - - if (d->type == LFS_TYPE_DIR && - lfs_pair_cmp(oldpair, ((lfs_dir_t*)d)->head) == 0) { - ((lfs_dir_t*)d)->head[0] = newpair[0]; - ((lfs_dir_t*)d)->head[1] = newpair[1]; - } - } - - // find parent - lfs_mdir_t parent; - lfs_stag_t tag = lfs_fs_parent(lfs, oldpair, &parent); - if (tag < 0 && tag != LFS_ERR_NOENT) { - return tag; - } - - if (tag != LFS_ERR_NOENT) { - // update disk, this creates a desync - int err = lfs_fs_preporphans(lfs, +1); - if (err) { - return err; - } - - // fix pending move in this pair? this looks like an optimization but - // is in fact _required_ since relocating may outdate the move. 
- uint16_t moveid = 0x3ff; - if (lfs_gstate_hasmovehere(&lfs->gstate, parent.pair)) { - moveid = lfs_tag_id(lfs->gstate.tag); - LFS_DEBUG("Fixing move while relocating " - "{0x%"PRIx32", 0x%"PRIx32"} 0x%"PRIx16"\n", - parent.pair[0], parent.pair[1], moveid); - lfs_fs_prepmove(lfs, 0x3ff, NULL); - if (moveid < lfs_tag_id(tag)) { - tag -= LFS_MKTAG(0, 1, 0); - } - } - - lfs_pair_tole32(newpair); - err = lfs_dir_commit(lfs, &parent, LFS_MKATTRS( - {LFS_MKTAG_IF(moveid != 0x3ff, - LFS_TYPE_DELETE, moveid, 0), NULL}, - {tag, newpair})); - lfs_pair_fromle32(newpair); - if (err) { - return err; - } - - // next step, clean up orphans - err = lfs_fs_preporphans(lfs, -1); - if (err) { - return err; - } - } - - // find pred - int err = lfs_fs_pred(lfs, oldpair, &parent); - if (err && err != LFS_ERR_NOENT) { - return err; - } - - // if we can't find dir, it must be new - if (err != LFS_ERR_NOENT) { - // fix pending move in this pair? this looks like an optimization but - // is in fact _required_ since relocating may outdate the move. - uint16_t moveid = 0x3ff; - if (lfs_gstate_hasmovehere(&lfs->gstate, parent.pair)) { - moveid = lfs_tag_id(lfs->gstate.tag); - LFS_DEBUG("Fixing move while relocating " - "{0x%"PRIx32", 0x%"PRIx32"} 0x%"PRIx16"\n", - parent.pair[0], parent.pair[1], moveid); - lfs_fs_prepmove(lfs, 0x3ff, NULL); - } - - // replace bad pair, either we clean up desync, or no desync occurred - lfs_pair_tole32(newpair); - err = lfs_dir_commit(lfs, &parent, LFS_MKATTRS( - {LFS_MKTAG_IF(moveid != 0x3ff, - LFS_TYPE_DELETE, moveid, 0), NULL}, - {LFS_MKTAG(LFS_TYPE_TAIL + parent.split, 0x3ff, 8), newpair})); - lfs_pair_fromle32(newpair); - if (err) { - return err; - } - } - - return 0; -} -#endif - #ifndef LFS_READONLY static int lfs_fs_preporphans(lfs_t *lfs, int8_t orphans) { LFS_ASSERT(lfs_tag_size(lfs->gstate.tag) > 0 || orphans >= 0); @@ -4233,77 +4475,129 @@ static int lfs_fs_demove(lfs_t *lfs) { #endif #ifndef LFS_READONLY -static int lfs_fs_deorphan(lfs_t *lfs) { +static int lfs_fs_deorphan(lfs_t *lfs, bool powerloss) { if (!lfs_gstate_hasorphans(&lfs->gstate)) { return 0; } - // Fix any orphans - lfs_mdir_t pdir = {.split = true, .tail = {0, 1}}; - lfs_mdir_t dir; - - // iterate over all directory directory entries - while (!lfs_pair_isnull(pdir.tail)) { - int err = lfs_dir_fetch(lfs, &dir, pdir.tail); - if (err) { - return err; - } + int8_t found = 0; +restart: + { + // Fix any orphans + lfs_mdir_t pdir = {.split = true, .tail = {0, 1}}; + lfs_mdir_t dir; - // check head blocks for orphans - if (!pdir.split) { - // check if we have a parent - lfs_mdir_t parent; - lfs_stag_t tag = lfs_fs_parent(lfs, pdir.tail, &parent); - if (tag < 0 && tag != LFS_ERR_NOENT) { - return tag; + // iterate over all directory directory entries + while (!lfs_pair_isnull(pdir.tail)) { + int err = lfs_dir_fetch(lfs, &dir, pdir.tail); + if (err) { + return err; } - if (tag == LFS_ERR_NOENT) { - // we are an orphan - LFS_DEBUG("Fixing orphan {0x%"PRIx32", 0x%"PRIx32"}", - pdir.tail[0], pdir.tail[1]); - - err = lfs_dir_drop(lfs, &pdir, &dir); - if (err) { - return err; + // check head blocks for orphans + if (!pdir.split) { + // check if we have a parent + lfs_mdir_t parent; + lfs_stag_t tag = lfs_fs_parent(lfs, pdir.tail, &parent); + if (tag < 0 && tag != LFS_ERR_NOENT) { + return tag; } - // refetch tail - continue; - } + // note we only check for full orphans if we may have had a + // power-loss, otherwise orphans are created intentionally + // during operations such as lfs_mkdir + if (tag == LFS_ERR_NOENT && 
powerloss) { + // we are an orphan + LFS_DEBUG("Fixing orphan {0x%"PRIx32", 0x%"PRIx32"}", + pdir.tail[0], pdir.tail[1]); - lfs_block_t pair[2]; - lfs_stag_t res = lfs_dir_get(lfs, &parent, - LFS_MKTAG(0x7ff, 0x3ff, 0), tag, pair); - if (res < 0) { - return res; - } - lfs_pair_fromle32(pair); - - if (!lfs_pair_sync(pair, pdir.tail)) { - // we have desynced - LFS_DEBUG("Fixing half-orphan {0x%"PRIx32", 0x%"PRIx32"} " - "-> {0x%"PRIx32", 0x%"PRIx32"}", - pdir.tail[0], pdir.tail[1], pair[0], pair[1]); - - lfs_pair_tole32(pair); - err = lfs_dir_commit(lfs, &pdir, LFS_MKATTRS( - {LFS_MKTAG(LFS_TYPE_SOFTTAIL, 0x3ff, 8), pair})); - lfs_pair_fromle32(pair); - if (err) { - return err; + // steal state + err = lfs_dir_getgstate(lfs, &dir, &lfs->gdelta); + if (err) { + return err; + } + + // steal tail + lfs_pair_tole32(dir.tail); + int state = lfs_dir_orphaningcommit(lfs, &pdir, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_TAIL + dir.split, 0x3ff, 8), + dir.tail})); + lfs_pair_fromle32(dir.tail); + if (state < 0) { + return state; + } + + found += 1; + + // did our commit create more orphans? + if (state == LFS_OK_ORPHANED) { + goto restart; + } + + // refetch tail + continue; } - // refetch tail - continue; + if (tag != LFS_ERR_NOENT) { + lfs_block_t pair[2]; + lfs_stag_t state = lfs_dir_get(lfs, &parent, + LFS_MKTAG(0x7ff, 0x3ff, 0), tag, pair); + if (state < 0) { + return state; + } + lfs_pair_fromle32(pair); + + if (!lfs_pair_sync(pair, pdir.tail)) { + // we have desynced + LFS_DEBUG("Fixing half-orphan " + "{0x%"PRIx32", 0x%"PRIx32"} " + "-> {0x%"PRIx32", 0x%"PRIx32"}", + pdir.tail[0], pdir.tail[1], pair[0], pair[1]); + + // fix pending move in this pair? this looks like an + // optimization but is in fact _required_ since + // relocating may outdate the move. + uint16_t moveid = 0x3ff; + if (lfs_gstate_hasmovehere(&lfs->gstate, pdir.pair)) { + moveid = lfs_tag_id(lfs->gstate.tag); + LFS_DEBUG("Fixing move while fixing orphans " + "{0x%"PRIx32", 0x%"PRIx32"} 0x%"PRIx16"\n", + pdir.pair[0], pdir.pair[1], moveid); + lfs_fs_prepmove(lfs, 0x3ff, NULL); + } + + lfs_pair_tole32(pair); + state = lfs_dir_orphaningcommit(lfs, &pdir, LFS_MKATTRS( + {LFS_MKTAG_IF(moveid != 0x3ff, + LFS_TYPE_DELETE, moveid, 0), NULL}, + {LFS_MKTAG(LFS_TYPE_SOFTTAIL, 0x3ff, 8), + pair})); + lfs_pair_fromle32(pair); + if (state < 0) { + return state; + } + + found += 1; + + // did our commit create more orphans? 
+ if (state == LFS_OK_ORPHANED) { + goto restart; + } + + // refetch tail + continue; + } + } } - } - pdir = dir; + pdir = dir; + } } // mark orphans as fixed - return lfs_fs_preporphans(lfs, -lfs_gstate_getorphans(&lfs->gstate)); + return lfs_fs_preporphans(lfs, -lfs_min( + lfs_gstate_getorphans(&lfs->gstate), + found)); } #endif @@ -4314,7 +4608,7 @@ static int lfs_fs_forceconsistency(lfs_t *lfs) { return err; } - err = lfs_fs_deorphan(lfs); + err = lfs_fs_deorphan(lfs, true); if (err) { return err; } diff --git a/scripts/code.py b/scripts/code.py index 08b33a10..b394e9cd 100755 --- a/scripts/code.py +++ b/scripts/code.py @@ -15,7 +15,7 @@ import collections as co -OBJ_PATHS = ['*.o', 'bd/*.o'] +OBJ_PATHS = ['*.o'] def collect(paths, **args): results = co.defaultdict(lambda: 0) @@ -31,7 +31,8 @@ def collect(paths, **args): proc = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE if not args.get('verbose') else None, - universal_newlines=True) + universal_newlines=True, + errors='replace') for line in proc.stdout: m = pattern.match(line) if m: @@ -48,16 +49,30 @@ def collect(paths, **args): # map to source files if args.get('build_dir'): file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) # discard internal functions - if func.startswith('__'): - continue + if not args.get('everything'): + if func.startswith('__'): + continue # discard .8449 suffixes created by optimizer func = re.sub('\.[0-9]+', '', func) + flat_results.append((file, func, size)) return flat_results def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find sizes if not args.get('use', None): # find .o files @@ -75,13 +90,14 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], - int(result['size'])) - for result in r] + result['name'], + int(result['code_size'])) + for result in r + if result.get('code_size') not in {None, ''}] total = 0 for _, _, size in results: @@ -89,13 +105,17 @@ def main(**args): # find previous results? if args.get('diff'): - with open(args['diff']) as f: - r = csv.DictReader(f) - prev_results = [ - ( result['file'], - result['function'], - int(result['size'])) - for result in r] + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + prev_results = [ + ( result['file'], + result['name'], + int(result['code_size'])) + for result in r + if result.get('code_size') not in {None, ''}] + except FileNotFoundError: + prev_results = [] prev_total = 0 for _, _, size in prev_results: @@ -103,14 +123,34 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'size']) - for file, func, size in sorted(results): - w.writerow((file, func, size)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? 
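The new `-o`/`-m` plumbing lets all of the measurement scripts write into one shared CSV keyed by `(file, name)`. A minimal standalone sketch of that merge idea, with invented rows and sizes (only the column names come from these scripts):

import collections as co
import csv
import sys

merged = co.defaultdict(dict)

# rows an earlier script run might already have written (invented values)
for row in [{'file': 'lfs.c', 'name': 'lfs_mount', 'data_size': '16'}]:
    key = (row.pop('file'), row.pop('name'))
    merged[key].update(row)

# layer this script's measurement on top, keyed the same way
for file, func, size in [('lfs.c', 'lfs_mount', 1234)]:
    merged[(file, func)]['code_size'] = size

w = csv.DictWriter(sys.stdout, ['file', 'name', 'data_size', 'code_size'])
w.writeheader()
for (file, func), result in sorted(merged.items()):
    w.writerow({'file': file, 'name': func, **result})

The merge branch that follows does the same thing, but tolerates a missing file and carries any other columns through untouched.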
+ if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('code_size', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, size in results: + merged_results[(file, func)]['code_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'code_size']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: 0) for file, func, size in results: entry = (file if by == 'file' else func) @@ -126,45 +166,67 @@ def diff_entries(olds, news): diff[name] = (old, new, new-old, (new-old)/old if old else 1.0) return diff + def sorted_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1], x)) + else: + return sorted(entries) + + def sorted_diff_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1][1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1][1], x)) + else: + return sorted(entries, key=lambda x: (-x[1][3], x)) + def print_header(by=''): if not args.get('diff'): print('%-36s %7s' % (by, 'size')) else: print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff')) - def print_entries(by='function'): + def print_entry(name, size): + print("%-36s %7d" % (name, size)) + + def print_diff_entry(name, old, new, diff, ratio): + print("%-36s %7s %7s %+7d%s" % (name, + old or "-", + new or "-", + diff, + ' (%+.1f%%)' % (100*ratio) if ratio else '')) + + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): print_header(by=by) - for name, size in sorted(entries.items()): - print("%-36s %7d" % (name, size)) + for name, size in sorted_entries(entries.items()): + print_entry(name, size) else: prev_entries = dedup_entries(prev_results, by=by) diff = diff_entries(prev_entries, entries) print_header(by='%s (%d added, %d removed)' % (by, sum(1 for old, _, _, _ in diff.values() if not old), sum(1 for _, new, _, _ in diff.values() if not new))) - for name, (old, new, diff, ratio) in sorted(diff.items(), - key=lambda x: (-x[1][3], x)): + for name, (old, new, diff, ratio) in sorted_diff_entries( + diff.items()): if ratio or args.get('all'): - print("%-36s %7s %7s %+7d%s" % (name, - old or "-", - new or "-", - diff, - ' (%+.1f%%)' % (100*ratio) if ratio else '')) + print_diff_entry(name, old, new, diff, ratio) def print_totals(): if not args.get('diff'): - print("%-36s %7d" % ('TOTAL', total)) + print_entry('TOTAL', total) else: - ratio = (total-prev_total)/prev_total if prev_total else 1.0 - print("%-36s %7s %7s %+7d%s" % ( - 'TOTAL', - prev_total if prev_total else '-', - total if total else '-', + ratio = (0.0 if not prev_total and not total + else 1.0 if not prev_total + else (total-prev_total)/prev_total) + print_diff_entry('TOTAL', + prev_total, total, total-prev_total, - ' (%+.1f%%)' % (100*ratio) if ratio else '')) + ratio) if args.get('quiet'): pass @@ -175,7 +237,7 @@ def print_totals(): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() if 
__name__ == "__main__": @@ -188,22 +250,30 @@ def print_totals(): or a list of paths. Defaults to %r." % OBJ_PATHS) parser.add_argument('-v', '--verbose', action='store_true', help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") parser.add_argument('-o', '--output', help="Specify CSV file to store results.") parser.add_argument('-u', '--use', help="Don't compile and find code sizes, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff code size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-A', '--everything', action='store_true', + help="Include builtin and libc specific symbols.") + parser.add_argument('-s', '--size-sort', action='store_true', + help="Sort by size.") + parser.add_argument('-S', '--reverse-size-sort', action='store_true', + help="Sort by size, but backwards.") + parser.add_argument('-F', '--files', action='store_true', help="Show file-level code sizes. Note this does not include padding! " "So sizes may differ from other tools.") - parser.add_argument('-s', '--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total code size.") - parser.add_argument('-q', '--quiet', action='store_true', - help="Don't show anything, useful with -o.") - parser.add_argument('--type', default='tTrRdDbB', + parser.add_argument('--type', default='tTrRdD', help="Type of symbols to report, this uses the same single-character " "type-names emitted by nm. Defaults to %(default)r.") parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(), diff --git a/scripts/coverage.py b/scripts/coverage.py index 6f1f54fa..b3a90ed2 100755 --- a/scripts/coverage.py +++ b/scripts/coverage.py @@ -55,8 +55,9 @@ def func_from_lineno(file, lineno): for (file, func), (hits, count) in reduced_funcs.items(): # discard internal/testing functions (test_* injected with # internal testing) - if func.startswith('__') or func.startswith('test_'): - continue + if not args.get('everything'): + if func.startswith('__') or func.startswith('test_'): + continue # discard .8449 suffixes created by optimizer func = re.sub('\.[0-9]+', '', func) results.append((file, func, hits, count)) @@ -65,6 +66,15 @@ def func_from_lineno(file, lineno): def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find coverage if not args.get('use'): # find *.info files @@ -82,14 +92,16 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], - int(result['hits']), - int(result['count'])) - for result in r] + result['name'], + int(result['coverage_hits']), + int(result['coverage_count'])) + for result in r + if result.get('coverage_hits') not in {None, ''} + if result.get('coverage_count') not in {None, ''}] total_hits, total_count = 0, 0 for _, _, hits, count in results: @@ -98,14 +110,19 @@ def main(**args): # find previous results? 
if args.get('diff'): - with open(args['diff']) as f: - r = csv.DictReader(f) - prev_results = [ - ( result['file'], - result['function'], - int(result['hits']), - int(result['count'])) - for result in r] + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + prev_results = [ + ( result['file'], + result['name'], + int(result['coverage_hits']), + int(result['coverage_count'])) + for result in r + if result.get('coverage_hits') not in {None, ''} + if result.get('coverage_count') not in {None, ''}] + except FileNotFoundError: + prev_results = [] prev_total_hits, prev_total_count = 0, 0 for _, _, hits, count in prev_results: @@ -114,14 +131,36 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'hits', 'count']) - for file, func, hits, count in sorted(results): - w.writerow((file, func, hits, count)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('coverage_hits', None) + result.pop('coverage_count', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, hits, count in results: + merged_results[(file, func)]['coverage_hits'] = hits + merged_results[(file, func)]['coverage_count'] = count + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'coverage_hits', 'coverage_count']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: (0, 0)) for file, func, hits, count in results: entry = (file if by == 'file' else func) @@ -147,23 +186,59 @@ def diff_entries(olds, news): - (old_hits/old_count if old_count else 1.0))) return diff + def sorted_entries(entries): + if args.get('coverage_sort'): + return sorted(entries, key=lambda x: (-(x[1][0]/x[1][1] if x[1][1] else -1), x)) + elif args.get('reverse_coverage_sort'): + return sorted(entries, key=lambda x: (+(x[1][0]/x[1][1] if x[1][1] else -1), x)) + else: + return sorted(entries) + + def sorted_diff_entries(entries): + if args.get('coverage_sort'): + return sorted(entries, key=lambda x: (-(x[1][2]/x[1][3] if x[1][3] else -1), x)) + elif args.get('reverse_coverage_sort'): + return sorted(entries, key=lambda x: (+(x[1][2]/x[1][3] if x[1][3] else -1), x)) + else: + return sorted(entries, key=lambda x: (-x[1][6], x)) + def print_header(by=''): if not args.get('diff'): print('%-36s %19s' % (by, 'hits/line')) else: print('%-36s %19s %19s %11s' % (by, 'old', 'new', 'diff')) - def print_entries(by='function'): + def print_entry(name, hits, count): + print("%-36s %11s %7s" % (name, + '%d/%d' % (hits, count) + if count else '-', + '%.1f%%' % (100*hits/count) + if count else '-')) + + def print_diff_entry(name, + old_hits, old_count, + new_hits, new_count, + diff_hits, diff_count, + ratio): + print("%-36s %11s %7s %11s %7s %11s%s" % (name, + '%d/%d' % (old_hits, old_count) + if old_count else '-', + '%.1f%%' % (100*old_hits/old_count) + if old_count else '-', + '%d/%d' % (new_hits, new_count) + if new_count else '-', + '%.1f%%' % (100*new_hits/new_count) + if new_count else '-', + '%+d/%+d' % 
(diff_hits, diff_count), + ' (%+.1f%%)' % (100*ratio) if ratio else '')) + + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): print_header(by=by) - for name, (hits, count) in sorted(entries.items()): - print("%-36s %11s %7s" % (name, - '%d/%d' % (hits, count) - if count else '-', - '%.1f%%' % (100*hits/count) - if count else '-')) + for name, (hits, count) in sorted_entries(entries.items()): + print_entry(name, hits, count) else: prev_entries = dedup_entries(prev_results, by=by) diff = diff_entries(prev_entries, entries) @@ -173,45 +248,28 @@ def print_entries(by='function'): for name, ( old_hits, old_count, new_hits, new_count, - diff_hits, diff_count, ratio) in sorted(diff.items(), - key=lambda x: (-x[1][6], x)): + diff_hits, diff_count, ratio) in sorted_diff_entries( + diff.items()): if ratio or args.get('all'): - print("%-36s %11s %7s %11s %7s %11s%s" % (name, - '%d/%d' % (old_hits, old_count) - if old_count else '-', - '%.1f%%' % (100*old_hits/old_count) - if old_count else '-', - '%d/%d' % (new_hits, new_count) - if new_count else '-', - '%.1f%%' % (100*new_hits/new_count) - if new_count else '-', - '%+d/%+d' % (diff_hits, diff_count), - ' (%+.1f%%)' % (100*ratio) if ratio else '')) + print_diff_entry(name, + old_hits, old_count, + new_hits, new_count, + diff_hits, diff_count, + ratio) def print_totals(): if not args.get('diff'): - print("%-36s %11s %7s" % ('TOTAL', - '%d/%d' % (total_hits, total_count) - if total_count else '-', - '%.1f%%' % (100*total_hits/total_count) - if total_count else '-')) + print_entry('TOTAL', total_hits, total_count) else: ratio = ((total_hits/total_count if total_count else 1.0) - (prev_total_hits/prev_total_count if prev_total_count else 1.0)) - print("%-36s %11s %7s %11s %7s %11s%s" % ('TOTAL', - '%d/%d' % (prev_total_hits, prev_total_count) - if prev_total_count else '-', - '%.1f%%' % (100*prev_total_hits/prev_total_count) - if prev_total_count else '-', - '%d/%d' % (total_hits, total_count) - if total_count else '-', - '%.1f%%' % (100*total_hits/total_count) - if total_count else '-', - '%+d/%+d' % (total_hits-prev_total_hits, - total_count-prev_total_count), - ' (%+.1f%%)' % (100*ratio) if ratio else '')) + print_diff_entry('TOTAL', + prev_total_hits, prev_total_count, + total_hits, total_count, + total_hits-prev_total_hits, total_count-prev_total_count, + ratio) if args.get('quiet'): pass @@ -222,7 +280,7 @@ def print_totals(): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() if __name__ == "__main__": @@ -243,12 +301,23 @@ def print_totals(): help="Don't do any work, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff code size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-A', '--everything', action='store_true', + help="Include builtin and libc specific symbols.") + parser.add_argument('-s', '--coverage-sort', action='store_true', + help="Sort by coverage.") + parser.add_argument('-S', '--reverse-coverage-sort', action='store_true', + help="Sort by coverage, but backwards.") + parser.add_argument('-F', '--files', action='store_true', help="Show file-level coverage.") - parser.add_argument('-s', '--summary', action='store_true', + 
parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total coverage.") parser.add_argument('-q', '--quiet', action='store_true', help="Don't show anything, useful with -o.") + parser.add_argument('--build-dir', + help="Specify the relative build directory. Used to map object files \ + to the correct source files.") sys.exit(main(**vars(parser.parse_args()))) diff --git a/scripts/data.py b/scripts/data.py new file mode 100755 index 00000000..4b8e00da --- /dev/null +++ b/scripts/data.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python3 +# +# Script to find data size at the function level. Basically just a bit wrapper +# around nm with some extra conveniences for comparing builds. Heavily inspired +# by Linux's Bloat-O-Meter. +# + +import os +import glob +import itertools as it +import subprocess as sp +import shlex +import re +import csv +import collections as co + + +OBJ_PATHS = ['*.o'] + +def collect(paths, **args): + results = co.defaultdict(lambda: 0) + pattern = re.compile( + '^(?P[0-9a-fA-F]+)' + + ' (?P[%s])' % re.escape(args['type']) + + ' (?P.+?)$') + for path in paths: + # note nm-tool may contain extra args + cmd = args['nm_tool'] + ['--size-sort', path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace') + for line in proc.stdout: + m = pattern.match(line) + if m: + results[(path, m.group('func'))] += int(m.group('size'), 16) + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + sys.exit(-1) + + flat_results = [] + for (file, func), size in results.items(): + # map to source files + if args.get('build_dir'): + file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) + # discard internal functions + if not args.get('everything'): + if func.startswith('__'): + continue + # discard .8449 suffixes created by optimizer + func = re.sub('\.[0-9]+', '', func) + flat_results.append((file, func, size)) + + return flat_results + +def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + + # find sizes + if not args.get('use', None): + # find .o files + paths = [] + for path in args['obj_paths']: + if os.path.isdir(path): + path = path + '/*.o' + + for path in glob.glob(path): + paths.append(path) + + if not paths: + print('no .obj files found in %r?' % args['obj_paths']) + sys.exit(-1) + + results = collect(paths, **args) + else: + with openio(args['use']) as f: + r = csv.DictReader(f) + results = [ + ( result['file'], + result['name'], + int(result['data_size'])) + for result in r + if result.get('data_size') not in {None, ''}] + + total = 0 + for _, _, size in results: + total += size + + # find previous results? 
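Like code.py, this script leans on `nm --size-sort`, which in its default BSD output prints one `<size> <type> <name>` line per symbol with the size in hex. A small sketch of that parse with a made-up input line (the default `dDbB` type filter selects data/bss symbols):

import re

pattern = re.compile(
    '^(?P<size>[0-9a-fA-F]+)'
    ' (?P<type>[dDbB])'
    ' (?P<func>.+?)$')

line = '00000018 b lfs_buffer'   # invented nm output line
m = pattern.match(line)
if m:
    print(m.group('func'), int(m.group('size'), 16))   # lfs_buffer 24

The `--diff` branch that follows reads a previous `data_size` CSV the same way the `-u` branch above does.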
+ if args.get('diff'): + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + prev_results = [ + ( result['file'], + result['name'], + int(result['data_size'])) + for result in r + if result.get('data_size') not in {None, ''}] + except FileNotFoundError: + prev_results = [] + + prev_total = 0 + for _, _, size in prev_results: + prev_total += size + + # write results to CSV + if args.get('output'): + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('data_size', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, size in results: + merged_results[(file, func)]['data_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'data_size']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) + + # print results + def dedup_entries(results, by='name'): + entries = co.defaultdict(lambda: 0) + for file, func, size in results: + entry = (file if by == 'file' else func) + entries[entry] += size + return entries + + def diff_entries(olds, news): + diff = co.defaultdict(lambda: (0, 0, 0, 0)) + for name, new in news.items(): + diff[name] = (0, new, new, 1.0) + for name, old in olds.items(): + _, new, _, _ = diff[name] + diff[name] = (old, new, new-old, (new-old)/old if old else 1.0) + return diff + + def sorted_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1], x)) + else: + return sorted(entries) + + def sorted_diff_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1][1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1][1], x)) + else: + return sorted(entries, key=lambda x: (-x[1][3], x)) + + def print_header(by=''): + if not args.get('diff'): + print('%-36s %7s' % (by, 'size')) + else: + print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff')) + + def print_entry(name, size): + print("%-36s %7d" % (name, size)) + + def print_diff_entry(name, old, new, diff, ratio): + print("%-36s %7s %7s %+7d%s" % (name, + old or "-", + new or "-", + diff, + ' (%+.1f%%)' % (100*ratio) if ratio else '')) + + def print_entries(by='name'): + entries = dedup_entries(results, by=by) + + if not args.get('diff'): + print_header(by=by) + for name, size in sorted_entries(entries.items()): + print_entry(name, size) + else: + prev_entries = dedup_entries(prev_results, by=by) + diff = diff_entries(prev_entries, entries) + print_header(by='%s (%d added, %d removed)' % (by, + sum(1 for old, _, _, _ in diff.values() if not old), + sum(1 for _, new, _, _ in diff.values() if not new))) + for name, (old, new, diff, ratio) in sorted_diff_entries( + diff.items()): + if ratio or args.get('all'): + print_diff_entry(name, old, new, diff, ratio) + + def print_totals(): + if not args.get('diff'): + print_entry('TOTAL', total) + else: + ratio = (0.0 if not prev_total and not total + else 1.0 if not prev_total + else (total-prev_total)/prev_total) + print_diff_entry('TOTAL', + prev_total, total, + total-prev_total, + ratio) + + if args.get('quiet'): + pass + elif args.get('summary'): + print_header() + 
print_totals() + elif args.get('files'): + print_entries(by='file') + print_totals() + else: + print_entries(by='name') + print_totals() + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser( + description="Find data size at the function level.") + parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS, + help="Description of where to find *.o files. May be a directory \ + or a list of paths. Defaults to %r." % OBJ_PATHS) + parser.add_argument('-v', '--verbose', action='store_true', + help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") + parser.add_argument('-o', '--output', + help="Specify CSV file to store results.") + parser.add_argument('-u', '--use', + help="Don't compile and find data sizes, instead use this CSV file.") + parser.add_argument('-d', '--diff', + help="Specify CSV file to diff data size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") + parser.add_argument('-a', '--all', action='store_true', + help="Show all functions, not just the ones that changed.") + parser.add_argument('-A', '--everything', action='store_true', + help="Include builtin and libc specific symbols.") + parser.add_argument('-s', '--size-sort', action='store_true', + help="Sort by size.") + parser.add_argument('-S', '--reverse-size-sort', action='store_true', + help="Sort by size, but backwards.") + parser.add_argument('-F', '--files', action='store_true', + help="Show file-level data sizes. Note this does not include padding! " + "So sizes may differ from other tools.") + parser.add_argument('-Y', '--summary', action='store_true', + help="Only show the total data size.") + parser.add_argument('--type', default='dDbB', + help="Type of symbols to report, this uses the same single-character " + "type-names emitted by nm. Defaults to %(default)r.") + parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(), + help="Path to the nm tool to use.") + parser.add_argument('--build-dir', + help="Specify the relative build directory. Used to map object files \ + to the correct source files.") + sys.exit(main(**vars(parser.parse_args()))) diff --git a/scripts/stack.py b/scripts/stack.py new file mode 100755 index 00000000..0c652d8d --- /dev/null +++ b/scripts/stack.py @@ -0,0 +1,430 @@ +#!/usr/bin/env python3 +# +# Script to find stack usage at the function level. Will detect recursion and +# report as infinite stack usage. 
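A minimal sketch of what "detect recursion and report as infinite stack usage" means in practice: the worst-case limit is computed by walking callees depth-first, and any cycle short-circuits to `inf`. The call graph and frame sizes here are invented:

# frame size in bytes and direct callees per function (invented)
graph = {
    'lfs_mount':     (64, {'lfs_dir_fetch'}),
    'lfs_dir_fetch': (112, set()),
    'recurse':       (32, {'recurse'}),
}

def find_limit(name, seen=frozenset()):
    if name not in graph:
        return 0
    frame, targets = graph[name]
    limit = 0
    for target in targets:
        if target in seen:
            return float('inf')   # cycle found -> unbounded stack
        limit = max(limit, find_limit(target, seen | {target}))
    return frame + limit

print(find_limit('lfs_mount'))   # 176
print(find_limit('recurse'))     # inf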
+# + +import os +import glob +import itertools as it +import re +import csv +import collections as co +import math as m + + +CI_PATHS = ['*.ci'] + +def collect(paths, **args): + # parse the vcg format + k_pattern = re.compile('([a-z]+)\s*:', re.DOTALL) + v_pattern = re.compile('(?:"(.*?)"|([a-z]+))', re.DOTALL) + def parse_vcg(rest): + def parse_vcg(rest): + node = [] + while True: + rest = rest.lstrip() + m = k_pattern.match(rest) + if not m: + return (node, rest) + k, rest = m.group(1), rest[m.end(0):] + + rest = rest.lstrip() + if rest.startswith('{'): + v, rest = parse_vcg(rest[1:]) + assert rest[0] == '}', "unexpected %r" % rest[0:1] + rest = rest[1:] + node.append((k, v)) + else: + m = v_pattern.match(rest) + assert m, "unexpected %r" % rest[0:1] + v, rest = m.group(1) or m.group(2), rest[m.end(0):] + node.append((k, v)) + + node, rest = parse_vcg(rest) + assert rest == '', "unexpected %r" % rest[0:1] + return node + + # collect into functions + results = co.defaultdict(lambda: (None, None, 0, set())) + f_pattern = re.compile( + r'([^\\]*)\\n([^:]*)[^\\]*\\n([0-9]+) bytes \((.*)\)') + for path in paths: + with open(path) as f: + vcg = parse_vcg(f.read()) + for k, graph in vcg: + if k != 'graph': + continue + for k, info in graph: + if k == 'node': + info = dict(info) + m = f_pattern.match(info['label']) + if m: + function, file, size, type = m.groups() + if not args.get('quiet') and type != 'static': + print('warning: found non-static stack for %s (%s)' + % (function, type)) + _, _, _, targets = results[info['title']] + results[info['title']] = ( + file, function, int(size), targets) + elif k == 'edge': + info = dict(info) + _, _, _, targets = results[info['sourcename']] + targets.add(info['targetname']) + else: + continue + + if not args.get('everything'): + for source, (s_file, s_function, _, _) in list(results.items()): + # discard internal functions + if s_file.startswith('<') or s_file.startswith('/usr/include'): + del results[source] + + # find maximum stack size recursively, this requires also detecting cycles + # (in case of recursion) + def find_limit(source, seen=None): + seen = seen or set() + if source not in results: + return 0 + _, _, frame, targets = results[source] + + limit = 0 + for target in targets: + if target in seen: + # found a cycle + return float('inf') + limit_ = find_limit(target, seen | {target}) + limit = max(limit, limit_) + + return frame + limit + + def find_deps(targets): + deps = set() + for target in targets: + if target in results: + t_file, t_function, _, _ = results[target] + deps.add((t_file, t_function)) + return deps + + # flatten into a list + flat_results = [] + for source, (s_file, s_function, frame, targets) in results.items(): + limit = find_limit(source) + deps = find_deps(targets) + flat_results.append((s_file, s_function, frame, limit, deps)) + + return flat_results + +def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + + # find sizes + if not args.get('use', None): + # find .ci files + paths = [] + for path in args['ci_paths']: + if os.path.isdir(path): + path = path + '/*.ci' + + for path in glob.glob(path): + paths.append(path) + + if not paths: + print('no .ci files found in %r?' 
% args['ci_paths']) + sys.exit(-1) + + results = collect(paths, **args) + else: + with openio(args['use']) as f: + r = csv.DictReader(f) + results = [ + ( result['file'], + result['name'], + int(result['stack_frame']), + float(result['stack_limit']), # note limit can be inf + set()) + for result in r + if result.get('stack_frame') not in {None, ''} + if result.get('stack_limit') not in {None, ''}] + + total_frame = 0 + total_limit = 0 + for _, _, frame, limit, _ in results: + total_frame += frame + total_limit = max(total_limit, limit) + + # find previous results? + if args.get('diff'): + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + prev_results = [ + ( result['file'], + result['name'], + int(result['stack_frame']), + float(result['stack_limit']), + set()) + for result in r + if result.get('stack_frame') not in {None, ''} + if result.get('stack_limit') not in {None, ''}] + except FileNotFoundError: + prev_results = [] + + prev_total_frame = 0 + prev_total_limit = 0 + for _, _, frame, limit, _ in prev_results: + prev_total_frame += frame + prev_total_limit = max(prev_total_limit, limit) + + # write results to CSV + if args.get('output'): + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('stack_frame', None) + result.pop('stack_limit', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, frame, limit, _ in results: + merged_results[(file, func)]['stack_frame'] = frame + merged_results[(file, func)]['stack_limit'] = limit + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'stack_frame', 'stack_limit']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) + + # print results + def dedup_entries(results, by='name'): + entries = co.defaultdict(lambda: (0, 0, set())) + for file, func, frame, limit, deps in results: + entry = (file if by == 'file' else func) + entry_frame, entry_limit, entry_deps = entries[entry] + entries[entry] = ( + entry_frame + frame, + max(entry_limit, limit), + entry_deps | {file if by == 'file' else func + for file, func in deps}) + return entries + + def diff_entries(olds, news): + diff = co.defaultdict(lambda: (None, None, None, None, 0, 0, 0, set())) + for name, (new_frame, new_limit, deps) in news.items(): + diff[name] = ( + None, None, + new_frame, new_limit, + new_frame, new_limit, + 1.0, + deps) + for name, (old_frame, old_limit, _) in olds.items(): + _, _, new_frame, new_limit, _, _, _, deps = diff[name] + diff[name] = ( + old_frame, old_limit, + new_frame, new_limit, + (new_frame or 0) - (old_frame or 0), + 0 if m.isinf(new_limit or 0) and m.isinf(old_limit or 0) + else (new_limit or 0) - (old_limit or 0), + 0.0 if m.isinf(new_limit or 0) and m.isinf(old_limit or 0) + else +float('inf') if m.isinf(new_limit or 0) + else -float('inf') if m.isinf(old_limit or 0) + else +0.0 if not old_limit and not new_limit + else +1.0 if not old_limit + else ((new_limit or 0) - (old_limit or 0))/(old_limit or 0), + deps) + return diff + + def sorted_entries(entries): + if args.get('limit_sort'): + return sorted(entries, key=lambda x: (-x[1][1], x)) + elif args.get('reverse_limit_sort'): + return sorted(entries, key=lambda x: (+x[1][1], x)) + elif 
args.get('frame_sort'): + return sorted(entries, key=lambda x: (-x[1][0], x)) + elif args.get('reverse_frame_sort'): + return sorted(entries, key=lambda x: (+x[1][0], x)) + else: + return sorted(entries) + + def sorted_diff_entries(entries): + if args.get('limit_sort'): + return sorted(entries, key=lambda x: (-(x[1][3] or 0), x)) + elif args.get('reverse_limit_sort'): + return sorted(entries, key=lambda x: (+(x[1][3] or 0), x)) + elif args.get('frame_sort'): + return sorted(entries, key=lambda x: (-(x[1][2] or 0), x)) + elif args.get('reverse_frame_sort'): + return sorted(entries, key=lambda x: (+(x[1][2] or 0), x)) + else: + return sorted(entries, key=lambda x: (-x[1][6], x)) + + def print_header(by=''): + if not args.get('diff'): + print('%-36s %7s %7s' % (by, 'frame', 'limit')) + else: + print('%-36s %15s %15s %15s' % (by, 'old', 'new', 'diff')) + + def print_entry(name, frame, limit): + print("%-36s %7d %7s" % (name, + frame, '∞' if m.isinf(limit) else int(limit))) + + def print_diff_entry(name, + old_frame, old_limit, + new_frame, new_limit, + diff_frame, diff_limit, + ratio): + print('%-36s %7s %7s %7s %7s %+7d %7s%s' % (name, + old_frame if old_frame is not None else "-", + ('∞' if m.isinf(old_limit) else int(old_limit)) + if old_limit is not None else "-", + new_frame if new_frame is not None else "-", + ('∞' if m.isinf(new_limit) else int(new_limit)) + if new_limit is not None else "-", + diff_frame, + ('+∞' if diff_limit > 0 and m.isinf(diff_limit) + else '-∞' if diff_limit < 0 and m.isinf(diff_limit) + else '%+d' % diff_limit), + '' if not ratio + else ' (+∞%)' if ratio > 0 and m.isinf(ratio) + else ' (-∞%)' if ratio < 0 and m.isinf(ratio) + else ' (%+.1f%%)' % (100*ratio))) + + def print_entries(by='name'): + # build optional tree of dependencies + def print_deps(entries, depth, print, + filter=lambda _: True, + prefixes=('', '', '', '')): + entries = entries if isinstance(entries, list) else list(entries) + filtered_entries = [(name, entry) + for name, entry in entries + if filter(name)] + for i, (name, entry) in enumerate(filtered_entries): + last = (i == len(filtered_entries)-1) + print(prefixes[0+last] + name, entry) + + if depth > 0: + deps = entry[-1] + print_deps(entries, depth-1, print, + lambda name: name in deps, + ( prefixes[2+last] + "|-> ", + prefixes[2+last] + "'-> ", + prefixes[2+last] + "| ", + prefixes[2+last] + " ")) + + entries = dedup_entries(results, by=by) + + if not args.get('diff'): + print_header(by=by) + print_deps( + sorted_entries(entries.items()), + args.get('depth') or 0, + lambda name, entry: print_entry(name, *entry[:-1])) + else: + prev_entries = dedup_entries(prev_results, by=by) + diff = diff_entries(prev_entries, entries) + + print_header(by='%s (%d added, %d removed)' % (by, + sum(1 for _, old, _, _, _, _, _, _ in diff.values() if old is None), + sum(1 for _, _, _, new, _, _, _, _ in diff.values() if new is None))) + print_deps( + filter( + lambda x: x[1][6] or args.get('all'), + sorted_diff_entries(diff.items())), + args.get('depth') or 0, + lambda name, entry: print_diff_entry(name, *entry[:-1])) + + def print_totals(): + if not args.get('diff'): + print_entry('TOTAL', total_frame, total_limit) + else: + diff_frame = total_frame - prev_total_frame + diff_limit = ( + 0 if m.isinf(total_limit or 0) and m.isinf(prev_total_limit or 0) + else (total_limit or 0) - (prev_total_limit or 0)) + ratio = ( + 0.0 if m.isinf(total_limit or 0) and m.isinf(prev_total_limit or 0) + else +float('inf') if m.isinf(total_limit or 0) + else -float('inf') if 
m.isinf(prev_total_limit or 0) + else 0.0 if not prev_total_limit and not total_limit + else 1.0 if not prev_total_limit + else ((total_limit or 0) - (prev_total_limit or 0))/(prev_total_limit or 0)) + print_diff_entry('TOTAL', + prev_total_frame, prev_total_limit, + total_frame, total_limit, + diff_frame, diff_limit, + ratio) + + if args.get('quiet'): + pass + elif args.get('summary'): + print_header() + print_totals() + elif args.get('files'): + print_entries(by='file') + print_totals() + else: + print_entries(by='name') + print_totals() + + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser( + description="Find stack usage at the function level.") + parser.add_argument('ci_paths', nargs='*', default=CI_PATHS, + help="Description of where to find *.ci files. May be a directory \ + or a list of paths. Defaults to %r." % CI_PATHS) + parser.add_argument('-v', '--verbose', action='store_true', + help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") + parser.add_argument('-o', '--output', + help="Specify CSV file to store results.") + parser.add_argument('-u', '--use', + help="Don't parse callgraph files, instead use this CSV file.") + parser.add_argument('-d', '--diff', + help="Specify CSV file to diff against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") + parser.add_argument('-a', '--all', action='store_true', + help="Show all functions, not just the ones that changed.") + parser.add_argument('-A', '--everything', action='store_true', + help="Include builtin and libc specific symbols.") + parser.add_argument('-s', '--limit-sort', action='store_true', + help="Sort by stack limit.") + parser.add_argument('-S', '--reverse-limit-sort', action='store_true', + help="Sort by stack limit, but backwards.") + parser.add_argument('--frame-sort', action='store_true', + help="Sort by stack frame size.") + parser.add_argument('--reverse-frame-sort', action='store_true', + help="Sort by stack frame size, but backwards.") + parser.add_argument('-L', '--depth', default=0, type=lambda x: int(x, 0), + nargs='?', const=float('inf'), + help="Depth of dependencies to show.") + parser.add_argument('-F', '--files', action='store_true', + help="Show file-level calls.") + parser.add_argument('-Y', '--summary', action='store_true', + help="Only show the total stack size.") + parser.add_argument('--build-dir', + help="Specify the relative build directory. Used to map object files \ + to the correct source files.") + sys.exit(main(**vars(parser.parse_args()))) diff --git a/scripts/structs.py b/scripts/structs.py new file mode 100755 index 00000000..e8d7193e --- /dev/null +++ b/scripts/structs.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python3 +# +# Script to find struct sizes. 
+# + +import os +import glob +import itertools as it +import subprocess as sp +import shlex +import re +import csv +import collections as co + + +OBJ_PATHS = ['*.o'] + +def collect(paths, **args): + decl_pattern = re.compile( + '^\s+(?P[0-9]+)' + '\s+(?P[0-9]+)' + '\s+.*' + '\s+(?P[^\s]+)$') + struct_pattern = re.compile( + '^(?:.*DW_TAG_(?P[a-z_]+).*' + '|^.*DW_AT_name.*:\s*(?P[^:\s]+)\s*' + '|^.*DW_AT_decl_file.*:\s*(?P[0-9]+)\s*' + '|^.*DW_AT_byte_size.*:\s*(?P[0-9]+)\s*)$') + + results = co.defaultdict(lambda: 0) + for path in paths: + # find decl, we want to filter by structs in .h files + decls = {} + # note objdump-tool may contain extra args + cmd = args['objdump_tool'] + ['--dwarf=rawline', path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace') + for line in proc.stdout: + # find file numbers + m = decl_pattern.match(line) + if m: + decls[int(m.group('no'))] = m.group('file') + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + sys.exit(-1) + + # collect structs as we parse dwarf info + found = False + name = None + decl = None + size = None + + # note objdump-tool may contain extra args + cmd = args['objdump_tool'] + ['--dwarf=info', path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace') + for line in proc.stdout: + # state machine here to find structs + m = struct_pattern.match(line) + if m: + if m.group('tag'): + if (name is not None + and decl is not None + and size is not None): + decl = decls.get(decl, '?') + results[(decl, name)] = size + found = (m.group('tag') == 'structure_type') + name = None + decl = None + size = None + elif found and m.group('name'): + name = m.group('name') + elif found and name and m.group('decl'): + decl = int(m.group('decl')) + elif found and name and m.group('size'): + size = int(m.group('size')) + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + sys.exit(-1) + + flat_results = [] + for (file, struct), size in results.items(): + # map to source files + if args.get('build_dir'): + file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # only include structs declared in header files in the current + # directory, ignore internal-only # structs (these are represented + # in other measurements) + if not args.get('everything'): + if not file.endswith('.h'): + continue + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) + + flat_results.append((file, struct, size)) + + return flat_results + + +def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + + # find sizes + if not args.get('use', None): + # find .o files + paths = [] + for path in args['obj_paths']: + if os.path.isdir(path): + path = path + '/*.o' + + for path in glob.glob(path): + paths.append(path) + + if not paths: + print('no .obj files found in %r?' 
% args['obj_paths']) + sys.exit(-1) + + results = collect(paths, **args) + else: + with openio(args['use']) as f: + r = csv.DictReader(f) + results = [ + ( result['file'], + result['name'], + int(result['struct_size'])) + for result in r + if result.get('struct_size') not in {None, ''}] + + total = 0 + for _, _, size in results: + total += size + + # find previous results? + if args.get('diff'): + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + prev_results = [ + ( result['file'], + result['name'], + int(result['struct_size'])) + for result in r + if result.get('struct_size') not in {None, ''}] + except FileNotFoundError: + prev_results = [] + + prev_total = 0 + for _, _, size in prev_results: + prev_total += size + + # write results to CSV + if args.get('output'): + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + struct = result.pop('name', '') + result.pop('struct_size', None) + merged_results[(file, struct)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, struct, size in results: + merged_results[(file, struct)]['struct_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'struct_size']) + w.writeheader() + for (file, struct), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': struct, **result}) + + # print results + def dedup_entries(results, by='name'): + entries = co.defaultdict(lambda: 0) + for file, struct, size in results: + entry = (file if by == 'file' else struct) + entries[entry] += size + return entries + + def diff_entries(olds, news): + diff = co.defaultdict(lambda: (0, 0, 0, 0)) + for name, new in news.items(): + diff[name] = (0, new, new, 1.0) + for name, old in olds.items(): + _, new, _, _ = diff[name] + diff[name] = (old, new, new-old, (new-old)/old if old else 1.0) + return diff + + def sorted_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1], x)) + else: + return sorted(entries) + + def sorted_diff_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1][1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1][1], x)) + else: + return sorted(entries, key=lambda x: (-x[1][3], x)) + + def print_header(by=''): + if not args.get('diff'): + print('%-36s %7s' % (by, 'size')) + else: + print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff')) + + def print_entry(name, size): + print("%-36s %7d" % (name, size)) + + def print_diff_entry(name, old, new, diff, ratio): + print("%-36s %7s %7s %+7d%s" % (name, + old or "-", + new or "-", + diff, + ' (%+.1f%%)' % (100*ratio) if ratio else '')) + + def print_entries(by='name'): + entries = dedup_entries(results, by=by) + + if not args.get('diff'): + print_header(by=by) + for name, size in sorted_entries(entries.items()): + print_entry(name, size) + else: + prev_entries = dedup_entries(prev_results, by=by) + diff = diff_entries(prev_entries, entries) + print_header(by='%s (%d added, %d removed)' % (by, + sum(1 for old, _, _, _ in diff.values() if not old), + sum(1 for _, new, _, _ in diff.values() if not new))) + for name, (old, new, diff, ratio) in sorted_diff_entries( + diff.items()): + if ratio or args.get('all'): + 
print_diff_entry(name, old, new, diff, ratio) + + def print_totals(): + if not args.get('diff'): + print_entry('TOTAL', total) + else: + ratio = (0.0 if not prev_total and not total + else 1.0 if not prev_total + else (total-prev_total)/prev_total) + print_diff_entry('TOTAL', + prev_total, total, + total-prev_total, + ratio) + + if args.get('quiet'): + pass + elif args.get('summary'): + print_header() + print_totals() + elif args.get('files'): + print_entries(by='file') + print_totals() + else: + print_entries(by='name') + print_totals() + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser( + description="Find struct sizes.") + parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS, + help="Description of where to find *.o files. May be a directory \ + or a list of paths. Defaults to %r." % OBJ_PATHS) + parser.add_argument('-v', '--verbose', action='store_true', + help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") + parser.add_argument('-o', '--output', + help="Specify CSV file to store results.") + parser.add_argument('-u', '--use', + help="Don't compile and find struct sizes, instead use this CSV file.") + parser.add_argument('-d', '--diff', + help="Specify CSV file to diff struct size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") + parser.add_argument('-a', '--all', action='store_true', + help="Show all functions, not just the ones that changed.") + parser.add_argument('-A', '--everything', action='store_true', + help="Include builtin and libc specific symbols.") + parser.add_argument('-s', '--size-sort', action='store_true', + help="Sort by size.") + parser.add_argument('-S', '--reverse-size-sort', action='store_true', + help="Sort by size, but backwards.") + parser.add_argument('-F', '--files', action='store_true', + help="Show file-level struct sizes.") + parser.add_argument('-Y', '--summary', action='store_true', + help="Only show the total struct size.") + parser.add_argument('--objdump-tool', default=['objdump'], type=lambda x: x.split(), + help="Path to the objdump tool to use.") + parser.add_argument('--build-dir', + help="Specify the relative build directory. Used to map object files \ + to the correct source files.") + sys.exit(main(**vars(parser.parse_args()))) diff --git a/scripts/summary.py b/scripts/summary.py new file mode 100755 index 00000000..7ce769bf --- /dev/null +++ b/scripts/summary.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python3 +# +# Script to summarize the outputs of other scripts. Operates on CSV files. 
+# + +import functools as ft +import collections as co +import os +import csv +import re +import math as m + +# displayable fields +Field = co.namedtuple('Field', 'name,parse,acc,key,fmt,repr,null,ratio') +FIELDS = [ + # name, parse, accumulate, fmt, print, null + Field('code', + lambda r: int(r['code_size']), + sum, + lambda r: r, + '%7s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('data', + lambda r: int(r['data_size']), + sum, + lambda r: r, + '%7s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('stack', + lambda r: float(r['stack_limit']), + max, + lambda r: r, + '%7s', + lambda r: '∞' if m.isinf(r) else int(r), + '-', + lambda old, new: (new-old)/old), + Field('structs', + lambda r: int(r['struct_size']), + sum, + lambda r: r, + '%8s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('coverage', + lambda r: (int(r['coverage_hits']), int(r['coverage_count'])), + lambda rs: ft.reduce(lambda a, b: (a[0]+b[0], a[1]+b[1]), rs), + lambda r: r[0]/r[1], + '%19s', + lambda r: '%11s %7s' % ('%d/%d' % (r[0], r[1]), '%.1f%%' % (100*r[0]/r[1])), + '%11s %7s' % ('-', '-'), + lambda old, new: ((new[0]/new[1]) - (old[0]/old[1]))) +] + + +def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + + # find results + results = co.defaultdict(lambda: {}) + for path in args.get('csv_paths', '-'): + try: + with openio(path) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + name = result.pop('name', '') + prev = results[(file, name)] + for field in FIELDS: + try: + r = field.parse(result) + if field.name in prev: + results[(file, name)][field.name] = field.acc( + [prev[field.name], r]) + else: + results[(file, name)][field.name] = r + except (KeyError, ValueError): + pass + except FileNotFoundError: + pass + + # find fields + if args.get('all_fields'): + fields = FIELDS + elif args.get('fields') is not None: + fields_dict = {field.name: field for field in FIELDS} + fields = [fields_dict[f] for f in args['fields']] + else: + fields = [] + for field in FIELDS: + if any(field.name in result for result in results.values()): + fields.append(field) + + # find total for every field + total = {} + for result in results.values(): + for field in fields: + if field.name in result and field.name in total: + total[field.name] = field.acc( + [total[field.name], result[field.name]]) + elif field.name in result: + total[field.name] = result[field.name] + + # find previous results? 
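Each `Field` above names the CSV column it parses, how two rows accumulate (`sum` for sizes, `max` for stack, pairwise sums for coverage), and how the value prints. A hand-worked example with invented values, using only column names that appear in the other scripts:

import functools as ft

rows = [
    {'name': 'lfs_mount',  'code_size': 1234, 'stack_limit': 384.0,
        'coverage_hits': 12, 'coverage_count': 14},
    {'name': 'lfs_format', 'code_size': 912,  'stack_limit': 512.0,
        'coverage_hits': 9,  'coverage_count': 20},
]

code  = sum(r['code_size'] for r in rows)      # 2146, sizes add up
stack = max(r['stack_limit'] for r in rows)    # 512.0, worst case wins
hits, count = ft.reduce(
    lambda a, b: (a[0] + b[0], a[1] + b[1]),
    ((r['coverage_hits'], r['coverage_count']) for r in rows))

print(code, stack, '%d/%d (%.1f%%)' % (hits, count, 100 * hits / count))
# 2146 512.0 21/34 (61.8%)

The `--diff` branch below builds a second table of the same shape so old and new entries can be compared field by field.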
+ if args.get('diff'): + prev_results = co.defaultdict(lambda: {}) + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + name = result.pop('name', '') + prev = prev_results[(file, name)] + for field in FIELDS: + try: + r = field.parse(result) + if field.name in prev: + prev_results[(file, name)][field.name] = field.acc( + [prev[field.name], r]) + else: + prev_results[(file, name)][field.name] = r + except (KeyError, ValueError): + pass + except FileNotFoundError: + pass + + prev_total = {} + for result in prev_results.values(): + for field in fields: + if field.name in result and field.name in prev_total: + prev_total[field.name] = field.acc( + [prev_total[field.name], result[field.name]]) + elif field.name in result: + prev_total[field.name] = result[field.name] + + # print results + def dedup_entries(results, by='name'): + entries = co.defaultdict(lambda: {}) + for (file, func), result in results.items(): + entry = (file if by == 'file' else func) + prev = entries[entry] + for field in fields: + if field.name in result and field.name in prev: + entries[entry][field.name] = field.acc( + [prev[field.name], result[field.name]]) + elif field.name in result: + entries[entry][field.name] = result[field.name] + return entries + + def sorted_entries(entries): + if args.get('sort') is not None: + field = {field.name: field for field in FIELDS}[args['sort']] + return sorted(entries, key=lambda x: ( + -(field.key(x[1][field.name])) if field.name in x[1] else -1, x)) + elif args.get('reverse_sort') is not None: + field = {field.name: field for field in FIELDS}[args['reverse_sort']] + return sorted(entries, key=lambda x: ( + +(field.key(x[1][field.name])) if field.name in x[1] else -1, x)) + else: + return sorted(entries) + + def print_header(by=''): + if not args.get('diff'): + print('%-36s' % by, end='') + for field in fields: + print((' '+field.fmt) % field.name, end='') + print() + else: + print('%-36s' % by, end='') + for field in fields: + print((' '+field.fmt) % field.name, end='') + print(' %-9s' % '', end='') + print() + + def print_entry(name, result): + print('%-36s' % name, end='') + for field in fields: + r = result.get(field.name) + if r is not None: + print((' '+field.fmt) % field.repr(r), end='') + else: + print((' '+field.fmt) % '-', end='') + print() + + def print_diff_entry(name, old, new): + print('%-36s' % name, end='') + for field in fields: + n = new.get(field.name) + if n is not None: + print((' '+field.fmt) % field.repr(n), end='') + else: + print((' '+field.fmt) % '-', end='') + o = old.get(field.name) + ratio = ( + 0.0 if m.isinf(o or 0) and m.isinf(n or 0) + else +float('inf') if m.isinf(n or 0) + else -float('inf') if m.isinf(o or 0) + else 0.0 if not o and not n + else +1.0 if not o + else -1.0 if not n + else field.ratio(o, n)) + print(' %-9s' % ( + '' if not ratio + else '(+∞%)' if ratio > 0 and m.isinf(ratio) + else '(-∞%)' if ratio < 0 and m.isinf(ratio) + else '(%+.1f%%)' % (100*ratio)), end='') + print() + + def print_entries(by='name'): + entries = dedup_entries(results, by=by) + + if not args.get('diff'): + print_header(by=by) + for name, result in sorted_entries(entries.items()): + print_entry(name, result) + else: + prev_entries = dedup_entries(prev_results, by=by) + print_header(by='%s (%d added, %d removed)' % (by, + sum(1 for name in entries if name not in prev_entries), + sum(1 for name in prev_entries if name not in entries))) + for name, result in sorted_entries(entries.items()): + if 
args.get('all') or result != prev_entries.get(name, {}): + print_diff_entry(name, prev_entries.get(name, {}), result) + + def print_totals(): + if not args.get('diff'): + print_entry('TOTAL', total) + else: + print_diff_entry('TOTAL', prev_total, total) + + if args.get('summary'): + print_header() + print_totals() + elif args.get('files'): + print_entries(by='file') + print_totals() + else: + print_entries(by='name') + print_totals() + + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser( + description="Summarize measurements") + parser.add_argument('csv_paths', nargs='*', default='-', + help="Description of where to find *.csv files. May be a directory \ + or list of paths. *.csv files will be merged to show the total \ + coverage.") + parser.add_argument('-d', '--diff', + help="Specify CSV file to diff against.") + parser.add_argument('-a', '--all', action='store_true', + help="Show all objects, not just the ones that changed.") + parser.add_argument('-e', '--all-fields', action='store_true', + help="Show all fields, even those with no results.") + parser.add_argument('-f', '--fields', type=lambda x: re.split('\s*,\s*', x), + help="Comma separated list of fields to print, by default all fields \ + that are found in the CSV files are printed.") + parser.add_argument('-s', '--sort', + help="Sort by this field.") + parser.add_argument('-S', '--reverse-sort', + help="Sort by this field, but backwards.") + parser.add_argument('-F', '--files', action='store_true', + help="Show file-level calls.") + parser.add_argument('-Y', '--summary', action='store_true', + help="Only show the totals.") + sys.exit(main(**vars(parser.parse_args()))) diff --git a/scripts/test.py b/scripts/test.py index f8051f0d..92a13b1d 100755 --- a/scripts/test.py +++ b/scripts/test.py @@ -784,10 +784,13 @@ def main(**args): stdout=sp.PIPE if not args.get('verbose') else None, stderr=sp.STDOUT if not args.get('verbose') else None, universal_newlines=True) + stdout = [] + for line in proc.stdout: + stdout.append(line) proc.wait() if proc.returncode != 0: if not args.get('verbose'): - for line in proc.stdout: + for line in stdout: sys.stdout.write(line) sys.exit(-1) @@ -803,9 +806,9 @@ def main(**args): failure.case.test(failure=failure, **args) sys.exit(0) - print('tests passed %d/%d (%.2f%%)' % (passed, total, + print('tests passed %d/%d (%.1f%%)' % (passed, total, 100*(passed/total if total else 1.0))) - print('tests failed %d/%d (%.2f%%)' % (failed, total, + print('tests failed %d/%d (%.1f%%)' % (failed, total, 100*(failed/total if total else 1.0))) return 1 if failed > 0 else 0
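Taken together, the intent is that each measurement script merges its column into one shared CSV which summary.py then prints. A rough sketch of that chaining, assuming a build has already produced the `.o` and `.ci` files the scripts look for; the file name `lfs.csv` and the exact flags are illustrative, not necessarily what CI uses:

import subprocess as sp

sp.check_call(['./scripts/code.py',    '-q', '-o', 'lfs.csv'])
sp.check_call(['./scripts/data.py',    '-q', '-m', 'lfs.csv', '-o', 'lfs.csv'])
sp.check_call(['./scripts/stack.py',   '-q', '-m', 'lfs.csv', '-o', 'lfs.csv'])
sp.check_call(['./scripts/structs.py', '-q', '-m', 'lfs.csv', '-o', 'lfs.csv'])
sp.check_call(['./scripts/summary.py', 'lfs.csv'])

Writing `-o` to the same file that `-m` reads works here because each script finishes reading the merge input before it reopens the path for writing.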