diff --git a/.github/workflows/post-release.yml b/.github/workflows/post-release.yml index da539c35..a44a675d 100644 --- a/.github/workflows/post-release.yml +++ b/.github/workflows/post-release.yml @@ -6,7 +6,7 @@ on: jobs: post-release: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: # trigger post-release in dependency repo, this indirection allows the # dependency repo to be updated often without affecting this repo. At diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a1a1a436..c38b8de6 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -7,7 +7,7 @@ on: jobs: release: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 # need to manually check for a couple things # - tests passed? @@ -73,89 +73,70 @@ jobs: # previous results to compare against? [ -n "$LFS_PREV_VERSION" ] && curl -sS \ "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/` - `status/$LFS_PREV_VERSION" \ + `status/$LFS_PREV_VERSION?per_page=100" \ | jq -re 'select(.sha != env.GITHUB_SHA) | .statuses[]' \ >> prev-results.json \ || true - # unfortunately these each have their own format - [ -e results/code-thumb.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / code").description - | capture("Code size is (?[0-9]+)").result' \ - prev-results.json || echo 0)" - ./scripts/code.py -u results/code-thumb.csv -s | awk ' - NR==2 {printf "Code size,%d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - [ -e results/code-thumb-readonly.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / code (readonly)").description - | capture("Code size is (?[0-9]+)").result' \ - prev-results.json || echo 0)" - ./scripts/code.py -u results/code-thumb-readonly.csv -s | awk ' - NR==2 {printf "Code size
(readonly),%d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - [ -e results/code-thumb-threadsafe.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / code (threadsafe)").description - | capture("Code size is (?[0-9]+)").result' \ - prev-results.json || echo 0)" - ./scripts/code.py -u results/code-thumb-threadsafe.csv -s | awk ' - NR==2 {printf "Code size
(threadsafe),%d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - [ -e results/code-thumb-migrate.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / code (migrate)").description - | capture("Code size is (?[0-9]+)").result' \ - prev-results.json || echo 0)" - ./scripts/code.py -u results/code-thumb-migrate.csv -s | awk ' - NR==2 {printf "Code size
(migrate),%d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - [ -e results/code-thumb-error-asserts.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / code (error-asserts)").description - | capture("Code size is (?[0-9]+)").result' \ - prev-results.json || echo 0)" - ./scripts/code.py -u results/code-thumb-error-asserts.csv -s | awk ' - NR==2 {printf "Code size
(error-asserts),%d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - [ -e results/coverage.csv ] && ( \ - export PREV="$(jq -re ' - select(.context == "results / coverage").description - | capture("Coverage is (?[0-9\\.]+)").result' \ - prev-results.json || echo 0)" - ./scripts/coverage.py -u results/coverage.csv -s | awk -F '[ /%]+' ' - NR==2 {printf "Coverage,%.1f%% of %d lines",$4,$3} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",$4-ENVIRON["PREV"]} - NR==2 {printf "\n"}' \ - >> results.csv) - - # transpose to GitHub table - [ -e results.csv ] || exit 0 - awk -F ',' ' - {label[NR]=$1; value[NR]=$2} - END { - for (r=1; r<=NR; r++) {printf "| %s ",label[r]}; printf "|\n"; - for (r=1; r<=NR; r++) {printf "|:--"}; printf "|\n"; - for (r=1; r<=NR; r++) {printf "| %s ",value[r]}; printf "|\n"}' \ - results.csv > results.txt - echo "RESULTS:" + # build table for GitHub + echo "" >> results.txt + echo "" >> results.txt + echo "" >> results.txt + echo "" >> results.txt + for r in Code Stack Structs Coverage + do + echo "" >> results.txt + done + echo "" >> results.txt + echo "" >> results.txt + + echo "" >> results.txt + for c in "" readonly threadsafe migrate error-asserts + do + echo "" >> results.txt + c_or_default=${c:-default} + echo "" >> results.txt + for r in code stack structs + do + # per-config results + echo "" >> results.txt + done + # coverage results + if [ -z $c ] + then + echo "" >> results.txt + fi + echo "" >> results.txt + done + echo "" >> results.txt + echo "
Configuration$r
${c_or_default^}" >> results.txt + [ -e results/thumb${c:+-$c}.csv ] && ( \ + export PREV="$(jq -re ' + select(.context == "'"results (thumb${c:+, $c}) / $r"'").description + | capture("(?[0-9∞]+)").result' \ + prev-results.json || echo 0)" + ./scripts/summary.py results/thumb${c:+-$c}.csv -f $r -Y | awk ' + NR==2 {printf "%s B",$2} + NR==2 && ENVIRON["PREV"]+0 != 0 { + printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]} + NR==2 {printf "\n"}' \ + | sed -e 's/ /\ /g' \ + >> results.txt) + echo "" >> results.txt + [ -e results/coverage.csv ] && ( \ + export PREV="$(jq -re ' + select(.context == "results / coverage").description + | capture("(?[0-9\\.]+)").result' \ + prev-results.json || echo 0)" + ./scripts/coverage.py -u results/coverage.csv -Y | awk -F '[ /%]+' ' + NR==2 {printf "%.1f%% of %d lines",$4,$3} + NR==2 && ENVIRON["PREV"]+0 != 0 { + printf " (%+.1f%%)",$4-ENVIRON["PREV"]} + NR==2 {printf "\n"}' \ + | sed -e 's/ /\ /g' \ + >> results.txt) + echo "
" >> results.txt + cat results.txt # find changes from history diff --git a/.github/workflows/status.yml b/.github/workflows/status.yml index 7bd851a2..d28b17cc 100644 --- a/.github/workflows/status.yml +++ b/.github/workflows/status.yml @@ -6,7 +6,7 @@ on: jobs: status: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: # custom statuses? - uses: dawidd6/action-download-artifact@v2 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6d633f8e..bc8bb0c6 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -8,7 +8,7 @@ env: jobs: # run tests test: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 strategy: fail-fast: false matrix: @@ -18,11 +18,27 @@ jobs: - uses: actions/checkout@v2 - name: install run: | - # need toml, also pip3 isn't installed by default? + # need a few additional tools + # + # note this includes gcc-10, which is required for -fcallgraph-info=su sudo apt-get update -qq - sudo apt-get install -qq python3 python3-pip lcov + sudo apt-get install -qq gcc-10 python3 python3-pip lcov sudo pip3 install toml - gcc --version + echo "CC=gcc-10" >> $GITHUB_ENV + gcc-10 --version + lcov --version + python3 --version + + # need newer lcov version for gcc-10 + #sudo apt-get remove lcov + #wget https://launchpad.net/ubuntu/+archive/primary/+files/lcov_1.15-1_all.deb + #sudo apt install ./lcov_1.15-1_all.deb + #lcov --version + #which lcov + #ls -lha /usr/bin/lcov + wget https://github.com/linux-test-project/lcov/releases/download/v1.15/lcov-1.15.tar.gz + tar xf lcov-1.15.tar.gz + sudo make -C lcov-1.15 install # setup a ram-backed disk to speed up reentrant tests mkdir disks @@ -41,36 +57,36 @@ jobs: if: ${{matrix.arch == 'thumb'}} run: | sudo apt-get install -qq \ - gcc-arm-linux-gnueabi \ + gcc-10-arm-linux-gnueabi \ libc6-dev-armel-cross \ qemu-user - echo "CC=arm-linux-gnueabi-gcc -mthumb --static" >> $GITHUB_ENV + echo "CC=arm-linux-gnueabi-gcc-10 -mthumb --static" >> $GITHUB_ENV echo "EXEC=qemu-arm" >> $GITHUB_ENV - arm-linux-gnueabi-gcc --version + arm-linux-gnueabi-gcc-10 --version qemu-arm -version # cross-compile with MIPS (32-bit, big-endian) - name: install-mips if: ${{matrix.arch == 'mips'}} run: | sudo apt-get install -qq \ - gcc-mips-linux-gnu \ + gcc-10-mips-linux-gnu \ libc6-dev-mips-cross \ qemu-user - echo "CC=mips-linux-gnu-gcc --static" >> $GITHUB_ENV + echo "CC=mips-linux-gnu-gcc-10 --static" >> $GITHUB_ENV echo "EXEC=qemu-mips" >> $GITHUB_ENV - mips-linux-gnu-gcc --version + mips-linux-gnu-gcc-10 --version qemu-mips -version # cross-compile with PowerPC (32-bit, big-endian) - name: install-powerpc if: ${{matrix.arch == 'powerpc'}} run: | sudo apt-get install -qq \ - gcc-powerpc-linux-gnu \ + gcc-10-powerpc-linux-gnu \ libc6-dev-powerpc-cross \ qemu-user - echo "CC=powerpc-linux-gnu-gcc --static" >> $GITHUB_ENV + echo "CC=powerpc-linux-gnu-gcc-10 --static" >> $GITHUB_ENV echo "EXEC=qemu-ppc" >> $GITHUB_ENV - powerpc-linux-gnu-gcc --version + powerpc-linux-gnu-gcc-10 --version qemu-ppc -version # make sure example can at least compile @@ -148,102 +164,108 @@ jobs: retention-days: 1 # update results - - name: results-code + - name: results run: | mkdir -p results make clean - make code \ + make lfs.csv \ CFLAGS+=" \ -DLFS_NO_ASSERT \ -DLFS_NO_DEBUG \ -DLFS_NO_WARN \ - -DLFS_NO_ERROR" \ - CODEFLAGS+="-o results/code-${{matrix.arch}}.csv" - - name: results-code-readonly + -DLFS_NO_ERROR" + cp lfs.csv results/${{matrix.arch}}.csv + ./scripts/summary.py results/${{matrix.arch}}.csv + - name: results-readonly run: | mkdir -p 
results make clean - make code \ + make lfs.csv \ CFLAGS+=" \ -DLFS_NO_ASSERT \ -DLFS_NO_DEBUG \ -DLFS_NO_WARN \ -DLFS_NO_ERROR \ - -DLFS_READONLY" \ - CODEFLAGS+="-o results/code-${{matrix.arch}}-readonly.csv" - - name: results-code-threadsafe + -DLFS_READONLY" + cp lfs.csv results/${{matrix.arch}}-readonly.csv + ./scripts/summary.py results/${{matrix.arch}}-readonly.csv + - name: results-threadsafe run: | mkdir -p results make clean - make code \ + make lfs.csv \ CFLAGS+=" \ -DLFS_NO_ASSERT \ -DLFS_NO_DEBUG \ -DLFS_NO_WARN \ -DLFS_NO_ERROR \ - -DLFS_THREADSAFE" \ - CODEFLAGS+="-o results/code-${{matrix.arch}}-threadsafe.csv" - - name: results-code-migrate + -DLFS_THREADSAFE" + cp lfs.csv results/${{matrix.arch}}-threadsafe.csv + ./scripts/summary.py results/${{matrix.arch}}-threadsafe.csv + - name: results-migrate run: | mkdir -p results make clean - make code \ + make lfs.csv \ CFLAGS+=" \ -DLFS_NO_ASSERT \ -DLFS_NO_DEBUG \ -DLFS_NO_WARN \ -DLFS_NO_ERROR \ - -DLFS_MIGRATE" \ - CODEFLAGS+="-o results/code-${{matrix.arch}}-migrate.csv" - - name: results-code-error-asserts + -DLFS_MIGRATE" + cp lfs.csv results/${{matrix.arch}}-migrate.csv + ./scripts/summary.py results/${{matrix.arch}}-migrate.csv + - name: results-error-asserts run: | mkdir -p results make clean - make code \ + make lfs.csv \ CFLAGS+=" \ -DLFS_NO_DEBUG \ -DLFS_NO_WARN \ -DLFS_NO_ERROR \ - -D'LFS_ASSERT(test)=do {if(!(test)) {return -1;}} while(0)'" \ - CODEFLAGS+="-o results/code-${{matrix.arch}}-error-asserts.csv" + -D'LFS_ASSERT(test)=do {if(!(test)) {return -1;}} while(0)'" + cp lfs.csv results/${{matrix.arch}}-error-asserts.csv + ./scripts/summary.py results/${{matrix.arch}}-error-asserts.csv - name: upload-results uses: actions/upload-artifact@v2 with: name: results path: results - # limit reporting to Thumb, otherwise there would be too many numbers - # flying around for the results to be easily readable + + # create statuses with results - name: collect-status - if: ${{matrix.arch == 'thumb'}} run: | mkdir -p status - for f in $(shopt -s nullglob ; echo results/code*.csv) + for f in $(shopt -s nullglob ; echo results/*.csv) do - export STEP="results-code$( - echo $f | sed -n 's/.*code-.*-\(.*\).csv/-\1/p')" - export CONTEXT="results / code$( - echo $f | sed -n 's/.*code-.*-\(.*\).csv/ (\1)/p')" - export PREV="$(curl -sS \ - "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/status/master" \ - | jq -re 'select(.sha != env.GITHUB_SHA) | .statuses[] - | select(.context == env.CONTEXT).description - | capture("Code size is (?[0-9]+)").result' \ - || echo 0)" - export DESCRIPTION="$(./scripts/code.py -u $f -s | awk ' - NR==2 {printf "Code size is %d B",$2} - NR==2 && ENVIRON["PREV"]+0 != 0 { - printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]}')" - jq -n '{ - state: "success", - context: env.CONTEXT, - description: env.DESCRIPTION, - target_job: "${{github.job}} (${{matrix.arch}})", - target_step: env.STEP}' \ - | tee status/code$( - echo $f | sed -n 's/.*code-.*-\(.*\).csv/-\1/p').json + export STEP="results$( + echo $f | sed -n 's/[^-]*-\(.*\).csv/-\1/p')" + for r in code stack structs + do + export CONTEXT="results (${{matrix.arch}}$( + echo $f | sed -n 's/[^-]*-\(.*\).csv/, \1/p')) / $r" + export PREV="$(curl -sS \ + "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/status/master?per_page=100" \ + | jq -re 'select(.sha != env.GITHUB_SHA) | .statuses[] + | select(.context == env.CONTEXT).description + | capture("(?[0-9∞]+)").result' \ + || echo 0)" + export DESCRIPTION="$(./scripts/summary.py $f -f $r -Y | awk ' + NR==2 
{printf "%s B",$2} + NR==2 && ENVIRON["PREV"]+0 != 0 { + printf " (%+.1f%%)",100*($2-ENVIRON["PREV"])/ENVIRON["PREV"]}')" + jq -n '{ + state: "success", + context: env.CONTEXT, + description: env.DESCRIPTION, + target_job: "${{github.job}} (${{matrix.arch}})", + target_step: env.STEP}' \ + | tee status/$r-${{matrix.arch}}$( + echo $f | sed -n 's/[^-]*-\(.*\).csv/-\1/p').json + done done - name: upload-status - if: ${{matrix.arch == 'thumb'}} uses: actions/upload-artifact@v2 with: name: status @@ -252,7 +274,7 @@ jobs: # run under Valgrind to check for memory errors valgrind: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - name: install @@ -272,7 +294,7 @@ jobs: # self-host with littlefs-fuse for a fuzz-like test fuse: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 if: ${{!endsWith(github.ref, '-prefix')}} steps: - uses: actions/checkout@v2 @@ -297,16 +319,18 @@ jobs: # setup disk for littlefs-fuse mkdir mount - sudo chmod a+rw /dev/loop0 + LOOP=$(sudo losetup -f) + sudo chmod a+rw $LOOP dd if=/dev/zero bs=512 count=128K of=disk - losetup /dev/loop0 disk + losetup $LOOP disk + echo "LOOP=$LOOP" >> $GITHUB_ENV - name: test run: | # self-host test make -C littlefs-fuse - littlefs-fuse/lfs --format /dev/loop0 - littlefs-fuse/lfs /dev/loop0 mount + littlefs-fuse/lfs --format $LOOP + littlefs-fuse/lfs $LOOP mount ls mount mkdir mount/littlefs @@ -318,7 +342,7 @@ jobs: # test migration using littlefs-fuse migrate: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 if: ${{!endsWith(github.ref, '-prefix')}} steps: - uses: actions/checkout@v2 @@ -348,9 +372,11 @@ jobs: # setup disk for littlefs-fuse mkdir mount - sudo chmod a+rw /dev/loop0 + LOOP=$(sudo losetup -f) + sudo chmod a+rw $LOOP dd if=/dev/zero bs=512 count=128K of=disk - losetup /dev/loop0 disk + losetup $LOOP disk + echo "LOOP=$LOOP" >> $GITHUB_ENV - name: test run: | # compile v1 and v2 @@ -358,8 +384,8 @@ jobs: make -C v2 # run self-host test with v1 - v1/lfs --format /dev/loop0 - v1/lfs /dev/loop0 mount + v1/lfs --format $LOOP + v1/lfs $LOOP mount ls mount mkdir mount/littlefs @@ -373,8 +399,8 @@ jobs: cd ../.. 
fusermount -u mount - v2/lfs --migrate /dev/loop0 - v2/lfs /dev/loop0 mount + v2/lfs --migrate $LOOP + v2/lfs $LOOP mount # run self-host test with v2 right where we left off ls mount @@ -385,7 +411,7 @@ jobs: # collect coverage info coverage: - runs-on: ubuntu-18.04 + runs-on: ubuntu-20.04 needs: [test] steps: - uses: actions/checkout@v2 @@ -421,14 +447,14 @@ jobs: export STEP="results-coverage" export CONTEXT="results / coverage" export PREV="$(curl -sS \ - "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/status/master" \ + "$GITHUB_API_URL/repos/$GITHUB_REPOSITORY/status/master?per_page=100" \ | jq -re 'select(.sha != env.GITHUB_SHA) | .statuses[] | select(.context == env.CONTEXT).description - | capture("Coverage is (?[0-9\\.]+)").result' \ + | capture("(?[0-9\\.]+)").result' \ || echo 0)" export DESCRIPTION="$( - ./scripts/coverage.py -u results/coverage.csv -s | awk -F '[ /%]+' ' - NR==2 {printf "Coverage is %.1f%% of %d lines",$4,$3} + ./scripts/coverage.py -u results/coverage.csv -Y | awk -F '[ /%]+' ' + NR==2 {printf "%.1f%% of %d lines",$4,$3} NR==2 && ENVIRON["PREV"]+0 != 0 { printf " (%+.1f%%)",$4-ENVIRON["PREV"]}')" jq -n '{ diff --git a/.gitignore b/.gitignore index a6ebc4c3..3f7b860e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,8 @@ *.o *.d *.a +*.ci +*.csv # Testing things blocks/ diff --git a/Makefile b/Makefile index 763a0cee..7cc59f8a 100644 --- a/Makefile +++ b/Makefile @@ -17,44 +17,63 @@ TARGET ?= $(BUILDDIR)lfs.a endif -CC ?= gcc -AR ?= ar -SIZE ?= size -CTAGS ?= ctags -NM ?= nm -LCOV ?= lcov +CC ?= gcc +AR ?= ar +SIZE ?= size +CTAGS ?= ctags +NM ?= nm +OBJDUMP ?= objdump +LCOV ?= lcov SRC ?= $(wildcard *.c) OBJ := $(SRC:%.c=$(BUILDDIR)%.o) DEP := $(SRC:%.c=$(BUILDDIR)%.d) ASM := $(SRC:%.c=$(BUILDDIR)%.s) +CGI := $(SRC:%.c=$(BUILDDIR)%.ci) ifdef DEBUG -override CFLAGS += -O0 -g3 +override CFLAGS += -O0 else override CFLAGS += -Os endif ifdef TRACE override CFLAGS += -DLFS_YES_TRACE endif +override CFLAGS += -g3 override CFLAGS += -I. 
override CFLAGS += -std=c99 -Wall -pedantic override CFLAGS += -Wextra -Wshadow -Wjump-misses-init -Wundef ifdef VERBOSE -override TESTFLAGS += -v -override CODEFLAGS += -v +override TESTFLAGS += -v +override CALLSFLAGS += -v +override CODEFLAGS += -v +override DATAFLAGS += -v +override STACKFLAGS += -v +override STRUCTSFLAGS += -v override COVERAGEFLAGS += -v endif ifdef EXEC override TESTFLAGS += --exec="$(EXEC)" endif +ifdef COVERAGE +override TESTFLAGS += --coverage +endif ifdef BUILDDIR -override TESTFLAGS += --build-dir="$(BUILDDIR:/=)" -override CODEFLAGS += --build-dir="$(BUILDDIR:/=)" +override TESTFLAGS += --build-dir="$(BUILDDIR:/=)" +override CALLSFLAGS += --build-dir="$(BUILDDIR:/=)" +override CODEFLAGS += --build-dir="$(BUILDDIR:/=)" +override DATAFLAGS += --build-dir="$(BUILDDIR:/=)" +override STACKFLAGS += --build-dir="$(BUILDDIR:/=)" +override STRUCTSFLAGS += --build-dir="$(BUILDDIR:/=)" +override COVERAGEFLAGS += --build-dir="$(BUILDDIR:/=)" endif ifneq ($(NM),nm) override CODEFLAGS += --nm-tool="$(NM)" +override DATAFLAGS += --nm-tool="$(NM)" +endif +ifneq ($(OBJDUMP),objdump) +override STRUCTSFLAGS += --objdump-tool="$(OBJDUMP)" endif @@ -73,9 +92,9 @@ size: $(OBJ) tags: $(CTAGS) --totals --c-types=+p $(shell find -H -name '*.h') $(SRC) -.PHONY: code -code: $(OBJ) - ./scripts/code.py $^ $(CODEFLAGS) +.PHONY: calls +calls: $(CGI) + ./scripts/calls.py $^ $(CALLSFLAGS) .PHONY: test test: @@ -84,9 +103,30 @@ test: test%: tests/test$$(firstword $$(subst \#, ,%)).toml ./scripts/test.py $@ $(TESTFLAGS) +.PHONY: code +code: $(OBJ) + ./scripts/code.py $^ -S $(CODEFLAGS) + +.PHONY: data +data: $(OBJ) + ./scripts/data.py $^ -S $(DATAFLAGS) + +.PHONY: stack +stack: $(CGI) + ./scripts/stack.py $^ -S $(STACKFLAGS) + +.PHONY: structs +structs: $(OBJ) + ./scripts/structs.py $^ -S $(STRUCTSFLAGS) + .PHONY: coverage coverage: - ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info $(COVERAGEFLAGS) + ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info -s $(COVERAGEFLAGS) + +.PHONY: summary +summary: $(BUILDDIR)lfs.csv + ./scripts/summary.py -Y $^ $(SUMMARYFLAGS) + # rules -include $(DEP) @@ -95,20 +135,39 @@ coverage: $(BUILDDIR)lfs: $(OBJ) $(CC) $(CFLAGS) $^ $(LFLAGS) -o $@ -$(BUILDDIR)%.a: $(OBJ) +$(BUILDDIR)lfs.a: $(OBJ) $(AR) rcs $@ $^ +$(BUILDDIR)lfs.csv: $(OBJ) $(CGI) + ./scripts/code.py $(OBJ) -q $(CODEFLAGS) -o $@ + ./scripts/data.py $(OBJ) -q -m $@ $(DATAFLAGS) -o $@ + ./scripts/stack.py $(CGI) -q -m $@ $(STACKFLAGS) -o $@ + ./scripts/structs.py $(OBJ) -q -m $@ $(STRUCTSFLAGS) -o $@ + $(if $(COVERAGE),\ + ./scripts/coverage.py $(BUILDDIR)tests/*.toml.info \ + -q -m $@ $(COVERAGEFLAGS) -o $@) + $(BUILDDIR)%.o: %.c $(CC) -c -MMD $(CFLAGS) $< -o $@ $(BUILDDIR)%.s: %.c $(CC) -S $(CFLAGS) $< -o $@ +# gcc depends on the output file for intermediate file names, so +# we can't omit to .o output. 
We also need to serialize with the +# normal .o rule because otherwise we can end up with multiprocess +# problems with two instances of gcc modifying the same .o +$(BUILDDIR)%.ci: %.c | $(BUILDDIR)%.o + $(CC) -c -MMD -fcallgraph-info=su $(CFLAGS) $< -o $| + # clean everything .PHONY: clean clean: - rm -f $(TARGET) + rm -f $(BUILDDIR)lfs + rm -f $(BUILDDIR)lfs.a + rm -f $(BUILDDIR)lfs.csv rm -f $(OBJ) + rm -f $(CGI) rm -f $(DEP) rm -f $(ASM) rm -f $(BUILDDIR)tests/*.toml.* diff --git a/lfs.c b/lfs.c index 4a2f050c..65d2ff4a 100644 --- a/lfs.c +++ b/lfs.c @@ -8,9 +8,24 @@ #include "lfs.h" #include "lfs_util.h" + +// some constants used throughout the code #define LFS_BLOCK_NULL ((lfs_block_t)-1) #define LFS_BLOCK_INLINE ((lfs_block_t)-2) +enum { + LFS_OK_RELOCATED = 1, + LFS_OK_DROPPED = 2, + LFS_OK_ORPHANED = 3, +}; + +enum { + LFS_CMP_EQ = 0, + LFS_CMP_LT = 1, + LFS_CMP_GT = 2, +}; + + /// Caching block device operations /// static inline void lfs_cache_drop(lfs_t *lfs, lfs_cache_t *rcache) { @@ -109,12 +124,6 @@ static int lfs_bd_read(lfs_t *lfs, return 0; } -enum { - LFS_CMP_EQ = 0, - LFS_CMP_LT = 1, - LFS_CMP_GT = 2, -}; - static int lfs_bd_cmp(lfs_t *lfs, const lfs_cache_t *pcache, lfs_cache_t *rcache, lfs_size_t hint, lfs_block_t block, lfs_off_t off, @@ -472,13 +481,15 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, static int lfs_dir_compact(lfs_t *lfs, lfs_mdir_t *dir, const struct lfs_mattr *attrs, int attrcount, lfs_mdir_t *source, uint16_t begin, uint16_t end); - +static lfs_ssize_t lfs_file_flushedwrite(lfs_t *lfs, lfs_file_t *file, + const void *buffer, lfs_size_t size); static lfs_ssize_t lfs_file_rawwrite(lfs_t *lfs, lfs_file_t *file, const void *buffer, lfs_size_t size); static int lfs_file_rawsync(lfs_t *lfs, lfs_file_t *file); static int lfs_file_outline(lfs_t *lfs, lfs_file_t *file); static int lfs_file_flush(lfs_t *lfs, lfs_file_t *file); +static int lfs_fs_deorphan(lfs_t *lfs, bool powerloss); static int lfs_fs_preporphans(lfs_t *lfs, int8_t orphans); static void lfs_fs_prepmove(lfs_t *lfs, uint16_t id, const lfs_block_t pair[2]); @@ -486,8 +497,6 @@ static int lfs_fs_pred(lfs_t *lfs, const lfs_block_t dir[2], lfs_mdir_t *pdir); static lfs_stag_t lfs_fs_parent(lfs_t *lfs, const lfs_block_t dir[2], lfs_mdir_t *parent); -static int lfs_fs_relocate(lfs_t *lfs, - const lfs_block_t oldpair[2], lfs_block_t newpair[2]); static int lfs_fs_forceconsistency(lfs_t *lfs); #endif @@ -498,6 +507,8 @@ static int lfs1_traverse(lfs_t *lfs, static int lfs_dir_rawrewind(lfs_t *lfs, lfs_dir_t *dir); +static lfs_ssize_t lfs_file_flushedread(lfs_t *lfs, lfs_file_t *file, + void *buffer, lfs_size_t size); static lfs_ssize_t lfs_file_rawread(lfs_t *lfs, lfs_file_t *file, void *buffer, lfs_size_t size); static int lfs_file_rawclose(lfs_t *lfs, lfs_file_t *file); @@ -738,6 +749,7 @@ static int lfs_dir_traverse_filter(void *p, (LFS_MKTAG(0x7ff, 0x3ff, 0) & tag) == ( LFS_MKTAG(LFS_TYPE_DELETE, 0, 0) | (LFS_MKTAG(0, 0x3ff, 0) & *filtertag))) { + *filtertag = LFS_MKTAG(LFS_FROM_NOOP, 0, 0); return true; } @@ -752,131 +764,228 @@ static int lfs_dir_traverse_filter(void *p, #endif #ifndef LFS_READONLY +// maximum recursive depth of lfs_dir_traverse, the deepest call: +// +// traverse with commit +// '-> traverse with filter +// '-> traverse with move +// ' traverse with filter +// +#define LFS_DIR_TRAVERSE_DEPTH 4 + +struct lfs_dir_traverse { + const lfs_mdir_t *dir; + lfs_off_t off; + lfs_tag_t ptag; + const struct lfs_mattr *attrs; + int attrcount; + + lfs_tag_t tmask; + lfs_tag_t ttag; 
+ uint16_t begin; + uint16_t end; + int16_t diff; + + int (*cb)(void *data, lfs_tag_t tag, const void *buffer); + void *data; + + lfs_tag_t tag; + const void *buffer; + struct lfs_diskoff disk; +}; + static int lfs_dir_traverse(lfs_t *lfs, const lfs_mdir_t *dir, lfs_off_t off, lfs_tag_t ptag, const struct lfs_mattr *attrs, int attrcount, lfs_tag_t tmask, lfs_tag_t ttag, uint16_t begin, uint16_t end, int16_t diff, int (*cb)(void *data, lfs_tag_t tag, const void *buffer), void *data) { + // This function in inherently recursive, but bounded. To allow tool-based + // analysis without unnecessary code-cost we use an explicit stack + struct lfs_dir_traverse stack[LFS_DIR_TRAVERSE_DEPTH-1]; + unsigned sp = 0; + int res; + // iterate over directory and attrs + lfs_tag_t tag; + const void *buffer; + struct lfs_diskoff disk; while (true) { - lfs_tag_t tag; - const void *buffer; - struct lfs_diskoff disk; - if (off+lfs_tag_dsize(ptag) < dir->off) { - off += lfs_tag_dsize(ptag); - int err = lfs_bd_read(lfs, - NULL, &lfs->rcache, sizeof(tag), - dir->pair[0], off, &tag, sizeof(tag)); - if (err) { - return err; - } - - tag = (lfs_frombe32(tag) ^ ptag) | 0x80000000; - disk.block = dir->pair[0]; - disk.off = off+sizeof(lfs_tag_t); - buffer = &disk; - ptag = tag; - } else if (attrcount > 0) { - tag = attrs[0].tag; - buffer = attrs[0].buffer; - attrs += 1; - attrcount -= 1; - } else { - return 0; - } - - lfs_tag_t mask = LFS_MKTAG(0x7ff, 0, 0); - if ((mask & tmask & tag) != (mask & tmask & ttag)) { - continue; - } + { + if (off+lfs_tag_dsize(ptag) < dir->off) { + off += lfs_tag_dsize(ptag); + int err = lfs_bd_read(lfs, + NULL, &lfs->rcache, sizeof(tag), + dir->pair[0], off, &tag, sizeof(tag)); + if (err) { + return err; + } - // do we need to filter? inlining the filtering logic here allows - // for some minor optimizations - if (lfs_tag_id(tmask) != 0) { - // scan for duplicates and update tag based on creates/deletes - int filter = lfs_dir_traverse(lfs, - dir, off, ptag, attrs, attrcount, - 0, 0, 0, 0, 0, - lfs_dir_traverse_filter, &tag); - if (filter < 0) { - return filter; + tag = (lfs_frombe32(tag) ^ ptag) | 0x80000000; + disk.block = dir->pair[0]; + disk.off = off+sizeof(lfs_tag_t); + buffer = &disk; + ptag = tag; + } else if (attrcount > 0) { + tag = attrs[0].tag; + buffer = attrs[0].buffer; + attrs += 1; + attrcount -= 1; + } else { + // finished traversal, pop from stack? + res = 0; + break; } - if (filter) { + // do we need to filter? + lfs_tag_t mask = LFS_MKTAG(0x7ff, 0, 0); + if ((mask & tmask & tag) != (mask & tmask & ttag)) { continue; } - // in filter range? - if (!(lfs_tag_id(tag) >= begin && lfs_tag_id(tag) < end)) { + if (lfs_tag_id(tmask) != 0) { + LFS_ASSERT(sp < LFS_DIR_TRAVERSE_DEPTH); + // recurse, scan for duplicates, and update tag based on + // creates/deletes + stack[sp] = (struct lfs_dir_traverse){ + .dir = dir, + .off = off, + .ptag = ptag, + .attrs = attrs, + .attrcount = attrcount, + .tmask = tmask, + .ttag = ttag, + .begin = begin, + .end = end, + .diff = diff, + .cb = cb, + .data = data, + .tag = tag, + .buffer = buffer, + .disk = disk, + }; + sp += 1; + + dir = dir; + off = off; + ptag = ptag; + attrs = attrs; + attrcount = attrcount; + tmask = 0; + ttag = 0; + begin = 0; + end = 0; + diff = 0; + cb = lfs_dir_traverse_filter; + data = &stack[sp-1].tag; continue; } } +popped: + // in filter range? 
+ if (lfs_tag_id(tmask) != 0 && + !(lfs_tag_id(tag) >= begin && lfs_tag_id(tag) < end)) { + continue; + } + // handle special cases for mcu-side operations if (lfs_tag_type3(tag) == LFS_FROM_NOOP) { // do nothing } else if (lfs_tag_type3(tag) == LFS_FROM_MOVE) { - uint16_t fromid = lfs_tag_size(tag); - uint16_t toid = lfs_tag_id(tag); - // There is a huge room for simple optimization for the rename case - // where we can see up to 4 levels of lfs_dir_traverse recursions - // when compaction happened (for example): + // Without this condition, lfs_dir_traverse can exhibit an + // extremely expensive O(n^3) of nested loops when renaming. + // This happens because lfs_dir_traverse tries to filter tags by + // the tags in the source directory, triggering a second + // lfs_dir_traverse with its own filter operation. // - // >lfs_dir_compact - // [1] lfs_dir_traverse(cb=lfs_dir_commit_size) - // - do 'duplicates and tag update' - // [2] lfs_dir_traverse(cb=lfs_dir_traverse_filter, data=tag[1]) - // - Reaching a LFS_FROM_MOVE tag (here) - // [3] lfs_dir_traverse(cb=lfs_dir_traverse_filter, - // data=tag[1]) <= on 'from' dir - // - do 'duplicates and tag update' - // [4] lfs_dir_traverse(cb=lfs_dir_traverse_filter, - // data=tag[3]) + // However we don't actually care about filtering the second set of + // tags, since duplicate tags have no effect when filtering. // - // Yet, for LFS_FROM_MOVE when cb == lfs_dir_traverse_filter - // traverse [3] and [4] don't do anything: - // - if [2] is supposed to match 'toid' for duplication, a preceding - // ERASE or CREATE with the same tag id will already have stopped - // the search. - // - if [2] is here to update tag value of CREATE/DELETE attr found - // during the scan, since [3] is looking for LFS_TYPE_STRUCT only - // and call lfs_dir_traverse_filter with LFS_TYPE_STRUCT attr - // wheras lfs_dir_traverse_filter only modify tag on CREATE or - // DELETE. Consequently, cb called from [4] will never stop the - // search from [2]. - // - [4] may call lfs_dir_traverse_filter, but with action on a - // tag[3] pointer completely different from tag[1] + // This check skips this unnecessary recursive filtering explicitly, + // reducing this runtime from O(n^3) to O(n^2). 
if (cb == lfs_dir_traverse_filter) { continue; } - // note: buffer = oldcwd dir - int err = lfs_dir_traverse(lfs, - buffer, 0, 0xffffffff, NULL, 0, - LFS_MKTAG(0x600, 0x3ff, 0), - LFS_MKTAG(LFS_TYPE_STRUCT, 0, 0), - fromid, fromid+1, toid-fromid+diff, - cb, data); - if (err) { - return err; - } + // recurse into move + stack[sp] = (struct lfs_dir_traverse){ + .dir = dir, + .off = off, + .ptag = ptag, + .attrs = attrs, + .attrcount = attrcount, + .tmask = tmask, + .ttag = ttag, + .begin = begin, + .end = end, + .diff = diff, + .cb = cb, + .data = data, + .tag = LFS_MKTAG(LFS_FROM_NOOP, 0, 0), + }; + sp += 1; + + uint16_t fromid = lfs_tag_size(tag); + uint16_t toid = lfs_tag_id(tag); + dir = buffer; + off = 0; + ptag = 0xffffffff; + attrs = NULL; + attrcount = 0; + tmask = LFS_MKTAG(0x600, 0x3ff, 0); + ttag = LFS_MKTAG(LFS_TYPE_STRUCT, 0, 0); + begin = fromid; + end = fromid+1; + diff = toid-fromid+diff; } else if (lfs_tag_type3(tag) == LFS_FROM_USERATTRS) { for (unsigned i = 0; i < lfs_tag_size(tag); i++) { const struct lfs_attr *a = buffer; - int err = cb(data, LFS_MKTAG(LFS_TYPE_USERATTR + a[i].type, + res = cb(data, LFS_MKTAG(LFS_TYPE_USERATTR + a[i].type, lfs_tag_id(tag) + diff, a[i].size), a[i].buffer); - if (err) { - return err; + if (res < 0) { + return res; + } + + if (res) { + break; } } } else { - int err = cb(data, tag + LFS_MKTAG(0, diff, 0), buffer); - if (err) { - return err; + res = cb(data, tag + LFS_MKTAG(0, diff, 0), buffer); + if (res < 0) { + return res; + } + + if (res) { + break; } } } + + if (sp > 0) { + // pop from the stack and return, fortunately all pops share + // a destination + dir = stack[sp-1].dir; + off = stack[sp-1].off; + ptag = stack[sp-1].ptag; + attrs = stack[sp-1].attrs; + attrcount = stack[sp-1].attrcount; + tmask = stack[sp-1].tmask; + ttag = stack[sp-1].ttag; + begin = stack[sp-1].begin; + end = stack[sp-1].end; + diff = stack[sp-1].diff; + cb = stack[sp-1].cb; + data = stack[sp-1].data; + tag = stack[sp-1].tag; + buffer = stack[sp-1].buffer; + disk = stack[sp-1].disk; + sp -= 1; + goto popped; + } else { + return res; + } } #endif @@ -1553,7 +1662,7 @@ static int lfs_dir_drop(lfs_t *lfs, lfs_mdir_t *dir, lfs_mdir_t *tail) { static int lfs_dir_split(lfs_t *lfs, lfs_mdir_t *dir, const struct lfs_mattr *attrs, int attrcount, lfs_mdir_t *source, uint16_t split, uint16_t end) { - // create tail directory + // create tail metadata pair lfs_mdir_t tail; int err = lfs_dir_alloc(lfs, &tail); if (err) { @@ -1564,9 +1673,10 @@ static int lfs_dir_split(lfs_t *lfs, tail.tail[0] = dir->tail[0]; tail.tail[1] = dir->tail[1]; - err = lfs_dir_compact(lfs, &tail, attrs, attrcount, source, split, end); - if (err) { - return err; + // note we don't care about LFS_OK_RELOCATED + int res = lfs_dir_compact(lfs, &tail, attrs, attrcount, source, split, end); + if (res < 0) { + return res; } dir->tail[0] = tail.pair[0]; @@ -1607,107 +1717,45 @@ static int lfs_dir_commit_commit(void *p, lfs_tag_t tag, const void *buffer) { } #endif +#ifndef LFS_READONLY +static bool lfs_dir_needsrelocation(lfs_t *lfs, lfs_mdir_t *dir) { + // If our revision count == n * block_cycles, we should force a relocation, + // this is how littlefs wear-levels at the metadata-pair level. Note that we + // actually use (block_cycles+1)|1, this is to avoid two corner cases: + // 1. block_cycles = 1, which would prevent relocations from terminating + // 2. 
block_cycles = 2n, which, due to aliasing, would only ever relocate + // one metadata block in the pair, effectively making this useless + return (lfs->cfg->block_cycles > 0 + && ((dir->rev + 1) % ((lfs->cfg->block_cycles+1)|1) == 0)); +} +#endif + #ifndef LFS_READONLY static int lfs_dir_compact(lfs_t *lfs, lfs_mdir_t *dir, const struct lfs_mattr *attrs, int attrcount, lfs_mdir_t *source, uint16_t begin, uint16_t end) { // save some state in case block is bad - const lfs_block_t oldpair[2] = {dir->pair[0], dir->pair[1]}; bool relocated = false; - bool tired = false; - - // should we split? - while (end - begin > 1) { - // find size - lfs_size_t size = 0; - int err = lfs_dir_traverse(lfs, - source, 0, 0xffffffff, attrs, attrcount, - LFS_MKTAG(0x400, 0x3ff, 0), - LFS_MKTAG(LFS_TYPE_NAME, 0, 0), - begin, end, -begin, - lfs_dir_commit_size, &size); - if (err) { - return err; - } - - // space is complicated, we need room for tail, crc, gstate, - // cleanup delete, and we cap at half a block to give room - // for metadata updates. - if (end - begin < 0xff && - size <= lfs_min(lfs->cfg->block_size - 36, - lfs_alignup((lfs->cfg->metadata_max ? - lfs->cfg->metadata_max : lfs->cfg->block_size)/2, - lfs->cfg->prog_size))) { - break; - } - - // can't fit, need to split, we should really be finding the - // largest size that fits with a small binary search, but right now - // it's not worth the code size - uint16_t split = (end - begin) / 2; - err = lfs_dir_split(lfs, dir, attrs, attrcount, - source, begin+split, end); - if (err) { - // if we fail to split, we may be able to overcompact, unless - // we're too big for even the full block, in which case our - // only option is to error - if (err == LFS_ERR_NOSPC && size <= lfs->cfg->block_size - 36) { - break; - } - return err; - } - - end = begin + split; - } + bool tired = lfs_dir_needsrelocation(lfs, dir); // increment revision count dir->rev += 1; - // If our revision count == n * block_cycles, we should force a relocation, - // this is how littlefs wear-levels at the metadata-pair level. Note that we - // actually use (block_cycles+1)|1, this is to avoid two corner cases: - // 1. block_cycles = 1, which would prevent relocations from terminating - // 2. block_cycles = 2n, which, due to aliasing, would only ever relocate - // one metadata block in the pair, effectively making this useless - if (lfs->cfg->block_cycles > 0 && - (dir->rev % ((lfs->cfg->block_cycles+1)|1) == 0)) { - if (lfs_pair_cmp(dir->pair, (const lfs_block_t[2]){0, 1}) == 0) { - // oh no! we're writing too much to the superblock, - // should we expand? - lfs_ssize_t res = lfs_fs_rawsize(lfs); - if (res < 0) { - return res; - } - - // do we have extra space? littlefs can't reclaim this space - // by itself, so expand cautiously - if ((lfs_size_t)res < lfs->cfg->block_count/2) { - LFS_DEBUG("Expanding superblock at rev %"PRIu32, dir->rev); - int err = lfs_dir_split(lfs, dir, attrs, attrcount, - source, begin, end); - if (err && err != LFS_ERR_NOSPC) { - return err; - } - // welp, we tried, if we ran out of space there's not much - // we can do, we'll error later if we've become frozen - if (!err) { - end = begin; - } - } + // do not proactively relocate blocks during migrations, this + // can cause a number of failure states such: clobbering the + // v1 superblock if we relocate root, and invalidating directory + // pointers if we relocate the head of a directory. 
On top of + // this, relocations increase the overall complexity of + // lfs_migration, which is already a delicate operation. #ifdef LFS_MIGRATE - } else if (lfs->lfs1) { - // do not proactively relocate blocks during migrations, this - // can cause a number of failure states such: clobbering the - // v1 superblock if we relocate root, and invalidating directory - // pointers if we relocate the head of a directory. On top of - // this, relocations increase the overall complexity of - // lfs_migration, which is already a delicate operation. + if (lfs->lfs1) { + tired = false; + } #endif - } else { - // we're writing too much, time to relocate - tired = true; - goto relocate; - } + + if (tired && lfs_pair_cmp(dir->pair, (const lfs_block_t[2]){0, 1}) != 0) { + // we're writing too much, time to relocate + goto relocate; } // begin loop to commit compaction to blocks until a compact sticks @@ -1851,44 +1899,114 @@ static int lfs_dir_compact(lfs_t *lfs, continue; } - if (relocated) { - // update references if we relocated - LFS_DEBUG("Relocating {0x%"PRIx32", 0x%"PRIx32"} " - "-> {0x%"PRIx32", 0x%"PRIx32"}", - oldpair[0], oldpair[1], dir->pair[0], dir->pair[1]); - int err = lfs_fs_relocate(lfs, oldpair, dir->pair); - if (err) { - return err; - } - } - - return 0; + return relocated ? LFS_OK_RELOCATED : 0; } #endif #ifndef LFS_READONLY -static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, - const struct lfs_mattr *attrs, int attrcount) { - // check for any inline files that aren't RAM backed and - // forcefully evict them, needed for filesystem consistency - for (lfs_file_t *f = (lfs_file_t*)lfs->mlist; f; f = f->next) { - if (dir != &f->m && lfs_pair_cmp(f->m.pair, dir->pair) == 0 && - f->type == LFS_TYPE_REG && (f->flags & LFS_F_INLINE) && - f->ctz.size > lfs->cfg->cache_size) { - int err = lfs_file_outline(lfs, f); +static int lfs_dir_splittingcompact(lfs_t *lfs, lfs_mdir_t *dir, + const struct lfs_mattr *attrs, int attrcount, + lfs_mdir_t *source, uint16_t begin, uint16_t end) { + while (true) { + // find size of first split, we do this by halving the split until + // the metadata is guaranteed to fit + // + // Note that this isn't a true binary search, we never increase the + // split size. This may result in poorly distributed metadata but isn't + // worth the extra code size or performance hit to fix. + lfs_size_t split = begin; + while (end - split > 1) { + lfs_size_t size = 0; + int err = lfs_dir_traverse(lfs, + source, 0, 0xffffffff, attrs, attrcount, + LFS_MKTAG(0x400, 0x3ff, 0), + LFS_MKTAG(LFS_TYPE_NAME, 0, 0), + split, end, -split, + lfs_dir_commit_size, &size); if (err) { return err; } - err = lfs_file_flush(lfs, f); - if (err) { + // space is complicated, we need room for tail, crc, gstate, + // cleanup delete, and we cap at half a block to give room + // for metadata updates. + if (end - split < 0xff + && size <= lfs_min(lfs->cfg->block_size - 36, + lfs_alignup( + (lfs->cfg->metadata_max + ? 
lfs->cfg->metadata_max + : lfs->cfg->block_size)/2, + lfs->cfg->prog_size))) { + break; + } + + split = split + ((end - split) / 2); + } + + if (split == begin) { + // no split needed + break; + } + + // split into two metadata pairs and continue + int err = lfs_dir_split(lfs, dir, attrs, attrcount, + source, split, end); + if (err && err != LFS_ERR_NOSPC) { + return err; + } + + if (err) { + // we can't allocate a new block, try to compact with degraded + // performance + LFS_WARN("Unable to split {0x%"PRIx32", 0x%"PRIx32"}", + dir->pair[0], dir->pair[1]); + break; + } else { + end = split; + } + } + + if (lfs_dir_needsrelocation(lfs, dir) + && lfs_pair_cmp(dir->pair, (const lfs_block_t[2]){0, 1}) == 0) { + // oh no! we're writing too much to the superblock, + // should we expand? + lfs_ssize_t size = lfs_fs_rawsize(lfs); + if (size < 0) { + return size; + } + + // do we have extra space? littlefs can't reclaim this space + // by itself, so expand cautiously + if ((lfs_size_t)size < lfs->cfg->block_count/2) { + LFS_DEBUG("Expanding superblock at rev %"PRIu32, dir->rev); + int err = lfs_dir_split(lfs, dir, attrs, attrcount, + source, begin, end); + if (err && err != LFS_ERR_NOSPC) { return err; } + + if (err) { + // welp, we tried, if we ran out of space there's not much + // we can do, we'll error later if we've become frozen + LFS_WARN("Unable to expand superblock"); + } else { + end = begin; + } } } + return lfs_dir_compact(lfs, dir, attrs, attrcount, source, begin, end); +} +#endif + +#ifndef LFS_READONLY +static int lfs_dir_relocatingcommit(lfs_t *lfs, lfs_mdir_t *dir, + const lfs_block_t pair[2], + const struct lfs_mattr *attrs, int attrcount, + lfs_mdir_t *pdir) { + int state = 0; + // calculate changes to the directory - lfs_mdir_t olddir = *dir; bool hasdelete = false; for (int i = 0; i < attrcount; i++) { if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_CREATE) { @@ -1907,23 +2025,19 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, // should we actually drop the directory block? 
if (hasdelete && dir->count == 0) { - lfs_mdir_t pdir; - int err = lfs_fs_pred(lfs, dir->pair, &pdir); + LFS_ASSERT(pdir); + int err = lfs_fs_pred(lfs, dir->pair, pdir); if (err && err != LFS_ERR_NOENT) { - *dir = olddir; return err; } - if (err != LFS_ERR_NOENT && pdir.split) { - err = lfs_dir_drop(lfs, &pdir, dir); - if (err) { - *dir = olddir; - return err; - } + if (err != LFS_ERR_NOENT && pdir->split) { + state = LFS_OK_DROPPED; + goto fixmlist; } } - if (dir->erased || dir->count >= 0xff) { + if (dir->erased) { // try to commit struct lfs_commit commit = { .block = dir->pair[0], @@ -1948,7 +2062,6 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) { goto compact; } - *dir = olddir; return err; } @@ -1961,7 +2074,6 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, if (!lfs_gstate_iszero(&delta)) { err = lfs_dir_getgstate(lfs, dir, &delta); if (err) { - *dir = olddir; return err; } @@ -1973,7 +2085,6 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) { goto compact; } - *dir = olddir; return err; } } @@ -1984,7 +2095,6 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, if (err == LFS_ERR_NOSPC || err == LFS_ERR_CORRUPT) { goto compact; } - *dir = olddir; return err; } @@ -1995,19 +2105,23 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, // and update gstate lfs->gdisk = lfs->gstate; lfs->gdelta = (lfs_gstate_t){0}; - } else { + + goto fixmlist; + } + compact: - // fall back to compaction - lfs_cache_drop(lfs, &lfs->pcache); + // fall back to compaction + lfs_cache_drop(lfs, &lfs->pcache); - int err = lfs_dir_compact(lfs, dir, attrs, attrcount, - dir, 0, dir->count); - if (err) { - *dir = olddir; - return err; - } + state = lfs_dir_splittingcompact(lfs, dir, attrs, attrcount, + dir, 0, dir->count); + if (state < 0) { + return state; } + goto fixmlist; + +fixmlist:; // this complicated bit of logic is for fixing up any active // metadata-pairs that we may have affected // @@ -2015,33 +2129,32 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, // lfs_dir_commit could also be in this list, and even then // we need to copy the pair so they don't get clobbered if we refetch // our mdir. 
+ lfs_block_t oldpair[2] = {pair[0], pair[1]}; for (struct lfs_mlist *d = lfs->mlist; d; d = d->next) { - if (&d->m != dir && lfs_pair_cmp(d->m.pair, olddir.pair) == 0) { + if (lfs_pair_cmp(d->m.pair, oldpair) == 0) { d->m = *dir; - for (int i = 0; i < attrcount; i++) { - if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_DELETE && - d->id == lfs_tag_id(attrs[i].tag)) { - d->m.pair[0] = LFS_BLOCK_NULL; - d->m.pair[1] = LFS_BLOCK_NULL; - } else if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_DELETE && - d->id > lfs_tag_id(attrs[i].tag)) { - d->id -= 1; - if (d->type == LFS_TYPE_DIR) { - ((lfs_dir_t*)d)->pos -= 1; - } - } else if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_CREATE && - d->id >= lfs_tag_id(attrs[i].tag)) { - d->id += 1; - if (d->type == LFS_TYPE_DIR) { - ((lfs_dir_t*)d)->pos += 1; + if (d->m.pair != pair) { + for (int i = 0; i < attrcount; i++) { + if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_DELETE && + d->id == lfs_tag_id(attrs[i].tag)) { + d->m.pair[0] = LFS_BLOCK_NULL; + d->m.pair[1] = LFS_BLOCK_NULL; + } else if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_DELETE && + d->id > lfs_tag_id(attrs[i].tag)) { + d->id -= 1; + if (d->type == LFS_TYPE_DIR) { + ((lfs_dir_t*)d)->pos -= 1; + } + } else if (lfs_tag_type3(attrs[i].tag) == LFS_TYPE_CREATE && + d->id >= lfs_tag_id(attrs[i].tag)) { + d->id += 1; + if (d->type == LFS_TYPE_DIR) { + ((lfs_dir_t*)d)->pos += 1; + } } } } - } - } - for (struct lfs_mlist *d = lfs->mlist; d; d = d->next) { - if (lfs_pair_cmp(d->m.pair, olddir.pair) == 0) { while (d->id >= d->m.count && d->m.split) { // we split and id is on tail now d->id -= d->m.count; @@ -2053,6 +2166,221 @@ static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, } } + return state; +} +#endif + +#ifndef LFS_READONLY +static int lfs_dir_orphaningcommit(lfs_t *lfs, lfs_mdir_t *dir, + const struct lfs_mattr *attrs, int attrcount) { + // check for any inline files that aren't RAM backed and + // forcefully evict them, needed for filesystem consistency + for (lfs_file_t *f = (lfs_file_t*)lfs->mlist; f; f = f->next) { + if (dir != &f->m && lfs_pair_cmp(f->m.pair, dir->pair) == 0 && + f->type == LFS_TYPE_REG && (f->flags & LFS_F_INLINE) && + f->ctz.size > lfs->cfg->cache_size) { + int err = lfs_file_outline(lfs, f); + if (err) { + return err; + } + + err = lfs_file_flush(lfs, f); + if (err) { + return err; + } + } + } + + lfs_block_t lpair[2] = {dir->pair[0], dir->pair[1]}; + lfs_mdir_t ldir = *dir; + lfs_mdir_t pdir; + int state = lfs_dir_relocatingcommit(lfs, &ldir, dir->pair, + attrs, attrcount, &pdir); + if (state < 0) { + return state; + } + + // update if we're not in mlist, note we may have already been + // updated if we are in mlist + if (lfs_pair_cmp(dir->pair, lpair) == 0) { + *dir = ldir; + } + + // commit was successful, but may require other changes in the + // filesystem, these would normally be tail recursive, but we have + // flattened them here avoid unbounded stack usage + + // need to drop? + if (state == LFS_OK_DROPPED) { + // steal state + int err = lfs_dir_getgstate(lfs, dir, &lfs->gdelta); + if (err) { + return err; + } + + // steal tail, note that this can't create a recursive drop + lpair[0] = pdir.pair[0]; + lpair[1] = pdir.pair[1]; + lfs_pair_tole32(dir->tail); + state = lfs_dir_relocatingcommit(lfs, &pdir, lpair, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_TAIL + dir->split, 0x3ff, 8), + dir->tail}), + NULL); + lfs_pair_fromle32(dir->tail); + if (state < 0) { + return state; + } + + ldir = pdir; + } + + // need to relocate? 
+ bool orphans = false; + while (state == LFS_OK_RELOCATED) { + LFS_DEBUG("Relocating {0x%"PRIx32", 0x%"PRIx32"} " + "-> {0x%"PRIx32", 0x%"PRIx32"}", + lpair[0], lpair[1], ldir.pair[0], ldir.pair[1]); + state = 0; + + // update internal root + if (lfs_pair_cmp(lpair, lfs->root) == 0) { + lfs->root[0] = ldir.pair[0]; + lfs->root[1] = ldir.pair[1]; + } + + // update internally tracked dirs + for (struct lfs_mlist *d = lfs->mlist; d; d = d->next) { + if (lfs_pair_cmp(lpair, d->m.pair) == 0) { + d->m.pair[0] = ldir.pair[0]; + d->m.pair[1] = ldir.pair[1]; + } + + if (d->type == LFS_TYPE_DIR && + lfs_pair_cmp(lpair, ((lfs_dir_t*)d)->head) == 0) { + ((lfs_dir_t*)d)->head[0] = ldir.pair[0]; + ((lfs_dir_t*)d)->head[1] = ldir.pair[1]; + } + } + + // find parent + lfs_stag_t tag = lfs_fs_parent(lfs, lpair, &pdir); + if (tag < 0 && tag != LFS_ERR_NOENT) { + return tag; + } + + bool hasparent = (tag != LFS_ERR_NOENT); + if (tag != LFS_ERR_NOENT) { + // note that if we have a parent, we must have a pred, so this will + // always create an orphan + int err = lfs_fs_preporphans(lfs, +1); + if (err) { + return err; + } + + // fix pending move in this pair? this looks like an optimization but + // is in fact _required_ since relocating may outdate the move. + uint16_t moveid = 0x3ff; + if (lfs_gstate_hasmovehere(&lfs->gstate, pdir.pair)) { + moveid = lfs_tag_id(lfs->gstate.tag); + LFS_DEBUG("Fixing move while relocating " + "{0x%"PRIx32", 0x%"PRIx32"} 0x%"PRIx16"\n", + pdir.pair[0], pdir.pair[1], moveid); + lfs_fs_prepmove(lfs, 0x3ff, NULL); + if (moveid < lfs_tag_id(tag)) { + tag -= LFS_MKTAG(0, 1, 0); + } + } + + lfs_block_t ppair[2] = {pdir.pair[0], pdir.pair[1]}; + lfs_pair_tole32(ldir.pair); + state = lfs_dir_relocatingcommit(lfs, &pdir, ppair, LFS_MKATTRS( + {LFS_MKTAG_IF(moveid != 0x3ff, + LFS_TYPE_DELETE, moveid, 0), NULL}, + {tag, ldir.pair}), + NULL); + lfs_pair_fromle32(ldir.pair); + if (state < 0) { + return state; + } + + if (state == LFS_OK_RELOCATED) { + lpair[0] = ppair[0]; + lpair[1] = ppair[1]; + ldir = pdir; + orphans = true; + continue; + } + } + + // find pred + int err = lfs_fs_pred(lfs, lpair, &pdir); + if (err && err != LFS_ERR_NOENT) { + return err; + } + LFS_ASSERT(!(hasparent && err == LFS_ERR_NOENT)); + + // if we can't find dir, it must be new + if (err != LFS_ERR_NOENT) { + if (lfs_gstate_hasorphans(&lfs->gstate)) { + // next step, clean up orphans + err = lfs_fs_preporphans(lfs, -hasparent); + if (err) { + return err; + } + } + + // fix pending move in this pair? this looks like an optimization + // but is in fact _required_ since relocating may outdate the move. + uint16_t moveid = 0x3ff; + if (lfs_gstate_hasmovehere(&lfs->gstate, pdir.pair)) { + moveid = lfs_tag_id(lfs->gstate.tag); + LFS_DEBUG("Fixing move while relocating " + "{0x%"PRIx32", 0x%"PRIx32"} 0x%"PRIx16"\n", + pdir.pair[0], pdir.pair[1], moveid); + lfs_fs_prepmove(lfs, 0x3ff, NULL); + } + + // replace bad pair, either we clean up desync, or no desync occured + lpair[0] = pdir.pair[0]; + lpair[1] = pdir.pair[1]; + lfs_pair_tole32(ldir.pair); + state = lfs_dir_relocatingcommit(lfs, &pdir, lpair, LFS_MKATTRS( + {LFS_MKTAG_IF(moveid != 0x3ff, + LFS_TYPE_DELETE, moveid, 0), NULL}, + {LFS_MKTAG(LFS_TYPE_TAIL + pdir.split, 0x3ff, 8), + ldir.pair}), + NULL); + lfs_pair_fromle32(ldir.pair); + if (state < 0) { + return state; + } + + ldir = pdir; + } + } + + return orphans ? 
LFS_OK_ORPHANED : 0; +} +#endif + +#ifndef LFS_READONLY +static int lfs_dir_commit(lfs_t *lfs, lfs_mdir_t *dir, + const struct lfs_mattr *attrs, int attrcount) { + int orphans = lfs_dir_orphaningcommit(lfs, dir, attrs, attrcount); + if (orphans < 0) { + return orphans; + } + + if (orphans) { + // make sure we've removed all orphans, this is a noop if there + // are none, but if we had nested blocks failures we may have + // created some + int err = lfs_fs_deorphan(lfs, false); + if (err) { + return err; + } + } + return 0; } #endif @@ -2107,7 +2435,7 @@ static int lfs_rawmkdir(lfs_t *lfs, const char *path) { return err; } - // current block end of list? + // current block not end of list? if (cwd.m.split) { // update tails, this creates a desync err = lfs_fs_preporphans(lfs, +1); @@ -2804,12 +3132,12 @@ static int lfs_file_flush(lfs_t *lfs, lfs_file_t *file) { // copy over a byte at a time, leave it up to caching // to make this efficient uint8_t data; - lfs_ssize_t res = lfs_file_rawread(lfs, &orig, &data, 1); + lfs_ssize_t res = lfs_file_flushedread(lfs, &orig, &data, 1); if (res < 0) { return res; } - res = lfs_file_rawwrite(lfs, file, &data, 1); + res = lfs_file_flushedwrite(lfs, file, &data, 1); if (res < 0) { return res; } @@ -2910,23 +3238,11 @@ static int lfs_file_rawsync(lfs_t *lfs, lfs_file_t *file) { } #endif -static lfs_ssize_t lfs_file_rawread(lfs_t *lfs, lfs_file_t *file, +static lfs_ssize_t lfs_file_flushedread(lfs_t *lfs, lfs_file_t *file, void *buffer, lfs_size_t size) { - LFS_ASSERT((file->flags & LFS_O_RDONLY) == LFS_O_RDONLY); - uint8_t *data = buffer; lfs_size_t nsize = size; -#ifndef LFS_READONLY - if (file->flags & LFS_F_WRITING) { - // flush out any writes - int err = lfs_file_flush(lfs, file); - if (err) { - return err; - } - } -#endif - if (file->pos >= file->ctz.size) { // eof if past end return 0; @@ -2983,43 +3299,29 @@ static lfs_ssize_t lfs_file_rawread(lfs_t *lfs, lfs_file_t *file, return size; } -#ifndef LFS_READONLY -static lfs_ssize_t lfs_file_rawwrite(lfs_t *lfs, lfs_file_t *file, - const void *buffer, lfs_size_t size) { - LFS_ASSERT((file->flags & LFS_O_WRONLY) == LFS_O_WRONLY); - - const uint8_t *data = buffer; - lfs_size_t nsize = size; +static lfs_ssize_t lfs_file_rawread(lfs_t *lfs, lfs_file_t *file, + void *buffer, lfs_size_t size) { + LFS_ASSERT((file->flags & LFS_O_RDONLY) == LFS_O_RDONLY); - if (file->flags & LFS_F_READING) { - // drop any reads +#ifndef LFS_READONLY + if (file->flags & LFS_F_WRITING) { + // flush out any writes int err = lfs_file_flush(lfs, file); if (err) { return err; } } +#endif - if ((file->flags & LFS_O_APPEND) && file->pos < file->ctz.size) { - file->pos = file->ctz.size; - } - - if (file->pos + size > lfs->file_max) { - // Larger than file limit? 
- return LFS_ERR_FBIG; - } + return lfs_file_flushedread(lfs, file, buffer, size); +} - if (!(file->flags & LFS_F_WRITING) && file->pos > file->ctz.size) { - // fill with zeros - lfs_off_t pos = file->pos; - file->pos = file->ctz.size; - while (file->pos < pos) { - lfs_ssize_t res = lfs_file_rawwrite(lfs, file, &(uint8_t){0}, 1); - if (res < 0) { - return res; - } - } - } +#ifndef LFS_READONLY +static lfs_ssize_t lfs_file_flushedwrite(lfs_t *lfs, lfs_file_t *file, + const void *buffer, lfs_size_t size) { + const uint8_t *data = buffer; + lfs_size_t nsize = size; if ((file->flags & LFS_F_INLINE) && lfs_max(file->pos+nsize, file->ctz.size) > @@ -3101,9 +3403,51 @@ static lfs_ssize_t lfs_file_rawwrite(lfs_t *lfs, lfs_file_t *file, lfs_alloc_ack(lfs); } - file->flags &= ~LFS_F_ERRED; return size; } + +static lfs_ssize_t lfs_file_rawwrite(lfs_t *lfs, lfs_file_t *file, + const void *buffer, lfs_size_t size) { + LFS_ASSERT((file->flags & LFS_O_WRONLY) == LFS_O_WRONLY); + + if (file->flags & LFS_F_READING) { + // drop any reads + int err = lfs_file_flush(lfs, file); + if (err) { + return err; + } + } + + if ((file->flags & LFS_O_APPEND) && file->pos < file->ctz.size) { + file->pos = file->ctz.size; + } + + if (file->pos + size > lfs->file_max) { + // Larger than file limit? + return LFS_ERR_FBIG; + } + + if (!(file->flags & LFS_F_WRITING) && file->pos > file->ctz.size) { + // fill with zeros + lfs_off_t pos = file->pos; + file->pos = file->ctz.size; + + while (file->pos < pos) { + lfs_ssize_t res = lfs_file_flushedwrite(lfs, file, &(uint8_t){0}, 1); + if (res < 0) { + return res; + } + } + } + + lfs_ssize_t nsize = lfs_file_flushedwrite(lfs, file, buffer, size); + if (nsize < 0) { + return nsize; + } + + file->flags &= ~LFS_F_ERRED; + return nsize; +} #endif static lfs_soff_t lfs_file_rawseek(lfs_t *lfs, lfs_file_t *file, @@ -3456,7 +3800,8 @@ static int lfs_rawrename(lfs_t *lfs, const char *oldpath, const char *newpath) { } lfs->mlist = prevdir.next; - if (prevtag != LFS_ERR_NOENT && lfs_tag_type3(prevtag) == LFS_TYPE_DIR) { + if (prevtag != LFS_ERR_NOENT + && lfs_tag_type3(prevtag) == LFS_TYPE_DIR) { // fix orphan err = lfs_fs_preporphans(lfs, -1); if (err) { @@ -4076,109 +4421,6 @@ static lfs_stag_t lfs_fs_parent(lfs_t *lfs, const lfs_block_t pair[2], } #endif -#ifndef LFS_READONLY -static int lfs_fs_relocate(lfs_t *lfs, - const lfs_block_t oldpair[2], lfs_block_t newpair[2]) { - // update internal root - if (lfs_pair_cmp(oldpair, lfs->root) == 0) { - lfs->root[0] = newpair[0]; - lfs->root[1] = newpair[1]; - } - - // update internally tracked dirs - for (struct lfs_mlist *d = lfs->mlist; d; d = d->next) { - if (lfs_pair_cmp(oldpair, d->m.pair) == 0) { - d->m.pair[0] = newpair[0]; - d->m.pair[1] = newpair[1]; - } - - if (d->type == LFS_TYPE_DIR && - lfs_pair_cmp(oldpair, ((lfs_dir_t*)d)->head) == 0) { - ((lfs_dir_t*)d)->head[0] = newpair[0]; - ((lfs_dir_t*)d)->head[1] = newpair[1]; - } - } - - // find parent - lfs_mdir_t parent; - lfs_stag_t tag = lfs_fs_parent(lfs, oldpair, &parent); - if (tag < 0 && tag != LFS_ERR_NOENT) { - return tag; - } - - if (tag != LFS_ERR_NOENT) { - // update disk, this creates a desync - int err = lfs_fs_preporphans(lfs, +1); - if (err) { - return err; - } - - // fix pending move in this pair? this looks like an optimization but - // is in fact _required_ since relocating may outdate the move. 
- uint16_t moveid = 0x3ff; - if (lfs_gstate_hasmovehere(&lfs->gstate, parent.pair)) { - moveid = lfs_tag_id(lfs->gstate.tag); - LFS_DEBUG("Fixing move while relocating " - "{0x%"PRIx32", 0x%"PRIx32"} 0x%"PRIx16"\n", - parent.pair[0], parent.pair[1], moveid); - lfs_fs_prepmove(lfs, 0x3ff, NULL); - if (moveid < lfs_tag_id(tag)) { - tag -= LFS_MKTAG(0, 1, 0); - } - } - - lfs_pair_tole32(newpair); - err = lfs_dir_commit(lfs, &parent, LFS_MKATTRS( - {LFS_MKTAG_IF(moveid != 0x3ff, - LFS_TYPE_DELETE, moveid, 0), NULL}, - {tag, newpair})); - lfs_pair_fromle32(newpair); - if (err) { - return err; - } - - // next step, clean up orphans - err = lfs_fs_preporphans(lfs, -1); - if (err) { - return err; - } - } - - // find pred - int err = lfs_fs_pred(lfs, oldpair, &parent); - if (err && err != LFS_ERR_NOENT) { - return err; - } - - // if we can't find dir, it must be new - if (err != LFS_ERR_NOENT) { - // fix pending move in this pair? this looks like an optimization but - // is in fact _required_ since relocating may outdate the move. - uint16_t moveid = 0x3ff; - if (lfs_gstate_hasmovehere(&lfs->gstate, parent.pair)) { - moveid = lfs_tag_id(lfs->gstate.tag); - LFS_DEBUG("Fixing move while relocating " - "{0x%"PRIx32", 0x%"PRIx32"} 0x%"PRIx16"\n", - parent.pair[0], parent.pair[1], moveid); - lfs_fs_prepmove(lfs, 0x3ff, NULL); - } - - // replace bad pair, either we clean up desync, or no desync occurred - lfs_pair_tole32(newpair); - err = lfs_dir_commit(lfs, &parent, LFS_MKATTRS( - {LFS_MKTAG_IF(moveid != 0x3ff, - LFS_TYPE_DELETE, moveid, 0), NULL}, - {LFS_MKTAG(LFS_TYPE_TAIL + parent.split, 0x3ff, 8), newpair})); - lfs_pair_fromle32(newpair); - if (err) { - return err; - } - } - - return 0; -} -#endif - #ifndef LFS_READONLY static int lfs_fs_preporphans(lfs_t *lfs, int8_t orphans) { LFS_ASSERT(lfs_tag_size(lfs->gstate.tag) > 0 || orphans >= 0); @@ -4233,77 +4475,129 @@ static int lfs_fs_demove(lfs_t *lfs) { #endif #ifndef LFS_READONLY -static int lfs_fs_deorphan(lfs_t *lfs) { +static int lfs_fs_deorphan(lfs_t *lfs, bool powerloss) { if (!lfs_gstate_hasorphans(&lfs->gstate)) { return 0; } - // Fix any orphans - lfs_mdir_t pdir = {.split = true, .tail = {0, 1}}; - lfs_mdir_t dir; - - // iterate over all directory directory entries - while (!lfs_pair_isnull(pdir.tail)) { - int err = lfs_dir_fetch(lfs, &dir, pdir.tail); - if (err) { - return err; - } + int8_t found = 0; +restart: + { + // Fix any orphans + lfs_mdir_t pdir = {.split = true, .tail = {0, 1}}; + lfs_mdir_t dir; - // check head blocks for orphans - if (!pdir.split) { - // check if we have a parent - lfs_mdir_t parent; - lfs_stag_t tag = lfs_fs_parent(lfs, pdir.tail, &parent); - if (tag < 0 && tag != LFS_ERR_NOENT) { - return tag; + // iterate over all directory directory entries + while (!lfs_pair_isnull(pdir.tail)) { + int err = lfs_dir_fetch(lfs, &dir, pdir.tail); + if (err) { + return err; } - if (tag == LFS_ERR_NOENT) { - // we are an orphan - LFS_DEBUG("Fixing orphan {0x%"PRIx32", 0x%"PRIx32"}", - pdir.tail[0], pdir.tail[1]); - - err = lfs_dir_drop(lfs, &pdir, &dir); - if (err) { - return err; + // check head blocks for orphans + if (!pdir.split) { + // check if we have a parent + lfs_mdir_t parent; + lfs_stag_t tag = lfs_fs_parent(lfs, pdir.tail, &parent); + if (tag < 0 && tag != LFS_ERR_NOENT) { + return tag; } - // refetch tail - continue; - } + // note we only check for full orphans if we may have had a + // power-loss, otherwise orphans are created intentionally + // during operations such as lfs_mkdir + if (tag == LFS_ERR_NOENT && 
powerloss) { + // we are an orphan + LFS_DEBUG("Fixing orphan {0x%"PRIx32", 0x%"PRIx32"}", + pdir.tail[0], pdir.tail[1]); - lfs_block_t pair[2]; - lfs_stag_t res = lfs_dir_get(lfs, &parent, - LFS_MKTAG(0x7ff, 0x3ff, 0), tag, pair); - if (res < 0) { - return res; - } - lfs_pair_fromle32(pair); - - if (!lfs_pair_sync(pair, pdir.tail)) { - // we have desynced - LFS_DEBUG("Fixing half-orphan {0x%"PRIx32", 0x%"PRIx32"} " - "-> {0x%"PRIx32", 0x%"PRIx32"}", - pdir.tail[0], pdir.tail[1], pair[0], pair[1]); - - lfs_pair_tole32(pair); - err = lfs_dir_commit(lfs, &pdir, LFS_MKATTRS( - {LFS_MKTAG(LFS_TYPE_SOFTTAIL, 0x3ff, 8), pair})); - lfs_pair_fromle32(pair); - if (err) { - return err; + // steal state + err = lfs_dir_getgstate(lfs, &dir, &lfs->gdelta); + if (err) { + return err; + } + + // steal tail + lfs_pair_tole32(dir.tail); + int state = lfs_dir_orphaningcommit(lfs, &pdir, LFS_MKATTRS( + {LFS_MKTAG(LFS_TYPE_TAIL + dir.split, 0x3ff, 8), + dir.tail})); + lfs_pair_fromle32(dir.tail); + if (state < 0) { + return state; + } + + found += 1; + + // did our commit create more orphans? + if (state == LFS_OK_ORPHANED) { + goto restart; + } + + // refetch tail + continue; } - // refetch tail - continue; + if (tag != LFS_ERR_NOENT) { + lfs_block_t pair[2]; + lfs_stag_t state = lfs_dir_get(lfs, &parent, + LFS_MKTAG(0x7ff, 0x3ff, 0), tag, pair); + if (state < 0) { + return state; + } + lfs_pair_fromle32(pair); + + if (!lfs_pair_sync(pair, pdir.tail)) { + // we have desynced + LFS_DEBUG("Fixing half-orphan " + "{0x%"PRIx32", 0x%"PRIx32"} " + "-> {0x%"PRIx32", 0x%"PRIx32"}", + pdir.tail[0], pdir.tail[1], pair[0], pair[1]); + + // fix pending move in this pair? this looks like an + // optimization but is in fact _required_ since + // relocating may outdate the move. + uint16_t moveid = 0x3ff; + if (lfs_gstate_hasmovehere(&lfs->gstate, pdir.pair)) { + moveid = lfs_tag_id(lfs->gstate.tag); + LFS_DEBUG("Fixing move while fixing orphans " + "{0x%"PRIx32", 0x%"PRIx32"} 0x%"PRIx16"\n", + pdir.pair[0], pdir.pair[1], moveid); + lfs_fs_prepmove(lfs, 0x3ff, NULL); + } + + lfs_pair_tole32(pair); + state = lfs_dir_orphaningcommit(lfs, &pdir, LFS_MKATTRS( + {LFS_MKTAG_IF(moveid != 0x3ff, + LFS_TYPE_DELETE, moveid, 0), NULL}, + {LFS_MKTAG(LFS_TYPE_SOFTTAIL, 0x3ff, 8), + pair})); + lfs_pair_fromle32(pair); + if (state < 0) { + return state; + } + + found += 1; + + // did our commit create more orphans? 
+ if (state == LFS_OK_ORPHANED) { + goto restart; + } + + // refetch tail + continue; + } + } } - } - pdir = dir; + pdir = dir; + } } // mark orphans as fixed - return lfs_fs_preporphans(lfs, -lfs_gstate_getorphans(&lfs->gstate)); + return lfs_fs_preporphans(lfs, -lfs_min( + lfs_gstate_getorphans(&lfs->gstate), + found)); } #endif @@ -4314,7 +4608,7 @@ static int lfs_fs_forceconsistency(lfs_t *lfs) { return err; } - err = lfs_fs_deorphan(lfs); + err = lfs_fs_deorphan(lfs, true); if (err) { return err; } diff --git a/scripts/code.py b/scripts/code.py index 08b33a10..b394e9cd 100755 --- a/scripts/code.py +++ b/scripts/code.py @@ -15,7 +15,7 @@ import collections as co -OBJ_PATHS = ['*.o', 'bd/*.o'] +OBJ_PATHS = ['*.o'] def collect(paths, **args): results = co.defaultdict(lambda: 0) @@ -31,7 +31,8 @@ def collect(paths, **args): proc = sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE if not args.get('verbose') else None, - universal_newlines=True) + universal_newlines=True, + errors='replace') for line in proc.stdout: m = pattern.match(line) if m: @@ -48,16 +49,30 @@ def collect(paths, **args): # map to source files if args.get('build_dir'): file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) # discard internal functions - if func.startswith('__'): - continue + if not args.get('everything'): + if func.startswith('__'): + continue # discard .8449 suffixes created by optimizer func = re.sub('\.[0-9]+', '', func) + flat_results.append((file, func, size)) return flat_results def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find sizes if not args.get('use', None): # find .o files @@ -75,13 +90,14 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], - int(result['size'])) - for result in r] + result['name'], + int(result['code_size'])) + for result in r + if result.get('code_size') not in {None, ''}] total = 0 for _, _, size in results: @@ -89,13 +105,17 @@ def main(**args): # find previous results? if args.get('diff'): - with open(args['diff']) as f: - r = csv.DictReader(f) - prev_results = [ - ( result['file'], - result['function'], - int(result['size'])) - for result in r] + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + prev_results = [ + ( result['file'], + result['name'], + int(result['code_size'])) + for result in r + if result.get('code_size') not in {None, ''}] + except FileNotFoundError: + prev_results = [] prev_total = 0 for _, _, size in prev_results: @@ -103,14 +123,34 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'size']) - for file, func, size in sorted(results): - w.writerow((file, func, size)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? 
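The new `-o`/`-m` plumbing lets all of the measurement scripts write into one shared CSV keyed by `(file, name)`. A minimal standalone sketch of that merge idea, with invented rows and sizes (only the column names come from these scripts):

import collections as co
import csv
import sys

merged = co.defaultdict(dict)

# rows an earlier script run might already have written (invented values)
for row in [{'file': 'lfs.c', 'name': 'lfs_mount', 'data_size': '16'}]:
    key = (row.pop('file'), row.pop('name'))
    merged[key].update(row)

# layer this script's measurement on top, keyed the same way
for file, func, size in [('lfs.c', 'lfs_mount', 1234)]:
    merged[(file, func)]['code_size'] = size

w = csv.DictWriter(sys.stdout, ['file', 'name', 'data_size', 'code_size'])
w.writeheader()
for (file, func), result in sorted(merged.items()):
    w.writerow({'file': file, 'name': func, **result})

The merge branch that follows does the same thing, but tolerates a missing file and carries any other columns through untouched.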
+ if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('code_size', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, size in results: + merged_results[(file, func)]['code_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'code_size']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: 0) for file, func, size in results: entry = (file if by == 'file' else func) @@ -126,45 +166,67 @@ def diff_entries(olds, news): diff[name] = (old, new, new-old, (new-old)/old if old else 1.0) return diff + def sorted_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1], x)) + else: + return sorted(entries) + + def sorted_diff_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1][1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1][1], x)) + else: + return sorted(entries, key=lambda x: (-x[1][3], x)) + def print_header(by=''): if not args.get('diff'): print('%-36s %7s' % (by, 'size')) else: print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff')) - def print_entries(by='function'): + def print_entry(name, size): + print("%-36s %7d" % (name, size)) + + def print_diff_entry(name, old, new, diff, ratio): + print("%-36s %7s %7s %+7d%s" % (name, + old or "-", + new or "-", + diff, + ' (%+.1f%%)' % (100*ratio) if ratio else '')) + + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): print_header(by=by) - for name, size in sorted(entries.items()): - print("%-36s %7d" % (name, size)) + for name, size in sorted_entries(entries.items()): + print_entry(name, size) else: prev_entries = dedup_entries(prev_results, by=by) diff = diff_entries(prev_entries, entries) print_header(by='%s (%d added, %d removed)' % (by, sum(1 for old, _, _, _ in diff.values() if not old), sum(1 for _, new, _, _ in diff.values() if not new))) - for name, (old, new, diff, ratio) in sorted(diff.items(), - key=lambda x: (-x[1][3], x)): + for name, (old, new, diff, ratio) in sorted_diff_entries( + diff.items()): if ratio or args.get('all'): - print("%-36s %7s %7s %+7d%s" % (name, - old or "-", - new or "-", - diff, - ' (%+.1f%%)' % (100*ratio) if ratio else '')) + print_diff_entry(name, old, new, diff, ratio) def print_totals(): if not args.get('diff'): - print("%-36s %7d" % ('TOTAL', total)) + print_entry('TOTAL', total) else: - ratio = (total-prev_total)/prev_total if prev_total else 1.0 - print("%-36s %7s %7s %+7d%s" % ( - 'TOTAL', - prev_total if prev_total else '-', - total if total else '-', + ratio = (0.0 if not prev_total and not total + else 1.0 if not prev_total + else (total-prev_total)/prev_total) + print_diff_entry('TOTAL', + prev_total, total, total-prev_total, - ' (%+.1f%%)' % (100*ratio) if ratio else '')) + ratio) if args.get('quiet'): pass @@ -175,7 +237,7 @@ def print_totals(): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() if 
__name__ == "__main__": @@ -188,22 +250,30 @@ def print_totals(): or a list of paths. Defaults to %r." % OBJ_PATHS) parser.add_argument('-v', '--verbose', action='store_true', help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") parser.add_argument('-o', '--output', help="Specify CSV file to store results.") parser.add_argument('-u', '--use', help="Don't compile and find code sizes, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff code size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-A', '--everything', action='store_true', + help="Include builtin and libc specific symbols.") + parser.add_argument('-s', '--size-sort', action='store_true', + help="Sort by size.") + parser.add_argument('-S', '--reverse-size-sort', action='store_true', + help="Sort by size, but backwards.") + parser.add_argument('-F', '--files', action='store_true', help="Show file-level code sizes. Note this does not include padding! " "So sizes may differ from other tools.") - parser.add_argument('-s', '--summary', action='store_true', + parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total code size.") - parser.add_argument('-q', '--quiet', action='store_true', - help="Don't show anything, useful with -o.") - parser.add_argument('--type', default='tTrRdDbB', + parser.add_argument('--type', default='tTrRdD', help="Type of symbols to report, this uses the same single-character " "type-names emitted by nm. Defaults to %(default)r.") parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(), diff --git a/scripts/coverage.py b/scripts/coverage.py index 6f1f54fa..b3a90ed2 100755 --- a/scripts/coverage.py +++ b/scripts/coverage.py @@ -55,8 +55,9 @@ def func_from_lineno(file, lineno): for (file, func), (hits, count) in reduced_funcs.items(): # discard internal/testing functions (test_* injected with # internal testing) - if func.startswith('__') or func.startswith('test_'): - continue + if not args.get('everything'): + if func.startswith('__') or func.startswith('test_'): + continue # discard .8449 suffixes created by optimizer func = re.sub('\.[0-9]+', '', func) results.append((file, func, hits, count)) @@ -65,6 +66,15 @@ def func_from_lineno(file, lineno): def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + # find coverage if not args.get('use'): # find *.info files @@ -82,14 +92,16 @@ def main(**args): results = collect(paths, **args) else: - with open(args['use']) as f: + with openio(args['use']) as f: r = csv.DictReader(f) results = [ ( result['file'], - result['function'], - int(result['hits']), - int(result['count'])) - for result in r] + result['name'], + int(result['coverage_hits']), + int(result['coverage_count'])) + for result in r + if result.get('coverage_hits') not in {None, ''} + if result.get('coverage_count') not in {None, ''}] total_hits, total_count = 0, 0 for _, _, hits, count in results: @@ -98,14 +110,19 @@ def main(**args): # find previous results? 
if args.get('diff'): - with open(args['diff']) as f: - r = csv.DictReader(f) - prev_results = [ - ( result['file'], - result['function'], - int(result['hits']), - int(result['count'])) - for result in r] + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + prev_results = [ + ( result['file'], + result['name'], + int(result['coverage_hits']), + int(result['coverage_count'])) + for result in r + if result.get('coverage_hits') not in {None, ''} + if result.get('coverage_count') not in {None, ''}] + except FileNotFoundError: + prev_results = [] prev_total_hits, prev_total_count = 0, 0 for _, _, hits, count in prev_results: @@ -114,14 +131,36 @@ def main(**args): # write results to CSV if args.get('output'): - with open(args['output'], 'w') as f: - w = csv.writer(f) - w.writerow(['file', 'function', 'hits', 'count']) - for file, func, hits, count in sorted(results): - w.writerow((file, func, hits, count)) + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('coverage_hits', None) + result.pop('coverage_count', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, hits, count in results: + merged_results[(file, func)]['coverage_hits'] = hits + merged_results[(file, func)]['coverage_count'] = count + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'coverage_hits', 'coverage_count']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) # print results - def dedup_entries(results, by='function'): + def dedup_entries(results, by='name'): entries = co.defaultdict(lambda: (0, 0)) for file, func, hits, count in results: entry = (file if by == 'file' else func) @@ -147,23 +186,59 @@ def diff_entries(olds, news): - (old_hits/old_count if old_count else 1.0))) return diff + def sorted_entries(entries): + if args.get('coverage_sort'): + return sorted(entries, key=lambda x: (-(x[1][0]/x[1][1] if x[1][1] else -1), x)) + elif args.get('reverse_coverage_sort'): + return sorted(entries, key=lambda x: (+(x[1][0]/x[1][1] if x[1][1] else -1), x)) + else: + return sorted(entries) + + def sorted_diff_entries(entries): + if args.get('coverage_sort'): + return sorted(entries, key=lambda x: (-(x[1][2]/x[1][3] if x[1][3] else -1), x)) + elif args.get('reverse_coverage_sort'): + return sorted(entries, key=lambda x: (+(x[1][2]/x[1][3] if x[1][3] else -1), x)) + else: + return sorted(entries, key=lambda x: (-x[1][6], x)) + def print_header(by=''): if not args.get('diff'): print('%-36s %19s' % (by, 'hits/line')) else: print('%-36s %19s %19s %11s' % (by, 'old', 'new', 'diff')) - def print_entries(by='function'): + def print_entry(name, hits, count): + print("%-36s %11s %7s" % (name, + '%d/%d' % (hits, count) + if count else '-', + '%.1f%%' % (100*hits/count) + if count else '-')) + + def print_diff_entry(name, + old_hits, old_count, + new_hits, new_count, + diff_hits, diff_count, + ratio): + print("%-36s %11s %7s %11s %7s %11s%s" % (name, + '%d/%d' % (old_hits, old_count) + if old_count else '-', + '%.1f%%' % (100*old_hits/old_count) + if old_count else '-', + '%d/%d' % (new_hits, new_count) + if new_count else '-', + '%.1f%%' % (100*new_hits/new_count) + if new_count else '-', + '%+d/%+d' % 
(diff_hits, diff_count), + ' (%+.1f%%)' % (100*ratio) if ratio else '')) + + def print_entries(by='name'): entries = dedup_entries(results, by=by) if not args.get('diff'): print_header(by=by) - for name, (hits, count) in sorted(entries.items()): - print("%-36s %11s %7s" % (name, - '%d/%d' % (hits, count) - if count else '-', - '%.1f%%' % (100*hits/count) - if count else '-')) + for name, (hits, count) in sorted_entries(entries.items()): + print_entry(name, hits, count) else: prev_entries = dedup_entries(prev_results, by=by) diff = diff_entries(prev_entries, entries) @@ -173,45 +248,28 @@ def print_entries(by='function'): for name, ( old_hits, old_count, new_hits, new_count, - diff_hits, diff_count, ratio) in sorted(diff.items(), - key=lambda x: (-x[1][6], x)): + diff_hits, diff_count, ratio) in sorted_diff_entries( + diff.items()): if ratio or args.get('all'): - print("%-36s %11s %7s %11s %7s %11s%s" % (name, - '%d/%d' % (old_hits, old_count) - if old_count else '-', - '%.1f%%' % (100*old_hits/old_count) - if old_count else '-', - '%d/%d' % (new_hits, new_count) - if new_count else '-', - '%.1f%%' % (100*new_hits/new_count) - if new_count else '-', - '%+d/%+d' % (diff_hits, diff_count), - ' (%+.1f%%)' % (100*ratio) if ratio else '')) + print_diff_entry(name, + old_hits, old_count, + new_hits, new_count, + diff_hits, diff_count, + ratio) def print_totals(): if not args.get('diff'): - print("%-36s %11s %7s" % ('TOTAL', - '%d/%d' % (total_hits, total_count) - if total_count else '-', - '%.1f%%' % (100*total_hits/total_count) - if total_count else '-')) + print_entry('TOTAL', total_hits, total_count) else: ratio = ((total_hits/total_count if total_count else 1.0) - (prev_total_hits/prev_total_count if prev_total_count else 1.0)) - print("%-36s %11s %7s %11s %7s %11s%s" % ('TOTAL', - '%d/%d' % (prev_total_hits, prev_total_count) - if prev_total_count else '-', - '%.1f%%' % (100*prev_total_hits/prev_total_count) - if prev_total_count else '-', - '%d/%d' % (total_hits, total_count) - if total_count else '-', - '%.1f%%' % (100*total_hits/total_count) - if total_count else '-', - '%+d/%+d' % (total_hits-prev_total_hits, - total_count-prev_total_count), - ' (%+.1f%%)' % (100*ratio) if ratio else '')) + print_diff_entry('TOTAL', + prev_total_hits, prev_total_count, + total_hits, total_count, + total_hits-prev_total_hits, total_count-prev_total_count, + ratio) if args.get('quiet'): pass @@ -222,7 +280,7 @@ def print_totals(): print_entries(by='file') print_totals() else: - print_entries(by='function') + print_entries(by='name') print_totals() if __name__ == "__main__": @@ -243,12 +301,23 @@ def print_totals(): help="Don't do any work, instead use this CSV file.") parser.add_argument('-d', '--diff', help="Specify CSV file to diff code size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") parser.add_argument('-a', '--all', action='store_true', help="Show all functions, not just the ones that changed.") - parser.add_argument('--files', action='store_true', + parser.add_argument('-A', '--everything', action='store_true', + help="Include builtin and libc specific symbols.") + parser.add_argument('-s', '--coverage-sort', action='store_true', + help="Sort by coverage.") + parser.add_argument('-S', '--reverse-coverage-sort', action='store_true', + help="Sort by coverage, but backwards.") + parser.add_argument('-F', '--files', action='store_true', help="Show file-level coverage.") - parser.add_argument('-s', '--summary', action='store_true', + 
parser.add_argument('-Y', '--summary', action='store_true', help="Only show the total coverage.") parser.add_argument('-q', '--quiet', action='store_true', help="Don't show anything, useful with -o.") + parser.add_argument('--build-dir', + help="Specify the relative build directory. Used to map object files \ + to the correct source files.") sys.exit(main(**vars(parser.parse_args()))) diff --git a/scripts/data.py b/scripts/data.py new file mode 100755 index 00000000..4b8e00da --- /dev/null +++ b/scripts/data.py @@ -0,0 +1,283 @@ +#!/usr/bin/env python3 +# +# Script to find data size at the function level. Basically just a bit wrapper +# around nm with some extra conveniences for comparing builds. Heavily inspired +# by Linux's Bloat-O-Meter. +# + +import os +import glob +import itertools as it +import subprocess as sp +import shlex +import re +import csv +import collections as co + + +OBJ_PATHS = ['*.o'] + +def collect(paths, **args): + results = co.defaultdict(lambda: 0) + pattern = re.compile( + '^(?P[0-9a-fA-F]+)' + + ' (?P[%s])' % re.escape(args['type']) + + ' (?P.+?)$') + for path in paths: + # note nm-tool may contain extra args + cmd = args['nm_tool'] + ['--size-sort', path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace') + for line in proc.stdout: + m = pattern.match(line) + if m: + results[(path, m.group('func'))] += int(m.group('size'), 16) + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + sys.exit(-1) + + flat_results = [] + for (file, func), size in results.items(): + # map to source files + if args.get('build_dir'): + file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) + # discard internal functions + if not args.get('everything'): + if func.startswith('__'): + continue + # discard .8449 suffixes created by optimizer + func = re.sub('\.[0-9]+', '', func) + flat_results.append((file, func, size)) + + return flat_results + +def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + + # find sizes + if not args.get('use', None): + # find .o files + paths = [] + for path in args['obj_paths']: + if os.path.isdir(path): + path = path + '/*.o' + + for path in glob.glob(path): + paths.append(path) + + if not paths: + print('no .obj files found in %r?' % args['obj_paths']) + sys.exit(-1) + + results = collect(paths, **args) + else: + with openio(args['use']) as f: + r = csv.DictReader(f) + results = [ + ( result['file'], + result['name'], + int(result['data_size'])) + for result in r + if result.get('data_size') not in {None, ''}] + + total = 0 + for _, _, size in results: + total += size + + # find previous results? 
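Like code.py, this script leans on `nm --size-sort`, which in its default BSD output prints one `<size> <type> <name>` line per symbol with the size in hex. A small sketch of that parse with a made-up input line (the default `dDbB` type filter selects data/bss symbols):

import re

pattern = re.compile(
    '^(?P<size>[0-9a-fA-F]+)'
    ' (?P<type>[dDbB])'
    ' (?P<func>.+?)$')

line = '00000018 b lfs_buffer'   # invented nm output line
m = pattern.match(line)
if m:
    print(m.group('func'), int(m.group('size'), 16))   # lfs_buffer 24

The `--diff` branch that follows reads a previous `data_size` CSV the same way the `-u` branch above does.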
+ if args.get('diff'): + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + prev_results = [ + ( result['file'], + result['name'], + int(result['data_size'])) + for result in r + if result.get('data_size') not in {None, ''}] + except FileNotFoundError: + prev_results = [] + + prev_total = 0 + for _, _, size in prev_results: + prev_total += size + + # write results to CSV + if args.get('output'): + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('data_size', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, size in results: + merged_results[(file, func)]['data_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'data_size']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) + + # print results + def dedup_entries(results, by='name'): + entries = co.defaultdict(lambda: 0) + for file, func, size in results: + entry = (file if by == 'file' else func) + entries[entry] += size + return entries + + def diff_entries(olds, news): + diff = co.defaultdict(lambda: (0, 0, 0, 0)) + for name, new in news.items(): + diff[name] = (0, new, new, 1.0) + for name, old in olds.items(): + _, new, _, _ = diff[name] + diff[name] = (old, new, new-old, (new-old)/old if old else 1.0) + return diff + + def sorted_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1], x)) + else: + return sorted(entries) + + def sorted_diff_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1][1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1][1], x)) + else: + return sorted(entries, key=lambda x: (-x[1][3], x)) + + def print_header(by=''): + if not args.get('diff'): + print('%-36s %7s' % (by, 'size')) + else: + print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff')) + + def print_entry(name, size): + print("%-36s %7d" % (name, size)) + + def print_diff_entry(name, old, new, diff, ratio): + print("%-36s %7s %7s %+7d%s" % (name, + old or "-", + new or "-", + diff, + ' (%+.1f%%)' % (100*ratio) if ratio else '')) + + def print_entries(by='name'): + entries = dedup_entries(results, by=by) + + if not args.get('diff'): + print_header(by=by) + for name, size in sorted_entries(entries.items()): + print_entry(name, size) + else: + prev_entries = dedup_entries(prev_results, by=by) + diff = diff_entries(prev_entries, entries) + print_header(by='%s (%d added, %d removed)' % (by, + sum(1 for old, _, _, _ in diff.values() if not old), + sum(1 for _, new, _, _ in diff.values() if not new))) + for name, (old, new, diff, ratio) in sorted_diff_entries( + diff.items()): + if ratio or args.get('all'): + print_diff_entry(name, old, new, diff, ratio) + + def print_totals(): + if not args.get('diff'): + print_entry('TOTAL', total) + else: + ratio = (0.0 if not prev_total and not total + else 1.0 if not prev_total + else (total-prev_total)/prev_total) + print_diff_entry('TOTAL', + prev_total, total, + total-prev_total, + ratio) + + if args.get('quiet'): + pass + elif args.get('summary'): + print_header() + 
print_totals() + elif args.get('files'): + print_entries(by='file') + print_totals() + else: + print_entries(by='name') + print_totals() + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser( + description="Find data size at the function level.") + parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS, + help="Description of where to find *.o files. May be a directory \ + or a list of paths. Defaults to %r." % OBJ_PATHS) + parser.add_argument('-v', '--verbose', action='store_true', + help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") + parser.add_argument('-o', '--output', + help="Specify CSV file to store results.") + parser.add_argument('-u', '--use', + help="Don't compile and find data sizes, instead use this CSV file.") + parser.add_argument('-d', '--diff', + help="Specify CSV file to diff data size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") + parser.add_argument('-a', '--all', action='store_true', + help="Show all functions, not just the ones that changed.") + parser.add_argument('-A', '--everything', action='store_true', + help="Include builtin and libc specific symbols.") + parser.add_argument('-s', '--size-sort', action='store_true', + help="Sort by size.") + parser.add_argument('-S', '--reverse-size-sort', action='store_true', + help="Sort by size, but backwards.") + parser.add_argument('-F', '--files', action='store_true', + help="Show file-level data sizes. Note this does not include padding! " + "So sizes may differ from other tools.") + parser.add_argument('-Y', '--summary', action='store_true', + help="Only show the total data size.") + parser.add_argument('--type', default='dDbB', + help="Type of symbols to report, this uses the same single-character " + "type-names emitted by nm. Defaults to %(default)r.") + parser.add_argument('--nm-tool', default=['nm'], type=lambda x: x.split(), + help="Path to the nm tool to use.") + parser.add_argument('--build-dir', + help="Specify the relative build directory. Used to map object files \ + to the correct source files.") + sys.exit(main(**vars(parser.parse_args()))) diff --git a/scripts/stack.py b/scripts/stack.py new file mode 100755 index 00000000..0c652d8d --- /dev/null +++ b/scripts/stack.py @@ -0,0 +1,430 @@ +#!/usr/bin/env python3 +# +# Script to find stack usage at the function level. Will detect recursion and +# report as infinite stack usage. 
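A minimal sketch of what "detect recursion and report as infinite stack usage" means in practice: the worst-case limit is computed by walking callees depth-first, and any cycle short-circuits to `inf`. The call graph and frame sizes here are invented:

# frame size in bytes and direct callees per function (invented)
graph = {
    'lfs_mount':     (64, {'lfs_dir_fetch'}),
    'lfs_dir_fetch': (112, set()),
    'recurse':       (32, {'recurse'}),
}

def find_limit(name, seen=frozenset()):
    if name not in graph:
        return 0
    frame, targets = graph[name]
    limit = 0
    for target in targets:
        if target in seen:
            return float('inf')   # cycle found -> unbounded stack
        limit = max(limit, find_limit(target, seen | {target}))
    return frame + limit

print(find_limit('lfs_mount'))   # 176
print(find_limit('recurse'))     # inf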
+# + +import os +import glob +import itertools as it +import re +import csv +import collections as co +import math as m + + +CI_PATHS = ['*.ci'] + +def collect(paths, **args): + # parse the vcg format + k_pattern = re.compile('([a-z]+)\s*:', re.DOTALL) + v_pattern = re.compile('(?:"(.*?)"|([a-z]+))', re.DOTALL) + def parse_vcg(rest): + def parse_vcg(rest): + node = [] + while True: + rest = rest.lstrip() + m = k_pattern.match(rest) + if not m: + return (node, rest) + k, rest = m.group(1), rest[m.end(0):] + + rest = rest.lstrip() + if rest.startswith('{'): + v, rest = parse_vcg(rest[1:]) + assert rest[0] == '}', "unexpected %r" % rest[0:1] + rest = rest[1:] + node.append((k, v)) + else: + m = v_pattern.match(rest) + assert m, "unexpected %r" % rest[0:1] + v, rest = m.group(1) or m.group(2), rest[m.end(0):] + node.append((k, v)) + + node, rest = parse_vcg(rest) + assert rest == '', "unexpected %r" % rest[0:1] + return node + + # collect into functions + results = co.defaultdict(lambda: (None, None, 0, set())) + f_pattern = re.compile( + r'([^\\]*)\\n([^:]*)[^\\]*\\n([0-9]+) bytes \((.*)\)') + for path in paths: + with open(path) as f: + vcg = parse_vcg(f.read()) + for k, graph in vcg: + if k != 'graph': + continue + for k, info in graph: + if k == 'node': + info = dict(info) + m = f_pattern.match(info['label']) + if m: + function, file, size, type = m.groups() + if not args.get('quiet') and type != 'static': + print('warning: found non-static stack for %s (%s)' + % (function, type)) + _, _, _, targets = results[info['title']] + results[info['title']] = ( + file, function, int(size), targets) + elif k == 'edge': + info = dict(info) + _, _, _, targets = results[info['sourcename']] + targets.add(info['targetname']) + else: + continue + + if not args.get('everything'): + for source, (s_file, s_function, _, _) in list(results.items()): + # discard internal functions + if s_file.startswith('<') or s_file.startswith('/usr/include'): + del results[source] + + # find maximum stack size recursively, this requires also detecting cycles + # (in case of recursion) + def find_limit(source, seen=None): + seen = seen or set() + if source not in results: + return 0 + _, _, frame, targets = results[source] + + limit = 0 + for target in targets: + if target in seen: + # found a cycle + return float('inf') + limit_ = find_limit(target, seen | {target}) + limit = max(limit, limit_) + + return frame + limit + + def find_deps(targets): + deps = set() + for target in targets: + if target in results: + t_file, t_function, _, _ = results[target] + deps.add((t_file, t_function)) + return deps + + # flatten into a list + flat_results = [] + for source, (s_file, s_function, frame, targets) in results.items(): + limit = find_limit(source) + deps = find_deps(targets) + flat_results.append((s_file, s_function, frame, limit, deps)) + + return flat_results + +def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + + # find sizes + if not args.get('use', None): + # find .ci files + paths = [] + for path in args['ci_paths']: + if os.path.isdir(path): + path = path + '/*.ci' + + for path in glob.glob(path): + paths.append(path) + + if not paths: + print('no .ci files found in %r?' 
% args['ci_paths']) + sys.exit(-1) + + results = collect(paths, **args) + else: + with openio(args['use']) as f: + r = csv.DictReader(f) + results = [ + ( result['file'], + result['name'], + int(result['stack_frame']), + float(result['stack_limit']), # note limit can be inf + set()) + for result in r + if result.get('stack_frame') not in {None, ''} + if result.get('stack_limit') not in {None, ''}] + + total_frame = 0 + total_limit = 0 + for _, _, frame, limit, _ in results: + total_frame += frame + total_limit = max(total_limit, limit) + + # find previous results? + if args.get('diff'): + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + prev_results = [ + ( result['file'], + result['name'], + int(result['stack_frame']), + float(result['stack_limit']), + set()) + for result in r + if result.get('stack_frame') not in {None, ''} + if result.get('stack_limit') not in {None, ''}] + except FileNotFoundError: + prev_results = [] + + prev_total_frame = 0 + prev_total_limit = 0 + for _, _, frame, limit, _ in prev_results: + prev_total_frame += frame + prev_total_limit = max(prev_total_limit, limit) + + # write results to CSV + if args.get('output'): + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + func = result.pop('name', '') + result.pop('stack_frame', None) + result.pop('stack_limit', None) + merged_results[(file, func)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, func, frame, limit, _ in results: + merged_results[(file, func)]['stack_frame'] = frame + merged_results[(file, func)]['stack_limit'] = limit + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'stack_frame', 'stack_limit']) + w.writeheader() + for (file, func), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': func, **result}) + + # print results + def dedup_entries(results, by='name'): + entries = co.defaultdict(lambda: (0, 0, set())) + for file, func, frame, limit, deps in results: + entry = (file if by == 'file' else func) + entry_frame, entry_limit, entry_deps = entries[entry] + entries[entry] = ( + entry_frame + frame, + max(entry_limit, limit), + entry_deps | {file if by == 'file' else func + for file, func in deps}) + return entries + + def diff_entries(olds, news): + diff = co.defaultdict(lambda: (None, None, None, None, 0, 0, 0, set())) + for name, (new_frame, new_limit, deps) in news.items(): + diff[name] = ( + None, None, + new_frame, new_limit, + new_frame, new_limit, + 1.0, + deps) + for name, (old_frame, old_limit, _) in olds.items(): + _, _, new_frame, new_limit, _, _, _, deps = diff[name] + diff[name] = ( + old_frame, old_limit, + new_frame, new_limit, + (new_frame or 0) - (old_frame or 0), + 0 if m.isinf(new_limit or 0) and m.isinf(old_limit or 0) + else (new_limit or 0) - (old_limit or 0), + 0.0 if m.isinf(new_limit or 0) and m.isinf(old_limit or 0) + else +float('inf') if m.isinf(new_limit or 0) + else -float('inf') if m.isinf(old_limit or 0) + else +0.0 if not old_limit and not new_limit + else +1.0 if not old_limit + else ((new_limit or 0) - (old_limit or 0))/(old_limit or 0), + deps) + return diff + + def sorted_entries(entries): + if args.get('limit_sort'): + return sorted(entries, key=lambda x: (-x[1][1], x)) + elif args.get('reverse_limit_sort'): + return sorted(entries, key=lambda x: (+x[1][1], x)) + elif 
args.get('frame_sort'): + return sorted(entries, key=lambda x: (-x[1][0], x)) + elif args.get('reverse_frame_sort'): + return sorted(entries, key=lambda x: (+x[1][0], x)) + else: + return sorted(entries) + + def sorted_diff_entries(entries): + if args.get('limit_sort'): + return sorted(entries, key=lambda x: (-(x[1][3] or 0), x)) + elif args.get('reverse_limit_sort'): + return sorted(entries, key=lambda x: (+(x[1][3] or 0), x)) + elif args.get('frame_sort'): + return sorted(entries, key=lambda x: (-(x[1][2] or 0), x)) + elif args.get('reverse_frame_sort'): + return sorted(entries, key=lambda x: (+(x[1][2] or 0), x)) + else: + return sorted(entries, key=lambda x: (-x[1][6], x)) + + def print_header(by=''): + if not args.get('diff'): + print('%-36s %7s %7s' % (by, 'frame', 'limit')) + else: + print('%-36s %15s %15s %15s' % (by, 'old', 'new', 'diff')) + + def print_entry(name, frame, limit): + print("%-36s %7d %7s" % (name, + frame, '∞' if m.isinf(limit) else int(limit))) + + def print_diff_entry(name, + old_frame, old_limit, + new_frame, new_limit, + diff_frame, diff_limit, + ratio): + print('%-36s %7s %7s %7s %7s %+7d %7s%s' % (name, + old_frame if old_frame is not None else "-", + ('∞' if m.isinf(old_limit) else int(old_limit)) + if old_limit is not None else "-", + new_frame if new_frame is not None else "-", + ('∞' if m.isinf(new_limit) else int(new_limit)) + if new_limit is not None else "-", + diff_frame, + ('+∞' if diff_limit > 0 and m.isinf(diff_limit) + else '-∞' if diff_limit < 0 and m.isinf(diff_limit) + else '%+d' % diff_limit), + '' if not ratio + else ' (+∞%)' if ratio > 0 and m.isinf(ratio) + else ' (-∞%)' if ratio < 0 and m.isinf(ratio) + else ' (%+.1f%%)' % (100*ratio))) + + def print_entries(by='name'): + # build optional tree of dependencies + def print_deps(entries, depth, print, + filter=lambda _: True, + prefixes=('', '', '', '')): + entries = entries if isinstance(entries, list) else list(entries) + filtered_entries = [(name, entry) + for name, entry in entries + if filter(name)] + for i, (name, entry) in enumerate(filtered_entries): + last = (i == len(filtered_entries)-1) + print(prefixes[0+last] + name, entry) + + if depth > 0: + deps = entry[-1] + print_deps(entries, depth-1, print, + lambda name: name in deps, + ( prefixes[2+last] + "|-> ", + prefixes[2+last] + "'-> ", + prefixes[2+last] + "| ", + prefixes[2+last] + " ")) + + entries = dedup_entries(results, by=by) + + if not args.get('diff'): + print_header(by=by) + print_deps( + sorted_entries(entries.items()), + args.get('depth') or 0, + lambda name, entry: print_entry(name, *entry[:-1])) + else: + prev_entries = dedup_entries(prev_results, by=by) + diff = diff_entries(prev_entries, entries) + + print_header(by='%s (%d added, %d removed)' % (by, + sum(1 for _, old, _, _, _, _, _, _ in diff.values() if old is None), + sum(1 for _, _, _, new, _, _, _, _ in diff.values() if new is None))) + print_deps( + filter( + lambda x: x[1][6] or args.get('all'), + sorted_diff_entries(diff.items())), + args.get('depth') or 0, + lambda name, entry: print_diff_entry(name, *entry[:-1])) + + def print_totals(): + if not args.get('diff'): + print_entry('TOTAL', total_frame, total_limit) + else: + diff_frame = total_frame - prev_total_frame + diff_limit = ( + 0 if m.isinf(total_limit or 0) and m.isinf(prev_total_limit or 0) + else (total_limit or 0) - (prev_total_limit or 0)) + ratio = ( + 0.0 if m.isinf(total_limit or 0) and m.isinf(prev_total_limit or 0) + else +float('inf') if m.isinf(total_limit or 0) + else -float('inf') if 
m.isinf(prev_total_limit or 0) + else 0.0 if not prev_total_limit and not total_limit + else 1.0 if not prev_total_limit + else ((total_limit or 0) - (prev_total_limit or 0))/(prev_total_limit or 0)) + print_diff_entry('TOTAL', + prev_total_frame, prev_total_limit, + total_frame, total_limit, + diff_frame, diff_limit, + ratio) + + if args.get('quiet'): + pass + elif args.get('summary'): + print_header() + print_totals() + elif args.get('files'): + print_entries(by='file') + print_totals() + else: + print_entries(by='name') + print_totals() + + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser( + description="Find stack usage at the function level.") + parser.add_argument('ci_paths', nargs='*', default=CI_PATHS, + help="Description of where to find *.ci files. May be a directory \ + or a list of paths. Defaults to %r." % CI_PATHS) + parser.add_argument('-v', '--verbose', action='store_true', + help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") + parser.add_argument('-o', '--output', + help="Specify CSV file to store results.") + parser.add_argument('-u', '--use', + help="Don't parse callgraph files, instead use this CSV file.") + parser.add_argument('-d', '--diff', + help="Specify CSV file to diff against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") + parser.add_argument('-a', '--all', action='store_true', + help="Show all functions, not just the ones that changed.") + parser.add_argument('-A', '--everything', action='store_true', + help="Include builtin and libc specific symbols.") + parser.add_argument('-s', '--limit-sort', action='store_true', + help="Sort by stack limit.") + parser.add_argument('-S', '--reverse-limit-sort', action='store_true', + help="Sort by stack limit, but backwards.") + parser.add_argument('--frame-sort', action='store_true', + help="Sort by stack frame size.") + parser.add_argument('--reverse-frame-sort', action='store_true', + help="Sort by stack frame size, but backwards.") + parser.add_argument('-L', '--depth', default=0, type=lambda x: int(x, 0), + nargs='?', const=float('inf'), + help="Depth of dependencies to show.") + parser.add_argument('-F', '--files', action='store_true', + help="Show file-level calls.") + parser.add_argument('-Y', '--summary', action='store_true', + help="Only show the total stack size.") + parser.add_argument('--build-dir', + help="Specify the relative build directory. Used to map object files \ + to the correct source files.") + sys.exit(main(**vars(parser.parse_args()))) diff --git a/scripts/structs.py b/scripts/structs.py new file mode 100755 index 00000000..e8d7193e --- /dev/null +++ b/scripts/structs.py @@ -0,0 +1,331 @@ +#!/usr/bin/env python3 +# +# Script to find struct sizes. 
+# + +import os +import glob +import itertools as it +import subprocess as sp +import shlex +import re +import csv +import collections as co + + +OBJ_PATHS = ['*.o'] + +def collect(paths, **args): + decl_pattern = re.compile( + '^\s+(?P[0-9]+)' + '\s+(?P[0-9]+)' + '\s+.*' + '\s+(?P[^\s]+)$') + struct_pattern = re.compile( + '^(?:.*DW_TAG_(?P[a-z_]+).*' + '|^.*DW_AT_name.*:\s*(?P[^:\s]+)\s*' + '|^.*DW_AT_decl_file.*:\s*(?P[0-9]+)\s*' + '|^.*DW_AT_byte_size.*:\s*(?P[0-9]+)\s*)$') + + results = co.defaultdict(lambda: 0) + for path in paths: + # find decl, we want to filter by structs in .h files + decls = {} + # note objdump-tool may contain extra args + cmd = args['objdump_tool'] + ['--dwarf=rawline', path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace') + for line in proc.stdout: + # find file numbers + m = decl_pattern.match(line) + if m: + decls[int(m.group('no'))] = m.group('file') + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + sys.exit(-1) + + # collect structs as we parse dwarf info + found = False + name = None + decl = None + size = None + + # note objdump-tool may contain extra args + cmd = args['objdump_tool'] + ['--dwarf=info', path] + if args.get('verbose'): + print(' '.join(shlex.quote(c) for c in cmd)) + proc = sp.Popen(cmd, + stdout=sp.PIPE, + stderr=sp.PIPE if not args.get('verbose') else None, + universal_newlines=True, + errors='replace') + for line in proc.stdout: + # state machine here to find structs + m = struct_pattern.match(line) + if m: + if m.group('tag'): + if (name is not None + and decl is not None + and size is not None): + decl = decls.get(decl, '?') + results[(decl, name)] = size + found = (m.group('tag') == 'structure_type') + name = None + decl = None + size = None + elif found and m.group('name'): + name = m.group('name') + elif found and name and m.group('decl'): + decl = int(m.group('decl')) + elif found and name and m.group('size'): + size = int(m.group('size')) + proc.wait() + if proc.returncode != 0: + if not args.get('verbose'): + for line in proc.stderr: + sys.stdout.write(line) + sys.exit(-1) + + flat_results = [] + for (file, struct), size in results.items(): + # map to source files + if args.get('build_dir'): + file = re.sub('%s/*' % re.escape(args['build_dir']), '', file) + # only include structs declared in header files in the current + # directory, ignore internal-only # structs (these are represented + # in other measurements) + if not args.get('everything'): + if not file.endswith('.h'): + continue + # replace .o with .c, different scripts report .o/.c, we need to + # choose one if we want to deduplicate csv files + file = re.sub('\.o$', '.c', file) + + flat_results.append((file, struct, size)) + + return flat_results + + +def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + + # find sizes + if not args.get('use', None): + # find .o files + paths = [] + for path in args['obj_paths']: + if os.path.isdir(path): + path = path + '/*.o' + + for path in glob.glob(path): + paths.append(path) + + if not paths: + print('no .obj files found in %r?' 
% args['obj_paths']) + sys.exit(-1) + + results = collect(paths, **args) + else: + with openio(args['use']) as f: + r = csv.DictReader(f) + results = [ + ( result['file'], + result['name'], + int(result['struct_size'])) + for result in r + if result.get('struct_size') not in {None, ''}] + + total = 0 + for _, _, size in results: + total += size + + # find previous results? + if args.get('diff'): + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + prev_results = [ + ( result['file'], + result['name'], + int(result['struct_size'])) + for result in r + if result.get('struct_size') not in {None, ''}] + except FileNotFoundError: + prev_results = [] + + prev_total = 0 + for _, _, size in prev_results: + prev_total += size + + # write results to CSV + if args.get('output'): + merged_results = co.defaultdict(lambda: {}) + other_fields = [] + + # merge? + if args.get('merge'): + try: + with openio(args['merge']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + struct = result.pop('name', '') + result.pop('struct_size', None) + merged_results[(file, struct)] = result + other_fields = result.keys() + except FileNotFoundError: + pass + + for file, struct, size in results: + merged_results[(file, struct)]['struct_size'] = size + + with openio(args['output'], 'w') as f: + w = csv.DictWriter(f, ['file', 'name', *other_fields, 'struct_size']) + w.writeheader() + for (file, struct), result in sorted(merged_results.items()): + w.writerow({'file': file, 'name': struct, **result}) + + # print results + def dedup_entries(results, by='name'): + entries = co.defaultdict(lambda: 0) + for file, struct, size in results: + entry = (file if by == 'file' else struct) + entries[entry] += size + return entries + + def diff_entries(olds, news): + diff = co.defaultdict(lambda: (0, 0, 0, 0)) + for name, new in news.items(): + diff[name] = (0, new, new, 1.0) + for name, old in olds.items(): + _, new, _, _ = diff[name] + diff[name] = (old, new, new-old, (new-old)/old if old else 1.0) + return diff + + def sorted_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1], x)) + else: + return sorted(entries) + + def sorted_diff_entries(entries): + if args.get('size_sort'): + return sorted(entries, key=lambda x: (-x[1][1], x)) + elif args.get('reverse_size_sort'): + return sorted(entries, key=lambda x: (+x[1][1], x)) + else: + return sorted(entries, key=lambda x: (-x[1][3], x)) + + def print_header(by=''): + if not args.get('diff'): + print('%-36s %7s' % (by, 'size')) + else: + print('%-36s %7s %7s %7s' % (by, 'old', 'new', 'diff')) + + def print_entry(name, size): + print("%-36s %7d" % (name, size)) + + def print_diff_entry(name, old, new, diff, ratio): + print("%-36s %7s %7s %+7d%s" % (name, + old or "-", + new or "-", + diff, + ' (%+.1f%%)' % (100*ratio) if ratio else '')) + + def print_entries(by='name'): + entries = dedup_entries(results, by=by) + + if not args.get('diff'): + print_header(by=by) + for name, size in sorted_entries(entries.items()): + print_entry(name, size) + else: + prev_entries = dedup_entries(prev_results, by=by) + diff = diff_entries(prev_entries, entries) + print_header(by='%s (%d added, %d removed)' % (by, + sum(1 for old, _, _, _ in diff.values() if not old), + sum(1 for _, new, _, _ in diff.values() if not new))) + for name, (old, new, diff, ratio) in sorted_diff_entries( + diff.items()): + if ratio or args.get('all'): + 
print_diff_entry(name, old, new, diff, ratio) + + def print_totals(): + if not args.get('diff'): + print_entry('TOTAL', total) + else: + ratio = (0.0 if not prev_total and not total + else 1.0 if not prev_total + else (total-prev_total)/prev_total) + print_diff_entry('TOTAL', + prev_total, total, + total-prev_total, + ratio) + + if args.get('quiet'): + pass + elif args.get('summary'): + print_header() + print_totals() + elif args.get('files'): + print_entries(by='file') + print_totals() + else: + print_entries(by='name') + print_totals() + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser( + description="Find struct sizes.") + parser.add_argument('obj_paths', nargs='*', default=OBJ_PATHS, + help="Description of where to find *.o files. May be a directory \ + or a list of paths. Defaults to %r." % OBJ_PATHS) + parser.add_argument('-v', '--verbose', action='store_true', + help="Output commands that run behind the scenes.") + parser.add_argument('-q', '--quiet', action='store_true', + help="Don't show anything, useful with -o.") + parser.add_argument('-o', '--output', + help="Specify CSV file to store results.") + parser.add_argument('-u', '--use', + help="Don't compile and find struct sizes, instead use this CSV file.") + parser.add_argument('-d', '--diff', + help="Specify CSV file to diff struct size against.") + parser.add_argument('-m', '--merge', + help="Merge with an existing CSV file when writing to output.") + parser.add_argument('-a', '--all', action='store_true', + help="Show all functions, not just the ones that changed.") + parser.add_argument('-A', '--everything', action='store_true', + help="Include builtin and libc specific symbols.") + parser.add_argument('-s', '--size-sort', action='store_true', + help="Sort by size.") + parser.add_argument('-S', '--reverse-size-sort', action='store_true', + help="Sort by size, but backwards.") + parser.add_argument('-F', '--files', action='store_true', + help="Show file-level struct sizes.") + parser.add_argument('-Y', '--summary', action='store_true', + help="Only show the total struct size.") + parser.add_argument('--objdump-tool', default=['objdump'], type=lambda x: x.split(), + help="Path to the objdump tool to use.") + parser.add_argument('--build-dir', + help="Specify the relative build directory. Used to map object files \ + to the correct source files.") + sys.exit(main(**vars(parser.parse_args()))) diff --git a/scripts/summary.py b/scripts/summary.py new file mode 100755 index 00000000..7ce769bf --- /dev/null +++ b/scripts/summary.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python3 +# +# Script to summarize the outputs of other scripts. Operates on CSV files. 
+# + +import functools as ft +import collections as co +import os +import csv +import re +import math as m + +# displayable fields +Field = co.namedtuple('Field', 'name,parse,acc,key,fmt,repr,null,ratio') +FIELDS = [ + # name, parse, accumulate, fmt, print, null + Field('code', + lambda r: int(r['code_size']), + sum, + lambda r: r, + '%7s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('data', + lambda r: int(r['data_size']), + sum, + lambda r: r, + '%7s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('stack', + lambda r: float(r['stack_limit']), + max, + lambda r: r, + '%7s', + lambda r: '∞' if m.isinf(r) else int(r), + '-', + lambda old, new: (new-old)/old), + Field('structs', + lambda r: int(r['struct_size']), + sum, + lambda r: r, + '%8s', + lambda r: r, + '-', + lambda old, new: (new-old)/old), + Field('coverage', + lambda r: (int(r['coverage_hits']), int(r['coverage_count'])), + lambda rs: ft.reduce(lambda a, b: (a[0]+b[0], a[1]+b[1]), rs), + lambda r: r[0]/r[1], + '%19s', + lambda r: '%11s %7s' % ('%d/%d' % (r[0], r[1]), '%.1f%%' % (100*r[0]/r[1])), + '%11s %7s' % ('-', '-'), + lambda old, new: ((new[0]/new[1]) - (old[0]/old[1]))) +] + + +def main(**args): + def openio(path, mode='r'): + if path == '-': + if 'r' in mode: + return os.fdopen(os.dup(sys.stdin.fileno()), 'r') + else: + return os.fdopen(os.dup(sys.stdout.fileno()), 'w') + else: + return open(path, mode) + + # find results + results = co.defaultdict(lambda: {}) + for path in args.get('csv_paths', '-'): + try: + with openio(path) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + name = result.pop('name', '') + prev = results[(file, name)] + for field in FIELDS: + try: + r = field.parse(result) + if field.name in prev: + results[(file, name)][field.name] = field.acc( + [prev[field.name], r]) + else: + results[(file, name)][field.name] = r + except (KeyError, ValueError): + pass + except FileNotFoundError: + pass + + # find fields + if args.get('all_fields'): + fields = FIELDS + elif args.get('fields') is not None: + fields_dict = {field.name: field for field in FIELDS} + fields = [fields_dict[f] for f in args['fields']] + else: + fields = [] + for field in FIELDS: + if any(field.name in result for result in results.values()): + fields.append(field) + + # find total for every field + total = {} + for result in results.values(): + for field in fields: + if field.name in result and field.name in total: + total[field.name] = field.acc( + [total[field.name], result[field.name]]) + elif field.name in result: + total[field.name] = result[field.name] + + # find previous results? 
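Each `Field` above names the CSV column it parses, how two rows accumulate (`sum` for sizes, `max` for stack, pairwise sums for coverage), and how the value prints. A hand-worked example with invented values, using only column names that appear in the other scripts:

import functools as ft

rows = [
    {'name': 'lfs_mount',  'code_size': 1234, 'stack_limit': 384.0,
        'coverage_hits': 12, 'coverage_count': 14},
    {'name': 'lfs_format', 'code_size': 912,  'stack_limit': 512.0,
        'coverage_hits': 9,  'coverage_count': 20},
]

code  = sum(r['code_size'] for r in rows)      # 2146, sizes add up
stack = max(r['stack_limit'] for r in rows)    # 512.0, worst case wins
hits, count = ft.reduce(
    lambda a, b: (a[0] + b[0], a[1] + b[1]),
    ((r['coverage_hits'], r['coverage_count']) for r in rows))

print(code, stack, '%d/%d (%.1f%%)' % (hits, count, 100 * hits / count))
# 2146 512.0 21/34 (61.8%)

The `--diff` branch below builds a second table of the same shape so old and new entries can be compared field by field.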
+ if args.get('diff'): + prev_results = co.defaultdict(lambda: {}) + try: + with openio(args['diff']) as f: + r = csv.DictReader(f) + for result in r: + file = result.pop('file', '') + name = result.pop('name', '') + prev = prev_results[(file, name)] + for field in FIELDS: + try: + r = field.parse(result) + if field.name in prev: + prev_results[(file, name)][field.name] = field.acc( + [prev[field.name], r]) + else: + prev_results[(file, name)][field.name] = r + except (KeyError, ValueError): + pass + except FileNotFoundError: + pass + + prev_total = {} + for result in prev_results.values(): + for field in fields: + if field.name in result and field.name in prev_total: + prev_total[field.name] = field.acc( + [prev_total[field.name], result[field.name]]) + elif field.name in result: + prev_total[field.name] = result[field.name] + + # print results + def dedup_entries(results, by='name'): + entries = co.defaultdict(lambda: {}) + for (file, func), result in results.items(): + entry = (file if by == 'file' else func) + prev = entries[entry] + for field in fields: + if field.name in result and field.name in prev: + entries[entry][field.name] = field.acc( + [prev[field.name], result[field.name]]) + elif field.name in result: + entries[entry][field.name] = result[field.name] + return entries + + def sorted_entries(entries): + if args.get('sort') is not None: + field = {field.name: field for field in FIELDS}[args['sort']] + return sorted(entries, key=lambda x: ( + -(field.key(x[1][field.name])) if field.name in x[1] else -1, x)) + elif args.get('reverse_sort') is not None: + field = {field.name: field for field in FIELDS}[args['reverse_sort']] + return sorted(entries, key=lambda x: ( + +(field.key(x[1][field.name])) if field.name in x[1] else -1, x)) + else: + return sorted(entries) + + def print_header(by=''): + if not args.get('diff'): + print('%-36s' % by, end='') + for field in fields: + print((' '+field.fmt) % field.name, end='') + print() + else: + print('%-36s' % by, end='') + for field in fields: + print((' '+field.fmt) % field.name, end='') + print(' %-9s' % '', end='') + print() + + def print_entry(name, result): + print('%-36s' % name, end='') + for field in fields: + r = result.get(field.name) + if r is not None: + print((' '+field.fmt) % field.repr(r), end='') + else: + print((' '+field.fmt) % '-', end='') + print() + + def print_diff_entry(name, old, new): + print('%-36s' % name, end='') + for field in fields: + n = new.get(field.name) + if n is not None: + print((' '+field.fmt) % field.repr(n), end='') + else: + print((' '+field.fmt) % '-', end='') + o = old.get(field.name) + ratio = ( + 0.0 if m.isinf(o or 0) and m.isinf(n or 0) + else +float('inf') if m.isinf(n or 0) + else -float('inf') if m.isinf(o or 0) + else 0.0 if not o and not n + else +1.0 if not o + else -1.0 if not n + else field.ratio(o, n)) + print(' %-9s' % ( + '' if not ratio + else '(+∞%)' if ratio > 0 and m.isinf(ratio) + else '(-∞%)' if ratio < 0 and m.isinf(ratio) + else '(%+.1f%%)' % (100*ratio)), end='') + print() + + def print_entries(by='name'): + entries = dedup_entries(results, by=by) + + if not args.get('diff'): + print_header(by=by) + for name, result in sorted_entries(entries.items()): + print_entry(name, result) + else: + prev_entries = dedup_entries(prev_results, by=by) + print_header(by='%s (%d added, %d removed)' % (by, + sum(1 for name in entries if name not in prev_entries), + sum(1 for name in prev_entries if name not in entries))) + for name, result in sorted_entries(entries.items()): + if 
args.get('all') or result != prev_entries.get(name, {}): + print_diff_entry(name, prev_entries.get(name, {}), result) + + def print_totals(): + if not args.get('diff'): + print_entry('TOTAL', total) + else: + print_diff_entry('TOTAL', prev_total, total) + + if args.get('summary'): + print_header() + print_totals() + elif args.get('files'): + print_entries(by='file') + print_totals() + else: + print_entries(by='name') + print_totals() + + +if __name__ == "__main__": + import argparse + import sys + parser = argparse.ArgumentParser( + description="Summarize measurements") + parser.add_argument('csv_paths', nargs='*', default='-', + help="Description of where to find *.csv files. May be a directory \ + or list of paths. *.csv files will be merged to show the total \ + coverage.") + parser.add_argument('-d', '--diff', + help="Specify CSV file to diff against.") + parser.add_argument('-a', '--all', action='store_true', + help="Show all objects, not just the ones that changed.") + parser.add_argument('-e', '--all-fields', action='store_true', + help="Show all fields, even those with no results.") + parser.add_argument('-f', '--fields', type=lambda x: re.split('\s*,\s*', x), + help="Comma separated list of fields to print, by default all fields \ + that are found in the CSV files are printed.") + parser.add_argument('-s', '--sort', + help="Sort by this field.") + parser.add_argument('-S', '--reverse-sort', + help="Sort by this field, but backwards.") + parser.add_argument('-F', '--files', action='store_true', + help="Show file-level calls.") + parser.add_argument('-Y', '--summary', action='store_true', + help="Only show the totals.") + sys.exit(main(**vars(parser.parse_args()))) diff --git a/scripts/test.py b/scripts/test.py index f8051f0d..92a13b1d 100755 --- a/scripts/test.py +++ b/scripts/test.py @@ -784,10 +784,13 @@ def main(**args): stdout=sp.PIPE if not args.get('verbose') else None, stderr=sp.STDOUT if not args.get('verbose') else None, universal_newlines=True) + stdout = [] + for line in proc.stdout: + stdout.append(line) proc.wait() if proc.returncode != 0: if not args.get('verbose'): - for line in proc.stdout: + for line in stdout: sys.stdout.write(line) sys.exit(-1) @@ -803,9 +806,9 @@ def main(**args): failure.case.test(failure=failure, **args) sys.exit(0) - print('tests passed %d/%d (%.2f%%)' % (passed, total, + print('tests passed %d/%d (%.1f%%)' % (passed, total, 100*(passed/total if total else 1.0))) - print('tests failed %d/%d (%.2f%%)' % (failed, total, + print('tests failed %d/%d (%.1f%%)' % (failed, total, 100*(failed/total if total else 1.0))) return 1 if failed > 0 else 0
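Taken together, the intent is that each measurement script merges its column into one shared CSV which summary.py then prints. A rough sketch of that chaining, assuming a build has already produced the `.o` and `.ci` files the scripts look for; the file name `lfs.csv` and the exact flags are illustrative, not necessarily what CI uses:

import subprocess as sp

sp.check_call(['./scripts/code.py',    '-q', '-o', 'lfs.csv'])
sp.check_call(['./scripts/data.py',    '-q', '-m', 'lfs.csv', '-o', 'lfs.csv'])
sp.check_call(['./scripts/stack.py',   '-q', '-m', 'lfs.csv', '-o', 'lfs.csv'])
sp.check_call(['./scripts/structs.py', '-q', '-m', 'lfs.csv', '-o', 'lfs.csv'])
sp.check_call(['./scripts/summary.py', 'lfs.csv'])

Writing `-o` to the same file that `-m` reads works here because each script finishes reading the merge input before it reopens the path for writing.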