diff --git a/Makefile.am b/Makefile.am index 0ba1eeefb5..622bc6a231 100644 --- a/Makefile.am +++ b/Makefile.am @@ -359,7 +359,11 @@ endif # repo configuration, or submodules). But this is a Git-crawling target # anyway, and in the worst case (Git's design changes) we would spend a # bit of time researching the FS in vain, and go on to re-generate the -# ChangeLog when maybe we should not have - oh well: +# ChangeLog when maybe we should not have - oh well. +# WARNING: The CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR=true mode here is +# the default to allow for prettier documentation, but it can require too much +# memory for weaker build systems. Set it to false when calling make there. +CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR_ENVVAR = true $(abs_top_builddir)/ChangeLog: tools/gitlog2changelog.py dummy-stamp @cd $(abs_top_srcdir) && \ if test -e .git ; then \ @@ -369,7 +373,9 @@ $(abs_top_builddir)/ChangeLog: tools/gitlog2changelog.py dummy-stamp echo "Using still-valid ChangeLog file generated earlier from same revision of Git source metadata in '$${NUT_GITDIR}'" >&2 ; \ else \ echo " DOC-CHANGELOG-GENERATE $@" ; \ - CHANGELOG_FILE="$@" $(WITH_PDF_NONASCII_TITLES_ENVVAR) $(abs_top_builddir)/tools/gitlog2changelog.py $(GITLOG_START_POINT) || { \ + CHANGELOG_FILE="$@" $(WITH_PDF_NONASCII_TITLES_ENVVAR) \ + CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR="$(CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR_ENVVAR)" \ + $(abs_top_builddir)/tools/gitlog2changelog.py $(GITLOG_START_POINT) || { \ echo " DOC-CHANGELOG-GENERATE $@ : FAILED (non-fatal)" >&2 ; \ printf "gitlog2changelog.py failed to generate the ChangeLog.\n\nNOTE: See https://github.com/networkupstools/nut/commits/master for change history.\n\n" > "$@" ; \ } ; \ diff --git a/NEWS.adoc b/NEWS.adoc index 0ee525b938..6ba1eb092d 100644 --- a/NEWS.adoc +++ b/NEWS.adoc @@ -213,6 +213,15 @@ during a NUT build. 
- added a `make distcheck-light-man` recipe to require verification that the manual page files can be built using the prepared "tarball" archive. [#2473] + - revised the documentation building recipes, with the goal to avoid building + the `ChangeLog` products and their intermediate files more than once (but + still react to `git` metadata changes during development), and to sanity + check the resulting final document (currently only for `html-single` mode). + As part of this, the `CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR` setting was + added (for `make` calls and used by `tools/gitlog2changelog.py.in` script), + and it defaults to `true` allowing for better ordered documents at the cost + of some memory during document generation. [#2510] + - added a `common/Makefile.am` build product for a new internal library `libcommonstr.la` which allows a smaller selection of helper methods for tools like `nut-scanner` which do not need the full `libcommon.la` diff --git a/UPGRADING.adoc b/UPGRADING.adoc index 7580eda30e..16c8673c55 100644 --- a/UPGRADING.adoc +++ b/UPGRADING.adoc @@ -38,6 +38,13 @@ Changes from 2.8.2 to 2.8.3 pages which are delivered automatically. Packaging recipes can likely be simplified now. [#2445] +- A `CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR` setting was added (for `make` + calls and used by `tools/gitlog2changelog.py.in` script), and it defaults + to `true` allowing for better ordered documents at the cost of some memory + during document generation. Resource-constrained builders (working from + a Git workspace, not tarball archives) may have to set it to `false` when + calling `make` for NUT. 
[#2510] + - NUT products like `nut-scanner`, which dynamically load shared libraries at run-time without persistent pre-linking, should now know the library file names that were present during build (likely encumbered with version diff --git a/docs/Makefile.am b/docs/Makefile.am index 619d9e6778..46794f83ca 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -18,7 +18,20 @@ MAINTAINERCLEANFILES = Makefile.in .dirstamp EXTRA_DIST = -all: doc +# Note: "doc" ensures the `configure`-specified list of documents we actually +# want, while the default generated "all: all-am" target historically causes +# some but not all of these targets to get built (e.g. ChangeLog html/pdf is +# usually not made there). Post-processing "doc" as part of "all" helps +# ensure that we do not rebuild stuff in vain during parallel builds (where +# "all-am" and "doc" would be unordered parallel goals of the "all" target) +# while getting those further goals achieved eventually in the default build. +# Crucially, this allows to make sure "ChangeLog(.adoc*)" files have been +# generated once (can take a looong while), settled into place, and only then +# we revisit them for html/pdf rendering (another long while) without randomly +# confusing the system with new timestamps and needless regenerations later on. +all: + @echo " DOC-FOLLOW-UP Basic 'make $@' in `pwd` is done, following up with 'make doc' to ensure complex document types" + +@$(MAKE) $(AM_MAKEFLAGS) doc # Is "egrep == grep -E" always valid? 
(maybe all a job for configure.ac) #EGREP = egrep @@ -256,9 +269,21 @@ ChangeLog.html-contentchecked: fi ; \ fi ; \ fi; \ - else \ + if [ x"$$FAILED" = x ] ; then \ + echo "PASSED $@" >&2 ; \ + exit 0 ; \ + fi ; \ + if [ x"$$FAILED" != x ] && [ -s '$(top_builddir)/ChangeLog.adoc' ] \ + && [ "`head -1 $(top_builddir)/ChangeLog.adoc`" = "=== Failed to generate the ChangeLog" ] \ + ; then \ + FAILED="" ; \ + fi; \ + fi; \ + if [ x"$$FAILED" = x ] ; then \ echo "SKIPPED $@ because input files were not available" >&2 ; \ - fi + exit 0 ; \ + fi ; \ + exit 1 check-html-single: $(ASCIIDOC_HTML_SINGLE) +@FAILED=""; LANG=C; LC_ALL=C; export LANG; export LC_ALL; \ @@ -332,6 +357,8 @@ DOCBUILD_CONVERT_GITHUB_LINKS = { \ .adoc.adoc-parsed: @$(DOCBUILD_CONVERT_GITHUB_LINKS) +$(top_builddir)/ChangeLog.adoc-parsed: $(top_builddir)/ChangeLog.adoc + dummy: $(top_builddir)/ChangeLog: dummy @+echo " DOC-CHANGELOG-GENERATE-WRAPPER $@ : call parent Makefile to decide if (re-)generation is needed" \ @@ -347,6 +374,14 @@ else !WITH_PDF_NONASCII_TITLES A2X_ASCII_IDS = ":ascii-ids:\n" endif !WITH_PDF_NONASCII_TITLES +# Probably due to the web of makefiles and an overwhelmed job server in some +# implementations, during parallel builds we can end up scheduling several +# threads creating this asciidoc (and adoc-parsed later). This step only +# costs a few seconds, however the updated timestamp may cause new HTML/PDF +# builds which cost a lot more. Below we try a few ways to detect a build +# already running and bail out early if the file exists. Otherwise we bite +# the bullet and spend a few seconds, and then re-check if another thread +# did exist and finished first. 
$(top_builddir)/ChangeLog.adoc: $(top_builddir)/ChangeLog @INPUT="$?"; \ test -n "$${INPUT}" || INPUT="$$(top_builddir)/ChangeLog" ; \ @@ -357,6 +392,8 @@ $(top_builddir)/ChangeLog.adoc: $(top_builddir)/ChangeLog test -n "$@" && { printf '=== Failed to generate the ChangeLog\n\n%s\n\nNOTE: See https://github.com/networkupstools/nut/commits/master for change history.\n\n' "$${MSG}" > "$@" ; } ; \ exit ; \ } ; \ + W=10 ; while [ "$${W}" -gt 0 ] && find '$@.tmp.'* '$@' -newer "$${INPUT}" 2>/dev/null >/dev/null ; do sleep 1 ; W="`expr $$W - 1`"; done ; touch "$@.tmp.$$$$"; \ + if [ x"`find '$@' -newer "$${INPUT}" 2>/dev/null`" != x ] ; then echo " DOC-CHANGELOG-ASCIIDOC $${INPUT} => $@ : SKIP (keep existing)"; rm -f "$@.tmp.$$$$"; exit 0 ; fi ; \ echo " DOC-CHANGELOG-ASCIIDOC $${INPUT} => $@" \ && printf "ifdef::txt[]\n== Very detailed Change Log\n"$(A2X_ASCII_IDS)"endif::txt[]\n\n" > "$@.tmp.$$$$" \ && TABCHAR="`printf '\t'`" \ @@ -370,7 +407,8 @@ $(top_builddir)/ChangeLog.adoc: $(top_builddir)/ChangeLog -e 's,\[\[\([^]]*\)\]\],[\1],g' \ -e 's,^\(\s\s*\)\([0-9]\),\1{empty}\2,g' \ < "$${INPUT}" >> "$@.tmp.$$$$" \ - && mv -f "$@.tmp.$$$$" "$@" + && if [ x"`find '$@' -newer "$${INPUT}" 2>/dev/null`" != x ] ; then echo " DOC-CHANGELOG-ASCIIDOC $${INPUT} => $@ : SKIP (keep recently born competitor)"; rm -f "$@.tmp.$$$$"; \ + else mv -f "$@.tmp.$$$$" "$@" ; fi # Add other directory deps (not for local EXTRA_DIST) and generated contents FULL_USER_MANUAL_DEPS = $(USER_MANUAL_DEPS) $(SHARED_DEPS) \ diff --git a/tools/gitlog2changelog.py.in b/tools/gitlog2changelog.py.in index 889bf3793e..fca6f5ac96 100755 --- a/tools/gitlog2changelog.py.in +++ b/tools/gitlog2changelog.py.in @@ -89,6 +89,31 @@ else: else: fout = open(CHANGELOG_FILE, "w") +# By default we collect information from a commit and output it as soon as +# we have enough. Part of it is best-effort grouping of a series of commits +# made by the same author on the same day, if they follow each other. 
+# The alternative is to expend memory to collect all git log entries into a +# dictionary first (key = date+author, value = list of entries) and only +# print the output at the end of processing. This costs more resources, so +# is not default behavior. +requireGroupByDateAuthor = False +try: + tmpEnvVar = os.environ.get("CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR", None) + if str(tmpEnvVar).lower() == "true": + requireGroupByDateAuthor = True +except Exception as ignored: + pass + +cachedContent = None +if requireGroupByDateAuthor: + try: + from collections import defaultdict + cachedContent = defaultdict(list) + except Exception as x: + print("Failed to init requireGroupByDateAuthor processing as defaultdict(list), trying simple dict(): " + str(x)) + requireGroupByDateAuthor = False + cachedContent = dict() + # Set up the loop variables in order to locate the blocks we want authorFound = False dateFound = False @@ -162,7 +187,14 @@ for line in fin: continue # Extract the actual commit message for this commit elif authorFound and dateFound and messageFound is False: - # Find the commit message if we can + # Find the commit message if we can (including the optional + # details after the title and a blank line) + # FIXME: Detect end of message by /^#/ to allow for longer essays + # in the detailed comments part? + # FIXME: Some such comments include asciidoc-ish markup, notably + # bullet lists - do not concatenate those into one block but do + # actually pass them as sub-lists (indented, and perhaps not + # starting with an asterisk which we use for this document). 
if len(line) == fin_chop: if messageNL: messageFound = True @@ -187,21 +219,33 @@ for line in fin: files = files + ", " + fileList[0].strip() else: files = fileList[0].strip() + # All of the parts of the commit have been found - write out the entry if authorFound and dateFound and messageFound and filesFound: # First the author line, only outputted if it is the first for that - # author on this day + # author on this day. + # WARNING: In case of git rebase commit shuffling, merges of dormant + # branches, etc. we are not guaranteed to have all dates in the list + # nicely ordered. In fact, the same date+author can be repeated if + # there were commits with other metadata in git history between those + # (e.g. many PRs from a few authors merged during one day). While we + # could cache each section by authorLine and only output in the end, + # it can require a lot of memory - so by default we do not. authorLine = date + " " + author - if len(prevAuthorLine) == 0: - fout.write(authorLine + "\n\n") - elif authorLine == prevAuthorLine: - pass + if requireGroupByDateAuthor: + if authorLine not in cachedContent: + cachedContent[authorLine] = list() else: - fout.write("\n" + authorLine + "\n\n") + if len(prevAuthorLine) == 0: + fout.write(authorLine + "\n\n") + elif authorLine == prevAuthorLine: + pass + else: + fout.write("\n" + authorLine + "\n\n") # Assemble the actual commit message line(s) and limit the line length # to 80 characters. - # Avoid printing same (or equivalen) filename lists twice, if commit + # Avoid printing same (or equivalent) filename lists twice, if commit # message starts with them. 
if message.startswith(files + ":"): commitLine = "* " + message @@ -219,8 +263,11 @@ for line in fin: else: commitLine = "* " + files + ": " + message - # Write out the commit line - fout.write(wrapper.fill(commitLine) + "\n") + if requireGroupByDateAuthor: + cachedContent[authorLine].append(commitLine) + else: + # Write out the commit line, wrapped for length + fout.write(wrapper.fill(commitLine) + "\n") # Now reset all the variables ready for a new commit block. authorFound = False @@ -232,6 +279,22 @@ for line in fin: files = "" prevAuthorLine = authorLine +if requireGroupByDateAuthor: + # We did not print anything before, flush it out now; + # most recent date first (alphanumerically reverse) + counter = 0 + for authorLine in sorted(cachedContent, reverse=True): + if counter == 0: + fout.write(authorLine + "\n\n") + else: + fout.write("\n" + authorLine + "\n\n") + + # Use original list append order + for commitLine in cachedContent[authorLine]: + fout.write(wrapper.fill(commitLine) + "\n") + + counter = counter + 1 + # Close the input and output lines now that we are finished. if fin_mode == 3: p.stdout.close()