From e15ee2b2516a2a3b6ad69618ae60fd3fc7830e48 Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 16 Jul 2024 09:59:26 +0200 Subject: [PATCH 1/8] docs/Makefile.am: actually do fail with ChangeLog.html-contentchecked recipe hits [#2510] ...except for when we deliberately generated nothing and report it in the document Signed-off-by: Jim Klimov --- docs/Makefile.am | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/docs/Makefile.am b/docs/Makefile.am index 619d9e6778..2fd8bc62d8 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -256,9 +256,21 @@ ChangeLog.html-contentchecked: fi ; \ fi ; \ fi; \ - else \ + if [ x"$$FAILED" = x ] ; then \ + echo "PASSED $@" >&2 ; \ + exit 0 ; \ + fi ; \ + if [ x"$$FAILED" != x ] && [ -s '$(top_builddir)/ChangeLog.adoc' ] \ + && [ "`head -1 $(top_builddir)/ChangeLog.adoc`" = "=== Failed to generate the ChangeLog" ] \ + ; then \ + FAILED="" ; \ + fi; \ + fi; \ + if [ x"$$FAILED" = x ] ; then \ echo "SKIPPED $@ because input files were not available" >&2 ; \ - fi + exit 0 ; \ + fi ; \ + exit 1 check-html-single: $(ASCIIDOC_HTML_SINGLE) +@FAILED=""; LANG=C; LC_ALL=C; export LANG; export LC_ALL; \ From 55678727da7bea48a2282c1dd8a7c370bc9bd18b Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 16 Jul 2024 10:24:16 +0200 Subject: [PATCH 2/8] docs/Makefile.am: "doc" target as part of "all" is not alone there [#2510] Signed-off-by: Jim Klimov --- docs/Makefile.am | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/Makefile.am b/docs/Makefile.am index 2fd8bc62d8..787dee45fb 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -18,7 +18,20 @@ MAINTAINERCLEANFILES = Makefile.in .dirstamp EXTRA_DIST = -all: doc +# Note: "doc" ensures the `configure`-specified list of documents we actually +# want, while the default generated "all: all-am" target historically causes +# some but not all of these targets to get built (e.g. ChangeLog html/pdf is +# usually not made there). 
Post-processing "doc" as part of "all" helps +# ensure that we do not rebuild stuff in vain during parallel builds (where +# "all-am" and "doc" would be unordered parallel goals of the "all" target) +# while getting those further goals achieved eventually in the default build. +# Crucially, this allows us to make sure "ChangeLog(.adoc*)" files have been +# generated once (can take a looong while), settled into place, and only then +# we revisit them for html/pdf rendering (another long while) without randomly +# confusing the system with new timestamps and needless regenerations later on. +all: + @echo " DOC-FOLLOW-UP Basic 'make $@' in `pwd` is done, following up with 'make doc' to ensure complex document types" + +@$(MAKE) $(AM_MAKEFLAGS) doc # Is "egrep == grep -E" always valid? (maybe all a job for configure.ac) #EGREP = egrep From be6e87b5b2f9be59dd34689324adf969f17484e2 Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Mon, 15 Jul 2024 18:52:31 +0200 Subject: [PATCH 3/8] docs/Makefile.am: try to make sure we only build ChangeLog.adoc once (or keep the first built copy in a parallel fanout) [#2510] Signed-off-by: Jim Klimov --- docs/Makefile.am | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/docs/Makefile.am b/docs/Makefile.am index 787dee45fb..46794f83ca 100644 --- a/docs/Makefile.am +++ b/docs/Makefile.am @@ -357,6 +357,8 @@ DOCBUILD_CONVERT_GITHUB_LINKS = { \ .adoc.adoc-parsed: @$(DOCBUILD_CONVERT_GITHUB_LINKS) +$(top_builddir)/ChangeLog.adoc-parsed: $(top_builddir)/ChangeLog.adoc + dummy: $(top_builddir)/ChangeLog: dummy @+echo " DOC-CHANGELOG-GENERATE-WRAPPER $@ : call parent Makefile to decide if (re-)generation is needed" \ @@ -372,6 +374,14 @@ else !WITH_PDF_NONASCII_TITLES A2X_ASCII_IDS = ":ascii-ids:\n" endif !WITH_PDF_NONASCII_TITLES +# Probably due to the web of makefiles and an overwhelmed job server in some +# implementations, during parallel builds we can end up scheduling several +# threads creating this asciidoc 
(and adoc-parsed later). This step only +# costs a few seconds, however the updated timestamp may cause new HTML/PDF +# builds which cost a lot more. Below we try a few ways to detect a build +# already running and bail out early if the file exists. Otherwise we bite +# the bullet and spend a few seconds, and then re-check if another thread +# did exist and finished first. $(top_builddir)/ChangeLog.adoc: $(top_builddir)/ChangeLog @INPUT="$?"; \ test -n "$${INPUT}" || INPUT="$$(top_builddir)/ChangeLog" ; \ @@ -382,6 +392,8 @@ $(top_builddir)/ChangeLog.adoc: $(top_builddir)/ChangeLog test -n "$@" && { printf '=== Failed to generate the ChangeLog\n\n%s\n\nNOTE: See https://github.com/networkupstools/nut/commits/master for change history.\n\n' "$${MSG}" > "$@" ; } ; \ exit ; \ } ; \ + W=10 ; while [ "$${W}" -gt 0 ] && find '$@.tmp.'* '$@' -newer "$${INPUT}" 2>/dev/null >/dev/null ; do sleep 1 ; W="`expr $$W - 1`"; done ; touch "$@.tmp.$$$$"; \ + if [ x"`find '$@' -newer "$${INPUT}" 2>/dev/null`" != x ] ; then echo " DOC-CHANGELOG-ASCIIDOC $${INPUT} => $@ : SKIP (keep existing)"; rm -f "$@.tmp.$$$$"; exit 0 ; fi ; \ echo " DOC-CHANGELOG-ASCIIDOC $${INPUT} => $@" \ && printf "ifdef::txt[]\n== Very detailed Change Log\n"$(A2X_ASCII_IDS)"endif::txt[]\n\n" > "$@.tmp.$$$$" \ && TABCHAR="`printf '\t'`" \ @@ -395,7 +407,8 @@ $(top_builddir)/ChangeLog.adoc: $(top_builddir)/ChangeLog -e 's,\[\[\([^]]*\)\]\],[\1],g' \ -e 's,^\(\s\s*\)\([0-9]\),\1{empty}\2,g' \ < "$${INPUT}" >> "$@.tmp.$$$$" \ - && mv -f "$@.tmp.$$$$" "$@" + && if [ x"`find '$@' -newer "$${INPUT}" 2>/dev/null`" != x ] ; then echo " DOC-CHANGELOG-ASCIIDOC $${INPUT} => $@ : SKIP (keep recently born competitor)"; rm -f "$@.tmp.$$$$"; \ + else mv -f "$@.tmp.$$$$" "$@" ; fi # Add other directory deps (not for local EXTRA_DIST) and generated contents FULL_USER_MANUAL_DEPS = $(USER_MANUAL_DEPS) $(SHARED_DEPS) \ From 67e89c803397a9c911bb1c78d666d6eec1e1fe05 Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 16 Jul 2024 
11:09:52 +0200 Subject: [PATCH 4/8] tools/gitlog2changelog.py.in: comment about possible interleaved date+author entries [#2510] Signed-off-by: Jim Klimov --- tools/gitlog2changelog.py.in | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/gitlog2changelog.py.in b/tools/gitlog2changelog.py.in index 889bf3793e..18b12193e7 100755 --- a/tools/gitlog2changelog.py.in +++ b/tools/gitlog2changelog.py.in @@ -190,7 +190,14 @@ for line in fin: # All of the parts of the commit have been found - write out the entry if authorFound and dateFound and messageFound and filesFound: # First the author line, only outputted if it is the first for that - # author on this day + # author on this day. + # WARNING: In case of git rebase commit shuffling, merges of dormant + # branches, etc. we are not guaranteed to have all dates in the list + # nicely ordered. In fact, the same date+author can be repeated if + # there were commits with other metadata in git history between those + # (e.g. many PRs from a few authors merged during one day). While we + # could cache each section by authorLine and only output in the end, + # it can require a lot of memory - so by default we do not. 
authorLine = date + " " + author if len(prevAuthorLine) == 0: fout.write(authorLine + "\n\n") From 4f6a9e7e2d88ce88407a1582bdee01406fd586cf Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 16 Jul 2024 11:10:28 +0200 Subject: [PATCH 5/8] tools/gitlog2changelog.py.in: cosmetic fixes Signed-off-by: Jim Klimov --- tools/gitlog2changelog.py.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/gitlog2changelog.py.in b/tools/gitlog2changelog.py.in index 18b12193e7..5fefe1996c 100755 --- a/tools/gitlog2changelog.py.in +++ b/tools/gitlog2changelog.py.in @@ -187,6 +187,7 @@ for line in fin: files = files + ", " + fileList[0].strip() else: files = fileList[0].strip() + # All of the parts of the commit have been found - write out the entry if authorFound and dateFound and messageFound and filesFound: # First the author line, only outputted if it is the first for that @@ -208,7 +209,7 @@ for line in fin: # Assemble the actual commit message line(s) and limit the line length # to 80 characters. - # Avoid printing same (or equivalen) filename lists twice, if commit + # Avoid printing same (or equivalent) filename lists twice, if commit # message starts with them. if message.startswith(files + ":"): commitLine = "* " + message From f34439bbb32259af208d08da5e3f4852cc501584 Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 16 Jul 2024 11:46:42 +0200 Subject: [PATCH 6/8] tools/gitlog2changelog.py.in: support CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR mode [#2510] Signed-off-by: Jim Klimov --- tools/gitlog2changelog.py.in | 62 ++++++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 7 deletions(-) diff --git a/tools/gitlog2changelog.py.in b/tools/gitlog2changelog.py.in index 5fefe1996c..ef7e0c86c1 100755 --- a/tools/gitlog2changelog.py.in +++ b/tools/gitlog2changelog.py.in @@ -89,6 +89,31 @@ else: else: fout = open(CHANGELOG_FILE, "w") +# By default we collect information from a commit and output it as soon as +# we have enough. 
Part of it is best-effort grouping of a series of commits +# made by the same author on the same day, if they follow each other. +# The alternative is to expend memory to collect all git log entries into a +# dictionary first (key = date+author, value = list of entries) and only +# print the output in the end of processing. This costs more resources, so +# is not default behavior. +requireGroupByDateAuthor = False +try: + tmpEnvVar = os.environ.get("CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR", None) + if str(tmpEnvVar).lower() == "true": + requireGroupByDateAuthor = True +except Exception as ignored: + pass + +cachedContent = None +if requireGroupByDateAuthor: + try: + from collections import defaultdict + cachedContent = defaultdict(list) + except Exception as x: + print("Failed to init requireGroupByDateAuthor processing as defaultdict(list), trying simple dict(): " + str(x)) + requireGroupByDateAuthor = False + cachedContent = dict() + # Set up the loop variables in order to locate the blocks we want authorFound = False dateFound = False @@ -200,12 +225,16 @@ for line in fin: # could cache each section by authorLine and only output in the end, # it can require a lot of memory - so by default we do not. authorLine = date + " " + author - if len(prevAuthorLine) == 0: - fout.write(authorLine + "\n\n") - elif authorLine == prevAuthorLine: - pass + if requireGroupByDateAuthor: + if authorLine not in cachedContent: + cachedContent[authorLine] = list() else: - fout.write("\n" + authorLine + "\n\n") + if len(prevAuthorLine) == 0: + fout.write(authorLine + "\n\n") + elif authorLine == prevAuthorLine: + pass + else: + fout.write("\n" + authorLine + "\n\n") # Assemble the actual commit message line(s) and limit the line length # to 80 characters. 
@@ -227,8 +256,11 @@ for line in fin: else: commitLine = "* " + files + ": " + message - # Write out the commit line - fout.write(wrapper.fill(commitLine) + "\n") + if requireGroupByDateAuthor: + cachedContent[authorLine].append(commitLine) + else: + # Write out the commit line, wrapped for length + fout.write(wrapper.fill(commitLine) + "\n") # Now reset all the variables ready for a new commit block. authorFound = False @@ -240,6 +272,22 @@ for line in fin: files = "" prevAuthorLine = authorLine +if requireGroupByDateAuthor: + # We did not print anything before, flush it out now; + # most recent date first (alphanumerically reverse) + counter = 0 + for authorLine in sorted(cachedContent, reverse=True): + if counter == 0: + fout.write(authorLine + "\n\n") + else: + fout.write("\n" + authorLine + "\n\n") + + # Use original list append order + for commitLine in cachedContent[authorLine]: + fout.write(wrapper.fill(commitLine) + "\n") + + counter = counter + 1 + # Close the input and output lines now that we are finished. if fin_mode == 3: p.stdout.close() From fab189cf7d00d51c9e3cddba859b77147c8a4c04 Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 16 Jul 2024 11:47:07 +0200 Subject: [PATCH 7/8] fixup! tools/gitlog2changelog.py.in: cosmetic fixes Signed-off-by: Jim Klimov --- tools/gitlog2changelog.py.in | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tools/gitlog2changelog.py.in b/tools/gitlog2changelog.py.in index ef7e0c86c1..fca6f5ac96 100755 --- a/tools/gitlog2changelog.py.in +++ b/tools/gitlog2changelog.py.in @@ -187,7 +187,14 @@ for line in fin: continue # Extract the actual commit message for this commit elif authorFound and dateFound and messageFound is False: - # Find the commit message if we can + # Find the commit message if we can (including the optional + # details after the title and a blank line) + # FIXME: Detect end of message by /^#/ to allow for longer essays + # in the detailed comments part? 
+ # FIXME: Some such comments include asciidoc-ish markup, notably + # bullet lists - do not concatenate those into one block but do + # actually pass them as sub-lists (indented, and perhaps not + # starting with an asterisk which we use for this document). if len(line) == fin_chop: if messageNL: messageFound = True From 94991ea31eb40f9cca525358de6eead0bad779d0 Mon Sep 17 00:00:00 2001 From: Jim Klimov Date: Tue, 16 Jul 2024 11:58:26 +0200 Subject: [PATCH 8/8] Makefile.am, NEWS.adoc, UPGRADING.adoc: for ChangeLog generation from git metadata, default to CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR=true [#2510] Signed-off-by: Jim Klimov --- Makefile.am | 10 ++++++++-- NEWS.adoc | 9 +++++++++ UPGRADING.adoc | 7 +++++++ 3 files changed, 24 insertions(+), 2 deletions(-) diff --git a/Makefile.am b/Makefile.am index 0ba1eeefb5..622bc6a231 100644 --- a/Makefile.am +++ b/Makefile.am @@ -359,7 +359,11 @@ endif # repo configuration, or submodules). But this is a Git-crawling target # anyway, and in the worst case (Git's design changes) we would spend a # bit of time researching the FS in vain, and go on to re-generate the -# ChangeLog when maybe we should not have - oh well: +# ChangeLog when maybe we should not have - oh well. +# WARNING: The CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR=true mode here is +# default to allow for prettier documentation, but it can require too much +# memory for weaker build systems. Set it to false when calling make there. 
+CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR_ENVVAR = true $(abs_top_builddir)/ChangeLog: tools/gitlog2changelog.py dummy-stamp @cd $(abs_top_srcdir) && \ if test -e .git ; then \ @@ -369,7 +373,9 @@ $(abs_top_builddir)/ChangeLog: tools/gitlog2changelog.py dummy-stamp echo "Using still-valid ChangeLog file generated earlier from same revision of Git source metadata in '$${NUT_GITDIR}'" >&2 ; \ else \ echo " DOC-CHANGELOG-GENERATE $@" ; \ - CHANGELOG_FILE="$@" $(WITH_PDF_NONASCII_TITLES_ENVVAR) $(abs_top_builddir)/tools/gitlog2changelog.py $(GITLOG_START_POINT) || { \ + CHANGELOG_FILE="$@" $(WITH_PDF_NONASCII_TITLES_ENVVAR) \ + CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR="$(CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR_ENVVAR)" \ + $(abs_top_builddir)/tools/gitlog2changelog.py $(GITLOG_START_POINT) || { \ echo " DOC-CHANGELOG-GENERATE $@ : FAILED (non-fatal)" >&2 ; \ printf "gitlog2changelog.py failed to generate the ChangeLog.\n\nNOTE: See https://github.com/networkupstools/nut/commits/master for change history.\n\n" > "$@" ; \ } ; \ diff --git a/NEWS.adoc b/NEWS.adoc index 0ee525b938..6ba1eb092d 100644 --- a/NEWS.adoc +++ b/NEWS.adoc @@ -213,6 +213,15 @@ during a NUT build. - added a `make distcheck-light-man` recipe to require verification that the manual page files can be built using the prepared "tarball" archive. [#2473] + - revised the documentation building recipes, with the goal to avoid building + the `ChangeLog` products and their intermediate files more than once (but + still react to `git` metadata changes during development), and to sanity + check the resulting final document (currently only for `html-single` mode). + As part of this, the `CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR` setting was + added (for `make` calls and used by `tools/gitlog2changelog.py.in` script), + and it defaults to `true` allowing for better ordered documents at the cost + of some memory during document generation. 
[#2510] + - added a `common/Makefile.am` build product for a new internal library `libcommonstr.la` which allows a smaller selection of helper methods for tools like `nut-scanner` which do not need the full `libcommon.la` diff --git a/UPGRADING.adoc b/UPGRADING.adoc index 7580eda30e..16c8673c55 100644 --- a/UPGRADING.adoc +++ b/UPGRADING.adoc @@ -38,6 +38,13 @@ Changes from 2.8.2 to 2.8.3 pages which are delivered automatically. Packaging recipes can likely be simplified now. [#2445] +- A `CHANGELOG_REQUIRE_GROUP_BY_DATE_AUTHOR` setting was added (for `make` + calls and used by `tools/gitlog2changelog.py.in` script), and it defaults + to `true` allowing for better ordered documents at the cost of some memory + during document generation. Resource-constrained builders (working from + a Git workspace, not tarball archives) may have to set it to `false` when + calling `make` for NUT. [#2510] + - NUT products like `nut-scanner`, which dynamically load shared libraries at run-time without persistent pre-linking, should now know the library file names that were present during build (likely encumbered with version