Improve lint compliance and general code quality (bbl.awk) and add an AWK functions file - bbl - Read, search and index the Bible on the command line -- Greek, Latin, KJV, Knox, RSV, and more

commit 387d074f9b2ff488a0e54839572261791defb613
parent 10f20ad5e463e8a6143a1c50dd3c4fac827330d9
Author: Wilson Gheen <wilson@wilsonrgheen.com>
Date:   Sun, 21 Aug 2022 16:05:38 -0500

Improve lint compliance and general code quality (bbl.awk) and add an AWK functions file

Diffstat:
M Makefile  | 6 +++---
M bbl.awk  | 50 ++++++++++++++++++++++++--------------------------
M bbl.sh  | 5 ++++-
A input.awk  | 30 ++++++++++++++++++++++++++++++

4 files changed, 61 insertions(+), 30 deletions(-)
diff --git a/Makefile b/Makefile
@@ -5,7 +5,7 @@ bbl: bbl.sh bbl.awk readings/*/*.tsv readings/*/*.aliases
 	cat bbl.sh > $@
 	echo 'exit 0' >> $@
 	echo "#EOF" >> $@
-	tar cf bbl.tar bbl.awk
+	tar cf bbl.tar input.awk bbl.awk
 	(cd readings && \
 	for d in $$(find . -mindepth 1 -maxdepth 1 -type d -printf '%f\n'); do \
 		(cd "$$d" && \
@@ -21,9 +21,9 @@ bbl: bbl.sh bbl.awk readings/*/*.tsv readings/*/*.aliases
 	rm -f bbl.tar
 	chmod +x $@
 
-test: bbl.sh bbl.awk
+test: bbl.sh input.awk bbl.awk
 	shellcheck -s sh -S error bbl.sh
-	echo -n | gawk --lint=invalid -f bbl.awk
+	echo -n | gawk --lint=fatal -f input.awk -f bbl.awk
 
 clean:
 	rm -f bbl
diff --git a/bbl.awk b/bbl.awk
@@ -7,7 +7,9 @@ BEGIN {
 	#  $6 Verse
 	FS = "\t"
 
-	MAX_WIDTH = 80
+    header_ended = 0
+    outputted_records = 0
+	MAX_WIDTH = envint("KJV_MAX_WIDTH", 80, 8, 80)
     NO_LINE_WRAP = envbool("KJV_NOLINEWRAP")
     NO_VERSE_NUMBERS = envbool("KJV_NOVERSENUMBERS")
     NO_CHAPTER_HEADINGS = envbool("KJV_NOCHAPTERHEADINGS")
@@ -22,11 +24,10 @@ BEGIN {
         NO_TITLE = 1
         NO_VERSE_BREAK = 1
     }
-	if (ENVIRON["KJV_MAX_WIDTH"] ~ /^[0-9]+$/) {
-		if (int(ENVIRON["KJV_MAX_WIDTH"]) < MAX_WIDTH) {
-			MAX_WIDTH = int(ENVIRON["KJV_MAX_WIDTH"])
-		}
-	}
+
+    if (!is_set(cmd)) {
+        cmd = "list"
+    }
 
 	if (cmd == "ref") {
         if (lang == "he") {
@@ -54,14 +55,7 @@ cmd == "list" {
 	}
 }
 
-function envbool(str){
-    return ENVIRON[str] != "" && ENVIRON[str] != "0"
-}
-function num(str){
-    return (lang == "he") ? str : int(str)
-}
-
-function parseref(arr, q) {
+function parseref(arr, q,       i) {
     # NOTE: For Hebrew, the colon between book and chapter is required
 	# 1. <book>
 	# 1a. <book>[, ?<book>]...
@@ -249,7 +243,6 @@ function cleanbook(book) {
 	return book
 }
 
-#TODO !!! Fix or use only internally
 function bookmatches(book, bookabbr, query) {
 	book = cleanbook(book)
 	if (book == query) {
@@ -259,7 +252,6 @@ function bookmatches(book, bookabbr, query) {
 	if (bookabbr == query) {
 		return book
 	}
-    #TODO !!! length(query) == 0?
 	if (substr(book, 1, length(query)) == query) {
 		return book
 	}
@@ -294,7 +286,7 @@ function roughpattern(regex) {
     return regex
 }
 
-function printverse(verse, word_count, characters_printed) {
+function printverse(verse, word_count, characters_printed,     i) {
 	if (NO_LINE_WRAP) {
         if (NO_VERSE_BREAK) {
             printf("%s ", verse)
@@ -328,7 +320,7 @@ function printverse(verse, word_count, characters_printed) {
     }
 }
 
-function process_alias(alias, aliasabbr, book_names,      arr) {
+function process_alias(alias, aliasabbr, book_names,      arr, i) {
     if (hasbook(alias, aliasabbr)) {
         delete p["book", cleanbook(alias)]
         delete p["book", cleanbook(aliasabbr)]
@@ -395,7 +387,7 @@ cmd == "ref" && mode == "exact" && hasbook($1, $2) && (p["chapter"] == "" || $4 
 	processline()
 }
 
-cmd == "ref" && mode == "random" && (p["book"] == "" || bookmatches($1, $2, p["book"])) && (p["chapter"] == "" || $4 == p["chapter"]) {
+cmd == "ref" && mode == "random" && (p["book"] == "" || hasbook($1, $2)) && (p["chapter"] == "" || $4 == p["chapter"]) {
     print
     outputted_records++
 }
@@ -404,29 +396,35 @@ cmd == "ref" && mode == "exact_ch_set" && hasbook($1, $2) && p["chapter", $4] {
 	processline()
 }
 
-cmd == "ref" && mode == "exact_set" && bookmatches($1, $2, p["book"]) && (((p["chapter"] == "" || $4 == p["chapter"]) && p["verse", $5]) || p["chapter:verse", $4 ":" $5]) {
+cmd == "ref" && mode == "exact_set" && hasbook($1, $2) && (((p["chapter"] == "" || $4 == p["chapter"]) && p["verse", $5]) || p["chapter:verse", $4 ":" $5]) {
 	processline()
 }
 
-cmd == "ref" && mode == "range" && bookmatches($1, $2, p["book"]) && ((p["chapter_end"] == "" && $4 == p["chapter"]) || ($4 >= p["chapter"] && $4 <= p["chapter_end"])) && (p["verse"] == "" || $5 >= p["verse"]) && (p["verse_end"] == "" || $5 <= p["verse_end"]) {
+cmd == "ref" && mode == "range" && hasbook($1, $2) && ((p["chapter_end"] == "" && $4 == p["chapter"]) || ($4 >= p["chapter"] && $4 <= p["chapter_end"])) && (p["verse"] == "" || $5 >= p["verse"]) && (p["verse_end"] == "" || $5 <= p["verse_end"]) {
 	processline()
 }
 
-cmd == "ref" && mode == "range_ext" && bookmatches($1, $2, p["book"]) && (($4 == p["chapter"] && $5 >= p["verse"] && p["chapter"] != p["chapter_end"]) || ($4 > p["chapter"] && $4 < p["chapter_end"]) || ($4 == p["chapter_end"] && $5 <= p["verse_end"] && p["chapter"] != p["chapter_end"]) || (p["chapter"] == p["chapter_end"] && $4 == p["chapter"] && $5 >= p["verse"] && $5 <= p["verse_end"])) {
+cmd == "ref" && mode == "range_ext" && hasbook($1, $2) && (($4 == p["chapter"] && $5 >= p["verse"] && p["chapter"] != p["chapter_end"]) || ($4 > p["chapter"] && $4 < p["chapter_end"]) || ($4 == p["chapter_end"] && $5 <= p["verse_end"] && p["chapter"] != p["chapter_end"]) || (p["chapter"] == p["chapter_end"] && $4 == p["chapter"] && $5 >= p["verse"] && $5 <= p["verse_end"])) {
 	processline()
 }
 
-cmd == "ref" && (mode == "search" || mode == "rough_search") && (p["book"] == "" || bookmatches($1, $2, p["book"])) && (p["chapter"] == "" || $4 == p["chapter"]) && match(mode == "rough_search" ? tolower($6) : $6, p["search"]) {
+cmd == "ref" && (mode == "search" || mode == "rough_search") && (p["book"] == "" || hasbook($1, $2)) && (p["chapter"] == "" || $4 == p["chapter"]) && match(mode == "rough_search" ? tolower($6) : $6, p["search"]) {
 	processline()
 }
 
 END {
 	if (cmd == "ref") {
         if (outputted_records == 0) {
-		    print "Unknown reference: " ref
+            if (!is_set(ref)) {
+                print "Opted to search by ref but no ref was specified"
+            } else {
+                print "Unknown reference: " ref
+            }
 		    exit 1
-        } else if (mode == "random") {
+        } else if (is_set(mode) && mode == "random") {
             printf("~~~RANDOMS: %d\n", p["numberOfVerses"])
         }
-	}
+    } else if (cmd != "list" && cmd != "clean") {
+        print "Unknown cmd specified: " cmd
+    }
 }
diff --git a/bbl.sh b/bbl.sh
@@ -18,6 +18,9 @@ reading_exists() {
 get_data() {
     sed '1,/^#EOF$/d' < "$SELF" | tar xz -O "$@"
 }
+get_awk() {
+    get_data input.awk bbl.awk
+}
 get_data_if_exists() {
     list=$(ls_archive)
     get_data $(for arg in "$@"; do
@@ -291,7 +294,7 @@ else
         linesInFile=$(($(wc -l "$filename" | awk '{print $1}') - 1))
         sedCmd=$(shuf -i 1-$linesInFile -n "$numberOfVerses" | sort -n | tr '\n' ' ' | sed 's/ /p;/g' | sed 's/..$/{p;q}/')
         sed -n "$sedCmd" "$filename" > "${myTempDir}/randomVerses"
-        awk -v cmd=clean "$(get_data bbl.awk)" "${myTempDir}/randomVerses" 2>/dev/null > "${filename}"
+        awk -v cmd=clean "$(get_awk)" "${myTempDir}/randomVerses" 2>/dev/null > "${filename}"
     fi
 fi
 
diff --git a/input.awk b/input.awk
@@ -0,0 +1,30 @@
+function is_set(scalar,     oldlint, isset){ # Reports whether scalar has been assigned a value; oldlint and isset are locals
+    oldlint = LINT # Remember the current lint setting so it can be restored below
+    LINT = 0 # Silence gawk's lint warning about reading an unset variable
+    isset = (scalar != "" || scalar != 0) # False only when scalar compares equal to both "" and 0, as an uninitialized variable does
+    LINT = oldlint
+    return isset
+}
+function envbool(str){ # True when environment variable str exists and is neither empty nor "0"
+    return str in ENVIRON && ENVIRON[str] != "" && ENVIRON[str] != "0" # "in" is tested first, so a missing key is never subscripted
+}
+function envint(str, default_value, min, max, regex,     i){ # Integer read from environment variable str, limited by min/max when given; i is a local
+    if (!is_set(regex)) { # Caller omitted regex: accept an optionally negative run of decimal digits
+        regex = "^-?[0-9]+$"
+    }
+	if (str in ENVIRON && ENVIRON[str] ~ regex) {
+        i = int(ENVIRON[str])
+        if (is_set(min) && i < min) { # A bound applies only when the caller passed it; min is checked before max
+            return min
+        } else if (is_set(max) && i > max) {
+            return max
+        } else {
+            return i
+        }
+	} else { # Variable is absent or does not match regex: fall back to the default
+        return default_value
+    }
+}
+function num(str){ # Returns str unchanged when lang is "he", otherwise its integer value
+    return (lang == "he") ? str : int(str) # lang is a global defined outside this function
+}

	bbl Read, search and index the Bible on the command line -- Greek, Latin, KJV, Knox, RSV, and more
	git clone git://git.wilsonrgheen.com/bbl
	Log \| Files \| Refs \| README \| LICENSE

M	Makefile	\|	6	+++---
M	bbl.awk	\|	50	++++++++++++++++++++++++--------------------------
M	bbl.sh	\|	5	++++-
A	input.awk	\|	30	++++++++++++++++++++++++++++++