Add alias feature (not yet stable) - bbl - Read, search and index the Bible on the command line -- Greek, Latin, KJV, Knox, RSV, and more

commit ec33ba0e4a377fe313e8de3aac0a2cf7d3861de2
parent 78211e76872d631430015511c558f3bbf6588df4
Author: Wilson Gheen <wilson@wilsonrgheen.com>
Date:   Sun,  7 Aug 2022 10:36:20 -0500

Add alias feature (not yet stable)

Diffstat:
A .gitattributes  | 1 +
M Makefile  | 17 +++++++++++++++--
M bbl.awk  | 182 ++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
M bbl.sh  | 41 ++++++++++++++++++++++++++++++++++-------
A readings/bibles/bibles.aliases  | 5 +++++
R drb.tsv -> readings/bibles/drb.tsv  | 0 
R grb.tsv -> readings/bibles/grb.tsv  | 0 
R heb.tsv -> readings/bibles/heb.tsv  | 0 
R kjv.tsv -> readings/bibles/kjv.tsv  | 0 
R knx.tsv -> readings/bibles/knx.tsv  | 0 
R njb.tsv -> readings/bibles/njb.tsv  | 0 
R rsv.tsv -> readings/bibles/rsv.tsv  | 0 
R vul.tsv -> readings/bibles/vul.tsv  | 0 
R lapoe.tsv -> readings/other/lapoe.tsv  | 0

14 files changed, 165 insertions(+), 81 deletions(-)
diff --git a/.gitattributes b/.gitattributes
@@ -0,0 +1 @@
+Makefile filter=hideMakeComments
diff --git a/Makefile b/Makefile
@@ -1,11 +1,24 @@
 
 PREFIX = /usr/local
 
+# Build the self-extracting script: bbl.sh, an "exit 0", an "#EOF" marker and
+# then a gzipped tar holding bbl.awk plus every reading under readings/*/.
+# Inside each directory the <dir>.aliases file is appended first, and each
+# <name>.aliases is appended just before its <name>.tsv. The tar is assembled
+# uncompressed and gzipped afterwards because members are added with `tar r`.
+# `set -e` makes any failing cd/tar abort the recipe instead of being ignored.
-bbl: bbl.sh bbl.awk *.tsv
+bbl: bbl.sh bbl.awk readings/*/*.tsv readings/*/*.aliases
 	cat bbl.sh > $@
 	echo 'exit 0' >> $@
 	echo "#EOF" >> $@
-	tar cz bbl.awk *.tsv >> $@
+	tar cf bbl.tar bbl.awk
+	(set -e; cd readings; \
+	for d in */; do \
+		(cd "$$d"; \
+		common_aliases="$${d%/}.aliases"; \
+		if [ -f "$$common_aliases" ]; then tar rf ../../bbl.tar "$$common_aliases"; fi; \
+		for f in *.tsv; do \
+			aliases_file="$${f%.tsv}.aliases"; \
+			if [ -f "$$aliases_file" ]; then tar rf ../../bbl.tar "$$aliases_file"; fi; \
+			tar rf ../../bbl.tar "$$f"; \
+		done); \
+	done)
+	gzip -c bbl.tar >> $@
+	rm -f bbl.tar
 	chmod +x $@
 
 test: bbl.sh bbl.awk
diff --git a/bbl.awk b/bbl.awk
@@ -39,8 +39,7 @@ BEGIN {
                 re["book"] = "^[1-9]?[a-zA-Z ]+"
                 re["chsep"] = ":?"
         }
-		mode = parseref(p)
-		p["book"] = cleanbook(p["book"])
+		mode = parseref(p, ref)
 	}
 }
 
@@ -62,9 +61,10 @@ function num(str){
     return (lang == "he") ? str : int(str)
 }
 
-function parseref(arr) {
+function parseref(arr, q) {
     # NOTE: For Hebrew, the colon between book and chapter is required
 	# 1. <book>
+	# 1a. <book>[, ?<book>]...
 	# 2. <book>:?<chapter>
 	# 2a. <book>:?<chapter>[, ?<chapter>]...
 	# 3. <book>:?<chapter>:<verse>
@@ -75,93 +75,102 @@ function parseref(arr) {
 	# 6. <book>:?<chapter>[:<verse>]-<chapter>:<verse>
 	# 7. /~?<search>
 	# 8. <book>/~?search
+	# 8a. <book>[, ?<book>].../~?search
 	# 9. <book>:?<chapter>/~?search
     #10. @ <number of verses>?
     #11. <book> @ <number of verses>?
     #12. <book>:?<chapter> @ <number of verses>?
 
-	if (match(ref, re["book"])) {
-            # 1, 2, 2a, 3, 3a, 3b, 4, 5, 6, 8, 9, 11, 12
-            arr["book"] = substr(ref, 1, RLENGTH)
-            ref = substr(ref, RLENGTH + 1)
-	} else if (sub("^ */ *", "", ref)) {
+	if (match(q, re["book"])) {
+            # 1, 1a, 2, 2a, 3, 3a, 3b, 4, 5, 6, 8, 9, 11, 12
+            arr["book", cleanbook(substr(q, 1, RLENGTH))] = 1
+            q = substr(q, RLENGTH + 1)
+	} else if (sub("^ */ *", "", q)) {
             # 7
-            if (sub("^~ *", "", ref)) {
-                arr["search"] = roughpattern(ref)
+            if (sub("^~ *", "", q)) {
+                arr["search"] = roughpattern(q)
                 return "rough_search"
             } else {
-                arr["search"] = ref
+                arr["search"] = q
                 return "search"
             }
 	}
 
-	if (match(ref, sprintf("^%s%s", re["chsep"], re["num"]))) {
+	if (match(q, sprintf("^%s%s", re["chsep"], re["num"]))) {
             # 2, 2a, 3, 3a, 3b, 4, 5, 6, 9, 12
-            if (sub("^:", "", ref)) {
-                    arr["chapter"] = num(substr(ref, 1, RLENGTH - 1))
-                    ref = substr(ref, RLENGTH)
+            if (sub("^:", "", q)) {
+                    arr["chapter"] = num(substr(q, 1, RLENGTH - 1))
+                    q = substr(q, RLENGTH)
             } else {
-                    arr["chapter"] = num(substr(ref, 1, RLENGTH))
-                    ref = substr(ref, RLENGTH + 1)
+                    arr["chapter"] = num(substr(q, 1, RLENGTH))
+                    q = substr(q, RLENGTH + 1)
             }
-	} else if (sub("^ */ *", "", ref)) {
+	} else if (sub("^ */ *", "", q)) {
             # 8
-            if (sub("^~ *", "", ref)) {
-                arr["search"] = roughpattern(ref)
+            if (sub("^~ *", "", q)) {
+                arr["search"] = roughpattern(q)
                 return "rough_search"
             } else {
-                arr["search"] = ref
+                arr["search"] = q
                 return "search"
             }
-	} else if (ref == "") {
+	} else if (match(q, sprintf("(,%s)+", re["book"]))) {
+            # 1a
+            # TODO make compatible with chapter/verse/searches etc.
+            split(q, temp_arr, ",")
+            for (i in temp_arr) {
+                arr["book", temp_arr[i]] = 1
+            }
+            return "exact"
+	} else if (q == "") {
             # 1
             return "exact"
 	}
 
-	if (match(ref, sprintf("^:%s", re["num"]))) {
+	if (match(q, sprintf("^:%s", re["num"]))) {
             # 3, 3a, 3b, 5, 6
-            arr["verse"] = num(substr(ref, 2, RLENGTH - 1))
-            ref = substr(ref, RLENGTH + 1)
-	} else if (match(ref, sprintf("^-%s$", re["num"]))) {
+            arr["verse"] = num(substr(q, 2, RLENGTH - 1))
+            q = substr(q, RLENGTH + 1)
+	} else if (match(q, sprintf("^-%s$", re["num"]))) {
             # 4
-            arr["chapter_end"] = num(substr(ref, 2))
+            arr["chapter_end"] = num(substr(q, 2))
             return "range"
-	} else if (sub("^ */ *", "", ref)) {
+	} else if (sub("^ */ *", "", q)) {
             # 9
-            if (sub("^~ *", "", ref)) {
-                arr["search"] = roughpattern(ref)
+            if (sub("^~ *", "", q)) {
+                arr["search"] = roughpattern(q)
                 return "rough_search"
             } else {
-                arr["search"] = ref
+                arr["search"] = q
                 return "search"
             }
-	} else if (ref == "") {
+	} else if (q == "") {
             # 2
             return "exact"
-	} else if (match(ref, sprintf("^(, ?%s)+$", re["num"]))) {
+	} else if (match(q, sprintf("^(, ?%s)+$", re["num"]))) {
             # 2a
             arr["chapter", arr["chapter"]] = 1
             delete arr["chapter"]
-            while (match(ref, sprintf("^, ?%s", re["num"]))) {
-                    if(sub("^, ", "", ref)) {
-                        arr["chapter", substr(ref, 1, RLENGTH - 2)] = 1
-                        ref = substr(ref, RLENGTH - 1)
+            while (match(q, sprintf("^, ?%s", re["num"]))) {
+                    if(sub("^, ", "", q)) {
+                        arr["chapter", substr(q, 1, RLENGTH - 2)] = 1
+                        q = substr(q, RLENGTH - 1)
                     } else {
-                        arr["chapter", substr(ref, 2, RLENGTH - 1)] = 1
-                        ref = substr(ref, RLENGTH + 1)
+                        arr["chapter", substr(q, 2, RLENGTH - 1)] = 1
+                        q = substr(q, RLENGTH + 1)
                     }
             }
 
-            if (ref != "") {
+            if (q != "") {
                     return "unknown"
             }
 
             return "exact_ch_set"
-	} else if (match(ref, "^ *@ *")) {
+	} else if (match(q, "^ *@ *")) {
             # 10, 11, 12
-            ref = substr(ref, RLENGTH + 1)
-            if (match(ref, sprintf("^%s", re["num"]))) {
-                arr["numberOfVerses"] = num(ref)
+            q = substr(q, RLENGTH + 1)
+            if (match(q, sprintf("^%s", re["num"]))) {
+                arr["numberOfVerses"] = num(q)
             } else {
                 arr["numberOfVerses"] = 1
             }
@@ -169,52 +178,52 @@ function parseref(arr) {
             return "random"
 	}
 
-	if (match(ref, sprintf("^-%s$", re["num"]))) {
+	if (match(q, sprintf("^-%s$", re["num"]))) {
             # 5
-            arr["verse_end"] = num(substr(ref, 2))
+            arr["verse_end"] = num(substr(q, 2))
             return "range"
-	} else if (match(ref, sprintf("-%s", re["num"]))) {
+	} else if (match(q, sprintf("-%s", re["num"]))) {
             # 6
-            arr["chapter_end"] = num(substr(ref, 2, RLENGTH - 1))
-            ref = substr(ref, RLENGTH + 1)
-	} else if (ref == "") {
+            arr["chapter_end"] = num(substr(q, 2, RLENGTH - 1))
+            q = substr(q, RLENGTH + 1)
+	} else if (q == "") {
             # 3
             return "exact"
-	} else if (match(ref, sprintf("^(, ?%s)+$", re["num"]))) {
+	} else if (match(q, sprintf("^(, ?%s)+$", re["num"]))) {
             # 3a
             arr["verse", arr["verse"]] = 1
             delete arr["verse"]
-            while (match(ref, sprintf("^, ?%s", re["num"]))) {
-                    if(sub("^, ", "", ref)) {
-                        arr["verse", substr(ref, 1, RLENGTH - 2)] = 1
-                        ref = substr(ref, RLENGTH - 1)
+            while (match(q, sprintf("^, ?%s", re["num"]))) {
+                    if(sub("^, ", "", q)) {
+                        arr["verse", substr(q, 1, RLENGTH - 2)] = 1
+                        q = substr(q, RLENGTH - 1)
                     } else {
-                        arr["verse", substr(ref, 2, RLENGTH - 1)] = 1
-                        ref = substr(ref, RLENGTH + 1)
+                        arr["verse", substr(q, 2, RLENGTH - 1)] = 1
+                        q = substr(q, RLENGTH + 1)
                     }
             }
 
-            if (ref != "") {
+            if (q != "") {
                     return "unknown"
             }
 
             return "exact_set"
-        } else if (match(ref, sprintf("^, ?%s:%s", re["num"], re["num"]))) {
+        } else if (match(q, sprintf("^, ?%s:%s", re["num"], re["num"]))) {
             # 3b
             arr["chapter:verse", arr["chapter"] ":" arr["verse"]] = 1
             delete arr["chapter"]
             delete arr["verse"]
             do {
-                    if(sub("^, ", "", ref)) {
-                        arr["chapter:verse", substr(ref, 1, RLENGTH - 2)] = 1
-                        ref = substr(ref, RLENGTH - 1)
+                    if(sub("^, ", "", q)) {
+                        arr["chapter:verse", substr(q, 1, RLENGTH - 2)] = 1
+                        q = substr(q, RLENGTH - 1)
                     } else {
-                        arr["chapter:verse", substr(ref, 2, RLENGTH - 1)] = 1
-                        ref = substr(ref, RLENGTH + 1)
+                        arr["chapter:verse", substr(q, 2, RLENGTH - 1)] = 1
+                        q = substr(q, RLENGTH + 1)
                     }
-            } while (match(ref, sprintf("^, ?%s:%s", re["num"])))
+            } while (match(q, sprintf("^, ?%s:%s", re["num"])))
 
-            if (ref != "") {
+            if (q != "") {
                     return "unknown"
             }
 
@@ -223,9 +232,9 @@ function parseref(arr) {
             return "unknown"
 	}
 
-	if (match(ref, sprintf("^:%s$", re["num"]))) {
+	if (match(q, sprintf("^:%s$", re["num"]))) {
             # 6
-            arr["verse_end"] = num(substr(ref, 2))
+            arr["verse_end"] = num(substr(q, 2))
             return "range_ext"
 	} else {
             return "unknown"
@@ -238,22 +247,30 @@ function cleanbook(book) {
 	return book
 }
 
+#TODO !!! Fix or use only internally
+# Decide whether a data row's book satisfies a requested book name.
+#   book, bookabbr - full name and abbreviation of the row's book
+#   query          - the requested name, compared as-is (not cleaned here)
+# Returns the cleaned book name when query equals the cleaned book name,
+# equals the cleaned abbreviation, or is a leading prefix of the cleaned
+# book name. Otherwise falls off the end and returns nothing.
 function bookmatches(book, bookabbr, query) {
 	book = cleanbook(book)
 	if (book == query) {
 		return book
 	}
-
 	bookabbr = cleanbook(bookabbr)
 	if (bookabbr == query) {
 		return book
 	}
-
+    #TODO !!! length(query) == 0?
+    # With an empty query substr(book, 1, 0) is "" and equals query, so this
+    # prefix test succeeds for every book.
 	if (substr(book, 1, length(query)) == query) {
 		return book
 	}
 }
 
+# Report whether the given book was requested: returns book as soon as one
+# "book" entry of the global array p matches it through bookmatches(), and
+# returns nothing when no entry matches.
+function hasbook(book, bookabbr,    key, wanted) {
+    for (key in p) {
+        wanted = key
+        # Only keys shaped like "book" SUBSEP <name> are book requests;
+        # sub() strips that prefix and leaves the requested name.
+        if (!sub("^book" SUBSEP, "", wanted)) {
+            continue
+        }
+        if (bookmatches(book, bookabbr, wanted)) {
+            return book
+        }
+    }
+}
+
 function roughpattern(regex) {
     # TODO Can mess with search pattern if regex is used on command line
     regex = tolower(regex)
@@ -309,6 +326,17 @@ function printverse(verse, word_count, characters_printed) {
     }
 }
 
+# Expand an alias the user asked for into the books it stands for.
+#   alias, aliasabbr - full name and abbreviation of the alias
+#   book_names       - comma-separated list of the books the alias covers
+# When a "book" entry of the global request array p matches the alias (see
+# hasbook), the alias's own entries are removed and one entry per listed book
+# is added. Every key goes through cleanbook() because bookmatches() compares
+# the cleaned book name against the stored key, and parseref stores its keys
+# cleaned as well.
+function process_alias(alias, aliasabbr, book_names,      arr, n, j) {
+    if (hasbook(alias, aliasabbr)) {
+        delete p["book", cleanbook(alias)]
+        delete p["book", cleanbook(aliasabbr)]
+        # n and j are locals, so the global i used by parseref is untouched.
+        n = split(book_names, arr, ",")
+        for (j = 1; j <= n; j++) {
+            p["book", cleanbook(arr[j])] = 1
+        }
+    }
+}
+
 function processline() {
     newbook = (last_book_printed != $2)
 	if (newbook) {
@@ -351,7 +379,17 @@ function processline() {
 	outputted_records++
 }
 
-cmd == "ref" && mode == "exact" && bookmatches($1, $2, p["book"]) && (p["chapter"] == "" || $4 == p["chapter"]) && (p["verse"] == "" || $5 == p["verse"]) {
+
+# Alias header (cmd == "ref" only): until the first record that starts with
+# "#", every record is handed to process_alias() as $1, $2, $3 and then
+# consumed with `next`. The "#" record sets header_ended and is not consumed
+# here, so it falls through to the rules below.
+# NOTE(review): nothing visible resets header_ended, so only the first "#"
+# record of the whole input ends the header; and if the input has no "#"
+# record at all, every record is consumed here. Confirm that each reading is
+# always preceded by exactly one alias header.
+cmd == "ref" && !header_ended {
+    if (/^#/) {
+        header_ended = 1
+    } else {
+        process_alias($1, $2, $3)
+        next
+    }
+}
+
+cmd == "ref" && mode == "exact" && hasbook($1, $2) && (p["chapter"] == "" || $4 == p["chapter"]) && (p["verse"] == "" || $5 == p["verse"]) {
 	processline()
 }
 
@@ -360,7 +398,7 @@ cmd == "ref" && mode == "random" && (p["book"] == "" || bookmatches($1, $2, p["b
     outputted_records++
 }
 
-cmd == "ref" && mode == "exact_ch_set" && bookmatches($1, $2, p["book"]) && p["chapter", $4] {
+cmd == "ref" && mode == "exact_ch_set" && hasbook($1, $2) && p["chapter", $4] {
 	processline()
 }
 
diff --git a/bbl.sh b/bbl.sh
@@ -6,20 +6,45 @@ SELF="$0"
 BIBLE=""
 
+# Archive helpers. Everything after the "#EOF" line of this script ($SELF) is
+# piped to tar as a gzip-compressed archive.
+#
+# data_exists: list the archive members named by the arguments; tar's exit
+# status tells the caller whether they were found.
 data_exists() {
-    sed '1,/^#EOF$/d' < "$SELF" | tar tz "$1.tsv" >/dev/null 2>&1
+    sed '1,/^#EOF$/d' < "$SELF" | tar tz "$@"
+}
+# ls_archive: print the name of every archive member.
+ls_archive() {
+    # tar tz with no arguments just lists everything
+    data_exists
+}
+# reading_exists: succeed silently when "$1.tsv" is in the archive.
+reading_exists() {
+    data_exists "$1.tsv" >/dev/null 2>&1
 }
+# get_data: write the contents of the named members to stdout (tar -O).
 get_data() {
-    sed '1,/^#EOF$/d' < "$SELF" | tar xz -O "$1"
+    sed '1,/^#EOF$/d' < "$SELF" | tar xz -O "$@"
+}
+# Write to stdout the contents of those named archive members that actually
+# exist; names missing from the archive are skipped without error.
+# Member names are split on whitespace, so they must not contain any.
+get_data_if_exists() {
+    list=$(ls_archive)
+    members=$(for arg in "$@"; do
+        # -F: names contain dots, which must not act as regex wildcards.
+        printf '%s\n' "$list" | grep -Fx -e "$arg"
+    done)
+    # With no member operands tar would extract the entire archive, so
+    # produce no output at all when nothing requested is present.
+    [ -n "$members" ] || return 0
+    get_data $members
+}
+# Print the alias file names that apply to reading $1: always "$1.aliases",
+# preceded by the shared "bibles.aliases" for the listed Bible readings.
+get_aliases() {
+    case "$1" in
+        drb|grb|heb|kjv|knx|njb|rsv|vul)
+            aliases="bibles.aliases $1.aliases" ;;
+        *)
+            aliases="$1.aliases" ;;
+    esac
+    echo "$aliases"
+}
+get_reading() {
+    get_data_if_exists $(get_aliases "$1") $1.tsv
 }
 get_ref() {
     # Thank you, StackExchange. This will cause $PAGER to give the same exit code
     # that bbl would have given, so that a nonzero exit code can be used in scripts
     # to know that the reference returned no results.
-    { { { { get_data "$1" | awk -v cmd=ref -v ref="$2" -v cross_ref="$3" -v lang="$lang" "$(get_data bbl.awk)"; echo $? >&3; } | ${PAGER} >&4; } 3>&1; } | { read xs; exit $xs; } } 4>&1
+    r="$1" shift
+    cr="$1" shift
+    { { { { get_reading "$r" | awk -v cmd=ref -v ref="$*" -v cross_ref="$cr" -v lang="$lang" "$(get_data bbl.awk)"; echo $? >&3; } | ${PAGER} >&4; } 3>&1; } | { read xs; exit $xs; } } 4>&1
 }
+# Run bbl.awk with cmd=list over the first reading named in $BIBLE and then
+# over each alias file get_aliases reports for it, page the combined output
+# through $PAGER, and exit the script.
 list_books() {
     reading="$(echo "${BIBLE}" | cut -d " " -f 1)"
-    get_data "$reading" 2>/dev/null | awk -v cmd=list "$(get_data bbl.awk)" | ${PAGER}
+    # The .tsv is listed first, then the alias files; each is fed to its own
+    # awk run and bbl.awk is extracted again on every iteration.
+    for f in $reading.tsv $(get_aliases "$reading"); do
+        get_data_if_exists "$f" 2>/dev/null | awk -v cmd=list "$(get_data bbl.awk)"
+    done | ${PAGER}
     exit
 }
 list_readings() {
@@ -71,6 +96,8 @@ show_help() {
     echo
     echo "  Reference types:"
     echo "  NOTE: The colon between book and chapter is required for Hebrew, optional for everything else."
+    echo " <Book> can refer either to the name of a book, or an alias referring to a list of books."
+    echo " Specify the -l flag to get list of both books and aliases"
     echo " References for Hebrew must be in Hebrew; for all else, must be in English."
     echo "      <Book>"
     echo "          Individual book"
@@ -139,7 +166,7 @@ while [ $# -gt 0 ]; do
         -o)
                 shift
                 nocrossref='y'
-                data_exists "$1" && set_bible "$1" ||
+                reading_exists "$1" && set_bible "$1" ||
                 { echo "Error: $1.tsv not found."; exit 1
                 }
                 shift ;;
@@ -233,7 +260,7 @@ if [ $# -eq 0 ]; then
         if ! read -r ref; then
             break
         fi
-        get_ref "$b" "$ref"
+        get_ref "$b" "" "$ref"
     done
     exit 0
 fi
@@ -245,7 +272,7 @@ exitCode=0
 atLeastOneSuccess=''
 for version in $BIBLE; do
     filename="${myTempDir}/${i}-${version}.txt"
-    get_ref "$version.tsv" "$*" "$i" > "$filename"
+    get_ref "$version" "$i" "$@" > "$filename"
     [ $? -ne 0 ] && exitCode=1 || atLeastOneSuccess='y'
     i=$((i + 1))
 done
diff --git a/readings/bibles/bibles.aliases b/readings/bibles/bibles.aliases
@@ -0,0 +1,5 @@
+Epistles of Paul	Pa	Romans,1 Corinthians,2 Corinthians,Galatians,Ephesians,Philippians,Colossians,1 Thessalonians,2 Thessalonians,1 Timothy,2 Timothy,Titus,Philemon,Hebrews
+Peter	Pe	1 Peter,2 Peter
+Epistles of John	Ej	1 John,2 John,3 John
+Epistles	Epi	Epistles of Paul,James,Peter,Epistles of John,Jude
+#
diff --git a/drb.tsv b/readings/bibles/drb.tsv
diff --git a/grb.tsv b/readings/bibles/grb.tsv
diff --git a/heb.tsv b/readings/bibles/heb.tsv
diff --git a/kjv.tsv b/readings/bibles/kjv.tsv
diff --git a/knx.tsv b/readings/bibles/knx.tsv
diff --git a/njb.tsv b/readings/bibles/njb.tsv
diff --git a/rsv.tsv b/readings/bibles/rsv.tsv
diff --git a/vul.tsv b/readings/bibles/vul.tsv
diff --git a/lapoe.tsv b/readings/other/lapoe.tsv

	bbl Read, search and index the Bible on the command line -- Greek, Latin, KJV, Knox, RSV, and more
	git clone git://git.wilsonrgheen.com/bbl
	Log \| Files \| Refs \| README \| LICENSE

A	.gitattributes	\|	1	+
M	Makefile	\|	17	+++++++++++++++--
M	bbl.awk	\|	182	++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
M	bbl.sh	\|	41	++++++++++++++++++++++++++++++++++-------
A	readings/bibles/bibles.aliases	\|	5	+++++
R	drb.tsv -> readings/bibles/drb.tsv	\|	0
R	grb.tsv -> readings/bibles/grb.tsv	\|	0
R	heb.tsv -> readings/bibles/heb.tsv	\|	0
R	kjv.tsv -> readings/bibles/kjv.tsv	\|	0
R	knx.tsv -> readings/bibles/knx.tsv	\|	0
R	njb.tsv -> readings/bibles/njb.tsv	\|	0
R	rsv.tsv -> readings/bibles/rsv.tsv	\|	0
R	vul.tsv -> readings/bibles/vul.tsv	\|	0
R	lapoe.tsv -> readings/other/lapoe.tsv	\|	0