bbl

Read, search and index the Bible on the command line -- Greek, Latin, KJV, Knox, RSV, and more
git clone git://git.wilsonrgheen.com/bbl
Log | Files | Refs | README | LICENSE

commit 9d4050b3b810fba38b16ae978b581f32fb4aec30
parent 4808273f467c733dedd955598dee54bdfd53f549
Author: Wilson Gheen <wilson@wilsonrgheen.com>
Date:   Mon, 21 Feb 2022 06:42:34 -0600

Implemented approximate searching

Diffstat:
M README.md | 11 +++++++----
M bbl.awk | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++-------------
M bbl.sh | 18 ++++++++++++------
M grb.tsv | 1 -
4 files changed, 70 insertions(+), 24 deletions(-)

diff --git a/README.md b/README.md @@ -41,12 +41,15 @@ usage: bbl [flags] [bible] [reference...] <Book>:<Chapter>:<Verse>-<Chapter>:<Verse> Range of chapters and verses in a book - /<Search> + /~?<Search> All verses that match a pattern - <Book>/<Search> + <Book>/~?<Search> All verses in a book that match a pattern - <Book>:<Chapter>/<Search> + <Book>:<Chapter>/~?<Search> All verses in a chapter of a book that match a pattern + In searches, the optional ~ indicates that the search should be approximate: + Case and accent marks will be disregarded. Note that this will often take + much longer than an exact search @ <Number-of-Verses>?" Random verse or assortment of verses from any book/chapter" @@ -59,7 +62,7 @@ usage: bbl [flags] [bible] [reference...] ## Note The default behaviour (without a flag to specify the Bible version) is to print from the Knox Bible. -This is easy to change by changing line 7 of bbl.sh (BIBLE=knx) to reflect the three-letter abbreviation of your translation of choice. +This is easy to change by changing the line before the default of the main case statement (close to line 125) of bbl.sh (set_bible knx) to reflect the three-letter abbreviation of your translation of choice. One can easily extend this program ad nauseam by simply adding new .tsv files and updating the getopt and the case statement in bbl.sh accordingly. ## Install diff --git a/bbl.awk b/bbl.awk @@ -41,9 +41,9 @@ function parseref(ref, arr) { # 4. <book>:?<chapter>-<chapter> # 5. <book>:?<chapter>:<verse>-<verse> # 6. <book>:?<chapter>:<verse>-<chapter>:<verse> - # 7. /<search> - # 8. <book>/search - # 9. <book>:?<chapter>/search + # 7. /~?<search> + # 8. <book>/~?search + # 9. <book>:?<chapter>/~?search #10. @ <number of verses>? #11. <book> @ <number of verses>? #12. <book>:?<chapter> @ <number of verses>? 
@@ -52,10 +52,15 @@ function parseref(ref, arr) { # 1, 2, 3, 3a, 3b, 4, 5, 6, 8, 9, 11, 12 arr["book"] = substr(ref, 1, RLENGTH) ref = substr(ref, RLENGTH + 1) - } else if (match(ref, "^/")) { + } else if (sub("^ */ *", "", ref)) { # 7 - arr["search"] = substr(ref, 2) - return "search" + if (sub("^~ *", "", ref)) { + arr["search"] = roughpattern(ref) + return "rough_search" + } else { + arr["search"] = ref + return "search" + } } if (match(ref, "^:?[1-9]+[0-9]*")) { @@ -67,10 +72,15 @@ function parseref(ref, arr) { arr["chapter"] = int(substr(ref, 1, RLENGTH)) ref = substr(ref, RLENGTH + 1) } - } else if (match(ref, "^/")) { + } else if (sub("^ */ *", "", ref)) { # 8 - arr["search"] = substr(ref, 2) - return "search" + if (sub("^~ *", "", ref)) { + arr["search"] = roughpattern(ref) + return "rough_search" + } else { + arr["search"] = ref + return "search" + } } else if (ref == "") { # 1 return "exact" @@ -84,10 +94,15 @@ function parseref(ref, arr) { # 4 arr["chapter_end"] = int(substr(ref, 2)) return "range" - } else if (match(ref, "^/")) { + } else if (sub("^ */ *", "", ref)) { # 9 - arr["search"] = substr(ref, 2) - return "search" + if (sub("^~ *", "", ref)) { + arr["search"] = roughpattern(ref) + return "rough_search" + } else { + arr["search"] = ref + return "search" + } } else if (ref == "") { # 2 return "exact" @@ -190,6 +205,29 @@ function bookmatches(book, bookabbr, query) { } } +function roughpattern(regex) { + # TODO Can mess with search pattern if regex is used on command line + regex = tolower(regex) + switch(lang) { + case "el": + polytonic["α"] = "[αάἀ-ἆὰᾀ-ᾆᾳ-ᾷ]" + polytonic["ε"] = "[εέἐ-ἕὲ]" + polytonic["η"] = "[ηήἠ-ἧὴᾐ-ᾗῃ-ῇ]" + polytonic["ι"] = "[ιίΐϊἰ-ἷὶῒ-ῖ]" + polytonic["ο"] = "[οόὀ-ὅὸ]" + polytonic["υ"] = "[υΰϋύὐ-ὗὺῢῦ]" + polytonic["ω"] = "[ωώὠ-ὧὼᾠ-ᾧῳ-ῷ]" + for (letter in polytonic) { + gsub(letter, polytonic[letter], regex) + } + break + case "la": + gsub("e", "[eë]", regex) + break + } + return regex +} + function printverse(verse, word_count, 
characters_printed) { if (NO_LINE_WRAP) { printf("%s\n", verse) @@ -254,7 +292,7 @@ cmd == "ref" && mode == "range_ext" && bookmatches($1, $2, p["book"]) && (($4 == processline() } -cmd == "ref" && mode == "search" && (p["book"] == "" || bookmatches($1, $2, p["book"])) && (p["chapter"] == "" || $4 == p["chapter"]) && match(tolower($6), tolower(p["search"])) { +cmd == "ref" && (mode == "search" || mode == "rough_search") && (p["book"] == "" || bookmatches($1, $2, p["book"])) && (p["chapter"] == "" || $4 == p["chapter"]) && match(mode == "rough_search" ? tolower($6) : $6, p["search"]) { processline() } diff --git a/bbl.sh b/bbl.sh @@ -51,12 +51,15 @@ show_help() { echo " <Book>:<Chapter>:<Verse>-<Chapter>:<Verse>" echo " Range of chapters and verses in a book" echo - echo " /<Search>" + echo " /~?<Search>" echo " All verses that match a pattern" - echo " <Book>/<Search>" + echo " <Book>/~?<Search>" echo " All verses in a book that match a pattern" - echo " <Book>:<Chapter>/<Search>" + echo " <Book>:<Chapter>/~?<Search>" echo " All verses in a chapter of a book that match a pattern" + echo " In searches, the optional ~ indicates that the search should be approximate:" + echo " Case and accent marks will be disregarded. Note that this will often take" + echo " much longer than an exact search" echo echo " @ <Number-of-Verses>?" 
echo " Random verse or assortment of verses from any book/chapter" @@ -76,6 +79,7 @@ set_bible() { fi } +lang="en" # Language of text being used--most are English opts="$(getopt -o lWchdgjknrv -l list,no-line-wrap,cat,help,douay,greek,jerusalem,kjv,knox,rsv,vulgate -- "$@")" eval set -- "$opts" while [ $# -gt 0 ]; do @@ -101,6 +105,7 @@ while [ $# -gt 0 ]; do shift ;; -g|--greek) set_bible grb + lang="el" shift ;; -j|--jerusalem) set_bible njb @@ -116,6 +121,7 @@ while [ $# -gt 0 ]; do shift ;; -v|--vulgate) set_bible vul + lang="la" shift ;; *) # Use Knox Bible if none is specified in command line options @@ -142,11 +148,11 @@ if [ $# -eq 0 ]; then # Interactive mode while true; do - printf "knx> " + printf "$BIBLE> " if ! read -r ref; then break fi - get_data knx.tsv | awk -v cmd=ref -v ref="$ref" "$(get_data bbl.awk)" | ${PAGER} + get_data "$BIBLE.tsv" | awk -v cmd=ref -v ref="$ref" "$(get_data bbl.awk)" | ${PAGER} done exit 0 fi @@ -154,7 +160,7 @@ fi i=0 myTempDir=$(mktemp -d "${TMPDIR:-/tmp/}$(basename $0).XXXXXXXXXXXX") for version in $BIBLE; do - get_data ${version}.tsv 2>/dev/null | awk -v cmd=ref -v ref="$*" -v cross_ref="${i}" "$(get_data bbl.awk)" 2>/dev/null > "${myTempDir}/${i}-${version}.txt" + get_data ${version}.tsv 2>/dev/null | awk -v cmd=ref -v ref="$*" -v cross_ref="${i}" -v lang="$lang" "$(get_data bbl.awk)" 2>/dev/null > "${myTempDir}/${i}-${version}.txt" i=$((i + 1)) done diff --git a/grb.tsv b/grb.tsv @@ -1,5 +1,4 @@ Genesis Ge 1 1 1 Ἐν ἀρχῇ ἐποίησεν ὁ θεὸς τὸν οὐρανὸν καὶ τὴν γῆν. -Genesis Ge 1 1 1 Ἐν ἀρχῇ ἐποίησεν ὁ θεὸς τὸν οὐρανὸν καὶ τὴν γῆν. Genesis Ge 1 1 2 ἡ δὲ γῆ ἦν ἀόρατος καὶ ἀκατασκεύαστος, καὶ σκότος ἐπάνω τῆς ἀβύσσου, καὶ πνεῦμα θεοῦ ἐπεφέρετο ἐπάνω τοῦ ὕδατος. Genesis Ge 1 1 3 καὶ εἶπεν ὁ θεός Γενηθήτω φῶς. καὶ ἐγένετο φῶς. Genesis Ge 1 1 4 καὶ εἶδεν ὁ θεὸς τὸ φῶς ὅτι καλόν. καὶ διεχώρισεν ὁ θεὸς ἀνὰ μέσον τοῦ φωτὸς καὶ ἀνὰ μέσον τοῦ σκότους.