Fixed scripts - bbl - Read, search and index the Bible on the command line -- Greek, Latin, KJV, Knox, RSV, and more

commit 30558f0a23d3b43c2cd14a4e5cbc8a9e254595c2
parent 8beacc653ebc457d2ae2db046bbb3fa97159b595
Author: Wilson Gheen <wilson@wilsonrgheen.com>
Date:   Mon, 18 Apr 2022 16:25:27 -0500

Fixed scripts

Diffstat:
M sample_scripts/hebget.sh  | 23 +++++++++++++++++++++++
M sample_scripts/latinpoemget.sh  | 11 +++++++----

2 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/sample_scripts/hebget.sh b/sample_scripts/hebget.sh
@@ -1,5 +1,14 @@
 #!/bin/sh
 # https://github.com/thenewmantis/bbl.git
+
+# !!!NOTE!!! This file will probably not display as intended if viewed in a web browser.
+# Web browsers apply bidirectional ("bidi") text layout when displaying text. With bidi, characters from right-to-left scripts are displayed
+# in their proper orientation alongside ordinary (left-to-right) characters. This file, however, was written while coping with the fact that Hebrew
+# must sometimes be displayed incorrectly in order to work with the text in a sane way. Therefore, all Hebrew strings in this shell script
+# are actually stored backwards (written left-to-right) in this file. In the lines (5 lines after this one) where I show example output, I have written the letters
+# right-to-left, as they should be, but web browsers will automatically reverse the display orientation of all Hebrew letters relative to the way they are encoded in the file.
+# The only fully accurate way that I am aware of to view this file is to download it and open it in a text editor.
+
 # This script is intended to pull all verses of the Hebrew Bible from the web into plain text, with one verse per line, in the following format: (e.g.)
 # בראשית	בר	א	א	א	בְּרֵאשִׁ֖ית בָּרָ֣א אֱלֹהִ֑ים אֵ֥ת הַשָּׁמַ֖יִם וְאֵ֥ת הָאָֽרֶץ׃
 # Which, in the command line application that these verses are used for, would produce the following output:
@@ -26,8 +35,22 @@ download() {
     curl -L "$url" -o "$myFile"
 }
 nextBook() {
+    # Book 26 is Psalms, which is split across 26, 26a, 26b, 26c, 26d and 26e
+    if echo "$b" | grep -q '^26'; then
+       case "$b" in
+           26a) b='26b' ;;
+           26b) b='26c' ;;
+           26c) b='26d' ;;
+           26d) b='26e' ;;
+           26e)
+               b='27'
+               bAbs=$(( bAbs + 1 )) ;;
+       esac
+       return
+    fi
     bAbs=$(( bAbs + 1 ))
     next="$($printf '%02d' "$(($(echo "$b" | grep -o '[1-9][0-9]\?') + 1))")"
+    # These books all have parts a and b (e.g. there is no 25, only 25a and 25b)
     for n in 08 09 25 35; do
         if [ "$b" = "${n}a" ]; then
             b="${n}b"
diff --git a/sample_scripts/latinpoemget.sh b/sample_scripts/latinpoemget.sh
@@ -1,7 +1,10 @@
 #!/bin/sh
 # To TSV-ify webpages from https://www.thelatinlibrary.com/
-# In this example, the Aeneid:
+author="vergil"
+title="ec"
+max=10
+b=2
 
-for n in $(seq 12); do
-    curl -L "https://www.thelatinlibrary.com/vergil/aen$n.shtml" | sed -n '/<p class="internal_navigation"/,/<div class="footer"/{/^\w/p}' | sed -e 's/&nbsp;.*//' -e 's/<br>//' -e 's/&#151/—/g' | awk "{printf(\"Aeneid\tAen\t1\t${n}\t%d\t%s\n\", NR, \$0)}"
-done > latinpoem.tsv
+for n in $(seq $max); do
+    curl -L "https://www.thelatinlibrary.com/$author/$title$n.shtml" | sed -n '/<p class="internal_navigation"/,/<div class="footer"/{/^\w/p}' | sed 's/<BR>/\n/' | sed -e 's/&nbsp;.*//' -e 's/<.*>//g' -e '/^\s*$/d' | awk "{printf(\"Eclogues\tEcl\t${b}\t${n}\t%d\t%s\n\", NR, \$0)}"
+done >> latinpoem.tsv

	bbl Read, search and index the Bible on the command line -- Greek, Latin, KJV, Knox, RSV, and more
	git clone git://git.wilsonrgheen.com/bbl
	Log | Files | Refs | README | LICENSE

M	sample_scripts/hebget.sh	|	23	+++++++++++++++++++++++
M	sample_scripts/latinpoemget.sh	|	11	+++++++----