commit 30558f0a23d3b43c2cd14a4e5cbc8a9e254595c2
parent 8beacc653ebc457d2ae2db046bbb3fa97159b595
Author: Wilson Gheen <wilson@wilsonrgheen.com>
Date: Mon, 18 Apr 2022 16:25:27 -0500
Fixed scripts
Diffstat:
2 files changed, 30 insertions(+), 4 deletions(-)
diff --git a/sample_scripts/hebget.sh b/sample_scripts/hebget.sh
@@ -1,5 +1,14 @@
#!/bin/sh
# https://github.com/thenewmantis/bbl.git
+
+# !!!NOTE!!! This file will probably not display as intended if viewed in a web browser.
+# Web browsers apply a feature called "bidi" (bidirectional text) when displaying text. With bidi, characters from scripts that are read right-to-left are displayed
+# in their proper orientation alongside regular (left-to-right) characters. This file, however, was written while coping with the fact that Hebrew
+# must sometimes be displayed incorrectly in order to work with the text in a sane way. Therefore, all strings in this shell script that are written in Hebrew
+# are actually backwards (written left-to-right) in this file. In the lines that show example output (5 lines after this one), the letters are written
+# right-to-left, as they should be, but web browsers will automatically reverse the display orientation of all Hebrew letters relative to the way they are encoded in files.
+# The only fully accurate way that I am aware of to view this file is to download and open it in a text editor.
+
# This script is intended to pull all verses of the Hebrew Bible from the web into plain text, with one verse per line, in the following format: (e.g.)
# בראשית בר א א א בְּרֵאשִׁ֖ית בָּרָ֣א אֱלֹהִ֑ים אֵ֥ת הַשָּׁמַ֖יִם וְאֵ֥ת הָאָֽרֶץ׃
# Which, in the command line application that these verses are used for, would produce the following output:
@@ -26,8 +35,22 @@ download() {
curl -L "$url" -o "$myFile"
}
nextBook() {
+ # Book 26 is Psalms, which is split into parts 26, 26a, 26b, 26c, 26d and 26e
+ if echo "$b" | grep -q '^26'; then
+ case "$b" in
+ 26a) b='26b' ;;
+ 26b) b='26c' ;;
+ 26c) b='26d' ;;
+ 26d) b='26e' ;;
+ 26e)
+ b='27'
+ bAbs=$(( bAbs + 1 )) ;;
+ esac
+ return
+ fi
bAbs=$(( bAbs + 1 ))
next="$($printf '%02d' "$(($(echo "$b" | grep -o '[1-9][0-9]\?') + 1))")"
+ # These books all have parts a and b (e.g. there is no 25, only 25a and 25b)
for n in 08 09 25 35; do
if [ "$b" = "${n}a" ]; then
b="${n}b"
diff --git a/sample_scripts/latinpoemget.sh b/sample_scripts/latinpoemget.sh
@@ -1,7 +1,10 @@
#!/bin/sh
# To TSV-ify webpages from https://www.thelatinlibrary.com/
-# In this example, the Aeneid:
+author="vergil"
+title="ec"
+max=10
+b=2
-for n in $(seq 12); do
- curl -L "https://www.thelatinlibrary.com/vergil/aen$n.shtml" | sed -n '/<p class="internal_navigation"/,/<div class="footer"/{/^\w/p}' | sed -e 's/ .*//' -e 's/<br>//' -e 's/—/—/g' | awk "{printf(\"Aeneid\tAen\t1\t${n}\t%d\t%s\n\", NR, \$0)}"
-done > latinpoem.tsv
+for n in $(seq $max); do
+ curl -L "https://www.thelatinlibrary.com/$author/$title$n.shtml" | sed -n '/<p class="internal_navigation"/,/<div class="footer"/{/^\w/p}' | sed 's/<BR>/\n/' | sed -e 's/ .*//' -e 's/<.*>//g' -e '/^\s*$/d' | awk "{printf(\"Eclogues\tEcl\t${b}\t${n}\t%d\t%s\n\", NR, \$0)}"
+done >> latinpoem.tsv