bbl

Read, search and index the Bible on the command line -- Greek, Latin, KJV, Knox, RSV, and more
git clone git://git.wilsonrgheen.com/bbl
Log | Files | Refs | README | LICENSE

commit 05336feb4ae9bceddcdcaae12b43b1cc8186ac0a
parent 9596395f7895cdf90f5c926f492f878c75b4ff75
Author: Wilson Gheen <wilson@wilsonrgheen.com>
Date:   Wed,  9 Feb 2022 05:31:51 -0600

Sample fetch scripts--I initially ran these to generate some of the .tsv
files in the first place. They may serve as helpful examples for someone looking to
versify another text.
Not all of these are guaranteed to get the .tsv exactly right (or
exactly how I have them right now).

Diffstat:
Asample_scripts/drbget.vim | 34++++++++++++++++++++++++++++++++++
Asample_scripts/getBookTitles.vim | 22++++++++++++++++++++++
Asample_scripts/knxget.vim | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asample_scripts/njbget.vim | 33+++++++++++++++++++++++++++++++++
Asample_scripts/rsvget.vim | 65+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 208 insertions(+), 0 deletions(-)

diff --git a/sample_scripts/drbget.vim b/sample_scripts/drbget.vim @@ -0,0 +1,34 @@ +let books = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy', 'Josue', 'Judges', 'Ruth', '1 Kings', '2 Kings', '3 Kings', '4 Kings', '1 Paralipomena', '2 Paralipomena', '1 Esdras', '2 Esdras', 'Tobias', 'Judith', 'Esther', 'Job', 'Psalms', 'Proverbs', 'Ecclesiastes', 'Song of Songs', 'Wisdom', 'Ecclesiasticus', 'Isaiah', 'Jeremiah', 'Lamentations', 'Baruch', 'Ezechiel', 'Daniel', 'Osee', 'Joel', 'Amos', 'Abdias', 'Jonas', 'Michaeas', 'Nahum', 'Habacuc', 'Sophonias', 'Aggaeus', 'Zacharias', 'Malachias', '1 Machabees', '2 Machabees', 'Matthew', 'Mark', 'Luke', 'John', 'The Acts', 'Romans', '1 Corinthians', '2 Corinthians', 'Galatians', 'Ephesians', 'Philippians', 'Colossians', '1 Thessalonians', '2 Thessalonians', '1 Timothy', '2 Timothy', 'Titus', 'Philemon', 'Hebrews', 'James', '1 Peter', '2 Peter', '1 John', '2 John', '3 John', 'Jude', 'Apocalypse'] +let abbrs = ['Gen', 'Ex', 'Lev', 'Num', 'Dt', 'Jos', 'Jdg', 'Ruth', '1Ki', '2Ki', '3Ki', '4Ki', '1Par', '2Par', 'Esd', 'Neh', 'Tob', 'Jdt', 'Est', 'Job', 'Ps', 'Prv', 'Eccles', 'Cant', 'Wis', 'Ecclus', 'Is', 'Jer', 'Lam', 'Bar', 'Eze', 'Dan', 'Os', 'Joel', 'Amos', 'Abd', 'Jon', 'Mic', 'Nah', 'Hab', 'Sop', 'Agg', 'Zac', 'Mal', '1Mac', '2Mac', 'Mt', 'Mk', 'Lk', 'Jn', 'Acts', 'Rom', '1Cor', '2Cor', 'Gal', 'Eph', 'Phil', 'Col', '1Thess', '2Thess', '1Tim', '2Tim', 'Tit', 'Phm', 'Heb', 'Jas', '1Pet', '2Pet', '1Jn', '2Jn', '3Jn', 'Jude', 'Apoc'] + +" https://github.com/thenewmantis/bbl.git +" This script is intended to pull all verses of the Douay-Rheims Bible from the web into plain text, with one verse per line, in the following format: (e.g.) +" Exodus Exo 2 5 20 And they met Moses and Aaron, who stood over against them as they came out from Pharao: +" The operation of this script is, of course, dependent on the website that hosts the content keeping its URLS and HTML the same, or at least still compatible with the regex used. 
+" Please feel free to modify and reuse this script or another one like it in order to versify any text you find online +" Every line in the resulting file should match the following regex (typed exactly as it would be in a Vimscript command (but ignore the surrounding whitespace)): +" ^\%(\%([1-4] \|The \|Song of \)\?\a\+\|Sirach (Ecclesiasticus)\)\t\%([1-4]\)\?\a\+\t\d\{1,2}\t\d\{1,3}\t\d\{1,3}\t\D\+$ +" To run this script successfully, open an empty Vim buffer in the directory that this script is placed, give the buffer a filename and run the following ex command (from the empty buffer): +" :source drbget.vim + +for b in range(len(books)) + let c = 1 + while 1 + exe printf('!curl -Lf ''http://drbo.org/chapter/%02d%03d.htm'' > drb_output.log', b+1, c) + !awk '/\<a href="\/cgi-bin/' drb_output.log | perl -pe 's/<.*?>|[\[\]]//g' > drb_verses.log; [ -s drb_verses.log ] + + if v:shell_error == 0 + let verses = readfile('drb_verses.log') + + for verse in verses + let verseText = substitute(verse, '\d\+\W\?\s*', '', '') + pu =books[b].' '.abbrs[b].' '.(b+1).' '.c.' '.str2nr(verse).' '.verseText + endfor + + let c += 1 + else + break + endif + endw +endfor +w diff --git a/sample_scripts/getBookTitles.vim b/sample_scripts/getBookTitles.vim @@ -0,0 +1,22 @@ +" Run the following script on a .tsv file to copy the book titles and abbreviations from that file +set wrapscan +let books = [] +let abbrs = [] +0pu_ | + +while line('.') != 1 + let currLine = split(getline('.'), '\t') + let books += [currLine[0]] + let abbrs += [currLine[1]] + exe ':norm /^\%(=currLine[0]\)\@!' +endw +redir @b>|let books +redir @a>|let abbrs +redir END +0pu a +norm ilet ela=lldt[ +1d +0pu b +norm ilet ela=lldt[ +1d +3d +echom "Done." 
diff --git a/sample_scripts/knxget.vim b/sample_scripts/knxget.vim @@ -0,0 +1,54 @@ +let books = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy', 'Josue', 'Judges', 'Ruth', '1 Kings', '2 Kings', '3 Kings', '4 Kings', '1 Paralipomena', '2 Paralipomena', '1 Esdras', '2 Esdras', 'Tobias', 'Judith', 'Esther', 'Job', 'Psalms', 'Proverbs', 'Ecclesiastes', 'Song of Songs', 'Wisdom', 'Ecclesiasticus', 'Isaiah', 'Jeremiah', 'Lamentations', 'Baruch', 'Ezechiel', 'Daniel', 'Osee', 'Joel', 'Amos', 'Abdias', 'Jonas', 'Michaeas', 'Nahum', 'Habacuc', 'Sophonias', 'Aggaeus', 'Zacharias', 'Malachias', '1 Machabees', '2 Machabees', 'Matthew', 'Mark', 'Luke', 'John', 'The Acts', 'Romans', '1 Corinthians', '2 Corinthians', 'Galatians', 'Ephesians', 'Philippians', 'Colossians', '1 Thessalonians', '2 Thessalonians', '1 Timothy', '2 Timothy', 'Titus', 'Philemon', 'Hebrews', 'James', '1 Peter', '2 Peter', '1 John', '2 John', '3 John', 'Jude', 'Apocalypse'] +let abbrs = ['Gen', 'Ex', 'Lev', 'Num', 'Dt', 'Jos', 'Jdg', 'Ru', '1Kgs', '2Kgs', '3Kgs', '4Kgs', '1Par', '2Par', 'Esd', 'Neh', 'Tob', 'Jdt', 'Est', 'Job', 'Ps', 'Prv', 'Eccl', 'Cant', 'Wis', 'Eccle', 'Isa', 'Jer', 'Lam', 'Bar', 'Eze', 'Dan', 'Os', 'Joel', 'Am', 'Abd', 'Jon', 'Mic', 'Nah', 'Hab', 'Sop', 'Agg', 'Zac', 'Mal', '1Mac', '2Mac', 'Mat', 'Mk', 'Lk', 'Jn', 'Acts', 'Rom', '1Cor', '2Cor', 'Gal', 'Eph', 'Phi', 'Col', '1Th', '2Th', '1Tim', '2Tim', 'Tit', 'Phm', 'Heb', 'Jas', '1Pet', '2Pet', '1Jn', '2Jn', '3Jn', 'Jud', 'Apoc'] +" https://github.com/thenewmantis/bbl.git +" This script is intended to pull all verses of the Knox Version of the Bible from the web into plain text, with one verse per line, in the following format: (e.g.) +" Exodus Exo 2 5 20 and meeting Moses and Aaron face to face, as they came away from Pharao’s audience, +" The operation of this script is, of course, dependent on the website that hosts the content keeping its URLS and HTML the same, or at least still compatible with the regex used. 
+" Please feel free to modify and reuse this script or another one like it in order to versify any text you find online +" Every line in the resulting file should match the following regex (typed exactly as it would be in a Vimscript command (but ignore the surrounding whitespace)): +" ^\%(\%([1-4] \|The \|Song of \)\?\a\+\|Sirach (Ecclesiasticus)\)\t\%([1-4]\)\?\a\+\t\d\{1,2}\t\d\{1,3}\t\d\{1,3}\t\D\+$ +" To run this script successfully, open an empty Vim buffer in the directory that this script is placed, give the buffer a filename and run the following ex command (from the empty buffer): +" :source knxget.vim + +let t = 'OT' +let lastOT = index(books, '2 Machabees') + +for b in range(len(books)) + let c = 1 + while 1 + if b > lastOT + let t = 'NT' + en + let a = abbrs[b] + if a == 'Joel' + let a = 'Jo' + elseif match(a, '^\d') > -1 + let abbr = abbr[0] . '_' . abbr[1:] + endif + exe '!curl -f ''http://catholicbible.online/knox/'.t.'/'.abbr.'/ch_'.c.''' > knx_output.log' + + if v:shell_error == 0 + !awk /vers-no\|vers-content/ knx_output.log | perl -pe 's/^.*?>|<.*?>|✻//g' > knx_verses.log + + let verses = readfile('knx_verses.log') + let verseNum = 0 + + for verse in verses + let tryVerseNumber = str2nr(verse) + if tryVerseNumber + let verseNum = tryVerseNumber + else + pu =books[b].' '.abbrs[b].' '.(b+1).' '.c.' '.verseNum.' 
'.verse + en + endfor + + let c += 1 + else + break + endif + endw +endfor +" Delete DOS-style carriage returns (may require that "edit" command), non-breaking spaces and asterisks (from footnotes) +w | edit ++ff=unix +%s/[ *]//eg +w diff --git a/sample_scripts/njbget.vim b/sample_scripts/njbget.vim @@ -0,0 +1,33 @@ +let books = ['Genesis', 'Exodus', 'Leviticus', 'Numbers', 'Deuteronomy', 'Joshua', 'Judges', 'Ruth', '1 Samuel', '2 Samuel', '1 Kings', '2 Kings', '1 Chronicles', '2 Chronicles', 'Ezra', 'Nehemiah', 'Tobit', 'Judith', 'Esther', '1 Maccabees', '2 Maccabees', 'Job', 'Psalms', 'Proverbs', 'Ecclesiastes', 'Song of Songs', 'Wisdom', 'Sirach', 'Isaiah', 'Jeremiah', 'Lamentations', 'Baruch', 'Ezekiel', 'Daniel', 'Hosea', 'Joel', 'Amos', 'Obadiah', 'Jonah', 'Micah', 'Nahum', 'Habakkuk', 'Zephaniah', 'Haggai', 'Zechariah', 'Malachi', 'Matthew', 'Mark', 'Luke', 'John', 'Acts of Apostles', 'Romans', '1 Corinthians', '2 Corinthians', 'Galatians', 'Ephesians', 'Philippians', 'Colossians', '1 Thessalonians', '2 Thessalonians', '1 Timothy', '2 Timothy', 'Titus', 'Philemon', 'Hebrews', 'James', '1 Peter', '2 Peter', '1 John', '2 John', '3 John', 'Jude', 'Revelation'] +let abbrs = ['Ge', 'Exo', 'Lev', 'Num', 'Deu', 'Josh', 'Jdgs', 'Ruth', '1Sm', '2Sm', '1Ki', '2Ki', '1Chr', '2Chr', 'Ezra', 'Neh', 'Tob', 'Jdt', 'Est', '1Mac', '2Mac', 'Job', 'Ps', 'Prov', 'Eccles', 'Song', 'Wis', 'Sir', 'Is', 'Jer', 'Lam', 'Bar', 'Ezek', 'Dan', 'Hos', 'Joel', 'Amos', 'Obad', 'Jon', 'Mic', 'Nahum', 'Hab', 'Zeph', 'Hag', 'Zech', 'Mal', 'Mt', 'Mk', 'Lk', 'Jn', 'Acts', 'Rom', '1Cor', '2Cor', 'Gal', 'Eph', 'Phil', 'Col', '1Th', '2Th', '1Tim', '2Tim', 'Tit', 'Phmn', 'Heb', 'Jas', '1Pet', '2Pet', '1Jn', '2Jn', '3Jn', 'Jude', 'Rev'] + +" https://github.com/thenewmantis/bbl.git +" This script is intended to pull all verses of the New Jerusalem Bible from the web into plain text, with one verse per line, in the following format: (e.g.) 
+" Exodus Exo 2 5 20 As they left Pharaoh's presence, they met Moses and Aaron who were standing in their way. +" The operation of this script is, of course, dependent on the website that hosts the content keeping its URLS and HTML the same, or at least still compatible with the regex used. +" Please feel free to modify and reuse this script or another one like it in order to versify any text you find online +" Every line in the resulting file should match the following regex (typed exactly as it would be in a Vimscript command (but ignore the surrounding whitespace)): +" ^\%(\%([1-4] \|The \|Song of \)\?\a\+\|Sirach (Ecclesiasticus)\)\t\%([1-4]\)\?\a\+\t\d\{1,2}\t\d\{1,3}\t\d\{1,3}\t\D\+$ +" To run this script successfully, open an empty Vim buffer in the directory that this script is placed, give the buffer a filename and run the following ex command (from the empty buffer): +" :source njbget.vim + +for b in range(len(books)) + let c = 1 + while 1 + exe '!curl -f ''https://www.catholic.org/bible/book.php?id='.(b+1).'&bible_chapter='.c.''' > njb_output.log' + !awk /\<a\ name=\"\[0-9\]+\"\>/ njb_output.log | perl -pe 's/<.*?>|//g; s/(\D)(\d+)/\1\n\2/g' > njb_verses.log; [ -s njb_verses.log ] + + if v:shell_error == 0 + let verses = readfile('njb_verses.log') + + for verse in verses + let verseText = substitute(verse, '\d\+\W\?\s*', '', '') + pu =books[b].' '.abbrs[b].' '.(b+1).' '.c.' '.str2nr(verse).' 
'.verseText + endfor + + let c += 1 + else + break + endif + endw +endfor diff --git a/sample_scripts/rsvget.vim b/sample_scripts/rsvget.vim @@ -0,0 +1,65 @@ +let books = ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Joshua", "Judges", "Ruth", "1 Samuel", "2 Samuel", "1 Kings", "2 Kings", "1 Chronicles", "2 Chronicles", "Ezra", "Nehemiah", "Tobit", "Judith", "Esther", "Job", "Psalms", "Proverbs", "Ecclesiastes", "Song of Solomon", "Wisdom", "Sirach (Ecclesiasticus)", "Isaiah", "Jeremiah", "Lamentations", "Baruch", "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", "Zephaniah", "Haggai", "Zechariah", "Malachi", "1 Maccabees", "2 Maccabees", "Matthew", "Mark", "Luke", "John", "Acts", "Romans", "1 Corinthians", "2 Corinthians", "Galatians", "Ephesians", "Philippians", "Colossians", "1 Thessalonians", "2 Thessalonians", "1 Timothy", "2 Timothy", "Titus", "Philemon", "Hebrews", "James", "1 Peter", "2 Peter", "1 John", "2 John", "3 John", "Jude", "Revelation"] +let abbrs = ["Gen", "Ex", "Lev", "Num", "Deut", "Josh", "Judg", "Ruth", "1Sam", "2Sam", "1Kings", "2Kings", "1Chron", "2Chron", "Ezra", "Neh", "Tob", "Jdt", "Esther", "Job", "Ps", "Prov", "Eccles", "Song", "Wis", "Sir", "Is", "Jer", "Lam", "Bar", "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jon", "Mic", "Nahum", "Hab", "Zeph", "Hag", "Zech", "Mal", "1Mac", "2Mac", "Mt", "Mk", "Lk", "Jn", "Acts", "Rom", "1Cor", "2Cor", "Gal", "Eph", "Phil", "Col", "1Thess", "2Thess", "1Tim", "2Tim", "Tit", "Philem", "Heb", "Jas", "1Pet", "2Pet", "1Jn", "2Jn", "3Jn", "Jude", "Rev"] + +" https://github.com/thenewmantis/bbl.git +" This script is intended to pull all verses of the Revised Standard Version: Catholic Edition from the web into plain text, with one verse per line, in the following format: (e.g.) 
+" Exodus Exo 2 5 20 They met Moses and Aaron, who were waiting for them, as they came forth from Pharaoh; +" The operation of this script is, of course, dependent on the website that hosts the content keeping its URLS and HTML the same, or at least still compatible with the regex used. +" Please feel free to modify and reuse this script or another one like it in order to versify any text you find online +" Every line in the resulting file should match the following regex (typed exactly as it would be in a Vimscript command (but ignore the surrounding whitespace)): +" ^\%(\%([1-4] \|The \|Song of \)\?\a\+\|Sirach (Ecclesiasticus)\)\t\%([1-4]\)\?\a\+\t\d\{1,2}\t\d\{1,3}\t\d\{1,3}\t\D\+$ +" To run this script successfully, open an empty Vim buffer in the directory that this script is placed, give the buffer a filename and run the following ex command (from the empty buffer): +" :source rsvget.vim + +for b in range(len(books)) + let c = 1 + while 1 + exe '!curl -f ''https://biblia.com/books/rsvce/'.abbrs[b].c.''' > rsv_output.log' + !awk /\<div\ class=\"resourcetext\"\>/ rsv_output.log | perl -pe 's/<[^\/][^<]*?>[a-z]<\/.*?>//g; s/<.*?>//g; s/(\D)(\d+)/\1\n\2/g' > rsv_verses.log; [ -s rsv_verses.log ] + + if v:shell_error == 0 + " The first line is only the book name + let verses = readfile('rsv_verses.log')[1:] + + let chapterTitle = '' + if books[b] == 'Psalms' + "The second (or rarely, third) line, in the case of some psalms, contains the title for that psalm. 
Otherwise it is just the chapter number (in which case, `chapterTitle` will remain a blank string) + let line = substitute(verses[0], '\d\+\s\+', '', '') + let verses = verses[1:] + + if match(line, 'PSALM') > -1 + let line = substitute(verses[0], '\d\+\s\+', '', '') + let verses = verses[1:] + endif + + if match(line, '\w') > -1 + let chapterTitle = line + endif + else + " The second line is empty + let verses = verses[1:] + endif + + + for verse in verses + let verseText = substitute(verse, '\d\+\W\?\s*', '', '') + if (chapterTitle != '') + let verseText = '(' . chapterTitle . ') ' . verseText + let chapterTitle = '' + endif + pu =books[b].' '.abbrs[b].' '.(b+1).' '.c.' '.str2nr(verse).' '.verseText + endfor + + let c += 1 + else + w + break + endif + endw +endfor +%s/\s\+\ze)//eg +%s/\s\zs \+//eg +" Delete DOS-style carriage returns (may require that "edit" command), non-breaking spaces and asterisks (from footnotes) +w | edit ++ff=unix +%s/[ *]//eg +w