rsvget.vim (4454B)
" rsvget.vim
" https://github.com/thenewmantis/bbl.git
"
" This script pulls all verses of the Revised Standard Version: Catholic
" Edition from the web into plain text, one verse per line, as six
" tab-separated fields:
"   book name, abbreviation, book number, chapter, verse number, verse text
" e.g.
"   Exodus<Tab>Ex<Tab>2<Tab>5<Tab>20<Tab>They met Moses and Aaron, who were waiting for them, as they came forth from Pharaoh;
"
" The operation of this script depends on the website that hosts the content
" keeping its URLs and HTML the same, or at least still compatible with the
" regexes used below.
" Please feel free to modify and reuse this script, or another one like it,
" in order to versify any text you find online.
"
" Every line in the resulting file should match the following regex (typed
" exactly as it would be in a Vimscript command, ignoring the surrounding
" whitespace):
"   ^\%(\%([1-4] \|The \|Song of \)\?\a\+\|Sirach (Ecclesiasticus)\)\t\%([1-4]\)\?\a\+\t\d\{1,2}\t\d\{1,3}\t\d\{1,3}\t\D\+$
"
" To run this script, open an empty Vim buffer in the directory where this
" script is placed, give the buffer a filename and run the following ex
" command from that buffer:
"   :source rsvget.vim
"
" Requires curl, awk, perl and a POSIX-like 'shell'. The scratch files
" rsv_output.log and rsv_verses.log are written to the current directory.

" Full book names and the abbreviations used in the site's URLs. The two
" lists are parallel: abbrs[i] belongs to books[i].
let books = ["Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy", "Joshua", "Judges", "Ruth", "1 Samuel", "2 Samuel", "1 Kings", "2 Kings", "1 Chronicles", "2 Chronicles", "Ezra", "Nehemiah", "Tobit", "Judith", "Esther", "Job", "Psalms", "Proverbs", "Ecclesiastes", "Song of Solomon", "Wisdom", "Sirach (Ecclesiasticus)", "Isaiah", "Jeremiah", "Lamentations", "Baruch", "Ezekiel", "Daniel", "Hosea", "Joel", "Amos", "Obadiah", "Jonah", "Micah", "Nahum", "Habakkuk", "Zephaniah", "Haggai", "Zechariah", "Malachi", "1 Maccabees", "2 Maccabees", "Matthew", "Mark", "Luke", "John", "Acts", "Romans", "1 Corinthians", "2 Corinthians", "Galatians", "Ephesians", "Philippians", "Colossians", "1 Thessalonians", "2 Thessalonians", "1 Timothy", "2 Timothy", "Titus", "Philemon", "Hebrews", "James", "1 Peter", "2 Peter", "1 John", "2 John", "3 John", "Jude", "Revelation"]
let abbrs = ["Gen", "Ex", "Lev", "Num", "Deut", "Josh", "Judg", "Ruth", "1Sam", "2Sam", "1Kings", "2Kings", "1Chron", "2Chron", "Ezra", "Neh", "Tob", "Jdt", "Esther", "Job", "Ps", "Prov", "Eccles", "Song", "Wis", "Sir", "Is", "Jer", "Lam", "Bar", "Ezek", "Dan", "Hos", "Joel", "Amos", "Obad", "Jon", "Mic", "Nahum", "Hab", "Zeph", "Hag", "Zech", "Mal", "1Mac", "2Mac", "Mt", "Mk", "Lk", "Jn", "Acts", "Rom", "1Cor", "2Cor", "Gal", "Eph", "Phil", "Col", "1Thess", "2Thess", "1Tim", "2Tim", "Tit", "Philem", "Heb", "Jas", "1Pet", "2Pet", "1Jn", "2Jn", "3Jn", "Jude", "Rev"]

for bookIdx in range(len(books))
    let chapter = 1
    while 1
        " Chapter pages are addressed as <abbreviation><chapter number>.
        let url = 'https://biblia.com/books/rsvce/' . abbrs[bookIdx] . chapter
        " shellescape(..., 1) quotes the URL for the shell and also protects
        " it from the !, % and # expansion that :! performs.
        execute '!curl -f ' . shellescape(url, 1) . ' > rsv_output.log'
        " Keep only the line holding the resource text, strip the markup, and
        " start a new line in front of every number. The trailing `[ -s ... ]`
        " makes the pipeline's exit status non-zero when nothing was extracted.
        !awk /\<div\ class=\"resourcetext\"\>/ rsv_output.log | perl -pe 's/<[^\/][^<]*?>[a-z]<\/.*?>//g; s/<.*?>//g; s/(\D)(\d+)/\1\n\2/g' > rsv_verses.log; [ -s rsv_verses.log ]

        if v:shell_error != 0
            " Nothing was extracted for this chapter number: treat the book
            " as finished, save what we have and move on to the next book.
            w
            break
        endif

        " The first line is only the book name
        let verses = readfile('rsv_verses.log')[1:]

        let chapterTitle = ''
        if books[bookIdx] ==# 'Psalms'
            " The second (or rarely, third) line, in the case of some psalms,
            " contains the title for that psalm. Otherwise it is just the
            " chapter number (in which case `chapterTitle` stays empty).
            " get() is used so that an unexpectedly short page does not raise
            " E684 on an empty list.
            let heading = substitute(get(verses, 0, ''), '\d\+\s\+', '', '')
            let verses = verses[1:]

            if match(heading, 'PSALM') > -1
                let heading = substitute(get(verses, 0, ''), '\d\+\s\+', '', '')
                let verses = verses[1:]
            endif

            if match(heading, '\w') > -1
                let chapterTitle = heading
            endif
        else
            " The second line is empty
            let verses = verses[1:]
        endif

        for verse in verses
            " Drop the leading verse number (and one optional non-word
            " character plus any whitespace after it) to get the bare text.
            let verseText = substitute(verse, '\d\+\W\?\s*', '', '')
            if !empty(chapterTitle)
                " A psalm title is prepended, in parentheses, to the first
                " verse of the chapter only.
                let verseText = '(' . chapterTitle . ') ' . verseText
                let chapterTitle = ''
            endif
            " Fields are joined with an explicit "\t" rather than literal tab
            " characters inside quotes, which are invisible and easily
            " converted to spaces. The row is built in a variable because a
            " double quote inside `:put =...` would need escaping.
            let row = join([books[bookIdx], abbrs[bookIdx], bookIdx + 1, chapter, str2nr(verse), verseText], "\t")
            put =row
        endfor

        let chapter += 1
    endw
endfor

" :put appends below the current line, so the single empty line of the
" initially empty buffer is still sitting at the top; remove it so that every
" line of the result is a verse.
if line('$') > 1 && getline(1) ==# ''
    1delete _
endif

" Remove whitespace before a closing parenthesis.
%s/\s\+\ze)//eg
" Remove runs of spaces that directly follow a whitespace character.
%s/\s\zs \+//eg
" Delete DOS-style carriage returns (may require that "edit" command),
" non-breaking spaces and asterisks (from footnotes). The characters are
" written as escapes so they cannot be mistaken for, or turned into, an
" ordinary space.
w | edit ++ff=unix
%s/[\r\u00a0*]//eg
w