diff options
author | Rob Austein <sra@hactrn.net> | 2019-03-18 20:44:57 +0000 |
---|---|---|
committer | Rob Austein <sra@hactrn.net> | 2019-03-18 20:44:57 +0000 |
commit | 3e7b8b209060988ed020f0eda33b1a2f7c292be7 (patch) | |
tree | fea518e604853f3642e913486f6873e0e6b8c1a6 /tools/convert-and-slurp-attachments.sh |
Initial wiki dump and initial tools
Diffstat (limited to 'tools/convert-and-slurp-attachments.sh')
-rwxr-xr-x | tools/convert-and-slurp-attachments.sh | 18 |
1 files changed, 18 insertions, 0 deletions
#!/bin/sh -
#
# convert-and-slurp-attachments.sh
#
# For every entry in the current directory whose name contains no "."
# (presumably one entry per dumped wiki page -- confirm against the dump
# layout), this script:
#
#   1. fetches the corresponding page from the Trac wiki, extracts the
#      useful portion of the HTML and converts it to Markdown, written to
#      "<entry>.md";
#   2. fetches a ZIP file of the page's attachments into "<entry>.zip" and
#      deletes that file again if it is not a readable ZIP archive.
#
# Uses curl, xsltproc, html2markdown and zipinfo, and reads the stylesheet
# extract-wiki-content.xsl via a relative path, so it must be run from a
# directory where that path resolves.

# Loop-invariant: root URL of the wiki being converted.
base="https://trac.rpki.net"

# Iterate with a glob rather than parsing ls(1) output, so that names
# containing whitespace or backslashes reach the loop body intact.  Like
# ls, "*" does not match hidden (dot-prefixed) entries.
for page in *
do
  # Only names without a "." are treated as pages.  This also keeps the
  # generated "<entry>.md" / "<entry>.zip" files out of the loop.
  case "$page" in
    *.*) continue ;;
  esac

  # In an empty directory the glob stays a literal "*"; skip it.
  [ -e "$page" ] || [ -L "$page" ] || continue

  # "%2F" in an entry name stands for "/" in the wiki path.  The name is
  # quoted so it is neither word-split nor glob-expanded before sed sees it.
  path="/wiki/$(printf '%s\n' "$page" | sed 's=%2F=/=g')"

  # Fetch the Wiki page, extract the useful portion of the HTML, convert
  # that into Markdown.
  curl "${base}${path}" |
    xsltproc --html extract-wiki-content.xsl - |
    html2markdown --no-skip-internal-links --reference-links >"$page.md"

  # Fetch a ZIP file containing any attachments, clean up if result is
  # empty or broken.  The "./" prefix (and "--" for rm) keeps a name that
  # starts with "-" from being taken as an option.
  curl "${base}/zip-attachment${path}/" >"$page.zip"
  zipinfo "./$page.zip" >/dev/null 2>&1 || rm -f -- "./$page.zip"
done