Mirror a web page (kwiki)

From Notes

Jump to: navigation, search
#!/bin/bash

#
# Author:   Josh Miller
# Date:     06/15/2006
#
# Purpose:  Generate a local static copy of a kwiki wiki.
#

## Basic settings.
# NOTIFY: address handed to mail(1) for status messages.
# PHASE:  label of the step currently running; it is interpolated into
#         the failure message, so it is updated before each step.
NOTIFY='miller.j@example.com'
PHASE='START'

# Mail a failure notice for the phase that is currently running.
# Globals:   PHASE (read)  - name of the step, embedded in the message
#            NOTIFY (read) - recipient passed to mail(1)
# Arguments: none
# The same sentence is used as both the subject line and the mail body.
notification () {
  local failure_msg="AirKwiki transfer failed during $PHASE phase."
  # $NOTIFY stays unquoted: word splitting lets it carry more than one
  # address, exactly as before.
  mail -s "$failure_msg" $NOTIFY <<<"$failure_msg"
}

## Fetch the site with wget's mirror mode into /var/www/html/.
PHASE=DOWNLOAD
# Any non-zero wget exit status triggers the failure mail; the script
# then carries on with the next phase.
if ! wget --mirror -P/var/www/html/ http://kwiki.example.com/; then
  notification
fi

## Remove the action pages.
PHASE=ACTIONREMOVAL
# Let find hand the matching names straight to rm instead of looping over
# its word-split output: mirrored names contain "?" (a glob character) and
# may contain whitespace, so an unquoted loop variable can expand to the
# wrong files. find also exits non-zero when the traversal fails or when
# any rm invocation fails, so one check covers every file rather than
# only the last one processed.
if ! find /var/www/html/kwiki.example.com -type f -name "*action=*" \
    -exec rm -f -- {} +; then
  notification
fi

## Convert links in all files:
PHASE=LINKCONVERSION
# Both substitutions run in a single sed pass per file:
#   1. Remove all cgi references.
#   2. Make sure that the 'Home Page' icon works by giving the empty
#      href an absolute target.
# -type f keeps directories away from sed -i (it cannot edit them), and
# -exec ... {} + passes file names as-is, so names containing "?" or
# whitespace are neither glob-expanded nor split. find exits non-zero if
# the traversal or any sed invocation fails, so the check below covers
# every file instead of only the last one.
if ! find /var/www/html/kwiki.example.com -type f -exec sed -i \
    -e 's/index.cgi?//g' \
    -e 's/<a href="" accesskey="h" title="Home Page">/<a href="http:\/\/airkwiki.example.com\/" accesskey="h" title="Home Page">/g' \
    -- {} +; then
  notification
fi

## Convert filenames:
PHASE=FILENAMECONVERSION

# Every file directly inside the mirror directory whose name contains
# "cgi?" is copied to a sibling named after the text between the first
# and the second "?" of its name (e.g. "index.cgi?HomePage" is copied to
# "HomePage").
#
# The copy is done directly rather than by generating and executing a
# script of "cp" lines: that approach paired a recursive find listing
# with a non-recursive ls listing (which can misalign), ran the file
# names unquoted, and left scratch files inside the published web root.
site_dir=/var/www/html/kwiki.example.com
copy_failed=0

for src in "$site_dir"/*'cgi?'*; do
  # An unmatched glob stays literal and directories are not pages;
  # skip both.
  [[ -f "$src" ]] || continue

  file_name=${src##*/}          # strip the directory part
  page_name=${file_name#*\?}    # drop everything up to the first "?"
  page_name=${page_name%%\?*}   # drop anything from the next "?" on

  # With nothing after the "?" the target would be the directory itself.
  [[ -n "$page_name" ]] || continue

  cp -- "$src" "$site_dir/$page_name" || copy_failed=1
done

# One notification for the whole phase if any copy failed.
if (( copy_failed )); then
  notification
fi

## Cleanup
PHASE=CLEANUP
# Nothing is written to scratch files any more; remove the ones an
# earlier run of the script-generating implementation may have left
# behind. -f keeps this quiet when they do not exist.
rm -f -- \
  "$site_dir/base.files" \
  "$site_dir/new.files" \
  "$site_dir/cp.files" \
  "$site_dir/doit"

## Final status mail: the same text serves as subject and body.
completion_msg="Airkwiki copy complete"
# $NOTIFY is left unquoted so it is word-split exactly as before.
mail -s "$completion_msg" $NOTIFY <<<"$completion_msg"
Personal tools