#!/bin/sh
# Usage: ./corpus_build.sh  www.example.com
#
# This recursively downloads webpages from a server and puts them into
#    a new directory named after the argument you pass to the script
#    (www.example.com in the example above).
# Refuse to run without a target: otherwise wget is started with no URL and
# only reports its own error, which says nothing about how to call this script.
if [ -z "${1:-}" ]; then
  printf 'Usage: %s www.example.com\n' "$0" >&2
  exit 2
fi

# Mirror the site at low CPU priority (nice) and report elapsed time (time).
# wget options:
#   -r   recursive download
#   -k   convert links in downloaded documents for local viewing
#   -np  never ascend to the parent directory
#   -nc  do not re-download files that already exist locally
#   -nv  non-verbose output
#   -R   reject files whose names end in any of the listed suffixes
#   -U   User-Agent string sent to the server
# "$1" is quoted so that whitespace or glob characters (?, *, [) in the
# argument are passed to wget untouched instead of being split or expanded
# by the shell.
time nice wget -r -k -np -nc -nv \
  -R '.ram,.jpg,.gif,.png,.swf,.js,.css,.pdf' \
  -U "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)" \
  "$1"
# TODO: consider -N (timestamping)? Note that wget does not allow -N together with -nc.

