#!/bin/sh
# http://www.berklix.com/~jhs/bin/.sh/web_cp_local_distfiles
# See also failure logs in
# http://www.berklix.com/~jhs/bin/.sh/web_cp_local_distfiles_logs/

# This script is retired: say why, point at the replacement, then stop.
# The exit below is unconditional, so nothing after it is ever executed;
# the remainder of the file is kept for reference only.
printf '%s\n' \
	"This consumed 100G so was stopped 2014_02," \
	"I have the distfiles split by RELEASE elsewhere" \
	"Instead use /home/jhs/bin/.sh/distfiles_fetch"
exit

# Based on ~jhs/bin/.sh/web_cp_local_weekly

# Timing runs,
#	when everything was supposedly already there:
#		Started:  Thu Apr 12 23:24:19 CEST 2012
#		Finished: Fri Apr 13 05:51:34 CEST 2012
#		Started:  Fri Apr 13 11:49:18 CEST 2012
#		Finished: Sat Apr 14 04:54:16 CEST 2012
#	Normal run
#		Started:  Thu Sep 20 13:03:33 CEST 2012
#		Finished: Fri Sep 21 23:59:04 CEST 2012
#	When everything was supposedly already there:
#		Started:	~ Sat Sep 22 08:42:37 CEST 2012
# However with HTTrack version 3.43-12 host=blak uname -r=8.2-RELEASE
# each ends with eg:
#	Segmentation faultorg/pub/FreeBSD/distfiles/\
#		Search-InvertedIndex-1.14.tar.gz (45092 bytes) - OK
#	...
#	04:54:15e.freebsInfo: puengine: transfer-status: link added: \
#		 ftp2.de.freebsd.org/pub/FreeBSD/distfiles/Shout-2.1.tar.gz \
#		 -> /pub/freebsd/dists/httree/ftp2.de.freebsd.org/pub/ \
#		FreeBSD/distfiles/Shout-2.1.tar.gz
#	Segmentation faultorg/pub/FreeBSD/distfiles/Shout-2.1.tar.gz \
#		(9433 bytes) - OK
# & at next start it always seems to complain, eg:
#	00:09:39        Warning:        Cache: damaged cache, trying to repair
#	00:09:39        Warning:        Cache: 955249 bytes successfully recovered in 2588 entries
#	00:09:39        Warning:        Previous file not found \
#		 (erased by user ?), recatching: \
#		 ftp2.de.freebsd.org/pub/FreeBSD/distfiles/
# Now building /pri/FreeBSD/branches/amd64/-current/ports/www/httrack
# 3.44.5	to see if it fails too. It fails within minutes.
# A 2nd run after removing all meta files & caches was no better,
# still failed within minutes.

# One day I might mirror other parts of the tree too, beyond just distfiles ?
# First I need a complete tree, then run again to time a near null run.

# Run this on host blak to keep load off fire.

# Source the include file into this shell.
# NOTE(review): $sl and $ht are used further down but never assigned in this
# file, and $params is only ever appended to, so all three are presumably
# defined by this include — confirm.
. ~jhs/bin/.sh/web_cp_0_inc

#	/pub/FreeBSD/dists/httree ->
#		/host/blak/ad4s4/ftp/.master/pub/FreeBSD/dists/httree
#	/pub/FreeBSD/dists/httrack -> 
#		httree/ftp2.de.freebsd.org/pub/FreeBSD/distfiles

# Name of this script without its directory; used in mail subjects and in
# the name of the /pub/Dates/ stamp file.
base=$(basename "$0")

# Root of the local mirror tree, given as the direct path on host blak.
# The sym link path /pub/FreeBSD/dists/httree used to be assigned here first
# and was then unconditionally overridden, so only the effective value is kept.
httree=/host/blak/ad4s4/ftp/pub/FreeBSD/dists/httree

# Every later find/du works relative to the current directory, so running
# them anywhere else would be wrong: if the tree cannot be entered, mail a
# failure notice and abort.
if ! cd "$httree" ; then
	echo "$0 cd failed on $(hostname -s) $(date -u +%Y-%m-%dT%H:%M:%SZ)" | \
		mail -s "Cron: $(hostname -s):$base" jhs
	exit 1
fi
# A success notice, kept disabled, for debugging:
# echo "$0 cd succeeded on `hostname -s` `date -u +%Y-%m-%dT%H:%M:%SZ`" | \
#	mail -s "Cron: `hostname -s`:$base" jhs

# Report the size of the tree, in megabytes, before doing any work.
# The current directory is measured rather than "$httree", because
# $httree is a sym link on host=fire pointing at host=blak,
# & this script runs on host=blak.
du -sm .


# purge_tmp LABEL [FIND-PREDICATE...]
#	Report how many *.tmp files below the current directory match the
#	optional extra find predicates, then delete them.
#	Prints ".tmp files LABEL: COUNT, removed." on one line.
#	Deletion is done by find itself (-exec ... {} +) rather than by piping
#	names into xargs, so names containing blanks or quotes are removed
#	intact instead of being split into bogus arguments.
#	rm -f keeps the run non-interactive.
purge_tmp() {
	purge_tmp_label=$1
	shift
	printf '%s' ".tmp files $purge_tmp_label: $(find . -type f -name '*.tmp' "$@" | wc -l)"
	find . -type f -name '*.tmp' "$@" -exec rm -f {} +
	echo ", removed."
}

# Empty leftovers first, then whatever .tmp files remain
# (all of them non-empty by then).
purge_tmp "of zero size" -size 0c
purge_tmp "of non zero size"

# Start a fresh stamp file for this run: the first line records which script
# wrote it, the second the UTC start time.
printf '%s\n' "Updated by: http://www.berklix.com/~jhs/bin/.sh/$base" \
	> "/pub/Dates/$base.last"
printf '%s\n' "Started:  $(date -u +%Y-%m-%dT%H:%M:%SZ)" \
	>> "/pub/Dates/$base.last"

# Append --robots=0 to the options handed to the mirroring command.
#	I added it to avoid it fetching nothing except index.html
#		15:59:092.de.freInfo: rgNote: due to ftp2.de.freebsd.org
#		remote robots.txt rules, links begining with these path
#		will be forbidden: / (see in the options to disable this)
#	It still prints the warning but does now fetch.
params="${params} --robots=0"

# Run the mirror into $httree.
# $params has to stay unquoted: it holds several blank-separated options
# (at least the --robots=0 appended above) that must become separate words.
$sl
$ht $params -O $httree \
	http://ftp2.de.freebsd.org/pub/FreeBSD/distfiles/

# Also available: http://ftp2.at.freebsd.org/pub/FreeBSD/distfiles/

# Size of the tree, in megabytes, after the transfer.
du -sm .

echo "Removing files with : The proxy server could not handle the request"

# remove_proxy_error_pages
#	Delete every *.tmp file below the current directory that contains the
#	proxy's error text, printing the path of each file removed.
#	The string comes from gate/usr/local/libexec/apache/libproxy.so
#	grep runs with -q so that it only answers yes/no: without it grep wrote
#	the matching lines themselves, or listings such as
#		Binary file ./ftp2.de.freebsd.org/pub/FreeBSD/distfiles/\
#			MesaDemos-7.4.2.tar.html.tmp matches
#	into the log.  -F makes the message a literal string rather than a
#	regular expression, and -s keeps unreadable files quiet.
remove_proxy_error_pages() {
	nice find . -type f -name '*.tmp' \
		-exec grep -qsF "The proxy server could not handle the request" {} \; \
		-print -exec rm -f {} +
}
remove_proxy_error_pages

# echo "Other .tmp files in `pwd`"
# find . -type f -name \*.tmp | xargs ls -l
#	\*.tar.html.tmp :
#	Last time I mirrored there were a dozen files of name \*.tar.html.tmp
#	that kept coming in from upstream again after each time I deleted,
#	so nothing to be done about them.
#	Maybe they were on the upstream source, maybe that too was mirroring.

# Final size of the tree, in megabytes.
du -sm .

printf '%s\n' "Later to mirror http://ftp-archive.freebsd.org/pub/FreeBSD-Archive/ports/distfiles/"
# See Also:	~/bin/.sh/web_cp_local_bsd

# Close the stamp file with the UTC finish time, then mail its contents,
# prefixed by the script and host name, to jhs.
printf '%s\n' "Finished: $(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "/pub/Dates/$base.last"
printf '%s\n' "$base on $(hostname -s) $(cat "/pub/Dates/$base.last")" |
	mail -s "Cron: $(hostname -s)" jhs
exit 0
