#!/bin/sh
# ~jhs/public_html/bin/.sh/distfiles_cmpd	by jhs@
# Removes duplicate distfiles.
# I keep my distfiles in separate smaller directories,
# segregated by release, with newest release directory containing most files,
# but older directories holding more stuff I need for legacy support.
# This shell script strips identical copies.

# See also:
#	/site/domain/this/etc/make.conf.fetch
#	~/bin/.sh/distfiles_dups
#	~/bin/.sh/distfiles_fetch
#	~/bin/.sh/distfiles_mount

# Alternates I havent tried:
#	See end of file, + poudriere preferred by Bryan Drewery
#	cd /usr/ports ; make distclean
#		# Uses /usr/ports/Mk/bsd.port.mk	--distclean
# JJLATER Look at use mount -t nullfs (as used by poudriere)

# Startup hint: the message suggests running as root, or fixing ownership
# of the distfiles trees first.
printf '%s\n' \
	'As make fetch often gets run as root, Suggestions:' \
	'Either run this as root with xs, or' \
	'xs chown -R jhs:staff /pub/FreeBSD/dists/*  /host/gate/usr/ports/distfiles/*'

# bleat is the command zap uses to announce each strip pass:
# "true" keeps it silent, "echo" prints the message.
bleat=true
bleat=echo	# Overrides the line above; comment this line out for silence.
# DBG is an optional prefix for those messages; the 2nd assignment blanks it.
DBG="distfiles_cmpd_debug: "
DBG=""

# outer dir1 dir2 dir3 ...
#	Strips every directory against all the directories listed after it:
#	dir1 against dir2 dir3 ..., then dir2 against dir3 ..., & so on.
#	The last directory is only ever used as reference, never stripped.
#	With fewer than 2 directories there is nothing to compare, so
#	nothing is done.
#	The remaining directories are handed to inner as one space separated
#	string, so reference directory names must not contain white space.
outer() {
	# $bleat "${DBG}Outer_0 $*"
	while [ $# -gt 1 ]
		do
		del=$1
		shift
		# $bleat "${DBG}Calling Inner $del : $*"
		# "$del" quoted so the directory to strip stays one argument.
		inner "$del" "$*"
		done
	}
# inner dir "ref1 ref2 ..."
#	$1 is the directory to strip, $2 a space separated list of reference
#	directories. Calls zap once per reference directory, in list order.
inner() {
	# $bleat "${DBG}Inner $1 : $2 :"
	for i in $2	# Unquoted on purpose, to split the list into words.
		do
		# $bleat "${DBG}zap $1 $i"
		# Quoted so each directory reaches zap as exactly one argument.
		zap "$1" "$i"
		done
	}
# zap dir ref
#	Announces the pass via $bleat, then runs cmpd once for every regular
#	file below dir, passing the file name & ref as the last 2 arguments.
#	cmpd runs with dir as its working directory, so file names are
#	relative to dir, & ref should be an absolute path.
#	Nothing is run if dir cannot be entered.
zap()	{
	$bleat "${DBG}Stripping $1 V. $2"
	# Both directories quoted, so names with white space stay intact.
	# Subshell, so the cd does not move the rest of the script.
	( cd "$1" && nice find . -type f -exec cmpd -d -s -S -l {} "$2" \; )
	# -l added 2017-02-26 to keep track of which newer RELEASE also needs
	#	same distfile. Reason: sometimes I delete a newer
	#	release while keeping an older release (as I sometimes purge
	#	eg 9.0, 9.1, 9.2, but keep 9.3, & also still keep 4.11 & 6.4).
	}

# Start Of Main.
# D: root of the per release distfiles tree.
# dirs: space separated list of directories to strip, built up below.
D="/pub/FreeBSD/dists"
dirs=""

# The next block {
# was found on host=fire 2015_04_14 by running:
#	find -x / /0s1 /0s1/usr /0s1/usr1 /0s1/var /0s2 /0s2/usr /0s2/usr1 \
#		/0s2/var /0s3 /0s3/usr /0s3/usr1 /0s3/var /0s4 /0s4/crypt \
#		/1s2 /1s2/usr /1s2/usr1 /1s2/var /1s3 /1s3/usr /1s3/usr1 \
#		/1s3/var /1s4 /tmp /usr /usr1 /var \
#		-type d -name distfiles -print

# Never strip these:
#	/home/jhs/public_html/ftp/FreeBSD/ports/distfiles
#	/home/jhs/public_html/scanjet/data/distfiles

# Only strip /usr/ports/distfiles if it is a real local directory.
# /bin/pwd is used rather than the builtin pwd, as the builtin returns the
# unresolved name distfiles even when that is just a symbolic link.
# && rather than ; so that a failed cd leaves sense empty, instead of
# reporting whatever directory this script happened to be started from.
sense=$(cd /usr/ports/distfiles && /bin/pwd)
sense2=${sense##*/}	# Last path component, without needing basename.
if [ "$sense2" = "distfiles" ]; then
	# You have a local distfiles/ to be stripped.
	dirs="${dirs} /usr/ports/distfiles"
	#	If this happens to be the same as another directory in the
	#	$dirs path, for each duplicate file cmpd will warn:
	#		...... share same device (major & minor) & inode ,
	#		link count is 1; Skipping.
else
	# You do not have a local distfiles/ to be stripped.
	# (Your /usr/ports/distfiles is presumably just a symbolic link
	# to eg $D/5.3-RELEASE)
	:	# A branch holding only comments is a syntax error in POSIX sh.
fi

# Next directories are in strip order, ie first gets stripped against
# reference 2nd etc. Best in approx newest release last.
# In approx reverse order to /site/domain/this/etc/make.conf.common
# Not in simple 4 before 5 numeric order as releases 4.x 5.x & 6.x & 7.x
#	overlap / interleave in time.
# For release dates see http://www.freebsd.org/releases/index.html
#	/usr/www/en/releases

# dirs="${dirs} /chroot/usr/ports/distfiles"

cd "$D"

# /host/gate/usr/ports/distfiles may be a link to $D/6.4-RELEASE so avoid it.
# The whole test is switched off by "if false". The : commands only exist
# because a then or else branch holding nothing but comments is a syntax
# error in POSIX sh.
if false ; then
if test -e /host/gate/usr/ports/distfiles ; then
  :
  ## dirs="${dirs} /host/gate/usr/ports/distfiles"
  ## cd: /host/park/usr/ports/distfiles: Input/output error
else
  true	# Dummy.
	#	Probably AMD has failed to mount, showable with:
	#	cd /host/gate
	#	/host/gate: Operation timed out.
fi
else
  :
fi

# ping -c 1 gate && dirs="${dirs} /host/gate/usr/ports/distfiles"
# Fails maybe I need to tell amd to listen on port ?
# I suspect /host/gate may cause an amd crash ?
# Add the distfiles directory of each listed host that answers one ping.
for remote in park mart
	do
	if ping -c 1 $remote ; then
		dirs="${dirs} /host/${remote}/usr/ports/distfiles"
	fi
	done

# Optional directories: each is appended to the list, & reported,
# only if it exists on this host.
for extra in /jail/usr/ports/distfiles $D/current.old
	do
	[ -e "$extra" ] || continue
	dirs="${dirs} $extra"
	echo "$extra exists so added to list ${dirs}"
	done
#	For when I occasionally manually:
#		cd /pub/FreeBSD/dists; mv current current.old; mkdir current
#		cd /pri/FreeBSD/branches/-current/ports
#		setenv PORTSDIR `pwd`
#		nice nice make -k -j 5 BATCH=yes fetch
#		distfiles_cmpd
#	so that current/ only contains what is now needed,
#	& current.old files no
#	longer needed by current ports/ can be removed to save space.
#	Also where I can periodically
#		cp -R /usrb/chroot/usr/ports/distfiles

# FIRSTLAST decides where $D/current sits in the strip list:
#	first	current is appended here, ahead of the releases, so it is
#		the one that gets pruned against them.
#	last	nothing is appended here; the test for last further down
#		appends current after the releases.
# It may be preset in the environment; empty or unset means first.
if [ -z "${FIRSTLAST}" ]; then # {
	FIRSTLAST=first
	echo "FIRSTLAST was unset, now set to $FIRSTLAST"
else	# }{
	echo "FIRSTLAST imported preset as $FIRSTLAST"
fi	# }
echo "Current will be stripped $FIRSTLAST relative to releases."
echo "You can override with 'setenv FIRSTLAST first' or 'setenv FIRSTLAST last'"
echo "If current is first, it gets pruned, deleting identical files that match"
echo "reference copies in release directories. If instead current is last, the"
echo "release directories get pruned, referring against current."

case "${FIRSTLAST}" in
(first)
	echo "Current will be first, pruned against reference releases."
	dirs="${dirs} $D/current"	# ONE_ONLY
	;;
(last)
	echo "Current will be last,  releases pruned by reference to current."
	;;
(*)
	# Any other value matches neither test, so current is never added.
	# Say so, rather than skipping it silently.
	echo "Warning: FIRSTLAST=$FIRSTLAST is neither first nor last, so" >&2
	echo "$D/current will not be in the list of directories to strip." >&2
	;;
esac
#	Of the 2 dirs= lines ending with marker ONE_ONLY, one should be
#	commented out, & one not. Depending on needs of human owner:
#	Sometimes uncommented is near the beginning, to keep it stripped
#	as small as possible, with most files in the latest release
#	directories. Other times, eg shortly before a new release is due,
#	to ensure I pre fetch as many current distfiles as possible I run
#	make fetch on current ports/, before that I either:
#		- move current to end of list to ensure I don''t transiently
#		  have too many duplicate distfiles overflowing the disc.
#		- Or mv current current.old; make fetch ; distfiles_cmpd
#	Warning theres an additional pain: ports Mk/ macros are too dumb,
#	try to force me to remote refetch & reclick certain
#	license crap again, even if I already hold distfile locally in another
#	dir which is not directly /usr/ports/distfiles.

# No point stripping the httrack mirror, as its on a different host
# & will fill again from crontab + web_cp_local_distfiles :
# /host/fire/usra/ftp/pub/FreeBSD/dists
#	httrack -> httree/ftp2.de.freebsd.org/pub/FreeBSD/distfiles
#	httree -> /host/blak/ad4s4/ftp/master/pub/FreeBSD/dists/httree
# dirs="${dirs} /host/blak/ad4s4/ftp/master/pub/FreeBSD/dists/httree/ftp2.de.freebsd.org/pub/FreeBSD/distfiles"

# Careful, next few NFS sym links come back to prime host user=fire,
# so danger of erasure of only file; though thankfully, 2015-10 running
# on host=fire, I saw cmpd just emit "Error: .. share same device ..  Skipping!"
# ping -c 1 lapr && dirs="${dirs} /host/lapr/data/release/12.1-STABLE/usr/distfiles"
# ping -c 1 lapr && dirs="${dirs} /host/lapr/data/release/13.0-CURRENT/usr/distfiles"
# ping -c 1 lapr && dirs="${dirs} /host/lapr/data/release/9.3-RELEASE/usr/distfiles"
# ping -c 1 lapr && dirs="${dirs} /host/lapr/data/release/s1/usr/distfiles"
# ping -c 1 lapr && dirs="${dirs} /host/lapr/data/release/s2/usr/distfiles"
# ping -c 1 lapr && dirs="${dirs} /host/lapr/data/release/s3/usr/distfiles"
# ping -c 1 lapr && dirs="${dirs} /host/lapr/data/release/this/usr/distfiles"

# dirs="${dirs} $D/common"		# individual hosts'' local copies
	# Various hosts running different releases share this via sym links.

#	Shortly after a release is made, I list the release last
#	& run a make fetch, as best time to grab stuff that may later migrate
#	elsewhere on the net & be harder to find.

# Sorted by date order, Do Not sort by release number order.
#	http://www.freebsd.org/releases/

# Table of release directories. Lines without a leading # are the releases
# currently handled: each appends one directory to the list, & every
# directory is later stripped against all directories appended after it.
# To start or stop handling a release, remove or add its leading # .
# The trailing comment on each line records release date, & where known
# disc usage (du=) & the hosts using that release (host=).
#			Name		# Rel. Date : Size : Hosts Using
# dirs="${dirs} $D/1.0-RELEASE"		# 1993-11
# dirs="${dirs} $D/1.1-RELEASE"		# 1994-05
# dirs="${dirs} $D/1.1.5-RELEASE"	# ????-??
# dirs="${dirs} $D/1.1.5.1-RELEASE"	# 1994-07
# dirs="${dirs} $D/2.0-RELEASE"		# 1994-11
# dirs="${dirs} $D/2.0.5-RELEASE"	# 1995-06
# dirs="${dirs} $D/2.1-RELEASE"		# 1995-11
# dirs="${dirs} $D/2.1.5-RELEASE"	# 1996-07
# dirs="${dirs} $D/2.1.6-RELEASE"	# 1996-12
# dirs="${dirs} $D/2.1.7-RELEASE"	# 1997-02
# dirs="${dirs} $D/2.2-RELEASE"		# 1997-03
# dirs="${dirs} $D/2.2.1-RELEASE"	# 1997-04
# dirs="${dirs} $D/2.2.2-RELEASE"	# 1997-05
# dirs="${dirs} $D/2.2.5-RELEASE"	# 1997-10
# dirs="${dirs} $D/2.2.6-RELEASE"	# 1998-03
# dirs="${dirs} $D/2.2.7-RELEASE"	# 1998-07
# dirs="${dirs} $D/3.0-RELEASE"		# 1998-10
# dirs="${dirs} $D/2.2.8-RELEASE"	# 1998-12
# dirs="${dirs} $D/3.1-RELEASE"		# 1999-02
# dirs="${dirs} $D/3.2-RELEASE"		# 1999-05
# dirs="${dirs} $D/3.3-RELEASE"		# 1999-09
# dirs="${dirs} $D/3.4-RELEASE"		# 1999-12
# dirs="${dirs} $D/4.0-RELEASE"		# 2000-03
# dirs="${dirs} $D/3.5-RELEASE"		# 2000-06
# dirs="${dirs} $D/4.1-RELEASE"		# 2000-07
# dirs="${dirs} $D/4.1.1-RELEASE"	# 2000-09
# dirs="${dirs} $D/4.2-RELEASE"		# 2000-11
# dirs="${dirs} $D/4.3-RELEASE"		# 2001-04
# dirs="${dirs} $D/4.4-RELEASE"		# 2001-09
# dirs="${dirs} $D/4.5-RELEASE"		# 2002-01
# dirs="${dirs} $D/4.6-RELEASE"		# 2002-06
# dirs="${dirs} $D/4.6.2-RELEASE"	# 2002-08
# dirs="${dirs} $D/4.7-RELEASE"		# 2002-10
# dirs="${dirs} $D/5.0-RELEASE"		# 2003-01
# dirs="${dirs} $D/4.8-RELEASE"		# 2003-04
# dirs="${dirs} $D/5.1-RELEASE"		# 2003-06
# dirs="${dirs} $D/4.9-RELEASE"		# 2003-10
# dirs="${dirs} $D/5.2-RELEASE"		# 2004-01
# dirs="${dirs} $D/5.2.1-RELEASE"	# 2004-02
# dirs="${dirs} $D/4.10-RELEASE"	# 2004-05
# dirs="${dirs} $D/5.3-RELEASE"		# 2004-11-06
  dirs="${dirs} $D/4.11-RELEASE"	# 2005-01-25 du=6.2 G host=lapa,lapl,mini,rain,scan,snow
# dirs="${dirs} $D/5.4-RELEASE"		# 2005-05-09
# dirs="${dirs} $D/6.0-RELEASE"		# 2005-11-04
# dirs="${dirs} $D/6.1-RELEASE"		# 2006-05-09
# dirs="${dirs} $D/5.5-RELEASE"		# 2006-05-25
# dirs="${dirs} $D/6.2-RELEASE"		# 2007-01-15
# dirs="${dirs} $D/6.3-RELEASE"		# 2008-01-18
# dirs="${dirs} $D/7.0-RELEASE"		# 2008-02-27
  dirs="${dirs} $D/6.4-RELEASE"		# 2008-11-28 du=11 G host=mart,park
# dirs="${dirs} $D/7.1-RELEASE"		# 2009-01-04
# dirs="${dirs} $D/7.2-RELEASE"		# 2009-05-04
# dirs="${dirs} $D/8.0-RELEASE"		# 2009-11-25
# dirs="${dirs} $D/7.3-RELEASE"		# 2010-03-23 host=dual,erik,film,king,loft,thin,wind
# dirs="${dirs} $D/8.1-RELEASE"		# 2010-07-23
  dirs="${dirs} $D/7.4-RELEASE"		# 2011-02-21 du=640 M host=lapd,lapn,slim,sony
# dirs="${dirs} $D/8.2-RELEASE"		# 2011-02-24 host=john,laph,laps
# dirs="${dirs} $D/9.0-RELEASE"		# 2012-01-10
# dirs="${dirs} $D/8.3-RELEASE"		# 2012-04-18 host=blak
# dirs="${dirs} $D/9.1-RELEASE"		# 2012-12-30 host=blak
  dirs="${dirs} $D/8.4-RELEASE"		# 2013-06-09 du=10 G host=lapo,lapr:s1
  dirs="${dirs} $D/9.2-RELEASE"         # 2013-09-30 du=20 G host=blak,fire:1s1-main
# dirs="${dirs} $D/10.0-RELEASE"	# 2014-01-20
  dirs="${dirs} $D/9.3-RELEASE"		# 2014-07-17 du=20 G host=blak:1s2,fire:later,lapr:s3
# dirs="${dirs} $D/10.1-RELEASE"	# 2014-11-06 host=lapr:s2
# dirs="${dirs} $D/10.2-RELEASE"	# 2015-08-12
# dirs="${dirs} $D/10.3-RELEASE"	# 2016-04-04 host=fire:1s3,slim-remo
# dirs="${dirs} $D/11.0-RELEASE"	# 2016-10-10
# dirs="${dirs} $D/11.1-RELEASE"	# 2017-07-26
# dirs="${dirs} $D/10.4-RELEASE"	# 2017-10-03
# dirs="${dirs} $D/11.2-RELEASE"	# 2018-06-27
# dirs="${dirs} $D/12.0-RELEASE"	# 2018-12-11
# dirs="${dirs} $D/11.3-RELEASE"	# 2019-07-09
# dirs="${dirs} $D/12.1-RELEASE"	# 2019-11
# dirs="${dirs} $D/11.4-RELEASE"	# 2020-06-15/23
# dirs="${dirs} $D/12.2-RELEASE"	# 2020-10-31 host=fire:0s2,lapr:s2
# dirs="${dirs} $D/13.0-RELEASE"	# 2021-04-13 host=rasp
# dirs="${dirs} $D/12.3-RELEASE"	# 2021-12-09 du=54 G host=dell:s1,lapr:s2
  dirs="${dirs} $D/12.4-RELEASE"	# 2022-12-05 du=109 G host=dell
# dirs="${dirs} $D/13.1-RELEASE"	# 2022-05-16
  dirs="${dirs} $D/13.2-RELEASE"	# 2023-05-11 du=? host=land.slim,rasp-later
  dirs="${dirs} $D/14.0-RELEASE"	# 2023-11-20
  dirs="${dirs} $D/13.3-RELEASE"	# 2024-03-05
  dirs="${dirs} $D/14.1-RELEASE"	# 2024-06-04
  dirs="${dirs} $D/13.4-RELEASE"	# 2024-09-17
  dirs="${dirs} $D/14.2-RELEASE"	# 2024-12-03
  dirs="${dirs} $D/13.5-RELEASE"	# 2025-03-11
  dirs="${dirs} $D/15.0-RELEASE"	# 2025-12-02

# remo hosts=land,slim

# With FIRSTLAST=last, current is appended after all the releases.
case "${FIRSTLAST}" in
(last)
	dirs="${dirs} $D/current"	# host=lapr/s1,blak/1s1	# ONE_ONLY
	;;
esac

# Appended after everything else in the list.
dirs="${dirs} $D/jhs"
	# Estic for which I was master repository.
	# Word perfect, that I''m licensed to use, & don''t want clueless
	#	ports scripts forcing me to fetch again.

# Do the work: strip each listed directory against all that follow it.
# ${dirs} is unquoted on purpose, as outer needs one argument per directory.
outer ${dirs}

# echo "$D/common/ should be empty, looking now:"
# ls $D/common

echo "Zero size files (if any) in /pub/freebsd/dists"
# NOTE(review): lower case freebsd here, but D is /pub/FreeBSD/dists;
#	presumably one is a symbolic link to the other, to be confirmed.
# Only search if the cd worked: otherwise find would run in, & delete
# zero size files below, whatever directory happened to be current.
if cd /pub/freebsd/dists ; then
	# find has no --exclude parameter so just list where I want to search.
	#	Transient zero size files in ./httree/hts-cache/ref/\*.ref
	#					\ to avoid triggering brackets.c
	# -exec rm {} + rather than | xargs rm, so that file names holding
	# white space or quotes arrive intact, & rm is not run at all when
	# nothing matches. -f keeps rm from prompting.
	find [0-9]* current jhs -type f -size 0c -exec rm -f -- {} +
else
	echo "Cannot cd to /pub/freebsd/dists, zero size files not removed." >&2
fi

# Closing hints for the operator, printed verbatim, then a clean exit.
# Everything in the file after the exit is never executed.
cat <<'EOF'
Caution: editors/openoffice.org-3 make fetch is too crippled to fetch from
adjacent directories, something should be tweaked to allow local fetch;
till then consider doing something like this:
	cd /pub/freebsd/dists/8.2-RELEASE/..
	ln 8.3-RELEASE/jdk-6u3-fcs-mozilla_headers-b05-unix-24_sep_2007.jar \
		8.2-RELEASE/
	ln 8.3-RELEASE/jdk-6u3-fcs-src-b05-jrl-24_sep_2007.jar \
		8.2-RELEASE/
	ln 8.3-RELEASE/bsd-jdk16-patches-4.tar.bz2 \
		8.2-RELEASE/
Suggestion: Run distfiles_dups
Suggestion: Run dups, cos can be same file in dif dirs eg
	12.3-RELEASE/gnome3/accerciser-3.38.0.tar.xz
	12.4-RELEASE/gnome/accerciser-3.38.0.tar.xz
Suggestion: Run portmaster -t -y --clean-distfiles
EOF
#	portmaster comes from /usr/ports/ports-mgmt/portmaster
exit 0

#	To:		ports@@@freebsd.org
#	cc:		Lars Engels <lars.engels@@@0x20.net>,
#			Kubilay Kocak <koobs@@@freebsd.org>,
#			RW <rwmaillists@@@googlemail.com>,
#			Bryan Drewery <bdrewery@@@FreeBSD.org>
#	Subject:	Re: distfiles cleaner
#	From:		"Julian H. Stacey" <jhs@@@berklix.com>
#	Organization:	http://www.berklix.com BSD Unix Linux Consultants, Munich Germany
#	Fcc:		sent
#	User-agent:	EXMH on FreeBSD http://www.berklix.com/free/
#	X-URL:		http://www.berklix.com
#	In-reply-to:	Your message "Mon, 19 Oct 2015 14:56:11 +0100."
#			<20151019145611.459045c5@@@gumby.homeunix.com>
#
#	Hi, Reference:
#	> From:		RW via freebsd-ports <freebsd-ports@@@freebsd.org>
#	> Date:		Mon, 19 Oct 2015 14:56:11 +0100
#
#	RW via freebsd-ports wrote:
#	> On Mon, 19 Oct 2015 14:16:37 +0200
#	> Julian H. Stacey wrote:
#	>
#	> > Hi ports@@@
#	> > What is the modern equivalent of this obsolete stuff please ?
#	> >
#	> > http://www.freebsd.org/cgi/man.cgi?query=portsclean&sektion=1&apropos=0&manpath=FreeBSD+9.0-RELEASE+and+Ports
#	> >	portsclean --distclean
#	> >	Clean out all the distfiles that are not referenced by any
#	> >	port in the ports tree.
#	> >
#	> > http://wiki.freebsd.org/portupgrade
#	> >	Portupgrade (aka pkgtools)
#	> >	last edited 2012-08-01
#	>
#	> Why are you looking at the wiki?
#
#	I didnt know what tool to look for so a search engine took me to the wiki.
#
#	> The code was updated only a few months
#	> ago?
#
#	Err ? moved from wiki to which ports/ ?
#	Wiki has "It is currently maintained by BryanDrewery <bdrewery@@@FreeBSD.org>"
#	So I added to CC. If the wiki is obsolete I suggest it be deleted
#	or reduced to point to URL with new code.
#
#	>  http://svnweb.freebsd.org/ports/head/ports-mgmt/portupgrade/
#
#	Thanks.
#
#	[with current]
#	cd /usr/ports/ports-mgmt/portupgrade ; make install ; man portupgrade
#		"To clean unreferenced distfiles, working directories and old shared
#		libraries, use portsclean(1)"
#	which portsclean				# /usr/local/sbin/portsclean
#	pkg which /usr/local/sbin/portsclean		# portupgrade-2.4.14,2
#	pkg info portupgrade-2.4.14,2 | grep Origin	# ports-mgmt/portupgrade
#
#
#	> I think the case for portupgrade and portsclean is now much stronger
#	> than it was before they were fully converted to pkg. That removed all
#	> the problems associated with maintaining a secondary database. The
#	> conversion to pkg replaced portmaster's best code and portupgrade's
#	> worst.
#
#	I'm lost. I presume portupgrade had some bad code,
#	replaced by good code from portmaster.
#
#	> BTW I recently switched from distviper to portsclean. I used to prefer
#	> distviper because of its speed, but that speed comes from assuming that
#	> all distinfo files are called distinfo, which isn't true. I found
#	> it was unconditionally removing the files for linux ports.
#
#	Yes, confirmed below, distviper removed a great swathe of
#		rpm/i386/fedora/10/
#		rpm/i686/centos/6.6/
#
#	> It's also
#	> never been converted to pkg which breaks its fast mode.
#
#	Cloning my 58G current distfiles to test each method I found:
#
#	PORTS/			EXECUTABLE				DISTFILES GIG
#	sysutils/bsdadminscripts
#				distviper				# 4.185918
#
#	ports-mgmt/portmaster
#				portmaster -t -y --clean-distfiles	# 4.231094
#
#	ports-mgmt/portupgrade
#				portsclean --distclean			# 4.078680
#
#	portsclean removed 50 files that portmaster did not, so I guess portmaster
#	may be best/ most conservative. A sample of 1 deletion by portsclean:
#		./subversion18/subversion-1.8.14.tar.bz2
#	from devel/subversion18 (not something Ive just built or fetched BTW)
#
#	I ran find on all 3 stripped distfiles/ +
#	diff current.portsclean current.portmaster > cleanmaster.diff
#	wc -l *
#		100 cleanmaster.diff
#		751 current.distviper
#		837 current.portmaster
#		783 current.portsclean
#	All here for next few days in case of interest:
#		http://www.berklix.com/~jhs/tmp/distfile_lists/
#
#	Thanks Lars, Kubilay, RW.
#
#	Cheers,
#	Julian
