#!/bin/sh
# /site/usr/local/bin/demime+emil+delatt
# Source:		http://www.berklix.com/~jhs/bin/.sh/demime+emil+delatt
# Installed by:		cd ~jhs/public_html/bin/.sh ; xs make install
# Author:		jhs_ERASE_@berklix.com
# Copyright		Julian H. Stacey, Munich October 2014 - 2021
#			Free for use.

# See Also:
#  /usr/ports/mail/delatt		in 9.2-RELEASE 12.2-STABLE 12.4-RELEASE delatt --mdir ~/mail/tech/mime/samples/..........
#  /usr/ports/mail/demime		in 4.10 to 8.2-RELEASE, not in 8.3 9.2 & 12.2
#  /usr/ports/mail/emil			in 12.4-RELEASE
#  /usr/ports/mail/mailutils		in 12.4-RELEASE
#  /usr/ports/mail/mblaze		in 12.4-RELEASE
#  /usr/ports/mail/mimedefang		in 12.4-RELEASE
#  /usr/ports/mail/p5-MIME-Tools	in 12.4-RELEASE

#  pkg info -O delatt	delatt-1.1.3_1	Strip attachments from email
#  pkg info -O demime
#  pkg info -O emil	emil-2.1b9_1    Mail format/encoding converter
#  pkg info -o delatt	delatt-1.1.3_1	mail/delatt
#  pkg info -o demime 			mail/demime/
#  pkg info -o emil	emil-2.1b9_1    mail/emil
#  ls -l `which delatt`	  7693 Apr  7  2023 /usr/local/bin/delatt
#  ls -l `which demime`	 93740 Mar 17  2014 /9-RELEASE/usr/local/bin/demime
#  ls -l `which emil`	141072 Apr  6  2023 /usr/local/bin/emil

#  http://svnweb.freebsd.org/ports/head/mail/emil/pkg-descr?revision=HEAD
#  http://www.berklix.com/~jhs/bin/.sh/sedc2a0
#  http://www.berklix.com/~jhs/src/bsd/fixes/FreeBSD/ports/gen/mail/demime/
#  http://cgit.freebsd.org/ports/tree/mail/p5-Email-MIME-Attachment-Stripper/pkg-descr

# This script is called by $NOMIME & $NOMIME_FORCE in
# ~jhs/.procmailrc* http://www.berklix.com/~jhs/dots/.procmailrc

# Also can be called from within EXMH, click:
#	Click:		"More"
#	Click:		"Apply command to message"
#	Text in box:	"demime+emil+delatt $file"

# Flags:
#	-f To forcibly strip enclosures
#	  (which might be signature gifs, gifs in spam, or pdf docs
#	  or jpg pics from friends). It is up to the procmail filter or a
#	  human to decide if & when to assert -f.
#	-d Debug extra info.

# Problems - Demime: disappeared from FreeBSD /usr/ports/mail/demime/

# Problems - Emil:
#   +pid 45675 (emil), uid 200: exited on signal 11
#   grep "uid 200: exited on signal 11" ~/mail/cron/security/[0-9]* | grep emil

# JJLATER:
#	- Add Signal trap handling to remove temp files.
#	- Add code to be sensible if fed unexpected data
#	  such as perhaps a uuencoded (or raw) binary or non mail text.

# Set up: script name, a private temporary directory, the temp file names
# used further down, the exit status accumulator & the debug sink.

# Name used as a prefix for temp files & in messages.
base=$(basename -- "$0")
# UTC time stamp, so a left over temp directory is easy to date.
trail=$(date -u +%Y-%m-%dT%H:%M:%SZ)


# Assume TMPDIR may be unset
#   ("setenv TMPDIR  ~/tmp" was removed from /site/etc/csh.cshrc.master
#   as it was breaking make world within a chroot, where ~ was empty)
if [ -z "${TMPDIR}" ] ; then	#{{
	tmp_root=~/tmp
else				#}{
	tmp_root=${TMPDIR}
fi				#}}

# The process id is part of the directory name: the time stamp only has
# one second resolution, so 2 mails delivered in the same second would
# otherwise share one directory, & whichever run finished first would
# rm -rf the temp files of the run still in progress.
# The "tmpdir" suffix also is a safety feature, cos even if the other
# vars evaluated to null strings the name still ends in ".tmpdir"
# before a later rm -rf of it.
TMPJDIR=${tmp_root}/.${base}.${trail}.$$.tmpdir

if ! mkdir -p -- "${TMPJDIR}" ; then	#{
	# Without the directory every later redirect would fail, so stop
	# now with an error rather than emit an empty or partial mail.
	echo "${base}: cannot create ${TMPJDIR}"	>&2
	exit 1
fi	#}

# Remove the temp directory however the script ends. The signal traps
# just exit, which in turn runs the EXIT trap.
trap 'rm -rf -- "${TMPJDIR}"' EXIT
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

types=${TMPJDIR}/.${base}.$$.types.tmp
filtered_output=${TMPJDIR}/.${base}.$$.filtered_output.tmp
pipe_input=${TMPJDIR}/.${base}.$$.pipe_input_data.tmp
ret_val=0

dev_debug_path="/tmp/${base}.tmp"

dev_debug="/dev/stderr"		# Production
# dev_debug="/dev/stdout"	# Debug
# dev_debug="$dev_debug_path"	# Alternate Debug for if testing pipes

if [ "${dev_debug}" = "${dev_debug_path}" ] ; then	#{{
	touch -- "${dev_debug_path}"	# ensure it exists to append to
fi							#}}

# Flag parsing: consume leading -f (force) and/or -d (debug), in any
# order, & stop at the first argument that is neither, so whatever is
# left in "$@" are the mail files (none left means: read a pipe).
# A loop is used so both flags can be given together; every branch
# holds a real command, as a branch holding only comments is a syntax
# error in strict POSIX shells.
force=NO
debug=NO
# echo "Debug: Count: $#, Args: $*"			>> $dev_debug
while [ "$#" -gt 0 ]
	do	#{
	case "$1" in	#{
	-f)
		# echo "Debug: Assert Force"		>> $dev_debug
		force=YES
		;;
	-d)
		# echo "Debug: Assert Debug"		>> $dev_debug
		debug=YES
		;;
	*)
		# echo "Debug: No more flags"		>> $dev_debug
		break
		;;
	esac	#}
	shift
	done	#}
# echo "Debug: Count: $#, Args: $*"			>> $dev_debug


# Decide where the mail comes from. With no arguments left after the
# flags, stdin is spooled into ${pipe_input} & that one file is handed
# on in ${files}; otherwise ${files} is the list of named files.
if [ "$#" -eq 0 ] ; then	#{{	Pipe
	# echo "Debug: This is a pipe."			>> $dev_debug
	cat > "${pipe_input}"	# Read pipe into a file,
				# can not just pipe it for 2 reasons:
				#	- Need to sample data before
				#	  deciding if to process it.
				#	- Emil may later seg. fault,
				#	  in which case do a plain cat.
	files=${pipe_input}
else				#}{	File
	# echo "Debug: This is a file."			>> $dev_debug
	files=$*
fi				#}}

# blocking_types FILE
#	Print every line of FILE that makes a cautious (non -f) run leave
#	the mail alone: each "Content-Type: " line that is not
#	multipart/alternative, text/plain or text/html (& is not part of
#	a DKIM h= list), plus any PGP signature armor line.
#	No output means FILE is considered safe to strip.
blocking_types() {
	# Be cautious: only strip simple mails with plain
	# + html texts + nothing else. If it has eg a photo
	# or PDF leave mail unchanged.

	# It's possible to have nested MIME enclosures:
	# which do not start on left margin matching
	# "^Content-Type:" but are indented so just caught
	# by "Content-Type:".

	# Question instead of "Content-Type: " should I also allow a tab ?

	# '-i' as up to Feb 2014 on stable@freebsd.org I have also seen
	# 'Content-Type: TEXT/PLAIN' from
	#	User-Agent: Alpine 1.10 (GSO 962 2008-03-14)
	#	User-Agent: Alpine 2.00 (BSF 1167 2008-08-23)

	# Sample header lines:
	#	From easyjet.com
	#	DKIM-Signature: v=1; a=rsa-sha1; c=relaxed/relaxed;
	#		s=easyjet; d=email.easyjet.com;
	#	h=MIME-Version:Content-Type:Date:To:From:Reply-To:
	#		Subject:List-Unsubscribe:Message-ID;
	#		i=generationeasyJet@email.easyJet.com;
	#	bh=.........; b=.........

	# Sample:
	#	Date: Wed, 22 Oct 2014 19:25:47 +0100
	#	From: "Martin's Money Tips" <MartinsMoneyTips@moneysavingexpert.com>
	#	DKIM-Signature: v=1; a=rsa-sha1; c=simple;
	#		d=moneysavingexpert.com; s=sm2;
	#		i=@moneysavingexpert.com; h=Content-Transfer-Encoding:
	#	Content-Type:Reply-To:MIME-Version:Message-ID:Subject:Date:To:			NOT CAUGHT
	#		From; bh=...; b=...

	# Above, the h= is seen to be in same order as headers, so the
	# Content-Type in h= could conceivably be at beginning,
	#	middle or end, eg:
	# h=Content-Type:... ; h=...:Content-Type:... ; h=...:Content-Type;

	# Mails containing any of these are not stripped:
	#	application/pdf image/jpeg message/disposition-notification
	#	multipart/mixed multipart/mixed multipart/related
	#	multipart/report multipart/signed text/calendar
	#	text/comma-separated-values text/h=3d3d (3d = '=')
	#	text/rfc822-headers text/rfc822-headers text/text/x-sgml
	#	text/vcard text/x-csrc text/x-diff text/x-patch

	# JJLATER add a strip of ms-word-doc ?.

	grep "Content-Type: " < "$1" \
		| grep -v	"^Content-Type: multipart/alternative" \
		| grep -i -v	"^Content-Type: text/plain" \
		| grep -i -v	"^Content-Type: text/html" \
		| grep -v	"=Content-Type:" \
		| grep -v	":Content-Type:" \
		| grep -v	":Content-Type;"

	# -F -e: the armor line is a fixed string starting with '-', so
	# no regular expression (& no backslash escaping of '-', which
	# has no defined meaning in a basic regular expression) is needed.
	grep -F -e '-----BEGIN PGP SIGNATURE-----' < "$1"
	# demime breaks, removing sig. above then exmh complains
	# unexpected armor invalid armor header
	# JJLATER Even if "x$force" == "xYES" I should not allow
	# demime to run if there is a BEGIN PGP SIGNATURE
}

# process_message FILE KIND
#	Handle the one mail held in FILE.
#	KIND is "pipe" when FILE is the spooled copy of stdin (the result
#	is written to stdout), anything else when FILE was named on the
#	command line (the result is written back into FILE).
#	Reads globals: base force debug types filtered_output dev_debug.
#	Sets ret_val=1 if the filtered result came out empty.
process_message() {
	i=$1
	if [ "$2" = "pipe" ] ; then	#{{
		where="pipe"
	else				#}{
		where="file: $i"
	fi				#}}

	# First keep a copy for later debug,
	# as I am getting mysterious zero size files.
	#  See Also:
	#	~/mail/tech/mime/demime-tool
	#	~/mail/tech/mime/enclosure_only_in_html
	#	~/mail/tech/mime/samples
	#	~/mail/tech/mime/samples/emil_segmentation_violation
	#	~/mail/tech/mime/samples/fails_with_demime_plus_emil
	#	~/mail/tech/mime/samples/ok_with_demime_plus_emil
	log=~/.mail.demime+emil+delatt.log
	touch		"$log"
	echo " " >>	"$log"
	cat -- "$i" >>	"$log"

	# Finished logging, Now do the work.
	if [ "$debug" = "YES" ] ; then	#{
		echo ""			>> "$dev_debug"
		echo "$0 DOING $i"	>> "$dev_debug"
	fi	#}
	if [ "$force" = "NO" ] ; then	#{
		blocking_types "$i" > "$types"
	fi	#}

	if [ "$force" = "NO" ] && [ -s "$types" ] ; then #{{ Test of size 0.
		if [ "$debug" = "YES" ] ; then	#{
			echo "$0 Skipping as $types size is > 0: $i" \
				>> "$dev_debug"
		fi	#}
		# One printf so the report is a single line, followed by
		# the offending lines themselves.
		printf '%s: Not changing data in %s, containing:\n' \
			"$base" "$where"			>> "$dev_debug"
		cat "$types"					>> "$dev_debug"
		if [ "$2" = "pipe" ] ; then	#{
			# A filter must still pass the mail on, unchanged.
			cat -- "$i"
		fi				#}
		# JJLATER do not exit 1, there may be more files yet.
	else	#}{ ! ( [ "x$force" == "xNO" ] && test -s $types )
		# Size of $types is 0, Safe to process $i with demime & emil.

		# Both demime And emil have advantages & disadvantages:
		# Notes on why I use Both demime & emil, & in that order:

		#	- Demime alone was not converting quoted-printable
		#	  junk back to plain text.

		#	- Emil alone will just convert an HTML spam to
		#	  uuencoded, making it less recognisable to human
		#	  or spam filter.

		#	- Emil won't discard enclosures eg .JPG, just
		#	  converts them to uuencoded appended without MIME.

		#	- Emil does not leave an X-Something in header,
		#	  (as a clue its been nobbled), unlike demime which
		#	  does. ( See Also /usr/local/bin/header_add
		#	  http://www.berklix.com/~jhs/src/bsd/jhs/bin/local/mail/header_add )

		#	- Emil with Header Content-Type: multipart/alternative
		#	  & body Content-Transfer-Encoding: quoted-printable
		#	  outputs 8bit & also restores plain text, including
		#	  removing artificial line breaks caused by
		#	  quoted-printable, so spam phrases are
		#	  automatically detectable & discardable.

		#	- 2014-02-26 to emil I appended -H 8bit -T 8bit to
		#	  make mail easier to edit with vi, as I was still
		#	  seeing some quoted-printable junk.
		#	  Fixed in http://www.berklix.com/~jhs/src/bsd/fixes/freebsd/ports/\
		#		gen/mail/emil/Makefile.REL=9.1-RELEASE.diff

		#	- JJLATER reduce emil -h 3 to -h 2,
		#	  mark header that I have nobbled the content.

		#	- Do not use html2text as it scrambles the header,

		# Code for later if I can solve problem below **
		# if [ -e /usr/local/bin/demime ] ; then	#{{
		# 	DEMIMEPATH="/usr/local/bin/demime"
		# elif [ -e /9-RELEASE/usr/local/bin/demime ] ; then # }{
		# 	DEMIMEPATH="/9-RELEASE/usr/local/bin/demime"
		# else	#}{
		# 	DEMIMEPATH="/demime_nonexistant"
		# fi	#}}

		## { commented out 2023-08-08 as it was stripping
		##   both alternate maybe because perl
		##    version was too old ? just a guess.
		## if [ -e /usr/local/bin/demime ] ; then	#{{
		## 	demime \
		## 		-8 - < $i | emil -h 3 -H 8bit -T 8bit > \
		## 		${filtered_output}
		## }

		# elif [ -e /9-RELEASE/usr/local/bin/demime ] ; then
		#	/9-RELEASE/usr/local/bin/demime \
		#		-8 - < $i | emil -h 3 -H 8bit -T 8bit > \
		#		${filtered_output}
		# ** Above fails with:
		#	Can't open junkmail file:No such file or directory \
		#	at /9-RELEASE/usr/local/bin/demime line 604, \
		#	<STDIN> line 95631.
		# I guess it needs some library, as this works:
		#	chroot /9-RELEASE
		#	demime -8 - < in-file > out-file
		# replacing with
		#	[demime 1.01d removed an attachment of type
		#	image/jpeg which had a name of

		# NOTE: with the delatt call commented out, both branches
		# below currently just copy the mail through unchanged.
		if [ -e /usr/local/bin/delatt ] ; then # }{
			cat < "$i" > "${filtered_output}"
			## delatt --mdir < $i > ${filtered_output}
			#  cd /usr/ports/mail/delatt/ ; make install
			#  /usr/ports/mail/delatt/pkg-descr :
			#    Strip attachments from email, and optionally
			#    save the attachments to files.  It will work with
			#    either mbox or maildir files.
			#    It is great for archiving old email without
			#    wasting space on attachments and the extra HTML
			#    message parts that some MUAs http://en.wikipedia.org/wiki/Comparison_of_email_clients attach.
			# ---
			# jhs@: it does not convert eg
			#   Content-Transfer-Encoding: quoted-printable
			#   Content-Type: text/html; charset=UTF-8
			#   href=3D"http://us02web.zoom.us/w/88451734555?tk=3DZzCZds
		else	#}{
			cat < "$i" > "${filtered_output}"

			# Demime was a perl script
			#	ports/mail/demime is in 4.10 to 8.2-RELEASE,
			#	 not in 8.3 not in 9.2 & 12.2
			#	http://www.berklix.com/~jhs/src/bsd/fixes/FreeBSD/\
			#		ports/gen/mail/demime
			# COMMENT=	A tool to scrub mime from mailing lists
			# http://duckduckgo.com/?sites=www.FreeBSD.org\
			#	%2Cdocs.FreeBSD.org%2Clists.FreeBSD.org\
			#	%2Cwiki.FreeBSD.org%2Cforums.FreeBSD.org\
			#	&ka=v&kt=v&kh=1&kj=r2&q=mail%2Fdemime\
			#	&submit=Search&ia=web
			# http://www.freebsd.org/cgi/ports.cgi?query=\
			#	mime&stype=all&sektion=mail
			# Look at:
			#	/usr/ports/mail/mimedefang
			#	/usr/ports/mail/normalizemime
			#	/usr/ports/mail/\
			#		p5-Email-MIME-Attachment-Stripper
			#	/usr/ports/mail/p5-MIME-Tools
		fi	#}}

		# JJLATER test & report on emil & delatt exit codes

		# Above, something (in emil?) complains:
		# Possible unintended interpolation of
		#	@parrot in string at (eval 239) line 4.
		# Global symbol "@parrot" requires explicit package
		#	name at (eval 239) line 4.

		# If zero input to a pipe, "Segmentation fault"

		# JJLATER if emil fails, I could just run demime,
		# but perhaps better to spend time analysing & fixing emil.

		if [ -s "${filtered_output}" ] ; then	#{{
			# echo "Not Zero Size, OK"		>> $dev_debug
			if [ "$2" = "pipe" ] ; then	#{{ # Pipe
				# Output to pipe /dev/stdout.
				cat "${filtered_output}"
			else		# }{
				cat "${filtered_output}" > "$i"
				# cat rather than mv to preserve permissions
				# & ownership & in case it is linked.
			fi				#}}
		else	#}{
			# Emil probably segmentation faulted on zero body size.
			printf '%s: seg. fault, so not changed data in %s.\n' \
				"$base" "$where"		>> "$dev_debug"
			ret_val=1
		fi	#}}
		rm -f -- "${filtered_output}"
	fi	#}}
	if [ "$force" = "NO" ] ; then	#{{
		rm -- "$types"
	fi					#}}
}

# Main loop. In file mode iterate over the original argument vector, so
# file names holding white space or glob characters arrive intact.
# In pipe mode ${files} names the single spooled copy of stdin.
if [ "$#" -eq 0 ] ; then	#{{	Pipe
	if [ -n "${files}" ] ; then	#{
		process_message "${files}" pipe
	fi	#}
else				#}{	File
	for i in "$@"
		do	#{
		process_message "$i" file
		done	#}
fi				#}}

# Final clean up & exit: remove the spooled copy of stdin (pipe mode
# only), then the private temporary directory, & hand back ret_val
# (0 = every mail handled, 1 = at least one filtered result was empty).
if [ "$#" -eq 0 ] ; then	#{{
	# echo "Debug: This is a pipe."				>> $dev_debug
	rm -f -- "${pipe_input}"
fi				#}}

# ls -la $TMPJDIR
# rmdir $TMPJDIR
# Quoted, & skipped if the name is empty, so a name holding white space
# or a lost variable can not make rm -rf act on anything else.
if [ -n "${TMPJDIR}" ] ; then	#{
	rm -rf -- "${TMPJDIR}"
fi	#}

# exit $?	# man sh: "the exit status of the most recent pipeline"
exit "${ret_val:-0}"

# Re. EXMH menu item "apply command to message"
# Default: cat $file > /dev/null
# A command line with a semicolon will error, eg:
#	cat $file > /dev/null ; cp /tmp/t1 /tmp/t2
#	invalid command name "cp"
# So one cannot have a complex command line with both demime & emil,
# hence instead call this script.

# Earlier experiment that worked OK direct from EXMH:
#	cat $file | sed -e s/jhs_ERASE_@berklix.com/jhs@localhost/g | \
#		demime -8 - | emil -h 3 -H 8bit -T 8bit | sendmail -i -t
# No need of path prefix '/usr/local/bin'  which I assume it gets from $PATH.
# But limitations:
# - Do not remove spaces around pipe symbols, else it fails.
# - The sed is to avoid delay, mail going out to gate then smart host then
#   returning.
# - If there are other addressees (eg on cc: or to: line),
#   they will get a copy that they will not want.
# - The exmh window waits till the pipe is complete (&
#   sendmail I guess waits to talk to gate), so it's a few seconds wait.
# So better to avoid sendmail & just process locally as a file.

#	ports/mail/demime is in 4.10 to 8.2-RELEASE, not in 8.3.
#	http://www.berklix.com/~jhs/src/bsd/fixes/FreeBSD/ports/gen/mail/demime
