#!/bin/bash
#
# updatefaqs - Copyright 1996 P.Fox (fox@roestock.demon.co.uk)
#
# Automatically extract news articles to store in a faqs directory.
# The files are stored according to their archive name if possible,
# otherwise by subject name. The files are gzipped when stored for
# space efficiency.
#
# This script should be run immediately after unbatching a new
# set of news articles. For C-news, this can be done by calling it
# at the end of /usr/lib/newsbin/input/newsrun. And before updating
# global new reader index and overview files. (This script actually
# updates my overview files - remove if not appropriate)
#
# Warranty - there isn't one. This script will probably need
# modifying to suit local conditions. USE AT YOUR OWN RISK.
#
# Conditions of use: The above text must remain part of the script,
# while the program carries out the most of the functions described
# above.  Other than that, do what you will.
#
# Root of the news spool; group a.b.c is read from $ROOTDIR/a/b/c.
ROOTDIR=/var/spool/news
#ROOTDIR=/home
# Root of the FAQ archive tree.
DESTDIR=/home/faqs

# The first archiving pass creates and fills paths relative to the current
# directory, so running anywhere other than DESTDIR would scatter (and, for
# the "answers" groups, move) articles into the wrong place.  Stop instead.
cd "$DESTDIR" || {
	echo "updatefaqs: cannot cd to $DESTDIR" >&2
	exit 1
}

# Report file: every pass appends to it and it is mailed at the end.
# mktemp creates it atomically under an unpredictable name, so another
# user cannot pre-create or symlink the path in /tmp.
TMPFILE=$(mktemp /tmp/updfaq.XXXXXX) || {
	echo "updatefaqs: cannot create temporary report file" >&2
	exit 1
}
# Remove the report on any exit path, not only on normal completion.
trap 'rm -f -- "$TMPFILE"' EXIT
#
# First, the groups whose articles are removed after archiving
#
groups="comp.answers news.answers comp.os.linux.answers sci.answers demon.answers uk.answers"
#groups="faqs"

#######################################
# Archive every article of the given newsgroups under DESTDIR and remove
# it from the spool.
#
# The archive name is taken from the first "Archive-name:" line found in
# the article, else from the first "Demon-archive-name:" line, else it is
# built from the Subject line and filed under SUBJECTS/.  The article is
# moved to that name and gzipped.  When an archived copy that is not older
# than the article already exists, the article is simply deleted.
# Articles for which no name can be derived are left in the spool.
# Globals:
#   ROOTDIR (read) - spool root; group a.b.c is read from ROOTDIR/a/b/c
#   DESTDIR (read) - archive root; becomes the working directory
#   TMPFILE (read) - report file that log lines are appended to
# Arguments:
#   Newsgroup names (dot separated), one per argument.
# Returns:
#   1 if DESTDIR cannot be entered, otherwise 0.
#######################################
archive_and_remove_groups() {
	local dir srcdir file article subject newname

	# Archive names are relative paths; they must be resolved from DESTDIR.
	if ! cd "$DESTDIR"
	then
		echo "Cannot cd to $DESTDIR" >> "$TMPFILE"
		return 1
	fi

	for dir in "$@"
	do
		srcdir=$ROOTDIR/${dir//.//}
		for file in "$srcdir"/*
		do
#			echo "Looking at $file"
			# An unmatched glob stays literal and lands here too.
			if test ! -f "$file"
			then
				echo "File $file not found	---- No articles in $dir ?" >> "$TMPFILE"
				continue
			fi
			article=${file##*/}

			# Find the subject line
			subject=$(grep '^Subject:' "$file" | head -n 1)
			# NOTE(review): the first sed deletes ":<any char>/pub/" before
			# the 13-character "Archive-name:" prefix is cut off, which looks
			# like it drops one character of such names -- kept as it was,
			# confirm the intended input format.
			newname=$(grep -i '^archive.name:' "$file" | head -n 1 \
				| sed 's/:.\/pub\///' | cut -c14- | tr -d ' \t' \
				| sed 's/\.gz$/.gz./')
#			echo "Subject: $subject; newname: $newname"
			if test -z "$newname"
			then
				newname=$(grep -i '^demon-archive-name:' "$file" | head -n 1 \
					| cut -c20- | tr -d ' \t' | sed 's/\.gz$/.gz./')
				echo "Cannot find Archive-name, trying demon-archive-name" >> "$TMPFILE"
			fi

			# The name comes straight from article text.  Refuse anything
			# that is absolute, climbs out of DESTDIR with "..", or names a
			# directory rather than a file, and fall back to the subject.
			case "$newname" in
			"") ;;
			/* | .. | ../* | */../* | */.. | */)
				echo "Unsafe archive name $newname in $file ignored" >> "$TMPFILE"
				newname=""
				;;
			esac

			if test -z "$newname"
			then
				# Characters awkward in file names (including "/") become
				# dots, runs of dots are squeezed, leading dots are dropped.
				newname=$(printf '%s\n' "$subject" | cut -c10- \
					| tr -st ' /<>*()[]'"'"'{}\\&!;:' '.................' \
					| sed 's/\.gz$/.gz./' | sed 's/^\.*//')
				echo "Cannot find Archive-name, using subject" >> "$TMPFILE"
				# Only prefix a real name: "SUBJECTS/" on its own would be
				# treated as a usable name and swallow the article.
				if test -n "$newname"
				then
					newname="SUBJECTS/$newname"
				fi
			fi
			if test -z "$newname"
			then
				echo "Cannot find suitable name for $file in Archive-name or $subject" >> "$TMPFILE"
				continue
			fi

			# mv would silently drop the article inside an existing directory.
			if test -d "$newname"
			then
				echo "Archive name $newname for $file is an existing directory" >> "$TMPFILE"
				continue
			fi
			if ! mkdir -p -- "$(dirname -- "$newname")"
			then
				echo "Cannot create directory for $newname" >> "$TMPFILE"
				continue
			fi

			if test -f "$newname.gz"
			then
				if test "$file" -nt "$newname.gz"
				then
					echo "$article -[update]> $newname" >> "$TMPFILE"
				else
					echo "$article older than $newname" >> "$TMPFILE"
					rm -f -- "$file"
					continue
				fi
			else
				echo "$article ---------> $newname" >> "$TMPFILE"
			fi

			# Compress only what was actually moved; a stale uncompressed
			# file must not replace the archived copy after a failed mv.
			if ! { mv -- "$file" "$newname" && gzip -f -- "$newname"; }
			then
				echo "Failed to archive $file as $newname" >> "$TMPFILE"
			fi
		done
	done
	return 0
}

# $groups is deliberately unquoted: it is a space separated list.
archive_and_remove_groups $groups

#
# Now the groups whose articles are left in place to be read normally
#
groups="comp.risks comp.os.linux.announce demon.announce comp.windows.x.announce gnu.announce gnu.gcc.announce comp.security.announce demon.archives.announce de.alt.sources.linux.patches"

#######################################
# Copy every article of the given newsgroups into DESTDIR/<group>/ as a
# gzipped file, leaving the article itself in the spool.
#
# The archive name is taken from the first "Archive-name:" line found in
# the article, else from the first "Demon-archive-name:" line, else it is
# built from the Subject line.  Subject-named articles of
# comp.os.linux.announce are filed in a sub-directory named after the
# lower-cased first letter of the name (when it is a letter).  Articles
# whose archived copy is not older than the article are skipped silently.
# Globals:
#   ROOTDIR (read) - spool root; group a.b.c is read from ROOTDIR/a/b/c
#   DESTDIR (read) - archive root; DESTDIR/<group> is created if missing
#                    and becomes the working directory
#   TMPFILE (read) - report file that log lines are appended to
# Arguments:
#   Newsgroup names (dot separated), one per argument.
# Returns:
#   0
#######################################
archive_and_keep_groups() {
	local dir srcdir file article subject newname letter

	for dir in "$@"
	do
		# Without its own directory a group would be archived into
		# whatever directory the previous group left us in; skip it.
		if ! mkdir -p -- "$DESTDIR/$dir" || ! cd "$DESTDIR/$dir"
		then
			echo "Cannot use $DESTDIR/$dir, skipping $dir" >> "$TMPFILE"
			continue
		fi
		srcdir=$ROOTDIR/${dir//.//}
		for file in "$srcdir"/*
		do
#			echo "Looking at $file"
			# An unmatched glob stays literal and lands here too.
			if test ! -f "$file"
			then
				echo "File $file not found	---- No articles in $dir ?" >> "$TMPFILE"
				continue
			fi
			article=${file##*/}

			# Find the subject line
			subject=$(grep '^Subject:' "$file" | head -n 1)
			# NOTE(review): the first sed deletes ":<any char>/pub/" before
			# the 13-character "Archive-name:" prefix is cut off, which looks
			# like it drops one character of such names -- kept as it was,
			# confirm the intended input format.
			newname=$(grep -i '^archive.name:' "$file" | head -n 1 \
				| sed 's/:.\/pub\///' | cut -c14- | tr -d ' \t' \
				| sed 's/\.gz$/.gz./')
#			echo "Subject: $subject; newname: $newname"
			if test -z "$newname"
			then
				newname=$(grep -i '^demon-archive-name:' "$file" | head -n 1 \
					| cut -c20- | tr -d ' \t' | sed 's/\.gz$/.gz./')
			fi

			# The name comes straight from article text.  Refuse anything
			# that is absolute, climbs out of the group directory with "..",
			# or names a directory rather than a file, and fall back to the
			# subject.
			case "$newname" in
			"") ;;
			/* | .. | ../* | */../* | */.. | */)
				echo "Unsafe archive name $newname in $file ignored" >> "$TMPFILE"
				newname=""
				;;
			esac

			if test -z "$newname"
			then
				# Characters awkward in file names (including "/") become
				# dots, runs of dots are squeezed, leading dots are dropped.
				newname=$(printf '%s\n' "$subject" | cut -c10- \
					| tr -st ' /<>*()[]'"'"'{}\\&!;:' '.................' \
					| sed 's/\.gz$/.gz./' | sed 's/^\.*//')
				if test "$dir" = "comp.os.linux.announce"
				then
					# split into subdirectories by first letter of subject, if not letter don't bother
					letter=$(printf '%s\n' "$newname" | cut -c1 | tr '[A-Z]' '[a-z]' | grep '[a-z]')
					if test -n "$letter"
					then
						newname="$letter/$newname"
					fi
				fi
			fi
			if test -z "$newname"
			then
				echo "Cannot find suitable name for $file in Archive-name or $subject" >> "$TMPFILE"
				continue
			fi

			# cp would silently drop the copy inside an existing directory.
			if test -d "$newname"
			then
				echo "Archive name $newname for $file is an existing directory" >> "$TMPFILE"
				continue
			fi
			if ! mkdir -p -- "$(dirname -- "$newname")"
			then
				echo "Cannot create directory for $newname" >> "$TMPFILE"
				continue
			fi

			if test -f "$newname.gz"
			then
				if test "$file" -nt "$newname.gz"
				then
					echo "$article -[update]> $newname" >> "$TMPFILE"
				else
					# Already archived and not newer: nothing to do.
					continue
				fi
			else
				echo "$article ---------> $newname" >> "$TMPFILE"
			fi

			# Compress only what was actually copied; a stale uncompressed
			# file must not replace the archived copy after a failed cp.
			if ! { cp -- "$file" "$newname" && gzip -f -- "$newname"; }
			then
				echo "Failed to archive $file as $newname" >> "$TMPFILE"
			fi
		done
	done
	return 0
}

# $groups is deliberately unquoted: it is a space separated list.
archive_and_keep_groups $groups
#
# Now my personal groups
#
groups="roestock.isapnp roestock.serialmon"

#######################################
# Copy every article of the given newsgroups into DESTDIR/<group>/ under
# its own spool file name, gzipped.  Articles that already have a .gz
# copy there are skipped; the spool is left untouched.
# Globals:
#   ROOTDIR (read) - spool root; group a.b.c is read from ROOTDIR/a/b/c
#   DESTDIR (read) - archive root; DESTDIR/<group> is created if missing
#                    and becomes the working directory
#   TMPFILE (read) - report file that log lines are appended to
# Arguments:
#   Newsgroup names (dot separated), one per argument.
# Returns:
#   0
#######################################
archive_personal_groups() {
	local dir srcdir file newname

	for dir in "$@"
	do
		# Without its own directory a group would be archived into
		# whatever directory the previous group left us in; skip it.
		if ! mkdir -p -- "$DESTDIR/$dir" || ! cd "$DESTDIR/$dir"
		then
			echo "Cannot use $DESTDIR/$dir, skipping $dir" >> "$TMPFILE"
			continue
		fi
		srcdir=$ROOTDIR/${dir//.//}
		for file in "$srcdir"/*
		do
#			echo "Looking at $file"
			# An unmatched glob stays literal and lands here too.
			if test ! -f "$file"
			then
				echo "File $file not found	---- No articles in $dir ?" >> "$TMPFILE"
				continue
			fi
			newname=${file##*/}
			if test -f "$newname.gz"
			then
				continue
			fi
			echo "$newname ---------> $newname" >> "$TMPFILE"
			# Compress only what was actually copied.
			if ! { cp -- "$file" "$newname" && gzip -f -- "$newname"; }
			then
				echo "Failed to archive $file as $newname" >> "$TMPFILE"
			fi
		done
	done
	return 0
}

# $groups is deliberately unquoted: it is a space separated list.
archive_personal_groups $groups
# Flush the archived files to disk before rebuilding the overview data.
sync
echo "Making overview" >> "$TMPFILE"
#/var/lib/news/bin/mkover -e
/var/lib/news/bin/mkover -e >> "$TMPFILE"
echo "Checking xrefs (investigate any files named below)" >> "$TMPFILE"
# Files should have Xref: not xref:
#find $ROOTDIR -name .overview -print | xargs grep 'xref:' >> $TMPFILE
#echo "Checking empty fields (investigate any files named below)" >> $TMPFILE
# Files should not have consecutive tabs (except the refs field) and should have Xref: entry
#
# A well-formed overview line is nine tab separated fields: a number, three
# non-empty fields, a <...> field, a possibly empty field (the refs), two
# numbers and the Xref: entry.  Every line NOT matching that is reported.
tab=$(printf '\t')
field="[^${tab}]+"
overview_line="[0-9]+${tab}${field}${tab}${field}${tab}${field}${tab}<${field}>${tab}[^${tab}]*${tab}[0-9]+${tab}[0-9]+${tab}Xref:.*"
# grep -E replaces the obsolescent egrep.  -H prints the file name even
# when only one .overview file exists, so the report can always name the
# file to investigate.  find -exec passes paths directly to grep, which is
# safe for unusual names and runs nothing when no file is found.
find "$ROOTDIR" -name .overview -exec grep -E -v -H "$overview_line" {} + >> "$TMPFILE"
echo "Done" >> "$TMPFILE"
# Mail the collected report, then discard it.
mail -s "updatefaqs" fox < "$TMPFILE"
rm -f -- "$TMPFILE"
