#! /bin/sh
# check-spool -- find orphaned news articles (those without history entries)
# Author: tale@isc.org (David Lawrence)

# This script works by making two lists in TMPDIR.
# First the articles (and other random files, like core files) are
# listed in SPOOL.LIST.
# Then the files that the history file knows about are listed in HIST.LIST.
# Then the two lists are compared, with SPOOL.LIST files that are not in
# HIST.LIST saved to ORPHANS.LIST and also listed on the standard output.
# HIST.LIST and SPOOL.LIST are removed, but ORPHANS.LIST is left behind
# for further use.
#
# Note that because no locking is done, and the system can keep receiving
# and expiring news the whole time the script is running, it is possible
# that the output will include files that existed when SPOOL.LIST was
# made but no longer existed when HIST.LIST was made because an intervening
# expire ran.  Since most people run a script like this to look for stray
# files to remove, these false positives aren't of any consequence.
#
# Because the find is done before the history scan, there should never be
# articles reported which are really valid, having arrived while the script
# was running.

### INN set up.
## =()<. @<_PATH_SHELLVARS>@>()=
. /var/news/innshellvars

### Uncomment for C News set up (and comment the innshellvars line above)
## =()<#NEWSCONFIG=${NEWSCONFIG-@@}>()=
#NEWSCONFIG=${NEWSCONFIG-/var/news/bin/config}
#SPOOL=$NEWSARTS
#TMPDIR=${TMPDIR-/var/tmp}

# Refuse to run with SPOOL or TMPDIR unset or empty: an argument-less "cd"
# would silently move to the home directory and scan that instead.
: "${SPOOL:?SPOOL is not set; check the news configuration}"
: "${TMPDIR:?TMPDIR is not set; check the news configuration}"

# Prefer $HISTORY when the configuration sourced above defines it
# (NOTE(review): INN's innshellvars is expected to -- confirm for this
# installation); otherwise fall back to the path this script always used.
history_file=${HISTORY:-/news/lib/history}

# Without a readable history file every article would look like an orphan,
# which is dangerous for anyone feeding the output to rm.  Check up front.
if [ ! -r "$history_file" ]; then
  echo "check-spool: cannot read history file $history_file" >&2
  exit 1
fi

cd "$SPOOL" || {
  echo "check-spool: cannot cd to spool directory $SPOOL" >&2
  exit 1
}

# List files of all subdirectories of the spool
# EXCEPT any top level directory that has a dot in its name, or lost+found.
# The top-level entries are collected with a glob (which, like ls, skips
# hidden names) into the positional parameters so that names are never
# re-split.  In a case pattern "+" is literal, so lost+found really is
# skipped; the old egrep pattern treated "+" as a repetition operator.
set --
for entry in *; do
  case $entry in
    *.* | *lost+found*) continue ;;
  esac
  [ -e "$entry" ] || continue    # unmatched glob or dangling symlink
  set -- "$@" "$entry"
done

# In the output, ignore any .overview files.  (Ideally you should have
# .overview files some place other than the article tree.)
if [ "$#" -gt 0 ]; then
  find "$@" -type f -print | grep -v '\.overview' > "$TMPDIR/SPOOL.LIST"
else
  # Nothing to scan; never call find with no starting points, since some
  # implementations would then default to "." and scan everything.
  : > "$TMPDIR/SPOOL.LIST"
fi

cd "$TMPDIR" || {
  echo "check-spool: cannot cd to temporary directory $TMPDIR" >&2
  exit 1
}

sort -o SPOOL.LIST SPOOL.LIST

# Search the history file to see which articles it knows about.
# Gawk is using a field separator of a tab, written as the \t escape so it
# cannot be lost to whitespace mangling.
# The tr command is translating dot to slash and space to newline, turning
# each "news.group/123" entry of the third field into its own spool-relative
# path line.
gawk -F'\t' '$3 != "" {print $3}' "$history_file" |
  tr '. ' '/\n' |
  sort > HIST.LIST

# Lines only in SPOOL.LIST (on disk but unknown to history) are the orphans.
comm -13 HIST.LIST SPOOL.LIST | tee ORPHANS.LIST

rm -f HIST.LIST SPOOL.LIST

exit 0