# HG changeset patch # User pjbriggs # Date 1496753689 14400 # Node ID a73c48890bde043b29b9c875add59cebf23d826c # Parent 8159dab5dbdbd3c41909f18879c7d07260c81eba Version v0.02.04.5: handle large output files diff -r 8159dab5dbdb -r a73c48890bde README.rst --- a/README.rst Tue Apr 12 05:53:41 2016 -0400 +++ b/README.rst Tue Jun 06 08:54:49 2017 -0400 @@ -60,6 +60,11 @@ ========== ====================================================================== Version Changes ---------- ---------------------------------------------------------------------- + +0.02.04.5 - Update to handle large output files which can sometimes be generated + by the ``pal_finder_v0.02.04.pl`` or ``pal_filter.py`` scripts (logs + of hundreds of Gb's have been observed in production): log files + longer than 500 lines are now truncated to avoid downstream problems. 0.02.04.4 - Update to the filter script (``pal_filter.py``) which removes some columns from the output assembly file. 0.02.04.3 - Update to the Illumina filtering script from Graeme Fox (including diff -r 8159dab5dbdb -r a73c48890bde pal_finder_wrapper.sh --- a/pal_finder_wrapper.sh Tue Apr 12 05:53:41 2016 -0400 +++ b/pal_finder_wrapper.sh Tue Jun 06 08:54:49 2017 -0400 @@ -50,6 +50,9 @@ echo "### $(basename $0) ###" echo $* # +# Maximum size reporting log file contents +MAX_LINES=500 +# # Initialise locations of scripts, data and executables # # Set these in the environment to overide at execution time @@ -313,8 +316,16 @@ # # Run pal_finder echo "### Running pal_finder ###" -perl $PALFINDER_SCRIPT_DIR/pal_finder_v0.02.04.pl config.txt 2>&1 | tee pal_finder.log -echo "### pal_finder finised ###" +perl $PALFINDER_SCRIPT_DIR/pal_finder_v0.02.04.pl config.txt 1>pal_finder.log 2>&1 +echo "### pal_finder finished ###" +# +# Handlers the pal_finder log file +echo "### Output from pal_finder ###" +if [ $(wc -l pal_finder.log | cut -d" " -f1) -gt $MAX_LINES ] ; then + echo WARNING output too long, truncated to last $MAX_LINES lines: + echo ... +fi +tail -$MAX_LINES pal_finder.log # # Check that log ends with "Done!!" message if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then @@ -335,7 +346,13 @@ # Run the filtering & assembly script if [ ! -z "$FILTERED_MICROSATS" ] || [ ! -z "$OUTPUT_ASSEMBLY" ] ; then echo "### Running filtering & assembly script ###" - python $PALFINDER_FILTER -i $fastq_r1 -j $fastq_r2 -p Output/PAL_summary.txt $FILTER_OPTIONS 2>&1 + python $PALFINDER_FILTER -i $fastq_r1 -j $fastq_r2 -p Output/PAL_summary.txt $FILTER_OPTIONS 1>pal_filter.log 2>&1 + echo "### Output from pal_filter ###" + if [ $(wc -l pal_filter.log | cut -d" " -f1) -gt $MAX_LINES ] ; then + echo WARNING output too long, truncated to last $MAX_LINES lines: + echo ... + fi + tail -$MAX_LINES pal_filter.log if [ $? -ne 0 ] ; then echo ERROR $PALFINDER_FILTER exited with non-zero status >&2 exit 1 @@ -368,5 +385,7 @@ if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then /bin/mv config.txt $OUTPUT_CONFIG_FILE fi +# +echo "### Pal_finder tool completed ###" ## # diff -r 8159dab5dbdb -r a73c48890bde pal_finder_wrapper.xml --- a/pal_finder_wrapper.xml Tue Apr 12 05:53:41 2016 -0400 +++ b/pal_finder_wrapper.xml Tue Jun 06 08:54:49 2017 -0400 @@ -1,4 +1,4 @@ - + Find microsatellite repeat elements from sequencing reads and design PCR primers to amplify them perl