diff pal_finder_wrapper_utils.sh @ 8:4e625d3672ba draft

Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
author pjbriggs
date Wed, 16 May 2018 07:39:16 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pal_finder_wrapper_utils.sh	Wed May 16 07:39:16 2018 -0400
@@ -0,0 +1,75 @@
+#!/bin/bash
+#
+# Helper functions for the pal_finder_wrapper.sh script
+#
+# Utility function for terminating on fatal error
+#
+# Arguments: $@ - words making up the error message
+# Outputs:   "FATAL <message>" on stderr
+# Returns:   does not return; exits the shell with status 1
+function fatal() {
+    # "$*" joins the arguments using the first character of IFS, so pin
+    # IFS locally to get single spaces whatever the caller has set
+    local IFS=' '
+    printf 'FATAL %s\n' "$*" >&2
+    exit 1
+}
+#
+# Check that specified program is available
+#
+# Arguments: $1 - program name (searched for on PATH) or path to an
+#                 executable file
+# Outputs:   "yes" on stdout if the program was found, "no" otherwise
+function have_program() {
+    local program=$1
+    # Use the exit status of the 'type -P' builtin (a PATH-only lookup
+    # which ignores functions, aliases and builtins) rather than parsing
+    # the "no X in (...)" message from 'which': not every 'which'
+    # implementation prints that message, in which case a missing
+    # program would be reported as present
+    if [ -n "$program" ] && type -P "$program" >/dev/null 2>&1 ; then
+        echo yes
+    else
+        echo no
+    fi
+}
+#
+# Set the value for a parameter in the pal_finder config file
+#
+# Arguments: $1 - parameter name; the line to update must start with
+#                 this name followed by a space
+#            $2 - new value; if empty the file is left untouched
+#            $3 - path to the config file, which is edited in place
+# Outputs:   a one-line progress message on stdout
+function set_config_value() {
+    local key=$1
+    local value=$2
+    local config_txt=$3
+    if [ -z "$value" ] ; then
+        echo "No value for $key, left as default"
+        return 0
+    fi
+    printf 'Setting %s to %s\n' "$key" "$value"
+    # The key is used as a regular expression and the value as sed
+    # replacement text, with ',' as the delimiter: escape whatever would
+    # otherwise be treated specially so that both are taken literally
+    local key_re value_rep
+    key_re=$(printf '%s\n' "$key" | sed 's/[][\\.*^$,]/\\&/g')
+    value_rep=$(printf '%s\n' "$value" | sed 's/[\\&,]/\\&/g')
+    # \1 re-emits the matched key, followed by two spaces and the value
+    sed -i 's,^\('"$key_re"'\) .*,\1  '"$value_rep"',' "$config_txt"
+}
+#
+# Identify 'bad' PRIMER_PRODUCT_SIZE_RANGE from pr3in.txt file
+#
+# Parses a pr3in.txt file from pal_finder and reports sequence ids
+# where a PRIMER_PRODUCT_SIZE_RANGE has an upper limit which is
+# smaller than its lower limit.
+#
+# Arguments: $1 - primer3 input file (pr3in.txt) to scan
+#            $2 - output file; for each bad record one line of the form
+#                 "<sequence id><TAB>(<size ranges>)" is appended to it
+function find_bad_primer_ranges() {
+    local pr3in=$1
+    local outfile=$2
+    local pattern="^(SEQUENCE_ID|PRIMER_PRODUCT_SIZE_RANGE)"
+    local range_re='^([0-9]+)-([0-9]+)$'
+    local first second seq_id size_range range bad_range
+    local -a id_fields ranges
+    # The matching lines are consumed as consecutive pairs, i.e. each
+    # SEQUENCE_ID is assumed to sit next to its PRIMER_PRODUCT_SIZE_RANGE
+    # (in either order); a trailing unpaired line is ignored
+    while IFS= read -r first && IFS= read -r second ; do
+        case "$first" in
+            SEQUENCE_ID*)
+                seq_id=${first#*=}
+                size_range=${second#*=}
+                ;;
+            PRIMER_PRODUCT_SIZE_RANGE*)
+                size_range=${first#*=}
+                seq_id=${second#*=}
+                ;;
+            *)
+                continue
+                ;;
+        esac
+        # Ids look like:
+        # (AT_1_16)(AT_1_16)M00879:99:000000000-AH9KG:1:2107:10006:2535
+        # Keep the third ')'-delimited field (an id without any ')' is
+        # kept as it is)
+        if [[ "$seq_id" == *")"* ]] ; then
+            IFS=')' read -r -a id_fields <<<"$seq_id"
+            seq_id=${id_fields[2]-}
+        fi
+        # Check the upper and lower limits in each range
+        # to see if it's okay
+        bad_range=
+        read -r -a ranges <<<"$size_range"
+        for range in "${ranges[@]}" ; do
+            # Entries which aren't of the form <int>-<int> are skipped;
+            # 10# forces base 10 so that leading zeros aren't read as octal
+            if [[ "$range" =~ $range_re ]] && (( 10#${BASH_REMATCH[1]} > 10#${BASH_REMATCH[2]} )) ; then
+                bad_range=yes
+                break
+            fi
+        done
+        # Report if the range is wrong
+        if [ -n "$bad_range" ] ; then
+            printf '%s\t(%s)\n' "$seq_id" "$size_range" >>"$outfile"
+        fi
+    done < <(grep -E "$pattern" "$pr3in")
+}