annotate pal_finder_wrapper_utils.sh @ 9:52dbe2089d14 draft default tip

Version 0.02.04.8 (update fastq subsetting).
author pjbriggs
date Wed, 04 Jul 2018 06:05:52 -0400
parents 4e625d3672ba
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
8
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
1 #!/bin/bash
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
2 #
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
3 # Helper functions for the pal_finder_wrapper.sh script
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
4 #
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
5 # Utility function for terminating on fatal error
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
function fatal() {
    # Report a fatal error and terminate the calling shell.
    #
    # Arguments: the words making up the error message
    # Outputs:   "FATAL <message>" on stderr
    # Exits:     with status 1 (never returns to the caller)
    #
    # "$*" joins all arguments into one string using the first
    # character of IFS, so pin IFS to a space to keep the message
    # space-separated whatever the caller has set. (Embedding "$@"
    # in a longer quoted string splits it into several words.)
    local IFS=' '
    printf 'FATAL %s\n' "$*" >&2
    exit 1
}
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
10 #
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
11 # Check that specified program is available
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
function have_program() {
    # Report whether a program is available to run.
    #
    # Arguments: $1 - program name (looked up on PATH) or a path to
    #                 an executable file
    # Outputs:   "yes" on stdout if the program was found, "no"
    #            otherwise (including when no name is given)
    #
    # 'type -P' is a bash builtin that performs a PATH search for an
    # executable file and ignores aliases, functions and builtins.
    # It signals the result via its exit status, so there is no need
    # to parse the (implementation-specific) messages of 'which'.
    local program=$1
    if [ -n "$program" ] && type -P "$program" >/dev/null 2>&1 ; then
        echo yes
    else
        echo no
    fi
}
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
21 #
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
22 # Set the value for a parameter in the pal_finder config file
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
function set_config_value() {
    # Set the value of a parameter in a pal_finder config file.
    #
    # Lines of the form "<key> <anything>" are rewritten in place as
    # "<key> <value>"; all other lines are left alone.
    #
    # Arguments:
    #   $1 - parameter name, matched literally at the start of a line
    #   $2 - new value; if empty the file is not modified
    #   $3 - path of the config file to edit in place
    # Outputs: a one-line progress message on stdout
    # Returns: 0 on success or when no value was supplied; non-zero
    #          if the config file does not exist or sed fails
    local key=$1
    local value=$2
    local config_txt=$3
    if [ -z "$value" ] ; then
        echo "No value for $key, left as default"
        return 0
    fi
    if [ ! -f "$config_txt" ] ; then
        echo "set_config_value: config file '$config_txt' not found" >&2
        return 1
    fi
    echo "Setting $key to $value"
    # The key and value are spliced into a sed 's,...,...,' command,
    # so anything sed would treat specially must be escaped first:
    # regex metacharacters and the ',' delimiter on the pattern side;
    # '\', '&' and the delimiter on the replacement side.
    local key_regex key_text value_text
    key_regex=$(printf '%s\n' "$key" | sed 's/[][\.*^$,]/\\&/g') || return
    key_text=$(printf '%s\n' "$key" | sed 's/[\&,]/\\&/g') || return
    value_text=$(printf '%s\n' "$value" | sed 's/[\&,]/\\&/g') || return
    sed -i -e "s,^${key_regex} .*,${key_text} ${value_text}," -- "$config_txt"
}
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
34 #
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
35 # Identify 'bad' PRIMER_PRODUCT_SIZE_RANGE from pr3in.txt file
4e625d3672ba Pal_finder tool version 0.02.04.7: add detection/reporting of bad ranges; enable subset of reads to be used; check n-mers.
pjbriggs
parents:
diff changeset
function find_bad_primer_ranges() {
    # Identify 'bad' PRIMER_PRODUCT_SIZE_RANGE values in a pr3in.txt
    # file from pal_finder.
    #
    # Each SEQUENCE_ID line is paired with the
    # PRIMER_PRODUCT_SIZE_RANGE line of the same record (in either
    # order). A record is 'bad' if any of its whitespace-separated
    # "lower-upper" ranges has a lower limit greater than its upper
    # limit. For each bad record a line
    #     <sequence id><TAB>(<size ranges>)
    # is appended to the output file; the file is not touched when
    # nothing bad is found.
    #
    # Arguments:
    #   $1 - path of the primer3 input file (pr3in.txt)
    #   $2 - path of the file to append the report lines to
    # Returns: 0 when the input was scanned; 1 if the input cannot
    #          be read or the report cannot be written
    local pr3in=$1
    local outfile=$2
    # Use default word splitting here regardless of the caller's IFS
    local IFS=$' \t\n'
    local line range
    local seq_id= size_range=
    local have_id= have_range= bad_range=
    local -a id_fields ranges
    if [ ! -r "$pr3in" ] ; then
        echo "find_bad_primer_ranges: cannot read '$pr3in'" >&2
        return 1
    fi
    # Read the file line by line rather than word-splitting command
    # output, so spaces and glob characters in the data are harmless
    while IFS= read -r line || [ -n "$line" ] ; do
        # Tolerate DOS line endings
        line=${line%$'\r'}
        case "$line" in
            SEQUENCE_ID=*)
                # Keep everything after the first '='
                seq_id=${line#*=}
                have_id=yes
                ;;
            PRIMER_PRODUCT_SIZE_RANGE=*)
                size_range=${line#*=}
                have_range=yes
                ;;
            =)
                # A lone '=' is expected to terminate a primer3
                # record; drop any half-collected pair so it cannot
                # be matched against the next record
                have_id=
                have_range=
                continue
                ;;
            *)
                continue
                ;;
        esac
        # Wait until both keywords have been seen for this record
        if [ -z "$have_id" ] || [ -z "$have_range" ] ; then
            continue
        fi
        have_id=
        have_range=
        # Ids look like "(AT_1_16)(AT_1_16)M00879:99:...": report the
        # third ')'-delimited field, i.e. the part after the two
        # bracketed prefixes. Ids without any ')' are kept whole.
        if [[ $seq_id == *')'* ]] ; then
            IFS=')' read -r -a id_fields <<< "$seq_id"
            seq_id=${id_fields[2]-}
        fi
        # Check the lower and upper limits in each range
        read -r -a ranges <<< "$size_range"
        bad_range=
        for range in "${ranges[@]}" ; do
            # Only well-formed "digits-digits" ranges are compared;
            # 10# forces decimal so leading zeros are not read as octal
            if [[ $range =~ ^([0-9]+)-([0-9]+)$ ]] &&
               (( 10#${BASH_REMATCH[1]} > 10#${BASH_REMATCH[2]} )) ; then
                bad_range=yes
                break
            fi
        done
        # Report if the range is wrong
        if [ -n "$bad_range" ] ; then
            printf '%s\t(%s)\n' "$seq_id" "${ranges[*]}" >>"$outfile" || return 1
        fi
    done < "$pr3in"
    return 0
}