#!/bin/bash
#
# pal_finder_wrapper.sh: run pal_finder perl script as a Galaxy tool
#
# Usage: pal_finder_wrapper.sh FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]
#        pal_finder_wrapper.sh --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]
#
# Options:
#
# --primer-prefix PREFIX: prefix added to the beginning of all primer names (prNamePrefix)
# --2merMinReps N: minimum number of 2-mer repeat units to detect (0=ignore units of this size)
# --3merMinReps N
# --4merMinReps N
# --5merMinReps N
# --6merMinReps N
# --primer-mispriming-library FASTA: specify a Fasta file with sequences to avoid amplifying
# --primer-opt-size VALUE: optimum primer length
# --primer-min-size VALUE: minimum acceptable primer length
# --primer-max-size VALUE: maximum acceptable primer length
# --primer-min-gc VALUE: minimum allowable percentage of Gs and Cs in any primer
# --primer-max-gc VALUE: maximum allowable percentage of Gs and Cs
# --primer-gc-clamp VALUE: number of consecutive Gs and Cs at 3' end of both left and right primer
# --primer-max-end-gc VALUE: max number of Gs or Cs in last five 3' bases of left or right primer
# --primer-min-tm VALUE: minimum acceptable melting temperature (Celsius) for a primer oligo
# --primer-max-tm VALUE: maximum acceptable melting temperature (Celsius)
# --primer-opt-tm VALUE: optimum melting temperature (Celsius)
# --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers
# --output_config_file FNAME: write a copy of the config.txt file to FNAME
# --filter_microsats FNAME: run Graeme Fox's Perl script to filter and sort the
#                           microsatellites from pal_finder and write to FNAME
#
# pal_finder is available from http://sourceforge.net/projects/palfinder/
#
# primer3 is available from http://primer3.sourceforge.net/releases.php
# (nb needs version 2.0.0-alpha)
#
# Explicitly set the locations of the pal_finder script, data files and the primer3
# executable by setting the following variables in the environment:
#
# * PALFINDER_SCRIPT_DIR: location of the pal_finder Perl script (defaults to
#   /usr/bin)
# * PALFINDER_DATA_DIR: location of the pal_finder data files (specifically
#   config.txt and simple.ref; defaults to /usr/share/pal_finder_v0.02.04)
# * PRIMER3_CORE_EXE: name of the primer3_core program, which should include the
#   full path if it's not on the Galaxy user's PATH (defaults to primer3_core)
#
# Echo the command line arguments to stdout
echo "$*"
#
# Initialise locations of scripts, data and executables
#
# Set these in the environment to override at execution time
# (the ':=' expansion only assigns the default when the variable is
# unset or empty)
: "${PALFINDER_SCRIPT_DIR:=/usr/bin}"
: "${PALFINDER_DATA_DIR:=/usr/share/pal_finder_v0.02.04}"
: "${PRIMER3_CORE_EXE:=primer3_core}"
#
# Filter script is in the same directory as this script
# (expansions are quoted so that an install path containing whitespace
# is handled as a single word)
PALFINDER_FILTER_PL="$(dirname "$0")/pal_finder_filter.pl"
if [ ! -f "$PALFINDER_FILTER_PL" ] ; then
  echo No pal_finder_filter.pl script >&2
  exit 1
fi
#
# Check that we have all the components
#
# have_program: report whether a program can be found
#
# Arguments: $1 - program name (looked up on PATH) or path to an executable
# Outputs:   writes "yes" to stdout if the shell can resolve the program,
#            "no" otherwise
have_program() {
  # 'command -v' is used instead of parsing the output of 'which': the
  # "no X in (...)" message is only printed by some implementations of
  # 'which', so a missing program could otherwise be reported as present
  if command -v "$1" >/dev/null 2>&1 ; then
    echo yes
  else
    echo no
  fi
}
# primer3_core must be resolvable, either on the PATH or as a full path
# ('=' is the portable string comparison for '[')
if [ "$(have_program "$PRIMER3_CORE_EXE")" = "no" ] ; then
  echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2
  exit 1
fi
# The default config.txt is used later as the template for this run
if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then
  echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2
  exit 1
fi
# The pal_finder Perl script itself
if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then
  echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2
  exit 1
fi
#
# Initialise parameters used in the config.txt file
#
# Primer name prefix and minimum repeat counts for each repeat unit size
PRIMER_PREFIX="test"
MIN_2_MER_REPS="6"
MIN_3_MER_REPS="0"
MIN_4_MER_REPS="0"
MIN_5_MER_REPS="0"
MIN_6_MER_REPS="0"
# Mispriming library defaults to the one in the pal_finder data directory
PRIMER_MISPRIMING_LIBRARY="${PALFINDER_DATA_DIR}/simple.ref"
# Primer3 settings start out empty; they are only populated if the
# matching command line option is supplied
PRIMER_OPT_SIZE=""
PRIMER_MAX_SIZE=""
PRIMER_MIN_SIZE=""
PRIMER_MAX_GC=""
PRIMER_MIN_GC=""
PRIMER_GC_CLAMP=""
PRIMER_MAX_END_GC=""
PRIMER_OPT_TM=""
PRIMER_MAX_TM=""
PRIMER_MIN_TM=""
PRIMER_PAIR_MAX_DIFF_TM=""
# Optional output files (empty means not requested)
OUTPUT_CONFIG_FILE=""
FILTERED_MICROSATS=""
#
# Collect command line arguments
#
# Four positional arguments are always consumed below (in both the
# Illumina and the 454 forms), so anything fewer is a usage error;
# report it on stderr with a non-zero status
if [ $# -lt 4 ] ; then
  echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" >&2
  echo "       $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" >&2
  exit 1
fi
if [ "$1" = "--454" ] ; then
  # 454 mode: a single input file follows the flag
  PLATFORM="454"
  FNA=$2
else
  # Illumina mode: the first two arguments are the R1/R2 files
  PLATFORM="Illumina"
  FASTQ_R1=$1
  FASTQ_R2=$2
fi
MICROSAT_SUMMARY=$3
PAL_SUMMARY=$4
# Drop the positional arguments so that only the options remain
shift 4
#
# Collect command line options
#
# parse_options: read '--option VALUE' pairs and store each value in the
# matching global variable
#
# Arguments: the option/value pairs left after the positional arguments
# Globals:   writes PRIMER_PREFIX, MIN_2_MER_REPS .. MIN_6_MER_REPS,
#            PRIMER_MISPRIMING_LIBRARY, the PRIMER_* primer3 settings,
#            OUTPUT_CONFIG_FILE and FILTERED_MICROSATS
# Exits:     with status 1 on an unknown option, or on an option that is
#            not followed by a value
parse_options() {
  # Parsing stops at the first empty argument (or when none are left)
  while [ -n "${1:-}" ] ; do
    case "$1" in
      --primer-prefix) PRIMER_PREFIX=${2-} ;;
      --2merMinReps) MIN_2_MER_REPS=${2-} ;;
      --3merMinReps) MIN_3_MER_REPS=${2-} ;;
      --4merMinReps) MIN_4_MER_REPS=${2-} ;;
      --5merMinReps) MIN_5_MER_REPS=${2-} ;;
      --6merMinReps) MIN_6_MER_REPS=${2-} ;;
      --primer-mispriming-library) PRIMER_MISPRIMING_LIBRARY=${2-} ;;
      --primer-opt-size) PRIMER_OPT_SIZE=${2-} ;;
      --primer-max-size) PRIMER_MAX_SIZE=${2-} ;;
      --primer-min-size) PRIMER_MIN_SIZE=${2-} ;;
      --primer-max-gc) PRIMER_MAX_GC=${2-} ;;
      --primer-min-gc) PRIMER_MIN_GC=${2-} ;;
      --primer-gc-clamp) PRIMER_GC_CLAMP=${2-} ;;
      --primer-max-end-gc) PRIMER_MAX_END_GC=${2-} ;;
      --primer-opt-tm) PRIMER_OPT_TM=${2-} ;;
      --primer-max-tm) PRIMER_MAX_TM=${2-} ;;
      --primer-min-tm) PRIMER_MIN_TM=${2-} ;;
      --primer-pair-max-diff-tm) PRIMER_PAIR_MAX_DIFF_TM=${2-} ;;
      --output_config_file) OUTPUT_CONFIG_FILE=${2-} ;;
      --filter_microsats) FILTERED_MICROSATS=${2-} ;;
      *)
        echo "Unknown option: $1" >&2
        exit 1
        ;;
    esac
    # Every option takes exactly one value; an option given last with
    # nothing after it is an error rather than a silent empty setting
    # (an explicitly empty value, e.g. --primer-opt-size "", is accepted)
    if [ $# -lt 2 ] ; then
      echo "Missing value for option: $1" >&2
      exit 1
    fi
    shift 2
  done
}
parse_options "$@"
#
# Check that primer3_core is available
#
# 'command -v' resolves the configured executable directly, whatever it
# is called, instead of matching a hard-coded "no primer3_core in"
# message in the output of 'which'
if ! command -v "$PRIMER3_CORE_EXE" >/dev/null 2>&1 ; then
  echo ERROR primer3_core not found >&2
  exit 1
fi
#
# Set up the working dir
#
# Input files are symlinked into the current directory and referred to
# by their base names from here on; all paths are quoted so that names
# containing whitespace are handled as single words
if [ "$PLATFORM" = "Illumina" ] ; then
  # Paired end Illumina data as input
  if [ "$FASTQ_R1" = "$FASTQ_R2" ] ; then
    echo ERROR R1 and R2 fastqs are the same file >&2
    exit 1
  fi
  ln -s "$FASTQ_R1"
  ln -s "$FASTQ_R2"
  fastq_r1=$(basename "$FASTQ_R1")
  fastq_r2=$(basename "$FASTQ_R2")
else
  # 454 data as input
  ln -s "$FNA"
  fna=$(basename "$FNA")
fi
ln -s "$PRIMER_MISPRIMING_LIBRARY"
PRIMER_MISPRIMING_LIBRARY=$(basename "$PRIMER_MISPRIMING_LIBRARY")
mkdir Output
#
# Copy in the default config.txt file
/bin/cp "$PALFINDER_DATA_DIR/config.txt" .
#
# Update the config.txt file with new values
#
# set_config_value: replace the value of a "KEY VALUE" line in a config file
#
# Arguments: $1 - key (matched at the start of a line, followed by a space)
#            $2 - new value; if empty the file is left untouched
#            $3 - config file to edit in place
# Outputs:   a one-line message on stdout saying what was done
set_config_value() {
  local key=$1
  local value=$2
  local config_txt=$3
  local key_re key_sub value_sub
  if [ -z "$value" ] ; then
    echo "No value for $key, left as default"
    return 0
  fi
  echo Setting "$key" to "$value"
  # Escape the key for use as a regular expression, and the key and value
  # for use as replacement text, so that characters special to sed
  # (including the ',' delimiter, '&' and '\') are taken literally
  key_re=$(printf '%s\n' "$key" | sed -e 's/[]\,$*.^[]/\\&/g')
  key_sub=$(printf '%s\n' "$key" | sed -e 's/[\,&]/\\&/g')
  value_sub=$(printf '%s\n' "$value" | sed -e 's/[\,&]/\\&/g')
  sed -i -e "s,^${key_re} .*,${key_sub} ${value_sub}," "$config_txt"
}
# Input files
# (every value is quoted so that it reaches set_config_value as a single
# argument; an unquoted value containing whitespace would shift the
# config file argument)
set_config_value platform "$PLATFORM" config.txt
if [ "$PLATFORM" = "Illumina" ] ; then
  set_config_value inputFormat fastq config.txt
  set_config_value pairedEnd 1 config.txt
  set_config_value inputReadFile "$fastq_r1" config.txt
  set_config_value pairedReadFile "$fastq_r2" config.txt
else
  set_config_value inputFormat fasta config.txt
  set_config_value pairedEnd 0 config.txt
  set_config_value input454reads "$fna" config.txt
fi
# Output files
set_config_value MicrosatSumOut Output/microsat_summary.txt config.txt
set_config_value PALsummaryOut Output/PAL_summary.txt config.txt
# Microsat info
set_config_value 2merMinReps "$MIN_2_MER_REPS" config.txt
set_config_value 3merMinReps "$MIN_3_MER_REPS" config.txt
set_config_value 4merMinReps "$MIN_4_MER_REPS" config.txt
set_config_value 5merMinReps "$MIN_5_MER_REPS" config.txt
set_config_value 6merMinReps "$MIN_6_MER_REPS" config.txt
# Primer3 settings
set_config_value primer3input Output/pr3in.txt config.txt
set_config_value primer3output Output/pr3out.txt config.txt
set_config_value primer3executable "$PRIMER3_CORE_EXE" config.txt
set_config_value prNamePrefix "${PRIMER_PREFIX}_" config.txt
set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt
set_config_value PRIMER_OPT_SIZE "$PRIMER_OPT_SIZE" config.txt
set_config_value PRIMER_MIN_SIZE "$PRIMER_MIN_SIZE" config.txt
set_config_value PRIMER_MAX_SIZE "$PRIMER_MAX_SIZE" config.txt
set_config_value PRIMER_MIN_GC "$PRIMER_MIN_GC" config.txt
set_config_value PRIMER_MAX_GC "$PRIMER_MAX_GC" config.txt
set_config_value PRIMER_GC_CLAMP "$PRIMER_GC_CLAMP" config.txt
set_config_value PRIMER_MAX_END_GC "$PRIMER_MAX_END_GC" config.txt
set_config_value PRIMER_MIN_TM "$PRIMER_MIN_TM" config.txt
set_config_value PRIMER_MAX_TM "$PRIMER_MAX_TM" config.txt
set_config_value PRIMER_OPT_TM "$PRIMER_OPT_TM" config.txt
set_config_value PRIMER_PAIR_MAX_DIFF_TM "$PRIMER_PAIR_MAX_DIFF_TM" config.txt
#
# Run pal_finder
# (output is shown on stdout and also captured in pal_finder.log)
perl "$PALFINDER_SCRIPT_DIR/pal_finder_v0.02.04.pl" config.txt 2>&1 | tee pal_finder.log
#
# Check that log ends with "Done!!" message
# (the pipe into tee means perl's own exit status is not what the shell
# sees, so the last line of the log is used as the success indicator)
if ! tail -n 1 pal_finder.log | grep -q -F 'Done!!' ; then
  echo ERROR pal_finder failed to complete successfully >&2
  exit 1
fi
#
# Run the pal_finder_filter.pl script from Graeme Fox
# (only when an output file for the filtered microsatellites was requested)
if [ -n "$FILTERED_MICROSATS" ] ; then
  if ! perl "$PALFINDER_FILTER_PL" Output/PAL_summary.txt 2>&1 ; then
    echo ERROR pal_finder_filter.pl exited with non-zero status >&2
    exit 1
  elif [ ! -f pal_finder_filter_output.txt ] ; then
    # A zero exit status alone is not trusted: the output file must exist
    echo ERROR no output from pal_finder_filter.pl >&2
    exit 1
  fi
fi
#
# Clean up
#
# Move each result that exists to the destination given on the command
# line; destinations are quoted so that paths containing whitespace are
# passed to mv as a single argument
if [ -f Output/microsat_summary.txt ] ; then
  /bin/mv Output/microsat_summary.txt "$MICROSAT_SUMMARY"
fi
if [ -f Output/PAL_summary.txt ] ; then
  /bin/mv Output/PAL_summary.txt "$PAL_SUMMARY"
fi
# Optional outputs are only moved if they were requested
if [ -n "$FILTERED_MICROSATS" ] && [ -f pal_finder_filter_output.txt ] ; then
  echo "Moving pal_finder_filter_output.txt to $FILTERED_MICROSATS"
  /bin/mv pal_finder_filter_output.txt "$FILTERED_MICROSATS"
fi
if [ -n "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then
  /bin/mv config.txt "$OUTPUT_CONFIG_FILE"
fi
##
#