Mercurial > repos > pjbriggs > pal_finder
annotate pal_finder_wrapper.sh @ 4:cb56cc1d5c39 draft
Updates to the palfilter.py utility.
author | pjbriggs |
---|---|
date | Mon, 21 Mar 2016 06:52:43 -0400 |
parents | e1a14ed7a9d6 |
children | a73c48890bde |
rev | line source |
---|---|
0 | 1 #!/bin/sh |
2 # | |
3 # pal_finder_wrapper.sh: run pal_finder perl script as a Galaxy tool | |
4 # | |
5 # Usage: pal_finder_wrapper.sh FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS] | |
6 # pal_finder_wrapper.sh --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS] | |
7 # | |
8 # Options: | |
9 # | |
10 # --primer-prefix PREFIX: prefix added to the beginning of all primer names (prPrefixName) | |
11 # --2merMinReps N: minimum number of 2-mer repeat units to detect (0=ignore units of this size) | |
12 # --3merMinReps N | |
13 # --4merMinReps N | |
14 # --5merMinReps N | |
15 # --6merMinReps N | |
16 # --primer-mispriming-library FASTA: specify a Fasta file with sequences to avoid amplifying | |
17 # --primer-opt-size VALUE: optimum primer length | |
18 # --primer-min-size VALUE: minimum acceptable primer length | |
19 # --primer-max-size VALUE: maximum acceptable primer length | |
20 # --primer-min-gc VALUE: minimum allowable percentage of Gs and Cs in any primer | |
21 # --primer-max-gc VALUE: maximum allowable percentage of Gs and Cs | |
22 # --primer-gc-clamp VALUE: number of consecutive Gs and Cs at 3' end of both left and right primer | |
23 # --primer-max-end-gc VALUE: max number of Gs or Cs in last five 3' bases of left or right primer | |
24 # --primer-min-tm VALUE: minimum acceptable melting temperature (Celsius) for a primer oligo | |
25 # --primer-max-tm VALUE: maximum acceptable melting temperature (Celsius) | |
26 # --primer-opt-tm VALUE: optimum melting temperature (Celsius) | |
27 # --primer-pair-max-diff-tm VALUE: max difference between melting temps of left & right primers | |
28 # --output_config_file FNAME: write a copy of the config.txt file to FNAME | |
2 | 29 # --filter_microsats FNAME: write output of filter options to FNAME |
30 # -assembly FNAME: run the 'assembly' filter option and write to FNAME | |
31 # -primers: run the 'primers' filter option | |
32 # -occurrences: run the 'occurrences' filter option | |
33 # -rankmotifs: run the 'rankmotifs' filter option | |
0 | 34 # |
35 # pal_finder is available from http://sourceforge.net/projects/palfinder/ | |
36 # | |
37 # primer3 is available from http://primer3.sourceforge.net/releases.php | |
38 # (nb needs version 2.0.0-alpha) | |
39 # | |
40 # Explicitly set the locations of the pal_finder script, data files and the primer3 | |
41 # executable by setting the following variables in the environment: | |
42 # | |
43 # * PALFINDER_SCRIPT_DIR: location of the pal_finder Perl script (defaults to | |
44 # /usr/bin) | |
45 # * PALFINDER_DATA_DIR: location of the pal_finder data files (specifically | |
46 # config.txt and simple.ref; defaults to /usr/share/pal_finder_v0.02.04) | |
47 # * PRIMER3_CORE_EXE: name of the primer3_core program, which should include the | |
48 # full path if it's not on the Galaxy user's PATH (defaults to primer3_core) | |
49 # | |
1
771ebe02636f
Uploaded version 0.02.04.2: fix bug that causes tool to fail when prefix includes spaces; add explicit dependency on Perl 5.16.3.
pjbriggs
parents:
0
diff
changeset
|
50 echo "### $(basename $0) ###" |
0 | 51 echo $* |
52 # | |
53 # Initialise locations of scripts, data and executables | |
54 # | |
55 # Set these in the environment to override at execution time | |
56 : ${PALFINDER_SCRIPT_DIR:=/usr/bin} | |
57 : ${PALFINDER_DATA_DIR:=/usr/share/pal_finder_v0.02.04} | |
58 : ${PRIMER3_CORE_EXE:=primer3_core} | |
59 # | |
60 # Filter script is in the same directory as this script | |
3
e1a14ed7a9d6
Updated to version 0.02.04.4 (new pal_filter script)
pjbriggs
parents:
2
diff
changeset
|
61 PALFINDER_FILTER=$(dirname $0)/pal_filter.py |
2 | 62 if [ ! -f $PALFINDER_FILTER ] ; then |
63 echo No $PALFINDER_FILTER script >&2 | |
0 | 64 exit 1 |
65 fi | |
66 # | |
67 # Check that we have all the components | |
# have_program PROGRAM
#
# Report whether PROGRAM can be run from this shell.
#
# Arguments: $1 - program name (looked up on PATH) or a path to an executable
# Outputs:   prints "yes" to stdout if the program was found, "no" otherwise
# Returns:   0 in both cases; callers compare the printed word
have_program() {
  local program=$1
  # Use the exit status of 'command -v' instead of grepping the error text
  # of 'which': some 'which' implementations print nothing when the lookup
  # fails, which made a missing program look like it was present.
  if command -v "$program" >/dev/null 2>&1 ; then
    echo yes
  else
    echo no
  fi
}
77 if [ "$(have_program $PRIMER3_CORE_EXE)" == "no" ] ; then | |
78 echo "ERROR primer3_core missing: ${PRIMER3_CORE_EXE} not found" >&2 | |
79 exit 1 | |
80 fi | |
81 if [ ! -f "${PALFINDER_DATA_DIR}/config.txt" ] ; then | |
82 echo "ERROR pal_finder config.txt not found in ${PALFINDER_DATA_DIR}" >&2 | |
83 exit 1 | |
84 fi | |
85 if [ ! -f "${PALFINDER_SCRIPT_DIR}/pal_finder_v0.02.04.pl" ] ; then | |
86 echo "ERROR pal_finder_v0.02.04.pl not found in ${PALFINDER_SCRIPT_DIR}" >&2 | |
87 exit 1 | |
88 fi | |
89 # | |
90 # Initialise parameters used in the config.txt file | |
91 PRIMER_PREFIX="test" | |
92 MIN_2_MER_REPS=6 | |
93 MIN_3_MER_REPS=0 | |
94 MIN_4_MER_REPS=0 | |
95 MIN_5_MER_REPS=0 | |
96 MIN_6_MER_REPS=0 | |
97 PRIMER_MISPRIMING_LIBRARY=$PALFINDER_DATA_DIR/simple.ref | |
98 PRIMER_OPT_SIZE= | |
99 PRIMER_MAX_SIZE= | |
100 PRIMER_MIN_SIZE= | |
101 PRIMER_MAX_GC= | |
102 PRIMER_MIN_GC= | |
103 PRIMER_GC_CLAMP= | |
104 PRIMER_MAX_END_GC= | |
105 PRIMER_OPT_TM= | |
106 PRIMER_MAX_TM= | |
107 PRIMER_MIN_TM= | |
108 PRIMER_PAIR_MAX_DIFF_TM= | |
109 OUTPUT_CONFIG_FILE= | |
2 | 110 OUTPUT_ASSEMBLY= |
0 | 111 FILTERED_MICROSATS= |
2 | 112 FILTER_OPTIONS= |
0 | 113 # |
114 # Collect command line arguments | |
# Both invocation forms need four leading positional arguments: the code
# that follows reads $1..$4 and then shifts four times. Print the usage
# message on stderr and stop with a non-zero status if any are missing.
# (The original called the non-existent command 'exits', so the script
# carried on after printing the usage text.)
if [ $# -lt 4 ] ; then
  echo "Usage: $0 FASTQ_R1 FASTQ_R2 MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" >&2
  echo " $0 --454 FASTA MICROSAT_SUMMARY PAL_SUMMARY [OPTIONS]" >&2
  exit 1
fi
120 if [ "$1" == "--454" ] ; then | |
121 PLATFORM="454" | |
122 FNA=$2 | |
123 else | |
124 PLATFORM="Illumina" | |
125 FASTQ_R1=$1 | |
126 FASTQ_R2=$2 | |
127 fi | |
128 MICROSAT_SUMMARY=$3 | |
129 PAL_SUMMARY=$4 | |
130 shift; shift; shift; shift | |
131 # | |
132 # Collect command line options | |
133 while [ ! -z "$1" ] ; do | |
134 case "$1" in | |
135 --primer-prefix) | |
136 shift | |
2 | 137 # Convert all non-alphanumeric characters to underscores in prefix |
138 PRIMER_PREFIX=$(echo -n $1 | tr -s -c "[:alnum:]" "_") | |
0 | 139 ;; |
140 --2merMinReps) | |
141 shift | |
142 MIN_2_MER_REPS=$1 | |
143 ;; | |
144 --3merMinReps) | |
145 shift | |
146 MIN_3_MER_REPS=$1 | |
147 ;; | |
148 --4merMinReps) | |
149 shift | |
150 MIN_4_MER_REPS=$1 | |
151 ;; | |
152 --5merMinReps) | |
153 shift | |
154 MIN_5_MER_REPS=$1 | |
155 ;; | |
156 --6merMinReps) | |
157 shift | |
158 MIN_6_MER_REPS=$1 | |
159 ;; | |
160 --primer-mispriming-library) | |
161 shift | |
162 PRIMER_MISPRIMING_LIBRARY=$1 | |
163 ;; | |
164 --primer-opt-size) | |
165 shift | |
166 PRIMER_OPT_SIZE=$1 | |
167 ;; | |
168 --primer-max-size) | |
169 shift | |
170 PRIMER_MAX_SIZE=$1 | |
171 ;; | |
172 --primer-min-size) | |
173 shift | |
174 PRIMER_MIN_SIZE=$1 | |
175 ;; | |
176 --primer-max-gc) | |
177 shift | |
178 PRIMER_MAX_GC=$1 | |
179 ;; | |
180 --primer-min-gc) | |
181 shift | |
182 PRIMER_MIN_GC=$1 | |
183 ;; | |
184 --primer-gc-clamp) | |
185 shift | |
186 PRIMER_GC_CLAMP=$1 | |
187 ;; | |
188 --primer-max-end-gc) | |
189 shift | |
190 PRIMER_MAX_END_GC=$1 | |
191 ;; | |
192 --primer-opt-tm) | |
193 shift | |
194 PRIMER_OPT_TM=$1 | |
195 ;; | |
196 --primer-max-tm) | |
197 shift | |
198 PRIMER_MAX_TM=$1 | |
199 ;; | |
200 --primer-min-tm) | |
201 shift | |
202 PRIMER_MIN_TM=$1 | |
203 ;; | |
204 --primer-pair-max-diff-tm) | |
205 shift | |
206 PRIMER_PAIR_MAX_DIFF_TM=$1 | |
207 ;; | |
208 --output_config_file) | |
209 shift | |
210 OUTPUT_CONFIG_FILE=$1 | |
211 ;; | |
212 --filter_microsats) | |
213 shift | |
214 FILTERED_MICROSATS=$1 | |
215 ;; | |
2 | 216 -primers|-occurrences|-rankmotifs) |
217 FILTER_OPTIONS="$FILTER_OPTIONS $1" | |
218 ;; | |
219 -assembly) | |
220 FILTER_OPTIONS="$FILTER_OPTIONS $1" | |
221 shift | |
222 OUTPUT_ASSEMBLY=$1 | |
223 ;; | |
0 | 224 *) |
225 echo Unknown option: $1 >&2 | |
226 exit 1 | |
227 ;; | |
228 esac | |
229 shift | |
230 done | |
231 # | |
232 # Check that primer3_core is available | |
233 got_primer3=`which $PRIMER3_CORE_EXE 2>&1 | grep -v "no primer3_core in"` | |
234 if [ -z "$got_primer3" ] ; then | |
235 echo ERROR primer3_core not found >&2 | |
236 exit 1 | |
237 fi | |
238 # | |
239 # Set up the working dir | |
240 if [ "$PLATFORM" == "Illumina" ] ; then | |
241 # Paired end Illumina data as input | |
242 if [ $FASTQ_R1 == $FASTQ_R2 ] ; then | |
243 echo ERROR R1 and R2 fastqs are the same file >&2 | |
244 exit 1 | |
245 fi | |
246 ln -s $FASTQ_R1 | |
247 ln -s $FASTQ_R2 | |
248 fastq_r1=$(basename $FASTQ_R1) | |
249 fastq_r2=$(basename $FASTQ_R2) | |
250 else | |
251 # 454 data as input | |
252 ln -s $FNA | |
253 fna=$(basename $FNA) | |
254 fi | |
255 ln -s $PRIMER_MISPRIMING_LIBRARY | |
256 PRIMER_MISPRIMING_LIBRARY=$(basename $PRIMER_MISPRIMING_LIBRARY) | |
257 mkdir Output | |
258 # | |
259 # Copy in the default config.txt file | |
1
771ebe02636f
Uploaded version 0.02.04.2: fix bug that causes tool to fail when prefix includes spaces; add explicit dependency on Perl 5.16.3.
pjbriggs
parents:
0
diff
changeset
|
260 echo "### Creating config.txt file for pal_finder run ###" |
0 | 261 /bin/cp $PALFINDER_DATA_DIR/config.txt . |
262 # | |
263 # Update the config.txt file with new values | |
# set_config_value KEY VALUE CONFIG_FILE
#
# Replace the "KEY <anything>" line of CONFIG_FILE with "KEY VALUE",
# editing the file in place with 'sed -i'.
#
# Arguments: $1 - configuration key; matched at the start of a line and
#                 followed by a space. It is inserted into the sed pattern
#                 unescaped, so it must not contain regex metacharacters
#                 or commas.
#            $2 - new value; if empty the file is left untouched
#            $3 - path of the configuration file to edit
# Outputs:   a one-line progress message on stdout
# Returns:   0 when no value was given, otherwise the exit status of sed
set_config_value() {
  local key=$1
  local value=$2
  local config_txt=$3
  local escaped_value
  if [ -z "$value" ] ; then
    echo "No value for $key, left as default"
    return 0
  fi
  echo "Setting $key to $value"
  # Backslash-escape the characters that are special in the replacement
  # part of the sed expression: '\', '&' (stands for the matched text) and
  # ',' (used as the s-command delimiter). Without this a value such as a
  # file path containing a comma or ampersand corrupts the edit.
  escaped_value=$(printf '%s' "$value" | sed -e 's/[\\&,]/\\&/g')
  sed -i 's,^'"$key"' .*,'"$key"' '"$escaped_value"',' "$config_txt"
}
275 # Input files | |
276 set_config_value platform $PLATFORM config.txt | |
277 if [ "$PLATFORM" == "Illumina" ] ; then | |
278 set_config_value inputFormat fastq config.txt | |
279 set_config_value pairedEnd 1 config.txt | |
280 set_config_value inputReadFile $fastq_r1 config.txt | |
281 set_config_value pairedReadFile $fastq_r2 config.txt | |
282 else | |
283 set_config_value inputFormat fasta config.txt | |
284 set_config_value pairedEnd 0 config.txt | |
285 set_config_value input454reads $fna config.txt | |
286 fi | |
287 # Output files | |
288 set_config_value MicrosatSumOut Output/microsat_summary.txt config.txt | |
289 set_config_value PALsummaryOut Output/PAL_summary.txt config.txt | |
290 # Microsat info | |
291 set_config_value 2merMinReps $MIN_2_MER_REPS config.txt | |
292 set_config_value 3merMinReps $MIN_3_MER_REPS config.txt | |
293 set_config_value 4merMinReps $MIN_4_MER_REPS config.txt | |
294 set_config_value 5merMinReps $MIN_5_MER_REPS config.txt | |
295 set_config_value 6merMinReps $MIN_6_MER_REPS config.txt | |
296 # Primer3 settings | |
297 set_config_value primer3input Output/pr3in.txt config.txt | |
298 set_config_value primer3output Output/pr3out.txt config.txt | |
299 set_config_value primer3executable $PRIMER3_CORE_EXE config.txt | |
300 set_config_value prNamePrefix ${PRIMER_PREFIX}_ config.txt | |
301 set_config_value PRIMER_MISPRIMING_LIBRARY "$PRIMER_MISPRIMING_LIBRARY" config.txt | |
302 set_config_value PRIMER_OPT_SIZE "$PRIMER_OPT_SIZE" config.txt | |
303 set_config_value PRIMER_MIN_SIZE "$PRIMER_MIN_SIZE" config.txt | |
304 set_config_value PRIMER_MAX_SIZE "$PRIMER_MAX_SIZE" config.txt | |
305 set_config_value PRIMER_MIN_GC "$PRIMER_MIN_GC" config.txt | |
306 set_config_value PRIMER_MAX_GC "$PRIMER_MAX_GC" config.txt | |
307 set_config_value PRIMER_GC_CLAMP "$PRIMER_GC_CLAMP" config.txt | |
308 set_config_value PRIMER_MAX_END_GC "$PRIMER_MAX_END_GC" config.txt | |
309 set_config_value PRIMER_MIN_TM "$PRIMER_MIN_TM" config.txt | |
310 set_config_value PRIMER_MAX_TM "$PRIMER_MAX_TM" config.txt | |
311 set_config_value PRIMER_OPT_TM "$PRIMER_OPT_TM" config.txt | |
312 set_config_value PRIMER_PAIR_MAX_DIFF_TM "$PRIMER_PAIR_MAX_DIFF_TM" config.txt | |
313 # | |
314 # Run pal_finder | |
1
771ebe02636f
Uploaded version 0.02.04.2: fix bug that causes tool to fail when prefix includes spaces; add explicit dependency on Perl 5.16.3.
pjbriggs
parents:
0
diff
changeset
|
315 echo "### Running pal_finder ###" |
0 | 316 perl $PALFINDER_SCRIPT_DIR/pal_finder_v0.02.04.pl config.txt 2>&1 | tee pal_finder.log |
1
771ebe02636f
Uploaded version 0.02.04.2: fix bug that causes tool to fail when prefix includes spaces; add explicit dependency on Perl 5.16.3.
pjbriggs
parents:
0
diff
changeset
|
317 echo "### pal_finder finised ###" |
0 | 318 # |
319 # Check that log ends with "Done!!" message | |
320 if [ -z "$(tail -n 1 pal_finder.log | grep Done!!)" ] ; then | |
321 echo ERROR pal_finder failed to complete successfully >&2 | |
322 exit 1 | |
323 fi | |
324 # | |
2 | 325 # Sort outputs into a consistent order regardless of Perl version |
326 echo "### Sorting outputs ###" | |
327 head -1 Output/PAL_summary.txt > Output/PAL_summary.sorted.txt | |
328 if [ "$PLATFORM" == "Illumina" ] ; then | |
329 grep -v "^readPairID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt | |
330 else | |
331 grep -v "^SequenceID" Output/PAL_summary.txt | sort -k 1 >> Output/PAL_summary.sorted.txt | |
332 fi | |
333 mv Output/PAL_summary.sorted.txt Output/PAL_summary.txt | |
334 # | |
335 # Run the filtering & assembly script | |
336 if [ ! -z "$FILTERED_MICROSATS" ] || [ ! -z "$OUTPUT_ASSEMBLY" ] ; then | |
337 echo "### Running filtering & assembly script ###" | |
338 python $PALFINDER_FILTER -i $fastq_r1 -j $fastq_r2 -p Output/PAL_summary.txt $FILTER_OPTIONS 2>&1 | |
0 | 339 if [ $? -ne 0 ] ; then |
2 | 340 echo ERROR $PALFINDER_FILTER exited with non-zero status >&2 |
0 | 341 exit 1 |
2 | 342 elif [ ! -f PAL_summary.filtered ] ; then |
343 echo ERROR no output from $PALFINDER_FILTER >&2 | |
0 | 344 exit 1 |
345 fi | |
346 fi | |
347 # | |
348 # Clean up | |
1
771ebe02636f
Uploaded version 0.02.04.2: fix bug that causes tool to fail when prefix includes spaces; add explicit dependency on Perl 5.16.3.
pjbriggs
parents:
0
diff
changeset
|
349 echo "### Handling output files ###" |
0 | 350 if [ -f Output/microsat_summary.txt ] ; then |
351 /bin/mv Output/microsat_summary.txt $MICROSAT_SUMMARY | |
352 fi | |
353 if [ -f Output/PAL_summary.txt ] ; then | |
354 /bin/mv Output/PAL_summary.txt $PAL_SUMMARY | |
355 fi | |
2 | 356 if [ ! -z "$FILTERED_MICROSATS" ] && [ -f PAL_summary.filtered ] ; then |
357 /bin/mv PAL_summary.filtered $FILTERED_MICROSATS | |
358 fi | |
359 if [ ! -z "$OUTPUT_ASSEMBLY" ] ; then | |
3
e1a14ed7a9d6
Updated to version 0.02.04.4 (new pal_filter script)
pjbriggs
parents:
2
diff
changeset
|
360 assembly=${fastq_r1%.*}_pal_filter_assembly_output.txt |
2 | 361 if [ -f "$assembly" ] ; then |
362 /bin/mv $assembly "$OUTPUT_ASSEMBLY" | |
3
e1a14ed7a9d6
Updated to version 0.02.04.4 (new pal_filter script)
pjbriggs
parents:
2
diff
changeset
|
363 else |
e1a14ed7a9d6
Updated to version 0.02.04.4 (new pal_filter script)
pjbriggs
parents:
2
diff
changeset
|
364 echo ERROR no assembly output found >&2 |
e1a14ed7a9d6
Updated to version 0.02.04.4 (new pal_filter script)
pjbriggs
parents:
2
diff
changeset
|
365 exit 1 |
2 | 366 fi |
0 | 367 fi |
368 if [ ! -z "$OUTPUT_CONFIG_FILE" ] && [ -f config.txt ] ; then | |
369 /bin/mv config.txt $OUTPUT_CONFIG_FILE | |
370 fi | |
371 ## | |
372 # |