Mercurial > repos > rnateam > rnacode
comparison processMAF.sh @ 2:434332033e82 draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rnacode commit b6d3800e260cdfcbe99e5f056344c3df101dad00
author | rnateam |
---|---|
date | Fri, 13 Apr 2018 07:40:08 -0400 |
parents | |
children | d49b9759e294 |
comparison
equal
deleted
inserted
replaced
1:7a84c6c1c4e0 | 2:434332033e82 |
---|---|
1 #!/usr/bin/env bash | |
2 # RNAcode sometimes fails because of bugs. Since the manual suggests | |
3 # to call RNAcode on splitted alignments it is feasible to run | |
4 # RNAcode separately on the parts. This is implemented here. Command | |
5 # line parameters just passed on to RNAcode. | |
6 # | |
7 # - the script ensures that the region ids are continuous (otherwise | |
8 # the results for each block would start with 0) | |
9 # - also eps file names are corrected accordingly | |
10 # if RNAcode fails for one part it just outputs the part (for bug reporting) | |
11 # and continues | |
12 | |
13 # for splitting the alignment you can use breakMAF.pl from the RNAcode | |
14 # github (it seems to be absent from the 0.3 release) and feed the output | |
15 # with the desired RNAcode parameters into this shell script, e.g.: | |
16 # | |
17 # breakMAF.pl < chrM.maf | ./processMAF.sh --tabular --eps --eps-dir eps2/ | |
18 | |
19 # parse the command line options | |
20 # - removes --outfile, --help, --version, and the input file from the arguments | |
21 # - outfile and infile are stored in variables | |
22 declare -a args=() | |
23 while [[ $# -gt 0 ]] | |
24 do | |
25 key="$1" | |
26 case $key in | |
27 -d|--eps-dir) | |
28 epsdir=$2 | |
29 args+=($1 $2) | |
30 shift # past argument | |
31 shift # past value | |
32 ;; | |
33 -e|--eps) | |
34 eps=1 | |
35 args+=($1) | |
36 shift # past argument | |
37 ;; | |
38 -g|--gtf) | |
39 gtf=1 | |
40 args+=($1) | |
41 shift # past argument | |
42 ;; | |
43 -t|--tabular) | |
44 tabular=1 | |
45 args+=($1) | |
46 shift # past argument | |
47 ;; | |
48 -o|--outfile) | |
49 outfile=$2 | |
50 shift # past argument | |
51 shift # past value | |
52 ;; | |
53 -b|--best-only|-r|--best-region|-s|--stop-early) | |
54 args+=($1) | |
55 shift # past argument | |
56 ;; | |
57 -n|--num-samples|-p|--cutoff|-c|--pars|-i|--eps-cutoff) | |
58 args+=($1 $2) | |
59 shift # past argument | |
60 shift # past value | |
61 ;; | |
62 -h|--help|-v|--version) | |
63 shift # past argument | |
64 ;; | |
65 *) # unknown option | |
66 file=$1 | |
67 shift # past argument | |
68 ;; | |
69 esac | |
70 done | |
71 | |
72 # fix output (renumber blocks) | |
73 # and move eps files (if present) to tmpdir | |
74 function fix_output { | |
75 if [[ -z "$last" ]]; then | |
76 last=0 | |
77 fi | |
78 while read line | |
79 do | |
80 if [[ -z "$gtf" ]]; then | |
81 i=`echo "$line" | sed 's/^\([[:digit:]]\+\)[[:space:]].*/\1/'` | |
82 else | |
83 i=`echo $line | sed 's/.*Gene\([[:digit:]]\+\).*/\1/'` | |
84 fi | |
85 j=`echo "$i+$last" | bc` | |
86 if [[ -z "$gtf" ]]; then | |
87 echo "$line" | sed "s/^\([[:digit:]]\+\)\([[:space:]].*\)/$j\2/" | |
88 else | |
89 echo "$line" | sed "s/^\(.*\)Gene[0-9]\+\(\".*\)$/\1Gene$j\2/" | |
90 fi | |
91 #echo $line | awk -v n=$j '{printf("%d\t", n); for(i=2; i<=NF; i++){printf("%s", $(i)); if(i==NF){printf("\n")}else{printf("\t")}}}' | |
92 if [[ ! -z "$eps" && -f ${epsdir:-eps}/hss-$i.eps ]]; then | |
93 mv ${epsdir:-eps}/hss-$i.eps $tmpd/hss-$j.eps | |
94 fi | |
95 done | |
96 if [[ ! -z "$j" ]]; then | |
97 last=`echo "$j+1" | bc` | |
98 unset j | |
99 fi | |
100 } | |
101 | |
102 # run RNAcode for $tempfile if >= 3 sequences | |
103 function run_rnacode { | |
104 >&2 echo -e "processing " `cat ${tmpif} | grep ^s | head -n 1 | cut -d" " -f1-6` | |
105 # >&2 echo "with RNAcode" $@ | |
106 nl=`cat ${tmpif} | grep "^s" | wc -l` | |
107 if [[ "$nl" -ge "3" ]]; then | |
108 # - filter the outfile for lines containing the ref and redirect everything to stderr | |
109 # https://github.com/wash/rnacode/issues/9 | |
110 # - we can not pipe stdout | ... | fix_output since then $last can not be used as global variable | |
111 if [[ ! -z "$gtf" ]]; then | |
112 field=1 | |
113 elif [[ ! -z "$tabular" ]]; then | |
114 field=7 | |
115 else | |
116 field=6 | |
117 fi | |
118 RNAcode $@ | awk -v ref=$ref -v field=$field '{if($(field)==ref){print $0}else{$0 > "/dev/stderr"}}' > ${tmpof} | |
119 | |
120 if [[ "$?" != "0" ]]; then | |
121 ef=$(mktemp -u -p '.') | |
122 cat ${tmpif} > ${ef}.maf | |
123 >&2 echo "RNAcode failed for the alignmentblock \""`cat ${tmpif} | grep $ref | cut -d" " -f 1-6`"\" (${ef}.maf)" | |
124 fi | |
125 fix_output < $tmpof | |
126 echo -n > ${tmpof} | |
127 else | |
128 >&2 echo "less than 3 sequences in the alignment block \""`cat ${tmpif} | grep $ref | cut -d" " -f 1-6`"\"" | |
129 fi | |
130 } | |
131 | |
132 ref="" | |
133 last=0 | |
134 | |
135 if [[ ! -z "$tabular" ]]; then | |
136 echo -e "HSS #\tFrame\tLength\tFrom\tTo\tName\tStart\tEnd\tScore\tP" >> ${outfile:-/dev/stdout} | |
137 fi | |
138 | |
139 tmpif=$(mktemp -p '.') | |
140 tmpof=$(mktemp -p '.') | |
141 tmpd=$(mktemp -d -p '.') | |
142 | |
143 # process lines of the alignment | |
144 # - save lines to tmpif | |
145 # - empty lines: process tmpif (ie. last alignment block) with RNAcode, clear tmpif | |
146 # - on the go the name of the reference species is determined from the 1st line | |
147 # of the alignment this is used then for filtering the RNAcode output | |
148 # in case of gtf output only the chromosome is printed, ie only chr1 instead of dm6.chr1 | |
149 while read line | |
150 do | |
151 if [[ "$line" =~ ^# ]]; then | |
152 echo -n > ${tmpif} | |
153 elif [[ "$line" =~ ^$ ]]; then | |
154 run_rnacode ${args[@]} ${tmpif} | |
155 # >> ${outfile:-/dev/stdout} | |
156 echo -n > ${tmpif} | |
157 else | |
158 if [[ -z $ref && "$line" =~ ^s ]]; then | |
159 if [[ -z "$gtf" ]]; then | |
160 ref=`echo $line | cut -d" " -f 2` | |
161 else | |
162 ref=`echo $line | sed 's/\./ /g' | cut -d" " -f 3` | |
163 fi | |
164 fi | |
165 echo $line >> ${tmpif} | |
166 fi | |
167 done < ${file:-/dev/stdin} | |
168 # if there is something left -> process it | |
169 if [[ "`cat ${tmpif} | wc -l`" -gt "0" ]]; then | |
170 run_rnacode ${args[@]} ${tmpif} | |
171 # >> ${outfile:-/dev/stdout} | |
172 fi | |
173 | |
174 if [[ ! -z "$eps" ]]; then | |
175 mv ${tmpd}/*eps ${epsdir:-eps}/ | |
176 fi | |
177 | |
178 rm ${tmpif} | |
179 rm ${tmpof} | |
180 rmdir ${tmpd} |