Mercurial > repos > davidvanzessen > mutation_analysis
comparison wrapper.sh @ 0:8a5a2abbb870 draft default tip
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 29 Aug 2016 05:36:10 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8a5a2abbb870 |
---|---|
#!/bin/bash
#set -e
#
# Wrapper script: reads the positional arguments, creates the output
# directory and unpacks the bundled style archive into it.
#
# Positional arguments:
#   $1  input                archive that is unpacked below (zip or xz tar)
#   $2  method               "custom" selects gene_identification.py, anything
#                            else selects the blastn based identification
#   $3  log                  progress log while running; see note on $log below
#   $4  outdir               directory that receives every generated file
#   $5  title                heading of the generated index.html
#   $6  include_fr1          forwarded to the mutation_analysis scripts
#   $7  functionality        forwarded to merge_and_filter.r
#   $8  unique               forwarded to merge_and_filter.r
#   $9  naive_output_ca      copy destination for new_IMGT_ca.txz
#   $10 naive_output_cg      copy destination for new_IMGT_cg.txz
#   $11 naive_output_cm      copy destination for new_IMGT_cm.txz
#   $12 filter_unique        forwarded to merge_and_filter.r
#   $13 class_filter         forwarded to merge_and_filter.r
#   $14 empty_region_filter  forwarded to merge_and_filter.r

dir="$(cd "$(dirname "$0")" && pwd)" # absolute directory of this script
input=$1
method=$2
log=$3 # progress log; moved to $outdir/log.html at the end and replaced by a page linking to index.html
outdir=$4
output="$outdir/index.html" # main report page
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}
empty_region_filter=${14}

# -p: do not fail when the directory already exists.
mkdir -p "$outdir"

tar -xzf "$dir/style.tar.gz" -C "$outdir"
22 | |
echo "---------------- read parameters ----------------"
echo "---------------- read parameters ----------------<br />" > "$log"

echo "unpacking IMGT file"

# Choose the extraction tool from the description printed by file(1).
input_type="$(file "$input")"
if [[ "$input_type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$input_type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  echo "tar -xJf $input -C $PWD/files/$title"
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
else
  # Reported on stdout like every other progress message of this script.
  echo "Unrecognised input type, nothing unpacked: $input_type"
fi

# Concatenate every unpacked file whose name matches the pattern left of the
# colon into the file named right of the colon (created in $PWD).
# find -exec is used instead of `cat $(find ...)` so that an empty match yields
# an empty file rather than a cat waiting on stdin, and so that paths
# containing spaces are handled.
for mapping in \
  "1_*:summary.txt" \
  "3_*:sequences.txt" \
  "5_*:aa.txt" \
  "6_*:junction.txt" \
  "7_*:mutationanalysis.txt" \
  "8_*:mutationstats.txt" \
  "10_*:hotspots.txt"; do
  find "$PWD/files/" -name "${mapping%%:*}" -exec cat {} + > "$PWD/${mapping#*:}"
done
47 | |
# Locate the blastn binary: a BLASTN_DIR from the environment is used when it
# is at least five characters long, otherwise a hard-coded development path.
if (( ${#BLASTN_DIR} >= 5 )); then
  echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
else
  BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
  echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
fi

echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------<br />" >> "$log"

# Both branches produce $outdir/identified_genes.txt.
if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" \
    --input "$PWD/summary.txt" \
    --output "$outdir/identified_genes.txt"
else
  echo "---------------- summary_to_fasta.py ----------------"
  echo "---------------- summary_to_fasta.py ----------------<br />" >> "$log"

  python "$dir/summary_to_fasta.py" \
    --input "$PWD/summary.txt" \
    --fasta "$PWD/sequences.fasta"

  # Write a header row first; the blastn hits (-outfmt 6, tab separated) are
  # appended below it.
  printf 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n' \
    > "$outdir/identified_genes.txt"
  "${BLASTN_DIR}/blastn" -task blastn \
    -db "$dir/subclass_definition.db" \
    -query "$PWD/sequences.fasta" \
    -outfmt 6 >> "$outdir/identified_genes.txt"
fi
69 | |
echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$log"

# Every argument is quoted so that an empty option is still passed as an
# (empty) argument instead of shifting the positions of the ones after it.
# Arguments 1-6 are files produced earlier; arguments 7-9 are paths under
# $outdir that later steps of this script read.
Rscript "$dir/merge_and_filter.r" \
  "$PWD/summary.txt" \
  "$PWD/sequences.txt" \
  "$PWD/mutationanalysis.txt" \
  "$PWD/mutationstats.txt" \
  "$PWD/hotspots.txt" \
  "$outdir/identified_genes.txt" \
  "$outdir/merged.txt" \
  "$outdir/before_unique_filter.txt" \
  "$outdir/unmatched.txt" \
  "$method" \
  "$functionality" \
  "$unique" \
  "${filter_unique}" \
  "${class_filter}" \
  "${empty_region_filter}" 2>&1
74 | |
echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ----------------<br />" >> "$log"

# Target file names inside every new_IMGT* directory. The part before the
# first underscore is the numeric prefix used to find the unpacked sources.
imgt_files=(
  "1_Summary.txt"
  "2_IMGT-gapped-nt-sequences.txt"
  "3_Nt-sequences.txt"
  "4_IMGT-gapped-AA-sequences.txt"
  "5_AA-sequences.txt"
  "6_Junction.txt"
  "7_V-REGION-mutation-and-AA-change-table.txt"
  "8_V-REGION-nt-mutation-statistics.txt"
  "9_V-REGION-AA-change-statistics.txt"
  "10_V-REGION-mutation-hotspots.txt"
)
# Classes that each get their own copy of the archive.
imgt_classes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

mkdir "$outdir/new_IMGT"

# Merge all unpacked files sharing a numeric prefix ("1_*", "2_*", ...) into
# one file. find -exec avoids a cat that waits on stdin when nothing matches.
for imgt_file in "${imgt_files[@]}"; do
  find "$PWD/files/" -name "${imgt_file%%_*}_*" -exec cat {} + \
    > "$outdir/new_IMGT/$imgt_file"
done

# Start every class directory as a copy of the merged, unfiltered files.
for imgt_class in "${imgt_classes[@]}"; do
  mkdir "$outdir/new_IMGT_${imgt_class}"
  cp "$outdir/new_IMGT/"* "$outdir/new_IMGT_${imgt_class}"
done

# new_imgt.r is run once per directory with merged.txt and a class selector
# ("-" for the directory that is not class specific).
Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1
for imgt_class in "${imgt_classes[@]}"; do
  Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${imgt_class}/" "$outdir/merged.txt" "$imgt_class" 2>&1
done

# Pack each directory into <name>.txz next to it. tar runs from inside the
# directory so the archive members carry no directory prefix. The subshell
# keeps the working directory of the script untouched, and tar is skipped when
# the cd fails.
for imgt_dir in new_IMGT "${imgt_classes[@]/#/new_IMGT_}"; do
  (cd "$outdir/$imgt_dir/" && tar -cJf "../${imgt_dir}.txz" *)
done
165 | |
echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------<br />" >> "$log"

# Comma separated class list handed to the analysis scripts; also used by the
# sequence_overview step further down.
classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1


echo "---------------- mutation_analysis.py ----------------"
echo "---------------- mutation_analysis.py ----------------<br />" >> "$log"

python "$dir/mutation_analysis.py" \
  --input "$outdir/merged.txt" \
  --genes "$classes" \
  --includefr1 "${include_fr1}" \
  --output "$outdir/hotspot_analysis.txt"

echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$log"

Rscript "$dir/aa_histogram.r" "$outdir/aa_id_mutations.txt" "$outdir/absent_aa_id.txt" "ca,cg,cm" "$outdir/" 2>&1

# When the histogram files with an empty class suffix exist, rename them to
# the names the report links to.
if [[ -e "$outdir/aa_histogram_.png" ]]; then
  mv "$outdir/aa_histogram_.png" "$outdir/aa_histogram.png"
  mv "$outdir/aa_histogram_.txt" "$outdir/aa_histogram.txt"
fi
187 | |
# Classes that get a column / row in the report tables.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# Aggregations rendered in the SHM overview. Only "sum" is active; the script
# previously listed "sum mean median" and immediately overwrote it.
funcs=(sum)

echo "---------------- sequence_overview.r ----------------"
echo "---------------- sequence_overview.r ----------------<br />" >> "$log"

mkdir "$outdir/sequence_overview"

Rscript "$dir/sequence_overview.r" \
  "$outdir/before_unique_filter.txt" \
  "$outdir/merged.txt" \
  "$outdir/sequence_overview" \
  "$classes" \
  "$outdir/hotspot_analysis_sum.txt" 2>&1

# Turn the tab separated ntoverview.txt into an HTML table. The table is left
# open here: the closing </table> is appended near the end of the script.
{
  echo "<table border='1'>"
  while IFS=$'\t' read -r ID class seq A C G T; do
    echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>"
  done < "$outdir/sequence_overview/ntoverview.txt"
} > "$outdir/base_overview.html"
206 | |
# Start index.html: title plus the script / stylesheet references.
# NOTE(review): $title is inserted into the HTML without escaping.
cat > "$output" <<EOF
<html><center><h1>$title</h1></center>
<meta name='viewport' content='width=device-width, initial-scale=1'>
<script type='text/javascript' src='jquery-1.11.0.min.js'></script>
<script type='text/javascript' src='tabber.js'></script>
<script type='text/javascript' src='script.js'></script>
<link rel='stylesheet' type='text/css' href='style.css'>
<link rel='stylesheet' type='text/css' href='pure-min.css'>
EOF

# Matched: lines of merged.txt that do not contain "unmatched", minus the
# first remaining (header) line. Unmatched: lines of unmatched.txt minus the
# header. $(( )) also strips any padding wc may print.
matched_count=$(( $(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l) ))
unmatched_count=$(( $(tail -n +2 "$outdir/unmatched.txt" | wc -l) ))
total_count=$((matched_count + unmatched_count))

# Percentage of unmatched sequences with two decimals (truncated).
# Multiplying before dividing keeps the precision that was lost when the
# quotient was cut to two decimals first, and the guard avoids a division by
# zero when there are no sequences at all.
if (( total_count > 0 )); then
  perc_scaled=$(( unmatched_count * 10000 / total_count ))
  printf -v perc_count '%d.%02d' "$(( perc_scaled / 100 ))" "$(( perc_scaled % 100 ))"
else
  perc_count="0.00"
fi

echo "<center><h2>Total: ${total_count}</h2></center>" >> "$output"
echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> "$output"
echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> "$output"
225 | |
echo "---------------- main tables ----------------"
echo "---------------- main tables ----------------<br />" >> "$log"

echo "<div class='tabber'>" >> "$output"
echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"

for func in "${funcs[@]}"; do

  echo "---------------- $func table ----------------"
  echo "---------------- $func table ----------------<br />" >> "$log"

  cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

  echo "---------------- pattern_plots.r ----------------"
  echo "---------------- pattern_plots.r ----------------<br />" >> "$log"

  Rscript "$dir/pattern_plots.r" "$outdir/data_${func}.txt" "$outdir/plot1" "$outdir/plot2" "$outdir/plot3" 2>&1

  # Header row: one column per class, then "all", then "unmatched".
  echo "<table class='pure-table pure-table-striped'>" >> "$output"
  echo "<thead><tr><th>info</th>" >> "$output"
  for gene in "${genes[@]}"; do
    seq_count=$(< "$outdir/${gene}_${func}_n.txt")
    echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $seq_count)</a></th>" >> "$output"
  done

  seq_count=$(< "$outdir/all_${func}_n.txt")
  echo "<th><a href='matched_all_${func}.txt'>all (N = $seq_count)</a></th>" >> "$output"
  # The unmatched column shows the count computed from unmatched.txt.
  echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th></tr></thead>" >> "$output"

  # Each data line is: name, followed by x,y,z triples in the column order
  #   ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm unmatched all
  # so a triple starts at field 1 + 3 * position. Cells are rendered as
  # "x/y (z)", with a trailing % on z for every row except the two ratio rows.
  while IFS=, read -r -a cols; do
    name=${cols[0]}
    if [[ "$name" == "FR S/R (ratio)" || "$name" == "CDR S/R (ratio)" ]]; then
      is_ratio=true
      unit=""
    else
      is_ratio=false
      unit="%"
    fi

    row="<tr><td>$name</td>"
    # The nine classes (fields 1-27) followed by "all" (fields 31-33).
    for idx in 1 4 7 10 13 16 19 22 25 31; do
      row+="<td>${cols[idx]}/${cols[idx + 1]} (${cols[idx + 2]}${unit})</td>"
    done
    # NOTE(review): ratio rows never had an unmatched cell, so they are one
    # cell shorter than the header; kept as is, confirm this is intended.
    if [[ "$is_ratio" == "false" ]]; then
      row+="<td>${cols[28]}/${cols[29]} (${cols[30]}%)</td>"
    fi
    row+="</tr>"
    printf '%s\n' "$row" >> "$output"
  done < "$outdir/data_${func}.txt"
  echo "</table>" >> "$output"
  #echo "<a href='data_${func}.txt'>Download data</a>" >> $output
done

echo "<img src='plot1.png' /><br />" >> "$output"
echo "<img src='plot2.png' /><br />" >> "$output"
echo "<img src='plot3.png' /><br />" >> "$output"

echo "</div>" >> "$output" #SHM overview tab end
275 | |
echo "---------------- images ----------------"
echo "---------------- images ----------------<br />" >> "$log"

echo "<div class='tabbertab' title='SHM Frequency'>" >> "$output"

# Each plot is linked only when its image exists in $outdir.
if [[ -e "$outdir/scatter.png" ]]; then
  cat >> "$output" <<'EOF'
<img src='scatter.png'/><br />
<a href='scatter.txt'>download data</a><br />
EOF
fi
if [[ -e "$outdir/frequency_ranges.png" ]]; then
  cat >> "$output" <<'EOF'
<img src='frequency_ranges.png'/><br />
<a href='frequency_ranges_classes.txt'>download class data</a><br />
<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />
EOF
fi

echo "</div>" >> "$output" #SHM frequency tab end
294 | |
echo "<div class='tabbertab' title='Transition tables'>" >> "$output"

echo "<table border='0'>" >> "$output"

# One row per class plus a final row for all sequences. Every row holds the
# heading, the heatmap image, the stacked image and the comma separated
# transitions_<class>_sum.txt rendered as a nested table.
for gene in "${genes[@]}" all; do
  # The combined row is headed "All" while its files use the lower case "all".
  if [[ "$gene" == "all" ]]; then
    heading="All"
  else
    heading="$gene"
  fi

  {
    echo "<tr>"
    echo "<td><h1>${heading}</h1></td>"
    echo "<td><img src='transitions_heatmap_${gene}.png' /></td>"
    echo "<td><img src='transitions_stacked_${gene}.png' /></td>"
    echo "<td><table class='pure-table transition-table pure-table-bordered'>"
  } >> "$output"

  while IFS=, read -r from a c g t; do
    echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> "$output"
  done < "$outdir/transitions_${gene}_sum.txt"

  echo "</table></td>" >> "$output"
  echo "</tr>" >> "$output"
done

echo "</table>" >> "$output"

echo "</div>" >> "$output" #transition tables tab end
331 | |
echo "<div class='tabbertab' title='Antigen Selection'>" >> "$output"

# The overall histogram decides whether all four histograms (overall, ca, cg,
# cm) are linked.
if [[ -e "$outdir/aa_histogram.png" ]]; then
  for hist_suffix in "" _ca _cg _cm; do
    echo "<img src='aa_histogram${hist_suffix}.png'/><br />" >> "$output"
    echo "<a href='aa_histogram${hist_suffix}.txt'>download data</a><br />" >> "$output"
  done
fi

# The baseline PDFs are embedded unconditionally.
for baseline_class in ca cg cm; do
  echo "<embed src='baseline_${baseline_class}.pdf' width='700px' height='1000px'>" >> "$output"
done

echo "</div>" >> "$output" #antigen selection tab end

echo "<div class='tabbertab' title='CSR'>" >> "$output" #CSR tab

# A class plot is linked only when its image exists in $outdir.
for csr_class in ca cg; do
  if [[ -e "$outdir/${csr_class}.png" ]]; then
    echo "<img src='${csr_class}.png'/><br />" >> "$output"
    echo "<a href='${csr_class}.txt'>download data</a><br />" >> "$output"
  fi
done

echo "</div>" >> "$output" #CSR tab end
366 | |
echo "---------------- change-o MakeDB ----------------"

mkdir "$outdir/change_o"

# The change-o helper scripts are run from inside $outdir/change_o; remember
# where we came from so the working directory can be restored afterwards.
tmp="$PWD"

cd "$outdir/change_o"

bash "$dir/change_o/makedb.sh" "$input" false false false "$outdir/change_o/change-o-db.txt"
bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/change_o/change-o-defined_clones-summary.txt"

# Merge the defined clones with merged.txt; the result overwrites the
# defined-clones file (first and last argument are the same path).
merge_cmd=(
  Rscript "$dir/merge.r"
  "$outdir/change_o/change-o-db-defined_clones.txt"
  "$outdir/merged.txt"
  "all"
  "Sequence.ID,best_match"
  "SEQUENCE_ID"
  "Sequence.ID"
  "$outdir/change_o/change-o-db-defined_clones.txt"
)
"${merge_cmd[@]}" 2>&1

# Log the command exactly as it was run.
echo "${merge_cmd[*]} 2>&1"

# Repeat the clone definition per class, but only when the class archive
# holds more than one line in its summary (i.e. more than the first line);
# otherwise write placeholder files so the report links still resolve.
for clone_class in ca cg cm; do
  if [[ $(wc -l < "$outdir/new_IMGT_${clone_class}/1_Summary.txt") -gt "1" ]]; then
    bash "$dir/change_o/makedb.sh" "$outdir/new_IMGT_${clone_class}.txz" false false false "$outdir/change_o/change-o-db-${clone_class}.txt"
    bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db-${clone_class}.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones-${clone_class}.txt" "$outdir/change_o/change-o-defined_clones-summary-${clone_class}.txt"
  else
    echo "No ${clone_class} sequences" > "$outdir/change_o/change-o-db-defined_clones-${clone_class}.txt"
    echo "No ${clone_class} sequences" > "$outdir/change_o/change-o-defined_clones-summary-${clone_class}.txt"
  fi
done

# Actually change back to the starting directory; assigning to PWD only
# changes the variable, not the working directory.
cd "$tmp"

echo "<div class='tabbertab' title='Clonality'>" >> "$output" #clonality tab
409 | |
#######################################
# Append an HTML table with clone size statistics to a file.
# Arguments:
#   $1 - input file; its first line is skipped as a header, every following
#        line is split on IFS (whitespace by default) into
#        clone size / number of clones / number of sequences
#   $2 - file the HTML is appended to
# Outputs:
#   Nothing on stdout; the table is appended to $2. When $1 cannot be read
#   an empty table (header only) is still written.
#######################################
function clonality_table {
  local infile=$1
  local outfile=$2
  local header size clones seqs

  echo "<table class='pure-table pure-table-striped'>" >> "$outfile"
  echo "<thead><tr><th>Clone size</th><th>Nr of clones</th><th>Nr of sequences</th></tr></thead>" >> "$outfile"

  {
    # Drop the header line.
    read -r header
    # "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
    while read -r size clones seqs || [[ -n "$size" ]]; do
      echo "<tr><td>$size</td><td>$clones</td><td>$seqs</td></tr>"
    done
  } < "$infile" >> "$outfile"

  echo "</table>" >> "$outfile"
}
# Nested tab set inside the clonality tab: one sub tab per summary file.
echo "<div class='tabber'>" >> "$output"

# "<tab title>:<summary file suffix>"; the combined summary has no suffix.
for clonality_tab in "All:" "Ca:-ca" "Cg:-cg" "Cm:-cm"; do
  tab_title=${clonality_tab%%:*}
  summary_suffix=${clonality_tab#*:}

  echo "<div class='tabbertab' title='${tab_title}'>" >> "$output"
  clonality_table "$outdir/change_o/change-o-defined_clones-summary${summary_suffix}.txt" "$output"
  echo "</div>" >> "$output"
done

echo "</div>" >> "$output" #clonality tabber end

echo "</div>" >> "$output" #clonality tab end
451 | |
# Downloads tab: a static table of links to the generated files, followed by
# the closing tags of the tab, the tab container and the document. The text
# is fixed, so it is written with a single quoted here-document (no
# expansion takes place inside it).
cat >> "$output" <<'EOF'
<div class='tabbertab' title='Downloads'>
<table class='pure-table pure-table-striped'>
<thead><tr><th>info</th><th>link</th></tr></thead>
<tr><td>The complete dataset</td><td><a href='merged.txt'>Download</a></td></tr>
<tr><td>The SHM Overview table as a dataset</td><td><a href='data_sum.txt'>Download</a></td></tr>
<tr><td>The data used to generate the first SHM Overview plot</td><td><a href='plot1.txt'>Download</a></td></tr>
<tr><td>The data used to generate the second SHM Overview plot</td><td><a href='plot2.txt'>Download</a></td></tr>
<tr><td>The data used to generate the third SHM Overview plot</td><td><a href='plot3.txt'>Download</a></td></tr>
<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt'>Download</a></td></tr>
<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt'>Download</a></td></tr>
<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt'>Download</a></td></tr>
<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt'>Download</a></td></tr>
<tr><td>Absent AA location data per sequence ID</td><td><a href='absent_aa_id.txt'>Download</a></td></tr>
<tr><td>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</td><td><a href='sequence_overview/index.html'>Download</a></td></tr>
<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>Download</a></td></tr>
<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf'>Download</a></td></tr>
<tr><td>Baseline data</td><td><a href='baseline.txt'>Download</a></td></tr>
<tr><td>Baseline ca PDF</td><td><a href='baseline_ca.pdf'>Download</a></td></tr>
<tr><td>Baseline ca data</td><td><a href='baseline_ca.txt'>Download</a></td></tr>
<tr><td>Baseline cg PDF</td><td><a href='baseline_cg.pdf'>Download</a></td></tr>
<tr><td>Baseline cg data</td><td><a href='baseline_cg.txt'>Download</a></td></tr>
<tr><td>Baseline cm PDF</td><td><a href='baseline_cm.pdf'>Download</a></td></tr>
<tr><td>Baseline cm data</td><td><a href='baseline_cm.txt'>Download</a></td></tr>
<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz'>Download</a></td></tr>
<tr><td>An IMGT archive with just the matched and filtered ca sequences</td><td><a href='new_IMGT_ca.txz'>Download</a></td></tr>
<tr><td>An IMGT archive with just the matched and filtered ca1 sequences</td><td><a href='new_IMGT_ca1.txz'>Download</a></td></tr>
<tr><td>An IMGT archive with just the matched and filtered ca2 sequences</td><td><a href='new_IMGT_ca2.txz'>Download</a></td></tr>
<tr><td>An IMGT archive with just the matched and filtered cg sequences</td><td><a href='new_IMGT_cg.txz'>Download</a></td></tr>
<tr><td>An IMGT archive with just the matched and filtered cg1 sequences</td><td><a href='new_IMGT_cg1.txz'>Download</a></td></tr>
<tr><td>An IMGT archive with just the matched and filtered cg2 sequences</td><td><a href='new_IMGT_cg2.txz'>Download</a></td></tr>
<tr><td>An IMGT archive with just the matched and filtered cg3 sequences</td><td><a href='new_IMGT_cg3.txz'>Download</a></td></tr>
<tr><td>An IMGT archive with just the matched and filtered cg4 sequences</td><td><a href='new_IMGT_cg4.txz'>Download</a></td></tr>
<tr><td>An IMGT archive with just the matched and filtered cm sequences</td><td><a href='new_IMGT_cm.txz'>Download</a></td></tr>
<tr><td>The Change-O DB file with defined clones and subclass annotation</td><td><a href='change_o/change-o-db-defined_clones.txt'>Download</a></td></tr>
<tr><td>The Change-O DB defined clones summary file</td><td><a href='change_o/change-o-defined_clones-summary.txt'>Download</a></td></tr>
<tr><td>The Change-O DB file with defined clones of ca</td><td><a href='change_o/change-o-db-defined_clones-ca.txt'>Download</a></td></tr>
<tr><td>The Change-O DB defined clones summary file of ca</td><td><a href='change_o/change-o-defined_clones-summary-ca.txt'>Download</a></td></tr>
<tr><td>The Change-O DB file with defined clones of cg</td><td><a href='change_o/change-o-db-defined_clones-cg.txt'>Download</a></td></tr>
<tr><td>The Change-O DB defined clones summary file of cg</td><td><a href='change_o/change-o-defined_clones-summary-cg.txt'>Download</a></td></tr>
<tr><td>The Change-O DB file with defined clones of cm</td><td><a href='change_o/change-o-db-defined_clones-cm.txt'>Download</a></td></tr>
<tr><td>The Change-O DB defined clones summary file of cm</td><td><a href='change_o/change-o-defined_clones-summary-cm.txt'>Download</a></td></tr>
</table>
</div>
</div>
</html>
EOF
507 | |
echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"
tmp="$PWD"

mkdir "$outdir/baseline"

# Run the baseline wrapper once for the archive that is not class specific
# (empty class, working directory "ca_cg_cm") and once per class. A run only
# happens when the matching 1_Summary.txt has more than one line; otherwise a
# placeholder text file is written instead of the results.
for baseline_class in "" ca cg cm; do
  if [[ -z "$baseline_class" ]]; then
    run_name="ca_cg_cm"
    file_suffix=""
    empty_message="No sequences"
  else
    run_name="$baseline_class"
    file_suffix="_${baseline_class}"
    empty_message="No ${baseline_class} sequences"
  fi

  mkdir "$outdir/baseline/$run_name"
  if [[ $(wc -l < "$outdir/new_IMGT${file_suffix}/1_Summary.txt") -gt "1" ]]; then
    # The wrapper is started from inside its own directory; the subshell
    # confines the directory change and skips the run when cd fails.
    (
      cd "$outdir/baseline/$run_name" \
        && bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" \
          "$outdir/new_IMGT${file_suffix}.txz" \
          "$run_name" \
          "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" \
          "$outdir/baseline${file_suffix}.pdf" \
          "Sequence.ID" \
          "$outdir/baseline${file_suffix}.txt"
    )
  else
    echo "$empty_message" > "$outdir/baseline${file_suffix}.txt"
  fi
done

# Finish in the directory that $PWD named when this section started.
cd "$tmp"
548 | |
echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# Copy the per class archives to the requested destinations. Each destination
# is checked on its own: an empty value or the literal "None" means that the
# output was not requested. (The previous check tested a variable that is
# never set, so it was always true.)
naive_classes=(ca cg cm)
naive_targets=("${naive_output_ca}" "${naive_output_cg}" "${naive_output_cm}")
for naive_idx in "${!naive_classes[@]}"; do
  naive_target=${naive_targets[naive_idx]}
  if [[ -n "$naive_target" && "$naive_target" != "None" ]]; then
    cp "$outdir/new_IMGT_${naive_classes[naive_idx]}.txz" "$naive_target"
  fi
done

# Close the table that was opened when base_overview.html was created.
echo "</table>" >> "$outdir/base_overview.html"

# Keep the progress log as log.html and reuse $log for a landing page that
# links to index.html and lists the filtering steps.
mv "$log" "$outdir/log.html"

echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > "$log"
echo "<table border = 1>" >> "$log"
echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> "$log"

# filtering_steps.txt is tab separated: step, sequence count, percentage.
# IFS is set for the read command only, so the global IFS stays untouched and
# nothing has to be saved or restored.
while IFS=$'\t' read -r step seq perc; do
  echo "<tr>" >> "$log"
  echo "<td>$step</td>" >> "$log"
  echo "<td>$seq</td>" >> "$log"
  echo "<td>${perc}%</td>" >> "$log"
  echo "</tr>" >> "$log"
done < "$outdir/filtering_steps.txt"
echo "</table></center></html>" >> "$log"


echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"
583 | |
584 | |
585 | |
586 | |
587 | |
588 | |
589 | |
590 | |
591 | |
592 | |
593 | |
594 | |
595 | |
596 | |
597 | |
598 | |
599 | |
600 | |
601 | |
602 | |
603 |