comparison wrapper.sh @ 0:8a5a2abbb870 draft default tip

Uploaded
author davidvanzessen
date Mon, 29 Aug 2016 05:36:10 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:8a5a2abbb870
#!/bin/bash
# Pipeline wrapper: reads the positional parameters below, creates the output
# directory and unpacks the bundled style assets into it. The rest of the
# script fills "$outdir" with analysis results and an HTML report.
#
# Positional parameters:
#   $1   input                IMGT archive to analyse
#   $2   method               identification method ("custom", otherwise blastn)
#   $3   log                  log file; becomes the main html page at the end
#   $4   outdir               output directory
#   $5   title                report title (also the xz extraction sub directory)
#   $6   include_fr1          forwarded to the mutation analysis scripts
#   $7   functionality        forwarded to merge_and_filter.r
#   $8   unique               forwarded to merge_and_filter.r
#   $9   naive_output_ca      copy destination for the ca IMGT archive
#   $10  naive_output_cg      copy destination for the cg IMGT archive
#   $11  naive_output_cm      copy destination for the cm IMGT archive
#   $12  filter_unique        forwarded to merge_and_filter.r
#   $13  class_filter         forwarded to merge_and_filter.r
#   $14  empty_region_filter  forwarded to merge_and_filter.r
#set -e
dir="$(cd "$(dirname "$0")" && pwd)" #directory holding this script and its helpers
input=$1
method=$2
log=$3 #becomes the main html page at the end
outdir=$4
output="$outdir/index.html" #copied to $log location at the end
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}
empty_region_filter=${14}

# -p: do not complain when the directory already exists; quoting keeps paths
# containing whitespace intact.
mkdir -p "$outdir"

tar -xzf "$dir/style.tar.gz" -C "$outdir"
22
# ---- read parameters: unpack the IMGT archive and collect its tables ----
echo "---------------- read parameters ----------------"
echo "---------------- read parameters ----------------<br />" > "$log"

echo "unpacking IMGT file"

# The archive format is taken from the description printed by file(1).
type="$(file "$input")"
if [[ "$type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  # Log the command that is actually executed (extraction goes into a
  # sub directory named after the title).
  echo "tar -xJf $input -C $PWD/files/$title"
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
fi

# Concatenate every extracted file whose name starts with "<prefix>_" into one
# working file. Each entry is "<prefix>:<working file base name>".
# find -exec keeps paths with whitespace intact and, unlike cat `find ...`,
# never falls back to reading stdin when nothing matches.
for imgt_table in "1:summary" "3:sequences" "5:aa" "6:junction" \
  "7:mutationanalysis" "8:mutationstats" "10:hotspots"; do
  find "$PWD/files/" -name "${imgt_table%%:*}_*" -type f -exec cat {} + \
    > "$PWD/${imgt_table#*:}.txt"
done
47
# ---- identification: assign a subclass to every sequence, then merge ----
# Use BLASTN_DIR from the environment when it holds at least five characters,
# otherwise fall back to the development Galaxy location.
if [[ ${#BLASTN_DIR} -ge 5 ]]; then
  echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
else
  BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
  echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
fi

echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------<br />" >> "$log"

if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
  echo "---------------- summary_to_fasta.py ----------------"
  echo "---------------- summary_to_fasta.py ----------------<br />" >> "$log"

  python "$dir/summary_to_fasta.py" --input "$PWD/summary.txt" --fasta "$PWD/sequences.fasta"

  # blastn -outfmt 6 prints no header, so the column names are written first
  # and the hits are appended below them.
  echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > "$outdir/identified_genes.txt"
  "${BLASTN_DIR}/blastn" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi

echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$log"

# Every argument is quoted so that an empty option keeps its position instead
# of silently shifting the arguments that follow it.
Rscript "$dir/merge_and_filter.r" \
  "$PWD/summary.txt" "$PWD/sequences.txt" "$PWD/mutationanalysis.txt" \
  "$PWD/mutationstats.txt" "$PWD/hotspots.txt" \
  "$outdir/identified_genes.txt" "$outdir/merged.txt" \
  "$outdir/before_unique_filter.txt" "$outdir/unmatched.txt" \
  "$method" "$functionality" "$unique" \
  "${filter_unique}" "${class_filter}" "${empty_region_filter}" 2>&1
74
# ---- new IMGT directories: one for all sequences and one per (sub)class ----
echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ----------------<br />" >> "$log"

mkdir "$outdir/new_IMGT"

# Target file names; the numeric prefix before the first "_" selects which
# extracted files are concatenated into each of them.
imgt_files=(
  "1_Summary.txt"
  "2_IMGT-gapped-nt-sequences.txt"
  "3_Nt-sequences.txt"
  "4_IMGT-gapped-AA-sequences.txt"
  "5_AA-sequences.txt"
  "6_Junction.txt"
  "7_V-REGION-mutation-and-AA-change-table.txt"
  "8_V-REGION-nt-mutation-statistics.txt"
  "9_V-REGION-AA-change-statistics.txt"
  "10_V-REGION-mutation-hotspots.txt"
)
for imgt_file in "${imgt_files[@]}"; do
  # find -exec is whitespace safe and does not read stdin when nothing matches.
  find "$PWD/files/" -name "${imgt_file%%_*}_*" -type f -exec cat {} + \
    > "$outdir/new_IMGT/$imgt_file"
done

imgt_classes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# All copies are taken before new_imgt.r touches new_IMGT, so every class
# directory starts from the same unmodified set of files.
for imgt_class in "${imgt_classes[@]}"; do
  mkdir "$outdir/new_IMGT_${imgt_class}"
  cp "$outdir/new_IMGT"/* "$outdir/new_IMGT_${imgt_class}"
done

# "-" is passed as the class argument for the directory with all sequences.
Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1

for imgt_class in "${imgt_classes[@]}"; do
  Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${imgt_class}/" "$outdir/merged.txt" "$imgt_class" 2>&1
done
131
132
# ---- pack every new_IMGT* directory into a .txz next to it ----
# tar is run from inside the directory with a bare "*" so the archive members
# carry no directory prefix. Each run happens in a subshell: the working
# directory of the script is never changed, and a failed cd skips the archive
# instead of packing whatever directory the script happens to be in.
for imgt_suffix in "" _ca _ca1 _ca2 _cg _cg1 _cg2 _cg3 _cg4 _cm; do
  (
    cd "$outdir/new_IMGT${imgt_suffix}/" || exit
    tar -cJf "../new_IMGT${imgt_suffix}.txz" *
  )
done
165
# ---- mutation analysis (R + python) and amino acid histograms ----
echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------<br />" >> "$log"

# Comma separated class list handed to the analysis scripts (also used by the
# sequence overview step further down).
classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1


echo "---------------- mutation_analysis.py ----------------"
echo "---------------- mutation_analysis.py ----------------<br />" >> "$log"

python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$classes" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"

echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$log"

Rscript "$dir/aa_histogram.r" "$outdir/aa_id_mutations.txt" "$outdir/absent_aa_id.txt" "ca,cg,cm" "$outdir/" 2>&1
# When an "aa_histogram_" file (empty class suffix) exists, give it and its
# data file the plain name the report links to.
if [[ -e "$outdir/aa_histogram_.png" ]]; then
  mv "$outdir/aa_histogram_.png" "$outdir/aa_histogram.png"
  mv "$outdir/aa_histogram_.txt" "$outdir/aa_histogram.txt"
fi
187
# Classes that get their own column/row in the report tables.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# Only the "sum" tables are generated. (An earlier "sum mean median"
# assignment was overwritten immediately and has been dropped.)
funcs=(sum)

echo "---------------- sequence_overview.r ----------------"
echo "---------------- sequence_overview.r ----------------<br />" >> "$log"

mkdir "$outdir/sequence_overview"

Rscript "$dir/sequence_overview.r" "$outdir/before_unique_filter.txt" "$outdir/merged.txt" "$outdir/sequence_overview" "$classes" "$outdir/hotspot_analysis_sum.txt" 2>&1

# Turn the tab separated nucleotide overview into HTML table rows. The table
# is left open here; the closing tag is appended at the end of the script.
echo "<table border='1'>" > "$outdir/base_overview.html"

# -r keeps backslashes in the data literal.
while IFS=$'\t' read -r ID class seq A C G T; do
  echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> "$outdir/base_overview.html"
done < "$outdir/sequence_overview/ntoverview.txt"
206
# ---- start index.html: title, assets and matched/unmatched counts ----
echo "<html><center><h1>$title</h1></center>" > "$output"
echo "<meta name='viewport' content='width=device-width, initial-scale=1'>" >> "$output"
echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> "$output"
echo "<script type='text/javascript' src='tabber.js'></script>" >> "$output"
echo "<script type='text/javascript' src='script.js'></script>" >> "$output"
echo "<link rel='stylesheet' type='text/css' href='style.css'>" >> "$output"
echo "<link rel='stylesheet' type='text/css' href='pure-min.css'>" >> "$output"

# Row counts without the header line; rows mentioning 'unmatched' are not
# counted as matched.
matched_count="$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)"
unmatched_count="$(tail -n +2 "$outdir/unmatched.txt" | wc -l)"
total_count=$((matched_count + unmatched_count))

# Percentage of unmatched sequences with two decimals. The multiplication is
# done before the division: with scale=2, bc truncates each division result to
# two digits, so "u / t * 100" would lose everything after the whole percent.
# An empty data set yields 0 instead of a division by zero error.
if ((total_count > 0)); then
  perc_count="$(bc -l <<< "scale=2; ${unmatched_count} * 100 / ${total_count}")"
else
  perc_count=0
fi

echo "<center><h2>Total: ${total_count}</h2></center>" >> "$output"
echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> "$output"
echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> "$output"
225
# ---- "SHM Overview" tab: one table per entry in funcs plus three plots ----
echo "---------------- main tables ----------------"
echo "---------------- main tables ----------------<br />" >> "$log"

echo "<div class='tabber'>" >> "$output"
echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"

for func in "${funcs[@]}"; do

  echo "---------------- $func table ----------------"
  echo "---------------- $func table ----------------<br />" >> "$log"

  cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

  echo "---------------- pattern_plots.r ----------------"
  echo "---------------- pattern_plots.r ----------------<br />" >> "$log"

  Rscript "$dir/pattern_plots.r" "$outdir/data_${func}.txt" "$outdir/plot1" "$outdir/plot2" "$outdir/plot3" 2>&1

  # Header row: one column per class with its sequence count, then "all" and
  # "unmatched" (the latter uses the count computed from unmatched.txt).
  echo "<table class='pure-table pure-table-striped'>" >> "$output"
  echo "<thead><tr><th>info</th>" >> "$output"
  for gene in "${genes[@]}"; do
    seq_count="$(cat "$outdir/${gene}_${func}_n.txt")"
    echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $seq_count)</a></th>" >> "$output"
  done

  seq_count="$(cat "$outdir/all_${func}_n.txt")"
  echo "<th><a href='matched_all_${func}.txt'>all (N = $seq_count)</a></th>" >> "$output"
  # The header row is now closed with </tr> (it used to emit a second <tr>).
  echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th></tr></thead>" >> "$output"

  # Each data line holds a name followed by x,y,z triples in the order
  # ca, ca1, ca2, cg, cg1, cg2, cg3, cg4, cm, unmatched, all.
  while IFS=, read -r name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz; do
    if [[ "$name" == "FR S/R (ratio)" || "$name" == "CDR S/R (ratio)" ]]; then #meh
      # Ratio rows: no percent sign.
      # NOTE(review): these rows have no unmatched cell, so they are one cell
      # shorter than the header - confirm whether that is intended.
      cell_suffix=""
      cell_columns=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm all)
    else
      cell_suffix="%"
      cell_columns=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm all un)
    fi

    # Build the row from the variables filled by read, via indirect expansion.
    row="<tr><td>$name</td>"
    for col in "${cell_columns[@]}"; do
      vx="${col}x"
      vy="${col}y"
      vz="${col}z"
      row+="<td>${!vx}/${!vy} (${!vz}${cell_suffix})</td>"
    done
    echo "${row}</tr>" >> "$output"
  done < "$outdir/data_${func}.txt"
  echo "</table>" >> "$output"
  #echo "<a href='data_${func}.txt'>Download data</a>" >> $output
done

echo "<img src='plot1.png' /><br />" >> "$output"
echo "<img src='plot2.png' /><br />" >> "$output"
echo "<img src='plot3.png' /><br />" >> "$output"

echo "</div>" >> "$output" #SHM overview tab end
275
# ---- "SHM Frequency" tab: images are only linked when they were produced ----
echo "---------------- images ----------------"
echo "---------------- images ----------------<br />" >> "$log"

echo "<div class='tabbertab' title='SHM Frequency'>" >> "$output"

# [[ -e ]] replaces the ambiguous unary "[ -a file ]" test.
if [[ -e "$outdir/scatter.png" ]]; then
  echo "<img src='scatter.png'/><br />" >> "$output"
  echo "<a href='scatter.txt'>download data</a><br />" >> "$output"
fi
if [[ -e "$outdir/frequency_ranges.png" ]]; then
  echo "<img src='frequency_ranges.png'/><br />" >> "$output"
  echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> "$output"
  echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> "$output"
fi

echo "</div>" >> "$output" #SHM frequency tab end
294
# ---- "Transition tables" tab: heatmap, stacked plot and table per class ----
echo "<div class='tabbertab' title='Transition tables'>" >> "$output"

echo "<table border='0'>" >> "$output"

# One table row per class followed by one for "all", which is shown with a
# capitalised heading but uses lower case file names.
for gene in "${genes[@]}" all; do
  if [[ "$gene" == "all" ]]; then
    heading="All"
  else
    heading="$gene"
  fi

  echo "<tr>" >> "$output"
  echo "<td><h1>${heading}</h1></td>" >> "$output"
  echo "<td><img src='transitions_heatmap_${gene}.png' /></td>" >> "$output"
  echo "<td><img src='transitions_stacked_${gene}.png' /></td>" >> "$output"
  echo "<td><table class='pure-table transition-table pure-table-bordered'>" >> "$output"
  # Comma separated transition counts: source base, then a/c/g/t columns.
  while IFS=, read -r from a c g t; do
    echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> "$output"
  done < "$outdir/transitions_${gene}_sum.txt"
  echo "</table></td>" >> "$output"

  echo "</tr>" >> "$output"
done

echo "</table>" >> "$output"

echo "</div>" >> "$output" #transition tables tab end
331
# ---- "Antigen Selection" tab ----
echo "<div class='tabbertab' title='Antigen Selection'>" >> "$output"

# The per-class histograms are linked whenever the overall histogram exists.
if [[ -e "$outdir/aa_histogram.png" ]]; then
  for histogram in aa_histogram aa_histogram_ca aa_histogram_cg aa_histogram_cm; do
    echo "<img src='${histogram}.png'/><br />" >> "$output"
    echo "<a href='${histogram}.txt'>download data</a><br />" >> "$output"
  done
fi

# The baseline PDFs are embedded unconditionally.
echo "<embed src='baseline_ca.pdf' width='700px' height='1000px'>" >> "$output"
echo "<embed src='baseline_cg.pdf' width='700px' height='1000px'>" >> "$output"
echo "<embed src='baseline_cm.pdf' width='700px' height='1000px'>" >> "$output"

echo "</div>" >> "$output" #antigen selection tab end

# ---- "CSR" tab: plots are only linked when they were produced ----
echo "<div class='tabbertab' title='CSR'>" >> "$output" #CSR tab

for csr_class in ca cg; do
  if [[ -e "$outdir/${csr_class}.png" ]]; then
    echo "<img src='${csr_class}.png'/><br />" >> "$output"
    echo "<a href='${csr_class}.txt'>download data</a><br />" >> "$output"
  fi
done

echo "</div>" >> "$output" #CSR tab end
366
# ---- change-o: build the DB and define clones for all / ca / cg / cm ----
echo "---------------- change-o MakeDB ----------------"

mkdir "$outdir/change_o"

tmp="$PWD" #directory to return to once the change-o steps are done

cd "$outdir/change_o"

bash "$dir/change_o/makedb.sh" "$input" false false false "$outdir/change_o/change-o-db.txt"
bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/change_o/change-o-defined_clones-summary.txt"

# merge.r is given the defined-clones file as both input and output.
Rscript "$dir/merge.r" "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/merged.txt" "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" "$outdir/change_o/change-o-db-defined_clones.txt" 2>&1

# Log the command exactly as it was run above (the old message showed a
# doubled "$outdir/$outdir" path and different arguments).
echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt 'all' 'Sequence.ID,best_match' 'SEQUENCE_ID' 'Sequence.ID' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"

# Per class: run change-o only when the class summary has more than a header
# line, otherwise write placeholder files so the report links still resolve.
for clone_class in ca cg cm; do
  if [[ $(wc -l < "$outdir/new_IMGT_${clone_class}/1_Summary.txt") -gt 1 ]]; then
    bash "$dir/change_o/makedb.sh" "$outdir/new_IMGT_${clone_class}.txz" false false false "$outdir/change_o/change-o-db-${clone_class}.txt"
    bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db-${clone_class}.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones-${clone_class}.txt" "$outdir/change_o/change-o-defined_clones-summary-${clone_class}.txt"
  else
    echo "No ${clone_class} sequences" > "$outdir/change_o/change-o-db-defined_clones-${clone_class}.txt"
    echo "No ${clone_class} sequences" > "$outdir/change_o/change-o-defined_clones-summary-${clone_class}.txt"
  fi
done

# Actually return to the previous directory: assigning to PWD only changes
# the variable, not the working directory of the shell.
cd "$tmp"

echo "<div class='tabbertab' title='Clonality'>" >> "$output" #clonality tab
409
#######################################
# Append an HTML table with clone sizes to a file.
# The first line of the input is treated as a header and skipped; every other
# line is split on whitespace into size, number of clones and number of
# sequences (the last field receives the remainder of the line).
# Arguments:
#   $1 - summary file to read
#   $2 - html file the table is appended to
# Outputs:
#   appends to $2; nothing is written to stdout
#######################################
function clonality_table {
  local infile=$1
  local outfile=$2
  local first='true' #local, so the flag does not leak into the caller
  local size clones seqs

  echo "<table class='pure-table pure-table-striped'>" >> "$outfile"
  echo "<thead><tr><th>Clone size</th><th>Nr of clones</th><th>Nr of sequences</th></tr></thead>" >> "$outfile"

  # -r keeps backslashes literal; the extra test keeps a final line that has
  # no trailing newline.
  while read -r size clones seqs || [[ -n "$size" ]]; do
    if [[ "$first" == "true" ]]; then
      first="false"
      continue
    fi
    echo "<tr><td>$size</td><td>$clones</td><td>$seqs</td></tr>" >> "$outfile"
  done < "$infile"

  echo "</table>" >> "$outfile"
}
# ---- "Clonality" tab content: a nested tabber with one table per group ----
echo "<div class='tabber'>" >> "$output"

# Each entry is "<tab title>:<summary file suffix>"; the "All" tab uses the
# summary file without a class suffix.
for clone_tab in "All:" "Ca:-ca" "Cg:-cg" "Cm:-cm"; do
  echo "<div class='tabbertab' title='${clone_tab%%:*}'>" >> "$output"
  clonality_table "$outdir/change_o/change-o-defined_clones-summary${clone_tab#*:}.txt" "$output"
  echo "</div>" >> "$output"
done

echo "</div>" >> "$output" #clonality tabber end

echo "</div>" >> "$output" #clonality tab end
451
# ---- "Downloads" tab: a table linking to every generated file ----

# Append one downloads row to the report.
#   $1 - description cell content (may contain html)
#   $2 - link target, relative to the report
download_row() {
  echo "<tr><td>$1</td><td><a href='$2'>Download</a></td></tr>" >> "$output"
}

echo "<div class='tabbertab' title='Downloads'>" >> "$output"

echo "<table class='pure-table pure-table-striped'>" >> "$output"
echo "<thead><tr><th>info</th><th>link</th></tr></thead>" >> "$output"
download_row "The complete dataset" "merged.txt"
download_row "The SHM Overview table as a dataset" "data_sum.txt"
download_row "The data used to generate the first SHM Overview plot" "plot1.txt"
download_row "The data used to generate the second SHM Overview plot" "plot2.txt"
download_row "The data used to generate the third SHM Overview plot" "plot3.txt"
download_row "The alignment info on the unmatched sequences" "unmatched.txt"

download_row "Motif data per sequence ID" "motif_per_seq.txt"
download_row "Mutation data per sequence ID" "mutation_by_id.txt"
download_row "AA mutation data per sequence ID" "aa_id_mutations.txt"
download_row "Absent AA location data per sequence ID" "absent_aa_id.txt"
download_row "CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once" "sequence_overview/index.html"

download_row "Base count for every sequence" "base_overview.html"

download_row "Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)" "baseline.pdf"
download_row "Baseline data" "baseline.txt"
for download_class in ca cg cm; do
  download_row "Baseline ${download_class} PDF" "baseline_${download_class}.pdf"
  download_row "Baseline ${download_class} data" "baseline_${download_class}.txt"
done

download_row "An IMGT archive with just the matched and filtered sequences" "new_IMGT.txz"
for download_class in ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm; do
  download_row "An IMGT archive with just the matched and filtered ${download_class} sequences" "new_IMGT_${download_class}.txz"
done

download_row "The Change-O DB file with defined clones and subclass annotation" "change_o/change-o-db-defined_clones.txt"
download_row "The Change-O DB defined clones summary file" "change_o/change-o-defined_clones-summary.txt"
for download_class in ca cg cm; do
  download_row "The Change-O DB file with defined clones of ${download_class}" "change_o/change-o-db-defined_clones-${download_class}.txt"
  download_row "The Change-O DB defined clones summary file of ${download_class}" "change_o/change-o-defined_clones-summary-${download_class}.txt"
done

echo "</table>" >> "$output"

echo "</div>" >> "$output" #downloads tab end

echo "</div>" >> "$output" #tabs end

echo "</html>" >> "$output"
507
# ---- baseline: run the baseline wrapper for all sequences and per class ----
echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"
tmp="$PWD" #directory to return to after the baseline runs

mkdir "$outdir/baseline"

# The empty entry stands for the combined run: it works in "ca_cg_cm" and
# uses the new_IMGT / baseline files without a class suffix.
for baseline_class in "" ca cg cm; do
  if [[ -z "$baseline_class" ]]; then
    baseline_name="ca_cg_cm"
    baseline_suffix=""
    baseline_empty_msg="No sequences"
  else
    baseline_name="$baseline_class"
    baseline_suffix="_${baseline_class}"
    baseline_empty_msg="No ${baseline_class} sequences"
  fi

  mkdir "$outdir/baseline/$baseline_name"
  # Only run when the summary holds more than its header line; otherwise a
  # placeholder data file is written.
  if [[ $(wc -l < "$outdir/new_IMGT${baseline_suffix}/1_Summary.txt") -gt 1 ]]; then
    # The wrapper is started from its own working directory; it is skipped
    # when that directory cannot be entered.
    cd "$outdir/baseline/$baseline_name" \
      && bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" "$outdir/new_IMGT${baseline_suffix}.txz" "$baseline_name" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline${baseline_suffix}.pdf" "Sequence.ID" "$outdir/baseline${baseline_suffix}.txt"
  else
    echo "$baseline_empty_msg" > "$outdir/baseline${baseline_suffix}.txt"
  fi
done

cd "$tmp"
548
# ---- naive output copies, final log page and wrap-up ----
echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# Copy each class archive to its requested destination. The destinations are
# checked one by one: the old guard tested "$naive_output", a variable that is
# never set, so it was always true and an empty or "None" destination was
# still handed to cp.
for naive_class in ca cg cm; do
  naive_var="naive_output_${naive_class}"
  naive_dest="${!naive_var}"
  if [[ -n "$naive_dest" && "$naive_dest" != "None" ]]; then
    cp "$outdir/new_IMGT_${naive_class}.txz" "$naive_dest"
  fi
done

# Close the table that was opened when base_overview.html was created.
echo "</table>" >> "$outdir/base_overview.html"

# Keep the progress log inside the output directory; "$log" is then rewritten
# as the landing page that links to the report.
mv "$log" "$outdir/log.html"

echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > "$log"
echo "<table border = 1>" >> "$log"
echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> "$log"
# IFS is set for the read command only, so the global IFS is never touched
# (the old code "restored" it from the unrelated variable TMP).
while IFS=$'\t' read -r step seq perc; do
  echo "<tr>" >> "$log"
  echo "<td>$step</td>" >> "$log"
  echo "<td>$seq</td>" >> "$log"
  echo "<td>${perc}%</td>" >> "$log"
  echo "</tr>" >> "$log"
done < "$outdir/filtering_steps.txt"
echo "</table></center></html>" >> "$log"


echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603