annotate tools/spades_2_5/plot_spades_stats.xml @ 3:d82f18c76309 draft

Uploaded spades wrapper 0.6. Supports spades 2.5.1. Also removes the need for the hack at installation time, by fixing the problem with input files. Shows the license for the tool as well.
author lionelguy
date Thu, 12 Sep 2013 07:46:54 -0400
parents b5ce24f34dd7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
1 <tool id="plot_spades_stats" name="SPAdes stats" version="0.1">
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
2 <description>coverage vs. length plot</description>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
3 <requirements>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
4 <requirement type="package">R</requirement>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
5 </requirements>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
6 <command interpreter="bash">r_wrapper.sh $script_file</command>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
7
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
8 <inputs>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
9 <param name="input_scaffolds" type="data" format="tabular" label="Scaffold stats"/>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
10 <param name="input_contigs" type="data" format="tabular" label="Contig stats"/>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
11 <param name="length_co" type="integer" value="1000" min="0" label="Length cut-off" help="Contigs with length under that value are shown in red"/>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
12 <param name="coverage_co" type="integer" value="10" min="0" label="Coverage cut-off" help="Contigs with coverage under that value are shown in red"/>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
13 </inputs>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
14 <configfiles>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
15 <configfile name="script_file">
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
16 ## Setup R error handling to go to stderr
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
17 options( show.error.messages=F,
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
18 error = function () {
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
19 cat( geterrmessage(), file=stderr() ); q( "no", 1, F )
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
20 } )
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
21 files = c("${input_contigs}", "${input_scaffolds}")
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
22 types = c("Contigs", "Scaffolds")
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
23
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
24 ## Start plotting device
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
25 png("${out_file}", w=500, h=1000)
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
26 par(mfrow=c(2,1))
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
27
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
28 ## Loop over the two files
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
29 for (i in 1:length(types)){
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
30 seqs = read.table(files[i], header=FALSE, comment.char="#")
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
31 colnames = c("name", "length", "coverage")
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
32 names(seqs) = colnames
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
33
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
34 ## Stats over all sequences
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
35 sl_all = sort(seqs\$length, decreasing=TRUE)
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
36 cs_all = cumsum(sl_all)
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
37 s_all = sum(seqs\$length)
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
38 n50_idx_all = which(cs_all >= 0.5*s_all)[1] ## N50: first sequence (longest first) at which the cumulative length reaches 50% of the total
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
39 n90_idx_all = which(cs_all >= 0.9*s_all)[1] ## N90: first sequence (longest first) at which the cumulative length reaches 90% of the total
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
40 n50_all = sl_all[n50_idx_all]
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
41 n90_all = sl_all[n90_idx_all]
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
42
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
43 ## Filter short seqs, redo stats
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
44 seqs_filt = seqs[seqs\$length >= ${length_co} &amp; seqs\$coverage >= ${coverage_co},]
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
45 if (nrow(seqs_filt) > 0){
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
46 sl_filt = sort(seqs_filt\$length, decreasing=TRUE)
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
47 cs_filt = cumsum(sl_filt)
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
48 s_filt = sum(seqs_filt\$length)
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
49 n50_idx_filt = which(cs_filt >= 0.5*s_filt)[1] ## N50 after filtering: first sequence at which the cumulative length reaches 50% of the filtered total
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
50 n90_idx_filt = which(cs_filt >= 0.9*s_filt)[1] ## N90 after filtering: first sequence at which the cumulative length reaches 90% of the filtered total
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
51 n50_filt = sl_filt[n50_idx_filt]
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
52 n90_filt = sl_filt[n90_idx_filt]
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
53 } else { n50_filt = NA; n90_filt = NA } ## nothing passed the filter: define the stats as NA so the legend neither fails nor reuses values from the previous file
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
54 seqs_bad = seqs[seqs\$length &lt; ${length_co} | seqs\$coverage &lt; ${coverage_co},]
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
55
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
56 ## Length vs coverage
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
57 plot(length~coverage, data=seqs, log="xy", type="n", main=paste(types[i], ": coverage vs. length", sep=""), xlab="Coverage", ylab="Length")
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
58 if (nrow(seqs_bad) > 0){
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
59 points(length~coverage, data=seqs_bad, cex=0.5, col="red")
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
60 }
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
61 if (nrow(seqs_filt) > 0){
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
62 points(length~coverage, data=seqs_filt, cex=0.5, col="black")
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
63 }
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
64 abline(v=${coverage_co}, h=${length_co}, lty=2, col=grey(0.3))
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
65 legend(x="topleft", legend=c("Before/after filtering", paste(c("N50: ", "N90: ", "Median cov.: "), c(n50_all, n90_all, round(median(seqs\$coverage))), rep("/", 3), c(n50_filt, n90_filt, round(median(seqs_filt\$coverage))), sep="")), cex=0.8)
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
66 }
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
67 dev.off()
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
68 </configfile>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
69 </configfiles>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
70 <outputs>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
71 <data format="png" name="out_file" />
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
72 </outputs>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
73 <help>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
74 **What it does**
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
75
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
76 Using the output of SPAdes (a FASTA file and a stats file for each of the contigs and scaffolds), it produces a coverage vs. length plot. Each dot represents a contig/scaffold. Given a coverage and a length cutoff, sequences that do not meet those criteria are shown in red. Some statistics are also given (N50, N90, median coverage) both before and after filtering.
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
77
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
78 Use the "filter SPAdes output" tool to actually filter sequences.
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
79 </help>
b5ce24f34dd7 Uploaded
lionelguy
parents:
diff changeset
80 </tool>