annotate rna_quast.xml @ 2:7e130d325fa7 draft

Uploaded
author lehmanju
date Tue, 13 Oct 2020 12:35:25 +0000
parents a0bd8ab14f66
children bf3dc4cae5bf
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
1 <tool id="rna_quast" name="rnaQUAST" version="@TOOL_VERSION@" python_template_version="3.7">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
2 <description>A Quality Assessment Tool for De Novo Transcriptome Assemblies</description>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
3 <macros>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
4 <token name="@TOOL_VERSION@">2.1.0</token>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
5 </macros>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
6 <requirements>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
7 <requirement type="package" version="@TOOL_VERSION@">rnaquast</requirement>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
8 </requirements>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
9 <stdio>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
10 <regex match="Traceback " source="both" level="fatal" description="rnaQuast failed" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
11 </stdio>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
12 <command detect_errors="exit_code"><![CDATA[
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
13 #import re
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
14 #for $i in $input
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
15 ln -s '$i' '${re.sub('[^\w\-.]', '_', i.element_identifier)}' &&
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
16 #end for
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
17 #if $r
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
18 #for $rf in $r
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
19 ln -s '$rf' '${re.sub('[^\w\-.]', '_', rf.element_identifier)}' &&
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
20 #end for
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
21 #end if
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
22 #if $gene_coordinates.use_gtf == "true"
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
23 #for $g in $gene_coordinates.gtf
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
24 ln -s '$g' '${re.sub('[^\w\-.]', '_', g.element_identifier)}' &&
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
25 #end for
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
26 #end if
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
27 mkdir outputdir &&
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
28 rnaQUAST.py
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
29 --threads \${GALAXY_SLOTS:-1}
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
30 --transcripts
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
31 #for $i in $input
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
32 '${re.sub('[^\w\-.]', '_', i.element_identifier)}'
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
33 #end for
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
34 $strand_specific
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
35 #if $r
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
36 -r
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
37 #for $rf in $r
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
38 '${re.sub('[^\w\-.]', '_', rf.element_identifier)}'
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
39 #end for
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
40 #end if
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
41 #if $gene_coordinates.use_gtf == "true"
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
42 --gtf
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
43 #for $g in $gene_coordinates.gtf
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
44 '${re.sub('[^\w\-.]', '_', g.element_identifier)}'
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
45 #end for
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
46 $gene_coordinates.disable_infer_genes
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
47 $gene_coordinates.disable_infer_transcripts
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
48 #end if
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
49 $prokaryote
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
50 --min_alignment '$min_alignment'
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
51 #if "pdf" not in $out_sr and "plots" not in $out_add
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
52 --no_plots
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
53 #end if
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
54 $blat
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
55 $busco_lineage
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
56 $gene_mark
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
57 --lower_threshold $lower_threshold
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
58 --upper_threshold $upper_threshold
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
59 -o outputdir
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
60 && mkdir details
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
61 #for $i in $input
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
62 #set basename = os.path.splitext(re.sub('[^\w\-.]', '_', $i.element_identifier))[0]
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
63 &&
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
64 (for f in \$(find 'outputdir/'$basename'_output' -type f);
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
65 do
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
66 d=\$(dirname \$f | cut -d"/" -f2 | cut -d'_' -f1) &&
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
67 mv \$f details/"\$d"_____"\$(basename \$f)";
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
68 done)
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
69 #end for
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
70 ## rename .list files to .txt files to make them detectable (format detection by extension)
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
71 ## the final `true` seems needed since otherwise the `;` at the end is swallowed
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
72 && find details/ -name "*.list" -exec mv {} {}.txt \;
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
73 && true
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
74 ]]></command>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
75 <inputs>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
76 <param name="input" type="data" format="fasta" multiple="true" label="Chromosomes/scaffolds file"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
77 <param name="strand_specific" argument="-ss" type="boolean" truevalue="-ss" falsevalue="" checked="false" label="Strand-specific"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
78 <param name="r" optional="true" argument="-r" type="data" format="fasta" multiple="true" label="Reference genome" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
79 <conditional name="gene_coordinates">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
80 <param name="use_gtf" type="select" label="Use file with gene coordinates in GTF/GFF format?" help="We recommend to use files downloaded from GENCODE or Ensembl.">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
81 <option value="true" selected="true">Yes</option>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
82 <option value="false">No</option>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
83 </param>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
84 <when value="true">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
85 <param name="gtf" argument="--gtf" type="data" format="gtf, gff, gff3" multiple="true" label="GTF/GFF file"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
86 <param argument="--disable_infer_genes" type="boolean" truevalue="--disable_infer_genes" falsevalue="" checked="false" label=" GTF file contains genes records?"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
87 <param argument="--disable_infer_transcripts" type="boolean" truevalue="--disable_infer_transcripts" falsevalue="" checked="false" label="GTF file contains transcripts records?"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
88 </when>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
89 <when value="false">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
90 </when>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
91 </conditional>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
92 <param argument="--prokaryote" type="boolean" truevalue="--prokaryote" falsevalue="" checked="false" label="Is genome prokaryotic?"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
93 <param argument="--min_alignment" type="integer" value="50" label="Minimal alignment length to be used"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
94 <param argument="--blat" type="boolean" truevalue="--blat" falsevalue="" checked="false" label="Run with BLAT alignment tool instead of GMAP?" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
95 <param argument="--busco_lineage" type="boolean" truevalue="--busco_lineage" falsevalue="" checked="false" label="Run BUSCO tool?" help="The BUSCO tool detects core genes in the assembly. Use this option to provide path to the BUSCO lineage data (Eukaryota, Metazoa, Arthropoda, Vertebrata or Fungi)."/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
96 <param argument="--gene_mark" type="boolean" truevalue="--gene_mark" falsevalue="" checked="false" label="Run with GeneMarkS-T gene prediction tool?"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
97 <param argument="--lower_threshold" type="integer" value="50" label="Lower threshold for x_assembled/covered/matched metrics."/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
98 <param argument="--upper_threshold" type="integer" value="95" label="Upper threshold for x_assembled/covered/matched metrics."/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
99 <param name="out_sr" type="select" multiple="true" label="Short report formats">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
100 <option value="tsv" selected="true">tabular</option>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
101 <option value="txt">txt</option>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
102 <option value="tex">tex</option>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
103 <option value="pdf" selected="true">pdf</option>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
104 </param>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
105 <param name="out_add" type="select" multiple="true" label="Additional outputs">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
106 <option value="logs">Logs</option>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
107 <option value="plots" selected="true">Plots (only for n>1)</option>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
108 <option value="comparison" selected="true">Comparison for Chromosomes/scaffolds files (only for n>1)</option>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
109 <option value="details" selected="true">Details per Chromosomes/scaffolds file</option>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
110 <option value="details_plots" selected="true">Details per Chromosomes/scaffolds file as plot</option>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
111 </param>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
112 </inputs>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
113
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
114 <outputs>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
115 <data name="short_report_pdf" format="pdf" label="${tool.name} on ${on_string}: pdf report" from_work_dir="outputdir/short_report.pdf">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
116 <filter>"pdf" in out_sr</filter>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
117 </data>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
118 <data name="short_report_txt" format="txt" label="${tool.name} on ${on_string}: txt report" from_work_dir="outputdir/short_report.txt">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
119 <filter>"txt" in out_sr</filter>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
120 </data>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
121 <data name="short_report_tex" format="txt" label="${tool.name} on ${on_string}: tex report" from_work_dir="outputdir/short_report.tex">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
122 <filter>"tex" in out_sr</filter>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
123 </data>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
124 <data name="short_report_tsv" format="tabular" label="${tool.name} on ${on_string}: tsv report" from_work_dir="outputdir/short_report.tsv">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
125 <filter>"tsv" in out_sr</filter>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
126 </data>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
127 <collection name="list_logs" type="list" label="${tool.name} on ${on_string}: logs" >
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
128 <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.log" directory="outputdir/logs/" visible="false" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
129 <filter>"logs" in out_add</filter>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
130 </collection>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
131 <collection name="list_comparison_png" type="list" label="${tool.name} on ${on_string}: comparison plots" >
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
132 <discover_datasets ext="png" pattern="(?P&lt;name&gt;.+)\.png" directory="outputdir/comparison_output/" visible="false" recurse="true"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
133 <filter> len(input)>1 and "plots" in out_add </filter>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
134 </collection>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
135 <collection name="list_comparison" type="list" label="${tool.name} on ${on_string}: comparison" >
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
136 <discover_datasets ext="txt" pattern="(?P&lt;name&gt;.+)\.txt" directory="outputdir/comparison_output/" visible="false" recurse="true" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
137 <filter> len(input)>1 and "comparison" in out_add</filter>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
138 </collection>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
139 <collection name="data_collection" type="list:list" label="${tool.name} on ${on_string}: detailed output">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
140 <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;txt)" directory="details/" visible="false"/>
2
7e130d325fa7 Uploaded
lehmanju
parents: 0
diff changeset
141 <filter>"details" in out_add</filter>
0
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
142 </collection>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
143 <collection name="data_collection_png" type="list:list" label="${tool.name} on ${on_string}: detailed output plots">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
144 <discover_datasets pattern="(?P&lt;identifier_0&gt;.+)_____(?P&lt;identifier_1&gt;.+)\.(?P&lt;ext&gt;png)" directory="details/" visible="false"/>
2
7e130d325fa7 Uploaded
lehmanju
parents: 0
diff changeset
145 <filter>"details_plots" in out_add</filter>
0
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
146 </collection>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
147 </outputs>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
148 <tests>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
149 <test expect_num_outputs="7">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
150 <param name="input" value="idba.fasta,Trinity.fasta" ftype="fasta" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
151 <param name="r" value="Saccharomyces_cerevisiae.R64-1-1.75.dna.toplevel.fa" ftype="fasta" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
152 <conditional name="gene_coordinates">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
153 <param name="use_gtf" value="true" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
154 <param name="gtf" value="Saccharomyces_cerevisiae.R64-1-1.75.gtf" ftype="gtf" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
155 <param name="disable_infer_genes" value="true"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
156 <param name="disable_infer_transcripts" value="true"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
157 </conditional>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
158 <param name="out_sr" value="txt,tex,tsv" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
159 <param name="out_add" value="logs,comparison,plots,details" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
160 <output name="short_report_txt">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
161 <assert_contents>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
162 <has_text text="SHORT SUMMARY REPORT"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
163 </assert_contents>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
164 </output>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
165 <output name="short_report_tex">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
166 <assert_contents>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
167 <has_text text="Short summary report"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
168 <has_text text="end{document}"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
169 </assert_contents>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
170 </output>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
171 <output name="short_report_tsv">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
172 <assert_contents>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
173 <has_line_matching expression="^METRICS/TRANSCRIPTS\tidba\tTrinity$"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
174 </assert_contents>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
175 </output>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
176 <output_collection name="list_comparison_png" type="list" count="15"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
177 <output_collection name="list_comparison" type="list" count="19"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
178 <output_collection name="list_logs" type="list" count="8"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
179 <output_collection name="data_collection" type="list:list" count="2">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
180 <element name="Trinity">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
181 <element name="alignment_metrics">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
182 <assert_contents><has_line_matching expression="^METRICS/TRANSCRIPTS\s+Trinity\s+$"/></assert_contents>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
183 </element>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
184 <element name="alignment_multiplicity"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
185 <element name="alignments_per_isoform"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
186 <element name="basic_metrics"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
187 <element name="block_length"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
188 <element name="blocks_per_alignment"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
189 <element name="database_metrics"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
190 <element name="misassemblies"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
191 <element name="mismatch_rate"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
192 <element name="sensitivity"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
193 <element name="specificity"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
194 <element name="transcript_length"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
195 <element name="x-aligned"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
196 <element name="x-assembled_exons"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
197 <element name="x-assembled"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
198 <element name="x-covered_exons"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
199 <element name="x-covered"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
200 <element name="x-matched_blocks"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
201 <element name="x-matched"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
202 </element>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
203 <element name="idba">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
204 <element name="alignment_metrics">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
205 <assert_contents>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
206 <has_line_matching expression="^METRICS/TRANSCRIPTS\s+idba\s+$"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
207 </assert_contents>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
208 </element>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
209 </element>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
210 </output_collection>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
211 </test>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
212 <test expect_num_outputs="8">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
213 <param name="input" value="spades.311.fasta" ftype="fasta" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
214 <conditional name="gene_coordinates">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
215 <param name="use_gtf" value="false" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
216 </conditional>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
217 <param name="min_alignment" value="30" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
218 <param name="lower_threshold" value="45" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
219 <param name="upper_threshold" value="95"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
220 <param name="out_sr" value="txt,tex,tsv,pdf" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
221 <param name="out_add" value="logs" />
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
222 <output name="short_report_pdf" file="short_report.pdf" compare="sim_size"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
223 <output name="short_report_txt" file="short_report.txt" compare="sim_size"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
224 <output name="short_report_tex" file="short_report.tex" compare="sim_size"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
225 <output name="short_report_tsv" file="short_report.tsv" compare="sim_size"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
226 <output_collection name="list_logs" type="list">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
227 <element name="rnaQUAST" file="rnaQUAST"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
228 <element name="spades.311.GeneMarkS_T.err" file="spades.311.GeneMarkS_T.err"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
229 </output_collection>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
230 <output_collection name="data_collection" type="list:list">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
231 <element name="spades.311">
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
232 <element name="alignment_metrics" file="spades.311/alignment_metrics.txt"/>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
233 </element>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
234 </output_collection>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
235 </test>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
236 </tests>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
237 <help><![CDATA[
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
238 **What it does**
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
239 rnaQUAST: a quality assessment tool for de novo transcriptome assemblies
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
240 rnaQUAST is a tool for evaluating RNA-Seq assembly quality and benchmarking transcriptome assemblers using a reference genome and gene database. rnaQUAST calculates various metrics that demonstrate completeness and correctness levels of the assembled transcripts, and outputs them in a user-friendly report.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
241
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
242 **Using rnaQuast without reference** you won't get:
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
243 - x_assembled PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
244 - x_assembled Exons PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
245 - Alignments per Isoform PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
246 - x_covered PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
247 - x_covered Exons PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
248 - x_matched PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
249 - x_matched PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
250 - x_matched Blocks PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
251 - gmap build out log
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
252 - gmap build err log
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
253
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
254 **Using rnaQuast with reference** you will get:
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
255 - PDF report
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
256 - TXT report
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
257 - TSV report
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
258 - Log
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
259 - Alignment Metrics
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
260 - Basic Metrics
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
261 - Misassemblies
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
262 - Specificity
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
263 - Sensitivity
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
264 - Alignment multiplicity
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
265 - Block length
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
266 - Blocks per alignment
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
267 - Mismatch rate
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
268 - Transcript length
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
269 - x_aligned
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
270 - Transcript Length PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
271 - Nx PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
272 - Block length PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
273 - Blocks per alignment PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
274 - gmap build out log
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
275 - gmap build err log
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
276
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
277 **Using rnaQuast without gene coordinates** you won't get:
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
278 - x_assembled PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
279 - x_assembled Exons PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
280 - Alignments per Isoform PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
281 - x_covered PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
282 - x_covered Exons PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
283 - x_matched PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
284 - x_matched PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
285 - x_matched Blocks PNG & Txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
286 - gmap build out log
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
287 - gmap build err log
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
288 - Database Metrics
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
289 - Alignment multiplicity PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
290 - Mismatch rate PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
291 - NAx PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
292 - x_aligned PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
293 **Using rnaQuast with gene coordinates** you will get:
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
294 - PDF report
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
295 - TXT report
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
296 - TSV report
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
297 - Log
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
298 - Alignment Metrics
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
299 - Basic Metrics
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
300 - Misassemblies
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
301 - Specificity
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
302 - Sensitivity
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
303 - Alignment multiplicity
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
304 - Block length
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
305 - Blocks per alignment
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
306 - Mismatch rate
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
307 - Transcript length
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
308 - x_aligned
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
309 - Transcript Length PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
310 - Nx PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
311 - Block length PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
312 - Blocks per alignment PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
313 - gmap build out log
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
314 - gmap build err log
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
315 - Database Metrics
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
316 - Alignment multiplicity PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
317 - Mismatch rate PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
318 - NAx PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
319 - x_aligned PNG
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
320 **Using rnaQuast without drawing plots** you won't get any PNGs and txt files of these, nor the PDF report
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
321 *Output*
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
322 **Reports**
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
323 The following text files with reports are contained in comparison_output directory and include results for all input assemblies. In addition, these reports are contained in <assembly_label>_output directories for each assembly separately.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
324 database_metrics.txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
325 Gene database metrics.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
326 - Genes / Protein coding genes – number of genes / protein coding genes
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
327 - Isoforms / Protein coding isoforms – number of isoforms / protein coding isoforms
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
328 - Exons / Introns – total number of exons / introns
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
329 - Total / Average length of all isoforms, bp
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
330 - Average exon length, bp
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
331 - Average intron length, bp
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
332 - Average / Maximum number of exons per isoform
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
333 Database coverage – the total number of bases covered by reads (in all isoforms) divided by the total length of all isoforms.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
334 x%-covered genes / isoforms / exons – number of genes / isoforms / exons from the database that have at least x% of bases covered by all reads, where x is specified with lower_threshold / upper_threshold options (50% / 95% by default).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
335 basic_metrics.txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
336 Basic transcripts metrics are calculated without reference genome and gene database.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
337 - Transcripts – total number of assembled transcripts.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
338 - Transcripts > 500 bp
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
339 - Transcripts > 1000 bp
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
340 - Average length of assembled transcripts
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
341 - Longest transcript
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
342 - Total length
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
343 - Transcript N50 – a maximal number N, such that the total length of all transcripts longer than N bp is at least 50% of the total length of all transcripts.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
344 alignment_metrics.txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
345 Alignment metrics are calculated with reference genome but without using gene database. To calculate the following metrics rnaQUAST filters all short partial alignments (see min_alignment option) and attempts to select the best hits for each transcript.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
346 - Transcripts – total number of assembled transcripts.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
347 - Aligned – the number of transcripts having at least 1 significant alignment.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
348 - Uniquely aligned – the number of transcripts having a single significant alignment.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
349 - Multiply aligned – the number of transcripts having 2 or more significant alignments. Multiply aligned transcripts are stored in <assembly_label>.paralogs.fasta file.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
350 - Misassembly candidates reported by GMAP (or BLAT) – transcripts that have discordant best-scored alignment (partial alignments that are either mapped to different strands / different chromosomes / in reverse order / too far away).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
351 - Unaligned – the number of transcripts without any significant alignments. Unaligned transcripts are stored in <assembly_label>.unaligned.fasta file.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
352 Number of assembled transcripts = Unaligned + Aligned = Unaligned + (Uniquely aligned + Multiply aligned + Misassembly candidates reported by GMAP (or BLAT)).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
353 Alignment metrics for non-misassembled transcripts
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
354 - Average aligned fraction. Aligned fraction for a single transcript is defined as total number of aligned bases in the transcript divided by the total transcript length.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
355 - Average alignment length. Aligned length for a single transcript is defined as total number of aligned bases in the transcript.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
356 - Average blocks per alignment. A block is defined as a continuous alignment fragment without indels.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
357 - Average block length (see above).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
358 - Average mismatches per transcript – average number of single nucleotide differences with reference genome per transcript.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
359 - NA50 – N50 for alignments.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
360 misassemblies.txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
361 - Transcripts – total number of assembled transcripts.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
362 - Misassembly candidates reported by GMAP (or BLAT) – transcripts that have discordant best-scored alignment (partial alignments that are either mapped to different strands / different chromosomes / in reverse order / too far away).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
363 - Misassembly candidates reported by BLASTN – transcripts are aligned to the isoform sequences extracted from the genome using gene database with BLASTN and then transcripts that have partial alignments to multiple isoforms are selected.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
364 - Misassemblies – misassembly candidates confirmed by both methods described above. Using both methods simultaneously makes it possible to avoid considering misalignments that can be caused, for example, by paralogous genes or genomic repeats. Misassembled transcripts are stored in <assembly_label>.misassembled.fasta file.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
365 sensitivity.txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
366 Assembly completeness (sensitivity). For the following metrics (calculated with reference genome and gene database) rnaQUAST attempts to select best-matching database isoforms for every transcript. Note that a single transcript can contribute to multiple isoforms in the case of, for example, paralogous genes or genomic repeats. At the same time, an isoform can be covered by multiple transcripts in the case of fragmented assembly or duplicated transcripts in the assembly.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
367 - Database coverage – the total number of bases covered by transcripts (in all isoforms) divided by the total length of all isoforms.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
368 - Duplication ratio – total number of aligned bases in assembled transcripts divided by the total number of isoform covered bases. This metric counts neither paralogous genes nor shared exons, only real overlaps of the assembled sequences that are mapped to the same isoform.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
369 - Average number of transcripts mapped to one isoform.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
370 - x%-assembled genes / isoforms / exons – number of genes / isoforms / exons from the database that have at least x% captured by a single assembled transcript, where x is specified with lower_threshold / upper_threshold options (50% / 95% by default). 95%-assembled isoforms are stored in <assembly_label>.95%assembled.fasta file.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
371 - x%-covered genes / isoforms – number of genes / isoforms from the database that have at least x% of bases covered by all alignments, where x is specified with lower_threshold / upper_threshold options (50% / 95% by default).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
372 - Mean isoform assembly – assembled fraction of a single isoform is calculated as the largest number of its bases captured by a single assembled transcript divided by its length; average value is computed for isoforms with > 0 bases covered.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
373 - Mean isoform coverage – coverage of a single isoform is calculated as the number of its bases covered by all assembled transcripts divided by its length; average value is computed for isoforms with > 0 bases covered.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
374 - Mean exon coverage – coverage of a single exon is calculated as the number of its bases covered by all assembled transcripts divided by its length; average value is computed for exons with > 0 bases covered.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
375 - Average percentage of isoform x%-covered exons, where x is specified with lower_threshold / upper_threshold options (50% / 95% by default). For each isoform rnaQUAST calculates the number of x%-covered exons divided by the total number of exons. Afterwards it computes average value for all covered isoforms.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
376 BUSCO metrics. The following metrics are calculated only when busco_lineage option is used (see options for details).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
377 - Complete – percentage of completely recovered genes.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
378 - Partial – percentage of partially recovered genes.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
379 GeneMarkS-T metrics. The following metrics are calculated when reference and gene database are not provided or gene_mark option is used (see options for details).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
380 - Genes – number of predicted genes in transcripts.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
381 specificity.txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
382 Assembly specificity. To compute the following metrics we use only transcripts that have at least one significant alignment and are not misassembled.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
383 - Unannotated – total number of transcripts that do not cover any isoform from the database. Unannotated transcripts are stored in <assembly_label>.unannotated.fasta file.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
384 - x%-matched – total number of transcripts that have at least x% covering an isoform from the database, where x is specified with lower_threshold / upper_threshold options (50% / 95% by default).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
385 - Mean fraction of transcript matched – matched fraction of a single transcript is calculated as the number of its bases covering an isoform divided by the transcript length; average value is computed for transcripts with > 0 bases matched.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
386 - Mean fraction of block matched – matched fraction of a single block is calculated as the number of its bases covering an isoform divided by the block length; average value is computed for blocks with > 0 bases matched.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
387 - x%-matched blocks – percentage of blocks that have at least x% covering an isoform from the database, where x is specified with lower_threshold / upper_threshold options (50% / 95% by default).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
388 - Matched length – total number of transcript bases covering isoforms from the database.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
389 - Unmatched length – total alignment length - Matched length.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
390 relative_database_coverage.txt
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
391 Relative database coverage metrics are calculated only when raw reads (or read alignments) are provided. rnaQUAST uses read alignments to estimate the upper bound of the database coverage and the number of x-covered genes / isoforms / exons (see read coverage) and computes the following metrics:
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
392 - Relative database coverage – ratio between transcripts database coverage and reads database coverage.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
393 - Relative x%-assembled genes / isoforms / exons – ratio between transcripts x%-assembled and reads x%-covered genes / isoforms / exons.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
394 - Relative x%-covered genes / isoforms / exons – ratio between transcripts x%-covered and reads x%-covered genes / isoforms / exons.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
395 **Detailed output**
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
396 These files are contained in <assembly_label>_output directories for each assembly separately.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
397 - <assembly_label>.unaligned.fasta – transcripts without any significant alignments.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
398 - <assembly_label>.paralogs.fasta – transcripts having 2 or more significant alignments.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
399 - <assembly_label>.misassembled.fasta – misassembly candidates detected by methods described above. See misassemblies.txt description for details.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
400 - <assembly_label>.correct.fasta – transcripts with exactly 1 significant alignment that do not contain misassemblies.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
401 - <assembly_label>.x%-assembled.list – IDs of the isoforms from the database that have at least x% captured by a single assembled transcript, where x is specified by the user with an option upper_threshold (95% by default).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
402 - <assembly_label>.unannotated.fasta – transcripts that do not cover any isoform from the database.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
403 The following text file is contained in comparison_output directory and <assembly_label>_output directories for each assembly separately.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
404 reads.x%-covered.list – IDs of the isoforms from the database that have at least x% bases covered by all reads, where x is specified with lower_threshold / upper_threshold options (50% / 95% by default).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
405 **Plots**
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
406 The following plots are similarly contained in both comparison_output directory and <assembly_label>_output directories. Please note that most of the plots represent cumulative distributions and some plots are given in logarithmic scale.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
407 **Basic**
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
408 - transcript_length.png – assembled transcripts length distribution (+ database isoforms length distribution).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
409 - block_length.png – alignment blocks length distribution (+ database exons length distribution).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
410 - x-aligned.png – transcript aligned fraction distribution.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
411 - blocks_per_alignment.png – distribution of number of blocks per alignment (+ distribution of number of database exons per isoform).
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
412 - alignment_multiplicity.png – distribution for the number of significant alignments for each multiply-aligned transcript.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
413 - mismatch_rate.png – substitution errors per alignment distribution.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
414 - Nx.png – Nx plot for transcripts. Nx is a maximal number N, such that the total length of all transcripts longer than N bp is at least x% of the total length of all transcripts.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
415 - NAx.png – Nx plot for alignments.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
416 **Sensitivity**
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
417 - x-assembled.png – a histogram in which each bar represents the number of isoforms from the database that have at least x% captured by a single assembled transcript.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
418 - x-covered.png – a histogram in which each bar represents the number of isoforms from the database that have at least x% of bases covered by all alignments.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
419 - x-assembled_exons.png – a histogram in which each bar represents the number of exons from the database that have at least x% captured by a single assembled transcript.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
420 - x-covered_exons.png – a histogram in which each bar represents the number of exons from the database that have at least x% of bases covered by all alignments.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
421 - alignments_per_isoform.png – plot showing number of transcript alignments per isoform
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
422 **Specificity**
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
423 - x-matched.png – a histogram in which each bar represents the number of transcripts that have at least x% matched to an isoform from the database.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
424 - x-matched_blocks.png – a histogram in which each bar represents the number of all blocks from all transcript alignments that have at least x% matched to an isoform from the database.
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
425 ]]></help>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
426 <citations>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
427 <citation type="doi">10.1093/bioinformatics/btw218</citation>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
428 </citations>
a0bd8ab14f66 Uploaded
lehmanju
parents:
diff changeset
429 </tool>