Mercurial > repos > enios > nc_rna
comparison gffcompare.xml @ 18:11d232ed904c draft
Uploaded
author | enios |
---|---|
date | Sat, 22 May 2021 07:35:00 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:308d4a7877a7 | 18:11d232ed904c |
---|---|
1 <tool id="gffcompare" name="GffCompare"> | |
2 <description>compare assembled transcripts to a reference annotation</description> | |
3 <requirements> | |
4 <container type="docker">bianca7/lncrna:gffcompare</container> | |
5 </requirements> | |
6 <!--version_command>gffcompare -v | awk '{print $2}'</version_command--> | |
7 <command detect_errors="aggressive"><![CDATA[ | |
8 #import re | |
9 ## Link every query GTF into the working directory under a sanitized name: each character that is not a word character or '-' becomes '_' | |
10 #set escaped_element_identifiers = [re.sub('[^\w\-]', '_', str(_.element_identifier)) for _ in $gffinputs] | |
11 #for $input, $escaped_element_identifier in zip($gffinputs, $escaped_element_identifiers): | |
12 ln -s '$input' '$escaped_element_identifier' && | |
13 #end for | |
14 #if $seq_data.use_seq_data == "Yes": | |
15 #if $seq_data.seq_source.index_source == "history": | |
16 ln -s '$seq_data.seq_source.ref_file' ref_seq.fa && | |
17 #else: | |
18 ln -s '${seq_data.seq_source.index.fields.path}' ref_seq.fa && | |
19 #end if | |
20 #end if | |
21 ## Link the reference annotation (history dataset or cached entry) as ref_annotation | |
22 #if $annotation.use_ref_annotation == "Yes": | |
23 #if $annotation.ref_source.ref_source_sel == "history": | |
24 ln -s '$annotation.ref_source.reference_annotation' ref_annotation && | |
25 #else: | |
26 ln -s '$annotation.ref_source.index.fields.path' ref_annotation && | |
27 #end if | |
28 #end if | |
29 | |
30 gffcompare | |
31 ## Use annotation reference? | |
32 #if $annotation.use_ref_annotation == "Yes": | |
33 -r ref_annotation | |
34 $annotation.ignore_nonoverlapping_reference | |
35 $annotation.ignore_nonoverlapping_transfrags | |
36 $annotation.strict_match | |
37 #end if | |
38 ## Pass -T (no per-input tmap/refmap files) when no map type is selected; an empty multiple select is expected to render as "None", "" is kept as a fallback | |
39 #if str($annotation.refmap_tmap) in ("", "None"): | |
40 -T | |
41 #end if | |
42 ## Use sequence data? | |
43 #if $seq_data.use_seq_data == "Yes": | |
44 -s ref_seq.fa | |
45 #end if | |
46 | |
47 $discard_single_exon | |
48 $discard_duplicates | |
49 $no_merge | |
50 -e $max_dist_exon | |
51 -d $max_dist_group | |
52 $chr_stats | |
53 -p '$adv_output.p' | |
54 $adv_output.A | |
55 $adv_output.C | |
56 $adv_output.X | |
57 $adv_output.K | |
58 ## Query GTFs, passed under the sanitized names linked at the top of this command | |
59 #for $escaped_element_identifier in $escaped_element_identifiers: | |
60 '$escaped_element_identifier' | |
61 #end for | |
62 | |
63 ]]></command> | |
64 <inputs> | |
65 <param format="gtf" name="gffinputs" type="data" label="GTF inputs for comparison" help="" multiple="true" /> | |
66 <conditional name="annotation"> | |
67 <param label="Use Reference Annotation" name="use_ref_annotation" type="select"> | |
68 <option value="No">No</option> | |
69 <option value="Yes">Yes</option> | |
70 </param> | |
71 <when value="Yes"> | |
72 <conditional name="ref_source"> | |
73 <param label="Choose the source for the reference annotation" name="ref_source_sel" type="select"> | |
74 <option value="cached">Locally cached</option> | |
75 <option value="history">History</option> | |
76 </param> | |
77 <when value="cached"> | |
78 <param argument="-r" label="Using reference annotation" name="index" type="select"> | |
79 <options from_data_table="gene_sets"> | |
80 <!--filter column="dbkey" key="dbkey" ref="gffinputs" type="data_meta" /--> | |
81 </options> | |
82 <validator message="No reference annotation is available for the build associated with the selected input dataset" type="no_options" /> | |
83 </param> | |
84 </when> | |
85 <when value="history"> | |
86 <param argument="-r" format="gff3,gtf" help="Requires an annotation file in GFF3 or GTF format." label="Reference Annotation" name="reference_annotation" type="data" /> | |
87 </when> | |
88 </conditional> | |
89 <param argument="-R" falsevalue="" help="consider only the reference transcripts that overlap any of the input transfrags (Sn correction)" label="Ignore reference transcripts that are not overlapped by any input transfrags" name="ignore_nonoverlapping_reference" truevalue="-R" type="boolean" /> | |
90 <param argument="-Q" falsevalue="" help="consider only the input transcripts that overlap any of the reference transcripts (Sp correction). Warning: this will discard all 'novel' loci!" label="Ignore input transcripts that are not overlapped by any reference transcripts" name="ignore_nonoverlapping_transfrags" truevalue="-Q" type="boolean" /> | |
91 <param argument="--strict-match" name="strict_match" type="boolean" checked="false" truevalue="--strict-match" falsevalue="" label="the match code '=' is only assigned when all exon boundaries match" help="code '~' is assigned for intron chain match or single-exon" /> | |
92 <param argument="-T" name="refmap_tmap" label="Generate tmap or refmap file for each input file" type="select" multiple="True"> | |
93 <option value="refmap" selected="True">refmap</option> | |
94 <option value="tmap" selected="True">tmap</option> | |
95 </param> | |
96 </when> | |
97 <when value="No"> | |
98 <param argument="-T" name="refmap_tmap" label="Generate tmap file for each input file" type="select" multiple="True"> | |
99 <option value="tmap" selected="True">tmap</option> | |
100 </param> | |
101 </when> | |
102 </conditional> | |
103 <conditional name="seq_data"> | |
104 <param help="Use sequence data for some optional classification functions, including the addition of the p_id attribute required by Cuffdiff." label="Use Sequence Data" name="use_seq_data" type="select"> | |
105 <option value="No">No</option> | |
106 <option value="Yes">Yes</option> | |
107 </param> | |
108 <when value="No"/> | |
109 <when value="Yes"> | |
110 <conditional name="seq_source"> | |
111 <param label="Choose the source for the reference sequence" name="index_source" type="select"> | |
112 <option value="cached">Locally cached</option> | |
113 <option value="history">History</option> | |
114 </param> | |
115 <when value="cached"> | |
116 <param argument="-s" label="Using reference genome" name="index" type="select"> | |
117 <options from_data_table="fasta_indexes"> | |
118 <filter column="dbkey" key="dbkey" ref="gffinputs" type="data_meta" /> | |
119 </options> | |
120 <validator message="No reference genome is available for the build associated with the selected input dataset" type="no_options" /> | |
121 </param> | |
122 </when> | |
123 <when value="history"> | |
124 <param argument="-s" format="fasta" label="Using reference file" name="ref_file" type="data" /> | |
125 </when> | |
126 </conditional> | |
127 </when> | |
128 </conditional> | |
129 <param name="discard_single_exon" argument="-M/-N" type="select" label="Discard single-exon transcripts"> | |
130 <option selected="True" value="">No</option> | |
131 <option value="-M">Discard single-exon transfrags and reference transcripts</option> | |
132 <option value="-N">Discard single-exon reference transcripts</option> | |
133 </param> | |
134 <param label="Discard duplicates" name="discard_duplicates" type="select" help="If -S and also --strict-match is given, exact matching of all exon boundaries is required"> | |
135 <option value="">None</option> | |
136 <option value="-D">discard 'duplicate' query transfrags within a single sample (-D)</option> | |
137 <option value="-S">Only discard 'duplicate' query or reference transcripts if their boundaries are fully contained within other, larger or identical transfrags (-S)</option> | |
138 </param> | |
139 <param name="no_merge" argument="--no-merge" type="boolean" checked="false" truevalue="--no-merge" falsevalue="" label="Disable close-exon merging" help="Default: merge exons separated by 'introns' shorter than 5 bases" /> | |
140 <param argument="-e" help="max. distance (range) allowed from free ends of terminal exons of reference transcripts when assessing exon accuracy. Default: 100" label="Max. Distance for assessing exon accuracy" name="max_dist_exon" type="integer" value="100" /> | |
141 <param argument="-d" help="max. distance (range) for grouping transcript start sites. Default: 100" label="Max distance for transcript grouping" name="max_dist_group" type="integer" value="100" /> | |
142 <param name="chr_stats" argument="--chr-stats" type="boolean" checked="false" truevalue="--chr-stats" falsevalue="" label="Show summary and accuracy data separately for each reference sequence in the transcript accuracy data set" /> | |
143 <section name="adv_output" title="Options for the combined GTF output file"> | |
144 <param argument="-p" type="text" value="TCONS" label="name prefix for consensus transcripts" help="for combined.gtf" /> | |
145 <param argument="-C" type="boolean" checked="false" truevalue="-C" falsevalue="" label="discard matching and 'contained' transfrags" help="i.e. collapse intron-redundant transfrags across all query files" /> | |
146 <param argument="-A" type="boolean" checked="false" truevalue="-A" falsevalue="" label="discard the 'contained' transfrags except intron-redundant transfrags starting with a different 5' exon" help="like -C but does not discard intron-redundant transfrags if they start with a different 5' exon" /> | |
147 <param argument="-X" type="boolean" checked="false" truevalue="-X" falsevalue="" label="discard the 'contained' transfrags also if ends stick out within the container's introns" help="like -C but also discard contained transfrags if transfrag ends stick out within the container's introns" /> | |
148 <param argument="-K" type="boolean" checked="false" truevalue="-K" falsevalue="" label="do NOT discard any redundant transfrag matching a reference" help="for -C/-A/-X" /> | |
149 </section> | |
150 </inputs> | |
151 <outputs> | |
152 <data format="txt" from_work_dir="gffcmp.stats" label="${tool.name} on ${on_string}: transcript accuracy" name="transcripts_stats" /> | |
153 <data format="tabular" from_work_dir="gffcmp.loci" label="${tool.name} on ${on_string}: loci" name="transcripts_loci" /> | |
154 <data format="tabular" from_work_dir="gffcmp.tracking" label="${tool.name} on ${on_string}: data ${gffinputs[0].hid} tracking file" name="transcripts_tracking" /> | |
155 <data format="gtf" from_work_dir="gffcmp.combined.gtf" label="${tool.name} on ${on_string}: combined transcripts" name="transcripts_combined"> | |
156 <filter>(isinstance(gffinputs, list) and len(gffinputs) > 1) or annotation['use_ref_annotation'] == "No"</filter> | |
157 </data> | |
158 <data format="gtf" from_work_dir="gffcmp.annotated.gtf" label="${tool.name} on ${on_string}: annotated transcripts" name="transcripts_annotated"> | |
159 <filter>not (isinstance(gffinputs, list) and len(gffinputs) > 1) and annotation['use_ref_annotation'] == "Yes"</filter> | |
160 </data> | |
161 <collection name="refmap_output" type="list" label="${tool.name} on ${on_string}: refmap"> | |
162 <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.refmap" ext="tabular" /> | |
163 <filter>annotation['refmap_tmap'] is not None and 'refmap' in annotation['refmap_tmap']</filter> | |
164 </collection> | |
165 <collection name="tmap_output" type="list" label="${tool.name} on ${on_string}: tmap"> | |
166 <discover_datasets pattern="gffcmp\.(?P&lt;designation&gt;.+)\.tmap" ext="tabular" /> | |
167 <filter>annotation['refmap_tmap'] is not None and 'tmap' in annotation['refmap_tmap']</filter> | |
168 </collection> | |
169 </outputs> | |
170 <citations> | |
171 <citation type="doi">10.1038/nbt.1621</citation> | |
172 </citations> | |
173 </tool> |