comparison frameshift_deletions_checks.xml @ 2:e8971ca74398 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/smallgenomeutilities commit 2189ba7df6d90ed10c6fdb07df93545f8f282339
author iuc
date Fri, 04 Aug 2023 16:34:44 +0000
parents 029d90b0c4f6
children
comparison
equal deleted inserted replaced
1:029d90b0c4f6 2:e8971ca74398
5 </macros> 5 </macros>
6 <expand macro="xrefs"/> 6 <expand macro="xrefs"/>
7 <requirements> 7 <requirements>
8 <requirement type="package" version="@TOOL_VERSION@">smallgenomeutilities</requirement> 8 <requirement type="package" version="@TOOL_VERSION@">smallgenomeutilities</requirement>
9 </requirements> 9 </requirements>
10 <!-- once we have version-from-git-tag in 0.4.0: <version_command>frameshift_deletion_checks &#x002D&#x002Dversion</version_command> --> 10 <version_command>frameshift_deletions_checks --version</version_command>
11 <command detect_errors="exit_code"> 11 <command detect_errors="exit_code">
12 <![CDATA[ 12 <![CDATA[
13 #if $input.is_of_type("cram"): 13 #if $input.is_of_type("cram"):
14 ln -s '$input' input.cram && 14 ln -s '$input' input.cram &&
15 ln -s '$input.metadata.cram_index' input.cram.crai && 15 ln -s '$input.metadata.cram_index' input.cram.crai &&
28 28
29 frameshift_deletions_checks 29 frameshift_deletions_checks
30 --input=input.${input.ext} 30 --input=input.${input.ext}
31 --consensus=consensus.fasta 31 --consensus=consensus.fasta
32 --reference=reference.fa 32 --reference=reference.fa
33 #if str($align_data.choice) == 'chain':
34 --chain='$align_data.chain'
35 #end if
33 #if str($ref_data.choice) == 'standard': 36 #if str($ref_data.choice) == 'standard':
34 --genes='$__tool_directory__'/annotations_NC_045512.2.gff3 37 --genes='$__tool_directory__'/annotations_NC_045512.2.gff3
35 --orf1ab='cds-YP_009724389.1' 38 --orf1ab='cds-YP_009724389.1'
36 #else: 39 #else:
37 --genes='$ref_data.genes' 40 --genes='$ref_data.genes'
45 <!-- ##cores \${GALAXY_SLOTS:-4} --> 48 <!-- ##cores \${GALAXY_SLOTS:-4} -->
46 </command> 49 </command>
47 <inputs> 50 <inputs>
48 <param argument="--consensus" type="data" format="fasta" label="Consensus" help="Fasta file containing the sample's consensus sequence (majority, with indels)" /> 51 <param argument="--consensus" type="data" format="fasta" label="Consensus" help="Fasta file containing the sample's consensus sequence (majority, with indels)" />
49 <param argument="--input" type="data" format="bam,cram" label="Input BAM" help="Input BAM file with sample's sequencing reads, aligned against the reference" /> 52 <param argument="--input" type="data" format="bam,cram" label="Input BAM" help="Input BAM file with sample's sequencing reads, aligned against the reference" />
53 <!-- keep format="txt" in sync with what bcftools_consensus.xml uses. See: https://github.com/galaxyproject/tools-iuc/blob/main/tools/bcftools/bcftools_consensus.xml#L137 -->
54 <conditional name="align_data">
55 <param name="choice" type="select" label="Consensus-to-reference alignment" help="To find insertions and deletions, the tool needs information on how the consensus aligns to the reference (lift-over). You can provide a .chain file describing how the consensus maps to the reference; otherwise MAFFT will be used to align the consensus to the reference.">
56 <option value="chain">Provide a .chain file</option>
57 <option value="mafft">Run MAFFT to obtain alignment</option>
58 </param>
59 <when value="chain">
60 <param argument="--chain" type="data" format="txt" optional="false" label="Chain file" help="Chain file describing how the consensus is aligned to the reference (e.g. output of `bcftools consensus --chain &#x2026;`)." />
61 </when>
62 <when value="mafft" />
63 </conditional>
50 <conditional name="ref_data"> 64 <conditional name="ref_data">
51 <param name="choice" type="select" label="Reference data selection" help="Select built-in genome files to base reported positions and annotations on the SARS-CoV-2 reference sequence NC_045512.2. If you have mapped to a different reference, select custom genome files and provide the reference sequence and genomic feature annotations for it in fasta and gff format, respectively."> 65 <param name="choice" type="select" label="Reference data selection" help="Select built-in genome files to base reported positions and annotations on the SARS-CoV-2 reference sequence NC_045512.2. If you have mapped to a different reference, select custom genome files and provide the reference sequence and genomic feature annotations for it in fasta and gff format, respectively.">
52 <option value="standard">Use built-in genome files</option> 66 <option value="standard">Use built-in genome files</option>
53 <option value="custom">Provide custom genome files</option> 67 <option value="custom">Provide custom genome files</option>
54 </param> 68 </param>
69 <outputs> 83 <outputs>
70 <data name="report" format="tabular"> 84 <data name="report" format="tabular">
71 <actions> 85 <actions>
72 <conditional name="out_options.english"> 86 <conditional name="out_options.english">
73 <!-- The "english" flag removes certain numerical columns and collapses them into new text columns --> 87 <!-- The "english" flag removes certain numerical columns and collapses them into new text columns -->
88 <!-- BUG the not-text columns *should* be identical (missing numerical columns will probably be fixed in a future release) -->
74 <when value="--english"> 89 <when value="--english">
75 <action name="column_names" type="metadata" default="ref_id,start_position,length,VARIANT,gene_region,reads_all,reads_fwd,reads_rev,deletions,freq_del,freq_del_fwd,freq_del_rev,deletions_fwd,deletions_rev,insertions,freq_insert,freq_insert_fwd,freq_insert_rev,insertions_fwd,insertions_rev,stops,freq_stop,freq_stop_fwd,freq_stop_rev,stops_fwd,stops_rev,matches_ref,pos_critical_inserts,pos_critical_dels,homopolymeric,ref_base,cons_id,variant_position_english,variant_diagnosis" /> 90 <action name="column_names" type="metadata" default="ref_id,start_position,length,VARIANT,gene_region,reads_all,reads_fwd,reads_rev,deletions,freq_del,freq_del_fwd,freq_del_rev,deletions_fwd,deletions_rev,insertions,freq_insert,freq_insert_fwd,freq_insert_rev,insertions_fwd,insertions_rev,stops,freq_stop,freq_stop_fwd,freq_stop_rev,stops_fwd,stops_rev,matches_ref,pos_critical_inserts,pos_critical_dels,homopolymeric,ref_base,cons_id,variant_position_english,variant_diagnosis" />
76 </when> 91 </when>
77 <when value="--no-english"> 92 <when value="--no-english">
78 <action name="column_names" type="metadata" default="ref_id,start_position,length,VARIANT,gene_region,aa_position,stop_mismatches,stoploss_nt,reads_all,reads_fwd,reads_rev,deletions,freq_del,freq_del_fwd,freq_del_rev,deletions_fwd,deletions_rev,insertions,freq_insert,freq_insert_fwd,freq_insert_rev,insertions_fwd,insertions_rev,stops,freq_stop,freq_stop_fwd,freq_stop_rev,stops_fwd,stops_rev,matches_ref,pos_critical_inserts,pos_critical_dels,homopolymeric,ref_base,cons_id" /> 93 <action name="column_names" type="metadata" default="ref_id,start_position,length,VARIANT,gene_region,aa_position,stop_mismatches,stoploss_nt,reads_all,reads_fwd,reads_rev,deletions,freq_del,freq_del_fwd,freq_del_rev,deletions_fwd,deletions_rev,insertions,freq_insert,freq_insert_fwd,freq_insert_rev,insertions_fwd,insertions_rev,stops,freq_stop,freq_stop_fwd,freq_stop_rev,stops_fwd,stops_rev,matches_ref,pos_critical_inserts,pos_critical_dels,homopolymeric,ref_base,cons_id" />
80 </conditional> 95 </conditional>
81 </actions> 96 </actions>
82 </data> 97 </data>
83 </outputs> 98 </outputs>
84 <tests> 99 <tests>
85 <test> 100 <!-- Test data:
86 <param name="consensus" value="consensus.bcftools.fasta.gz" /> 101 title: hCoV-19/Switzerland/GE-ETHZ-100339/2020
87 <param name="input" value="REF_aln_trim.cram" /> 102 internal id: 100339_62_H07/20201002_J9279
88 <conditional name="ref_data"> 103 ENA:
89 <param name="choice" value="standard" /> 104 sample: SAMEA8673857 ERS6358378
90 </conditional> 105 experiment: ERX11049759
91 <output name="report" value="frameshift_deletions_check.tsv" /> 106 run: ERR11647777
92 </test> 107 -->
93 <test> 108 <!-- redo alignment from scratch by running MAFFT -->
94 <param name="consensus" value="consensus.bcftools.fasta.gz" /> 109 <test>
95 <param name="input" value="REF_aln_trim.cram" /> 110 <param name="consensus" value="consensus.bcftools.fasta.gz" />
111 <param name="input" value="REF_aln_trim.cram" />
112 <conditional name="align_data">
113 <param name="choice" value="mafft" />
114 </conditional>
115 <conditional name="ref_data">
116 <param name="choice" value="standard" />
117 </conditional>
118 <output name="report" value="frameshift_deletions_check.tsv" />
119 <assert_command>
120 <not_has_text text="--chain" />
121 </assert_command>
122 <assert_stderr>
123 <has_text text="mafft" />
124 </assert_stderr>
125 </test>
126 <!-- using information in .chain -->
127 <test>
128 <param name="consensus" value="consensus.bcftools_nogap.fasta.gz" />
129 <param name="input" value="REF_aln_trim.cram" />
130 <conditional name="align_data">
131 <param name="choice" value="chain" />
132 <param name="chain" value="consensus.bcftools_nogap.chain" />
133 </conditional>
134 <conditional name="ref_data">
135 <param name="choice" value="standard" />
136 </conditional>
137 <output name="report" value="frameshift_deletions_check.tsv" />
138 <assert_command>
139 <has_text text="--chain" />
140 </assert_command>
141 <assert_stderr>
142 <not_has_text text="mafft" />
143 </assert_stderr>
144 </test>
145 <!-- using information in .chain and consensus marked with gaps (bcftools consensus mark-del '-') -->
146 <test>
147 <param name="consensus" value="consensus.bcftools.fasta.gz" />
148 <param name="input" value="REF_aln_trim.cram" />
149 <conditional name="align_data">
150 <param name="choice" value="chain" />
151 <param name="chain" value="consensus.bcftools.chain" />
152 </conditional>
153 <conditional name="ref_data">
154 <param name="choice" value="standard" />
155 </conditional>
156 <output name="report" value="frameshift_deletions_check.tsv" />
157 <assert_command>
158 <has_text text="--chain" />
159 </assert_command>
160 <assert_stderr>
161 <not_has_text text="mafft" />
162 </assert_stderr>
163 </test>
164 <test>
165 <param name="consensus" value="consensus.bcftools.fasta.gz" />
166 <param name="input" value="REF_aln_trim.cram" />
167 <conditional name="align_data">
168 <param name="choice" value="chain" />
169 <param name="chain" value="consensus.bcftools.chain" />
170 </conditional>
96 <conditional name="ref_data"> 171 <conditional name="ref_data">
97 <param name="choice" value="standard" /> 172 <param name="choice" value="standard" />
98 </conditional> 173 </conditional>
99 <section name="out_options"> 174 <section name="out_options">
100 <param name="english" value="false" /> 175 <param name="english" value="false" />
101 </section> 176 </section>
102 <output name="report" value="frameshift_deletions_check_no_english.tsv" /> 177 <output name="report" value="frameshift_deletions_check_no_english.tsv" />
103 </test> 178 <assert_command>
104 <test> 179 <has_text text="--chain" />
105 <param name="consensus" value="consensus.bcftools.fasta.gz" /> 180 </assert_command>
106 <param name="input" value="REF_aln_trim.cram" /> 181 <assert_stderr>
182 <not_has_text text="mafft" />
183 </assert_stderr>
184 </test>
185 <test>
186 <param name="consensus" value="consensus.bcftools.fasta.gz" />
187 <param name="input" value="REF_aln_trim.cram" />
188 <conditional name="align_data">
189 <param name="choice" value="chain" />
190 <param name="chain" value="consensus.bcftools.chain" />
191 </conditional>
107 <conditional name="ref_data"> 192 <conditional name="ref_data">
108 <param name="choice" value="custom" /> 193 <param name="choice" value="custom" />
109 <param name="reference" value="NC_045512.2.fasta" /> 194 <param name="reference" value="NC_045512.2.fasta" />
110 <param name="genes" value="Genes_NC_045512.2.GFF3" /> 195 <param name="genes" value="Genes_NC_045512.2.GFF3" />
111 </conditional> 196 </conditional>
112 <output name="report" value="frameshift_deletions_check.tsv" /> 197 <output name="report" value="frameshift_deletions_check.tsv" />
198 <assert_command>
199 <has_text text="--chain" />
200 </assert_command>
201 <assert_stderr>
202 <not_has_text text="mafft" />
203 </assert_stderr>
113 </test> 204 </test>
114 </tests> 205 </tests>
115 <help> 206 <help>
116 <![CDATA[ 207 <![CDATA[
117 Produces a report about frameshifting indels in a consensus sequence. 208 Produces a report about frameshifting indels in a consensus sequence.
118 209
119 Developed as part of the `V-pipe workflow for analysing NGS data of short viral genomes <https://github.com/cbg-ethz/V-pipe>`_. 210 The smallgenomeutilities are part of the `V-pipe workflow for analysing NGS data of short viral genomes <https://github.com/cbg-ethz/V-pipe>`_.
120 211
121 Columns signification: 212 Columns signification:
122 ---------------------- 213 ----------------------
123 214
215 * *ref_id* / *cons_id*: name of the sequence in the reference and consensus
216 * *start_position* / *length*: location of the variant
217 * *VARIANT*: one of: "insertion", "deletion", "stopgain" or "stoploss"
124 * *gene_region*: Gene in which the deletion is found according to ``--genes`` argument; 218 * *gene_region*: Gene in which the deletion is found according to ``--genes`` argument;
125 * *reads_all*: Total number of reads covering the indel; 219 * *reads_all*: Total number of reads covering the indel;
126 * *reads_fwd*: Total number of forward reads covering the indel; 220 * *reads_fwd*: Total number of forward reads covering the indel;
127 * *reads_rev*: Total number of reverse reads covering the indel; 221 * *reads_rev*: Total number of reverse reads covering the indel;
128 * *deletions/insertions*: Number of reads supporting the deletion/insertion; 222 * *deletions* / *insertions*: Number of reads supporting the deletion/insertion;
129 * *freq_del/freq_insert*: Fraction of reads supporting the deletion/insertion; 223 * *freq_del* / *freq_insert*: Fraction of reads supporting the deletion/insertion;
130 * *matches_ref*: number of reads that match the reference base; 224 * *matches_ref*: number of reads that match the reference base;
131 * *pos_critical_inserts*: Start positions of insertions in the same gene_region that occur in > 40% of reads; 225 * *pos_critical_inserts*: Start positions of insertions in the same gene_region that occur in > 40% of reads;
132 * *pos_critical_dels*: Start positions of deletions in the same gene_region that occur in > 40% of reads; 226 * *pos_critical_dels*: Start positions of deletions in the same gene_region that occur in > 40% of reads;
133 * *homopolymeric*: True if either around the start or end position of the deletion three bases are the same, which may have caused the polymerase to skip during reverse transcription of viral RNA to cDNA, e.g. AATAG; 227 * *homopolymeric*: True if either around the start or end position of the deletion three bases are the same, which may have caused the polymerase to skip during reverse transcription of viral RNA to cDNA, e.g. AATAG;
134 * *ref_base*: base in the reference genome; 228 * *ref_base*: base in the reference genome;
135 * *indel_position_english*: english sentence describing the indel; 229 * *variant_position_english*: english sentence describing the indel or stop;
136 * *indel_diagnosis*: english sentence with the indel diagnosis; 230 * *variant_diagnosis*: english sentence with the indel diagnosis
137 * *orf1ab*: CDS ID for the full Orf1ab CDS, comprising the ribosomal shift. In the GFF this CDS should consist of 2 entries with the same CDS ID due to the partial overlap caused by the ribosomal shift at translation time
138 ]]> 231 ]]>
139 </help> 232 </help>
140 <expand macro="citations"/> 233 <expand macro="citations"/>
141 </tool> 234 </tool>