Mercurial > repos > iuc > smgu_frameshift_deletions_checks

diff frameshift_deletions_checks.xml @ 1:029d90b0c4f6 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/smallgenomeutilities commit e702dcdbc7c3235ef3c4ee8998c7247d1af49465
author: iuc
date: Fri, 14 Jul 2023 22:07:22 +0000
parents: f079716f598c
children: e8971ca74398
--- a/frameshift_deletions_checks.xml	Wed May 31 17:10:11 2023 +0000
+++ b/frameshift_deletions_checks.xml	Fri Jul 14 22:07:22 2023 +0000
@@ -11,52 +11,104 @@
     <command detect_errors="exit_code">
     <![CDATA[
 #if $input.is_of_type("cram"):
-echo 'is CRAM' >&2 &&
- ln -vs '$input' 'input.${input.ext}' >&2 &&
- ln -vs '$input.metadata.cram_index' 'input.${input.ext}.crai' >&2 &&
+ln -s '$input' input.cram &&
+ln -s '$input.metadata.cram_index' input.cram.crai &&
 #elif $input.is_of_type("bam"):
- echo 'is BAM' &&
- ln -s '$input' 'input.${input.ext}' >&2 &&
- ln -s '$input.metadata.bam_index' 'input.${input.ext}.bai' >&2 &&
+ln -s '$input' input.bam &&
+ln -s '$input.metadata.bam_index' input.bam.bai &&
 #else:
 # raise TypeError('Unknown input alignment type ${input.ext}')
 #end if
-ln -vs '$consensus' 'consensus.${consensus.ext}' >&2 &&
-ln -vs '$reference' 'reference.${reference.ext}' >&2 &&
+ln -s '$consensus' consensus.fasta &&
+#if str($ref_data.choice) == 'custom':
+ln -s '$reference' reference.fa &&
+#else:
+ln -s '$__tool_directory__/ref_NC_045512.2.fasta' reference.fa &&
+#end if
 
 frameshift_deletions_checks 
-    --input='input.${input.ext}'
-    --consensus='consensus.${consensus.ext}'
-    --reference='reference.${reference.ext}'
-    --genes='$genes'
-    --output='$report' 
-#if $orf1ab != '':
-    --orf1ab='$orf1ab'
+    --input=input.${input.ext}
+    --consensus=consensus.fasta
+    --reference=reference.fa
+#if str($ref_data.choice) == 'standard':
+    --genes='$__tool_directory__'/annotations_NC_045512.2.gff3
+    --orf1ab='cds-YP_009724389.1'
+#else:
+    --genes='$ref_data.genes'
+    --orf1ab='$ref_data.orf1ab'
 #end if
-    '$english'
-    $zero_based
+    $out_options.english
+    $out_options.zero_based
+    --output=report.tsv &&
+python '$__tool_directory__/frameshift_deletions_report_fixer.py' report.tsv '$report'
 ]]>
     <!-- ##cores \${GALAXY_SLOTS:-4} -->
     </command>
     <inputs>
         <param argument="--consensus" type="data" format="fasta" label="Consensus" help="Fasta file containing the sample's consensus sequence (majority, with indels)" />
         <param argument="--input" type="data" format="bam,cram" label="Input BAM" help="Input BAM file with sample's sequencing reads, aligned against the reference" />
-        <param argument="--reference" type="data" format="fasta" label="Reference" help="Fasta file containing the reference sequence (used during alignment) to compare against" />
-        <param argument="--genes" type="data" format="gff" label="Genes GFF" help="GFF file listing genes positions on the reference sequence" />
-        
-        <param argument="--english" type="boolean" truevalue="--english" falsevalue="--no-english" checked="true" optional="true" label="Write Summary Diagnosis?" help="If checked writes english summary diagnosis." />
-        <param argument="--zero-based" type="boolean" truevalue="--zero-based" falsevalue="" checked="false" optional="true" label="Use 0-based Sequence Positions?" help="Use 0-based (python) instead of 1-based (standard) seq positions" />
-        <param argument="--orf1ab" type="text" value="cds-YP_009724389.1" optional="true" label="ORF1AB" help="CDS ID for the full Orf1ab CDS, comprising the ribosomal shift. In the GFF this CDS should consist of 2 entries with the same CDS ID due to the partial overlap caused by the ribosomal shift at translation time."/>
+        <conditional name="ref_data">
+            <param name="choice" type="select" label="Reference data selection" help="Select built-in genome files to base reported positions and annotations on the SARS-CoV-2 reference sequence NC_045512.2. If you have mapped to a different reference, select custom genome files and provide the reference sequence and genomic feature annotations for it in fasta and gff format, respectively.">
+                <option value="standard">Use built-in genome files</option>
+                <option value="custom">Provide custom genome files</option>
+            </param>
+            <when value="standard" />
+            <when value="custom">
+                <param argument="--reference" type="data" format="fasta" label="Reference" help="Fasta input containing the reference sequence (used during alignment) to compare against" />
+                <param argument="--genes" type="data" format="gff" label="Genes GFF" help="GFF input listing genes positions on the reference sequence" />
+                <param argument="--orf1ab" type="text" value="cds-YP_009724389.1" optional="false" label="Coding sequence containing ribosomal slippage site" help="ID of the full CDS comprising the ribosomal slippage site as it appears in the GFF input. In the GFF this CDS should consist of 2 entries with the same CDS ID due to the partial overlap caused by the ribosomal shift at translation time. Hint: To use the tool with genomes that do not have a ribosomal slippage site, enter an ID that is not present in the GFF.">
+                    <validator type="expression">value.strip()</validator>
+                </param>
+            </when>
+        </conditional>
+        <section name="out_options" title="Output format options" expanded="true">
+            <param argument="--english" type="boolean" truevalue="--english" falsevalue="--no-english" checked="true" label="Write Summary Diagnosis?" help="If checked writes english summary diagnosis." />
+            <param argument="--zero-based" type="boolean" truevalue="--zero-based" falsevalue="" checked="false" optional="true" label="Use 0-based Sequence Positions?" help="Use 0-based instead of 1-based genome positions" />
+        </section>
     </inputs>
     <outputs>
-        <data name="report" format="tabular" />
+        <data name="report" format="tabular">
+            <actions>
+                <conditional name="out_options.english">
+                    <!-- The "english" flag removes certain numerical columns and collapses them into new text columns -->
+                    <when value="--english">
+                        <action name="column_names" type="metadata" default="ref_id,start_position,length,VARIANT,gene_region,reads_all,reads_fwd,reads_rev,deletions,freq_del,freq_del_fwd,freq_del_rev,deletions_fwd,deletions_rev,insertions,freq_insert,freq_insert_fwd,freq_insert_rev,insertions_fwd,insertions_rev,stops,freq_stop,freq_stop_fwd,freq_stop_rev,stops_fwd,stops_rev,matches_ref,pos_critical_inserts,pos_critical_dels,homopolymeric,ref_base,cons_id,variant_position_english,variant_diagnosis" />
+                    </when>
+                    <when value="--no-english">
+                        <action name="column_names" type="metadata" default="ref_id,start_position,length,VARIANT,gene_region,aa_position,stop_mismatches,stoploss_nt,reads_all,reads_fwd,reads_rev,deletions,freq_del,freq_del_fwd,freq_del_rev,deletions_fwd,deletions_rev,insertions,freq_insert,freq_insert_fwd,freq_insert_rev,insertions_fwd,insertions_rev,stops,freq_stop,freq_stop_fwd,freq_stop_rev,stops_fwd,stops_rev,matches_ref,pos_critical_inserts,pos_critical_dels,homopolymeric,ref_base,cons_id" />
+                    </when>
+                </conditional>
+            </actions>
+        </data>
     </outputs>
     <tests>
         <test>
             <param name="consensus" value="consensus.bcftools.fasta.gz" />
             <param name="input" value="REF_aln_trim.cram" />
-            <param name="reference" value="NC_045512.2.fasta.gz" />
-            <param name="genes" value="Genes_NC_045512.2.GFF3" />
+            <conditional name="ref_data">
+                <param name="choice" value="standard" />
+            </conditional>
+            <output name="report" value="frameshift_deletions_check.tsv" />
+        </test>
+        <test>
+            <param name="consensus" value="consensus.bcftools.fasta.gz" />
+            <param name="input" value="REF_aln_trim.cram" />
+            <conditional name="ref_data">
+                <param name="choice" value="standard" />
+            </conditional>
+            <section name="out_options">
+                <param name="english" value="false" />
+            </section>
+            <output name="report" value="frameshift_deletions_check_no_english.tsv" />
+        </test>
+        <test>
+            <param name="consensus" value="consensus.bcftools.fasta.gz" />
+            <param name="input" value="REF_aln_trim.cram" />
+            <conditional name="ref_data">
+                <param name="choice" value="custom" />
+                <param name="reference" value="NC_045512.2.fasta" />
+                <param name="genes" value="Genes_NC_045512.2.GFF3" />
+            </conditional>
             <output name="report" value="frameshift_deletions_check.tsv" />
         </test>
     </tests>
@@ -71,8 +123,8 @@
 
 * *gene_region*: Gene in which the deletion is found according to ``--genes`` argument;
 * *reads_all*: Total number of reads covering the indel;
-* *reads_fwd*: Total nubmer of forward reads covering the indel;
-* *reads_rev*: Total nubmer of reverse reads covering the indel;
+* *reads_fwd*: Total number of forward reads covering the indel;
+* *reads_rev*: Total number of reverse reads covering the indel;
 * *deletions/insertions*: Number of reads supporting the deletion/insertion;
 * *freq_del/freq_insert*: Fraction of reads supporting the deletion/insertion;
 * *matches_ref*: number of reads that match the reference base;
author	iuc
date	Fri, 14 Jul 2023 22:07:22 +0000
parents	f079716f598c
children	e8971ca74398