diff frameshift_deletions_checks.xml @ 0:f079716f598c draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/smallgenomeutilities commit f6d1be0a6643389820c4622d4cb996ac03531107
author iuc
date Wed, 31 May 2023 17:10:11 +0000
parents
children 029d90b0c4f6
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/frameshift_deletions_checks.xml	Wed May 31 17:10:11 2023 +0000
@@ -0,0 +1,89 @@
+<tool id="smgu_frameshift_deletions_checks" name="Frameshift Deletions Checks" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>reports on frameshifting indels in consensus sequence</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="xrefs"/>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">smallgenomeutilities</requirement>
+    </requirements>
+    <!-- once we have version-from-git-tag in 0.4.0: <version_command>frameshift_deletion_checks &#x002D&#x002Dversion</version_command> -->
+    <command detect_errors="exit_code">
+    <![CDATA[
+#if $input.is_of_type("cram"):
+echo 'is CRAM' >&2 &&
+ ln -vs '$input' 'input.${input.ext}' >&2 &&
+ ln -vs '$input.metadata.cram_index' 'input.${input.ext}.crai' >&2 &&
+#elif $input.is_of_type("bam"):
+ echo 'is BAM' &&
+ ln -s '$input' 'input.${input.ext}' >&2 &&
+ ln -s '$input.metadata.bam_index' 'input.${input.ext}.bai' >&2 &&
+#else:
+# raise TypeError('Unknown input alignment type ${input.ext}')
+#end if
+ln -vs '$consensus' 'consensus.${consensus.ext}' >&2 &&
+ln -vs '$reference' 'reference.${reference.ext}' >&2 &&
+
+frameshift_deletions_checks 
+    --input='input.${input.ext}'
+    --consensus='consensus.${consensus.ext}'
+    --reference='reference.${reference.ext}'
+    --genes='$genes'
+    --output='$report' 
+#if $orf1ab != '':
+    --orf1ab='$orf1ab'
+#end if
+    '$english'
+    $zero_based
+]]>
+    <!-- ##cores \${GALAXY_SLOTS:-4} -->
+    </command>
+    <inputs>
+        <param argument="--consensus" type="data" format="fasta" label="Consensus" help="Fasta file containing the sample's consensus sequence (majority, with indels)" />
+        <param argument="--input" type="data" format="bam,cram" label="Input BAM" help="Input BAM file with sample's sequencing reads, aligned against the reference" />
+        <param argument="--reference" type="data" format="fasta" label="Reference" help="Fasta file containing the reference sequence (used during alignment) to compare against" />
+        <param argument="--genes" type="data" format="gff" label="Genes GFF" help="GFF file listing genes positions on the reference sequence" />
+        
+        <param argument="--english" type="boolean" truevalue="--english" falsevalue="--no-english" checked="true" optional="true" label="Write Summary Diagnosis?" help="If checked writes english summary diagnosis." />
+        <param argument="--zero-based" type="boolean" truevalue="--zero-based" falsevalue="" checked="false" optional="true" label="Use 0-based Sequence Positions?" help="Use 0-based (python) instead of 1-based (standard) seq positions" />
+        <param argument="--orf1ab" type="text" value="cds-YP_009724389.1" optional="true" label="ORF1AB" help="CDS ID for the full Orf1ab CDS, comprising the ribosomal shift. In the GFF this CDS should consist of 2 entries with the same CDS ID due to the partial overlap caused by the ribosomal shift at translation time."/>
+    </inputs>
+    <outputs>
+        <data name="report" format="tabular" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="consensus" value="consensus.bcftools.fasta.gz" />
+            <param name="input" value="REF_aln_trim.cram" />
+            <param name="reference" value="NC_045512.2.fasta.gz" />
+            <param name="genes" value="Genes_NC_045512.2.GFF3" />
+            <output name="report" value="frameshift_deletions_check.tsv" />
+        </test>
+    </tests>
+    <help>
+    <![CDATA[
+Produces a report about frameshifting indels in a consensus sequences.
+
+Developed as part of the `V-pipe workflow for analysing NGS data of short viral genomes <https://github.com/cbg-ethz/V-pipe>`_.
+
+Columns signification:
+----------------------
+
+* *gene_region*: Gene in which the deletion is found according to ``--genes`` argument;
+* *reads_all*: Total number of reads covering the indel;
+* *reads_fwd*: Total nubmer of forward reads covering the indel;
+* *reads_rev*: Total nubmer of reverse reads covering the indel;
+* *deletions/insertions*: Number of reads supporting the deletion/insertion;
+* *freq_del/freq_insert*: Fraction of reads supporting the deletion/insertion;
+* *matches_ref*: number of reads that matche with the reference base;
+* *pos_critical_inserts*: Start positions of insertions in the same gene_region that occur in > 40% of reads;
+* *pos_critical_dels*: Start positions of deletions in the same gene_region that occur in > 40% of reads;
+* *homopolymeric*: True if either around the start or end position of the deletion three bases are the same, which may have caused the polymerase to skip during reverse transcription of viral RNA to cDNA, e.g. AATAG;
+* *ref_base*: base in the reference genome;
+* *indel_position_english*: english sentence describing the indel;
+* *indel_diagnosis*: english sentence with the indel diagnosis;
+* *orf1ab*: CDS ID for the full Orf1ab CDS, comprising the ribosomal shift. In the GFF this CDS should consist of 2 entries with the same CDS ID due to the parital overlap caused by the ribosomal shift at translation time
+]]>
+    </help>
+    <expand macro="citations"/>
+</tool>