diff gtf2featureAnnotation.xml @ 1:247b439a78f7 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/util/.shed.yml commit 194d2e0af16624c9a3d1af92f7b3686d2e0ee552
author ebi-gxa
date Fri, 18 Oct 2019 10:10:54 -0400
parents
children e255c0e5dfca
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gtf2featureAnnotation.xml	Fri Oct 18 10:10:54 2019 -0400
@@ -0,0 +1,96 @@
+<tool id="_ensembl_gtf2gene_list" name="GTF2GeneAnnotationTable" version="1.42.1+galaxy1">
+    <description>extracts a complete annotation table or subsets thereof from an Ensembl GTF using rtracklayer</description>
+    <requirements> <!-- Packages the tool's command depends on. -->
+      <requirement type="package" version="1.42.1">bioconductor-rtracklayer</requirement> <!-- pinned to 1.42.1, the same number that prefixes the tool version -->
+      <requirement type="package">bioconductor-biostrings</requirement> <!-- NOTE(review): no version pinned; consider pinning for reproducible installs -->
+      <requirement type="package">r-optparse</requirement> <!-- NOTE(review): no version pinned; consider pinning for reproducible installs -->
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+## Link the input under a fixed .gtf name, then run the bundled R script. The chain stops if the link step fails.
+ln -s '$gtf_input' input.gtf && '$__tool_directory__/gtf2featureAnnotation.R' --gtf-file input.gtf
+#if $noheader
+    --no-header
+#end if
+#if $version_transcripts
+    --version-transcripts
+#end if
+#if $mito.mark_mito
+    --mito --mito-chr '${mito.mito_chr}' --mito-biotypes '${mito.mito_biotypes}'
+#end if
+#if $cdnas.filter_cdnas
+    --filter-cdnas '${cdnas.fasta_input}' --filter-cdnas-field '${cdnas.cdnas_field}' --filter-cdnas-output '${fasta_output}'
+#end if
+    --feature-type '${feature_type}' --first-field '${first_field}' --output-file annotation.txt --fields '${fields}'
+    ]]></command>
+
+    <inputs>
+        <param name="gtf_input" type="data" format="gff" label="Ensembl GTF file"/>
+        <param name="feature_type" type="text" optional="true" value="gene" label="Feature type for which to derive annotation"/>
+        <param name="first_field" type="text" optional="true" value="gene_id" label="Field to place first in output table"/>
+        <param name="noheader" type="boolean" checked="false" label="Suppress header line in output?"/>
+        <param name="fields" type="text" optional="true" value="" label="Comma-separated list of field names to extract from the GTF (default: use all fields)"/>
+        <param name="version_transcripts" type="boolean" checked="false" label="Append version to transcript identifiers?" help="For transcript feature type only: where the GTF contains transcript versions, should these be appended to transcript identifiers? Useful when generating transcript/gene mappings for use with transcriptomes"/>
+        <conditional name="mito"> <!-- mitochondrial flagging options, shown only when mark_mito is true -->
+            <param name="mark_mito" type="boolean" checked="true" label="Flag mitochondrial features?"/>
+            <when value="true">
+                <param name="mito_chr" type="text" optional="true" value="mt,mitochondrion_genome,mito,m,chrM,chrMt" label="Comma-separated list of possible mitochondrial chromosome names (case insensitive)"/>
+                <param name="mito_biotypes" type="text" optional="true" value="mt_trna,mt_rrna,mt_trna_pseudogene" label="Comma-separated list of possible mitochondrial biotypes (case insensitive)"/>
+            </when>
+            <when value="false"/>
+        </conditional>
+        <conditional name="cdnas"> <!-- cDNA FASTA filtering options, shown only when filter_cdnas is true -->
+            <param name="filter_cdnas" type="boolean" checked="false" label="Filter a FASTA-format cDNA file to match annotations?" help="For some applications, e.g. transcriptome mappers, its useful to match a cDNAs file to an annotation list (e.g. transcript-to-gene mapping)"/>
+            <when value="true">
+                <param name="fasta_input" type="data" format="fasta" label="FASTA-format cDNA/ transcript file"/>
+                <param name="cdnas_field" type="text" optional="true" value="transcript_id" label="Annotation field to match with sequences."/>
+            </when>
+            <when value="false"/>
+        </conditional>
+    </inputs>
+
+    <outputs> <!-- annotation table is always produced; the filtered FASTA only when cDNA filtering is enabled -->
+        <data name="feature_annotation" format="tsv" from_work_dir="annotation.txt" label="${tool.name} on ${on_string}: annotation table"/>
+        <data name="fasta_output" format="fasta" from_work_dir="filtered.fa.gz" label="${tool.name} on ${on_string}: annotation-matched sequences">
+            <filter>cdnas['filter_cdnas']</filter> <!-- filter_cdnas is nested in the "cdnas" conditional, so it must be addressed through it -->
+        </data> <!-- NOTE(review): the command passes ${fasta_output} to the filter-cdnas-output option while this output also declares from_work_dir; confirm which is intended -->
+    </outputs>
+    <tests>
+        <test> <!-- default-parameter run: only the GTF input is set, and the annotation table is compared with annotation.txt -->
+            <param name="gtf_input" ftype="gtf" value="test.gtf"/>
+            <output name="feature_annotation" file="annotation.txt"/>
+        </test>
+    </tests>
+
+
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+Given an Ensembl GTF file, it will extract all information on chromosomes, coordinates, and attributes provided at the specified feature level. Mitochondrial features can also be flagged.
+
+You can also supply a fasta-format file of sequences, which will be filtered by identifier to match annotation. This can be useful for tools such as Alevin which need a transcript-to-gene mapping and a transcriptome file without any missing entries (with respect to annotation).
+
+
+**Inputs**
+
+    * Ensembl GTF file
+
+-----
+
+**Outputs**
+
+    * Gene annotations in tsv.
+]]></help>
+<citations>
+  <citation type="bibtex">
+@misc{github-hinxton-single-cell,
+author = {Jonathan Manning, EBI Gene Expression Team},
+year = {2019},
+title = {Hinxton Single Cell Analysis Environment},
+publisher = {GitHub},
+journal = {GitHub repository},
+url = {https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary},
+  }</citation>
+</citations>
+</tool>