view gtf2featureAnnotation.xml @ 1:247b439a78f7 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/util/.shed.yml commit 194d2e0af16624c9a3d1af92f7b3686d2e0ee552
author ebi-gxa
date Fri, 18 Oct 2019 10:10:54 -0400
parents
children e255c0e5dfca
line wrap: on
line source

<tool id="_ensembl_gtf2gene_list" name="GTF2GeneAnnotationTable" version="1.42.1+galaxy1">
    <description>extracts a complete annotation table or subsets thereof from an Ensembl GTF using rtracklayer</description>
    <!-- Package dependencies declared for this tool; only rtracklayer carries a version pin. -->
    <requirements>
        <requirement type="package" version="1.42.1">bioconductor-rtracklayer</requirement>
        <requirement type="package">bioconductor-biostrings</requirement>
        <requirement type="package">r-optparse</requirement>
    </requirements>
    <!--
      Command template (Cheetah).
      1. Symlinks the GTF dataset into the job working directory as input.gtf.
      2. Runs gtf2featureAnnotation.R from the tool directory, appending optional
         flags according to the boolean and conditional parameters declared in
         the inputs section.
      The annotation table is written to annotation.txt; the filtered FASTA, when
      requested, is written straight to the fasta_output dataset path.
      The two steps are chained with a logical AND so the script is not started
      when the symlink cannot be created. The script path and every substituted
      value are single-quoted so the shell passes them through literally, even
      when a path contains spaces.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ln -s '$gtf_input' input.gtf &&
        '$__tool_directory__/gtf2featureAnnotation.R' --gtf-file input.gtf
        #if $noheader
            --no-header
        #end if
        #if $version_transcripts
            --version-transcripts
        #end if
        #if $mito.mark_mito
            --mito --mito-chr '${mito.mito_chr}' --mito-biotypes '${mito.mito_biotypes}'
        #end if
        #if $cdnas.filter_cdnas
            --filter-cdnas '${cdnas.fasta_input}' --filter-cdnas-field '${cdnas.cdnas_field}' --filter-cdnas-output '${fasta_output}'
        #end if
        --feature-type '${feature_type}' --first-field '${first_field}' --output-file annotation.txt --fields '${fields}'
    ]]></command>

    <inputs>
        <!-- Annotation file to summarise; linked into the working directory as input.gtf by the command. -->
        <param name="gtf_input" type="data" format="gff" label="Ensembl GTF file" />

        <!-- Free-text options forwarded to the script as its feature type, first field and fields arguments. -->
        <param name="feature_type" type="text" optional="true" value="gene" label="Feature type for which to derive annotation"/>
        <param name="first_field" type="text" optional="true" value="gene_id" label="Field to place first in output table"/>
        <param name="noheader" type="boolean" checked="false" label="Suppress header line in output?"/>
        <param name="fields" type="text" optional="true" value="" label="Comma-separated list of field names to extract from the GTF (default: use all fields)"/>
        <param name="version_transcripts" type="boolean" checked="false" label="Append version to transcript identifiers?" help="For transcript feature type only: where the GTF contains transcript versions, should these be appended to transcript identifiers? Useful when generating transcript/gene mappings for use with transcriptomes"/>

        <!-- Mitochondrial flagging: the chromosome and biotype lists are only shown, and only passed on, when enabled. -->
        <conditional name="mito">
            <param name="mark_mito" type="boolean" checked="true" label="Flag mitochondrial features?"/>
            <when value="true">
                <param name="mito_chr" type="text" optional="true" value="mt,mitochondrion_genome,mito,m,chrM,chrMt" label="Comma-separated list of possible mitochondrial chromosome names (case insensitive)"/>
                <param name="mito_biotypes" type="text" optional="true" value="mt_trna,mt_rrna,mt_trna_pseudogene" label="Comma-separated list of possible mitochondrial biotypes (case insensitive)"/>
            </when>
            <when value="false" />
        </conditional>

        <!-- Optional cDNA filtering: enabling it asks for a FASTA input and the annotation field used for matching. -->
        <conditional name="cdnas">
            <param name="filter_cdnas" type="boolean" checked="false" label="Filter a FASTA-format cDNA file to match annotations?" help="For some applications, e.g. transcriptome mappers, it's useful to match a cDNAs file to an annotation list (e.g. transcript-to-gene mapping)"/>
            <when value="true">
                <param name="fasta_input" type="data" format="fasta" label="FASTA-format cDNA/ transcript file" />
                <param name="cdnas_field" type="text" optional="true" value="transcript_id" label="Annotation field to match with sequences."/>
            </when>
            <when value="false" />
        </conditional>
    </inputs>

    <outputs>
        <!-- Annotation table, collected from annotation.txt in the job working directory. -->
        <data name="feature_annotation" format="tsv" from_work_dir="annotation.txt" label="${tool.name} on ${on_string}: annotation table"/>
        <!--
          Annotation-matched sequences, produced only when cDNA filtering is enabled.
          The command hands this dataset's own path to the script, so nothing has to
          be collected from the working directory and no from_work_dir is declared.
          The filter addresses the boolean through its enclosing conditional; the bare
          name filter_cdnas does not resolve because the parameter is nested inside
          the cdnas conditional.
          NOTE(review): the file name previously given here (filtered.fa.gz) suggests
          the script may gzip its output; if so the format should be fasta.gz. Confirm
          against gtf2featureAnnotation.R.
        -->
        <data name="fasta_output" format="fasta" label="${tool.name} on ${on_string}: annotation-matched sequences">
            <filter>cdnas['filter_cdnas']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Default-parameter run: only the GTF is supplied; the annotation table is compared with annotation.txt. -->
        <test>
            <param name="gtf_input" value="test.gtf" ftype="gtf"/>
            <output name="feature_annotation" file="annotation.txt"/>
        </test>
    </tests>


    <help><![CDATA[
.. class:: infomark

**What it does**

Given an Ensembl GTF file, it will extract all information on chromosomes, coordinates, and attributes provided at the specified feature level. Mitochondrial features can also be flagged.

You can also supply a fasta-format file of sequences, which will be filtered by identifier to match annotation. This can be useful for tools such as Alevin which need a transcript-to-gene mapping and a transcriptome file without any missing entries (with respect to annotation).


**Inputs**

    * Ensembl GTF file
    * Optionally, a FASTA-format cDNA/transcript file to be filtered to match the annotation

-----

**Outputs**

    * Feature annotations in TSV format.
    * Optionally, the FASTA sequences whose identifiers match the annotation.
]]></help>
<!--
  BibTeX entry for the tool collection. Authors are separated with "and", and the
  team name is wrapped in braces so BibTeX treats it as a single corporate author
  instead of splitting it into first and last names.
-->
<citations>
  <citation type="bibtex">
@misc{github-hinxton-single-cell,
author = {Jonathan Manning and {EBI Gene Expression Team}},
year = {2019},
title = {Hinxton Single Cell Analysis Environment},
publisher = {GitHub},
journal = {GitHub repository},
url = {https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary},
  }</citation>
</citations>
</tool>