Mercurial > repos > ebi-gxa > gtf2gene_list

<tool id="_ensembl_gtf2gene_list" name="GTF2GeneList" version="1.52.0+galaxy0" profile="18.01">
    <description>extracts a complete annotation table or subsets thereof from an Ensembl GTF using rtracklayer</description>
    <!-- Single package dependency for this tool; presumably the package that supplies the
         gtf2featureAnnotation.R script invoked in the command section (TODO confirm). -->
    <requirements>
      <requirement type="package" version="1.0.1">atlas-gene-annotation-manipulation</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
## Expose the input GTF under a fixed name; '&&' stops the job early if the link cannot be created.
ln -s '$gtf_input' input.gtf &&
gtf2featureAnnotation.R --gtf-file input.gtf
#if $noheader
--no-header
#end if
#if $version_transcripts
--version-transcripts
#end if
## Mitochondrial flagging options are only available when the 'mito' conditional is switched on.
#if $mito.mark_mito
--mito --mito-chr '${mito.mito_chr}' --mito-biotypes '${mito.mito_biotypes}'
#end if
## cDNA options only exist inside the parse_cdnas=true branch of the 'cdnas' conditional.
#if $cdnas.parse_cdnas
--parse-cdnas '${cdnas.fasta_input}' --parse-cdna-field '${cdnas.cdnas_field}'
#if $cdnas.filter_cdnas
## Write to the file name that the fasta_output dataset declares in its from_work_dir attribute.
--filter-cdnas-output filtered.fa.gz
#end if
#if $cdnas.parse_cdna_names
--parse-cdna-names
#end if
#end if
## fill_empty is a top-level parameter, so it must not depend on the cDNA options being enabled.
#if $fill_empty
--fill-empty '${fill_empty}'
#end if
--feature-type '${feature_type}' --first-field '${first_field}' --output-file annotation.txt --fields '${fields}'
    ]]></command>

    <inputs>
        <!-- Core options: the GTF to read and how the output table is shaped. -->
        <param name="gtf_input" type="data" format="gff" label="Ensembl GTF file" />
        <param name="feature_type" type="text" optional="true" value="gene" label="Feature type for which to derive annotation"/>
        <param name="first_field" type="text" optional="true" value="gene_id" label="Field to place first in output table"/>
        <param name="noheader" type="boolean" checked="false" label="Suppress header line in output?"/>
        <param name="fields" type="text" optional="true" value="" label="Comma-separated list of field names to extract from the GTF (default: use all fields)"/>
        <param name="version_transcripts" type="boolean" checked="false" label="Append version to transcript identifiers?" help="For transcript feature type only: where the GTF contains transcript versions, should these be appended to transcript identifiers? Useful when generating transcript/gene mappings for use with transcriptomes"/>
        <param name="fill_empty" type="text" optional="true" value="" label="Column to be used to fill empty values in other fields" help="Only when output fields are defined, useful when you need to guarantee a value, for example a gene ID for a transcript/gene mapping."/>
        <!-- Mitochondrial flagging: the chromosome and biotype lists are only shown when flagging is enabled. -->
        <conditional name="mito">
          <param name="mark_mito" type="boolean" checked="true" label="Flag mitochondrial features?"/>
          <when value="true">
            <param name="mito_chr" type="text" optional="true" value="mt,mitochondrion_genome,mito,m,chrM,chrMt" label="Comma-separated list of possible mitochondrial chromosome names (case insensitive)"/>
            <param name="mito_biotypes" type="text" optional="true" value="mt_trna,mt_rrna,mt_trna_pseudogene" label="Comma-separated list of possible mitochondrial biotypes (case insensitive)"/>
          </when>
          <when value="false" />
        </conditional>
        <!-- Optional cDNA FASTA: all cDNA-related parameters live inside the 'true' branch only. -->
        <conditional name="cdnas">
          <param name="parse_cdnas" type="boolean" checked="false" label="Provide a cDNA file for extracting annotations and/or possible filtering?" help="For some applications, e.g. transcriptome mappers, it's useful to match a cDNAs file to an annotation list (e.g. transcript-to-gene mapping)"/>
          <when value="true">
            <param name="fasta_input" type="data" format="fasta,fasta.gz" label="FASTA-format cDNA/transcript file" />
            <param name="cdnas_field" type="text" optional="true" value="transcript_id" label="Annotation field in GTF to match with sequences."/>
            <param name="parse_cdna_names" type="boolean" checked="false" label="Parse the FASTA headers for annotation info?" help="e.g. to find gene IDs for transcripts not present in the GTF. May only work for Ensembl GTFs."/>
            <param name="filter_cdnas" type="boolean" checked="false" label="Filter the cDNA file to match the annotations?" />
          </when>
          <when value="false" />
        </conditional>
    </inputs>

    <outputs>
        <!-- Annotation table, collected from annotation.txt in the job working directory. -->
        <data name="feature_annotation" format="tsv" from_work_dir="annotation.txt" label="${tool.name} on ${on_string}: annotation table"/>
        <!-- Annotation-matched sequences, only created when a cDNA file is supplied AND filtering is requested.
             The 'filter_cdnas' key exists only in the parse_cdnas=true branch of the 'cdnas' conditional,
             so 'parse_cdnas' is tested first; the short-circuiting 'and' avoids a lookup of a missing key. -->
        <data name="fasta_output" format="fasta.gz" from_work_dir="filtered.fa.gz" label="${tool.name} on ${on_string}: annotation-matched sequences">
          <filter>cdnas['parse_cdnas'] and cdnas['filter_cdnas']</filter>
        </data>
    </outputs>
    <tests>
      <!-- Default-parameter run: only the GTF input is set, and the resulting annotation table
           is compared with the expected file annotation.txt. -->
      <test>
        <param name="gtf_input" ftype="gtf" value="test.gtf"/>
        <output name="feature_annotation" file="annotation.txt"/>
      </test>
    </tests>


    <help><![CDATA[
.. class:: infomark

**What it does**

Given an Ensembl GTF file, it will extract all information on chromosomes, coordinates, and attributes provided at the specified feature level. Mitochondrial features can also be flagged. See https://github.com/ebi-gene-expression-group/atlas-gene-annotation-manipulation.

You can also supply a FASTA-format file of sequences, which can be filtered by identifier to match the annotation and/or used as a source of information for transcripts that are not annotated in the GTF. This can be useful for tools such as Alevin which need a transcript-to-gene mapping and a transcriptome file without any missing entries (with respect to annotation).


**Inputs**

    * Ensembl GTF file

-----

**Outputs**

    * Gene annotations in tsv.
]]></help>
<citations>
  <!-- BibTeX separates authors with "and"; the team name is braced so it is kept as one corporate author. -->
  <citation type="bibtex">
@misc{github-hinxton-single-cell,
author = {Jonathan Manning and {EBI Gene Expression Team}},
year = {2019},
title = {Hinxton Single Cell Analysis Environment},
publisher = {GitHub},
journal = {GitHub repository},
url = {https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary},
}</citation>
  <citation type="doi">10.1101/2020.04.08.032698</citation>
</citations>
</tool>
author	ebi-gxa
date	Mon, 19 Jul 2021 08:00:24 +0000
parents	1a642de6db01
children