Mercurial > repos > ebi-gxa > gtf2gene_list
view gtf2featureAnnotation.xml @ 7:14b3f2a4523b draft default tip
"planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/gtf-2-gene-list/.shed.yml commit 5a62f18c80ddd55cfcc9d64546fde4e20be3e070"
author | ebi-gxa |
---|---|
date | Mon, 19 Jul 2021 08:00:24 +0000 |
parents | 1a642de6db01 |
children |
line wrap: on
line source
<tool id="_ensembl_gtf2gene_list" name="GTF2GeneList" version="1.52.0+galaxy0" profile="18.01">
    <!--
        Galaxy wrapper around gtf2featureAnnotation.R (from the
        atlas-gene-annotation-manipulation package). It turns a GTF into a
        feature annotation table and can optionally filter a cDNA FASTA file
        so that it matches the annotation.
    -->
    <description>extracts a complete annotation table or subsets thereof from an Ensembl GTF using rtracklayer</description>
    <requirements>
        <requirement type="package" version="1.0.1">atlas-gene-annotation-manipulation</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Link the input under a fixed name; '&&' ensures a failed link aborts
        ## the job instead of being masked by the exit code of the R script.
        ln -s '$gtf_input' input.gtf &&

        gtf2featureAnnotation.R
            --gtf-file input.gtf
            #if $noheader
                --no-header
            #end if
            #if $version_transcripts
                --version-transcripts
            #end if
            #if $mito.mark_mito
                --mito
                --mito-chr '${mito.mito_chr}'
                --mito-biotypes '${mito.mito_biotypes}'
            #end if
            #if $cdnas.parse_cdnas
                --parse-cdnas '${cdnas.fasta_input}'
                --parse-cdna-field '${cdnas.cdnas_field}'
                #if $cdnas.filter_cdnas
                    ## Written into the working directory; Galaxy collects it
                    ## through from_work_dir on the fasta_output dataset.
                    --filter-cdnas-output filtered.fa.gz
                #end if
                #if $cdnas.parse_cdna_names
                    --parse-cdna-names
                #end if
            #end if
            ## fill_empty is a top-level parameter, so it is applied whether
            ## or not a cDNA file was supplied.
            #if $fill_empty
                --fill-empty '${fill_empty}'
            #end if
            --feature-type '${feature_type}'
            --first-field '${first_field}'
            --output-file annotation.txt
            --fields '${fields}'
    ]]></command>
    <inputs>
        <param name="gtf_input" type="data" format="gff" label="Ensembl GTF file"/>
        <param name="feature_type" type="text" optional="true" value="gene" label="Feature type for which to derive annotation"/>
        <param name="first_field" type="text" optional="true" value="gene_id" label="Field to place first in output table"/>
        <param name="noheader" type="boolean" checked="false" label="Suppress header line in output?"/>
        <param name="fields" type="text" optional="true" value="" label="Comma-separated list of field names to extract from the GTF (default: use all fields)"/>
        <param name="version_transcripts" type="boolean" checked="false" label="Append version to transcript identifiers?" help="For transcript feature type only: where the GTF contains transcript versions, should these be appended to transcript identifiers? Useful when generating transcript/gene mappings for use with transcriptomes"/>
        <param name="fill_empty" type="text" optional="true" value="" label="Column to be used to fill empty values in other fields" help="Only when output fields are defined, useful when you need to guarantee a value, for example a gene ID for a transcript/gene mapping."/>
        <!-- Mitochondrial flagging: the name/biotype lists are only shown (and passed) when enabled. -->
        <conditional name="mito">
            <param name="mark_mito" type="boolean" checked="true" label="Flag mitochondrial features?"/>
            <when value="true">
                <param name="mito_chr" type="text" optional="true" value="mt,mitochondrion_genome,mito,m,chrM,chrMt" label="Comma-separated list of possible mitochondrial chromosome names (case insensitive)"/>
                <param name="mito_biotypes" type="text" optional="true" value="mt_trna,mt_rrna,mt_trna_pseudogene" label="Comma-separated list of possible mitochondrial biotypes (case insensitive)"/>
            </when>
            <when value="false"/>
        </conditional>
        <!-- Optional cDNA FASTA handling: all cDNA-related parameters live in the "true" branch. -->
        <conditional name="cdnas">
            <param name="parse_cdnas" type="boolean" checked="false" label="Provide a cDNA file for extracting annotations and/ or possible filtering?" help="For some applications, e.g. transcriptome mappers, it's useful to match a cDNAs file to an annotation list (e.g. transcript-to-gene mapping)"/>
            <when value="true">
                <param name="fasta_input" type="data" format="fasta,fasta.gz" label="FASTA-format cDNA/ transcript file"/>
                <param name="cdnas_field" type="text" optional="true" value="transcript_id" label="Annotation field in GTF to match with sequences."/>
                <param name="parse_cdna_names" type="boolean" checked="false" label="Parse the FASTA headers for annotation info?" help="e.g. to find gene IDs for transcripts not present in the GTF. May only work for Ensembl GTFs."/>
                <param name="filter_cdnas" type="boolean" checked="false" label="Filter the cDNA file to match the annotations?"/>
            </when>
            <when value="false"/>
        </conditional>
    </inputs>
    <outputs>
        <data name="feature_annotation" format="tsv" from_work_dir="annotation.txt" label="${tool.name} on ${on_string}: annotation table"/>
        <data name="fasta_output" format="fasta.gz" from_work_dir="filtered.fa.gz" label="${tool.name} on ${on_string}: annotation-matched sequences">
            <!--
                filter_cdnas only exists inside the parse_cdnas "true" branch, so
                parse_cdnas is checked first to keep the expression from failing
                on a missing key when no cDNA file is provided.
            -->
            <filter>cdnas['parse_cdnas'] and cdnas['filter_cdnas']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Default settings: annotation table from the test GTF. -->
        <test>
            <param name="gtf_input" ftype="gtf" value="test.gtf"/>
            <output name="feature_annotation" file="annotation.txt"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

Given an Ensembl GTF file, it will extract all information on chromosomes, coordinates, and attributes provided at the specified feature level. Mitochondrial features can also be flagged. See https://github.com/ebi-gene-expression-group/atlas-gene-annotation-manipulation.

You can also supply a fasta-format file of sequences, which can be filtered by identifier to match annotation and/or used as a source of information for transcripts un-annotated in the GTF. This can be useful for tools such as Alevin which need a transcript-to-gene mapping and a transcriptome file without any missing entries (with respect to annotation).

**Inputs**

* Ensembl GTF file

-----

**Outputs**

* Gene annotations in tsv.
    ]]></help>
    <citations>
        <citation type="bibtex">
            @misc{github-hinxton-single-cell,
            author = {Jonathan Manning, EBI Gene Expression Team},
            year = {2019},
            title = {Hinxton Single Cell Analysis Environment},
            publisher = {GitHub},
            journal = {GitHub repository},
            url = {https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary},
            }</citation>
        <citation type="doi">10.1101/2020.04.08.032698</citation>
    </citations>
</tool>