Mercurial > repos > galaxyp > peptide_to_gff
diff peptide_to_gff.xml @ 0:cec60c540546
Uploaded
author | galaxyp |
---|---|
date | Wed, 26 Jun 2013 15:56:16 -0400 |
parents | |
children | 0cd177bc347d |
line wrap: on
line diff
<tool id="peptide_to_gff" name="Peptide to GFF" version="1.0">
  <!--
    Galaxy wrapper around the peptide_to_gff executable.
    The command template below turns the selected peptide table and one or
    more sequence/feature reference sets into command-line options, and
    writes a GFF3 dataset plus, optionally, a dataset of unmapped input lines.
  -->
  <description>Map peptide to reference genome</description>
  <requirements>
    <requirement type="package">kent_tools</requirement>
    <requirement type="package" version="master">peptide_to_gff</requirement>
  </requirements>
  <!--
    Cheetah command template.
    * Both supported peptide source formats share the same options; for the
      protein_pilot format the two column numbers come from hidden params.
    * Each entry of the ref_mappings repeat contributes its own set of
      reference options, chosen by ref_selector.
    * The 'eej' branch has no matching select option yet (the option is
      commented out in the inputs section), so it is currently unreachable.
    * Dataset and file paths are single-quoted so that a path containing
      whitespace or shell metacharacters is passed as one argument.
  -->
  <command>peptide_to_gff
    #if $peptide_src.inputFormat == 'protein_pilot' or $peptide_src.inputFormat == 'tabular':
      --skip_lines $peptide_src.input_file.metadata.comment_lines
      --input '$peptide_src.input_file'
      --peptide_column $peptide_src.peptide_col
      --accession_column $peptide_src.accession_col
    #end if
    #for $ref in $ref_mappings:
      #if $ref.mapping.ref_selector == 'cdna_gtf':
        --cdna_fasta '$ref.mapping.seqs'
        --cdna_gtf '$ref.mapping.feature_ref'
      #elif $ref.mapping.ref_selector == 'cdna_ref':
        --cdna_fasta '$ref.mapping.seqs'
        #if $ref.mapping.genome.ref_src == 'indexed':
          --reference '$ref.mapping.genome.reference.fields.path'
        #else:
          --reference '$ref.mapping.genome.reference'
        #end if
      #elif $ref.mapping.ref_selector == 'cds_gff':
        --cds_fasta '$ref.mapping.seqs'
        --cds_gff '$ref.mapping.feature_ref'
      #elif $ref.mapping.ref_selector == 'eej':
        --cdna_gtf '$ref.mapping.feature_ref'
      #end if
    #end for
    #if $show_unmapped:
      --unmapped '$unmapped'
    #end if
    --output '$output_gff'
  </command>
  <inputs>
    <!-- Peptide table: the source format decides how the two columns are chosen. -->
    <conditional name="peptide_src">
      <param name="inputFormat" type="select" label="Peptide Source Format">
        <option value="protein_pilot">Protein Pilot Tabular</option>
        <option value="tabular">Generic Tabular (with peptide and accession columns)</option>
        <!-- future formats -->
      </param>
      <when value="protein_pilot">
        <param name="input_file" type="data" format="tabular" label="Source File"
               help="A tabular file that contains a peptide in a column and an accession name in another column."/>
        <!-- Column numbers are fixed for this format and therefore hidden from the user. -->
        <param name="peptide_col" type="hidden" value="13" label="Peptide Column"/>
        <param name="accession_col" type="hidden" value="7" label="Accession Identifier Column"/>
        <!--
        <param name="info_cols" type="data_column" data_ref="input_file" label="Columns to include in the GTF INFO" multiple="true" optional="true"/>
        -->
      </when>
      <when value="tabular">
        <param name="input_file" type="data" format="tabular" label="Source File"
               help="A tabular file that contains a peptide in a column and an accession name in another column."/>
        <param name="peptide_col" type="data_column" data_ref="input_file" label="Peptide Column"/>
        <param name="accession_col" type="data_column" data_ref="input_file" label="Accession Identifier Column"/>
        <!--
        <param name="info_cols" type="data_column" data_ref="input_file" label="Columns to include in the GTF INFO" multiple="true" optional="true"/>
        -->
      </when>
    </conditional>
    <!-- At least one reference set is required; each set yields its own command-line options. -->
    <repeat name="ref_mappings" title="Sequence and Feature References for mapping the peptides" min="1" help="">
      <conditional name="mapping">
        <param name="ref_selector" type="select" label="Select Peptide Mapping File Formats">
          <option value="cdna_gtf">cdna sequence with GTF feature reference (Ensembl)</option>
          <option value="cdna_ref">cdna sequence with genome sequence reference (ECgene)</option>
          <option value="cds_gff">protein sequence with GFF feature reference (Augustus)</option>
          <!-- Eventually may want to include putative exon-exon junctions
          <option value="eej">putative exon exon junctions</option>
          -->
        </param>
        <when value="cdna_gtf">
          <param name="seqs" type="data" format="fasta" label="CDNA Transcripts Fasta"
                 help="Example: ftp://ftp.ensembl.org/pub/release-71/fasta/homo_sapiens/cdna/Homo_sapiens.GRCh37.71.cdna.all.fa.gz"/>
          <param name="feature_ref" type="data" format="gtf" label="GTF feature file for the cdna transcripts"
                 help="Example: ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"/>
        </when>
        <when value="cdna_ref">
          <param name="seqs" type="data" format="fasta" label="CDNA Transcripts Fasta"
                 help="Example: http://genome.ewha.ac.kr/ECgene/download/hg18/hg18_b1_low_fasta.txt.gz"/>
          <!-- The genome fasta comes either from the all_fasta data table or from the history. -->
          <conditional name="genome">
            <param name="ref_src" type="select" label="Reference Genome Source for mapping">
              <option value="indexed">Use a built-in index</option>
              <option value="history">Use one in your history</option>
            </param>
            <when value="indexed">
              <param name="reference" type="select" label="Genome Reference">
                <options from_data_table="all_fasta">
                </options>
              </param>
            </when>
            <when value="history">
              <param name="reference" type="data" format="fasta" label="Reference Genome Fasta"/>
            </when>
          </conditional>
        </when>
        <when value="cds_gff">
          <param name="seqs" type="data" format="fasta" label="Protein Sequence Fasta"
                 help="Example: http://gbi.agrsci.dk/pig/sscrofa10_2_annotation/ssc10.2.RNA.hints.augustus.gff.prot.faa.gz"/>
          <param name="feature_ref" type="data" format="gff3,gtf" label="GFF Feature file for the Protein Sequences"
                 help="Example: http://gbi.agrsci.dk/pig/sscrofa10_2_annotation/ssc10.2.RNA.hints.augustus.gff.gz"/>
        </when>
        <!-- Eventually may want to include putative exon-exon junctions
        <when value="eej">
          <param name="feature_ref" type="data" format="gtf" label=" The Ensembl GTF feature file corresponding to the EEJ build"
                 help="Example: ftp://ftp.ensembl.org/pub/release-71/gtf/homo_sapiens/Homo_sapiens.GRCh37.71.gtf.gz"/>
        </when>
        -->
      </conditional>
    </repeat>
    <param name="show_unmapped" type="boolean" truevalue="yes" falsevalue="no" checked="False" label="Show unmapped lines"/>
  </inputs>
  <outputs>
    <data format="gff3" metadata_source="input_file" name="output_gff" />
    <!-- Only produced when show_unmapped is checked; takes its format from the input table. -->
    <data format_source="input_file" metadata_source="input_file" name="unmapped">
      <filter>show_unmapped == True</filter>
    </data>
  </outputs>
  <stdio>
    <!-- Any exit code of 1 or above is reported as a fatal error. -->
    <exit_code range="1:" level="fatal" description="Bad input dataset" />
  </stdio>
  <tests>
    <test>
      <param name="inputFormat" value="protein_pilot"/>
      <param name="input_file" value="ProtData.tsv" ftype="tabular"/>
      <!--
      <param name="info_cols" value=""/>
      -->
      <param name="ref_selector" value="cdna_gtf"/>
      <param name="seqs" value="GRCh37.69.cdna.19.fa" ftype="fasta"/>
      <param name="feature_ref" value="Homo_sapiens.GRCh37_19.71.gtf" ftype="gtf"/>
      <param name="show_unmapped" value="False"/>
      <output name="output_gff" file="ProtData.gff3" ftype="gff3"/>
    </test>
    <!-- These values work when entered into a history, but aren't handled correctly by the function test environment.
    <test>
      <param name="inputFormat" value="protein_pilot"/>
      <param name="input_file" value="ProtData.tsv" ftype="tabular"/>
      <param name="ref_selector" value="cdna_ref"/>
      <param name="seqs" value="GRCh37.69.cdna.19.fa" ftype="fasta"/>
      <param name="ref_src" value="history"/>
      <param name="reference" value="Homo_sapiens.GRCh37.71.dna.chromosome.19.fa" ftype="fasta"/>
      <param name="show_unmapped" value="False"/>
      <output name="output_gff" file="ProtData.gff3" ftype="gff3"/>
    </test>
    -->

  </tests>
  <help>
**Peptide to GFF**

Maps peptide sequences that have a known transcript or protein-coding sequence to a reference genome.

Inputs:

  - A tabular file that contains a peptide column and a sequence accession column.

  - One or more fasta files of transcript or protein sequences that match the accession column entries.

  - Either GTF, GFF, or a genome reference fasta for mapping the accession sequences to the genome.

Output:

  - A GFF3 file that gives the mapping of the peptide to the reference genome.

  - Optionally, a dataset with the unmapped lines from the input file.

  </help>
</tool>