Mercurial > repos > iracooke > protk_proteogenomics
diff protxml_to_gff.xml @ 2:68d8c9e521d7 draft
planemo upload for repository https://github.com/iracooke/protk-galaxytools/blob/master/protk-proteogenomics/.shed.yml commit 24e0fef2496984648a8a5cd5bff4d6b9b634a302-dirty
author | iracooke |
---|---|
date | Tue, 20 Oct 2015 20:34:50 -0400 |
parents | 28067ed4ea0e |
children |
line wrap: on
line diff
--- a/protxml_to_gff.xml Thu Mar 26 20:16:00 2015 -0400 +++ b/protxml_to_gff.xml Tue Oct 20 20:34:50 2015 -0400 @@ -1,102 +1,120 @@ <tool id="protxml_to_gff" name="Proteomics to GFF" version="1.1.0"> - <description>Export Proteomics Data to GFF</description> + <description>Export Proteomics Data to GFF</description> + <requirements> + <container type="docker">iracooke/protk-1.4.3</container> + <requirement type="package" version="1.4.3">protk</requirement> + <requirement type="package" version="2.2.29">blast+</requirement> + </requirements> + <stdio> + <exit_code range="1:" level="fatal" description="Failure" /> + </stdio> + <command> + protxml_to_gff.rb $protxml_file + #if $database.source_select=="built_in": + -d $database.dbkey + #else + -d $database.fasta_file + #end if + -c $gene_file + #if str( $gffidpattern ): + --gff-idregex='$gffidpattern' + #end if - <requirements> - <container type="docker">iracooke/protk-1.4.1</container> - <requirement type="package" version="1.4">protk</requirement> - <requirement type="package" version="2.2.29">blast+</requirement> - </requirements> + #if str( $genomeidpattern ): + --genome-idregex='$genomeidpattern' + #end if - <command> - protxml_to_gff.rb $protxml_file - - #if $database.source_select=="built_in": - -d $database.dbkey - #else - -d $database.fasta_file - #end if + #if str( $ignorepattern ): + --ignore-regex='$ignorepattern' + #end if + + --threshold=$peptide_threshold + --prot-threshold=$prot_threshold + $stack_charges + -o $output + </command> + <inputs> + <conditional name="database"> + <param name="source_select" type="select" label="Database source used for Proteomics Searches" help="Database should be an amino acid fasta file with entry id's that can be parsed to obtain contig or scaffold ids referenced in your gff file"> + <option value="input_ref">Your Upload File</option> + <option value="built_in">Built-In</option> + </param> + <when value="built_in"> + <param name="dbkey" type="select" format="text" > + <label>Database</label> + <options from_file="pepxml_databases.loc"> + <column name="name" index="0" /> + <column name="value" index="2" /> + </options> + </param> + </when> + <when value="input_ref"> + <param name="fasta_file" type="data" format="fasta" label="Uploaded FASTA file" /> + </when> + </conditional> + <param name="protxml_file" type="data" format="protxml" multiple="false" label="Proteomics Search Results" help="A ProtXML file produced by Protein Prophet"/> + <param name="gene_file" type="data" format="gff3" multiple="false" label="Protein coordinates" help="A gff3 file with coordinates for all protein entries used for proteomics searches. Coordinates should correspond to entries in the genome fasta file"/> + <param name="peptide_threshold" label="Peptide Probability Threshold" type="float" value="0.95" min="0" max="1" help="Only peptides within accepted proteins and passing this threshold will appear in the output"/> + <param name="prot_threshold" label="Protein Probability Threshold" type="float" value="0.99" min="0" max="1" help="Only peptides within proteins passing this threshold will appear in the output"/> + <param name="stack_charges" value="false" type="boolean" label="Peptides with different charges get separate gff entries" help="" truevalue="--stack-charge-states" falsevalue="" /> + <param name="gffidpattern" size="40" type="text" value="lcl\|([^ ]*)" label="gff id regex" help="Regex with capture group for parsing gff ids from protein ids"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target="__sq__"/> + </mapping> + </sanitizer> + </param> - -c $gene_file + <param name="genomeidpattern" size="40" type="text" label="genome id regex" help="Regex with capture group for parsing genomic ids from protein ids"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target="__sq__"/> + </mapping> + </sanitizer> + </param> - --gff-idregex='$gffidpattern' - - -o $output + <param name="ignorepattern" size="40" type="text" label="ignore regex" help="Regex to match protein ids that we should ignore completely"> + <sanitizer> + <valid initial="string.printable"> + <remove value="'"/> + </valid> + <mapping initial="none"> + <add source="'" target="__sq__"/> + </mapping> + </sanitizer> + </param> - </command> - <stdio> - <exit_code range="1:" level="fatal" description="Failure" /> - </stdio> - - <inputs> - <conditional name="database"> - <param name="source_select" type="select" label="Database source used for Proteomics Searches" help="Database should be an amino acid fasta file with entry id's that can be parsed to obtain contig or scaffold ids referenced in your gff file"> - <option value="input_ref">Your Upload File</option> - <option value="built_in">Built-In</option> - </param> - <when value="built_in"> - <param name="dbkey" type="select" format="text" > - <label>Database</label> - <options from_file="pepxml_databases.loc"> - <column name="name" index="0" /> - <column name="value" index="2" /> - </options> - </param> - </when> - <when value="input_ref"> - <param name="fasta_file" type="data" format="fasta" label="Uploaded FASTA file" /> - </when> - </conditional> - - <param name="protxml_file" type="data" format="protxml" multiple="false" label="Proteomics Search Results" help="A ProtXML file produced by Protein Prophet"/> - - <param name="gene_file" type="data" format="gff3" multiple="false" label="Protein coordinates" help="A gff3 file with coordinates for all protein entries used for proteomics searches. Coordinates should correspond to entries in the genome fasta file"/> + </inputs> + <outputs> + <data format="gff3" name="output" /> + </outputs> + <tests> + <test> + <param name="source_select" value="input_ref"/> + <param name="fasta_file" value="small_prot.fasta" format="fasta"/> - <param name="gffidpattern" size="40" type="text" value="lcl\|([^ ]*)" label="gff id regex" help="Regex with capture group for parsing gff ids from protein ids"> - <sanitizer> - <valid initial="string.printable"> - <remove value="'"/> - </valid> - <mapping initial="none"> - <add source="'" target="__sq__"/> - </mapping> - </sanitizer> - </param> - - </inputs> - - <outputs> - <data format="gff3" name="output" /> - </outputs> - - - <tests> - <test> - <param name="source_select" value="input_ref"/> - <param name="fasta_file" value="small_prot.fasta" format="fasta"/> - - <param name="protxml_file" value="small.prot.xml" format="protxml"/> - <param name="gene_file" value="small_combined.gff" format="gff3"/> - <output name="output" format="gff3"> - <assert_contents> - <has_text text="polypeptide" /> - </assert_contents> - </output> - </test> - </tests> - - <help> + <param name="protxml_file" value="small.prot.xml" format="protxml"/> + <param name="gene_file" value="small_combined.gff" format="gff3"/> + <output name="output" format="gff3"> + <assert_contents> + <has_text text="polypeptide" /> + </assert_contents> + </output> + </test> + </tests> + <help> **What it does** Exports peptides and proteins to gff ----- - -**References** - - - </help> - + </help> </tool>