Mercurial > repos > iracooke > protk
changeset 0:a929e27eb203 draft
Uploaded
author | iracooke |
---|---|
date | Thu, 21 Jun 2012 22:30:48 -0400 |
parents | |
children | deaedec14cc8 |
files | display_applications/proteomics/PepXml.xml display_applications/proteomics/ProtXml.xml display_applications/proteomics/mzML.xml interprophet.xml interprophet_wrapper.rb lib/galaxy/datatypes/proteomics.py make_decoy.xml mzml_to_mgf.xml omssa.xml peptide_prophet.xml peptide_prophet_wrapper.rb pepxml_to_table.xml protein_prophet.xml protein_prophet_wrapper.rb tandem.xml tool-data/datatypes_conf.xml tool-data/mascot_databases.loc.sample tool-data/mascot_mods.loc.sample tool-data/omssa_mods.loc.sample tool-data/pepxml_databases.loc.sample tool-data/protk_display_site.txt.sample tool-data/tandem_mods.loc.sample xls_to_table.xml |
diffstat | 23 files changed, 1225 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- File: display_applications/proteomics/PepXml.xml -->
<!--
    Galaxy display application for pepXML datasets.
    One link is generated per non-comment row of tool-data/protk_display_site.txt;
    column 0 of that file becomes site_id and column 1 becomes site_url.
-->
<display id="proteomics_pepxml" version="1.0.0" name="view pepXML in">
    <dynamic_links from_file="tool-data/protk_display_site.txt" skip_startswith="#" id="0" name="0">
        <!-- Define parameters by column from file -->
        <dynamic_param name="site_id" value="0"/>
        <dynamic_param name="site_url" value="1"/>
        <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name -->
        <!-- The ampersand must be written as an entity for this file to be well-formed XML -->
        <url target_frame="galaxy_main">${site_url}/init_local?file=${encoded_filename.qp}&amp;type=pepxml</url>
        <param type="data" name="pep_file" viewable="False" format="pepXML"/>
        <param type="data" dataset="pep_file" name="pepxml_file" format="pepXML" viewable="False" />
        <!-- Hex-encoded on-disk path of the dataset, passed to the viewer as the "file" query parameter -->
        <param type="template" name="encoded_filename" strip="True" >
            #import binascii
            ${binascii.hexlify( $pepxml_file.file_name )}
        </param>
        <!-- NOTE(review): galaxy_url is defined here but not referenced by the url template above -->
        <param type="template" name="galaxy_url" strip="True" >
            ${BASE_URL.split(":")[1][2:]}
        </param>
    </dynamic_links>
</display>
<!-- File: display_applications/proteomics/ProtXml.xml -->
<!--
    Galaxy display application for protXML datasets.
    One link is generated per non-comment row of tool-data/protk_display_site.txt;
    column 0 of that file becomes site_id and column 1 becomes site_url.
-->
<display id="proteomics_protxml" version="1.0.0" name="view protXML in">
    <dynamic_links from_file="tool-data/protk_display_site.txt" skip_startswith="#" id="0" name="0">
        <!-- Define parameters by column from file -->
        <dynamic_param name="site_id" value="0"/>
        <dynamic_param name="site_url" value="1"/>
        <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name -->
        <!-- The ampersand must be written as an entity for this file to be well-formed XML -->
        <url target_frame="galaxy_main">${site_url}/init_local?file=${encoded_filename.qp}&amp;type=protxml</url>
        <param type="data" name="prot_file" viewable="False" format="protXML"/>
        <param type="data" dataset="prot_file" name="protxml_file" format="protXML" viewable="False" />
        <!-- Hex-encoded on-disk path of the dataset, passed to the viewer as the "file" query parameter -->
        <param type="template" name="encoded_filename" strip="True" >
            #import binascii
            ${binascii.hexlify( $protxml_file.file_name )}
        </param>
        <!-- NOTE(review): galaxy_url is defined here but not referenced by the url template above -->
        <param type="template" name="galaxy_url" strip="True" >
            ${BASE_URL.split(":")[1][2:]}
        </param>
    </dynamic_links>
</display>
<!-- File: display_applications/proteomics/mzML.xml -->
<!--
    Galaxy display application for mzML datasets.
    One link is generated per non-comment row of tool-data/protk_display_site.txt;
    column 0 of that file becomes site_id and column 1 becomes site_url.
-->
<display id="proteomics_mzml" version="1.0.2" name="view mzML data">
    <dynamic_links from_file="tool-data/protk_display_site.txt" skip_startswith="#" id="0" name="0">
        <!-- Define parameters by column from file -->
        <dynamic_param name="site_id" value="0"/>
        <dynamic_param name="site_url" value="1"/>
        <!-- We define url and params as normal, but values defined in dynamic_param are available by specified name -->
        <!-- The ampersand must be written as an entity for this file to be well-formed XML -->
        <url target_frame="galaxy_main">${site_url}/init_local?file=${encoded_filename.qp}&amp;type=mzml</url>
        <param type="data" name="raw_file" viewable="False" format="mzML"/>
        <param type="data" dataset="raw_file" name="mzml_file" format="mzML" viewable="False" />
        <!-- Hex-encoded on-disk path of the dataset, passed to the viewer as the "file" query parameter -->
        <param type="template" name="encoded_filename" strip="True" >
            #import binascii
            ${binascii.hexlify( $mzml_file.file_name )}
        </param>
        <!-- NOTE(review): galaxy_url is defined here but not referenced by the url template above -->
        <param type="template" name="galaxy_url" strip="True" >
            ${BASE_URL.split(":")[1][2:]}
        </param>
    </dynamic_links>
</display>
<!-- File: interprophet.xml -->
<!--
    Galaxy tool definition for InterProphet.
    The command runs interprophet_wrapper.rb, which reads its arguments by position:
    output path, the five model switches, the minprob option, then every input file.
    Each boolean therefore always emits a token ("blank" when the model term is kept).
-->
<tool id="proteomics_search_interprophet_1" name="InterProphet" version="1.0.0">
    <requirements><requirement type="package">protk</requirement></requirements>
    <description>Combine Peptide Prophet results from multiple search engines</description>

    <command interpreter="ruby">

        interprophet_wrapper.rb $output $use_nss $use_nrs $use_nse $use_nsi $use_nsm --minprob $minprob


        ## Inputs.
        ${first_input}
        #for $input_file in $input_files:
        ${input_file.additional_input}
        #end for

    </command>

    <inputs>

        <param name="first_input" type="data" format="peptideprophet_pepxml" label="Peptide Prophet Results" help="These files will typically be outputs from search tools that have subsequently been run through peptide prophet"/>

        <repeat name="input_files" title="Additional PepXML Input Files">
            <param format="peptideprophet_pepxml" name="additional_input" type="data" label="PepXML produced by Peptide Prophet" help=""/>
        </repeat>

        <!-- truevalue "blank" is a placeholder token the wrapper compares against; any other value disables the term -->
        <param name="use_nss" checked="true" type="boolean" label="Include NSS in Model" help="Include NSS (Number of Sibling Searches) in Statistical Model" truevalue="blank" falsevalue="--nonss"/>
        <param name="use_nrs" checked="true" type="boolean" label="Include NRS in Model" help="Include NRS (Number of Replicate Spectra) in Statistical Model" truevalue="blank" falsevalue="--nonrs"/>
        <param name="use_nse" checked="true" type="boolean" label="Include NSE in Model" help="Include NSE (Number of Sibling Experiments) in Statistical Model" truevalue="blank" falsevalue="--nonse"/>
        <param name="use_nsi" checked="true" type="boolean" label="Include NSI in Model" help="Include NSI (Number of Sibling Ions) in Statistical Model" truevalue="blank" falsevalue="--nonsi"/>
        <param name="use_nsm" checked="true" type="boolean" label="Include NSM in Model" help="Include NSM (Number of Sibling Modifications) in Statistical Model" truevalue="blank" falsevalue="--nonsm"/>

        <!-- No default value: when left empty the shell drops the argument entirely -->
        <param name="minprob" type="text" label="Minimum threshold probability for reporting results"/>

    </inputs>
    <outputs>
        <!-- The wrapper writes interprophet_output.pep.xml into the job working directory -->
        <data format="interprophet_pepxml" name="output" metadata_source="first_input" label="interprophet.${first_input.display_name}" from_work_dir="interprophet_output.pep.xml"/>
    </outputs>

    <help>
        Run InterProphet
    </help>

</tool>
# File: interprophet_wrapper.rb
#
# Galaxy wrapper around protk's interprophet.rb.
#
# interprophet only accepts inputs whose names end in ".pep.xml", so every input
# is symlinked into the working directory under such a name. After the run, the
# temporary link paths recorded in the output are rewritten to refer to the
# original inputs (with ".pep.xml" appended, as the original wrapper did).
#
# Hard-coded argument order:
#   ARGV[0]     output path chosen by Galaxy (not used here; the result is left
#               in interprophet_output.pep.xml in the working directory)
#   ARGV[1..5]  "blank" keeps the NSS/NRS/NSE/NSI/NSM model term, anything else disables it
#   ARGV[6]     the minprob option flag
#   ARGV[7]     the minprob value -- present ONLY when the user typed one
#   remaining   input pepXML files
require 'pathname'
require 'shellwords'

$VERBOSE=nil

OUTPUT_FILE_NAME="interprophet_output.pep.xml"

# Matches a plain decimal / scientific-notation number. Used to tell an optional
# minprob value apart from the first input file path.
NUMERIC_PATTERN=/\A[-+]?(\d+\.?\d*|\.\d+)([eE][-+]?\d+)?\z/

# Replace every literal occurrence of each [from, to] pair in file_name.
# Works line by line through a temporary file so large pepXML outputs are never
# held in memory. Does nothing when the file does not exist (i.e. the tool failed).
def rewrite_paths(file_name, substitutions)
  return unless File.exist?(file_name)
  tmp_name="#{file_name}.tmp"
  File.open(tmp_name, "w") do |out|
    File.foreach(file_name) do |line|
      # Block form of gsub keeps the replacement literal (no backslash escapes)
      substitutions.each { |from, to| line = line.gsub(from) { to } }
      out.write(line)
    end
  end
  File.rename(tmp_name, file_name)
end

args=ARGV.dup
actual_output_path_string=args.shift
use_nss, use_nrs, use_nse, use_nsi, use_nsm = args.shift(5)
args.shift # the minprob option flag itself

# The value after the flag is optional: an empty text box leaves nothing on the
# command line, so only consume the next argument when it is actually a number.
# Previously this value was treated as an input file whenever it was supplied.
minprob_val=nil
minprob_val=args.shift if args.first && args.first =~ NUMERIC_PATTERN

original_input_files=args
# End hard coded args #

interprophet_path=%x[which interprophet.rb].chomp
abort("interprophet_wrapper.rb: interprophet.rb was not found on the PATH") if interprophet_path.empty?

wd=Dir.pwd
cmd=""
substitutions=[]

input_files=original_input_files.collect do |input|
  # Link <working dir>/<basename>.pep.xml -> original input
  actual_input_path_string="#{wd}/#{Pathname.new(input).basename}.pep.xml"

  cmd << "ln -s #{Shellwords.escape(input)} #{Shellwords.escape(actual_input_path_string)};"
  substitutions << [actual_input_path_string, "#{input}.pep.xml"]
  actual_input_path_string
end

cmd << Shellwords.escape(interprophet_path)

cmd << " --no-nss" unless use_nss=="blank"
cmd << " --no-nrs" unless use_nrs=="blank"
cmd << " --no-nse" unless use_nse=="blank"
cmd << " --no-nsi" unless use_nsi=="blank"
cmd << " --no-nsm" unless use_nsm=="blank"

# NOTE(review): assumes interprophet.rb accepts --minprob, as the tool XML implies -- confirm
cmd << " --minprob #{Shellwords.escape(minprob_val)}" if minprob_val

input_files.each { |input|
  cmd << " #{Shellwords.escape(input)}"
}

cmd << " -o #{OUTPUT_FILE_NAME} -r"

%x[#{cmd}]

rewrite_paths(OUTPUT_FILE_NAME, substitutions)
# File: lib/galaxy/datatypes/proteomics.py
"""
Proteomics format classes
"""
import logging
import re
from galaxy.datatypes.data import *
from galaxy.datatypes.xml import *
from galaxy.datatypes.sniff import *
from galaxy.datatypes.binary import *

log = logging.getLogger(__name__)


class Xls( Binary ):
    """Class describing a binary excel spreadsheet file"""
    file_ext = "xls"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set a fixed peek text and use the file size as the blurb."""
        if not dataset.dataset.purged:
            dataset.peek = "Excel Spreadsheet file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Return the stored peek, falling back to a size description if it cannot be read."""
        try:
            return dataset.peek
        except Exception:
            return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) )


class PepXml( GenericXml ):
    """pepXML data"""
    file_ext = "pepxml"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'pepXML data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Determines whether the file is pepXML

        Returns True when one of the first three lines contains the text pepXML"
        (the end of the pepXML namespace attribute), False otherwise.
        """
        xmlns_re = re.compile(".*pepXML\"")
        handle = open(filename)
        # try/finally rather than "with" so the handle is closed on every path
        # without requiring a newer Python than the rest of this module.
        try:
            for i in range(3):
                line = handle.readline()
                if xmlns_re.match(line.strip()):
                    return True
            return False
        finally:
            handle.close()


class MzML( GenericXml ):
    """mzML data"""
    file_ext = "mzml"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mzML Mass Spectrometry data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """Return True when one of the first three lines starts with <mzML, False otherwise."""
        xmlns_re = re.compile("^<mzML")
        handle = open(filename)
        try:
            for i in range(3):
                line = handle.readline()
                if xmlns_re.match(line.strip()):
                    return True
            return False
        finally:
            handle.close()


class ProtXML( Text ):
    """protXML data"""
    file_ext = "protxml"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'prot XML Search Results'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Return True when the first two lines contain, in order, the XML
        declaration and the protXML namespace attribute, and at least one
        further line follows them. Returns False otherwise.
        """
        protxml_header = [ '<?xml version="1.0" encoding="ISO-8859-1"?>',
                           'xmlns="http://regis-web.systemsbiology.net/protXML"' ]

        handle = open( filename )
        try:
            for i, line in enumerate( handle ):
                # Was len( pepxml_header ): an undefined name that raised NameError
                if i >= len( protxml_header ):
                    return True
                line = line.rstrip( '\n\r' )
                if protxml_header[ i ] not in line:
                    return False
            # File ended before a line past the header was seen
            return False
        finally:
            handle.close()


class MzXML( Text ):
    """mzXML data"""
    file_ext = "mzXML"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mzXML Mass Spectrometry data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Return True when the first two lines are exactly the expected XML
        declaration and mzXML 2.1 root element, and at least one further line
        follows them. Returns False otherwise.
        """
        mzxml_header = [ '<?xml version="1.0" encoding="ISO-8859-1"?>',
                         '<mzXML xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd">' ]
        handle = open( filename )
        try:
            for i, line in enumerate( handle ):
                if i >= len( mzxml_header ):
                    return True
                line = line.rstrip( '\n\r' )
                if line != mzxml_header[ i ]:
                    return False
            return False
        finally:
            handle.close()


class Mgf( Text ):
    """Mascot Generic Format data"""
    file_ext = "mgf"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mgf Mascot Generic Format'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """Return True when a line equal to BEGIN IONS is found before the scan limit is passed."""
        mgf_begin_ions = "BEGIN IONS"
        max_lines = 100

        handle = open( filename )
        try:
            for i, line in enumerate( handle ):
                line = line.rstrip( '\n\r' )
                if line == mgf_begin_ions:
                    return True
                if i > max_lines:
                    return False
            return False
        finally:
            handle.close()


class MascotDat( Text ):
    """Mascot search results """
    file_ext = "mascotdat"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mascotdat Mascot Search Results'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """Return True when the Mascot MIME-Version line is found before the scan limit is passed."""
        mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)"
        max_lines = 10

        handle = open( filename )
        try:
            for i, line in enumerate( handle ):
                line = line.rstrip( '\n\r' )
                if line == mime_version:
                    return True
                if i > max_lines:
                    return False
            return False
        finally:
            handle.close()
<!-- File: make_decoy.xml -->
<!-- Galaxy tool definition: runs make_decoy.rb on a fasta dataset to produce random (decoy) sequences. -->
<tool id="make_decoy_1" name="Create decoy databases" version="1.0.0">
    <requirements>
        <requirement type="package">protk</requirement>
    </requirements>

    <description>Creates a random protein database with similar properties to a real protein database</description>

    <!-- $append expands to -A when checked and to nothing otherwise -->
    <command>make_decoy.rb $input_file -o $output -L $length -P $prefix $append</command>

    <inputs>

        <param name="input_file" type="data" format="fasta" multiple="false" label="Input File" help="Real protein sequences. Take care that these are fasta formatted with no more than 80 amino acids per line. There should be no whitespace in the sequences."/>
        <param name="prefix" type="text" label="String to prepend to generated protein ID's" size="60" value="decoy_"/>
        <param name="length" type="text" label="Number of random sequences to generate" help="If 0, a database of equal size to the input database will be generated" size="60" value="0"/>
        <param name="append" type="boolean" checked="true" label="Append input dataset to the generated sequences" truevalue="-A" falsevalue=""/>
    </inputs>

    <outputs>
        <data format="fasta" name="output" metadata_source="input_file" label="Random sequences from ${input_file.display_name}" from_work_dir="random.fasta"/>
    </outputs>

    <help>
        Create random protein sequences
    </help>

</tool>
<!-- File: mzml_to_mgf.xml -->
<!-- Galaxy tool definition: runs file_convert.rb to turn an mzML dataset into mgf. -->
<tool id="mzml_to_mgf_1" name="MzML to mgf" version="1.0.0">
    <requirements>
        <requirement type="package">protk</requirement>
    </requirements>

    <description>Converts an mzML file to mgf suitable for searching by omssa</description>

    <!-- $maldi expands to -l when checked and to nothing otherwise -->
    <command>file_convert.rb $input_file -o $output $maldi</command>

    <inputs>

        <param name="input_file" type="data" format="mzml" multiple="false" label="Input File" help="Line Spectra in mzML format"/>
        <param name="maldi" type="boolean" label="Is the data from a MALDI instrument" truevalue="-l" falsevalue=""/>

    </inputs>

    <outputs>
        <data format="mgf" name="output" metadata_source="input_file" label="${input_file.display_name}.mgf" from_work_dir="converted.mgf"/>
    </outputs>

    <help>
        Convert line spectra to Mascot Generic Format
    </help>

</tool>
<!-- File: omssa.xml -->
<!--
    Galaxy tool definition: runs omssa_search.rb against either a built-in
    database (chosen from pepxml_databases.loc) or an uploaded fasta dataset.
-->
<tool id="proteomics_search_omssa_1" name="OMSSA MSMS Search" version="1.0.0">
    <requirements>
        <requirement type="package">protk</requirement>
    </requirements>

    <description>Run an OMSSA MS/MS Search</description>

    <!-- The "#else #" form closes the Cheetah directive so the rest of that line is emitted as command text -->
    <command>#if $database.source_select=="built_in":
        omssa_search.rb -d $database.dbkey
        #else #omssa_search.rb -d $database.fasta_file
        #end if

        --var-mods='
        $variable_mods
        '

        --fix-mods='
        $fixed_mods
        '

        --searched-ions='
        $searched_ions
        '

        $input_file -o $output -r --enzyme=$enzyme --precursor-ion-tol-units=$precursor_tolu -v $missed_cleavages -f $fragment_ion_tol -p $precursor_ion_tol --num-peaks-for-multi-isotope-search $multi_isotope $use_average_mass $respect_precursor_charges --max-hit-expect $max_hit_expect --intensity-cut-off $intensity_cut_off

    </command>


    <inputs>
        <!-- Database: either a row of pepxml_databases.loc or a fasta dataset from the history -->
        <conditional name="database">
            <param name="source_select" type="select" label="Database source">
                <option value="built_in">Built-In</option>
                <option value="input_ref">Uploaded fasta file</option>
            </param>
            <when value="built_in">
                <param name="dbkey" type="select" format="text" >
                    <label>Database</label>
                    <options from_file="pepxml_databases.loc">
                        <column name="name" index="0" />
                        <column name="value" index="2" />
                    </options>
                </param>
            </when>
            <when value="input_ref">
                <param name="fasta_file" type="data" format="fasta" label="Uploaded FASTA file" />
            </when>
        </conditional>

        <param name="input_file" type="data" format="mgf" multiple="false" label="MSMS File" help="An mgf file with MS/MS data"/>

        <!-- Modification lists are read from omssa_mods.loc (column 0 = display name, column 2 = value) -->
        <param name="variable_mods" format="text" type="select" multiple="true" label="Variable Modifications" help="Hold the appropriate key while
            clicking to select multiple items">
            <options from_file="omssa_mods.loc">
                <column name="name" index="0" />
                <column name="value" index="2" />
            </options>
        </param>

        <param name="fixed_mods" format="text" type="select" multiple="true" label="Fixed Modifications" help="Hold the appropriate key while
            clicking to select multiple items">
            <options from_file="omssa_mods.loc">
                <column name="name" index="0" />
                <column name="value" index="2" />
            </options>
        </param>


        <param name="missed_cleavages" type="select" format="text" help="Allow peptides to contain up to this many missed enzyme cleavage sites">
            <label>Missed Cleavages Allowed</label>
            <option value="0">0</option>
            <option value="1">1</option>
            <option value="2">2</option>
        </param>

        <!-- Option values are the numeric codes passed through as the enzyme option of omssa_search.rb -->
        <param name="enzyme" type="select" format="text">
            <label>Enzyme</label>
            <option value="0">Trypsin</option>
            <option value="1">Arg-C</option>
            <option value="2">CNBr</option>
            <option value="3">Chymotrypsin (FYWL)</option>
            <option value="4">Formic Acid</option>
            <option value="5">Lys-C</option>
            <option value="6">Lys-C, no P rule</option>
            <option value="7">Pepsin A</option>
            <option value="8">Trypsin+CNBr</option>
            <option value="9">Trypsin+Chymotrypsin (FYWLKR)</option>
            <option value="10">Trypsin, no P rule</option>
            <option value="11">Whole protein</option>
            <option value="12">Asp-N</option>
            <option value="13">Glu-C</option>
            <option value="14">Asp-N+Glu-C</option>
            <option value="15">Top-Down</option>
            <option value="16">Semi-Tryptic</option>
            <option value="17">No Enzyme</option>
            <option value="18">Chymotrypsin, no P rule (FYWL)</option>
            <option value="19">Asp-N (DE)</option>
            <option value="20">Glu-C (DE)</option>
            <option value="21">Lys-N (K)</option>
            <option value="22">Thermolysin, no P rule</option>
            <option value="23">Semi-Chymotrypsin (FYWL)</option>
            <option value="24">Semi-Glu-C</option>
        </param>

        <param name="fragment_ion_tol" help="Fragment Ion Tolerance in Daltons" type="float" value="0.65" min="0" max="10000" label="Fragment ion tolerance"/>
        <param name="max_hit_expect" help="Expect values less than this are considered to be hits. Use a large value, eg 10000 when results will be processed downstream with Peptide Prophet" type="float" value="1.0" min="0" max="10000000" label="Maximum Expect value allowed in the hit list"/><!-- -he-->
        <param name="intensity_cut_off" help="Peak intensity cut-off as a fraction of maximum peak intensity" type="float" value="0.0005" min="0" max="1" label="Peak intensity cut-off"/><!-- -ci-->


        <param name="precursor_ion_tol" help="Precursor Ion Tolerance (Da or ppm)" type="float" value="100" min="0" max="10000" label="Precursor ion tolerance"/>
        <param name="precursor_tolu" type="select" format="text">
            <label>Precursor Ion Tolerance Units</label>
            <option value="ppm">ppm</option>
            <option value="Da">Da</option>
        </param>

        <param name="use_average_mass" type="boolean" label="Use average precursor masses" help="Match precursor to average mass of the parent ion instead of its monoisotopic mass" truevalue="-a average" falsevalue=""/>
        <param name="respect_precursor_charges" type="boolean" label="Respect precursor charges" help="Use precursor charge information in input file rather than OMSSA's inferred value" truevalue="--respect-charges" falsevalue=""/>

        <param name="multi_isotope" type="select" format="text" help="Include this many neighbouring peaks when searching for a match to the precursor mass. Only used when doing monoisotopic search">
            <label>Multi-isotope search.</label>
            <option value="0">0</option>
            <option value="1">1</option>
            <option value="2">2</option>
            <option value="3">3</option>
            <option value="4">4</option>
        </param>

        <!-- a, b, x and y ions are pre-selected -->
        <param name="searched_ions" display="checkboxes" type="select" multiple="true" format="text" label="Ions included in search" help="">
            <option selected="true" value="0">a</option>
            <option selected="true" value="1">b</option>
            <option value="2">c</option>
            <option selected="true" value="3">x</option>
            <option selected="true" value="4">y</option>
            <option value="5">zdot</option>
            <option value="10">adot</option>
            <option value="11">x-CO2</option>
            <option value="12">adot-CO2</option>
        </param>

    </inputs>

    <outputs>
        <data format="raw_pepxml" name="output" metadata_source="input_file" label="omssa_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}.pepXML"/>
    </outputs>

    <help>
        Run an OMSSA Search
    </help>
</tool>
<!-- File: peptide_prophet.xml -->
<!--
    Galaxy tool definition: runs peptide_prophet_wrapper.rb on raw pepXML search results.
    The wrapper takes the output path first and the input file second; every
    remaining token is handed on to peptide_prophet.rb.
-->
<tool id="proteomics_search_peptide_prophet_1" name="Peptide Prophet" version="1.0.0">
    <requirements><requirement type="package">protk</requirement></requirements>
    <description>Calculate Peptide Prophet statistics on search results</description>

    <command interpreter="ruby">peptide_prophet_wrapper.rb ${output} ${input_file} -r $glyco $useicat $phospho $usepi $usert $accurate_mass $no_ntt $no_nmc $use_gamma $use_only_expect $force_fit $allow_alt_instruments $maldi
    </command>

    <inputs>

        <param name="input_file" type="data" format="raw_pepxml" multiple="false" label="Raw Search Results" help="These files will typically be outputs from omssa or xtandem search tools"/>

        <!-- Each boolean expands to its command line switch when checked; useicat is the only one that also emits a switch when unchecked -->
        <param name="glyco" type="boolean" label="Expect true positives to have a glycocapture motif" truevalue="--glyco" falsevalue=""/>
        <param name="useicat" type="boolean" label="Use icat information" truevalue="--useicat" falsevalue="--no-useicat"/>
        <param name="phospho" type="boolean" label="Use phospho information" truevalue="--phospho" falsevalue=""/>
        <param name="usepi" type="boolean" label="Use pI information" truevalue="--usepi" falsevalue=""/>
        <param name="usert" type="boolean" label="Use hydrophobicity / RT information" truevalue="--usert" falsevalue=""/>
        <param name="accurate_mass" type="boolean" label="Use accurate mass binning" truevalue="--accurate-mass" falsevalue=""/>
        <param name="no_ntt" type="boolean" label="Don't use NTT model" truevalue="--no-ntt" falsevalue=""/>
        <param name="no_nmc" type="boolean" label="Don't use NMC model" truevalue="--no-nmc" falsevalue=""/>
        <param name="use_gamma" type="boolean" label="Use Gamma distribution to model the negatives" help="Applies only to X!Tandem results" truevalue="--usegamma" falsevalue=""/>
        <param name="use_only_expect" type="boolean" label="Only use Expect Score as the discriminant" help="Applies only to X!Tandem results.
            Helpful for data with homologous top hits e.g. phospho or glyco" truevalue="--use-only-expect" falsevalue=""/>
        <param name="force_fit" type="boolean" label="Force fitting" help="Bypasses automatic mixture model checks and forces fitting of a mixture model" truevalue="--force-fit" falsevalue=""/>
        <param name="allow_alt_instruments" type="boolean" label="Allow multiple instrument types" help="Warning instead of exit with error if instrument types between runs is different" truevalue="--allow-alt-instruments" falsevalue=""/>
        <param name="maldi" type="boolean" label="Maldi data" truevalue="-l" falsevalue=""/>


    </inputs>
    <outputs>
        <!-- The wrapper leaves its result in peptide_prophet_output.pep.xml in the job working directory -->
        <data format="peptideprophet_pepxml" name="output" metadata_source="input_file" label="peptide_prophet.${input_file.display_name}.pep.xml" from_work_dir="peptide_prophet_output.pep.xml"/>
    </outputs>

    <help>
        Run Peptide Prophet
    </help>


<!--PeptideProphet options [following the 'O']:
    i [use icat information in PeptideProphet]
    f [do not use icat information in PeptideProphet]
    g [use N-glyc motif information in PeptideProphet]
    H [use Phospho information in PeptideProphet]
    m [maldi data]
    I [use pI information in PeptideProphet]
    R [use Hydrophobicity / RT information in PeptideProphet]
    F [force the fitting of the mixture model, bypass automatic mixture model checks]
    A [use accurate mass binning in PeptideProphet]
    w [warning instead of exit with error if instrument types between runs is different]
    x [exclude all entries with asterisked score values in PeptideProphet]
    l [leave alone all entries with asterisked score values in PeptideProphet]
    n [use hardcoded default initialization parameters of the distributions]
    P [use Non-parametric model, can only be used with decoy option]
    N [do not use the NTT model]
    M [do not use the NMC model]
    G [use Gamma Distribution to model the Negatives (applies only to X!Tandem data)]
    E [only use Expect Score as the Discriminant(applies only to X!Tandem data,
       helpful for data with homologous top hits e.g. phospho or glyco)]
    d [report decoy hits with a computed probability based on the model learned]
    p [run ProteinProphet afterwards]
    t [do not create png data plot]
    u [do not assemble protein groups in ProteinProphet analysis]
    s [do not use Occam's Razor in ProteinProphet analysis to
       derive the simplest protein list to explain observed peptides]
-->

</tool>
# File: peptide_prophet_wrapper.rb
#
# Galaxy wrapper around protk's peptide_prophet.rb.
#
# Arguments:
#   ARGV[0]    output path chosen by Galaxy
#   ARGV[1]    original input file
#   remaining  options passed through unchanged to peptide_prophet.rb
#
# Peptide prophet only accepts inputs whose names end in ".pep.xml", so the
# input is symlinked into the working directory under such a name. The result
# is written to peptide_prophet_output.pep.xml in the working directory, and
# references to the temporary paths inside it are rewritten afterwards.
require 'pathname'
require 'shellwords'

$VERBOSE=nil

OUTPUT_FILE_NAME="peptide_prophet_output.pep.xml"

# Replace every literal occurrence of each [from, to] pair in file_name, applying
# the pairs in order. Works line by line through a temporary file so large
# outputs are never held in memory. Does nothing when the file does not exist.
def rewrite_paths(file_name, substitutions)
  return unless File.exist?(file_name)
  tmp_name="#{file_name}.tmp"
  File.open(tmp_name, "w") do |out|
    File.foreach(file_name) do |line|
      # Block form of gsub keeps the replacement literal (no backslash escapes)
      substitutions.each { |from, to| line = line.gsub(from) { to } }
      out.write(line)
    end
  end
  File.rename(tmp_name, file_name)
end

abort("usage: peptide_prophet_wrapper.rb OUTPUT INPUT [options]") if ARGV.length < 2

peptide_prophet_path=%x[which peptide_prophet.rb].chomp
abort("peptide_prophet_wrapper.rb: peptide_prophet.rb was not found on the PATH") if peptide_prophet_path.empty?

actual_output_path_string=ARGV.shift

# After the shift the first argument is the original input file name ... we'll change this below
original_input_file=ARGV[0]

wd=Dir.pwd

actual_input_path_string="#{wd}/#{Pathname.new(original_input_file).basename}.pep.xml"
full_tmp_output_path_string="#{wd}/#{OUTPUT_FILE_NAME}"

# Every interpolated value is shell-escaped so paths containing spaces or shell
# metacharacters cannot break (or inject into) the command line.
cmd="ln -s #{Shellwords.escape(original_input_file)} #{Shellwords.escape(actual_input_path_string)};"

cmd << Shellwords.escape(peptide_prophet_path)

# Hand the tool the link rather than the original file
ARGV[0]=actual_input_path_string

ARGV.each { |a|
  cmd << " #{Shellwords.escape(a)}"
}

cmd << " -o #{OUTPUT_FILE_NAME}"

%x[#{cmd}]

# Finally we need to fix up the output file so any references to the temporary
# working files are changed to refs to the original input and the real output
rewrite_paths(OUTPUT_FILE_NAME, [
  [actual_input_path_string, original_input_file],
  [full_tmp_output_path_string, actual_output_path_string]
])
<!-- File: pepxml_to_table.xml -->
<!-- Galaxy tool definition: runs pepxml_to_table.rb to convert a pepXML dataset into a text table. -->
<tool id="pepxml_to_table_1" name="PepXML to Table" version="1.0.0">
    <requirements><requirement type="package">protk</requirement></requirements>
    <description>Converts a pepXML file to a tab delimited text file</description>


    <!-- Note .. the input file is assumed to be the first argument -->
    <command>pepxml_to_table.rb $input_file -o $output</command>


    <inputs>

        <param name="input_file" type="data" format="pepxml" multiple="false" label="Input File" help="A pepXML file"/>

    </inputs>
    <outputs>
        <!-- NOTE(review): declared as csv although the description and help say tab delimited; confirm which is intended -->
        <data format="csv" name="output" metadata_source="input_file" label="${input_file.display_name}.csv" />
    </outputs>

    <help>
        Convert a pepXML file to Tab delimited text
    </help>

</tool>
<!-- File: protein_prophet.xml -->
<!--
    Galaxy tool definition: runs protein_prophet_wrapper.rb on Peptide Prophet /
    InterProphet results to produce a protXML dataset.
-->
<tool id="proteomics_search_protein_prophet_1" name="Protein Prophet" version="1.0.0">
    <requirements><requirement type="package">protk</requirement></requirements>
    <description>Calculate Protein Prophet statistics on search results</description>


    <!-- Note .. the wrapper expects the output path as its first argument and the input file as its second -->
    <command interpreter="ruby">protein_prophet_wrapper.rb $output $input_file -r $iproph $nooccam $groupwts $normprotlen $logprobs $confem $allpeps $unmapped $instances $delude --minprob=$minprob --minindep=$minindep </command>
    <inputs>

        <param name="input_file" type="data" format="pepxml" multiple="false" label="Peptide Prophet Results" help="These files will typically be outputs from peptide prophet or interprophet"/>


        <!-- NOTE(review): boolean params take checked="true", not selected="true"; as written this box is presumably unchecked by default. Left unchanged so the effective default does not shift; confirm the intended default. -->
        <param name="iproph" selected="true" type="boolean" label="Inputs are from iProphet" truevalue="--iprophet-input" falsevalue=""/>
        <param name="nooccam" type="boolean" label="Don't apply Occam's razor" help="When selected no attempt will be made to derive the simplest protein list explaining observed peptides" truevalue="--no-occam" falsevalue=""/>
        <param name="groupwts" type="boolean" label="Use group weights" help="Check peptide's total weight (rather than actual weight) in the Protein Group against the threshold" truevalue="--group-wts" falsevalue=""/>
        <param name="normprotlen" type="boolean" label="Normalize NSP using Protein Length" truevalue="--norm-protlen" falsevalue=""/>
        <param name="logprobs" type="boolean" label="Use the log of probability in the confidence calculations" truevalue="--log-prob" falsevalue=""/>
        <param name="confem" type="boolean" label="Use the EM to compute probability given the confidence" truevalue="--confem" falsevalue=""/>
        <param name="allpeps" type="boolean" label="Consider all possible peptides in the database in the confidence model" truevalue="--allpeps" falsevalue=""/>
        <param name="unmapped" type="boolean" label="Report results for unmapped proteins" truevalue="--unmapped" falsevalue=""/>
        <param name="instances" type="boolean" label="Use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment" truevalue="--instances" falsevalue=""/>
        <param name="delude" type="boolean" label="Do NOT use peptide degeneracy information when assessing proteins" truevalue="--delude" falsevalue=""/>

        <param name="minprob" type="text" label="Minimum peptide prophet probability for peptides to be considered" value="0.05"/>
        <param name="minindep" type="text" label="Minimum percentage of independent peptides required for a protein" value="0"/>

    </inputs>
    <outputs>
        <!-- The wrapper leaves its result in protein_prophet_results.prot.xml in the job working directory -->
        <data format="protxml" name="output" metadata_source="input_file" label="protein_prophet.${input_file.display_name}.protXML" from_work_dir="protein_prophet_results.prot.xml"/>
    </outputs>


<!--NOPLOT: do not generate plot png file
    NOOCCAM: non-conservative maximum protein list
    GROUPWTS: check peptide's total weight in the Protein Group against the threshold (default: check peptide's actual weight against threshold)
    NORMPROTLEN: Normalize NSP using Protein Length
    LOGPROBS: Use the log of the probabilities in the Confidence calculations
    CONFEM: Use the EM to compute probability given the confidence
    ALLPEPS: Consider all possible peptides in the database in the confidence model
    UNMAPPED: Report results for UNMAPPED proteins
    INSTANCES: Use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment
    DELUDE: do NOT use peptide degeneracy information when assessing proteins

    MINPROB: peptideProphet probability threshold (default=0.05)
    MININDEP: minimum percentage of independent peptides required for a protein (default=0)


-->

    <help>
        Run Protein Prophet
    </help>

</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/protein_prophet_wrapper.rb Thu Jun 21 22:30:48 2012 -0400
@@ -0,0 +1,37 @@
+require 'pathname'
+require 'shellwords'
+
+$VERBOSE=nil
+
+# Galaxy wrapper that runs protk's protein_prophet.rb on one input file.
+# Usage: protein_prophet_wrapper.rb OUTPUT INPUT [options passed to protein_prophet.rb]
+protein_prophet_path=%x[which protein_prophet.rb].chomp
+abort "protein_prophet.rb was not found on the PATH" if protein_prophet_path.empty?
+
+# The first argument (output path) is removed here and not used again in this
+# script; results are written to protein_prophet_results.prot.xml (see -o below).
+actual_output_path_string=ARGV.shift
+
+# The next argument is the original input file name ... we'll change this below
+original_input_file=ARGV[0].to_s
+
+# Before doing anything we create a link to the input file in our working dir with
+# ".pep.xml" appended to its name because the wrapped tool can't handle anything else
+actual_input_path_string="#{Dir.pwd}/#{Pathname.new(original_input_file).basename}.pep.xml"
+ARGV[0]=actual_input_path_string
+
+# Shell-escape every path and option so spaces or metacharacters in them cannot
+# split or alter the command line. Empty arguments are skipped, not passed as ''.
+cmd = "ln -s #{Shellwords.escape(original_input_file)} #{Shellwords.escape(actual_input_path_string)};"
+cmd << Shellwords.escape(protein_prophet_path)
+ARGV.each { |a| cmd << " #{Shellwords.escape(a)}" unless a.empty? }
+cmd << " -o protein_prophet_results.prot.xml"
+
+# Put the original input name back into the results in place of the link name.
+# String#inspect yields valid Ruby literals even if a path contains quotes.
+fix_paths="gsub(#{actual_input_path_string.inspect}, #{(original_input_file + '.pep.xml').inspect})"
+cmd << ";ruby -pi -e #{Shellwords.escape(fix_paths)} protein_prophet_results.prot.xml"
+
+# Backticks capture the commands' stdout; the captured text is not used.
+%x[#{cmd}]
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tandem.xml Thu Jun 21 22:30:48 2012 -0400
@@ -0,0 +1,118 @@
+<tool id="proteomics_search_tandem_1" name="X!Tandem MSMS Search" version="1.0.0">
+    <requirements><requirement type="package">protk</requirement></requirements>
+    <description>Run an X!Tandem Search</description>
+
+    <command>
+    #if $database.source_select=="built_in":
+    tandem_search.rb -d $database.dbkey
+    #else
+    tandem_search.rb -d $database.fasta_file
+    #end if
+
+    --var-mods='
+    $variable_mods
+    #for $custom_variable_mod in $custom_variable_mods:
+    ,${custom_variable_mod.custom_mod}
+    #end for
+    '
+
+    --fix-mods='
+    $fixed_mods
+    #for $custom_fix_mod in $custom_fix_mods:
+    ,${custom_fix_mod.custom_mod}
+    #end for
+    '
+
+    $input_file -o $output -r --enzyme=$enzyme --precursor-ion-tol-units=$precursor_tolu -v $missed_cleavages -f $fragment_ion_tol -p $precursor_ion_tol $allow_multi_isotope_search --keep-params-files
+
+
+    </command>
+
+    <inputs>
+        <conditional name="database">
+            <param name="source_select" type="select" label="Database source">
+                <option value="built_in">Built-In</option>
+                <option value="input_ref">Your Upload File</option>
+            </param>
+            <when value="built_in">
+                <param name="dbkey" type="select" format="text" >
+                    <label>Database</label>
+                    <options from_file="pepxml_databases.loc">
+                        <column name="name" index="0" />
+                        <column name="value" index="2" />
+                    </options>
+                </param>
+            </when>
+            <when value="input_ref">
+                <param name="fasta_file" type="data" format="fasta" label="Uploaded FASTA file" />
+            </when>
+        </conditional>
+
+        <param name="input_file" type="data" format="mzml" multiple="false" label="MSMS File" help="An mzML file with MS/MS data"/>
+
+
+        <param name="variable_mods" format="text" type="select" multiple="true" label="Variable Modifications" help="Hold the appropriate key while
+            clicking to select multiple items">
+            <options from_file="tandem_mods.loc">
+                <column name="name" index="0" />
+                <column name="value" index="2" />
+            </options>
+        </param>
+
+        <repeat name="custom_variable_mods" title="Custom Variable Modifications" help="You can specify a modification when present in a motif. For instance, 0.998@N!{P}[ST] is a deamidation modification on N only if it is present in an N[any but P][S or T] motif (N-glycosite).">
+            <param name="custom_mod" type="text">
+            </param>
+        </repeat>
+
+
+        <param name="fixed_mods" format="text" type="select" multiple="true" label="Fixed Modifications" help="Hold the appropriate key while
+            clicking to select multiple items">
+            <options from_file="tandem_mods.loc">
+                <column name="name" index="0" />
+                <column name="value" index="2" />
+            </options>
+        </param>
+
+        <repeat name="custom_fix_mods" title="Custom Fixed Modifications" help="You can specify a modification when present in a motif. For instance, 0.998@N!{P}[ST] is a deamidation modification on N only if it is present in an N[any but P][S or T] motif (N-glycosite).">
+            <param name="custom_mod" type="text">
+            </param>
+        </repeat>
+
+
+
+        <param name="missed_cleavages" type="select" format="text" help="Allow peptides to contain up to this many missed enzyme cleavage sites">
+            <label>Missed Cleavages Allowed</label>
+            <option value="0">0</option>
+            <option value="1">1</option>
+            <option value="2">2</option>
+        </param>
+
+        <param name="enzyme" type="select" format="text">
+            <label>Enzyme</label>
+            <option value="Trypsin">Trypsin</option>
+        </param>
+
+        <param name="fragment_ion_tol" help="Fragment Ion Tolerance in Daltons" type="float" value="0.65" min="0" max="10000" label="Fragment ion tolerance"/>
+
+        <param name="precursor_ion_tol" help="Precursor Ion Tolerance (Da or ppm)" type="float" value="100" min="0" max="10000" label="Precursor ion tolerance"/>
+        <param name="precursor_tolu" type="select" format="text">
+            <label>Precursor Ion Tolerance Units</label>
+            <option value="ppm">ppm</option>
+            <option value="Da">Da</option>
+        </param>
+
+        <param name="allow_multi_isotope_search" type="boolean" label="Allow multi-isotope search" help="This allows peptide candidates in windows around -1 Da and -2 Da from the acquired mass to be considered. Only applicable when the minus/plus window above is set to less than 0.5 Da. Good for accurate-mass instruments for which the reported precursor mass is not corrected to the monoisotopic mass." truevalue="" falsevalue="--strict-monoisotopic-mass"/>
+
+    </inputs>
+
+
+    <outputs>
+        <data format="raw_pepxml" name="output" metadata_source="input_file" label="X!Tandem_vs_${database.dbkey if $database.has_key('dbkey') else $database.fasta_file.display_name}.${input_file.display_name}.pepXML"/>
+    </outputs>
+
+
+    <help>
+        Run an X!Tandem Search
+    </help>
+
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/datatypes_conf.xml Thu Jun 21 22:30:48 2012 -0400
@@ -0,0 +1,37 @@
+<?xml version="1.0"?>
+<datatypes>
+    <datatype_files>
+        <datatype_file name="proteomics.py"/>
+    </datatype_files>
+    <registration display_path="display_applications">
+        <datatype extension="pepxml" type="galaxy.datatypes.proteomics:PepXml" mimetype="application/xml" display_in_upload="true">
+            <display file="proteomics/PepXml.xml" />
+        </datatype>
+        <datatype extension="raw_pepxml" type="galaxy.datatypes.proteomics:PepXml" subclass="true">
+            <display file="proteomics/PepXml.xml" />
+        </datatype>
+        <datatype extension="peptideprophet_pepxml" type="galaxy.datatypes.proteomics:PepXml" subclass="true">
+            <display file="proteomics/PepXml.xml" />
+        </datatype>
+        <datatype extension="interprophet_pepxml" type="galaxy.datatypes.proteomics:PepXml" subclass="true">
+            <display file="proteomics/PepXml.xml" />
+        </datatype>
+        <datatype extension="protxml" type="galaxy.datatypes.proteomics:ProtXML" display_in_upload="true" >
+            <display file="proteomics/ProtXml.xml"/>
+        </datatype>
+        <datatype extension="mascotdat" type="galaxy.datatypes.proteomics:MascotDat" display_in_upload="false" />
+        <datatype extension="mzml" type="galaxy.datatypes.proteomics:MzML" mimetype="application/xml" display_in_upload="true">
+            <display file="proteomics/mzML.xml"/>
+        </datatype>
+        <datatype extension="mgf" type="galaxy.datatypes.proteomics:Mgf" display_in_upload="true" />
+        <datatype extension="xls" type="galaxy.datatypes.proteomics:Xls" display_in_upload="true" />
+    </registration>
+    <sniffers>
+        <sniffer type="galaxy.datatypes.proteomics:MzML"/>
+        <sniffer type="galaxy.datatypes.proteomics:PepXml"/>
+        <sniffer type="galaxy.datatypes.proteomics:Mgf"/>
+        <sniffer type="galaxy.datatypes.proteomics:ProtXML"/>
+        <sniffer type="galaxy.datatypes.proteomics:MzXML"/>
+        <sniffer type="galaxy.datatypes.proteomics:Xls"/>
+    </sniffers>
+</datatypes>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/mascot_databases.loc.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,13 @@ +#This file lists the names of protein databases installed on Mascot +# +#In order to use interprophet to combine results from different search engines +#it is important that all searches are performed on the same database +#you should therefore ensure that each database installed on mascot has an equivalent +#database installed in the Protk databases directory (databases used by omssa and x!tandem) +#the mascot_to_pepxml tool will ask for this database when performing the conversion. +# +# Entries should be structured as follows +# Display_name dbkey dbNameOnMascot dbkey +# +Swissprot spall_ SPAll spall_ +Swissprot Human sphuman_ SPHuman sphuman_ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/mascot_mods.loc.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,77 @@ +#This file lists the names of chemical modifications acceptable for proteomics search engines +# +# +Acetyl (K) acetyl_k_ Acetyl (K) acetyl_k_ +Acetyl (N-term) acetyl_n-term_ Acetyl (N-term) acetyl_n-term_ +Acetyl (Protein N-term) acetyl_proteinn-term_ Acetyl (Protein N-term) acetyl_proteinn-term_ +Amidated (C-term) amidated_c-term_ Amidated (C-term) amidated_c-term_ +Amidated (Protein C-term) amidated_proteinc-term_ Amidated (Protein C-term) amidated_proteinc-term_ +Ammonia-loss (N-term C) ammonia-loss_n-termc_ Ammonia-loss (N-term C) ammonia-loss_n-termc_ +Biotin (K) biotin_k_ Biotin (K) biotin_k_ +Biotin (N-term) biotin_n-term_ Biotin (N-term) biotin_n-term_ +Carbamidomethyl (C) carbamidomethyl_c_ Carbamidomethyl (C) carbamidomethyl_c_ +Carbamyl (K) carbamyl_k_ Carbamyl (K) carbamyl_k_ +Carbamyl (N-term) carbamyl_n-term_ Carbamyl (N-term) carbamyl_n-term_ +Carboxymethyl (C) carboxymethyl_c_ Carboxymethyl (C) carboxymethyl_c_ +Cation:Na (C-term) cation_na_c-term_ Cation:Na (C-term) cation_na_c-term_ +Cation:Na (DE) cation_na_de_ Cation:Na (DE) cation_na_de_ +Deamidated (NQ) deamidated_nq_ Deamidated (NQ) deamidated_nq_ +Deamidated-N (N) deamidated-n_n_ Deamidated-N (N) deamidated-n_n_ +Dehydrated (N-term C) dehydrated_n-termc_ Dehydrated (N-term C) dehydrated_n-termc_ +Dehydro (C) dehydro_c_ Dehydro (C) dehydro_c_ +Dioxidation (M) dioxidation_m_ Dioxidation (M) dioxidation_m_ +Ethanolyl (C) ethanolyl_c_ Ethanolyl (C) ethanolyl_c_ +ExacTagAmine (K) exactagamine_k_ ExacTagAmine (K) exactagamine_k_ +ExacTagThiol (C) exactagthiol_c_ ExacTagThiol (C) exactagthiol_c_ +Formyl (N-term) formyl_n-term_ Formyl (N-term) formyl_n-term_ +Formyl (Protein N-term) formyl_proteinn-term_ Formyl (Protein N-term) formyl_proteinn-term_ +Gln->pyro-Glu (N-term Q) gln_pyro-glu_n-termq_ Gln->pyro-Glu (N-term Q) gln_pyro-glu_n-termq_ +Glu->pyro-Glu (N-term E) 
glu_pyro-glu_n-terme_ Glu->pyro-Glu (N-term E) glu_pyro-glu_n-terme_ +Guanidinyl (K) guanidinyl_k_ Guanidinyl (K) guanidinyl_k_ +ICAT-C (C) icat-c_c_ ICAT-C (C) icat-c_c_ +ICAT-C:13C(9) (C) icat-c_13c_9__c_ ICAT-C:13C(9) (C) icat-c_13c_9__c_ +ICPL (K) icpl_k_ ICPL (K) icpl_k_ +ICPL (Protein N-term) icpl_proteinn-term_ ICPL (Protein N-term) icpl_proteinn-term_ +ICPL:13C(6) (K) icpl_13c_6__k_ ICPL:13C(6) (K) icpl_13c_6__k_ +ICPL:13C(6) (Protein N-term) icpl_13c_6__proteinn-term_ ICPL:13C(6) (Protein N-term) icpl_13c_6__proteinn-term_ +ICPL:13C(6)2H(4) (K) icpl_13c_6_2h_4__k_ ICPL:13C(6)2H(4) (K) icpl_13c_6_2h_4__k_ +ICPL:13C(6)2H(4) (N-term) icpl_13c_6_2h_4__n-term_ ICPL:13C(6)2H(4) (N-term) icpl_13c_6_2h_4__n-term_ +ICPL:13C(6)2H(4) (Protein N-term) icpl_13c_6_2h_4__proteinn-term_ ICPL:13C(6)2H(4) (Protein N-term) icpl_13c_6_2h_4__proteinn-term_ +ICPL:2H(4) (K) icpl_2h_4__k_ ICPL:2H(4) (K) icpl_2h_4__k_ +ICPL:2H(4) (Protein N-term) icpl_2h_4__proteinn-term_ ICPL:2H(4) (Protein N-term) icpl_2h_4__proteinn-term_ +iTRAQ4plex (K) itraq4plex_k_ iTRAQ4plex (K) itraq4plex_k_ +iTRAQ4plex (N-term) itraq4plex_n-term_ iTRAQ4plex (N-term) itraq4plex_n-term_ +iTRAQ4plex (Y) itraq4plex_y_ iTRAQ4plex (Y) itraq4plex_y_ +iTRAQ8plex (K) itraq8plex_k_ iTRAQ8plex (K) itraq8plex_k_ +iTRAQ8plex (N-term) itraq8plex_n-term_ iTRAQ8plex (N-term) itraq8plex_n-term_ +iTRAQ8plex (Y) itraq8plex_y_ iTRAQ8plex (Y) itraq8plex_y_ +Label:18O(1) (C-term) label_18o_1__c-term_ Label:18O(1) (C-term) label_18o_1__c-term_ +Label:18O(2) (C-term) label_18o_2__c-term_ Label:18O(2) (C-term) label_18o_2__c-term_ +Met->Hse (C-term M) met_hse_c-termm_ Met->Hse (C-term M) met_hse_c-termm_ +Met->Hsl (C-term M) met_hsl_c-termm_ Met->Hsl (C-term M) met_hsl_c-termm_ +Methyl (C-term) methyl_c-term_ Methyl (C-term) methyl_c-term_ +Methyl (DE) methyl_de_ Methyl (DE) methyl_de_ +Methylthio (C) methylthio_c_ Methylthio (C) methylthio_c_ +mTRAQ (K) mtraq_k_ mTRAQ (K) mtraq_k_ +mTRAQ (N-term) mtraq_n-term_ mTRAQ (N-term) 
mtraq_n-term_ +mTRAQ (Y) mtraq_y_ mTRAQ (Y) mtraq_y_ +mTRAQ:13C(3)15N(1) (K) mtraq_13c_3_15n_1__k_ mTRAQ:13C(3)15N(1) (K) mtraq_13c_3_15n_1__k_ +mTRAQ:13C(3)15N(1) (N-term) mtraq_13c_3_15n_1__n-term_ mTRAQ:13C(3)15N(1) (N-term) mtraq_13c_3_15n_1__n-term_ +mTRAQ:13C(3)15N(1) (Y) mtraq_13c_3_15n_1__y_ mTRAQ:13C(3)15N(1) (Y) mtraq_13c_3_15n_1__y_ +NIPCAM (C) nipcam_c_ NIPCAM (C) nipcam_c_ +Oxidation (HW) oxidation_hw_ Oxidation (HW) oxidation_hw_ +Oxidation (M) oxidation_m_ Oxidation (M) oxidation_m_ +Phospho (ST) phospho_st_ Phospho (ST) phospho_st_ +Phospho (Y) phospho_y_ Phospho (Y) phospho_y_ +Propionamide (C) propionamide_c_ Propionamide (C) propionamide_c_ +Pyridylethyl (C) pyridylethyl_c_ Pyridylethyl (C) pyridylethyl_c_ +Pyro-carbamidomethyl (N-term C) pyro-carbamidomethyl_n-termc_ Pyro-carbamidomethyl (N-term C) pyro-carbamidomethyl_n-termc_ +Sulfo (S) sulfo_s_ Sulfo (S) sulfo_s_ +Sulfo (T) sulfo_t_ Sulfo (T) sulfo_t_ +Sulfo (Y) sulfo_y_ Sulfo (Y) sulfo_y_ +TMT (K) tmt_k_ TMT (K) tmt_k_ +TMT (N-term) tmt_n-term_ TMT (N-term) tmt_n-term_ +TMT2plex (K) tmt2plex_k_ TMT2plex (K) tmt2plex_k_ +TMT2plex (N-term) tmt2plex_n-term_ TMT2plex (N-term) tmt2plex_n-term_ +TMT6plex (K) tmt6plex_k_ TMT6plex (K) tmt6plex_k_ +TMT6plex (N-term) tmt6plex_n-term_ TMT6plex (N-term) tmt6plex_n-term_
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/omssa_mods.loc.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,181 @@ +#This file lists the names of chemical modifications accepted by OMMSA +# +# +2-amino-3-oxo-butanoic acid T 2-amino-3-oxo-butanoicacidt_ 23 2-amino-3-oxo-butanoicacidt_ +Asparagine HexNAc asparaginehexnac_ 182 asparaginehexnac_ +Asparagine dHexHexNAc asparaginedhexhexnac_ 183 asparaginedhexhexnac_ +CAMthiopropanoyl K camthiopropanoylk_ 131 camthiopropanoylk_ +CHD2-di-methylation of K chd2-di-methylationofk_ 189 chd2-di-methylationofk_ +CHD2-di-methylation of peptide n-term chd2-di-methylationofpeptiden-term_ 190 chd2-di-methylationofpeptiden-term_ +ICAT heavy icatheavy_ 130 icatheavy_ +ICAT light icatlight_ 129 icatlight_ +M cleavage from protein n-term mcleavagefromproteinn-term_ 9 mcleavagefromproteinn-term_ +MMTS on C mmtsonc_ 179 mmtsonc_ +Maleimide-PEO2-Biotin of C maleimide-peo2-biotinofc_ 191 maleimide-peo2-biotinofc_ +NEM C nemc_ 83 nemc_ +NIPCAM nipcam_ 84 nipcam_ +O18 on peptide n-term o18onpeptiden-term_ 87 o18onpeptiden-term_ +PNGasF in O18 water pngasfino18water_ 139 pngasfino18water_ +SeMet semet_ 113 semet_ +Serine HexNAc serinehexnac_ 184 serinehexnac_ +TMT 6-plex on K tmt6-plexonk_ 198 tmt6-plexonk_ +TMT 6-plex on n-term peptide tmt6-plexonn-termpeptide_ 199 tmt6-plexonn-termpeptide_ +Threonine HexNAc threoninehexnac_ 185 threoninehexnac_ +Uniblue A on K uniblueaonk_ 195 uniblueaonk_ +acetylation of K acetylationofk_ 24 acetylationofk_ +acetylation of protein n-term acetylationofproteinn-term_ 10 acetylationofproteinn-term_ +amidation of peptide c-term amidationofpeptidec-term_ 25 amidationofpeptidec-term_ +arginine to ornithine argininetoornithine_ 163 argininetoornithine_ +beta elimination of S betaeliminationofs_ 140 betaeliminationofs_ +beta elimination of T betaeliminationoft_ 141 betaeliminationoft_ +beta methythiolation of D betamethythiolationofd_ 13 betamethythiolationofd_ +beta-carboxylation of D 
beta-carboxylationofd_ 47 beta-carboxylationofd_ +beta-methylthiolation of D (duplicate of 13) beta-methylthiolationofd_duplicateof13__ 26 beta-methylthiolationofd_duplicateof13__ +carbamidomethyl C carbamidomethylc_ 3 carbamidomethylc_ +carbamylation of K carbamylationofk_ 31 carbamylationofk_ +carbamylation of n-term peptide carbamylationofn-termpeptide_ 32 carbamylationofn-termpeptide_ +carboxyamidomethylation of D carboxyamidomethylationofd_ 29 carboxyamidomethylationofd_ +carboxyamidomethylation of E carboxyamidomethylationofe_ 30 carboxyamidomethylationofe_ +carboxyamidomethylation of H carboxyamidomethylationofh_ 28 carboxyamidomethylationofh_ +carboxyamidomethylation of K carboxyamidomethylationofk_ 27 carboxyamidomethylationofk_ +carboxykynurenin of W carboxykynureninofw_ 165 carboxykynureninofw_ +carboxymethyl C carboxymethylc_ 2 carboxymethylc_ +carboxymethylated selenocysteine carboxymethylatedselenocysteine_ 207 carboxymethylatedselenocysteine_ +citrullination of R citrullinationofr_ 33 citrullinationofr_ +deamidation of N deamidationofn_ 196 deamidationofn_ +deamidation of N and Q deamidationofnandq_ 4 deamidationofnandq_ +dehydro of S and T dehydroofsandt_ 164 dehydroofsandt_ +di-O18 on peptide n-term di-o18onpeptiden-term_ 88 di-o18onpeptiden-term_ +di-iodination of Y di-iodinationofy_ 35 di-iodinationofy_ +di-methylation of K di-methylationofk_ 36 di-methylationofk_ +di-methylation of R di-methylationofr_ 37 di-methylationofr_ +di-methylation of peptide n-term di-methylationofpeptiden-term_ 38 di-methylationofpeptiden-term_ +farnesylation of C farnesylationofc_ 42 farnesylationofc_ +fluorophenylalanine fluorophenylalanine_ 46 fluorophenylalanine_ +formylation of K formylationofk_ 43 formylationofk_ +formylation of peptide n-term formylationofpeptiden-term_ 44 formylationofpeptiden-term_ +formylation of protein n-term formylationofproteinn-term_ 82 formylationofproteinn-term_ +gamma-carboxylation of E gamma-carboxylationofe_ 48 
gamma-carboxylationofe_ +gammathiopropionylation of K gammathiopropionylationofk_ 40 gammathiopropionylationofk_ +gammathiopropionylation of peptide n-term gammathiopropionylationofpeptiden-term_ 41 gammathiopropionylationofpeptiden-term_ +geranyl-geranyl geranyl-geranyl_ 49 geranyl-geranyl_ +glucuronylation of protein n-term glucuronylationofproteinn-term_ 50 glucuronylationofproteinn-term_ +glutathione disulfide glutathionedisulfide_ 51 glutathionedisulfide_ +guanidination of K guanidinationofk_ 53 guanidinationofk_ +heavy arginine-13C6 heavyarginine-13c6_ 136 heavyarginine-13c6_ +heavy arginine-13C6-15N4 heavyarginine-13c6-15n4_ 137 heavyarginine-13c6-15n4_ +heavy lysine - 13C6 15N2 heavylysine-13c615n2_ 181 heavylysine-13c615n2_ +heavy lysine - 2H4 heavylysine-2h4_ 180 heavylysine-2h4_ +heavy lysine-13C6 heavylysine-13c6_ 138 heavylysine-13c6_ +homoserine homoserine_ 56 homoserine_ +homoserine lactone homoserinelactone_ 57 homoserinelactone_ +hydroxylation of Y hydroxylationofy_ 64 hydroxylationofy_ +hydroxylation of D hydroxylationofd_ 59 hydroxylationofd_ +hydroxylation of F hydroxylationoff_ 63 hydroxylationoff_ +hydroxylation of K hydroxylationofk_ 60 hydroxylationofk_ +hydroxylation of N hydroxylationofn_ 61 hydroxylationofn_ +hydroxylation of P hydroxylationofp_ 62 hydroxylationofp_ +iTRAQ114 on K itraq114onk_ 168 itraq114onk_ +iTRAQ114 on Y itraq114ony_ 169 itraq114ony_ +iTRAQ114 on nterm itraq114onnterm_ 167 itraq114onnterm_ +iTRAQ115 on K itraq115onk_ 171 itraq115onk_ +iTRAQ115 on Y itraq115ony_ 172 itraq115ony_ +iTRAQ115 on nterm itraq115onnterm_ 170 itraq115onnterm_ +iTRAQ116 on K itraq116onk_ 174 itraq116onk_ +iTRAQ116 on Y itraq116ony_ 175 itraq116ony_ +iTRAQ116 on nterm itraq116onnterm_ 173 itraq116onnterm_ +iTRAQ117 on K itraq117onk_ 177 itraq117onk_ +iTRAQ117 on Y itraq117ony_ 178 itraq117ony_ +iTRAQ117 on nterm itraq117onnterm_ 176 itraq117onnterm_ +iTRAQ8plex itraq8plex_ 204 itraq8plex_ +iTRAQ8plex itraq8plex_ 205 itraq8plex_ +iTRAQ8plex 
itraq8plex_ 203 itraq8plex_ +iTRAQ8plex itraq8plex_ 201 itraq8plex_ +iTRAQ8plex itraq8plex_ 202 itraq8plex_ +iTRAQ8plex itraq8plex_ 200 itraq8plex_ +iodination of Y iodinationofy_ 65 iodinationofy_ +lipoyl K lipoylk_ 67 lipoylk_ +methyl C methylc_ 73 methylc_ +methyl H methylh_ 74 methylh_ +methyl N methyln_ 75 methyln_ +methyl R methylr_ 77 methylr_ +methyl ester of D methylesterofd_ 69 methylesterofd_ +methyl ester of E (duplicate of 17) methylesterofe_duplicateof17__ 70 methylesterofe_duplicateof17__ +methyl ester of S methylesterofs_ 71 methylesterofs_ +methyl ester of Y methylesterofy_ 72 methylesterofy_ +methyl ester of peptide c-term (duplicate of 18) methylesterofpeptidec-term_duplicateof18__ 68 methylesterofpeptidec-term_duplicateof18__ +methylation of D methylationofd_ 16 methylationofd_ +methylation of E methylationofe_ 17 methylationofe_ +methylation of K methylationofk_ 0 methylationofk_ +methylation of Q methylationofq_ 14 methylationofq_ +methylation of peptide c-term methylationofpeptidec-term_ 18 methylationofpeptidec-term_ +methylation of peptide n-term methylationofpeptiden-term_ 76 methylationofpeptiden-term_ +methylation of protein n-term methylationofproteinn-term_ 11 methylationofproteinn-term_ +myristoleylation of G myristoleylationofg_ 78 myristoleylationofg_ +myristoyl-4H of G myristoyl-4hofg_ 79 myristoyl-4hofg_ +myristoylation of K myristoylationofk_ 81 myristoylationofk_ +myristoylation of peptide n-term G myristoylationofpeptiden-termg_ 80 myristoylationofpeptiden-termg_ +n-acyl diglyceride cysteine n-acyldiglyceridecysteine_ 118 n-acyldiglyceridecysteine_ +n-formyl met addition n-formylmetaddition_ 22 n-formylmetaddition_ +oxidation of C oxidationofc_ 193 oxidationofc_ +oxidation of C to cysteic acid oxidationofctocysteicacid_ 34 oxidationofctocysteicacid_ +oxidation of C to sulfinic acid oxidationofctosulfinicacid_ 162 oxidationofctosulfinicacid_ +oxidation of F to dihydroxyphenylalanine oxidationofftodihydroxyphenylalanine_ 39 
oxidationofftodihydroxyphenylalanine_ +oxidation of H oxidationofh_ 89 oxidationofh_ +oxidation of H to D oxidationofhtod_ 55 oxidationofhtod_ +oxidation of H to N oxidationofhton_ 54 oxidationofhton_ +oxidation of M oxidationofm_ 1 oxidationofm_ +oxidation of P to pyroglutamic acid oxidationofptopyroglutamicacid_ 111 oxidationofptopyroglutamicacid_ +oxidation of W oxidationofw_ 90 oxidationofw_ +oxidation of W to formylkynurenin oxidationofwtoformylkynurenin_ 45 oxidationofwtoformylkynurenin_ +oxidation of W to hydroxykynurenin oxidationofwtohydroxykynurenin_ 58 oxidationofwtohydroxykynurenin_ +oxidation of W to kynurenin oxidationofwtokynurenin_ 66 oxidationofwtokynurenin_ +oxidation of W to nitro oxidationofwtonitro_ 85 oxidationofwtonitro_ +oxidation of Y (duplicate of 64) oxidationofy_duplicateof64__ 194 oxidationofy_duplicateof64__ +oxidation of Y to nitro oxidationofytonitro_ 86 oxidationofytonitro_ +palmitoleyl of C palmitoleylofc_ 187 palmitoleylofc_ +palmitoleyl of S palmitoleylofs_ 186 palmitoleylofs_ +palmitoleyl of T palmitoleyloft_ 188 palmitoleyloft_ +palmitoylation of C palmitoylationofc_ 92 palmitoylationofc_ +palmitoylation of K palmitoylationofk_ 93 palmitoylationofk_ +palmitoylation of S palmitoylationofs_ 94 palmitoylationofs_ +palmitoylation of T palmitoylationoft_ 95 palmitoylationoft_ +phosphopantetheine S phosphopantetheines_ 91 phosphopantetheines_ +phosphorylation of H phosphorylationofh_ 192 phosphorylationofh_ +phosphorylation of S phosphorylationofs_ 6 phosphorylationofs_ +phosphorylation of S with ETD loss phosphorylationofswithetdloss_ 134 phosphorylationofswithetdloss_ +phosphorylation of S with prompt loss phosphorylationofswithpromptloss_ 96 phosphorylationofswithpromptloss_ +phosphorylation of T phosphorylationoft_ 7 phosphorylationoft_ +phosphorylation of T with ETD loss phosphorylationoftwithetdloss_ 135 phosphorylationoftwithetdloss_ +phosphorylation of T with prompt loss phosphorylationoftwithpromptloss_ 97 
phosphorylationoftwithpromptloss_ +phosphorylation of Y phosphorylationofy_ 8 phosphorylationofy_ +phosphorylation with neutral loss on C phosphorylationwithneutrallossonc_ 99 phosphorylationwithneutrallossonc_ +phosphorylation with neutral loss on D phosphorylationwithneutrallossond_ 100 phosphorylationwithneutrallossond_ +phosphorylation with neutral loss on H phosphorylationwithneutrallossonh_ 101 phosphorylationwithneutrallossonh_ +phosphorylation with neutral loss on S phosphorylationwithneutrallossons_ 132 phosphorylationwithneutrallossons_ +phosphorylation with neutral loss on T phosphorylationwithneutrallossont_ 133 phosphorylationwithneutrallossont_ +phosphorylation with prompt loss on Y phosphorylationwithpromptlossony_ 98 phosphorylationwithpromptlossony_ +propionamide C propionamidec_ 5 propionamidec_ +propionyl heavy K propionylheavyk_ 104 propionylheavyk_ +propionyl heavy peptide n-term propionylheavypeptiden-term_ 105 propionylheavypeptiden-term_ +propionyl light K propionyllightk_ 102 propionyllightk_ +propionyl light on peptide n-term propionyllightonpeptiden-term_ 103 propionyllightonpeptiden-term_ +pyridyl K pyridylk_ 106 pyridylk_ +pyridyl peptide n-term pyridylpeptiden-term_ 107 pyridylpeptiden-term_ +pyro-cmC pyro-cmc_ 108 pyro-cmc_ +pyro-glu from n-term E pyro-glufromn-terme_ 109 pyro-glufromn-terme_ +pyro-glu from n-term Q pyro-glufromn-termq_ 110 pyro-glufromn-termq_ +s-pyridylethylation of C s-pyridylethylationofc_ 112 s-pyridylethylationofc_ +selenocysteine selenocysteine_ 206 selenocysteine_ +sulfation of Y sulfationofy_ 114 sulfationofy_ +sulphone of M sulphoneofm_ 115 sulphoneofm_ +sumoylation of K sumoylationofk_ 166 sumoylationofk_ +tri-deuteromethylation of D tri-deuteromethylationofd_ 19 tri-deuteromethylationofd_ +tri-deuteromethylation of E tri-deuteromethylationofe_ 20 tri-deuteromethylationofe_ +tri-deuteromethylation of peptide c-term tri-deuteromethylationofpeptidec-term_ 21 tri-deuteromethylationofpeptidec-term_ 
+tri-iodination of Y tri-iodinationofy_ 116 tri-iodinationofy_ +tri-methylation of K tri-methylationofk_ 15 tri-methylationofk_ +tri-methylation of R tri-methylationofr_ 117 tri-methylationofr_ +tri-methylation of protein n-term tri-methylationofproteinn-term_ 12 tri-methylationofproteinn-term_ +trideuteration of L (SILAC) trideuterationofl_silac__ 197 trideuterationofl_silac__ +ubiquitinylation residue ubiquitinylationresidue_ 52 ubiquitinylationresidue_
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/pepxml_databases.loc.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,13 @@ +#This file lists the names of protein databases installed locally in protk. +# These are used by omssa and x!tandem as well as the "mascot to pepxml" tool +# In order to combine search results with Interprophet searches must be run against an identical database +# +# Entries should be structured as follows +# Display_name omssa_tandem_dbname dbkey +# +# +Swissprot spall_ spall spall_ +Combined PlasmboDB (falciparum) and Swissprot Human plasmodb_pfalciparum_sphuman_ plasmodb_pfalciparum_sphuman plasmodb_pfalciparum_sphuman_ +Swissprot Human sphuman_ sphuman sphuman_ +Combined Swissprot/TRembl Human sptrhuman_ sptrhuman sptrhuman_ +Swissprot Mouse spmouse_ spmouse spmouse_
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/protk_display_site.txt.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,3 @@ +#Proteomic Visualization application should be hosted on the same server as galaxy +#Entries in this file are of the format "site_id" site_url +Proteomics Visualize http://127.0.0.1:8500
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/tandem_mods.loc.sample Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,6 @@ +#This file lists the names of inbuilt chemical modifications accepted by X!Tandem +# +# +Carbamidomethyl C carbamidomethyl_c_ 57.021464@C carbamidomethyl_c_ +Glycocapture-N glycocapture_n_ 0.998@N!{P}[ST] glycocapture_n_ +Oxidation M oxidation_m_ 15.994915@M oxidation_m_ \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/xls_to_table.xml Thu Jun 21 22:30:48 2012 -0400
@@ -0,0 +1,23 @@
+<tool id="xls_to_table_1" name="Excel to Table" version="1.0.0">
+    <requirements><requirement type="package">protk</requirement></requirements>
+    <description>Converts an excel spreadsheet to a tab delimited text file</description>
+
+
+<!-- Note .. the input file is assumed to be the first argument -->
+<command>xls_to_table.rb $input_file -o $output</command>
+
+
+<inputs>
+
+    <param name="input_file" type="data" format="xls" multiple="false" label="Input File" help="An Excel Spreadsheet"/>
+
+</inputs>
+<outputs>
+    <data format="csv" name="output" metadata_source="input_file" label="${input_file.display_name}.csv" />
+</outputs>
+
+<help>
+    Convert an Excel Spreadsheet to Tab delimited text
+</help>
+
+</tool>