# HG changeset patch # User iracooke # Date 1362435106 18000 # Node ID 25261529840c99b50efefb199e0e49f155d2ca45 # Parent 49d15d40117d2e885a37c8221854e727f8b8714a Uploaded diff -r 49d15d40117d -r 25261529840c interprophet.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interprophet.xml Mon Mar 04 17:11:46 2013 -0500 @@ -0,0 +1,61 @@ + + + + galaxy_protk + trans_proteomic_pipeline + + + + Combine Peptide Prophet results from multiple search engines + + + + interprophet_wrapper.rb $output $use_nss $use_nrs $use_nse $use_nsi $use_nsm --minprob $minprob + + + ## Inputs. + ${first_input} + #for $input_file in $input_files: + ${input_file.additional_input} + #end for + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Takes a set of pepXML files (possibly generated using different search engines) and calculates updated identification probabilities for each peptide. The updated probabilities are based on a statistical model that combines evidence from identifications across all of the input files, spectra, modified states and charge states. + +---- + +**Citation** + +If you use this tool please read and cite the paper describing iProphet + +Shteynberg D, et al. “iProphet: Improved statistical validation of peptide identifications in shotgun proteomics.” *Molecular and Cellular Proteomics* 10, M111.007690 (2011). + + + + diff -r 49d15d40117d -r 25261529840c interprophet_wrapper.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interprophet_wrapper.rb Mon Mar 04 17:11:46 2013 -0500 @@ -0,0 +1,56 @@ +require 'pathname' + +$VERBOSE=nil + +# Hard-Coded argument order and number of arguments +# +actual_output_path_string=ARGV[0] +use_nss=ARGV[1] +use_nrs=ARGV[2] +use_nse=ARGV[3] +use_nsi=ARGV[4] +use_nsm=ARGV[5] +minprob=ARGV[6] +minprob_val=ARGV[7] + +wd= Dir.pwd +original_input_files=ARGV.drop(7) +# End hard coded args # + +cmd="" + +output_substitution_cmds="" + +input_files=original_input_files.collect do |input| + + # We append ".pep.xml" to the input file name because interprophet can't handle anything else + # In order for this to work properly we need to create a symbolic link our working directory + # + original_input_path=Pathname.new("#{input}") + actual_input_path_string="#{wd}/#{original_input_path.basename}.pep.xml" + + cmd << "ln -s #{input} #{actual_input_path_string};" + output_substitution_cmds << "ruby -pi -e \"gsub('#{actual_input_path_string}', '#{input}.pep.xml')\" interprophet_output.pep.xml;" + actual_input_path_string +end + +cmd << "rvm 1.9.3@protk-1.1.9 do interprophet.rb" + +cmd << " --no-nss" unless use_nss=="blank" +cmd << " --no-nrs" unless use_nrs=="blank" +cmd << " --no-nse" unless use_nse=="blank" +cmd << " --no-nsi" unless use_nsi=="blank" +cmd << " --no-nsm" unless use_nsm=="blank" + + +input_files.each { |input| + cmd << " #{input}" +} + + +cmd << " -o interprophet_output.pep.xml -r" + +cmd << ";#{output_substitution_cmds}" + +%x[#{cmd}] + diff -r 49d15d40117d -r 25261529840c peptide_prophet.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peptide_prophet.xml Mon Mar 04 17:11:46 2013 -0500 @@ -0,0 +1,83 @@ + + + galaxy_protk + trans_proteomic_pipeline + + + Calculate Peptide Prophet statistics on search results + + peptide_prophet_wrapper.rb ${output} ${input_file} -r $glyco $useicat $phospho $usepi $usert $accurate_mass $no_ntt $no_nmc $use_gamma $use_only_expect $force_fit $allow_alt_instruments $maldi + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Given raw search engine scores as inputs this tool estimates the accuracy of peptide assignments. From a practical perspective it estimates the probability that each peptide assignment is correct (providing probabilities as outputs), given raw scores (possibly on some arbitrary scale) as inputs. + +---- + +**Citation** + +If you use this tool please read and cite the paper describing the statistical model implemented by Peptide Prophet + +Keller A., et al. “Empirical Statistical Model to Estimate the Accuracy of Peptide Identifications Made by MS/MS and Database Search” *Anal. Chem.* 74, 5383-5392 (2002). + + + + + + + + diff -r 49d15d40117d -r 25261529840c peptide_prophet_wrapper.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peptide_prophet_wrapper.rb Mon Mar 04 17:11:46 2013 -0500 @@ -0,0 +1,36 @@ +require 'pathname' + +$VERBOSE=nil + +actual_output_path_string=ARGV.shift + +# Second argument is the original input file name ... we'll change this below +original_input_file=ARGV[0] + +# Before doing anything we append create a link to the input file in our working dir with ".pep.xml" appended to the input +# name because peptide prophet can't handle anything else + +wd= Dir.pwd + +original_input_path=Pathname.new("#{original_input_file}") +actual_input_path_string="#{wd}/#{original_input_path.basename}.pep.xml" +full_tmp_output_path_string="#{wd}/peptide_prophet_output.pep.xml" + +cmd = "ln -s #{original_input_file} #{actual_input_path_string};" + +cmd << "rvm 1.9.3@protk-1.1.9 do peptide_prophet.rb" + + +ARGV[0]="#{actual_input_path_string}" + +ARGV.each { |a| + cmd << " #{a}" +} + +cmd << " -o peptide_prophet_output.pep.xml" + +# Finally we need to fix up the output file so any references to the temporary working file are changed to refs to the original input file +cmd << ";ruby -pi -e \"gsub('#{actual_input_path_string}', '#{original_input_file}')\" peptide_prophet_output.pep.xml" +cmd << ";ruby -pi -e \"gsub('#{full_tmp_output_path_string}', '#{actual_output_path_string}')\" peptide_prophet_output.pep.xml" + +%x[#{cmd}] diff -r 49d15d40117d -r 25261529840c protein_prophet.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein_prophet.xml Mon Mar 04 17:11:46 2013 -0500 @@ -0,0 +1,71 @@ + + + galaxy_protk + trans_proteomic_pipeline + + + Calculate Protein Prophet statistics on search results + + + + protein_prophet.rb --galaxy $input_file -r $iproph $nooccam $groupwts $normprotlen $logprobs $confem $allpeps $unmapped $instances $delude --minprob=$minprob --minindep=$minindep + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Given a set of peptide assignments from MS/MS spectra in the form of a pepXML file, this tool estimates probabilities at the protein level. As output, the tool produces a protXML file, which contains proteins along with the estimated probabilities that those proteins were present. Probabilities are estimated using a statistical model based on the number of peptides corresponding to that protein and the confidence that each of those peptides were assigned correctly. It takes account of the fact that peptides may correspond to more than one protein. + +---- + +**Citation** + +If you use this tool please read and cite the paper describing the statistical model implemented by Protein Prophet + +Nesvizhskii A., et al. “A Statistical Model for Identifying Proteins by Tandem Mass Spectrometry” *Anal. Chem.* 75, 4646-4658 (2003). + + + + + diff -r 49d15d40117d -r 25261529840c protein_prophet_wrapper.rb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein_prophet_wrapper.rb Mon Mar 04 17:11:46 2013 -0500 @@ -0,0 +1,35 @@ +require 'pathname' + +$VERBOSE=nil + +actual_output_path_string=ARGV.shift + +# Second argument is the original input file name ... we'll change this below +original_input_file=ARGV[0] + +# Before doing anything we append create a link to the input file in our working dir with ".pep.xml" appended to the input +# name because peptide prophet can't handle anything else + +wd= Dir.pwd + +original_input_path=Pathname.new("#{original_input_file}") +actual_input_path_string="#{wd}/#{original_input_path.basename}.pep.xml" + +cmd = "ln -s #{original_input_file} #{actual_input_path_string};" + +cmd << "rvm 1.9.3@protk-1.1.9 do protein_prophet.rb" + + +ARGV[0]="#{actual_input_path_string}" + +ARGV.each { |a| + + cmd << " #{a}" +} + +cmd << " -o protein_prophet_results.prot.xml" + +cmd << ";ruby -pi -e \"gsub('#{actual_input_path_string}', '#{original_input_file}.pep.xml')\" protein_prophet_results.prot.xml" + +%x[#{cmd}] + diff -r 49d15d40117d -r 25261529840c repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Mon Mar 04 17:11:46 2013 -0500 @@ -0,0 +1,10 @@ + + + + + + + + + + diff -r 49d15d40117d -r 25261529840c tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Mon Mar 04 17:11:46 2013 -0500 @@ -0,0 +1,12 @@ + + + + + + + + + + + + \ No newline at end of file