# HG changeset patch # User iracooke # Date 1402784740 14400 # Node ID 0ad9b29c22e5716c67bdeccf25b8902166f03d66 # Parent c04896f31ff743f8754940e3a05369987b9bcea7 Uploaded diff -r c04896f31ff7 -r 0ad9b29c22e5 .gitignore --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/.gitignore Sat Jun 14 18:25:40 2014 -0400 @@ -0,0 +1,1 @@ +*.tar.bz2 \ No newline at end of file diff -r c04896f31ff7 -r 0ad9b29c22e5 README --- a/README Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -This package installs wrapper scripts for Peptide Prophet, iProphet and Protein Prophet - -Requirements: -This package depends on the galaxy_protk and protk_trans_proteomic_pipeline packages -Please see instructions for those packages before installing diff -r c04896f31ff7 -r 0ad9b29c22e5 README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Sat Jun 14 18:25:40 2014 -0400 @@ -0,0 +1,3 @@ +# Translate Nucleotide sequences to Protein + + diff -r c04896f31ff7 -r 0ad9b29c22e5 interprophet.xml --- a/interprophet.xml Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,74 +0,0 @@ - - - - galaxy_protk - trans_proteomic_pipeline - - - - Combine Peptide Prophet results from multiple search engines - - - - interprophet_wrapper.sh - - $output - - $use_nss - - $use_nrs - - $use_nse - - $use_nsi - - $use_nsm - - --minprob $minprob - - ## Inputs. - ${first_input} - #for $input_file in $input_files: - ${input_file.additional_input} - #end for - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Takes a set of pepXML files (possibly generated using different search engines) and calculates updated identification probabilities for each peptide. The updated probabilities are based on a statistical model that combines evidence from identifications across all of the input files, spectra, modified states and charge states. - ----- - -**Citation** - -If you use this tool please read and cite the paper describing iProphet - -Shteynberg D, et al. “iProphet: Improved statistical validation of peptide identifications in shotgun proteomics.” *Molecular and Cellular Proteomics* 10, M111.007690 (2011). - - - - diff -r c04896f31ff7 -r 0ad9b29c22e5 interprophet_wrapper.sh --- a/interprophet_wrapper.sh Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,33 +0,0 @@ -#!/usr/bin/env bash -# - -# Hard-Coded argument order -# Args 0-4 are like (eg --no-nss) -# Args 5 and 6 are like (eg --minprob 0.5) -# Remaining args are filenames -# -actual_output_path_string=$1;shift -for i in {0 1 2 3 4}; do - if [ $1 != "blank" ]; then cmd_args[$i]=$1; fi;shift -done -cmd_args+=($1);shift -cmd_args+=($1);shift - -wd=`pwd` - -for original_input_file in $@; do - input_file_name=`basename $original_input_file` - actual_input_path_string=$wd/$input_file_name.pep.xml - ln -s $original_input_file $actual_input_path_string - cmd_args+=($actual_input_path_string) -done - -rvm 1.9.3@protk-1.2.2 do interprophet.rb ${cmd_args[@]} -o interprophet_output.pep.xml -r; - -if [ -f interprophet_output.pep.xml ]; then - for original_input_file in $@; do - actual_input_path_string=$wd/`basename $original_input_file`.pep.xml - sed -i.bak s%$actual_input_path_string%$original_input_file%g interprophet_output.pep.xml - done -fi - diff -r c04896f31ff7 -r 0ad9b29c22e5 peptide_prophet.xml --- a/peptide_prophet.xml Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,100 +0,0 @@ - - - galaxy_protk - trans_proteomic_pipeline - - - Calculate Peptide Prophet statistics on search results - - - peptide_prophet_wrapper.sh $output $input_file - - -r - $glyco - $useicat - $phospho - $usepi - $usert - $accurate_mass - $no_ntt - $no_nmc - $use_gamma - $use_only_expect - $force_fit - $allow_alt_instruments - $maldi - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Given raw search engine scores as inputs this tool estimates the accuracy of peptide assignments. From a practical perspective it estimates the probability that each peptide assignment is correct (providing probabilities as outputs), given raw scores (possibly on some arbitrary scale) as inputs. - ----- - -**Citation** - -If you use this tool please read and cite the paper describing the statistical model implemented by Peptide Prophet - -Keller A., et al. “Empirical Statistical Model to Estimate the Accuracy of Peptide Identifications Made by MS/MS and Database Search” *Anal. Chem.* 74, 5383-5392 (2002). - - - - - - - - diff -r c04896f31ff7 -r 0ad9b29c22e5 peptide_prophet_wrapper.sh --- a/peptide_prophet_wrapper.sh Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ -#!/usr/bin/env bash - -actual_output_path_string=$1;shift - -original_input_file=$1;shift - -# Before doing anything we append create a link to the input file in our working dir with ".pep.xml" appended to the input -# name because peptide prophet can't handle anything else - -wd=`pwd` - -actual_input_path_string=$wd/`basename $original_input_file`.pep.xml - -full_tmp_output_path_string=$wd/peptide_prophet_output.pep.xml - -ln -s $original_input_file $actual_input_path_string - - -rvm 1.9.3@protk-1.2.2 do peptide_prophet.rb $actual_input_path_string $@ -o peptide_prophet_output.pep.xml; - -sed -i.bak s%$actual_input_path_string%$original_input_file%g peptide_prophet_output.pep.xml -sed -i.bak s%$full_tmp_output_path_string%$actual_output_path_string%g peptide_prophet_output.pep.xml - diff -r c04896f31ff7 -r 0ad9b29c22e5 pepxml_to_table.xml --- a/pepxml_to_table.xml Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ - - - - galaxy_protk - - - - - Converts a pepXML file to a tab delimited text file - - - -rvm 1.9.3@protk-1.2.2 do pepxml_to_table.rb $input_file -o $output - - - - - - - - - - - - - Convert a pepXML file to Tab delimited text - - - diff -r c04896f31ff7 -r 0ad9b29c22e5 protein_prophet.xml --- a/protein_prophet.xml Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,91 +0,0 @@ - - - galaxy_protk - trans_proteomic_pipeline - - - Calculate Protein Prophet statistics on search results - - - - - protein_prophet_wrapper.sh - - --galaxy $input_file - - -r - - $iproph - $nooccam - $groupwts - $normprotlen - $logprobs - $confem - $allpeps - $unmapped - $instances - $delude - - --minprob=$minprob - --minindep=$minindep - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**What it does** - -Given a set of peptide assignments from MS/MS spectra in the form of a pepXML file, this tool estimates probabilities at the protein level. As output, the tool produces a protXML file, which contains proteins along with the estimated probabilities that those proteins were present. Probabilities are estimated using a statistical model based on the number of peptides corresponding to that protein and the confidence that each of those peptides were assigned correctly. It takes account of the fact that peptides may correspond to more than one protein. - ----- - -**Citation** - -If you use this tool please read and cite the paper describing the statistical model implemented by Protein Prophet - -Nesvizhskii A., et al. “A Statistical Model for Identifying Proteins by Tandem Mass Spectrometry” *Anal. Chem.* 75, 4646-4658 (2003). - - - - - diff -r c04896f31ff7 -r 0ad9b29c22e5 protein_prophet_wrapper.sh --- a/protein_prophet_wrapper.sh Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env bash -# - -actual_output_path_string=$1;shift -original_input_file=$1;shift - -# Before doing anything we append create a link to the input file in our working dir with ".pep.xml" appended to the input -# name because peptide prophet can't handle anything else - -wd=`pwd` - -actual_input_path_string=$wd/`basename $original_input_file`.pep.xml - -ln -s $original_input_file $actual_input_path_string - -rvm 1.9.3@protk-1.2.2 do protein_prophet.rb $actual_input_path_string $@ -o protein_prophet_results.prot.xml - -if [ -f protein_prophet_results.prot.xml ]; then - sed -i.bak s%$actual_input_path_string%$original_input_file.pep.xml%g protein_prophet_results.prot.xml -fi - diff -r c04896f31ff7 -r 0ad9b29c22e5 protxml_to_table.xml --- a/protxml_to_table.xml Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,35 +0,0 @@ - - - galaxy_protk - - - Converts a ProtXML file to a table - - - rvm 1.9.3@protk-1.2.2 do protxml_to_table.rb - - $input_file - -o $output - - - - - - - - - - - - - - - - -**What it does** - -Converts a ProtXML file to a tab separated table - - - - diff -r c04896f31ff7 -r 0ad9b29c22e5 repository_dependencies.xml --- a/repository_dependencies.xml Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ - - - - - - diff -r c04896f31ff7 -r 0ad9b29c22e5 sixframe_translate.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sixframe_translate.xml Sat Jun 14 18:25:40 2014 -0400 @@ -0,0 +1,43 @@ + + + protk + + + Translates DNA/RNA to protein + + + sixframe.rb $fasta_file -o $output $strip_header $coords + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Generates 6 frame translations suitable for proteogenomics workflows + +---- + +**References** + + + + + diff -r c04896f31ff7 -r 0ad9b29c22e5 tool-data/pepxml_databases.loc.sample --- a/tool-data/pepxml_databases.loc.sample Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,13 +0,0 @@ -#This file lists the names of protein databases installed locally in protk. -# These are used by omssa and x!tandem as well as the "mascot to pepxml" tool -# In order to combine search results with Interprophet searches must be run against an identical database -# -# Entries should follow the be structured as follows -# Display_name omssa_tandem_dbname dbkey -# -# -Swissprot spall_ spall spall_ -Combined PlasmboDB (falciparum) and Swissprot Human plasmodb_pfalciparum_sphuman_ plasmodb_pfalciparum_sphuman plasmodb_pfalciparum_sphuman_ -Swissprot Human sphuman_ sphuman sphuman_ -Combined Swissprot/TRembl Human sptrhuman_ sptrhuman sptrhuman_ -Swissprot Mouse spmouse_ spmouse spmouse_ diff -r c04896f31ff7 -r 0ad9b29c22e5 tool-data/tandem_mods.loc.sample --- a/tool-data/tandem_mods.loc.sample Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -#This file lists the names of inbuilt chemical modifications accepted by X!Tandem -# -# -Carbamidomethyl C carbamidomethyl_c_ 57.021464@C carbamidomethyl_c_ -Glycocapture-N glycocapture_n_ 0.998@N!{P}[ST] glycocapture_n_ -Oxidation M oxidation_m_ 15.994915@M oxidation_m_ \ No newline at end of file diff -r c04896f31ff7 -r 0ad9b29c22e5 tool_dependencies.xml --- a/tool_dependencies.xml Mon Jun 10 04:58:03 2013 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,11 +0,0 @@ - - - - - - - - - - -