# HG changeset patch # User iracooke # Date 1402699012 14400 # Node ID 86daefc0e88dc4ae7be1a973225aa0f93e7558e9 # Parent e0a1e8a0500ac4f1032d21cd7c4f48a941a77cf8 Uploaded diff -r e0a1e8a0500a -r 86daefc0e88d README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Fri Jun 13 18:36:52 2014 -0400 @@ -0,0 +1,6 @@ +This package is a galaxy wrapper for the MSGF+ search tool. + +Requirements: +This package uses protk, msgfplus and proteowizard, which must be installed separately. + +For instructions please see: https://github.com/iracooke/protk/#galaxy-integration \ No newline at end of file diff -r e0a1e8a0500a -r 86daefc0e88d README.md --- a/README.md Fri Jun 13 18:36:33 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,8 +0,0 @@ -## What is it? -Galaxy tool definition file and wrapper scripts for the [MSGF+ Search Engine](http://proteomics.ucsd.edu/Software/MSGFPlus.html). - -## Installation -Install from the main galaxy toolshed at http://toolshed.g2.bx.psu.edu/ - -Depends on command-line scripts and databases available in the [protk ruby gem](https://bitbucket.org/iracooke/protk). 
- diff -r e0a1e8a0500a -r 86daefc0e88d msgfplus_search.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/msgfplus_search.xml Fri Jun 13 18:36:52 2014 -0400 @@ -0,0 +1,188 @@ + + Run an MSGF+ Search + + protk + msgfplus + proteowizard + + + msgfplus_search.rb + #if $database.source_select=="built_in": + --galaxy -d $database.dbkey + #else + --galaxy -d $database.fasta_file + #end if + + --var-mods=' + $variable_mods + #for $custom_variable_mod in $custom_variable_mods: + ,${custom_variable_mod.custom_mod} + #end for + ' + + --fix-mods=' + $fixed_mods + #for $custom_fix_mod in $custom_fix_mods: + ,${custom_fix_mod.custom_mod} + #end for + ' + + $input_file + -o $output + -r + --enzyme=$enzyme + --precursor-ion-tol-units=$precursor_tolu + -v $missed_cleavages + $cleavage_semi + -f $fragment_ion_tol + -p $precursor_ion_tol + --instrument=$instrument + --isotope-error-range=$isotope_error_range + --fragment-method=$fragment_method + --protocol=$protocol + --min-pep-len=$min_pep_len + --max-pep-len=$max_pep_len + --max-pep-charge=$max_pep_charge + --min-pep-charge=$min_pep_charge + --num-reported-matches=$num_reported_matches + --java-mem=$java_mem + #unless $pepxml_output_use: + --no-pepxml + #end unless + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**What it does** + +Runs an MS/MS database search using the MSGFPlus search engine. Output is in the form of a pepXML file containing identified peptides along with their raw search scores. 
+ +---- + +**References** + +Please see http://proteomics.ucsd.edu/Software/MSGFPlus.html for details of the MSGFPlus search engine and references describing its algorithm + + + diff -r e0a1e8a0500a -r 86daefc0e88d repository_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Fri Jun 13 18:36:52 2014 -0400 @@ -0,0 +1,4 @@ + + + + diff -r e0a1e8a0500a -r 86daefc0e88d tool-data/msgfplus_mods.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/msgfplus_mods.loc.sample Fri Jun 13 18:36:52 2014 -0400 @@ -0,0 +1,50 @@ +#This file lists the names of inbuilt chemical modifications accepted by msgfplus +#Each entry consists of 4 tab-separated fields like this +# +# +#Modification strings should conform to the standard MSGFPlus syntax with the following exception +#The ModType field is overridden by the msgfplus_search.rb tool. In other words any of these mods +#May be passed to the tool as a variable or fixed mod and the tool will substitute the appropriate ModType +#value +# +#Standard MSGFPlus syntax is +# +# To input a modification, use the following command: +# Mass or CompositionStr, Residues, ModType, Position, Name (all five fields are required). +# CompositionStr (C[Num]H[Num]N[Num]O[Num]S[Num]P[Num]) +# - C (Carbon), H (Hydrogen), N (Nitrogen), O (Oxygen), S (Sulfur) and P (Phosphorus) are allowed. +# - Atom can be omitted. The sequence of atoms must be followed. +# - Negative numbers are allowed. +# - E.g. C2H2O1 (valid), H2C1O1 (invalid) +# Mass can be used instead of CompositionStr. It is important to specify accurate masses (integer masses are insufficient). +# - E.g. 15.994915 +# Residues: affected amino acids (must be uppercase letters) +# - Must be uppercase letters or * +# - Use * if this modification is applicable to any residue. +# - * should not be used for an "anywhere" modification (e.g. "15.994915, *, opt, any, Oxidation" is not allowed.) +# - E.g.
NQ, * +# ModType: "fix" for fixed modifications, "opt" for variable modifications (case insensitive) +# Position: position in the peptide where the modification can be attached. +# - One of the following five values should be used: +# - any (anywhere), N-term (peptide N-term), C-term (peptide C-term), Prot-N-term (protein N-term), Prot-C-term (protein C-term) +# - Case insensitive +# - "-" can be omitted +# - E.g. any, Any, Prot-n-Term, ProtNTerm => all valid +# Name: name of the modification (Unimod PSI-MS name) +# - For proper mzIdentML output, this name should be the same as the Unimod PSI-MS name +# - E.g. Phospho, Acetyl +#C2H3N1O1,C,fix,any,Carbamidomethyl # Fixed Carbamidomethyl C +# Variable Modifications (default: none) +#O1,M,opt,any,Oxidation # Oxidation M +#15.994915,M,opt,any,Oxidation # Oxidation M (mass is used instead of CompositionStr) +#H-1N-1O1,NQ,opt,any,Deamidated # Negative numbers are allowed. +#C2H3NO,*,opt,N-term,Carbamidomethyl # Variable Carbamidomethyl N-term +#H-2O-1,E,opt,N-term,Pyro_glu # Pyro-glu from E +#H-3N-1,Q,opt,N-term,Pyro-glu # Pyro-glu from Q +#C2H2O,*,opt,Prot-N-term,Acetyl # Acetylation Protein N-term +#C2H2O1,K,opt,any,Acetyl # Acetylation K +#CH2,K,opt,any,Methyl # Methylation K +#HO3P,STY,opt,any,Phospho # Phosphorylation STY + +Carbamidomethyl C carbamidomethyl_c_ C2H3N1O1,C,opt,any,Carbamidomethyl carbamidomethyl_c_ +Oxidation M oxidation_m_ O1,M,opt,any,Oxidation oxidation_m_ \ No newline at end of file diff -r e0a1e8a0500a -r 86daefc0e88d tool-data/pepxml_databases.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/pepxml_databases.loc.sample Fri Jun 13 18:36:52 2014 -0400 @@ -0,0 +1,13 @@ +#This file lists the names of protein databases installed locally in protk.
+# These are used by omssa and x!tandem as well as the "mascot to pepxml" tool +# In order to combine search results with Interprophet, searches must be run against an identical database +# +# Entries should be structured as follows +# Display_name omssa_tandem_dbname dbkey +# +# +Swissprot spall_ spall spall_ +Combined PlasmboDB (falciparum) and Swissprot Human plasmodb_pfalciparum_sphuman_ plasmodb_pfalciparum_sphuman plasmodb_pfalciparum_sphuman_ +Swissprot Human sphuman_ sphuman sphuman_ +Combined Swissprot/TRembl Human sptrhuman_ sptrhuman sptrhuman_ +Swissprot Mouse spmouse_ spmouse spmouse_