Galaxy |

Changeset 0:22a1fa7d9d6a (2019-03-04)

Next changeset 1:2b785516abfc (2019-03-04)

Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dia_umpire commit 2379480213ba2e084a93bf82052fac858ffd074f

added:
datatypes_conf.xml
dia_umpire_macros.xml
dia_umpire_quant.xml
dia_umpire_se.xml
test-data/LongSwath_UPS1_1ug_rep1_xs.mzXML
test-data/LongSwath_UPS1_1ug_rep1_xs_Q2.mgf

diff -r 000000000000 -r 22a1fa7d9d6a datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Mon Mar 04 11:50:10 2019 -0500

@@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<datatypes>
+    <registration> <!-- "dia_umpire.ser" is registered as a subclass of the generic Text datatype -->
+        <datatype extension="dia_umpire.ser" type="galaxy.datatypes.data:Text" subclass="True"/>
+    </registration>
+</datatypes>
+

diff -r 000000000000 -r 22a1fa7d9d6a dia_umpire_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dia_umpire_macros.xml Mon Mar 04 11:50:10 2019 -0500

[

@@ -0,0 +1,110 @@
+<macros>
+    <token name="@VERSION@">2.1.3</token> <!-- Version string shared by the tool files; each tool sets its own version attribute to "@VERSION@.0" -->
+    <xml name="requirements"> <!-- Package dependency expanded by both tools; the version is taken from the @VERSION@ token so the two values cannot drift apart -->
+        <requirements>
+            <requirement type="package" version="@VERSION@">dia_umpire</requirement>
+        </requirements>
+    </xml>
+    <xml name="stdio"> <!-- Shared error handling: any exit code of 1 or above is treated as a fatal error -->
+        <stdio>
+            <exit_code range="1:" level="fatal"/>
+        </stdio>
+    </xml>
+    <xml name="citations"> <!-- DOI citation shared by the tools; the yield element marks where an expanding tool can add its own citations -->
+        <citations>
+            <citation type="doi">10.1038/nmeth.3255</citation>
+            <yield/>
+        </citations>
+    </xml>
+
+    <xml name="common_se_params"> <!-- Optional integer signal-extraction parameters: centroiding resolution plus MS1 and MS2 charge-state bounds (each charge bound limited to 1..10) -->
+            <param name="SE_Resolution" type="integer" value="" optional="true" label="Resolution" >
+                <help>
+ SE.Resolution: Used only if the input spectra are stored in profile mode (i.e. not centroided, e.g. by using "Peak Picking" option in MSConvert when converting raw spectral data to mzXML format).
+Profile spectra will be centroided using a sliding window. The window is moved across the entire mass range of a spectrum. Only the most intense peak in the window centered at the peak m/z is kept, others are discarded. The window width is calculated based on this parameter as: width = mz / para.Resolution.
+Recommended value: Depends on the instrument and acquisition settings. Either check raw data to see the real average resolution of peaks in spectra or consult vendor specifications for the instrument. For AB SCIEX TripleTOF 5600 we use 15000-20000.
+                </help>
+            </param>
+
+            <param name="SE_StartCharge" type="integer" value="1" min="1" max="10" optional="true" label="StartCharge">
+                <help>
+SE.StartCharge: The minimum charge state for MS1 precursor ion to be detected during isotopic peak grouping.
+                </help>
+            </param>
+
+            <param name="SE_EndCharge" type="integer" value="5" min="1" max="10" optional="true" label="EndCharge">
+                <help>
+SE.EndCharge: The maximum charge state for MS1 precursor ion to be detected during isotopic peak grouping.
+Recommended value: it is not recommended to set this parameter higher than 5 for typical proteomic experiments, as it is unlikely to observe peptides of higher charge states.
+                </help>
+            </param>
+
+            <param name="SE_MS2StartCharge" type="integer" value="1" min="1" max="10" optional="true" label="MS2StartCharge">
+                <help>
+SE.MS2StartCharge: The minimum charge state for MS2 unfragmented precursor ion to be detected during isotopic peak grouping.
+                </help>
+            </param>
+
+            <param name="SE_MS2EndCharge" type="integer" value="5" min="1" max="10" optional="true" label="MS2EndCharge">
+                <help>
+SE.MS2EndCharge: The maximum charge state for MS2 unfragmented precursor ion to be detected during isotopic peak grouping.
+Recommended value: it is not recommended to set this parameter higher than 5 for typical proteomic experiments, as it is unlikely to observe peptides of higher charge states.
+                </help>
+            </param>
+    </xml>
+
+    <xml name="se_window_params"> <!-- DIA isolation window settings: the selected WindowType decides which extra inputs are shown -->
+            <conditional name="window">
+              <param name="WindowType" type="select" label="WindowType">
+                <option value="SWATH">SWATH - fixed window size</option>
+                <option value="V_SWATH">V_SWATH - variable window size</option>
+                <option value="MSX">MSX - 2Da isolation window, its position is shuffled randomly until the whole MS1 range is covered</option>
+                <option value="MSE">MSE - as originally implemented in Waters instruments. The full MS1 range is being fragmented at once.</option>
+              </param>
+              <when value="SWATH">
+                <param name="WindowSize" type="integer" value="" optional="true" label="WindowSize">
+                    <help>
+WindowSize: Isolation window size setting for fixed window SWATH. (Please skip this part if the data is from Thermo instrument)
+Note: The window size is to be specified including overlapping regions. I.e. if your windows are: 399.5-425.5, 424.5 - 450.5, etc., then the window size should be set to 26.
+                    </help>
+                </param>
+              </when>
+              <when value="V_SWATH">
+                <!-- The variable window list is supplied either as a tabular history dataset or as pasted text -->
+                <conditional name="window_list">
+                  <param name="window_list_src" type="select" label="Variable SWATH window list source">
+                    <option value="history">Tabular dataset from history</option>
+                    <option value="text_entry">Enter window list as text</option>
+                  </param>
+                  <when value="history">
+                    <param name="WindowListFile" type="data" format="tabular" label="WindowList">
+                      <help><![CDATA[
+The format should be a tab-delimited list of m/z low and high values, one window per line.  Example:
+<pre>
+400 451<br>
+449 600<br>
+</pre>
+]]>
+                      </help>
+                    </param>
+                  </when>
+                  <when value="text_entry">
+                    <param name="WindowList" type="text" area="True" size="40x40" value="" label="WindowList">
+                      <help><![CDATA[
+The format should be a tab-delimited list of m/z low and high values, one window per line.  Example:
+<pre>
+400 451<br>
+449 600<br>
+</pre>
+]]>
+                      </help>
+                    </param>
+                  </when>
+                </conditional>
+              </when>
+              <when value="MSX"/>
+              <when value="MSE"/>
+            </conditional>
+    </xml>
+
+</macros>

diff -r 000000000000 -r 22a1fa7d9d6a dia_umpire_quant.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dia_umpire_quant.xml Mon Mar 04 11:50:10 2019 -0500

[

b'@@ -0,0 +1,443 @@\n+<tool id="dia_umpire_quant" name="DIA_Umpire_Quant" version="@VERSION@.0">\n+ <description>DIA quantitation and targeted re-extraction</description>\n+ <macros>\n+ <import>dia_umpire_macros.xml</import>\n+ </macros>\n+ <expand macro="requirements" />\n+ <expand macro="stdio" />\n+ <command>\n+<![CDATA[\n+#import shutil\n+### $shutil.copytree($se_input.extra_files_path.__str__,$work_path.__str__)\n+## want to save all outputs in a directory output.extra_files_path to be used by \n+## Is file naming going to be a problem? May need to have a name param\n+cat $quant_params > $dia_umpire_quant && echo "Thread = \\$GALAXY_SLOTS" >> $dia_umpire_quant \n+&& cp -rp $se_input.extra_files_path.__str__ $work_path.__str__\n+&& ln -s $protxml_input ${work_path}/$interact_prot_xml\n+&& ln -s $searchdb_input ${work_path}/$searchdb_fa\n+#for $input in $mzxml_inputs:\n+&& ln -s $input ${work_path}/${input.name}\n+#end for\n+#for $input in $pepxml_inputs:\n+&& ln -s $input ${work_path}/${input.name}\n+#end for\n+## Make sure pep.xml and prot.xml start with "interact-"\n+## && echo "# $quant_params" >> $dia_umpire_quant \n+&& java -jar \\$DIA_UMPIRE_QUANT_JAR $quant_params \n+&& cp $work_path/ProtSummary*.xls "$ProtSummary"\n+&& cp $work_path/PeptideSummary*.xls "$PeptideSummary"\n+&& cp $work_path/FragSummary*.xls "$FragSummary"\n+&& cp $work_path/IDNoSummary*.xls "$IDNoSummary"\n+&& cat $work_path/*.log "$logfile"\n+]]>\n+ </command>\n+\n+ <configfiles>\n+ <configfile name="user_mods"><![CDATA[\n+<?xml version="1.0"?>\n+<MSModSpecSet\n+ xmlns="http://www.ncbi.nlm.nih.gov"\n+ xmlns:xs="http://www.w3.org/2001/XMLSchema-instance"\n+ xs:schemaLocation="http://www.ncbi.nlm.nih.gov OMSSA.xsd"\n+>\n+ <MSModSpec>\n+ <MSModSpec_mod>\n+ <MSMod value="modificationwithneutrallosses">1</MSMod>\n+ </MSModSpec_mod>\n+ <MSModSpec_type>\n+ <MSModType value="modaa">0</MSModType>\n+ </MSModSpec_type>\n+ <MSModSpec_name>test modification with neutral losses</MSModSpec_name>\n+ 
<MSModSpec_monomass>123.456789</MSModSpec_monomass>\n+ <MSModSpec_averagemass>0</MSModSpec_averagemass>\n+ <MSModSpec_n15mass>0</MSModSpec_n15mass>\n+ <MSModSpec_residues>\n+ <MSModSpec_residues_E>B</MSModSpec_residues_E>\n+ <MSModSpec_residues_E>O</MSModSpec_residues_E>\n+ </MSModSpec_residues>\n+ <MSModSpec_neutralloss>\n+ <MSMassSet>\n+ <MSMassSet_monomass>456.789123</MSMassSet_monomass>\n+ <MSMassSet_averagemass>0</MSMassSet_averagemass>\n+ <MSMassSet_n15mass>0</MSMassSet_n15mass>\n+ </MSMassSet>\n+ <MSMassSet>\n+ <MSMassSet_monomass>789.123456</MSMassSet_monomass>\n+ <MSMassSet_averagemass>0</MSMassSet_averagemass>\n+ <MSMassSet_n15mass>0</MSMassSet_n15mass>\n+ </MSMassSet>\n+ </MSModSpec_neutralloss>\n+ <MSModSpec_unimod>00</MSModSpec_unimod>\n+ <MSModSpec_psi-ms>testMod</MSModSpec_psi-ms>\n+ </MSModSpec>\n+<MSModSpecSet\n+]]>\n+ </configfile>\n+ <configfile name="quant_params"><![CDATA[\n+#DIA-Umpire (version @VERSION@)\n+#Data Independent Acquisition data processing and analysis package (Quantitation and targeted re-extraction module)\n+\n+#Working folder path: the program will process all mzXML files in the working folder (please make sure the corresponding pepXML files are in the same folder with mzXML file)\n+#Internal spectral library file, output csv files will be stored in the working folder\n+Path = ${work_path}/\n+\n+#Or you can specify all DIA mzXML files you want to analyze here (the working folder is still required for storing output files)\n+# ==File list begin\n+# ==File list end\n+\n+#No of threads\n+Thread = 6\n+\n+InternalLibID = #if $InternalLibID then $InternalLibID else \'LibID\'#\n+\n+#InternalLibSearch / TargetedExtraction both will work\n+InternalLibSearch = $TargetedExtraction\n+ExternalLibSearch = $external_setti'..b' from the previous option) threshold of peptides to be considered for protein quantitation. Higher weight (closer to 1) of a peptide for a protein is more likely to be a unique peptide for the protein. 
(default: 0.9)\n+ Recommended value: 0.9\n+\n+ *TopNFrag*: Top N fragments in terms of fragment score (Pearson correlation x fragment intensity) used for determining peptide ion intensity (default:6).\n+ Recommended value: 3~6\n+\n+ *TopNPep*: Top N peptide ions in terms of peptide ion intensity (determined by top\n+ fragments) used for determining protein intensity (default:6)\n+ Recommended value: 3~6\n+\n+ *Freq*: Minimum frequency of a peptide ion or fragment across all samples/replicates to\n+ be considered for Top N ranking. (default:0.5) Recommended value: 0.5 or more\n+\n+**Output** (DIA-Umpire quantitation and targeted re-extraction module):\n+=======================================================================\n+\n+ Binary files which include identification and quantitation information, and possibly the internal spectral library.\n+\n+ Three summary tables for protein, peptide ion, and fragment level reports (<filename> denotes the name of the raw file in which a peptide was identified)\n+\n+ 1. Columns printed in protein summary table (ProtSummary.xls)\n+\n+ 1. Protein Key: Protein accession number\n+ 2. <filename>_Prob: Protein identification probability\n+ 3. <filename>_Peptides: Number of identified peptide ions assigned to a protein\n+ 4. <filename>_PSMs: Number of identified pseudo MS/MS spectra assigned to a protein\n+ 5. <filename>_MS1_iBAQ: Protein abundance estimated by MS1 peptide intensities (See manuscript for details) (iBAQ: sum of all identified peptide intensities divided by the number of theoretical tryptic peptides)\n+ 6. <filename>_TopNpep/TopNfra, Freq>freq: Protein abundance estimated by top scored peptide ions and fragments (See manuscript for details).\n+\n+ 2. Columns printed in peptide ion summary table (PeptideSummary.xls)\n+\n+ 1. Peptide Key: Peptide ion identifier\n+ 2. Sequence: Peptide sequence\n+ 3. ModSeq: Peptide sequence with modification information\n+ 4. Proteins: Parent proteins\n+ 5. 
mz: Precursor m/z of peptide ion\n+ 6. Charge: Charge state of peptide ion\n+ 7. MaxProb: Maximum identification probability of peptide ion across the whole data- set from untargeted MS/MS database search\n+ 8. <filename>_Spec_Centric_Prob: Identification probability of a peptide ion from untargeted MS/MS database search\n+ 9. <filename>_Pep_Centric_Prob: Identification probability of a peptide ion from targeted re-extraction matching\n+ 10. <filename>_PSMs: The number of identified pseudo MS/MS spectra assigned to a peptide ion\n+ 11. <filename>_RT: Retention time of a peptide ion\n+ 12. <filename>_MS1: Peptide abundance estimated by MS1 precursor intensity 2.13. <filename>_TopNfra: Peptide abundance estimated by top N fragment ions\n+\n+ 3. Columns printed in fragment summary table (FragSummary.xls)\n+\n+ 1. Fragment Key: Fragment ion identifier\n+ 2. Protein: Parent protein accession number\n+ 3. Peptide: Parent peptide ion identifier\n+ 4. Fragment: Fragment ion type\n+ 5. FragMz: m/z of fragment ion\n+ 6. <filename>_RT: Retention time of parent peptide ion \n+ 7. <filename>_Spec_Centric_Prob: Identification probability of peptide ion from untargeted MS/MS database search\n+ 8. <filename>_Pep_Centric_Prob: Identification probability of peptide ion from targeted re-extraction matching\n+ 9. <filename>_Intensity: fragment intensity\n+ 10. <filename>_Corr: Elution profile Pearson correlation between fragment ion and precursor peptide ion\n+ 11. <filename>_PPM: Mass error of an observed fragment m/z to the theoretical one\n+\n+]]>\n+ </help>\n+ <expand macro="citations" />\n+</tool>\n'

diff -r 000000000000 -r 22a1fa7d9d6a dia_umpire_se.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dia_umpire_se.xml Mon Mar 04 11:50:10 2019 -0500

[

b'@@ -0,0 +1,572 @@\n+<tool id="dia_umpire_se" name="DIA_Umpire_SE" version="@VERSION@.0">\n+ <description>DIA signal extraction</description>\n+ <macros>\n+ <import>dia_umpire_macros.xml</import>\n+ </macros>\n+ <expand macro="requirements" />\n+ <expand macro="stdio" />\n+ <command>\n+<![CDATA[\n+#import re\n+## want to save all outputs in a directory output.extra_files_path to be used by dia_umpire_quant\n+## Is file naming going to be a problem? May need to have a name param\n+#if $se_extraction_data:\n+#set se_params = $se_ser\n+#set $ser_dir = $se_ser.extra_files_path\n+mkdir $ser_dir\n+&& ln -s \'$ser_dir\' \'$output_dir\'\n+&& cat $se_config > $se_ser \n+#else:\n+#set se_params = $params\n+mkdir \'$output_dir\'\n+&& cat $se_config > $se_params \n+#end if\n+##\n+&& echo " " >> $se_params \n+&& echo "Thread = \\$GALAXY_SLOTS" >> $se_params\n+#if $input_prefix and len($input_prefix.strip()) > 0:\n+#set $input_path = str($output_dir) + \'/\' + $input_prefix.__str__ + \'_rep\' + str($i + 1) + \'.mzXML\' \n+#else:\n+#set $input_path = str($output_dir) + \'/\' + $re.sub(\'\\.[mM]\\w+$\',\'\',$re.sub(\'[^-a-zA-Z0-9_.]\',\'_\',$input.name)) + \'.mzXML\'\n+#end if\n+&& ln -s \'${input}\' \'$input_path\'\n+&& dia_umpire_se \'$input_path\' \'$se_params\'\n+&& cat $output_dir/*.log >> "$logfile"\n+#if not $mgfs_as_collection:\n+&& cp "$output_dir/"*_Q1.mgf \'$q1_mgf\'\n+&& cp "$output_dir/"*_Q2.mgf \'$q2_mgf\'\n+&& cp "$output_dir/"*_Q3.mgf \'$q3_mgf\'\n+#end if\n+#if $ExportPrecursorPeak:\n+&& cp "$output_dir/"*PeakCluster.csv \'$PrecursorPeak\'\n+#end if\n+]]>\n+ </command>\n+ <configfiles>\n+ <configfile name="se_config"><![CDATA[#slurp\n+#DIA-Umpire (version @VERSION@)\n+#Data Independent Acquisition data processing and analysis package (Signal extraction module)\n+\n+#import re\n+#if $input_prefix:\n+#set $input_path = $input_prefix.__str__ + "_rep" + str($i + 1) + ".mzXML" \n+#else:\n+#set $input_path = 
$re.sub(\'\\.[mM]\\w+$\',\'\',$re.sub(\'[^-a-zA-Z0-9_.]\',\'_\',$input.name)) + ".mzXML"\n+#end if\n+# $input.name $input_path $input\n+\n+#No of threads\n+Thread = 6\n+\n+#Report peak\n+ExportPrecursorPeak = $ExportPrecursorPeak\n+ExportFragmentPeak = $ExportFragmentPeak\n+\n+#Signal extraction parameters\n+#if $instrument.model == \'Thermo_Orbitrap\':\n+SE.MS1PPM = #if $instrument.SE_MS1PPM then $instrument.SE_MS1PPM else 5#\n+SE.MS2PPM = #if $instrument.SE_MS2PPM then $instrument.SE_MS2PPM else 5#\n+SE.Resolution = #if $instrument.SE_Resolution then $instrument.SE_Resolution else 17000#\n+SE.StartCharge = #if $instrument.SE_StartCharge then $instrument.SE_StartCharge else 2#\n+SE.EndCharge = #if $instrument.SE_EndCharge then $instrument.SE_EndCharge else 4#\n+SE.MS2StartCharge = #if $instrument.SE_MS2StartCharge then $instrument.SE_MS2StartCharge else 2#\n+SE.MS2EndCharge = #if $instrument.SE_MS2EndCharge then $instrument.SE_MS2EndCharge else 4#\n+#else if $instrument.model == \'AB_SCIEX_Triple_TOF_5600\':\n+SE.MS1PPM = #if $instrument.SE_MS1PPM then $instrument.SE_MS1PPM else 30#\n+SE.MS2PPM = #if $instrument.SE_MS2PPM then $instrument.SE_MS2PPM else 40#\n+SE.Resolution = #if $instrument.SE_Resolution then $instrument.SE_Resolution else 17000#\n+SE.StartCharge = #if $instrument.SE_StartCharge then $instrument.SE_StartCharge else 2#\n+SE.EndCharge = #if $instrument.SE_EndCharge then $instrument.SE_EndCharge else 4#\n+SE.MS2StartCharge = #if $instrument.SE_MS2StartCharge then $instrument.SE_MS2StartCharge else 2#\n+SE.MS2EndCharge = #if $instrument.SE_MS2EndCharge then $instrument.SE_MS2EndCharge else 4#\n+#else:\n+SE.MS1PPM = #if $instrument.SE_MS1PPM then $instrument.SE_MS1PPM else 30#\n+SE.MS2PPM = #if $instrument.SE_MS2PPM then $instrument.SE_MS2PPM else 40#\n+SE.Resolution = #if $instrument.SE_Resolution then $instrument.SE_Resolution else 17000#\n+SE.StartCharge = #if $instrument.SE_StartCharge then $instrument.SE_StartCharge else 2#\n+SE.EndCharge = #if 
$instrument.SE_EndCharge then $instrument.SE_EndCharge else 4#\n+SE.MS2StartCharge = #if $instrument.SE_MS2StartCharge then $instrument.SE_MS2StartCharge else 2#\n+SE.MS2EndCharge'..b'l average resolution of peaks in spectra or consult vendor specifications for the instrument. For AB SCIEX TripleTOF 5600 we use 15000-20000.\n+\n+ *SE.StartCharge*: The minimum charge state for MS1 precursor ion to be detected during isotopic peak grouping.\n+\n+ *SE.EndCharge*: The maximum charge state for MS1 precursor ion to be detected during isotopic peak grouping. Recommended value: it is not recommended to set this parameter higher than 5 for typical proteomic experiments, as it is unlikely to observe peptides of higher charge states.\n+\n+ *SE.MS2StartCharge*: The minimum charge state for MS2 unfragmented precursor ion to be detected during isotopic peak grouping.\n+\n+ *SE.MS2EndCharge*: The maximum charge state for MS2 unfragmented precursor ion to be detected during isotopic peak grouping. Recommended value: it is not recommended to set this parameter higher than 5 for typical proteomic experiments, as it is unlikely to observe peptides of higher charge states.\n+\n+**DIA isolation window settings**:\n+\n+ *WindowType*: DIA experiment type. DIA is implemented differently by different vendors and current support for data-formats is lacking, so the program needs additional info to properly interpret input spectral data. Supported values in this version:\n+\n+ * SWATH - fixed window size SWATH, as described in the original SWATH paper. If you\'re using this option, it\'s mandatory to specify WindowSize option as well.\n+\n+ * V_SWATH - variable window size SWATH. 
If you\'re using this option, it\'s mandatory to specify Variable SWATH window setting (see section below).\n+\n+ * MSX - 2Da isolation window, its position is shuffled randomly until the whole MS1 range is covered, the process is then repeated but coverage of MS1 range by isolation windows will be different because of randomization.\n+\n+ * MSE - as originally implemented in Waters instruments. The full MS1 range is being fragmented at once.\n+\n+ *WindowSize*: Isolation window size setting for fixed window SWATH. (Please skip this part if the data is from Thermo instrument) Note: The window size is to be specified including overlapping regions. I.e. if your windows are: 399.5-425.5, 424.5 - 450.5, etc., then the window size should be set to 26. Note: Was tested only on AB SCIEX TripleTOF 5600 and Thermo Q-Exactive and Fusion data. \n+\n+ Variable SWATH window setting: Isolation settings for variable window size SWATH. (Please skip this part if the data is from Thermo instrument). The format should be a tab-delimited list of m/z low and high values, one window per row. \n+\n+\n+**Output files of DIA-Umpire signal extraction module**:\n+========================================================\n+\n+ 1. *DIA_Umpire_SE MGFs* - Three .mgf files per input .mzXML file - pseudo MS/MS spectra sets for different quality categories of detected precursor signals (see the Online Methods of the publication for details). These can be either individual history items or a dataset collection. Example:\n+\n+ 1. <filename>_Q1.mgf \n+ 2. <filename>_Q2.mgf\n+ 3. <filename>_Q3.mgf\n+\n+ Note: Each file corresponds to a different "quality level" of precursor ions (Q1= More than two isotopic peaks detected in MS1, Q2 = only two isotopic peak detected, Q3 = detected unfragmented precursor in MS2). 
These spectra are written to separate files, because they must be searched separately against a protein database as a consequence of differences in FDR estimates for these varying quality data.\n+\n+ 2. *DIA_Umpire_SE Signal Extraction data* - includes the binary files (.ser) containing contain all necessary information for quantitation procedures (parameter settings, all detected precursor and fragment peaks, precursor-fragment grouping information). \n+\n+ 3. If ExportPrecursorPeak and/or ExportFragmentPeak options were set to true, text files with detailed information about detected MS1 and/or MS2 features will be generated.\n+\n+\n+]]>\n+ </help>\n+ <expand macro="citations" />\n+</tool>\n'

diff -r 000000000000 -r 22a1fa7d9d6a test-data/LongSwath_UPS1_1ug_rep1_xs.mzXML
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/LongSwath_UPS1_1ug_rep1_xs.mzXML Mon Mar 04 11:50:10 2019 -0500

b'@@ -0,0 +1,8529 @@\n+<?xml version="1.0" encoding="ISO-8859-1"?>\n+<mzXML xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_3.2"\n+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"\n+ xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_3.2 http://sashimi.sourceforge.net/schema_revision/mzXML_3.2/mzXML_idx_3.2.xsd">\n+ <msRun scanCount="500" startTime="PT0.249S" endTime="PT52.023S">\n+ <parentFile fileName="file:///F:/Data/SWATH_IDA_Project94/New_Rep/Convert/LongSwath_UPS1_40fm_Ecoli_1ug-rep1.wiff"\n+ fileType="processedData"\n+ fileSha1="cd541f4406cffc7ab5efc92ffb027dc61eaedf"/>\n+ <parentFile fileName="file:////LongSwath_UPS1_1ug_rep1.mzXML"\n+ fileType="processedData"\n+ fileSha1="3b9ddaeff57373e48ab21571891f6a3253bec0e9"/>\n+ <msInstrument msInstrumentID="1">\n+ <msManufacturer category="msManufacturer" value="instrument model"/>\n+ <msModel category="msModel" value="AB SCIEX instrument model"/>\n+ <msIonisation category="msIonisation" value="electrospray ionization"/>\n+ <msMassAnalyzer category="msMassAnalyzer" value="mass analyzer type"/>\n+ <msDetector category="msDetector" value="detector type"/>\n+ <software type="acquisition" name="Analyst" version="Analyst TF 2"/>\n+ </msInstrument>\n+ <dataProcessing centroided="1">\n+ </dataProcessing>\n+ <dataProcessing>\n+ <software type="processing" name="Analyst" version="Analyst TF 2"/>\n+ <comment>type: processing</comment>\n+ </dataProcessing>\n+ <dataProcessing>\n+ <software type="processing" name="ProteinPilot Software" version="4.5.0.1591"/>\n+ <comment>type: conversion</comment>\n+ </dataProcessing>\n+ <dataProcessing>\n+ <software type="processing" name="ProteoWizard" version="3.0.4337"/>\n+ <comment>name: ProteoWizard</comment>\n+ <comment>type: conversion</comment>\n+ </dataProcessing>\n+ <dataProcessing>\n+ <software type="conversion" name="ProteoWizard software" version="3.0.9987"/>\n+ <processingOperation name="Conversion to mzML"/>\n+ </dataProcessing>\n+ <scan 
num="1"\n+ scanType="Full"\n+ centroided="1"\n+ msLevel="1"\n+ peaksCount="697"\n+ polarity="+"\n+ retentionTime="PT0.249S"\n+ totIonCurrent="2417067"\n+ msInstrumentID="1">\n+ <peaks compressionType="zlib"\n+ compressedLen="5792"\n+ precision="64"\n+ byteOrder="network"\n+ contentType="m/z-int">eJwtWndcVFcTRSmCwC6wsEtvCkhPUAEbu4ktRkFjFJUoYgliIfppFKUsC0tbyhYItliwodGISYxiLIhGxYolUUw0gEQxxgKigiLlC+ew/9zf3Jk5c2buvnfbk61zzNf77yfrm5Hs2tOuc4mq7GmtMjPRvy7wpKyndbi5sB5yhAayacP0GMqVPa20Zf1b6ldYl/S0ns6+Csipd4Fj2hlPfEUo430nQZx1+Qr4DdXvxS+ohV3fMhv6q6PJp2wM5S0/Md5hBXis25aAeGaC4eSzbRdwJPO+oLxjK+JZHZkIu3WHitHvOewI5TPfAMeqsoX4V52J5+XL/K+lQbY6P4H2f42HvZnPLtq/iIPc9+YoxmtKIP+b25lfdzj8+tbM6ZVLgWs2eiTkRL24Xv05+CeK/iZ+xHDkkej8L/EarqFNdHGDva+rDHaJAZ8BZ9CV32gfGAUcjzExxBvSDL1kD3kljvWWoX7NW6mftAN6s2WvqY/AOElbXvXKk4MrIb/u5Tu7Ff2S8lWU537JeiXYks9XK8g/dSj9l/evp5yDeiWu2I5+R8VKyhqXnlbakfsH8bSMKzKuBW5iCf8X+pYLybeE+sHCGNQxsWwB4umLY6mvmFgPvN0e5HNzOvxt0ivJ55k//PT9VhH/5Tz46QdEEK+T/wdR0B7ISRHvoBdNTgNO0pQL6LdXHUR/UrwauPpREeCftHwt6vUq6j3tV1wBn1dfrEK+Scl3Yac/9zD8khS/od/Jci9lzQDgur6Lor3mFusRu5D42iPQDzdfQX1JHfo/vlaEfJK+3416drr/SP0Po9BvGxFMvj/Uwj6weBnlynqMd6f/5/S/CVkWnKlHPremsr4pJyjXfQg7/fSRzK++KQb5pd+l/OBv6J3GXCTfBh/mr5wEnKQXGuhF2Xwuk15eROv0ST7tu1YSr8AbeMlONXqQNzrBP9k5nfXdOAt8kgMGc7w2n6J+TgfwzTO30z/eB3UwL1DSfvkZxDNX/0X7lEbY6f+yDvGTy/SZ32V72l+LhewWNYP2tVdQN/OLXwAnuWEW5NDAbbR/oYad040kys0cd/ftQvJpGQ884zv1jNdCnqIHP1PuKkLrdJdxkruLEEe/kfVI0VOj3+mPY8BN6buS+seeaFMEkzjejQaIn+L4EHa2z/yodzJhvs2jwTPFuYzj0XyDckAU+bR+gDgpQdPRb/5qKuOPkgNH9H4O8cOk6Dd/t5z40zb29Etf68voP7cK8Q36WSCvlGW3EU9gOZb841tllP+g/4ojaAUi4qZozhNPwuc7pfpb6A38jtD/Bt/zgtGviP+bLWS7YTfof/9aTzxp1+iXzK+O+VoHJQNHrvc7/ASR/F/L3fwrEe+Tb6j/0EEGObypkjLnK8Fqe7Ry6Vnym3ofceUfsU7Wny+gf7gx6iRIb0crjzBH/wABx1MuvwQ/cXMg8'..b'01">6211293</offset>\n+ <offset id="402">6223730</offset>\n+ <offset id="403">6235183</offset>\n+ <offset id="404">6245542</offset>\n+ <offset id="405">6255134</offset>\n+ <offset id="406">6263343</offset>\n+ <offset id="407">6271206</offset>\n+ <offset 
id="408">6277907</offset>\n+ <offset id="409">6283648</offset>\n+ <offset id="410">6289313</offset>\n+ <offset id="411">6294458</offset>\n+ <offset id="412">6298974</offset>\n+ <offset id="413">6303011</offset>\n+ <offset id="414">6306480</offset>\n+ <offset id="415">6309861</offset>\n+ <offset id="416">6313002</offset>\n+ <offset id="417">6315791</offset>\n+ <offset id="418">6318227</offset>\n+ <offset id="419">6320532</offset>\n+ <offset id="420">6322461</offset>\n+ <offset id="421">6324396</offset>\n+ <offset id="422">6455041</offset>\n+ <offset id="423">6475267</offset>\n+ <offset id="424">6496484</offset>\n+ <offset id="425">6517920</offset>\n+ <offset id="426">6538360</offset>\n+ <offset id="427">6559959</offset>\n+ <offset id="428">6582319</offset>\n+ <offset id="429">6602599</offset>\n+ <offset id="430">6623555</offset>\n+ <offset id="431">6642718</offset>\n+ <offset id="432">6661105</offset>\n+ <offset id="433">6678415</offset>\n+ <offset id="434">6694642</offset>\n+ <offset id="435">6709350</offset>\n+ <offset id="436">6723289</offset>\n+ <offset id="437">6736250</offset>\n+ <offset id="438">6747535</offset>\n+ <offset id="439">6758089</offset>\n+ <offset id="440">6767125</offset>\n+ <offset id="441">6775822</offset>\n+ <offset id="442">6783175</offset>\n+ <offset id="443">6790056</offset>\n+ <offset id="444">6795980</offset>\n+ <offset id="445">6801429</offset>\n+ <offset id="446">6806198</offset>\n+ <offset id="447">6810575</offset>\n+ <offset id="448">6814452</offset>\n+ <offset id="449">6818185</offset>\n+ <offset id="450">6821274</offset>\n+ <offset id="451">6823851</offset>\n+ <offset id="452">6826312</offset>\n+ <offset id="453">6828584</offset>\n+ <offset id="454">6830777</offset>\n+ <offset id="455">6832758</offset>\n+ <offset id="456">6834596</offset>\n+ <offset id="457">6965101</offset>\n+ <offset id="458">6985231</offset>\n+ <offset id="459">7006321</offset>\n+ <offset id="460">7027517</offset>\n+ <offset id="461">7048431</offset>\n+ <offset 
id="462">7069070</offset>\n+ <offset id="463">7090066</offset>\n+ <offset id="464">7110890</offset>\n+ <offset id="465">7131646</offset>\n+ <offset id="466">7150772</offset>\n+ <offset id="467">7169055</offset>\n+ <offset id="468">7186105</offset>\n+ <offset id="469">7202680</offset>\n+ <offset id="470">7217072</offset>\n+ <offset id="471">7230754</offset>\n+ <offset id="472">7243519</offset>\n+ <offset id="473">7254176</offset>\n+ <offset id="474">7264363</offset>\n+ <offset id="475">7273411</offset>\n+ <offset id="476">7281504</offset>\n+ <offset id="477">7288581</offset>\n+ <offset id="478">7294914</offset>\n+ <offset id="479">7300471</offset>\n+ <offset id="480">7305932</offset>\n+ <offset id="481">7310713</offset>\n+ <offset id="482">7314962</offset>\n+ <offset id="483">7319071</offset>\n+ <offset id="484">7322532</offset>\n+ <offset id="485">7325440</offset>\n+ <offset id="486">7327993</offset>\n+ <offset id="487">7330502</offset>\n+ <offset id="488">7332870</offset>\n+ <offset id="489">7335159</offset>\n+ <offset id="490">7337203</offset>\n+ <offset id="491">7339206</offset>\n+ <offset id="492">7469647</offset>\n+ <offset id="493">7489549</offset>\n+ <offset id="494">7511099</offset>\n+ <offset id="495">7532687</offset>\n+ <offset id="496">7553475</offset>\n+ <offset id="497">7574454</offset>\n+ <offset id="498">7596126</offset>\n+ <offset id="499">7616774</offset>\n+ <offset id="500">7637230</offset>\n+ </index>\n+ <indexOffset>7655850</indexOffset>\n+ <sha1>c6e1822f01d723916742db8646d594aca2117ced</sha1>\n+</mzXML>\n'

diff -r 000000000000 -r 22a1fa7d9d6a test-data/LongSwath_UPS1_1ug_rep1_xs_Q2.mgf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/LongSwath_UPS1_1ug_rep1_xs_Q2.mgf Mon Mar 04 11:50:10 2019 -0500

@@ -0,0 +1,25 @@
+BEGIN IONS
+PEPMASS=740.93756
+CHARGE=4+
+RTINSECONDS=23.515736
+TITLE=LongSwath_UPS1_1ug_rep1_xs_Q2.1.1.4
+289.0418 0.13421604
+462.80182 0.34596336
+476.83914 0.076175064
+495.8407 0.28123242
+505.83884 0.40484485
+510.82834 0.26279047
+512.8057 0.08942752
+516.8521 0.09888018
+528.8025 0.17339894
+539.8589 0.034855265
+548.77325 0.2268137
+561.8681 0.36307892
+563.7804 0.02051069
+566.7381 0.3546458
+581.84204 0.34910008
+588.8908 0.33360612
+600.7914 0.04130452
+647.8723 0.42873022
+END IONS
+