Repository 'openms_metaprosip'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/openms_metaprosip

Changeset 7:8febc104e78c (2019-05-15)
Previous changeset 6:96cc79adfadb (2018-03-20) Next changeset 8:8b42d5ca8f49 (2019-05-17)
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/openms commit f608f41d45664d04d3124c6ebc791bf8a566b3c5
modified:
MetaProSIP.xml
macros.xml
test-data/MetaProSIP_1_output_1.csv
test-data/MetaProSIP_1_output_2.csv
added:
test-data/OpenPepXL_input.fasta
test-data/SequenceCoverageCalculator.fasta
test-data/SequenceCoverageCalculator.idXML
test-data/SequenceCoverageCalculator.tsv
b
diff -r 96cc79adfadb -r 8febc104e78c MetaProSIP.xml
--- a/MetaProSIP.xml Tue Mar 20 14:37:53 2018 -0400
+++ b/MetaProSIP.xml Wed May 15 08:02:33 2019 -0400
[
@@ -1,7 +1,7 @@
 <?xml version='1.0' encoding='UTF-8'?>
 <!--This is a configuration file for the integration of a tools into Galaxy (https://galaxyproject.org/). This file was automatically generated using CTDConverter.-->
 <!--Proposed Tool Section: [Utilities]-->
-<tool id="MetaProSIP" name="MetaProSIP" version="2.3.1">
+<tool id="MetaProSIP" name="MetaProSIP" version="2.3.2">
   <description>Performs proteinSIP on peptide features for elemental flux analysis.</description>
   <macros>
     <token name="@EXECUTABLE@">MetaProSIP</token>
@@ -12,7 +12,7 @@
     <requirement type="package" version="3.0.1">r-gplots</requirement>
   </expand>
   <expand macro="stdio"/>
-  <command><![CDATA[
+  <command detect_errors="aggressive"><![CDATA[
 MetaProSIP
 
 #if $param_in_mzML:
@@ -25,7 +25,7 @@
   -out_csv $param_out_csv
 #end if
 #if $param_out_peptide_centric_csv:
-  -out_peptide_centric_csv $param_out_peptide_centric_csv
+  -out_peptide_centric_csv param_out_peptide_centric_csv
 #end if
 #if $param_in_featureXML:
   -in_featureXML $param_in_featureXML
@@ -59,11 +59,7 @@
     $param_plot_extension
   #end if
 #end if
-#if $param_plot_extension == 'pdf':
-  -qc_output_directory images
-#else
-  -qc_output_directory '${html_file.files_path}'
-#end if
+-qc_output_directory images
 #if $param_labeling_element:
   -labeling_element
   #if " " in str($param_labeling_element):
@@ -132,12 +128,25 @@
 #end if
 -threads "\${GALAXY_SLOTS:-1}"
 
-## remove urls and paths (keeping the file name basename)
-&& sed -i -e 's/file:\/\/[^[:space:]]\+\///g; s/\/[^[:space:]]\+\///g' $param_out_csv 
+## - add comment char to first line
+## - remove leading/trailing spaces in fields and replace the remaining spaces by '_'
+## - remove empty line(s)
+## - remove 'file://' and get basename of filenames in the table
+## - pad missing trailing fields with '0' (MetaProSIP output has a varying number of columns, i.e. it omits the last columns if there are no values)
+&& cat param_out_peptide_centric_csv | 
+ sed '1 s/^/#/' | 
+ sed 's/\t /\t/g; s/ \t/\t/g; s/ /_/g' | 
+ grep -v "^$" | 
+ sed "s/\tfile:\/\//\t/g; s/\t\/[^\t]\+\//\t/g; s/\.$param_plot_extension//g" | 
+ awk -v FS='\t' 'BEGIN{line=0}{if(line==0){ncol=NF; print $0}else{printf("%s",$0); for(i=0; i<ncol-NF; i++){printf("\t0")}printf("\n")}line+=1}' > "$param_out_peptide_centric_csv"
 
 ## get html file (should be only one [?])
-#if $param_plot_extension != 'pdf':
-  && cp '${html_file.files_path}'/*\.html output.html
+
+#if $param_collection or $param_plot_extension == 'pdf'
+ && rm -f images/*\.html
+#else
+ && mv images/*\.html '${html_file}'
+ && mv images/ '${html_file.files_path}'
 #end if
  ]]></command>
   <inputs>
@@ -167,6 +176,7 @@
     <param name="param_report_natural_peptides" display="radio" type="boolean" truevalue="-report_natural_peptides" falsevalue="" checked="false" optional="True" label="Whether purely natural peptides are reported in the quality report" help="(-report_natural_peptides) "/>
     <param name="param_filter_monoisotopic" display="radio" type="boolean" truevalue="-filter_monoisotopic" falsevalue="" checked="false" optional="True" label="Try to filter out mono-isotopic patterns to improve detection of low RIA patterns" help="(-filter_monoisotopic) "/>
     <param name="param_cluster" display="radio" type="boolean" truevalue="-cluster" falsevalue="" checked="false" optional="True" label="Perform grouping" help="(-cluster) "/>
+    <param name="param_collection" type="boolean" checked="true" label="output images as collection" help="if enabled, images are written to a collection, and to a webpage otherwise (pdf is always written to a collection)"/>
     <expand macro="advanced_options">
       <param name="param_min_correlation_distance_to_averagine" type="float" value="-1.0" label="Minimum difference in correlation between incorporation pattern and averagine pattern" help="(-min_correlation_distance_to_averagine) Positive values filter all RIAs passing the correlation threshold but that also show a better correlation to an averagine peptide. Disabled for values &lt;= -1"/>
       <param name="param_pattern_15N_TIC_threshold" type="float" value="0.95" label="The most intense peaks of the theoretical pattern contributing to at least this TIC fraction are taken into account" help="(-pattern_15N_TIC_threshold) "/>
@@ -188,26 +198,28 @@
   <outputs>
     <data name="param_out_csv" format="tabular" label="${tool.name} on ${on_string}: tabular"/>
     <data name="param_out_peptide_centric_csv" format="tabular" label="${tool.name} on ${on_string}: peptide centric tabular"/>
-    <data format="html" name="html_file" from_work_dir="output.html" label="${tool.name} on ${on_string}: Webpage">
-      <filter>param_plot_extension == 'png' or param_plot_extension == 'svg'</filter>
+    <data format="html" name="html_file" label="${tool.name} on ${on_string}: Webpage">
+      <filter>not param_collection and (param_plot_extension == 'png' or param_plot_extension == 'svg')</filter>
     </data>
     <collection name="images" type="list" label="${tool.name} on ${on_string}: images">
-      <filter>param_plot_extension == 'pdf'</filter>
+      <filter>param_collection or param_plot_extension == 'pdf'</filter>
       <discover_datasets pattern="__name_and_ext__" directory="images" />
     </collection>
   </outputs>
   <tests>
     <test>
-      <param name="param_in_mzML" value="MetaProSIP_1_input.mzML"/>
-      <param name="param_in_fasta" value="MetaProSIP_1_input.fasta"/>
-      <param name="param_in_featureXML" value="MetaProSIP_1_input.featureXML"/>
-      <output name="param_out_csv" file="MetaProSIP_1_output_1.csv" />
-      <output name="param_out_peptide_centric_csv" file="MetaProSIP_1_output_2.csv" />
+      <param name="param_in_mzML" value="MetaProSIP_1_input.mzML" ftype="mzml"/>
+      <param name="param_in_fasta" value="MetaProSIP_1_input.fasta" ftype="fasta"/>
+      <param name="param_in_featureXML" value="MetaProSIP_1_input.featureXML" ftype="featurexml"/>
+      <output name="param_out_csv" file="MetaProSIP_1_output_1.csv"/>
+      <output name="param_out_peptide_centric_csv" file="MetaProSIP_1_output_2.csv"  compare="sim_size" lines_diff="2"/>
     </test>
   </tests>
   <help>Performs proteinSIP on peptide features for elemental flux analysis.
 
+   **Galaxy specific notes**
 
-For more information, visit http://ftp.mi.fu-berlin.de/OpenMS/release-documentation/html/UTILS_MetaProSIP.html</help>
+   The peptide centric tabular data set generated by the tool is not rendered properly by Galaxy, because it has more than 50 columns. You may want to extract the columns of interest.
+  </help>
 <expand macro="references"/>
 </tool>
b
diff -r 96cc79adfadb -r 8febc104e78c macros.xml
--- a/macros.xml Tue Mar 20 14:37:53 2018 -0400
+++ b/macros.xml Wed May 15 08:02:33 2019 -0400
b
@@ -11,9 +11,9 @@
   </xml>
   <xml name="stdio">
     <stdio>
-      <exit_code range="1:"/>
-      <exit_code range=":-1"/>
-      <regex match="Exception:"/>
+      <regex match="Could not allocate metaspace" level="fatal_oom" description="Java memory Exception"/>
+      <regex match="Cannot create VM thread" level="fatal_oom" description="Java memory Exception"/>
+      <regex match="qUncompress: could not allocate enough memory to uncompress data" level="fatal_oom" description="Java memory Exception"/>
     </stdio>
   </xml>
   <xml name="references">
b
diff -r 96cc79adfadb -r 8febc104e78c test-data/MetaProSIP_1_output_1.csv
--- a/test-data/MetaProSIP_1_output_1.csv Tue Mar 20 14:37:53 2018 -0400
+++ b/test-data/MetaProSIP_1_output_1.csv Wed May 15 08:02:33 2019 -0400
b
@@ -3,5 +3,5 @@
  Protein Accession Description # Unique Peptides Median Global LR median RIA 1 median RIA 2
  CONTIG23640_802236  length=2326  numreads=28 strand:-1 frame:0 orf_location:136:990 1 0.269627132474428 0.603333333333335 36.4733333333347
  Peptide Sequence RT Exp. m/z Theo. m/z Charge Score TIC fraction #non-natural weights RIA 1 INT 1 Cor. 1 RIA 2 INT 2 Cor. 2 Peak intensities Global LR
- YGGAVDPTVLGGVK 80.52 666.8623 666.861538013921 2 0.174659044235501 0.831887919884233 7 0.6 1432379 1.00 36.5 146376 0.87 659720 461384 165174 49177 14782 11728 6541 7410 10868 8404 9437 9556 10203 12619 12159 15831 16288 19847 19258 22222 21275 21576 21294 22006 24019 19046 17493 19060 17884 15631 15769 13964 14131 10090 9445 9216 7297 6670 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  0.269627132474428
+ YGGAVDPTVLGGVK 80.52 666.8623 666.861538013921 2 0.174659044235501 0.831887919884232 7 0.6 1432379 1.00 36.5 146376 0.87 659720 461384 165174 49177 14782 11728 6541 7410 10868 8404 9437 9556 10203 12619 12159 15831 16288 19847 19258 22222 21275 21576 21294 22006 24019 19046 17493 19060 17884 15631 15769 13964 14131 10090 9445 9216 7297 6670 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  0.269627132474428
 Non-Unique Peptides Accessions Peptide Sequence Descriptions Score RT Exp. m/z Theo. m/z Charge #non-natural weights Peak intensities Global LR
b
diff -r 96cc79adfadb -r 8febc104e78c test-data/MetaProSIP_1_output_2.csv
--- a/test-data/MetaProSIP_1_output_2.csv Tue Mar 20 14:37:53 2018 -0400
+++ b/test-data/MetaProSIP_1_output_2.csv Wed May 15 08:02:33 2019 -0400
b
@@ -1,3 +1,2 @@
-Peptide Sequence Feature Quality Report Spectrum Quality report scores Sample Name Protein Accessions Description Unique #Ambiguity members Score RT Exp. m/z Theo. m/z Charge TIC fraction #non-natural weights Peak intensities Group Global Peptide LR RIA 1 LR of RIA 1 INT 1 Cor. 1 RIA 2 LR of RIA 2 INT 2 Cor. 2 RIA 3 LR of RIA 3 INT 3 Cor. 3 RIA 4 LR of RIA 4 INT 4 Cor. 4 RIA 5 LR of RIA 5 INT 5 Cor. 5 RIA 6 LR of RIA 6 INT 6 Cor. 6 RIA 7 LR of RIA 7 INT 7 Cor. 7 RIA 8 LR of RIA 8 INT 8 Cor. 8 RIA 9 LR of RIA 9 INT 9 Cor. 9 RIA 10 LR of RIA 10 INT 10 Cor. 10
-YGGAVDPTVLGGVK feature test_mode_enabled.mzML CONTIG23640_802236  length=2326  numreads=28 strand:-1 frame:0 orf_location:136:990 1 1 0.174659044235501 80.52 666.8623 666.8615 2 0.831887919884232 7 659720 461384 165174 49177 14782 11728 6541 7410 10868 8404 9437 9556 10203 12619 12159 15831 16288 19847 19258 22222 21275 21576 21294 22006 24019 19046 17493 19060 17884 15631 15769 13964 14131 10090 9445 9216 7297 6670 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  0 0.269627132474428 0.6 1.0 1432378.6 1.0 36.5 0.0 146375.7 0.9
-
+#Peptide_Sequence Feature Quality_Report_Spectrum Quality_report_scores Sample_Name Protein_Accessions Description Unique #Ambiguity_members Score RT Exp._m/z Theo._m/z Charge TIC_fraction #non-natural_weights Peak_intensities Group Global_Peptide_LR RIA_1 LR_of_RIA_1 INT_1 Cor._1 RIA_2 LR_of_RIA_2 INT_2 Cor._2 RIA_3 LR_of_RIA_3 INT_3 Cor._3 RIA_4 LR_of_RIA_4 INT_4 Cor._4 RIA_5 LR_of_RIA_5 INT_5 Cor._5 RIA_6 LR_of_RIA_6 INT_6 Cor._6 RIA_7 LR_of_RIA_7 INT_7 Cor._7 RIA_8 LR_of_RIA_8 INT_8 Cor._8 RIA_9 LR_of_RIA_9 INT_9 Cor._9 RIA_10 LR_of_RIA_10 INT_10 Cor._10
+YGGAVDPTVLGGVK feature spectrum_dataset_1_575Z_rt_4831.43346884947.png scores_dataset_1_575Z_rt_4831.43346884947.png dataset_1.dat CONTIG23640_802236 length=2326__numreads=28_strand:-1_frame:0_orf_location:136:990 1 1 0.174659044235501 80.52 666.8623 666.8615 2 0.831887919884232 7 659720_461384_165174_49177_14782_11728_6541_7410_10868_8404_9437_9556_10203_12619_12159_15831_16288_19847_19258_22222_21275_21576_21294_22006_24019_19046_17493_19060_17884_15631_15769_13964_14131_10090_9445_9216_7297_6670_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0_0 0 0.269627132474428 0.6 1.0 1432378.6 1.0 36.5 0.0 146375.7 0.9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
b
diff -r 96cc79adfadb -r 8febc104e78c test-data/OpenPepXL_input.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/OpenPepXL_input.fasta Wed May 15 08:02:33 2019 -0400
b
@@ -0,0 +1,39 @@
+>Protein1
+MSERDTALEKYKSYLLQHREWDSKLKDLRFGNRDLVKKYDKTEDDIKSLQSVGQIIGEVL
+KQLDSERFIVKASSGPRYVVGCRNNVDQSHLVQGVRVSLDMTTLTIMRILPREVDPLVYN
+MSIEDPGDISFAGVGGLNEQIRELREVIELPLKNPELFLRVGIKPPKGVLLYGPPGTGKT
+LLARAVAASLGVNFLKVVSSAIVDKYIGESARIIREMFGYAKEHEPCVIFMDEIDAIGGR
+RFSEGTSADREIQRTLMELLNQMDGFDYLGQTKIIMATNRPDTLDPALLRPGRLDRKIEI
+PLPNEVGRMEILKIHLEKVSKQGEIDYEALVKLTDGTNGADLRNVVTEAGFIAIKEDRDY
+VIQSDLMSAARKVADLKKLEGTIDYQKL
+>Protein2
+MSTLEELDALDQSQQGGSSNNEGLDGIEQEILAAGIDELNSRTRLLENDIKVMKSEFQRL
+THEKSTMLEKIKENQEKISNNKMLPYLVGNVVEILDMQPDEVDVQESANQNSEATRVGKS
+AVIKTSTRQTIFLPLIGLVEPEELHPGDLIGVNKDSYLIIDKLPSEYDSRVKAMEVDEKP
+TERYSDIGGLSKQIEELFEAIVLPMQQADKFRKLGVKPPKGCLMFGPPGTGKTLLARACA
+AQSNATFLKLAAPQLVQMFIGDGAKLVRDAFALAKEKSPAIIFIDELDAIGTKRFDSEKA
+GDREVQRTMLELLNQLDGFSSDDRVKVIAATNRVDTLDPALLRSGRLDRKLEFPLPNEEA
+RVGILRIHSRKMAIDDDINWEELARSTDEYNGAMLKSVCVEAGMIALRQGDTKINHEHFM
+DGILEVQMRKSKTLQYFA
+>Protein3
+MPFGNTHNKFKLNYKPEEEYPDLSKHNNHMAKVLTLELYKKLRDKETPSGFTVDDVIQTG
+VDNPGHPFIMTVGCVAGDEESYEVFKELFDPIISDRHGGYKPTDKHKTDLNHENLKGGDD
+LDPNYVLSSRVRTGRSIKGYTLPPHCSRGERRAVEKLSVEALNSLTGEFKGKYYPLKSMT
+EKEQQQLIDDHFLFDKPVSPLLLASGMARDWPDARGIWHNDNKSFLVWVNEEDHLRVISM
+EKGGNMKEVFRRFCVGLQKIEEIFKKAGHPFMWNQHLGYVLTCPSNLGTGLRGGVHVKLA
+HLSKHPKFEEILTRLRLQKRGTGGVDTAAVGSVFDVSNADRLGSSEVEQVQLVVDGVKLM
+VEMEKKLEKGQSIDDMIPAQK
+>Protein4
+MSTLDLNHLADLYDRKDWNACKKELLKLKVELAKQNLFVPTSDKEKASFARNVFEYGVLV
+SIQTCDIESFARYASQVIPFYHDSLVPSSRMGLVTGLNLLYLLSENRIAEFHTALESVPD
+KSLFERDPYVEWVISLEQNVMEGAFDKVASMIRSCNFPEFSYFMKIVMSMVRNEIATCAE
+KVYSEIPLSNATSLLYLENTKETEKLAEERGWDIRDGVIYFPKEANALETEDGMLIDEED
+ELELPPTASKHTISSIRQLLSYTSELEQIV
+>Protein5
+MCSPSATLTHRTMTEKARTVSDLTISQAIFELSSPFLENKSQKALDTLFSAIRDHDLAPL
+YKYLSENPKTSASIDFDSNFLNSMIKKNEEKLAEFDKAIEDAQELNGEHEILEAMKNKAD
+YYTNICDRERGVQLCDETFERATLTGMKIDVLFSKIRLAYVYADMRVVGQLLEKLKPLIE
+KGGDWERKNRLKAYQGIYLMSIRNFSGAADLLLDCMSTFSSTELLPYYDVVRYAVISGAI
+SLDRVDVKTKIVDSPEVLAVLPQNESMSSLEACINSLYLCDYSGFFRTLADVEVNHLKCD
+QFLVAHYRYYVREMRRRAYAQLLESYRALSIDSMAASFGVSVDYIDRDLASFIPDNKLNC
+VIDRVNGVVFTNRPDEKNRQYQEVVKQGDVLLNKLQKYQATVMRGAFKV
\ No newline at end of file
b
diff -r 96cc79adfadb -r 8febc104e78c test-data/SequenceCoverageCalculator.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SequenceCoverageCalculator.fasta Wed May 15 08:02:33 2019 -0400
b
b'@@ -0,0 +1,149508 @@\n+>sp|Q16827|PTPRO_HUMAN Receptor-type tyrosine-protein phosphatase O OS=Homo sapiens OX=9606 GN=PTPRO PE=1 SV=2\n+MGHLPTGIHGARRLLPLLWLFVLFKNATAFHVTVQDDNNIVVSLEASDVISPASVYVVKITGESKNYFFEFEEFNSTLPPPVIFKASYHGLYYIITLVVVNGNVVTKPSRSITVLTKPLPVTSVSIYDYKPSPETGVLFEIHYPEKYNVFTRVNISYWEGKDFRTMLYKDFFKGKTVFNHWLPGMCYSNITFQLVSEATFNKSTLVEYSGVSHEPKQHRTAPYPPQNISVRIVNLNKNNWEEQSGNFPEESFMRSQDTIGKEKLFHFTEETPEIPSGNISSGWPDFNSSDYETTSQPYWWDSASAAPESEDEFVSVLPMEYENNSTLSETEKSTSGSFSFFPVQMILTWLPPKPPTAFDGFHIHIEREENFTEYLMVDEEAHEFVAELKEPGKYKLSVTTFSSSGSCETRKSQSAKSLSFYISPSGEWIEELTEKPQHVSVHVLSSTTALMSWTSSQENYNSTIVSVVSLTCQKQKESQRLEKQYCTQVNSSKPIIENLVPGAQYQVVIYLRKGPLIGPPSDPVTFAIVPTGIKDLMLYPLGPTAVVLSWTRPYLGVFRKYVVEMFYFNPATMTSEWTTYYEIAATVSLTASVRIANLLPAWYYNFRVTMVTWGDPELSCCDSSTISFITAPVAPEITSVEYFNSLLYISWTYGDDTTDLSHSRMLHWMVVAEGKKKIKKSVTRNVMTAILSLPPGDIYNLSVTACTERGSNTSMLRLVKLEPAPPKSLFAVNKTQTSVTLLWVEEGVADFFEVFCQQVGSSQKTKLQEPVAVSSHVVTISSLLPATAYNCSVTSFSHDSPSVPTFIAVSTMVTEMNPNVVVISVLAILSTLLIGLLLVTLIILRKKHLQMARECGAGTFVNFASLERDGKLPYNWRRSIFAFLTLLPSCLWTDYLLAFYINPWSKNGLKKRKLTNPVQLDDFDAYIKDMAKDSDYKFSLQFEELKLIGLDIPHFAADLPLNRCKNRYTNILPYDFSRVRLVSMNEEEGADYINANYIPGYNSPQEYIATQGPLPETRNDFWKMVLQQKSQIIVMLTQCNEKRRVKCDHYWPFTEEPIAYGDITVEMISEEEQDDWACRHFRINYADEMQDVMHFNYTAWPDHGVPTANAAESILQFVHMVRQQATKSKGPMIIHCSAGVGRTGTFIALDRLLQHIRDHEFVDILGLVSEMRSYRMSMVQTEEQYIFIHQCVQLMWMKKKQQFCISDVIYENVSKS\n+>DECOY_sp|Q16827|PTPRO_HUMAN Receptor-type tyrosine-protein phosphatase O OS=Homo sapiens OX=9606 GN=PTPRO PE=1 
SV=2\n+SKSVNEYIVDSICFQQKKKMWMLQVCQHIFIYQEETQVMSMRYSRMESVLGLIDVFEHDRIHQLLRDLAIFTGTRGVGASCHIIMPGKSKTAQQRVMHVFQLISEAANATPVGHDPWATYNFHMVDQMEDAYNIRFHRCAWDDQEEESIMEVTIDGYAIPEETFPWYHDCKVRRKENCQTLMVIIQSKQQLVMKWFDNRTEPLPGQTAIYEQPSNYGPIYNANIYDAGEEENMSVLRVRSFDYPLINTYRNKCRNLPLDAAFHPIDLGILKLEEFQLSFKYDSDKAMDKIYADFDDLQVPNTLKRKKLGNKSWPNIYFALLYDTWLCSPLLTLFAFISRRWNYPLKGDRELSAFNVFTGAGCERAMQLHKKRLIILTVLLLGILLTSLIALVSIVVVNPNMETVMTSVAIFTPVSPSDHSFSTVSCNYATAPLLSSITVVHSSVAVPEQLKTKQSSGVQQCFVEFFDAVGEEVWLLTVSTQTKNVAFLSKPPAPELKVLRLMSTNSGRETCATVSLNYIDGPPLSLIATMVNRTVSKKIKKKGEAVVMWHLMRSHSLDTTDDGYTWSIYLLSNFYEVSTIEPAVPATIFSITSSDCCSLEPDGWTVMTVRFNYYWAPLLNAIRVSATLSVTAAIEYYTTWESTMTAPNFYFMEVVYKRFVGLYPRTWSLVVATPGLPYLMLDKIGTPVIAFTVPDSPPGILPGKRLYIVVQYQAGPVLNEIIPKSSNVQTCYQKELRQSEKQKQCTLSVVSVITSNYNEQSSTWSMLATTSSLVHVSVHQPKETLEEIWEGSPSIYFSLSKASQSKRTECSGSSSFTTVSLKYKGPEKLEAVFEHAEEDVMLYETFNEEREIHIHFGDFATPPKPPLWTLIMQVPFFSFSGSTSKETESLTSNNEYEMPLVSVFEDESEPAASASDWWYPQSTTEYDSSNFDPWGSSINGSPIEPTEETFHFLKEKGITDQSRMFSEEPFNGSQEEWNNKNLNVIRVSINQPPYPATRHQKPEHSVGSYEVLTSKNFTAESVLQFTINSYCMGPLWHNFVTKGKFFDKYLMTRFDKGEWYSINVRTFVNYKEPYHIEFLVGTEPSPKYDYISVSTVPLPKTLVTISRSPKTVVNGNVVVLTIIYYLGHYSAKFIVPPPLTSNFEEFEFFYNKSEGTIKVVYVSAPSIVDSAELSVVINNDDQVTVHFATANKFLVFLWLLPLLRRAGHIGTPLHGM\n+>sp|Q15256|PTPRR_HUMAN Receptor-type tyrosine-protein phosphatase R OS=Homo sapiens OX=9606 GN=PTPRR PE=1 
SV=2\n+MRRAVCFPALCLLLNLHAAGCFSGNNDHFLAINQKKSGKPVFIYKHSQDIEKSLDIAPQKIYRHSYHSSSEAQVSKRHQIVNSAFPRPAYDPSLNLLAMDGQDLEVENLPIPAANVIVVTLQMDVNKLNITLLRIFRQGVAAALGLLPQQVHINRLIGKKNSIELFVSPINRKTGISDALPSEEVLRSLNINVLHQSLSQFGITEVSPEKNVLQGQHEADKIWSKEGFYAVVIFLSIFVIIVTCLMILYRLKERFQLSLRQDKEKNQEIHLSPITLQPALSEAKTVHSMVQPEQAPKVLNVVVDPQGRGAPEIKATTATSVCPSPFKMKPIGLQERRGSNVSLTLDMSSLGNIEPFVSIPTPREKVAMEYLQSASRILTRSQLRDVVASSHLLQSEFMEIPMNFVDPKEIDIPRHGTKNRYKTILPNPLSRVCLRPKNVTDSLSTYINANYIRGYSGKEKAFIATQGPMINTVDDFWQMVWQEDSPVIVMITKLKEKNEKCVLYWPEKRGIYGKVEVLVISVNECDNYTIRNLVLKQGSHTQHVKHYWYTSWPDHKTPDSAQPLLQLMLDVEEDRLASQGRGPVVVHCSAGIGRTGCFIATSIGCQQLKEEGVVDALSIVCQLRMDRGGMVQTSEQYEFVHHALCLYESRLSAETVQ\n+>DECOY_sp|Q15256|PTPRR_HUMAN Receptor-type tyrosine-protein phosphatase R OS=Homo sapiens OX=9606 GN=PTPRR PE=1 SV=2\n+QVTEASLRSEYLCLAHHVFEYQESTQVMGGRDMRLQCVISLADVVGEEKLQQCGISTAIFCGTRGIGASCHVVVPGRGQSALRDEEVDLMLQLLPQASDPTKHDPWSTYWYHKVHQTHSGQKLVLNRITYNDCENVSIVLVEVKGYIGRKEPWYLVCKENKEKLKTIMVIVPSDEQWVMQWFDDVTNIMPGQTAIFAKEKGSYGRIYNANIYTSLSDTVNKPRLCVRSLPNPLITKYRNKTGHRPIDIEKPDVFNMPIEMFESQLLHSSAVVDRLQSRTLIRSASQLYEMAVKERPTPISVFPEINGLSSMDLTLSVNSGRREQLGIPKMKFPSPCVSTATTAKIEPAGRGQPDVVVNLVKPAQEPQVMSHVTKAESLAPQLTIPSLHIEQNKEKDQRLSLQFREKLRYLIMLCTVIIVFIS'..b'STVFAM\n+>NM_000106|744367_2_460_548\n+MGPRGASRGASPSPPCATWAWARSRWSSG\n+>DECOY_NM_000106|744367_2_460_548\n+GSSWRSRAWAWTACPPSPSAGRSAGRPGM\n+>NM_002543|744389_2_565_653\n+MEKTVTYFPRAHLTGKRAKRSACLWMPSC\n+>DECOY_NM_002543|744389_2_565_653\n+CSPMWLCASRKARKGTLHARPFYTVTKEM\n+>NM_000690|744529_2_820_1010\n+MASPMSSPTWWIWTWSSNVSGIMPAGLISTTGKPSPLTETSSATHAMNLWGCAGRSFRGISRS\n+>DECOY_NM_000690|744529_2_820_1010\n+SRSIGRFSRGACGWLNMAHTASSTETLPSPKGTTSILGAPMIGSVNSSWTWIWWTPSSMPSAM\n+>NM_000761|744570_3_198_391\n+MGLALARACADPGEEPAPGTVKDEPALRGRPADPHWLHARAGAEPPGHHPAGPGAAGRRFQGPA\n+>DECOY_NM_000761|744570_3_198_391\n+APGQFRRGAAGPGAPHHGPPEAGARAHLWHPDAPRGRLAPEDKVTGPAPEEGPDACARALALGM\n+>NM_006018|744640_2_288_367\n+MALPCGFSVSTSSPGNPAGFSCSTWQ\n+>DECOY_NM_006018|744640_2_288_367\n+QWTSCSFGAPNGPSSTSVSFGCP
LAM\n+>NM_006018|744641_2_666_1009\n+MALQMCASASASAIPSGGTKLCSSWSSSCPWASSCSAQPELSGACGRDKWTGMPRSREPSPSSWWWPSSLSSASFPAWLCGSTSSGSCTLRARRIVKCTARWTWRSLSLSASPT\n+>DECOY_NM_006018|744641_2_666_1009\n+TPSASLSLSRWTWRATCKVIRRARLTCSGSSTSGCLWAPFSASSLSSPWWWSSPSPERSRPMGTWKDRGCAGSLEPQASCSSAWPCSSSWSSCLKTGGSPIASASASACMQLAM\n+>NM_000578|744661_2_383_456\n+MVPSPARPARPAQGHSKHLPERPT\n+>DECOY_NM_000578|744661_2_383_456\n+TPREPLHKSHGQAPRAPRAPSPVM\n+>NM_000578|744669_3_513_730\n+MGLHGAWLPHEHCFPGPRKHRVRSSGWRRGGIQTSLGAALGHRVGLALPATGCTSGRGDRQGLGRGLPSLLP\n+>DECOY_NM_000578|744669_3_513_730\n+PLLSPLGRGLGQRDGRGSTCGTAPLALGVRHGLAAGLSTQIGGRRWGSSRVRHKRPGPFCHEHPLWAGHLGM\n+>NM_021245|744690_2_744_1078\n+MALISSTIWALGLELGVQVVPRARLAEEELLAQQGLVRQDQETRQAEKENISLCSRPIFPHGSEPWGLTPSKKWNLALTCWPMGPKLNFPNISPSTGRQCPMVDMRRPPNA\n+>DECOY_NM_021245|744690_2_744_1078\n+ANPPRRMDVMPCQRGTSPSINPFNLKPGMPWCTLALNWKKSPTLGWPESGHPFIPRSCLSINEKEAQRTEQDQRVLGQQALLEEEALRARPVVQVGLELGLAWITSSILAM\n+>NM_015088|744804_2_1547_1716\n+MGTMEKRERTPGKELLFRNQLGQKMTLGTTITGLRVGPGTLAPRTLMTTNGVKGTK\n+>DECOY_NM_015088|744804_2_1547_1716\n+KTGKVGNTTMLTRPALTGPGVRLGTITTGLTMKQGLQNRFLLEKGPTRERKEMTGM\n+>NM_015088|744805_2_1556_1716\n+MEKRERTPGKELLFRNQLGQKMTLGTTITGLRVGPGTLAPRTLMTTNGVKGTK\n+>DECOY_NM_015088|744805_2_1556_1716\n+KTGKVGNTTMLTRPALTGPGVRLGTITTGLTMKQGLQNRFLLEKGPTRERKEM\n+>NM_015088|744811_2_2450_2583\n+MVGGRKSIRQKTAIGKVLQVNLCLGGVKEGRMKSGLGVMVAMQA\n+>DECOY_NM_015088|744811_2_2450_2583\n+AQMAVMVGLGSKMRGEKVGGLCLNVQLVKGIATKQRISKRGGVM\n+>NM_015088|744838_3_2433_2509\n+MVFWKEWLGGGSRSDKKQQLGKFCK\n+>DECOY_NM_015088|744838_3_2433_2509\n+KCFKGLQQKKDSRSGGGLWEKWFVM\n+>NM_000958|744871_2_1811_1884\n+MALEAGICFQVCLAWAWPRKTPPH\n+>DECOY_NM_000958|744871_2_1811_1884\n+HPPTKRPWAWALCVQFCIGAELAM\n+>NM_004958|744986_2_4253_4359\n+MALFCWVRELPSAEHMPKHYTTKNWSSRKAPPLPF\n+>DECOY_NM_004958|744986_2_4253_4359\n+FPLPPAKRSSWNKTTYHKPMHEASPLERVWCFLAM\n+>NM_004958|744989_2_4745_4845\n+MGHFIELCWHCIRTSSPWHNSALTRPGTCWMLN\n+>DECOY_NM_004958|744989_2_4745_4845\n+NLMWCTGPRTLASNHWPSSTRICH
WCLEIFHGM\n+>NM_004958|745002_3_3966_4102\n+MAETAEPGAAEGLIIALPALLLGPGTGLQPDGQGSLQCCICVLLV\n+>DECOY_NM_004958|745002_3_3966_4102\n+VLLVCICCQLSGQGDPQLGTGPGLLLAPLAIILGEAAGPEATEAM\n+>NM_006725|745106_2_758_906\n+MASGDQCAMTLGTWRTPTWCAGNWAAAGQSRPCPACTSRPAAGLSTGTR\n+>DECOY_NM_006725|745106_2_758_906\n+RTGTSLGAAPRSTCAPCPRSQGAAAWNGACWTPTRWTGLTMACQDGSAM\n+>NM_006725|745108_2_1214_1371\n+MGRSSPSPTAPGGSTTPTSAASRWQPGSSAQLPGVCTICPLPKSLQVFRQSL\n+>DECOY_NM_006725|745108_2_1214_1371\n+LSQRFVQLSKPLPCITCVGPLQASSGPQWRSAASTPTTSGGPATPSPSSRGM\n+>NM_001470|745249_3_2930_3024\n+MAVGGAGHHEDRVIDQQQRGGEVPAVGEGEP\n+>DECOY_NM_001470|745249_3_2930_3024\n+PEGEGVAPVEGGRQQQDIVRDEHHGAGGVAM\n+>NM_001042594|745506_2_1677_1771\n+MVQLKPQMSSYKIHHYMIQKSGPLKIKKDEK\n+>DECOY_NM_001042594|745506_2_1677_1771\n+KEDKKIKLPGSKQIMYHHIKYSSMQPKLQVM\n+>NM_007335|745575_2_2872_3071\n+MVPGATFLCMLRYRSPMCTYRAARWRLEISTWVCPRRQPSHLSMARSCLPSSTGASSSDTKQNSAW\n+>DECOY_NM_007335|745575_2_2872_3071\n+WASNQKTDSSSAGTSSPLCSRAMSLHSPQRRPCVWTSIELRWRAARYTCMPSRYRLMCLFTAGPVM\n+>NM_007335|745583_2_4612_4727\n+MGRARTTELLALARSRSVRRRQPQRTSSWCSKHRRTCW\n+>DECOY_NM_007335|745583_2_4612_4727\n+WCTRRHKSCWSSTRQPQRRRVSRSRALALLETTRARGM\n+>NM_007337|745641_3_5066_5163\n+MVVEGVLGEKSCTLRLRGQGSYDERYMLPHQP\n+>DECOY_NM_007337|745641_3_5066_5163\n+PQHPLMYREDYSGQGRLRLTCSKEGLVGEVVM\n'
b
diff -r 96cc79adfadb -r 8febc104e78c test-data/SequenceCoverageCalculator.idXML
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SequenceCoverageCalculator.idXML Wed May 15 08:02:33 2019 -0400
[
b'@@ -0,0 +1,161 @@\n+<?xml version="1.0" encoding="UTF-8"?>\n+<?xml-stylesheet type="text/xsl" href="https://www.openms.de/xml-stylesheet/IdXML.xsl" ?>\n+<IdXML version="1.5" xsi:noNamespaceSchemaLocation="https://www.openms.de/xml-schema/IdXML_1_5.xsd" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">\n+\t<SearchParameters id="SP_0" db="/home/eugen/Development/OpenMS/src/tests/topp/OpenPepXL_input.fasta" db_version="" taxonomy="" mass_type="monoisotopic" charges="3,4,5,6,7" enzyme="trypsin" missed_cleavages="2" precursor_peak_tolerance="10" precursor_peak_tolerance_ppm="true" peak_mass_tolerance="0.2" peak_mass_tolerance_ppm="false" >\n+\t\t\t\t<UserParam type="string" name="input_consensusXML" value="/home/eugen/Development/OpenMS/src/tests/topp/OpenPepXL_input.consensusXML"/>\n+\t\t\t\t<UserParam type="string" name="input_mzML" value="/home/eugen/Development/OpenMS/src/tests/topp/OpenPepXL_input.mzML"/>\n+\t\t\t\t<UserParam type="string" name="input_decoys" value=""/>\n+\t\t\t\t<UserParam type="int" name="decoy_prefix" value="0"/>\n+\t\t\t\t<UserParam type="string" name="decoy_string" value="decoy"/>\n+\t\t\t\t<UserParam type="string" name="out_xquest_specxml" value="OpenPepXL_output.spec.xml.tmp"/>\n+\t\t\t\t<UserParam type="int" name="precursor:min_charge" value="3"/>\n+\t\t\t\t<UserParam type="int" name="precursor:max_charge" value="7"/>\n+\t\t\t\t<UserParam type="float" name="fragment:mass_tolerance_xlinks" value="0.3"/>\n+\t\t\t\t<UserParam type="int" name="peptide:min_size" value="5"/>\n+\t\t\t\t<UserParam type="stringList" name="cross_link:residue1" value="[K]"/>\n+\t\t\t\t<UserParam type="stringList" name="cross_link:residue2" value="[K]"/>\n+\t\t\t\t<UserParam type="float" name="cross_link:mass" value="138.0680796"/>\n+\t\t\t\t<UserParam type="float" name="cross_link:mass_isoshift" value="12.075321"/>\n+\t\t\t\t<UserParam type="floatList" name="cross_link:mass_monolink" value="[156.07864431, 155.094628715]"/>\n+\t\t\t\t<UserParam type="string" 
name="cross_link:name" value="DSS"/>\n+\t\t\t\t<UserParam type="int" name="modifications:variable_max_per_peptide" value="2"/>\n+\t\t\t\t<UserParam type="int" name="MS:1001029" value="329"/>\n+\t</SearchParameters>\n+\t<IdentificationRun date="2018-03-06T13:21:04" search_engine="OpenXQuest" search_engine_version="2.3.0" search_parameters_ref="SP_0" >\n+\t\t<ProteinIdentification score_type="" higher_score_better="true" significance_threshold="0" >\n+\t\t\t<ProteinHit id="PH_0" accession="Protein3" score="0" sequence="" >\n+\t\t\t\t<UserParam type="string" name="target_decoy" value="target"/>\n+\t\t\t</ProteinHit>\n+\t\t\t<ProteinHit id="PH_1" accession="Protein2" score="0" sequence="" >\n+\t\t\t\t<UserParam type="string" name="target_decoy" value="target"/>\n+\t\t\t</ProteinHit>\n+\t\t\t<ProteinHit id="PH_2" accession="Protein1" score="0" sequence="" >\n+\t\t\t\t<UserParam type="string" name="target_decoy" value="target"/>\n+\t\t\t</ProteinHit>\n+\t\t\t<UserParam type="string" name="SpectrumIdentificationProtocol" value="MS:1002494"/>\n+\t\t</ProteinIdentification>\n+\t\t<PeptideIdentification score_type="OpenXQuest:combined score" higher_score_better="true" significance_threshold="0" MZ="728.411010742188" RT="2120.05249023438" spectrum_reference="spectrum=99,spectrum=48" >\n+\t\t\t<PeptideHit score="0.936605209529642" sequence="LAAPQLVQMFIGDGAK(Xlink:DSS)LVR" charge="3" aa_before="K" aa_after="D" start="249" end="267" protein_refs="PH_1" >\n+\t\t\t\t<UserParam type="string" name="fragment_annotation" value="1004.99096679688,0.334201395511627,2,&quot;[alpha|xi$b18]&quot;"/>\n+\t\t\t\t<UserParam type="string" name="spectrum_reference" value="spectrum=99"/>\n+\t\t\t\t<UserParam type="string" name="target_decoy" value="target"/>\n+\t\t\t\t<UserParam type="int" name="spectrum_index" value="81"/>\n+\t\t\t\t<UserParam type="float" name="xl_mass" value="156.07864431"/>\n+\t\t\t\t<UserParam type="string" name="xl_chain" value="MS:1002509"/>\n+\t\t\t\t<UserParam type="int" 
name="xl_pos" value="15"/>\n+\t\t\t\t<UserParam type="string" name="xl_type" value="mono-link"/>\n+\t\t\t\t<UserParam type="int" name="xl_rank" value="2"/>\n+\t\t\t\t<UserParam type="string" name="xl_term_spec" '..b'="unique"/>\n+\t\t\t</PeptideHit>\n+\t\t</PeptideIdentification>\n+\t\t<PeptideIdentification score_type="OpenXQuest:combined score" higher_score_better="true" significance_threshold="0" MZ="656.050903320313" RT="2228.50292963107" spectrum_reference="spectrum=238,spectrum=219" >\n+\t\t\t<PeptideHit score="30.0056519661123" sequence="EVIELPLK(Xlink:DSS)NPELFLR" charge="3" aa_before="R" aa_after="V" start="145" end="159" protein_refs="PH_2" >\n+\t\t\t\t<UserParam type="string" name="fragment_annotation" value="387.815673828125,0.215708956122398,2,&quot;[alpha|ci$y6]&quot;|435.218322753906,0.0836383178830147,1,&quot;[alpha|ci$y3]&quot;|444.946411132812,0.119465403258801,2,&quot;[alpha|ci$y7]&quot;|471.207763671875,0.061412189155817,1,&quot;[alpha|ci$b4]&quot;|584.244018554688,0.203754380345345,1,&quot;[alpha|ci$b5]&quot;|692.068786621094,1,2,&quot;[alpha|xi$y10]&quot;|748.578674316406,0.292433172464371,2,&quot;[alpha|xi$y11]&quot;|774.424987792969,0.361789405345917,1,&quot;[alpha|ci$y6]&quot;|812.867004394531,0.152033895254135,2,&quot;[alpha|xi$y12]&quot;|839.981567382812,0.125570505857468,2,&quot;[alpha|xi$b13]&quot;|888.39013671875,0.356793075799942,1,&quot;[alpha|ci$y7]&quot;|896.316162109375,0.0581442341208458,2,&quot;[alpha|xi$b14]&quot;"/>\n+\t\t\t\t<UserParam type="string" name="spectrum_reference" value="spectrum=238"/>\n+\t\t\t\t<UserParam type="string" name="target_decoy" value="target"/>\n+\t\t\t\t<UserParam type="int" name="spectrum_index" value="195"/>\n+\t\t\t\t<UserParam type="float" name="xl_mass" value="156.07864431"/>\n+\t\t\t\t<UserParam type="string" name="xl_chain" value="MS:1002509"/>\n+\t\t\t\t<UserParam type="int" name="xl_pos" value="7"/>\n+\t\t\t\t<UserParam type="string" name="xl_type" value="mono-link"/>\n+\t\t\t\t<UserParam 
type="int" name="xl_rank" value="1"/>\n+\t\t\t\t<UserParam type="string" name="xl_term_spec" value="ANYWHERE"/>\n+\t\t\t\t<UserParam type="float" name="spec_heavy_RT" value="2221.23388666232"/>\n+\t\t\t\t<UserParam type="float" name="spec_heavy_MZ" value="660.072448730469"/>\n+\t\t\t\t<UserParam type="string" name="spectrum_reference_heavy" value="spectrum=219"/>\n+\t\t\t\t<UserParam type="int" name="spectrum_index_heavy" value="180"/>\n+\t\t\t\t<UserParam type="float" name="OMS:precursor_mz_error_ppm" value="6.2016465793789"/>\n+\t\t\t\t<UserParam type="float" name="OpenXQuest:xcorr xlink" value="0.389891893192958"/>\n+\t\t\t\t<UserParam type="float" name="OpenXQuest:xcorr common" value="0.462351176037813"/>\n+\t\t\t\t<UserParam type="float" name="OpenXQuest:match-odds" value="2.59304858228082"/>\n+\t\t\t\t<UserParam type="float" name="OpenXQuest:intsum" value="3.03074353560805"/>\n+\t\t\t\t<UserParam type="float" name="OpenXQuest:wTIC" value="0.672367841921044"/>\n+\t\t\t\t<UserParam type="float" name="OpenXQuest:TIC" value="0.672367841921044"/>\n+\t\t\t\t<UserParam type="float" name="OpenXQuest:prescore" value="0.107142858207226"/>\n+\t\t\t\t<UserParam type="float" name="OpenXQuest:log_occupancy" value="5.62904796479063"/>\n+\t\t\t\t<UserParam type="float" name="OpenXQuest:log_occupancy_alpha" value="5.62904796479063"/>\n+\t\t\t\t<UserParam type="float" name="OpenXQuest:log_occupancy_beta" value="0"/>\n+\t\t\t\t<UserParam type="float" name="OpenXQuest:log_occupancy_full_spec" value="5.33167976719043"/>\n+\t\t\t\t<UserParam type="float" name="HyperCommon" value="8.15707933298946"/>\n+\t\t\t\t<UserParam type="float" name="HyperXlink" value="4.59543196178458"/>\n+\t\t\t\t<UserParam type="float" name="HyperAlpha" value="17.3945020752073"/>\n+\t\t\t\t<UserParam type="float" name="HyperBeta" value="0"/>\n+\t\t\t\t<UserParam type="float" name="HyperBoth" value="17.3945020752073"/>\n+\t\t\t\t<UserParam type="int" name="matched_xlink_alpha" 
value="5"/>\n+\t\t\t\t<UserParam type="int" name="matched_xlink_beta" value="0"/>\n+\t\t\t\t<UserParam type="int" name="matched_common_alpha" value="7"/>\n+\t\t\t\t<UserParam type="int" name="matched_common_beta" value="0"/>\n+\t\t\t\t<UserParam type="string" name="selected" value="false"/>\n+\t\t\t\t<UserParam type="string" name="protein_references" value="unique"/>\n+\t\t\t</PeptideHit>\n+\t\t</PeptideIdentification>\n+\t</IdentificationRun>\n+</IdXML>\n'
b
diff -r 96cc79adfadb -r 8febc104e78c test-data/SequenceCoverageCalculator.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/SequenceCoverageCalculator.tsv Wed May 15 08:02:33 2019 -0400
b
@@ -0,0 +1,4 @@
+#protein coverage% #unique hits
+Protein1 3.86598 1
+Protein2 4.3379 1
+Protein3 3.14961 1