Repository 'pepquery'
hg clone https://toolshed.g2.bx.psu.edu/repos/galaxyp/pepquery

Changeset 0:32d3818fc635 (2020-01-22)
Next changeset 1:528bcf03e561 (2020-07-17)
Commit message:
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pepquery commit 9764eee2801a65462d26b919bfaea6e3ae0cce7a"
added:
macros.xml
pepquery.xml
test-data/Uniprot.fasta
test-data/iTRAQ_f4.mgf
test-data/novel_peptides
test-data/novel_proteins.fa
b
diff -r 000000000000 -r 32d3818fc635 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Wed Jan 22 17:12:37 2020 -0500
b
b'@@ -0,0 +1,164 @@\n+ <macros>\n+    <token name="@VERSION@">1.3.0</token>\n+    <xml name="modifications">\n+        <option value="1">HexNAc of T (203.07937251951) modaa</option>\n+        <option value="2">HexNAc of S (203.07937251951) modaa</option>\n+        <option value="3">Propionyl of peptide N-term light (56.02621474784) modn_peptide</option>\n+        <option value="4">iTRAQ 8-plex of K (304.19903946116) modaa</option>\n+        <option value="5">Dimethylation of K 2H(6) 13C(2) (36.075670278260006) modaa</option>\n+        <!--<option value="6">Carbamidomethylation of C (57.02146372057) modaa</option>-->\n+        <option value="7">Dioxidation of M (31.98982923912) modaa</option>\n+        <option value="8">Guanidination of K (42.02179807374) modaa</option>\n+        <option value="9">S-nitrosylation (28.99016359229) modaa</option>\n+        <option value="10">TMT 6-plex of K (229.16293213472) modaa</option>\n+        <option value="11">Propionyl of peptide N-term heavy (59.03627926124) modn_peptide</option>\n+        <option value="12">Formylation of protein N-term (27.99491461956) modn_protein</option>\n+        <option value="13">TMT 10-plex of K+6 (235.18306116152) modaa</option>\n+        <option value="14">TMT 11-plex of K+6 (235.18306116152) modaa</option>\n+        <option value="15">TMT 10-plex of K+8 (237.17713094832) modaa</option>\n+        <option value="16">TMT 11-plex of K+4 (233.18803911764) modaa</option>\n+        <option value="17">Diiodination of Y (251.79329593586) modaa</option>\n+        <option value="18">TMT 10-plex of K+4 (233.18803911764) modaa</option>\n+        <option value="19">ICPL10 of peptide N-term (115.06669973029) modn_peptide</option>\n+        <option value="20">Amidation of the protein C-term (-0.9840155826899988) modc_protein</option>\n+        <option value="21">Acetylation of peptide N-term (42.0105646837) modn_peptide</option>\n+        <option value="22">Palmitoylation of protein N-term (238.22966558166) modn_protein</option>\n+        <option value="23">ICAT-O (227.12699141827) modaa</option>\n+        <option value="24">TMT 2-plex of peptide N-term (225.15583272792) modn_peptide</option>\n+        <option value="25">TMT 11-plex of K+8 (237.17713094832) modaa</option>\n+        <option value="26">Label of K 2H(4) (4.025106982920001) modaa</option>\n+        <option value="27">Isoleucine 13C(6) 15N(1) (7.017163920200005) modaa</option>\n+        <option value="28">Arginine 13C(6) (6.020129026799992) modaa</option>\n+        <option value="29">Acetylation of K (42.0105646837) modaa</option>\n+        <option value="30">Dimethylation of peptide N-term 2H(6) 13C(2) (36.075670278260006) modn_peptide</option>\n+        <option value="31">ICPL0 of K (105.02146372057) modaa</option>\n+        <option value="32">Trimethylation of protein N-term A (42.04695019242) modn_protein</option>\n+        <option value="33">Thioacyl of peptide N-term (87.99828574784) modn_peptide</option>\n+        <option value="34">Trideuterated Methyl Ester of R (17.034480301330003) modaa</option>\n+        <option value="35">ICPL4 of peptide N-term (109.04657070349) modn_peptide</option>\n+        <option value="36">4-Hydroxyproline (83.07349929343) modaa</option>\n+        <option value="37">Hex(5) HexNAc(4) NeuAc(2) of N (2204.77244018348) modaa</option>\n+        <option value="38">Palmitoylation of T (238.22966558166) modaa</option>\n+        <option value="39">Palmitoylation of S (238.22966558166) modaa</option>\n+        <option value="40">Methylation of K (14.01565006414) modaa</option>\n+        <option value="41">Hexose of K (162.0528234185) modaa</option>\n+        <option value="42">Methylation of E (14.01565006414) modaa</option>\n+        <option value="43">Lysine 13C(6) (6.020129026799992) modaa</option>\n+        <option value="44">Palmitoylation of K (238.22966558166) modaa</option>\n+        <option value="45">Methylation of C (14.01565006414) modaa</option>\n+        <option value="46">Sodium adduct to D (21.98194424883) modaa</optio'..b'ide N-term (71.03711378471) modn_peptide</option>\n+        <option value="114">Phosphorylation of Y (79.96633052074999) modaa</option>\n+        <option value="115">Didehydro of T (-2.01565006414) modaa</option>\n+        <option value="116">Oxidation of K (15.99491461956) modaa</option>\n+        <!--<option value="117">Oxidation of M (15.99491461956) modaa</option>-->\n+        <option value="118">Phosphorylation of T (79.96633052074999) modaa</option>\n+        <option value="119">Oxidation of P (15.99491461956) modaa</option>\n+        <option value="120">Trioxidation of C (47.98474385868) modaa</option>\n+        <option value="121">Dimethylation of K 2H(6) (34.068960602660006) modaa</option>\n+        <option value="122">NIPCAM of C (99.06841391299) modaa</option>\n+        <option value="123">Methylthio of N (45.987721064140004) modaa</option>\n+        <option value="124">Trideuterated Methyl Ester of K (17.034480301330003) modaa</option>\n+        <option value="125">Heme B of C (616.17729182368) modaa</option>\n+        <option value="126">Trideuterated Methyl Ester of E (17.034480301330003) modaa</option>\n+        <option value="127">Methylthio of C (45.987721064140004) modaa</option>\n+        <option value="128">Phosphorylation of S (79.96633052074999) modaa</option>\n+        <option value="129">Methylthio of D (45.987721064140004) modaa</option>\n+        <option value="130">Heme B of H (616.17729182368) modaa</option>\n+        <option value="131">Trideuterated Methyl Ester of D (17.034480301330003) modaa</option>\n+        <option value="132">ICPL10 of K (115.06669973029) modaa</option>\n+        <option value="133">Deamidation of N 18O (2.9882619631300003) modaa</option>\n+        <option value="134">Dimethylation of peptide N-term 2H(6) (34.068960602660006) modn_peptide</option>\n+        <option value="135">Pyridylethyl of C (105.05784922929) modaa</option>\n+        <option value="136">Arginine 13C(6) 15N(4) (10.008268600400015) modaa</option>\n+        <option value="137">Trideuterated Methyl Ester of peptide C-term (17.034480301330003) modn_peptide</option>\n+        <option value="138">Hex(1)NAc(1) of S (365.13219593801) modaa</option>\n+        <option value="139">TMT 6-plex of K+8 (237.17713094832) modaa</option>\n+        <option value="140">Hex(1)NAc(1) of T (365.13219593801) modaa</option>\n+        <option value="141">TMT 6-plex of K+6 (235.18306116152) modaa</option>\n+        <option value="142">Propionyl of K heavy (59.03627926124) modaa</option>\n+        <option value="143">Citrullination of R (0.9840155826899988) modaa</option>\n+        <option value="144">TMT 6-plex of K+4 (233.18803911764) modaa</option>\n+        <option value="145">mTRAQ of peptide N-term 13C(3) 15N (144.1020624208) modn_peptide</option>\n+        <option value="146">ICPL0 of peptide N-term (105.02146372057) modn_peptide</option>\n+        <option value="147">Geranyl-geranyl of C (272.25040102624) modaa</option>\n+        <option value="148">Lysine 13C(6) 15N(2) (8.014198813600004) modaa</option>\n+        <option value="149">Homoserine lactone of peptide C-term M (-48.00337112828) modcaa_peptide</option>\n+        <option value="150">Carbamidomethylation of E (57.02146372057) modaa</option>\n+        <option value="151">Dimethylation of K 2H(4) (32.0564071112) modaa</option>\n+        <option value="152">Glutathione of C (305.06815621281) modaa</option>\n+        <option value="153">Carbamidomethylation of K (57.02146372057) modaa</option>\n+        <option value="154">iodoTMT zero of C (324.21614077584) modaa</option>\n+        <option value="155">Lipoyl of K (188.0329570044) modaa</option>\n+        <option value="156">Leucine 13C(6) 15N(1) (7.017163920200005) modaa</option>\n+        <option value="157">iTRAQ 4-plex of peptide N-term (144.1020624208) modn_peptide</option>\n+        <option value="158">Deamidation of Q (0.9840155826899988) modaa</option>\n+        <option value="159">Deamidation of N (0.9840155826899988) modaa</option>\n+    </xml>\n+</macros>\n'
b
diff -r 000000000000 -r 32d3818fc635 pepquery.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pepquery.xml Wed Jan 22 17:12:37 2020 -0500
[
b'@@ -0,0 +1,456 @@\n+<tool id="pepquery" name="PepQuery" version="@VERSION@.0">\n+    <description>Peptide-centric search engine for novel peptide identification and validation.</description>\n+    <macros>\n+        <import>macros.xml</import>\n+    </macros>\n+    <requirements> \n+        <requirement type="package" version="@VERSION@">pepquery</requirement>\n+    </requirements>\n+    <stdio>\n+        <exit_code range="1:" level="fatal" description="Failed" />\n+        <regex match="Exception"\n+               source="stderr"\n+               level="fatal"\n+               description="java Exception" />\n+    </stdio>\n+    <command><![CDATA[\n+            #import re\n+            #set $spectrum_file = $re.sub(\'\\s\',\'_\',$re.sub(\'[.][^.]*$\',\'\',$req_inputs.spectrum_file.display_name.split(\'/\')[-1])) + ".mgf"\n+            #set $db_file = $re.sub(\'\\s\',\'_\',$re.sub(\'[.][^.]*$\',\'\',$req_inputs.db_file.display_name.split(\'/\')[-1])) + ".fa"\n+            ln -s \'$req_inputs.spectrum_file\' \'$spectrum_file\' &&\n+            ln -s \'$req_inputs.db_file\' \'$db_file\' &&\n+            pepquery \n+                -ms \'$spectrum_file\'\n+                -db \'$db_file\'\n+                #if $req_inputs.input_type.input_type_selector == \'peptide\'\n+                    -pep \'$req_inputs.input_type.multiple.input\'\n+                #else\n+                    -t \'$req_inputs.input_type.input_type_selector\'\n+                    #if int(str($req_inputs.input_type.input_type_selector)) == 1\n+                        -i \'$req_inputs.input_type.multiple.input\'\n+                    #else\n+                        -i \'$req_inputs.input_type.input\'\n+                        #if int(str($req_inputs.input_type.input_type_selector)) == 2\n+                            #if $req_inputs.input_type.frame == \'None\'\n+                                -f \'0\'\n+                            #else\n+                                -f \'$req_inputs.input_type.frame\'\n+                            #end if\n+                        #else\n+                            -anno \'$req_inputs.input_type.anno\'\n+                        #end if\n+                    #end if\n+                #end if\n+                #if $modifications.fixed_mod\n+                -fixMod \'$modifications.fixed_mod\'\n+                #end if\n+                #if $modifications.var_mod\n+                -varMod \'$modifications.var_mod\'\n+                #end if\n+                -maxVar \'$modifications.max_mods\'\n+                $modifications.unmodified\n+                $modifications.aa\n+                -tol \'$ms_params.tolerance_params.precursor_tolerance\'\n+                -tolu \'$ms_params.tolerance_params.precursor_unit\'\n+                -itol \'$ms_params.tolerance_params.tolerance\'\n+                -e \'$ms_params.digestion.enzyme\'\n+                -fragmentMethod \'$ms_params.search.frag_method\'\n+                -m \'$ms_params.search.scoring_method\'\n+                -maxCharge \'$ms_params.search.max_charge\'\n+                -minCharge \'$ms_params.search.min_charge\'\n+                -minPeaks \'$ms_params.search.min_peaks\'\n+                -minScore \'$ms_params.search.min_score\'\n+                -maxLength \'$ms_params.search.max_length\'\n+                -n \'$ms_params.search.num_random_peptides\'\n+            -o pepquery_output\n+            | sed \'s/No valid peptide/Error: No valid peptide/\' | tee >(cat 1>&2)\n+            #if $report_spectrum_file:\n+                && for f in pepquery_output/psm.txt pepquery_output/psm_annotation.txt pepquery_output/detail.txt pepquery_output/psm_rank.txt; do if [ -e \\${f} ]; then cp \\${f} \\${f}.orig; awk \'BEGIN{FS="\\t"; OFS="\\t"; stc = 0;}; NR==1{for (i = 1; i <= NF; i++) {if (\\$i == "spectrum_title") stc = i;}}; NR==1{if (stc>0){\\$stc = "spectrum_file" FS \\$stc}; print}; NR>1{if (stc>0){\\$stc = "$spectrum_file" FS \\$stc}; print}\' \\${f}.orig > \\${f};fi;done\n+            #end if\n+        ]]>\n+    </command>\n+    <inputs>\n+        <section name="req_inputs" title="Input Data" expanded="true">\n+            <condit'..b'</section> \n+            <section name="modifications">\n+                 <param name="fixed_mod" value="6,103,157"/>\n+                 <param name="var_mod" value="117"/>\n+                 <param name="max_mods" value="3"/>\n+                 <param name="unmodified" value="False"/>\n+                 <param name="aa" value="True"/>\n+            </section>\n+            <section name="ms_params">\n+                <section name="tolerance_params">\n+                    <param name="precursor_tolerance" value="10"/>\n+                    <param name="precursor_unit" value="ppm"/>\n+                    <param name="tolerance" value="0.6"/>\n+                </section>\n+                <section name="digestion">\n+                    <param name="enzyme" value="0"/>\n+                    <param name="max_missed_cleavages" value="2"/>\n+                </section>\n+                <section name="search">\n+                    <param name="frag_method" value="1"/>\n+                    <param name="scoring_method" value="1"/>\n+                    <param name="max_charge" value="3"/>\n+                    <param name="min_charge" value="2"/>\n+                    <param name="min_peaks" value="10"/>\n+                    <param name="min_score" value="12"/>\n+                    <param name="max_length" value="45"/>\n+                    <param name="num_random_peptides" value="1000"/>\n+                </section>\n+            </section>\n+            <param name="report_spectrum_file" value="true"/>\n+            <output name="psm_rank_txt">\n+                <assert_contents>\n+                    <has_text text="ELGSSDLTAR" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+\n+    </tests>\n+    <help><![CDATA[\n+PepQuery is a peptide-centric search engine for novel peptide identification and validation. Cancer genomics studies have identified a large number of genomic alterations that may lead to novel, cancer-specific protein sequences. Proteins resulted from these genomic alterations are attractive candidates for cancer biomarkers and therapeutic targets. The leading approach to proteomic validation of genomic alterations is to analyze tandem mass spectrometry (MS/MS) data using customized proteomics databases created from genomics data. Such analysis is time-consuming and requires thorough training and detailed knowledge in proteomics data analysis, leading to a gap between MS/MS data and the cancer genomics community. PepQuery does not require customized databases and allows quick and easy proteomic validation of genomic alterations.\n+\n+**Inputs**\n+    - A sequence to match, one of the following:\n+\n+      - A peptide string or a history dataset with a list of peptides \n+      - A protein string or a history dataset with a protein fasta \n+      - A DNA string that is at least 60 base pairs in length\n+\n+    - A mass spectrometry MGF file \n+    - A reference protein fasta database, peptides matching a reference sequence will be excluded.  \n+\n+**Outputs**\n+    - PSM annotation - tabular with columns: \n+      peptide Query calc_mr observed_mz charge pepSeq m_label m_mz m_intensity mz intensity\n+    - Detail - tabular with columns: \n+      *report_spectrum_file* spectrum_title peptide modification pep_mass score\n+    - PSM - tabular with columns: \n+      peptide modification n *report_spectrum_file* spectrum_title charge exp_mass ppm pep_mass mz score n_db total_db n_random total_random pvalue\n+    - PSM Rank - tabular with columns: \n+      peptide modification n *report_spectrum_file* spectrum_title charge exp_mass ppm pep_mass mz score n_db total_db n_random total_random pvalue rank *n_ptm*\n+    - An MGF with the best matching spectrums\n+\n+    The *report_spectrum_file* is an optional field that can be added.  \n+    The *n_ptm* field is added when using unrestricted modification searching (-um).\n+    \n+\n+    ]]></help>\n+    <citations>\n+        <citation type="doi">10.1101/gr.235028.118</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r 32d3818fc635 test-data/Uniprot.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Uniprot.fasta Wed Jan 22 17:12:37 2020 -0500
b
b'@@ -0,0 +1,20 @@\n+>sp|Q8C4J7|TBL3_MOUSE Transducin beta-like protein 3 OS=Mus musculus GN=Tbl3 PE=2 SV=1\n+MAETAAGLCRFKANYAVERKIEPFYKGGKAQLDQTGHYLFCVCGTKVNILDVASGALLRSLEQEDQEDITSFDLSPDDEVLVTASRALLLAQWAWREGTVTRLWKAIHTAPVASMAFDATSTLLATGGCDGAVRVWDIVQHYGTHHFRGSPGVVHLVAFHPDPTRLLLFSSAVDTSIRVWSLQDRSCLAVLTAHYSAVTSLSFSEGGHTMLSSGRDKICIVWDLQSYQTTRTVPVFESVEASVLLPEQPAPALGVKSSGLHFLTAGDQGILRVWEAASGQCVYTQPQMPGLRQELTHCTLARAADLLLTVTADHNLLLYEAHSLQLQKQFAGYSEEVLDVRFLGPSDSHIVVASNSPCLKVFELQTLACQILHGHTDIVLALDVFRKGWLFASCAKDQSIRIWKMNKAGQVACVAQGSGHTHSVGTICCSRLKESFLVTGSQDCTVKLWPLPEALLAKSTAADSGPVLLQAQTTRRCHDKDINSLAVSPNDKLLATGSQDRTAKLWALPQCQLLGVFTGHRRGLWNVQFSPTDQVLATASADGTIKLWALQDFSCLKTFEGHDASVLKVAFVSRGSQLLSSGSDGLLKLWTIKSNECVRTLDAHEDKVWGLHCSQLDDHAITGGSDSRIILWKDVTEAEQAEEQAKREEQVIKQQELDNLLHEKRYLRALGLAISLDRPHTVLTVIQAIRRDPEACEKLEATVLRLRRDQKEALLRFCVTWNTNSRHCHEAQAVLGVLLRHEAPEELLAYDGVRGSLEALLPYTERHFQRLSRTLQAATFLDFLWHNMKLSPCPAAAPPAL\n+>tr|Q80ZP8|Q80ZP8_MOUSE Armet protein OS=Mus musculus GN=Manf PE=1 SV=1\n+METNNYLPSPPSFPVCISYLGRFYQDLKDRDVTFSPATIEEELIKFCREARGKENRLCYYIGATDDAATKIINEVSKPLAHHIPVEKICEKLKKKDSQICELKYDKQIDLSTVDLKKLRVKELKKILDDWGEMCKGCAEKSDYIRKINELMPKYAPKAASARTDL\n+>sp|Q60847|COCA1_MOUSE Collagen alpha-1(XII) chain OS=Mus musculus GN=Col12a1 PE=2 SV=3\n+MQTRLPRALAALGVALLLSSIEAEVDPPSDLNFKIIDENTVHMSWERPVDPIVGYRITVDPTTDGPTKEFTLAASTTETLLSDLIPETQYVVTITSYNEVEESVPVIGQLTIQTGGPTKPGEKKPGKTEIQKCSVSAWTDLVFLVDGSWSVGRNNFKYILDFIVALVSAFDIGEEKTRVGVVQYSSDTRTEFNLNQYYRREDLLAAVKKIPYKGGNTMTGDAIDYLVKNTFTESAGSRAGFPKVAIIITDGKSQDEVEIPARELRNIGVEVFSLGIKAADAKELKQIASTPSLNHVFNVANFDAIVDIQNEIISQVCSGVDEQLGELVSGEEVIEPPSNLVVTELSSKYIRLSWDPSPSAVTGYKILLTPMAAGSRHHALSVGPQTTTLNVRDLTADTEYQISVFAMKGLTSSEPTSVMEKTQPMKVQVECSRGVDIKADIVFLVDGSYSIGIANFVKVRAFLEVLAKSFEISPNRVQISLVQYSRDPHTEFTLKEFNRVEDIIKAINTFPYRGGSTNTGKAMTYVREKIFVPNKGSRSNVPKVMILITDGKSSDAFRDPAIKLRNSDVEIFAVGVKDAVRSELEAIASPPAETHVFTVEDFDAFQRISFELTQSICLRIEQELAAIKKKAYVPPKDLRFTQVTANSFKAEWSPPGDNVFSYHVTYKDANGDDEVTVVEPASSTSVVLNNLRPETLYLVNVTAEYEDGFSVPITGEETTAEVKGVPRNLKVTDETTDSFKLTWSQAPGRVLRYRIRYRPVSGGESKEVSTPANQRRKTLENLTPDTKYEISVIAEYSSGPGSPLTGNAATEEVRGNPRDLRVSDATTSTLKLSWSRAPGKVKQYLVTYTPAAGGETQEVTVRGDTTTTMLRKLKEGTQYDLSVTALYASGAGEALSGKGSTLEERGSPQNLVTKDITDTSIGAYWTSAPGMVRGYRVSWKSLYDDIEAGETTLPGDAIHTMIENLQPETKYKISVFATYSSGEGEPVTGDATTELSQDSKILRVDEETEHTMRVTWKAAPGKVVNYRVVYRPQGGGRQMVAKVPPTVTSTVLKRLQPQTTYDITVLPMYKTGEGKLRQGSGTTASRFKSPRNLKTSDPTMSSFRVTWEPAPGEVKGYKVTFHPTGDDRRLGELVLGPYDNTVVLEELRAGTTYRVNVFGMFDGGESLPLVGQEMTTLSDTTVTPFLSSGMDCLTRAEADIVLLVDGSWSIGRANFRTVRSFISRIVEVFEIGPKRVQIALAQYSGDPRTEWQLNAHRDKKSLLQAVANLPYKGGNTLTGMALNFIRQQSFKTQAGMRPRARKIGVLITDGKSQDDVEAPSKKLKDEGVELFAIGIKNADEVELKMIATDPDDTHAYNVADFESLSKIVDDLTINLCNSVKGPGDLEAPTNLVISERTHRSFRVSWTPPSDSVDRYKVEYYPVSGGKRQEFYVSRLDTSTVLKDLKPETDYVVNVYSVVEDEYSEPLKGTEKTLPVPVVSLNIYDVGPTTMHVQWQPVGGATGYTVSYQPTRSPEGTKPKEMRVGPTVNDVQLTGLLPNTEYEVTVQAVLYDLTSEPAKAREVTLPLPRPQDVKLRDVTHSTMNVVWEPVLGKVRKYIVRYKTPDEEFKEVEVDRSRASTILKDLSSQTQYTVSVSAVYDEGTSPPATAYDTTRRVPAPTNLQFTEVTPESFRGTWDHGASDVSLYRITWAPVGNPDKMETILNGDENTLVFENLNPNTPYEVSITAIYPDESESEDLSGTERTLRLIPLTTQAPKSGPRNLQVYNATSNSLTVKWDPASGRVQKYRITYQPSTGEGNEQTITVGGRQNSVLLQKLKPDTPYTITVYSQYPDGEGGRMTGRGKTKPLNTVRNLRVYDPSTSSLSVRWDHAEGNPRQYKLFYAPTSGGPEELVPIPGNTNYAILRNLQPDTPYTITVVPVYTEGDGGRTSDTGRTLVRGLARNIQVYNPTPNSLDVRWDPAPGPVQQYRIVYSPVAGTRPSESIVVPGNTRTVHLERLIPDTPYSVNIVALYSDGEGNPSPSQGRTLPRSGPRNIRVFGETTNSLSVAWDHADGPVQQYRIIYSPTVGDPIDEYTTVPGRRNNVILQPLQPDTPYKITVIAIYEDGDGGHLTGNGRTVGLLPPQNIHIFDEWYTRFRVSWDPSPSPVLGYKIVYKPVGSNEPMEAFVGEVTSYTLHNLNPSTTYDVSVYAQYDSGLSVPLTDQGTTLYLNVTDLKTYQVGWDTFCVKWSPHRAATSYRLKLSPADGTRGQEITVRGSETSHCFTGLSPEAEYGVTVFVQTPNLEGPGVPIKEQTTVKPTEAPTEPPTPSPPPTIPPARDVCKGAKADIVFLTDASWSIGDDNFNKVVKFIFNTVGAFDEVNPAGIQVSFVQYSDEVKSEFKLNTYNDKALALGALQNIRYRGGNTRTGKALTFIKEKVLTWESGMRKNVPKVLVVVTDGRSQDEVKKAAFVIQQSGFSVFVVGVADVDYNELANIASKPSERHVFIVDDFESFEKIEDNLITFVCETATSSCPLIYLDGYTSPGFKMLEAYNLTEKNFASVQGVSLESGSFPSYSAYRLQKNAFINQPTAELHPNGLPPSYTIILLFRLLPETPSDPFAIWQITDRDYRPQVGVIADPSSKTLSFFNKDTRGEVQTVTFDTDEVKTLFYGSFHKVHIVVTSKSVKIYIDCYEIIEKDIKEAGNITTDGYEILGKLLKGERKSATFQIQSFDIVCSPVWTSRDRCCDIPSRRDEAKCPALPNACTCTQDSVGPPGPPGPAGGPG'..b'GPPGPRGDTGPPGPQGPPGPQGPNGLSIPGEQGRQGMKGDAGEPGLPGRTGTPGLPGPPGPMGPPGDRGFTGKDGAMGPRGPPGPPGSPGSPGVTGPSGKPGKPGDHGRPGQSGLKGEKGDRGDIASQNMMRAVARQVCEQLISGQMSRFNQMLNQIPNDYHSSRNQPGPPGPPGPPGSAGARGEPGPGGRPGFPGTPGMQGPPGERGLPGEKGERGTGSQGPRGPPGPPGPQGESRTGPPGSTGSRGPPGPPGRPGNSGIRGPPGPPGYCDSSQCASIPYNGQGYPEPYVPEGGAYLPEREPFIVPVEPERTAEYEDDYGADEPDQQHPDHMRWRRALRPGPAE\n+>tr|D6RFR8|D6RFR8_MOUSE Choline/ethanolaminephosphotransferase 1 OS=Mus musculus GN=Cept1 PE=4 SV=1\n+MSGHRSTRKRCGDSHPESPVGFGHMSTTGHLCGLILPVLVAFSFTSLWMP\n+>tr|Q91VK2|Q91VK2_MOUSE Eef1d protein OS=Mus musculus GN=Eef1d PE=1 SV=1\n+MATNFLAHEKIWFDKFKYDDAERRFYEQMNGPVTSGSRQLKVMLPNSPEALGQATPGTSSGPGASSGPGGDHSELIVRITSLEVENQNLRGVVQDLQQAISKLEARLSSLEKSSPTPRATAPQTQHVSPMRQVEPPTKKGATPAEDDEDKDIDLFGSDEEEEDKEAARLREERLRQYAEKKAKKPTLVAKSSILLDVKPWDDETDMAQLETCVRSIQLDGLVWGASKLVPVGYGIRKLQIQCVVEDDKVGTDLLEEEITKFEEHVQSVDIAAFNKI\n+>tr|F6WN43|F6WN43_MOUSE Iron-sulfur cluster co-chaperone protein HscB, mitochondrial (Fragment) OS=Mus musculus GN=Hscb PE=1 SV=1\n+XQKSQTEKHFSDKHSTLVNDAYKTLQAPLTRGLYLVS\n+>sp|Q6NXK2|ZN532_MOUSE Zinc finger protein 532 OS=Mus musculus GN=Znf532 PE=1 SV=1\n+MTMGDMKTPDFDDLLAAFDIPDMVDPKAAIESGHDDHESHIKQNAHVDDDSHTPSSSDVGVSVIVKNVRNIDSSEGVEKDGHNPTGNGLHNGFLTASSLDSYGKDGAKSLKGDTPASEVTLKDPAFSQFSPISSAEEFEDDEKIEVDDPPDKEEARAGFRSNVLTGSAPQQDFDKLKALGGENSSKTGVSTSGHTDKNKVKREAESNSITLSVYEPFKVRKAEDKLKENSEKMLESRVLDGKPSSEKSDSGIAAAASSKTKPSSKLSSCIAAIAALSAKKAASDSCKEPVANSREASPLPKEVNDSPKAADKSPESQNLIDGTKKASLKPSDSPRSVSSENSSKGSPSSPVGSTPAIPKVRIKTIKTSSGEIKRTVTRVLPEVDLDSGKKPSEQAASVMASVTSLLSSSASATVLSSPPRAPLQTAMVTSAVSSAELTPKQVTIKPVATAFLPVSAVKTAGSQVINLKLANNTTVKATVISAASVQSASSAIIKAANAIQQQTVVVPASSLANAKLVPKTVHLANLNLLPQGAQATSELRQVLTKPQQQIKQAIINAAASQPPKKVSRVQVVSSLQSSVVEAFNKVLSSVNPVPVYTPNLSPPANAGITLPMRGYKCLECGDAFALEKSLSQHYDRRSVRIEVTCNHCTKNLVFYNKCSLLSHARGHKEKGVVMQCSHLILKPVPADQMIVPPSSNTAASTLQSSVGAATHTVPKVQPGIAGAVISAPASTPMSPAMPLDEDPSKLCRHSLKCLECNEVFQDEPSLATHFQHAADTSGQQMKKHPCRQCDKSFSSSHSLCRHNRIKHKGIRKVYACSHCPDSRRTFTKRLMLERHIQLMHGIKDPDVKELSDDAGDVTNDEEEEAEIKEDAKVPSPKRKLEEPVLEFRPPRGAITQPLKKLKINVFKVHKCAVCGFTTENLLQFHEHIPQHRSDGSSHQCRECGLCYTSHGSLARHLFIVHKLKEPQPVSKQNGAGEDSQQENKPSPEDEAAEGAASDRKCKVCAKTFETEAALNTHMRTHGMAFIKSKRMSSAEK\n+>sp|P58006|SESN1_MOUSE Sestrin-1 OS=Mus musculus GN=Sesn1 PE=1 SV=3\n+MRLAAASNEAYAASLAVSELLSCHQCGGDRGQDEELGIRIPRPLGHGPSRFIPEKEMLQVGSEDAQMHALFADSFAALGRLDNITLVMVFHPQYLESFLKTQHYLLQMDGPLPLHYRHYIGIMAAARHQCSYLVNLHVSDFLHVGGDPKWLNGLENAPQKLQNLGELNKVLAHRPWLITKEHIEGLLKAEEHSWSLAELVHAVVLLTHYHSLASFTFGCGISPEIHCDGGHTFRPPSVSNYCICDITNGNHSVDEMQVNSAGNASVSDSFFEVEALMEKMRQLQECREEEEASQEEMASRFEMEKRESMFVFSSDDDEVTPARDVSRHFEDTSYGYKDFSRHGMHVPTFRVQDYCWEDHGYSLVNRLYPDVGQLIDEKFHIAYNLTYNTMAMHKDVDTSMLRRAIWNYIHCMFGIRYDDYDYGEINQLLDRSFKVYIKTVVCTPEKVTKRMYDSFWRQFKHSEKVHVNLLLIEARMQAELLYALRAITRYMT\n+>tr|A0A1B0GSE5|A0A1B0GSE5_MOUSE Ubiquitin carboxyl-terminal hydrolase CYLD OS=Mus musculus GN=Cyld PE=1 SV=1\n+MSSGLWSQEKVTSPYWEERIFYLLLQECSVTDKQTQKLLKVPKGSIGQYIQDRSVGHSRVPSTKGKKNQIGLKILEQPHAVLFVDEKDVVEINEKFTELLLAITNCEERLSLFRNRLRLSKGLQVDVGSPVKVQLRSGEEKFPGVVRFRGPLLAERTVSGIFFGVELLEEGRGQGFTDGVYQGKQLFQCDEDCGVFVALDKLELIEDDDNGLESDFAGPGDTMQVEPPPLEINSRVSLKVGESTESGTVIFCDVLPGKESLGYFVGVDMDNPIGNWDGRFDGVQLCSFASVESTILLHINDIIPDSVTQERRPPKLAFMSRGVGDKGSSSHNKPKVTGSTSDPGSRNRSELFYTLNGSSVDSQQSKSKNPWYIDEAFGGYLSEVVEENTPPKMEKEGLEIMIGKKKGIQGHYNSCYLDSTLFCLFAFSSALDTVLLRPKEKNDIEYYSETQELLRTEIVNPLRIYGYVCATKIMKLRKILEKVEAASGFTSEEKDPEEFLNILFHDILRVEPLLKIRSAGQKVQDCNFYQIFMEKNEKVGVPTIQQLLEWSFINSNLKFAEAPSCLIIQMPRFGKDFKLFKKIFPSLELNITDLLEDTPRQCRICGGLAMYECRECYDDPDISAGKIKQFCKTCSTQVHLHPRRLNHSYHPVSLPKDLPDWDWRHGCIPCQKMELFAVLCIETSHYVAFVKYGKDDSAWLFFDSMADRDGGQNGFNIPQVTPCPEVGEYLKMSLEDLHSLDSRRIQGCARRLLCDAYMCMYQSPTMSLYK\n+>sp|Q60880|OL141_MOUSE Olfactory receptor 141 OS=Mus musculus GN=Olfr141 PE=3 SV=2\n+MRNITEATFFVLKGLTDNNELQIILFLLFLAIYIFTLIGNVGLIILVVGDSQLHNPMYCFLSVLSSVDACYSTDITPNMLVGFMSKSKIISFYGCATQMFLAVTFGTTECFLLAAMAYDRYVAIHDPLLYAVSMSPRVYIPLIIASYAGGIVHAIIHTVATFSLSFCRSNEVKHIFCDIPPLLAISCSETYVNELLLFFFVSFIELVTILIVLVSYAFILLSILKMNSSEGRRKVFSTCGAHLTAVSIYYGTILFMYVRPSSNYSLEHDMIVSTFYTIGIPMLNPIIYSLRNKDVKEAMKRVLRKKINIKHRIKKLNDFSVFLMP\n'
b
diff -r 000000000000 -r 32d3818fc635 test-data/iTRAQ_f4.mgf
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/iTRAQ_f4.mgf Wed Jan 22 17:12:37 2020 -0500
b
b'@@ -0,0 +1,753 @@\n+COM=Conversion to mascot generic\n+CHARGE=2+ and 3+\n+BEGIN IONS\n+PEPMASS=740.462952\n+CHARGE=3+\n+SCANS=277\n+TITLE=Mo_Tai_iTRAQ_f4.00277.00277.3\n+111.017494 85.198502\n+112.240982 97.204994\n+114.110542 168.274780\n+115.107582 369.265503\n+115.650398 84.005325\n+117.296082 86.641182\n+117.595589 82.974037\n+120.657990 81.247025\n+123.208023 88.939392\n+126.714996 76.470490\n+126.995316 81.651260\n+129.369080 82.580246\n+129.410400 90.817963\n+132.734039 124.614685\n+143.307465 87.847046\n+178.966888 94.961578\n+179.972855 89.878273\n+183.256714 104.533241\n+188.080093 123.015953\n+212.102753 442.314178\n+213.104324 84.621696\n+233.001801 107.842552\n+244.242355 102.163200\n+281.079132 97.696259\n+291.216034 243.588058\n+309.183807 104.681831\n+325.188324 282.187622\n+345.226288 167.099777\n+347.629883 88.744476\n+357.377533 105.747673\n+388.266266 1321.324341\n+430.317047 94.113419\n+458.310547 310.033203\n+489.807434 100.114334\n+490.700165 99.780800\n+501.355072 164.809708\n+517.545532 119.780251\n+529.344849 395.354340\n+529.599487 100.128220\n+535.189514 101.317970\n+657.405518 110.483780\n+699.552063 101.576553\n+736.234497 130.142654\n+752.286011 111.876389\n+756.181641 93.424118\n+771.447876 505.619843\n+776.288391 100.766472\n+804.153748 99.673531\n+840.959473 131.680695\n+870.515198 596.371582\n+871.524719 224.175797\n+938.853760 112.032951\n+983.596008 538.290894\n+985.851929 109.672272\n+999.183228 110.428818\n+1006.435791 110.520103\n+1098.597168 117.997414\n+1102.095825 108.774673\n+1154.624023 115.541344\n+1179.853027 115.300293\n+1188.129639 112.100708\n+1328.742920 120.317390\n+1535.381592 114.408516\n+1578.387207 112.895355\n+1696.229492 122.310112\n+1865.684937 119.336388\n+1891.318359 108.014870\n+END IONS\n+BEGIN IONS\n+PEPMASS=596.816895\n+CHARGE=2+\n+SCANS=3419\n+TITLE=Mo_Tai_iTRAQ_f4.03419.03419.2\n+112.086548 2060.700684\n+113.070908 591.413391\n+114.110237 56227.753906\n+115.107254 40715.632813\n+116.110573 1253.002075\n+124.087646 334.579498\n+129.065491 335.658569\n+129.682831 322.525909\n+130.096924 432.119598\n+135.059509 290.456573\n+135.933548 316.717255\n+141.158234 324.162384\n+143.102081 1042.657227\n+143.108337 991.155334\n+145.106369 4993.616699\n+145.112579 9580.018555\n+146.116013 348.826385\n+151.422028 339.654999\n+152.189240 352.970459\n+155.080811 320.076233\n+156.112503 802.023926\n+157.107681 821.452209\n+158.092041 3474.361572\n+163.116852 649.046387\n+163.123260 573.363525\n+163.860764 368.470825\n+172.107132 547.236511\n+173.091751 424.643494\n+173.713470 325.451202\n+174.132874 1181.682129\n+174.138977 1457.565674\n+175.118500 19209.625000\n+176.121841 895.593079\n+180.310822 302.979675\n+185.090881 577.591370\n+185.163925 667.431763\n+189.086395 384.102600\n+193.535583 299.085999\n+198.085999 304.650818\n+202.127197 776.504089\n+202.133865 1047.345337\n+205.158951 325.465912\n+206.438217 407.963379\n+215.112625 944.689087\n+216.097275 349.972809\n+216.178452 628.929138\n+216.187073 1542.270264\n+218.158371 710.665344\n+219.997772 493.154785\n+226.117889 345.579254\n+227.530579 380.890015\n+229.117340 398.719696\n+231.095993 553.909363\n+232.139694 11610.142578\n+233.142776 541.624878\n+240.132904 413.648132\n+242.112793 354.861603\n+242.148621 757.952148\n+244.147949 340.507446\n+244.181870 1394.464355\n+244.207031 351.863098\n+246.153458 2635.449219\n+246.159882 2076.769775\n+255.143784 947.695679\n+260.123077 377.649994\n+261.163208 358.264160\n+268.138641 800.600464\n+269.122375 665.910522\n+269.220001 322.280029\n+271.175507 2574.782715\n+271.523804 335.527252\n+272.170105 1414.468018\n+274.152344 1745.901855\n+279.726440 358.126434\n+286.150787 2142.241211\n+287.134125 399.345337\n+287.153320 381.299835\n+288.201630 1045.604614\n+289.161011 642.731201\n+297.189941 313.076477\n+303.175964 1238.757568\n+304.159454 718.974304\n+309.070404 557.175232\n+309.572754 389.136902\n+315.128113 438.987366\n+315.165070 394.897858\n+318.127045 852.624146\n+329.155304 359.383026\n+330.138916 745.477112\n+330.163727 587.573120\n+332.788940 345.351257\n+341.179504 329.149353\n+343.196503 430.196503\n+345.225616 879.687500\n+346.184479 34'..b'08055.3\n+110.070747 1733.929932\n+112.086494 691.703613\n+114.110207 40570.949219\n+115.107246 29732.470703\n+116.110382 1231.719482\n+116.696106 632.937317\n+117.730354 471.628967\n+124.086235 1593.573853\n+127.085846 719.726074\n+127.191467 500.313782\n+129.064926 511.926025\n+129.129395 1360.861450\n+136.075058 651.347961\n+137.249298 489.442627\n+143.102310 528.153137\n+143.108459 983.870300\n+145.106354 5602.307617\n+145.112564 9344.262695\n+155.080536 649.520813\n+158.091660 1477.552002\n+162.132690 594.649658\n+162.139435 534.015381\n+173.417542 600.326477\n+175.118454 8973.086914\n+176.120972 555.123108\n+178.371323 511.018158\n+183.148560 607.971863\n+186.086960 652.429138\n+188.148453 816.252747\n+188.154907 1523.843628\n+199.106628 571.695862\n+206.436493 1035.268555\n+212.269272 591.934021\n+214.144180 900.448364\n+216.150284 2525.229248\n+227.101578 976.558044\n+230.201385 1142.054443\n+242.148682 1397.006470\n+243.107559 677.465942\n+243.133698 1371.145142\n+244.138000 891.661987\n+244.180954 1242.425537\n+246.153366 2297.038574\n+254.730103 532.118347\n+255.144455 987.050415\n+256.688324 533.629761\n+257.158569 875.375305\n+272.170959 2529.128906\n+272.908997 502.400146\n+274.186554 13132.186523\n+287.221313 1091.025513\n+291.214172 2984.217773\n+300.154175 1022.979675\n+308.174438 1182.723389\n+311.169250 569.517273\n+315.217712 1153.479858\n+320.089233 1019.081665\n+329.195770 1409.046875\n+330.298096 523.613220\n+331.206055 940.368896\n+345.223816 1784.856079\n+348.235840 683.953247\n+348.581970 560.229858\n+368.226654 641.918640\n+371.281464 720.593628\n+372.184631 537.945129\n+378.248566 1106.098633\n+379.243256 531.171021\n+386.216095 1908.169556\n+388.270569 1083.493164\n+399.715698 3641.003906\n+402.287842 531.509766\n+407.745789 883.040344\n+407.965881 569.346497\n+412.240295 632.917664\n+415.698425 1032.095337\n+428.227112 4306.428711\n+428.261108 1225.675537\n+428.725769 881.870422\n+429.260712 3728.557373\n+429.697418 1331.391235\n+445.288818 567.610901\n+448.071899 583.037292\n+455.416718 647.968628\n+464.268341 654.572754\n+470.773743 1128.713501\n+472.250061 3780.671875\n+473.252808 1594.682739\n+473.278687 2412.803467\n+481.325806 1259.738159\n+484.769775 1385.499390\n+485.271332 613.115601\n+491.173309 585.062622\n+491.329742 911.582214\n+497.316864 671.196655\n+499.275757 1172.343262\n+499.337067 1123.132935\n+500.283569 1279.582520\n+500.335480 1331.533203\n+501.255157 671.378113\n+506.350983 526.060120\n+515.294067 595.644592\n+520.279419 564.501221\n+528.327942 1453.140137\n+528.796143 1023.938416\n+530.344971 699.704163\n+534.306091 632.479187\n+556.821533 578.409241\n+557.308167 996.385559\n+562.368835 1061.597290\n+562.630188 599.749634\n+563.383240 597.036011\n+572.329102 704.391174\n+575.329651 597.594116\n+578.266602 569.538757\n+584.834595 1397.603638\n+585.341125 596.237427\n+585.828064 634.964417\n+592.849548 948.947998\n+593.338867 8331.064453\n+593.809753 1243.525757\n+593.850098 3570.665283\n+594.347534 5469.266113\n+601.328064 1251.846069\n+612.421753 868.523682\n+615.363525 1159.816772\n+635.785217 692.349304\n+645.406799 926.912354\n+647.354187 1344.844971\n+660.368103 1515.965698\n+667.035950 629.281006\n+678.553528 864.705811\n+686.401672 1415.408081\n+687.406799 956.555176\n+701.317932 638.253906\n+713.436462 577.175781\n+714.438171 979.025330\n+721.332031 1133.021240\n+734.864807 659.578308\n+758.495483 1101.013306\n+759.436768 1680.954834\n+814.488220 766.761841\n+825.458008 2101.567139\n+873.476807 3603.638672\n+876.500122 1146.794800\n+877.493591 2904.569092\n+878.501526 2154.676025\n+879.500305 1179.630981\n+896.207703 648.875244\n+914.434082 1090.736938\n+914.517822 1977.191284\n+928.515015 1886.298706\n+930.495483 1311.800903\n+952.429504 625.323181\n+965.766296 606.633911\n+972.546692 2437.769531\n+999.546753 632.162537\n+1026.609131 980.280151\n+1040.573853 1742.042114\n+1041.564087 3165.200439\n+1042.581787 3063.955566\n+1043.577515 6683.765625\n+1045.518311 945.700500\n+1147.621704 832.881104\n+1148.635742 1159.247681\n+1467.104980 653.335815\n+1533.474243 661.718018\n+1620.307251 616.238525\n+1664.717529 648.275024\n+END IONS\n'
b
diff -r 000000000000 -r 32d3818fc635 test-data/novel_peptides
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/novel_peptides Wed Jan 22 17:12:37 2020 -0500
b
@@ -0,0 +1,3 @@
+DSGASGSILEASAAR
+ELGSSDLTAR
+SPYREFTDHLVK
b
diff -r 000000000000 -r 32d3818fc635 test-data/novel_proteins.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/novel_proteins.fa Wed Jan 22 17:12:37 2020 -0500
b
@@ -0,0 +1,6 @@
+>DSGASGSILEASAAR
+DSGASGSILEASAAR
+>ELGSSDLTAR
+ELGSSDLTAR
+>SPYREFTDHLVK
+SPYREFTDHLVK