# HG changeset patch
# User galaxyp
# Date 1596795451 14400
# Node ID 6b226c5907a1cac11d5238abc439edc832a81c12
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fastg2protlib commit e777bdb1d28b1ffee75cb1a8ad782a50c10a5358"
diff -r 000000000000 -r 6b226c5907a1 app_validate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/app_validate.py Fri Aug 07 06:17:31 2020 -0400
@@ -0,0 +1,44 @@
+"""Command-line entry point that validates a fastg2protlib protein library.
+
+Parses the command line and hands the MSGF+ tabular results, FDR cutoff,
+decoy marker and database name to ``fastg2protlib.verified_proteins``.
+"""
+import argparse
+
+import fastg2protlib.fastg2protlib as fg
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Validate a candidate protein library with MSGF+ results"
+    )
+    parser.add_argument("msgf", help="Path MSGF+ tabular results.")
+    parser.add_argument(
+        "-d",
+        "--dbname",
+        default="results.db",
+        help="Name for the results database. Defaults to results.db",
+    )
+    parser.add_argument(
+        "-f",
+        "--fdr",
+        default=0.10,
+        type=float,
+        help="FDR cutoff for accepting PSM validation.",
+    )
+    parser.add_argument(
+        "-x",
+        "--decoy_header",
+        default="XXX_",
+        help="String used for marking decoy proteins.",
+    )
+
+    args = parser.parse_args()
+    # Forward the parsed -f/-x values; hard-coding 0.10 and "XXX_" here would
+    # silently ignore whatever the user passed on the command line.
+    fg.verified_proteins(
+        args.msgf,
+        fdr_level=args.fdr,
+        decoy_header=args.decoy_header,
+        db_name=args.dbname,
+    )
diff -r 000000000000 -r 6b226c5907a1 application.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/application.py Fri Aug 07 06:17:31 2020 -0400
@@ -0,0 +1,114 @@
+"""Command-line entry point that runs fastg2protlib peptide generation.
+
+Parses the command line and forwards the options to
+``fastg2protlib.peptides_for_fastg``.
+"""
+import argparse
+
+import fastg2protlib.fastg2protlib as fg
+
+# Cleavage rule names. Not referenced elsewhere in this script; presumably the
+# values accepted by --cleavage -- confirm against fastg2protlib before using
+# the list for validation.
+expasy_rules = [
+    "arg-c",
+    "asp-n",
+    "bnps-skatole",
+    "caspase 1",
+    "caspase 2",
+    "caspase 3",
+    "caspase 4",
+    "caspase 5",
+    "caspase 6",
+    "caspase 7",
+    "caspase 8",
+    "caspase 9",
+    "caspase 10",
+    "chymotrypsin high specificity",
+    "chymotrypsin low specificity",
+    "clostripain",
+    "cnbr",
+    "enterokinase",
+    "factor xa",
+    "formic acid",
+    "glutamyl endopeptidase",
+    "granzyme b",
+    "hydroxylamine",
+    "iodosobenzoic acid",
+    "lysc",
+    "ntcb",
+    "pepsin ph1.3",
+    "pepsin ph2.0",
+    "proline endopeptidase",
+    "proteinase k",
+    "staphylococcal peptidase i",
+    "thermolysin",
+    "thrombin",
+    "trypsin",
+    "trypsin_exception",
+]
+
+
+def _parse_bool(value):
+    """Convert a command-line string into a bool.
+
+    ``type=bool`` would call ``bool()`` on the raw string, so every non-empty
+    value -- including "False" -- would be truthy. The empty string and the
+    usual negative spellings map to False; any other value stays True, which
+    is what ``bool()`` returned before.
+    """
+    return value.lower() not in {"", "0", "false", "f", "no", "n", "off"}
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run peptides for fastg")
+    parser.add_argument("fastg", help="Path to Spades formatted FASTG.")
+    parser.add_argument(
+        "-d",
+        "--dbname",
+        default="results.db",
+        help="Name for the results database. Defaults to results.db",
+    )
+    parser.add_argument(
+        "-c",
+        "--cleavage",
+        default="trypsin",
+        help="Cleavage rule from ExPASy cleavage rules. Defaults to trypsin.",
+    )
+    parser.add_argument(
+        "-p",
+        "--min_protein_length",
+        default=55,
+        type=int,
+        help="Minimum protein length in number of amino acids. Defaults to 55.",
+    )
+    parser.add_argument(
+        "-m",
+        "--min_peptide_length",
+        default=8,
+        type=int,
+        help="Minimum peptide length in amino acids. Defaults to eight.",
+    )
+    parser.add_argument(
+        "-l",
+        "--plots",
+        default=True,
+        type=_parse_bool,
+        help="Generate diagnostic plots.",
+    )
+
+    args = parser.parse_args()
+
+    print(args)
+
+    fg.peptides_for_fastg(
+        fastg_filename=args.fastg,
+        db_name=args.dbname,
+        cleavage=args.cleavage,
+        # The option is given in amino acids but the value is passed on
+        # multiplied by three (presumably codons -> nucleotides; confirm
+        # against fastg2protlib).
+        min_protein_length=(args.min_protein_length * 3),
+        min_peptide_length=args.min_peptide_length,
+        create_plots=args.plots,
+    )
diff -r 000000000000 -r 6b226c5907a1 fastg2protlib-peptides.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastg2protlib-peptides.xml Fri Aug 07 06:17:31 2020 -0400
@@ -0,0 +1,59 @@
+
+
+ macros.xml
+
+ Generate FASTA from FASTG
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ show_plots == True
+
+
+ show_plots == True
+
+
+ show_plots == True
+
+
+ show_plots == True
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 6b226c5907a1 fastg2protlib-validate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastg2protlib-validate.xml Fri Aug 07 06:17:31 2020 -0400
@@ -0,0 +1,41 @@
+
+
+ macros.xml
+
+
+ Validate a candidate protein library
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 6b226c5907a1 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Fri Aug 07 06:17:31 2020 -0400
@@ -0,0 +1,29 @@
+
+ 1.0.2
+
+
+
+
+
+
+
+
+
+
+
+ fastg2protlib
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 6b226c5907a1 test-data/mgf_tst.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mgf_tst.tab Fri Aug 07 06:17:31 2020 -0400
@@ -0,0 +1,25 @@
+#SpecFile SpecID ScanNum FragMethod Precursor IsotopeError PrecursorError(ppm) Charge Peptide Protein DeNovoScore MSGFScore SpecEValue EValue QValue PepQValue
+wendt005_mickela_20200214_17647_12_V.mzML index=8575 -1 CID 501.26144 1 14.785407 3 +42.011IFLPFSTHSR+0.984 Pep_1|Protein_1(pre=-,post=-) 74 36 2.7071892E-10 2.855757E-4 0.0 0.0
+wendt005_mickela_20200214_17647_12_V.mzML index=10628 -1 CID 631.3283 0 16.628782 2 RTVWSN+0.984GTSPR Pep_2|Protein_1_29(pre=-,post=P) 61 36 6.954425E-10 7.203602E-4 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=6020 -1 CID 607.79266 1 -11.606342 2 +42.011AQ+0.984YWLSQFK Pep_3|Protein_1_28(pre=-,post=-) 23 10 9.667708E-10 9.6076715E-4 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=11469 -1 CID 470.6093 1 -7.4827867 3 RLLLQ+0.984C+57.021PRVPR Pep_4|Protein_2(pre=-,post=L) 68 35 1.2193706E-9 0.0012630607 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=15017 -1 CID 799.7483 0 2.747454 3 YFM+15.995YSIQYILIFYVQYVK Pep_5|Protein_2_29(pre=-,post=-) 2 -17 2.587433E-9 0.0029598887 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=16951 -1 CID 424.5418 1 -19.301939 3 RC+57.021GPLQASEPR Pep_6|Protein_4_16_31_32(pre=-,post=E) 69 41 3.636947E-9 0.0037672587 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=34154 -1 CID 768.88916 1 13.068233 2 +42.011STPVELEFSQ+0.984VEK Pep_7|Protein_5_34(pre=-,post=-) 77 33 4.083382E-9 0.0043801093 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=12592 -1 CID 701.0163 0 -5.6593018 3 YQSTPNIYYILYMYIR Pep_8|Protein_5_6_34_40(pre=-,post=-) 77 19 6.9333055E-9 0.0077557205 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=12620 -1 CID 485.91306 0 -10.551063 3 M+15.995SGIITN+0.984EISVFK Pep_9|Protein_7_9_22(pre=-,post=-) 55 28 7.184936E-9 0.007707044 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=5150 -1 CID 479.9201 0 -11.509454 3 YFEGKPVIEEVK Pep_10|Protein_7_22(pre=-,post=-) 87 44 7.371949E-9 0.007776514 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=26411 -1 CID 708.3275 0 -15.423832 2 PAQ+0.984PTGTRPC+57.021SSR Pep_11|Protein_8_21(pre=R,post=-) 41 15 7.913002E-9 0.008488016 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=22717 -1 CID 783.3668 0 11.843052 2 +42.011EEQDTFAVNSQQK XXX_Pep_22060|Protein_2878(pre=-,post=-) 135 39 7.923481E-9 0.008499257 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=8020 -1 CID 629.31287 0 13.772342 2 +42.011FQEPQQPWR XXX_Pep_16062|Protein_2307(pre=-,post=-) 31 14 8.15928E-9 0.008108611 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=20351 -1 CID 497.26334 0 0.79782444 2 LVPASGMYR XXX_Pep_9843|Protein_1629(pre=-,post=-) 28 16 8.401295E-9 0.0083491225 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=18867 -1 CID 559.29803 1 19.583662 3 +42.011LIGTATSVDEAIAN+0.984EK XXX_Pep_14112|Protein_2085(pre=-,post=-) 57 21 8.928303E-9 0.009987362 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=43084 -1 CID 653.8416 1 0.048319984 2 +42.011YSNYILYTVK XXX_Pep_3426|Protein_702(pre=-,post=-) 13 0 9.61208E-9 0.009760836 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=43084 -1 CID 653.8416 0 14.842637 2 +42.011YSN+0.984YILYTVK XXX_Pep_3426|Protein_702(pre=-,post=-) 13 0 9.61208E-9 0.009760836 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=32362 -1 CID 1036.4833 0 5.6531625 3 AWIGMQ+0.984WNGIEWNAM+15.995EWIQLEWNGK XXX_Pep_14712|Protein_2168(pre=-,post=-) 13 -31 1.0415514E-8 0.012581826 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=10370 -1 CID 461.22098 0 0.13233389 2 N+0.984DTQMLAK XXX_Pep_7493|Protein_1335_1348(pre=-,post=-) 83 51 1.0437349E-8 0.0101322755 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=21871 -1 CID 523.7984 0 1.3982916 2 +42.011AYVLNISPK XXX_Pep_31555|Protein_3895(pre=-,post=-) 86 43 1.0667454E-8 0.010601209 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=15561 -1 CID 581.9749 0 17.304827 3 GLDWDLAADLEGN+0.984IIK XXX_Pep_17726|Protein_2472(pre=-,post=-) 107 48 1.0803276E-8 0.012084738 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=22491 -1 CID 522.26483 0 -13.790032 2 +42.011Q+0.984LEAVQ+0.984VGR XXX_Pep_12397|Protein_1871(pre=-,post=-) 46 31 1.09720055E-8 0.01090387 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=9573 -1 CID 623.81696 0 -13.893293 2 +42.011RHALDGPWPR XXX_Pep_17806|Protein_2476(pre=-,post=Q) 28 13 1.1109479E-8 0.011281409 0.088495575 0.0952381
+wendt005_mickela_20200214_17647_12_V.mzML index=16000 -1 CID 381.54922 1 13.824602 3 +42.011N+0.984TYLSFLIK XXX_Pep_29178|Protein_3650(pre=-,post=-) 66 42 1.1131118E-8 0.011061994 0.088495575 0.0952381
diff -r 000000000000 -r 6b226c5907a1 test-data/tst_valid.db
Binary file test-data/tst_valid.db has changed
diff -r 000000000000 -r 6b226c5907a1 test-data/two.fastg
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/two.fastg Fri Aug 07 06:17:31 2020 -0400
@@ -0,0 +1,40 @@
+>EDGE_1_length_84_cov_1.0:EDGE_3_length_84_cov_1.0;
+CGTTATTCGCGCCCACTCTCCCATTTATCCGCGCAAGCGGATGCGATGCGATTGCCCGCTAAGATATTCTTACCATTCTCGACA
+>EDGE_1_length_84_cov_1.0';
+TGTCGAGAATGGTAAGAATATCTTAGCGGGCAATCGCATCGCATCCGCTTGCGCGGATAAATGGGAGAGTGGGCGCGAATAACG
+>EDGE_2_length_84_cov_1.0:EDGE_3_length_84_cov_1.0;
+CTGGTCCTGTTGACTACAATGGGCCCAACTCAATCACAGCTCGAGCGCCTTGAATAACATACTCATCTCTATACATTCTCGACA
+>EDGE_2_length_84_cov_1.0':EDGE_3_length_84_cov_1.0';
+TGTCGAGAATGTATAGAGATGAGTATGTTATTCAAGGCGCTCGAGCTGTGATTGAGTTGGGCCCATTGTAGTCAACAGGACCAG
+>EDGE_3_length_84_cov_1.0:EDGE_2_length_84_cov_1.0,EDGE_4_length_84_cov_1.0;
+CATTCTCGACATGCTGAGCTGAGACGGCGTCGATGCATAGCGGACTTTCGGTCAGTCGCAATTCCTCACGAGACTGGTCCTGTT
+>EDGE_3_length_84_cov_1.0':EDGE_2_length_84_cov_1.0',EDGE_1_length_84_cov_1.0';
+AACAGGACCAGTCTCGTGAGGAATTGCGACTGACCGAAAGTCCGCTATGCATCGACGCCGTCTCAGCTCAGCATGTCGAGAATG
+>EDGE_4_length_84_cov_1.0:EDGE_5_length_84_cov_1.0;
+CTGGTCCTGTTACAGAGCTGGCGTACGCGTTGAACACTTCACAGATGATAGGGATTCGGGTAAAGAGCGTGTCATTGGGGGCTT
+>EDGE_4_length_84_cov_1.0':EDGE_3_length_84_cov_1.0';
+AAGCCCCCAATGACACGCTCTTTACCCGAATCCCTATCATCTGTGAAGTGTTCAACGCGTACGCCAGCTCTGTAACAGGACCAG
+>EDGE_5_length_84_cov_1.0;
+ATTGGGGGCTTCATACATAGAGCAAGGGCGTCGAACGGTCGTGAAAGTCTTAGTACCGCACGTACCAACTTACTGAGGATATTG
+>EDGE_5_length_84_cov_1.0':EDGE_4_length_84_cov_1.0',EDGE_6_length_84_cov_1.0';
+CAATATCCTCAGTAAGTTGGTACGTGCGGTACTAAGACTTTCACGACCGTTCGACGCCCTTGCTCTATGTATGAAGCCCCCAAT
+>EDGE_6_length_84_cov_1.0:EDGE_5_length_84_cov_1.0;
+AAGAGGCCGCCACCGTTTTAGGGGGGGAAGGTTGAAGATCTCCTCTTCTCATGACTGAACTCGCGAGGGCCGTATTGGGGGCTT
+>EDGE_6_length_84_cov_1.0':EDGE_8_length_84_cov_1.0';
+AAGCCCCCAATACGGCCCTCGCGAGTTCAGTCATGAGAAGAGGAGATCTTCAACCTTCCCCCCCTAAAACGGTGGCGGCCTCTT
+>EDGE_7_length_84_cov_1.0:EDGE_8_length_84_cov_1.0;
+AAGAGGCCGCCAAAGAACAAAGGCTTACTGTGCGCAGAGGAACGCCCATTTAGCGGCTGGCGTTTTGAATCCTTTTAATATTGT
+>EDGE_7_length_84_cov_1.0':EDGE_8_length_84_cov_1.0';
+ACAATATTAAAAGGATTCAAAACGCCAGCCGCTAAATGGGCGTTCCTCTGCGCACAGTAAGCCTTTGTTCTTTGGCGGCCTCTT
+>EDGE_8_length_84_cov_1.0:EDGE_7_length_84_cov_1.0,EDGE_6_length_84_cov_1.0;
+TTTAATATTGTTTAATCCAATTCCCTCATTTAGGACCCTACCAAGTCAACATTGGTATATGAATGCGACCTCGAAGAGGCCGCC
+>EDGE_8_length_84_cov_1.0':EDGE_7_length_84_cov_1.0',EDGE_9_length_84_cov_1.0';
+GGCGGCCTCTTCGAGGTCGCATTCATATACCAATGTTGACTTGGTAGGGTCCTAAATGAGGGAATTGGATTAAACAATATTAAA
+>EDGE_9_length_84_cov_1.0:EDGE_8_length_84_cov_1.0;
+TAAAAATGACAGTGGTTGGTGCTCTAAACTTCATTTGGTTAACTCGTGTATCAGCGCGATAGGCTGTTAGAGGTTTAATATTGT
+>EDGE_9_length_84_cov_1.0';
+ACAATATTAAACCTCTAACAGCCTATCGCGCTGATACACGAGTTAACCAAATGAAGTTTAGAGCACCAACCACTGTCATTTTTA
+>EDGE_10_length_84_cov_1.0;
+ATGGCAAGGTACTTCCGGTCTTAATGAATGGCCGGGAAAGGTACGCACGCGGTATGGGGGGGTGAAGGGGCGAATAGACAGGCT
+>EDGE_10_length_84_cov_1.0':EDGE_10_length_84_cov_1.0;
+AGCCTGTCTATTCGCCCCTTCACCCCCCCATACCGCGTGCGTACCTTTCCCGGCCATTCATTAAGACCGGAAGTACCTTGCCAT