# HG changeset patch
# User galaxyp
# Date 1596795451 14400
# Node ID 6b226c5907a1cac11d5238abc439edc832a81c12
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/fastg2protlib commit e777bdb1d28b1ffee75cb1a8ad782a50c10a5358"
diff -r 000000000000 -r 6b226c5907a1 app_validate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/app_validate.py Fri Aug 07 06:17:31 2020 -0400
@@ -0,0 +1,39 @@
+"""Command-line wrapper around fastg2protlib's verified_proteins."""
+
+import argparse
+
+import fastg2protlib.fastg2protlib as fg
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run peptides for fastg")
+    parser.add_argument("msgf", help="Path MSGF+ tabular results.")
+    parser.add_argument(
+        "-d",
+        "--dbname",
+        default="results.db",
+        help="Name for the results database. Defaults to results.db",
+    )
+    parser.add_argument(
+        "-f",
+        "--fdr",
+        default=0.10,
+        type=float,
+        help="FDR cutoff for accepting PSM validation.",
+    )
+    parser.add_argument(
+        "-x",
+        "--decoy_header",
+        default="XXX_",
+        help="String used for marking decoy proteins.",
+    )
+
+    args = parser.parse_args()
+    # Forward the parsed --fdr and --decoy_header values; hard-coding the
+    # defaults here would make both command-line options silently ineffective.
+    fg.verified_proteins(
+        args.msgf,
+        fdr_level=args.fdr,
+        decoy_header=args.decoy_header,
+        db_name=args.dbname,
+    )
diff -r 000000000000 -r 6b226c5907a1 application.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/application.py Fri Aug 07 06:17:31 2020 -0400
@@ -0,0 +1,126 @@
+"""Command-line wrapper around fastg2protlib's peptides_for_fastg."""
+
+import argparse
+
+import fastg2protlib.fastg2protlib as fg
+
+# ExPASy cleavage rule names.
+# NOTE(review): nothing in this script references this list.
+expasy_rules = [
+    "arg-c",
+    "asp-n",
+    "bnps-skatole",
+    "caspase 1",
+    "caspase 2",
+    "caspase 3",
+    "caspase 4",
+    "caspase 5",
+    "caspase 6",
+    "caspase 7",
+    "caspase 8",
+    "caspase 9",
+    "caspase 10",
+    "chymotrypsin high specificity",
+    "chymotrypsin low specificity",
+    "clostripain",
+    "cnbr",
+    "enterokinase",
+    "factor xa",
+    "formic acid",
+    "glutamyl endopeptidase",
+    "granzyme b",
+    "hydroxylamine",
+    "iodosobenzoic acid",
+    "lysc",
+    "ntcb",
+    "pepsin ph1.3",
+    "pepsin ph2.0",
+    "proline endopeptidase",
+    "proteinase k",
+    "staphylococcal peptidase i",
+    "thermolysin",
+    "thrombin",
+    "trypsin",
+    "trypsin_exception",
+]
+
+
+def _str_to_bool(value):
+    """Convert a command-line string to a bool.
+
+    argparse's ``type=bool`` calls ``bool()`` on the raw string, and every
+    non-empty string (including "False") is truthy, so it cannot express False.
+
+    Args:
+        value: Text supplied on the command line, or an actual bool.
+
+    Returns:
+        True for true/t/yes/y/1/on; False for false/f/no/n/0/off or an empty
+        string. Matching ignores case and surrounding whitespace.
+
+    Raises:
+        argparse.ArgumentTypeError: If the text is not a recognised boolean.
+    """
+    if isinstance(value, bool):
+        return value
+    text = value.strip().lower()
+    if text in ("true", "t", "yes", "y", "1", "on"):
+        return True
+    if text in ("false", "f", "no", "n", "0", "off", ""):
+        return False
+    raise argparse.ArgumentTypeError(
+        "expected a boolean value, got {!r}".format(value)
+    )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Run peptides for fastg")
+    parser.add_argument("fastg", help="Path to Spades formatted FASTG.")
+    parser.add_argument(
+        "-d",
+        "--dbname",
+        default="results.db",
+        help="Name for the results database. Defaults to results.db",
+    )
+    parser.add_argument(
+        "-c",
+        "--cleavage",
+        default="trypsin",
+        help="Cleavage rule from ExPASy cleavage rules. Defaults to trypsin.",
+    )
+    parser.add_argument(
+        "-p",
+        "--min_protein_length",
+        default=55,
+        type=int,
+        help="Minimum protein length in number of amino acids. Defaults to 55.",
+    )
+    parser.add_argument(
+        "-m",
+        "--min_peptide_length",
+        default=8,
+        type=int,
+        help="Minimum peptide length in amino acids. Defaults to eight.",
+    )
+    parser.add_argument(
+        "-l",
+        "--plots",
+        default=True,
+        type=_str_to_bool,
+        help="Generate diagnostic plots.",
+    )
+
+    args = parser.parse_args()
+
+    print(args)
+
+    fg.peptides_for_fastg(
+        fastg_filename=args.fastg,
+        db_name=args.dbname,
+        cleavage=args.cleavage,
+        # The option is given in amino acids; multiplying by 3 presumably
+        # converts it to nucleotides (one codon per residue) -- TODO confirm.
+        min_protein_length=(args.min_protein_length * 3),
+        min_peptide_length=args.min_peptide_length,
+        create_plots=args.plots,
+    )
diff -r 000000000000 -r 6b226c5907a1 fastg2protlib-peptides.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastg2protlib-peptides.xml Fri Aug 07 06:17:31 2020 -0400
@@ -0,0 +1,59 @@
+
+
+    macros.xml
+
+    Generate FASTA from FASTG
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+    show_plots == True
+
+
+    show_plots == True
+
+
+    show_plots == True
+
+
+    show_plots == True
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 6b226c5907a1 fastg2protlib-validate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastg2protlib-validate.xml Fri Aug 07 06:17:31 2020 -0400
@@ -0,0 +1,41 @@
+
+
+    macros.xml
+
+
+    Validate a candidate protein library
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 6b226c5907a1 macros.xml
--- 
/dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Fri Aug 07 06:17:31 2020 -0400 @@ -0,0 +1,29 @@ + + 1.0.2 + + + + + + + + + + + + fastg2protlib + + + \ No newline at end of file diff -r 000000000000 -r 6b226c5907a1 test-data/mgf_tst.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mgf_tst.tab Fri Aug 07 06:17:31 2020 -0400 @@ -0,0 +1,25 @@ +#SpecFile SpecID ScanNum FragMethod Precursor IsotopeError PrecursorError(ppm) Charge Peptide Protein DeNovoScore MSGFScore SpecEValue EValue QValue PepQValue +wendt005_mickela_20200214_17647_12_V.mzML index=8575 -1 CID 501.26144 1 14.785407 3 +42.011IFLPFSTHSR+0.984 Pep_1|Protein_1(pre=-,post=-) 74 36 2.7071892E-10 2.855757E-4 0.0 0.0 +wendt005_mickela_20200214_17647_12_V.mzML index=10628 -1 CID 631.3283 0 16.628782 2 RTVWSN+0.984GTSPR Pep_2|Protein_1_29(pre=-,post=P) 61 36 6.954425E-10 7.203602E-4 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=6020 -1 CID 607.79266 1 -11.606342 2 +42.011AQ+0.984YWLSQFK Pep_3|Protein_1_28(pre=-,post=-) 23 10 9.667708E-10 9.6076715E-4 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=11469 -1 CID 470.6093 1 -7.4827867 3 RLLLQ+0.984C+57.021PRVPR Pep_4|Protein_2(pre=-,post=L) 68 35 1.2193706E-9 0.0012630607 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=15017 -1 CID 799.7483 0 2.747454 3 YFM+15.995YSIQYILIFYVQYVK Pep_5|Protein_2_29(pre=-,post=-) 2 -17 2.587433E-9 0.0029598887 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=16951 -1 CID 424.5418 1 -19.301939 3 RC+57.021GPLQASEPR Pep_6|Protein_4_16_31_32(pre=-,post=E) 69 41 3.636947E-9 0.0037672587 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=34154 -1 CID 768.88916 1 13.068233 2 +42.011STPVELEFSQ+0.984VEK Pep_7|Protein_5_34(pre=-,post=-) 77 33 4.083382E-9 0.0043801093 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=12592 -1 CID 701.0163 0 -5.6593018 3 YQSTPNIYYILYMYIR 
Pep_8|Protein_5_6_34_40(pre=-,post=-) 77 19 6.9333055E-9 0.0077557205 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=12620 -1 CID 485.91306 0 -10.551063 3 M+15.995SGIITN+0.984EISVFK Pep_9|Protein_7_9_22(pre=-,post=-) 55 28 7.184936E-9 0.007707044 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=5150 -1 CID 479.9201 0 -11.509454 3 YFEGKPVIEEVK Pep_10|Protein_7_22(pre=-,post=-) 87 44 7.371949E-9 0.007776514 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=26411 -1 CID 708.3275 0 -15.423832 2 PAQ+0.984PTGTRPC+57.021SSR Pep_11|Protein_8_21(pre=R,post=-) 41 15 7.913002E-9 0.008488016 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=22717 -1 CID 783.3668 0 11.843052 2 +42.011EEQDTFAVNSQQK XXX_Pep_22060|Protein_2878(pre=-,post=-) 135 39 7.923481E-9 0.008499257 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=8020 -1 CID 629.31287 0 13.772342 2 +42.011FQEPQQPWR XXX_Pep_16062|Protein_2307(pre=-,post=-) 31 14 8.15928E-9 0.008108611 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=20351 -1 CID 497.26334 0 0.79782444 2 LVPASGMYR XXX_Pep_9843|Protein_1629(pre=-,post=-) 28 16 8.401295E-9 0.0083491225 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=18867 -1 CID 559.29803 1 19.583662 3 +42.011LIGTATSVDEAIAN+0.984EK XXX_Pep_14112|Protein_2085(pre=-,post=-) 57 21 8.928303E-9 0.009987362 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=43084 -1 CID 653.8416 1 0.048319984 2 +42.011YSNYILYTVK XXX_Pep_3426|Protein_702(pre=-,post=-) 13 0 9.61208E-9 0.009760836 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=43084 -1 CID 653.8416 0 14.842637 2 +42.011YSN+0.984YILYTVK XXX_Pep_3426|Protein_702(pre=-,post=-) 13 0 9.61208E-9 0.009760836 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=32362 -1 CID 1036.4833 0 5.6531625 3 AWIGMQ+0.984WNGIEWNAM+15.995EWIQLEWNGK 
XXX_Pep_14712|Protein_2168(pre=-,post=-) 13 -31 1.0415514E-8 0.012581826 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=10370 -1 CID 461.22098 0 0.13233389 2 N+0.984DTQMLAK XXX_Pep_7493|Protein_1335_1348(pre=-,post=-) 83 51 1.0437349E-8 0.0101322755 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=21871 -1 CID 523.7984 0 1.3982916 2 +42.011AYVLNISPK XXX_Pep_31555|Protein_3895(pre=-,post=-) 86 43 1.0667454E-8 0.010601209 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=15561 -1 CID 581.9749 0 17.304827 3 GLDWDLAADLEGN+0.984IIK XXX_Pep_17726|Protein_2472(pre=-,post=-) 107 48 1.0803276E-8 0.012084738 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=22491 -1 CID 522.26483 0 -13.790032 2 +42.011Q+0.984LEAVQ+0.984VGR XXX_Pep_12397|Protein_1871(pre=-,post=-) 46 31 1.09720055E-8 0.01090387 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=9573 -1 CID 623.81696 0 -13.893293 2 +42.011RHALDGPWPR XXX_Pep_17806|Protein_2476(pre=-,post=Q) 28 13 1.1109479E-8 0.011281409 0.088495575 0.0952381 +wendt005_mickela_20200214_17647_12_V.mzML index=16000 -1 CID 381.54922 1 13.824602 3 +42.011N+0.984TYLSFLIK XXX_Pep_29178|Protein_3650(pre=-,post=-) 66 42 1.1131118E-8 0.011061994 0.088495575 0.0952381 diff -r 000000000000 -r 6b226c5907a1 test-data/tst_valid.db Binary file test-data/tst_valid.db has changed diff -r 000000000000 -r 6b226c5907a1 test-data/two.fastg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/two.fastg Fri Aug 07 06:17:31 2020 -0400 @@ -0,0 +1,40 @@ +>EDGE_1_length_84_cov_1.0:EDGE_3_length_84_cov_1.0; +CGTTATTCGCGCCCACTCTCCCATTTATCCGCGCAAGCGGATGCGATGCGATTGCCCGCTAAGATATTCTTACCATTCTCGACA +>EDGE_1_length_84_cov_1.0'; +TGTCGAGAATGGTAAGAATATCTTAGCGGGCAATCGCATCGCATCCGCTTGCGCGGATAAATGGGAGAGTGGGCGCGAATAACG +>EDGE_2_length_84_cov_1.0:EDGE_3_length_84_cov_1.0; +CTGGTCCTGTTGACTACAATGGGCCCAACTCAATCACAGCTCGAGCGCCTTGAATAACATACTCATCTCTATACATTCTCGACA 
+>EDGE_2_length_84_cov_1.0':EDGE_3_length_84_cov_1.0'; +TGTCGAGAATGTATAGAGATGAGTATGTTATTCAAGGCGCTCGAGCTGTGATTGAGTTGGGCCCATTGTAGTCAACAGGACCAG +>EDGE_3_length_84_cov_1.0:EDGE_2_length_84_cov_1.0,EDGE_4_length_84_cov_1.0; +CATTCTCGACATGCTGAGCTGAGACGGCGTCGATGCATAGCGGACTTTCGGTCAGTCGCAATTCCTCACGAGACTGGTCCTGTT +>EDGE_3_length_84_cov_1.0':EDGE_2_length_84_cov_1.0',EDGE_1_length_84_cov_1.0'; +AACAGGACCAGTCTCGTGAGGAATTGCGACTGACCGAAAGTCCGCTATGCATCGACGCCGTCTCAGCTCAGCATGTCGAGAATG +>EDGE_4_length_84_cov_1.0:EDGE_5_length_84_cov_1.0; +CTGGTCCTGTTACAGAGCTGGCGTACGCGTTGAACACTTCACAGATGATAGGGATTCGGGTAAAGAGCGTGTCATTGGGGGCTT +>EDGE_4_length_84_cov_1.0':EDGE_3_length_84_cov_1.0'; +AAGCCCCCAATGACACGCTCTTTACCCGAATCCCTATCATCTGTGAAGTGTTCAACGCGTACGCCAGCTCTGTAACAGGACCAG +>EDGE_5_length_84_cov_1.0; +ATTGGGGGCTTCATACATAGAGCAAGGGCGTCGAACGGTCGTGAAAGTCTTAGTACCGCACGTACCAACTTACTGAGGATATTG +>EDGE_5_length_84_cov_1.0':EDGE_4_length_84_cov_1.0',EDGE_6_length_84_cov_1.0'; +CAATATCCTCAGTAAGTTGGTACGTGCGGTACTAAGACTTTCACGACCGTTCGACGCCCTTGCTCTATGTATGAAGCCCCCAAT +>EDGE_6_length_84_cov_1.0:EDGE_5_length_84_cov_1.0; +AAGAGGCCGCCACCGTTTTAGGGGGGGAAGGTTGAAGATCTCCTCTTCTCATGACTGAACTCGCGAGGGCCGTATTGGGGGCTT +>EDGE_6_length_84_cov_1.0':EDGE_8_length_84_cov_1.0'; +AAGCCCCCAATACGGCCCTCGCGAGTTCAGTCATGAGAAGAGGAGATCTTCAACCTTCCCCCCCTAAAACGGTGGCGGCCTCTT +>EDGE_7_length_84_cov_1.0:EDGE_8_length_84_cov_1.0; +AAGAGGCCGCCAAAGAACAAAGGCTTACTGTGCGCAGAGGAACGCCCATTTAGCGGCTGGCGTTTTGAATCCTTTTAATATTGT +>EDGE_7_length_84_cov_1.0':EDGE_8_length_84_cov_1.0'; +ACAATATTAAAAGGATTCAAAACGCCAGCCGCTAAATGGGCGTTCCTCTGCGCACAGTAAGCCTTTGTTCTTTGGCGGCCTCTT +>EDGE_8_length_84_cov_1.0:EDGE_7_length_84_cov_1.0,EDGE_6_length_84_cov_1.0; +TTTAATATTGTTTAATCCAATTCCCTCATTTAGGACCCTACCAAGTCAACATTGGTATATGAATGCGACCTCGAAGAGGCCGCC +>EDGE_8_length_84_cov_1.0':EDGE_7_length_84_cov_1.0',EDGE_9_length_84_cov_1.0'; +GGCGGCCTCTTCGAGGTCGCATTCATATACCAATGTTGACTTGGTAGGGTCCTAAATGAGGGAATTGGATTAAACAATATTAAA +>EDGE_9_length_84_cov_1.0:EDGE_8_length_84_cov_1.0; 
+TAAAAATGACAGTGGTTGGTGCTCTAAACTTCATTTGGTTAACTCGTGTATCAGCGCGATAGGCTGTTAGAGGTTTAATATTGT +>EDGE_9_length_84_cov_1.0'; +ACAATATTAAACCTCTAACAGCCTATCGCGCTGATACACGAGTTAACCAAATGAAGTTTAGAGCACCAACCACTGTCATTTTTA +>EDGE_10_length_84_cov_1.0; +ATGGCAAGGTACTTCCGGTCTTAATGAATGGCCGGGAAAGGTACGCACGCGGTATGGGGGGGTGAAGGGGCGAATAGACAGGCT +>EDGE_10_length_84_cov_1.0':EDGE_10_length_84_cov_1.0; +AGCCTGTCTATTCGCCCCTTCACCCCCCCATACCGCGTGCGTACCTTTCCCGGCCATTCATTAAGACCGGAAGTACCTTGCCAT