# HG changeset patch # User jay # Date 1609215715 0 # Node ID 9b5e990a0ebb078ba91dfa83b515102afbc5f216 # Parent 7557b48b2872d590f6783d61896afac864e0a816 "planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 3c91f421d26c8f42cf2671e47db735d2cf69dde8" diff -r 7557b48b2872 -r 9b5e990a0ebb PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py --- a/PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py Wed Oct 28 02:10:12 2020 +0000 +++ b/PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py Tue Dec 29 04:21:55 2020 +0000 @@ -10,13 +10,13 @@ def DataGen(DataBaseType, OutFile, IDs): - if DataBaseType == 'AMPvsTM': + if DataBaseType == 'AMPvsTMP': data = load_AMPvsTM() elif DataBaseType == 'AMPvsUniProt': data = load_AMPvsUniProt() - elif DataBaseType == 'ACPvsTM': + elif DataBaseType == 'ACPvsTMP': data = load_ACPvsTM() elif DataBaseType == 'ACPvsRandom': @@ -39,16 +39,11 @@ print ("Enter Correct Values") exit() - Target = data.target.tolist() - Target_list = set(Target) - df = data.sequences - - - Target = pd.DataFrame(Target, columns=['Target']) - df = pd.DataFrame(df, columns=['Peptide']) - - df = pd.DataFrame(df) - df = pd.concat([df, Target], axis=1) + peptide_data = data.sequences + class_label = int(len(peptide_data)/2)*[data.target_names[0]]+int(len(peptide_data)/2)*[data.target_names[1]] + peptide_data = pd.DataFrame(peptide_data, columns=['name']) + class_label = pd.DataFrame(class_label, columns=['class_label']) + df = pd.concat([peptide_data,class_label], axis=1) df.to_csv(OutFile, index=False, sep='\t') @@ -69,9 +64,9 @@ help="Out put file name for str descriptors") parser.add_argument("-L", "--List", - required=False, - default=None, - help="List of integer as ID") + required=False, + default=None, + help="List of integer as ID") args = parser.parse_args() DataGen(args.DataBaseType, args.OutFile, args.List) diff -r 7557b48b2872 -r 9b5e990a0ebb PDAUG_Peptide_Data_Access/test-data/Out.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 
+++ b/PDAUG_Peptide_Data_Access/test-data/Out.tsv Tue Dec 29 04:21:55 2020 +0000 @@ -0,0 +1,413 @@ +name class_label +AAGAATVLLVIVLLAGSYLAVLA TM +LWIVIACLACVGSAAALTLRA TM +FYRFYMLREGTAVPAVWFSIELIFGLFA TM +GTLELGVDYGRAN TM +KLFWRAVVAEFLATTLFVFISIGSALGFK TM +HGSIGAGVDW TM +ATIYLVCFCFFKQLAMIFMSVLAGNMYE TM +GKLSLAATRSSE TM +TSRLLLAGVALGIICSALMTWAIYF TM +LGFFLVTFGFIWGMMLL TM +YMLFTMIFVISSIIITVVVI TM +ACFSAKVN TM +LIWVAATLAGAIIAVLLVIYA TM +HIPFAFAFAILAYLTLVLFRPVM TM +CGLLVLLTLLLMGAIVTLGVF TM +TGVYILVGVVLWTA TM +LSMFIITPVMVLGTIFIFVMG TM +MTLVALLVIGVSLTGYLGLKA TM +QLYYQVLNFGMIVSSALMIW TM +ALGALCLLLSVGSATACLLLGA TM +LAVIFFFASALLYSQAAT TM +MGHQLLVQLESIAITIVWSGVVAFIGYKLADLT TM +ADILILSLLVIQCALGLLTIPFSA TM +RHQQATFAGFIKGATWVSILSIAVLVFLALAN TM +GTKWWTVGIRPMYKW TM +PAIRVFATYAKWDEK TM +PHVFLLFITFPILFIGWGSQS TM +AGLVLWGAIIFVGWNALLLLFFW TM +TQIVLLGLVTAALWAGLLTLLLLWHW TM +SFIGRVFLFLMIVLPLWCGLHRMHHAMHD TM +MYLGAGIALIPVIMSINYL TM +FQTGFDFSD TM +LISGTLGIICLSLMATLGILL TM +FPFLNNLSFWFTVVGVILVNVSLG TM +LDIYTRLGGMVWR TM +FLIFLLVIMTVITVALLTLLF TM +KDIGILYLFTAGIVGLISVCFTVYMRMELQH TM +PLFYIINILVPCVLISFMINLVFYL TM +HKALKTLGIIMGVFTLCWLPFFLVNIVNVF TM +ANLKKLKTLMSAFLIVLGLLTFGA TM +IWVGIFLLAALLAALFVCLKA TM +KGAAGITGAGFITLAATLS TM +VVLLAIVTLISVVQNGFFAHKV TM +IAIDVLAVLVPLAAFLLDGSPDWSLYCAVWLLKPLRDS TM +ALTVGTLLFLTGIGAASWAIV TM +ITLIIFGVMAGVIGTILLISYGI TM +WVWISLYYVAFYVVMSGIFAL TM +GSNVALHVN TM +GVLELGVDYGRAN TM +IYLLILIILSMLCLVYASVPL TM +LCSFLFVIVLFVATFYTL TM +PLYFIVNVVIPCLLFSFLTGLVFYL TM +ALLIAGGVGLLALAAALVLNA TM +IWTWLRTTL TM +PAHMIAISFFFTNALALALHGALVLSAA TM +GGSLYIVGIFLPIWTFMIYIF TM +GLTLGTGGR TM +STVTGGYAQS TM +DCFLLLVLLLYAPVGFCLLVL TM +FWRAVVAEFLAMILFIFISIGSALG TM +DPVLYQHILWFFGHPEVYIIILPGFGIISHVISTFAK TM +LTFTEKWN TM +IRDTLMRLVLTVRSN TM +GKLSLAATRSTE TM +GYNKFVVQYATDA TM +QFHTMLMIAASGAVLIALGILCLVIQMYVSIR TM +LILVLFVVLVSSVGVSLTLYA TM +MASLWADYT TM +DAWSGFVRGYGYDNRTN TM +RWLWDFVNA TM +MFVALLGLGLGQVVCSVALFF TM +NMSTYVDYII TM +IWVKMTFIVVLGLCFVFFWSF TM +SLLISVVLVAYYLYIPLPDAI TM 
+RKTTINGLIVAVILGVCFTGLQAYEYSHA TM +IPVQLLWVNLVTDGLPATALG TM +LAVIAGGVGFIMVAVLLVLFA TM +MNKRNIMNLILAGGAGLPITTLALGYGAFF TM +IALIVAGLSALGIATALVLNA TM +NGNMWRILDHGAISL TM +VAIICAAVVAVGLIVGLSVGL TM +FSFLIVAGATTLFCLLHFGVI TM +VGISIATIVAIIAAIYYVPW TM +PLFAWSVFITAWLILLSLPVLAGAITMLLMDRNF TM +LILIGAGLGVLALAAGLILTA TM +LGLAAGAIYYYNTSNVFA TM +QLGAGAFGGYQV TM +IAIALLVILVVCSLITMSVIL TM +LAFLIGGIIGGLLLLIGVSCCLW TM +NGHMLRILDHGAISM TM +MALILGIDRFMSECRALTNF TM +AVIAGTTLAITALAVTS TM +LYIVLAILCGVSIAVALALTA TM +AAVTLGVLCLGLLVTVILLILQL TM +LWLVIGVLTAAALAVTLIALA TM +GTLLLLTAIGAASWAIVAVLL TM +LLLVASVIQGLGLLLCFTYICLHFSAL TM +AMIVALIVICITAVVAALV TM +LVFIGTCGAVLAVALGLVLWA TM +TRFGIAAKYQ TM +IPWAVLIVVLITSLIIALIAL TM +LWVVCAVLAGLGLTTALVLYA TM +FAGRVLAGAVMMSGIGIFGLWAGIL TM +LLIVLAGLAVVAVASGLILNA TM +VGSIVGGIYLGFCFNAGAPAVEAFI TM +MLSLGVSYR TM +LTKWFFCCVCTILTMPFF TM +LATIAASAIVLVVAVGLGLMA TM +PIVVTGAVY TM +KPHNLPMVFTGTAILYIGWFGFNAGSA TM +DEFGEQLSKVISLICVAVWL TM +HTLLTGVDF TM +SVELIALLAISCTFFLFMHT TM +LLIALLIYWTLAFITKTIKF TM +FKLVIFVVLGIAIASGLMLYA TM +CTLSISVLLAQTIFLFLIA TM +FVIAGGTLAIPILAFVASFLL TM +LLVITAIVLILSAAVGLVMYA TM +LALATALIGGVAAIASLLLYA TM +NPVIVIINLITLAAALLHTKTWFEL TM +FIVVAGVVILAVTIALLVYFL TM +IGLMCFLSIIITTVCIIMIAT TM +FSVDTQLQS TM +EVYILLNWIGYVNSGFNPLIYCRS TM +LTLAVALIVGVSAIASLLLYA TM +LYLAVVVLIGIGLTTTLVLYA TM +MSTAISVLLAQAVFLLLT TM +ALPGLMNKMEKAGCKRSVV TM +KQFIRYLISSNVGEVVCIFL TM +KYVVSSLVLVYGLIKVLTWIF TM +TGVSPVFAGGVEYA TM +IVIVLGILCFLLLLTVAVLVI TM +KSLGILGILLGVAALCTIVALSVV TM +LCLFVVTPVMVVGTAWIFL TM +LYLAIVVLIGVALTATLMLYA TM +LTILLAIAPVLALAVGLALYG TM +FFVLLLMILILVNLAMTIWIL TM +FIVLIPSVVITVIFLFFWLFM TM +ILVLLILAVITIFALVCVLLV TM +TYFIVLIPSVVITVIFLFFWLFM TM +FLVLFIFLTSFFLNYSHTMVA TM +GDKIGMFFQAMATFFGGFIIGF TM +SNGVIVGTCLAFVAGMIGMAYAA TM +FTFEGAARSDD TM +MNYMVYFNFFACVLVPLLLMLGVYL TM +AGLILLVVTLIGMSVLVRVLI TM +VFLAVYLLGGITFLPLVLFTL TM +LIKISALVFVTVAFFYLG TM +GFFGVATFFFAALGIILIAWSAVL TM +GQWEISVIWGLGVAMAIYLTA TM +LWWIQAMTGFAMFFLGSVHLYIMMT TM +IAVVITVVFLTLLSVVILIFF TM +QVVATATFR TM 
+MLLCFAFLWVLGIAYYMY TM +IFCIIMLFALLGFL TM +PLCICVAFTCLALVLVTSIVL TM +LIIVLAIVVGVGAAVGLALSA TM +ILVPCVLGLLLLPILAMLMALCV TM +LGLLLAALICVGIATTLVLNA TM +GYAAYYLVRKNFALAMPYLVE TM +LPRTLAVLLVGAALAISGAVMQALF TM +ILLFYVIFYGCLAGIFIGTIQ TM +IMSTLLEVGYDNVKSQ TM +LLAVALIIAMSISLAWQAAGW TM +IVGQLLFVALGITFIYYLFTP TM +NFWMFGLFFFFYFFIMGAYFPFFPIWL TM +LVLIVGIVAAVGVAAALVLNA TM +LTLAVALIGGVAAITSLLLYA TM +ASGGIILIIAAILAMIMAN TM +GSAGGAALAVVVLALAFGLSG TM +LFVLLLLAILVVNLALTIWIL TM +SPPLVLAALVACIIVLGFNYWIA TM +LLFLILGIISFITFFLQGFTF TM +LIVKALGILCFLLLITVAVLAV TM +QYIHVAFQGSFACITVGLIVGAL TM +QGIAVFGYSMAVSIGGILASR TM +DHKRLGIMYIIVAIVMLLRGFADAIMMR TM +IFRLHLVLGMTLFLLF TM +LVSAIILTSFMTGLFILSLWK TM +AVVGGVIAAVFITLITVVVLI TM +INLGCDVDFD TM +ALSALCLLLSVGSAAACLLLGA TM +FTVIAGAVIVLLLTLNSNS TM +LLFVSLLFCLIAQTCWLALV TM +MVLVALLVIGVSLAGYLGLKA TM +LVIPHILRLC TM +LIRVLLGFVILFITYILFPSI TM +PLFYIINILAPCVLIALMANLVFYL TM +HKLGLGLEFQA TM +ILFVAVSFIALGCVSAFVLFE TM +AVVSAQIAITASPIS TM +LTIIGGALFVLAVAAGLVLNA TM +GCCGLLALALCSLALSLLA TM +LIVLLAIVTIIAIALVAILP TM +TEISAGWG TM +FWRYFAGNLASGGAAGATSLCFVYPLDFARTRLAA TM +IVAALGIIGLWMFFSSNELSIAT TM +MTVILFVLLGISIASALVLYA TM +SNATIAVACLSFFVCMIGAAYAS TM +LKLTFDSSFS TM +LNVLLSAAINFFLIAFAVYFLV TM +HSAMLWTIGFIVTFSVGGMTGVLLAVPGADFV TM +AVLSAKGQY TM +LWELVIEQFEDLLVRILLLAA TM +FFIVMGLVDAIPMIAVGLGLY TM +LAVIAGGMGFIATAVLLVLFA TM +ACYCRIPACLAGERRYGTCFYMGRVWAFCC AMP +AGRGKQGGKVRAKAKTRSSRAGLQFPVGRVHRLLRKGNY AMP +ALFSILRGLKKLGNMGQAFVNCKIYKKC AMP +ALSILKGLEKLAKMGIALTNCKATKKC AMP +ALWKDILKNVGKAAGKAVLNTVTDMVNQ AMP +ALWKNMLKGIGKLAGQAALGAVKTLVGAES AMP +ALWKTLLKNVGKAAGKAALNAVTDMVNQ AMP +ALWKTMLKKLGTMALHAGKAAFGAAADTISQ AMP +ALWKTMLKKLGTMALHAGKAALGAAADTISQGTQ AMP +APGNKAECEREKGYCGFLKCSFPFVVSGKCSRFFFCCKNIW AMP +ASIIKTTIKVSKAVCKTLTCICTGSCSNCK AMP +ATCDLLSGTGIKHSACAAHCLLRGNRGGYCNGRAICVCRN AMP +ATTGCSCPQCIIFDPICASSYKNGRRGFSSGCHMRCYNRCHGTDYFQISKGSKCI AMP +AYPGNGVHCGKYSCTVDKQTAIGNIGNNAA AMP +CANSCSYGPLTWSCDGNTK AMP +CRQSCSFGPLTFVCDGNTK AMP +DDTPSSRCGSGGWGPCLPIVDLLCIVHVTVGCSGGFGCCRIG AMP 
+DFASCHTNGGICLPNRCPGHMIQIGICFRPRVKCCRSW AMP +DFKDWMKTAGEWLKKKGPGILKAAMAAAT AMP +DGVKLCDVPSGTWSGHCGSSSKCSQQCKDREHFAYGGACHYQFPSVKCFCKRQC AMP +DKLIGSCVWGAVNYTSDCNGECKRRGYKGGHCGSFANVNCWCET AMP +ELCEKASKTWSGNCGNTGHCDNQCKSWEGAAHGACHVRNGKHMCFCYFNC AMP +FCKSLPLPLSVK AMP +FFGSLLSLGSKLLPSVFKLFQRKKE AMP +FFGSVLKLIPKIL AMP +FFGWLIKGAIHAGKAIHGLIHRRRH AMP +FFGWLIRGAIHAGKAIHGLIHRRRH AMP +FFPIVAGVAGQVLKKIYCTISKKC AMP +FGLPMLSILPKALCILLKRKC AMP +FIGLLISAGKAIHDLIRRRH AMP +FIGPIISALASLFG AMP +FKLGSFLKKAWKSKLAKKLRAKGKEMLKDYAKGLLEGGSEEVPGQ AMP +FLGGLMKAFPAIICAVTKKC AMP +FLGGLMKAFPALICAVTKKC AMP +FLNALKNFAKTAGKRLKSLLN AMP +FLPAIAGMAAKFLPKIFCAISKKC AMP +FLPAIAGVAAKFLPKIFCAISKKC AMP +FLPAIVGAAAKFLPKIFCVISKKC AMP +FLPAIVGAAGKFLPKIFCAISKKC AMP +FLPAIVGAAGQFLPKIFCAISKKC AMP +FLPAVLRVAAKIVPTVFCAISKKC AMP +FLPAVLRVAAKVVPTVFCLISKKC AMP +FLPAVLRVAAQVVPTVFCAISKKC AMP +FLPFIAGMAAKFLPKIFCAISKKC AMP +FLPFIAGMAANFLPKIFCAISKKC AMP +FLPFIAGVAAKFLPKIFCAISKKC AMP +FLPFLATLLSKVL AMP +FLPGLLAGLL AMP +FLPIASLLGKYL AMP +FLPIIAGVAAKVFPKIFCAISKKC AMP +FLPIIASVAAKVFPKIFCAISKKC AMP +FLPIIASVAAKVFSKIFCAISKKC AMP +FLPIIASVAANVFSKIFCAISKKC AMP +FLPILASLAAKFGPKLFCLVTKKC AMP +FLPILASLAAKLGPKLFCLVTKKC AMP +FLPILASLAATLGPKLLCLITKKC AMP +FLPLFASLIGKLL AMP +FLPLIGKVLSGIL AMP +FLPLIGRVLSGIL AMP +FLPLLAGLAANFFPKIFCKITRKC AMP +FLPLLAGLAANFLPKIFCKITRKC AMP +FLPLLAGLAANFLPTIICKISYKC AMP +FLPMLAGLAASMVPKFVCLITKKC AMP +FLPVVAGLAAKVLPSIICAVTKKC AMP +FMGGLIKAATKIVPAAYCAITKKC AMP +FSFKRLKGFAKKLWNSKLARKIRTKGLKYVKNFAKDMLSEGEEAPPAAEPPVEAPQ AMP +FVPYNPPRPGQSKPFPSFPGHGPFNPKIQWPYPLPNPGH AMP +GAIKDALKGAAKTVAVELLKKAQCKLEKTC AMP +GFFSLIKGVAKIATKGLAKNLGKMGLDLVGCKISKEC AMP +GFFSTVKNLATNVAGTVIDTLKCKVTGGCRS AMP +GFGALFKFLAKKVAKTVAKQAAKQGAKYVVNKQME AMP +GFISTVKNLATNVAGTVIDTIKCKVTGGC AMP +GFKGAFKNVMFGIAKSAGKSALNALACKIDKSC AMP +GFLDSFKNAMIGVAKSVGKTALSTLACKIDKSC AMP +GFLSILKKVLPKVMAHMK AMP +GFLSTVKNLATNVAGTVIDTLKCKVTGGCRS AMP +GFMKYIGPLIPHAVKAISDLI AMP +GFSSIFRGVAKFASKGLGKDLARLGVNLVACKISKQC AMP +GFVDLAKKVVGGIRNALGI AMP 
+GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLNNIGNTGCNEDEC AMP +GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLSNIGNTGCNEDEC AMP +GIFSKFGGKAIKNLFIKGAKNIGKEVGMDVIRTGIDVAGCKIKGEC AMP +GIFSKLAGKKLKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC AMP +GIFSKLGRKKIKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC AMP +GIFSLIKGAAQLIGKTVAKEAGKTGLELMACKVTKQC AMP +GIFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC AMP +GILDAIKAIAKAAG AMP +GILDFAKTVVGGIRNALGI AMP +GILSLFTGGIKALGKTLFKMAGKAGAEHLACKATNQC AMP +GILSSIKGVAKGVAKNVAAQLLDTLKCKITGC AMP +GIMDSVKGLAKNLAGKLLDSLKCKITGC AMP +GIMDTIKDTAKTVAVGLLNKLKCKITGC AMP +GINTLKKVIQGLHEVIKLVSNHA AMP +GINTLKKVIQGLHEVIKLVSNHE AMP +GIPCGESCVWIPCISAALGCSCKNKVCYRN AMP +GKLQAFLAKMKEIAAQTL AMP +GKVWDWIKSAAKKIWSSEPVSQLKGQVLNAAKNYVAEKIGATPT AMP +GLADFLNKAVGKVVDFVKS AMP +GLFDVVKGVLKGVGKNVAGSLLEQLKCKLSGGC AMP +GLFKVLGSVAKHLLPHVAPIIAEKL AMP +GLFLDTLKGAAKDVAGKLLEGLKCKIAGCKP AMP +GLFLDTLKGLAGKLLQGLKCIKAGCKP AMP +GLFSILRGAAKFASKGLGKDLTKLGVDLVACKISKQC AMP +GLFSKFNKKKIKSGLFKIIKTAGKEAGLEALRTGIDVIGCKIKGEC AMP +GLFSKFNKKKIKSGLIKIIKTAGKEAGLEALRTGIDVIGCKIKGEC AMP +GLFSVLGSVAKHLLPHVAPIIAEKL AMP +GLFSVLGSVAKHLLPHVVPVIAEKL AMP +GLFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC AMP +GLLDFVTGVGKDIFAQLIKQI AMP +GLLDSIKGMAISAGKGALQNLLKVASCKLDKTC AMP +GLLDSLKNLAINAAKGAGQSVLNTLSCKLSKTC AMP +GLLDTIKGVAKTVAASMLDKLKCKISGC AMP +GLLGGLLGPLLGGGGGGGGGLL AMP +GLLGPLLKIAAKVGSNLL AMP +GLLGSIFGAGKKIACALSGLC AMP +GLLGSLFGAGKKVACALSGLC AMP +GLLKRIKTLL AMP +GLLSGLKKVGKHVAKNVAVSLMDSLKCKISGDC AMP +GLLSKVLGVGKKVLCGVSGLC AMP +GLLSVLGSVAKHVLPHVVPVIAEHL AMP +GLMSSIGKALGGLIVDVLKPKTPAS AMP +GLNALKKVFQGIHEAIKLINNHVQ AMP +GLNTLKKVFQGLHEAIKLINNHVQ AMP +GLWNKIKEAASKAAGKAALGFVNEMV AMP +GLWSKIKAAGKEAAKAAAKAAGKAALNAVSEAV AMP +GLWSKIKEAAKTAGLMAMGFVNDMV AMP +GLWSTIKQKGKEAAIAAAKAAGQAALGAL AMP +GPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW AMP +GRLQAFLAKMKEIAAQTL AMP +GRPNPVNNKPTPHPRL AMP +GRPNPVNTKPTPYPRL AMP +GSGRGSCRSQCMRRHEDEPWRVQECVSQCRRRRGGGD AMP +GSKKPVPIIYCNRRTGKCQRM AMP +GVLDILKNAAKNILAHAAEQI AMP +GVVDILKGAGKDLLAHLVGKISEKV AMP +GWKDWAKKAGGWLKKKGPGMAKAALKAAMQ AMP 
+GWKDWLKKGKEWLKAKGPGIVKAALQAATQ AMP +GWKDWLNKGKEWLKKKGPGIMKAALKAATQ AMP +HGVSGHGQHGVHG AMP +IFGAILPLALGALKNLIK AMP +IIEKLVNTALGLLSGL AMP +IIGHLIKTALGMLGL AMP +ILGTILGLLKGL AMP +ILGTILGLLKSL AMP +ILPLVGNLLNDLL AMP +ILQKAVLDCLKAAGSSLSKAAITAIYNKIT AMP +INWKKIAEIGKQVLSAL AMP +INWKKIAEVGGKILSSL AMP +INWLKLGKAIIDAL AMP +IPRPLDPCIAQNGRCFTGICRYPYFWIGTCRNGKSCCRRR AMP +IWLTALKFLGKHAAKHLAKQQLSKL AMP +KDRPKKPGLCPPRPQKPCVKECKNDDSCPGQQKCCNYGCKDECRDPIFVG AMP +KTCEHLADTYRGVCFTNASCDDHCKNKAHLISGTCHNWKCFCTQNC AMP +KTCENLADTY AMP +KTCENLSGTFKGPCIPDGNCNKHCRNNEHLLSGRCRDDFRCWCTNRC AMP +KTCMTKKEGWGRCLIDTTCAHSCRKYGYMGGKCQGITRRCYCLLNC AMP +KWCFRVCYRGICYRKCR AMP +KWCFRVCYRGICYRRCR AMP +KYYGNGVSCNKKGCSVDWGKAIGIIGNNSAANLATGGAAGWSK AMP +LCNERPSQTWSGNCGNTAHCDKQCQDWEKASHGACHKRENHWKCFCYFNC AMP +LFCRKGTCHFGGCPAHLVKVGSCFGFRACCKWPWDV AMP +LLGRCKVKSNRFHGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC AMP +LLGRCKVKSNRFNGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC AMP +LLKELWTKIKGAGKAVLGKIKGLL AMP +LLKELWTKMKGAGKAVLGKIKGLL AMP +LLPILGNLLNGLL AMP +LLPNLLKSLL AMP +LMCTHPLDCSN AMP +LNLKGIFKKVASLLT AMP +LPVNEAQCRQVGGYCGLRICNFPSRFLGLCTRNHPCCSRVWV AMP +MSWLNFLKYIAKYGKKAVSAAWKYKGKVLEWLNVGPTLEWVWQKLKKIAGL AMP +QDKCKKVYENYPVSKCQLANQCNYDCKLDKHARSGECFYDEKRNLQCICDYCEY AMP +QGVRNHVTCRIYGGFCVPIRCPGRTRQIGTCFGRPVKCCRRW AMP +QKLCERPSGTWSGVCGNNNACKNQCINLEKARHGSCNYVFPAHKCICYFPC AMP +QQCGRQASGRLCGNRLCCSQWGYCGSTASYCGAGCQSQCRS AMP +QRFIHPTYRPPPQPRRPVIMRA AMP +RQRVEELSKFSKKGAAARRRK AMP +RSGRGECRRQCLRRHEGQPWETQECMRRCRRRG AMP +RSVCRQIKICRRRGGCYYKCTNRPY AMP +SAPRGCWTKSYPPKPCK AMP +SCTTCVCTCSCCTT AMP +SFGLCRLRRGFCARGRCRFPSIPIGRCSRFVQCCRRVW AMP +SGISGPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW AMP +SIVPIRCRSNRDCRRFCGFRGGRCTYARQCLCGY AMP +SKGKKANKDVELARG AMP +SMLSVLKNLGKVGLGFVACKINKQC AMP +TSYGNGVHCNKSKCWIDVSELETYKAGTVSNPKDILW AMP +VDKGSYLPRPTPPRPIYNRN AMP +VDKPDYRPRPRPPNM AMP +VDKPDYRPRPWPRNMI AMP +VDKPDYRPRPWPRPN AMP +VDKPDYRPRPWPRPNM AMP +VLPIIGNLLNSLL AMP +VLPLISMALGKLL AMP +VNPIILGVLPKFVCLITKKC AMP +VRNFVTCRINRGFCVPIRCPGHRRQIGTCLGPQIKCCR AMP 
+VRNHVTCRINRGFCVPIRCPGRTRQIGTCFGPRIKCCRSW AMP +VTCDLLSFEAKGFAANHSLCAAHCLAIGRRGGSCERGVCICRR AMP +VTCDLLSIKGVAEHSACAANCLSMGKAGGRCENGICLCRKTTFKELWDKRF AMP +VTCFCKRPVCDSGETQIGYCRLGNTFYRLCCRQ AMP +WLGSALKIGAKLLPSVVGLFKKKKQ AMP +WNPFKELERAGQRVRDAIISAGPAVATVGQAAAIARG AMP +WNPFKELERAGQRVRDAVISAAPAVATVGQAAAIARG AMP +YDLSKNCRLRGGICYIGKCPRRFFRSGSCSRGNVCCLRFG AMP +YSKSLPLSVLNP AMP +YVSCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF AMP diff -r 7557b48b2872 -r 9b5e990a0ebb PDAUG_Peptide_Data_Access/test-data/out.tsv --- a/PDAUG_Peptide_Data_Access/test-data/out.tsv Wed Oct 28 02:10:12 2020 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,413 +0,0 @@ -Peptide Target -AAGAATVLLVIVLLAGSYLAVLA 0 -LWIVIACLACVGSAAALTLRA 0 -FYRFYMLREGTAVPAVWFSIELIFGLFA 0 -GTLELGVDYGRAN 0 -KLFWRAVVAEFLATTLFVFISIGSALGFK 0 -HGSIGAGVDW 0 -ATIYLVCFCFFKQLAMIFMSVLAGNMYE 0 -GKLSLAATRSSE 0 -TSRLLLAGVALGIICSALMTWAIYF 0 -LGFFLVTFGFIWGMMLL 0 -YMLFTMIFVISSIIITVVVI 0 -ACFSAKVN 0 -LIWVAATLAGAIIAVLLVIYA 0 -HIPFAFAFAILAYLTLVLFRPVM 0 -CGLLVLLTLLLMGAIVTLGVF 0 -TGVYILVGVVLWTA 0 -LSMFIITPVMVLGTIFIFVMG 0 -MTLVALLVIGVSLTGYLGLKA 0 -QLYYQVLNFGMIVSSALMIW 0 -ALGALCLLLSVGSATACLLLGA 0 -LAVIFFFASALLYSQAAT 0 -MGHQLLVQLESIAITIVWSGVVAFIGYKLADLT 0 -ADILILSLLVIQCALGLLTIPFSA 0 -RHQQATFAGFIKGATWVSILSIAVLVFLALAN 0 -GTKWWTVGIRPMYKW 0 -PAIRVFATYAKWDEK 0 -PHVFLLFITFPILFIGWGSQS 0 -AGLVLWGAIIFVGWNALLLLFFW 0 -TQIVLLGLVTAALWAGLLTLLLLWHW 0 -SFIGRVFLFLMIVLPLWCGLHRMHHAMHD 0 -MYLGAGIALIPVIMSINYL 0 -FQTGFDFSD 0 -LISGTLGIICLSLMATLGILL 0 -FPFLNNLSFWFTVVGVILVNVSLG 0 -LDIYTRLGGMVWR 0 -FLIFLLVIMTVITVALLTLLF 0 -KDIGILYLFTAGIVGLISVCFTVYMRMELQH 0 -PLFYIINILVPCVLISFMINLVFYL 0 -HKALKTLGIIMGVFTLCWLPFFLVNIVNVF 0 -ANLKKLKTLMSAFLIVLGLLTFGA 0 -IWVGIFLLAALLAALFVCLKA 0 -KGAAGITGAGFITLAATLS 0 -VVLLAIVTLISVVQNGFFAHKV 0 -IAIDVLAVLVPLAAFLLDGSPDWSLYCAVWLLKPLRDS 0 -ALTVGTLLFLTGIGAASWAIV 0 -ITLIIFGVMAGVIGTILLISYGI 0 -WVWISLYYVAFYVVMSGIFAL 0 -GSNVALHVN 0 -GVLELGVDYGRAN 0 -IYLLILIILSMLCLVYASVPL 0 -LCSFLFVIVLFVATFYTL 0 -PLYFIVNVVIPCLLFSFLTGLVFYL 0 -ALLIAGGVGLLALAAALVLNA 0 
-IWTWLRTTL 0 -PAHMIAISFFFTNALALALHGALVLSAA 0 -GGSLYIVGIFLPIWTFMIYIF 0 -GLTLGTGGR 0 -STVTGGYAQS 0 -DCFLLLVLLLYAPVGFCLLVL 0 -FWRAVVAEFLAMILFIFISIGSALG 0 -DPVLYQHILWFFGHPEVYIIILPGFGIISHVISTFAK 0 -LTFTEKWN 0 -IRDTLMRLVLTVRSN 0 -GKLSLAATRSTE 0 -GYNKFVVQYATDA 0 -QFHTMLMIAASGAVLIALGILCLVIQMYVSIR 0 -LILVLFVVLVSSVGVSLTLYA 0 -MASLWADYT 0 -DAWSGFVRGYGYDNRTN 0 -RWLWDFVNA 0 -MFVALLGLGLGQVVCSVALFF 0 -NMSTYVDYII 0 -IWVKMTFIVVLGLCFVFFWSF 0 -SLLISVVLVAYYLYIPLPDAI 0 -RKTTINGLIVAVILGVCFTGLQAYEYSHA 0 -IPVQLLWVNLVTDGLPATALG 0 -LAVIAGGVGFIMVAVLLVLFA 0 -MNKRNIMNLILAGGAGLPITTLALGYGAFF 0 -IALIVAGLSALGIATALVLNA 0 -NGNMWRILDHGAISL 0 -VAIICAAVVAVGLIVGLSVGL 0 -FSFLIVAGATTLFCLLHFGVI 0 -VGISIATIVAIIAAIYYVPW 0 -PLFAWSVFITAWLILLSLPVLAGAITMLLMDRNF 0 -LILIGAGLGVLALAAGLILTA 0 -LGLAAGAIYYYNTSNVFA 0 -QLGAGAFGGYQV 0 -IAIALLVILVVCSLITMSVIL 0 -LAFLIGGIIGGLLLLIGVSCCLW 0 -NGHMLRILDHGAISM 0 -MALILGIDRFMSECRALTNF 0 -AVIAGTTLAITALAVTS 0 -LYIVLAILCGVSIAVALALTA 0 -AAVTLGVLCLGLLVTVILLILQL 0 -LWLVIGVLTAAALAVTLIALA 0 -GTLLLLTAIGAASWAIVAVLL 0 -LLLVASVIQGLGLLLCFTYICLHFSAL 0 -AMIVALIVICITAVVAALV 0 -LVFIGTCGAVLAVALGLVLWA 0 -TRFGIAAKYQ 0 -IPWAVLIVVLITSLIIALIAL 0 -LWVVCAVLAGLGLTTALVLYA 0 -FAGRVLAGAVMMSGIGIFGLWAGIL 0 -LLIVLAGLAVVAVASGLILNA 0 -VGSIVGGIYLGFCFNAGAPAVEAFI 0 -MLSLGVSYR 0 -LTKWFFCCVCTILTMPFF 0 -LATIAASAIVLVVAVGLGLMA 0 -PIVVTGAVY 0 -KPHNLPMVFTGTAILYIGWFGFNAGSA 0 -DEFGEQLSKVISLICVAVWL 0 -HTLLTGVDF 0 -SVELIALLAISCTFFLFMHT 0 -LLIALLIYWTLAFITKTIKF 0 -FKLVIFVVLGIAIASGLMLYA 0 -CTLSISVLLAQTIFLFLIA 0 -FVIAGGTLAIPILAFVASFLL 0 -LLVITAIVLILSAAVGLVMYA 0 -LALATALIGGVAAIASLLLYA 0 -NPVIVIINLITLAAALLHTKTWFEL 0 -FIVVAGVVILAVTIALLVYFL 0 -IGLMCFLSIIITTVCIIMIAT 0 -FSVDTQLQS 0 -EVYILLNWIGYVNSGFNPLIYCRS 0 -LTLAVALIVGVSAIASLLLYA 0 -LYLAVVVLIGIGLTTTLVLYA 0 -MSTAISVLLAQAVFLLLT 0 -ALPGLMNKMEKAGCKRSVV 0 -KQFIRYLISSNVGEVVCIFL 0 -KYVVSSLVLVYGLIKVLTWIF 0 -TGVSPVFAGGVEYA 0 -IVIVLGILCFLLLLTVAVLVI 0 -KSLGILGILLGVAALCTIVALSVV 0 -LCLFVVTPVMVVGTAWIFL 0 -LYLAIVVLIGVALTATLMLYA 0 -LTILLAIAPVLALAVGLALYG 0 -FFVLLLMILILVNLAMTIWIL 0 
-FIVLIPSVVITVIFLFFWLFM 0 -ILVLLILAVITIFALVCVLLV 0 -TYFIVLIPSVVITVIFLFFWLFM 0 -FLVLFIFLTSFFLNYSHTMVA 0 -GDKIGMFFQAMATFFGGFIIGF 0 -SNGVIVGTCLAFVAGMIGMAYAA 0 -FTFEGAARSDD 0 -MNYMVYFNFFACVLVPLLLMLGVYL 0 -AGLILLVVTLIGMSVLVRVLI 0 -VFLAVYLLGGITFLPLVLFTL 0 -LIKISALVFVTVAFFYLG 0 -GFFGVATFFFAALGIILIAWSAVL 0 -GQWEISVIWGLGVAMAIYLTA 0 -LWWIQAMTGFAMFFLGSVHLYIMMT 0 -IAVVITVVFLTLLSVVILIFF 0 -QVVATATFR 0 -MLLCFAFLWVLGIAYYMY 0 -IFCIIMLFALLGFL 0 -PLCICVAFTCLALVLVTSIVL 0 -LIIVLAIVVGVGAAVGLALSA 0 -ILVPCVLGLLLLPILAMLMALCV 0 -LGLLLAALICVGIATTLVLNA 0 -GYAAYYLVRKNFALAMPYLVE 0 -LPRTLAVLLVGAALAISGAVMQALF 0 -ILLFYVIFYGCLAGIFIGTIQ 0 -IMSTLLEVGYDNVKSQ 0 -LLAVALIIAMSISLAWQAAGW 0 -IVGQLLFVALGITFIYYLFTP 0 -NFWMFGLFFFFYFFIMGAYFPFFPIWL 0 -LVLIVGIVAAVGVAAALVLNA 0 -LTLAVALIGGVAAITSLLLYA 0 -ASGGIILIIAAILAMIMAN 0 -GSAGGAALAVVVLALAFGLSG 0 -LFVLLLLAILVVNLALTIWIL 0 -SPPLVLAALVACIIVLGFNYWIA 0 -LLFLILGIISFITFFLQGFTF 0 -LIVKALGILCFLLLITVAVLAV 0 -QYIHVAFQGSFACITVGLIVGAL 0 -QGIAVFGYSMAVSIGGILASR 0 -DHKRLGIMYIIVAIVMLLRGFADAIMMR 0 -IFRLHLVLGMTLFLLF 0 -LVSAIILTSFMTGLFILSLWK 0 -AVVGGVIAAVFITLITVVVLI 0 -INLGCDVDFD 0 -ALSALCLLLSVGSAAACLLLGA 0 -FTVIAGAVIVLLLTLNSNS 0 -LLFVSLLFCLIAQTCWLALV 0 -MVLVALLVIGVSLAGYLGLKA 0 -LVIPHILRLC 0 -LIRVLLGFVILFITYILFPSI 0 -PLFYIINILAPCVLIALMANLVFYL 0 -HKLGLGLEFQA 0 -ILFVAVSFIALGCVSAFVLFE 0 -AVVSAQIAITASPIS 0 -LTIIGGALFVLAVAAGLVLNA 0 -GCCGLLALALCSLALSLLA 0 -LIVLLAIVTIIAIALVAILP 0 -TEISAGWG 0 -FWRYFAGNLASGGAAGATSLCFVYPLDFARTRLAA 0 -IVAALGIIGLWMFFSSNELSIAT 0 -MTVILFVLLGISIASALVLYA 0 -SNATIAVACLSFFVCMIGAAYAS 0 -LKLTFDSSFS 0 -LNVLLSAAINFFLIAFAVYFLV 0 -HSAMLWTIGFIVTFSVGGMTGVLLAVPGADFV 0 -AVLSAKGQY 0 -LWELVIEQFEDLLVRILLLAA 0 -FFIVMGLVDAIPMIAVGLGLY 0 -LAVIAGGMGFIATAVLLVLFA 0 -ACYCRIPACLAGERRYGTCFYMGRVWAFCC 1 -AGRGKQGGKVRAKAKTRSSRAGLQFPVGRVHRLLRKGNY 1 -ALFSILRGLKKLGNMGQAFVNCKIYKKC 1 -ALSILKGLEKLAKMGIALTNCKATKKC 1 -ALWKDILKNVGKAAGKAVLNTVTDMVNQ 1 -ALWKNMLKGIGKLAGQAALGAVKTLVGAES 1 -ALWKTLLKNVGKAAGKAALNAVTDMVNQ 1 -ALWKTMLKKLGTMALHAGKAAFGAAADTISQ 1 -ALWKTMLKKLGTMALHAGKAALGAAADTISQGTQ 1 
-APGNKAECEREKGYCGFLKCSFPFVVSGKCSRFFFCCKNIW 1 -ASIIKTTIKVSKAVCKTLTCICTGSCSNCK 1 -ATCDLLSGTGIKHSACAAHCLLRGNRGGYCNGRAICVCRN 1 -ATTGCSCPQCIIFDPICASSYKNGRRGFSSGCHMRCYNRCHGTDYFQISKGSKCI 1 -AYPGNGVHCGKYSCTVDKQTAIGNIGNNAA 1 -CANSCSYGPLTWSCDGNTK 1 -CRQSCSFGPLTFVCDGNTK 1 -DDTPSSRCGSGGWGPCLPIVDLLCIVHVTVGCSGGFGCCRIG 1 -DFASCHTNGGICLPNRCPGHMIQIGICFRPRVKCCRSW 1 -DFKDWMKTAGEWLKKKGPGILKAAMAAAT 1 -DGVKLCDVPSGTWSGHCGSSSKCSQQCKDREHFAYGGACHYQFPSVKCFCKRQC 1 -DKLIGSCVWGAVNYTSDCNGECKRRGYKGGHCGSFANVNCWCET 1 -ELCEKASKTWSGNCGNTGHCDNQCKSWEGAAHGACHVRNGKHMCFCYFNC 1 -FCKSLPLPLSVK 1 -FFGSLLSLGSKLLPSVFKLFQRKKE 1 -FFGSVLKLIPKIL 1 -FFGWLIKGAIHAGKAIHGLIHRRRH 1 -FFGWLIRGAIHAGKAIHGLIHRRRH 1 -FFPIVAGVAGQVLKKIYCTISKKC 1 -FGLPMLSILPKALCILLKRKC 1 -FIGLLISAGKAIHDLIRRRH 1 -FIGPIISALASLFG 1 -FKLGSFLKKAWKSKLAKKLRAKGKEMLKDYAKGLLEGGSEEVPGQ 1 -FLGGLMKAFPAIICAVTKKC 1 -FLGGLMKAFPALICAVTKKC 1 -FLNALKNFAKTAGKRLKSLLN 1 -FLPAIAGMAAKFLPKIFCAISKKC 1 -FLPAIAGVAAKFLPKIFCAISKKC 1 -FLPAIVGAAAKFLPKIFCVISKKC 1 -FLPAIVGAAGKFLPKIFCAISKKC 1 -FLPAIVGAAGQFLPKIFCAISKKC 1 -FLPAVLRVAAKIVPTVFCAISKKC 1 -FLPAVLRVAAKVVPTVFCLISKKC 1 -FLPAVLRVAAQVVPTVFCAISKKC 1 -FLPFIAGMAAKFLPKIFCAISKKC 1 -FLPFIAGMAANFLPKIFCAISKKC 1 -FLPFIAGVAAKFLPKIFCAISKKC 1 -FLPFLATLLSKVL 1 -FLPGLLAGLL 1 -FLPIASLLGKYL 1 -FLPIIAGVAAKVFPKIFCAISKKC 1 -FLPIIASVAAKVFPKIFCAISKKC 1 -FLPIIASVAAKVFSKIFCAISKKC 1 -FLPIIASVAANVFSKIFCAISKKC 1 -FLPILASLAAKFGPKLFCLVTKKC 1 -FLPILASLAAKLGPKLFCLVTKKC 1 -FLPILASLAATLGPKLLCLITKKC 1 -FLPLFASLIGKLL 1 -FLPLIGKVLSGIL 1 -FLPLIGRVLSGIL 1 -FLPLLAGLAANFFPKIFCKITRKC 1 -FLPLLAGLAANFLPKIFCKITRKC 1 -FLPLLAGLAANFLPTIICKISYKC 1 -FLPMLAGLAASMVPKFVCLITKKC 1 -FLPVVAGLAAKVLPSIICAVTKKC 1 -FMGGLIKAATKIVPAAYCAITKKC 1 -FSFKRLKGFAKKLWNSKLARKIRTKGLKYVKNFAKDMLSEGEEAPPAAEPPVEAPQ 1 -FVPYNPPRPGQSKPFPSFPGHGPFNPKIQWPYPLPNPGH 1 -GAIKDALKGAAKTVAVELLKKAQCKLEKTC 1 -GFFSLIKGVAKIATKGLAKNLGKMGLDLVGCKISKEC 1 -GFFSTVKNLATNVAGTVIDTLKCKVTGGCRS 1 -GFGALFKFLAKKVAKTVAKQAAKQGAKYVVNKQME 1 -GFISTVKNLATNVAGTVIDTIKCKVTGGC 1 -GFKGAFKNVMFGIAKSAGKSALNALACKIDKSC 1 
-GFLDSFKNAMIGVAKSVGKTALSTLACKIDKSC 1 -GFLSILKKVLPKVMAHMK 1 -GFLSTVKNLATNVAGTVIDTLKCKVTGGCRS 1 -GFMKYIGPLIPHAVKAISDLI 1 -GFSSIFRGVAKFASKGLGKDLARLGVNLVACKISKQC 1 -GFVDLAKKVVGGIRNALGI 1 -GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLNNIGNTGCNEDEC 1 -GIFPKIIGKGIKTGIVNGIKSLVKGVGMKVFKAGLSNIGNTGCNEDEC 1 -GIFSKFGGKAIKNLFIKGAKNIGKEVGMDVIRTGIDVAGCKIKGEC 1 -GIFSKLAGKKLKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC 1 -GIFSKLGRKKIKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC 1 -GIFSLIKGAAQLIGKTVAKEAGKTGLELMACKVTKQC 1 -GIFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC 1 -GILDAIKAIAKAAG 1 -GILDFAKTVVGGIRNALGI 1 -GILSLFTGGIKALGKTLFKMAGKAGAEHLACKATNQC 1 -GILSSIKGVAKGVAKNVAAQLLDTLKCKITGC 1 -GIMDSVKGLAKNLAGKLLDSLKCKITGC 1 -GIMDTIKDTAKTVAVGLLNKLKCKITGC 1 -GINTLKKVIQGLHEVIKLVSNHA 1 -GINTLKKVIQGLHEVIKLVSNHE 1 -GIPCGESCVWIPCISAALGCSCKNKVCYRN 1 -GKLQAFLAKMKEIAAQTL 1 -GKVWDWIKSAAKKIWSSEPVSQLKGQVLNAAKNYVAEKIGATPT 1 -GLADFLNKAVGKVVDFVKS 1 -GLFDVVKGVLKGVGKNVAGSLLEQLKCKLSGGC 1 -GLFKVLGSVAKHLLPHVAPIIAEKL 1 -GLFLDTLKGAAKDVAGKLLEGLKCKIAGCKP 1 -GLFLDTLKGLAGKLLQGLKCIKAGCKP 1 -GLFSILRGAAKFASKGLGKDLTKLGVDLVACKISKQC 1 -GLFSKFNKKKIKSGLFKIIKTAGKEAGLEALRTGIDVIGCKIKGEC 1 -GLFSKFNKKKIKSGLIKIIKTAGKEAGLEALRTGIDVIGCKIKGEC 1 -GLFSVLGSVAKHLLPHVAPIIAEKL 1 -GLFSVLGSVAKHLLPHVVPVIAEKL 1 -GLFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC 1 -GLLDFVTGVGKDIFAQLIKQI 1 -GLLDSIKGMAISAGKGALQNLLKVASCKLDKTC 1 -GLLDSLKNLAINAAKGAGQSVLNTLSCKLSKTC 1 -GLLDTIKGVAKTVAASMLDKLKCKISGC 1 -GLLGGLLGPLLGGGGGGGGGLL 1 -GLLGPLLKIAAKVGSNLL 1 -GLLGSIFGAGKKIACALSGLC 1 -GLLGSLFGAGKKVACALSGLC 1 -GLLKRIKTLL 1 -GLLSGLKKVGKHVAKNVAVSLMDSLKCKISGDC 1 -GLLSKVLGVGKKVLCGVSGLC 1 -GLLSVLGSVAKHVLPHVVPVIAEHL 1 -GLMSSIGKALGGLIVDVLKPKTPAS 1 -GLNALKKVFQGIHEAIKLINNHVQ 1 -GLNTLKKVFQGLHEAIKLINNHVQ 1 -GLWNKIKEAASKAAGKAALGFVNEMV 1 -GLWSKIKAAGKEAAKAAAKAAGKAALNAVSEAV 1 -GLWSKIKEAAKTAGLMAMGFVNDMV 1 -GLWSTIKQKGKEAAIAAAKAAGQAALGAL 1 -GPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW 1 -GRLQAFLAKMKEIAAQTL 1 -GRPNPVNNKPTPHPRL 1 -GRPNPVNTKPTPYPRL 1 -GSGRGSCRSQCMRRHEDEPWRVQECVSQCRRRRGGGD 1 -GSKKPVPIIYCNRRTGKCQRM 1 -GVLDILKNAAKNILAHAAEQI 1 
-GVVDILKGAGKDLLAHLVGKISEKV 1 -GWKDWAKKAGGWLKKKGPGMAKAALKAAMQ 1 -GWKDWLKKGKEWLKAKGPGIVKAALQAATQ 1 -GWKDWLNKGKEWLKKKGPGIMKAALKAATQ 1 -HGVSGHGQHGVHG 1 -IFGAILPLALGALKNLIK 1 -IIEKLVNTALGLLSGL 1 -IIGHLIKTALGMLGL 1 -ILGTILGLLKGL 1 -ILGTILGLLKSL 1 -ILPLVGNLLNDLL 1 -ILQKAVLDCLKAAGSSLSKAAITAIYNKIT 1 -INWKKIAEIGKQVLSAL 1 -INWKKIAEVGGKILSSL 1 -INWLKLGKAIIDAL 1 -IPRPLDPCIAQNGRCFTGICRYPYFWIGTCRNGKSCCRRR 1 -IWLTALKFLGKHAAKHLAKQQLSKL 1 -KDRPKKPGLCPPRPQKPCVKECKNDDSCPGQQKCCNYGCKDECRDPIFVG 1 -KTCEHLADTYRGVCFTNASCDDHCKNKAHLISGTCHNWKCFCTQNC 1 -KTCENLADTY 1 -KTCENLSGTFKGPCIPDGNCNKHCRNNEHLLSGRCRDDFRCWCTNRC 1 -KTCMTKKEGWGRCLIDTTCAHSCRKYGYMGGKCQGITRRCYCLLNC 1 -KWCFRVCYRGICYRKCR 1 -KWCFRVCYRGICYRRCR 1 -KYYGNGVSCNKKGCSVDWGKAIGIIGNNSAANLATGGAAGWSK 1 -LCNERPSQTWSGNCGNTAHCDKQCQDWEKASHGACHKRENHWKCFCYFNC 1 -LFCRKGTCHFGGCPAHLVKVGSCFGFRACCKWPWDV 1 -LLGRCKVKSNRFHGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC 1 -LLGRCKVKSNRFNGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC 1 -LLKELWTKIKGAGKAVLGKIKGLL 1 -LLKELWTKMKGAGKAVLGKIKGLL 1 -LLPILGNLLNGLL 1 -LLPNLLKSLL 1 -LMCTHPLDCSN 1 -LNLKGIFKKVASLLT 1 -LPVNEAQCRQVGGYCGLRICNFPSRFLGLCTRNHPCCSRVWV 1 -MSWLNFLKYIAKYGKKAVSAAWKYKGKVLEWLNVGPTLEWVWQKLKKIAGL 1 -QDKCKKVYENYPVSKCQLANQCNYDCKLDKHARSGECFYDEKRNLQCICDYCEY 1 -QGVRNHVTCRIYGGFCVPIRCPGRTRQIGTCFGRPVKCCRRW 1 -QKLCERPSGTWSGVCGNNNACKNQCINLEKARHGSCNYVFPAHKCICYFPC 1 -QQCGRQASGRLCGNRLCCSQWGYCGSTASYCGAGCQSQCRS 1 -QRFIHPTYRPPPQPRRPVIMRA 1 -RQRVEELSKFSKKGAAARRRK 1 -RSGRGECRRQCLRRHEGQPWETQECMRRCRRRG 1 -RSVCRQIKICRRRGGCYYKCTNRPY 1 -SAPRGCWTKSYPPKPCK 1 -SCTTCVCTCSCCTT 1 -SFGLCRLRRGFCARGRCRFPSIPIGRCSRFVQCCRRVW 1 -SGISGPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW 1 -SIVPIRCRSNRDCRRFCGFRGGRCTYARQCLCGY 1 -SKGKKANKDVELARG 1 -SMLSVLKNLGKVGLGFVACKINKQC 1 -TSYGNGVHCNKSKCWIDVSELETYKAGTVSNPKDILW 1 -VDKGSYLPRPTPPRPIYNRN 1 -VDKPDYRPRPRPPNM 1 -VDKPDYRPRPWPRNMI 1 -VDKPDYRPRPWPRPN 1 -VDKPDYRPRPWPRPNM 1 -VLPIIGNLLNSLL 1 -VLPLISMALGKLL 1 -VNPIILGVLPKFVCLITKKC 1 -VRNFVTCRINRGFCVPIRCPGHRRQIGTCLGPQIKCCR 1 -VRNHVTCRINRGFCVPIRCPGRTRQIGTCFGPRIKCCRSW 1 
-VTCDLLSFEAKGFAANHSLCAAHCLAIGRRGGSCERGVCICRR 1 -VTCDLLSIKGVAEHSACAANCLSMGKAGGRCENGICLCRKTTFKELWDKRF 1 -VTCFCKRPVCDSGETQIGYCRLGNTFYRLCCRQ 1 -WLGSALKIGAKLLPSVVGLFKKKKQ 1 -WNPFKELERAGQRVRDAIISAGPAVATVGQAAAIARG 1 -WNPFKELERAGQRVRDAVISAAPAVATVGQAAAIARG 1 -YDLSKNCRLRGGICYIGKCPRRFFRSGSCSRGNVCCLRFG 1 -YSKSLPLSVLNP 1 -YVSCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF 1 diff -r 7557b48b2872 -r 9b5e990a0ebb PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py --- a/PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py Wed Oct 28 02:10:12 2020 +0000 +++ b/PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py Tue Dec 29 04:21:55 2020 +0000 @@ -14,17 +14,23 @@ n = 0 m = 0 + + l = [] + + for line in lines[1:]: + l.append(line.split('\t')[1].strip('\n').strip('\r')) + l = list(set(l)) for line in lines: - if '1' in line.split('\t')[1].strip('\n'): + if l[0] in line.split('\t')[1].strip('\n').strip('\r'): n= n+1 - of1.write('>peptide_'+str(n)+'\n') + of1.write('>peptide_'+str(n)+'_'+str(l[0])+'\n') of1.write(line.split('\t')[0]+'\n') - if '0' in line.split('\t')[1].strip('\n'): + if l[1] in line.split('\t')[1].strip('\n').strip('\r'): m= m+1 - of2.write('>peptide_'+str(m)+'\n') + of2.write('>peptide_'+str(m)+'_'+str(l[1])+'\n') of2.write(line.split('\t')[0]+'\n') elif Method == 'NoClassLabel': @@ -47,11 +53,10 @@ parser = argparse.ArgumentParser() parser.add_argument("-I", "--InFile", required=True, default=None, help=".fasta or .tsv") - parser.add_argument("-P", "--Postvs", required=False, default='Positive.fasta', help="Path to target tsv file") - parser.add_argument("-N", "--Negtvs", required=False, default='Negative.fasta', help="Path to target tsv file") + parser.add_argument("-P", "--Postvs", required=False, default='FirstDataFile.fasta', help="Path to target tsv file") + parser.add_argument("-N", "--Negtvs", required=False, default='SecondDataFile.fasta', help="Path to target tsv file") parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Path to target tsv file") parser.add_argument("-M", 
"--Method", required=True, default=None, help="Path to target tsv file") args = parser.parse_args() - TSVtoFASTA(args.InFile, args.Method, args.Postvs, args.Negtvs, args.OutFile) - + TSVtoFASTA(args.InFile, args.Method, args.Postvs, args.Negtvs, args.OutFile) \ No newline at end of file diff -r 7557b48b2872 -r 9b5e990a0ebb readme.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/readme.md Tue Dec 29 04:21:55 2020 +0000 @@ -0,0 +1,25 @@ +# PDAUG - a Galaxy-based toolset for peptide library analysis, visualization, and machine learning modeling. + +### Overview + +The Peptide Design and Analysis Under Galaxy (PDAUG) package is a Galaxy-based, Python-powered collection of tools, workflows, and datasets for rapid in-silico peptide library analysis. PDAUG offers tools for peptide library generation, data visualization, peptide sequence retrieval from built-in and public databases, peptide feature calculation, and machine learning modeling. The PDAUG tool suite can be downloaded and installed through the Galaxy Tool Shed as a standard Galaxy tool. + + +# Prebuilt Docker Image + +A prebuilt Docker image based on a recent Galaxy release can be obtained from the link below for quick installation. + + - [Docker Image](https://github.com/jaidevjoshi83/docker_pdaug) + +# Contributors + - Jayadev Joshi + + - Daniel Blankenberg + +# History + + - 0.1.0: First release! + +# Support & Bug Reports + +You can file a [GitHub issue](https://github.com/jaidevjoshi83/docker_pdaug/issues).  diff -r 7557b48b2872 -r 9b5e990a0ebb readme.txt