Repository 'pdaug_merge_dataframes'
hg clone https://toolshed.g2.bx.psu.edu/repos/jay/pdaug_merge_dataframes

Changeset 1:fcd5d259427c (2020-12-29)
Previous changeset 0:5bb52d4bf172 (2020-10-28) Next changeset 2:728e04b97852 (2020-12-29)
Commit message:
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit 3c91f421d26c8f42cf2671e47db735d2cf69dde8"
modified:
PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py
PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py
added:
PDAUG_Peptide_Data_Access/test-data/Out.tsv
readme.md
removed:
PDAUG_Peptide_Data_Access/test-data/out.tsv
readme.txt
b
diff -r 5bb52d4bf172 -r fcd5d259427c PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py
--- a/PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py Wed Oct 28 01:54:31 2020 +0000
+++ b/PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py Tue Dec 29 04:05:22 2020 +0000
[
@@ -10,13 +10,13 @@
 
 def DataGen(DataBaseType, OutFile, IDs):
 
-    if DataBaseType == 'AMPvsTM':
+    if DataBaseType == 'AMPvsTMP':
         data = load_AMPvsTM()
 
     elif DataBaseType == 'AMPvsUniProt':
         data = load_AMPvsUniProt()
 
-    elif DataBaseType == 'ACPvsTM':
+    elif DataBaseType == 'ACPvsTMP':
         data = load_ACPvsTM()
 
     elif DataBaseType == 'ACPvsRandom':
@@ -39,16 +39,11 @@
         print ("Enter Correct Values")
         exit()
 
-    Target = data.target.tolist()
-    Target_list = set(Target)
-    df = data.sequences
-
-
-    Target = pd.DataFrame(Target, columns=['Target'])
-    df = pd.DataFrame(df, columns=['Peptide'])
-    
-    df = pd.DataFrame(df)
-    df = pd.concat([df, Target], axis=1)
+    peptide_data = data.sequences
+    class_label = int(len(peptide_data)/2)*[data.target_names[0]]+int(len(peptide_data)/2)*[data.target_names[1]]
+    peptide_data = pd.DataFrame(peptide_data, columns=['name'])
+    class_label = pd.DataFrame(class_label, columns=['class_label'])
+    df = pd.concat([peptide_data,class_label], axis=1)
 
     df.to_csv(OutFile, index=False, sep='\t')
 
@@ -69,9 +64,9 @@
                         help="Out put file name for str descriptors")   
 
     parser.add_argument("-L", "--List",
-     required=False,
-     default=None,
-     help="List of integer as ID")
+                        required=False,
+                        default=None,
+                        help="List of integer as ID")
 
     args = parser.parse_args()
     DataGen(args.DataBaseType, args.OutFile, args.List)
b
diff -r 5bb52d4bf172 -r fcd5d259427c PDAUG_Peptide_Data_Access/test-data/Out.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/PDAUG_Peptide_Data_Access/test-data/Out.tsv Tue Dec 29 04:05:22 2020 +0000
b
b'@@ -0,0 +1,413 @@\n+name\tclass_label\n+AAGAATVLLVIVLLAGSYLAVLA\tTM\n+LWIVIACLACVGSAAALTLRA\tTM\n+FYRFYMLREGTAVPAVWFSIELIFGLFA\tTM\n+GTLELGVDYGRAN\tTM\n+KLFWRAVVAEFLATTLFVFISIGSALGFK\tTM\n+HGSIGAGVDW\tTM\n+ATIYLVCFCFFKQLAMIFMSVLAGNMYE\tTM\n+GKLSLAATRSSE\tTM\n+TSRLLLAGVALGIICSALMTWAIYF\tTM\n+LGFFLVTFGFIWGMMLL\tTM\n+YMLFTMIFVISSIIITVVVI\tTM\n+ACFSAKVN\tTM\n+LIWVAATLAGAIIAVLLVIYA\tTM\n+HIPFAFAFAILAYLTLVLFRPVM\tTM\n+CGLLVLLTLLLMGAIVTLGVF\tTM\n+TGVYILVGVVLWTA\tTM\n+LSMFIITPVMVLGTIFIFVMG\tTM\n+MTLVALLVIGVSLTGYLGLKA\tTM\n+QLYYQVLNFGMIVSSALMIW\tTM\n+ALGALCLLLSVGSATACLLLGA\tTM\n+LAVIFFFASALLYSQAAT\tTM\n+MGHQLLVQLESIAITIVWSGVVAFIGYKLADLT\tTM\n+ADILILSLLVIQCALGLLTIPFSA\tTM\n+RHQQATFAGFIKGATWVSILSIAVLVFLALAN\tTM\n+GTKWWTVGIRPMYKW\tTM\n+PAIRVFATYAKWDEK\tTM\n+PHVFLLFITFPILFIGWGSQS\tTM\n+AGLVLWGAIIFVGWNALLLLFFW\tTM\n+TQIVLLGLVTAALWAGLLTLLLLWHW\tTM\n+SFIGRVFLFLMIVLPLWCGLHRMHHAMHD\tTM\n+MYLGAGIALIPVIMSINYL\tTM\n+FQTGFDFSD\tTM\n+LISGTLGIICLSLMATLGILL\tTM\n+FPFLNNLSFWFTVVGVILVNVSLG\tTM\n+LDIYTRLGGMVWR\tTM\n+FLIFLLVIMTVITVALLTLLF\tTM\n+KDIGILYLFTAGIVGLISVCFTVYMRMELQH\tTM\n+PLFYIINILVPCVLISFMINLVFYL\tTM\n+HKALKTLGIIMGVFTLCWLPFFLVNIVNVF\tTM\n+ANLKKLKTLMSAFLIVLGLLTFGA\tTM\n+IWVGIFLLAALLAALFVCLKA\tTM\n+KGAAGITGAGFITLAATLS\tTM\n+VVLLAIVTLISVVQNGFFAHKV\tTM\n+IAIDVLAVLVPLAAFLLDGSPDWSLYCAVWLLKPLRDS\tTM\n+ALTVGTLLFLTGIGAASWAIV\tTM\n+ITLIIFGVMAGVIGTILLISYGI\tTM\n+WVWISLYYVAFYVVMSGIFAL\tTM\n+GSNVALHVN\tTM\n+GVLELGVDYGRAN\tTM\n+IYLLILIILSMLCLVYASVPL\tTM\n+LCSFLFVIVLFVATFYTL\tTM\n+PLYFIVNVVIPCLLFSFLTGLVFYL\tTM\n+ALLIAGGVGLLALAAALVLNA\tTM\n+IWTWLRTTL\tTM\n+PAHMIAISFFFTNALALALHGALVLSAA\tTM\n+GGSLYIVGIFLPIWTFMIYIF\tTM\n+GLTLGTGGR\tTM\n+STVTGGYAQS\tTM\n+DCFLLLVLLLYAPVGFCLLVL\tTM\n+FWRAVVAEFLAMILFIFISIGSALG\tTM\n+DPVLYQHILWFFGHPEVYIIILPGFGIISHVISTFAK\tTM\n+LTFTEKWN\tTM\n+IRDTLMRLVLTVRSN\tTM\n+GKLSLAATRSTE\tTM\n+GYNKFVVQYATDA\tTM\n+QFHTMLMIAASGAVLIALGILCLVIQMYVSIR\tTM\n+LILVLFVVLVSSVGVSLTLYA\tTM\n+MASLWADYT\tTM\n+DAWSGFVRGYGYDNRTN\tTM\n+RWLWDFVNA\tTM\n+MFVALLGLGLGQVVCSVALFF\tTM\n+NMSTYVDYII\tTM\n+IWVKM
TFIVVLGLCFVFFWSF\tTM\n+SLLISVVLVAYYLYIPLPDAI\tTM\n+RKTTINGLIVAVILGVCFTGLQAYEYSHA\tTM\n+IPVQLLWVNLVTDGLPATALG\tTM\n+LAVIAGGVGFIMVAVLLVLFA\tTM\n+MNKRNIMNLILAGGAGLPITTLALGYGAFF\tTM\n+IALIVAGLSALGIATALVLNA\tTM\n+NGNMWRILDHGAISL\tTM\n+VAIICAAVVAVGLIVGLSVGL\tTM\n+FSFLIVAGATTLFCLLHFGVI\tTM\n+VGISIATIVAIIAAIYYVPW\tTM\n+PLFAWSVFITAWLILLSLPVLAGAITMLLMDRNF\tTM\n+LILIGAGLGVLALAAGLILTA\tTM\n+LGLAAGAIYYYNTSNVFA\tTM\n+QLGAGAFGGYQV\tTM\n+IAIALLVILVVCSLITMSVIL\tTM\n+LAFLIGGIIGGLLLLIGVSCCLW\tTM\n+NGHMLRILDHGAISM\tTM\n+MALILGIDRFMSECRALTNF\tTM\n+AVIAGTTLAITALAVTS\tTM\n+LYIVLAILCGVSIAVALALTA\tTM\n+AAVTLGVLCLGLLVTVILLILQL\tTM\n+LWLVIGVLTAAALAVTLIALA\tTM\n+GTLLLLTAIGAASWAIVAVLL\tTM\n+LLLVASVIQGLGLLLCFTYICLHFSAL\tTM\n+AMIVALIVICITAVVAALV\tTM\n+LVFIGTCGAVLAVALGLVLWA\tTM\n+TRFGIAAKYQ\tTM\n+IPWAVLIVVLITSLIIALIAL\tTM\n+LWVVCAVLAGLGLTTALVLYA\tTM\n+FAGRVLAGAVMMSGIGIFGLWAGIL\tTM\n+LLIVLAGLAVVAVASGLILNA\tTM\n+VGSIVGGIYLGFCFNAGAPAVEAFI\tTM\n+MLSLGVSYR\tTM\n+LTKWFFCCVCTILTMPFF\tTM\n+LATIAASAIVLVVAVGLGLMA\tTM\n+PIVVTGAVY\tTM\n+KPHNLPMVFTGTAILYIGWFGFNAGSA\tTM\n+DEFGEQLSKVISLICVAVWL\tTM\n+HTLLTGVDF\tTM\n+SVELIALLAISCTFFLFMHT\tTM\n+LLIALLIYWTLAFITKTIKF\tTM\n+FKLVIFVVLGIAIASGLMLYA\tTM\n+CTLSISVLLAQTIFLFLIA\tTM\n+FVIAGGTLAIPILAFVASFLL\tTM\n+LLVITAIVLILSAAVGLVMYA\tTM\n+LALATALIGGVAAIASLLLYA\tTM\n+NPVIVIINLITLAAALLHTKTWFEL\tTM\n+FIVVAGVVILAVTIALLVYFL\tTM\n+IGLMCFLSIIITTVCIIMIAT\tTM\n+FSVDTQLQS\tTM\n+EVYILLNWIGYVNSGFNPLIYCRS\tTM\n+LTLAVALIVGVSAIASLLLYA\tTM\n+LYLAVVVLIGIGLTTTLVLYA\tTM\n+MSTAISVLLAQAVFLLLT\tTM\n+ALPGLMNKMEKAGCKRSVV\tTM\n+KQFIRYLISSNVGEVVCIFL\tTM\n+KYVVSSLVLVYGLIKVLTWIF\tTM\n+TGVSPVFAGGVEYA\tTM\n+IVIVLGILCFLLLLTVAVLVI\tTM\n+KSLGILGILLGVAALCTIVALSVV\tTM\n+LCLFVVTPVMVVGTAWIFL\tTM\n+LYLAIVVLIGVALTATLMLYA\tTM\n+LTILLAIAPVLALAVGLALYG\tTM\n+FFVLLLMILILVNLAMTIWIL\tTM\n+FIVLIPSVVITVIFLFFWLFM\tTM\n+ILVLLILAVITIFALVCVLLV\tTM\n+TYFIVLIPSVVITVIFLFFWLFM\tTM\n+FLVLFIFLTSFFLNYSHTMVA\tTM\n+GDKIGMFFQAMATFFGGFIIGF\tTM\n+SNGVIVGTCLAFVAGMIGMAYAA\tTM\n+FTFEGAARSDD\tTM\n+MNYMVYFNFFACVLVPLLLMLGVYL\tTM\n+AGLILLVVTLIG
MSVLVRVLI\tTM\n+VFLAVYLLGGITFLPLVLFTL\tTM\n+LIKISALVFVTVAFFYLG\tTM\n+GFFGVATFFFAALGIILIAWSAVL\tTM\n+GQWEISVIWGLGVAMAIYLTA\tTM\n+LWWIQAMTGFAMFFLGSVHLYIMMT\tTM\n+IAVVITVVFLTLLSVVILIFF\tTM\n+QVVATATFR\tTM\n+MLLCFAFLWVLGIAYYMY\tTM\n+IFCIIMLFALLGFL\tTM\n+PLCICVAFTCLALVLVTSIVL\tTM\n+LIIVLAIVVGVGAAVGLALSA\tTM\n+ILVPCVLGLLLL'..b'ILSLFTGGIKALGKTLFKMAGKAGAEHLACKATNQC\tAMP\n+GILSSIKGVAKGVAKNVAAQLLDTLKCKITGC\tAMP\n+GIMDSVKGLAKNLAGKLLDSLKCKITGC\tAMP\n+GIMDTIKDTAKTVAVGLLNKLKCKITGC\tAMP\n+GINTLKKVIQGLHEVIKLVSNHA\tAMP\n+GINTLKKVIQGLHEVIKLVSNHE\tAMP\n+GIPCGESCVWIPCISAALGCSCKNKVCYRN\tAMP\n+GKLQAFLAKMKEIAAQTL\tAMP\n+GKVWDWIKSAAKKIWSSEPVSQLKGQVLNAAKNYVAEKIGATPT\tAMP\n+GLADFLNKAVGKVVDFVKS\tAMP\n+GLFDVVKGVLKGVGKNVAGSLLEQLKCKLSGGC\tAMP\n+GLFKVLGSVAKHLLPHVAPIIAEKL\tAMP\n+GLFLDTLKGAAKDVAGKLLEGLKCKIAGCKP\tAMP\n+GLFLDTLKGLAGKLLQGLKCIKAGCKP\tAMP\n+GLFSILRGAAKFASKGLGKDLTKLGVDLVACKISKQC\tAMP\n+GLFSKFNKKKIKSGLFKIIKTAGKEAGLEALRTGIDVIGCKIKGEC\tAMP\n+GLFSKFNKKKIKSGLIKIIKTAGKEAGLEALRTGIDVIGCKIKGEC\tAMP\n+GLFSVLGSVAKHLLPHVAPIIAEKL\tAMP\n+GLFSVLGSVAKHLLPHVVPVIAEKL\tAMP\n+GLFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC\tAMP\n+GLLDFVTGVGKDIFAQLIKQI\tAMP\n+GLLDSIKGMAISAGKGALQNLLKVASCKLDKTC\tAMP\n+GLLDSLKNLAINAAKGAGQSVLNTLSCKLSKTC\tAMP\n+GLLDTIKGVAKTVAASMLDKLKCKISGC\tAMP\n+GLLGGLLGPLLGGGGGGGGGLL\tAMP\n+GLLGPLLKIAAKVGSNLL\tAMP\n+GLLGSIFGAGKKIACALSGLC\tAMP\n+GLLGSLFGAGKKVACALSGLC\tAMP\n+GLLKRIKTLL\tAMP\n+GLLSGLKKVGKHVAKNVAVSLMDSLKCKISGDC\tAMP\n+GLLSKVLGVGKKVLCGVSGLC\tAMP\n+GLLSVLGSVAKHVLPHVVPVIAEHL\tAMP\n+GLMSSIGKALGGLIVDVLKPKTPAS\tAMP\n+GLNALKKVFQGIHEAIKLINNHVQ\tAMP\n+GLNTLKKVFQGLHEAIKLINNHVQ\tAMP\n+GLWNKIKEAASKAAGKAALGFVNEMV\tAMP\n+GLWSKIKAAGKEAAKAAAKAAGKAALNAVSEAV\tAMP\n+GLWSKIKEAAKTAGLMAMGFVNDMV\tAMP\n+GLWSTIKQKGKEAAIAAAKAAGQAALGAL\tAMP\n+GPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW\tAMP\n+GRLQAFLAKMKEIAAQTL\tAMP\n+GRPNPVNNKPTPHPRL\tAMP\n+GRPNPVNTKPTPYPRL\tAMP\n+GSGRGSCRSQCMRRHEDEPWRVQECVSQCRRRRGGGD\tAMP\n+GSKKPVPIIYCNRRTGKCQRM\tAMP\n+GVLDILKNAAKNILAHAAEQI\tAMP\n+GVVDILKGAGKDLLAHLVGKISEKV\tAMP\n+GWKDWAKKAGGWLKKKGPGMAKAA
LKAAMQ\tAMP\n+GWKDWLKKGKEWLKAKGPGIVKAALQAATQ\tAMP\n+GWKDWLNKGKEWLKKKGPGIMKAALKAATQ\tAMP\n+HGVSGHGQHGVHG\tAMP\n+IFGAILPLALGALKNLIK\tAMP\n+IIEKLVNTALGLLSGL\tAMP\n+IIGHLIKTALGMLGL\tAMP\n+ILGTILGLLKGL\tAMP\n+ILGTILGLLKSL\tAMP\n+ILPLVGNLLNDLL\tAMP\n+ILQKAVLDCLKAAGSSLSKAAITAIYNKIT\tAMP\n+INWKKIAEIGKQVLSAL\tAMP\n+INWKKIAEVGGKILSSL\tAMP\n+INWLKLGKAIIDAL\tAMP\n+IPRPLDPCIAQNGRCFTGICRYPYFWIGTCRNGKSCCRRR\tAMP\n+IWLTALKFLGKHAAKHLAKQQLSKL\tAMP\n+KDRPKKPGLCPPRPQKPCVKECKNDDSCPGQQKCCNYGCKDECRDPIFVG\tAMP\n+KTCEHLADTYRGVCFTNASCDDHCKNKAHLISGTCHNWKCFCTQNC\tAMP\n+KTCENLADTY\tAMP\n+KTCENLSGTFKGPCIPDGNCNKHCRNNEHLLSGRCRDDFRCWCTNRC\tAMP\n+KTCMTKKEGWGRCLIDTTCAHSCRKYGYMGGKCQGITRRCYCLLNC\tAMP\n+KWCFRVCYRGICYRKCR\tAMP\n+KWCFRVCYRGICYRRCR\tAMP\n+KYYGNGVSCNKKGCSVDWGKAIGIIGNNSAANLATGGAAGWSK\tAMP\n+LCNERPSQTWSGNCGNTAHCDKQCQDWEKASHGACHKRENHWKCFCYFNC\tAMP\n+LFCRKGTCHFGGCPAHLVKVGSCFGFRACCKWPWDV\tAMP\n+LLGRCKVKSNRFHGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC\tAMP\n+LLGRCKVKSNRFNGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC\tAMP\n+LLKELWTKIKGAGKAVLGKIKGLL\tAMP\n+LLKELWTKMKGAGKAVLGKIKGLL\tAMP\n+LLPILGNLLNGLL\tAMP\n+LLPNLLKSLL\tAMP\n+LMCTHPLDCSN\tAMP\n+LNLKGIFKKVASLLT\tAMP\n+LPVNEAQCRQVGGYCGLRICNFPSRFLGLCTRNHPCCSRVWV\tAMP\n+MSWLNFLKYIAKYGKKAVSAAWKYKGKVLEWLNVGPTLEWVWQKLKKIAGL\tAMP\n+QDKCKKVYENYPVSKCQLANQCNYDCKLDKHARSGECFYDEKRNLQCICDYCEY\tAMP\n+QGVRNHVTCRIYGGFCVPIRCPGRTRQIGTCFGRPVKCCRRW\tAMP\n+QKLCERPSGTWSGVCGNNNACKNQCINLEKARHGSCNYVFPAHKCICYFPC\tAMP\n+QQCGRQASGRLCGNRLCCSQWGYCGSTASYCGAGCQSQCRS\tAMP\n+QRFIHPTYRPPPQPRRPVIMRA\tAMP\n+RQRVEELSKFSKKGAAARRRK\tAMP\n+RSGRGECRRQCLRRHEGQPWETQECMRRCRRRG\tAMP\n+RSVCRQIKICRRRGGCYYKCTNRPY\tAMP\n+SAPRGCWTKSYPPKPCK\tAMP\n+SCTTCVCTCSCCTT\tAMP\n+SFGLCRLRRGFCARGRCRFPSIPIGRCSRFVQCCRRVW\tAMP\n+SGISGPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW\tAMP\n+SIVPIRCRSNRDCRRFCGFRGGRCTYARQCLCGY\tAMP\n+SKGKKANKDVELARG\tAMP\n+SMLSVLKNLGKVGLGFVACKINKQC\tAMP\n+TSYGNGVHCNKSKCWIDVSELETYKAGTVSNPKDILW\tAMP\n+VDKGSYLPRPTPPRPIYNRN\tAMP\n+VDKPDYRPRPRPPNM\tAMP\n+VDKPDYRPRPWPRNMI\tAMP\n+VDKPDYRPRPWPRPN\tAMP\n+VDKPDYRPRPWPRPNM\tAMP
\n+VLPIIGNLLNSLL\tAMP\n+VLPLISMALGKLL\tAMP\n+VNPIILGVLPKFVCLITKKC\tAMP\n+VRNFVTCRINRGFCVPIRCPGHRRQIGTCLGPQIKCCR\tAMP\n+VRNHVTCRINRGFCVPIRCPGRTRQIGTCFGPRIKCCRSW\tAMP\n+VTCDLLSFEAKGFAANHSLCAAHCLAIGRRGGSCERGVCICRR\tAMP\n+VTCDLLSIKGVAEHSACAANCLSMGKAGGRCENGICLCRKTTFKELWDKRF\tAMP\n+VTCFCKRPVCDSGETQIGYCRLGNTFYRLCCRQ\tAMP\n+WLGSALKIGAKLLPSVVGLFKKKKQ\tAMP\n+WNPFKELERAGQRVRDAIISAGPAVATVGQAAAIARG\tAMP\n+WNPFKELERAGQRVRDAVISAAPAVATVGQAAAIARG\tAMP\n+YDLSKNCRLRGGICYIGKCPRRFFRSGSCSRGNVCCLRFG\tAMP\n+YSKSLPLSVLNP\tAMP\n+YVSCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF\tAMP\n'
b
diff -r 5bb52d4bf172 -r fcd5d259427c PDAUG_Peptide_Data_Access/test-data/out.tsv
--- a/PDAUG_Peptide_Data_Access/test-data/out.tsv Wed Oct 28 01:54:31 2020 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
b'@@ -1,413 +0,0 @@\n-Peptide\tTarget\n-AAGAATVLLVIVLLAGSYLAVLA\t0\n-LWIVIACLACVGSAAALTLRA\t0\n-FYRFYMLREGTAVPAVWFSIELIFGLFA\t0\n-GTLELGVDYGRAN\t0\n-KLFWRAVVAEFLATTLFVFISIGSALGFK\t0\n-HGSIGAGVDW\t0\n-ATIYLVCFCFFKQLAMIFMSVLAGNMYE\t0\n-GKLSLAATRSSE\t0\n-TSRLLLAGVALGIICSALMTWAIYF\t0\n-LGFFLVTFGFIWGMMLL\t0\n-YMLFTMIFVISSIIITVVVI\t0\n-ACFSAKVN\t0\n-LIWVAATLAGAIIAVLLVIYA\t0\n-HIPFAFAFAILAYLTLVLFRPVM\t0\n-CGLLVLLTLLLMGAIVTLGVF\t0\n-TGVYILVGVVLWTA\t0\n-LSMFIITPVMVLGTIFIFVMG\t0\n-MTLVALLVIGVSLTGYLGLKA\t0\n-QLYYQVLNFGMIVSSALMIW\t0\n-ALGALCLLLSVGSATACLLLGA\t0\n-LAVIFFFASALLYSQAAT\t0\n-MGHQLLVQLESIAITIVWSGVVAFIGYKLADLT\t0\n-ADILILSLLVIQCALGLLTIPFSA\t0\n-RHQQATFAGFIKGATWVSILSIAVLVFLALAN\t0\n-GTKWWTVGIRPMYKW\t0\n-PAIRVFATYAKWDEK\t0\n-PHVFLLFITFPILFIGWGSQS\t0\n-AGLVLWGAIIFVGWNALLLLFFW\t0\n-TQIVLLGLVTAALWAGLLTLLLLWHW\t0\n-SFIGRVFLFLMIVLPLWCGLHRMHHAMHD\t0\n-MYLGAGIALIPVIMSINYL\t0\n-FQTGFDFSD\t0\n-LISGTLGIICLSLMATLGILL\t0\n-FPFLNNLSFWFTVVGVILVNVSLG\t0\n-LDIYTRLGGMVWR\t0\n-FLIFLLVIMTVITVALLTLLF\t0\n-KDIGILYLFTAGIVGLISVCFTVYMRMELQH\t0\n-PLFYIINILVPCVLISFMINLVFYL\t0\n-HKALKTLGIIMGVFTLCWLPFFLVNIVNVF\t0\n-ANLKKLKTLMSAFLIVLGLLTFGA\t0\n-IWVGIFLLAALLAALFVCLKA\t0\n-KGAAGITGAGFITLAATLS\t0\n-VVLLAIVTLISVVQNGFFAHKV\t0\n-IAIDVLAVLVPLAAFLLDGSPDWSLYCAVWLLKPLRDS\t0\n-ALTVGTLLFLTGIGAASWAIV\t0\n-ITLIIFGVMAGVIGTILLISYGI\t0\n-WVWISLYYVAFYVVMSGIFAL\t0\n-GSNVALHVN\t0\n-GVLELGVDYGRAN\t0\n-IYLLILIILSMLCLVYASVPL\t0\n-LCSFLFVIVLFVATFYTL\t0\n-PLYFIVNVVIPCLLFSFLTGLVFYL\t0\n-ALLIAGGVGLLALAAALVLNA\t0\n-IWTWLRTTL\t0\n-PAHMIAISFFFTNALALALHGALVLSAA\t0\n-GGSLYIVGIFLPIWTFMIYIF\t0\n-GLTLGTGGR\t0\n-STVTGGYAQS\t0\n-DCFLLLVLLLYAPVGFCLLVL\t0\n-FWRAVVAEFLAMILFIFISIGSALG\t0\n-DPVLYQHILWFFGHPEVYIIILPGFGIISHVISTFAK\t0\n-LTFTEKWN\t0\n-IRDTLMRLVLTVRSN\t0\n-GKLSLAATRSTE\t0\n-GYNKFVVQYATDA\t0\n-QFHTMLMIAASGAVLIALGILCLVIQMYVSIR\t0\n-LILVLFVVLVSSVGVSLTLYA\t0\n-MASLWADYT\t0\n-DAWSGFVRGYGYDNRTN\t0\n-RWLWDFVNA\t0\n-MFVALLGLGLGQVVCSVALFF\t0\n-NMSTYVDYII\t0\n-IWVKMTFIVVLGLCFVFFWSF\t0\n-SLLISVVLVAYYLYIPLPDAI\t0\n-RKTTINGLIVAVILGVCFTGLQAYE
YSHA\t0\n-IPVQLLWVNLVTDGLPATALG\t0\n-LAVIAGGVGFIMVAVLLVLFA\t0\n-MNKRNIMNLILAGGAGLPITTLALGYGAFF\t0\n-IALIVAGLSALGIATALVLNA\t0\n-NGNMWRILDHGAISL\t0\n-VAIICAAVVAVGLIVGLSVGL\t0\n-FSFLIVAGATTLFCLLHFGVI\t0\n-VGISIATIVAIIAAIYYVPW\t0\n-PLFAWSVFITAWLILLSLPVLAGAITMLLMDRNF\t0\n-LILIGAGLGVLALAAGLILTA\t0\n-LGLAAGAIYYYNTSNVFA\t0\n-QLGAGAFGGYQV\t0\n-IAIALLVILVVCSLITMSVIL\t0\n-LAFLIGGIIGGLLLLIGVSCCLW\t0\n-NGHMLRILDHGAISM\t0\n-MALILGIDRFMSECRALTNF\t0\n-AVIAGTTLAITALAVTS\t0\n-LYIVLAILCGVSIAVALALTA\t0\n-AAVTLGVLCLGLLVTVILLILQL\t0\n-LWLVIGVLTAAALAVTLIALA\t0\n-GTLLLLTAIGAASWAIVAVLL\t0\n-LLLVASVIQGLGLLLCFTYICLHFSAL\t0\n-AMIVALIVICITAVVAALV\t0\n-LVFIGTCGAVLAVALGLVLWA\t0\n-TRFGIAAKYQ\t0\n-IPWAVLIVVLITSLIIALIAL\t0\n-LWVVCAVLAGLGLTTALVLYA\t0\n-FAGRVLAGAVMMSGIGIFGLWAGIL\t0\n-LLIVLAGLAVVAVASGLILNA\t0\n-VGSIVGGIYLGFCFNAGAPAVEAFI\t0\n-MLSLGVSYR\t0\n-LTKWFFCCVCTILTMPFF\t0\n-LATIAASAIVLVVAVGLGLMA\t0\n-PIVVTGAVY\t0\n-KPHNLPMVFTGTAILYIGWFGFNAGSA\t0\n-DEFGEQLSKVISLICVAVWL\t0\n-HTLLTGVDF\t0\n-SVELIALLAISCTFFLFMHT\t0\n-LLIALLIYWTLAFITKTIKF\t0\n-FKLVIFVVLGIAIASGLMLYA\t0\n-CTLSISVLLAQTIFLFLIA\t0\n-FVIAGGTLAIPILAFVASFLL\t0\n-LLVITAIVLILSAAVGLVMYA\t0\n-LALATALIGGVAAIASLLLYA\t0\n-NPVIVIINLITLAAALLHTKTWFEL\t0\n-FIVVAGVVILAVTIALLVYFL\t0\n-IGLMCFLSIIITTVCIIMIAT\t0\n-FSVDTQLQS\t0\n-EVYILLNWIGYVNSGFNPLIYCRS\t0\n-LTLAVALIVGVSAIASLLLYA\t0\n-LYLAVVVLIGIGLTTTLVLYA\t0\n-MSTAISVLLAQAVFLLLT\t0\n-ALPGLMNKMEKAGCKRSVV\t0\n-KQFIRYLISSNVGEVVCIFL\t0\n-KYVVSSLVLVYGLIKVLTWIF\t0\n-TGVSPVFAGGVEYA\t0\n-IVIVLGILCFLLLLTVAVLVI\t0\n-KSLGILGILLGVAALCTIVALSVV\t0\n-LCLFVVTPVMVVGTAWIFL\t0\n-LYLAIVVLIGVALTATLMLYA\t0\n-LTILLAIAPVLALAVGLALYG\t0\n-FFVLLLMILILVNLAMTIWIL\t0\n-FIVLIPSVVITVIFLFFWLFM\t0\n-ILVLLILAVITIFALVCVLLV\t0\n-TYFIVLIPSVVITVIFLFFWLFM\t0\n-FLVLFIFLTSFFLNYSHTMVA\t0\n-GDKIGMFFQAMATFFGGFIIGF\t0\n-SNGVIVGTCLAFVAGMIGMAYAA\t0\n-FTFEGAARSDD\t0\n-MNYMVYFNFFACVLVPLLLMLGVYL\t0\n-AGLILLVVTLIGMSVLVRVLI\t0\n-VFLAVYLLGGITFLPLVLFTL\t0\n-LIKISALVFVTVAFFYLG\t0\n-GFFGVATFFFAALGIILIAWSAVL\t0\n-GQWEISVIWGLGVAMAIYLTA\t0\n-LWWIQAMTGFAMFFLGSVHLYIMM
T\t0\n-IAVVITVVFLTLLSVVILIFF\t0\n-QVVATATFR\t0\n-MLLCFAFLWVLGIAYYMY\t0\n-IFCIIMLFALLGFL\t0\n-PLCICVAFTCLALVLVTSIVL\t0\n-LIIVLAIVVGVGAAVGLALSA\t0\n-ILVPCVLGLLLLPILAMLMALCV\t0\n-LGLLLAALICVGIATTLVLNA\t0\n-GYAAYYLVRKNFALAMPYLVE\t0\n-LPRTLAVLLVGAALAISGAVMQALF\t0\n-ILLFYVIFYGCLAGIFIGTIQ\t0\n-IMSTLLEVGYDNVKSQ\t0\n-LLAVALIIAMSISLAWQAAG'..b'GCKIKGEC\t1\n-GIFSKLAGKKLKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC\t1\n-GIFSKLGRKKIKNLLISGLKNVGKEVGMDVVRTGIDIAGCKIKGEC\t1\n-GIFSLIKGAAQLIGKTVAKEAGKTGLELMACKVTKQC\t1\n-GIFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC\t1\n-GILDAIKAIAKAAG\t1\n-GILDFAKTVVGGIRNALGI\t1\n-GILSLFTGGIKALGKTLFKMAGKAGAEHLACKATNQC\t1\n-GILSSIKGVAKGVAKNVAAQLLDTLKCKITGC\t1\n-GIMDSVKGLAKNLAGKLLDSLKCKITGC\t1\n-GIMDTIKDTAKTVAVGLLNKLKCKITGC\t1\n-GINTLKKVIQGLHEVIKLVSNHA\t1\n-GINTLKKVIQGLHEVIKLVSNHE\t1\n-GIPCGESCVWIPCISAALGCSCKNKVCYRN\t1\n-GKLQAFLAKMKEIAAQTL\t1\n-GKVWDWIKSAAKKIWSSEPVSQLKGQVLNAAKNYVAEKIGATPT\t1\n-GLADFLNKAVGKVVDFVKS\t1\n-GLFDVVKGVLKGVGKNVAGSLLEQLKCKLSGGC\t1\n-GLFKVLGSVAKHLLPHVAPIIAEKL\t1\n-GLFLDTLKGAAKDVAGKLLEGLKCKIAGCKP\t1\n-GLFLDTLKGLAGKLLQGLKCIKAGCKP\t1\n-GLFSILRGAAKFASKGLGKDLTKLGVDLVACKISKQC\t1\n-GLFSKFNKKKIKSGLFKIIKTAGKEAGLEALRTGIDVIGCKIKGEC\t1\n-GLFSKFNKKKIKSGLIKIIKTAGKEAGLEALRTGIDVIGCKIKGEC\t1\n-GLFSVLGSVAKHLLPHVAPIIAEKL\t1\n-GLFSVLGSVAKHLLPHVVPVIAEKL\t1\n-GLFTLIKGAAKLIGKTVAKEAGKTGLELMACKITNQC\t1\n-GLLDFVTGVGKDIFAQLIKQI\t1\n-GLLDSIKGMAISAGKGALQNLLKVASCKLDKTC\t1\n-GLLDSLKNLAINAAKGAGQSVLNTLSCKLSKTC\t1\n-GLLDTIKGVAKTVAASMLDKLKCKISGC\t1\n-GLLGGLLGPLLGGGGGGGGGLL\t1\n-GLLGPLLKIAAKVGSNLL\t1\n-GLLGSIFGAGKKIACALSGLC\t1\n-GLLGSLFGAGKKVACALSGLC\t1\n-GLLKRIKTLL\t1\n-GLLSGLKKVGKHVAKNVAVSLMDSLKCKISGDC\t1\n-GLLSKVLGVGKKVLCGVSGLC\t1\n-GLLSVLGSVAKHVLPHVVPVIAEHL\t1\n-GLMSSIGKALGGLIVDVLKPKTPAS\t1\n-GLNALKKVFQGIHEAIKLINNHVQ\t1\n-GLNTLKKVFQGLHEAIKLINNHVQ\t1\n-GLWNKIKEAASKAAGKAALGFVNEMV\t1\n-GLWSKIKAAGKEAAKAAAKAAGKAALNAVSEAV\t1\n-GLWSKIKEAAKTAGLMAMGFVNDMV\t1\n-GLWSTIKQKGKEAAIAAAKAAGQAALGAL\t1\n-GPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW\t1\n-GRLQAFLAKMKEIAAQTL\t1\n-GRPNPVNNKPTPHPRL\t1\n-GRPNPV
NTKPTPYPRL\t1\n-GSGRGSCRSQCMRRHEDEPWRVQECVSQCRRRRGGGD\t1\n-GSKKPVPIIYCNRRTGKCQRM\t1\n-GVLDILKNAAKNILAHAAEQI\t1\n-GVVDILKGAGKDLLAHLVGKISEKV\t1\n-GWKDWAKKAGGWLKKKGPGMAKAALKAAMQ\t1\n-GWKDWLKKGKEWLKAKGPGIVKAALQAATQ\t1\n-GWKDWLNKGKEWLKKKGPGIMKAALKAATQ\t1\n-HGVSGHGQHGVHG\t1\n-IFGAILPLALGALKNLIK\t1\n-IIEKLVNTALGLLSGL\t1\n-IIGHLIKTALGMLGL\t1\n-ILGTILGLLKGL\t1\n-ILGTILGLLKSL\t1\n-ILPLVGNLLNDLL\t1\n-ILQKAVLDCLKAAGSSLSKAAITAIYNKIT\t1\n-INWKKIAEIGKQVLSAL\t1\n-INWKKIAEVGGKILSSL\t1\n-INWLKLGKAIIDAL\t1\n-IPRPLDPCIAQNGRCFTGICRYPYFWIGTCRNGKSCCRRR\t1\n-IWLTALKFLGKHAAKHLAKQQLSKL\t1\n-KDRPKKPGLCPPRPQKPCVKECKNDDSCPGQQKCCNYGCKDECRDPIFVG\t1\n-KTCEHLADTYRGVCFTNASCDDHCKNKAHLISGTCHNWKCFCTQNC\t1\n-KTCENLADTY\t1\n-KTCENLSGTFKGPCIPDGNCNKHCRNNEHLLSGRCRDDFRCWCTNRC\t1\n-KTCMTKKEGWGRCLIDTTCAHSCRKYGYMGGKCQGITRRCYCLLNC\t1\n-KWCFRVCYRGICYRKCR\t1\n-KWCFRVCYRGICYRRCR\t1\n-KYYGNGVSCNKKGCSVDWGKAIGIIGNNSAANLATGGAAGWSK\t1\n-LCNERPSQTWSGNCGNTAHCDKQCQDWEKASHGACHKRENHWKCFCYFNC\t1\n-LFCRKGTCHFGGCPAHLVKVGSCFGFRACCKWPWDV\t1\n-LLGRCKVKSNRFHGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC\t1\n-LLGRCKVKSNRFNGPCLTDTHCSTVCRGEGYKGGDCHGLRRRCMCLC\t1\n-LLKELWTKIKGAGKAVLGKIKGLL\t1\n-LLKELWTKMKGAGKAVLGKIKGLL\t1\n-LLPILGNLLNGLL\t1\n-LLPNLLKSLL\t1\n-LMCTHPLDCSN\t1\n-LNLKGIFKKVASLLT\t1\n-LPVNEAQCRQVGGYCGLRICNFPSRFLGLCTRNHPCCSRVWV\t1\n-MSWLNFLKYIAKYGKKAVSAAWKYKGKVLEWLNVGPTLEWVWQKLKKIAGL\t1\n-QDKCKKVYENYPVSKCQLANQCNYDCKLDKHARSGECFYDEKRNLQCICDYCEY\t1\n-QGVRNHVTCRIYGGFCVPIRCPGRTRQIGTCFGRPVKCCRRW\t1\n-QKLCERPSGTWSGVCGNNNACKNQCINLEKARHGSCNYVFPAHKCICYFPC\t1\n-QQCGRQASGRLCGNRLCCSQWGYCGSTASYCGAGCQSQCRS\t1\n-QRFIHPTYRPPPQPRRPVIMRA\t1\n-RQRVEELSKFSKKGAAARRRK\t1\n-RSGRGECRRQCLRRHEGQPWETQECMRRCRRRG\t1\n-RSVCRQIKICRRRGGCYYKCTNRPY\t1\n-SAPRGCWTKSYPPKPCK\t1\n-SCTTCVCTCSCCTT\t1\n-SFGLCRLRRGFCARGRCRFPSIPIGRCSRFVQCCRRVW\t1\n-SGISGPLSCGRNGGVCIPIRCPVPMRQIGTCFGRPVKCCRSW\t1\n-SIVPIRCRSNRDCRRFCGFRGGRCTYARQCLCGY\t1\n-SKGKKANKDVELARG\t1\n-SMLSVLKNLGKVGLGFVACKINKQC\t1\n-TSYGNGVHCNKSKCWIDVSELETYKAGTVSNPKDILW\t1\n-VDKGSYLPRPTPPRPIYNRN\t1\n-VDKPDYRPRPRPPNM\t1\n-VDKPDYRP
RPWPRNMI\t1\n-VDKPDYRPRPWPRPN\t1\n-VDKPDYRPRPWPRPNM\t1\n-VLPIIGNLLNSLL\t1\n-VLPLISMALGKLL\t1\n-VNPIILGVLPKFVCLITKKC\t1\n-VRNFVTCRINRGFCVPIRCPGHRRQIGTCLGPQIKCCR\t1\n-VRNHVTCRINRGFCVPIRCPGRTRQIGTCFGPRIKCCRSW\t1\n-VTCDLLSFEAKGFAANHSLCAAHCLAIGRRGGSCERGVCICRR\t1\n-VTCDLLSIKGVAEHSACAANCLSMGKAGGRCENGICLCRKTTFKELWDKRF\t1\n-VTCFCKRPVCDSGETQIGYCRLGNTFYRLCCRQ\t1\n-WLGSALKIGAKLLPSVVGLFKKKKQ\t1\n-WNPFKELERAGQRVRDAIISAGPAVATVGQAAAIARG\t1\n-WNPFKELERAGQRVRDAVISAAPAVATVGQAAAIARG\t1\n-YDLSKNCRLRGGICYIGKCPRRFFRSGSCSRGNVCCLRFG\t1\n-YSKSLPLSVLNP\t1\n-YVSCLFRGARCRVYSGRSCCFGYYCRRDFPGSIFGTCSRRNF\t1\n'
b
diff -r 5bb52d4bf172 -r fcd5d259427c PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py
--- a/PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py Wed Oct 28 01:54:31 2020 +0000
+++ b/PDAUG_TSVtoFASTA/PDAUG_TSVtoFASTA.py Tue Dec 29 04:05:22 2020 +0000
[
@@ -14,17 +14,23 @@
 
         n = 0
         m = 0
+        
+        l = []
+
+        for line in lines[1:]:
+            l.append(line.split('\t')[1].strip('\n').strip('\r'))
+        l = list(set(l))
 
         for line in lines:
 
-            if '1' in line.split('\t')[1].strip('\n'):
+            if l[0] in line.split('\t')[1].strip('\n').strip('\r'):
                 n= n+1
-                of1.write('>peptide_'+str(n)+'\n')
+                of1.write('>peptide_'+str(n)+'_'+str(l[0])+'\n')
                 of1.write(line.split('\t')[0]+'\n')
 
-            if '0' in line.split('\t')[1].strip('\n'):
+            if l[1] in line.split('\t')[1].strip('\n').strip('\r'):
                 m= m+1
-                of2.write('>peptide_'+str(m)+'\n')
+                of2.write('>peptide_'+str(m)+'_'+str(l[1])+'\n')
                 of2.write(line.split('\t')[0]+'\n')
 
     elif Method == 'NoClassLabel':
@@ -47,11 +53,10 @@
     parser = argparse.ArgumentParser()
 
     parser.add_argument("-I", "--InFile", required=True, default=None, help=".fasta or .tsv")
-    parser.add_argument("-P", "--Postvs", required=False, default='Positive.fasta', help="Path to target tsv file")
-    parser.add_argument("-N", "--Negtvs", required=False, default='Negative.fasta', help="Path to target tsv file")
+    parser.add_argument("-P", "--Postvs", required=False, default='FirstDataFile.fasta', help="Path to target tsv file")
+    parser.add_argument("-N", "--Negtvs", required=False, default='SecondDataFile.fasta', help="Path to target tsv file")
     parser.add_argument("-O", "--OutFile", required=False, default='OutFile.fasta', help="Path to target tsv file")
     parser.add_argument("-M", "--Method", required=True, default=None, help="Path to target tsv file")
     args = parser.parse_args()
 
-    TSVtoFASTA(args.InFile, args.Method, args.Postvs, args.Negtvs, args.OutFile)
-
+    TSVtoFASTA(args.InFile, args.Method, args.Postvs, args.Negtvs, args.OutFile)
\ No newline at end of file
b
diff -r 5bb52d4bf172 -r fcd5d259427c readme.md
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.md Tue Dec 29 04:05:22 2020 +0000
[
@@ -0,0 +1,25 @@
+# PDAUG - a Galaxy based toolset for peptide library analysis, visualization, and machine learning modeling.
+
+### Overview 
+
+The Peptide Design and Analysis Under Galaxy (PDAUG) package is a Galaxy-based, Python-powered collection of tools, workflows, and datasets for rapid in-silico peptide library analysis. PDAUG offers tools for peptide library generation, data visualization, peptide sequence retrieval from built-in and public databases, peptide feature calculation, and machine learning modeling. The PDAUG tool suite can be downloaded and installed through the Galaxy ToolShed as a standard Galaxy tool.
+
+
+# Prebuilt Docker Image
+
+A prebuilt Docker image based on a recent Galaxy release can be obtained from the link below for quick installation.
+
+ - [Docker Image](https://github.com/jaidevjoshi83/docker_pdaug)
+
+# Contributors
+ - Jayadev Joshi

+ - Daniel Blankenberg
+
+# History
+
+ - 0.1.0: First release!
+
+# Support & Bug Reports
+
+You can file a [GitHub issue](https://github.com/jaidevjoshi83/docker_pdaug/issues).