Mercurial > repos > jay > pdaug_word_vector_descriptor
view PDAUG_Sequence_Property_Based_Descriptors/PDAUG_Sequence_Property_Based_Descriptors.py @ 0:a3a1d9bea1ad draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author | jay |
---|---|
date | Wed, 28 Oct 2020 02:29:04 +0000 |
parents | |
children | 8de738fa6552 |
line wrap: on
line source
import pandas as pd from pydpi.pypro import PyPro import os def BinaryDescriptor(seq): BinaryCode = { 'A':"1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0", 'C':"0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0", 'D':"0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0", 'E':"0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0", 'F':"0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0", 'G':"0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0", 'H':"0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0", 'I':"0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0", 'K':"0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0", 'L':"0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0", 'M':"0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0", 'N':"0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0", 'P':"0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0", 'Q':"0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0", 'R':"0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0", 'S':"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0", 'T':"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0", 'V':"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0", 'W':"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0", 'Y':"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1", } lines = [] Df = [] for s in seq: des = [] for n in s: des.append(BinaryCode[n.upper()]) lines.append(','.join(des).split(',')) df = pd.DataFrame(lines) return df def Decriptor_generator(InFile, Lamda, Weight, DesType, Out_file): list_pep_name = [] f = open(InFile) lines = f.readlines() for line in lines: if ">" in line: pass else: list_pep_name.append(line.strip('\n')) out_df = pd.DataFrame() for seq in list_pep_name: protein = PyPro() protein.ReadProteinSequence(seq) if DesType == 'PAAC': DS = protein.GetPAAC(lamda=int(Lamda), weight=float(Weight)) df = pd.DataFrame(DS, index=[0]) elif DesType == 'APAAC': DS = protein.GetAPAAC(lamda=int(Lamda), weight=float(Weight)) df = pd.DataFrame(DS, index=[0]) elif DesType == 'CTD': DS = protein.GetCTD() df = pd.DataFrame(DS, index=[0]) elif DesType == 'DPComp': DS = protein.GetDPComp() df = pd.DataFrame(DS, index=[0]) elif DesType == 'GearyAuto': DS = protein.GetGearyAuto() df = pd.DataFrame(DS, index=[0]) elif DesType == 'MoranAuto': DS = protein.GetMoranAuto() df = pd.DataFrame(DS, index=[0]) elif DesType == 'MoreauBrotoAuto': DS = protein.GetMoreauBrotoAuto() df = pd.DataFrame(DS, index=[0]) elif DesType == 'QSO': DS = protein.GetQSO() df = pd.DataFrame(DS, index=[0]) elif DesType == 'SOCN': DS = protein.GetSOCN() df = pd.DataFrame(DS, index=[0]) elif DesType == 'TPComp': DS = protein.GetTPComp() df = pd.DataFrame(DS, index=[0]) elif DesType == 'All': DS_1 = protein.GetPAAC(lamda=int(Lamda), weight=float(Weight)) DS_2 = protein.GetAPAAC(lamda=int(Lamda), weight=float(Weight)) DS_3 = protein.GetCTD() DS_4 = protein.GetDPComp() DS_5 = protein.GetGearyAuto() DS_6 = protein.GetMoranAuto() DS_7 = protein.GetMoreauBrotoAuto() DS_8 = protein.GetQSO() DS_9 = protein.GetSOCN() DS_10 = protein.GetTPComp() DS = {} for D in (DS_1, DS_2, DS_3, DS_4, DS_5, DS_6, DS_7, DS_8, DS_9, DS_10): DS.update(D) df = pd.DataFrame(DS, index=[0]) if DesType == 'BinaryDescriptor': out_df = BinaryDescriptor(list_pep_name) else: out_df = pd.concat([out_df, df], axis=0) out_df.to_csv(Out_file, index=False, sep='\t') if __name__=="__main__": import argparse parser = argparse.ArgumentParser() parser.add_argument("-I", "--InFile", required=True, default=None, help="pep file") parser.add_argument("-l", "--Lamda", required=False, default=50, help="pep file") parser.add_argument("-w", "--Weight", required=False, default=0.5, help="pep file") parser.add_argument("-t", "--DesType", required=True, default=None, help="out put file name for str Descriptors") parser.add_argument("-O", "--Out_file", required=False, default="Out.tsv", help="Path to target tsv file") args = parser.parse_args() Decriptor_generator(args.InFile, args.Lamda, args.Weight, args.DesType, args.Out_file)