view PDAUG_Peptide_Data_Access/PDAUG_Peptide_Data_Access.py @ 0:7557b48b2872 draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author jay
date Wed, 28 Oct 2020 02:10:12 +0000
parents
children 9b5e990a0ebb
line wrap: on
line source

import modlamp
from modlamp.datasets import load_AMPvsTM
from modlamp.datasets import load_AMPvsUniProt
from modlamp.datasets import load_ACPvsTM
from modlamp.datasets import load_ACPvsRandom
from modlamp.database import query_apd
from modlamp.database import query_camp
import os
import pandas as pd

def DataGen(DataBaseType, OutFile, IDs):
    """Write a modlamp dataset or database query result to a TSV file.

    For the dataset types the output has two columns, ``Peptide`` and
    ``Target``. For the query types the output has a single ``Peptides``
    column holding whatever the modlamp query function returned.

    Args:
        DataBaseType: One of 'AMPvsTM', 'AMPvsUniProt', 'ACPvsTM',
            'ACPvsRandom' (loaded via ``modlamp.datasets``) or
            'query_apd', 'query_camp' (looked up via ``modlamp.database``).
        OutFile: Path of the tab-separated file to write.
        IDs: Comma-separated integer IDs as a single string. Only used,
            and only required, for the two query types.

    Raises:
        SystemExit: With a message (non-zero exit status) when
            ``DataBaseType`` is not recognised, or when a query type is
            requested without a valid comma-separated list of integers.
    """
    if DataBaseType in ('query_apd', 'query_camp'):
        # Validate the IDs before touching the remote database so that a
        # missing or malformed list gives a clear message, not a traceback.
        if not IDs:
            raise SystemExit(
                "%s requires a comma-separated list of integer IDs "
                "(-L/--List)" % DataBaseType
            )
        try:
            id_list = [int(i) for i in IDs.split(',')]
        except ValueError as err:
            raise SystemExit(
                "IDs must be comma-separated integers, got %r" % (IDs,)
            ) from err

        if DataBaseType == 'query_apd':
            peptides = query_apd(id_list)
        else:
            peptides = query_camp(id_list)

        result = pd.DataFrame(peptides, columns=['Peptides'])
        result.to_csv(OutFile, index=False, sep='\t')
        # Return instead of exit(): terminating the interpreter from inside
        # a function would also kill any program that imports this module.
        return

    if DataBaseType == 'AMPvsTM':
        data = load_AMPvsTM()
    elif DataBaseType == 'AMPvsUniProt':
        data = load_AMPvsUniProt()
    elif DataBaseType == 'ACPvsTM':
        data = load_ACPvsTM()
    elif DataBaseType == 'ACPvsRandom':
        data = load_ACPvsRandom()
    else:
        # Non-zero exit status so the calling process sees the failure.
        raise SystemExit("Enter Correct Values")

    sequences = pd.DataFrame(data.sequences, columns=['Peptide'])
    targets = pd.DataFrame(data.target.tolist(), columns=['Target'])

    combined = pd.concat([sequences, targets], axis=1)
    combined.to_csv(OutFile, index=False, sep='\t')


if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to DataGen.

    import argparse

    # Values DataGen knows how to handle; letting argparse check them means
    # a typo fails with a usage message and a non-zero exit status.
    dataset_choices = [
        'AMPvsTM',
        'AMPvsUniProt',
        'ACPvsTM',
        'ACPvsRandom',
        'query_apd',
        'query_camp',
    ]

    parser = argparse.ArgumentParser()

    parser.add_argument("-d", "--DataBaseType",
                        required=True,
                        choices=dataset_choices,
                        help="Name of the dataset or database query")

    parser.add_argument("-o", "--OutFile",
                        required=False,
                        default='Out.tsv',
                        help="Output file name (tab-separated values)")

    parser.add_argument("-L", "--List",
                        required=False,
                        default=None,
                        help="Comma-separated list of integer IDs "
                             "(required for query_apd and query_camp)")

    args = parser.parse_args()

    # The query types split and convert the ID list, so it must be present.
    if args.DataBaseType in ('query_apd', 'query_camp') and not args.List:
        parser.error("-L/--List is required when -d/--DataBaseType is %s"
                     % args.DataBaseType)

    DataGen(args.DataBaseType, args.OutFile, args.List)