changeset 1:03815b87eb65 draft

Uploaded
author siyuan
date Fri, 21 Feb 2014 18:08:37 -0500
parents acc2ca1a3ba4
children 74f6c4fcab2f
files pyPRADA_1.2/bioclass.pyc pyPRADA_1.2/conf.txt pyPRADA_1.2/ioprada.pyc pyPRADA_1.2/parse_gft.py pyPRADA_1.2/pyPRADA-1.2-manual.docx pyPRADA_1.2/testdata/U87_chr17p13.2.bam.bai
diffstat 6 files changed, 1 insertions(+), 111 deletions(-) [+]
line wrap: on
line diff
Binary file pyPRADA_1.2/bioclass.pyc has changed
--- a/pyPRADA_1.2/conf.txt	Thu Feb 20 00:44:58 2014 -0500
+++ b/pyPRADA_1.2/conf.txt	Fri Feb 21 18:08:37 2014 -0500
@@ -21,7 +21,7 @@
 --PBS--
 -M = szheng2@mdanderson.org
 -q = long
--l = nodes=1:ppn=i2,walltime=120:00:00		#nodes and ppn are required
+-l = nodes=1:ppn=12,walltime=120:00:00		#nodes and ppn are required
 
 --BWA aln--
 -t = 2										#This should be equal to the number of ppn
Binary file pyPRADA_1.2/ioprada.pyc has changed
--- a/pyPRADA_1.2/parse_gft.py	Thu Feb 20 00:44:58 2014 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-import bioclass
-
-featurefile='/RIS/home/verhaakgroup/PRADA/hg19broad/Ensembl64.canonical.gene.exons.tab.txt'
-
-infile=open('/RIS/home/verhaakgroup/PRADA/hg19broad/Homo_sapiens.GRCh37.64.gtf')
-txdb={}    #keep track of all transcripts
-exdb={}    #keep track of all exons
-genedb={}  #keep track of all genes
-verbose=True
-i=0
-for line in infile:
-    i+=1
-    if verbose:
-        if i%100000==0:
-            print '%d features scanned'%i
-    info=line.strip().split('\t')
-    chr,cat,entity,start,end=info[0:5]
-    strand=info[6]
-    attributes=info[8]
-    if cat != 'protein_coding':
-        continue
-    attinfo=attributes.split(';')
-    atts=dict()
-    for item in attinfo:
-        item=item.strip()
-        if not item:
-            continue
-        tmp=item.split()
-        kk=tmp[0]
-        vv=tmp[1][1:-1]
-        atts[kk]=vv
-    tx=atts['transcript_id']
-    gene=atts['gene_name']
-    if entity=='exon':
-        exon=bioclass.Exon(chr,int(start),int(end),strand,tx,gene)
-        exdb[exon.name]=exon
-        if not txdb.has_key(tx):
-            txdb[tx]=bioclass.Transcript(tx,gene)
-        txdb[tx].add_exon(exon)
-    for txname in txdb:
-        if not genedb.has_key(t.gene):
-            genedb[t.gene]=bioclass.Gene(t.gene)
-        genedb[t.gene].add_transcript(t)
-
-infile.close()
-
-
-
-def read_feature(featurefile,verbose=True):
-    '''Read exon/transcript/gene info and return bioclass obj'''
-    infile=open(featurefile)
-    txdb={}    #keep track of all transcripts
-    exdb={}    #keep track of all exons
-    genedb={}  #keep track of all genes
-    i=0
-    for line in infile:
-        i+=1
-        if verbose:
-            if i%100000==0:
-                print '%d exon records loaded'%i
-        chr,start,end,tx,gene,strand,cat=line.split()
-        if cat != 'protein_coding':
-            continue
-        exon=bioclass.Exon(chr,int(start),int(end),strand,tx,gene)
-        exdb[exon.name]=exon
-        if not txdb.has_key(tx):
-            txdb[tx]=bioclass.Transcript(tx,gene)
-        txdb[tx].add_exon(exon)
-    for txname in txdb:
-        t=txdb[txname]
-        if not genedb.has_key(t.gene):
-            genedb[t.gene]=bioclass.Gene(t.gene)
-        genedb[t.gene].add_transcript(t)
-    infile.close()
-    return (txdb,genedb)
-
-def read_feature_genes(featurefile,*args):
-    '''Read exon/transcript/gene info for spec genes and return bioclass obj'''
-    infile=open(featurefile)
-    txdb={}
-    exdb={}
-    genedb={}
-    for line in infile:
-        chr,start,end,tx,gene,strand,cat=line.split()
-        if gene not in args:
-            continue
-        if cat != 'protein_coding':
-            continue
-        exon=bioclass.Exon(chr,int(start),int(end),strand,tx,gene)
-        exdb[exon.name]=exon
-        if not txdb.has_key(tx):
-            txdb[tx]=bioclass.Transcript(tx,gene)
-        txdb[tx].add_exon(exon)
-    for txname in txdb:
-        t=txdb[txname]
-        if not genedb.has_key(t.gene):
-            genedb[t.gene]=bioclass.Gene(t.gene)
-        genedb[t.gene].add_transcript(t)
-    res=[]
-    for g in args:
-        if genedb.has_key(g):
-            gobj=genedb[g]
-        else:
-            gobj=None
-        res.append(gobj)
-    infile.close()
-    return res
-
-if __name__=='__main__':
-    pass
Binary file pyPRADA_1.2/pyPRADA-1.2-manual.docx has changed
Binary file pyPRADA_1.2/testdata/U87_chr17p13.2.bam.bai has changed