Mercurial > repos > siyuan > prada
changeset 1:03815b87eb65 draft
Uploaded
author | siyuan |
---|---|
date | Fri, 21 Feb 2014 18:08:37 -0500 |
parents | acc2ca1a3ba4 |
children | 74f6c4fcab2f |
files | pyPRADA_1.2/bioclass.pyc pyPRADA_1.2/conf.txt pyPRADA_1.2/ioprada.pyc pyPRADA_1.2/parse_gft.py pyPRADA_1.2/pyPRADA-1.2-manual.docx pyPRADA_1.2/testdata/U87_chr17p13.2.bam.bai |
diffstat | 6 files changed, 1 insertions(+), 111 deletions(-) [+] |
line wrap: on
line diff
--- a/pyPRADA_1.2/conf.txt Thu Feb 20 00:44:58 2014 -0500 +++ b/pyPRADA_1.2/conf.txt Fri Feb 21 18:08:37 2014 -0500 @@ -21,7 +21,7 @@ --PBS-- -M = szheng2@mdanderson.org -q = long --l = nodes=1:ppn=i2,walltime=120:00:00 #nodes and ppn are required +-l = nodes=1:ppn=12,walltime=120:00:00 #nodes and ppn are required --BWA aln-- -t = 2 #This should be equal to the number of ppn
--- a/pyPRADA_1.2/parse_gft.py Thu Feb 20 00:44:58 2014 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,110 +0,0 @@ -import bioclass - -featurefile='/RIS/home/verhaakgroup/PRADA/hg19broad/Ensembl64.canonical.gene.exons.tab.txt' - -infile=open('/RIS/home/verhaakgroup/PRADA/hg19broad/Homo_sapiens.GRCh37.64.gtf') -txdb={} #keep track of all transcripts -exdb={} #keep track of all exons -genedb={} #keep track of all genes -verbose=True -i=0 -for line in infile: - i+=1 - if verbose: - if i%100000==0: - print '%d features scanned'%i - info=line.strip().split('\t') - chr,cat,entity,start,end=info[0:5] - strand=info[6] - attributes=info[8] - if cat != 'protein_coding': - continue - attinfo=attributes.split(';') - atts=dict() - for item in attinfo: - item=item.strip() - if not item: - continue - tmp=item.split() - kk=tmp[0] - vv=tmp[1][1:-1] - atts[kk]=vv - tx=atts['transcript_id'] - gene=atts['gene_name'] - if entity=='exon': - exon=bioclass.Exon(chr,int(start),int(end),strand,tx,gene) - exdb[exon.name]=exon - if not txdb.has_key(tx): - txdb[tx]=bioclass.Transcript(tx,gene) - txdb[tx].add_exon(exon) - for txname in txdb: - if not genedb.has_key(t.gene): - genedb[t.gene]=bioclass.Gene(t.gene) - genedb[t.gene].add_transcript(t) - -infile.close() - - - -def read_feature(featurefile,verbose=True): - '''Read exon/transcript/gene info and return bioclass obj''' - infile=open(featurefile) - txdb={} #keep track of all transcripts - exdb={} #keep track of all exons - genedb={} #keep track of all genes - i=0 - for line in infile: - i+=1 - if verbose: - if i%100000==0: - print '%d exon records loaded'%i - chr,start,end,tx,gene,strand,cat=line.split() - if cat != 'protein_coding': - continue - exon=bioclass.Exon(chr,int(start),int(end),strand,tx,gene) - exdb[exon.name]=exon - if not txdb.has_key(tx): - txdb[tx]=bioclass.Transcript(tx,gene) - txdb[tx].add_exon(exon) - for txname in txdb: - t=txdb[txname] - if not genedb.has_key(t.gene): - genedb[t.gene]=bioclass.Gene(t.gene) - genedb[t.gene].add_transcript(t) - infile.close() - return (txdb,genedb) - -def read_feature_genes(featurefile,*args): - '''Read exon/transcript/gene info for spec genes and return bioclass obj''' - infile=open(featurefile) - txdb={} - exdb={} - genedb={} - for line in infile: - chr,start,end,tx,gene,strand,cat=line.split() - if gene not in args: - continue - if cat != 'protein_coding': - continue - exon=bioclass.Exon(chr,int(start),int(end),strand,tx,gene) - exdb[exon.name]=exon - if not txdb.has_key(tx): - txdb[tx]=bioclass.Transcript(tx,gene) - txdb[tx].add_exon(exon) - for txname in txdb: - t=txdb[txname] - if not genedb.has_key(t.gene): - genedb[t.gene]=bioclass.Gene(t.gene) - genedb[t.gene].add_transcript(t) - res=[] - for g in args: - if genedb.has_key(g): - gobj=genedb[g] - else: - gobj=None - res.append(gobj) - infile.close() - return res - -if __name__=='__main__': - pass