annotate pep_pointer.py @ 2:073a2965e3b2 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
author galaxyp
date Fri, 06 Apr 2018 18:13:10 -0400
parents 149ed6a9680f
children a6282baa8c6f
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
1
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
2 #
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
3 # Author: Praveen Kumar
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
4 # Updated: April 6th, 2018
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
5 #
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
6 #
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
7 #
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
8
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
9 import re
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
10
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
11
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
12 def main():
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
13 import sys
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
14 if len(sys.argv) == 4:
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
15 inputFile = sys.argv
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
16 infh = open(inputFile[1], "r")
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
17 # infh = open("Mus_musculus.GRCm38.90.chr.gtf", "r")
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
18
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
19 gtf = {}
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
20 gtf_transcript = {}
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
21 gtf_gene = {}
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
22 for each in infh.readlines():
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
23 a = each.split("\t")
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
24 if re.search("^[^#]", each):
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
25 if re.search("gene_biotype \"protein_coding\"", a[8]) and int(a[4].strip()) != int(a[3].strip()):
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
26 type = a[2].strip()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
27 if type == "gene" or type == "exon" or type == "CDS" or type == "five_prime_utr" or type == "three_prime_utr":
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
28 chr = "chr" + a[0].strip()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
29 strand = a[6].strip()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
30 if strand == "+":
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
31 start = a[3].strip()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
32 end = a[4].strip()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
33 elif strand == "-":
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
34 if int(a[4].strip()) > int(a[3].strip()):
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
35 start = a[3].strip()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
36 end = a[4].strip()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
37 elif int(a[4].strip()) < int(a[3].strip()):
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
38 start = a[4].strip()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
39 end = a[3].strip()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
40 else:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
41 print "Please check the start end coordinates in the GTF file"
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
42 else:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
43 print "Please check the strand information in the GTF file. It should be '+' or '-'."
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
44 if not gtf.has_key(strand):
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
45 gtf[strand] = {}
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
46 if not gtf[strand].has_key(type):
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
47 gtf[strand][type] = []
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
48 b = re.search("gene_id \"(.+?)\";", a[8].strip())
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
49 gene = b.group(1)
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
50 if type == "gene":
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
51 transcript = ""
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
52 else:
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
53 b = re.search("transcript_id \"(.+?)\";", a[8].strip())
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
54 transcript = b.group(1)
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
55 data = (chr, start, end, gene, transcript, strand, type)
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
56 gtf[strand][type].append(data)
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
57
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
58 if type == "exon":
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
59 if gtf_transcript.has_key(chr+"#"+strand):
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
60 if gtf_transcript[chr+"#"+strand].has_key(transcript+"#"+gene):
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
61 gtf_transcript[chr+"#"+strand][transcript+"#"+gene][0].append(int(start))
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
62 gtf_transcript[chr+"#"+strand][transcript+"#"+gene][1].append(int(end))
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
63 else:
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
64 gtf_transcript[chr+"#"+strand][transcript+"#"+gene] = [[],[]]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
65 gtf_transcript[chr+"#"+strand][transcript+"#"+gene][0].append(int(start))
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
66 gtf_transcript[chr+"#"+strand][transcript+"#"+gene][1].append(int(end))
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
67 else:
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
68 gtf_transcript[chr+"#"+strand] = {}
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
69 gtf_transcript[chr+"#"+strand][transcript+"#"+gene] = [[],[]]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
70 gtf_transcript[chr+"#"+strand][transcript+"#"+gene][0].append(int(start))
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
71 gtf_transcript[chr+"#"+strand][transcript+"#"+gene][1].append(int(end))
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
72
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
73 if type == "gene":
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
74 if gtf_gene.has_key(chr+"#"+strand):
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
75 gtf_gene[chr+"#"+strand][0].append(int(start))
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
76 gtf_gene[chr+"#"+strand][1].append(int(end))
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
77 gtf_gene[chr+"#"+strand][2].append(gene)
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
78 else:
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
79 gtf_gene[chr+"#"+strand] = [[0],[0],["no_gene"]]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
80 gtf_gene[chr+"#"+strand][0].append(int(start))
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
81 gtf_gene[chr+"#"+strand][1].append(int(end))
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
82 gtf_gene[chr+"#"+strand][2].append(gene)
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
83
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
84
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
85
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
86 # "Starting Reading Intron . . ."
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
87
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
88 gtf["+"]["intron"] = []
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
89 gtf["-"]["intron"] = []
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
90 for chr_strand in gtf_transcript.keys():
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
91 chr = chr_strand.split("#")[0]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
92 strand = chr_strand.split("#")[1]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
93
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
94 for transcript_gene in gtf_transcript[chr_strand].keys():
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
95 start_list = gtf_transcript[chr_strand][transcript_gene][0]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
96 end_list = gtf_transcript[chr_strand][transcript_gene][1]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
97 sorted_start_index = [i[0] for i in sorted(enumerate(start_list), key=lambda x:x[1])]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
98 sorted_end_index = [i[0] for i in sorted(enumerate(end_list), key=lambda x:x[1])]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
99 if sorted_start_index == sorted_end_index:
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
100 sorted_start = sorted(start_list)
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
101 sorted_end = [end_list[i] for i in sorted_start_index]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
102 for x in range(len(sorted_start))[1:]:
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
103 intron_start = sorted_end[x-1]+1
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
104 intron_end = sorted_start[x]-1
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
105 transcript = transcript_gene.split("#")[0]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
106 gene = transcript_gene.split("#")[1]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
107 data = (chr, str(intron_start), str(intron_end), gene, transcript, strand, "intron")
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
108 gtf[strand]["intron"].append(data)
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
109
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
110
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
111 # "Starting Reading Intergenic . . ."
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
112
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
113 gtf["+"]["intergenic"] = []
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
114 gtf["-"]["intergenic"] = []
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
115 for chr_strand in gtf_gene.keys():
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
116 chr = chr_strand.split("#")[0]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
117 strand = chr_strand.split("#")[1]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
118 start_list = gtf_gene[chr_strand][0]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
119 end_list = gtf_gene[chr_strand][1]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
120 gene_list = gtf_gene[chr_strand][2]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
121 sorted_start_index = [i[0] for i in sorted(enumerate(start_list), key=lambda x:x[1])]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
122 sorted_end_index = [i[0] for i in sorted(enumerate(end_list), key=lambda x:x[1])]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
123
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
124 sorted_start = sorted(start_list)
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
125 sorted_end = [end_list[i] for i in sorted_start_index]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
126 sorted_gene = [gene_list[i] for i in sorted_start_index]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
127 for x in range(len(sorted_start))[1:]:
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
128 intergene_start = sorted_end[x-1]+1
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
129 intergene_end = sorted_start[x]-1
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
130 if intergene_start < intergene_end:
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
131 intergene_1 = sorted_gene[x-1]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
132 intergene_2 = sorted_gene[x]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
133 gene = intergene_1 + "-#-" + intergene_2
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
134 data = (chr, str(intergene_start), str(intergene_end), gene, "", strand, "intergenic")
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
135 gtf[strand]["intergenic"].append(data)
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
136
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
137 import sqlite3
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
138 # conn = sqlite3.connect('gtf_database.db')
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
139 conn = sqlite3.connect(":memory:")
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
140 c = conn.cursor()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
141 # c.execute("DROP TABLE IF EXISTS gtf_data;")
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
142 # c.execute("CREATE TABLE IF NOT EXISTS gtf_data(chr text, start int, end int, gene text, transcript text, strand text, type text)")
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
143 c.execute("CREATE TABLE gtf_data(chr text, start int, end int, gene text, transcript text, strand text, type text)")
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
144
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
145 for strand in gtf.keys():
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
146 if strand == "+":
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
147 st = "positive"
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
148 elif strand == "-":
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
149 st = "negative"
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
150 else:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
151 print "Please check the strand information in the GTF file. It should be '+' or '-'."
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
152
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
153 for type in gtf[strand].keys():
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
154 data = gtf[strand][type]
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
155 c.executemany('INSERT INTO gtf_data VALUES (?,?,?,?,?,?,?)', data)
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
156
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
157 conn.commit()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
158
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
159 infh = open(inputFile[2], "r")
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
160 # infh = open("Mouse_Data_All_peptides_withNewDBs.txt", "r")
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
161 data = infh.readlines()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
162 # output file
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
163 outfh = open(inputFile[3], 'w')
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
164 # outfh = open("classified_1_Mouse_Data_All_peptides_withNewDBs.txt", "w")
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
165
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
166 for each in data:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
167 a = each.strip().split("\t")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
168 chr = a[0].strip()
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
169 pep_start = str(int(a[1].strip())+1)
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
170 pep_end = a[2].strip()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
171 strand = a[5].strip()
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
172 each = "\t".join(a[:6])
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
173 if (len(a) == 12 and int(a[9]) == 1) or (len(a) == 6):
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
174 c.execute("select * from gtf_data where type = 'CDS' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
175 rows = c.fetchall()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
176 if len(rows) > 0:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
177 outfh.write(each.strip() + "\tCDS\n")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
178 else:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
179 c.execute("select * from gtf_data where type = 'five_prime_utr' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
180 rows = c.fetchall()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
181 if len(rows) > 0:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
182 outfh.write(each.strip() + "\tfive_prime_utr\n")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
183 else:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
184 c.execute("select * from gtf_data where type = 'three_prime_utr' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
185 rows = c.fetchall()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
186 if len(rows) > 0:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
187 outfh.write(each.strip() + "\tthree_prime_utr\n")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
188 else:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
189 c.execute("select * from gtf_data where type = 'exon' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
190 rows = c.fetchall()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
191 if len(rows) > 0:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
192 outfh.write(each.strip() + "\texon\n")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
193 else:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
194 c.execute("select * from gtf_data where type = 'intron' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
195 rows = c.fetchall()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
196 if len(rows) > 0:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
197 outfh.write(each.strip() + "\tintron\n")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
198 else:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
199 c.execute("select * from gtf_data where type = 'gene' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
200 rows = c.fetchall()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
201 if len(rows) > 0:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
202 outfh.write(each.strip() + "\tgene\n")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
203 else:
2
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
204 c.execute("select * from gtf_data where type = 'intergenic' and chr = '"+chr+"' and start <= "+pep_start+" and end >= "+pep_end+" and strand = '"+strand+"' ")
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
205 rows = c.fetchall()
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
206 if len(rows) > 0:
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
207 outfh.write(each.strip() + "\tintergene\n")
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
208 else:
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
209 outfh.write(each.strip() + "\tOVERLAPPING_ON_TWO_REGIONS: PLEASE_LOOK_MANUALLY (Will be updated in next version)\n")
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
210 elif (len(a) == 12 and int(a[9]) == 2):
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
211 outfh.write(each.strip() + "\tSpliceJunction\n")
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
212 else:
073a2965e3b2 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit e3996c3bda75b16d19997d1e2f67267dd0ea2dff
galaxyp
parents: 0
diff changeset
213 outfh.write(each.strip() + "\tPlease check\n")
0
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
214
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
215 conn.close()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
216 outfh.close()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
217 else:
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
218 print "USAGE: python pep_pointer.py <input GTF file> <input tblastn file> <name of output file>"
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
219 return None
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
220
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
# Entry-point guard: call main() only when this file is executed as a script, not when it is imported as a module.
221 if __name__ == "__main__":
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
222 main()
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
223
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
224
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
225
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
226
149ed6a9680f planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/pep_pointer commit ac27a958fcb897c3cb56db313ebd282805b01103
galaxyp
parents:
diff changeset
227