Mercurial > repos > drosofff > msp_blastparser_and_hits
annotate BlastParser_and_hits.py @ 0:69ea2a13947f draft
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
author | drosofff |
---|---|
date | Sun, 21 Jun 2015 14:31:29 -0400 |
parents | |
children | 1964514aabde |
rev | line source |
---|---|
0
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
1 #!/usr/bin/python |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
2 # blastn blastx parser revised debugged: 3-4-2015. Commit issue. |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
3 # drosofff@gmail.com |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
4 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
5 import sys |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
6 import argparse |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
7 from collections import defaultdict |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
8 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
9 def Parser(): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
10 the_parser = argparse.ArgumentParser() |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
11 the_parser.add_argument('--blast', action="store", type=str, help="Path to the blast output (tabular format, 12 column)") |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
12 the_parser.add_argument('--sequences', action="store", type=str, help="Path to the fasta file with blasted sequences") |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
13 the_parser.add_argument('--fastaOutput', action="store", type=str, help="fasta output file of blast hits") |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
14 the_parser.add_argument('--tabularOutput', action="store", type=str, help="tabular output file of blast analysis") |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
15 the_parser.add_argument('--flanking', action="store", type=int, help="number of flanking nucleotides added to the hit sequences") |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
16 the_parser.add_argument('--mode', action="store", choices=["verbose", "short"], type=str, help="reporting (verbose) or not reporting (short) oases contigs") |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
17 args = the_parser.parse_args() |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
18 if not all ( (args.sequences, args.blast, args.fastaOutput, args.tabularOutput) ): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
19 the_parser.error('argument(s) missing, call the -h option of the script') |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
20 if not args.flanking: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
21 args.flanking = 0 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
22 return args |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
23 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
24 def median(lst): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
25 lst = sorted(lst) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
26 if len(lst) < 1: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
27 return None |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
28 if len(lst) %2 == 1: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
29 return lst[((len(lst)+1)/2)-1] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
30 if len(lst) %2 == 0: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
31 return float(sum(lst[(len(lst)/2)-1:(len(lst)/2)+1]))/2.0 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
32 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
33 def mean(lst): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
34 if len(lst) < 1: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
35 return 0 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
36 return sum(lst) / float(len(lst)) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
37 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
38 def getfasta (fastafile): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
39 fastadic = {} |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
40 for line in open (fastafile): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
41 if line[0] == ">": |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
42 header = line[1:-1] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
43 fastadic[header] = "" |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
44 else: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
45 fastadic[header] += line |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
46 for header in fastadic: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
47 fastadic[header] = "".join(fastadic[header].split("\n")) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
48 return fastadic |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
49 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
50 def insert_newlines(string, every=60): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
51 lines = [] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
52 for i in xrange(0, len(string), every): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
53 lines.append(string[i:i+every]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
54 return '\n'.join(lines) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
55 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
56 def getblast (blastfile): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
57 '''blastinfo [0] Percentage of identical matches |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
58 blastinfo [1] Alignment length |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
59 blastinfo [2] Number of mismatches |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
60 blastinfo [3] Number of gap openings |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
61 blastinfo [4] Start of alignment in query |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
62 blastinfo [5] End of alignment in query |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
63 blastinfo [6] Start of alignment in subject (database hit) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
64 blastinfo [7] End of alignment in subject (database hit) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
65 blastinfo [8] Expectation value (E-value) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
66 blastinfo [9] Bit score |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
67 blastinfo [10] Subject length (NEED TO BE SPECIFIED WHEN RUNNING BLAST) ''' |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
68 blastdic = defaultdict (dict) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
69 for line in open (blastfile): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
70 fields = line[:-1].split("\t") |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
71 transcript = fields[0] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
72 subject = fields[1] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
73 blastinfo = [float(fields[2]) ] # blastinfo[0] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
74 blastinfo = blastinfo + [int(i) for i in fields[3:10] ] # blastinfo[1:8] insets 1 to 7 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
75 blastinfo.append(fields[10]) # blastinfo[8] E-value remains as a string type |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
76 blastinfo.append(float(fields[11])) # blastinfo[9] Bit score |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
77 blastinfo.append(int(fields[12])) # blastinfo[10] Subject length MUST BE RETRIEVED THROUGH A 13 COLUMN BLAST OUTPUT |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
78 try: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
79 blastdic[subject][transcript].append(blastinfo) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
80 except: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
81 blastdic[subject][transcript] = [ blastinfo ] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
82 return blastdic |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
83 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
84 def getseq (fastadict, transcript, up, down, orientation="direct"): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
85 def reverse (seq): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
86 revdict = {"A":"T","T":"A","G":"C","C":"G","N":"N"} |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
87 revseq = [revdict[i] for i in seq[::-1]] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
88 return "".join(revseq) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
89 pickseq = fastadict[transcript][up-1:down] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
90 if orientation == "direct": |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
91 return pickseq |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
92 else: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
93 return reverse(pickseq) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
94 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
95 def subjectCoverage (fastadict, blastdict, subject, QueriesFlankingNucleotides=0): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
96 SubjectCoverageList = [] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
97 HitDic = {} |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
98 bitScores = [] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
99 for transcript in blastdict[subject]: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
100 prefix = "%s--%s_" % (subject, transcript) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
101 hitNumber = 0 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
102 for hit in blastdict[subject][transcript]: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
103 hitNumber += 1 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
104 suffix = "hit%s_IdMatch=%s,AligLength=%s,E-val=%s" % (hitNumber, hit[0], hit[1], hit[8]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
105 HitDic[prefix+suffix] = GetHitSequence (fastadict, transcript, hit[4], hit[5], QueriesFlankingNucleotides) #query coverage by a hit is in hit[4:6] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
106 SubjectCoverageList += range (min([hit[6], hit[7]]), max([hit[6], hit[7]]) + 1) # subject coverage by a hit is in hit[6:8] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
107 bitScores.append(hit[9]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
108 subjectLength = hit [10] # always the same value for a given subject. Stupid but simple |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
109 TotalSubjectCoverage = len ( set (SubjectCoverageList) ) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
110 RelativeSubjectCoverage = TotalSubjectCoverage/float(subjectLength) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
111 return HitDic, subjectLength, TotalSubjectCoverage, RelativeSubjectCoverage, max(bitScores), mean(bitScores) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
112 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
113 def GetHitSequence (fastadict, FastaHeader, leftCoordinate, rightCoordinate, FlankingValue): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
114 if rightCoordinate > leftCoordinate: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
115 polarity = "direct" |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
116 else: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
117 polarity = "reverse" |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
118 leftCoordinate, rightCoordinate = rightCoordinate, leftCoordinate |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
119 if leftCoordinate - FlankingValue > 0: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
120 leftCoordinate -= FlankingValue |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
121 else: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
122 leftCoordinate = 1 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
123 return getseq (fastadict, FastaHeader, leftCoordinate, rightCoordinate, polarity) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
124 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
125 def outputParsing (F, Fasta, results, Xblastdict, fastadict, mode="verbose"): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
126 F= open(F, "w") |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
127 Fasta=open(Fasta, "w") |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
128 if mode == "verbose": |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
129 print >>F, "# SeqId\t%Identity\tAlignLength\tStartSubject\tEndSubject\t%QueryHitCov\tE-value\tBitScore\n" |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
130 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
131 print >> F, "#\n# %s" % subject |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
132 print >> F, "# Suject Length: %s" % (results[subject]["subjectLength"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
133 print >> F, "# Total Subject Coverage: %s" % (results[subject]["TotalCoverage"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
134 print >> F, "# Relative Subject Coverage: %s" % (results[subject]["RelativeSubjectCoverage"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
135 print >> F, "# Maximum Bit Score: %s" % (results[subject]["maxBitScores"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
136 print >> F, "# Mean Bit Score: %s" % (results[subject]["meanBitScores"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
137 for header in results[subject]["HitDic"]: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
138 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
139 print >> Fasta, "" # final carriage return for the sequence |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
140 for transcript in Xblastdict[subject]: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
141 transcriptSize = float(len(fastadict[transcript])) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
142 for hit in Xblastdict[subject][transcript]: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
143 percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov = hit[0], hit[1], hit[6], hit[7], "%.1f" % (abs(hit[5]-hit[4])/transcriptSize*100) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
144 Eval, BitScore = hit[8], hit[9] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
145 info = [transcript] + [percentIdentity, alignLenght, subjectStart, subjectEnd, queryCov, Eval, BitScore] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
146 info = [str(i) for i in info] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
147 info = "\t".join(info) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
148 print >> F, info |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
149 else: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
150 print >>F, "# subject\tsubject length\tTotal Subject Coverage\tRelative Subject Coverage\tMaximum Bit Score\tMean Bit Score" |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
151 for subject in sorted (results, key=lambda x: results[x]["meanBitScores"], reverse=True): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
152 line = [] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
153 line.append(subject) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
154 line.append(results[subject]["subjectLength"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
155 line.append(results[subject]["TotalCoverage"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
156 line.append(results[subject]["RelativeSubjectCoverage"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
157 line.append(results[subject]["maxBitScores"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
158 line.append(results[subject]["meanBitScores"]) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
159 line = [str(i) for i in line] |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
160 print >> F, "\t".join(line) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
161 for header in results[subject]["HitDic"]: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
162 print >> Fasta, ">%s\n%s" % (header, insert_newlines(results[subject]["HitDic"][header]) ) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
163 print >> Fasta, "" # final carriage return for the sequence |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
164 F.close() |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
165 Fasta.close() |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
166 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
167 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
168 |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
169 def __main__ (): |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
170 args = Parser() |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
171 fastadict = getfasta (args.sequences) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
172 Xblastdict = getblast (args.blast) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
173 results = defaultdict(dict) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
174 for subject in Xblastdict: |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
175 results[subject]["HitDic"], results[subject]["subjectLength"], results[subject]["TotalCoverage"], results[subject]["RelativeSubjectCoverage"], results[subject]["maxBitScores"], results[subject]["meanBitScores"] = subjectCoverage(fastadict, Xblastdict, subject, args.flanking) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
176 outputParsing (args.tabularOutput, args.fastaOutput, results, Xblastdict, fastadict, args.mode) |
69ea2a13947f
planemo upload for repository https://bitbucket.org/drosofff/gedtools/
drosofff
parents:
diff
changeset
|
177 if __name__=="__main__": __main__() |