Mercurial > repos > yating-l > regtools_junctions_extract
annotate validator.py @ 1:06d9062f5430 draft default tip
planemo upload commit 93217f9e3a1bc1afc8ce971176e4ae0169a8f265
| author | sargentl |
|---|---|
| date | Wed, 28 Nov 2018 14:20:39 -0500 |
| parents | 01ed8e112f2a |
| children |
| rev | line source |
|---|---|
|
0
01ed8e112f2a
planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff
changeset
|
1 import sys |
|
01ed8e112f2a
planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff
changeset
|
2 |
|
01ed8e112f2a
planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff
changeset
|
3 """ |
|
01ed8e112f2a
planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff
changeset
|
4 Call checkAndFixBed to check the integrity of a BED file. If the strand of a line is not "+" or "-", remove that line and report it to the user. |
|
01ed8e112f2a
planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff
changeset
|
5 For each kept line, append a new column holding the original score and set the score column to 1000. |
|
01ed8e112f2a
planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff
changeset
|
6 """ |
|
01ed8e112f2a
planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff
changeset
|
def checkAndFixBed(bedfile, revised_file):
    """Copy a BED file, dropping records whose strand is not "+" or "-".

    For every kept record the original score (5th column) is appended as a
    new last column and the score column itself is overwritten with '1000'.
    Kept records are written to ``revised_file`` tab-separated, one per line.

    Records with fewer than six whitespace-separated fields have no strand
    column; they are treated as invalid and reported instead of raising
    IndexError.  Blank lines are skipped silently, but still count towards
    the reported line numbers.

    Args:
        bedfile: path of the BED file to read.
        revised_file: path of the corrected BED file to write.

    Returns:
        A list of strings of the form ``"line<N>: <original line>"`` (N is
        1-based; the original line is kept verbatim, including its line
        terminator), one for every record that was removed.
    """
    # Store the lines that have been removed
    removedLines = []
    # Open the input first so that a missing input file does not leave an
    # empty output file behind; stream line by line instead of readlines().
    with open(bedfile, 'r') as src, open(revised_file, 'w') as dst:
        for lineno, line in enumerate(src, 1):
            fields = line.split()
            if not fields:
                # Nothing to validate or report on an empty line.
                continue
            if len(fields) < 6 or fields[5] not in ('+', '-'):
                removedLines.append("line" + str(lineno) + ": " + line)
                continue
            # Keep the original score in a new trailing column, then
            # normalise the score column to 1000.
            fields.append(fields[4])
            fields[4] = '1000'
            dst.write('\t'.join(fields) + "\n")
    return removedLines
|
01ed8e112f2a
planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff
changeset
|
29 |
|
01ed8e112f2a
planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff
changeset
|
def main():
    """Command-line entry point: ``validator.py <input.bed> <output.bed>``.

    Runs checkAndFixBed on the two paths given on the command line and, if
    any lines were removed, prints them to standard output.  Exits with a
    usage message when fewer than two arguments are supplied.
    """
    if len(sys.argv) < 3:
        sys.exit("usage: validator.py <input.bed> <output.bed>")
    inputfile = sys.argv[1]
    outputfile = sys.argv[2]
    removed = checkAndFixBed(inputfile, outputfile)
    if removed:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("\nRemoved invalid lines: \n")
        print("\n".join(removed))
|
01ed8e112f2a
planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff
changeset
|
37 |
|
01ed8e112f2a
planemo upload commit 6bf6d4ec8ff2ec6bd45b5f483ff2f83b6229d57d
yating-l
parents:
diff
changeset
|
# Run the validator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
