Mercurial > repos > yating-l > rename_tracks
annotate rename_tracks.py @ 0:8b19a698d90e draft
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
author | yating-l |
---|---|
date | Mon, 25 Jun 2018 15:01:41 -0400 |
parents | |
children | 72ccb2f848a0 |
rev | line source |
---|---|
0
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
1 # -*- coding: utf8 -*- |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
2 |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
3 """ |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
4 Rename the custom evidence tracks so that the tracks use the same sequence names as the renamed reference |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
5 """ |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
6 import sys |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
7 import csv |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
8 import subprocess |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
9 import tempfile |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
10 |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
def rename_interval(inputFile, nameDict, renamedFile):
    """Copy an interval file, renaming the sequence name in its first column.

    Lines that start with ``#`` and blank lines are copied unchanged. For
    every other line the first whitespace-delimited field is looked up in
    ``nameDict``; when it is present, that field (and only that field) is
    replaced by the mapped name. Lines whose first field is not in the
    mapping are copied unchanged.

    Args:
        inputFile: path of the text file to read (BED/GFF3/GTF/bedGraph
            style: sequence name in the first column).
        nameDict: dict mapping original sequence names to new names.
        renamedFile: path of the file to write.
    """
    # Both handles are managed by ``with`` so the output is flushed and
    # closed even if reading fails part-way through.
    with open(inputFile, 'r') as reader, open(renamedFile, 'w') as writer:
        # Stream line by line instead of loading the whole track in memory.
        for line in reader:
            if not line.startswith("#"):
                fields = line.split(None, 1)
                # ``fields`` is empty for blank lines; nothing to rename.
                if fields and fields[0] in nameDict:
                    # count=1 restricts the substitution to the first
                    # column: only whitespace can precede the first token,
                    # so its first occurrence in the line is the token
                    # itself. Without the limit a short name such as "1"
                    # would also rewrite coordinates and attributes.
                    line = line.replace(fields[0], nameDict[fields[0]], 1)
            writer.write(line)
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
22 |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
def _rename_sam_header(header_text, nameDict):
    """Return ``header_text`` with ``@SQ`` sequence names renamed via ``nameDict``."""
    renamed_lines = []
    for line in header_text.splitlines(True):
        if line.startswith('@SQ'):
            body = line.rstrip('\r\n')
            ending = line[len(body):]
            fields = body.split('\t')
            for index, field in enumerate(fields):
                # Exact match on the SN value, so "chr1" never rewrites
                # part of "chr10" and names are never treated as patterns.
                if field.startswith('SN:') and field[3:] in nameDict:
                    fields[index] = 'SN:' + str(nameDict[field[3:]])
            line = '\t'.join(fields) + ending
        renamed_lines.append(line)
    return ''.join(renamed_lines)


def rename_bam(inputFile, nameDict, renamedFile):
    """Write a copy of a BAM file whose header uses the renamed sequences.

    The header is read with ``samtools view -H``, the ``SN:`` field of each
    ``@SQ`` line is replaced when its value is a key of ``nameDict``, and the
    new header is fed to ``samtools reheader - <input>`` whose output is
    written to ``renamedFile``.

    Args:
        inputFile: path of the BAM file to read.
        nameDict: dict mapping original sequence names to new names.
        renamedFile: path of the BAM file to write.

    Raises:
        subprocess.CalledProcessError: if either samtools command exits with
            a non-zero status.
    """
    header_text = subprocess.check_output(
        ['samtools', 'view', '-H', inputFile], universal_newlines=True)
    new_header = _rename_sam_header(header_text, nameDict)

    reheader_cmd = ['samtools', 'reheader', '-', inputFile]
    with open(renamedFile, 'wb') as out:
        reheader = subprocess.Popen(
            reheader_cmd, stdin=subprocess.PIPE, stdout=out,
            universal_newlines=True)
        # communicate() sends the header, closes stdin and waits, so the
        # output file is complete before this function returns.
        reheader.communicate(new_header)
    if reheader.returncode != 0:
        raise subprocess.CalledProcessError(reheader.returncode, reheader_cmd)
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
33 |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
def rename_bigwig(inputFile, renamedReference, nameDict, renamedFile):
    """Write a copy of a bigWig file that uses the renamed sequence names.

    Steps: convert the bigWig to bedGraph (``bigWigToBedGraph``), compute the
    chromosome sizes of the renamed reference (``faSize -detailed -tab``),
    rename the first column with ``rename_interval``, sort the renamed
    bedGraph, and convert it back with ``bedGraphToBigWig``.

    Args:
        inputFile: path of the bigWig file to read.
        renamedReference: path of the renamed reference FASTA passed to
            ``faSize``.
        nameDict: dict mapping original sequence names to new names.
        renamedFile: path of the bigWig file to write.

    Raises:
        subprocess.CalledProcessError: if any external command exits with a
            non-zero status.
    """
    import os  # local import: only needed to build the environment for sort

    # No buffering argument: nothing is written through these Python file
    # objects (the child processes write to the paths / descriptors), and
    # the Python 2-only ``bufsize`` keyword is a TypeError on Python 3.
    bedGraphFile = tempfile.NamedTemporaryFile()
    chrom_sizes = tempfile.NamedTemporaryFile()
    renamed_bedGraphFile = tempfile.NamedTemporaryFile()
    sorted_bedGraphFile = tempfile.NamedTemporaryFile()
    try:
        subprocess.check_call(
            ['bigWigToBedGraph', inputFile, bedGraphFile.name])
        subprocess.check_call(
            ['faSize', '-detailed', '-tab', renamedReference],
            stdout=chrom_sizes)
        # Rename BEFORE sorting: the new names may collate differently from
        # the old ones, and the sorted order must reflect the names that
        # bedGraphToBigWig will actually see.
        rename_interval(bedGraphFile.name, nameDict, renamed_bedGraphFile.name)
        # LC_ALL=C gives the byte-wise ordering documented for
        # bedGraphToBigWig input.
        subprocess.check_call(
            ['sort', '-k1,1', '-k2,2n', renamed_bedGraphFile.name],
            stdout=sorted_bedGraphFile,
            env=dict(os.environ, LC_ALL='C'))
        subprocess.check_call(
            ['bedGraphToBigWig', sorted_bedGraphFile.name,
             chrom_sizes.name, renamedFile])
    finally:
        # Closing a NamedTemporaryFile also deletes it.
        for tmp in (bedGraphFile, chrom_sizes,
                    renamed_bedGraphFile, sorted_bedGraphFile):
            tmp.close()
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
45 |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
def getNameDict(nameMapping):
    """Load the sequence-name mapping from a comma-separated file.

    Each non-blank row supplies the original name in its first column and
    the new name in its second column; any further columns are ignored.
    Blank rows are skipped. When a name appears more than once, the last
    row wins.

    Args:
        nameMapping: path of the CSV file to read.

    Returns:
        dict mapping original sequence names to new sequence names.

    Raises:
        ValueError: if a non-blank row has fewer than two columns.
    """
    nameDict = {}
    with open(nameMapping, 'r') as f:
        for row_number, row in enumerate(csv.reader(f), 1):
            # csv.reader yields [] for empty lines; also tolerate rows that
            # contain nothing but whitespace.
            if not any(field.strip() for field in row):
                continue
            if len(row) < 2:
                raise ValueError(
                    "%s: row %d has %d column(s), expected at least 2"
                    % (nameMapping, row_number, len(row)))
            nameDict[row[0]] = row[1]
    return nameDict
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
53 |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
def main():
    """Command-line entry point.

    Positional arguments (read from ``sys.argv``):
        1. input track file
        2. CSV file mapping original to new sequence names
        3. input format: ``bed``, ``gff3``, ``gtf``, ``bam`` or ``bigwig``
        4. output file
        5. renamed reference FASTA (required for ``bigwig`` only)

    Exits with an error message and non-zero status when arguments are
    missing or the format is not supported, instead of failing with an
    IndexError or silently producing no output.
    """
    interval_formats = ("bed", "gff3", "gtf")
    supported_formats = interval_formats + ("bam", "bigwig")

    args = sys.argv[1:]
    if len(args) < 4:
        sys.exit(
            "usage: %s <input> <name_mapping.csv> <%s> <output> "
            "[renamed_reference]" % (sys.argv[0], "|".join(supported_formats)))
    inputFile, nameMapping, inputFormat, outputfile = args[:4]

    # Validate everything before touching any file.
    if inputFormat not in supported_formats:
        sys.exit("unsupported input format %r (expected one of: %s)"
                 % (inputFormat, ", ".join(supported_formats)))
    if inputFormat == "bigwig" and len(args) < 5:
        sys.exit("bigwig input requires the renamed reference as the "
                 "fifth argument")

    nameDict = getNameDict(nameMapping)
    if inputFormat in interval_formats:
        rename_interval(inputFile, nameDict, outputfile)
    elif inputFormat == "bam":
        rename_bam(inputFile, nameDict, outputfile)
    else:  # bigwig
        renamedReference = args[4]
        rename_bigwig(inputFile, renamedReference, nameDict, outputfile)
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
67 |
8b19a698d90e
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff
changeset
|
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()