annotate rename_tracks.py @ 1:72ccb2f848a0 draft default tip

planemo upload commit 869adf4bb92923a8a5cb0d62ae40cdab9bf6e1d7
author yating-l
date Thu, 26 Jul 2018 15:16:02 -0400
parents 8b19a698d90e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
1 # -*- coding: utf8 -*-
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
2
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
3 """
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
4 Rename the custom evidence tracks so that the tracks use the same sequence names as the renamed reference
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
5 """
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
6 import sys
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
7 import csv
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
8 import subprocess
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
9 import tempfile
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
10
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
def rename_interval(inputFile, nameDict, renamedFile):
    """Copy a tab-delimited interval file, renaming its sequence column.

    Each line of ``inputFile`` is written to ``renamedFile``. For lines that
    do not start with ``#``, the text before the first tab is looked up in
    ``nameDict`` and, when present, replaced by the mapped name. Lines
    starting with ``#`` and lines whose first column is not a key of
    ``nameDict`` are copied unchanged.

    Args:
        inputFile: path of the interval file to read (e.g. BED/GFF3/GTF/bedGraph).
        nameDict: dict mapping a sequence name found in the input to its new name.
        renamedFile: path of the file to write; it is created or truncated.
    """
    # Both handles are managed by ``with`` so they are closed even on error,
    # and the input is streamed instead of being loaded whole into memory.
    with open(inputFile, 'r') as reader, open(renamedFile, 'w') as writer:
        for line in reader:
            if not line.startswith("#"):
                scaffold_name, separator, remainder = line.partition('\t')
                if scaffold_name in nameDict:
                    # Rebuild the line from its parts so only the first
                    # column changes; a plain str.replace would also rewrite
                    # the same text wherever it appears in later columns.
                    line = nameDict[scaffold_name] + separator + remainder
            writer.write(line)
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
22
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
def rename_bam(inputFile, nameDict, renamedFile):
    """Write a copy of a BAM file whose header uses the renamed sequences.

    The header is read with ``samtools view -H``, the ``SN:`` value of every
    ``@SQ`` line that is a key of ``nameDict`` is replaced by its mapped
    name, and the result is fed to ``samtools reheader`` whose output is
    written to ``renamedFile``.

    Args:
        inputFile: path of the BAM file to read.
        nameDict: dict mapping an existing sequence name to its new name.
        renamedFile: path of the BAM file to write; created or truncated.

    Raises:
        subprocess.CalledProcessError: if either samtools command exits
            with a non-zero status.
    """
    header = subprocess.check_output(['samtools', 'view', '-H', inputFile])
    # check_output returns bytes on Python 3 and str on Python 2; work on
    # native text so the names compare equal to the keys of nameDict.
    if not isinstance(header, str):
        header = header.decode('utf-8')

    renamed_header = _rename_sam_header(header, nameDict)
    if not isinstance(renamed_header, bytes):
        renamed_header = renamed_header.encode('utf-8')

    command = ['samtools', 'reheader', '-', inputFile]
    with open(renamedFile, 'wb') as out:
        reheader = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=out)
        # communicate() sends the header, closes stdin and waits for exit,
        # so the output file is complete before this function returns.
        reheader.communicate(renamed_header)
    if reheader.returncode != 0:
        raise subprocess.CalledProcessError(reheader.returncode, command)


def _rename_sam_header(header, nameDict):
    """Return SAM header text with exact @SQ ``SN:`` matches renamed."""
    renamed_lines = []
    for line in header.splitlines(True):
        if line.startswith('@SQ'):
            body = line.rstrip('\r\n')
            line_ending = line[len(body):]
            fields = body.split('\t')
            for index, field in enumerate(fields):
                # Compare the whole name so that e.g. "scaffold1" never
                # matches inside "scaffold10".
                if field.startswith('SN:') and field[3:] in nameDict:
                    fields[index] = 'SN:' + nameDict[field[3:]]
            line = '\t'.join(fields) + line_ending
        renamed_lines.append(line)
    return ''.join(renamed_lines)
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
33
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
def rename_bigwig(inputFile, renamedReference, nameDict, renamedFile):
    """Write a copy of a bigWig file that uses the renamed sequence names.

    The bigWig is converted to bedGraph with ``bigWigToBedGraph``, its first
    column is renamed with ``rename_interval``, the result is sorted and then
    converted back with ``bedGraphToBigWig``. The chromosome sizes passed to
    ``bedGraphToBigWig`` are produced by ``faSize -detailed -tab`` on
    ``renamedReference``.

    Args:
        inputFile: path of the bigWig file to read.
        renamedReference: path of the reference sequence file that already
            uses the new names (input to ``faSize``).
        nameDict: dict mapping an existing sequence name to its new name.
        renamedFile: path of the bigWig file to write.

    Raises:
        subprocess.CalledProcessError: if any external tool exits with a
            non-zero status.
    """
    # No ``bufsize`` argument: that keyword only exists on Python 2, and the
    # Python-level buffer is never used because the tools write to the files
    # by name or through the inherited file descriptor. The ``with`` block
    # removes every temporary file on exit, including on error.
    with tempfile.NamedTemporaryFile() as bedGraphFile, \
            tempfile.NamedTemporaryFile() as chrom_sizes, \
            tempfile.NamedTemporaryFile() as renamed_bedGraphFile, \
            tempfile.NamedTemporaryFile() as renamed_sorted_bedGraphFile:
        subprocess.check_call(['bigWigToBedGraph', inputFile, bedGraphFile.name])
        subprocess.check_call(['faSize', '-detailed', '-tab', renamedReference],
                              stdout=chrom_sizes)
        # Rename before sorting so the final order is sorted with respect to
        # the new names, which is what bedGraphToBigWig receives.
        rename_interval(bedGraphFile.name, nameDict, renamed_bedGraphFile.name)
        subprocess.check_call(['sort', '-k1,1', '-k2,2n', renamed_bedGraphFile.name],
                              stdout=renamed_sorted_bedGraphFile)
        subprocess.check_call(['bedGraphToBigWig', renamed_sorted_bedGraphFile.name,
                               chrom_sizes.name, renamedFile])
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
45
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
def getNameDict(nameMapping):
    """Load a name mapping from a CSV file into a dict.

    The first column of each row becomes a key and the second column its
    value; any further columns are ignored. Empty rows (blank lines) are
    skipped. If a key occurs more than once, the last row wins.

    Args:
        nameMapping: path of the CSV file to read.

    Returns:
        dict mapping first-column values to second-column values.

    Raises:
        IndexError: if a non-empty row has fewer than two columns.
    """
    nameDict = {}
    with open(nameMapping, 'r') as f:
        for row in csv.reader(f):
            # csv.reader yields [] for a blank line; indexing it would fail.
            if not row:
                continue
            nameDict[row[0]] = row[1]
    return nameDict
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
53
8b19a698d90e planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
yating-l
parents:
diff changeset
def main():
    """Command-line entry point: rename the sequences of one track file.

    Positional arguments read from ``sys.argv``:
        1. input track file
        2. CSV file holding the name mapping
        3. input format: ``bed``, ``gff3``, ``gtf``, ``bam`` or ``bigwig``
        4. output file
        5. renamed reference sequence file (required for ``bigwig`` only)

    Exits with an error message when arguments are missing or the format is
    not one of the supported values.
    """
    if len(sys.argv) < 5:
        sys.exit("Usage: rename_tracks.py <input> <name_mapping.csv> "
                 "<bed|gff3|gtf|bam|bigwig> <output> [renamed_reference]")
    inputFile, nameMapping, inputFormat, outputfile = sys.argv[1:5]
    nameDict = getNameDict(nameMapping)
    if inputFormat in ("bed", "gff3", "gtf"):
        rename_interval(inputFile, nameDict, outputfile)
    elif inputFormat == "bam":
        rename_bam(inputFile, nameDict, outputfile)
    elif inputFormat == "bigwig":
        if len(sys.argv) < 6:
            sys.exit("The bigwig format requires the renamed reference "
                     "as a fifth argument")
        renamedReference = sys.argv[5]
        rename_bigwig(inputFile, renamedReference, nameDict, outputfile)
    else:
        # Fail loudly rather than exit successfully without writing output.
        sys.exit("Unsupported input format: %s" % inputFormat)


if __name__ == "__main__":
    main()