Mercurial > repos > yating-l > rename_tracks
diff rename_tracks.py @ 0:8b19a698d90e draft
planemo upload commit b615ca3bfed9f2115f62d1af30d4726fe8ba08d8
author | yating-l |
---|---|
date | Mon, 25 Jun 2018 15:01:41 -0400 |
parents | |
children | 72ccb2f848a0 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rename_tracks.py Mon Jun 25 15:01:41 2018 -0400 @@ -0,0 +1,69 @@ +# -*- coding: utf8 -*- + +""" +Rename the custom evidence tracks so that the tracks use the same sequence names as the renamed reference +""" +import sys +import csv +import subprocess +import tempfile + +def rename_interval(inputFile, nameDict, renamedFile): + writer = open(renamedFile, 'w') + with open(inputFile, 'r') as f: + lines = f.readlines() + for l in lines: + if not l.startswith("#"): + scaffold_name = l.split()[0] + if scaffold_name in nameDict: + l = l.replace(scaffold_name, nameDict[scaffold_name]) + writer.write(l) + writer.close() + +def rename_bam(inputFile, nameDict, renamedFile): + header = subprocess.Popen(['samtools', 'view', '-H', inputFile], stdout=subprocess.PIPE) + array_call = ['sed'] + for k,v in nameDict.items(): + substitute = "s/%s/%s/" % (str(k), str(v)) + array_call.append('-e') + array_call.append(substitute) + reheader = subprocess.Popen(array_call, stdin=header.stdout, stdout=subprocess.PIPE) + out = open(renamedFile, 'w') + subprocess.Popen(['samtools', 'reheader', '-', inputFile], stdin=reheader.stdout, stdout=out) + +def rename_bigwig(inputFile, renamedReference, nameDict, renamedFile): + bedGraphFile = tempfile.NamedTemporaryFile(bufsize=0) + chrom_sizes = tempfile.NamedTemporaryFile(bufsize=0) + sorted_bedGraphFile = tempfile.NamedTemporaryFile(bufsize=0) + renamed_sorted_bedGraphFile = tempfile.NamedTemporaryFile(bufsize=0) + + subprocess.call(['bigWigToBedGraph', inputFile, bedGraphFile.name]) + subprocess.call(['faSize', '-detailed', '-tab', renamedReference], stdout=chrom_sizes) + subprocess.call(['sort', '-k1,1', '-k2,2n', bedGraphFile.name], stdout=sorted_bedGraphFile) + rename_interval(sorted_bedGraphFile.name, nameDict, renamed_sorted_bedGraphFile.name) + subprocess.call(['bedGraphToBigWig', renamed_sorted_bedGraphFile.name, chrom_sizes.name, renamedFile]) + +def getNameDict(nameMapping): + nameDict = {} + with open(nameMapping, 'r') as f: + reader = csv.reader(f) + for row in reader: + nameDict[row[0]] = row[1] + return nameDict + +def main(): + inputFile = sys.argv[1] + nameMapping = sys.argv[2] + inputFormat = sys.argv[3] + outputfile = sys.argv[4] + nameDict = getNameDict(nameMapping) + if inputFormat == "bed" or inputFormat == "gff3" or inputFormat == "gtf": + rename_interval(inputFile, nameDict, outputfile) + elif inputFormat == "bam": + rename_bam(inputFile, nameDict, outputfile) + elif inputFormat == "bigwig": + renamedReference = sys.argv[5] + rename_bigwig(inputFile, renamedReference, nameDict, outputfile) + +if __name__ == "__main__": + main() \ No newline at end of file