Mercurial > repos > tyty > structurefold
view get_reads/get_read.py @ 41:b083b21bbd43 draft
Uploaded
author | tyty |
---|---|
date | Mon, 20 Oct 2014 15:12:42 -0400 |
parents | 297cdb01d656 |
children |
line wrap: on
line source
#!/usr/bin/env python # -*- coding: utf-8 -*- import sys #from galaxy.tools.read_file import * from Bio import SeqIO import os from read_file import * fasta_file = sys.argv[1] map_file = sys.argv[2] result_file = sys.argv[3] os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > map_info.txt") fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); length_seq = {}; for seq in fasta_sequences: nuc = seq.id; length_seq[nuc] = len(seq.seq.tostring()); mapping = {} transcripts = [] f = open("map_info.txt"); for aline in f.readlines(): tline = aline.strip(); tl = tline.split('\t'); if tl[0].strip() not in transcripts: transcripts.append(tl[0].strip()); mapping[tl[0].strip()] = []; mapping[tl[0].strip()].append(tl[1].strip()); distribution = {}; coverage = {}; for transcript in length_seq: distribution[transcript] = []; for i in range(0, length_seq[transcript]): distribution[transcript].append(0); sum_count = float(0); if transcript in mapping: for j in range(0, len(mapping[transcript])): index = mapping[transcript][j]; #count = reads[mapping[transcript][j][0]]; sum_count = sum_count + 1; distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1; coverage[transcript] = float(sum_count)/float(length_seq[transcript]); else: coverage[transcript] = 0 h = file(result_file, 'w') for transcript in length_seq: h.write(transcript); h.write('\n') for i in range(0, length_seq[transcript]): h.write(str(distribution[transcript][i])) h.write('\t') h.write('\n') h.write('\n') f.close(); h.close()