# HG changeset patch
# User tyty
# Date 1410806862 14400
# Node ID 297cdb01d656d1f872170454d468d27d72d03cc7
# Parent b6d9b0059499f305501ca5d442900410b13f01c0
Uploaded
diff -r b6d9b0059499 -r 297cdb01d656 get_reads/.DS_Store
Binary file get_reads/.DS_Store has changed
diff -r b6d9b0059499 -r 297cdb01d656 get_reads/get_read.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/get_read.py Mon Sep 15 14:47:42 2014 -0400
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+#from galaxy.tools.read_file import *
+from Bio import SeqIO
+import os
+from read_file import *
+
+fasta_file = sys.argv[1]
+map_file = sys.argv[2]
+result_file = sys.argv[3]
+
+# Write the reference name and 1-based start (SAM columns 3 and 4) of every
+# read that has none of the flag bits in 0xfff set to map_info.txt.  The two
+# commands are started from argument lists rather than one shell string so
+# that a map_file path containing spaces or shell metacharacters is passed
+# to samtools unchanged.
+import subprocess
+with open("map_info.txt", "w") as map_info:
+    view = subprocess.Popen(["samtools", "view", "-F", "0xfff", map_file],
+                            stdout=subprocess.PIPE)
+    cut = subprocess.Popen(["cut", "-f", "3,4"], stdin=view.stdout,
+                           stdout=map_info)
+    view.stdout.close()  # only cut keeps the read end of the pipe open
+    cut_status = cut.wait()
+    view_status = view.wait()
+# Stop on failure; an empty map_info.txt would otherwise give all-zero counts.
+if view_status != 0 or cut_status != 0:
+    sys.exit("samtools view failed for " + map_file)
+
+# Sequence length per reference id.  len() is applied to the Seq object
+# directly; the old tostring() call is gone from current Biopython releases.
+length_seq = {}
+with open(fasta_file) as fasta_handle:
+    for seq in SeqIO.parse(fasta_handle, 'fasta'):
+        length_seq[seq.id] = len(seq.seq)
+
+# Start positions (kept as strings) grouped by reference name.
+mapping = {}
+with open("map_info.txt") as f:
+    for aline in f:
+        tl = aline.strip().split('\t')
+        if len(tl) < 2:
+            continue  # blank or truncated line: nothing to record
+        mapping.setdefault(tl[0].strip(), []).append(tl[1].strip())
+
+# distribution[t][i] is the number of reads starting at position i + 1 of t;
+# coverage[t] is reads per nucleotide (computed here but never written out).
+distribution = {}
+coverage = {}
+for transcript in length_seq:
+    length = length_seq[transcript]
+    counts = [0] * length
+    starts = mapping.get(transcript, [])
+    for index in starts:
+        counts[int(index) - 1] += 1
+    distribution[transcript] = counts
+    if starts:
+        coverage[transcript] = float(len(starts)) / float(length)
+    else:
+        coverage[transcript] = 0
+
+# One record per reference: its id on one line, then the counts each followed
+# by a tab, then an empty line.  open() replaces the Python 2-only file().
+with open(result_file, 'w') as h:
+    for transcript in length_seq:
+        h.write(transcript)
+        h.write('\n')
+        row = ''.join(str(count) + '\t' for count in distribution[transcript])
+        h.write(row)
+        h.write('\n')
+        h.write('\n')
+
+
+
+
diff -r b6d9b0059499 -r 297cdb01d656 get_reads/get_read.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/get_read.xml Mon Sep 15 14:47:42 2014 -0400
@@ -0,0 +1,43 @@
+
+
+ get_read.py $lib_file $map_file $output
+
+ biopython
+ samtools
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**TIPS**:
+
+-----
+
+**Input**:
+1. A file of mapped reads (BAM) from Bowtie (or any other mapping program)
+2. The reference library sequences (FASTA) that the reads were mapped to
+
+-----
+
+**Output**:
+A text file with reverse transcription stop counts mapped to each nucleotide (RTSC file)
+
+
+
+
+
diff -r b6d9b0059499 -r 297cdb01d656 get_reads/read_file.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/read_file.py Mon Sep 15 14:47:42 2014 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+    """Read a tab-separated text file into a list of rows.
+
+    in_file is the path of the file to read.  Each line is stripped and
+    split on tabs, and every field is stripped again, so a blank line
+    yields [''].  The file is closed even if reading fails part-way.
+    """
+    with open(in_file) as f:
+        return [
+            [field.strip() for field in aline.strip().split('\t')]
+            for aline in f
+        ]
+
+
diff -r b6d9b0059499 -r 297cdb01d656 get_reads/test.bam
Binary file get_reads/test.bam has changed
diff -r b6d9b0059499 -r 297cdb01d656 get_reads/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/tool_dependencies.xml Mon Sep 15 14:47:42 2014 -0400
@@ -0,0 +1,12 @@
+
+
+
+
+
+
+
+
+
+
+
+