changeset 2:297cdb01d656 draft

Uploaded
author tyty
date Mon, 15 Sep 2014 14:47:42 -0400
parents b6d9b0059499
children f4cc06e92530
files get_reads/.DS_Store get_reads/get_read.py get_reads/get_read.xml get_reads/read_file.py get_reads/test.bam get_reads/tool_dependencies.xml
diffstat 6 files changed, 153 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file get_reads/.DS_Store has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/get_read.py	Mon Sep 15 14:47:42 2014 -0400
@@ -0,0 +1,77 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+#from galaxy.tools.read_file import *
+from Bio import SeqIO
+import os
+from read_file import *
+
+def load_lengths(fasta_path):
+    """Return {record id: sequence length} for every record in a FASTA file."""
+    with open(fasta_path) as handle:
+        return dict((rec.id, len(rec.seq)) for rec in SeqIO.parse(handle, 'fasta'))
+
+
+def load_positions(bam_path):
+    """Return {reference name: [1-based start, ...]} for reads whose flag is 0.
+
+    samtools gets an argument list, so the path is never parsed by a shell,
+    and a non-zero exit status aborts instead of yielding all-zero counts.
+    """
+    import subprocess  # local: the file-level import block is left untouched
+    cmd = ['samtools', 'view', '-F', '0xfff', bam_path]
+    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, universal_newlines=True)
+    positions = {}
+    for line in proc.stdout:
+        fields = line.rstrip('\n').split('\t')
+        if len(fields) >= 4:
+            # SAM columns 3 and 4: reference name, 1-based leftmost position.
+            positions.setdefault(fields[2].strip(), []).append(int(fields[3]))
+    proc.stdout.close()
+    if proc.wait() != 0:
+        sys.exit('samtools view failed with exit status %d' % proc.returncode)
+    return positions
+
+
+def count_stops(lengths, positions):
+    """Return {record id: list with the number of reads starting at each base}.
+
+    Records without reads get an all-zero list; references that are not in
+    ``lengths`` are ignored.  A start outside its record aborts the run.
+    """
+    distribution = {}
+    for name, length in lengths.items():
+        counts = [0] * length
+        for pos in positions.get(name, ()):
+            if not 1 <= pos <= length:
+                # pos - 1 would otherwise wrap around (pos 0) or overrun the list.
+                sys.exit('read start %d lies outside %s (length %d)'
+                         % (pos, name, length))
+            counts[pos - 1] += 1
+        distribution[name] = counts
+    return distribution
+
+
+def write_counts(path, names, distribution):
+    """Write one block per name: id line, tab-terminated counts, blank line.
+
+    ``names`` fixes the order in which the records are written.
+    """
+    with open(path, 'w') as out:
+        for name in names:
+            out.write(name + '\n')
+            out.write(''.join(str(count) + '\t' for count in distribution[name]))
+            out.write('\n\n')
+
+
+if __name__ == '__main__':
+    fasta_file = sys.argv[1]
+    map_file = sys.argv[2]
+    result_file = sys.argv[3]
+    lengths = load_lengths(fasta_file)
+    write_counts(result_file, lengths, count_stops(lengths, load_positions(map_file)))
+
+
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/get_read.xml	Mon Sep 15 14:47:42 2014 -0400
@@ -0,0 +1,43 @@
+<tool id="get_read_pipeline" name="Get RT stop counts" version="1.0">
+    <description></description>
+    <!-- get_read.py takes: reference FASTA, mapped BAM, output path -->
+    <command interpreter="python">get_read.py $lib_file $map_file $output </command>
+    <requirements>
+        <requirement type="package" version="1.61">biopython</requirement>
+        <requirement type="package" version="0.1.18">samtools</requirement>
+    </requirements>
+    <inputs>
+        <param name="lib_file" type="data" format="fasta" label="Library file (fasta)"/>
+        <param name="map_file" type="data" format="bam" label="Mapped file"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="txt"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="lib_file" value="com_rna.txt" />
+            <param name="map_file" value="test.bam" />
+            <output name="output" file="get_RT_stop_test.out" />
+        </test>
+    </tests>
+
+    <help>
+
+**TIPS**:
+
+Only alignments whose SAM flag is 0 are counted, i.e. reads mapped to the forward strand of a library sequence with no other flag set.
+
+-----
+
+**Input**:
+
+1. A mapped (BAM) file from Bowtie (or any other mapping program)
+2. The reference library sequences (FASTA) that the reads were mapped to
+
+-----
+
+**Output**:
+
+A text file with the reverse transcription stop counts at each nucleotide of every library sequence (RTSC file): for each sequence, its name on one line followed by a line of tab-separated counts.
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/read_file.py	Mon Sep 15 14:47:42 2014 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+    """Read a tab-delimited text file into a list of rows.
+
+    Each line is stripped of surrounding whitespace and split on tabs, and
+    every field is stripped again.  A blank line yields ``['']``.
+
+    :param in_file: path of the file to read.
+    :returns: list of rows, each a list of field strings.
+    """
+    # "with" closes the file even if reading raises part-way through.
+    with open(in_file) as f:
+        return [[field.strip() for field in aline.strip().split('\t')] for aline in f]
+
+
Binary file get_reads/test.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/get_reads/tool_dependencies.xml	Mon Sep 15 14:47:42 2014 -0400
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- Tool Shed packages backing the requirements of get_read.xml -->
+    <package name="biopython" version="1.61"> <!-- get_read.py imports Bio.SeqIO -->
+        <repository changeset_revision="ae9dda584395" name="package_biopython_1_61" owner="biopython" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="numpy" version="1.7"> <!-- NOTE(review): not listed in get_read.xml requirements; presumably needed by biopython, confirm -->
+        <repository changeset_revision="ef12a3a11d5b" name="package_numpy_1_7" owner="iuc" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="samtools" version="0.1.18"> <!-- get_read.py runs "samtools view" on the BAM input -->
+        <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>