Mercurial > repos > tyty > structurefold
changeset 2:297cdb01d656 draft
Uploaded
author | tyty |
---|---|
date | Mon, 15 Sep 2014 14:47:42 -0400 |
parents | b6d9b0059499 |
children | f4cc06e92530 |
files | get_reads/.DS_Store get_reads/get_read.py get_reads/get_read.xml get_reads/read_file.py get_reads/test.bam get_reads/tool_dependencies.xml |
diffstat | 6 files changed, 153 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""get_reads/get_read.py -- count reverse-transcription (RT) stops per nucleotide.

Usage: get_read.py <library.fasta> <mapped.bam> <output.txt>

For every sequence in the FASTA library the script counts how many selected
reads of the BAM file start at each nucleotide and writes the counts as a
tab-separated line below the sequence id.
"""

import subprocess
import sys
from collections import OrderedDict

# Intermediate "reference<TAB>position" listing written to the working directory.
MAP_INFO_FILE = "map_info.txt"


def read_fasta_lengths(fasta_file):
    """Return an ordered ``{sequence id: length}`` mapping for *fasta_file*.

    Ids keep the order in which they appear in the file, so the report is
    written in a deterministic order. A repeated id keeps its last length.
    """
    # Imported lazily so the pure-Python helpers in this module remain
    # importable on machines without Biopython.
    from Bio import SeqIO

    lengths = OrderedDict()
    with open(fasta_file) as handle:
        for record in SeqIO.parse(handle, 'fasta'):
            # len() on the Seq object avoids Seq.tostring(), which newer
            # Biopython releases no longer provide.
            lengths[record.id] = len(record.seq)
    return lengths


def extract_map_info(map_file, out_path=MAP_INFO_FILE):
    """Write the reference name and start position of each selected read.

    Runs ``samtools view -F 0xfff`` on *map_file* (reads with any of the low
    twelve SAM flag bits set are excluded) and stores SAM columns 3 and 4,
    tab-separated, one read per line, in *out_path*.

    Raises:
        OSError: if the ``samtools`` executable cannot be started.
        RuntimeError: if samtools exits with a non-zero status.
    """
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from being interpreted by a shell.
    command = ["samtools", "view", "-F", "0xfff", map_file]
    proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                            universal_newlines=True)
    try:
        with open(out_path, "w") as out:
            for line in proc.stdout:
                fields = line.rstrip("\n").split("\t")
                if len(fields) >= 4:
                    out.write(fields[2] + "\t" + fields[3] + "\n")
    finally:
        proc.stdout.close()
        status = proc.wait()
    if status != 0:
        raise RuntimeError("samtools view exited with status %d for %s"
                           % (status, map_file))


def read_map_positions(map_info_file):
    """Parse *map_info_file* into ``{reference name: [position string, ...]}``.

    Lines with fewer than two tab-separated fields (for example blank lines)
    are skipped.
    """
    mapping = {}
    with open(map_info_file) as handle:
        for line in handle:
            fields = line.strip().split('\t')
            if len(fields) < 2:
                continue
            mapping.setdefault(fields[0].strip(), []).append(fields[1].strip())
    return mapping


def count_rt_stops(length_seq, mapping):
    """Return ``{transcript: [count per nucleotide]}`` for every transcript.

    Args:
        length_seq: mapping of transcript id to sequence length.
        mapping: mapping of transcript id to 1-based read start positions
            (strings or ints). Ids missing from *length_seq* are ignored.

    Raises:
        ValueError: if a position lies outside ``1..length`` of its
            transcript. A position of 0 would otherwise be counted on the
            last nucleotide through negative indexing.
    """
    distribution = OrderedDict()
    for transcript, length in length_seq.items():
        counts = [0] * length
        for position in mapping.get(transcript, ()):
            index = int(position) - 1
            if not 0 <= index < length:
                raise ValueError(
                    "read position %s is outside transcript %s (length %d)"
                    % (position, transcript, length))
            counts[index] += 1
        distribution[transcript] = counts
    return distribution


def write_rtsc(result_file, distribution):
    """Write *distribution* to *result_file*.

    Each transcript produces its id on one line, then one line holding every
    count followed by a tab, then an empty line.
    """
    with open(result_file, 'w') as out:
        for transcript, counts in distribution.items():
            out.write(transcript)
            out.write('\n')
            out.write("".join(str(count) + '\t' for count in counts))
            out.write('\n')
            out.write('\n')


def main(argv):
    """Run the pipeline for ``argv = [prog, fasta_file, map_file, result_file]``."""
    if len(argv) < 4:
        sys.exit("usage: get_read.py <library.fasta> <mapped.bam> <output.txt>")
    fasta_file, map_file, result_file = argv[1:4]

    extract_map_info(map_file)
    length_seq = read_fasta_lengths(fasta_file)
    mapping = read_map_positions(MAP_INFO_FILE)
    write_rtsc(result_file, count_rt_stops(length_seq, mapping))


if __name__ == "__main__":
    main(sys.argv)
<!-- get_reads/get_read.xml: Galaxy tool wrapper that runs get_read.py on a
     FASTA library and a BAM file and collects the RT stop count report. -->
<tool id="get_read_pipeline" name="Get RT stop counts" version="1.0">
  <description></description>
  <command interpreter="python">get_read.py $lib_file $map_file $output </command>
  <requirements>
    <requirement type="package" version="1.61">biopython</requirement>
    <requirement type="package" version="0.1.18">samtools</requirement>
  </requirements>
  <inputs>
    <param name="lib_file" type="data" format="fasta" label="Library file (fasta)"/>
    <param name="map_file" type="data" format="bam" label="Mapped file"/>
  </inputs>
  <outputs>
    <data name="output" format="txt"/>
  </outputs>
  <tests>
    <test>
      <!-- lib_file is the fasta input and map_file the bam input; the two
           test values were previously swapped. -->
      <param name="lib_file" value="com_rna.txt" />
      <param name="map_file" value="test.bam" />
      <output name="output" file="get_RT_stop_test.out" />
    </test>
  </tests>

  <help>


**TIPS**:

-----

**Input**

1. A mapped (bam) file from Bowtie (or any mapping program)
2. Reference library sequences (fasta) used to map the reads

-----

**Output**:
A text file with reverse transcription stop counts mapped to each nucleotide (RTSC file)



  </help>
</tool>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""get_reads/read_file.py -- helper for reading tab-delimited text files."""

import sys


def read_t_file(in_file):
    """Read a tab-delimited text file into a list of rows.

    Args:
        in_file: path of the file to read.

    Returns:
        One list per line of the file, holding that line's tab-separated
        fields with surrounding whitespace removed from each field.

    Note:
        Each line is stripped before it is split, so leading and trailing
        empty fields are dropped and a blank line yields ``['']``.
    """
    # The context manager closes the file even if reading raises.
    with open(in_file) as handle:
        return [
            [field.strip() for field in line.strip().split('\t')]
            for line in handle
        ]
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/get_reads/tool_dependencies.xml Mon Sep 15 14:47:42 2014 -0400 @@ -0,0 +1,12 @@ +<?xml version="1.0"?> +<!-- Dependency declarations for the get_reads tool: biopython 1.61, numpy 1.7 and samtools 0.1.18, each pointing at a pinned changeset_revision of a repository on toolshed.g2.bx.psu.edu. Only biopython is marked prior_installation_required. --> +<tool_dependency> + <package name="biopython" version="1.61"> + <repository changeset_revision="ae9dda584395" name="package_biopython_1_61" owner="biopython" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> + <package name="numpy" version="1.7"> + <repository changeset_revision="ef12a3a11d5b" name="package_numpy_1_7" owner="iuc" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> + <package name="samtools" version="0.1.18"> + <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> +</tool_dependency>