# HG changeset patch # User tyty # Date 1426801368 14400 # Node ID 60278e5df4931b37b4d2011bc902b362d6bdf18a # Parent 51fa1298f55e912f3446d3fda1ad06b9156c2d57 Uploaded diff -r 51fa1298f55e -r 60278e5df493 ._Iterative_mapping Binary file ._Iterative_mapping has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/._.DS_Store Binary file Iterative_mapping/._.DS_Store has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/._iterative_map.py Binary file Iterative_mapping/._iterative_map.py has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/._iterative_map.xml Binary file Iterative_mapping/._iterative_map.xml has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/._map_ex.py Binary file Iterative_mapping/._map_ex.py has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/._read_file.py Binary file Iterative_mapping/._read_file.py has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/._read_s_file.py Binary file Iterative_mapping/._read_s_file.py has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/._remove_map.py Binary file Iterative_mapping/._remove_map.py has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/._seq_track.py Binary file Iterative_mapping/._seq_track.py has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/._truncate.py Binary file Iterative_mapping/._truncate.py has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/._unmap.py Binary file Iterative_mapping/._unmap.py has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/iterative_map.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/iterative_map.py Thu Mar 19 17:42:48 2015 -0400 @@ -0,0 +1,127 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +import os +from read_file import * +from read_s_file import * +import random +import string + +type_input = sys.argv[1] +seq_file = sys.argv[2] +ref_file = sys.argv[3] +shift = sys.argv[4] +length = sys.argv[5] +t_end = sys.argv[6] +map_type = sys.argv[7] +output_file = sys.argv[8] + + +if map_type!="default": + s = "" + sm = "" + s = s+"-v "+sys.argv[9] + sm = sm+"-v "+sys.argv[9] + sm = sm+" -5 "+sys.argv[10] + sm = sm+" -3 "+sys.argv[11] + s = s+" -k "+sys.argv[12] + sm = sm+" -k "+sys.argv[12] + if sys.argv[13]: + s = s+" -a" + sm = sm+" -a" + if int(sys.argv[14])>=1: + s = s+" -m "+sys.argv[14] + sm = sm+" -m "+sys.argv[14] + if sys.argv[15]: + s = s+" --best --strata " + sm = sm+" --best --strata " + +else: + s = "-v 3 -a --best --strata " + sm = "-v 3 -a --best --strata " + +ospath = os.path.realpath(sys.argv[0]) +ost = ospath.split('/') +syspath = "" +for i in range(len(ost)-1): + syspath = syspath+ost[i].strip() + syspath = syspath+'/' + +syspathrs = os.getcwd() +syspathrs = syspathrs+'/' + +os.system("bowtie-build -f "+ref_file+" "+syspathrs+"ref > "+syspathrs+"log.txt") + +os.system("cp "+seq_file+" "+syspathrs+"seq0.fa") + +if type_input == "fasta": + tp = 'fasta' +if type_input == "fastq": + tp = 'fastq' + +k = 0 + +if type_input == "fasta": + os.system("bowtie "+sm+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") +if type_input == "fastq": + os.system("bowtie "+sm+"-q "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") + +while(True): + os.system("samtools view -Sb -F 0xfff "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get mapped reads + os.system("samtools view -Sb -f 0x4 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"umapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get unmapped reads + os.system("samtools view -Sb -f 0x10 "+syspathrs+"map"+str(k)+".sam > "+syspathrs+"rmapped"+str(k)+".bam 2>"+syspathrs+"log.txt") #get reversed mapped reads + os.system("samtools merge -f "+syspathrs+"unmapped"+str(k)+".bam "+syspathrs+"umapped"+str(k)+".bam "+syspathrs+"rmapped"+str(k)+".bam") #get reversed mapped reads + os.system("samtools view -h -o "+syspathrs+"unmapped"+str(k)+".sam "+syspathrs+"unmapped"+str(k)+".bam") #get reversed mapped reads + if k>0: + os.system("samtools view -h -o "+syspathrs+"mapped"+str(k)+".sam "+syspathrs+"mapped"+str(k)+".bam") #get reversed mapped reads + os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"unmapped"+str(k)+".txt") + os.system("cut -f 1 "+syspathrs+"mapped"+str(k)+".sam > "+syspathrs+"mapped"+str(k)+".txt") + os.system("python "+syspath+"remove_map.py "+syspathrs+"unmapped"+str(k)+".txt "+syspathrs+"mapped"+str(k)+".txt "+syspathrs+"runmapped"+str(k)+".txt") + os.system("rm "+syspathrs+"mapped"+str(k)+".sam") + os.system("rm "+syspathrs+"mapped"+str(k)+".txt") + os.system("rm "+syspathrs+"unmapped"+str(k)+".txt") + else: + os.system("cut -f 1 "+syspathrs+"unmapped"+str(k)+".sam > "+syspathrs+"runmapped"+str(k)+".txt") + + os.system("rm "+syspathrs+"unmapped"+str(k)+".bam") + os.system("rm "+syspathrs+"umapped"+str(k)+".bam") + os.system("rm "+syspathrs+"rmapped"+str(k)+".bam") + os.system("python "+syspath+"seq_track.py "+syspathrs+"runmapped"+str(k)+".txt "+syspathrs+"seq"+str(k)+".fa "+syspathrs+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence + os.system("python "+syspath+"truncate.py "+syspathrs+"unmap_seq"+str(k)+".fa "+shift+" "+syspathrs+"seq"+str(k+1)+".fa "+length+" "+t_end) #truncate unmapped sequence + os.system("rm "+syspathrs+"seq"+str(k)+".fa") #Remove sequences being mapped + os.system("rm "+syspathrs+"map"+str(k)+".sam") #Remove mapping file + os.system("rm "+syspathrs+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce + os.system("rm "+syspathrs+"runmapped"+str(k)+".txt") + os.system("rm "+syspathrs+"unmapped"+str(k)+".sam") + + os.system("wc -l "+syspathrs+"seq"+str(k+1)+".fa > "+syspathrs+"count"+str(k+1)+".txt") + c = read_sp_file(syspathrs+"count"+str(k+1)+".txt") + if c[0][0] == '0': #If no reads is in the sequence file, stop + os.system("rm "+syspathrs+"count"+str(k+1)+".txt") + os.system("rm "+syspathrs+"seq"+str(k+1)+".fa") + break + os.system("rm "+syspathrs+"count"+str(k+1)+".txt") + k = k+1 + if type_input == "fasta": + os.system("bowtie "+s+"-f "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") + if type_input == "fastq": + os.system("bowtie "+s+"-q "+syspathrs+"ref"+" "+syspathrs+"seq"+str(k)+".fa --quiet -S > "+syspathrs+"map"+str(k)+".sam") + + +ss = "" +for i in range(0,k+1): + ss = ss+" "+syspathrs+"mapped"+str(i)+".bam" + + +os.system("samtools merge -f "+syspathrs+"combine.bam"+" "+ss) +os.system("samtools sort "+syspathrs+"combine.bam sorted") +os.system("samtools view -b -h sorted.bam > " + output_file) +#print("samtools merge mapped_all.bam"+ss) +os.system("rm "+syspathrs+"mapped*.bam") +os.system("rm "+syspathrs+"combine.bam") +os.system("rm "+syspathrs+"sorted.bam") +os.system("rm "+syspathrs+"ref*") +#os.system("rm -r "+syspathrs) + + diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/iterative_map.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/iterative_map.xml Thu Mar 19 17:42:48 2015 -0400 @@ -0,0 +1,104 @@ + + iteratively maps the raw reads of RNA structural data to the reference transcriptome + + #if $mapping_file.type == "user" + iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best + #else + iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $t_end $mapping_file.type $output + #end if + + + biopython + numpy + samtools + bowtie + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**Overview of StructureFold** + +* StructureFold is a series of software packages that automates the process of predicting RNA secondary structure for a transcript or an entire transcriptome, with or without the inclusion of constraints on the structure(s) provided by wet bench experimentation. The process consists of mapping the raw reads of RNA structural data on every transcript in the dataset to the transcriptome, getting RT stop counts on each nucleotide, calculating structural reactivities on the nucleotides, and predicting the RNA structures. Please cite: Tang, Y, Bouvier, E, Kwok CK, Ding Y, Nekrutenko, A, Bevilacqua PC, Assmann SM, StructureFold: Genome-wide RNA secondary structure mapping and reconstruction in vivo, submitted. RNA structure is predicted using the RNAstructure algorithm (http://rna.urmc.rochester.edu/RNAstructure.html). + +----- + +**Function** + +* Iterative Mapping maps the raw reads of RNA structural data to the reference transcriptome using Bowtie (v0.12.8). It allows users to trim each read from either end to iteratively map the read to the reference transcriptome. + +----- + +**Input**: + +* 1. Sequence file type (FASTA/FASTQ) +* 2. Sequence file (fasta/fastq format) +* 3. Reference file (fasta) used to map the reads to +* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping) +* 5. “Length” (The minimum length of the reads for mapping after trimming) +* [Optional] +* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. Use -5/-3 flag to trim the nucleotides from 5'/3' end of the reads) + +----- + +**Output**: + +A sorted .bam file with all of the reads that are mapped + + + + + diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/map_ex.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/map_ex.py Thu Mar 19 17:42:48 2015 -0400 @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * +from Bio import SeqIO + +map_file = sys.argv[1] +result_file = sys.argv[2] + + +#reads = read_t_file(read_file); + +f = open(map_file); +h = file(result_file, 'w') + +for aline in f.readlines(): + tline = aline.strip(); + tl = tline.split('\t'); + if len(tl)>4: + if int(tl[1].strip())== 0: + h.write(tline) + h.write('\n') + + +f.close(); +h.close() + + + + diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/read_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/read_file.py Thu Mar 19 17:42:48 2015 -0400 @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_t_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split('\t'); + for i in range(0, len(tl)): + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + + diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/read_file.pyc Binary file Iterative_mapping/read_file.pyc has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/read_s_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/read_s_file.py Thu Mar 19 17:42:48 2015 -0400 @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_sp_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split(' '); + for i in range(0, len(tl)): + if len(tl[i].strip())>0: + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + + diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/read_s_file.pyc Binary file Iterative_mapping/read_s_file.pyc has changed diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/remove_map.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/remove_map.py Thu Mar 19 17:42:48 2015 -0400 @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * + + +unmap_file = sys.argv[1] +map_file = sys.argv[2] +result_file = sys.argv[3] + + +unmap = read_t_file(unmap_file) +mapped = read_t_file(map_file) +h = file(result_file, 'w') + +maps = set() +for i in range(len(mapped)): + maps.add(mapped[i][0]) + + +for i in range(len(unmap)): + name = unmap[i][0] + if name not in maps: + h.write(name) + h.write('\n') + + +h.close() diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/seq_track.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/seq_track.py Thu Mar 19 17:42:48 2015 -0400 @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * +from Bio import SeqIO + +unmap_file = sys.argv[1] +reads_file = sys.argv[2] +result_file = sys.argv[3] +tp = sys.argv[4] + + +unmap = read_t_file(unmap_file); + +h = file(result_file, 'w') + +reads = SeqIO.parse(reads_file,tp) +um = set() +for i in range(0, len(unmap)): + id_r = unmap[i][0] + um.add(id_r) + +for read in reads: + if read.id in um: + h.write('>') + h.write(read.id) + h.write('\n') + h.write(read.seq.tostring()) + h.write('\n') + + + +h.close() + + + + diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/truncate.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/truncate.py Thu Mar 19 17:42:48 2015 -0400 @@ -0,0 +1,36 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from Bio import SeqIO + +fasta_file = sys.argv[1] +shift_in = sys.argv[2] +result_file = sys.argv[3] +length = sys.argv[4] +t_end = sys.argv[5] + +shift = int(shift_in) + +fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); +h = file(result_file,'w') +for seq in fasta_sequences: + nuc = seq.id; + sequence = seq.seq.tostring(); + if (len(sequence)-shift)>=int(length): + h.write('>'+nuc) + h.write('\n') + if t_end == 'three_end': + h.write(sequence[0:(len(sequence)-shift)]) + if t_end == 'five_end': + h.write(sequence[(shift):(len(sequence))]) + h.write('\n') + + + + +h.close() + + + + diff -r 51fa1298f55e -r 60278e5df493 Iterative_mapping/unmap.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/unmap.py Thu Mar 19 17:42:48 2015 -0400 @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * +from Bio import SeqIO + +map_file = sys.argv[1] +result_file = sys.argv[2] + + +#reads = read_t_file(read_file); + +f = open(map_file); +h = file(result_file, 'w') + +for aline in f.readlines(): + tline = aline.strip(); + tl = tline.split('\t'); + if len(tl)>4: + if int(tl[1].strip()) != 0: + h.write(tl[0].strip()); + h.write('\n'); + + +f.close(); +h.close() + + + +