# HG changeset patch # User tyty # Date 1410807140 14400 # Node ID f4cc06e925309dc933b42c8c6ce2eb2c3af422c3 # Parent 297cdb01d656d1f872170454d468d27d72d03cc7 Uploaded diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/.DS_Store Binary file Iterative_mapping/.DS_Store has changed diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/iterative_map.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/iterative_map.py Mon Sep 15 14:52:20 2014 -0400 @@ -0,0 +1,103 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +import os +from read_file import * +from read_s_file import * + +type_input = sys.argv[1] +seq_file = sys.argv[2] +ref_file = sys.argv[3] +shift = sys.argv[4] +length = sys.argv[5] +map_type = sys.argv[6] +output_file = sys.argv[7] + + +if map_type!="default": + s = "" + s = s+"-v "+sys.argv[8] + s = s+" -5 "+sys.argv[9] + s = s+" -3 "+sys.argv[10] + s = s+" -k "+sys.argv[11] + if sys.argv[12]: + s = s+" -a" + if int(sys.argv[13])>=1: + s = s+" -m "+sys.argv[13] + if sys.argv[14]: + s = s+" --best --strata " + +else: + s = "-v 3 -a --best --strata " + +ospath = os.path.realpath(sys.argv[0]) +ost = ospath.split('/') +syspath = "" +for i in range(len(ost)-1): + syspath = syspath+ost[i].strip() + syspath = syspath+'/' + +os.system("bowtie-build -f "+ref_file+" "+syspath+"ref > "+syspath+"log.txt") + +os.system("cp "+seq_file+" "+syspath+"seq0.fa") + +if type_input == "fasta": + tp = 'fasta' +if type_input == "fastq": + tp = 'fastq' + +k = 0 +print(type_input) +while(True): + if type_input == "fasta": + os.system("bowtie "+s+"-f "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam") + if type_input == "fastq": + os.system("bowtie "+s+"-q "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam") + os.system("samtools view -Sb -F 0xfff "+syspath+"map"+str(k)+".sam > "+syspath+"mapped"+str(k)+".bam 2>"+syspath+"log.txt") #get mapped reads + os.system("samtools view -Sb -f 0x4 "+syspath+"map"+str(k)+".sam > "+syspath+"umapped"+str(k)+".bam 2>"+syspath+"log.txt") #get unmapped reads + os.system("samtools view -Sb -f 0x10 "+syspath+"map"+str(k)+".sam > "+syspath+"rmapped"+str(k)+".bam 2>"+syspath+"log.txt") #get reversed mapped reads + os.system("samtools merge -f "+syspath+"unmapped"+str(k)+".bam "+syspath+"umapped"+str(k)+".bam "+syspath+"rmapped"+str(k)+".bam") #get reversed mapped reads + os.system("samtools view -h -o "+syspath+"unmapped"+str(k)+".sam "+syspath+"unmapped"+str(k)+".bam") #get reversed mapped reads + if k>0: + os.system("samtools view -h -o "+syspath+"mapped"+str(k)+".sam "+syspath+"mapped"+str(k)+".bam") #get reversed mapped reads + os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"unmapped"+str(k)+".txt") + os.system("cut -f 1 "+syspath+"mapped"+str(k)+".sam > "+syspath+"mapped"+str(k)+".txt") + os.system("python "+syspath+"remove_map.py "+syspath+"unmapped"+str(k)+".txt "+syspath+"mapped"+str(k)+".txt "+syspath+"runmapped"+str(k)+".txt") + os.system("rm "+syspath+"mapped"+str(k)+".sam") + os.system("rm "+syspath+"mapped"+str(k)+".txt") + os.system("rm "+syspath+"unmapped"+str(k)+".txt") + else: + os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"runmapped"+str(k)+".txt") + + os.system("rm "+syspath+"unmapped"+str(k)+".bam") + os.system("rm "+syspath+"umapped"+str(k)+".bam") + os.system("rm "+syspath+"rmapped"+str(k)+".bam") + os.system("python "+syspath+"seq_track.py "+syspath+"runmapped"+str(k)+".txt "+syspath+"seq"+str(k)+".fa "+syspath+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence + os.system("python "+syspath+"truncate.py "+syspath+"unmap_seq"+str(k)+".fa "+shift+" "+syspath+"seq"+str(k+1)+".fa "+length) #truncate unmapped sequence + os.system("rm "+syspath+"seq"+str(k)+".fa") #Remove sequences being mapped + os.system("rm "+syspath+"map"+str(k)+".sam") #Remove mapping file + os.system("rm "+syspath+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce + os.system("rm "+syspath+"runmapped"+str(k)+".txt") + os.system("rm "+syspath+"unmapped"+str(k)+".sam") + + os.system("wc -l "+syspath+"seq"+str(k+1)+".fa > "+syspath+"count"+str(k+1)+".txt") + c = read_sp_file(syspath+"count"+str(k+1)+".txt") + if c[0][0] == '0': #If no reads is in the sequence file, stop + os.system("rm "+syspath+"count"+str(k+1)+".txt") + os.system("rm "+syspath+"seq"+str(k+1)+".fa") + break + os.system("rm "+syspath+"count"+str(k+1)+".txt") + k = k+1 + +ss = "" +for i in range(0,k+1): + ss = ss+" "+syspath+"mapped"+str(i)+".bam" + + +os.system("samtools merge -f "+output_file+" "+ss) +#print("samtools merge mapped_all.bam"+ss) +os.system("rm "+syspath+"mapped*.bam") +os.system("rm "+syspath+"ref*") + + diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/iterative_map.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/iterative_map.xml Mon Sep 15 14:52:20 2014 -0400 @@ -0,0 +1,80 @@ + + + + #if $mapping_file.type == "user" + iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best + #else + iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output + #end if + + + biopython + numpy + samtools + bowtie + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +**TIPS**: + +----- + +**Input**: + +* 1. Sequence file type (FASTA/FASTQ) +* 2. Sequence file (fasta/fastq format) {Default: fastq file} +* 3. Reference file (e.g. cDNA library [fasta]) +* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping) +* 5. “Length” (The minimum length of the reads for mapping after trimming) +* [Optional] +* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. use -5/-3 flag to trim nucleotides from 5'/3' end of the reads) + +----- + +**Output**: + +A bam file with all of the reads that are mapped + + + + + diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/map_ex.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/map_ex.py Mon Sep 15 14:52:20 2014 -0400 @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * +from Bio import SeqIO + +map_file = sys.argv[1] +result_file = sys.argv[2] + + +#reads = read_t_file(read_file); + +f = open(map_file); +h = file(result_file, 'w') + +for aline in f.readlines(): + tline = aline.strip(); + tl = tline.split('\t'); + if len(tl)>4: + if int(tl[1].strip())== 0: + h.write(tline) + h.write('\n') + + +f.close(); +h.close() + + + + diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/read_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/read_file.py Mon Sep 15 14:52:20 2014 -0400 @@ -0,0 +1,21 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_t_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split('\t'); + for i in range(0, len(tl)): + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + + diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/read_s_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/read_s_file.py Mon Sep 15 14:52:20 2014 -0400 @@ -0,0 +1,22 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys + + + +def read_sp_file(in_file): + f = open(in_file); + result = []; + for aline in f.readlines(): + temp = []; + tline = aline.strip(); + tl = tline.split(' '); + for i in range(0, len(tl)): + if len(tl[i].strip())>0: + temp.append(tl[i].strip()); + result.append(temp); + f.close(); + return result; + + diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/remove_map.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/remove_map.py Mon Sep 15 14:52:20 2014 -0400 @@ -0,0 +1,29 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * + + +unmap_file = sys.argv[1] +map_file = sys.argv[2] +result_file = sys.argv[3] + + +unmap = read_t_file(unmap_file) +mapped = read_t_file(map_file) +h = file(result_file, 'w') + +maps = set() +for i in range(len(mapped)): + maps.add(mapped[i][0]) + + +for i in range(len(unmap)): + name = unmap[i][0] + if name not in maps: + h.write(name) + h.write('\n') + + +h.close() diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/seq_track.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/seq_track.py Mon Sep 15 14:52:20 2014 -0400 @@ -0,0 +1,38 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * +from Bio import SeqIO + +unmap_file = sys.argv[1] +reads_file = sys.argv[2] +result_file = sys.argv[3] +tp = sys.argv[4] + + +unmap = read_t_file(unmap_file); + +h = file(result_file, 'w') + +reads = SeqIO.parse(reads_file,tp) +um = set() +for i in range(0, len(unmap)): + id_r = unmap[i][0] + um.add(id_r) + +for read in reads: + if read.id in um: + h.write('>') + h.write(read.id) + h.write('\n') + h.write(read.seq.tostring()) + h.write('\n') + + + +h.close() + + + + diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/tool_dependencies.xml Mon Sep 15 14:52:20 2014 -0400 @@ -0,0 +1,15 @@ + + + + + + + + + + + + + + + diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/truncate.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/truncate.py Mon Sep 15 14:52:20 2014 -0400 @@ -0,0 +1,32 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from Bio import SeqIO + +fasta_file = sys.argv[1] +shift_in = sys.argv[2] +result_file = sys.argv[3] +length = sys.argv[4] + +shift = int(shift_in) + +fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); +h = file(result_file,'w') +for seq in fasta_sequences: + nuc = seq.id; + sequence = seq.seq.tostring(); + if (len(sequence)-shift)>=int(length): + h.write('>'+nuc) + h.write('\n') + h.write(sequence[0:(len(sequence)-shift)]) + h.write('\n') + + + + +h.close() + + + + diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/unmap.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Iterative_mapping/unmap.py Mon Sep 15 14:52:20 2014 -0400 @@ -0,0 +1,31 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +import sys +from read_file import * +from Bio import SeqIO + +map_file = sys.argv[1] +result_file = sys.argv[2] + + +#reads = read_t_file(read_file); + +f = open(map_file); +h = file(result_file, 'w') + +for aline in f.readlines(): + tline = aline.strip(); + tl = tline.split('\t'); + if len(tl)>4: + if int(tl[1].strip()) != 0: + h.write(tl[0].strip()); + h.write('\n'); + + +f.close(); +h.close() + + + + diff -r 297cdb01d656 -r f4cc06e92530 get_reads/get_read.py --- a/get_reads/get_read.py Mon Sep 15 14:47:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,77 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -#from galaxy.tools.read_file import * -from Bio import SeqIO -import os -from read_file import * - -fasta_file = sys.argv[1] -map_file = sys.argv[2] -result_file = sys.argv[3] - -os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > map_info.txt") - -fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); -length_seq = {}; -for seq in fasta_sequences: - nuc = seq.id; - length_seq[nuc] = len(seq.seq.tostring()); - - - -mapping = {} -transcripts = [] - -f = open("map_info.txt"); -for aline in f.readlines(): - tline = aline.strip(); - tl = tline.split('\t'); - if tl[0].strip() not in transcripts: - transcripts.append(tl[0].strip()); - mapping[tl[0].strip()] = []; - - mapping[tl[0].strip()].append(tl[1].strip()); - -distribution = {}; -coverage = {}; -for transcript in length_seq: - distribution[transcript] = []; - for i in range(0, length_seq[transcript]): - distribution[transcript].append(0); - sum_count = float(0); - if transcript in mapping: - for j in range(0, len(mapping[transcript])): - index = mapping[transcript][j]; - #count = reads[mapping[transcript][j][0]]; - sum_count = sum_count + 1; - distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1; - coverage[transcript] = float(sum_count)/float(length_seq[transcript]); - else: - coverage[transcript] = 0 - - - - - -h = file(result_file, 'w') -for transcript in length_seq: - h.write(transcript); - h.write('\n') - for i in range(0, length_seq[transcript]): - h.write(str(distribution[transcript][i])) - h.write('\t') - h.write('\n') - h.write('\n') - - - - - -f.close(); -h.close() - - - - diff -r 297cdb01d656 -r f4cc06e92530 get_reads/get_read.xml --- a/get_reads/get_read.xml Mon Sep 15 14:47:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ - - - get_read.py $lib_file $map_file $output - - biopython - samtools - - - - - - - - - - - - - - - - - - - - -**TIPS**: - ------ - -**Input** -1. A mapped (bam) file from Bowtie (or any mapping program) -2. Reference library sequences (fasta) used to map the reads - ------ - -**Output**: -A text file with reverse transcription stop counts mapped to each nucleotide (RTSC file) - - - - - diff -r 297cdb01d656 -r f4cc06e92530 get_reads/read_file.py --- a/get_reads/read_file.py Mon Sep 15 14:47:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys - - - -def read_t_file(in_file): - f = open(in_file); - result = []; - for aline in f.readlines(): - temp = []; - tline = aline.strip(); - tl = tline.split('\t'); - for i in range(0, len(tl)): - temp.append(tl[i].strip()); - result.append(temp); - f.close(); - return result; - - diff -r 297cdb01d656 -r f4cc06e92530 get_reads/test.bam Binary file get_reads/test.bam has changed diff -r 297cdb01d656 -r f4cc06e92530 get_reads/tool_dependencies.xml --- a/get_reads/tool_dependencies.xml Mon Sep 15 14:47:42 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ - - - - - - - - - - - -