# HG changeset patch
# User tyty
# Date 1410807140 14400
# Node ID f4cc06e925309dc933b42c8c6ce2eb2c3af422c3
# Parent 297cdb01d656d1f872170454d468d27d72d03cc7
Uploaded
diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/.DS_Store
Binary file Iterative_mapping/.DS_Store has changed
diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/iterative_map.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/iterative_map.py Mon Sep 15 14:52:20 2014 -0400
@@ -0,0 +1,103 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+from read_file import *
+from read_s_file import *
+
+type_input = sys.argv[1]
+seq_file = sys.argv[2]
+ref_file = sys.argv[3]
+shift = sys.argv[4]
+length = sys.argv[5]
+map_type = sys.argv[6]
+output_file = sys.argv[7]
+
+
+if map_type!="default":
+ s = ""
+ s = s+"-v "+sys.argv[8]
+ s = s+" -5 "+sys.argv[9]
+ s = s+" -3 "+sys.argv[10]
+ s = s+" -k "+sys.argv[11]
+ if sys.argv[12]:
+ s = s+" -a"
+ if int(sys.argv[13])>=1:
+ s = s+" -m "+sys.argv[13]
+ if sys.argv[14]:
+ s = s+" --best --strata "
+
+else:
+ s = "-v 3 -a --best --strata "
+
+ospath = os.path.realpath(sys.argv[0])
+ost = ospath.split('/')
+syspath = ""
+for i in range(len(ost)-1):
+ syspath = syspath+ost[i].strip()
+ syspath = syspath+'/'
+
+os.system("bowtie-build -f "+ref_file+" "+syspath+"ref > "+syspath+"log.txt")
+
+os.system("cp "+seq_file+" "+syspath+"seq0.fa")
+
+if type_input == "fasta":
+ tp = 'fasta'
+if type_input == "fastq":
+ tp = 'fastq'
+
+k = 0
+print(type_input)
+while(True):
+ if type_input == "fasta":
+ os.system("bowtie "+s+"-f "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam")
+ if type_input == "fastq":
+ os.system("bowtie "+s+"-q "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam")
+ os.system("samtools view -Sb -F 0xfff "+syspath+"map"+str(k)+".sam > "+syspath+"mapped"+str(k)+".bam 2>"+syspath+"log.txt") #get mapped reads
+ os.system("samtools view -Sb -f 0x4 "+syspath+"map"+str(k)+".sam > "+syspath+"umapped"+str(k)+".bam 2>"+syspath+"log.txt") #get unmapped reads
+ os.system("samtools view -Sb -f 0x10 "+syspath+"map"+str(k)+".sam > "+syspath+"rmapped"+str(k)+".bam 2>"+syspath+"log.txt") #get reversed mapped reads
+ os.system("samtools merge -f "+syspath+"unmapped"+str(k)+".bam "+syspath+"umapped"+str(k)+".bam "+syspath+"rmapped"+str(k)+".bam") #get reversed mapped reads
+ os.system("samtools view -h -o "+syspath+"unmapped"+str(k)+".sam "+syspath+"unmapped"+str(k)+".bam") #get reversed mapped reads
+ if k>0:
+ os.system("samtools view -h -o "+syspath+"mapped"+str(k)+".sam "+syspath+"mapped"+str(k)+".bam") #get reversed mapped reads
+ os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"unmapped"+str(k)+".txt")
+ os.system("cut -f 1 "+syspath+"mapped"+str(k)+".sam > "+syspath+"mapped"+str(k)+".txt")
+ os.system("python "+syspath+"remove_map.py "+syspath+"unmapped"+str(k)+".txt "+syspath+"mapped"+str(k)+".txt "+syspath+"runmapped"+str(k)+".txt")
+ os.system("rm "+syspath+"mapped"+str(k)+".sam")
+ os.system("rm "+syspath+"mapped"+str(k)+".txt")
+ os.system("rm "+syspath+"unmapped"+str(k)+".txt")
+ else:
+ os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"runmapped"+str(k)+".txt")
+
+ os.system("rm "+syspath+"unmapped"+str(k)+".bam")
+ os.system("rm "+syspath+"umapped"+str(k)+".bam")
+ os.system("rm "+syspath+"rmapped"+str(k)+".bam")
+ os.system("python "+syspath+"seq_track.py "+syspath+"runmapped"+str(k)+".txt "+syspath+"seq"+str(k)+".fa "+syspath+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence
+ os.system("python "+syspath+"truncate.py "+syspath+"unmap_seq"+str(k)+".fa "+shift+" "+syspath+"seq"+str(k+1)+".fa "+length) #truncate unmapped sequence
+ os.system("rm "+syspath+"seq"+str(k)+".fa") #Remove sequences being mapped
+ os.system("rm "+syspath+"map"+str(k)+".sam") #Remove mapping file
+ os.system("rm "+syspath+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce
+ os.system("rm "+syspath+"runmapped"+str(k)+".txt")
+ os.system("rm "+syspath+"unmapped"+str(k)+".sam")
+
+ os.system("wc -l "+syspath+"seq"+str(k+1)+".fa > "+syspath+"count"+str(k+1)+".txt")
+ c = read_sp_file(syspath+"count"+str(k+1)+".txt")
+ if c[0][0] == '0': #If no reads is in the sequence file, stop
+ os.system("rm "+syspath+"count"+str(k+1)+".txt")
+ os.system("rm "+syspath+"seq"+str(k+1)+".fa")
+ break
+ os.system("rm "+syspath+"count"+str(k+1)+".txt")
+ k = k+1
+
+ss = ""
+for i in range(0,k+1):
+ ss = ss+" "+syspath+"mapped"+str(i)+".bam"
+
+
+os.system("samtools merge -f "+output_file+" "+ss)
+#print("samtools merge mapped_all.bam"+ss)
+os.system("rm "+syspath+"mapped*.bam")
+os.system("rm "+syspath+"ref*")
+
+
diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/iterative_map.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/iterative_map.xml Mon Sep 15 14:52:20 2014 -0400
@@ -0,0 +1,80 @@
+
+
+
+ #if $mapping_file.type == "user"
+ iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best
+ #else
+ iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output
+ #end if
+
+
+ biopython
+ numpy
+ samtools
+ bowtie
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+**TIPS**:
+
+-----
+
+**Input**:
+
+* 1. Sequence file type (FASTA/FASTQ)
+* 2. Sequence file (fasta/fastq format) {Default: fastq file}
+* 3. Reference file (e.g. cDNA library [fasta])
+* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping)
+* 5. “Length” (The minimum length of the reads for mapping after trimming)
+* [Optional]
+* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. use -5/-3 flag to trim nucleotides from 5'/3' end of the reads)
+
+-----
+
+**Output**:
+
+A bam file with all of the reads that are mapped
+
+
+
+
+
diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/map_ex.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/map_ex.py Mon Sep 15 14:52:20 2014 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+map_file = sys.argv[1]
+result_file = sys.argv[2]
+
+
+#reads = read_t_file(read_file);
+
+f = open(map_file);
+h = file(result_file, 'w')
+
+for aline in f.readlines():
+ tline = aline.strip();
+ tl = tline.split('\t');
+ if len(tl)>4:
+ if int(tl[1].strip())== 0:
+ h.write(tline)
+ h.write('\n')
+
+
+f.close();
+h.close()
+
+
+
+
diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/read_file.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/read_file.py Mon Sep 15 14:52:20 2014 -0400
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_t_file(in_file):
+ f = open(in_file);
+ result = [];
+ for aline in f.readlines():
+ temp = [];
+ tline = aline.strip();
+ tl = tline.split('\t');
+ for i in range(0, len(tl)):
+ temp.append(tl[i].strip());
+ result.append(temp);
+ f.close();
+ return result;
+
+
diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/read_s_file.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/read_s_file.py Mon Sep 15 14:52:20 2014 -0400
@@ -0,0 +1,22 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+
+
+
+def read_sp_file(in_file):
+ f = open(in_file);
+ result = [];
+ for aline in f.readlines():
+ temp = [];
+ tline = aline.strip();
+ tl = tline.split(' ');
+ for i in range(0, len(tl)):
+ if len(tl[i].strip())>0:
+ temp.append(tl[i].strip());
+ result.append(temp);
+ f.close();
+ return result;
+
+
diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/remove_map.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/remove_map.py Mon Sep 15 14:52:20 2014 -0400
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+
+
+unmap_file = sys.argv[1]
+map_file = sys.argv[2]
+result_file = sys.argv[3]
+
+
+unmap = read_t_file(unmap_file)
+mapped = read_t_file(map_file)
+h = file(result_file, 'w')
+
+maps = set()
+for i in range(len(mapped)):
+ maps.add(mapped[i][0])
+
+
+for i in range(len(unmap)):
+ name = unmap[i][0]
+ if name not in maps:
+ h.write(name)
+ h.write('\n')
+
+
+h.close()
diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/seq_track.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/seq_track.py Mon Sep 15 14:52:20 2014 -0400
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+unmap_file = sys.argv[1]
+reads_file = sys.argv[2]
+result_file = sys.argv[3]
+tp = sys.argv[4]
+
+
+unmap = read_t_file(unmap_file);
+
+h = file(result_file, 'w')
+
+reads = SeqIO.parse(reads_file,tp)
+um = set()
+for i in range(0, len(unmap)):
+ id_r = unmap[i][0]
+ um.add(id_r)
+
+for read in reads:
+ if read.id in um:
+ h.write('>')
+ h.write(read.id)
+ h.write('\n')
+ h.write(read.seq.tostring())
+ h.write('\n')
+
+
+
+h.close()
+
+
+
+
diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/tool_dependencies.xml Mon Sep 15 14:52:20 2014 -0400
@@ -0,0 +1,15 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/truncate.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/truncate.py Mon Sep 15 14:52:20 2014 -0400
@@ -0,0 +1,32 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from Bio import SeqIO
+
+fasta_file = sys.argv[1]
+shift_in = sys.argv[2]
+result_file = sys.argv[3]
+length = sys.argv[4]
+
+shift = int(shift_in)
+
+fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
+h = file(result_file,'w')
+for seq in fasta_sequences:
+ nuc = seq.id;
+ sequence = seq.seq.tostring();
+ if (len(sequence)-shift)>=int(length):
+ h.write('>'+nuc)
+ h.write('\n')
+ h.write(sequence[0:(len(sequence)-shift)])
+ h.write('\n')
+
+
+
+
+h.close()
+
+
+
+
diff -r 297cdb01d656 -r f4cc06e92530 Iterative_mapping/unmap.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/Iterative_mapping/unmap.py Mon Sep 15 14:52:20 2014 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import sys
+from read_file import *
+from Bio import SeqIO
+
+map_file = sys.argv[1]
+result_file = sys.argv[2]
+
+
+#reads = read_t_file(read_file);
+
+f = open(map_file);
+h = file(result_file, 'w')
+
+for aline in f.readlines():
+ tline = aline.strip();
+ tl = tline.split('\t');
+ if len(tl)>4:
+ if int(tl[1].strip()) != 0:
+ h.write(tl[0].strip());
+ h.write('\n');
+
+
+f.close();
+h.close()
+
+
+
+
diff -r 297cdb01d656 -r f4cc06e92530 get_reads/get_read.py
--- a/get_reads/get_read.py Mon Sep 15 14:47:42 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,77 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-#from galaxy.tools.read_file import *
-from Bio import SeqIO
-import os
-from read_file import *
-
-fasta_file = sys.argv[1]
-map_file = sys.argv[2]
-result_file = sys.argv[3]
-
-os.system("samtools view -F 0xfff "+map_file+"|cut -f 3,4 > map_info.txt")
-
-fasta_sequences = SeqIO.parse(open(fasta_file),'fasta');
-length_seq = {};
-for seq in fasta_sequences:
- nuc = seq.id;
- length_seq[nuc] = len(seq.seq.tostring());
-
-
-
-mapping = {}
-transcripts = []
-
-f = open("map_info.txt");
-for aline in f.readlines():
- tline = aline.strip();
- tl = tline.split('\t');
- if tl[0].strip() not in transcripts:
- transcripts.append(tl[0].strip());
- mapping[tl[0].strip()] = [];
-
- mapping[tl[0].strip()].append(tl[1].strip());
-
-distribution = {};
-coverage = {};
-for transcript in length_seq:
- distribution[transcript] = [];
- for i in range(0, length_seq[transcript]):
- distribution[transcript].append(0);
- sum_count = float(0);
- if transcript in mapping:
- for j in range(0, len(mapping[transcript])):
- index = mapping[transcript][j];
- #count = reads[mapping[transcript][j][0]];
- sum_count = sum_count + 1;
- distribution[transcript][int(index)-1] = distribution[transcript][int(index)-1] + 1;
- coverage[transcript] = float(sum_count)/float(length_seq[transcript]);
- else:
- coverage[transcript] = 0
-
-
-
-
-
-h = file(result_file, 'w')
-for transcript in length_seq:
- h.write(transcript);
- h.write('\n')
- for i in range(0, length_seq[transcript]):
- h.write(str(distribution[transcript][i]))
- h.write('\t')
- h.write('\n')
- h.write('\n')
-
-
-
-
-
-f.close();
-h.close()
-
-
-
-
diff -r 297cdb01d656 -r f4cc06e92530 get_reads/get_read.xml
--- a/get_reads/get_read.xml Mon Sep 15 14:47:42 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,43 +0,0 @@
-
-
- get_read.py $lib_file $map_file $output
-
- biopython
- samtools
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-**TIPS**:
-
------
-
-**Input**
-1. A mapped (bam) file from Bowtie (or any mapping program)
-2. Reference library sequences (fasta) used to map the reads
-
------
-
-**Output**:
-A text file with reverse transcription stop counts mapped to each nucleotide (RTSC file)
-
-
-
-
-
diff -r 297cdb01d656 -r f4cc06e92530 get_reads/read_file.py
--- a/get_reads/read_file.py Mon Sep 15 14:47:42 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-import sys
-
-
-
-def read_t_file(in_file):
- f = open(in_file);
- result = [];
- for aline in f.readlines():
- temp = [];
- tline = aline.strip();
- tl = tline.split('\t');
- for i in range(0, len(tl)):
- temp.append(tl[i].strip());
- result.append(temp);
- f.close();
- return result;
-
-
diff -r 297cdb01d656 -r f4cc06e92530 get_reads/test.bam
Binary file get_reads/test.bam has changed
diff -r 297cdb01d656 -r f4cc06e92530 get_reads/tool_dependencies.xml
--- a/get_reads/tool_dependencies.xml Mon Sep 15 14:47:42 2014 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-
-
-
-
-
-
-
-
-
-
-
-