# HG changeset patch # User tyty # Date 1410806521 14400 # Node ID b6d9b0059499f305501ca5d442900410b13f01c0 # Parent d56631911cc16db2c852e30f8db287717eb1897a Deleted selected files diff -r d56631911cc1 -r b6d9b0059499 Iterative_mapping/.DS_Store Binary file Iterative_mapping/.DS_Store has changed diff -r d56631911cc1 -r b6d9b0059499 Iterative_mapping/iterative_map.py --- a/Iterative_mapping/iterative_map.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -import os -from read_file import * -from read_s_file import * - -type_input = sys.argv[1] -seq_file = sys.argv[2] -ref_file = sys.argv[3] -shift = sys.argv[4] -length = sys.argv[5] -map_type = sys.argv[6] -output_file = sys.argv[7] - - -if map_type!="default": - s = "" - s = s+"-v "+sys.argv[8] - s = s+" -5 "+sys.argv[9] - s = s+" -3 "+sys.argv[10] - s = s+" -k "+sys.argv[11] - if sys.argv[12]: - s = s+" -a" - if int(sys.argv[13])>=1: - s = s+" -m "+sys.argv[13] - if sys.argv[14]: - s = s+" --best --strata " - -else: - s = "-v 3 -a --best --strata " - -ospath = os.path.realpath(sys.argv[0]) -ost = ospath.split('/') -syspath = "" -for i in range(len(ost)-1): - syspath = syspath+ost[i].strip() - syspath = syspath+'/' - -os.system("bowtie-build -f "+ref_file+" "+syspath+"ref > "+syspath+"log.txt") - -os.system("cp "+seq_file+" "+syspath+"seq0.fa") - -if type_input == "fasta": - tp = 'fasta' -if type_input == "fastq": - tp = 'fastq' - -k = 0 -print(type_input) -while(True): - if type_input == "fasta": - os.system("bowtie "+s+"-f "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam") - if type_input == "fastq": - os.system("bowtie "+s+"-q "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam") - os.system("samtools view -Sb -F 0xfff "+syspath+"map"+str(k)+".sam > "+syspath+"mapped"+str(k)+".bam 2>"+syspath+"log.txt") #get mapped reads - os.system("samtools view -Sb -f 0x4 "+syspath+"map"+str(k)+".sam > "+syspath+"umapped"+str(k)+".bam 2>"+syspath+"log.txt") #get unmapped reads - os.system("samtools view -Sb -f 0x10 "+syspath+"map"+str(k)+".sam > "+syspath+"rmapped"+str(k)+".bam 2>"+syspath+"log.txt") #get reversed mapped reads - os.system("samtools merge -f "+syspath+"unmapped"+str(k)+".bam "+syspath+"umapped"+str(k)+".bam "+syspath+"rmapped"+str(k)+".bam") #get reversed mapped reads - os.system("samtools view -h -o "+syspath+"unmapped"+str(k)+".sam "+syspath+"unmapped"+str(k)+".bam") #get reversed mapped reads - if k>0: - os.system("samtools view -h -o "+syspath+"mapped"+str(k)+".sam "+syspath+"mapped"+str(k)+".bam") #get reversed mapped reads - os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"unmapped"+str(k)+".txt") - os.system("cut -f 1 "+syspath+"mapped"+str(k)+".sam > "+syspath+"mapped"+str(k)+".txt") - os.system("python "+syspath+"remove_map.py "+syspath+"unmapped"+str(k)+".txt "+syspath+"mapped"+str(k)+".txt "+syspath+"runmapped"+str(k)+".txt") - os.system("rm "+syspath+"mapped"+str(k)+".sam") - os.system("rm "+syspath+"mapped"+str(k)+".txt") - os.system("rm "+syspath+"unmapped"+str(k)+".txt") - else: - os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"runmapped"+str(k)+".txt") - - os.system("rm "+syspath+"unmapped"+str(k)+".bam") - os.system("rm "+syspath+"umapped"+str(k)+".bam") - os.system("rm "+syspath+"rmapped"+str(k)+".bam") - os.system("python "+syspath+"seq_track.py "+syspath+"runmapped"+str(k)+".txt "+syspath+"seq"+str(k)+".fa "+syspath+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence - os.system("python "+syspath+"truncate.py "+syspath+"unmap_seq"+str(k)+".fa "+shift+" "+syspath+"seq"+str(k+1)+".fa "+length) #truncate unmapped sequence - os.system("rm "+syspath+"seq"+str(k)+".fa") #Remove sequences being mapped - os.system("rm "+syspath+"map"+str(k)+".sam") #Remove mapping file - os.system("rm "+syspath+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce - os.system("rm "+syspath+"runmapped"+str(k)+".txt") - os.system("rm "+syspath+"unmapped"+str(k)+".sam") - - os.system("wc -l "+syspath+"seq"+str(k+1)+".fa > "+syspath+"count"+str(k+1)+".txt") - c = read_sp_file(syspath+"count"+str(k+1)+".txt") - if c[0][0] == '0': #If no reads is in the sequence file, stop - os.system("rm "+syspath+"count"+str(k+1)+".txt") - os.system("rm "+syspath+"seq"+str(k+1)+".fa") - break - os.system("rm "+syspath+"count"+str(k+1)+".txt") - k = k+1 - -ss = "" -for i in range(0,k+1): - ss = ss+" "+syspath+"mapped"+str(i)+".bam" - - -os.system("samtools merge -f "+output_file+" "+ss) -#print("samtools merge mapped_all.bam"+ss) -os.system("rm "+syspath+"mapped*.bam") -os.system("rm "+syspath+"ref*") - - diff -r d56631911cc1 -r b6d9b0059499 Iterative_mapping/iterative_map.xml --- a/Iterative_mapping/iterative_map.xml Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ - - - - #if $mapping_file.type == "user" - iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best - #else - iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output - #end if - - - biopython - numpy - samtools - bowtie - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -**TIPS**: - ------ - -**Input**: - -* 1. Sequence file type (FASTA/FASTQ) -* 2. Sequence file (fasta/fastq format) {Default: fastq file} -* 3. Reference file (e.g. cDNA library [fasta]) -* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping) -* 5. “Length” (The minimum length of the reads for mapping after trimming) -* [Optional] -* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. use -5/-3 flag to trim nucleotides from 5'/3' end of the reads) - ------ - -**Output**: - -A bam file with all of the reads that are mapped - - - - - diff -r d56631911cc1 -r b6d9b0059499 Iterative_mapping/map_ex.py --- a/Iterative_mapping/map_ex.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * -from Bio import SeqIO - -map_file = sys.argv[1] -result_file = sys.argv[2] - - -#reads = read_t_file(read_file); - -f = open(map_file); -h = file(result_file, 'w') - -for aline in f.readlines(): - tline = aline.strip(); - tl = tline.split('\t'); - if len(tl)>4: - if int(tl[1].strip())== 0: - h.write(tline) - h.write('\n') - - -f.close(); -h.close() - - - - diff -r d56631911cc1 -r b6d9b0059499 Iterative_mapping/read_file.py --- a/Iterative_mapping/read_file.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys - - - -def read_t_file(in_file): - f = open(in_file); - result = []; - for aline in f.readlines(): - temp = []; - tline = aline.strip(); - tl = tline.split('\t'); - for i in range(0, len(tl)): - temp.append(tl[i].strip()); - result.append(temp); - f.close(); - return result; - - diff -r d56631911cc1 -r b6d9b0059499 Iterative_mapping/read_s_file.py --- a/Iterative_mapping/read_s_file.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys - - - -def read_sp_file(in_file): - f = open(in_file); - result = []; - for aline in f.readlines(): - temp = []; - tline = aline.strip(); - tl = tline.split(' '); - for i in range(0, len(tl)): - if len(tl[i].strip())>0: - temp.append(tl[i].strip()); - result.append(temp); - f.close(); - return result; - - diff -r d56631911cc1 -r b6d9b0059499 Iterative_mapping/remove_map.py --- a/Iterative_mapping/remove_map.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * - - -unmap_file = sys.argv[1] -map_file = sys.argv[2] -result_file = sys.argv[3] - - -unmap = read_t_file(unmap_file) -mapped = read_t_file(map_file) -h = file(result_file, 'w') - -maps = set() -for i in range(len(mapped)): - maps.add(mapped[i][0]) - - -for i in range(len(unmap)): - name = unmap[i][0] - if name not in maps: - h.write(name) - h.write('\n') - - -h.close() diff -r d56631911cc1 -r b6d9b0059499 Iterative_mapping/seq_track.py --- a/Iterative_mapping/seq_track.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * -from Bio import SeqIO - -unmap_file = sys.argv[1] -reads_file = sys.argv[2] -result_file = sys.argv[3] -tp = sys.argv[4] - - -unmap = read_t_file(unmap_file); - -h = file(result_file, 'w') - -reads = SeqIO.parse(reads_file,tp) -um = set() -for i in range(0, len(unmap)): - id_r = unmap[i][0] - um.add(id_r) - -for read in reads: - if read.id in um: - h.write('>') - h.write(read.id) - h.write('\n') - h.write(read.seq.tostring()) - h.write('\n') - - - -h.close() - - - - diff -r d56631911cc1 -r b6d9b0059499 Iterative_mapping/tool_dependencies.xml --- a/Iterative_mapping/tool_dependencies.xml Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ - - - - - - - - - - - - - - - diff -r d56631911cc1 -r b6d9b0059499 Iterative_mapping/truncate.py --- a/Iterative_mapping/truncate.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from Bio import SeqIO - -fasta_file = sys.argv[1] -shift_in = sys.argv[2] -result_file = sys.argv[3] -length = sys.argv[4] - -shift = int(shift_in) - -fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); -h = file(result_file,'w') -for seq in fasta_sequences: - nuc = seq.id; - sequence = seq.seq.tostring(); - if (len(sequence)-shift)>=int(length): - h.write('>'+nuc) - h.write('\n') - h.write(sequence[0:(len(sequence)-shift)]) - h.write('\n') - - - - -h.close() - - - - diff -r d56631911cc1 -r b6d9b0059499 Iterative_mapping/unmap.py --- a/Iterative_mapping/unmap.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * -from Bio import SeqIO - -map_file = sys.argv[1] -result_file = sys.argv[2] - - -#reads = read_t_file(read_file); - -f = open(map_file); -h = file(result_file, 'w') - -for aline in f.readlines(): - tline = aline.strip(); - tl = tline.split('\t'); - if len(tl)>4: - if int(tl[1].strip()) != 0: - h.write(tl[0].strip()); - h.write('\n'); - - -f.close(); -h.close() - - - -