Mercurial > repos > tyty > structurefold
changeset 1:b6d9b0059499 draft
Deleted selected files
author | tyty |
---|---|
date | Mon, 15 Sep 2014 14:42:01 -0400 |
parents | d56631911cc1 |
children | 297cdb01d656 |
files | Iterative_mapping/.DS_Store Iterative_mapping/iterative_map.py Iterative_mapping/iterative_map.xml Iterative_mapping/map_ex.py Iterative_mapping/read_file.py Iterative_mapping/read_s_file.py Iterative_mapping/remove_map.py Iterative_mapping/seq_track.py Iterative_mapping/tool_dependencies.xml Iterative_mapping/truncate.py Iterative_mapping/unmap.py |
diffstat | 11 files changed, 0 insertions(+), 402 deletions(-) [+] |
line wrap: on
line diff
--- a/Iterative_mapping/iterative_map.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,103 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -import os -from read_file import * -from read_s_file import * - -type_input = sys.argv[1] -seq_file = sys.argv[2] -ref_file = sys.argv[3] -shift = sys.argv[4] -length = sys.argv[5] -map_type = sys.argv[6] -output_file = sys.argv[7] - - -if map_type!="default": - s = "" - s = s+"-v "+sys.argv[8] - s = s+" -5 "+sys.argv[9] - s = s+" -3 "+sys.argv[10] - s = s+" -k "+sys.argv[11] - if sys.argv[12]: - s = s+" -a" - if int(sys.argv[13])>=1: - s = s+" -m "+sys.argv[13] - if sys.argv[14]: - s = s+" --best --strata " - -else: - s = "-v 3 -a --best --strata " - -ospath = os.path.realpath(sys.argv[0]) -ost = ospath.split('/') -syspath = "" -for i in range(len(ost)-1): - syspath = syspath+ost[i].strip() - syspath = syspath+'/' - -os.system("bowtie-build -f "+ref_file+" "+syspath+"ref > "+syspath+"log.txt") - -os.system("cp "+seq_file+" "+syspath+"seq0.fa") - -if type_input == "fasta": - tp = 'fasta' -if type_input == "fastq": - tp = 'fastq' - -k = 0 -print(type_input) -while(True): - if type_input == "fasta": - os.system("bowtie "+s+"-f "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam") - if type_input == "fastq": - os.system("bowtie "+s+"-q "+syspath+"ref"+" "+syspath+"seq"+str(k)+".fa --quiet -S > "+syspath+"map"+str(k)+".sam") - os.system("samtools view -Sb -F 0xfff "+syspath+"map"+str(k)+".sam > "+syspath+"mapped"+str(k)+".bam 2>"+syspath+"log.txt") #get mapped reads - os.system("samtools view -Sb -f 0x4 "+syspath+"map"+str(k)+".sam > "+syspath+"umapped"+str(k)+".bam 2>"+syspath+"log.txt") #get unmapped reads - os.system("samtools view -Sb -f 0x10 "+syspath+"map"+str(k)+".sam > "+syspath+"rmapped"+str(k)+".bam 2>"+syspath+"log.txt") #get reversed mapped reads - os.system("samtools merge -f "+syspath+"unmapped"+str(k)+".bam "+syspath+"umapped"+str(k)+".bam "+syspath+"rmapped"+str(k)+".bam") #get reversed mapped reads - os.system("samtools view -h -o "+syspath+"unmapped"+str(k)+".sam "+syspath+"unmapped"+str(k)+".bam") #get reversed mapped reads - if k>0: - os.system("samtools view -h -o "+syspath+"mapped"+str(k)+".sam "+syspath+"mapped"+str(k)+".bam") #get reversed mapped reads - os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"unmapped"+str(k)+".txt") - os.system("cut -f 1 "+syspath+"mapped"+str(k)+".sam > "+syspath+"mapped"+str(k)+".txt") - os.system("python "+syspath+"remove_map.py "+syspath+"unmapped"+str(k)+".txt "+syspath+"mapped"+str(k)+".txt "+syspath+"runmapped"+str(k)+".txt") - os.system("rm "+syspath+"mapped"+str(k)+".sam") - os.system("rm "+syspath+"mapped"+str(k)+".txt") - os.system("rm "+syspath+"unmapped"+str(k)+".txt") - else: - os.system("cut -f 1 "+syspath+"unmapped"+str(k)+".sam > "+syspath+"runmapped"+str(k)+".txt") - - os.system("rm "+syspath+"unmapped"+str(k)+".bam") - os.system("rm "+syspath+"umapped"+str(k)+".bam") - os.system("rm "+syspath+"rmapped"+str(k)+".bam") - os.system("python "+syspath+"seq_track.py "+syspath+"runmapped"+str(k)+".txt "+syspath+"seq"+str(k)+".fa "+syspath+"unmap_seq"+str(k)+".fa "+tp) #get unmapped sequence - os.system("python "+syspath+"truncate.py "+syspath+"unmap_seq"+str(k)+".fa "+shift+" "+syspath+"seq"+str(k+1)+".fa "+length) #truncate unmapped sequence - os.system("rm "+syspath+"seq"+str(k)+".fa") #Remove sequences being mapped - os.system("rm "+syspath+"map"+str(k)+".sam") #Remove mapping file - os.system("rm "+syspath+"unmap_seq"+str(k)+".fa") #Remove unmapped sequnce - os.system("rm "+syspath+"runmapped"+str(k)+".txt") - os.system("rm "+syspath+"unmapped"+str(k)+".sam") - - os.system("wc -l "+syspath+"seq"+str(k+1)+".fa > "+syspath+"count"+str(k+1)+".txt") - c = read_sp_file(syspath+"count"+str(k+1)+".txt") - if c[0][0] == '0': #If no reads is in the sequence file, stop - os.system("rm "+syspath+"count"+str(k+1)+".txt") - os.system("rm "+syspath+"seq"+str(k+1)+".fa") - break - os.system("rm "+syspath+"count"+str(k+1)+".txt") - k = k+1 - -ss = "" -for i in range(0,k+1): - ss = ss+" "+syspath+"mapped"+str(i)+".bam" - - -os.system("samtools merge -f "+output_file+" "+ss) -#print("samtools merge mapped_all.bam"+ss) -os.system("rm "+syspath+"mapped*.bam") -os.system("rm "+syspath+"ref*") - -
--- a/Iterative_mapping/iterative_map.xml Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,80 +0,0 @@ -<tool id="iterative_map_pipeline" name="Iterative mapping" version="1.0"> - <description></description> - <command interpreter="python"> - #if $mapping_file.type == "user" - iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output $mapping_file.param_v $mapping_file.param_five $mapping_file.param_three $mapping_file.param_k $mapping_file.param_a $mapping_file.param_m $mapping_file.param_best - #else - iterative_map.py $file_format.type $file_format.seq_file $reference_file $shift $length $mapping_file.type $output - #end if - </command> - <requirements> - <requirement type="package" version="1.61">biopython</requirement> - <requirement type="package" version="1.7">numpy</requirement> - <requirement type="package" version="0.1.18">samtools</requirement> - <requirement type="package" version="0.12.7">bowtie</requirement> - </requirements> - <inputs> - <conditional name="file_format"> - <param name="type" type="select" label="Format of the file of the reads (Default FASTQ)"> - <option value="fastq">FASTQ</option> - <option value="fasta">FASTA</option> - </param> - <when value="fastq"> - <param name="seq_file" type="data" format="fastq" label="Fastq file"/> - </when> - <when value="fasta"> - <param name="seq_file" type="data" format="fasta" label="Fasta file"/> - </when> - </conditional> - <param name="reference_file" type="data" format="fasta" label="Reference genome/transcriptome"/> - <param name="shift" type="integer" value="1" label="Number of nucleotide trimmed each round"/> - <param name="length" type="integer" value="21" label="Minimum requirement of read length for mapping"/> - <conditional name="mapping_file"> - <param name="type" type="select" label="Bowtie mapping flags (Default -v 0 -a --best --strata)"> - <option value="default">Default</option> - <option value="user">User specified</option> - </param> - <when value="default"/> - <when value="user"> - <param name="param_v" type="integer" value="0" label="Number of mismatches for SOAP-like alignment policy (-v)"/> - <param name="param_five" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)"/> - <param name="param_three" type="integer" value="0" label="Trim n bases from high-quality (right) end of each read before alignment (-3)"/> - <param name="param_k" type="integer" value="1" label="Report up to n valid alignments per read (-k)"/> - <param name="param_a" type="boolean" checked="False" truevalue = "1" falsevalue = "0" label="Whether or not to report all valid alignments per read (-a)"/> - <param name="param_m" type="integer" value="-1" label="Suppress all alignments for a read if more than n reportable alignments exist (-m), -1 for unlimited"/> - <param name="param_best" type="boolean" checked="False" truevalue = "1" falsevalue = "0" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best --strata)"/> - </when> - </conditional> - - </inputs> - <outputs> - <data name="output" type="data" format="bam"/> - </outputs> - - <help> - - -**TIPS**: - ------ - -**Input**: - -* 1. Sequence file type (FASTA/FASTQ) -* 2. Sequence file (fasta/fastq format) {Default: fastq file} -* 3. Reference file (e.g. cDNA library [fasta]) -* 4. “Shift” (The length of the sequence that will be trimmed at the 3’end of the reads before each round of mapping) -* 5. “Length” (The minimum length of the reads for mapping after trimming) -* [Optional] -* 1. Bowtie mapping flags (options) [Default: -v 0 -a --best --strata] (-v flag indicates the number of allowed mismatches. use -5/-3 flag to trim nucleotides from 5'/3' end of the reads) - ------ - -**Output**: - -A bam file with all of the reads that are mapped - - - - </help> -</tool>
--- a/Iterative_mapping/map_ex.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * -from Bio import SeqIO - -map_file = sys.argv[1] -result_file = sys.argv[2] - - -#reads = read_t_file(read_file); - -f = open(map_file); -h = file(result_file, 'w') - -for aline in f.readlines(): - tline = aline.strip(); - tl = tline.split('\t'); - if len(tl)>4: - if int(tl[1].strip())== 0: - h.write(tline) - h.write('\n') - - -f.close(); -h.close() - - - -
--- a/Iterative_mapping/read_file.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys - - - -def read_t_file(in_file): - f = open(in_file); - result = []; - for aline in f.readlines(): - temp = []; - tline = aline.strip(); - tl = tline.split('\t'); - for i in range(0, len(tl)): - temp.append(tl[i].strip()); - result.append(temp); - f.close(); - return result; - -
--- a/Iterative_mapping/read_s_file.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,22 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys - - - -def read_sp_file(in_file): - f = open(in_file); - result = []; - for aline in f.readlines(): - temp = []; - tline = aline.strip(); - tl = tline.split(' '); - for i in range(0, len(tl)): - if len(tl[i].strip())>0: - temp.append(tl[i].strip()); - result.append(temp); - f.close(); - return result; - -
--- a/Iterative_mapping/remove_map.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * - - -unmap_file = sys.argv[1] -map_file = sys.argv[2] -result_file = sys.argv[3] - - -unmap = read_t_file(unmap_file) -mapped = read_t_file(map_file) -h = file(result_file, 'w') - -maps = set() -for i in range(len(mapped)): - maps.add(mapped[i][0]) - - -for i in range(len(unmap)): - name = unmap[i][0] - if name not in maps: - h.write(name) - h.write('\n') - - -h.close()
--- a/Iterative_mapping/seq_track.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,38 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * -from Bio import SeqIO - -unmap_file = sys.argv[1] -reads_file = sys.argv[2] -result_file = sys.argv[3] -tp = sys.argv[4] - - -unmap = read_t_file(unmap_file); - -h = file(result_file, 'w') - -reads = SeqIO.parse(reads_file,tp) -um = set() -for i in range(0, len(unmap)): - id_r = unmap[i][0] - um.add(id_r) - -for read in reads: - if read.id in um: - h.write('>') - h.write(read.id) - h.write('\n') - h.write(read.seq.tostring()) - h.write('\n') - - - -h.close() - - - -
--- a/Iterative_mapping/tool_dependencies.xml Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,15 +0,0 @@ -<?xml version="1.0"?> -<tool_dependency> - <package name="biopython" version="1.61"> - <repository changeset_revision="ae9dda584395" name="package_biopython_1_61" owner="biopython" prior_installation_required="True" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> - <package name="numpy" version="1.7"> - <repository changeset_revision="4:ef12a3a11d5b" name="package_numpy_1_7" owner="iuc" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> - <package name="samtools" version="0.1.18"> - <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> - <package name="bowtie" version="0.12.7"> - <repository changeset_revision="9f9f38617a98" name="package_bowtie_0_12_7" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" /> - </package> -</tool_dependency>
--- a/Iterative_mapping/truncate.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,32 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from Bio import SeqIO - -fasta_file = sys.argv[1] -shift_in = sys.argv[2] -result_file = sys.argv[3] -length = sys.argv[4] - -shift = int(shift_in) - -fasta_sequences = SeqIO.parse(open(fasta_file),'fasta'); -h = file(result_file,'w') -for seq in fasta_sequences: - nuc = seq.id; - sequence = seq.seq.tostring(); - if (len(sequence)-shift)>=int(length): - h.write('>'+nuc) - h.write('\n') - h.write(sequence[0:(len(sequence)-shift)]) - h.write('\n') - - - - -h.close() - - - -
--- a/Iterative_mapping/unmap.py Mon Sep 15 14:41:13 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,31 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -import sys -from read_file import * -from Bio import SeqIO - -map_file = sys.argv[1] -result_file = sys.argv[2] - - -#reads = read_t_file(read_file); - -f = open(map_file); -h = file(result_file, 'w') - -for aline in f.readlines(): - tline = aline.strip(); - tl = tline.split('\t'); - if len(tl)>4: - if int(tl[1].strip()) != 0: - h.write(tl[0].strip()); - h.write('\n'); - - -f.close(); -h.close() - - - -