Previous changeset 5:7c679db88fa3 (2017-11-02) Next changeset 7:c267c8a8ec12 (2017-11-02) |
Commit message:
MitoBim and interleave |
modified:
interleave.xml |
added:
._interleave-fastqgz-MITOBIM.py ._interleave.xml ._mitobim.xml MITObim_1.8.pl interleave-fastqgz-MITOBIM.py mitobim.xml |
removed:
Archive.zip |
b |
diff -r 7c679db88fa3 -r a03d23c6ab95 ._interleave-fastqgz-MITOBIM.py |
b |
Binary file ._interleave-fastqgz-MITOBIM.py has changed |
b |
diff -r 7c679db88fa3 -r a03d23c6ab95 ._interleave.xml |
b |
Binary file ._interleave.xml has changed |
b |
diff -r 7c679db88fa3 -r a03d23c6ab95 ._mitobim.xml |
b |
Binary file ._mitobim.xml has changed |
b |
diff -r 7c679db88fa3 -r a03d23c6ab95 Archive.zip |
b |
Binary file Archive.zip has changed |
b |
diff -r 7c679db88fa3 -r a03d23c6ab95 MITObim_1.8.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MITObim_1.8.pl Thu Nov 02 12:44:55 2017 -0400 |
[ |
b'@@ -0,0 +1,1009 @@\n+#! /usr/bin/perl\n+#\n+# MITObim - mitochondrial baiting and iterative mapping\n+# wrapper script version 1.8\n+# Author: Christoph Hahn, 2012-2016\n+# christoph.hahn@nhm.uio.no\n+\n+use strict;\n+use warnings;\n+use Getopt::Long;\n+use Cwd qw(abs_path);\n+use File::Copy;\n+use List::Util qw< min max >;\n+use POSIX qw(strftime);\n+use POSIX qw(ceil);\n+use File::Path \'rmtree\';\n+\n+my $startiteration = 1;\n+my $enditeration = 1;\n+\n+my ($quick, $noshow, $help, $strainname, $paired, $mode, $refname, $readpool, $maf, $proofreading, $readlength, $insertsize, $MM, $trim, $trimoverhang, $k_bait, $clean, $clean_interval, $min_contig_cov) = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, 31, 0, 2, 0);\n+my ($miramode, $key, $val, $exit, $current_contiglength, $current_number_of_reads, $current_number_of_contigs);\n+my $splitting = 0;\n+my $platform = "solexa";\n+my $platform_settings;# = "SOLEXA";\n+my $shme = "";\n+my $trim_off = "";\n+my $redirect_temp = "";\n+my $NFS_warn_only = "";\n+my ($mirapath, $mira, $miraconvert, $mirabait) = ("", "mira", "miraconvert", "mirabait");\n+my (@reads, @output, @path, @current_contig_stats, @contiglengths, @number_of_reads, @number_of_contigs);\n+my %hash;\n+my $PROGRAM = "\\nMITObim - mitochondrial baiting and iterative mapping\\n";\n+my $VERSION = "version 1.8\\n";\n+my $AUTHOR = "author: Christoph Hahn, (c) 2012-2016\\n";\n+my $cite = "\\nif you found MITObim useful, please cite:\n+Hahn C, Bachmann L and Chevreux B. (2013) Reconstructing mitochondrial genomes directly from genomic next-generation sequencing reads -\n+a baiting and iterative mapping approach. Nucl. Acids Res. 41(13):e129. doi: 10.1093/nar/gkt371\\n\\n";\n+my $USAGE = \t"\\nusage: ./MITObim.pl <parameters>\n+\t \t\\nparameters:\n+\t\t-start <int>\t\titeration to start with, default=1\n+\t\t-end <int>\t\titeration to end with, default=1\n+\t\t-sample <string>\tsampleID as used in initial MIRA assembly\n+\t\t-ref <string>\t\treferenceID as used in initial MIRA assembly\n+\t\t-readpool <FILE>\treadpool in fastq format (*.gz is also allowed)\n+\t\t-maf <FILE>\t\tmaf file from previous MIRA assembly\n+\t\t\\noptional:\n+\t\t--quick <FILE>\t\tstarts process with initial baiting using provided fasta reference\n+\t\t--kbait <int>\t\tset kmer for baiting stringency (default: 31)\n+\t\t--platform\t\tspecify sequencing platform (default: \'solexa\'; other options: \'iontor\', \'454\', \'pacbio\')\n+\t\t--denovo\t\truns MIRA in denovo mode (default: mapping)\n+\t\t--pair\t\t\tfinds pairs after baiting (relies on /1 and /2 header convention for read pairs) (default: no)\n+\t\t--verbose\t\tshow detailed output of MIRA modules (default: no)\n+\t\t--split\t\t\tsplit reference at positions with more than 5N (default: no)\n+\t\t--help\t\t\tshows this helpful information\n+\t\t--clean retain only the last 2 iteration directories (default: no)\n+\t\t--trimreads\t\ttrim data (default: no; we recommend to trim beforehand and feed MITObim with pre trimmed data)\n+\t\t--trimoverhang\t\ttrim overhang up- and downstream of reference (default: no)\n+\t\t--missmatch <int>\tnumber of allowed missmatches in mapping - only for illumina data (default: 15% of avg. read length)\n+\t\t--min_cov <int>\t\tminimum average coverage of contigs to be retained (default: off)\n+\t\t--mirapath <string> full path to MIRA binaries (only needed if MIRA is not in PATH)\n+\t\t--redirect_tmp\t\tredirect temporary output to this location (useful in case you are running MITObim on an NFS mount)\n+\t\t--NFS_warn_only\t\tallow MIRA to run on NFS mount without aborting - warn only (expert option - see MIRA documentation \'check_nfs\')\n+\t\t\\nexamples:\n+\t\t./MITObim.pl -start 1 -end 5 -sample StrainX -ref reference-mt -readpool illumina_readpool.fastq -maf initial_assembly.maf\n+\t\t./MITObim.pl -end 10 --quick reference.fasta -sample StrainY -ref reference-mt -readpool illumina_readpool.fastq\\n\\n";\n+#\t\t--proofread\t\tapplies proofreading (atm only to be used if starting the process from a single short seed reference)\n+#\t\t--readlength <int>\tread length of illumina library, default=150, relevant only for proofreading\n+'..b' $factor=ceil(length($full_sequence)/$critical);\n+ }\n+ if ($factor == 1){\n+ push(@output,$header);\n+ push(@output,$full_sequence);\n+ }else{ #too long\n+ print "\\nreference is too long for mirabait to be handled in one go -> will be split into sub-sequences\\n";\n+ $header=substr $header, 1;\n+ for (my $i=0; $i<$factor; $i++){\n+ unless ((length(substr $full_sequence, $i*$critical, $critical+$kmer)-$kmer)<0){\n+ push(@output,">sub$i\\_" .$header);\n+ push(@output,substr $full_sequence, $i*$critical, $critical+$kmer);\n+ }\n+ }\n+ }\n+ return @output;\n+}\n+\n+sub create_manifest {\n+\tmy ($iter, $sampleID, $refID, $mmode, $trim, $platform, $solexa_missmatches, $pair, $overhang, $reads, $ref, $redirect, $NFS_warn);\n+\t$iter = $_[0];\n+\t$sampleID = $_[1];\n+\t$refID = $_[2];\n+\t$mmode = $_[3];\n+\t$trim = $_[4];\n+\t$platform = $_[5];\n+\t$solexa_missmatches = $_[6];\n+\t$pair = $_[7];\n+\t$overhang = $_[8];\n+\t$reads = $_[9];\n+\t$ref = $_[10];\n+\t$redirect = $_[11];\n+\t$NFS_warn = $_[12];\n+\n+\tif ($NFS_warn){\n+\t\t$NFS_warn = ":cnfs=warn"\n+\t}\n+\n+\topen (MANIFEST,">manifest.conf") or die $!;\n+\tprint MANIFEST "#manifest file for iteration $iter created by MITObim\\n\\nproject = $sampleID-$refID\n+\t\\njob = genome,$mmode,accurate\n+\t\\nparameters = -NW:mrnl=0$NFS_warn -AS:nop=1 $redirect $overhang $platform $trim -CO:msr=no $solexa_missmatches\\n";\n+\tmy @technology = split("_",$platform);\n+\t#-notraceinfo -\n+\tif ($mmode eq "mapping"){\n+\t\tprint MANIFEST "\\nreadgroup\\nis_reference\\ndata = $ref\\nstrain = $refID\\n";\n+\t}\n+\tprint MANIFEST "\\nreadgroup = reads\\ndata = $reads\\ntechnology = $technology[0]";\n+\tif ($pair){\n+#\t\tprint MANIFEST "\\nsegmentplacement = ---> <--- infoonly";\n+\t\tprint MANIFEST "\\nsegmentplacement = ---> <--- exclusion_criterion";\n+\t\tprint MANIFEST "\\ntemplatesize = -1 600 exclusion_criterion";\n+\t}\n+\tprint MANIFEST "\\nstrain = $sampleID\\n";\n+\tclose MANIFEST;\n+}\n+sub clean {\n+ my $interval = shift;\n+ my $cur = shift;\n+ my $dir = $cur-$interval;\n+ my $path=abs_path;\n+ if (-d "$path/../iteration$dir"){\n+ print "\\nnow removing directory iteration$dir\\n";\n+ rmtree ("$path/../iteration$dir") or die $!;\n+ }\n+}\n+\n+sub remove_unmapped_contigs{\n+\tmy $file = shift;\n+\tmy $out = shift;\n+\tmy ($head, $seq,$length);\n+\tmy @split;\n+\tmy ($dropped,$split,$trim)=(0,0,0);\n+\topen (FASTA,"<$out") or die $!."\\ncould not open $file for reading\\n";\n+\twhile (<FASTA>){\n+\t\tchomp;\n+\t\tif ($_ =~ /^>/){\n+\t\t\t$head=$_;\n+\t\t}else {\n+\t\t\tif ($_ =~ /X/){\n+\t\t\t\t$dropped++;\n+\t\t\t\tprint "drop:\\n$head\\n$_\\n";\n+\t\t\t}else{\n+\t\t\t\t$length=length($_);\n+\t\t\t\t$_ =~ s/^N+//g;\t#remove all Ns from the beginning of the sequence\n+\t\t\t\t$_ =~ s/N+$//g;\t#remove all Ns from the end of the sequence\n+\t\t\t\tif ($length != length($_)){\n+\t\t\t\t\t$trim++;\n+\t\t\t\t}\n+\t\t\t\t@split=split(/N{3,}/);\t#split at every occurance of 3 or more Ns\n+\t\t\tif (@split > 1){\n+\t\t\t\t\tprint "contig $head has been split into ".scalar @split." sub-contigs\\n";\n+\t\t\t\t\t$split++;\n+\t\t\t\t\tfor (my $i=0; $i<@split; $i++){\n+#\t\t\t\t\t\tprint "$head\\_$i\\n$split[$i]\\n";\n+\t\t\t\t\t\t$seq.="$head\\_$i\\n$split[$i]\\n";\n+\t\t\t\t\t}\n+\t\t\t\t}else{\n+#\t\t\t\t\tprint "$head\\n$_\\n";\n+\t\t\t\t\t$seq.="$head\\n$_\\n";\n+\t\t\t\t}\n+\t\t\t}\n+\t\t}\n+\t}\t\t\n+\tclose FASTA;\n+\tif (($dropped) || ($split) || ($trim)){\n+\t\tprint "creating backup of $out before changing it\\n";\n+\t\tcopy $out,$file;\n+\t\topen (OUT,">$out") or die $!."\\ncould not open $out for writing\\n";\n+\t\tprint OUT "$seq\\n";\n+\t\tclose OUT;\n+\t}\n+\n+\tif ($dropped){\n+\t\tprint "\\nremoved $dropped contigs from baitfile because they did not receive any hits\\n";\n+\t}else{\n+\t\tprint "\\nno need to remove any sequences from the baitfile\\n";\n+\t}\n+\n+\tif (!$split){\n+\t\tprint "\\nno need to split any contigs\\n";\n+\t}\n+\tif ($trim){\n+\t\tprint "\\n$trim contigs had N\'s trimmed off their ends\\n";\n+\t}\n+}\n' |
b |
diff -r 7c679db88fa3 -r a03d23c6ab95 interleave-fastqgz-MITOBIM.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interleave-fastqgz-MITOBIM.py Thu Nov 02 12:44:55 2017 -0400 |
[ |
@@ -0,0 +1,45 @@ +#!/usr/bin/python +# encoding:utf8 +# authors: Erik Garrison, Sébastien Boisvert +# modified by github@cypridina on 20151104 to work with MITObim +"""This script takes two fastq or fastq.gz files and interleaves them +Usage: + interleave-fasta fasta_file1 fasta_file2 +""" + +import sys,re + +def interleave(f1, f2): + """Interleaves two (open) fastq files. + """ + while True: + line = f1.readline() + if line.strip() == "": + break + print re.sub(r" 1:N.*", "/1",line.strip()) + + for i in xrange(3): + print re.sub(r" 2:N.*","/2",f1.readline().strip()) + + for i in xrange(4): + print re.sub(r" 2:N.*","/2",f2.readline().strip()) + +if __name__ == '__main__': + try: + file1 = sys.argv[1] + file2 = sys.argv[2] + except: + print __doc__ + sys.exit(1) + + if file1[-2:] == "gz": + import gzip + with gzip.open(file1) as f1: + with gzip.open(file2) as f2: + interleave(f1, f2) + else: + with open(file1) as f1: + with open(file2) as f2: + interleave(f1, f2) + f1.close() + f2.close() |
b |
diff -r 7c679db88fa3 -r a03d23c6ab95 interleave.xml --- a/interleave.xml Thu Nov 02 12:43:28 2017 -0400 +++ b/interleave.xml Thu Nov 02 12:44:55 2017 -0400 |
[ |
@@ -7,7 +7,7 @@ <exit_code range="1:" /> </stdio> <command><![CDATA[ - /home/lijing/galaxy/tools/ngs_mapping/interleave-fastqgz-MITOBIM.py + interleave-fastqgz-MITOBIM.py $fastq1 $fastq2 > $output1 ]]></command> <inputs> |
b |
diff -r 7c679db88fa3 -r a03d23c6ab95 mitobim.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mitobim.xml Thu Nov 02 12:44:55 2017 -0400 |
[ |
@@ -0,0 +1,77 @@ +<tool id="mitobim" name="MITObim" version="0.1.0"> + <description>mitochondrial baiting and iterative mapping</description> + <requirements> + <requirement type="package" version="4.0.2">mira4_assembler</requirement> + </requirements> + <stdio> + <exit_code range="1:" /> + </stdio> + <command><![CDATA[ + cp $readpool readpool.fastq; + MITObim_1.8.pl + --pair + --quick $quick + -start 1 + -end $end + -sample $sample + -ref $ref + -readpool readpool.fastq + --clean >& log; + rm readpool.fastq + ]]></command> + <inputs> + <param type="data" name="quick" format="fasta,txt" + label="starts process with initial baiting using provided fasta reference" /> + <param type="data" name="readpool" format="fastqsanger,fastq" + label="readpool in fastq format (*.gz is also allowed) ! For pair-end reads, please use interleave tool to join the forward and revers reads into one fastq file. !" /> + <param name="end" type="integer" label="iteration to end with, default=1" + value="50" min="1" max="500" help="(-end)" /> + <param name="sample" size="30" type="text" value="sample" label="sampleID as used in initial MIRA assembly"/> + <param name="ref" size="30" type="text" value="reference" label="referenceID as used in initial MIRA assembly"/> + </inputs> + <outputs> + <data name="output1" format="fasta" from_work_dir="finaloutput.fasta" label="${tool.name} on ${on_string}: Fasta"/> + <data name="output2" format="txt" from_work_dir="log" label="${tool.name} on ${on_string}: Log" /> + </outputs> + + <help><![CDATA[ + +MITObim - mitochondrial baiting and iterative mapping +version 1.8 +author: Christoph Hahn, (c) 2012-2015 + +usage: ./MITObim.pl <parameters> + +parameters: + -start <int> iteration to start with, default=1 + -end <int> iteration to end with, default=1 + -sample <string> sampleID as used in initial MIRA assembly + -ref <string> referenceID as used in initial MIRA assembly + -readpool <FILE> readpool in fastq format (*.gz is also allowed) + -maf <FILE> maf file from previous MIRA assembly + +optional: + --quick <FILE> starts process with initial baiting using provided fasta reference + --kbait <int> set kmer for baiting stringency (default: 31) + --denovo runs MIRA in denovo mode (default: mapping) + --pair finds pairs after baiting (relies on /1 and /2 header convention for read pairs) (default: no) + --verbose show detailed output of MIRA modules (default: no) + --split split reference at positions with more than 5N (default: no) + --help shows this helpful information + --clean retain only the last 2 iteration directories (default: no) + --trimreads trim data (default: no; we recommend to trim beforehand and feed MITObim with pre trimmed data) + --trimoverhang trim overhang up- and downstream of reference (default: no) + --missmatch <int> number of allowed missmatches in mapping - only for illumina data (default: 15% of avg. read length) + --min_cov <int> minimum average coverage of contigs to be retained (default: off) + --mirapath <string> full path to MIRA binaries (only needed if MIRA is not in PATH) + --iontor use iontorrent data (experimental - default is illumina data) + --454 use 454 data (experimental - default is illumina data) + +examples: + ./MITObim.pl -start 1 -end 5 -sample StrainX -ref reference-mt -readpool illumina_readpool.fastq -maf initial_assembly.maf + ./MITObim.pl -end 10 --quick reference.fasta -sample StrainY -ref reference-mt -readpool illumina_readpool.fastq + + + ]]></help> + +</tool> |