# HG changeset patch # User bjoern-gruening # Date 1362163162 18000 # Node ID 427fb56f2e41bb2c744e45ea44dc2d5c36c6ff73 # Parent 7eefe5d6eecd9ee4acebfb14a0d96768a97fe047 - new options - fixes diff -r 7eefe5d6eecd -r 427fb56f2e41 README --- a/README Tue Dec 25 05:54:01 2012 -0500 +++ b/README Fri Mar 01 13:39:22 2013 -0500 @@ -1,8 +1,5 @@ Bismark uses Bowtie or Bowtie2 to map the reads. -Create your reference index with bismark_genome_preparation in your -normal Bowtie2/Botwie index directory. -bismark_genome_preparation will create a Bisulfite_Genome folder directly in -your Bowtie2/Bowtie index directory. If you follow that approach you do not -need to specify or modify an extra *.loc file. -That wrapper will extract the path to the Bisulfite_Genome folder from -./tool-data/bowtie2_indices.loc or ./tool-data/bowtie_indices.loc. + +Create your reference index with bismark_genome_preparation in your normal Bowtie2/Bowtie index directory. +bismark_genome_preparation will create a Bisulfite_Genome folder directly in your Bowtie2/Bowtie index directory. If you follow that approach you do not need to specify or modify an extra *.loc file. +That wrapper will extract the path to the Bisulfite_Genome folder from ./tool-data/bowtie2_indices.loc or ./tool-data/bowtie_indices.loc. diff -r 7eefe5d6eecd -r 427fb56f2e41 bismark_bowtie2_wrapper.xml --- a/bismark_bowtie2_wrapper.xml Tue Dec 25 05:54:01 2012 -0500 +++ b/bismark_bowtie2_wrapper.xml Fri Mar 01 13:39:22 2013 -0500 @@ -12,7 +12,7 @@ bismark_wrapper.py ## Change this to accommodate the number of threads you have available. 
- --num-threads 4 + --num-threads 12 --bismark_path \$SCRIPT_PATH @@ -47,16 +47,23 @@ --fasta #end if #else: - --mate-paired - --mate1 $singlePaired.input_mate1 - --mate2 $singlePaired.input_mate2 + --mate-paired + #set $mate1 = list() + #set $mate2 = list() + #for $mate_pair in $singlePaired.mate_list + $mate1.append( str($mate_pair.input_mate1) ) + $mate2.append( str($mate_pair.input_mate2) ) + #end for - #if $singlePaired.input_mate1.ext == "fastqillumina": + --mate1 #echo ','.join($mate1) + --mate2 #echo ','.join($mate2) + + #if $singlePaired.mate_list[0].input_mate1.ext == "fastqillumina": --phred64-quals --fastq - #elif $singlePaired.input_mate1.ext == "fastqsanger": + #elif $singlePaired.mate_list[0].input_mate1.ext == "fastqsanger": --fastq - #elif $singlePaired.input_mate1.ext == "fasta": + #elif $singlePaired.mate_list[0].input_mate1.ext == "fasta": --fasta #end if @@ -157,8 +164,10 @@ - - + + + + @@ -247,7 +256,7 @@ - @@ -267,7 +276,7 @@ - @@ -291,7 +300,7 @@ - @@ -310,7 +319,7 @@ - @@ -353,6 +362,11 @@ .. __: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +.. class:: warningmark + +Make sure all your input reads are in the correct and same format. If that's not the case, please adjust/convert the filetype with Galaxy's built-in converters. + ------ **Input formats** @@ -415,12 +429,7 @@ **Bismark parameter list** -This is an exhaustive list of Bismark options: - ------- - -**OPTIONS** - +This is an exhaustive list of Bismark options. Input:: diff -r 7eefe5d6eecd -r 427fb56f2e41 bismark_bowtie_wrapper.xml --- a/bismark_bowtie_wrapper.xml Tue Dec 25 05:54:01 2012 -0500 +++ b/bismark_bowtie_wrapper.xml Fri Mar 01 13:39:22 2013 -0500 @@ -1,4 +1,4 @@ - + bisulfite mapper (bowtie) @@ -10,9 +10,6 @@ bismark_wrapper.py - - ## Change this to accommodate the number of threads you have available. 
- --num-threads 4 --bismark_path \$SCRIPT_PATH @@ -45,16 +42,23 @@ --fasta #end if #else: - --mate-paired - --mate1 $singlePaired.input_mate1 - --mate2 $singlePaired.input_mate2 + --mate-paired + #set $mate1 = list() + #set $mate2 = list() + #for $mate_pair in $singlePaired.mate_list + $mate1.append( str($mate_pair.input_mate1) ) + $mate2.append( str($mate_pair.input_mate2) ) + #end for - #if $singlePaired.input_mate1.ext == "fastqillumina": + --mate1 #echo ','.join($mate1) + --mate2 #echo ','.join($mate2) + + #if $singlePaired.mate_list[0].input_mate1.ext == "fastqillumina": --phred64-quals --fastq - #elif $singlePaired.input_mate1.ext == "fastqsanger": + #elif $singlePaired.mate_list[0].input_mate1.ext == "fastqsanger": --fastq - #elif $singlePaired.input_mate1.ext == "fasta": + #elif $singlePaired.mate_list[0].input_mate1.ext == "fasta": --fasta #end if @@ -73,11 +77,7 @@ --seed-len $params.seed_len ## default 0 --seed-mismatches $params.seed_mismatches - ## default 15 - --seed-extention-attempts $params.seed_extention_attempts - ## default 2 - --max-reseed $params.max_reseed - + ## default 70 ##--maqerr $params.maqerr @@ -89,12 +89,6 @@ --skip-reads $params.skip_reads #end if - ## if set, disable the original behaviour - $params.no_mixed - ## if set, disable the original behaviour - $params.no_discordant - - ###if str($params.isReportOutput) == "yes": ## --output-report-file $report_file ###end if @@ -155,8 +149,10 @@ - - + + + + @@ -186,9 +182,6 @@ - - - @@ -245,7 +238,7 @@ - @@ -265,7 +258,7 @@ - @@ -289,7 +282,7 @@ - @@ -308,7 +301,7 @@ - @@ -351,6 +344,11 @@ .. __: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +.. class:: warningmark + +Make sure all your input reads are in the correct and same format. If that's not the case, please adjust/convert the filetype with Galaxy's built-in converters. 
+ ------ **Input formats** @@ -413,12 +411,7 @@ **Bismark parameter list** -This is an exhaustive list of Bismark options: - ------- - -**OPTIONS** - +This is an exhaustive list of Bismark options. Input:: @@ -539,76 +532,5 @@ the specified folder does not exist, Bismark will attempt to create it first. The path to the temporary folder can be either relative or absolute. ------- - -Bowtie 2 alignment options:: - - -N INT Sets the number of mismatches to allowed in a seed alignment during multiseed alignment. - Can be set to 0 or 1. Setting this higher makes alignment slower (often much slower) - but increases sensitivity. Default: 0. This option is only available for Bowtie 2 (for - Bowtie 1 see -n). - - -L INT Sets the length of the seed substrings to align during multiseed alignment. Smaller values - make alignment slower but more senstive. Default: the --sensitive preset of Bowtie 2 is - used by default, which sets -L to 20. This option is only available for Bowtie 2 (for - Bowtie 1 see -l). - - --ignore-quals When calculating a mismatch penalty, always consider the quality value at the mismatched - position to be the highest possible, regardless of the actual value. I.e. input is treated - as though all quality values are high. This is also the default behavior when the input - doesn't specify quality values (e.g. in -f mode). This option is invariable and on by default. - - -Bowtie 2 paired-end options:: - - --no-mixed This option disables Bowtie 2's behavior to try to find alignments for the individual mates if - it cannot find a concordant or discordant alignment for a pair. This option is invariable and - and on by default. - - --no-discordant Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments. - A discordant alignment is an alignment where both mates align uniquely, but that does not - satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior - and it is on by default. 
- - -Bowtie 2 effort options:: - - -D INT Up to INT consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using - the alignments found so far. A seed extension "fails" if it does not yield a new best or a - new second-best alignment. Default: 15. - - -R INT INT is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds. - When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of - mismatches allowed) at different offsets and searches for more alignments. A read is considered - to have repetitive seeds if the total number of seed hits divided by the number of seeds - that aligned at least once is greater than 300. Default: 2. - - -Bowtie 2 Scoring options:: - - --score_min "func" Sets a function governing the minimum alignment score needed for an alignment to be considered - "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying - L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length. - See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is - L,0,-0.2. - - -Bowtie 2 Reporting options:: - - --most_valid_alignments INT This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is - deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the - default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the - effort expended to find valid alignments. - - For reference, this used to be the old (now deprecated) description of -M: - Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it - can't find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever - happens first. Only the best alignment is reported. 
Information from the other alignments is used to - estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes - Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that - aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not - guarantee that the alignment reported is the best possible in terms of alignment score. -M is - always used and its default value is set to 10. - diff -r 7eefe5d6eecd -r 427fb56f2e41 bismark_methylation_extractor.py --- a/bismark_methylation_extractor.py Tue Dec 25 05:54:01 2012 -0500 +++ b/bismark_methylation_extractor.py Fri Mar 01 13:39:22 2013 -0500 @@ -25,6 +25,8 @@ parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.') # input options + parser.add_argument( '--bismark_path', dest='bismark_path', help='Path to the bismark perl scripts' ) + parser.add_argument( '--infile', help='Input file in SAM format.' 
) parser.add_argument( '--single-end', dest='single_end', action="store_true" ) parser.add_argument( '--paired-end', dest='paired_end', action="store_true" ) @@ -70,6 +72,9 @@ # Build methylation extractor command output_dir = tempfile.mkdtemp() cmd = 'bismark_methylation_extractor --no_header -o %s %s %s' + if args.bismark_path: + # add the path to the bismark perl scripts, that is needed for galaxy + cmd = os.path.join(args.bismark_path, cmd) additional_opts = '' # Set up all options diff -r 7eefe5d6eecd -r 427fb56f2e41 bismark_wrapper.py --- a/bismark_wrapper.py Tue Dec 25 05:54:01 2012 -0500 +++ b/bismark_wrapper.py Fri Mar 01 13:39:22 2013 -0500 @@ -119,8 +119,12 @@ else: cmd_index = 'bismark_genome_preparation %s ' % ( tmp_index_dir ) if args.bismark_path: - # add the path to the bismark perl scripts, that is needed for galaxy - cmd_index = '%s/%s' % (args.bismark_path, cmd_index) + if os.path.exists(args.bismark_path): + # add the path to the bismark perl scripts, that is needed for galaxy + cmd_index = os.path.join(args.bismark_path, cmd_index) + else: + # assume the same directory as this script + cmd_index = 'perl %s' % os.path.join(os.path.realpath(os.path.dirname(__file__)), cmd_index) try: tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name tmp_stderr = open( tmp, 'wb' ) @@ -147,15 +151,27 @@ stop_err( 'Error indexing reference sequence\n' + str( e ) ) index_dir = tmp_index_dir else: - index_dir = args.index_path + # bowtie path is the path to the index directory and the first part of the index file name + index_dir = os.path.dirname( args.index_path ) # Build bismark command tmp_bismark_dir = tempfile.mkdtemp() output_dir = os.path.join( tmp_bismark_dir, 'results') cmd = 'bismark %(args)s --temp_dir %(tmp_bismark_dir)s -o %(output_dir)s --quiet %(genome_folder)s %(reads)s' + + if args.fasta: + # the query input files (specified as mate1,mate2 or singles) are FastA + cmd = '%s %s' % (cmd, '--fasta') + elif args.fastq: + cmd = '%s %s' % (cmd, 
'--fastq') + if args.bismark_path: # add the path to the bismark perl scripts, that is needed for galaxy - cmd = '%s/%s' % (args.bismark_path, cmd) + if os.path.exists(args.bismark_path): + cmd = os.path.join(args.bismark_path, cmd) + else: + # assume the same directory as this script + cmd = 'perl %s' % os.path.join(os.path.realpath(os.path.dirname(__file__)), cmd) arguments = { 'genome_folder': index_dir, @@ -178,7 +194,7 @@ if not args.bowtie2: # use bowtie specific options - additional_opts += ' --best ' + #additional_opts += ' --best ' # bug in bismark, --best is not available as option. Only --non-best, best-mode is activated by default if args.seed_mismatches: # --seedmms additional_opts += ' -n %s ' % args.seed_mismatches