| Previous changeset 2:82814a8a2395 (2013-08-21) Next changeset 4:243e8f9fb75b (2015-02-09) |
|
Commit message:
Uploaded |
|
modified:
bismark bismark_bowtie2_wrapper.xml bismark_bowtie_wrapper.xml bismark_genome_preparation bismark_methylation_extractor bismark_methylation_extractor.py bismark_methylation_extractor.xml bismark_wrapper.py readme.rst tool_dependencies.xml |
| b |
| diff -r 82814a8a2395 -r 91f07ff056ca bismark --- a/bismark Wed Aug 21 05:19:54 2013 -0400 +++ b/bismark Mon Apr 14 16:43:14 2014 -0400 |
| b |
| b'@@ -24,7 +24,7 @@\n \n \n my $parent_dir = getcwd;\n-my $bismark_version = \'v0.7.12\';\n+my $bismark_version = \'v0.10.0\';\n my $command_line = join (" ",@ARGV);\n \n ### before processing the command line we will replace --solexa1.3-quals with --phred64-quals as the \'.\' in the option name will cause Getopt::Long to fail\n@@ -35,7 +35,7 @@\n }\n my @filenames; # will be populated by processing the command line\n \n-my ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_file_format,$bowtie_options,$directional,$unmapped,$ambiguous,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$upto,$temp_dir,$non_bs_mm,$insertion_open,$insertion_extend,$deletion_open,$deletion_extend,$gzip,$bam,$samtools_path,$pbat) = process_command_line();\n+my ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_file_format,$bowtie_options,$directional,$unmapped,$ambiguous,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$upto,$temp_dir,$non_bs_mm,$insertion_open,$insertion_extend,$deletion_open,$deletion_extend,$gzip,$bam,$samtools_path,$pbat,$prefix,$old_flag) = process_command_line();\n \n my @fhs; # stores alignment process names, bisulfite index location, bowtie filehandles and the number of times sequences produced an alignment\n my %chromosomes; # stores the chromosome sequences of the mouse genome\n@@ -293,9 +293,13 @@\n \n ### printing all alignments to a results file\n my $outfile = $filename;\n+ if ($prefix){\n+ $outfile = "$prefix.$outfile";\n+ }\n+\n \n if ($bowtie2){ # SAM format is the default for Bowtie 2\n- $outfile =~ s/$/_bt2_bismark.sam/;\n+ $outfile =~ s/$/_bismark_bt2.sam/;\n }\n elsif ($vanilla){ # vanilla custom Bismark output single-end output (like Bismark versions 0.5.X)\n $outfile =~ s/$/_bismark.txt/;\n@@ -327,8 +331,12 @@\n \n ### printing alignment and methylation call summary to a report file\n my $reportfile = $filename;\n+ if ($prefix){\n+ $reportfile = "$prefix.$reportfile";\n+ }\n+\n if ($bowtie2){\n- $reportfile =~ s/$/_bt2_bismark_SE_report.txt/;\n+ $reportfile =~ s/$/_bismark_bt2_SE_report.txt/;\n }\n else{\n $reportfile =~ s/$/_bismark_SE_report.txt/;\n@@ -339,12 +347,19 @@\n \n if ($unmapped){\n my $unmapped_file = $filename;\n+ if ($prefix){\n+ $unmapped_file = "$prefix.$unmapped_file";\n+ }\n+\n $unmapped_file =~ s/$/_unmapped_reads.txt/;\n open (UNMAPPED,\'>\',"$output_dir$unmapped_file") or die "Failed to write to $unmapped_file: $!\\n";\n print "Unmapped sequences will be written to $output_dir$unmapped_file\\n";\n }\n if ($ambiguous){\n my $ambiguous_file = $filename;\n+ if ($prefix){\n+ $ambiguous_file = "$prefix.$ambiguous_file";\n+ }\n $ambiguous_file =~ s/$/_ambiguous_reads.txt/;\n open (AMBIG,\'>\',"$output_dir$ambiguous_file") or die "Failed to write to $ambiguous_file: $!\\n";\n print "Ambiguously mapping sequences will be written to $output_dir$ambiguous_file\\n";\n@@ -399,7 +414,12 @@\n }\n \n ### printing all alignments to a results file\n- my $outfile = $filename_1;\n+ my $outfile = $filename_1; \n+\n+ if ($prefix){\n+ $outfile = "$prefix.$outfile";\n+ }\n+\n if ($bowtie2){ # SAM format is the default Bowtie 2 output\n $outfile =~ s/$/_bismark_bt2_pe.sam/;\n }\n@@ -433,6 +453,10 @@\n \n ### printing alignment and methylation call summary to a report file\n my $reportfile = $filename_1;\n+ if ($prefix){\n+ $reportfile = "$prefix.$reportfile";\n+ }\n+\n if ($bowtie2){\n $reportfile =~ s/$/_bismark_bt2_PE_report.txt/;\n }\n@@ -449,6 +473,10 @@\n if ($unmapped){\n my $unmapped_1 = $filename_1;\n my $unmapped_2 = $filename_2;\n+ if ($prefix){\n+ $unmapped_1 = "$prefix.$unmapped_1";\n+ $unmapped_2 = "$prefix.$unmapped_2";\n+ }\n $unmapped_1 =~ s/$/_unmapped_reads_1.txt/;\n $unmapped_2 =~ s/$/_unmapped_reads_2.txt/;\n open (UNMAPPED_1,\'>\',"$output_dir$unmapped_1") or die "Failed to write to $unmapped_1: $!\\n";\n@@ -459,6 +487,11 @@\n if ($a'..b" # Read 2 is on the + strand (1+2+16+32+128)\n+ }\n }\n \n #####\n@@ -6846,11 +7258,12 @@\n \t# or\n \t#\n \t# -------------------------> read 1\n-\t# <----------- read 2 read 2 contained within read 1\n-\n-\t# start and end of read 2 are fully contained within read 1\n-\t$tlen_1 = 0; # Set as 0 when the information is unavailable\n-\t$tlen_2 = 0; # Set as 0 when the information is unavailable\n+\t# <------------------------ read 2 read 2 contained within read 1\n+\n+\t# start and end of read 2 are fully contained within read 1, using the length of read 1 for the TLEN variable\n+\t$tlen_1 = $end_read_1 - $start_read_1 + 1; # Set to length of read 1 Leftmost read has a + sign,\n+\t$tlen_2 = ($end_read_1 - $start_read_1 + 1) * -1; # Set to length of read 1 Rightmost read has a - sign. well this is debatable. Changed this\n+ ### as a request by frozenlyse on SeqAnswers on 24 July 2013\n }\n \n }\n@@ -6882,12 +7295,13 @@\n \t# or\n \t#\n \t# -------------------------> read 2\n-\t# <----------- read 1 read 1 contained within read 2\n+\t# <------------------------ read 1 read 1 contained within read 2\n \t\n-\t# start and end of read 1 are fully contained within read 2\n-\t$tlen_1 = 0; # Set as 0 when the information is unavailable\n-\t$tlen_2 = 0; # Set as 0 when the information is unavailable\n- }\n+\t# start and end of read 1 are fully contained within read 2, using the length of read 2 for the TLEN variable\n+\t$tlen_1 = ($end_read_2 - $start_read_2 + 1) * -1; # Set to length of read 2 Shorter read receives a - sign,\n+\t$tlen_2 = $end_read_2 - $start_read_2 + 1; # Set to length of read 2 Longer read receives a +. Well this is debatable. Changed this\n+\t ### as a request by frozenlyse on SeqAnswers on 24 July 2013\n+ }\n }\n }\n \n@@ -7353,6 +7767,27 @@\n --samtools_path The path to your Samtools installation, e.g. /home/user/samtools/. Does not need to be specified\n explicitly if Samtools is in the PATH already.\n \n+--prefix <prefix> Prefixes <prefix> to the output filenames. Trailing dots will be replaced by a single one. For\n+ example, '--prefix test' with 'file.fq' would result in the output file 'test.file.fq_bismark.sam' etc.\n+\n+--old_flag Only in paired-end SAM mode, uses the FLAG values used by Bismark v0.8.2 and before. In addition,\n+ this options appends /1 and /2 to the read IDs for reads 1 and 2 relative to the input file. Since\n+ both the appended read IDs and custom FLAG values may cause problems with some downstream tools \n+ such as Picard, new defaults were implemented as of version 0.8.3.\n+\n+\n+ default old_flag\n+ =================== ===================\n+ Read 1 Read 2 Read 1 Read 2\n+\n+ OT: 99 147 67 131\n+\n+ OB: 83 163 115 179\n+\n+ CTOT: 99 147 67 131\n+\n+ CTOB: 83 163 115 179\n+\n \n \n Other:\n@@ -7518,7 +7953,7 @@\n Each read of paired-end alignments is written out in a separate line in the above format.\n \n \n-Last edited on 10 May 2013.\n+Last edited on 07 October 2013.\n \n HOW_TO\n }\n" |
| b |
| diff -r 82814a8a2395 -r 91f07ff056ca bismark_bowtie2_wrapper.xml --- a/bismark_bowtie2_wrapper.xml Wed Aug 21 05:19:54 2013 -0400 +++ b/bismark_bowtie2_wrapper.xml Mon Apr 14 16:43:14 2014 -0400 |
| b |
| @@ -1,5 +1,5 @@ -<tool id="bismark_bowtie2" name="Bismark" version="0.7.12.1"> - <!-- Wrapper compatible with Bismark version 0.7.11 --> +<tool id="bismark_bowtie2" name="Bismark" version="0.10.1"> + <!-- Wrapper compatible with Bismark version 0.10 --> <description>bisulfite mapper (bowtie2)</description> <!--<version_command>bismark version</version_command>--> <requirements> @@ -12,7 +12,7 @@ bismark_wrapper.py ## Change this to accommodate the number of threads you have available. - --num-threads 24 + --num-threads "\${GALAXY_SLOTS:-24}" --bismark_path \$SCRIPT_PATH @@ -71,6 +71,9 @@ -X $singlePaired.maxInsert #end if + #if $sort_bam: + --sort-bam + #end if ## for now hardcode the value for the required memory per thread in --best mode --chunkmbs 512 @@ -176,6 +179,7 @@ </when> </conditional> + <param name="sort_bam" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Sort BAM file by chromosomal position (not compatibile with methylation extractor)"/> <conditional name="params"> <param name="settingsType" type="select" label="Bismark settings to use" help="You can use the default settings or set custom values for any of Bismark's parameters."> @@ -342,8 +346,6 @@ </conditional> </actions> </data> - - </outputs> <tests> |
| b |
| diff -r 82814a8a2395 -r 91f07ff056ca bismark_bowtie_wrapper.xml --- a/bismark_bowtie_wrapper.xml Wed Aug 21 05:19:54 2013 -0400 +++ b/bismark_bowtie_wrapper.xml Mon Apr 14 16:43:14 2014 -0400 |
| b |
| @@ -1,5 +1,5 @@ -<tool id="bismark_bowtie" name="Bismark" version="0.7.12.1"> - <!-- Wrapper compatible with Bismark version 0.7.11 --> +<tool id="bismark_bowtie" name="Bismark" version="0.10.0"> + <!-- Wrapper compatible with Bismark version 0.10 --> <description>bisulfite mapper (bowtie)</description> <!--<version_command>bismark version</version_command>--> <requirements> |
| b |
| diff -r 82814a8a2395 -r 91f07ff056ca bismark_genome_preparation --- a/bismark_genome_preparation Wed Aug 21 05:19:54 2013 -0400 +++ b/bismark_genome_preparation Mon Apr 14 16:43:14 2014 -0400 |
| [ |
| @@ -33,7 +33,7 @@ my $single_fasta; my $bowtie2; -my $bismark_version = 'v0.7.12'; +my $bismark_version = 'v0.10.0'; GetOptions ('verbose' => \$verbose, 'help' => \$help, @@ -44,10 +44,6 @@ 'bowtie2' => \$bowtie2, ); -my $genome_folder = shift @ARGV; # mandatory -my $CT_dir; -my $GA_dir; - if ($help or $man){ print_helpfile(); exit; @@ -66,6 +62,31 @@ exit; } +my $genome_folder = shift @ARGV; # mandatory + +# Ensuring a genome folder has been specified +if ($genome_folder){ + unless ($genome_folder =~ /\/$/){ + $genome_folder =~ s/$/\//; + } + $verbose and print "Path to genome folder specified as: $genome_folder\n"; + chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!"; + + # making the genome folder path abolsolute so it won't break if the path was specified relative + $genome_folder = getcwd; + unless ($genome_folder =~ /\/$/){ + $genome_folder =~ s/$/\//; + } +} +else{ + die "Please specify a genome folder to be used for bisulfite conversion\n\n"; +} + + +my $CT_dir; +my $GA_dir; + + if ($single_fasta){ print "Writing individual genomes out into single-entry fasta files (one per chromosome)\n\n"; $multi_fasta = 0; @@ -309,41 +330,6 @@ $verbose and print "Bismark Genome Preparation - Step I: Preparing folders\n\n"; - # Ensuring a genome folder has been specified - if ($genome_folder){ - unless ($genome_folder =~ /\/$/){ - $genome_folder =~ s/$/\//; - } - $verbose and print "Path to genome folder specified: $genome_folder\n"; - chdir $genome_folder or die "Could't move to directory $genome_folder. Make sure the directory exists! $!"; - - # making the genome folder path abolsolute so it won't break if the path was specified relative - $genome_folder = getcwd; - unless ($genome_folder =~ /\/$/){ - $genome_folder =~ s/$/\//; - } - } - - else{ - $verbose and print "Genome folder was not provided as argument "; - while (1){ - print "Please specify a genome folder to be bisulfite converted:\n"; - $genome_folder = <STDIN>; - chomp $genome_folder; - - # adding a trailing slash unless already present - unless ($genome_folder =~ /\/$/){ - $genome_folder =~ s/$/\//; - } - if (chdir $genome_folder){ - last; - } - else{ - warn "Could't move to directory $genome_folder! $!"; - } - } - } - if ($path_to_bowtie){ unless ($path_to_bowtie =~ /\/$/){ $path_to_bowtie =~ s/$/\//; @@ -376,7 +362,7 @@ die "The specified genome folder $genome_folder does not contain any sequence files in FastA format (with .fa or .fasta file extensions\n"; } - warn "Bisulfite Genome Indexer version $bismark_version (last modified 17 Nov 2011)\n\n"; + warn "Bisulfite Genome Indexer version $bismark_version (last modified 19 Sept 2013)\n\n"; sleep (3); # creating a directory inside the genome folder to store the bisfulfite genomes unless it already exists @@ -386,27 +372,10 @@ $verbose and print "Created Bisulfite Genome folder $bisulfite_dir\n"; } else{ - while (1){ - print "\nA directory called $bisulfite_dir already exists. Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indexes might be overwritten!\nDo you want to continue anyway?\t"; - my $proceed = <STDIN>; - chomp $proceed; - if ($proceed =~ /^y/i ){ - last; - } - elsif ($proceed =~ /^n/i){ - die "Terminated by user\n\n"; - } - } + print "\nA directory called $bisulfite_dir already exists. Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indices will be overwritten!\n\n"; + sleep(5); } - ### as of version 0.6.0 the Bismark indexer will no longer delete the Bisulfite_Genome directory if it was present already, since it could store the Bowtie 1 or 2 indexes already - # removing any existing files and subfolders in the bisulfite directory (the specified directory won't be deleted) - # rmtree($bisulfite_dir, {verbose => 1,keep_root => 1}); - # unless (-d $bisulfite_dir){ # had to add this after changing remove_tree to rmtree // suggested by Samantha Cooper @ Illumina - # mkdir $bisulfite_dir or die "Unable to create directory $bisulfite_dir $!\n"; - # } - # } - chdir $bisulfite_dir or die "Unable to move to $bisulfite_dir\n"; $CT_dir = "${bisulfite_dir}CT_conversion/"; $GA_dir = "${bisulfite_dir}GA_conversion/"; @@ -440,15 +409,14 @@ bisulfite genome will have all Cs converted to Ts (C->T), and the other one will have all Gs converted to As (G->A). Both bisulfite genomes will be stored in subfolders within the reference genome folder. Once the bisulfite conversion has been completed the program will fork and launch -two simultaneous instances of the bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). Be aware +two simultaneous instances of the Bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). Be aware that the indexing process can take up to several hours; this will mainly depend on genome size and system resources. - The following is a brief description of command line options and arguments to control the -Bismark Genome Preparation script: +Bismark Genome Preparation: USAGE: bismark_genome_preparation [options] <arguments> @@ -462,8 +430,9 @@ --verbose Print verbose output for more details or debugging. ---path_to_bowtie The full path to the Bowtie 1 or Bowtie 2 installation on your system.If - the path </../../> is not provided as an option you will be prompted for it. +--path_to_bowtie </../> The full path to the Bowtie 1 or Bowtie 2 installation on your system + (depending on which aligner/indexer you intend to use). Unless this path + is specified it is assumed that Bowtie is in the PATH. --bowtie2 This will create bisulfite indexes for Bowtie 2. (Default: Bowtie 1). @@ -481,12 +450,10 @@ ARGUMENTS: <path_to_genome_folder> The path to the folder containing the genome to be bisulfite converted. - At the current time Bismark Genome Preparation expects one or more fastA - files in the folder (with the file extension: .fa or .fasta). If the path - is not provided as an argument you will be prompted for it. + The Bismark Genome Preparation expects one or more fastA files in the folder + (with the file extension: .fa or .fasta). Specifying this path is mandatory. - -This script was last modified on 18 Nov 2011. +This script was last modified on 19 Sept 2013. HOW_TO } |
| b |
| diff -r 82814a8a2395 -r 91f07ff056ca bismark_methylation_extractor --- a/bismark_methylation_extractor Wed Aug 21 05:19:54 2013 -0400 +++ b/bismark_methylation_extractor Mon Apr 14 16:43:14 2014 -0400 |
| [ |
| b'@@ -8,6 +8,7 @@\n use FindBin qw($Bin);\n use lib "$Bin/../lib";\n \n+\n ## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)\n \n ## This program is free software: you can redistribute it and/or modify\n@@ -29,8 +30,8 @@\n \n my %fhs;\n \n-my $version = \'v0.7.11\';\n-my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip) = process_commandline();\n+my $version = \'v0.10.1\';\n+my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip,$ignore_r2,$mbias_only,$gazillion,$ample_mem) = process_commandline();\n \n \n ### only needed for bedGraph output\n@@ -41,6 +42,9 @@\n ### only needed for genome-wide cytosine methylation report\n my %chromosomes;\n \n+my %mbias_1;\n+my %mbias_2;\n+\n ##############################################################################################\n ### Summarising Run Parameters\n ##############################################################################################\n@@ -67,9 +71,20 @@\n }\n }\n \n-if ($ignore){\n- warn "First $ignore bases will be disregarded when processing the methylation call string\\n";\n+if ($single){\n+ if ($ignore){\n+ warn "First $ignore bp will be disregarded when processing the methylation call string\\n";\n+ }\n }\n+else{ ## paired-end\n+ if ($ignore){\n+ warn "First $ignore bp will be disregarded when processing the methylation call string of Read 1\\n";\n+ }\n+ if ($ignore_r2){\n+ warn "First $ignore_r2 bp will be disregarded when processing the methylation call string of Read 2\\n";\n+ }\n+}\n+\n \n if ($full){\n warn "Strand-specific outputs will be skipped. Separate output files for cytosines in CpG, CHG and CHH context will be generated\\n";\n@@ -95,7 +110,7 @@\n warn \'=\'x63,"\\n";\n \n if ($counts){\n- warn "Generating additional output in bedGraph format including methylating counts (output format: <Chromosome> <Start Position> <End Position> <Methylation Percentage> <count methylated> <count non-methylated>)\\n";\n+ warn "Generating additional output in bedGraph and coverage format\\nbedGraph format:\\t<Chromosome> <Start Position> <End Position> <Methylation Percentage>\\ncoverage format:\\t<Chromosome> <Start Position> <End Position> <Methylation Percentage> <count methylated> <count non-methylated>\\n\\n";\n }\n else{\n warn "Generating additional sorted output in bedGraph format (output format: <Chromosome> <Start Position> <End Position> <Methylation Percentage>)\\n";\n@@ -115,7 +130,10 @@\n warn "White spaces in read ID names will be removed prior to sorting\\n";\n }\n \n- if (defined $sort_size){\n+ if ($ample_mem){\n+ warn "Sorting chromosomal postions for the bedGraph step using arrays instead of using UNIX sort\\n";\n+ }\n+ elsif (defined $sort_size){\n warn "The bedGraph UNIX sort command will use the following memory setting:\\t\'$sort_size\'. Temporary directory used for sorting is the output directory\\n";\n }\n else{\n@@ -186,9 +204,20 @@\n \t total_unmethylated_CpG_count => 0,\n \t sequences_count => 0,\n \t );\n+\n @sorting_files = ();\n @bedfiles = ();\n \n+ %mbias_1 = ();\n+ %mbias_2 = ();\n+\n+ ### performing a quick check to see if a paired-end SAM file has been sorted by positions which does interfere with the logic used by the extractor\n+ unless ($vanilla){\n+ if ($paired){\n+ test_positional_sorting($filename);\n+ }\n+ }\n+\n process_Bismark_results_file($filename);\n \n ### Closing all filehandles so that the Bismark methylation extractor output doesn\'t get truncated due to buffering issues\n@@ -196,13 +225,18 @@\n if ($fh =~ /^[1230]$/) {\n foreach my $context (keys %{$fhs{$fh}'..b"overed\n in the experiment irrespective of its sequence context. This applies to both forward and\n reverse strands. Please be aware that this option may generate large temporary and output files\n@@ -3945,8 +4670,32 @@\n (e.g. --buffer_size 20G). For more information on sort type 'info sort' on a command line.\n Defaults to 2G.\n \n+--scaffolds/--gazillion Users working with unfinished genomes sporting tens or even hundreds of thousands of\n+ scaffolds/contigs/chromosomes frequently encountered errors with pre-sorting reads to\n+ individual chromosome files. These errors were caused by the operating system's limit\n+ of the number of filehandle that can be written to at any one time (typically 1024; to\n+ find out this limit on Linux, type: ulimit -a).\n+ To bypass the limitation of open filehandles, the option --scaffolds does not pre-sort\n+ methylation calls into individual chromosome files. Instead, all input files are\n+ temporarily merged into a single file (unless there is only a single file), and this\n+ file will then be sorted by both chromosome AND position using the Unix sort command.\n+ Please be aware that this option might take a looooong time to complete, depending on\n+ the size of the input files, and the memory you allocate to this process (see --buffer_size).\n+ Nevertheless, it seems to be working.\n+\n+--ample_memory Using this option will not sort chromosomal positions using the UNIX 'sort' command, but will\n+ instead use two arrays to sort methylated and unmethylated calls. This may result in a faster\n+ sorting process of very large files, but this comes at the cost of a larger memory footprint\n+ (two arrays of the length of the largest human chromosome 1 (~250M bp) consume around 16GB\n+ of RAM). Due to overheads in creating and looping through these arrays it seems that it will\n+ actually be *slower* for small files (few million alignments), and we are currently testing at\n+ which point it is advisable to use this option. Note that --ample_memory is not compatible\n+ with options '--scaffolds/--gazillion' (as it requires pre-sorted files to begin with).\n+\n+\n \n Genome-wide cytosine methylation report specific options:\n+=========================================================\n \n --cytosine_report After the conversion to bedGraph has completed, the option '--cytosine_report' produces a\n genome-wide methylation report for all cytosines in the genome. By default, the output uses 1-based\n@@ -3983,11 +4732,17 @@\n \n \n \n-The bedGraph output (optional) looks like this (tab-delimited):\n-===============================================================\n+The bedGraph output (optional) looks like this (tab-delimited; 0-based start coords, 1-based end coords):\n+=========================================================================================================\n+\n+track type=bedGraph (header line)\n+\n <chromosome> <start position> <end position> <methylation percentage>\n \n-The bedGraph output with '--counts' specified looks like this (tab-delimited):\n+\n+\n+The coverage output looks like this (tab-delimited, 1-based genomic coords):\n+============================================================================\n \n <chromosome> <start position> <end position> <methylation percentage> <count methylated> <count non-methylated>\n \n@@ -3999,7 +4754,7 @@\n \n \n \n-This script was last modified on 21 April 2013.\n+This script was last modified on 25 November 2013.\n \n HOW_TO\n }\n" |
| b |
| diff -r 82814a8a2395 -r 91f07ff056ca bismark_methylation_extractor.py --- a/bismark_methylation_extractor.py Wed Aug 21 05:19:54 2013 -0400 +++ b/bismark_methylation_extractor.py Mon Apr 14 16:43:14 2014 -0400 |
| b |
| @@ -27,10 +27,10 @@ # input options parser.add_argument( '--bismark_path', dest='bismark_path', help='Path to the bismark perl scripts' ) - parser.add_argument( '--infile', help='Input file in SAM format.' ) + parser.add_argument( '--infile', help='Input file in SAM or BAM format.' ) parser.add_argument( '--single-end', dest='single_end', action="store_true" ) parser.add_argument( '--paired-end', dest='paired_end', action="store_true" ) - + parser.add_argument( '--report-file', dest='report_file' ) parser.add_argument( '--comprehensive', action="store_true" ) parser.add_argument( '--merge-non-cpg', dest='merge_non_cpg', action="store_true" ) @@ -93,9 +93,15 @@ if args.report_file: additional_opts += ' --report ' - - # Final command: - cmd = cmd % (output_dir, additional_opts, args.infile) + #detect BAM file, use samtools view if it is a bam file + f = open (args.infile, 'rb') + sig = f.read(4) + f.close() + if sig == '\x1f\x8b\x08\x04' : + cmd = cmd % (output_dir, additional_opts, '-') + cmd = 'samtools view %s | %s' % (args.infile, cmd ) + else : + cmd = cmd % (output_dir, additional_opts, args.infile) # Run try: |
| b |
| diff -r 82814a8a2395 -r 91f07ff056ca bismark_methylation_extractor.xml --- a/bismark_methylation_extractor.xml Wed Aug 21 05:19:54 2013 -0400 +++ b/bismark_methylation_extractor.xml Mon Apr 14 16:43:14 2014 -0400 |
| b |
| @@ -1,11 +1,11 @@ -<tool id="bismark_methylation_extractor" name="Bismark" version="0.7.12"> - <!-- Wrapper compatible with Bismark version 0.7.7 --> - <description>methylation extractor</description> +<tool id="bismark_methylation_extractor" name="Bismark Meth. Extractor" version="0.10.1"> + <!-- Wrapper compatible with Bismark version 0.10 --> + <description>Reports on methylation status of reads mapped by Bismark</description> <!--<version_command>bismark_methylation_extractor version</version_command>--> <requirements> <requirement type="set_environment">SCRIPT_PATH</requirement> <requirement type="package" version="0.12.8">bowtie</requirement> - <requirement type="package" version="2.0.0-beta7">bowtie2</requirement> + <requirement type="package" version="2.1.0">bowtie2</requirement> </requirements> <parallelism method="basic"></parallelism> <command interpreter="python"> @@ -81,7 +81,7 @@ </command> <inputs> <!-- Input Parameters --> - <param name="input" type="data" format="sam" label="SAM file from Bismark bisulfid mapper" /> + <param name="input" type="data" format="sam,bam" label="SAM/BAM file from Bismark bisulfite mapper" /> <conditional name="singlePaired"> <param name="sPaired" type="select" label="Is this library mate-paired?"> <option value="single">Single-end</option> @@ -92,7 +92,6 @@ <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="This option avoids scoring overlapping methylation calls twice, in case of overlapping read one and read two" help="" /> </when> </conditional> - <param name="ignore_bps" type="integer" value="0" label="Ignore the first N bp when processing the methylation call string" /> <param name="comprehensive" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all four possible strand-specific methylation info into context-dependent output files" help="" /> |
| b |
| diff -r 82814a8a2395 -r 91f07ff056ca bismark_wrapper.py --- a/bismark_wrapper.py Wed Aug 21 05:19:54 2013 -0400 +++ b/bismark_wrapper.py Mon Apr 14 16:43:14 2014 -0400 |
| [ |
| @@ -43,6 +43,7 @@ help='The forward reads file in Sanger FASTQ or FASTA format.' ) parser.add_argument( '-2', '--mate2', dest='mate2', help='The reverse reads file in Sanger FASTQ or FASTA format.' ) + parser.add_argument( '--sort-bam', dest='sort_bam', action="store_true" ) parser.add_argument( '--output-unmapped-reads', dest='output_unmapped_reads', help='Additional output file with unmapped reads (single-end).' ) @@ -176,10 +177,10 @@ #tmp_bismark_dir = tempfile.mkdtemp( dir='/data/0/galaxy_db/tmp/' ) tmp_bismark_dir = tempfile.mkdtemp() output_dir = os.path.join( tmp_bismark_dir, 'results') - cmd = 'bismark %(args)s --bam --temp_dir %(tmp_bismark_dir)s -o %(output_dir)s --quiet %(genome_folder)s %(reads)s' + cmd = 'bismark %(args)s --bam --temp_dir %(tmp_bismark_dir)s --gzip -o %(output_dir)s --quiet %(genome_folder)s %(reads)s' if args.fasta: - # he query input files (specified as mate1,mate2 or singles) are FastA + # the query input files (specified as mate1,mate2 or singles) are FastA cmd = '%s %s' % (cmd, '--fasta') elif args.fastq: cmd = '%s %s' % (cmd, '--fastq') @@ -223,7 +224,7 @@ # alignment options if args.bowtie2: - additional_opts += ' -p %s --bowtie2 ' % args.num_threads + additional_opts += ' -p %s --bowtie2 ' % (int(args.num_threads/2)) #divides by 2 here since bismark will spawn 2 (original top and original bottom) jobs with -p threads each if args.seed_mismatches: additional_opts += ' -N %s ' % args.seed_mismatches if args.seed_len: @@ -327,24 +328,29 @@ bam_files = glob( os.path.join( output_dir, '*.bam') ) if len( bam_files ) > 1: - cmd = 'samtools merge -@ %s -f - %s ' % ( args.num_threads, ' '.join( bam_files ) ) + cmd = 'samtools merge -@ %s -f %s %s ' % ( args.num_threads, tmp_res, ' '.join( bam_files ) ) - p1 = subprocess.Popen( args=shlex.split( cmd ), stdout=subprocess.PIPE ) - proc = subprocess.Popen( ['samtools', 'sort', '-@', str(args.num_threads), '-', tmp_res], stdin=p1.stdout, stdout=tmp_stdout, stderr=tmp_stderr ) - else: - proc = subprocess.Popen( ['samtools', 'sort', '-@', str(args.num_threads), bam_files[0], tmp_res], stdout=tmp_stdout, stderr=tmp_stderr ) + proc = subprocess.Popen( args=shlex.split( cmd ), stdout=subprocess.PIPE ) - returncode = proc.wait() - tmp_stdout.close() - tmp_stderr.close() - if returncode != 0: - raise Exception, open( tmp_stderr.name ).read() + returncode = proc.wait() + tmp_stdout.close() + tmp_stderr.close() + if returncode != 0: + raise Exception, open( tmp_stderr.name ).read() + else: + tmp_res = bam_files[0] - bam_path = "%s.bam" % tmp_res + bam_path = "%s" % tmp_res + if os.path.exists( bam_path ): - shutil.copy( bam_path, args.output ) + if args.sort_bam: + cmd = 'samtools sort -@ %s %s %s' % (args.num_threads, bam_path, args.output) + else: + shutil.copy( bam_path, args.output ) else: stop_err( 'BAM file no found:\n' + str( bam_path ) ) + + # TODO: look for errors in program output. except Exception, e: |
| b |
| diff -r 82814a8a2395 -r 91f07ff056ca readme.rst --- a/readme.rst Wed Aug 21 05:19:54 2013 -0400 +++ b/readme.rst Mon Apr 14 16:43:14 2014 -0400 |
| b |
| @@ -26,6 +26,8 @@ - v0.7.11.1 change default output to BAM, from now on samtools are required - v0.7.11.2 added multi-threading to samtools (samtools > 0.1.19 is required) - v0.7.12 upgrade to bismark 0.7.12 and fix a major slowdown +- v0.7.12.1 define a dependency to samtools 0.1.19 + =============================== Wrapper Licence (MIT/BSD style) |
| b |
| diff -r 82814a8a2395 -r 91f07ff056ca tool_dependencies.xml --- a/tool_dependencies.xml Wed Aug 21 05:19:54 2013 -0400 +++ b/tool_dependencies.xml Mon Apr 14 16:43:14 2014 -0400 |
| b |
| @@ -1,7 +1,7 @@ <?xml version="1.0"?> <tool_dependency> <package name="samtools" version="0.1.19"> - <repository changeset_revision="cb87eae7fc3e" name="package_samtools_0_1_19" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="00e17a794a2e" name="package_samtools_0_1_19" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> </package> <set_environment version="1.0"> <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable> |