Previous changeset 6:0f8646f22b8d (2015-02-11) Next changeset 8:9bfe38410155 (2018-08-22) |
Commit message:
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit b'e6ee273f75fff61d1e419283fa8088528cf59470\n' |
modified:
tool_data_table_conf.xml.sample tool_dependencies.xml |
added:
bismark_deduplicate/bismark_deduplicate_wrapper.py bismark_deduplicate/bismark_deduplicate_wrapper.xml bismark_deduplicate/deduplicate_bismark bismark_mapping/bismark bismark_mapping/bismark_bowtie2_wrapper.xml bismark_mapping/bismark_genome_preparation bismark_mapping/bismark_wrapper.py bismark_methyl_extractor/bismark2bedGraph bismark_methyl_extractor/bismark_methylation_extractor bismark_methyl_extractor/bismark_methylation_extractor.py bismark_methyl_extractor/bismark_methylation_extractor.xml bismark_methyl_extractor/coverage2cytosine bismark_pretty_report/bismark2report bismark_pretty_report/bismark2report_wrapper.py bismark_pretty_report/bismark2report_wrapper.xml bismark_pretty_report/bismark_sitrep.tpl documentation/Bismark_User_Guide.pdf documentation/readme.rst macros.xml new/bismark new/bismark_genome_preparation new/bismark_methylation_extractor old/bismark old/bismark_genome_preparation old/bismark_methylation_extractor test-data/bowtie1_singles_output_result1.bam test-data/bowtie1_singles_output_result2.bam test-data/bowtie1_singles_report_result1.txt test-data/bowtie1_singles_report_result2.txt test-data/bowtie1_singles_stdout_result1.txt test-data/bowtie1_singles_stdout_result2.txt test-data/bowtie1_singles_suppressed_reads_l_result2.txt test-data/bowtie1_singles_suppressed_reads_r_result2.txt test-data/bowtie1_singles_unmapped_reads_l_result2.txt test-data/bowtie1_singles_unmapped_reads_r_result2.txt test-data/bwa-mem-fastq1.fq test-data/bwa-mem-fastq2.fq test-data/bwa-mem-mt-genome.fa tool-data/all_fasta.loc.sample tool-data/bismark_indexes.loc.sample |
removed:
bismark bismark_bowtie2_wrapper.xml bismark_bowtie_wrapper.xml bismark_genome_preparation bismark_methylation_extractor bismark_methylation_extractor.py bismark_methylation_extractor.xml bismark_wrapper.py readme.rst tool-data/bowtie2_indices.loc.sample tool-data/bowtie_indices.loc.sample |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark --- a/bismark Wed Feb 11 16:21:51 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,7959 +0,0 @@\n-#!/usr/bin/perl --\n-use strict;\n-use warnings;\n-use IO::Handle;\n-use Cwd;\n-$|++;\n-use Getopt::Long;\n-\n-\n-## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)\n-\n-## This program is free software: you can redistribute it and/or modify\n-## it under the terms of the GNU General Public License as published by\n-## the Free Software Foundation, either version 3 of the License, or\n-## (at your option) any later version.\n-\n-## This program is distributed in the hope that it will be useful,\n-## but WITHOUT ANY WARRANTY; without even the implied warranty of\n-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-## GNU General Public License for more details.\n-\n-## You should have received a copy of the GNU General Public License\n-## along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\n-\n-my $parent_dir = getcwd;\n-my $bismark_version = \'v0.10.0\';\n-my $command_line = join (" ",@ARGV);\n-\n-### before processing the command line we will replace --solexa1.3-quals with --phred64-quals as the \'.\' in the option name will cause Getopt::Long to fail\n-foreach my $arg (@ARGV){\n- if ($arg eq \'--solexa1.3-quals\'){\n- $arg = \'--phred64-quals\';\n- }\n-}\n-my @filenames; # will be populated by processing the command line\n-\n-my ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_file_format,$bowtie_options,$directional,$unmapped,$ambiguous,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$upto,$temp_dir,$non_bs_mm,$insertion_open,$insertion_extend,$deletion_open,$deletion_extend,$gzip,$bam,$samtools_path,$pbat,$prefix,$old_flag) = process_command_line();\n-\n-my @fhs; # stores alignment process names, bisulfite index location, bowtie filehandles and the number of times sequences produced an alignment\n-my %chromosomes; # stores the chromosome sequences of the mouse genome\n-my %counting; # counting various events\n-\n-my 
$seqID_contains_tabs;\n-\n-foreach my $filename (@filenames){\n-\n- chdir $parent_dir or die "Unable to move to initial working directory $!\\n";\n- ### resetting the counting hash and fhs\n- reset_counters_and_fhs($filename);\n- $seqID_contains_tabs = 0;\n-\n- ### PAIRED-END ALIGNMENTS\n- if ($filename =~ \',\'){\n- my ($C_to_T_infile_1,$G_to_A_infile_1); # to be made from mate1 file\n-\n- $fhs[0]->{name} = \'CTread1GAread2CTgenome\';\n- $fhs[1]->{name} = \'GAread1CTread2GAgenome\';\n- $fhs[2]->{name} = \'GAread1CTread2CTgenome\';\n- $fhs[3]->{name} = \'CTread1GAread2GAgenome\';\n-\n- warn "\\nPaired-end alignments will be performed\\n",\'=\'x39,"\\n\\n";\n-\n- my ($filename_1,$filename_2) = (split (/,/,$filename));\n- warn "The provided filenames for paired-end alignments are $filename_1 and $filename_2\\n";\n-\n- ### additional variables only for paired-end alignments\n- my ($C_to_T_infile_2,$G_to_A_infile_2); # to be made from mate2 file\n-\n- ### FastA format\n- if ($sequence_file_format eq \'FASTA\'){\n- warn "Input files are in FastA format\\n";\n-\n- if ($directional){\n-\t($C_to_T_infile_1) = biTransformFastAFiles_paired_end ($filename_1,1); # also passing the read number\n-\t($G_to_A_infile_2) = biTransformFastAFiles_paired_end ($filename_2,2);\n-\n-\t$fhs[0]->{inputfile_1} = $C_to_T_infile_1;\n-\t$fhs[0]->{inputfile_2} = $G_to_A_infile_2;\n-\t$fhs[1]->{inputfile_1} = undef;\n-\t$fhs[1]->{inputfile_2} = undef;\n-\t$fhs[2]->{inputfile_1} = undef;\n-\t$fhs[2]->{inputfile_2} = undef;\n-\t$fhs[3]->{inputfile_1} = $C_to_T_infile_1;\n-\t$fhs[3]->{inputfile_2} = $G_to_A_infile_2;\n- }\n- else{\n-\t($C_to_T_infile_1,$G_to_A_infile_1) = biTransformFastAFiles_paired_end ($filename_1,1); # also passing the read number\n-\t($C_to_T_infile_2,$G_to_A_infile_2) = biTransformFastAFiles_paired_end ($filename_2,2);\n-\n-\t$fhs[0]->{inputfile_1} = $C_to_T_infile_1;\n-\t$fhs[0]->{inputfile_2} = $G_to_A_infile_2;\n-\t$fhs[1]->{inputfile_1} = 
$G_to_A_infile_1;\n-\t$fhs[1]->{inputfile_2} = $C_to_T_infile_2;\n-\t$fhs[2]->{inputfile_1} = $G_to_A_i'..b" a function of read length. For instance, specifying\n- L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length.\n- See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is\n- L,0,-0.2.\n-\n---rdg <int1>,<int2> Sets the read gap open (<int1>) and extend (<int2>) penalties. A read gap of length N gets a penalty\n- of <int1> + N * <int2>. Default: 5, 3.\n-\n---rfg <int1>,<int2> Sets the reference gap open (<int1>) and extend (<int2>) penalties. A reference gap of length N gets\n- a penalty of <int1> + N * <int2>. Default: 5, 3.\n-\n-\n-Bowtie 2 Reporting options:\n-\n--most_valid_alignments <int> This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is\n- deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the\n- default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the\n- effort expended to find valid alignments.\n-\n- For reference, this used to be the old (now deprecated) description of -M:\n- Bowtie 2 searches for at most <int>+1 distinct, valid alignments for each read. The search terminates when it\n- can't find more distinct valid alignments, or when it finds <int>+1 distinct alignments, whichever\n- happens first. Only the best alignment is reported. Information from the other alignments is used to\n- estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes \n- Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that\n- aligns many places. For reads that have more than <int>+1 distinct, valid alignments, Bowtie 2 does not\n- guarantee that the alignment reported is the best possible in terms of alignment score. 
-M is\n- always used and its default value is set to 10.\n-\n-\n-'VANILLA' Bismark OUTPUT:\n-\n-Single-end output format (tab-separated):\n-\n- (1) <seq-ID>\n- (2) <read alignment strand>\n- (3) <chromosome>\n- (4) <start position>\n- (5) <end position>\n- (6) <observed bisulfite sequence>\n- (7) <equivalent genomic sequence>\n- (8) <methylation call>\n- (9) <read conversion\n-(10) <genome conversion>\n-(11) <read quality score (Phred33)>\n-\n-\n-Paired-end output format (tab-separated):\n- (1) <seq-ID>\n- (2) <read 1 alignment strand>\n- (3) <chromosome>\n- (4) <start position>\n- (5) <end position>\n- (6) <observed bisulfite sequence 1>\n- (7) <equivalent genomic sequence 1>\n- (8) <methylation call 1>\n- (9) <observed bisulfite sequence 2>\n-(10) <equivalent genomic sequence 2>\n-(11) <methylation call 2>\n-(12) <read 1 conversion\n-(13) <genome conversion>\n-(14) <read 1 quality score (Phred33)>\n-(15) <read 2 quality score (Phred33)>\n-\n-\n-Bismark SAM OUTPUT (default):\n-\n- (1) QNAME (seq-ID)\n- (2) FLAG (this flag tries to take the strand a bisulfite read originated from into account (this is different from ordinary DNA alignment flags!))\n- (3) RNAME (chromosome)\n- (4) POS (start position)\n- (5) MAPQ (always 255)\n- (6) CIGAR\n- (7) RNEXT\n- (8) PNEXT\n- (9) TLEN\n-(10) SEQ\n-(11) QUAL (Phred33 scale)\n-(12) NM-tag (edit distance to the reference)\n-(13) XX-tag (base-by-base mismatches to the reference. This does not include indels)\n-(14) XM-tag (methylation call string)\n-(15) XR-tag (read conversion state for the alignment)\n-(16) XG-tag (genome conversion state for the alignment)\n-(17) XA/XB-tag (non-bisulfite mismatches) (optional!)\n-\n-Each read of paired-end alignments is written out in a separate line in the above format.\n-\n-\n-Last edited on 07 October 2013.\n-\n-HOW_TO\n-}\n" |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_bowtie2_wrapper.xml --- a/bismark_bowtie2_wrapper.xml Wed Feb 11 16:21:51 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,666 +0,0 @@\n-<tool id="bismark_bowtie2" name="Bismark" version="0.10.2">\n- <!-- Wrapper compatible with Bismark version 0.10 -->\n- <description>bisulfite mapper (bowtie2)</description>\n- <!--<version_command>bismark version</version_command>-->\n- <requirements>\n- <requirement type="set_environment">SCRIPT_PATH</requirement>\n- <requirement type="package" version="0.1.19">samtools</requirement>\n- <requirement type="package" version="2.1.0">bowtie2</requirement>\n- </requirements>\n- <stdio>\n- <exit_code range="1:" />\n- <exit_code range=":-1" />\n- <regex match="Error:" />\n- <regex match="Exception:" />\n- </stdio>\n- <command interpreter="python">\n-<![CDATA[\n- bismark_wrapper.py\n-\n- ## Change this to accommodate the number of threads you have available.\n- --num-threads "\\${GALAXY_SLOTS:-24}"\n-\n- --bismark_path \\$SCRIPT_PATH\n-\n- --bowtie2\n-\n- ##\n- ## Bismark Genome Preparation, if desired.\n- ##\n-\n- ## Handle reference file.\n- #if $refGenomeSource.genomeSource == "history":\n- --own-file=$refGenomeSource.ownFile\n- #else:\n- --indexes-path ${refGenomeSource.index.fields.path}\n- #end if\n-\n-\n- ##\n- ## Input parameters\n- ##\n-\n- #if $singlePaired.sPaired == "single":\n- --single-paired $singlePaired.input_singles\n-\n- #if $singlePaired.input_singles.ext == "fastqillumina":\n- --phred64-quals\n- --fastq\n- #elif $singlePaired.input_singles.ext == "fastqsanger":\n- --fastq\n- #elif $singlePaired.input_singles.ext == "fasta":\n- --fasta\n- #end if\n- #else:\n- --mate-paired\n- #set $mate1 = list()\n- #set $mate2 = list()\n- #for $mate_pair in $singlePaired.mate_list\n- $mate1.append( str($mate_pair.input_mate1) )\n- $mate2.append( str($mate_pair.input_mate2) )\n- #end for\n-\n- --mate1 #echo \',\'.join($mate1)\n- --mate2 #echo \',\'.join($mate2)\n-\n- #for $mate_pair in $singlePaired.mate_list:\n- #if $mate_pair.input_mate1.ext == "fastqillumina":\n- --phred64-quals\n- --fastq\n- #elif $mate_pair.input_mate1.ext == 
"fastqsanger":\n- --fastq\n- #elif $mate_pair.input_mate1.ext == "fasta":\n- --fasta\n- #end if\n- #break\n- #end for\n-\n- -I $singlePaired.minInsert\n- -X $singlePaired.maxInsert\n- #end if\n-\n- #if $sort_bam:\n- --sort-bam\n- #end if\n-\n- ## for now hardcode the value for the required memory per thread in --best mode\n- --chunkmbs 512\n-\n-\n- #if $params.settingsType == "custom":\n-\n- ## default 20\n- --seed-len $params.seed_len\n- ## default 0\n- --seed-mismatches $params.seed_mismatches\n- ## default 15\n- --seed-extention-attempts $params.seed_extention_attempts\n- ## default 2\n- --max-reseed $params.max_reseed\n-\n- ## default 70\n- ##--maqerr $params.maqerr\n-\n- ## default unlimited\n- #if $params.qupto != 0:\n- --qupto $params.qupto\n- #end if\n- #if $params.skip_reads != 0:\n- --skip-reads $params.skip_reads\n- #end if\n-\n- ## if set, disable the original behaviour\n- $params.no_mixed\n- ## if set, disable the original behaviour\n- $params.no_discordant\n-\n- #if $params.bismark_stdout:\n- --stdout $output_stdout\n- #end if\n-\n- #if $params.isReportOutput:\n- --output-report-file $report_file\n- #end if\n-\n- '..b'ult behavior when the input\n- doesn\'t specify quality values (e.g. in -f mode). This option is invariable and on by default.\n-\n-\n-Bowtie 2 paired-end options::\n-\n- --no-mixed This option disables Bowtie 2\'s behavior to try to find alignments for the individual mates if\n- it cannot find a concordant or discordant alignment for a pair. This option is invariable and\n- and on by default.\n-\n- --no-discordant Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments.\n- A discordant alignment is an alignment where both mates align uniquely, but that does not\n- satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). 
This option disables that behavior\n- and it is on by default.\n-\n-\n-Bowtie 2 effort options::\n-\n- -D INT Up to INT consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using\n- the alignments found so far. A seed extension "fails" if it does not yield a new best or a\n- new second-best alignment. Default: 15.\n-\n- -R INT INT is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds.\n- When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of\n- mismatches allowed) at different offsets and searches for more alignments. A read is considered\n- to have repetitive seeds if the total number of seed hits divided by the number of seeds\n- that aligned at least once is greater than 300. Default: 2.\n-\n-\n-Bowtie 2 Scoring options::\n-\n- --score_min "func" Sets a function governing the minimum alignment score needed for an alignment to be considered\n- "valid" (i.e. good enough to report). This is a function of read length. For instance, specifying\n- L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length.\n- See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is\n- L,0,-0.2.\n-\n-\n-Bowtie 2 Reporting options::\n-\n- --most_valid_alignments INT This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is\n- deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the\n- default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the\n- effort expended to find valid alignments.\n-\n- For reference, this used to be the old (now deprecated) description of -M:\n- Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it\n- can\'t find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever\n- happens first. 
Only the best alignment is reported. Information from the other alignments is used to\n- estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes\n- Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that\n- aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not\n- guarantee that the alignment reported is the best possible in terms of alignment score. -M is\n- always used and its default value is set to 10.\n-\n-]]>\n- </help>\n- <citations>\n- <citation type="doi">10.1093/bioinformatics/btr167</citation>\n- </citations>\n-</tool>\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_bowtie_wrapper.xml --- a/bismark_bowtie_wrapper.xml Wed Feb 11 16:21:51 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,563 +0,0 @@\n-<tool id="bismark_bowtie" name="Bismark" version="0.10.2">\n- <!-- Wrapper compatible with Bismark version 0.10 -->\n- <description>bisulfite mapper (bowtie)</description>\n- <!--<version_command>bismark version</version_command>-->\n- <requirements>\n- <requirement type="set_environment">SCRIPT_PATH</requirement>\n- <requirement type="package" version="0.1.19">samtools</requirement>\n- <requirement type="package" version="0.12.8">bowtie</requirement>\n- </requirements>\n- <stdio>\n- <exit_code range="1:" />\n- <exit_code range=":-1" />\n- <regex match="Error:" />\n- <regex match="Exception:" />\n- </stdio>\n- <command interpreter="python">\n-<![CDATA[\n- bismark_wrapper.py\n-\n- --bismark_path \\$SCRIPT_PATH\n-\n- ##\n- ## Bismark Genome Preparation, if desired.\n- ##\n-\n- ## Handle reference file.\n- #if $refGenomeSource.genomeSource == "history":\n- --own-file=$refGenomeSource.ownFile\n- #else:\n- --indexes-path ${refGenomeSource.index.fields.path}\n- #end if\n-\n-\n- ##\n- ## Input parameters\n- ##\n-\n-\n- #if $singlePaired.sPaired == "single":\n- --single-paired $singlePaired.input_singles\n-\n- #if $singlePaired.input_singles.ext == "fastqillumina":\n- --phred64-quals\n- --fastq\n- #elif $singlePaired.input_singles.ext == "fastqsanger":\n- --fastq\n- #elif $singlePaired.input_singles.ext == "fasta":\n- --fasta\n- #end if\n- #else:\n- --mate-paired\n- #set $mate1 = list()\n- #set $mate2 = list()\n- #for $mate_pair in $singlePaired.mate_list\n- $mate1.append( str($mate_pair.input_mate1) )\n- $mate2.append( str($mate_pair.input_mate2) )\n- #end for\n-\n- --mate1 #echo \',\'.join($mate1)\n- --mate2 #echo \',\'.join($mate2)\n-\n- #for $mate_pair in $singlePaired.mate_list:\n- #if $mate_pair.input_mate1.ext == "fastqillumina":\n- --phred64-quals\n- --fastq\n- #elif $mate_pair.input_mate1.ext == "fastqsanger":\n- --fastq\n- #elif $mate_pair.input_mate1.ext == "fasta":\n- --fasta\n- #end if\n- #break\n- #end for\n-\n- -I 
$singlePaired.minInsert\n- -X $singlePaired.maxInsert\n- #end if\n-\n-\n- ## for now hardcode the value for the required memory per thread in --best mode\n- --chunkmbs 512\n-\n-\n- #if $params.settingsType == "custom":\n-\n- ## default 20\n- --seed-len $params.seed_len\n- ## default 0\n- --seed-mismatches $params.seed_mismatches\n-\n- ## default 70\n- ##--maqerr $params.maqerr\n-\n- ## default unlimited\n- #if $params.qupto != 0:\n- --qupto $params.qupto\n- #end if\n- #if $params.skip_reads != 0:\n- --skip-reads $params.skip_reads\n- #end if\n-\n- #if $params.bismark_stdout:\n- --stdout $output_stdout\n- #end if\n-\n- #if $params.isReportOutput:\n- --output-report-file $report_file\n- #end if\n-\n- #end if\n-\n- ##\n- ## Output parameters.\n- ##\n- --output $output\n- ##$suppress_header\n-\n- #if str( $singlePaired.sPaired ) == "single"\n- #if $output_unmapped_reads_l\n- --output-unmapped-reads $output_unmapped_reads_l\n- #end if\n- #if $output_suppressed_reads_l\n- --output-suppressed-reads $output_suppressed_reads_l\n- #end if\n- #else\n- #if $output_unmapped_reads_l and $output_unmapped_reads_r\n- --output-unmapped-reads-l $output_unmapped_reads_l\n- '..b'fied). A 19-bp gap would not be valid in that case. Default: 0.\n-\n- -X/--maxins INT The maximum insert size for valid paired-end alignments. E.g. if -X 100 is specified and\n- a paired-end alignment consists of two 20-bp alignments in the proper orientation with a\n- 60-bp gap between them, that alignment is considered valid (as long as -I is also satisfied).\n- A 61-bp gap would not be valid in that case. Default: 500.\n-\n-\n-\n-Output::\n-\n- --non_directional The sequencing library was constructed in a non strand-specific manner, alignments to all four\n- bisulfite strands will be reported. Default: OFF.\n-\n- (The current Illumina protocol for BS-Seq is directional, in which case the strands complementary\n- to the original strands are merely theoretical and should not exist in reality. 
Specifying directional\n- alignments (which is the default) will only run 2 alignment threads to the original top (OT)\n- or bottom (OB) strands in parallel and report these alignments. This is the recommended option\n- for sprand-specific libraries).\n-\n- --sam-no-hd Suppress SAM header lines (starting with @). This might be useful when very large input files are\n- split up into several smaller files to run concurrently and the output files are to be merged.\n-\n- --quiet Print nothing besides alignments.\n-\n- --vanilla Performs bisulfite mapping with Bowtie 1 and prints the \'old\' output (as in Bismark 0.5.X) instead\n- of SAM format output.\n-\n- -un/--unmapped Write all reads that could not be aligned to a file in the output directory. Written reads will\n- appear as they did in the input, without any translation of quality values that may have\n- taken place within Bowtie or Bismark. Paired-end reads will be written to two parallel files with _1\n- and _2 inserted in their filenames, i.e. _unmapped_reads_1.txt and unmapped_reads_2.txt. Reads\n- with more than one valid alignment with the same number of lowest mismatches (ambiguous mapping)\n- are also written to _unmapped_reads.txt unless the option --ambiguous is specified as well.\n-\n- --ambiguous Write all reads which produce more than one valid alignment with the same number of lowest\n- mismatches or other reads that fail to align uniquely to a file in the output directory.\n- Written reads will appear as they did in the input, without any of the translation of quality\n- values that may have taken place within Bowtie or Bismark. Paired-end reads will be written to two\n- parallel files with _1 and _2 inserted in theit filenames, i.e. _ambiguous_reads_1.txt and\n- _ambiguous_reads_2.txt. These reads are not written to the file specified with --un.\n-\n- -o/--output_dir DIR Write all output files into this directory. 
By default the output files will be written into\n- the same folder as the input file(s). If the specified folder does not exist, Bismark will attempt\n- to create it first. The path to the output folder can be either relative or absolute.\n-\n- --temp_dir DIR Write temporary files to this directory instead of into the same directory as the input files. If\n- the specified folder does not exist, Bismark will attempt to create it first. The path to the\n- temporary folder can be either relative or absolute.\n-\n-]]>\n- </help>\n- <citations>\n- <citation type="doi">10.1093/bioinformatics/btr167</citation>\n- </citations>\n-</tool>\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_deduplicate/bismark_deduplicate_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_deduplicate/bismark_deduplicate_wrapper.py Sat May 06 13:18:09 2017 -0400 |
[ |
#!/usr/bin/python
"""Galaxy wrapper around the ``deduplicate_bismark`` Perl script.

The wrapper runs ``deduplicate_bismark`` on a single BAM/SAM file inside a
throw-away working directory and then moves the two files the script
produces (``*deduplicated.bam`` and ``*deduplication_report.txt``) to the
locations requested on the command line.
"""

import argparse
import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile
from glob import glob


def cleanup_before_exit(tmp_dir):
    """Delete the temporary working directory if it was created.

    :param tmp_dir: path of the directory to remove; ``None`` or a path
        that does not exist is silently ignored.
    """
    if tmp_dir and os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)


def get_arg():
    """Parse ``sys.argv`` and return the resulting ``argparse.Namespace``.

    Every path option is declared with ``nargs=1``, so its value is a
    one-element list (callers read ``args.input[0]`` and so on).

    Exits through ``parser.error`` (``SystemExit``) when a mandatory
    option is missing or when neither ``-p`` nor ``-s`` was given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--tool_dir', dest='tool_dir', action='store', nargs=1, metavar='tool_dir', type=str, required=True)
    parser.add_argument('-p', action='store_true')
    parser.add_argument('-s', action='store_true')
    parser.add_argument('--input', dest='input', action='store', nargs=1, metavar='input', type=str, required=True)
    parser.add_argument('--output_report', dest='output_report', action='store', nargs=1, metavar='output_report', type=str, required=True)
    parser.add_argument('--output_bam', dest='output_bam', action='store', nargs=1, metavar='output_bam', type=str, required=True)
    parser.add_argument('--log_report', dest='log_report', action='store', nargs=1, metavar='log_filename', type=str)
    args = parser.parse_args()
    # Without this check the library-type flag would be undefined later on
    # and the wrapper would die with a NameError instead of a usable message.
    if not (args.p or args.s):
        parser.error('one of -p (paired-end) or -s (single-end) is required')
    return args


def _find_single_output(pattern):
    """Return the first file in the current directory matching ``pattern``.

    Exits with a readable message instead of an ``IndexError`` when the
    expected output file is missing.
    """
    matches = glob(pattern)
    if not matches:
        sys.exit("deduplicate_bismark did not produce the expected output file '%s'" % pattern)
    return matches[0]


def __main__():
    """Run ``deduplicate_bismark`` and collect its BAM and report outputs."""
    args = get_arg()

    # Resolve every path before changing directory, so that relative paths
    # keep pointing at the caller's files rather than into the temp dir.
    tool_script = os.path.join(os.path.abspath(args.tool_dir[0]), 'deduplicate_bismark')
    input_path = os.path.abspath(args.input[0])
    output_bam = os.path.abspath(args.output_bam[0])
    output_report = os.path.abspath(args.output_report[0])
    log_report = os.path.abspath(args.log_report[0]) if args.log_report else None

    start_dir = os.getcwd()
    tmp_dir = tempfile.mkdtemp(prefix='tmp', suffix='')
    try:
        os.chdir(tmp_dir)

        log_filename = log_report or os.path.join(tmp_dir, 'log_report.txt')
        logging.basicConfig(level=logging.INFO, filename=log_filename, filemode="a+", format='%(message)s')

        default_reads_name = 'submitted_reads.bam'
        os.symlink(input_path, default_reads_name)

        # As before, -s takes precedence when both flags are supplied.
        sPaired = '-s' if args.s else '-p'

        # Build the command once so that the logged command line is exactly
        # the one that gets executed.
        cmd = ['perl', tool_script, sPaired, default_reads_name, '--bam']
        logging.info('COMMAND LINE:\n\n%s' % ' '.join(cmd))

        # stderr stays merged into stdout, so output on stderr alone is not
        # treated as a failure; the exit status decides.
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True)
        proc_out, _ = proc.communicate()

        logging.info("__________________________________________________________________\n")
        logging.info("BISMARK DEDUPLICATE STDOUT:\n\n%s" % proc_out)
        if proc.returncode != 0:
            logging.critical("__________________________________________________________________\n")
            logging.critical("BISMARK DEDUPLICATE FAILED (exit code %s)" % proc.returncode)
            sys.exit("Deduplicate Bismark crashed (exit code %s) with the following output:\n%s" % (proc.returncode, proc_out))

        shutil.move(_find_single_output('*deduplicated.bam'), output_bam)
        shutil.move(_find_single_output('*deduplication_report.txt'), output_report)
    finally:
        # Always release the log file, leave the temp dir and remove it,
        # including on failure, so crashed jobs do not leak directories.
        logging.shutdown()
        os.chdir(start_dir)
        cleanup_before_exit(tmp_dir)


if __name__ == "__main__":
    __main__()
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_deduplicate/bismark_deduplicate_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_deduplicate/bismark_deduplicate_wrapper.xml Sat May 06 13:18:09 2017 -0400 |
[ |
@@ -0,0 +1,69 @@ +<tool id="bismark_deduplicate" name="Bismark Deduplicate" version="0.16.3"> + + <description>Deduplicates reads mapped by Bismark</description> + <!--<version_command>bismark version</version_command>--> + + <requirements> + <requirement type="package" version="0.1.19">samtools</requirement> + <requirement type="package" version="2.1.0">bowtie2</requirement> + </requirements> + + <stdio> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <regex match="Error:" /> + <regex match="Exception:" /> + </stdio> + + <command interpreter="python"> +<![CDATA[ + bismark_deduplicate_wrapper.py + + --tool_dir "$__tool_directory__" + + #if str ( $sPaired ) == "single": + -s + #else + -p + #end if + + --input "$mapping_output" + + --output_report "$output_report" + --output_bam "$output_bam" + + ##--log_report "$log_report" +]]> + </command> + + <inputs> + <param name="sPaired" type="select" label="Is this library mate-paired?" format="bam"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <param name="mapping_output" type="data" format="bam, sam" label="Submit the resulting bam/sam file from Bismark bisulfite mapper" /> + </inputs> + + <outputs> + <data name="output_bam" format="bam" label="${tool.name} on ${on_string}: deduplicated mapped reads" /> + <data name="output_report" format="txt" label="${tool.name} on ${on_string}: deduplication report"/> + <!--<data name="log_report" format="txt" label="${tool.name} on ${on_string}: log report (tool stdout)"/>--> + </outputs> + + <help> +<![CDATA[ +**What it does** + + | This tool is supposed to remove alignments to the same position in the genome from the Bismark mapping output (both single and paired-end SAM files), which can arise by e.g. excessive PCR amplification. If sequences align to the same genomic position but on different strands they will be scored individually. + | + | Note that deduplication is not recommended for RRBS-type experiments! 
+ | + | For single-end alignments only the start-coordinate of a read will be used for deduplication. + | For paired-end alignments the start-coordinate of the first read and the end coordinate of the second read will be used for deduplication. + +]]> + </help> + <citations> + <citation type="doi">10.1093/bioinformatics/btr167</citation> + </citations> +</tool> \ No newline at end of file |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_deduplicate/deduplicate_bismark --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_deduplicate/deduplicate_bismark Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,1272 @@\n+#!/usr/bin/env perl\n+use strict;\n+use warnings;\n+use Getopt::Long;\n+\n+\n+### This script is supposed to remove alignments to the same position in the genome which can arise by e.g. PCR amplification\n+### Paired-end alignments are considered a duplicate if both partner reasd start and end at the exact same position\n+\n+### May 13, 2013\n+### Changed the single-end trimming behavior so that only the start coordinate will be used. This avoids duplicate reads that have been trimmed to a varying extent\n+### Changed the way of determining the end of reads in SAM format to using the CIGAR string if the read contains InDels\n+\n+### 16 July 2013\n+### Adding a new deduplication mode for barcoded RRBS-Seq\n+\n+### 27 Sept 2013\n+### Added close statement for all output filehandles (which should probably have been there from the start...)\n+\n+### 8 Jan 2015\n+### to detect paired-end command from the @PG line we are no requiring spaces before and after the -1 or -2\n+\n+### 09 Mar 2015\n+### Removing newline characters also from the read conversion flag in case the tags had been reordered and are now present in the very last column\n+\n+### 19 08 2015\n+### Hiding the option --representative from view to discourage people from using it (it was nearly always not what they wanted to do anyway). It should still work \n+### for alignments that do not contain any indels\n+### Just for reference, here is the the text:\n+### print "--representative\\twill browse through all sequences and print out the sequence with the most representative (as in most frequent) methylation call for any given position. 
Note that this is very likely the most highly amplified PCR product for a given sequence\\n\\n";\n+\n+\n+my $dedup_version = \'v0.16.3\';\n+\n+my $help;\n+my $representative;\n+my $single;\n+my $paired;\n+my $global_single;\n+my $global_paired;\n+my $vanilla;\n+my $samtools_path;\n+my $bam;\n+my $rrbs;\n+my $version;\n+\n+my $command_line = GetOptions (\'help\' => \\$help,\n+\t\t\t \'representative\' => \\$representative,\n+\t\t\t \'s|single\' => \\$global_single,\n+\t\t\t \'p|paired\' => \\$global_paired,\n+\t\t\t \'vanilla\' => \\$vanilla,\n+\t\t\t \'samtools_path=s\' => \\$samtools_path,\n+\t\t\t \'bam\' => \\$bam,\n+\t\t\t \'barcode\' => \\$rrbs,\n+\t\t\t \'version\' => \\$version,\n+\t\t\t );\n+\n+die "Please respecify command line options\\n\\n" unless ($command_line);\n+\n+if ($help){\n+ print_helpfile();\n+ exit;\n+}\n+\n+if ($version){\n+ print << "VERSION";\n+\n+ Bismark Deduplication Module\n+\n+ Deduplicator Version: $dedup_version\n+ Copyright 2010-16 Felix Krueger, Babraham Bioinformatics\n+ www.bioinformatics.babraham.ac.uk/projects/bismark/\n+\n+\n+VERSION\n+ exit;\n+ }\n+\n+\n+\n+my @filenames = @ARGV;\n+\n+unless (@filenames){\n+ print "Please provide one or more Bismark output files for deduplication\\n\\n";\n+ sleep (2);\n+ print_helpfile();\n+ exit;\n+}\n+\n+\n+### OPTIONS\n+unless ($global_single or $global_paired){\n+ if ($vanilla){\n+ die "Please specify either -s (single-end) or -p (paired-end) for deduplication. Reading this information from the \\@PG header line only works for SAM/BAM files\\n\\n";\n+ }\n+ warn "\\nNeither -s (single-end) nor -p (paired-end) selected for deduplication. 
Trying to extract this information for each file separately from the \\@PG line of the SAM/BAM file\\n";\n+}\n+\n+if ($global_paired){\n+ if ($global_single){\n+ die "Please select either -s for single-end files or -p for paired-end files, but not both at the same time!\\n\\n";\n+ }\n+ if ($vanilla){\n+\n+ if ($rrbs){\n+ die "Barcode deduplication only works with Bismark SAM (or BAM) output (in attempt to phase out the vanilla format)\\n";\n+ }\n+\n+ warn "Processing paired-end custom Bismark output file(s):\\n";\n+ warn join ("\\t",@filenames),"\\n\\n";\n+ }\n+ else{\n+ warn "Processing paired-end Bismark output file(s) (SAM format):\\n";\n+ warn join ("\\t",@filenames),"\\n\\n";\n+ }\n+}\n+else{\n+ if ($vanilla){\n+ warn'..b'### we are going to concatenate both methylation call strings from the paired end file to form a joint methylation call string\n+\n+ my $outfile = $file;\n+ if ($vanilla){\n+ $outfile =~ s/$/_deduplicated_to_representative_sequences_pe.txt/;\n+ }\n+ else{\n+ $outfile =~ s/$/_deduplicated_to_representative_sequences_pe.sam/;\n+ }\n+\n+ open (OUT,\'>\',$outfile) or die "Unable to write to $outfile: $!\\n";\n+ warn "Reading and storing all alignment positions\\n";\n+\n+ ### need to proceed slightly differently for the custom Bismark and Bismark SAM output\n+ if ($vanilla){\n+ $_ = <IN>; # Bismark version header\n+ print OUT; # Printing the Bismark version to the de-duplicated file again\n+ }\n+\n+ while (<IN>){\n+\n+ if ($count == 0){\n+\tif ($_ =~ /^Bismark version:/){\n+\t warn "The file appears to be in the custom Bismark and not SAM format. 
Please see option --vanilla!\\n";\n+\t sleep (2);\n+\t print_helpfile();\n+\t exit;\n+\t}\n+ }\n+\n+ ### if this was a SAM file we ignore header lines\n+ unless ($vanilla){\n+\tif (/^\\@\\w{2}\\t/){\n+\t warn "skipping SAM header line:\\t$_";\n+\t print OUT; # Printing the header lines again into the de-duplicated file\n+\t next;\n+\t}\n+ }\n+\n+ my ($strand,$chr,$start,$end,$meth_call_1,$meth_call_2);\n+ my $line1;\n+\n+ if ($vanilla){\n+\t($strand,$chr,$start,$end,$meth_call_1,$meth_call_2) = (split (/\\t/))[1,2,3,4,7,10];\n+ }\n+ else{ # SAM paired-end format\n+\t\n+\t($strand,$chr,$start,$meth_call_1) = (split (/\\t/))[1,2,3,13]; # we are assigning the FLAG value to $strand\n+\t\n+\t### storing the first line (= read 1 alignment)\t\n+\t$line1 = $_;\n+\t\n+\t### reading in the next line\n+\t$_ = <IN>;\n+\t# we only need the end position and the methylation call\n+\t(my $pos,my $seq_2,$meth_call_2) = (split (/\\t/))[3,9,13];\n+\t$end = $pos + length($seq_2) - 1;\n+ }\n+\n+ my $composite = join (":",$strand,$chr,$start,$end);\n+ $count++;\n+ my $meth_call = $meth_call_1.$meth_call_2;\n+\n+ $positions{$composite}->{$meth_call}->{count}++;\n+ if ($vanilla){\n+\t$positions{$composite}->{$meth_call}->{alignment} = $_;\n+ }\n+ else{ # SAM PAIRED-END\n+\t$positions{$composite}->{$meth_call}->{alignment_1} = $line1;\n+\t$positions{$composite}->{$meth_call}->{alignment_2} = $_;\n+ }\n+ }\n+ warn "Stored ",scalar keys %positions," different positions for $count sequences in total (+ and - alignments to the same position are scored individually)\\n\\n";\n+ close IN or warn $!;\n+ }\n+\n+ ### PRINTING RESULTS\n+\n+ ### Now going through all stored positions and printing out the methylation call which is most representative, i.e. 
the one which occurred most often\n+ warn "Now printing out alignments with the most representative methylation call(s)\\n";\n+\n+ foreach my $pos (keys %positions){\n+ foreach my $meth_call (sort { $positions{$pos}->{$b}->{count} <=> $positions{$pos}->{$a}->{count} }keys %{$positions{$pos}}){\n+ if ($paired){\n+\tif ($vanilla){\n+\t print OUT $positions{$pos}->{$meth_call}->{alignment};\n+\t}\n+\telse{\n+\t print OUT $positions{$pos}->{$meth_call}->{alignment_1}; # SAM read 1\n+\t print OUT $positions{$pos}->{$meth_call}->{alignment_2}; # SAM read 2\n+\t}\n+ }\n+ else{ # single-end\n+\tprint OUT $positions{$pos}->{$meth_call}->{alignment};\n+ }\n+ $unique_seqs++;\n+ last; ### exiting once we printed a sequence with the most frequent methylation call for a position\n+ }\n+ }\n+\n+ my $percentage;\n+ unless ($count == 0){\n+ $percentage = sprintf ("%.2f",$unique_seqs*100/$count);\n+ }\n+ else{\n+ $percentage = \'N/A\';\n+ }\n+\n+ warn "\\nTotal number of alignments analysed in $file:\\t$count\\n";\n+ warn "Total number of representative alignments printed from $file in total:\\t$unique_seqs ($percentage%)\\n\\n";\n+ print REPORT "\\nTotal number of alignments analysed in $file:\\t$count\\n";\n+ print REPORT "Total number of representative alignments printed from $file in total:\\t$unique_seqs ($percentage%)\\n\\n";\n+\n+}\n+\n+\n+\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_genome_preparation --- a/bismark_genome_preparation Wed Feb 11 16:21:51 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,459 +0,0 @@\n-#!/usr/bin/perl --\n-use strict;\n-use warnings;\n-use Cwd;\n-use File::Path qw(rmtree);\n-$|++;\n-\n-\n-## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)\n-\n-## This program is free software: you can redistribute it and/or modify\n-## it under the terms of the GNU General Public License as published by\n-## the Free Software Foundation, either version 3 of the License, or\n-## (at your option) any later version.\n-\n-## This program is distributed in the hope that it will be useful,\n-## but WITHOUT ANY WARRANTY; without even the implied warranty of\n-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-## GNU General Public License for more details.\n-\n-## You should have received a copy of the GNU General Public License\n-## along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\n-use Getopt::Long;\n-use Cwd;\n-\n-my $verbose;\n-my $help;\n-my $version;\n-my $man;\n-my $path_to_bowtie;\n-my $multi_fasta;\n-my $single_fasta;\n-my $bowtie2;\n-\n-my $bismark_version = \'v0.10.0\';\n-\n-GetOptions (\'verbose\' => \\$verbose,\n-\t \'help\' => \\$help,\n-\t \'man\' => \\$man,\n-\t \'version\' => \\$version,\n-\t \'path_to_bowtie:s\' => \\$path_to_bowtie,\n-\t \'single_fasta\' => \\$single_fasta,\n-\t \'bowtie2\' => \\$bowtie2,\n-\t );\n-\n-if ($help or $man){\n- print_helpfile();\n- exit;\n-}\n-\n-if ($version){\n- print << "VERSION";\n-\n- Bismark - Bisulfite Mapper and Methylation Caller.\n-\n- Bismark Genome Preparation Version: $bismark_version\n- Copyright 2010-13 Felix Krueger, Babraham Bioinformatics\n- www.bioinformatics.babraham.ac.uk/projects/\n-\n-VERSION\n- exit;\n-}\n-\n-my $genome_folder = shift @ARGV; # mandatory\n-\n-# Ensuring a genome folder has been specified\n-if ($genome_folder){\n- unless ($genome_folder =~ /\\/$/){\n- $genome_folder =~ s/$/\\//;\n- }\n- $verbose and print "Path to genome folder specified as: $genome_folder\\n";\n- chdir $genome_folder or 
die "Could\'t move to directory $genome_folder. Make sure the directory exists! $!";\n-\n- # making the genome folder path abolsolute so it won\'t break if the path was specified relative\n- $genome_folder = getcwd;\n- unless ($genome_folder =~ /\\/$/){\n- $genome_folder =~ s/$/\\//;\n- }\n-}\n-else{\n- die "Please specify a genome folder to be used for bisulfite conversion\\n\\n";\n-}\n-\n-\n-my $CT_dir;\n-my $GA_dir;\n-\n-\n-if ($single_fasta){\n- print "Writing individual genomes out into single-entry fasta files (one per chromosome)\\n\\n";\n- $multi_fasta = 0;\n-}\n-else{\n- print "Writing bisulfite genomes out into a single MFA (multi FastA) file\\n\\n";\n- $single_fasta = 0;\n- $multi_fasta = 1;\n-}\n-\n-my @filenames = create_bisulfite_genome_folders();\n-\n-process_sequence_files ();\n-\n-launch_bowtie_indexer();\n-\n-sub launch_bowtie_indexer{\n- if ($bowtie2){\n- print "Bismark Genome Preparation - Step III: Launching the Bowtie 2 indexer\\n";\n- }\n- else{\n- print "Bismark Genome Preparation - Step III: Launching the Bowtie (1) indexer\\n";\n- }\n- print "Please be aware that this process can - depending on genome size - take up to several hours!\\n";\n- sleep(5);\n-\n- ### if the path to bowtie was specfified explicitely\n- if ($path_to_bowtie){\n- if ($bowtie2){\n- $path_to_bowtie =~ s/$/bowtie2-build/;\n- }\n- else{\n- $path_to_bowtie =~ s/$/bowtie-build/;\n- }\n- }\n- ### otherwise we assume that bowtie-build is in the path\n- else{\n- if ($bowtie2){\n- $path_to_bowtie = \'bowtie2-build\';\n- }\n- else{\n- $path_to_bowtie = \'bowtie-build\';\n- }\n- }\n-\n- $verbose and print "\\n";\n-\n- ### Forking the program to run 2 instances of Bowtie-build or Bowtie2-build (= the Bowtie (1/2) indexer)\n- my $pid = fork();\n-\n- # parent process\n- if ($pid){\n- sleep(1);\n- chdir $CT_dir or die "Unable to change directory: $!\\n";\n- $verbose and warn "Preparing indexing of CT converted genome in $CT_dir\\n";\n- my @fasta_files = <*.fa>;\n- my $file_list = 
join (\',\',@fasta_files);\n- $verbo'..b'te_dir $!\\n";\n- $verbose and print "Created Bisulfite Genome folder $bisulfite_dir\\n";\n- }\n- else{\n- print "\\nA directory called $bisulfite_dir already exists. Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indices will be overwritten!\\n\\n";\n- sleep(5);\n- }\n-\n- chdir $bisulfite_dir or die "Unable to move to $bisulfite_dir\\n";\n- $CT_dir = "${bisulfite_dir}CT_conversion/";\n- $GA_dir = "${bisulfite_dir}GA_conversion/";\n-\n- # creating 2 subdirectories to store a C->T (forward strand conversion) and a G->A (reverse strand conversion)\n- # converted version of the genome\n- unless (-d $CT_dir){\n- mkdir $CT_dir or die "Unable to create directory $CT_dir $!\\n";\n- $verbose and print "Created Bisulfite Genome folder $CT_dir\\n";\n- }\n- unless (-d $GA_dir){\n- mkdir $GA_dir or die "Unable to create directory $GA_dir $!\\n";\n- $verbose and print "Created Bisulfite Genome folder $GA_dir\\n";\n- }\n-\n- # moving back to the original genome folder\n- chdir $genome_folder or die "Could\'t move to directory $genome_folder $!";\n- # $verbose and print "Moved back to genome folder folder $genome_folder\\n";\n- warn "\\nStep I - Prepare genome folders - completed\\n\\n\\n";\n- return @filenames;\n-}\n-\n-sub print_helpfile{\n- print << \'HOW_TO\';\n-\n-\n-DESCRIPTION\n-\n-This script is supposed to convert a specified reference genome into two different bisulfite\n-converted versions and index them for alignments with Bowtie 1 (default), or Bowtie 2. The first\n-bisulfite genome will have all Cs converted to Ts (C->T), and the other one will have all Gs\n-converted to As (G->A). Both bisulfite genomes will be stored in subfolders within the reference\n-genome folder. Once the bisulfite conversion has been completed the program will fork and launch\n-two simultaneous instances of the Bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). 
Be aware\n-that the indexing process can take up to several hours; this will mainly depend on genome size\n-and system resources.\n-\n-\n-\n-The following is a brief description of command line options and arguments to control the\n-Bismark Genome Preparation:\n-\n-\n-USAGE: bismark_genome_preparation [options] <arguments>\n-\n-\n-OPTIONS:\n-\n---help/--man Displays this help filea and exits.\n-\n---version Displays version information and exits.\n-\n---verbose Print verbose output for more details or debugging.\n-\n---path_to_bowtie </../> The full path to the Bowtie 1 or Bowtie 2 installation on your system\n- (depending on which aligner/indexer you intend to use). Unless this path\n- is specified it is assumed that Bowtie is in the PATH.\n-\n---bowtie2 This will create bisulfite indexes for Bowtie 2. (Default: Bowtie 1).\n-\n---single_fasta Instruct the Bismark Indexer to write the converted genomes into\n- single-entry FastA files instead of making one multi-FastA file (MFA)\n- per chromosome. This might be useful if individual bisulfite converted\n- chromosomes are needed (e.g. for debugging), however it can cause a\n- problem with indexing if the number of chromosomes is vast (this is likely\n- to be in the range of several thousand files; the operating system can\n- only handle lists up to a certain length, and some newly assembled\n- genomes may contain 20000-50000 contigs of scaffold files which do exceed\n- this list length limit).\n-\n-\n-ARGUMENTS:\n-\n-<path_to_genome_folder> The path to the folder containing the genome to be bisulfite converted.\n- The Bismark Genome Preparation expects one or more fastA files in the folder\n- (with the file extension: .fa or .fasta). Specifying this path is mandatory.\n-\n-\n-This script was last modified on 19 Sept 2013.\n-HOW_TO\n-}\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_mapping/bismark --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_mapping/bismark Sat May 06 13:18:09 2017 -0400 |
b |
b'@@ -0,0 +1,10220 @@\n+#!/usr/bin/env perl\n+use strict;\n+use warnings;\n+use IO::Handle;\n+use Cwd;\n+$|++;\n+use Getopt::Long;\n+use FindBin qw($Bin);\n+use lib "$Bin/../lib";\n+\n+## This program is Copyright (C) 2010-16, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+\n+my $parent_dir = getcwd;\n+my $bismark_version = \'v0.16.3\';\n+my $command_line = join (" ",@ARGV);\n+\n+\n+### before processing the command line we will replace --solexa1.3-quals with --phred64-quals as the \'.\' in the option name will cause Getopt::Long to fail\n+foreach my $arg (@ARGV){\n+ if ($arg eq \'--solexa1.3-quals\'){\n+ $arg = \'--phred64-quals\';\n+ }\n+}\n+my @filenames; # will be populated by processing the command line\n+\n+my ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_file_format,$bowtie_options,$directional,$unmapped,$ambiguous,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$upto,$temp_dir,$non_bs_mm,$insertion_open,$insertion_extend,$deletion_open,$deletion_extend,$gzip,$bam,$samtools_path,$pbat,$prefix,$old_flag,$basename,$score_min_intercept,$score_min_slope,$bt2_large_index,$multicore,$rg_tag,$rg_id,$rg_sample,$ambig_bam,$cram,$cram_ref,$nucleotide_coverage,$dovetail) = process_command_line();\n+\n+my @fhs; # stores alignment process names, bisulfite index 
location, bowtie filehandles and the number of times sequences produced an alignment\n+my %chromosomes; # stores the chromosome sequences of the mouse genome\n+my %SQ_order; # stores the order of sequences in the reference. This is to produce SAM/BAM files with a known order of chromosomes\n+my %counting; # counting various events\n+my $final_output_filename; # required for the nucleotide coverage report \n+my @pids; # storing the process IDs of child processes in parallel mode\n+\n+\n+my $seqID_contains_tabs;\n+my $verbose = 0;\n+\n+if ($multicore > 1){\n+ warn "Running Bismark Parallel version. Number of parallel instances to be spawned: $multicore\\n\\n";\n+}\n+\n+\n+sub multi_process_handling{\n+\n+ my $offset = 1;\n+ my $process_id;\n+ if ($multicore > 1){\n+\n+ until ($offset == $multicore){\n+ # warn "multicore: $multicore\\noffset: $offset\\n";\n+ my $fork = fork;\n+\n+ if (defined $fork){\n+\tif ($fork != 0){\n+\t $process_id = $fork;\n+\t push @pids, $process_id;\n+\t if ($offset < $multicore){\n+\t ++$offset;\n+\t # warn "I am the parent process, child pid: $fork\\nIncrementing offset counter to: $offset\\n\\n";\n+\t }\n+\t else{\n+\t # warn "Reached the number of maximum multicores. Proceeeding to processing...\\n";\n+\t }\n+\t}\n+\telsif ($fork == 0){\n+\t # warn "I am a child process, pid: $fork\\nOffset counter is: $offset\\nProceeding to processing...\\n";\n+\t $process_id = $fork;\n+\t last;\n+\t}\n+ }\n+ else{\n+\tdie "Forking unsuccessful. Proceeding using a single thread only\\n";\n+ }\n+ }\n+\n+ # warn "\\nThe Thread Identity\\n===================\\n";\n+ if ($process_id){\n+ # print "I am the parent process. 
My children are called:\\n";\n+ # print join ("\\t",@pids),"\\n";\n+ # print "I am going to process the following line count: $offset\\n\\n";\n+ }\n+ elsif($process_id == 0){\n+ # warn "I am a child process: Process ID: $process_id\\n";\n+ # warn "I am going to process the following line count: $offset\\n\\n";\n+ }\n+ else{\n+ die "Process ID was: \'$process_id\'\\n";\n+'..b" a function of read length. For instance, specifying\n+ L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length.\n+ See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is\n+ L,0,-0.2.\n+\n+--rdg <int1>,<int2> Sets the read gap open (<int1>) and extend (<int2>) penalties. A read gap of length N gets a penalty\n+ of <int1> + N * <int2>. Default: 5, 3.\n+\n+--rfg <int1>,<int2> Sets the reference gap open (<int1>) and extend (<int2>) penalties. A reference gap of length N gets\n+ a penalty of <int1> + N * <int2>. Default: 5, 3.\n+\n+\n+Bowtie 2 Reporting options:\n+\n+-most_valid_alignments <int> This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is\n+ deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the\n+ default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the\n+ effort expended to find valid alignments.\n+\n+ For reference, this used to be the old (now deprecated) description of -M:\n+ Bowtie 2 searches for at most <int>+1 distinct, valid alignments for each read. The search terminates when it\n+ can't find more distinct valid alignments, or when it finds <int>+1 distinct alignments, whichever\n+ happens first. Only the best alignment is reported. Information from the other alignments is used to\n+ estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. 
Increasing -M makes \n+ Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that\n+ aligns many places. For reads that have more than <int>+1 distinct, valid alignments, Bowtie 2 does not\n+ guarantee that the alignment reported is the best possible in terms of alignment score. -M is\n+ always used and its default value is set to 10.\n+\n+\n+'VANILLA' Bismark OUTPUT:\n+\n+Single-end output format (tab-separated):\n+\n+ (1) <seq-ID>\n+ (2) <read alignment strand>\n+ (3) <chromosome>\n+ (4) <start position>\n+ (5) <end position>\n+ (6) <observed bisulfite sequence>\n+ (7) <equivalent genomic sequence>\n+ (8) <methylation call>\n+ (9) <read conversion\n+(10) <genome conversion>\n+(11) <read quality score (Phred33)>\n+\n+\n+Paired-end output format (tab-separated):\n+ (1) <seq-ID>\n+ (2) <read 1 alignment strand>\n+ (3) <chromosome>\n+ (4) <start position>\n+ (5) <end position>\n+ (6) <observed bisulfite sequence 1>\n+ (7) <equivalent genomic sequence 1>\n+ (8) <methylation call 1>\n+ (9) <observed bisulfite sequence 2>\n+(10) <equivalent genomic sequence 2>\n+(11) <methylation call 2>\n+(12) <read 1 conversion\n+(13) <genome conversion>\n+(14) <read 1 quality score (Phred33)>\n+(15) <read 2 quality score (Phred33)>\n+\n+\n+Bismark SAM OUTPUT (default):\n+\n+ (1) QNAME (seq-ID)\n+ (2) FLAG (this flag tries to take the strand a bisulfite read originated from into account (this is different from ordinary DNA alignment flags!))\n+ (3) RNAME (chromosome)\n+ (4) POS (start position)\n+ (5) MAPQ (always 255 for use with Bowtie)\n+ (6) CIGAR\n+ (7) RNEXT\n+ (8) PNEXT\n+ (9) TLEN\n+(10) SEQ\n+(11) QUAL (Phred33 scale)\n+(12) NM-tag (edit distance to the reference)\n+(13) MD-tag (base-by-base mismatches to the reference (handles indels)\n+(14) XM-tag (methylation call string)\n+(15) XR-tag (read conversion state for the alignment)\n+(16) XG-tag (genome conversion state for the alignment)\n+(17) XA/XB-tag (non-bisulfite mismatches) 
(optional!)\n+\n+Each read of paired-end alignments is written out in a separate line in the above format.\n+\n+\n+Last edited on 25 July 2016\n+HOW_TO\n+}\n" |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_mapping/bismark_bowtie2_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_mapping/bismark_bowtie2_wrapper.xml Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,561 @@\n+<tool id="bismark_bowtie2" name="Bismark Mapper" version="0.16.3">\n+ \n+ <description>Bisulfite reads mapper</description>\n+ <!--<version_command>bismark version</version_command>-->\n+ <requirements>\n+ <requirement type="package" version="0.1.19">samtools</requirement>\n+ <requirement type="package" version="2.1.0">bowtie2</requirement>\n+ </requirements>\n+ <stdio>\n+ <exit_code range="1:" />\n+ <exit_code range=":-1" />\n+ <regex match="Error:" />\n+ <regex match="Exception:" />\n+ </stdio>\n+ <command interpreter="python">\n+<![CDATA[\n+ bismark_wrapper.py\n+\n+ ## Change this to accommodate the number of threads you have available.\n+ --num-threads "\\${GALAXY_SLOTS:-24}"\n+\n+ ##--bismark_path \\$SCRIPT_PATH\n+ --bismark_path "$__tool_directory__"\n+\n+ --bowtie2\n+\n+ ##\n+ ## Bismark Genome Preparation, if desired.\n+ ##\n+\n+ ## Handle reference file.\n+ #if $refGenomeSource.genomeSource == "built_in_fasta":\n+ --own-file="${refGenomeSource.built_in_fasta.fields.path}"\n+ #else if $refGenomeSource.genomeSource == "own_fasta":\n+ --own-file="$refGenomeSource[\'own_fasta\']"\n+ #else:\n+ --indexes-path "${refGenomeSource.built_in_indexes.fields.path}"\n+ #end if\n+\n+ ##\n+ ## Input parameters\n+ ##\n+\n+ #if $singlePaired.sPaired == "single":\n+ --single-paired $singlePaired.input_singles\n+\n+ #if $singlePaired.input_singles.ext == "fastqillumina":\n+ --phred64-quals\n+ --fastq\n+ #elif $singlePaired.input_singles.ext == "fastqsanger":\n+ --fastq\n+ #elif $singlePaired.input_singles.ext == "fasta":\n+ --fasta\n+ #end if\n+ #else:\n+ --mate-paired\n+ #set $mate1 = list()\n+ #set $mate2 = list()\n+ #for $mate_pair in $singlePaired.mate_list\n+ $mate1.append( str($mate_pair.input_mate1) )\n+ $mate2.append( str($mate_pair.input_mate2) )\n+ #end for\n+\n+ --mate1 #echo \',\'.join($mate1)\n+ --mate2 #echo \',\'.join($mate2)\n+\n+ #for $mate_pair in $singlePaired.mate_list:\n+ #if $mate_pair.input_mate1.ext == "fastqillumina":\n+ 
--phred64-quals\n+ --fastq\n+ #elif $mate_pair.input_mate1.ext == "fastqsanger":\n+ --fastq\n+ #elif $mate_pair.input_mate1.ext == "fasta":\n+ --fasta\n+ #end if\n+ #break\n+ #end for\n+\n+ -I $singlePaired.minInsert\n+ -X $singlePaired.maxInsert\n+ #end if\n+\n+ #if $sort_bam:\n+ --sort-bam\n+ #end if\n+\n+ ## for now hardcode the value for the required memory per thread in --best mode\n+ --chunkmbs 512\n+\n+\n+ #if $params.settingsType == "custom":\n+\n+ ## default 20\n+ --seed-len $params.seed_len\n+ ## default 0\n+ --seed-mismatches $params.seed_mismatches\n+ ## default 15\n+ --seed-extention-attempts $params.seed_extention_attempts\n+ ## default 2\n+ --max-reseed $params.max_reseed\n+\n+ ## default 70\n+ ##--maqerr $params.maqerr\n+\n+ ## default unlimited\n+ #if $params.qupto != 0:\n+ --qupto $params.qupto\n+ #end if\n+ #if $params.skip_reads != 0:\n+ --skip-reads $params.skip_reads\n+ #end if\n+\n+ ## if set, disable the original behaviour\n+ $params.no_mixed\n+ ## if set, disable the original behaviour\n+ $params.no_discordant\n+\n+ #if $params.bismark_stdout:\n+ --stdout "$output_stdout"\n+ #end if\n+\n+ #if $params.isRepor'..b'gth of the seed substrings to align during multiseed alignment. Smaller values make alignment slower but more senstive. Default: the --sensitive preset of Bowtie 2 is used by default, which sets -L to 20. This option is only available for Bowtie 2 (for Bowtie 1 see -l).\n+ |\n+ | Bowtie 2 Specific option *-L <INT>*\n+\n+\n+* **How many consecutive seed extension attempts can fail before Bowtie 2 moves on**\n+\n+ | Up to <int> consecutive seed extension attempts can "fail" before Bowtie 2 moves on, using the alignments found so far. A seed extension "fails" if it does not yield a new best or a new second-best alignment. 
Default: 15.\n+ |\n+ | Bowtie 2 Effort option *-D <INT>*\n+\n+* **Maximum number of times Bowtie 2 will re-seed reads with repetitive seeds**\n+\n+ | <int> is the maximum number of times Bowtie 2 will "re-seed" reads with repetitive seeds. When "re-seeding," Bowtie 2 simply chooses a new set of reads (same length, same number of mismatches allowed) at different offsets and searches for more alignments. A read is considered to have repetitive seeds if the total number of seed hits divided by the number of seeds that aligned at least once is greater than 300. Default: 2.\n+ |\n+ | Bowtie 2 Effort option *-R <INT>*\n+\n+* **Only aligns the first N reads or read pairs from the input**\n+\n+ | Only aligns the first <int> reads or read pairs from the input. Default: no limit.\n+ |\n+ | Input option *-u/--upto <INT>*\n+\n+* **Skip (i.e. do not align) the first N reads or read pairs from the input**\n+\n+ | Input option *-s/--skip*\n+\n+* **Disable looking for discordant alignments if it cannot find any concordant alignments**\n+\n+ | Normally, Bowtie 2 looks for discordant alignments if it cannot find any concordant alignments. A discordant alignment is an alignment where both mates align uniquely, but that does not satisfy the paired-end constraints (--fr/--rf/--ff, -I, -X). This option disables that behavior and it is on by default.\n+ |\n+ | Bowtie 2 Paired-End option *--no-discordant*\n+\n+* **Disable Bowtie 2\'s behaviour to try to find alignments for the individual mates**\n+\n+ | This option disables Bowtie 2\'s behavior to try to find alignments for the individual mates if it cannot find a concordant or discordant alignment for a pair. 
This option is invariable and on by default.\n+ |\n+ | Bowtie 2 Paired-End option *--no-mixed*\n+\n+* **Write ambiguous reads to an extra output file**\n+\n+ | Write all reads which produce more than one valid alignment with the same number of lowest mismatches or other reads that fail to align uniquely to a file in the output directory. Written reads will appear as they did in the input, without any of the translation of quality values that may have taken place within Bowtie or Bismark. Paired-end reads will be written to two parallel files with _1 and _2 inserted in theit filenames, i.e. _ambiguous_reads_1.txt and _ambiguous_reads_2.txt. These reads are not written to the file specified with --un.\n+ |\n+ | Output option *--ambiguous*\n+\n+* **Write all reads that could not be aligned to a file**\n+\n+ | Write all reads that could not be aligned to a file in the output directory. Written reads will appear as they did in the input, without any translation of quality values that may have taken place within Bowtie or Bismark. Paired-end reads will be written to two parallel files with _1 and _2 inserted in their filenames, i.e. _unmapped_reads_1.txt and unmapped_reads_2.txt. Reads with more than one valid alignment with the same number of lowest mismatches (ambiguous mapping) are also written to _unmapped_reads.txt unless the option --ambiguous is specified as well.\n+ |\n+ | Output option *-un/--unmapped*\n+\n+* **Offer all report files concatenated in one file**\n+\n+ | Prints out a Bismark mapping report\n+\n+]]>\n+ </help>\n+ <citations>\n+ <citation type="doi">10.1093/bioinformatics/btr167</citation>\n+ </citations>\n+</tool>\n\\ No newline at end of file\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_mapping/bismark_genome_preparation --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_mapping/bismark_genome_preparation Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,632 @@\n+#!/usr/bin/env perl\n+use strict;\n+use warnings;\n+use Cwd;\n+use Getopt::Long;\n+$|++;\n+\n+\n+## This program is Copyright (C) 2010-16, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+my $verbose;\n+my $help;\n+my $version;\n+my $man;\n+my $path_to_bowtie;\n+my $multi_fasta;\n+my $single_fasta;\n+my $bowtie2;\n+my $bowtie1;\n+my $parent_dir = getcwd();\n+\n+my $genomic_composition;\n+my %genomic_freqs; # storing the genomic sequence composition\n+my %freqs;\n+\n+my $bismark_version = \'v0.16.3\';\n+\n+GetOptions (\'verbose\' => \\$verbose,\n+\t \'help\' => \\$help,\n+\t \'man\' => \\$man,\n+\t \'version\' => \\$version,\n+\t \'path_to_bowtie:s\' => \\$path_to_bowtie,\n+\t \'single_fasta\' => \\$single_fasta,\n+\t \'bowtie2\' => \\$bowtie2,\n+\t \'bowtie1\' => \\$bowtie1,\n+\t \'genomic_composition\' => \\$genomic_composition,\n+\t );\n+\n+if ($help or $man){\n+ print_helpfile();\n+ exit;\n+}\n+\n+if ($version){\n+ print << "VERSION";\n+\n+ Bismark - Bisulfite Mapper and Methylation Caller.\n+\n+ Bismark Genome Preparation Version: $bismark_version\n+ Copyright 2010-16 Felix Krueger, Babraham Bioinformatics\n+ www.bioinformatics.babraham.ac.uk/projects/\n+\n+VERSION\n+ exit;\n+}\n+\n+my $genome_folder = shift @ARGV; # mandatory\n+my %chromosomes; # checking if chromosome names are 
unique (required)\n+\n+# Ensuring a genome folder has been specified\n+if ($genome_folder){\n+ unless ($genome_folder =~ /\\/$/){\n+\t$genome_folder =~ s/$/\\//;\n+ }\n+ $verbose and print "Path to genome folder specified as: $genome_folder\\n";\n+ chdir $genome_folder or die "Could\'t move to directory $genome_folder. Make sure the directory exists! $!";\n+ \n+ # making the genome folder path abolsolute so it won\'t break if the path was specified relative\n+ $genome_folder = getcwd();\n+ unless ($genome_folder =~ /\\/$/){\n+\t$genome_folder =~ s/$/\\//;\n+ }\n+}\n+else{\n+ die "Please specify a genome folder to be used for bisulfite conversion\\n\\n";\n+}\n+\n+\n+my $CT_dir;\n+my $GA_dir;\n+\n+if ($bowtie1){\n+ if ($bowtie2){\n+ die "You may not select both --bowtie1 and --bowtie2. Make your pick! (default is Bowtie2)\\n";\n+ }\n+ $bowtie2 = 0;\n+ $verbose and print "Aligner to be used: Bowtie (1)\\n";\n+}\n+else{ # Bowtie 2 is now the default mode (as of 27 July 2015)\n+ if ($bowtie2){\n+ $verbose and print "Aligner to be used: Bowtie 2 (user-defined)\\n";\n+ }\n+ else{\n+ $verbose and print "Aligner to be used: Bowtie 2 (default)\\n";\n+ }\n+ $bowtie2 = 1;\n+}\n+\n+if ($single_fasta){\n+ warn "Writing individual genomes out into single-entry fasta files (one per chromosome)\\n\\n";\n+ $multi_fasta = 0;\n+}\n+else{\n+ warn "Writing bisulfite genomes out into a single MFA (multi FastA) file\\n\\n";\n+ $single_fasta = 0;\n+ $multi_fasta = 1;\n+}\n+\n+my @filenames = create_bisulfite_genome_folders();\n+\n+if ($genomic_composition){\n+ get_genomic_frequencies();\n+ warn "Finished processing genomic nucleotide frequencies\\n\\n";\n+ %chromosomes = (); # resetting\n+}\n+\n+process_sequence_files ();\n+\n+launch_bowtie_indexer();\n+\n+sub launch_bowtie_indexer{\n+ if ($bowtie2){\n+ warn "Bismark Genome Preparation - Step III: Launching the Bowtie 2 indexer\\n";\n+ }\n+ else{\n+ warn "Bismark Genome Preparation - Step III: Launching the Bowtie (1) indexer\\n";\n+ }\n+ 
print "Please be aware that this process can - depending on genome'..b'etting new chromosome name\n+\t$chromosome_name = extract_chromosome_name($_);\n+ }\n+ else{\n+\t$sequence .= uc$_;\n+ }\n+ }\n+\n+ if (exists $chromosomes{$chromosome_name}){\n+ warn "chr $chromosome_name (",length $sequence ," bp)\\t";\n+ die "Exiting because chromosome name already exists. Please make sure all chromosomes have a unique name.\\n";\n+ }\n+ else{\n+ if (length($sequence) == 0){\n+\twarn "Chromosome $chromosome_name in the file $chromosome_filename did not contain any sequence information!\\n";\n+ }\n+ warn "chr $chromosome_name (",length $sequence ," bp)\\n";\n+ $chromosomes{$chromosome_name} = $sequence;\n+ }\n+ }\n+ warn "\\n";\n+ chdir $parent_dir or die "Failed to move to directory $parent_dir\\n";\n+}\n+\n+\n+\n+\n+sub print_helpfile{\n+ print << \'HOW_TO\';\n+\n+\n+DESCRIPTION\n+\n+This script is supposed to convert a specified reference genome into two different bisulfite\n+converted versions and index them for alignments with Bowtie 2 (default), or Bowtie 1. The first\n+bisulfite genome will have all Cs converted to Ts (C->T), and the other one will have all Gs\n+converted to As (G->A). Both bisulfite genomes will be stored in subfolders within the reference\n+genome folder. Once the bisulfite conversion has been completed the program will fork and launch\n+two simultaneous instances of the Bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). 
Be aware\n+that the indexing process can take up to several hours; this will mainly depend on genome size\n+and system resources.\n+\n+\n+\n+The following is a brief description of command line options and arguments to control the\n+Bismark Genome Preparation:\n+\n+\n+USAGE: bismark_genome_preparation [options] <arguments>\n+\n+\n+OPTIONS:\n+\n+--help/--man Displays this help filea and exits.\n+\n+--version Displays version information and exits.\n+\n+--verbose Print verbose output for more details or debugging.\n+\n+--path_to_bowtie </../> The full path to the Bowtie 1 or Bowtie 2 installation on your system\n+ (depending on which aligner/indexer you intend to use). Unless this path\n+ is specified it is assumed that Bowtie is in the PATH.\n+\n+--bowtie2 This will create bisulfite indexes for Bowtie 2. (Default: ON).\n+\n+--bowtie1 This will create bisulfite indexes for Bowtie 1. (Default: OFF).\n+\n+--single_fasta Instruct the Bismark Indexer to write the converted genomes into\n+ single-entry FastA files instead of making one multi-FastA file (MFA)\n+ per chromosome. This might be useful if individual bisulfite converted\n+ chromosomes are needed (e.g. for debugging), however it can cause a\n+ problem with indexing if the number of chromosomes is vast (this is likely\n+ to be in the range of several thousand files; the operating system can\n+ only handle lists up to a certain length, and some newly assembled\n+ genomes may contain 20000-500000 contigs of scaffold files which do exceed\n+ this list length limit).\n+\n+--genomic_composition Calculate and extract the genomic sequence composition for mono and di-nucleotides\n+ and write the genomic composition table \'genomic_nucleotide_frequencies.txt\' to the\n+ genome folder. 
This may be useful later on when using bam2nuc or the Bismark option\n+ --nucleotide_coverage.\n+\n+\n+ARGUMENTS:\n+\n+<path_to_genome_folder> The path to the folder containing the genome to be bisulfite converted.\n+ The Bismark Genome Preparation expects one or more fastA files in the folder\n+ (with the file extension: .fa or .fasta). Specifying this path is mandatory.\n+\n+\n+This script was last modified on 07 July 2016.\n+HOW_TO\n+}\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_mapping/bismark_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_mapping/bismark_wrapper.py Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,379 @@\n+#!/usr/bin/env python\n+\n+import argparse\n+import os\n+import shutil\n+import subprocess\n+import sys\n+import shlex\n+import tempfile\n+import fileinput\n+import fileinput\n+from glob import glob\n+\n+def stop_err( msg ):\n+ sys.stderr.write( "%s\\n" % msg )\n+ sys.exit()\n+\n+def __main__():\n+\n+ #Parse Command Line\n+ parser = argparse.ArgumentParser(description=\'Wrapper for the bismark bisulfite mapper.\')\n+ parser.add_argument( \'-p\', \'--num-threads\', dest=\'num_threads\',\n+ type=int, default=4, help=\'Use this many threads to align reads. The default is 4.\' )\n+\n+ parser.add_argument( \'--bismark_path\', dest=\'bismark_path\', help=\'Path to the bismark perl scripts\' )\n+\n+ parser.add_argument( \'--bowtie2\', action=\'store_true\', default=False, help=\'Running bismark with bowtie2 and not with bowtie.\' )\n+\n+ # input options\n+ parser.add_argument( \'--own-file\', dest=\'own_file\', help=\'\' )\n+ parser.add_argument( \'-D\', \'--indexes-path\', dest=\'index_path\', help=\'Indexes directory; location of .ebwt and .fa files.\' )\n+ parser.add_argument( \'-O\', \'--output\', dest=\'output\' )\n+\n+\n+ parser.add_argument( \'--output-report-file\', dest=\'output_report_file\' )\n+ parser.add_argument( \'--suppress-header\', dest=\'suppress_header\', action="store_true" )\n+\n+ parser.add_argument( \'--mate-paired\', dest=\'mate_paired\', action=\'store_true\', help=\'Reads are mate-paired\', default=False)\n+\n+\n+ parser.add_argument( \'-1\', \'--mate1\', dest=\'mate1\',\n+ help=\'The forward reads file in Sanger FASTQ or FASTA format.\' )\n+ parser.add_argument( \'-2\', \'--mate2\', dest=\'mate2\',\n+ help=\'The reverse reads file in Sanger FASTQ or FASTA format.\' )\n+ parser.add_argument( \'--sort-bam\', dest=\'sort_bam\', action="store_true" )\n+\n+ parser.add_argument( \'--output-unmapped-reads\', dest=\'output_unmapped_reads\',\n+ help=\'Additional output file with unmapped reads (single-end).\' )\n+ 
parser.add_argument( \'--output-unmapped-reads-l\', dest=\'output_unmapped_reads_l\',\n+ help=\'File name for unmapped reads (left, paired-end).\' )\n+ parser.add_argument( \'--output-unmapped-reads-r\', dest=\'output_unmapped_reads_r\',\n+ help=\'File name for unmapped reads (right, paired-end).\' )\n+\n+\n+ parser.add_argument( \'--output-suppressed-reads\', dest=\'output_suppressed_reads\',\n+ help=\'Additional output file with suppressed reads (single-end).\' )\n+ parser.add_argument( \'--output-suppressed-reads-l\', dest=\'output_suppressed_reads_l\',\n+ help=\'File name for suppressed reads (left, paired-end).\' )\n+ parser.add_argument( \'--output-suppressed-reads-r\', dest=\'output_suppressed_reads_r\',\n+ help=\'File name for suppressed reads (right, paired-end).\' )\n+ parser.add_argument( \'--stdout\', dest=\'output_stdout\',\n+ help=\'File name for the standard output of bismark.\' )\n+\n+\n+ parser.add_argument( \'--single-paired\', dest=\'single_paired\',\n+ help=\'The single-end reads file in Sanger FASTQ or FASTA format.\' )\n+\n+ parser.add_argument( \'--fastq\', action=\'store_true\', help=\'Query filetype is in FASTQ format\')\n+ parser.add_argument( \'--fasta\', action=\'store_true\', help=\'Query filetype is in FASTA format\')\n+ parser.add_argument( \'--phred64-quals\', dest=\'phred64\', action="store_true" )\n+\n+\n+ parser.add_argument( \'--skip-reads\', dest=\'skip_reads\', type=int )\n+ parser.add_argument( \'--qupto\', type=int)\n+\n+\n+ # paired end options\n+ parser.add_argument( \'-I\', \'--minins\', dest=\'min_insert\' )\n+ parser.add_argument( \'-X\', \'--maxins\', dest=\'max_insert\' )\n+ parser.add_argument( \'--no-mixed\', dest=\'no_mixed\', action="store_true" )\n+ parser.add_argument( \'--no-discordant\', dest=\'no_discordant\', action="store_true" )\n+\n+ #parse general options\n+ # default 20\n+ parser.add_argument( \'--seed-len\', dest=\'seed_len\', type=int)\n+ # default 15\n+ parser.add_argument( 
\'--seed-extention-attempts\', dest=\'seed_extention_attempts\', type=int )\n+ # default 0\n+ parser.add_argu'..b' tmp_stderr.close()\n+\n+ # TODO: look for errors in program output.\n+ except Exception, e:\n+ stop_err( \'Error in bismark:\\n\' + str( e ) )\n+\n+ # collect and copy output files\n+ if args.output_report_file:\n+ output_report_file = open(args.output_report_file, \'w+\')\n+ for line in fileinput.input(glob( os.path.join( output_dir, \'*report.txt\') )):\n+ output_report_file.write(line)\n+ output_report_file.close()\n+\n+\n+ if args.output_suppressed_reads:\n+ if glob(os.path.join( output_dir, \'*ambiguous_reads.txt\')):\n+ shutil.move( glob(os.path.join( output_dir, \'*ambiguous_reads.txt\'))[0], args.output_suppressed_reads )\n+ if args.output_suppressed_reads_l:\n+ if glob(os.path.join(output_dir, \'*ambiguous_reads_1.txt\')):\n+ shutil.move( glob(os.path.join( output_dir, \'*ambiguous_reads_1.txt\'))[0], args.output_suppressed_reads_l )\n+ if args.output_suppressed_reads_r:\n+ if glob(os.path.join(output_dir, \'*ambiguous_reads_2.txt\')):\n+ shutil.move( glob(os.path.join( output_dir, \'*ambiguous_reads_2.txt\'))[0], args.output_suppressed_reads_r )\n+\n+ if args.output_unmapped_reads:\n+ if glob(os.path.join(output_dir, \'*unmapped_reads.txt\')):\n+ shutil.move( glob(os.path.join( output_dir, \'*unmapped_reads.txt\'))[0], args.output_unmapped_reads )\n+ if args.output_unmapped_reads_l:\n+ if glob(os.path.join(output_dir, \'*unmapped_reads_1.txt\')):\n+ shutil.move( glob(os.path.join( output_dir, \'*unmapped_reads_1.txt\'))[0], args.output_unmapped_reads_l )\n+ if args.output_unmapped_reads_r:\n+ if glob(os.path.join(output_dir, \'*unmapped_reads_2.txt\')):\n+ shutil.move( glob(os.path.join( output_dir, \'*unmapped_reads_2.txt\'))[0], args.output_unmapped_reads_r )\n+\n+ try:\n+ """\n+ merge all bam files\n+ """\n+ #tmp_out = tempfile.NamedTemporaryFile( dir=output_dir ).name\n+ tmp_stdout = open( tmp_out, \'wab\' )\n+ #tmp_err = 
tempfile.NamedTemporaryFile( dir=output_dir ).name\n+ tmp_stderr = open( tmp_err, \'wab\' )\n+\n+ tmp_res = tempfile.NamedTemporaryFile( dir= output_dir).name\n+\n+ bam_files = glob( os.path.join( output_dir, \'*.bam\') )\n+ if len( bam_files ) > 1:\n+ cmd = \'samtools merge -@ %s -f %s %s \' % ( args.num_threads, tmp_res, \' \'.join( bam_files ) )\n+\n+ proc = subprocess.Popen( args=shlex.split( cmd ), stdout=subprocess.PIPE )\n+\n+ returncode = proc.wait()\n+ tmp_stdout.close()\n+ tmp_stderr.close()\n+ if returncode != 0:\n+ raise Exception, open( tmp_stderr.name ).read()\n+ else:\n+ tmp_res = bam_files[0]\n+\n+ bam_path = "%s" % tmp_res\n+\n+ if os.path.exists( bam_path ):\n+ if args.sort_bam:\n+ cmd = \'samtools sort -@ %s %s sorted_bam\' % (args.num_threads, bam_path)\n+ proc = subprocess.Popen( args=shlex.split( cmd ) )\n+ returncode = proc.wait()\n+ if returncode != 0:\n+ raise Exception("Error during \'%s\'" % cmd)\n+ shutil.move( \'sorted_bam.bam\', args.output )\n+ else:\n+ shutil.move( bam_path, args.output )\n+ else:\n+ stop_err( \'BAM file no found:\\n\' + str( bam_path ) )\n+\n+\n+ # TODO: look for errors in program output.\n+ except Exception, e:\n+ stop_err( \'Error in merging bam files:\\n\' + str( e ) )\n+\n+\n+ if args.output_stdout:\n+ # copy the temporary saved stdout from bismark\n+ shutil.move( tmp_out, args.output_stdout )\n+\n+ # Clean up temp dirs\n+ if args.own_file:\n+ if os.path.exists( tmp_index_dir ):\n+ shutil.rmtree( tmp_index_dir )\n+ if os.path.exists( tmp_bismark_dir ):\n+ shutil.rmtree( tmp_bismark_dir )\n+ if os.path.exists( output_dir ):\n+ shutil.rmtree( output_dir )\n+\n+if __name__=="__main__": __main__()\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_methyl_extractor/bismark2bedGraph --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_methyl_extractor/bismark2bedGraph Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,830 @@\n+#!/usr/bin/env perl\n+use warnings;\n+use strict;\n+$|++;\n+use Getopt::Long;\n+use Cwd;\n+use Carp;\n+\n+## This program is Copyright (C) 2010-16, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+my $bismark2bedGraph_version = \'v0.16.3\';\n+\n+my @bedfiles;\n+my @methylcalls = qw (0 0 0); # [0] = methylated, [1] = unmethylated, [2] = total\n+my @sorting_files;\n+\n+my ($bedGraph_output,$parent_dir,$output_dir,$remove,$CX_context,$no_header,$sort_size,$coverage_threshold,$counts,$gazillion,$ample_mem,$zero,$input_dir) = process_commandline();\n+\n+warn "Using these input files: @sorting_files\\n";\n+warn "\\nSummary of parameters for bismark2bedGraph conversion:\\n";\n+warn \'=\'x54,"\\n";\n+warn "bedGraph output:\\t\\t$bedGraph_output\\n";\n+warn "output directory:\\t\\t>$output_dir<\\n";\n+if ($remove){\n+ warn "remove whitespaces:\\t\\tyes\\n";\n+}\n+else{\n+ warn "remove whitespaces:\\t\\tno\\n";\n+}\n+if ($CX_context){\n+ warn "CX context:\\t\\t\\tyes\\n";\n+}\n+else{\n+ warn "CX context:\\t\\t\\tno (CpG context only, default)\\n";\n+}\n+if ($no_header){\n+ warn "No-header selected:\\t\\tyes\\n";\n+}\n+else{\n+ warn "No-header selected:\\t\\tno\\n";\n+}\n+\n+if ($ample_mem){\n+ warn "Sorting method:\\t\\t\\tArray-based (faster, but larger memory footprint)\\n";\n+}\n+else{\n+ warn 
"Sorting method:\\t\\t\\tUnix sort-based (smaller memory footprint, but slower)\\n";\n+}\n+unless($ample_mem){\n+ warn "Sort buffer size:\\t\\t$sort_size\\n";\n+}\n+warn "Coverage threshold:\\t\\t$coverage_threshold\\n";\n+\n+\n+warn "="x77,"\\n";\n+warn "Methylation information will now be written into a bedGraph and coverage file\\n";\n+warn "="x77,"\\n\\n";\n+sleep (2);\n+\n+### deciding which files to use for bedGraph conversion\n+foreach my $filename (@sorting_files){\n+ \n+ ### DETERMINING THE FULL PATH OF INPUT FILES\n+ if ($filename =~ /^(.*\\/)(.*)$/){ # if files are in a different output folder we extract the filename again\n+\t# warn "folder name: $1\\nfilename: $2\\n\\n";\n+\tchdir $1 or die "Failed to change directory to $1\\n"; # $1 might be a relative path \n+\t$input_dir = getcwd(); # this will always be the full path\n+\t$filename = $2;\n+\t# warn "Full Input folder: $input_dir\\nFilename: $filename\\n\\n"; sleep (1);\n+\tchdir $parent_dir or die "Failed to move back to the parent directory\\n\\n"; # moving back\n+ }\n+ else{\n+\t$input_dir = $parent_dir;\n+ } \n+ $input_dir .= \'/\';\n+ \n+ if ($CX_context){\n+\t# push @bedfiles,$output_dir.$filename;\n+ \tpush @bedfiles,$input_dir.$filename;\n+ }\n+ else{ ## CpG context only (default)\n+\tif ($filename =~ /^CpG/){ # only testing the actual filename without the path information\n+\t push @bedfiles,$input_dir.$filename; # we are adding the full path to the filename\n+\t}\n+\telse{\n+\t # skipping CHH or CHG files\n+\t}\n+ }\n+}\n+\n+if (@bedfiles){\n+ warn "Using the following files as Input:\\n";\n+ print join ("\\t",@bedfiles),"\\n\\n";\n+ sleep (2);\n+}\n+else{\n+ die "It seems that you are trying to generate bedGraph files for files not starting with CpG.... 
Please specify the option \'--CX\' and try again\\n\\n";\n+}\n+\n+open (OUT,"| gzip -c - > ${output_dir}${bedGraph_output}") or die "Problems with the bedGraph output filename detected: file path: \'$output_dir\'\\tfile name: \'$bedGraph_output\' $!\\n";\n+warn "Writing bedGraph to file: $bedGraph_output\\n";\n+print OUT "track type=b'..b"nd may take a long time to sort (up to many hours). Default: OFF.\n+ (i.e. Default = CpG context only).\n+\n+--buffer_size <string> This allows you to specify the main memory sort buffer when sorting the methylation information.\n+ Either specify a percentage of physical memory by appending % (e.g. --buffer_size 50%) or\n+\t\t\t a multiple of 1024 bytes, e.g. 'K' multiplies by 1024, 'M' by 1048576 and so on for 'T' etc.\n+ (e.g. --buffer_size 20G). For more information on sort type 'info sort' on a command line.\n+ Defaults to 2G.\n+\n+--scaffolds/--gazillion Users working with unfinished genomes sporting tens or even hundreds of thousands of\n+ scaffolds/contigs/chromosomes frequently encountered errors with pre-sorting reads to \n+ individual chromosome files. These errors were caused by the operating system's limit\n+ of the number of filehandle that can be written to at any one time (typically 1024; to\n+ find out this limit on Linux, type: ulimit -a).\n+ To bypass the limitation of open filehandles, the option --scaffolds does not pre-sort\n+ methylation calls into individual chromosome files. 
Instead, all input files are\n+ temporarily merged into a single file (unless there is only a single file), and this\n+ file will then be sorted by both chromosome AND position using the Unix sort command.\n+ Please be aware that this option might take a looooong time to complete, depending on \n+ the size of the input files, and the memory you allocate to this process (see --buffer_size).\n+ Nevertheless, it seems to be working.\n+\n+--ample_memory Using this option will not sort chromosomal positions using the UNIX 'sort' command, but will\n+ instead use two arrays to sort methylated and unmethylated calls, respectively. This may result\n+ in a faster sorting process for very large files, but this comes at the cost of a larger memory\n+ footprint (as an estimate, two arrays of the length of (the largest) human chromosome 1 (nearly\n+ 250 million bp) temporarily consume around 16GB of RAM). Note however that due to the overheads\n+ of creating and looping through arrays this option might in fact be *slower* for small-ish\n+ files (up to a few million alignments). Note also that this option is not currently compatible\n+ with options '--scaffolds/--gazillion'.\n+\n+--zero_based Write out an additional coverage file (ending in .zero.cov) that uses 0-based genomic start\n+ and 1-based genomic end coordinates (zero-based, half-open), like used in the bedGraph file,\n+ instead of using 1-based coordinates throughout. 
Default: OFF.\n+\n+\n+\n+The bedGraph output looks like this (tab-delimited; 0-based start coords, 1-based end coords):\n+==============================================================================================\n+\n+track type=bedGraph (header line)\n+\n+<chromosome> <start position> <end position> <methylation percentage>\n+\n+\n+\n+The coverage output looks like this (tab-delimited, 1-based genomic coords; optional zero-based, half-open coords with '--zero_based'):\n+=======================================================================================================================================\n+\n+<chromosome> <start position> <end position> <methylation percentage> <count methylated> <count non-methylated>\n+\n+\n+ Script last modified: 09 December 2015\n+\n+EOF\n+ ;\n+ exit 1;\n+}\n" |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_methyl_extractor/bismark_methylation_extractor --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_methyl_extractor/bismark_methylation_extractor Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,5907 @@\n+#!/usr/bin/env perl\n+use warnings;\n+use strict;\n+$|++;\n+use Getopt::Long;\n+use Cwd;\n+use Carp;\n+use FindBin qw($Bin);\n+use lib "$Bin/../lib";\n+\n+## This program is Copyright (C) 2010-16, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+my @filenames; # input files\n+my %counting;\n+my $parent_dir = getcwd();\n+\n+my %fhs;\n+\n+my $version = \'v0.16.3\';\n+my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip,$ignore_r2,$mbias_off,$mbias_only,$gazillion,$ample_mem,$ignore_3prime,$ignore_3prime_r2,$multicore) = process_commandline();\n+\n+\n+### only needed for bedGraph output\n+my @sorting_files; # if files are to be written to bedGraph format, these are the methylation extractor output files\n+my @methylcalls = qw (0 0 0); # [0] = methylated, [1] = unmethylated, [2] = total\n+my @bedfiles;\n+\n+### only needed for genome-wide cytosine methylation report\n+my %chromosomes;\n+\n+my %mbias_1;\n+my %mbias_2;\n+\n+\n+##############################################################################################\n+### Summarising Run 
Parameters\n+##############################################################################################\n+\n+### METHYLATION EXTRACTOR\n+\n+warn "Summarising Bismark methylation extractor parameters:\\n";\n+warn \'=\'x63,"\\n";\n+\n+if ($single){\n+ if ($vanilla){\n+ warn "Bismark single-end vanilla format specified\\n";\n+ }\n+ else{\n+ warn "Bismark single-end SAM format specified (default)\\n"; # default\n+ }\n+}\n+elsif ($paired){\n+ if ($vanilla){\n+ warn "Bismark paired-end vanilla format specified\\n";\n+ }\n+ else{\n+ warn "Bismark paired-end SAM format specified (default)\\n"; # default\n+ }\n+}\n+\n+warn "Number of cores to be used: $multicore\\n";\n+\n+if ($single){\n+ if ($ignore){\n+ warn "First $ignore bp will be disregarded when processing the methylation call string\\n";\n+ }\n+ if ($ignore_3prime){\n+ warn "Last $ignore_3prime bp will be disregarded when processing the methylation call string\\n";\n+ }\n+\n+}\n+else{ ## paired-end\n+ if ($ignore){\n+ warn "First $ignore bp will be disregarded when processing the methylation call string of Read 1\\n";\n+ }\n+ if ($ignore_r2){\n+ warn "First $ignore_r2 bp will be disregarded when processing the methylation call string of Read 2\\n";\n+ }\n+\n+ if ($ignore_3prime){\n+ warn "Last $ignore_3prime bp will be disregarded when processing the methylation call string of Read 1\\n";\n+ }\n+ if ($ignore_3prime_r2){\n+ warn "Last $ignore_3prime_r2 bp will be disregarded when processing the methylation call string of Read 2\\n";\n+ }\n+\n+\n+}\n+\n+\n+if ($full){\n+ warn "Strand-specific outputs will be skipped. 
Separate output files for cytosines in CpG, CHG and CHH context will be generated\\n";\n+}\n+if ($merge_non_CpG){\n+ warn "Merge CHG and CHH context to non-CpG context specified\\n";\n+}\n+### output directory\n+if ($output_dir eq \'\'){\n+ warn "Output will be written to the current directory (\'$parent_dir\')\\n";\n+}\n+else{\n+ warn "Output path specified as: $output_dir\\n";\n+}\n+\n+\n+sleep (1);\n+\n+### BEDGRAPH\n+\n+if ($bedGraph){\n+ warn "\\n\\nSummarising bedGraph parameters:\\n";\n+ warn \'=\'x63,"\\n";\n+\n+ if ($counts){\n'..b"romosome 1 (~250M bp) consume around 16GB\n+ of RAM). Due to overheads in creating and looping through these arrays it seems that it will\n+ actually be *slower* for small files (few million alignments), and we are currently testing at\n+ which point it is advisable to use this option. Note that --ample_memory is not compatible\n+ with options '--scaffolds/--gazillion' (as it requires pre-sorted files to begin with).\n+\n+\n+\n+Genome-wide cytosine methylation report specific options:\n+=========================================================\n+\n+--cytosine_report After the conversion to bedGraph has completed, the option '--cytosine_report' produces a\n+ genome-wide methylation report for all cytosines in the genome. By default, the output uses 1-based\n+ chromosome coordinates (zero-based start coords are optional) and reports CpG context only (all\n+ cytosine context is optional). The output considers all Cs on both forward and reverse strands and\n+ reports their position, strand, trinucleotide content and methylation state (counts are 0 if not\n+ covered). The cytosine report conversion step is performed by the external module\n+ 'coverage2cytosine'; this script needs to reside in the same folder as the bismark_methylation_extractor\n+ itself.\n+\n+--CX/--CX_context The output file contains information on every single cytosine in the genome irrespective of\n+ its context. This applies to both forward and reverse strands. 
Please be aware that this will\n+ generate output files with > 1.1 billion lines for a mammalian genome such as human or mouse.\n+ Default: OFF (i.e. Default = CpG context only).\n+\n+--zero_based Uses 0-based genomic coordinates instead of 1-based coordinates. Default: OFF.\n+\n+--genome_folder <path> Enter the genome folder you wish to use to extract sequences from (full path only). Accepted\n+ formats are FastA files ending with '.fa' or '.fasta'. Specifying a genome folder path is mandatory.\n+\n+--split_by_chromosome Writes the output into individual files for each chromosome instead of a single output file. Files\n+ will be named to include the input filename and the chromosome number.\n+\n+\n+\n+OUTPUT:\n+\n+The bismark_methylation_extractor output is in the form:\n+========================================================\n+<seq-ID> <methylation state*> <chromosome> <start position (= end position)> <methylation call>\n+\n+* Methylated cytosines receive a '+' orientation,\n+* Unmethylated cytosines receive a '-' orientation.\n+\n+\n+\n+The bedGraph output (optional) looks like this (tab-delimited; 0-based start coords, 1-based end coords):\n+=========================================================================================================\n+\n+track type=bedGraph (header line)\n+\n+<chromosome> <start position> <end position> <methylation percentage>\n+\n+\n+\n+The coverage output looks like this (tab-delimited, 1-based genomic coords; zero-based half-open coordinates available with '--zero_based'):\n+============================================================================================================================================\n+\n+<chromosome> <start position> <end position> <methylation percentage> <count methylated> <count non-methylated>\n+\n+\n+\n+The genome-wide cytosine methylation output file is tab-delimited in the following 
format:\n+==========================================================================================\n+<chromosome> <position> <strand> <count methylated> <count non-methylated> <C-context> <trinucleotide context>\n+\n+\n+\n+This script was last modified on 25 February 2016.\n+\n+HOW_TO\n+}\n" |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_methyl_extractor/bismark_methylation_extractor.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_methyl_extractor/bismark_methylation_extractor.py Sat May 06 13:18:09 2017 -0400 |
[ |
#!/usr/bin/env python
"""
Galaxy wrapper around Bismark's ``bismark_methylation_extractor``.

The wrapper converts BAM input to SAM with ``samtools view`` when needed,
runs the extractor into a temporary output directory, zips the per-context
result files and moves the requested reports to the paths given on the
command line.  All temporary directories are removed on exit, including
on error paths.
"""

import argparse, os, shutil, subprocess, sys, tempfile, fileinput
import zipfile
import re
from glob import glob

# Leading bytes this wrapper uses to recognise BAM input: gzip magic
# (1f 8b), deflate (08) and the FEXTRA flag (04) as written by BGZF.
# Must be a bytes literal because the file is read in binary mode.
BAM_MAGIC = b'\x1f\x8b\x08\x04'


def stop_err( msg ):
    """Write ``msg`` to stderr and terminate with a non-zero exit status."""
    sys.stderr.write( "%s\n" % msg )
    # A bare sys.exit() would report success (status 0) to the caller.
    sys.exit( 1 )


def zipper(dir, zip_file):
    """
    Zip the methylation extractor result files found below ``dir``.

    Only files whose name matches the context-file pattern
    (``CpG_*``, ``CHG_*``, ``CHH_*``, ``Non_CpG_*`` ...) or the bedGraph
    pattern are added.  Archive member names are relative to ``dir``.

    Returns ``zip_file`` unchanged.
    """
    output_files_regex = re.compile('^(Non_)?C[pH][GH]_.*')
    bedgraph_regex = re.compile('.*bedGraph.gz')
    base_dir = os.path.abspath(dir)
    archive = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED)
    try:
        for root, dirs, files in os.walk(base_dir):
            for f in files:
                if output_files_regex.search(f) or bedgraph_regex.search(f):
                    fullpath = os.path.join(root, f)
                    archive_name = os.path.relpath(fullpath, base_dir)
                    archive.write(fullpath, archive_name, zipfile.ZIP_DEFLATED)
    finally:
        # Close even if a member cannot be read, so the handle is not leaked.
        archive.close()
    return zip_file


def build_genome_dir(genome_file):
    """
    Create a temporary genome folder containing a single ``<name>.fa``
    symbolic link that points to ``genome_file``.

    ``<name>`` is the base name of ``genome_file`` without its last
    extension.  On failure the temporary directory is removed and the
    program exits through ``stop_err``.

    Returns the path of the temporary directory; the caller removes it.
    """
    if not genome_file:
        stop_err('Error in linking the reference database.\nNo genome file was given.')
    tmp_genome_dir = tempfile.mkdtemp(prefix='tmp')
    # splitext keeps the full name when the file has no extension at all
    # (splitting on '.' would leave an empty name and create '.fa').
    genome_name = os.path.splitext(os.path.basename(genome_file))[0]
    genome_path = os.path.join(tmp_genome_dir, genome_name)
    try:
        # Create a symbolic link pointing to genome_file named '<genome_path>.fa'.
        os.symlink(genome_file, genome_path + '.fa')
    except Exception as e:
        if os.path.exists(tmp_genome_dir):
            shutil.rmtree(tmp_genome_dir)
        stop_err('Error in linking the reference database.\n' + str(e))
    return tmp_genome_dir


def _read_text(handle):
    """Return the complete content of an open temporary file as text."""
    handle.seek(0)
    data = handle.read()
    if not isinstance(data, str):
        # Python 3: the handle is binary, decode for use in messages.
        data = data.decode('utf-8', 'replace')
    return data


def _is_bam(path):
    """Return True when ``path`` starts with the BGZF/BAM magic bytes."""
    with open(path, 'rb') as handle:
        return handle.read(4) == BAM_MAGIC


def _bam_to_sam(bam_file, sam_file):
    """
    Convert ``bam_file`` to ``sam_file`` with ``samtools view``.

    Waits for samtools to finish before returning, so the SAM file is
    complete when the extractor starts.  Exits through ``stop_err`` when
    samtools cannot be started or returns a non-zero status.
    """
    with tempfile.TemporaryFile() as err_handle:
        try:
            with open(sam_file, 'wb') as sam_handle:
                returncode = subprocess.call(['samtools', 'view', bam_file],
                                             stdout=sam_handle, stderr=err_handle)
        except OSError as e:
            stop_err("error: %s" % e)
        if returncode != 0:
            stop_err("error: %s" % _read_text(err_handle))


def _run_extractor(cmd):
    """
    Run the extractor command given as an argument list (no shell).

    Raises ``Exception`` carrying the captured stderr when the command
    returns a non-zero status.
    """
    with tempfile.TemporaryFile() as out_handle:
        with tempfile.TemporaryFile() as err_handle:
            returncode = subprocess.call(cmd, cwd=".", stdout=out_handle, stderr=err_handle)
            if returncode != 0:
                raise Exception(_read_text(err_handle))


def _move_output(output_dir, pattern, destination):
    """Move the first file in ``output_dir`` matching ``pattern`` to ``destination``."""
    matches = sorted(glob(os.path.join(output_dir, pattern)))
    if not matches:
        stop_err("Error in bismark methylation extractor:\n"
                 "expected output file matching '%s' was not produced." % pattern)
    shutil.move(matches[0], destination)


def __main__():
    """Parse the command line, run the extractor and collect its outputs."""
    #Parse Command Line
    parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.')

    # input options
    parser.add_argument( '--bismark_path', dest='bismark_path', help='Path to the bismark perl scripts' )

    parser.add_argument( '--infile', help='Input file in SAM or BAM format.' )
    parser.add_argument( '--single-end', dest='single_end', action="store_true" )
    # Accepted for symmetry; paired-end is assumed whenever --single-end is absent.
    parser.add_argument( '--paired-end', dest='paired_end', action="store_true" )

    parser.add_argument('--splitting_report', dest='splitting_report')
    parser.add_argument('--mbias_report', dest='mbias_report')
    parser.add_argument('--cytosine_report', dest="cytosine_report")
    parser.add_argument('--genome_file', dest="genome_file")
    parser.add_argument('--cx_context', action="store_true" )

    parser.add_argument( '--comprehensive', action="store_true" )
    parser.add_argument( '--merge-non-cpg', dest='merge_non_cpg', action="store_true" )
    parser.add_argument( '--no-overlap', dest='no_overlap', action="store_true" )
    parser.add_argument( '--compress' )
    parser.add_argument('--ignore', dest='ignore', type=int)
    parser.add_argument('--ignore_r2', dest='ignore_r2', type=int)
    parser.add_argument('--ignore_3prime', dest='ignore_3prime', type=int)
    parser.add_argument('--ignore_3prime_r2', dest='ignore_3prime_r2', type=int)

    args = parser.parse_args()

    if not args.infile:
        stop_err('Error in bismark methylation extractor:\nno input file given (--infile).')

    output_dir = tempfile.mkdtemp()
    tmp_genome_dir = None
    try:
        # Build methylation extractor command as an argument list so that
        # paths containing spaces or shell metacharacters are passed verbatim.
        extractor = 'bismark_methylation_extractor'
        if args.bismark_path:
            # add the path to the bismark perl scripts, that is needed for galaxy
            extractor = os.path.join(args.bismark_path, extractor)
        cmd = [extractor, '--no_header', '-o', output_dir]

        # Set up all options
        cmd.append('--single-end' if args.single_end else '--paired-end')
        if args.no_overlap:
            cmd.append('--no_overlap')
        # A value of 0 (or an absent option) means "ignore nothing".
        for flag, value in (('--ignore', args.ignore),
                            ('--ignore_r2', args.ignore_r2),
                            ('--ignore_3prime', args.ignore_3prime),
                            ('--ignore_3prime_r2', args.ignore_3prime_r2)):
            if value:
                cmd.extend([flag, str(value)])
        if args.comprehensive:
            cmd.append('--comprehensive')
        if args.merge_non_cpg:
            cmd.append('--merge_non_CpG')
        if args.splitting_report:
            cmd.append('--report')
        if args.cytosine_report:
            tmp_genome_dir = build_genome_dir(args.genome_file)
            cmd.append('--bedgraph')
            if args.cx_context:
                cmd.append('--CX_context')
            cmd.extend(['--cytosine_report', '--genome_folder', tmp_genome_dir])

        #detect BAM file, use samtools view if it is a bam file
        if _is_bam(args.infile):
            new_infilename = os.path.join(output_dir, 'submitted_bs_mapped_reads.sam')
            _bam_to_sam(args.infile, new_infilename)
            cmd.append(new_infilename)
        else:
            cmd.append(args.infile)

        # Run
        try:
            _run_extractor(cmd)
            # TODO: look for errors in program output.
        except Exception as e:
            stop_err( 'Error in bismark methylation extractor:\n' + str( e ) )

        # collect and copy output files
        if args.compress:
            zipper(output_dir, args.compress)

        # cytosine report
        if args.cytosine_report:
            if args.cx_context:
                _move_output(output_dir, '*CX_report.txt', args.cytosine_report)
            else:
                _move_output(output_dir, '*CpG_report.txt', args.cytosine_report)
        # splitting report
        if args.splitting_report:
            _move_output(output_dir, '*_splitting_report.txt', args.splitting_report)
        if args.mbias_report:
            _move_output(output_dir, '*M-bias.txt', args.mbias_report)
    finally:
        #Clean up temp dirs (also reached when stop_err raises SystemExit)
        if os.path.exists( output_dir ):
            shutil.rmtree( output_dir )
        if tmp_genome_dir and os.path.exists( tmp_genome_dir ):
            shutil.rmtree( tmp_genome_dir )


if __name__=="__main__": __main__()
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_methyl_extractor/bismark_methylation_extractor.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_methyl_extractor/bismark_methylation_extractor.xml Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,304 @@\n+<tool id="bismark_methylation_extractor" name="Bismark Meth. Extractor" version="0.16.3">\n+\n+ <description>Reports on methylation status of reads mapped by Bismark</description>\n+ <!--<version_command>bismark_methylation_extractor version</version_command>-->\n+\n+ <requirements>\n+ <requirement type="package" version="0.1.19">samtools</requirement>\n+ <requirement type="package" version="2.1.0">bowtie2</requirement>\n+ </requirements>\n+\n+ <parallelism method="basic"></parallelism>\n+\n+ <command interpreter="python">\n+<![CDATA[\n+ bismark_methylation_extractor.py\n+\n+ --infile "$input"\n+\n+ ##--bismark_path \\$SCRIPT_PATH\n+ --bismark_path "$__tool_directory__"\n+\n+ #if $singlePaired.sPaired == "single":\n+ --single-end\n+ #else:\n+ --paired-end\n+ $singlePaired.no_overlap\n+ #end if\n+\n+ #if str( $singlePaired[\'ignore_r1\'] ) != "0":\n+ --ignore $singlePaired[\'ignore_r1\']\n+ #end if\n+ #if str( $singlePaired[\'ignore_3prime_r1\'] ) != "0":\n+ --ignore_3prime $singlePaired[\'ignore_3prime_r1\']\n+ #end if\n+\n+ #if $singlePaired.sPaired == "paired":\n+ #if str( $singlePaired[\'ignore_r2\'] ) != "0":\n+ --ignore_r2 $singlePaired[\'ignore_r2\']\n+ #end if\n+ #if str( $singlePaired[\'ignore_3prime_r2\'] ) != "0":\n+ --ignore_3prime_r2 $singlePaired[\'ignore_3prime_r2\']\n+ #end if\n+ #end if\n+\n+ #if $splitting_report:\n+ --splitting_report "$output_splitting_report"\n+ #end if\n+\n+ #if $mbias_report:\n+ --mbias_report "$output_mbias_report"\n+ #end if\n+\n+ #if $cytosine_report[\'cytosine_report_selector\']:\n+ --cytosine_report "$output_cytosine_report"\n+ --genome_file "${cytosine_report.built_in_fasta.fields.path}"\n+ #if not $cytosine_report[\'cpg_context\']:\n+ --cx_context\n+ #end if\n+ #end if\n+\n+ #if $output_settings[\'comprehensive\']:\n+ --comprehensive\n+ #end if\n+\n+ #if $output_settings[\'merge_non_cpg\']:\n+ --merge-non-cpg\n+ #end if\n+\n+ --compress "$compressed_output"\n+]]>\n+ </command>\n+\n+ <inputs>\n+ 
<!-- Input Parameters -->\n+ <param name="input" type="data" format="sam,bam" label="SAM/BAM file from Bismark bisulfite mapper" />\n+ <conditional name="singlePaired">\n+ <param name="sPaired" type="select" label="Is this library mate-paired?">\n+ <option value="single">Single-end</option>\n+ <option value="paired">Paired-end</option>\n+ </param>\n+ <when value="single">\n+ <param name="ignore_r1" type="integer" value="0" label="Ignore the first N bp from the 5\xe2\x80\x99 end of single-end read when processing the methylation call string." />\n+ <param name="ignore_3prime_r1" type="integer" value="0" label="Ignore the last N bp from the 3\' end of single-end read when processing the methylation call string."/>\n+ </when>\n+ <when value="paired">\n+ <param name="ignore_r1" type="integer" value="0" label="Ignore the first N bp from the 5\xe2\x80\x99 end of Read 1 when processing the methylation call string." />\n+ <param name="ignore_3prime_r1" type="integer" value="0" label="Ignore the last N bp from the 3\' end of Read 1 when processing the methylation call string."/>\n+ <param name="ignore_r2" type="integer" value="0" label="Ignore the first N bp from the 5\' end of Read 2 of paired-end sequencing results" />\n+ <param name="ignore_3prime_r2" type="integer" value="0" label="Ignore the last N bp from the 3\' end of Read 2 of paired-end sequencing results"/>\n+ <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="This option avoids scoring overlapping methylati'..b' any desired way.\n+ |\n+ | *Input option --ignore_r2*\n+\n+ * **Ignore the last N bp from the 3\' end of Read 2 of paired-end sequencing results**\n+\n+ | This can remove unwanted biases from the end of reads.\n+ |\n+ | *Input option --ignore_3prime_r2*\n+\n+ * **This option avoids scoring overlapping methylation calls twice, in case of overlapping read one and read two**\n+\n+ | For paired-end reads it is theoretically possible that read_1 and read_2 
overlap. This option avoids scoring overlapping methylation calls twice (only methylation calls of read 1 are used in the process since read 1 has historically higher quality basecalls than read 2). Whilst this option removes a bias towards more methylation calls in the center of sequenced fragments it may de facto remove a sizable proportion of the data. This option is highly recommended for paired-end data.\n+ |\n+ | *Input option --no_overlap*\n+\n+* **Short methylation summary output (Splitting Report)**\n+\n+ | Prints out a short methylation summary as well as the parameters used to run this script.\n+ |\n+ | *Output option --report*\n+\n+* **Methylation proportion report for each possible position in the read (Mbias Report)**\n+\n+ | This report shows the methylation proportion across each possible position in the read (described in further detail in: Hansen et al., Genome Biology, 2012, 13:R83). The data for the M-bias plot is also written into a text file and is in the following format:\n+ |\n+ | <read position> <count methylated> <count unmethylated> <% methylation> <total coverage>\n+ |\n+ | This allows generating nice graphs by alternative means, e.g. using R or Excel\n+\n+* **Genome-wide methylation report for all cytosines in the genome**\n+\n+ | the option --cytosine_report produces a genome-wide methylation report for all cytosines in the genome. \n+\n+ * **If CpG Context only**\n+\n+ | the output uses 1-based chromosome coordinates (zero-based coords are optional) and reports CpG context only (all cytosine context is optional). 
The output considers all Cs on both forward and reverse strands and reports their position, strand, trinucleotide content and methylation state (counts are 0 if not covered).\n+ |\n+ | *Genome-wide cytosine methylation report specific option --bedgraph --cytosine_report --genome_folder <path>*\n+\n+ * **If not CpG Context only**\n+\n+ | The output file contains information on every single cytosine in the genome irrespective of its context. This applies to both forward and reverse strands. Please be aware that this will generate output files with > 1.1 billion lines for a mammalian genome such as human or mouse. Default: OFF (i.e. Default = CpG context only).\n+ |\n+ | *Genome-wide cytosine methylation report specific option --bedgraph --CX_context --cytosine_report --CX_context --genome_folder <path>*\n+\n+* **Merge all four possible strand-specific methylation info into context-dependent output files**\n+\n+ | Specifying this option will merge all four possible strand-specific methylation info into context-dependent output files. The default contexts are:\n+ | - CpG context\n+ | - CHG context\n+ | - CHH context\n+ |\n+ | *Output option --comprehensive*\n+\n+* **Merge all non-CpG contexts into one file**\n+\n+ | This will produce two output files (in --comprehensive mode) or eight strand-specific output files (default) for Cs in\n+ | - CpG context\n+ | - non-CpG context\n+ |\n+ | *Output option --merge_non_CpG*\n+\n+* **Compress all result files and output one single file**\n+\n+ | The methylation extractor files (CpG_OT..., CpG_OB... etc) will be written out in a GZIP compressed form to save disk space. This option does not work on bedGraph and genome-wide cytosine reports as they are \'tiny\' anyway.\n+ |\n+ | *Output option --gzip*\n+\n+]]>\n+ </help>\n+\n+ <citations>\n+ <citation type="doi">10.1093/bioinformatics/btr167</citation>\n+ </citations>\n+</tool>\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_methyl_extractor/coverage2cytosine --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_methyl_extractor/coverage2cytosine Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,1254 @@\n+#!/usr/bin/env perl\n+use warnings;\n+use strict;\n+$|++;\n+use Getopt::Long;\n+use Cwd;\n+use Carp;\n+\n+## This program is Copyright (C) 2010-16, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+my %chromosomes; # storing sequence information of all chromosomes/scaffolds\n+my %processed; # keeping a record of which chromosomes have been processed\n+my $coverage2cytosine_version = \'v0.16.3\';\n+\n+my ($output_dir,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$parent_dir,$coverage_infile,$cytosine_out,$merge_CpGs,$gc_context,$gzip,$tetra) = process_commandline();\n+\n+warn "Summary of parameters for genome-wide cytosine report:\\n";\n+warn \'=\'x78,"\\n";\n+warn "Coverage infile:\\t\\t\\t$coverage_infile\\n";\n+warn "Output directory:\\t\\t\\t>$output_dir<\\n";\n+warn "Parent directory:\\t\\t\\t>$parent_dir<\\n";\n+warn "Genome directory:\\t\\t\\t>$genome_folder<\\n";\n+\n+if ($CX_context){\n+ warn "CX context:\\t\\t\\t\\tyes\\n";\n+}\n+else{\n+ warn "CX context:\\t\\t\\t\\tno (CpG context only, default)\\n";\n+}\n+if ($merge_CpGs){\n+ warn "Pooling CpG top/bottom strand evidence:\\tyes\\n";\n+}\n+if($gc_context){\n+ warn "Optional GC context track:\\t\\tyes\\n";\n+}\n+if ($tetra){\n+ warn "Tetra/Penta nucleotide context:\\t\\tyes\\n";\n+}\n+\n+if ($zero){\n+ warn 
"Genome coordinates used:\\t\\t0-based (user specified)\\n";\n+}\n+else{\n+ warn "Genome coordinates used:\\t\\t1-based (default)\\n";\n+}\n+\n+if ($gzip){\n+ warn "GZIP compression:\\t\\t\\tyes\\n";\n+}\n+else{\n+ warn "GZIP compression:\\t\\t\\tno\\n";\n+}\n+\n+if ($split_by_chromosome){\n+ warn "Split by chromosome:\\t\\t\\tyes\\n\\n\\n";\n+}\n+else{\n+ warn "Split by chromosome:\\t\\t\\tno\\n\\n\\n";\n+}\n+sleep (3);\n+\n+read_genome_into_memory();\n+warn "Stored sequence information of ",scalar keys %chromosomes," chromosomes/scaffolds in total\\n\\n";\n+\n+generate_genome_wide_cytosine_report($coverage_infile);\n+\n+### 11 December 2014\n+\n+# The following optional section re-reads the genome-wide report and merges methylation evidence of both top and bottom strand\n+# into a single CpG dinucleotide entity. This significantly simplifies downstream processing, e.g. by the bsseq R-/Bioconductor package\n+# which recommends this merging process to increase coverage per CpG and reduce the memory burden for its processing\n+\n+if ($merge_CpGs){\n+ # we only allow this operation if the report is limited to CpG context, and for a single report for the entire genome for the time being\n+ combine_CpGs_to_single_CG_entity($cytosine_out);\n+}\n+\n+### 18 August 2015\n+\n+# The following section reprocessed the genome to generate cytosine methylation output in GC context (e.g. 
when a GpC methylase had been deployed\n+if ($gc_context){\n+ generate_GC_context_report($coverage_infile);\n+}\n+\n+\n+sub combine_CpGs_to_single_CG_entity{\n+ my $CpG_report_file = shift;\n+ warn "Now merging top and bottom strand CpGs into a single CG dinucleotide entity\\n";\n+\n+ open (IN,$CpG_report_file) or die "Failed to open file $CpG_report_file: $!\\n\\n";\n+ my $pooled_CG = $CpG_report_file;\n+ $pooled_CG =~ s/$/.merged_CpG_evidence.cov/;\n+ open (OUT,\'>\',$pooled_CG) or die "Failed to write to file \'$pooled_CG\': $!\\n\\n";\n+ warn ">>> Writing a new coverage file with top and bottom strand CpG methylation evidence merged to $pooled_CG <<<\\n\\n";\n+ sleep(1);\n+\n+ while (1){\n+ my $line1 = <IN>;\n+ my $line2 = <IN>;\n+ last unless ($'..b"inates of the input file are expected to be 1-based throughout (do not use files ending in .zero.cov!).\n+\n+\n+ USAGE: coverage2cytosine [options] --genome_folder <path> -o <output> [input]\n+\n+\n+-o/--output <filename> Name of the output file, mandatory.\n+\n+--dir Output directory. Output is written to the current directory if not specified explicitly.\n+\n+--genome_folder <path> Enter the genome folder you wish to use to extract sequences from (full path only). Accepted\n+ formats are FastA files ending with '.fa' or '.fasta'. Specifying a genome folder path is mandatory.\n+\n+-CX/--CX_context The output file contains information on every single cytosine in the genome irrespective of\n+ its context. This applies to both forward and reverse strands. Please be aware that this will\n+ generate output files with > 1.1 billion lines for a mammalian genome such as human or mouse.\n+ Default: OFF (i.e. Default = CpG context only).\n+\n+--merge_CpG Using this option will post-process the genome-wide report to write out an additional coverage\n+ file (see above for the coverage file format) which has the top and bottom strand methylation\n+ evidence pooled into a single CpG dinucleotide entity. 
This may be the desirable input format\n+ for some downstream processing tools such as the R-package bsseq (by K.D. Hansen). An example would be:\n+\n+\t\t\t genome-wide CpG report (old)\n+\t\t\t gi|9626372|ref|NC_001422.1| 157 + 313 156 CG\n+\t\t\t gi|9626372|ref|NC_001422.1| 158 - 335 156 CG\n+\t\t\t merged CpG evidence coverage file (new)\n+\t\t\t gi|9626372|ref|NC_001422.1| 157 158 67.500000 648 312\n+\n+\t\t\t This option is currently experimental, and only works if CpG context only and a single genome-wide report\n+ were specified (i.e. it doesn't work with the options --CX or --split_by_chromosome).\n+\n+--gc/--gc_context In addition to normal processing this option will reprocess the genome to find methylation in \n+ GpC context. This might be useful for specialist applications where GpC methylases had been\n+ deployed. The output format is exactly the same as for the normal cytosine report, and only\n+ positions covered by at least one read are reported (output file ends in .GpC_report.txt). In addition\n+ this will write out a Bismark coverage file (ending in GpC.cov).\n+\n+--ff In addition to the standard output selecting --ff will also extract a four and five (tetra/penta)\n+ nucleotide context for the cytosines in question. Too short sequences (e.g. at the edges of the\n+ chromosome) will be left blank; sequences containing Ns are ignored.\n+\n+--zero_based Uses 0-based coordinates instead of 1-based coordinates throughout. Default: OFF.\n+\n+--split_by_chromosome Writes the output into individual files for each chromosome instead of a single output file. Files\n+ will be named to include the input filename and the chromosome number.\n+\n+--gzip Output file will be GZIP compressed (ending in .gz). 
Only works for standard CpG- and CX-output.\n+ \n+--help Displays this help message and exits\n+\n+\n+\n+OUTPUT FORMAT:\n+\n+The genome-wide cytosine methylation output file is tab-delimited in the following format (1-based coords):\n+===========================================================================================================\n+\n+<chromosome> <position> <strand> <count methylated> <count non-methylated> <C-context> <trinucleotide context>\n+\n+\n+ Script last modified: 04 April 2016\n+\n+EOF\n+ ;\n+ exit 1;\n+}\n+\n" |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_methylation_extractor --- a/bismark_methylation_extractor Wed Feb 11 16:21:51 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,4760 +0,0 @@\n-#!/usr/bin/perl\n-use warnings;\n-use strict;\n-$|++;\n-use Getopt::Long;\n-use Cwd;\n-use Carp;\n-use FindBin qw($Bin);\n-use lib "$Bin/../lib";\n-\n-\n-## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)\n-\n-## This program is free software: you can redistribute it and/or modify\n-## it under the terms of the GNU General Public License as published by\n-## the Free Software Foundation, either version 3 of the License, or\n-## (at your option) any later version.\n-\n-## This program is distributed in the hope that it will be useful,\n-## but WITHOUT ANY WARRANTY; without even the implied warranty of\n-## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-## GNU General Public License for more details.\n-\n-## You should have received a copy of the GNU General Public License\n-## along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\n-my @filenames; # input files\n-my %counting;\n-my $parent_dir = getcwd();\n-\n-my %fhs;\n-\n-my $version = \'v0.10.1\';\n-my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip,$ignore_r2,$mbias_only,$gazillion,$ample_mem) = process_commandline();\n-\n-\n-### only needed for bedGraph output\n-my @sorting_files; # if files are to be written to bedGraph format, these are the methylation extractor output files\n-my @methylcalls = qw (0 0 0); # [0] = methylated, [1] = unmethylated, [2] = total\n-my @bedfiles;\n-\n-### only needed for genome-wide cytosine methylation report\n-my %chromosomes;\n-\n-my %mbias_1;\n-my %mbias_2;\n-\n-##############################################################################################\n-### Summarising Run 
Parameters\n-##############################################################################################\n-\n-### METHYLATION EXTRACTOR\n-\n-warn "Summarising Bismark methylation extractor parameters:\\n";\n-warn \'=\'x63,"\\n";\n-\n-if ($single){\n- if ($vanilla){\n- warn "Bismark single-end vanilla format specified\\n";\n- }\n- else{\n- warn "Bismark single-end SAM format specified (default)\\n"; # default\n- }\n-}\n-elsif ($paired){\n- if ($vanilla){\n- warn "Bismark paired-end vanilla format specified\\n";\n- }\n- else{\n- warn "Bismark paired-end SAM format specified (default)\\n"; # default\n- }\n-}\n-\n-if ($single){\n- if ($ignore){\n- warn "First $ignore bp will be disregarded when processing the methylation call string\\n";\n- }\n-}\n-else{ ## paired-end\n- if ($ignore){\n- warn "First $ignore bp will be disregarded when processing the methylation call string of Read 1\\n";\n- }\n- if ($ignore_r2){\n- warn "First $ignore_r2 bp will be disregarded when processing the methylation call string of Read 2\\n";\n- }\n-}\n-\n-\n-if ($full){\n- warn "Strand-specific outputs will be skipped. 
Separate output files for cytosines in CpG, CHG and CHH context will be generated\\n";\n-}\n-if ($merge_non_CpG){\n- warn "Merge CHG and CHH context to non-CpG context specified\\n";\n-}\n-### output directory\n-if ($output_dir eq \'\'){\n- warn "Output will be written to the current directory (\'$parent_dir\')\\n";\n-}\n-else{\n- warn "Output path specified as: $output_dir\\n";\n-}\n-\n-\n-sleep (1);\n-\n-### BEDGRAPH\n-\n-if ($bedGraph){\n- warn "\\n\\nSummarising bedGraph parameters:\\n";\n- warn \'=\'x63,"\\n";\n-\n- if ($counts){\n- warn "Generating additional output in bedGraph and coverage format\\nbedGraph format:\\t<Chromosome> <Start Position> <End Position> <Methylation Percentage>\\ncoverage format:\\t<Chromosome> <Start Position> <End Position> <Methylation Percentage> <count methylated> <count non-methylated>\\n\\n";\n- }\n- else{\n- warn "Generating additional sorted output in bedGraph format (output format: <Chromosome> <Start Position> <End Position> <Methylation Percentage>)\\n";\n- }\n-\n- warn "Using a cutoff of $coverage_threshold read(s) to repo'..b"e cost of a larger memory footprint\n- (two arrays of the length of the largest human chromosome 1 (~250M bp) consume around 16GB\n- of RAM). Due to overheads in creating and looping through these arrays it seems that it will\n- actually be *slower* for small files (few million alignments), and we are currently testing at\n- which point it is advisable to use this option. Note that --ample_memory is not compatible\n- with options '--scaffolds/--gazillion' (as it requires pre-sorted files to begin with).\n-\n-\n-\n-Genome-wide cytosine methylation report specific options:\n-=========================================================\n-\n---cytosine_report After the conversion to bedGraph has completed, the option '--cytosine_report' produces a\n- genome-wide methylation report for all cytosines in the genome. 
By default, the output uses 1-based\n- chromosome coordinates (zero-based cords are optional) and reports CpG context only (all\n- cytosine context is optional). The output considers all Cs on both forward and reverse strands and\n- reports their position, strand, trinucleotide content and methylation state (counts are 0 if not\n- covered). The cytsoine report conversion step is performed by the external module \n- 'bedGraph2cytosine'; this script needs to reside in the same folder as the bismark_methylation_extractor\n- itself.\n-\n---CX/--CX_context The output file contains information on every single cytosine in the genome irrespective of\n- its context. This applies to both forward and reverse strands. Please be aware that this will\n- generate output files with > 1.1 billion lines for a mammalian genome such as human or mouse.\n- Default: OFF (i.e. Default = CpG context only).\n-\n---zero_based Uses zero-based coordinates like used in e.g. bed files instead of 1-based coordinates. Default: OFF.\n-\n---genome_folder <path> Enter the genome folder you wish to use to extract sequences from (full path only). Accepted\n- formats are FastA files ending with '.fa' or '.fasta'. Specifying a genome folder path is mandatory.\n-\n---split_by_chromosome Writes the output into individual files for each chromosome instead of a single output file. 
Files\n- will be named to include the input filename and the chromosome number.\n-\n-\n-\n-OUTPUT:\n-\n-The bismark_methylation_extractor output is in the form:\n-========================================================\n-<seq-ID> <methylation state*> <chromosome> <start position (= end position)> <methylation call>\n-\n-* Methylated cytosines receive a '+' orientation,\n-* Unmethylated cytosines receive a '-' orientation.\n-\n-\n-\n-The bedGraph output (optional) looks like this (tab-delimited; 0-based start coords, 1-based end coords):\n-=========================================================================================================\n-\n-track type=bedGraph (header line)\n-\n-<chromosome> <start position> <end position> <methylation percentage>\n-\n-\n-\n-The coverage output looks like this (tab-delimited, 1-based genomic coords):\n-============================================================================\n-\n-<chromosome> <start position> <end position> <methylation percentage> <count methylated> <count non-methylated>\n-\n-\n-\n-The genome-wide cytosine methylation output file is tab-delimited in the following format:\n-==========================================================================================\n-<chromosome> <position> <strand> <count methylated> <count non-methylated> <C-context> <trinucleotide context>\n-\n-\n-\n-This script was last modified on 25 November 2013.\n-\n-HOW_TO\n-}\n" |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_methylation_extractor.py --- a/bismark_methylation_extractor.py Wed Feb 11 16:21:51 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,197 +0,0 @@ -#!/usr/bin/env python - -import argparse, os, shutil, subprocess, sys, tempfile, fileinput -import zipfile -from glob import glob - -def stop_err( msg ): - sys.stderr.write( "%s\n" % msg ) - sys.exit() - -def zipper(dir, zip_file): - zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED) - root_len = len(os.path.abspath(dir)) - for root, dirs, files in os.walk(dir): - archive_root = os.path.abspath(root)[root_len:] - for f in files: - fullpath = os.path.join(root, f) - archive_name = os.path.join(archive_root, f) - zip.write(fullpath, archive_name, zipfile.ZIP_DEFLATED) - zip.close() - return zip_file - -def __main__(): - #Parse Command Line - parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.') - - # input options - parser.add_argument( '--bismark_path', dest='bismark_path', help='Path to the bismark perl scripts' ) - - parser.add_argument( '--infile', help='Input file in SAM or BAM format.' ) - parser.add_argument( '--single-end', dest='single_end', action="store_true" ) - parser.add_argument( '--paired-end', dest='paired_end', action="store_true" ) - - parser.add_argument( '--report-file', dest='report_file' ) - parser.add_argument( '--comprehensive', action="store_true" ) - parser.add_argument( '--merge-non-cpg', dest='merge_non_cpg', action="store_true" ) - parser.add_argument( '--no-overlap', dest='no_overlap', action="store_true" ) - parser.add_argument( '--compress' ) - parser.add_argument( '--ignore-bps', dest='ignore_bps', type=int ) - - # OT - original top strand - parser.add_argument( '--cpg_ot' ) - parser.add_argument( '--chg_ot' ) - parser.add_argument( '--chh_ot' ) - # CTOT - complementary to original top strand - parser.add_argument( '--cpg_ctot' ) - parser.add_argument( '--chg_ctot' ) - parser.add_argument( '--chh_ctot' ) - # OB - original bottom strand - parser.add_argument( '--cpg_ob' ) - parser.add_argument( '--chg_ob' ) - parser.add_argument( '--chh_ob' ) - # CTOT - 
complementary to original bottom strand - parser.add_argument( '--cpg_ctob' ) - parser.add_argument( '--chg_ctob' ) - parser.add_argument( '--chh_ctob' ) - - parser.add_argument( '--cpg_context' ) - parser.add_argument( '--chg_context' ) - parser.add_argument( '--chh_context' ) - - parser.add_argument( '--non_cpg_context' ) - - parser.add_argument( '--non_cpg_context_ot' ) - parser.add_argument( '--non_cpg_context_ctot' ) - parser.add_argument( '--non_cpg_context_ob' ) - parser.add_argument( '--non_cpg_context_ctob' ) - - args = parser.parse_args() - - - # Build methylation extractor command - output_dir = tempfile.mkdtemp() - cmd = 'bismark_methylation_extractor --no_header -o %s %s %s' - if args.bismark_path: - # add the path to the bismark perl scripts, that is needed for galaxy - cmd = os.path.join(args.bismark_path, cmd) - - additional_opts = '' - # Set up all options - if args.single_end: - additional_opts += ' --single-end ' - else: - additional_opts += ' --paired-end ' - if args.no_overlap: - additional_opts += ' --no_overlap ' - if args.ignore_bps: - additional_opts += ' --ignore %s ' % args.ignore_bps - if args.comprehensive: - additional_opts += ' --comprehensive ' - if args.merge_non_cpg: - additional_opts += ' --merge_non_CpG ' - if args.report_file: - additional_opts += ' --report ' - - #detect BAM file, use samtools view if it is a bam file - f = open (args.infile, 'rb') - sig = f.read(4) - f.close() - if sig == '\x1f\x8b\x08\x04' : - cmd = cmd % (output_dir, additional_opts, '-') - cmd = 'samtools view %s | %s' % (args.infile, cmd ) - else : - cmd = cmd % (output_dir, additional_opts, args.infile) - - # Run - try: - tmp_out = tempfile.NamedTemporaryFile().name - tmp_stdout = open( tmp_out, 'wb' ) - tmp_err = tempfile.NamedTemporaryFile().name - tmp_stderr = open( tmp_err, 'wb' ) - proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) - returncode = proc.wait() - tmp_stderr.close() - # get stderr, allowing for 
case where it's very large - tmp_stderr = open( tmp_err, 'rb' ) - stderr = '' - buffsize = 1048576 - try: - while True: - stderr += tmp_stderr.read( buffsize ) - if not stderr or len( stderr ) % buffsize != 0: - break - except OverflowError: - pass - tmp_stdout.close() - tmp_stderr.close() - if returncode != 0: - raise Exception, stderr - - # TODO: look for errors in program output. - except Exception, e: - stop_err( 'Error in bismark methylation extractor:\n' + str( e ) ) - - - # collect and copy output files - - if args.compress: - zipper(output_dir, args.compress) - - - if args.cpg_ot: - shutil.move( glob(os.path.join( output_dir, '*CpG_OT_*'))[0], args.cpg_ot ) - if args.chg_ot: - shutil.move( glob(os.path.join( output_dir, '*CHG_OT_*'))[0], args.chg_ot ) - if args.chh_ot: - shutil.move( glob(os.path.join( output_dir, '*CHH_OT_*'))[0], args.chh_ot ) - if args.cpg_ctot: - shutil.move( glob(os.path.join( output_dir, '*CpG_CTOT_*'))[0], args.cpg_ctot ) - if args.chg_ctot: - shutil.move( glob(os.path.join( output_dir, '*CHG_CTOT_*'))[0], args.chg_ctot ) - if args.chh_ctot: - shutil.move( glob(os.path.join( output_dir, '*CHH_CTOT_*'))[0], args.chh_ctot ) - if args.cpg_ob: - shutil.move( glob(os.path.join( output_dir, '*CpG_OB_*'))[0], args.cpg_ob ) - if args.chg_ob: - shutil.move( glob(os.path.join( output_dir, '*CHG_OB_*'))[0], args.chg_ob ) - if args.chh_ob: - shutil.move( glob(os.path.join( output_dir, '*CHH_OB_*'))[0], args.chh_ob ) - if args.cpg_ctob: - shutil.move( glob(os.path.join( output_dir, '*CpG_CTOB_*'))[0], args.cpg_ctob ) - if args.chg_ctob: - shutil.move( glob(os.path.join( output_dir, '*CHG_CTOB_*'))[0], args.chg_ctob ) - if args.chh_ctob: - shutil.move( glob(os.path.join( output_dir, '*CHH_CTOB_*'))[0], args.chh_ctob ) - - # context-dependent methylation output files - if args.cpg_context: - shutil.move( glob(os.path.join( output_dir, '*CpG_context_*'))[0], args.cpg_context ) - if args.chg_context: - shutil.move( glob(os.path.join( output_dir, 
'*CHG_context_*'))[0], args.chg_context ) - if args.chh_context: - shutil.move( glob(os.path.join( output_dir, '*CHH_context_*'))[0], args.chh_context ) - - if args.non_cpg_context: - shutil.move( glob(os.path.join( output_dir, '*Non_CpG_context_*'))[0], args.non_cpg_context ) - - if args.non_cpg_context_ot: - shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OT_*'))[0], args.non_cpg_context_ot ) - if args.non_cpg_context_ctot: - shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOT_*'))[0], args.non_cpg_context_ctot ) - if args.non_cpg_context_ob: - shutil.move( glob(os.path.join( output_dir, '*Non_CpG_OB_*'))[0], args.non_cpg_context_ob ) - if args.non_cpg_context_ctob: - shutil.move( glob(os.path.join( output_dir, '*Non_CpG_CTOB_*'))[0], args.non_cpg_context_ctob ) - - - - if args.report_file: - shutil.move( glob(os.path.join( output_dir, '*_splitting_report*'))[0], args.report_file ) - - - # Clean up temp dirs - if os.path.exists( output_dir ): - shutil.rmtree( output_dir ) - -if __name__=="__main__": __main__() |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_methylation_extractor.xml --- a/bismark_methylation_extractor.xml Wed Feb 11 16:21:51 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,309 +0,0 @@\n-<tool id="bismark_methylation_extractor" name="Bismark Meth. Extractor" version="0.10.2">\n- <!-- Wrapper compatible with Bismark version 0.10 -->\n- <description>Reports on methylation status of reads mapped by Bismark</description>\n- <!--<version_command>bismark_methylation_extractor version</version_command>-->\n- <requirements>\n- <requirement type="set_environment">SCRIPT_PATH</requirement>\n- <requirement type="package" version="0.12.8">bowtie</requirement>\n- <requirement type="package" version="2.1.0">bowtie2</requirement>\n- </requirements>\n- <parallelism method="basic"></parallelism>\n- <command interpreter="python">\n-<![CDATA[\n- bismark_methylation_extractor.py\n-\n- --infile $input\n-\n- --bismark_path \\$SCRIPT_PATH\n-\n- #if $singlePaired.sPaired == "single":\n- --single-end\n- #else:\n- --paired-end\n- $singlePaired.no_overlap\n- #end if\n-\n- #if str($ignore_bps) != "0":\n- --ignore $ignore_bps\n- #end if\n-\n- #if $report:\n- --report-file $o_report\n- #end if\n-\n- #if $comprehensive:\n- --comprehensive\n- #end if\n-\n- #if $merge_non_cpg:\n- --merge-non-cpg\n- #end if\n-\n- #if $compress:\n- --compress $compressed_output\n- #else:\n- #if $comprehensive == False and $merge_non_cpg == False:\n- ##twelfe files\n- --cpg_ot $cpg_ot\n- --chg_ot $chg_ot\n- --chh_ot $chh_ot\n- --cpg_ctot $cpg_ctot\n- --chg_ctot $chg_ctot\n- --chh_ctot $chh_ctot\n- --cpg_ob $cpg_ob\n- --chg_ob $chg_ob\n- --chh_ob $chh_ob\n- --cpg_ctob $cpg_ctob\n- --chg_ctob $chg_ctob\n- --chh_ctob $chh_ctob\n- #elif $merge_non_cpg and $comprehensive:\n- ## two files\n- --non_cpg_context $non_cpg_context\n- --cpg_context $cpg_context\n- #elif $comprehensive:\n- ## three files\n- --cpg_context $cpg_context\n- --chg_context $chg_context\n- --chh_context $chh_context\n- #elif $merge_non_cpg:\n- ## eight files\n- --non_cpg_context_ctot $non_cpg_context_ctot\n- --non_cpg_context_ot $non_cpg_context_ot\n- --non_cpg_context_ob $non_cpg_context_ob\n- 
--non_cpg_context_ctob $non_cpg_context_ctob\n- --cpg_ot $cpg_ot\n- --cpg_ctot $cpg_ctot\n- --cpg_ob $cpg_ob\n- --cpg_ctob $cpg_ctob\n- #end if\n- ## end compress\n- #end if\n-\n-]]>\n- </command>\n- <inputs>\n- <!-- Input Parameters -->\n- <param name="input" type="data" format="sam,bam" label="SAM/BAM file from Bismark bisulfite mapper" />\n- <conditional name="singlePaired">\n- <param name="sPaired" type="select" label="Is this library mate-paired?">\n- <option value="single">Single-end</option>\n- <option value="paired">Paired-end</option>\n- </param>\n- <when value="single" />\n- <when value="paired">\n- <param name="no_overlap" type="boolean" truevalue="--no-overlap" falsevalue="" checked="False" label="This option avoids scoring overlapping methylation calls twice, in case of overlapping read one and read two" help="" />\n- </when>\n- </conditional>\n- <param name="ignore_bps" type="integer" value="0" label="Ignore the first N bp when processing the methylation call string" />\n- <param name="comprehensive" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Merge all four possible strand-specific methylation info\n-into context-dependent output files" help="" />\n- <param name="mer'..b"become very large and more difficult to handle. The C\n-methylation info additionally splits cytosine methylation calls up into one of the four possible\n-strands a given bisulfite read aligned against:\n-\n- - OT = original top strand\n- - CTOT = complementary to original top strand\n-\n- - OB = original bottom strand\n- - CTOB = complementary to original bottom strand\n-\n-Thus, by default twelve individual output files are being generated per input file (unless\n---comprehensive is specified, see below). 
The output files can be imported into a genome\n-viewer, such as SeqMonk, and re-combined into a single data group if desired (in fact\n-unless the bisulfite reads were generated preserving directionality it doesn't make any\n-sense to look at the data in a strand-specific manner). Strand-specific output files can\n-optionally be skipped, in which case only three output files for CpG, CHG or CHH context\n-will be generated. For both the strand-specific and comprehensive outputs there is also\n-the option to merge both non-CpG contexts (CHG and CHH) into one single non-CpG context.\n-\n-\n-.. _Bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/\n-\n-\n-It is developed by Krueger F and Andrews SR. at the Babraham Institute. Krueger F, Andrews SR. (2011) Bismark: a flexible aligner and methylation caller for Bisulfite-Seq applications. Bioinformatics, 27, 1571-2.\n-\n--------\n-\n-**Bismark settings**\n-\n-All of the options have a default value. You can change any of them. If any Bismark function is missing please contact the tool author or your Galaxy admin.\n-\n-------\n-\n-**Outputs**\n-\n-The output files are in the following format (tab delimited)::\n-\n-\n- Column Description\n- -------- --------------------------------------------------------\n- 1 seq-ID\n- 2 strand\n- 3 chromosome\n- 4 position\n- 5 methylation call\n-\n-\n- * Methylated cytosines receive a '+' orientation,\n- * Unmethylated cytosines receive a '-' orientation.\n-\n-------\n-\n-**OPTIONS**\n-\n-Input::\n-\n- -s/--single-end Input file(s) are Bismark result file(s) generated from single-end\n- read data. Specifying either --single-end or --paired-end is\n- mandatory.\n-\n- -p/--paired-end Input file(s) are Bismark result file(s) generated from paired-end\n- read data. Specifying either --paired-end or --single-end is\n- mandatory.\n-\n- --no_overlap For paired-end reads it is theoretically possible that read_1 and\n- read_2 overlap. 
This option avoids scoring overlapping methylation\n- calls twice. Whilst this removes a bias towards more methylation calls\n- towards the center of sequenced fragments it can de facto remove\n- a good proportion of the data.\n-\n- --ignore INT Ignore the first INT bp at the 5' end of each read when processing the\n- methylation call string. This can remove e.g. a restriction enzyme site\n- at the start of each read.\n-\n-Output::\n-\n- --comprehensive Specifying this option will merge all four possible strand-specific\n- methylation info into context-dependent output files. The default\n- contexts are:\n- - CpG context\n- - CHG context\n- - CHH context\n-\n- --merge_non_CpG This will produce two output files (in --comprehensive mode) or eight\n- strand-specific output files (default) for Cs in\n- - CpG context\n- - non-CpG context\n-\n- --report Prints out a short methylation summary as well as the paramaters used to run\n- this script.\n-\n-\n-]]>\n- </help>\n-</tool>\n" |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_pretty_report/bismark2report --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_pretty_report/bismark2report Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,1237 @@\n+#!/usr/bin/env perl\n+use warnings;\n+use strict;\n+use Getopt::Long;\n+use FindBin qw($Bin);\n+use lib "$Bin/../lib";\n+\n+## This program is Copyright (C) 2010-16, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+my $bismark2report_version = \'v0.16.3\';\n+my (@alignment_reports,@dedup_reports,@splitting_reports,@mbias_reports,@nuc_reports);\n+\n+my ($output_dir,$verbose,$manual_output_file) = process_commandline();\n+\n+# print join (",",@alignment_reports)."\\n";\n+# print join (",",@dedup_reports)."\\n";\n+# print join (",",@splitting_reports)."\\n";\n+# print join (",",@mbias_reports)."\\n";\n+# print join (",",@nuc_reports)."\\n";\n+\n+while (@alignment_reports){\n+\n+ my $alignment_report = shift @alignment_reports;\n+ my $dedup_report = shift @dedup_reports;\n+ my $splitting_report = shift @splitting_reports;\n+ my $mbias_report = shift @mbias_reports;\n+ my $nuc_report = shift @nuc_reports;\n+\n+ ### HTML OUTPUT FILE\n+ my $report_output = $alignment_report;\n+ $report_output =~ s/^.*\\///; # deleting optional path information\n+ $report_output =~ s/\\.txt$//;\n+ $report_output =~ s/$/.html/;\n+\n+ # if -o output_file was specified we are going to use that name preferentially. 
This may only happen if there is a single report in the folder, or if a single report has been specified manually\n+ if ($manual_output_file){\n+ warn "A specific output filename was specified: $manual_output_file. Using that one instead of deriving the filename\\n"; sleep(1);\n+ $report_output = $manual_output_file;\n+ }\n+\n+ $report_output = $output_dir.$report_output;\n+ warn "\\nWriting Bismark HTML report to >> $report_output <<\\n\\n";\n+\n+ my $doc = read_report_template(); #reading and storing the entire report template\n+\n+ # BISMARK ALIGNMENT REPORT (mandatory)\n+ warn "="x110,"\\n";\n+ warn "Using the following alignment report:\\t\\t> $alignment_report <\\n";\n+ # DEDUPLICATION REPORT (optional)\n+ if ($dedup_report){\n+ warn "Using the following deduplication report:\\t> $dedup_report <\\n";\n+ }\n+ else{\n+ warn "No deduplication report file specified, skipping this step\\n";\n+ }\n+\n+ # SPLITTING REPORT (optional)\n+ if ($splitting_report){\n+ warn "Using the following splitting report:\\t\\t> $splitting_report <\\n";\n+ }\n+ else{\n+ warn "No splitting report file specified, skipping this step\\n";\n+ }\n+\n+ # M-BIAS REPORT (optional)\n+ if ($mbias_report){\n+ warn "Using the following M-bias report:\\t\\t> $mbias_report <\\n";\n+ }\n+ else{\n+ warn "No M-bias report file specified, skipping this step\\n";\n+ }\n+ \n+ # NUCLEOTIDE COVERAGE REPORT (optional)\n+ if ($nuc_report){\n+ warn "Using the following nucleotide coverage report:\\t> $nuc_report <\\n";\n+ }\n+ else{\n+ warn "No nucleotide coverage report file specified, skipping this step\\n";\n+ } \n+ warn "="x110,"\\n\\n\\n";\n+ $verbose and sleep(3);\n+\n+ # creating timestamp\n+ $doc = getLoggingTime($doc);\n+\n+ $doc = read_alignment_report($alignment_report,$doc); # mandatory\n+\n+ if ($dedup_report){ # optional\n+ $doc = read_deduplication_report($dedup_report,$doc);\n+\n+ # removing the delete tags in the html template\n+ $doc =~ s/\\{\\{start_deletion_duplication\\}\\}//g;\n+ $doc 
=~ s/\\{\\{end_deletion_duplication\\}\\}//g;\n+ }\n+ else{\n+ # removing the entire graph and table section for the deduplication pa'..b't automatically ...\\n\\n";\n+ }\n+ elsif (scalar @mbias_report_files == 0){\n+\tpush @mbias_reports, \'\';\n+ }\n+ else{\n+\t# there is only a single M-bias report in the current directory, using this one\n+\t$mbias_report = shift @mbias_report_files;\n+\tpush @mbias_reports, $mbias_report;\n+ }\n+ }\n+ $dedup_report = $splitting_report = $mbias_report = $nucleotide_coverage_report = undef;\n+ }\n+\n+ return ($output_dir,$verbose,$manual_output_file);\n+\n+}\n+\n+sub print_helpfile{\n+ print <<EOF\n+\n+ SYNOPSIS:\n+\n+ This script uses a Bismark alignment report to generate a graphical HTML report page. Optionally, further reports of\n+ the Bismark suite such as deduplication, methylation extractor splitting or M-bias reports can be specified as well. If several\n+ Bismark reports are found in the same folder, a separate report will be generated for each of these, whereby the output filename\n+ will be derived from the Bismark alignment report file. Bismark2report attempts to find optional reports automatically based\n+ on the file basename.\n+\n+\n+ USAGE: bismark2report [options]\n+\n+\n+-o/--output <filename> Name of the output file (optional). If not specified explicitly, the output filename will be derived\n+ from the Bismark alignment report file. Specifying an output filename only works if the HTML report is\n+ to be generated for a single Bismark alignment report (and potentially additional reports).\n+\n+--dir Output directory. Output is written to the current directory if not specified explicitly.\n+\n+\n+--alignment_report FILE If not specified explicitly, bismark2report attempts to find Bismark report file(s) in the current\n+ directory and produces a separate HTML report for each mapping report file. 
Based on the basename of\n+ the Bismark mapping report, bismark2report will also attempt to find the other Bismark reports (see below)\n+ for inclusion into the HTML report. Specifying a Bismark alignment report file is mandatory.\n+\n+--dedup_report FILE If not specified explicitly, bismark2report attempts to find a deduplication report file with the same\n+ basename as the Bismark mapping report (generated by deduplicate_bismark) in the\n+ current working directory. Including a deduplication report is optional, and using the FILE \'none\'\n+ will skip this step entirely.\n+\n+--splitting_report FILE If not specified explicitly, bismark2report attempts to find a splitting report file with the same\n+ basename as the Bismark mapping report (generated by the Bismark methylation extractor) in the current\n+ working directory. Including a splitting report is optional, and using the FILE \'none\' will skip this\n+ step entirely.\n+\n+--mbias_report FILE If not specified explicitly, bismark2report attempts to find a single M-bias report file with the same\n+ basename as the Bismark mapping report (generated by the Bismark methylation extractor) in the current\n+ working directory. Including an M-Bias report is optional, and using the FILE \'none\' will skip this step\n+ entirely.\n+\n+--nucleotide_report FILE If not specified explicitly, bismark2report attempts to find a single nucleotide coverage report file\n+ with the same basename as the Bismark mapping report (generated by Bismark with the option\n+ \'--nucleotide_coverage\') in the current working directory. Including a nucleotide coverage statistics\n+ report is optional, and using the FILE \'none\' will skip this report entirely.\n+\n+ Script last modified: 13 May 2016\n+\n+EOF\n+ ;\n+ exit 1;\n+}\n+\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_pretty_report/bismark2report_wrapper.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_pretty_report/bismark2report_wrapper.py Sat May 06 13:18:09 2017 -0400 |
[ |
@@ -0,0 +1,122 @@ +#!/usr/bin/python + +import argparse +import os +import shutil +import subprocess +import sys +import tempfile +import logging + +def cleanup_before_exit(tmp_dir): + if tmp_dir and os.path.exists(tmp_dir): + shutil.rmtree(tmp_dir) + +def get_arg(): + parser = argparse.ArgumentParser() + parser.add_argument('--tool_dir', dest='tool_dir', action='store', nargs=1, metavar='tool_dir', type=str) + parser.add_argument('--alignment_report', dest='alignment_report', action='store', nargs=1, metavar='alignment_report', type=str) + parser.add_argument('--dedup_report', dest='dedup_report', action='store', nargs=1, metavar='dedup_report', type=str) + parser.add_argument('--splitting_report', dest='splitting_report', action='store', nargs=1, metavar='splitting_report', type=str) + parser.add_argument('--mbias_report', dest='mbias_report', action='store', nargs=1, metavar='mbias_report', type=str) + parser.add_argument('--nucleotide_report', dest='nucleotide_report', action='store', nargs=1, metavar='nucleotide_report', type=str) + parser.add_argument('--output_html_report', dest='output_html_report', action='store', nargs=1, metavar='output_html_report', type=str) + parser.add_argument('--output_html_report_link', dest='output_html_report_link', action='store', nargs=1, metavar='output_html_report_link', type=str) + parser.add_argument('--log_report', dest='log_report', action='store', nargs=1, metavar='log_report', type=str) + parser.add_argument('--output_dir', dest='job_dir', action='store', nargs=1, metavar='job_dir', type=str) + args = parser.parse_args() + return args + +def create_and_write_html_link(job_dir, output_html_report_link, tmp_dir): + """ + Web browsers don't allow to open a link pointing to the absolute path of a local html file FROM a website page; + The only way to make such link functional is to integrate the local file inside the web structure of the site. 
+ Galaxy has been designed such that the child_dir <dataset_[0-9]+_files> of the output_dir is considered as the root + of the html base tag (i.e <base href="/" /> for the current job running. + The function proceeds the following steps: + #1. Extracts the galaxy dir where the output files are stored + #2. Creating a child dir <dataset_[0-9]+_files> in this output_dir is needed because it is considered as the root of the html base tag + # We can extract the exact name of this child dir from the jobs_directory name + #3. Moves the html file in this child_dir + """ + output_path_list = output_html_report_link.split('/') + output_path = '/'.join(output_path_list[0:-1]) + html_root = job_dir.split('/')[-1] + final_dir = os.path.join(output_path, html_root) + os.makedirs(final_dir) + shutil.move(os.path.join(tmp_dir, 'html_report'), os.path.join(final_dir, 'html_report.html')) + + html_report = open(output_html_report_link, 'wb') + html_report.write('<!DOCTYPE html>\n') + html_report.write('<head>\n') + html_report.write('\t<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n') + html_report.write('\t\t<base href="/" />\n') + html_report.write('\t\t<a href="html_report.html/" target="_blank">Link to Bismark Pretty Report Page</a>\n') + html_report.write('</head>') + html_report.close() + +def __main__(): + args = get_arg() + + tmp_dir = tempfile.mkdtemp(prefix='tmp', suffix='') + + if args.log_report: + logging.basicConfig(level=logging.INFO, filename=args.log_report[0], filemode="a+", format='%(message)s') + else: + logging.basicConfig(level=logging.INFO, filename=os.path.join(tmp_dir, 'log_report.txt'), filemode="a+", format='%(message)s') + + alignment_option = '--alignment_report' + alignment_report = args.alignment_report[0] + if args.dedup_report: + dedup_option = '--dedup_report' + dedup_report = args.dedup_report[0] + else: + dedup_option = '' + dedup_report = '' + if args.splitting_report: + splitting_option = '--splitting_report' + 
splitting_report = args.splitting_report[0] + else: + splitting_option = '' + splitting_report = '' + if args.mbias_report: + mbias_option = '--mbias_report' + mbias_report = args.mbias_report[0] + else: + mbias_option = '' + mbias_report = '' + if args.nucleotide_report: + nucleotide_option = '--nucleotide_report' + nucleotide_report = args.nucleotide_report[0] + else: + nucleotide_option = '' + nucleotide_report = '' + + proc = subprocess.Popen(['perl', os.path.join(args.tool_dir[0], 'bismark2report'), alignment_option, alignment_report, dedup_option, dedup_report,\ + splitting_option, splitting_report, mbias_option, mbias_report, nucleotide_option, nucleotide_report,\ + '--dir', tmp_dir, '--output', 'html_report'],\ + stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + proc_out, proc_err = proc.communicate() + + cmd = 'perl %s %s %s %s %s %s %s %s %s %s %s --output html_report --dir %s'\ + % (os.path.join(args.tool_dir[0], 'bismark2report'), alignment_option, alignment_report, dedup_option, dedup_report,\ + splitting_option, splitting_report, mbias_option, mbias_report, nucleotide_option, nucleotide_report, tmp_dir) + + logging.info('COMMAND LINE:\n\n%s' % cmd) + logging.info("__________________________________________________________________\n") + logging.info("BISMARK PRETTY REPORT STDOUT:\n\n%s" % proc_out) + if proc_err: + logging.critical("__________________________________________________________________\n") + logging.critical("BISMARK PRETTY REPORT ERROR:\n\n%s" % proc_err) + sys.exit("Bismark pretty report crashed with the folowing error message:\n%s" % proc_err) + + if args.output_html_report: + shutil.copy(os.path.join(tmp_dir, 'html_report'), args.output_html_report[0]) + + #This function writes a link towards the Bismark html page inside an html file. 
+ #This is needed because the direct visualization of the Bismark html report via Galaxy is ugly + create_and_write_html_link(args.job_dir[0], args.output_html_report_link[0], tmp_dir) + + cleanup_before_exit(tmp_dir) + +if __name__=="__main__": __main__() \ No newline at end of file |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_pretty_report/bismark2report_wrapper.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_pretty_report/bismark2report_wrapper.xml Sat May 06 13:18:09 2017 -0400 |
[ |
@@ -0,0 +1,75 @@ +<tool id="bismark_pretty_report" name="Bismark Pretty Report" version="0.16.3"> + + <description>Generates a graphical HTML report page from report outputs of Bismark</description> + <!--<version_command>bismark version</version_command>--> + + <requirements> + <requirement type="package" version="0.1.19">samtools</requirement> + <requirement type="package" version="2.1.0">bowtie2</requirement> + </requirements> + + <stdio> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <regex match="Error:" /> + <regex match="Exception:" /> + </stdio> + + <command interpreter="python"> +<![CDATA[ + bismark2report_wrapper.py + + --tool_dir "$__tool_directory__" + + --alignment_report "$alignment" + + #if $additional_reports['dedup']: + --dedup_report "$additional_reports['dedup']" + #end if + #if $additional_reports['splitting']: + --splitting_report "$additional_reports['splitting']" + #end if + #if $additional_reports['mbias']: + --mbias_report "$additional_reports['mbias']" + #end if + #if $additional_reports['nucleotide']: + --nucleotide_report "$additional_reports['nucleotide']" + #end if + + ##--output_html_report $output_html_report + --output_html_report_link "$output_html_report_link" + --output_dir "$output_html_report_link.files_path" + + ##--log_report $log_report +]]> + </command> + + <inputs> + <param name="alignment" type="data" format="txt" label="Submit a Bismark mapping report" optional="False"/> + <section name="additional_reports" title="Additional reports to include in the HTML page (optional)" expanded="True"> + <param name="dedup" type="data" format="txt" label="Submit the corresponding Bismark deduplication report" optional="True" help="Optional output of the module *Bismark Deduplicate*"/> + <param name="splitting" type="data" format="txt" label="Submit the corresponding Bismark splitting report" optional="True" help="Optional output of the module *Bismark Methylation Extractor*"/> + <param name="mbias" type="data" format="txt" 
label="Submit the corresponding Bismark M-bias report" optional="True" help="Optional output of the module *Bismark Methylation Extractor*"/> + <param name="nucleotide" type="data" format="txt" label="Submit the corresponding Bismark nucleotide report" optional="True" help="Optional output of the module *Bismark Mapping*"/> + </section> + </inputs> + + <outputs> + <!--<data name="output_html_report" format="html" label="${tool.name} on ${on_string}: Download pretty html report"/>--> + <data name="output_html_report_link" format="html" label="${tool.name} on ${on_string}: Visualize pretty html report"/> + <!--<data name="log_report" format="txt" label="${tool.name} on ${on_string}: log report (tool stdout)"/>--> + </outputs> + + <help> +<![CDATA[ +**What it does** + + | This tool uses a Bismark alignment report to generate a graphical HTML report page. + | Optionally, further reports of the Bismark suite such as deduplication, methylation extractor splitting or M-bias reports can be specified as well. + +]]> + </help> + <citations> + <citation type="doi">10.1093/bioinformatics/btr167</citation> + </citations> +</tool> |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_pretty_report/bismark_sitrep.tpl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bismark_pretty_report/bismark_sitrep.tpl Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,807 @@\n+<!DOCTYPE html>\r\n+<html>\r\n+<head>\r\n+\t<meta http-equiv="content-type" content="text/html; charset=UTF-8">\r\n+\t<title>Bismark Processing Report - {{filename}}</title>\r\n+\t<style type="text/css">\r\n+\t\tbody {\r\n+\t\t\tfont-family: Arial, sans-serif;\r\n+\t\t\tfont-size:14px;\r\n+\t\t\tpadding:0 20px 20px;\r\n+\t\t}\r\n+\t\t.container {\r\n+\t\t\tmargin:0 auto;\r\n+\t\t\tmax-width:1200px;\r\n+\t\t}\r\n+\t\t.header h1,\r\n+\t\t.header img {\r\n+\t\t\tfloat:left;\r\n+\t\t}\r\n+\t\t.header h1 {\r\n+\t\t\tmargin: 20px 0 10px;\r\n+\t\t}\r\n+\t\t.header img {\r\n+\t\t\tpadding: 0 20px 20px 0;\r\n+\t\t}\r\n+\t\t.subtitle {\r\n+\t\t\tmargin-top:120px;\r\n+\t\t\tfloat:right;\r\n+\t\t\ttext-align:right;\r\n+\r\n+\t\t}\r\n+\t\t.header_subtitle h3,\r\n+\t\t.header_subtitle p {\r\n+\t\t\tmargin:0;\r\n+\t\t}\r\n+\t\th1 {\r\n+\t\t\tfont-size: 3.2em;\r\n+\t\t}\r\n+\t\th2 {\r\n+\t\t\tfont-size:2.2em;\r\n+\t\t}\r\n+\t\th3 {\r\n+\t\t\tfont-size:1.4em;\r\n+\t\t}\r\n+\t\th2, h3, hr {\r\n+\t\t\tclear:both;\r\n+\t\t}\r\n+\t\thr {\r\n+\t\t\tborder-top:1px solid #CCC;\r\n+\t\t\tborder-bottom:1px solid #F3F3F3;\r\n+\t\t\tborder-left:0;\r\n+\t\t\tborder-right:0;\r\n+\t\t\theight:0;\r\n+\t\t}\r\n+\t\t.data {\r\n+\t\t\tfloat:left;\r\n+\t\t\twidth:500px;\r\n+\t\t\tmax-width:100%;\r\n+\t\t\tmargin-right:30px;\r\n+\t\t\tborder:1px solid #CCC;\r\n+\t\t\tborder-collapse:separate;\r\n+\t\t\tborder-spacing: 0;\r\n+\t\t\tborder-left:0;\r\n+\t\t\t-webkit-border-radius:4px;\r\n+\t\t\t-moz-border-radius:4px;\r\n+\t\t\tborder-radius:4px;\r\n+\t\t}\r\n+\t\t.data th, .data td {\r\n+\t\t\tborder-left:1px solid #CCC;\r\n+\t\t\tborder-top:1px solid #CCC;\r\n+\t\t\tpadding:5px 7px;\r\n+\t\t}\r\n+\t\t.data tr:first-child th,\r\n+\t\t.data tr:first-child td {\r\n+\t\t\tborder-top:0;\r\n+\t\t}\r\n+\t\t.data tr:last-child th,\r\n+\t\t.data tr:last-child td {\r\n+\t\t\tborder-bottom: 2px solid #666;\r\n+\t\t}\r\n+\t\t.plot 
{\r\n+\t\t\twidth:650px;\r\n+\t\t\tmax-width:100%;\r\n+\t\t\tfloat:left;\r\n+\t\t\tmargin-bottom:30px;\r\n+\t\t}\r\n+\t\t\r\n+\t\t.fullWidth_plot {\r\n+\t\t\theight: 600px;\r\n+\t\t}\r\n+\t\t\r\n+\t\t.data th {\r\n+\t\t\ttext-align:left;\r\n+\t\t}\r\n+\t\t.data td {\r\n+\t\t\ttext-align:right;\r\n+\t\t}\r\n+\t\tfooter {\r\n+\t\t\tcolor:#999;\r\n+\t\t}\r\n+\t\tfooter a {\r\n+\t\t\tcolor:#999;\r\n+\t\t}\r\n+\t</style>\r\n+</head>\r\n+<body>\r\n+\t<script>\r\n+\t\t/*! jQuery v1.10.2 | (c) 2005, 2013 jQuery Foundation, Inc. | jquery.org/license\r\n+\t\t//@ sourceMappingURL=jquery-1.10.2.min.map\r\n+\t\t*/\r\n+\t\t(function(e,t){var n,r,i=typeof t,o=e.location,a=e.document,s=a.documentElement,l=e.jQuery,u=e.$,c={},p=[],f="1.10.2",d=p.concat,h=p.push,g=p.slice,m=p.indexOf,y=c.toString,v=c.hasOwnProperty,b=f.trim,x=function(e,t){return new x.fn.init(e,t,r)},w=/[+-]?(?:\\d*\\.|)\\d+(?:[eE][+-]?\\d+|)/.source,T=/\\S+/g,C=/^[\\s\\uFEFF\\xA0]+|[\\s\\uFEFF\\xA0]+$/g,N=/^(?:\\s*(<[\\w\\W]+>)[^>]*|#([\\w-]*))$/,k=/^<(\\w+)\\s*\\/?>(?:<\\/\\1>|)$/,E=/^[\\],:{}\\s]*$/,S=/(?:^|:|,)(?:\\s*\\[)+/g,A=/\\\\(?:["\\\\\\/bfnrt]|u[\\da-fA-F]{4})/g,j=/"[^"\\\\\\r\\n]*"|true|false|null|-?(?:\\d+\\.|)\\d+(?:[eE][+-]?\\d+|)/g,D=/^-ms-/,L=/-([\\da-z])/gi,H=function(e,t){return t.toUpperCase()},q=function(e){(a.addEventListener||"load"===e.type||"complete"===a.readyState)&&(_(),x.ready())},_=function(){a.addEventListener?(a.removeEventListener("DOMContentLoaded",q,!1),e.removeEventListener("load",q,!1)):(a.detachEvent("onreadystatechange",q),e.detachEvent("onload",q))};x.fn=x.prototype={jquery:f,constructor:x,init:function(e,n,r){var i,o;if(!e)return this;if("string"==typeof e){if(i="<"===e.charAt(0)&&">"===e.charAt(e.length-1)&&e.length>=3?[null,e,null]:N.exec(e),!i||!i[1]&&n)return!n||n.jquery?(n||r).find(e):this.constructor(n).find(e);if(i[1]){if(n=n instanceof x?n[0]:n,x.merge(this,x.parseHTML(i[1],n&&n.nodeType?n.ownerDocument||n:a,!0)),k.test(i[1])&&x.isPlainObject(n))for(i in 
n)x.isFunction(this[i])?this[i](n[i]):this.attr(i,n[i]);return this}if(o=a.getElementById(i[2]),o&&o.parentNode){if(o.id!==i[2])return r.find(e);this.length=1,this[0]=o}return this.context=a,this.selector=e,this}return e.nodeType?(this.context=this[0]=e,this.length=1,this):x.isFunction(e)?r.ready(e):(e.selector!==t&&(this.selector=e.selector,this.context=e.context),x.makeArray(e,this))},selector:"",length:0,toArray:function(){return g.call(this)},get:function(e){return null==e?this.toArray():0>e?this[this.length+e]:this[e]},pushStack:function(e){var t=x.merge(this.constructor(),e);'..b'BQy6J9vORuXwCswtA11TEUWUVb7ejR1+8OtqGrGJXrE7xC/0rM3s69uZ87P+oT/1KD44vGWwHbG9mO4Jdi6tO8pkqWm7Gf+KTG+ZAf85P0DEYC8D2AH6G0SHpY76eFOYqf5xuTvS6PSpwGsZ55raBAnzZg+OnGDsCc81srif+/YDLDDVZef96Pbe8BHhA4ksG/xvr7DtwWcPnOuZi84BL/Yg+A9wMtgTsUGA+xvFyzON00L6lV5m8FLoasz3MbLbBjmbMBx4WegdwkLeZc2AXevUDb3wf4iSnDkK6kxInqj7Hbn/+T70EOQVjG2AOZnvhqpZcaHC8OWP9AIMvGCzB7PVgc4E5SKfgtsJWgiOXw5Ip1hx1MoXW9vLDg+q3SbgcvUsijOJCsP28I2L0atII3ScyY4cIZ24HjjV4t+AuzD5MeQGBqJVgZUqBONKrNbdSXhztOeBMc3sIThdcaeHOwwasDl8C9cLyi+0zQkfh9sunMQYkbvWXHGGwo5zK9FQkiL4ncIZhfxM6wxM+wCsG/yUXLDscVwPr5lgILu29Y9+vob0/hSuXuTTy/QXAO83YBHGuHNDD9ldcRfPTwbbBlRXFq5gnGAzGvL/P4or0XYY7JuBX/tpbvXQBWQ9wW60D6yjbk2ZhNOYTuO2zHwFFkzj/hHGqxLnAUqed6BD/6CtBD/nHZamV/BlGz084k8F5C4arpBhtByG+gtsn1A38BLNvo7DC/Ogsp9EAxNfB4sfRMI2vWHOf58bPS/GAXRXnpPvbcf5PPy87yMzdsAq4QQ4oB+BK8gznuAgsXKxKeK7wKlOVgy2KG2ViZ1roaG9JXVcCRxgCtZzTubWnxBFm3BzpS9oT/g/qGLd/hFhVcmPAnWFBHuzSWCULgOW+54X4ySPhRJfVTVBR/WyK5ff7oJkCsGSZTVNf657p7Zxu4NqyKLz79z45Oy10z4enmOyCCEAFD5yQZiJjL0XPGweHzQHO8UUEl3kN4Cps/SRfjiWSfqM3OhN+6bbGcdPfgC7wIni4NhFjOx8j+2u1c85UqgC4WyPu9EjmRFP0rxqhSukJLCEXQETwRJxwPGd43P+2nS9FlIupeylqe8ZKETCLlAhy2x4CUGdEYtVVi6080pHwhvgWuCqSyB+6XDuCMnxEx7OSnZzjwB40Z8z7+a0eIDK4AeODEsd7dfpbwO+LcxLR0+Wj50P1ouelti3wLsQRoBcwTgW7CWnV+nSDjRYgBecNcGXtPf9oxjgJ9A2J73sX8I3FQntWHnnz4nqyUJd/3poai/V3T4ybNyYlbVTVmuLZVipJgfAo4tU1HtlXBLuTPmtjjx2tV3Q0926Pi10cJpiCaI5IRGuIvVgdh66blLAY4MqyJIMauS+C20yc4WwBjgSOlPPEXeodMBWcyU
XPq9bgbQZmO1vJ5mK2FOk4hqnQ/loDJGzxEQwivgPMNjhbZh9Dug3IRk8ojp2EEDixaxZNErLyCj45L64T0WVQrRD3mGNTFapX4HK9VFDVQ0aL57MFI8Fm3XPcVeXswHKiq5Lpp7PA/hvHcG4wuFrGY4i9gU8YdRJDqmTT1BlGuO6Fsn1nfnuA+ap+5d2zK53do2Odh1F7OKeJ5nqVqAyElkxWYxZTnZaiLlxw+LKiWrkB21gAohq//skdFKndMDYBXql2eq2f3LVAr6FJYK3lKlYxAa3Fo2oNDdYSsdiqqzY91BlU0Zhca06Xbrfq3DXwdNIrMUBFeuAYcDo8sYaDOVTim96//yawOyKKz4QKDlImJhvZ61LW/u4797pK4aJyX0x5bt3zYF8y+BamTyHOFpyDuA63bRoLAoKBPppeXMbgtjvCUNThaWsdbSn7Wub3BOvlKeVzPKFYKEaRZNrqg3olLBgsNBPzUobI8QDSVI+XJxoL2Ywuxb7GXdkwqCnZjFKfSgqQVDzq4QWwDc7RSvuQo0xH7/F67CU+LhNVE1MNr94w2yuccW2P+atmYUxpZLpVri32AJ+Q6U4gYbBnqSicIDNEx103u4IMKvMI9lXxjG7UAMlbRe0jAuAEPxkPhAG8Yi2mGDEaVkDc6jG0X4337OF13HvCWkvV9V3zx5+FhZIVLdwzAh5ZUab5Vq837W+RP0d+3M/373bXBTWETw1DUPFsY6tlXLshTvFK2UtuKhS9p+x83khJoXAkKUQuUjPZja1aArHjDI/j0jRmIN5c5eJOwQdxKfFVrMMig3nR+7KSxdk0Yelmmp99nOanHm3kLJCNEiChLGhzrkO1+Z+3pRTwK2B8pRHEm8ucXS14J2LnGOUcjHMZPoC487VOAvIEdr2PVyzClQOKtvneKH7Ree9e+2aOrS/19L4oiiIZO4J9MLQZYreu856kramsV1Wv9Xv9H1wGwMyofSJxsTfAT/MS/2Bc4mp0YrfBeJNnjPeX16cTymZpXvJ0/XMJX6M2mrI/KT+UHwn1ekM7hYsaJ3D5P58AuyGyESH6rmSMjy7GVRO5Uu78im/iUjTm444IXgt8RKb+0tEAiFL9LNVwnSZqMO4mqtfeSvnxxVelB+wsTL+SuBZXu+thT1hn4dJuThR6idLRGPIel6ZhGFMt9Sfso+qsWSIy/O/i4kknAx0ST4K2MdhPKBt9V2QbTreka4ATQb8F+7V/2TJfdcR8H5Lxfnimtr/cQTx3GLpazmA+yBng/NmMS/0cfAWXHfELwYMGXYjjcKlE/1PlWAYKzU20PPYAPfu+hUJL22jqX/3DANLniWNTwiPNSueXPwLcZdgPgYdU6RR6BRcdr3YW3nc9sM71Xo3Ae6/uBT6BVZxVmMNFlUtertJ3z+MKhK2luk3xqNeD4yb3c8DjYP1RNPmL7kAcgsvv+ZCfN/P2yfup2HVIzs9Hdw0NfZ3B3+QCW9W8A0/4eaomgbs9U1kV0S5/J/E+4FO4zIQjgaXIPuLPiLyEaFKge1fO4MNycZa3+IDoAO7sckyY3Bwn8OlB4UYzQcaMMxEv4rInzvZPXgNcYW4NV0gKcDll5xi8VfA2P8wlwMcxvlZ1iyOgXHbM1erWi8k3omTFnU9pRZquEjCiUz4ELDW/8V3xKsdiUy9llhmsrOE5mutzjCY7o5fbPUcrc3/iDgLdxi/aM7jKiKGeOw2YJLHYqxHRiU4izQHyLlcMi+j6s5yqaIuJZOdGTtQAl57xJj+OFYbdHSZNlr+GpMQcT+DPUtzVWrygE3dW4wrB8irSZRsPwmdCkEQ0pymYbWGwVGK1leqHy2CRjIUG6yTuMuxBORV4O4MX3SGgcfNcHcBcqRjDWQz0mDt5ao6nkadCFS2aISHRgrE/YlcgZ8bDkt1jaF2sKN+WuJSdmZ7JPgDc7+ouVKG/Qp5CazvdHzjf52LlyxMSQwKLaiixvfWl32P+6xEmK8o2ApSOt/G2sbZgfA
rG23gbB8h4G2/jABlv420cIONtvI0DZLyNt3GAjLfxttG3/x8AscyrBFrkMAcAAAAASUVORK5CYII=" /></a>\r\n+\t\t<p>Analysis produced by <a href="http://www.bioinformatics.babraham.ac.uk/projects/bismark/"><strong>Bismark</strong></a> (version {{bismark_version}}) - a tool to map bisulfite converted sequence reads and determine cytosine methylation states</p>\r\n+\t\t<p>Report graphs rendered using <a href="http://jquery.com/">jQuery</a> and <a href="http://www.highcharts.com/">Highcharts</a>. Page design by <a href="http://phil.ewels.co.uk/">Phil Ewels.</a></p>\r\n+\t</footer>\r\n+\r\n+</div>\r\n+</body>\r\n+</html>\r\n+\r\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 bismark_wrapper.py --- a/bismark_wrapper.py Wed Feb 11 16:21:51 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,373 +0,0 @@\n-#!/usr/bin/env python\n-\n-import argparse\n-import os\n-import shutil\n-import subprocess\n-import sys\n-import shlex\n-import tempfile\n-import fileinput\n-import fileinput\n-from glob import glob\n-\n-def stop_err( msg ):\n- sys.stderr.write( "%s\\n" % msg )\n- sys.exit()\n-\n-def __main__():\n-\n- #Parse Command Line\n- parser = argparse.ArgumentParser(description=\'Wrapper for the bismark bisulfite mapper.\')\n- parser.add_argument( \'-p\', \'--num-threads\', dest=\'num_threads\',\n- type=int, default=4, help=\'Use this many threads to align reads. The default is 4.\' )\n-\n- parser.add_argument( \'--bismark_path\', dest=\'bismark_path\', help=\'Path to the bismark perl scripts\' )\n-\n- parser.add_argument( \'--bowtie2\', action=\'store_true\', default=False, help=\'Running bismark with bowtie2 and not with bowtie.\' )\n-\n- # input options\n- parser.add_argument( \'--own-file\', dest=\'own_file\', help=\'\' )\n- parser.add_argument( \'-D\', \'--indexes-path\', dest=\'index_path\', help=\'Indexes directory; location of .ebwt and .fa files.\' )\n- parser.add_argument( \'-O\', \'--output\', dest=\'output\' )\n-\n-\n- parser.add_argument( \'--output-report-file\', dest=\'output_report_file\' )\n- parser.add_argument( \'--suppress-header\', dest=\'suppress_header\', action="store_true" )\n-\n- parser.add_argument( \'--mate-paired\', dest=\'mate_paired\', action=\'store_true\', help=\'Reads are mate-paired\', default=False)\n-\n-\n- parser.add_argument( \'-1\', \'--mate1\', dest=\'mate1\',\n- help=\'The forward reads file in Sanger FASTQ or FASTA format.\' )\n- parser.add_argument( \'-2\', \'--mate2\', dest=\'mate2\',\n- help=\'The reverse reads file in Sanger FASTQ or FASTA format.\' )\n- parser.add_argument( \'--sort-bam\', dest=\'sort_bam\', action="store_true" )\n-\n- parser.add_argument( \'--output-unmapped-reads\', dest=\'output_unmapped_reads\',\n- help=\'Additional output file with unmapped reads (single-end).\' )\n- 
parser.add_argument( \'--output-unmapped-reads-l\', dest=\'output_unmapped_reads_l\',\n- help=\'File name for unmapped reads (left, paired-end).\' )\n- parser.add_argument( \'--output-unmapped-reads-r\', dest=\'output_unmapped_reads_r\',\n- help=\'File name for unmapped reads (right, paired-end).\' )\n-\n-\n- parser.add_argument( \'--output-suppressed-reads\', dest=\'output_suppressed_reads\',\n- help=\'Additional output file with suppressed reads (single-end).\' )\n- parser.add_argument( \'--output-suppressed-reads-l\', dest=\'output_suppressed_reads_l\',\n- help=\'File name for suppressed reads (left, paired-end).\' )\n- parser.add_argument( \'--output-suppressed-reads-r\', dest=\'output_suppressed_reads_r\',\n- help=\'File name for suppressed reads (right, paired-end).\' )\n- parser.add_argument( \'--stdout\', dest=\'output_stdout\',\n- help=\'File name for the standard output of bismark.\' )\n-\n-\n- parser.add_argument( \'--single-paired\', dest=\'single_paired\',\n- help=\'The single-end reads file in Sanger FASTQ or FASTA format.\' )\n-\n- parser.add_argument( \'--fastq\', action=\'store_true\', help=\'Query filetype is in FASTQ format\')\n- parser.add_argument( \'--fasta\', action=\'store_true\', help=\'Query filetype is in FASTA format\')\n- parser.add_argument( \'--phred64-quals\', dest=\'phred64\', action="store_true" )\n-\n-\n- parser.add_argument( \'--skip-reads\', dest=\'skip_reads\', type=int )\n- parser.add_argument( \'--qupto\', type=int)\n-\n-\n- # paired end options\n- parser.add_argument( \'-I\', \'--minins\', dest=\'min_insert\' )\n- parser.add_argument( \'-X\', \'--maxins\', dest=\'max_insert\' )\n- parser.add_argument( \'--no-mixed\', dest=\'no_mixed\', action="store_true" )\n- parser.add_argument( \'--no-discordant\', dest=\'no_discordant\', action="store_true" )\n-\n- #parse general options\n- # default 20\n- parser.add_argument( \'--seed-len\', dest=\'seed_len\', type=int)\n- # default 15\n- parser.add_argument( 
\'--seed-extention-attempts\', dest=\'seed_extention_attempts\', type=int )\n- # default 0\n- parser.add_argu'..b'e\n- tmp_stderr = open( tmp_err, \'rb\' )\n- stderr = \'\'\n- buffsize = 1048576\n- try:\n- while True:\n- stderr += tmp_stderr.read( buffsize )\n- if not stderr or len( stderr ) % buffsize != 0:\n- break\n- except OverflowError:\n- pass\n-\n- raise Exception, stderr\n- tmp_stdout.close()\n- tmp_stderr.close()\n-\n- # TODO: look for errors in program output.\n- except Exception, e:\n- stop_err( \'Error in bismark:\\n\' + str( e ) )\n-\n- # collect and copy output files\n- if args.output_report_file:\n- output_report_file = open(args.output_report_file, \'w+\')\n- for line in fileinput.input(glob( os.path.join( output_dir, \'*report.txt\') )):\n- output_report_file.write(line)\n- output_report_file.close()\n-\n-\n- if args.output_suppressed_reads:\n- shutil.move( glob(os.path.join( output_dir, \'*ambiguous_reads.txt\'))[0], args.output_suppressed_reads )\n- if args.output_suppressed_reads_l:\n- shutil.move( glob(os.path.join( output_dir, \'*ambiguous_reads_1.txt\'))[0], args.output_suppressed_reads_l )\n- if args.output_suppressed_reads_r:\n- shutil.move( glob(os.path.join( output_dir, \'*ambiguous_reads_2.txt\'))[0], args.output_suppressed_reads_r )\n-\n- if args.output_unmapped_reads:\n- shutil.move( glob(os.path.join( output_dir, \'*unmapped_reads.txt\'))[0], args.output_unmapped_reads )\n- if args.output_unmapped_reads_l:\n- shutil.move( glob(os.path.join( output_dir, \'*unmapped_reads_1.txt\'))[0], args.output_unmapped_reads_l )\n- if args.output_unmapped_reads_r:\n- shutil.move( glob(os.path.join( output_dir, \'*unmapped_reads_2.txt\'))[0], args.output_unmapped_reads_r )\n-\n- try:\n- """\n- merge all bam files\n- """\n- #tmp_out = tempfile.NamedTemporaryFile( dir=output_dir ).name\n- tmp_stdout = open( tmp_out, \'wab\' )\n- #tmp_err = tempfile.NamedTemporaryFile( dir=output_dir ).name\n- tmp_stderr = open( tmp_err, \'wab\' )\n-\n- tmp_res = 
tempfile.NamedTemporaryFile( dir= output_dir).name\n-\n- bam_files = glob( os.path.join( output_dir, \'*.bam\') )\n- if len( bam_files ) > 1:\n- cmd = \'samtools merge -@ %s -f %s %s \' % ( args.num_threads, tmp_res, \' \'.join( bam_files ) )\n-\n- proc = subprocess.Popen( args=shlex.split( cmd ), stdout=subprocess.PIPE )\n-\n- returncode = proc.wait()\n- tmp_stdout.close()\n- tmp_stderr.close()\n- if returncode != 0:\n- raise Exception, open( tmp_stderr.name ).read()\n- else:\n- tmp_res = bam_files[0]\n-\n- bam_path = "%s" % tmp_res\n-\n- if os.path.exists( bam_path ):\n- if args.sort_bam:\n- cmd = \'samtools sort -@ %s %s sorted_bam\' % (args.num_threads, bam_path)\n- proc = subprocess.Popen( args=shlex.split( cmd ) )\n- returncode = proc.wait()\n- if returncode != 0:\n- raise Exception("Error during \'%s\'" % cmd)\n- shutil.move( \'sorted_bam.bam\', args.output )\n- else:\n- shutil.move( bam_path, args.output )\n- else:\n- stop_err( \'BAM file no found:\\n\' + str( bam_path ) )\n-\n-\n- # TODO: look for errors in program output.\n- except Exception, e:\n- stop_err( \'Error in merging bam files:\\n\' + str( e ) )\n-\n-\n- if args.output_stdout:\n- # copy the temporary saved stdout from bismark\n- shutil.move( tmp_out, args.output_stdout )\n-\n- # Clean up temp dirs\n- if args.own_file:\n- if os.path.exists( tmp_index_dir ):\n- shutil.rmtree( tmp_index_dir )\n- if os.path.exists( tmp_bismark_dir ):\n- shutil.rmtree( tmp_bismark_dir )\n- if os.path.exists( output_dir ):\n- shutil.rmtree( output_dir )\n-\n-if __name__=="__main__": __main__()\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 documentation/Bismark_User_Guide.pdf |
b |
Binary file documentation/Bismark_User_Guide.pdf has changed |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 documentation/readme.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/documentation/readme.rst Sat May 06 13:18:09 2017 -0400 |
b |
@@ -0,0 +1,53 @@ +=============== +Bismark Wrapper +=============== + +Bismark_ uses Bowtie or Bowtie2 to map bisulfite converted sequence reads to a reference genome and determine cytosine methylation states. + +Publication: http://www.ncbi.nlm.nih.gov/pubmed/21493656 + +User Guide: http://www.bioinformatics.babraham.ac.uk/projects/bismark/Bismark_User_Guide_v0.7.12.pdf + +.. _bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ + +Preparation +=========== + +Create your reference index with *bismark_genome_preparation* in your normal Galaxy Bowtie2/Bowtie index directory. It will create a Bisulfite_Genome folder directly in your Bowtie2/Bowtie index directory. +If you follow that approach you do not need to specify or modify an extra .loc file. +That wrapper will extract the path to the Bisulfite_Genome folder from ./tool-data/bowtie2_indices.loc or ./tool-data/bowtie_indices.loc. + +======= +History +======= + +- v0.7: Initial public release +- v0.7.8: update and add Tool Shed Integration +- v0.7.11.1 change default output to BAM, from now on samtools is required +- v0.7.11.2 added multi-threading to samtools (samtools > 0.1.19 is required) +- v0.7.12 upgrade to bismark 0.7.12 and fix a major slowdown +- v0.7.12.1 define a dependency on samtools 0.1.19 + + +=============================== +Wrapper Licence (MIT/BSD style) +=============================== + +Permission to use, copy, modify, and distribute this software and its +documentation with or without modifications and for any purpose and +without fee is hereby granted, provided that any copyright notices +appear in all copies and that both those copyright notices and this +permission notice appear in supporting documentation, and that the +names of the contributors or copyright holders not be used in +advertising or publicity pertaining to distribution of the software +without specific prior permission. 
+ +THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL +WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT +OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS +OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE +OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE +OR PERFORMANCE OF THIS SOFTWARE. + |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Sat May 06 13:18:09 2017 -0400 |
b |
@@ -0,0 +1,65 @@ +<?xml version='1.0' encoding='UTF-8'?> +<macros> + <token name="@WRAPPER_VERSION@">0.14</token> + <xml name="requirements_stdio"> + <requirements> + <requirement type="package" version="0.1.19">samtools</requirement> + <yield/> + </requirements> + <stdio> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <regex match="Error:" /> + <regex match="Exception:" /> + </stdio> + <version_command>bismark --version</version_command> + </xml> + + <xml name="single_paired_conditional"> + <!-- Input Parameters --> + <conditional name="singlePaired"> + <param name="sPaired" type="select" label="Is this library mate-paired?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + <param name="input_singles" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="FASTQ/FASTA file" help="FASTQ or FASTA files." /> + </when> + <when value="paired"> + <repeat name="mate_list" title="Paired End Pairs" min="1"> + <param name="input_mate1" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Mate pair 1" help="FASTQ or FASTA files." /> + <param name="input_mate2" type="data" format="fastqsanger,fastqillumina,fastq,fasta" label="Mate pair 2" help="FASTQ or FASTA files." /> + </repeat> + <param name="minInsert" type="integer" value="0" label="Minimum insert size for valid paired-end alignments" /> + <param name="maxInsert" type="integer" value="500" label="Maximum insert size for valid paired-end alignments" /> + </when> + </conditional> + </xml> + + <xml name="advanced_params"> + <param name="seed_len" type="integer" value="28" label="Seed length" + help="The number of bases of the high quality end of the read to which the maximum number of mismatches applies. (--seed-len)" /> + <param name="qupto" type="integer" value="0" min="0" label="Only aligns the first N reads or read pairs from the input" + help="Default is 0 and means 'no-limit'. 
(--qupto)"/> + <param name="skip_reads" type="integer" value="0" label="Skip (i.e. do not align) the first N reads or read pairs from the input" + help="(--skip-reads)" /> + + <param name="suppressed_read_file" type="boolean" truevalue="true" falsevalue="false" checked="false" + label="Write ambiguous reads to an extra output file" + help="Write all reads which produce more than one valid alignment with the same number of lowest + mismatches or other reads that fail to align uniquely. (--output-suppressed-reads-r/l)" /> + <param name="unmapped_read_file" type="boolean" truevalue="true" falsevalue="false" checked="false" + label="Write all reads that could not be aligned to a file. (--output-unmapped-reads-r/l)" /> + + <!-- output Options --> + <param name="bismark_stdout" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write the bismark output and summary information to an extra file" /> + <param name="isReportOutput" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Offer all report files concatenated in one file" /> + <!--end output options --> + + </xml> + <xml name="citation"> + <citations> + <citation type="doi">10.1093/bioinformatics/btr167</citation> + </citations> + </xml> +</macros> |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 new/bismark --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/new/bismark Sat May 06 13:18:09 2017 -0400 |
b |
b'@@ -0,0 +1,9636 @@\n+#!/usr/bin/perl --\n+use strict;\n+use warnings;\n+use IO::Handle;\n+use Cwd;\n+$|++;\n+use Getopt::Long;\n+\n+\n+## This program is Copyright (C) 2010-15, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+\n+my $parent_dir = getcwd;\n+my $bismark_version = \'v0.14.3\';\n+my $command_line = join (" ",@ARGV);\n+\n+\n+### before processing the command line we will replace --solexa1.3-quals with --phred64-quals as the \'.\' in the option name will cause Getopt::Long to fail\n+foreach my $arg (@ARGV){\n+ if ($arg eq \'--solexa1.3-quals\'){\n+ $arg = \'--phred64-quals\';\n+ }\n+}\n+my @filenames; # will be populated by processing the command line\n+\n+my ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_file_format,$bowtie_options,$directional,$unmapped,$ambiguous,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$upto,$temp_dir,$non_bs_mm,$insertion_open,$insertion_extend,$deletion_open,$deletion_extend,$gzip,$bam,$samtools_path,$pbat,$prefix,$old_flag,$basename,$score_min_intercept,$score_min_slope,$bt2_large_index,$multicore) = process_command_line();\n+\n+my @fhs; # stores alignment process names, bisulfite index location, bowtie filehandles and the number of times sequences produced an alignment\n+my %chromosomes; # stores the chromosome 
sequences of the mouse genome\n+my %SQ_order; # stores the order of sequences in the reference. This is to produce SAM/BAM files with a known order of chromosomes\n+my %counting; # counting various events\n+my @pids; # storing the process IDs of child processes in parallel mode\n+\n+\n+my $seqID_contains_tabs;\n+my $verbose = 0;\n+\n+if ($multicore > 1){\n+ warn "Running Bismark Parallel version. Number of parallel instances to be spawned: $multicore\\n\\n";\n+}\n+\n+\n+sub multi_process_handling{\n+\n+ my $offset = 1;\n+ my $process_id;\n+ if ($multicore > 1){\n+\n+ until ($offset == $multicore){\n+ # warn "multicore: $multicore\\noffset: $offset\\n";\n+ my $fork = fork;\n+\n+ if (defined $fork){\n+\tif ($fork != 0){\n+\t $process_id = $fork;\n+\t push @pids, $process_id;\n+\t if ($offset < $multicore){\n+\t ++$offset;\n+\t # warn "I am the parent process, child pid: $fork\\nIncrementing offset counter to: $offset\\n\\n";\n+\t }\n+\t else{\n+\t # warn "Reached the number of maximum multicores. Proceeeding to processing...\\n";\n+\t }\n+\t}\n+\telsif ($fork == 0){\n+\t # warn "I am a child process, pid: $fork\\nOffset counter is: $offset\\nProceeding to processing...\\n";\n+\t $process_id = $fork;\n+\t last;\n+\t}\n+ }\n+ else{\n+\tdie "Forking unsuccessful. Proceeding using a single thread only\\n";\n+ }\n+ }\n+\n+ # warn "\\nThe Thread Identity\\n===================\\n";\n+ if ($process_id){\n+ # print "I am the parent process. 
My children are called:\\n";\n+ # print join ("\\t",@pids),"\\n";\n+ # print "I am going to process the following line count: $offset\\n\\n";\n+ }\n+ elsif($process_id == 0){\n+ # warn "I am a child process: Process ID: $process_id\\n";\n+ # warn "I am going to process the following line count: $offset\\n\\n";\n+ }\n+ else{\n+ die "Process ID was: \'$process_id\'\\n";\n+ }\n+ }\n+ else{\n+ warn "Single-core mode: setting pid to 1\\n";\n+ $process_id = 1;\n+ }\n+\n+ return ($process_id,$offset);\n+}\n+\n+\n+sub subset_input_file_FastQ{\n+\n+ my ($filename,$process_id,$offse'..b" function of read length. For instance, specifying\n+ L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length.\n+ See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is\n+ L,0,-0.2.\n+\n+--rdg <int1>,<int2> Sets the read gap open (<int1>) and extend (<int2>) penalties. A read gap of length N gets a penalty\n+ of <int1> + N * <int2>. Default: 5, 3.\n+\n+--rfg <int1>,<int2> Sets the reference gap open (<int1>) and extend (<int2>) penalties. A reference gap of length N gets\n+ a penalty of <int1> + N * <int2>. Default: 5, 3.\n+\n+\n+Bowtie 2 Reporting options:\n+\n+-most_valid_alignments <int> This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is\n+ deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the\n+ default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the\n+ effort expended to find valid alignments.\n+\n+ For reference, this used to be the old (now deprecated) description of -M:\n+ Bowtie 2 searches for at most <int>+1 distinct, valid alignments for each read. The search terminates when it\n+ can't find more distinct valid alignments, or when it finds <int>+1 distinct alignments, whichever\n+ happens first. Only the best alignment is reported. 
Information from the other alignments is used to\n+ estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes \n+ Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that\n+ aligns many places. For reads that have more than <int>+1 distinct, valid alignments, Bowtie 2 does not\n+ guarantee that the alignment reported is the best possible in terms of alignment score. -M is\n+ always used and its default value is set to 10.\n+\n+\n+'VANILLA' Bismark OUTPUT:\n+\n+Single-end output format (tab-separated):\n+\n+ (1) <seq-ID>\n+ (2) <read alignment strand>\n+ (3) <chromosome>\n+ (4) <start position>\n+ (5) <end position>\n+ (6) <observed bisulfite sequence>\n+ (7) <equivalent genomic sequence>\n+ (8) <methylation call>\n+ (9) <read conversion\n+(10) <genome conversion>\n+(11) <read quality score (Phred33)>\n+\n+\n+Paired-end output format (tab-separated):\n+ (1) <seq-ID>\n+ (2) <read 1 alignment strand>\n+ (3) <chromosome>\n+ (4) <start position>\n+ (5) <end position>\n+ (6) <observed bisulfite sequence 1>\n+ (7) <equivalent genomic sequence 1>\n+ (8) <methylation call 1>\n+ (9) <observed bisulfite sequence 2>\n+(10) <equivalent genomic sequence 2>\n+(11) <methylation call 2>\n+(12) <read 1 conversion\n+(13) <genome conversion>\n+(14) <read 1 quality score (Phred33)>\n+(15) <read 2 quality score (Phred33)>\n+\n+\n+Bismark SAM OUTPUT (default):\n+\n+ (1) QNAME (seq-ID)\n+ (2) FLAG (this flag tries to take the strand a bisulfite read originated from into account (this is different from ordinary DNA alignment flags!))\n+ (3) RNAME (chromosome)\n+ (4) POS (start position)\n+ (5) MAPQ (always 255 for use with Bowtie)\n+ (6) CIGAR\n+ (7) RNEXT\n+ (8) PNEXT\n+ (9) TLEN\n+(10) SEQ\n+(11) QUAL (Phred33 scale)\n+(12) NM-tag (edit distance to the reference)\n+(13) MD-tag (base-by-base mismatches to the reference (handles indels)\n+(14) XM-tag (methylation call string)\n+(15) XR-tag (read 
conversion state for the alignment)\n+(16) XG-tag (genome conversion state for the alignment)\n+(17) XA/XB-tag (non-bisulfite mismatches) (optional!)\n+\n+Each read of paired-end alignments is written out in a separate line in the above format.\n+\n+\n+Last edited on 06 May 2015.\n+\n+HOW_TO\n+}\n" |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 new/bismark_genome_preparation --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/new/bismark_genome_preparation Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,468 @@\n+#!/usr/bin/perl --\n+use strict;\n+use warnings;\n+use Cwd;\n+# use File::Path qw(rmtree);\n+$|++;\n+\n+\n+## This program is Copyright (C) 2010-15, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+use Getopt::Long;\n+use Cwd;\n+\n+my $verbose;\n+my $help;\n+my $version;\n+my $man;\n+my $path_to_bowtie;\n+my $multi_fasta;\n+my $single_fasta;\n+my $bowtie2;\n+\n+my $bismark_version = \'v0.14.3\';\n+\n+GetOptions (\'verbose\' => \\$verbose,\n+\t \'help\' => \\$help,\n+\t \'man\' => \\$man,\n+\t \'version\' => \\$version,\n+\t \'path_to_bowtie:s\' => \\$path_to_bowtie,\n+\t \'single_fasta\' => \\$single_fasta,\n+\t \'bowtie2\' => \\$bowtie2,\n+\t );\n+\n+if ($help or $man){\n+ print_helpfile();\n+ exit;\n+}\n+\n+if ($version){\n+ print << "VERSION";\n+\n+ Bismark - Bisulfite Mapper and Methylation Caller.\n+\n+ Bismark Genome Preparation Version: $bismark_version\n+ Copyright 2010-15 Felix Krueger, Babraham Bioinformatics\n+ www.bioinformatics.babraham.ac.uk/projects/\n+\n+VERSION\n+ exit;\n+}\n+\n+my $genome_folder = shift @ARGV; # mandatory\n+my %chromosomes; # checking if chromosome names are unique (required)\n+\n+# Ensuring a genome folder has been specified\n+if ($genome_folder){\n+ unless ($genome_folder =~ /\\/$/){\n+ $genome_folder =~ s/$/\\//;\n+ }\n+ $verbose and print "Path to 
genome folder specified as: $genome_folder\\n";\n+ chdir $genome_folder or die "Could\'t move to directory $genome_folder. Make sure the directory exists! $!";\n+\n+ # making the genome folder path abolsolute so it won\'t break if the path was specified relative\n+ $genome_folder = getcwd;\n+ unless ($genome_folder =~ /\\/$/){\n+ $genome_folder =~ s/$/\\//;\n+ }\n+}\n+else{\n+ die "Please specify a genome folder to be used for bisulfite conversion\\n\\n";\n+}\n+\n+\n+my $CT_dir;\n+my $GA_dir;\n+\n+\n+if ($single_fasta){\n+ print "Writing individual genomes out into single-entry fasta files (one per chromosome)\\n\\n";\n+ $multi_fasta = 0;\n+}\n+else{\n+ print "Writing bisulfite genomes out into a single MFA (multi FastA) file\\n\\n";\n+ $single_fasta = 0;\n+ $multi_fasta = 1;\n+}\n+\n+my @filenames = create_bisulfite_genome_folders();\n+\n+process_sequence_files ();\n+\n+launch_bowtie_indexer();\n+\n+sub launch_bowtie_indexer{\n+ if ($bowtie2){\n+ print "Bismark Genome Preparation - Step III: Launching the Bowtie 2 indexer\\n";\n+ }\n+ else{\n+ print "Bismark Genome Preparation - Step III: Launching the Bowtie (1) indexer\\n";\n+ }\n+ print "Please be aware that this process can - depending on genome size - take up to several hours!\\n";\n+ sleep(5);\n+\n+ ### if the path to bowtie was specfified explicitely\n+ if ($path_to_bowtie){\n+ if ($bowtie2){\n+ $path_to_bowtie =~ s/$/bowtie2-build/;\n+ }\n+ else{\n+ $path_to_bowtie =~ s/$/bowtie-build/;\n+ }\n+ }\n+ ### otherwise we assume that bowtie-build is in the path\n+ else{\n+ if ($bowtie2){\n+ $path_to_bowtie = \'bowtie2-build\';\n+ }\n+ else{\n+ $path_to_bowtie = \'bowtie-build\';\n+ }\n+ }\n+\n+ $verbose and print "\\n";\n+\n+ ### Forking the program to run 2 instances of Bowtie-build or Bowtie2-build (= the Bowtie (1/2) indexer)\n+ my $pid = fork();\n+\n+ # parent process\n+ if ($pid){\n+ sleep(1);\n+ chdir $CT_dir or die "Unable to change directory: $!\\n";\n+ $verbose and warn "Preparing indexing of CT 
converted genome in $CT_dir\\n";\n+ my @fasta_'..b'ite_dir $!\\n";\n+ $verbose and print "Created Bisulfite Genome folder $bisulfite_dir\\n";\n+ }\n+ else{\n+ print "\\nA directory called $bisulfite_dir already exists. Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indices will be overwritten!\\n\\n";\n+ sleep(5);\n+ }\n+\n+ chdir $bisulfite_dir or die "Unable to move to $bisulfite_dir\\n";\n+ $CT_dir = "${bisulfite_dir}CT_conversion/";\n+ $GA_dir = "${bisulfite_dir}GA_conversion/";\n+\n+ # creating 2 subdirectories to store a C->T (forward strand conversion) and a G->A (reverse strand conversion)\n+ # converted version of the genome\n+ unless (-d $CT_dir){\n+ mkdir $CT_dir or die "Unable to create directory $CT_dir $!\\n";\n+ $verbose and print "Created Bisulfite Genome folder $CT_dir\\n";\n+ }\n+ unless (-d $GA_dir){\n+ mkdir $GA_dir or die "Unable to create directory $GA_dir $!\\n";\n+ $verbose and print "Created Bisulfite Genome folder $GA_dir\\n";\n+ }\n+\n+ # moving back to the original genome folder\n+ chdir $genome_folder or die "Could\'t move to directory $genome_folder $!";\n+ # $verbose and print "Moved back to genome folder folder $genome_folder\\n";\n+ warn "\\nStep I - Prepare genome folders - completed\\n\\n\\n";\n+ return @filenames;\n+}\n+\n+sub print_helpfile{\n+ print << \'HOW_TO\';\n+\n+\n+DESCRIPTION\n+\n+This script is supposed to convert a specified reference genome into two different bisulfite\n+converted versions and index them for alignments with Bowtie 1 (default), or Bowtie 2. The first\n+bisulfite genome will have all Cs converted to Ts (C->T), and the other one will have all Gs\n+converted to As (G->A). Both bisulfite genomes will be stored in subfolders within the reference\n+genome folder. Once the bisulfite conversion has been completed the program will fork and launch\n+two simultaneous instances of the Bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). 
Be aware\n+that the indexing process can take up to several hours; this will mainly depend on genome size\n+and system resources.\n+\n+\n+\n+The following is a brief description of command line options and arguments to control the\n+Bismark Genome Preparation:\n+\n+\n+USAGE: bismark_genome_preparation [options] <arguments>\n+\n+\n+OPTIONS:\n+\n+--help/--man Displays this help filea and exits.\n+\n+--version Displays version information and exits.\n+\n+--verbose Print verbose output for more details or debugging.\n+\n+--path_to_bowtie </../> The full path to the Bowtie 1 or Bowtie 2 installation on your system\n+ (depending on which aligner/indexer you intend to use). Unless this path\n+ is specified it is assumed that Bowtie is in the PATH.\n+\n+--bowtie2 This will create bisulfite indexes for Bowtie 2. (Default: Bowtie 1).\n+\n+--single_fasta Instruct the Bismark Indexer to write the converted genomes into\n+ single-entry FastA files instead of making one multi-FastA file (MFA)\n+ per chromosome. This might be useful if individual bisulfite converted\n+ chromosomes are needed (e.g. for debugging), however it can cause a\n+ problem with indexing if the number of chromosomes is vast (this is likely\n+ to be in the range of several thousand files; the operating system can\n+ only handle lists up to a certain length, and some newly assembled\n+ genomes may contain 20000-50000 contigs of scaffold files which do exceed\n+ this list length limit).\n+\n+\n+ARGUMENTS:\n+\n+<path_to_genome_folder> The path to the folder containing the genome to be bisulfite converted.\n+ The Bismark Genome Preparation expects one or more fastA files in the folder\n+ (with the file extension: .fa or .fasta). Specifying this path is mandatory.\n+\n+\n+This script was last modified on 16 Oct 2014.\n+HOW_TO\n+}\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 new/bismark_methylation_extractor --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/new/bismark_methylation_extractor Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,5875 @@\n+#!/usr/bin/perl\n+use warnings;\n+use strict;\n+$|++;\n+use Getopt::Long;\n+use Cwd;\n+use Carp;\n+use FindBin qw($Bin);\n+use lib "$Bin/../lib";\n+\n+## This program is Copyright (C) 2010-15, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+my @filenames; # input files\n+my %counting;\n+my $parent_dir = getcwd();\n+\n+my %fhs;\n+\n+my $version = \'v0.14.3\';\n+my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip,$ignore_r2,$mbias_off,$mbias_only,$gazillion,$ample_mem,$ignore_3prime,$ignore_3prime_r2,$multicore) = process_commandline();\n+\n+\n+### only needed for bedGraph output\n+my @sorting_files; # if files are to be written to bedGraph format, these are the methylation extractor output files\n+my @methylcalls = qw (0 0 0); # [0] = methylated, [1] = unmethylated, [2] = total\n+my @bedfiles;\n+\n+### only needed for genome-wide cytosine methylation report\n+my %chromosomes;\n+\n+my %mbias_1;\n+my %mbias_2;\n+\n+\n+##############################################################################################\n+### Summarising Run 
Parameters\n+##############################################################################################\n+\n+### METHYLATION EXTRACTOR\n+\n+warn "Summarising Bismark methylation extractor parameters:\\n";\n+warn \'=\'x63,"\\n";\n+\n+if ($single){\n+ if ($vanilla){\n+ warn "Bismark single-end vanilla format specified\\n";\n+ }\n+ else{\n+ warn "Bismark single-end SAM format specified (default)\\n"; # default\n+ }\n+}\n+elsif ($paired){\n+ if ($vanilla){\n+ warn "Bismark paired-end vanilla format specified\\n";\n+ }\n+ else{\n+ warn "Bismark paired-end SAM format specified (default)\\n"; # default\n+ }\n+}\n+\n+warn "Number of cores to be used: $multicore\\n";\n+\n+if ($single){\n+ if ($ignore){\n+ warn "First $ignore bp will be disregarded when processing the methylation call string\\n";\n+ }\n+ if ($ignore_3prime){\n+ warn "Last $ignore_3prime bp will be disregarded when processing the methylation call string\\n";\n+ }\n+\n+}\n+else{ ## paired-end\n+ if ($ignore){\n+ warn "First $ignore bp will be disregarded when processing the methylation call string of Read 1\\n";\n+ }\n+ if ($ignore_r2){\n+ warn "First $ignore_r2 bp will be disregarded when processing the methylation call string of Read 2\\n";\n+ }\n+\n+ if ($ignore_3prime){\n+ warn "Last $ignore_3prime bp will be disregarded when processing the methylation call string of Read 1\\n";\n+ }\n+ if ($ignore_3prime_r2){\n+ warn "Last $ignore_3prime_r2 bp will be disregarded when processing the methylation call string of Read 2\\n";\n+ }\n+\n+\n+}\n+\n+\n+if ($full){\n+ warn "Strand-specific outputs will be skipped. 
Separate output files for cytosines in CpG, CHG and CHH context will be generated\\n";\n+}\n+if ($merge_non_CpG){\n+ warn "Merge CHG and CHH context to non-CpG context specified\\n";\n+}\n+### output directory\n+if ($output_dir eq \'\'){\n+ warn "Output will be written to the current directory (\'$parent_dir\')\\n";\n+}\n+else{\n+ warn "Output path specified as: $output_dir\\n";\n+}\n+\n+\n+sleep (1);\n+\n+### BEDGRAPH\n+\n+if ($bedGraph){\n+ warn "\\n\\nSummarising bedGraph parameters:\\n";\n+ warn \'=\'x63,"\\n";\n+\n+ if ($counts){\n+ '..b" chromosome 1 (~250M bp) consume around 16GB\n+ of RAM). Due to overheads in creating and looping through these arrays it seems that it will\n+ actually be *slower* for small files (few million alignments), and we are currently testing at\n+ which point it is advisable to use this option. Note that --ample_memory is not compatible\n+ with options '--scaffolds/--gazillion' (as it requires pre-sorted files to begin with).\n+\n+\n+\n+Genome-wide cytosine methylation report specific options:\n+=========================================================\n+\n+--cytosine_report After the conversion to bedGraph has completed, the option '--cytosine_report' produces a\n+ genome-wide methylation report for all cytosines in the genome. By default, the output uses 1-based\n+ chromosome coordinates (zero-based start coords are optional) and reports CpG context only (all\n+ cytosine context is optional). The output considers all Cs on both forward and reverse strands and\n+ reports their position, strand, trinucleotide content and methylation state (counts are 0 if not\n+ covered). The cytosine report conversion step is performed by the external module\n+ 'coverage2cytosine'; this script needs to reside in the same folder as the bismark_methylation_extractor\n+ itself.\n+\n+--CX/--CX_context The output file contains information on every single cytosine in the genome irrespective of\n+ its context. 
This applies to both forward and reverse strands. Please be aware that this will\n+ generate output files with > 1.1 billion lines for a mammalian genome such as human or mouse.\n+ Default: OFF (i.e. Default = CpG context only).\n+\n+--zero_based Uses 0-based genomic coordinates instead of 1-based coordinates. Default: OFF.\n+\n+--genome_folder <path> Enter the genome folder you wish to use to extract sequences from (full path only). Accepted\n+ formats are FastA files ending with '.fa' or '.fasta'. Specifying a genome folder path is mandatory.\n+\n+--split_by_chromosome Writes the output into individual files for each chromosome instead of a single output file. Files\n+ will be named to include the input filename and the chromosome number.\n+\n+\n+\n+OUTPUT:\n+\n+The bismark_methylation_extractor output is in the form:\n+========================================================\n+<seq-ID> <methylation state*> <chromosome> <start position (= end position)> <methylation call>\n+\n+* Methylated cytosines receive a '+' orientation,\n+* Unmethylated cytosines receive a '-' orientation.\n+\n+\n+\n+The bedGraph output (optional) looks like this (tab-delimited; 0-based start coords, 1-based end coords):\n+=========================================================================================================\n+\n+track type=bedGraph (header line)\n+\n+<chromosome> <start position> <end position> <methylation percentage>\n+\n+\n+\n+The coverage output looks like this (tab-delimited, 1-based genomic coords; zero-based half-open coordinates available with '--zero_based'):\n+============================================================================================================================================\n+\n+<chromosome> <start position> <end position> <methylation percentage> <count methylated> <count non-methylated>\n+\n+\n+\n+The genome-wide cytosine methylation output file is tab-delimited in the following 
format:\n+==========================================================================================\n+<chromosome> <position> <strand> <count methylated> <count non-methylated> <C-context> <trinucleotide context>\n+\n+\n+\n+This script was last modified on 22 April 2015.\n+\n+HOW_TO\n+}\n" |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 old/bismark --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/old/bismark Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,7959 @@\n+#!/usr/bin/perl --\n+use strict;\n+use warnings;\n+use IO::Handle;\n+use Cwd;\n+$|++;\n+use Getopt::Long;\n+\n+\n+## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+\n+my $parent_dir = getcwd;\n+my $bismark_version = \'v0.10.0\';\n+my $command_line = join (" ",@ARGV);\n+\n+### before processing the command line we will replace --solexa1.3-quals with --phred64-quals as the \'.\' in the option name will cause Getopt::Long to fail\n+foreach my $arg (@ARGV){\n+ if ($arg eq \'--solexa1.3-quals\'){\n+ $arg = \'--phred64-quals\';\n+ }\n+}\n+my @filenames; # will be populated by processing the command line\n+\n+my ($genome_folder,$CT_index_basename,$GA_index_basename,$path_to_bowtie,$sequence_file_format,$bowtie_options,$directional,$unmapped,$ambiguous,$phred64,$solexa,$output_dir,$bowtie2,$vanilla,$sam_no_hd,$skip,$upto,$temp_dir,$non_bs_mm,$insertion_open,$insertion_extend,$deletion_open,$deletion_extend,$gzip,$bam,$samtools_path,$pbat,$prefix,$old_flag) = process_command_line();\n+\n+my @fhs; # stores alignment process names, bisulfite index location, bowtie filehandles and the number of times sequences produced an alignment\n+my %chromosomes; # stores the chromosome sequences of the mouse genome\n+my %counting; # counting various events\n+\n+my 
$seqID_contains_tabs;\n+\n+foreach my $filename (@filenames){\n+\n+ chdir $parent_dir or die "Unable to move to initial working directory $!\\n";\n+ ### resetting the counting hash and fhs\n+ reset_counters_and_fhs($filename);\n+ $seqID_contains_tabs = 0;\n+\n+ ### PAIRED-END ALIGNMENTS\n+ if ($filename =~ \',\'){\n+ my ($C_to_T_infile_1,$G_to_A_infile_1); # to be made from mate1 file\n+\n+ $fhs[0]->{name} = \'CTread1GAread2CTgenome\';\n+ $fhs[1]->{name} = \'GAread1CTread2GAgenome\';\n+ $fhs[2]->{name} = \'GAread1CTread2CTgenome\';\n+ $fhs[3]->{name} = \'CTread1GAread2GAgenome\';\n+\n+ warn "\\nPaired-end alignments will be performed\\n",\'=\'x39,"\\n\\n";\n+\n+ my ($filename_1,$filename_2) = (split (/,/,$filename));\n+ warn "The provided filenames for paired-end alignments are $filename_1 and $filename_2\\n";\n+\n+ ### additional variables only for paired-end alignments\n+ my ($C_to_T_infile_2,$G_to_A_infile_2); # to be made from mate2 file\n+\n+ ### FastA format\n+ if ($sequence_file_format eq \'FASTA\'){\n+ warn "Input files are in FastA format\\n";\n+\n+ if ($directional){\n+\t($C_to_T_infile_1) = biTransformFastAFiles_paired_end ($filename_1,1); # also passing the read number\n+\t($G_to_A_infile_2) = biTransformFastAFiles_paired_end ($filename_2,2);\n+\n+\t$fhs[0]->{inputfile_1} = $C_to_T_infile_1;\n+\t$fhs[0]->{inputfile_2} = $G_to_A_infile_2;\n+\t$fhs[1]->{inputfile_1} = undef;\n+\t$fhs[1]->{inputfile_2} = undef;\n+\t$fhs[2]->{inputfile_1} = undef;\n+\t$fhs[2]->{inputfile_2} = undef;\n+\t$fhs[3]->{inputfile_1} = $C_to_T_infile_1;\n+\t$fhs[3]->{inputfile_2} = $G_to_A_infile_2;\n+ }\n+ else{\n+\t($C_to_T_infile_1,$G_to_A_infile_1) = biTransformFastAFiles_paired_end ($filename_1,1); # also passing the read number\n+\t($C_to_T_infile_2,$G_to_A_infile_2) = biTransformFastAFiles_paired_end ($filename_2,2);\n+\n+\t$fhs[0]->{inputfile_1} = $C_to_T_infile_1;\n+\t$fhs[0]->{inputfile_2} = $G_to_A_infile_2;\n+\t$fhs[1]->{inputfile_1} = 
$G_to_A_infile_1;\n+\t$fhs[1]->{inputfile_2} = $C_to_T_infile_2;\n+\t$fhs[2]->{inputfile_1} = $G_to_A_i'..b" a function of read length. For instance, specifying\n+ L,0,-0.2 sets the minimum-score function f to f(x) = 0 + -0.2 * x, where x is the read length.\n+ See also: setting function options at http://bowtie-bio.sourceforge.net/bowtie2. The default is\n+ L,0,-0.2.\n+\n+--rdg <int1>,<int2> Sets the read gap open (<int1>) and extend (<int2>) penalties. A read gap of length N gets a penalty\n+ of <int1> + N * <int2>. Default: 5, 3.\n+\n+--rfg <int1>,<int2> Sets the reference gap open (<int1>) and extend (<int2>) penalties. A reference gap of length N gets\n+ a penalty of <int1> + N * <int2>. Default: 5, 3.\n+\n+\n+Bowtie 2 Reporting options:\n+\n+-most_valid_alignments <int> This used to be the Bowtie 2 parameter -M. As of Bowtie 2 version 2.0.0 beta7 the option -M is\n+ deprecated. It will be removed in subsequent versions. What used to be called -M mode is still the\n+ default mode, but adjusting the -M setting is deprecated. Use the -D and -R options to adjust the\n+ effort expended to find valid alignments.\n+\n+ For reference, this used to be the old (now deprecated) description of -M:\n+ Bowtie 2 searches for at most <int>+1 distinct, valid alignments for each read. The search terminates when it\n+ can't find more distinct valid alignments, or when it finds <int>+1 distinct alignments, whichever\n+ happens first. Only the best alignment is reported. Information from the other alignments is used to\n+ estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes \n+ Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that\n+ aligns many places. For reads that have more than <int>+1 distinct, valid alignments, Bowtie 2 does not\n+ guarantee that the alignment reported is the best possible in terms of alignment score. 
-M is\n+ always used and its default value is set to 10.\n+\n+\n+'VANILLA' Bismark OUTPUT:\n+\n+Single-end output format (tab-separated):\n+\n+ (1) <seq-ID>\n+ (2) <read alignment strand>\n+ (3) <chromosome>\n+ (4) <start position>\n+ (5) <end position>\n+ (6) <observed bisulfite sequence>\n+ (7) <equivalent genomic sequence>\n+ (8) <methylation call>\n+ (9) <read conversion\n+(10) <genome conversion>\n+(11) <read quality score (Phred33)>\n+\n+\n+Paired-end output format (tab-separated):\n+ (1) <seq-ID>\n+ (2) <read 1 alignment strand>\n+ (3) <chromosome>\n+ (4) <start position>\n+ (5) <end position>\n+ (6) <observed bisulfite sequence 1>\n+ (7) <equivalent genomic sequence 1>\n+ (8) <methylation call 1>\n+ (9) <observed bisulfite sequence 2>\n+(10) <equivalent genomic sequence 2>\n+(11) <methylation call 2>\n+(12) <read 1 conversion\n+(13) <genome conversion>\n+(14) <read 1 quality score (Phred33)>\n+(15) <read 2 quality score (Phred33)>\n+\n+\n+Bismark SAM OUTPUT (default):\n+\n+ (1) QNAME (seq-ID)\n+ (2) FLAG (this flag tries to take the strand a bisulfite read originated from into account (this is different from ordinary DNA alignment flags!))\n+ (3) RNAME (chromosome)\n+ (4) POS (start position)\n+ (5) MAPQ (always 255)\n+ (6) CIGAR\n+ (7) RNEXT\n+ (8) PNEXT\n+ (9) TLEN\n+(10) SEQ\n+(11) QUAL (Phred33 scale)\n+(12) NM-tag (edit distance to the reference)\n+(13) XX-tag (base-by-base mismatches to the reference. This does not include indels)\n+(14) XM-tag (methylation call string)\n+(15) XR-tag (read conversion state for the alignment)\n+(16) XG-tag (genome conversion state for the alignment)\n+(17) XA/XB-tag (non-bisulfite mismatches) (optional!)\n+\n+Each read of paired-end alignments is written out in a separate line in the above format.\n+\n+\n+Last edited on 07 October 2013.\n+\n+HOW_TO\n+}\n" |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 old/bismark_genome_preparation --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/old/bismark_genome_preparation Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,459 @@\n+#!/usr/bin/perl --\n+use strict;\n+use warnings;\n+use Cwd;\n+use File::Path qw(rmtree);\n+$|++;\n+\n+\n+## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+use Getopt::Long;\n+use Cwd;\n+\n+my $verbose;\n+my $help;\n+my $version;\n+my $man;\n+my $path_to_bowtie;\n+my $multi_fasta;\n+my $single_fasta;\n+my $bowtie2;\n+\n+my $bismark_version = \'v0.10.0\';\n+\n+GetOptions (\'verbose\' => \\$verbose,\n+\t \'help\' => \\$help,\n+\t \'man\' => \\$man,\n+\t \'version\' => \\$version,\n+\t \'path_to_bowtie:s\' => \\$path_to_bowtie,\n+\t \'single_fasta\' => \\$single_fasta,\n+\t \'bowtie2\' => \\$bowtie2,\n+\t );\n+\n+if ($help or $man){\n+ print_helpfile();\n+ exit;\n+}\n+\n+if ($version){\n+ print << "VERSION";\n+\n+ Bismark - Bisulfite Mapper and Methylation Caller.\n+\n+ Bismark Genome Preparation Version: $bismark_version\n+ Copyright 2010-13 Felix Krueger, Babraham Bioinformatics\n+ www.bioinformatics.babraham.ac.uk/projects/\n+\n+VERSION\n+ exit;\n+}\n+\n+my $genome_folder = shift @ARGV; # mandatory\n+\n+# Ensuring a genome folder has been specified\n+if ($genome_folder){\n+ unless ($genome_folder =~ /\\/$/){\n+ $genome_folder =~ s/$/\\//;\n+ }\n+ $verbose and print "Path to genome folder specified as: $genome_folder\\n";\n+ chdir $genome_folder or 
die "Could\'t move to directory $genome_folder. Make sure the directory exists! $!";\n+\n+ # making the genome folder path abolsolute so it won\'t break if the path was specified relative\n+ $genome_folder = getcwd;\n+ unless ($genome_folder =~ /\\/$/){\n+ $genome_folder =~ s/$/\\//;\n+ }\n+}\n+else{\n+ die "Please specify a genome folder to be used for bisulfite conversion\\n\\n";\n+}\n+\n+\n+my $CT_dir;\n+my $GA_dir;\n+\n+\n+if ($single_fasta){\n+ print "Writing individual genomes out into single-entry fasta files (one per chromosome)\\n\\n";\n+ $multi_fasta = 0;\n+}\n+else{\n+ print "Writing bisulfite genomes out into a single MFA (multi FastA) file\\n\\n";\n+ $single_fasta = 0;\n+ $multi_fasta = 1;\n+}\n+\n+my @filenames = create_bisulfite_genome_folders();\n+\n+process_sequence_files ();\n+\n+launch_bowtie_indexer();\n+\n+sub launch_bowtie_indexer{\n+ if ($bowtie2){\n+ print "Bismark Genome Preparation - Step III: Launching the Bowtie 2 indexer\\n";\n+ }\n+ else{\n+ print "Bismark Genome Preparation - Step III: Launching the Bowtie (1) indexer\\n";\n+ }\n+ print "Please be aware that this process can - depending on genome size - take up to several hours!\\n";\n+ sleep(5);\n+\n+ ### if the path to bowtie was specfified explicitely\n+ if ($path_to_bowtie){\n+ if ($bowtie2){\n+ $path_to_bowtie =~ s/$/bowtie2-build/;\n+ }\n+ else{\n+ $path_to_bowtie =~ s/$/bowtie-build/;\n+ }\n+ }\n+ ### otherwise we assume that bowtie-build is in the path\n+ else{\n+ if ($bowtie2){\n+ $path_to_bowtie = \'bowtie2-build\';\n+ }\n+ else{\n+ $path_to_bowtie = \'bowtie-build\';\n+ }\n+ }\n+\n+ $verbose and print "\\n";\n+\n+ ### Forking the program to run 2 instances of Bowtie-build or Bowtie2-build (= the Bowtie (1/2) indexer)\n+ my $pid = fork();\n+\n+ # parent process\n+ if ($pid){\n+ sleep(1);\n+ chdir $CT_dir or die "Unable to change directory: $!\\n";\n+ $verbose and warn "Preparing indexing of CT converted genome in $CT_dir\\n";\n+ my @fasta_files = <*.fa>;\n+ my $file_list = 
join (\',\',@fasta_files);\n+ $verbo'..b'te_dir $!\\n";\n+ $verbose and print "Created Bisulfite Genome folder $bisulfite_dir\\n";\n+ }\n+ else{\n+ print "\\nA directory called $bisulfite_dir already exists. Bisulfite converted sequences and/or already existing Bowtie (1 or 2) indices will be overwritten!\\n\\n";\n+ sleep(5);\n+ }\n+\n+ chdir $bisulfite_dir or die "Unable to move to $bisulfite_dir\\n";\n+ $CT_dir = "${bisulfite_dir}CT_conversion/";\n+ $GA_dir = "${bisulfite_dir}GA_conversion/";\n+\n+ # creating 2 subdirectories to store a C->T (forward strand conversion) and a G->A (reverse strand conversion)\n+ # converted version of the genome\n+ unless (-d $CT_dir){\n+ mkdir $CT_dir or die "Unable to create directory $CT_dir $!\\n";\n+ $verbose and print "Created Bisulfite Genome folder $CT_dir\\n";\n+ }\n+ unless (-d $GA_dir){\n+ mkdir $GA_dir or die "Unable to create directory $GA_dir $!\\n";\n+ $verbose and print "Created Bisulfite Genome folder $GA_dir\\n";\n+ }\n+\n+ # moving back to the original genome folder\n+ chdir $genome_folder or die "Could\'t move to directory $genome_folder $!";\n+ # $verbose and print "Moved back to genome folder folder $genome_folder\\n";\n+ warn "\\nStep I - Prepare genome folders - completed\\n\\n\\n";\n+ return @filenames;\n+}\n+\n+sub print_helpfile{\n+ print << \'HOW_TO\';\n+\n+\n+DESCRIPTION\n+\n+This script is supposed to convert a specified reference genome into two different bisulfite\n+converted versions and index them for alignments with Bowtie 1 (default), or Bowtie 2. The first\n+bisulfite genome will have all Cs converted to Ts (C->T), and the other one will have all Gs\n+converted to As (G->A). Both bisulfite genomes will be stored in subfolders within the reference\n+genome folder. Once the bisulfite conversion has been completed the program will fork and launch\n+two simultaneous instances of the Bowtie 1 or 2 indexer (bowtie-build or bowtie2-build). 
Be aware\n+that the indexing process can take up to several hours; this will mainly depend on genome size\n+and system resources.\n+\n+\n+\n+The following is a brief description of command line options and arguments to control the\n+Bismark Genome Preparation:\n+\n+\n+USAGE: bismark_genome_preparation [options] <arguments>\n+\n+\n+OPTIONS:\n+\n+--help/--man Displays this help filea and exits.\n+\n+--version Displays version information and exits.\n+\n+--verbose Print verbose output for more details or debugging.\n+\n+--path_to_bowtie </../> The full path to the Bowtie 1 or Bowtie 2 installation on your system\n+ (depending on which aligner/indexer you intend to use). Unless this path\n+ is specified it is assumed that Bowtie is in the PATH.\n+\n+--bowtie2 This will create bisulfite indexes for Bowtie 2. (Default: Bowtie 1).\n+\n+--single_fasta Instruct the Bismark Indexer to write the converted genomes into\n+ single-entry FastA files instead of making one multi-FastA file (MFA)\n+ per chromosome. This might be useful if individual bisulfite converted\n+ chromosomes are needed (e.g. for debugging), however it can cause a\n+ problem with indexing if the number of chromosomes is vast (this is likely\n+ to be in the range of several thousand files; the operating system can\n+ only handle lists up to a certain length, and some newly assembled\n+ genomes may contain 20000-50000 contigs of scaffold files which do exceed\n+ this list length limit).\n+\n+\n+ARGUMENTS:\n+\n+<path_to_genome_folder> The path to the folder containing the genome to be bisulfite converted.\n+ The Bismark Genome Preparation expects one or more fastA files in the folder\n+ (with the file extension: .fa or .fasta). Specifying this path is mandatory.\n+\n+\n+This script was last modified on 19 Sept 2013.\n+HOW_TO\n+}\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 old/bismark_methylation_extractor --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/old/bismark_methylation_extractor Sat May 06 13:18:09 2017 -0400 |
[ |
b'@@ -0,0 +1,4760 @@\n+#!/usr/bin/perl\n+use warnings;\n+use strict;\n+$|++;\n+use Getopt::Long;\n+use Cwd;\n+use Carp;\n+use FindBin qw($Bin);\n+use lib "$Bin/../lib";\n+\n+\n+## This program is Copyright (C) 2010-13, Felix Krueger (felix.krueger@babraham.ac.uk)\n+\n+## This program is free software: you can redistribute it and/or modify\n+## it under the terms of the GNU General Public License as published by\n+## the Free Software Foundation, either version 3 of the License, or\n+## (at your option) any later version.\n+\n+## This program is distributed in the hope that it will be useful,\n+## but WITHOUT ANY WARRANTY; without even the implied warranty of\n+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+## GNU General Public License for more details.\n+\n+## You should have received a copy of the GNU General Public License\n+## along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+my @filenames; # input files\n+my %counting;\n+my $parent_dir = getcwd();\n+\n+my %fhs;\n+\n+my $version = \'v0.10.1\';\n+my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip,$ignore_r2,$mbias_only,$gazillion,$ample_mem) = process_commandline();\n+\n+\n+### only needed for bedGraph output\n+my @sorting_files; # if files are to be written to bedGraph format, these are the methylation extractor output files\n+my @methylcalls = qw (0 0 0); # [0] = methylated, [1] = unmethylated, [2] = total\n+my @bedfiles;\n+\n+### only needed for genome-wide cytosine methylation report\n+my %chromosomes;\n+\n+my %mbias_1;\n+my %mbias_2;\n+\n+##############################################################################################\n+### Summarising Run 
Parameters\n+##############################################################################################\n+\n+### METHYLATION EXTRACTOR\n+\n+warn "Summarising Bismark methylation extractor parameters:\\n";\n+warn \'=\'x63,"\\n";\n+\n+if ($single){\n+ if ($vanilla){\n+ warn "Bismark single-end vanilla format specified\\n";\n+ }\n+ else{\n+ warn "Bismark single-end SAM format specified (default)\\n"; # default\n+ }\n+}\n+elsif ($paired){\n+ if ($vanilla){\n+ warn "Bismark paired-end vanilla format specified\\n";\n+ }\n+ else{\n+ warn "Bismark paired-end SAM format specified (default)\\n"; # default\n+ }\n+}\n+\n+if ($single){\n+ if ($ignore){\n+ warn "First $ignore bp will be disregarded when processing the methylation call string\\n";\n+ }\n+}\n+else{ ## paired-end\n+ if ($ignore){\n+ warn "First $ignore bp will be disregarded when processing the methylation call string of Read 1\\n";\n+ }\n+ if ($ignore_r2){\n+ warn "First $ignore_r2 bp will be disregarded when processing the methylation call string of Read 2\\n";\n+ }\n+}\n+\n+\n+if ($full){\n+ warn "Strand-specific outputs will be skipped. 
Separate output files for cytosines in CpG, CHG and CHH context will be generated\\n";\n+}\n+if ($merge_non_CpG){\n+ warn "Merge CHG and CHH context to non-CpG context specified\\n";\n+}\n+### output directory\n+if ($output_dir eq \'\'){\n+ warn "Output will be written to the current directory (\'$parent_dir\')\\n";\n+}\n+else{\n+ warn "Output path specified as: $output_dir\\n";\n+}\n+\n+\n+sleep (1);\n+\n+### BEDGRAPH\n+\n+if ($bedGraph){\n+ warn "\\n\\nSummarising bedGraph parameters:\\n";\n+ warn \'=\'x63,"\\n";\n+\n+ if ($counts){\n+ warn "Generating additional output in bedGraph and coverage format\\nbedGraph format:\\t<Chromosome> <Start Position> <End Position> <Methylation Percentage>\\ncoverage format:\\t<Chromosome> <Start Position> <End Position> <Methylation Percentage> <count methylated> <count non-methylated>\\n\\n";\n+ }\n+ else{\n+ warn "Generating additional sorted output in bedGraph format (output format: <Chromosome> <Start Position> <End Position> <Methylation Percentage>)\\n";\n+ }\n+\n+ warn "Using a cutoff of $coverage_threshold read(s) to repo'..b"e cost of a larger memory footprint\n+ (two arrays of the length of the largest human chromosome 1 (~250M bp) consume around 16GB\n+ of RAM). Due to overheads in creating and looping through these arrays it seems that it will\n+ actually be *slower* for small files (few million alignments), and we are currently testing at\n+ which point it is advisable to use this option. Note that --ample_memory is not compatible\n+ with options '--scaffolds/--gazillion' (as it requires pre-sorted files to begin with).\n+\n+\n+\n+Genome-wide cytosine methylation report specific options:\n+=========================================================\n+\n+--cytosine_report After the conversion to bedGraph has completed, the option '--cytosine_report' produces a\n+ genome-wide methylation report for all cytosines in the genome. 
By default, the output uses 1-based\n+ chromosome coordinates (zero-based cords are optional) and reports CpG context only (all\n+ cytosine context is optional). The output considers all Cs on both forward and reverse strands and\n+ reports their position, strand, trinucleotide content and methylation state (counts are 0 if not\n+ covered). The cytsoine report conversion step is performed by the external module \n+ 'bedGraph2cytosine'; this script needs to reside in the same folder as the bismark_methylation_extractor\n+ itself.\n+\n+--CX/--CX_context The output file contains information on every single cytosine in the genome irrespective of\n+ its context. This applies to both forward and reverse strands. Please be aware that this will\n+ generate output files with > 1.1 billion lines for a mammalian genome such as human or mouse.\n+ Default: OFF (i.e. Default = CpG context only).\n+\n+--zero_based Uses zero-based coordinates like used in e.g. bed files instead of 1-based coordinates. Default: OFF.\n+\n+--genome_folder <path> Enter the genome folder you wish to use to extract sequences from (full path only). Accepted\n+ formats are FastA files ending with '.fa' or '.fasta'. Specifying a genome folder path is mandatory.\n+\n+--split_by_chromosome Writes the output into individual files for each chromosome instead of a single output file. 
Files\n+ will be named to include the input filename and the chromosome number.\n+\n+\n+\n+OUTPUT:\n+\n+The bismark_methylation_extractor output is in the form:\n+========================================================\n+<seq-ID> <methylation state*> <chromosome> <start position (= end position)> <methylation call>\n+\n+* Methylated cytosines receive a '+' orientation,\n+* Unmethylated cytosines receive a '-' orientation.\n+\n+\n+\n+The bedGraph output (optional) looks like this (tab-delimited; 0-based start coords, 1-based end coords):\n+=========================================================================================================\n+\n+track type=bedGraph (header line)\n+\n+<chromosome> <start position> <end position> <methylation percentage>\n+\n+\n+\n+The coverage output looks like this (tab-delimited, 1-based genomic coords):\n+============================================================================\n+\n+<chromosome> <start position> <end position> <methylation percentage> <count methylated> <count non-methylated>\n+\n+\n+\n+The genome-wide cytosine methylation output file is tab-delimited in the following format:\n+==========================================================================================\n+<chromosome> <position> <strand> <count methylated> <count non-methylated> <C-context> <trinucleotide context>\n+\n+\n+\n+This script was last modified on 25 November 2013.\n+\n+HOW_TO\n+}\n" |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 readme.rst --- a/readme.rst Wed Feb 11 16:21:51 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,53 +0,0 @@ -=============== -Bismark Wrapper -=============== - -Bismark_ uses Bowtie or Bowtie2 to map bisulfite converted sequence reads to a reference genome and determine cytosine methylation states. - -Publication: http://www.ncbi.nlm.nih.gov/pubmed/21493656 - -User Guide: http://www.bioinformatics.babraham.ac.uk/projects/bismark/Bismark_User_Guide_v0.7.12.pdf - -.. _bismark: http://www.bioinformatics.babraham.ac.uk/projects/bismark/ - -Preparation -=========== - -Create your reference index with *bismark_genome_preparation* in your normal Galaxy Bowtie2/Botwie index directory. It will create a Bisulfite_Genome folder directly in your Bowtie2/Bowtie index directory. -If you follow that approach you do not need to specify or modify an extra .loc file. -That wrapper will extract the path to the Bisulfite_Genome folder from ./tool-data/bowtie2_indices.loc or ./tool-data/bowtie_indices.loc. - -======= -History -======= - -- v0.7: Initial public release -- v0.7.8: update and add Tool Shed Integration -- v0.7.11.1 change default output to BAM, from now on samtools are required -- v0.7.11.2 added multi-threading to samtools (samtools > 0.1.19 is required) -- v0.7.12 upgrade to bismark 0.7.12 and fix a major slowdown -- v0.7.12.1 define a dependency to samtools 0.1.19 - - -=============================== -Wrapper Licence (MIT/BSD style) -=============================== - -Permission to use, copy, modify, and distribute this software and its -documentation with or without modifications and for any purpose and -without fee is hereby granted, provided that any copyright notices -appear in all copies and that both those copyright notices and this -permission notice appear in supporting documentation, and that the -names of the contributors or copyright holders not be used in -advertising or publicity pertaining to distribution of the software -without specific prior permission. 
- -THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL -WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE -CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT -OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS -OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE -OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE -OR PERFORMANCE OF THIS SOFTWARE. - |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 test-data/bwa-mem-fastq1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bwa-mem-fastq1.fq Sat May 06 13:18:09 2017 -0400 |
b |
b'@@ -0,0 +1,396 @@\n+@M01368:8:000000000-A3GHV:1:1101:6911:8255/1\n+ATCTGGTTCCTACTTCAGGGCCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATT\n++\n+BCCCCFFFFFFFGGGGGGGGGGGHHHHGHGHHHHHHHHHGGGGGGHHHHGHHHHHHHHHHGHHHHHHGGHGGHHHGHHHHFHHGHHHHHHHHHGHEHEFFGHHEGGCEFGGFHHHBGHHGHHHHGHFHHHGHGHGHGGCDFDDACGGGGGGGAAFFFFFFFFFBAFFFFFB;FFFFFFADDFFFFFFFFFFEFFFFFFFFFFBFFFFFFFFFFFFFFEFFFFFFFFBFEFFFFEFE;DFFFDFBFF/9BFB\n+@M01368:8:000000000-A3GHV:1:1101:14518:9998/1\n+GTTATTATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGGGATAGACCTGTGATCCATCGTGAT\n++\n+AAAAAFFFFFFFGGGGGGGGGGHGGHHHHGHHHHHHHGCGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHGHHGFHFE5BGEEHFGGGHHHHHHHHFBHHGGGGFHGHHFGHHHHGHHHHHHGEGGGGFHFHGEGHHGGCDGDGHGGGDGGHGGCGGGHGHHH/ACDG?.1FGCDCCGCA.CC@CDCHFHGFFGGGEBFGAB//CEFBFGG.:;D;;A0AFFFFFB..:@ABFF//;BFFFFFBF/9D:A//\n+@M01368:8:000000000-A3GHV:1:1101:18422:19051/1\n+GTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACG\n++\n+CCCCCFDDDDDFGGGGGGGGGGHHHHHHHHHHHHHHHHGHHHHHHFHHHHGGGGHHHHHHHHHGHHHHHHHHHHHHGGHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHGCGGGHHHHHHHHHHHHHHHHHHHHHHGFDHGFHCFGGGGFGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF;FFFFFFFFFFFFFFFFFFFFFFFFFFFFEFBFFFFFFFFFF:FFF.\n+@M01368:8:000000000-A3GHV:1:1101:25545:21098/1\n+ATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATAAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGGCTTATTTAAGGGGAACGGGTGG
GG\n++\n+33AA?DFD5BDFGGGFEBDGEGHEGHGEGHCEGGHHCHGHHFFHHGFGAGE53FF2FAFFGDE5FFFE5GFBFGAEE1GHHHGHHHEHE3FGHF@GEGEGGHHGG3FAGFFDE?EEE3GFEGFGFGGCG?GHHHFHGGGC@DHFFHD/A<C@EGFDCGGGHFHHHEGFGHBFHG0:CEHFCHGGED.;0CEF.F99B0CFFEEFGGG0FBFBBF0F/FFBDE?/9//9B.FFBFFFFFFBF..A..;@B--\n+@M01368:8:000000000-A3GHV:1:1101:5446:12248/1\n+AATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTAT\n++\n+CCCCDFFFFCCFGGGGGGGGFGHHHHHGGGGHHHHHHHHHHHHHHHHGBGHGGHGGHHHHHHHHHHGHGHGGGGGHHHHHHHHGHHHHHHHHHGGGGGHHHHFFGHHHGGGGGGHHHGFGGHHGGGGHHHHHHGGGGGGHGHHGGGGGGGHGGGGGGHHHHHHHHHHHHHFHGGGHHHHGGGGGG:FE;EGEGGGGG/;?FGGGGGGGFFFFGGFFFFFFFFFBFFFFFFFFFFBFFFFFFEFFFFFEFFF\n+@M01368:8:000000000-A3GHV:1:1101:5861:6452/1\n+ATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTT\n++\n+ABCCCFFFFFFFGGGGGGGGGGHHHHHHHHHHHGHHHHGHHHHHHHHHHHGGGGHHHHHHHHFHHHHHHGGHGHGGHGGHHHHHHHGGHFHHHGGGGGHHHHHHHHHHHHHHHHHHGGGGGHHHHHEGGHHGGGGGGHHHGGGGHGGGGGHHHHHHGGGDCGHHHHGGGGGGGHEFGGGGHGHHHGHGGGFGGGGGGGEGGGGGGG?E0CEFGGGGGFEE9EEFFFFFBFFFFFFFBFFBD.AFFFFFFF0\n+@M01368:8:000000000-A3GHV:1:1102:10403:6021/1\n+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTG\n++\n+>A@AAAAD2ADDFFGGGGGFGGHA?EAEFBEAGHFABFGG5FDF5DB1EEGAFDFB53FF5FH@G5FFEHGHEFHFFHBE333GF43GCGGGGE@0?BFGGB0B?FHGFDGGHHHBFFDEGGHGFFFDFE@<1>@FFFGHHHHFHEFGDABFFGG/@DCE<CG1<GF0/DD000=<DHGBDFDCECE/:AC?-;-;9B/ABBB-AD9BFB99AB?BDFBAD-.9..@;=;;..9..9/9;BEF;A:9/BFF\n+@M01368:8:000000000-A3GHV:1:1102:10677:23253/1\n+CCTTAAATA
AGACATCACGATGGATCACAGGTCTATCACC'..b'111F1<F1FBGHGHCF-CHHFA./</0CGHF<0CC/;C-:-;;09;FFBFBBFFBC0FFFGGFC0009C00090/-:--9--;-;AFFE;/99-9--/;///\n+@M01368:8:000000000-A3GHV:1:1113:25528:14016/1\n+CCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGG\n++\n+>AAAAFFFBFFFGGGGGGGGGGHHFHGHHHHGHHHHHHHHHHHGGGGHHHHFHHHHFHGHHHHGAFFEFHEGHHHHHHHHGHEHHGGFGGGHHHHHHFHHHHHGGHHHHGGGGGHFHHFF?HHGGFECEFFGHFFGFHGECDGHGBGFHGDF@@?CGFHCEGGGFD.CCC?EGHBHHHFHHFBCFFGEB/CEGGGGDAA.90C9CEBFGGBBF/9.9FBFFFBBFF//99FFFFEABF//99FFEFFFBFF\n+@M01368:8:000000000-A3GHV:1:1113:5741:16959/1\n+TAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGA\n++\n+ABBBBFFFFFFFGGGFGGGGGGHHHGHHGGHBGHGAGFFCAFGHGFFGHHGFHHHHHGGGGGHGHHHHHHHHE3BFFHHHGG0EDF@GHFFGGGHGGGGGGGGGGGGGHHGGEEFHGFHHDDG@DGGGHHGDGGGGGHGG?CF?HHGHHHGHGHHHFFHGGGHHHHGGCD.;0<C;CGGGGEFF/.;0;FFFBF/0;0CFGFFB..9B/;0CBFFBBFFFFBAC?DED9;B9AD;.FFFB/B/;FBA/B//\n+@M01368:8:000000000-A3GHV:1:1114:10130:11959/1\n+CGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTCTGATCTGTCTCTTATACACATCTCCGAGCCCACGAGACTAAGGCGAATCTCGTATGCCGTCTTCTGCTTGCAACAAACACACATCCAGA\n++\n+>A33>FFFFFF@FFFGGGGFGGD4EGGGGGHHGFFGHFGGHHHFEFHECFHHHEHGAGBCCGCCEGGHGHEGFBFHFHHHHGGGHFHGHEGGGFEGEGG??@DBGHGGC-@FFGHFHFHB-??DA-DD@9:BEBG@B?E?A.ABBFBFA??F.FF/BFFB/B9B/9;BF9FF9FFFFFFFFFFFFFF?BB?;9EE;-;DDB9//;B-B@;ABFFEFFFF/-.9A.;//9/BF./B/..9.9:...//////\n+@M01368:8:000000000-A3GHV:1:1114:14540:5315/1\n+CTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGC
ACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGGGGGCTATTTAGGTTTTT\n++\n+AABCCFFFFFFFGGGGGGGGGGHHHHHHHFHHHHGHHGHHGGGHGGHHHHHHHGHHHHHHGGGGGHHFHHHFGHHGGFHHHHHGGGGGHHHGHGGHHHGGGGGGHGHGGGGHHGGGGHHHHHEGDDFGFFFHHGGGGGCDAFCFGFDHHHHGGHGHHHHHHBCGEHHHHGGHG.ACGEHGG0CBFFF:A;BB0;09CGF00CFFFE0AA?//CFFFFFFFFFFFFFFFBEF;A.-=A--:BBFB90;;FE.\n+@M01368:8:000000000-A3GHV:1:1114:15066:16302/1\n+TAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAACGGTTGTTAATTAATTATTGCTTGTAGGACA\n++\n+BBBBAFFBDFBFBGGGGGFGGGBGFFFHGFHHGFFFHGHHHGHHHHFFHHHGHGC?AEFFHEFBFFFGHHHHH1FDGFFHGHGHFEGCGC-<<AHHHGGGGGGGFHH0GHFCCCADGGG?.9/A-???DGGFFF.9F9/EE-;;BBBFFBFFFFFFFFFEFFFFBFFBBFFFFF/BFFBFFFFF-DBFFF;/BFF//BB//9/BEA---9:BFFFFFF/F/.;.:@9.BBFF/;BFF/;/////9/////.\n+@M01368:8:000000000-A3GHV:1:1114:16639:15258/1\n+CCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGCGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTCCAATATTACAGGCGAACATACTTACTAAATTGTGT\n++\n+11>>ABFFFFFFGGCGC1BBF1GHHHBHHHHGFHGH1A100AA/GGEHGFBAA1FFD2FHHHHECG2F1BB/E0FC/A>EE/FGGFGEF1EGBBEHHCGGGHBGEHFHE0B?E--;C?CCGGG@A@GBFBBBB09?@??A-AB--ABE@=-=-9AE9A;FFFFFE=9-//;//;9FF/////;;///9B///;/B////9FFBB;--A@-;/9;9-:-/;;FFFE9BF?@;-9-99/B9F/://///99/;\n+@M01368:8:000000000-A3GHV:1:1114:2404:13066/1\n+TCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCTGTCTATTATACACATC\n++\n+CCCCCFFFFCFFGGGGGGGGGGHHHHHGHHHHHHHHHFFHHHHHGGGGHHHHHHHHFHHHHHHFGGHHGGHGGHHHHHHGHHFHHHHGGGGGGHHHHHHGHHHHHHHHHHGGGGGGGHH?FGHHHGGGGGGHHGGFGGHHGGGGHHHHHFGGGGFGHGHHGGGGGGGHGGGEGGHHGHHHHHHHHHGFB
FFDA0FGGGFFGG0:EFGGGGGGGG;AEBF0B0BFFBFFFFFFFFFFFFFFFFFFFFFEFF0\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 test-data/bwa-mem-fastq2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bwa-mem-fastq2.fq Sat May 06 13:18:09 2017 -0400 |
b |
b'@@ -0,0 +1,400 @@\n+@M01368:8:000000000-A3GHV:1:1101:6911:8255/2\n+TCGCCTTACCGCTACTCACCCACGGCGGCCATCAGCCGATACTAAGTTTGGGGTATGGTGGGGGGGATAATGAATTAGGTTGTGGGGGAGGGTTTGTGGTTGAGAGAAACACAAAAAACAATCTTATATATGGGTAGTCGTTTTGTATTGGTTTTTTGTTTTGTTTGTGTTTTGAGTGTCGGTTTAGTTCGGTGTACTAGGGGGGGTGGATGGGGTCGGCTGGTGAGGGGGTCTTAGTGTATTGAGTGTGG\n++\n+1>11111@11111A111A100000000////011110//>>/12@1@22B/////1@>21/>>/-----9/;////9////--;-;-;-----;--------9/-/-///9-;-------9//////9/////-//-/9-;-;9--/////99-;--9-:-;----/---/-----////---9-/////--;A-//////---------9/-----;-----/-/-----;--;//////////9;///-\n+@M01368:8:000000000-A3GHV:1:1101:14518:9998/2\n+CATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAAAGTGTGTTAATTAATTAATGCTTGTAGGACATAATAATAA\n++\n+CCCCCFCCCCCFGGGGGGGGGGHHHHHHHHHHHHHHHHGFHHHHGGGGGHGFHHHHHHHHHHHHHHHHHHHGHGGEHGGGGCGGGHHGGCGGGGGHHGHHHGGGGGGGG.BFFFGAGADFGAFDGFGGCFFF;DDFFFFFFFFFFFFFFFFFFEFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFF09FFFE00;BE@;DABBFFFFFBBFB00;F:9;FFBFFF9BFFFFFFFFFFFFF90/::BFFFBF0\n+@M01368:8:000000000-A3GHV:1:1101:18422:19051/2\n+CTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGGGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGG\n++\n+BCCCCFFCFFFFGGGGGGGGGGHHHGHHHHHHHHHHHHHHHHGGGGHHHHHHHHEHHHHHHHGGHHGGHGGHHHHHHHGHGGHHHGGGGGHGHHHHGGGHFHFHHHHHGGGGGHBFFCGDHHHGGGGGGHGGGGGGHHGCGGGFGHHBGGGGGFFFHEGGGGGCDCCE@EFGHHHHFHEGHGFFHHGB;ECBFGGGEFEFFGF0AFGFGFFG.;;DFFFFFFFFFF090BFFFE?FEFBBFBFFFB990BF\n+@M01368:8:000000000-A3GHV:1:1101:25545:21098/2\n+GCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTTCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCAACCTATGTCGCAGTATCTGTCTTTG
AT\n++\n+3AAA34BFFFFFGCGGCGGGCGHHHGHHHHCGHFGHFHGFGGCDGGBD5FGFFFHFHGGGHAEBFGHHHEHGGFHBGGFHHHGGGEEFHHHFHFFHDHGHGHHHHFFHHGGFAEGFFFFFBGHHHFFHHHHHHHFHFHHFHGFHGHGA/<@EFHEHH1GGHFFFHG1>=FGHFHGF.GE//-<BB?9.9BFFFB==AA/FFFFFA-@BA.;;D?F9FAB;---./99BFFFBBAE-.9B/BFB9F/9BFBB\n+@M01368:8:000000000-A3GHV:1:1101:5446:12248/2\n+ATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTT\n++\n+CCDDDCCCCFFFGGGGGGGGGGHHHHHGHHHHHHHHGHHHHHHGHHHHGGGGHHHHHHHHHHHHHHHHHHHHHHGGGGGHHHHHHHGFGHHHHHHHHHHHHHHHHHGHHHHHGGGGGHHHHHHHHHHHHHHHGHHHHHHGHGHGHHGGGGGGGGGGGFFFFFFFFFFFFFFAFFFFFFFFFFFFFBFC?.EAFFFFFFFFAFFFFFFFFFFFFFFFEFFFF0FFFFFFF0BFFFFFFFFF?.BDCFFEDA/\n+@M01368:8:000000000-A3GHV:1:1101:5861:6452/2\n+TTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACG\n++\n+BCCCCFBBCCCFGGGGGGGGGGHHHHHHHHHHHHHHHHGHHHHHHHHHHHGGGGHHHHHHHHGHHHHHHHHHHHHHGGHGGHHHHHHHGHHHHHHHHHHHHHHHHHHHGGHHHHGGGGGGEHGHHHHHHHHHHHGHHHHHHGHGHGHGGGGCGGGGGFFFFFFFFFFFFFFFFFFFFFFFF?FFEAF:DAEF9DEFFFFFFF-A.BFFFFFEFFFFEFFFFFFFF0FFEB0FFFFBFFFFFFADEFCF0/.\n+@M01368:8:000000000-A3GHV:1:1102:10403:6021/2\n+GGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGGGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGGGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGAGGTTAATAGGGGGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGACGCTGAAGTAGGAACCAGATGTCGGATACAGTTGACTTTAGGTAACCCA\n++\n+AA?A34BF>AADEBGGGGFFCGGHHA5DFGAE?AAEDFHG2AEE1FBE00A1BGE0?E1FFGEEEGB4F43?EE/>///F??//@BCBAC<<BAGHB?11F/FGFGGBFBGBD.11><.<<<C0<A</<<=0GFHC@-.;FF09BFFGB00B0;FE.C/:CCF0;0C0;BBBFF@.9AF.:9EFF.;.BFF/FF/;/./BFB/..;;ABF/9FF//.;AAF9BBBA;-A.B/9///9/BF////;///:9.\n+@M01368:8:000000000-A3GHV:1:1102:10677:23253/2\n+AATTAACAC
ACTTTAGTAAGTATGTTCGCCTGTAATATTG'..b'HGGGGGGGGGFEACFFFFFFFFFFFFFFFDFFFAFFFFBFAB@EFAAEFFFFF.ACF.BBFFEBFFFEB;FFFFFFFA/BFBFBBBFFFBFFFFFED.>DFFF.\n+@M01368:8:000000000-A3GHV:1:1113:5741:16959/2\n+GTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACG\n++\n+BBBBCFCCCCCFGGGGGGGGGGHHHHHHHGHHHHHHHHGHHHHGHDGHHHGGGGHHHHHHHHHHHHHHHGHHHHHGGGEGGHHHHFHHHHHHHHHHHHHHHHHHHHHHGHHGHHGGGGGHHHHHHHHHHHGHHHFHHHHHHGHGHGHGGGGCGGFGGFFFFFFFFFFFFFBFFFFFFFFFFFFFFAFFFFEAEFFFFFFFFFFF9BFFBFFFFFFFFFFFFFFBFFFFFFFFFFFFFFFFFFADAB-/BF.\n+@M01368:8:000000000-A3GHV:1:1114:10130:11959/2\n+ATCAGAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGCTGTCTCTTATACACATCTGACGCTGCCGACGAGCGATCTAGTGGAGAGTTCTGTGGGGCCGGGGATCCTTATAAAAAAAATAGA\n++\n+BCCDDFFFFFFFGGGGGGGGGGGHHHHHGGHHHGGGCGGHGHGGGGHHGGGGHHHHHHGGGEGGHHHFGGGGG?E1FE?/EEHHHHHGHHGHHHHGHFHGHGHHGDGGFG2FF2?GHHHHHGCCCFHGHGHHHHGHHFEHHFGHHGHH<1=DGHHHGHHGHGAGAEEDG.CGCGHC0CGBFHGFBBF0ABDDEFF@?--:BB@.;:BF;0.0:0.0:000:BF.-.------.9/;000::0;0:--.000\n+@M01368:8:000000000-A3GHV:1:1114:14540:5315/2\n+CACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCC\n++\n+CCCCCFFCCDCCGGGGGGGGGGHHHHHHHHHGGGGGHHHHHHHHHHHHHHHHHHHGHHHHHHHGHHHHFHFHHHGGGGGHHHHHHHHHHHHHGHHHHHHHHGGGGGGHHHFHHHGHHHHHHHHHHHHHHHHHHGHGHHHGGGFGHHHHHHHHFHHHHHF?1FHHGHGHGHGHHGGFFFFDBFBE;BCC.:BFFFFFFFFFFFFFF;AFFFFF-=-.AEDEFFFFF..9A;9FFFF0FFFFE00FFF0:BA.\n+@M01368:8:000000000-A3GHV:1:1114:15066:16302/2\n+TTATTATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATA
GGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGGGATAGACCTGTGATCCATCGTGTGG\n++\n+CCCCCFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHDGHHHHFHHGHHGHHHHHGGGGEHHHHFHHFF5FHHFEGHHHGDHGGHGHGFGGGEHFGHHGGGGGHGHHHGHHFHHB3FGHHFGGGG?GFFHCCEBGFFECCDFEGFCFGCHHGFDDHHHGHHCFGGGGGFBFDGFG?-:..AFG.-C0C009;:00;00:9/:CEFFF?AE::9;9?0:FEF0;0..--./00::B/090000000;A....\n+@M01368:8:000000000-A3GHV:1:1114:16639:15258/2\n+TTATTATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCGCACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGGGATAGACCTGTGATCCATCGTGATG\n++\n+CDCCCFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHGHHHHHHHHGHHHHHHGGGGGHGIHHHIH5DEGHHHF?FGHGGHGGHEGGHFHHGHGEHHGGGGGFFFGHFBG2GHEBGHHGHGGEG/GFGABEDFGHEED?GGHHFFGGGCFEGD/GFHFFGEFGCGG?CC??D-EF@EEEFGCDDBBFGGGEBBFFF09090A.BFGA.9CCA0;EBAB00BBFF.@-./;BB;BFFF0:00099AAFFF0\n+@M01368:8:000000000-A3GHV:1:1114:2404:13066/2\n+ATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGACCATACTTACTAAAGTGTGTTAATTAATTAATGCTTGTAGGACTGTCTCTTATACACATT\n++\n+CCCCCFFFFFFCGGGGGGGGGGHHHHHHHHHFFHHHHGGGGGHFFFHHFHHHHHHHHHHHHHHHGFEGGGHGEDFCDFHGHFG@@DGGHHHHHHGGGGCGGGGGEHGGCGBB?CF99EGFGGFGG?D9CFFFF/BBFFFFFEF9BFFAFFFFEFFFFFFFFFFFFFFFFFFFFF.FFBBFFFFFFFFFFFF-9;;;BFFFFFB9BFBFBFABFFEFFFFFFFFFF::BFFBFFFF.9//;FFFFF/BFFB/\n+@M01368:8:000000000-A3GHV:1:1114:9184:6959/2\n+AAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCCTGTCTCTTA\n++\n+CCCCBFFFFFFFGGGGGGGGGGHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHEHIHHGGGGHHHHHHHHHHHHHGHHHHHHHHGGGGGHHFHHHHHBGHHHHHHHHHHHHHHHHHGHHHHHGGGGGHHHHGHHHHHHHHHHHHHHHHHGHGHGHHGGGGCFFFFFFFFFFFFFFFFFFFFFFFFFF
.CFFFFAF=D=EAEFFF0B:0AF-DAFBFFFFFFFFFBFFFFFFFFFFBFFFEFF9B900B0\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 test-data/bwa-mem-mt-genome.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bwa-mem-mt-genome.fa Sat May 06 13:18:09 2017 -0400 |
b |
b'@@ -0,0 +1,239 @@\n+>gi|251831106|ref|NC_012920.1| Homo sapiens mitochondrion, complete genome\n+GATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGG\n+GTATGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTC\n+CTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAAAGTGTGTTA\n+ATTAATTAATGCTTGTAGGACATAATAATAACAATTGAATGTCTGCACAGCCACTTTCCACACAGACATC\n+ATAACAAAAAATTTCCACCAAACCCCCCCTCCCCCGCTTCTGGCCACAGCACTTAAACACATCTCTGCCA\n+AACCCCAAAAACAAAGAACCCTAACACCAGCCTAACCAGATTTCAAATTTTATCTTTTGGCGGTATGCAC\n+TTTTAACAGTCACCCCCCAACTAACACATTATTTTCCCCTCCCACTCCCATACTACTAATCTCATCAATA\n+CAACCCCCGCCCATCCTACCCAGCACACACACACCGCTGCTAACCCCATACCCCGAACCAACCAAACCCC\n+AAAGACACCCCCCACAGTTTATGTAGCTTACCTCCTCAAAGCAATACACTGAAAATGTTTAGACGGGCTC\n+ACATCACCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTAAGATTACACATGCAA\n+GCATCCCCGTTCCAGTGAGTTCACCCTCTAAATCACCACGATCAAAAGGAACAAGCATCAAGCACGCAGC\n+AATGCAGCTCAAAACGCTTAGCCTAGCCACACCCCCACGGGAAACAGCAGTGATTAACCTTTAGCAATAA\n+ACGAAAGTTTAACTAAGCTATACTAACCCCAGGGTTGGTCAATTTCGTGCCAGCCACCGCGGTCACACGA\n+TTAACCCAAGTCAATAGAAGCCGGCGTAAAGAGTGTTTTAGATCACCCCCTCCCCAATAAAGCTAAAACT\n+CACCTGAGTTGTAAAAAACTCCAGTTGACACAAAATAGACTACGAAAGTGGCTTTAACATATCTGAACAC\n+ACAATAGCTAAGACCCAAACTGGGATTAGATACCCCACTATGCTTAGCCCTAAACCTCAACAGTTAAATC\n+AACAAAACTGCTCGCCAGAACACTACGAGCCACAGCTTAAAACTCAAAGGACCTGGCGGTGCTTCATATC\n+CCTCTAGAGGAGCCTGTTCTGTAATCGATAAACCCCGATCAACCTCACCACCTCTTGCTCAGCCTATATA\n+CCGCCATCTTCAGCAAACCCTGATGAAGGCTACAAAGTAAGCGCAAGTACCCACGTAAAGACGTTAGGTC\n+AAGGTGTAGCCCATGAGGTGGCAAGAAATGGGCTACATTTTCTACCCCAGAAAACTACGATAGCCCTTAT\n+GAAACTTAAGGGTCGAAGGTGGATTTAGCAGTAAACTAAGAGTAGAGTGCTTAGTTGAACAGGGCCCTGA\n+AGCGCGTACACACCGCCCGTCACCCTCCTCAAGTATACTTCAAAGGACATTTAACTAAAACCCCTACGCA\n+TTTATATAGAGGAGACAAGTCGTAACATGGTAAGTGTACTGGAAAGTGCACTTGGACGAACCAGAGTGTA\n+GCTTAACACAAAGCACCCAACTTACACTTAGGAGATTTCAACTTAACTTGACCGCTCTGAGCTAAACCTA\n+GCCCCAAACCCACTCCACCTTACTACCAGACAACCTTAGCCAAACCATTTACCCAAATAAAGTATAGGCG\n+ATAGAAATTGAAACCTGGCGCAATAGATATAGTACCGCAAGGGAAAGATGAAAAATTATAACCAAGCATA\n+ATA
TAGCAAGGACTAACCCCTATACCTTCTGCATAATGAATTAACTAGAAATAACTTTGCAAGGAGAGCC\n+AAAGCTAAGACCCCCGAAACCAGACGAGCTACCTAAGAACAGCTAAAAGAGCACACCCGTCTATGTAGCA\n+AAATAGTGGGAAGATTTATAGGTAGAGGCGACAAACCTACCGAGCCTGGTGATAGCTGGTTGTCCAAGAT\n+AGAATCTTAGTTCAACTTTAAATTTGCCCACAGAACCCTCTAAATCCCCTTGTAAATTTAACTGTTAGTC\n+CAAAGAGGAACAGCTCTTTGGACACTAGGAAAAAACCTTGTAGAGAGAGTAAAAAATTTAACACCCATAG\n+TAGGCCTAAAAGCAGCCACCAATTAAGAAAGCGTTCAAGCTCAACACCCACTACCTAAAAAATCCCAAAC\n+ATATAACTGAACTCCTCACACCCAATTGGACCAATCTATCACCCTATAGAAGAACTAATGTTAGTATAAG\n+TAACATGAAAACATTCTCCTCCGCATAAGCCTGCGTCAGATTAAAACACTGAACTGACAATTAACAGCCC\n+AATATCTACAATCAACCAACAAGTCATTATTACCCTCACTGTCAACCCAACACAGGCATGCTCATAAGGA\n+AAGGTTAAAAAAAGTAAAAGGAACTCGGCAAATCTTACCCCGCCTGTTTACCAAAAACATCACCTCTAGC\n+ATCACCAGTATTAGAGGCACCGCCTGCCCAGTGACACATGTTTAACGGCCGCGGTACCCTAACCGTGCAA\n+AGGTAGCATAATCACTTGTTCCTTAAATAGGGACCTGTATGAATGGCTCCACGAGGGTTCAGCTGTCTCT\n+TACTTTTAACCAGTGAAATTGACCTGCCCGTGAAGAGGCGGGCATAACACAGCAAGACGAGAAGACCCTA\n+TGGAGCTTTAATTTATTAATGCAAACAGTACCTAACAAACCCACAGGTCCTAAACTACCAAACCTGCATT\n+AAAAATTTCGGTTGGGGCGACCTCGGAGCAGAACCCAACCTCCGAGCAGTACATGCTAAGACTTCACCAG\n+TCAAAGCGAACTACTATACTCAATTGATCCAATAACTTGACCAACGGAACAAGTTACCCTAGGGATAACA\n+GCGCAATCCTATTCTAGAGTCCATATCAACAATAGGGTTTACGACCTCGATGTTGGATCAGGACATCCCG\n+ATGGTGCAGCCGCTATTAAAGGTTCGTTTGTTCAACGATTAAAGTCCTACGTGATCTGAGTTCAGACCGG\n+AGTAATCCAGGTCGGTTTCTATCTACNTTCAAATTCCTCCCTGTACGAAAGGACAAGAGAAATAAGGCCT\n+ACTTCACAAAGCGCCTTCCCCCGTAAATGATATCATCTCAACTTAGTATTATACCCACACCCACCCAAGA\n+ACAGGGTTTGTTAAGATGGCAGAGCCCGGTAATCGCATAAAACTTAAAACTTTACAGTCAGAGGTTCAAT\n+TCCTCTTCTTAACAACATACCCATGGCCAACCTCCTACTCCTCATTGTACCCATTCTAATCGCAATGGCA\n+TTCCTAATGCTTACCGAACGAAAAATTCTAGGCTATATACAACTACGCAAAGGCCCCAACGTTGTAGGCC\n+CCTACGGGCTACTACAACCCTTCGCTGACGCCATAAAACTCTTCACCAAAGAGCCCCTAAAACCCGCCAC\n+ATCTACCATCACCCTCTACATCACCGCCCCGACCTTAGCTCTCACCATCGCTCTTCTACTATGAACCCCC\n+CTCCCCATACCCAACCCCCTGGTCAACCTCAACCTAGGCCTCCTATTTATTCTAGCCACCTCTAGCCTAG\n+CCGTTTACTCAATCCTCTGATCAGGGTGAGCATCAAACTCAAACTACGCCCTGATCGGCGCACTGCGAGC\n+AGTAGCCCAAACAATCTCATATGAAGTCACCC
TAGCCATCATTCTACTATCAACATTACTAATAAGTGGC\n+TCCTTTAACCTCTCCA'..b'GTTCTTCAAATATCTACTCATCTTCCTAATTACCATACTAATCTTAGTTACCGCTAA\n+CAACCTATTCCAACTGTTCATCGGCTGAGAGGGCGTAGGAATTATATCCTTCTTGCTCATCAGTTGATGA\n+TACGCCCGAGCAGATGCCAACACAGCAGCCATTCAAGCAATCCTATACAACCGTATCGGCGATATCGGTT\n+TCATCCTCGCCTTAGCATGATTTATCCTACACTCCAACTCATGAGACCCACAACAAATAGCCCTTCTAAA\n+CGCTAATCCAAGCCTCACCCCACTACTAGGCCTCCTCCTAGCAGCAGCAGGCAAATCAGCCCAATTAGGT\n+CTCCACCCCTGACTCCCCTCAGCCATAGAAGGCCCCACCCCAGTCTCAGCCCTACTCCACTCAAGCACTA\n+TAGTTGTAGCAGGAATCTTCTTACTCATCCGCTTCCACCCCCTAGCAGAAAATAGCCCACTAATCCAAAC\n+TCTAACACTATGCTTAGGCGCTATCACCACTCTGTTCGCAGCAGTCTGCGCCCTTACACAAAATGACATC\n+AAAAAAATCGTAGCCTTCTCCACTTCAAGTCAACTAGGACTCATAATAGTTACAATCGGCATCAACCAAC\n+CACACCTAGCATTCCTGCACATCTGTACCCACGCCTTCTTCAAAGCCATACTATTTATGTGCTCCGGGTC\n+CATCATCCACAACCTTAACAATGAACAAGATATTCGAAAAATAGGAGGACTACTCAAAACCATACCTCTC\n+ACTTCAACCTCCCTCACCATTGGCAGCCTAGCATTAGCAGGAATACCTTTCCTCACAGGTTTCTACTCCA\n+AAGACCACATCATCGAAACCGCAAACATATCATACACAAACGCCTGAGCCCTATCTATTACTCTCATCGC\n+TACCTCCCTGACAAGCGCCTATAGCACTCGAATAATTCTTCTCACCCTAACAGGTCAACCTCGCTTCCCC\n+ACCCTTACTAACATTAACGAAAATAACCCCACCCTACTAAACCCCATTAAACGCCTGGCAGCCGGAAGCC\n+TATTCGCAGGATTTCTCATTACTAACAACATTTCCCCCGCATCCCCCTTCCAAACAACAATCCCCCTCTA\n+CCTAAAACTCACAGCCCTCGCTGTCACTTTCCTAGGACTTCTAACAGCCCTAGACCTCAACTACCTAACC\n+AACAAACTTAAAATAAAATCCCCACTATGCACATTTTATTTCTCCAACATACTCGGATTCTACCCTAGCA\n+TCACACACCGCACAATCCCCTATCTAGGCCTTCTTACGAGCCAAAACCTGCCCCTACTCCTCCTAGACCT\n+AACCTGACTAGAAAAGCTATTACCTAAAACAATTTCACAGCACCAAATCTCCACCTCCATCATCACCTCA\n+ACCCAAAAAGGCATAATTAAACTTTACTTCCTCTCTTTCTTCTTCCCACTCATCCTAACCCTACTCCTAA\n+TCACATAACCTATTCCCCCGAGCAATCTCAATTACAATATATACACCAACAAACAATGTTCAACCAGTAA\n+CTACTACTAATCAACGCCCATAATCATACAAAGCCCCCGCACCAATAGGATCCTCCCGAATCAACCCTGA\n+CCCCTCTCCTTCATAAATTATTCAGCTTCCTACACTATTAAAGTTTACCACAACCACCACCCCATCATAC\n+TCTTTCACCCACAGCACCAATCCTACCTCCATCGCTAACCCCACTAAAACACTCACCAAGACCTCAACCC\n+CTGACCCCCATGCCTCAGGATACTCCTCAATAGCCATCGCTGTAGTATATCCAAAGACAACCATCATTCC\n+CCCTAAATAAATTAAAAAAACTATTAAACCCATATAACCTCCCCCAAAATTCA
GAATAATAACACACCCG\n+ACCACACCGCTAACAATCAATACTAAACCCCCATAAATAGGAGAAGGCTTAGAAGAAAACCCCACAAACC\n+CCATTACTAAACCCACACTCAACAGAAACAAAGCATACATCATTATTCTCGCACGGACTACAACCACGAC\n+CAATGATATGAAAAACCATCGTTGTATTTCAACTACAAGAACACCAATGACCCCAATACGCAAAACTAAC\n+CCCCTAATAAAATTAATTAACCACTCATTCATCGACCTCCCCACCCCATCCAACATCTCCGCATGATGAA\n+ACTTCGGCTCACTCCTTGGCGCCTGCCTGATCCTCCAAATCACCACAGGACTATTCCTAGCCATGCACTA\n+CTCACCAGACGCCTCAACCGCCTTTTCATCAATCGCCCACATCACTCGAGACGTAAATTATGGCTGAATC\n+ATCCGCTACCTTCACGCCAATGGCGCCTCAATATTCTTTATCTGCCTCTTCCTACACATCGGGCGAGGCC\n+TATATTACGGATCATTTCTCTACTCAGAAACCTGAAACATCGGCATTATCCTCCTGCTTGCAACTATAGC\n+AACAGCCTTCATAGGCTATGTCCTCCCGTGAGGCCAAATATCATTCTGAGGGGCCACAGTAATTACAAAC\n+TTACTATCCGCCATCCCATACATTGGGACAGACCTAGTTCAATGAATCTGAGGAGGCTACTCAGTAGACA\n+GTCCCACCCTCACACGATTCTTTACCTTTCACTTCATCTTGCCCTTCATTATTGCAGCCCTAGCAACACT\n+CCACCTCCTATTCTTGCACGAAACGGGATCAAACAACCCCCTAGGAATCACCTCCCATTCCGATAAAATC\n+ACCTTCCACCCTTACTACACAATCAAAGACGCCCTCGGCTTACTTCTCTTCCTTCTCTCCTTAATGACAT\n+TAACACTATTCTCACCAGACCTCCTAGGCGACCCAGACAATTATACCCTAGCCAACCCCTTAAACACCCC\n+TCCCCACATCAAGCCCGAATGATATTTCCTATTCGCCTACACAATTCTCCGATCCGTCCCTAACAAACTA\n+GGAGGCGTCCTTGCCCTATTACTATCCATCCTCATCCTAGCAATAATCCCCATCCTCCATATATCCAAAC\n+AACAAAGCATAATATTTCGCCCACTAAGCCAATCACTTTATTGACTCCTAGCCGCAGACCTCCTCATTCT\n+AACCTGAATCGGAGGACAACCAGTAAGCTACCCTTTTACCATCATTGGACAAGTAGCATCCGTACTATAC\n+TTCACAACAATCCTAATCCTAATACCAACTATCTCCCTAATTGAAAACAAAATACTCAAATGGGCCTGTC\n+CTTGTAGTATAAACTAATACACCAGTCTTGTAAACCGGAGATGAAAACCTTTTTCCAAGGACAAATCAGA\n+GAAAAAGTCTTTAACTCCACCATTAGCACCCAAAGCTAAGATTCTAATTTAAACTATTCTCTGTTCTTTC\n+ATGGGGAAGCAGATTTGGGTACCACCCAAGTATTGACTCACCCATCAACAACCGCTATGTATTTCGTACA\n+TTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCA\n+ATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCA\n+ACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAG\n+TACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCC\n+TCAGATAGGGGTCCCTTGACCACCATCCTCCGTGAAATCAATATCCCGCACAAGAGTGCTACTCTCCTCG\n+CTCCGGGCC
CATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTC\n+ATAAAGCCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATG\n+\n' |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 tool-data/all_fasta.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/all_fasta.loc.sample Sat May 06 13:18:09 2017 -0400 |
b |
@@ -0,0 +1,19 @@ +#This file lists the locations and dbkeys of all the fasta files +#under the "genome" directory (a directory that contains a directory +#for each build). The script extract_fasta.py will generate the file +#all_fasta.loc. This file has the format (white space characters are +#TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_path> +# +#So, all_fasta.loc could look something like this: +# +#apiMel3 apiMel3 Honeybee (Apis mellifera): apiMel3 /path/to/genome/apiMel3/apiMel3.fa +#hg19canon hg19 Human (Homo sapiens): hg19 Canonical /path/to/genome/hg19/hg19canon.fa +#hg19full hg19 Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa +# +#Your all_fasta.loc file should contain an entry for each individual +#fasta file. So there will be multiple fasta files for each build, +#such as with hg19 above. +# +#Arabidopsis_thaliana Arabidopsis_thaliana_TAIR10 Arabidopsis_thaliana: TAIR 10 /export/home1/users/biocomp/chbernar/galaxy_testing/tool-data/from_personal_data/Fasta/TAIR10.fa |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 tool-data/bismark_indexes.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool-data/bismark_indexes.loc.sample Sat May 06 13:18:09 2017 -0400 |
b |
@@ -0,0 +1,2 @@ +#<species> <version> <release> <value> <dbkey> <name> <path> +#Arabidopsis_thaliana TAIR10 30 Arabidopsis_t_TAIR10_30 Arabidopsis_t_TAIR10_30 Arabidopsis thaliana: TAIR10 /export/home1/users/biocomp/chbernar/galaxy_testing/tool-data/from_personal_data/bismark_indexes/Arabidopsis_thaliana_TAIR10/Bisulfite_Genome \ No newline at end of file |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 tool-data/bowtie2_indices.loc.sample --- a/tool-data/bowtie2_indices.loc.sample Wed Feb 11 16:21:51 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,37 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Bowtie2 indexed sequences data files. You will -#need to create these data files and then create a bowtie_indices.loc -#file similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The bowtie2_indices.loc -#file has this format (longer white space characters are TAB characters): -# -#<unique_build_id> <dbkey> <display_name> <file_base_path> -# -#So, for example, if you had hg18 indexed stored in -#/depot/data2/galaxy/bowtie2/hg18/, -#then the bowtie2_indices.loc entry would look like this: -# -#hg18 hg18 hg18 /depot/data2/galaxy/bowtie2/hg18/hg18 -# -#and your /depot/data2/galaxy/bowtie2/hg18/ directory -#would contain hg18.*.ebwt files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt -#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt -#...etc... -# -#Your bowtie2_indices.loc file should include an entry per line for each -#index set you have stored. The "file" in the path does not actually -#exist, but it is the prefix for the actual index files. For example: -# -#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie2/hg18/hg18canon -#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie2/hg18/hg18full -#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie2/hg19/hg19 -#...etc... -# -#Note that for backwards compatibility with workflows, the unique ID of -#an entry must be the path that was in the original loc file, because that -#is the value stored in the workflow for that parameter. That is why the -#hg19 entry above looks odd. New genomes can be better-looking. -# |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 tool-data/bowtie_indices.loc.sample --- a/tool-data/bowtie_indices.loc.sample Wed Feb 11 16:21:51 2015 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,37 +0,0 @@ -#This is a sample file distributed with Galaxy that enables tools -#to use a directory of Bowtie indexed sequences data files. You will -#need to create these data files and then create a bowtie_indices.loc -#file similar to this one (store it in this directory) that points to -#the directories in which those files are stored. The bowtie_indices.loc -#file has this format (longer white space characters are TAB characters): -# -#<unique_build_id> <dbkey> <display_name> <file_base_path> -# -#So, for example, if you had hg18 indexed stored in -#/depot/data2/galaxy/bowtie/hg18/, -#then the bowtie_indices.loc entry would look like this: -# -#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18 -# -#and your /depot/data2/galaxy/bowtie/hg18/ directory -#would contain hg18.*.ebwt files: -# -#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt -#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt -#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt -#...etc... -# -#Your bowtie_indices.loc file should include an entry per line for each -#index set you have stored. The "file" in the path does not actually -#exist, but it is the prefix for the actual index files. For example: -# -#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon -#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie/hg18/hg18full -#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie/hg19/hg19 -#...etc... -# -#Note that for backwards compatibility with workflows, the unique ID of -#an entry must be the path that was in the original loc file, because that -#is the value stored in the workflow for that parameter. That is why the -#hg19 entry above looks odd. New genomes can be better-looking. -# |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 tool_data_table_conf.xml.sample --- a/tool_data_table_conf.xml.sample Wed Feb 11 16:21:51 2015 -0500 +++ b/tool_data_table_conf.xml.sample Sat May 06 13:18:09 2017 -0400 |
b |
@@ -1,13 +1,13 @@ <!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> <tables> - <!-- Locations of indexes in the Bowtie mapper format --> - <table name="bowtie_indexes" comment_char="#"> - <columns>value, dbkey, name, path</columns> - <file path="tool-data/bowtie_indices.loc" /> + <!-- Location of all bismark indexes --> + <table name="bismark_indexes" comment_char="#" allow_duplicate_entries="False"> + <columns>species, version, release, value, dbkey, name, path</columns> + <file path="tool-data/bismark_indexes.loc" /> </table> - <!-- Locations of indexes in the Bowtie2 mapper format --> - <table name="bowtie2_indexes" comment_char="#"> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#" allow_duplicate_entries="False"> <columns>value, dbkey, name, path</columns> - <file path="tool-data/bowtie2_indices.loc" /> + <file path="tool-data/all_fasta.loc" /> </table> </tables> |
b |
diff -r 0f8646f22b8d -r fcadce4d9a06 tool_dependencies.xml --- a/tool_dependencies.xml Wed Feb 11 16:21:51 2015 -0500 +++ b/tool_dependencies.xml Sat May 06 13:18:09 2017 -0400 |
b |
@@ -1,12 +1,17 @@ <?xml version="1.0"?> <tool_dependency> + <!-- from test tool shed--> + <!--<package name="samtools" version="0.1.19"> + <repository changeset_revision="a0ab0fae27e5" name="package_samtools_0_1_19" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + </package>--> + <!--from main tool shed--> <package name="samtools" version="0.1.19"> - <repository changeset_revision="923adc89c666" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="c9bd782f5342" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> - <set_environment version="1.0"> - <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable> - </set_environment> <package name="bowtie2" version="2.1.0"> + <repository changeset_revision="017a00c265f1" name="package_bowtie2_2_1_0" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <!--<package name="bowtie2" version="2.1.0"> <install version="1.0"> <actions> <action type="download_by_url">http://downloads.sourceforge.net/project/bowtie-bio/bowtie2/2.1.0/bowtie2-2.1.0-source.zip</action> @@ -32,30 +37,5 @@ <readme> Compiling bowtie2 requires zlib and libpthread to be present on your system. 
</readme> - </package> - <package name="bowtie" version="0.12.8"> - <install version="1.0"> - <actions> - <action type="download_by_url">http://downloads.sourceforge.net/project/bowtie-bio/bowtie/0.12.8/bowtie-0.12.8-src.zip</action> - <action type="shell_command">make</action> - <action type="move_file"> - <source>bowtie</source> - <destination>$INSTALL_DIR/bin</destination> - </action> - <action type="move_file"> - <source>bowtie-inspect</source> - <destination>$INSTALL_DIR/bin</destination> - </action> - <action type="move_file"> - <source>bowtie-build</source> - <destination>$INSTALL_DIR/bin</destination> - </action> - <action type="set_environment"> - <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable> - </action> - </actions> - </install> - <readme> - </readme> - </package> + </package>--> </tool_dependency> |