| Previous changeset 16:a4504327c890 (2019-08-21) Next changeset 18:bb74e17e47d7 (2019-10-01) |
|
Commit message:
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit d85012b50faac3234496bb51e2a29c2d8c113bde" |
|
modified:
bismark_bowtie2_wrapper.xml bismark_wrapper.py |
|
added:
test-data/mapped_reads_pbat.bam test-data/mapping_report_pbat.txt test-data/summary_pbat.txt |
| b |
| diff -r a4504327c890 -r aa9bf0f29a9f bismark_bowtie2_wrapper.xml --- a/bismark_bowtie2_wrapper.xml Wed Aug 21 12:59:48 2019 -0400 +++ b/bismark_bowtie2_wrapper.xml Wed Aug 28 07:08:45 2019 -0400 |
| [ |
| @@ -1,4 +1,4 @@ -<tool id="bismark_bowtie2" name="Bismark Mapper" version="0.22.1+galaxy3" profile="18.01"> +<tool id="bismark_bowtie2" name="Bismark Mapper" version="0.22.1+galaxy4" profile="18.01"> <description>Bisulfite reads mapper</description> <requirements> <requirement type="package" version="0.22.1">bismark</requirement> @@ -12,11 +12,20 @@ #if $singlePaired.input_singles.ext == "fasta": #set read1 = 'input_1.fa' #elif $singlePaired.input_singles.ext in ["fastq.gz", "fastqsanger.gz"]: - #set read1 = 'input_1.fq.gz' + #if $params.pbat == "--pbat" + #set read1 = 'input_1.fq' + #else + #set read1 = 'input_1.fq.gz' + #end if #else #set read1 = 'input_1.fq' #end if - ln -s '${singlePaired.input_singles}' '${read1}' && + + #if $params.pbat == "--pbat" and $singlePaired.input_singles.ext in ["fastq.gz", "fastqsanger.gz"]: + zcat '${singlePaired.input_singles}' > '${read1}' && + #else + ln -s '${singlePaired.input_singles}' '${read1}' && + #end if #else: #set $mate1 = list() #set $mate2 = list() @@ -25,20 +34,38 @@ #if $mate_pair.input_mate1.ext == "fasta": #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fa' #elif $mate_pair.input_mate1.ext in ["fastq.gz", "fastqsanger.gz"]: - #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq.gz' + #if $params.pbat == "--pbat" + #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq' + #else + #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq.gz' + #end if #else #set read1 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_1.fq' #end if - ln -s '${mate_pair.input_mate1}' '${read1}' && + + #if $params.pbat == "--pbat" and $mate_pair.input_mate1.ext in ["fastq.gz", "fastqsanger.gz"]: + zcat '${mate_pair.input_mate1}' > '${read1}' && + #else + ln -s '${mate_pair.input_mate1}' '${read1}' && + #end if #if $mate_pair.input_mate2.ext == "fasta": #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fa' #elif $mate_pair.input_mate2.ext in ["fastq.gz", "fastqsanger.gz"]: - #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq.gz' + #if $params.pbat == "--pbat" + #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq' + #else + #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq.gz' + #end if #else #set read2 = re.sub('[^\w\-_.]', '_', str($mate_pair.input_mate1.element_identifier)) + '_2.fq' #end if - ln -s '${mate_pair.input_mate2}' '${read2}' && + + #if $params.pbat == "--pbat" and $mate_pair.input_mate2.ext in ["fastq.gz", "fastqsanger.gz"]: + zcat '${mate_pair.input_mate2}' > '${read2}' && + #else + ln -s '${mate_pair.input_mate2}' '${read2}' && + #end if $mate1.append( str($read1) ) $mate2.append( str($read2) ) @@ -138,6 +165,8 @@ $params.non_directional + $params.pbat + #if $params.bismark_stdout: --stdout '$output_stdout' #end if @@ -289,6 +318,11 @@ <param argument="--non-directional" name="non_directional" type="boolean" truevalue="--non-directional" falsevalue="" checked="false" label="The sequencing library was constructed in a non strand-specific manner, alignments to all four bisulfite strands will be reported."/> + <param argument="--pbat" name="pbat" type="boolean" + truevalue="--pbat" falsevalue="" checked="false" + label="Treat input data as PBAT-Seq libraries" + help="Use this option only if you are certain that your libraries were constructed following a PBAT protocol. If you don't know what PBAT-Seq is you should not specify this option." /> + <!--end output options --> </when> <!-- full --> @@ -544,6 +578,38 @@ <has_text text="--score-min 'L,0,-0.8'" /> </assert_command> </test> + <test> + <param name="genomeSource" value="history"/> + <param name="own_file" value="mm10.tiny.fa.gz" /> + <param name="sPaired" value="single"/> + <param name="input_singles" value="input1.fq.gzip" ftype="fastqsanger.gz"/> + <param name="sort_bam" value="false"/> + <param name="settingsType" value="custom"/> + <param name="suppressed_read_file" value="true"/> + <param name="unmapped_read_file" value="true"/> + <param name="bismark_stdout" value="true"/> + <param name="isReportOutput" value="true"/> + <conditional name="params"> + <param name="settingsType" value="custom" /> + <param name="pbat" value="true" /> + </conditional> + + <output name="output_stdout" file="summary_pbat.txt" ftype="txt" lines_diff="94"> + <assert_contents> + <has_text text="Sequences analysed in total:" /> + <has_text text="44115" /> + <has_text text="Mapping efficiency:" /> + <has_text text="0.0%" /> + <has_text text="Bismark run complete" /> + </assert_contents> + </output> + <output name="report_file" file="mapping_report_pbat.txt" ftype="txt" lines_diff="6"/> + <output name="output" file="mapped_reads_pbat.bam" ftype="qname_input_sorted.bam" lines_diff="14"/> + + <assert_command> + <has_text text="--pbat" /> + </assert_command> + </test> </tests> <help> |
| b |
| diff -r a4504327c890 -r aa9bf0f29a9f bismark_wrapper.py --- a/bismark_wrapper.py Wed Aug 21 12:59:48 2019 -0400 +++ b/bismark_wrapper.py Wed Aug 28 07:08:45 2019 -0400 |
| [ |
| @@ -69,6 +69,7 @@ parser.add_argument('--fasta', action='store_true', help='Query filetype is in FASTA format') parser.add_argument('--phred64-quals', dest='phred64', action="store_true") parser.add_argument('--non-directional', dest='non_directional', action="store_true") + parser.add_argument('--pbat', dest='pbat', action="store_true") parser.add_argument('--skip-reads', dest='skip_reads', type=int) parser.add_argument('--score-min', dest='score_min', type=str) @@ -153,7 +154,9 @@ # Build bismark command tmp_bismark_dir = tempfile.mkdtemp() output_dir = os.path.join(tmp_bismark_dir, 'results') - cmd = ['bismark', '--bam', '--gzip', '--temp_dir', tmp_bismark_dir, '-o', output_dir, '--quiet'] + cmd = ['bismark', '--bam', '--temp_dir', tmp_bismark_dir, '-o', output_dir, '--quiet'] + if not args.pbat: + cmd.append('--gzip') if args.fasta: # the query input files (specified as mate1,mate2 or singles) are FastA @@ -187,6 +190,8 @@ cmd.append('--phred64-quals') if args.non_directional: cmd.append('--non-directional') + if args.pbat: + cmd.append('--pbat') if args.suppress_header: cmd.append('--sam-no-hd') if args.output_unmapped_reads or (args.output_unmapped_reads_l and args.output_unmapped_reads_r): @@ -223,24 +228,24 @@ output_report_file.close() if args.output_suppressed_reads: - if glob(os.path.join(output_dir, '*ambiguous_reads.txt')): - shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads.txt'))[0], args.output_suppressed_reads) + if glob(os.path.join(output_dir, '*ambiguous_reads.*')): + shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads.*'))[0], args.output_suppressed_reads) if args.output_suppressed_reads_l: - if glob(os.path.join(output_dir, '*ambiguous_reads_1.txt')): - shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_1.txt'))[0], args.output_suppressed_reads_l) + if glob(os.path.join(output_dir, '*ambiguous_reads_1.*')): + shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_1.*'))[0], args.output_suppressed_reads_l) if args.output_suppressed_reads_r: - if glob(os.path.join(output_dir, '*ambiguous_reads_2.txt')): - shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_2.txt'))[0], args.output_suppressed_reads_r) + if glob(os.path.join(output_dir, '*ambiguous_reads_2.*')): + shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_2.*'))[0], args.output_suppressed_reads_r) if args.output_unmapped_reads: - if glob(os.path.join(output_dir, '*unmapped_reads.txt')): - shutil.move(glob(os.path.join(output_dir, '*unmapped_reads.txt'))[0], args.output_unmapped_reads) + if glob(os.path.join(output_dir, '*unmapped_reads.*')): + shutil.move(glob(os.path.join(output_dir, '*unmapped_reads.*'))[0], args.output_unmapped_reads) if args.output_unmapped_reads_l: - if glob(os.path.join(output_dir, '*unmapped_reads_1.txt')): - shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_1.txt'))[0], args.output_unmapped_reads_l) + if glob(os.path.join(output_dir, '*unmapped_reads_1.*')): + shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_1.*'))[0], args.output_unmapped_reads_l) if args.output_unmapped_reads_r: - if glob(os.path.join(output_dir, '*unmapped_reads_2.txt')): - shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_2.txt'))[0], args.output_unmapped_reads_r) + if glob(os.path.join(output_dir, '*unmapped_reads_2.*')): + shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_2.*'))[0], args.output_unmapped_reads_r) try: # merge all bam files |
| b |
| diff -r a4504327c890 -r aa9bf0f29a9f test-data/mapped_reads_pbat.bam |
| b |
| Binary file test-data/mapped_reads_pbat.bam has changed |
| b |
| diff -r a4504327c890 -r aa9bf0f29a9f test-data/mapping_report_pbat.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mapping_report_pbat.txt Wed Aug 28 07:08:45 2019 -0400 |
| b |
| @@ -0,0 +1,40 @@ +Bismark report for: input_1.fq (version: v0.22.1) +Option '--pbat' specified: alignments to original strands (OT and OB) strands were ignored (i.e. not performed) +Bismark was run with Bowtie 2 against the bisulfite genome of /tmp/tmpuqE7r1/ with the specified options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet + +Final Alignment report +====================== +Sequences analysed in total: 44115 +Number of alignments with a unique best hit from the different alignments: 13 +Mapping efficiency: 0.0% +Sequences with no alignments under any condition: 44059 +Sequences did not map uniquely: 43 +Sequences which were discarded because genomic sequence could not be extracted: 0 + +Number of sequences with unique best (first) alignment came from the bowtie output: +CT/CT: 0 ((converted) top strand) +CT/GA: 0 ((converted) bottom strand) +GA/CT: 11 (complementary to (converted) top strand) +GA/GA: 2 (complementary to (converted) bottom strand) + +Final Cytosine Methylation Report +================================= +Total number of C's analysed: 307 + +Total methylated C's in CpG context: 1 +Total methylated C's in CHG context: 3 +Total methylated C's in CHH context: 227 +Total methylated C's in Unknown context: 0 + +Total unmethylated C's in CpG context: 1 +Total unmethylated C's in CHG context: 4 +Total unmethylated C's in CHH context: 71 +Total unmethylated C's in Unknown context: 0 + +C methylated in CpG context: 50.0% +C methylated in CHG context: 42.9% +C methylated in CHH context: 76.2% +Can't determine percentage of methylated Cs in Unknown context (CN or CHN) if value was 0 + + +Bismark completed in 0d 0h 0m 7s |
| b |
| diff -r a4504327c890 -r aa9bf0f29a9f test-data/summary_pbat.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/summary_pbat.txt Wed Aug 28 07:08:45 2019 -0400 |
| [ |
| b'@@ -0,0 +1,493 @@\n+Create a temporary index with the offered files from the user. Utilizing the script: bismark_genome_preparation\n+Generating index with: \'bismark_genome_preparation --bowtie2 /tmp/tmpuqE7r1\'\n+Writing bisulfite genomes out into a single MFA (multi FastA) file\n+\n+Bisulfite Genome Indexer version v0.22.1 (last modified: 14 April 2019)\n+\n+Step I - Prepare genome folders - completed\n+\n+\n+\n+Total number of conversions performed:\n+C->T:\t146875\n+G->A:\t150504\n+\n+Step II - Genome bisulfite conversions - completed\n+\n+\n+Bismark Genome Preparation - Step III: Launching the Bowtie 2 indexer\n+Please be aware that this process can - depending on genome size - take several hours!\n+Settings:\n+ Output files: "BS_CT.*.bt2"\n+ Line rate: 6 (line is 64 bytes)\n+ Lines per side: 1 (side is 64 bytes)\n+ Offset rate: 4 (one in 16)\n+ FTable chars: 10\n+ Strings: unpacked\n+ Max bucket size: default\n+ Max bucket size, sqrt multiplier: default\n+ Max bucket size, len divisor: 4\n+ Difference-cover sample period: 1024\n+ Endianness: little\n+ Actual local endianness: little\n+ Sanity checking: disabled\n+ Assertions: disabled\n+ Random seed: 0\n+ Sizeofs: void*:8, int:4, long:8, size_t:8\n+Input files DNA, FASTA:\n+ genome_mfa.CT_conversion.fa\n+Building a SMALL index\n+Reading reference sizes\n+ Time reading reference sizes: 00:00:00\n+Calculating joined length\n+Writing header\n+Reserving space for joined string\n+Joining reference sequences\n+ Time to join reference sequences: 00:00:00\n+bmax according to bmaxDivN setting: 189039\n+Using parameters --bmax 141780 --dcv 1024\n+ Doing ahead-of-time memory usage test\n+ Passed! Constructing with these parameters: --bmax 141780 --dcv 1024\n+Constructing suffix-array element generator\n+Building DifferenceCoverSample\n+ Building sPrime\n+ Building sPrimeOrder\n+ V-Sorting samples\n+ V-Sorting samples time: 00:00:00\n+ Allocating rank array\n+ Ranking v-sort output\n+ Ranking v-sort output time: 00:00:00\n+ Invoking Larsson-Sadakane on ranks\n+ Invoking Larsson-Sadakane on ranks time: 00:00:00\n+ Sanity-checking and returning\n+Building samples\n+Reserving space for 12 sample suffixes\n+Generating random suffixes\n+QSorting 12 sample offsets, eliminating duplicates\n+QSorting sample offsets, eliminating duplicates time: 00:00:00\n+Multikey QSorting 12 samples\n+ (Using difference cover)\n+ Multikey QSorting samples time: 00:00:00\n+Calculating bucket sizes\n+Splitting and merging\n+ Splitting and merging time: 00:00:00\n+Avg bucket size: 756159 (target: 141779)\n+Converting suffix-array elements to index image\n+Allocating ftab, absorbFtab\n+Entering Ebwt loop\n+Getting block 1 of 1\n+ No samples; assembling all-inclusive block\n+ Sorting block of length 756159 for bucket 1\n+ (Using difference cover)\n+ Sorting block time: xxxx\n+Returning block of 756160 for bucket 1\n+Exited Ebwt loop\n+fchr[A]: 0\n+fchr[C]: 235897\n+fchr[G]: 235897\n+fchr[T]: 386401\n+fchr[$]: 756159\n+Exiting Ebwt::buildToDisk()\n+Returning from initFromVector\n+Wrote 4446745 bytes to primary EBWT file: BS_CT.1.bt2\n+Wrote 189044 bytes to secondary EBWT file: BS_CT.2.bt2\n+Re-opening _in1 and _in2 as input streams\n+Returning from Ebwt constructor\n+Headers:\n+ len: 756159\n+ bwtLen: 756160\n+ sz: 189040\n+ bwtSz: 189040\n+ lineRate: 6\n+ offRate: 4\n+ offMask: 0xfffffff0\n+ ftabChars: 10\n+ eftabLen: 20\n+ eftabSz: 80\n+ ftabLen: 1048577\n+ ftabSz: 4194308\n+ offsLen: 47260\n+ offsSz: 189040\n+ lineSz: 64\n+ sideSz: 64\n+ sideBwtSz: 48\n+ sideBwtLen: 192\n+ numSides: 3939\n+ numLines: 3939\n+ ebwtTotLen: 252096\n+ ebwtTotSz: 252096\n+ color: 0\n+ reverse: 0\n+Total time for call to driver() for forward index: xxxx\n+Reading reference sizes\n+ Time reading reference sizes: 00:00:00\n+Calculating joined length\n+Writing header\n+Reserving space for joined string\n+Joining reference sequences\n+ Time to join reference sequences: 00:00:00\n+ Time to reverse reference sequence: 00:00:00\n+bmax according to bmaxDivN '..b"\n+Temporary files will be written into the directory: /tmp/tmpi3V3GI/\n+Setting parallelization to single-threaded (default)\n+\n+Summary of all aligner options:\t-q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet\n+Current working directory is: /tmp/tmpU_oiEI/job_working_directory/000/4/working\n+\n+Now reading in and storing sequence information of the genome specified in: /tmp/tmpuqE7r1/\n+\n+chr chrY_JH584300_random (182347 bp)\n+chr chrY_JH584301_random (259875 bp)\n+chr chrY_JH584302_random (155838 bp)\n+chr chrY_JH584303_random (158099 bp)\n+\n+Single-core mode: setting pid to 1\n+\n+Single-end alignments will be performed\n+=======================================\n+\n+Input file is in FastQ format\n+Writing a G -> A converted version of the input file input_1.fq to /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq\n+\n+Created G -> A converted version of the FastQ file input_1.fq (44115 sequences in total)\n+\n+Input file is input_1.fq_G_to_A.fastq (FastQ)\n+\n+Now running 2 instances of Bowtie 2 against the bisulfite genome of /tmp/tmpuqE7r1/ with the specified options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet\n+\n+Now starting the Bowtie 2 aligner for GAreadCTgenome (reading in sequences from /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq with options -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet --nofw)\n+Using Bowtie 2 index: /tmp/tmpuqE7r1/Bisulfite_Genome/CT_conversion/BS_CT\n+\n+Found first alignment:\t1_1\t4\t*\t0\t0\t*\t*\t0\t0\tTTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA\tAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE\tYT:Z:UU\n+Now starting the Bowtie 2 aligner for GAreadGAgenome (reading in sequences from /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq with options -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --quiet --norc)\n+Using Bowtie 2 index: /tmp/tmpuqE7r1/Bisulfite_Genome/GA_conversion/BS_GA\n+\n+Found first alignment:\t1_1\t4\t*\t0\t0\t*\t*\t0\t0\tTTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA\tAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE\tYT:Z:UU\n+\n+>>> Writing bisulfite mapping results to /tmp/tmpi3V3GI/results/input_1_bismark_bt2.bam <<<\n+\n+Unmapped sequences will be written to /tmp/tmpi3V3GI/results/input_1.fq_unmapped_reads.fq.gz\n+Ambiguously mapping sequences will be written to /tmp/tmpi3V3GI/results/input_1.fq_ambiguous_reads.fq.gz\n+\n+Reading in the sequence file input_1.fq\n+Processed 44115 sequences in total\n+\n+\n+Successfully deleted the temporary file /tmp/tmpi3V3GI/input_1.fq_G_to_A.fastq\n+\n+Final Alignment report\n+======================\n+Sequences analysed in total:\t44115\n+Number of alignments with a unique best hit from the different alignments:\t13\n+Mapping efficiency:\t0.0%\n+\n+Sequences with no alignments under any condition:\t44059\n+Sequences did not map uniquely:\t43\n+Sequences which were discarded because genomic sequence could not be extracted:\t0\n+\n+Number of sequences with unique best (first) alignment came from the bowtie output:\n+CT/CT:\t0\t((converted) top strand)\n+CT/GA:\t0\t((converted) bottom strand)\n+GA/CT:\t11\t(complementary to (converted) top strand)\n+GA/GA:\t2\t(complementary to (converted) bottom strand)\n+\n+Final Cytosine Methylation Report\n+=================================\n+Total number of C's analysed:\t307\n+\n+Total methylated C's in CpG context:\t1\n+Total methylated C's in CHG context:\t3\n+Total methylated C's in CHH context:\t227\n+Total methylated C's in Unknown context:\t0\n+\n+Total unmethylated C's in CpG context:\t1\n+Total unmethylated C's in CHG context:\t4\n+Total unmethylated C's in CHH context:\t71\n+Total unmethylated C's in Unknown context:\t0\n+\n+C methylated in CpG context:\t50.0%\n+C methylated in CHG context:\t42.9%\n+C methylated in CHH context:\t76.2%\n+Can't determine percentage of methylated Cs in Unknown context (CN or CHN) if value was 0\n+\n+\n+Bismark completed in xxxx\n+\n+====================\n+Bismark run complete\n+====================\n+\n" |