# HG changeset patch
# User bgruening
# Date 1619111146 0
# Node ID 120b7b35e4428f4cd019deef254c15cad77efe72
# Parent ff6ee551b153db7adb87499a964abf6934da0cb6
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/bismark commit 8fdc76a99a9dcf34549898a208317607afd18798"
diff -r ff6ee551b153 -r 120b7b35e442 bismark2report_wrapper.py
--- a/bismark2report_wrapper.py Fri Oct 04 11:33:27 2019 -0400
+++ b/bismark2report_wrapper.py Thu Apr 22 17:05:46 2021 +0000
@@ -12,30 +12,68 @@
def log_subprocess_output(logger, pipe):
- for line in iter(pipe.readline, b''):
+ for line in iter(pipe.readline, b""):
logger.debug(line.decode().rstrip())
def get_arg():
parser = argparse.ArgumentParser()
- parser.add_argument('--alignment_report', dest='alignment_report', action='store', metavar='alignment_report',
- type=str)
- parser.add_argument('--dedup_report', dest='dedup_report', action='store', metavar='dedup_report', type=str)
- parser.add_argument('--splitting_report', dest='splitting_report', action='store', metavar='splitting_report',
- type=str)
- parser.add_argument('--mbias_report', dest='mbias_report', action='store', metavar='mbias_report', type=str)
- parser.add_argument('--nucleotide_report', dest='nucleotide_report', action='store', metavar='nucleotide_report',
- type=str)
- parser.add_argument('--output_html_report', dest='output_html_report', action='store', metavar='output_html_report',
- type=str)
- parser.add_argument('--log_report', dest='log_report', action='store', metavar='log_report', type=str)
+ parser.add_argument(
+ "--alignment_report",
+ dest="alignment_report",
+ action="store",
+ metavar="alignment_report",
+ type=str,
+ )
+ parser.add_argument(
+ "--dedup_report",
+ dest="dedup_report",
+ action="store",
+ metavar="dedup_report",
+ type=str,
+ )
+ parser.add_argument(
+ "--splitting_report",
+ dest="splitting_report",
+ action="store",
+ metavar="splitting_report",
+ type=str,
+ )
+ parser.add_argument(
+ "--mbias_report",
+ dest="mbias_report",
+ action="store",
+ metavar="mbias_report",
+ type=str,
+ )
+ parser.add_argument(
+ "--nucleotide_report",
+ dest="nucleotide_report",
+ action="store",
+ metavar="nucleotide_report",
+ type=str,
+ )
+ parser.add_argument(
+ "--output_html_report",
+ dest="output_html_report",
+ action="store",
+ metavar="output_html_report",
+ type=str,
+ )
+ parser.add_argument(
+ "--log_report",
+ dest="log_report",
+ action="store",
+ metavar="log_report",
+ type=str,
+ )
args = parser.parse_args()
return args
def __main__():
args = get_arg()
- logger = logging.getLogger('bismark_deduplicate_wrapper')
+ logger = logging.getLogger("bismark_deduplicate_wrapper")
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
if args.log_report:
@@ -47,17 +85,23 @@
ch.setLevel(logging.DEBUG)
logger.addHandler(ch)
- cmd = ['bismark2report', '--verbose', '--alignment_report', args.alignment_report,
- '--output', args.output_html_report]
+ cmd = [
+ "bismark2report",
+ "--verbose",
+ "--alignment_report",
+ args.alignment_report,
+ "--output",
+ args.output_html_report,
+ ]
if args.dedup_report:
- cmd.extend(['--dedup_report', args.dedup_report])
+ cmd.extend(["--dedup_report", args.dedup_report])
if args.splitting_report:
- cmd.extend(['--splitting_report', args.splitting_report])
+ cmd.extend(["--splitting_report", args.splitting_report])
if args.mbias_report:
- cmd.extend(['--mbias_report', args.mbias_report])
+ cmd.extend(["--mbias_report", args.mbias_report])
if args.nucleotide_report:
- cmd.extend(['--nucleotide_report', args.nucleotide_report])
+ cmd.extend(["--nucleotide_report", args.nucleotide_report])
logger.info("Generating report with: '%s'", " ".join(cmd))
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
@@ -65,7 +109,12 @@
log_subprocess_output(logger, process.stdout)
exitcode = process.wait()
if exitcode != 0:
- stop_err(logger, "Bismark pretty report error (also check the log file if any)!\n%s" % process.stderr)
+ stop_err(
+ logger,
+ "Bismark pretty report error (also check the log file if any)!\n%s"
+ % process.stderr,
+ )
-if __name__ == "__main__": __main__()
+if __name__ == "__main__":
+ __main__()
diff -r ff6ee551b153 -r 120b7b35e442 bismark_bowtie2_wrapper.xml
--- a/bismark_bowtie2_wrapper.xml Fri Oct 04 11:33:27 2019 -0400
+++ b/bismark_bowtie2_wrapper.xml Thu Apr 22 17:05:46 2021 +0000
@@ -315,8 +315,8 @@
label="Write the bismark output and summary information to an extra file"/>
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r ff6ee551b153 -r 120b7b35e442 bismark_deduplicate_wrapper.py
--- a/bismark_deduplicate_wrapper.py Fri Oct 04 11:33:27 2019 -0400
+++ b/bismark_deduplicate_wrapper.py Thu Apr 22 17:05:46 2021 +0000
@@ -4,11 +4,10 @@
import logging
import os
import shutil
+import signal
import subprocess
import sys
-import signal
import tempfile
-from glob import glob
def stop_err(logger, msg):
@@ -24,17 +23,21 @@
def log_subprocess_output(logger, pipe):
- for line in iter(pipe.readline, b''):
+ for line in iter(pipe.readline, b""):
logger.debug(line.decode().rstrip())
def get_arg():
parser = argparse.ArgumentParser()
- parser.add_argument('--single_or_paired', dest='single_or_paired')
- parser.add_argument('--input', dest='input', metavar='input')
- parser.add_argument('--output_report', dest='output_report', metavar='output_report')
- parser.add_argument('--output_bam', dest='output_bam', metavar='output_report')
- parser.add_argument('--log_report', dest='log_report', metavar='log_filename', type=str)
+ parser.add_argument("--single_or_paired", dest="single_or_paired")
+ parser.add_argument("--input", dest="input", metavar="input")
+ parser.add_argument(
+ "--output_report", dest="output_report", metavar="output_report"
+ )
+    parser.add_argument("--output_bam", dest="output_bam", metavar="output_bam")
+ parser.add_argument(
+ "--log_report", dest="log_report", metavar="log_filename", type=str
+ )
args = parser.parse_args()
return args
@@ -42,7 +45,7 @@
def __main__():
args = get_arg()
- logger = logging.getLogger('bismark_deduplicate_wrapper')
+ logger = logging.getLogger("bismark_deduplicate_wrapper")
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
if args.log_report:
@@ -55,27 +58,39 @@
logger.addHandler(ch)
# ensure the input has a .bam suffix
- tmp_dir = tempfile.mkdtemp(prefix='tmp', suffix='')
+ tmp_dir = tempfile.mkdtemp(prefix="tmp", suffix="")
os.chdir(tmp_dir)
- default_reads_name = 'submitted_reads.bam'
+ default_reads_name = "submitted_reads.bam"
os.symlink(args.input, default_reads_name)
- single_or_paired = '-s' if args.single_or_paired == 'single' else '-p'
- cmd = ['deduplicate_bismark', single_or_paired, default_reads_name, '--bam']
+ single_or_paired = "-s" if args.single_or_paired == "single" else "-p"
+ cmd = ["deduplicate_bismark", single_or_paired, default_reads_name, "--bam"]
logger.info("Deduplicating with: '%s'", " ".join(cmd))
- process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
- preexec_fn=restore_sigpipe)
+ process = subprocess.Popen(
+ cmd,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.STDOUT,
+ preexec_fn=restore_sigpipe,
+ )
proc_out, proc_err = process.communicate()
logger.info(proc_out)
if process.returncode != 0:
- stop_err(logger, "Bismark deduplication error (also check the log file if any)!\n%s" % proc_err)
+ stop_err(
+ logger,
+ "Bismark deduplication error (also check the log file if any)!\n%s"
+ % proc_err,
+ )
- deduplicated_out_name = 'submitted_reads.deduplicated.bam'
- deduplicated_report_name = 'submitted_reads.deduplication_report.txt'
+ deduplicated_out_name = "submitted_reads.deduplicated.bam"
+ deduplicated_report_name = "submitted_reads.deduplication_report.txt"
logger.debug("Moving '%s' to galaxy: '%s'.", deduplicated_out_name, args.output_bam)
- shutil.move(deduplicated_out_name, args.output_bam )
- logger.debug("Moving '%s' to galaxy: '%s'.", deduplicated_report_name, args.output_report)
- shutil.move('submitted_reads.deduplication_report.txt', args.output_report)
+ shutil.move(deduplicated_out_name, args.output_bam)
+ logger.debug(
+ "Moving '%s' to galaxy: '%s'.", deduplicated_report_name, args.output_report
+ )
+ shutil.move("submitted_reads.deduplication_report.txt", args.output_report)
logger.debug("Done.")
-if __name__=="__main__": __main__()
\ No newline at end of file
+
+if __name__ == "__main__":
+ __main__()
diff -r ff6ee551b153 -r 120b7b35e442 bismark_methylation_extractor.py
--- a/bismark_methylation_extractor.py Fri Oct 04 11:33:27 2019 -0400
+++ b/bismark_methylation_extractor.py Thu Apr 22 17:05:46 2021 +0000
@@ -19,14 +19,14 @@
def log_subprocess_output(logger, pipe):
- for line in iter(pipe.readline, b''):
+ for line in iter(pipe.readline, b""):
logger.debug(line.decode().rstrip())
def zipper(dir, zip_file):
- output_files_regex = re.compile('^(Non_)?C[pH][GH]_.*')
- bedgraph_regex = re.compile('.*bedGraph.gz')
- zip = zipfile.ZipFile(zip_file, 'w', compression=zipfile.ZIP_DEFLATED)
+ output_files_regex = re.compile("^(Non_)?C[pH][GH]_.*")
+ bedgraph_regex = re.compile(".*bedGraph.gz")
+ zip = zipfile.ZipFile(zip_file, "w", compression=zipfile.ZIP_DEFLATED)
root_len = len(os.path.abspath(dir))
for root, dirs, files in os.walk(dir):
archive_root = os.path.abspath(root)[root_len:]
@@ -40,46 +40,52 @@
def build_genome_dir(genome_file):
- tmp_genome_dir = tempfile.mkdtemp(prefix='tmp')
- genome_path = os.path.join(tmp_genome_dir, '.'.join(os.path.split(genome_file)[1].split('.')[:-1]))
+ tmp_genome_dir = tempfile.mkdtemp(prefix="tmp")
+ genome_path = os.path.join(
+ tmp_genome_dir, ".".join(os.path.split(genome_file)[1].split(".")[:-1])
+ )
try:
# Create a hard link pointing to genome_file named 'genome_path'.fa.
- os.symlink(genome_file, genome_path + '.fa')
+ os.symlink(genome_file, genome_path + ".fa")
except Exception as e:
if os.path.exists(tmp_genome_dir):
shutil.rmtree(tmp_genome_dir)
- stop_err('Error in linking the reference database!\n%s' % e)
+ stop_err("Error in linking the reference database!\n%s" % e)
return tmp_genome_dir
def __main__():
# Parse Command Line
- parser = argparse.ArgumentParser(description='Wrapper for the bismark methylation caller.')
+ parser = argparse.ArgumentParser(
+ description="Wrapper for the bismark methylation caller."
+ )
# input options
- parser.add_argument('--infile', help='Input file in SAM or BAM format.')
- parser.add_argument('--single-end', dest='single_end', action="store_true")
- parser.add_argument('--paired-end', dest='paired_end', action="store_true")
+ parser.add_argument("--infile", help="Input file in SAM or BAM format.")
+ parser.add_argument("--single-end", dest="single_end", action="store_true")
+ parser.add_argument("--paired-end", dest="paired_end", action="store_true")
+
+ parser.add_argument("--multicore", dest="multicore", type=int, default=1)
+ parser.add_argument("--splitting_report", dest="splitting_report")
+ parser.add_argument("--mbias_report", dest="mbias_report")
+ parser.add_argument("--cytosine_report", dest="cytosine_report")
+ parser.add_argument("--genome_file", dest="genome_file")
+ parser.add_argument("--cx_context", action="store_true")
- parser.add_argument('--multicore', dest='multicore', type=int, default=1)
- parser.add_argument('--splitting_report', dest='splitting_report')
- parser.add_argument('--mbias_report', dest='mbias_report')
- parser.add_argument('--cytosine_report', dest="cytosine_report")
- parser.add_argument('--genome_file', dest="genome_file")
- parser.add_argument('--cx_context', action="store_true")
-
- parser.add_argument('--comprehensive', action="store_true")
- parser.add_argument('--merge-non-cpg', dest='merge_non_cpg', action="store_true")
- parser.add_argument('--no-overlap', dest='no_overlap', action="store_true")
- parser.add_argument('--compress', dest='compress')
- parser.add_argument('--ignore', dest='ignore', type=int)
- parser.add_argument('--ignore_r2', dest='ignore_r2', type=int)
- parser.add_argument('--ignore_3prime', dest='ignore_3prime', type=int)
- parser.add_argument('--ignore_3prime_r2', dest='ignore_3prime_r2', type=int)
- parser.add_argument('--log_report', dest='log_report', metavar='log_filename', type=str)
+ parser.add_argument("--comprehensive", action="store_true")
+ parser.add_argument("--merge-non-cpg", dest="merge_non_cpg", action="store_true")
+ parser.add_argument("--no-overlap", dest="no_overlap", action="store_true")
+ parser.add_argument("--compress", dest="compress")
+ parser.add_argument("--ignore", dest="ignore", type=int)
+ parser.add_argument("--ignore_r2", dest="ignore_r2", type=int)
+ parser.add_argument("--ignore_3prime", dest="ignore_3prime", type=int)
+ parser.add_argument("--ignore_3prime_r2", dest="ignore_3prime_r2", type=int)
+ parser.add_argument(
+ "--log_report", dest="log_report", metavar="log_filename", type=str
+ )
args = parser.parse_args()
- logger = logging.getLogger('bismark_methylation_extractor_wrapper')
+ logger = logging.getLogger("bismark_methylation_extractor_wrapper")
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
if args.log_report:
@@ -93,52 +99,68 @@
# Build methylation extractor command
output_dir = tempfile.mkdtemp()
- cmd = ['bismark_methylation_extractor', '--no_header', '-o', output_dir]
+ cmd = ["bismark_methylation_extractor", "--no_header", "-o", output_dir]
# Set up all options
if args.multicore > 3:
# divide multicore by 3 here since bismark will spawn ~3 jobs.
- cmd.extend(['--multicore', str(int(math.floor(args.multicore / 3)))])
+ cmd.extend(["--multicore", str(int(math.floor(args.multicore / 3)))])
if args.single_end:
- cmd.append('--single-end')
+ cmd.append("--single-end")
else:
- cmd.append('--paired-end')
+ cmd.append("--paired-end")
if args.no_overlap:
- cmd.append('--no_overlap')
+ cmd.append("--no_overlap")
if args.ignore:
- cmd.extend(['--ignore', str(args.ignore)])
+ cmd.extend(["--ignore", str(args.ignore)])
if args.ignore_r2:
- cmd.extend(['--ignore_r2', str(args.ignore_r2)])
+ cmd.extend(["--ignore_r2", str(args.ignore_r2)])
if args.ignore_3prime:
- cmd.extend(['--ignore_3prime', str(args.ignore_3prime)])
+ cmd.extend(["--ignore_3prime", str(args.ignore_3prime)])
if args.ignore_3prime_r2:
- cmd.extend(['--ignore_3prime_r2', str(args.ignore_3prime_r2)])
+ cmd.extend(["--ignore_3prime_r2", str(args.ignore_3prime_r2)])
if args.comprehensive:
- cmd.append('--comprehensive')
+ cmd.append("--comprehensive")
if args.merge_non_cpg:
- cmd.append('--merge_non_CpG')
+ cmd.append("--merge_non_CpG")
if args.splitting_report:
- cmd.append('--report')
+ cmd.append("--report")
tmp_genome_dir = None
if args.cytosine_report:
tmp_genome_dir = build_genome_dir(args.genome_file)
if args.cx_context:
cmd.extend(
- ['--bedGraph', '--CX_context', '--cytosine_report', '--CX_context', '--genome_folder', tmp_genome_dir])
+ [
+ "--bedGraph",
+ "--CX_context",
+ "--cytosine_report",
+ "--CX_context",
+ "--genome_folder",
+ tmp_genome_dir,
+ ]
+ )
else:
- cmd.extend(['--bedGraph', '--cytosine_report', '--genome_folder', tmp_genome_dir])
+ cmd.extend(
+ ["--bedGraph", "--cytosine_report", "--genome_folder", tmp_genome_dir]
+ )
cmd.append(args.infile)
# Run
logger.info("Methylation extractor run with: '%s'", " ".join(cmd))
prev_dir = os.getcwd()
- os.chdir(output_dir) # needed due to a bug in bismark where the coverage file cannot be found
+ os.chdir(
+ output_dir
+ ) # needed due to a bug in bismark where the coverage file cannot be found
process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
with process.stdout:
log_subprocess_output(logger, process.stdout)
exitcode = process.wait()
if exitcode != 0:
- stop_err(logger, "Bismark methylation extractor error (also check the log file if any)!\n%s" % process.stderr)
+ stop_err(
+ logger,
+ "Bismark methylation extractor error (also check the log file if any)!\n%s"
+ % process.stderr,
+ )
logger.info("Finished methylation extractor.")
# collect and copy output files
logger.debug("Zip output files to '%s'.", args.compress)
@@ -149,16 +171,25 @@
if args.cytosine_report:
logger.debug("Collecting cytosine report.")
if args.cx_context:
- shutil.move(glob(os.path.join(output_dir, '*CX_report.txt'))[0], args.cytosine_report)
+ shutil.move(
+ glob(os.path.join(output_dir, "*CX_report.txt"))[0],
+ args.cytosine_report,
+ )
else:
- shutil.move(glob(os.path.join(output_dir, '*CpG_report.txt'))[0], args.cytosine_report)
+ shutil.move(
+ glob(os.path.join(output_dir, "*CpG_report.txt"))[0],
+ args.cytosine_report,
+ )
# splitting report
if args.splitting_report:
logger.debug("Collecting splitting report.")
- shutil.move(glob(os.path.join(output_dir, '*_splitting_report.txt'))[0], args.splitting_report)
+ shutil.move(
+ glob(os.path.join(output_dir, "*_splitting_report.txt"))[0],
+ args.splitting_report,
+ )
if args.mbias_report:
logger.debug("Collecting M-Bias file.")
- shutil.move(glob(os.path.join(output_dir, '*M-bias.txt'))[0], args.mbias_report)
+ shutil.move(glob(os.path.join(output_dir, "*M-bias.txt"))[0], args.mbias_report)
# Clean up temp dirs
logger.debug("Cleanup temp dirs.")
@@ -168,4 +199,6 @@
shutil.rmtree(tmp_genome_dir)
logger.info("Done.")
-if __name__ == "__main__": __main__()
+
+if __name__ == "__main__":
+ __main__()
diff -r ff6ee551b153 -r 120b7b35e442 bismark_wrapper.py
--- a/bismark_wrapper.py Fri Oct 04 11:33:27 2019 -0400
+++ b/bismark_wrapper.py Thu Apr 22 17:05:46 2021 +0000
@@ -18,79 +18,137 @@
def log_subprocess_output(logger, pipe):
- for line in iter(pipe.readline, b''):
+ for line in iter(pipe.readline, b""):
logger.debug(line.decode().rstrip())
def __main__():
# Parse Command Line
- parser = argparse.ArgumentParser(description='Wrapper for the bismark bisulfite mapper.')
- parser.add_argument('-p', '--num-threads', dest='num_threads',
- type=int, default=4, help='Use this many threads to align reads. The default is 4.')
+ parser = argparse.ArgumentParser(
+ description="Wrapper for the bismark bisulfite mapper."
+ )
+ parser.add_argument(
+ "-p",
+ "--num-threads",
+ dest="num_threads",
+ type=int,
+ default=4,
+ help="Use this many threads to align reads. The default is 4.",
+ )
# input options
- parser.add_argument('--own-file', dest='own_file', help='')
- parser.add_argument('-D', '--indexes-path', dest='index_path',
- help='Indexes directory; location of .ebwt and .fa files.')
- parser.add_argument('-O', '--output', dest='output')
+ parser.add_argument("--own-file", dest="own_file", help="")
+ parser.add_argument(
+ "-D",
+ "--indexes-path",
+ dest="index_path",
+ help="Indexes directory; location of .ebwt and .fa files.",
+ )
+ parser.add_argument("-O", "--output", dest="output")
- parser.add_argument('--output-report-file', dest='output_report_file')
- parser.add_argument('--suppress-header', dest='suppress_header', action="store_true")
+ parser.add_argument("--output-report-file", dest="output_report_file")
+ parser.add_argument(
+ "--suppress-header", dest="suppress_header", action="store_true"
+ )
- parser.add_argument('--mate-paired', dest='mate_paired', action='store_true', help='Reads are mate-paired',
- default=False)
+ parser.add_argument(
+ "--mate-paired",
+ dest="mate_paired",
+ action="store_true",
+ help="Reads are mate-paired",
+ default=False,
+ )
- parser.add_argument('-1', '--mate1', dest='mate1',
- help='The forward reads file in Sanger FASTQ or FASTA format.')
- parser.add_argument('-2', '--mate2', dest='mate2',
- help='The reverse reads file in Sanger FASTQ or FASTA format.')
- parser.add_argument('--sort-bam', dest='sort_bam', action="store_true")
+ parser.add_argument(
+ "-1",
+ "--mate1",
+ dest="mate1",
+ help="The forward reads file in Sanger FASTQ or FASTA format.",
+ )
+ parser.add_argument(
+ "-2",
+ "--mate2",
+ dest="mate2",
+ help="The reverse reads file in Sanger FASTQ or FASTA format.",
+ )
+ parser.add_argument("--sort-bam", dest="sort_bam", action="store_true")
- parser.add_argument('--output-unmapped-reads', dest='output_unmapped_reads',
- help='Additional output file with unmapped reads (single-end).')
- parser.add_argument('--output-unmapped-reads-l', dest='output_unmapped_reads_l',
- help='File name for unmapped reads (left, paired-end).')
- parser.add_argument('--output-unmapped-reads-r', dest='output_unmapped_reads_r',
- help='File name for unmapped reads (right, paired-end).')
+ parser.add_argument(
+ "--output-unmapped-reads",
+ dest="output_unmapped_reads",
+ help="Additional output file with unmapped reads (single-end).",
+ )
+ parser.add_argument(
+ "--output-unmapped-reads-l",
+ dest="output_unmapped_reads_l",
+ help="File name for unmapped reads (left, paired-end).",
+ )
+ parser.add_argument(
+ "--output-unmapped-reads-r",
+ dest="output_unmapped_reads_r",
+ help="File name for unmapped reads (right, paired-end).",
+ )
- parser.add_argument('--output-suppressed-reads', dest='output_suppressed_reads',
- help='Additional output file with suppressed reads (single-end).')
- parser.add_argument('--output-suppressed-reads-l', dest='output_suppressed_reads_l',
- help='File name for suppressed reads (left, paired-end).')
- parser.add_argument('--output-suppressed-reads-r', dest='output_suppressed_reads_r',
- help='File name for suppressed reads (right, paired-end).')
- parser.add_argument('--stdout', dest='output_stdout',
- help='File name for the standard output of bismark.')
+ parser.add_argument(
+ "--output-suppressed-reads",
+ dest="output_suppressed_reads",
+ help="Additional output file with suppressed reads (single-end).",
+ )
+ parser.add_argument(
+ "--output-suppressed-reads-l",
+ dest="output_suppressed_reads_l",
+ help="File name for suppressed reads (left, paired-end).",
+ )
+ parser.add_argument(
+ "--output-suppressed-reads-r",
+ dest="output_suppressed_reads_r",
+ help="File name for suppressed reads (right, paired-end).",
+ )
+ parser.add_argument(
+ "--stdout",
+ dest="output_stdout",
+ help="File name for the standard output of bismark.",
+ )
- parser.add_argument('--single-paired', dest='single_paired',
- help='The single-end reads file in Sanger FASTQ or FASTA format.')
+ parser.add_argument(
+ "--single-paired",
+ dest="single_paired",
+ help="The single-end reads file in Sanger FASTQ or FASTA format.",
+ )
- parser.add_argument('--fastq', action='store_true', help='Query filetype is in FASTQ format')
- parser.add_argument('--fasta', action='store_true', help='Query filetype is in FASTA format')
- parser.add_argument('--phred64-quals', dest='phred64', action="store_true")
- parser.add_argument('--non-directional', dest='non_directional', action="store_true")
- parser.add_argument('--pbat', dest='pbat', action="store_true")
+ parser.add_argument(
+ "--fastq", action="store_true", help="Query filetype is in FASTQ format"
+ )
+ parser.add_argument(
+ "--fasta", action="store_true", help="Query filetype is in FASTA format"
+ )
+ parser.add_argument("--phred64-quals", dest="phred64", action="store_true")
+ parser.add_argument(
+ "--non_directional", dest="non_directional", action="store_true"
+ )
+ parser.add_argument("--pbat", dest="pbat", action="store_true")
- parser.add_argument('--skip-reads', dest='skip_reads', type=int)
- parser.add_argument('--score-min', dest='score_min', type=str)
- parser.add_argument('--qupto', type=int)
+ parser.add_argument("--skip-reads", dest="skip_reads", type=int)
+ parser.add_argument("--score-min", dest="score_min", type=str)
+ parser.add_argument("--qupto", type=int)
# paired end options
- parser.add_argument('-I', '--minins', dest='min_insert')
- parser.add_argument('-X', '--maxins', dest='max_insert')
- parser.add_argument('--no-mixed', dest='no_mixed', action="store_true")
- parser.add_argument('--no-discordant', dest='no_discordant', action="store_true")
+ parser.add_argument("-I", "--minins", dest="min_insert")
+ parser.add_argument("-X", "--maxins", dest="max_insert")
+ parser.add_argument("--no-mixed", dest="no_mixed", action="store_true")
+ parser.add_argument("--no-discordant", dest="no_discordant", action="store_true")
# parse general options
# default 20
- parser.add_argument('--seed-len', dest='seed_len', type=int)
+ parser.add_argument("--seed-len", dest="seed_len", type=int)
# default 15
- parser.add_argument('--seed-extention-attempts', dest='seed_extention_attempts', type=int)
+ parser.add_argument(
+ "--seed-extention-attempts", dest="seed_extention_attempts", type=int
+ )
# default 0
- parser.add_argument('--seed-mismatches', dest='seed_mismatches', type=int)
+ parser.add_argument("--seed-mismatches", dest="seed_mismatches", type=int)
# default 2
- parser.add_argument('--max-reseed', dest='max_reseed', type=int)
-
+ parser.add_argument("--max-reseed", dest="max_reseed", type=int)
"""
The number of megabytes of memory a given thread is given to store path
@@ -101,11 +159,11 @@
saying that chunk memory has been exhausted in --best mode, try adjusting
this parameter up to dedicate more memory to the descriptors. Default: 512.
"""
- parser.add_argument('--chunkmbs', type=int, default=512)
+ parser.add_argument("--chunkmbs", type=int, default=512)
args = parser.parse_args()
- logger = logging.getLogger('bismark_wrapper')
+ logger = logging.getLogger("bismark_wrapper")
logger.setLevel(logging.DEBUG)
ch = logging.StreamHandler(sys.stdout)
if args.output_stdout:
@@ -121,22 +179,28 @@
index_dir = ""
tmp_index_dir = None
if args.own_file:
- logger.info("Create a temporary index with the offered files from the user. "
- "Utilizing the script: bismark_genome_preparation")
+ logger.info(
+ "Create a temporary index with the offered files from the user. "
+ "Utilizing the script: bismark_genome_preparation"
+ )
tmp_index_dir = tempfile.mkdtemp()
- index_path = os.path.join(tmp_index_dir, '.'.join(os.path.split(args.own_file)[1].split('.')[:-1]))
+ index_path = os.path.join(
+ tmp_index_dir, ".".join(os.path.split(args.own_file)[1].split(".")[:-1])
+ )
try:
# Create a hard link pointing to args.own_file named 'index_path'.fa.
- os.symlink(args.own_file, index_path + '.fa')
+ os.symlink(args.own_file, index_path + ".fa")
except Exception as e:
if os.path.exists(tmp_index_dir):
shutil.rmtree(tmp_index_dir)
- stop_err(logger, 'Error in linking the reference database!\n%s' % e)
+ stop_err(logger, "Error in linking the reference database!\n%s" % e)
# bismark_genome_preparation needs the complete path to the folder in which the database is stored
- cmd_index = ['bismark_genome_preparation', '--bowtie2', tmp_index_dir]
+ cmd_index = ["bismark_genome_preparation", "--bowtie2", tmp_index_dir]
try:
logger.info("Generating index with: '%s'", " ".join(cmd_index))
- process = subprocess.Popen(cmd_index, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ process = subprocess.Popen(
+ cmd_index, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+ )
with process.stdout:
log_subprocess_output(logger, process.stdout)
exitcode = process.wait()
@@ -145,7 +209,7 @@
except Exception as e:
if os.path.exists(tmp_index_dir):
shutil.rmtree(tmp_index_dir)
- stop_err(logger, 'Error indexing reference sequence!\n%s' % e)
+ stop_err(logger, "Error indexing reference sequence!\n%s" % e)
index_dir = tmp_index_dir
else:
# bowtie path is the path to the index directory and the first path of the index file name
@@ -153,57 +217,80 @@
# Build bismark command
tmp_bismark_dir = tempfile.mkdtemp()
- output_dir = os.path.join(tmp_bismark_dir, 'results')
- cmd = ['bismark', '--bam', '--temp_dir', tmp_bismark_dir, '-o', output_dir, '--quiet']
+ output_dir = os.path.join(tmp_bismark_dir, "results")
+ cmd = [
+ "bismark",
+ "--bam",
+ "--temp_dir",
+ tmp_bismark_dir,
+ "-o",
+ output_dir,
+ "--quiet",
+ ]
if not args.pbat:
- cmd.append('--gzip')
+ cmd.append("--gzip")
if args.fasta:
# the query input files (specified as mate1,mate2 or singles) are FastA
- cmd.append('--fasta')
+ cmd.append("--fasta")
elif args.fastq:
- cmd.append('--fastq')
+ cmd.append("--fastq")
# alignment options
if args.num_threads > 2:
# divide num_threads by 2 here since bismark will spawn 2 jobs with -p threads each
- cmd.extend(['-p', str(int(math.floor(args.num_threads / 2)))])
+ cmd.extend(["-p", str(int(math.floor(args.num_threads / 2)))])
if args.seed_mismatches:
- cmd.extend(['-N', str(args.seed_mismatches)])
+ cmd.extend(["-N", str(args.seed_mismatches)])
if args.seed_len:
- cmd.extend(['-L', str(args.seed_len)])
+ cmd.extend(["-L", str(args.seed_len)])
if args.seed_extention_attempts:
- cmd.extend(['-D', str(args.seed_extention_attempts)])
+ cmd.extend(["-D", str(args.seed_extention_attempts)])
if args.max_reseed:
- cmd.extend(['-R', str(args.max_reseed)])
+ cmd.extend(["-R", str(args.max_reseed)])
if args.no_discordant:
- cmd.append('--no-discordant')
+ cmd.append("--no-discordant")
if args.no_mixed:
- cmd.append('--no-mixed')
+ cmd.append("--no-mixed")
if args.skip_reads:
- cmd.extend(['--skip', str(args.skip_reads)])
+ cmd.extend(["--skip", str(args.skip_reads)])
if args.score_min:
- cmd.extend(['--score_min', str(args.score_min)])
+ cmd.extend(["--score_min", str(args.score_min)])
if args.qupto:
- cmd.extend(['--upto', 'args.qupto'])
+        cmd.extend(["--upto", str(args.qupto)])
if args.phred64:
- cmd.append('--phred64-quals')
+ cmd.append("--phred64-quals")
if args.non_directional:
- cmd.append('--non-directional')
+ cmd.append("--non_directional")
if args.pbat:
- cmd.append('--pbat')
+ cmd.append("--pbat")
if args.suppress_header:
- cmd.append('--sam-no-hd')
- if args.output_unmapped_reads or (args.output_unmapped_reads_l and args.output_unmapped_reads_r):
- cmd.append('--un')
- if args.output_suppressed_reads or (args.output_suppressed_reads_l and args.output_suppressed_reads_r):
- cmd.append('--ambiguous')
+ cmd.append("--sam-no-hd")
+ if args.output_unmapped_reads or (
+ args.output_unmapped_reads_l and args.output_unmapped_reads_r
+ ):
+ cmd.append("--un")
+ if args.output_suppressed_reads or (
+ args.output_suppressed_reads_l and args.output_suppressed_reads_r
+ ):
+ cmd.append("--ambiguous")
cmd.append(index_dir)
# Set up the reads
if args.mate_paired:
# paired-end reads library
- cmd.extend(['-1', args.mate1, '-2', args.mate2, '-I', str(args.min_insert), '-X', str(args.max_insert)])
+ cmd.extend(
+ [
+ "-1",
+ args.mate1,
+ "-2",
+ args.mate2,
+ "-I",
+ str(args.min_insert),
+ "-X",
+ str(args.max_insert),
+ ]
+ )
else:
# single paired reads library
cmd.append(args.single_paired)
@@ -211,51 +298,81 @@
# Run
try:
logger.info("Running bismark with: '%s'", " ".join(cmd))
- process = subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ process = subprocess.Popen(
+ args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+ )
with process.stdout:
log_subprocess_output(logger, process.stdout)
exitcode = process.wait()
if exitcode != 0:
raise Exception(process.stderr)
except Exception as e:
- stop_err(logger, 'Error in running bismark!\n%s' % e)
+ stop_err(logger, "Error in running bismark!\n%s" % e)
# collect and copy output files
if args.output_report_file:
- output_report_file = open(args.output_report_file, 'w+')
- for line in fileinput.input(glob(os.path.join(output_dir, '*report.txt'))):
+ output_report_file = open(args.output_report_file, "w+")
+ for line in fileinput.input(glob(os.path.join(output_dir, "*report.txt"))):
output_report_file.write(line)
output_report_file.close()
if args.output_suppressed_reads:
- if glob(os.path.join(output_dir, '*ambiguous_reads.*')):
- shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads.*'))[0], args.output_suppressed_reads)
+ if glob(os.path.join(output_dir, "*ambiguous_reads.*")):
+ shutil.move(
+ glob(os.path.join(output_dir, "*ambiguous_reads.*"))[0],
+ args.output_suppressed_reads,
+ )
if args.output_suppressed_reads_l:
- if glob(os.path.join(output_dir, '*ambiguous_reads_1.*')):
- shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_1.*'))[0], args.output_suppressed_reads_l)
+ if glob(os.path.join(output_dir, "*ambiguous_reads_1.*")):
+ shutil.move(
+ glob(os.path.join(output_dir, "*ambiguous_reads_1.*"))[0],
+ args.output_suppressed_reads_l,
+ )
if args.output_suppressed_reads_r:
- if glob(os.path.join(output_dir, '*ambiguous_reads_2.*')):
- shutil.move(glob(os.path.join(output_dir, '*ambiguous_reads_2.*'))[0], args.output_suppressed_reads_r)
+ if glob(os.path.join(output_dir, "*ambiguous_reads_2.*")):
+ shutil.move(
+ glob(os.path.join(output_dir, "*ambiguous_reads_2.*"))[0],
+ args.output_suppressed_reads_r,
+ )
if args.output_unmapped_reads:
- if glob(os.path.join(output_dir, '*unmapped_reads.*')):
- shutil.move(glob(os.path.join(output_dir, '*unmapped_reads.*'))[0], args.output_unmapped_reads)
+ if glob(os.path.join(output_dir, "*unmapped_reads.*")):
+ shutil.move(
+ glob(os.path.join(output_dir, "*unmapped_reads.*"))[0],
+ args.output_unmapped_reads,
+ )
if args.output_unmapped_reads_l:
- if glob(os.path.join(output_dir, '*unmapped_reads_1.*')):
- shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_1.*'))[0], args.output_unmapped_reads_l)
+ if glob(os.path.join(output_dir, "*unmapped_reads_1.*")):
+ shutil.move(
+ glob(os.path.join(output_dir, "*unmapped_reads_1.*"))[0],
+ args.output_unmapped_reads_l,
+ )
if args.output_unmapped_reads_r:
- if glob(os.path.join(output_dir, '*unmapped_reads_2.*')):
- shutil.move(glob(os.path.join(output_dir, '*unmapped_reads_2.*'))[0], args.output_unmapped_reads_r)
+ if glob(os.path.join(output_dir, "*unmapped_reads_2.*")):
+ shutil.move(
+ glob(os.path.join(output_dir, "*unmapped_reads_2.*"))[0],
+ args.output_unmapped_reads_r,
+ )
try:
# merge all bam files
tmp_res = tempfile.NamedTemporaryFile(dir=output_dir).name
- bam_files = glob(os.path.join(output_dir, '*.bam'))
+ bam_files = glob(os.path.join(output_dir, "*.bam"))
if len(bam_files) > 1:
- cmd = ['samtools', 'merge', '-@', str(args.num_threads), '-f', tmp_res, ' '.join(bam_files)]
+ cmd = [
+ "samtools",
+ "merge",
+ "-@",
+ str(args.num_threads),
+ "-f",
+                tmp_res,
+            ]
+            cmd.extend(bam_files)
logger.info("Merging bams with: '%s'", cmd)
- process = subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ process = subprocess.Popen(
+ args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+ )
with process.stdout:
log_subprocess_output(logger, process.stdout)
exitcode = process.wait()
@@ -268,22 +385,31 @@
if os.path.exists(bam_path):
if args.sort_bam:
- cmd = ['samtools', 'sort', '-@', str(args.num_threads), bam_path, 'sorted_bam']
+ cmd = [
+ "samtools",
+ "sort",
+ "-@",
+ str(args.num_threads),
+ bam_path,
+ "sorted_bam",
+ ]
logger.info("Sorting bam with: '%s'", cmd)
- process = subprocess.Popen(args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ process = subprocess.Popen(
+ args=cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+ )
with process.stdout:
log_subprocess_output(logger, process.stdout)
exitcode = process.wait()
if exitcode != 0:
raise Exception(process.stderr)
- shutil.move('sorted_bam.bam', args.output)
+ shutil.move("sorted_bam.bam", args.output)
else:
shutil.move(bam_path, args.output)
else:
- stop_err(logger, 'BAM file no found:\n%s' % bam_path)
+            stop_err(logger, "BAM file not found:\n%s" % bam_path)
except Exception as e:
- stop_err(logger, 'Error in merging bam files!\n%s' % e)
+ stop_err(logger, "Error in merging bam files!\n%s" % e)
# Clean up temp dirs
if tmp_index_dir and os.path.exists(tmp_index_dir):
@@ -294,4 +420,5 @@
shutil.rmtree(output_dir)
-if __name__ == "__main__": __main__()
+if __name__ == "__main__":
+ __main__()
diff -r ff6ee551b153 -r 120b7b35e442 test-data/input2.fq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input2.fq Thu Apr 22 17:05:46 2021 +0000
@@ -0,0 +1,4000 @@
+@1_1
+TTGTATATATTAGATAAATTAATTTTTTTTGTTTGTATGTTAAATTTTTTAATTAATTTATTAATATTTTGTGAATTTTTAGATA
++
+AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE
+@1_2
+TAATTTTGAATTTTGGTTGGGTAGTTTGTTTTAGAATTATTATGGTTATATAGTTTTTTAGTAAGATTGTTATTTATTATTTGAT
++
+AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEET: 146875
+G->A: 150504
+
+Step II - Genome bisulfite conversions - completed
+
+
+Bismark Genome Preparation - Step III: Launching the Bowtie 2 indexer
+Please be aware that this process can - depending on genome size - take several hours!
+Settings:
+ Output files: "BS_CT.*.bt2"
+ Line rate: 6 (line is 64 bytes)
+ Lines per side: 1 (side is 64 bytes)
+ Offset rate: 4 (one in 16)
+ FTable chars: 10
+ Strings: unpacked
+ Max bucket size: default
+ Max bucket size, sqrt multiplier: default
+ Max bucket size, len divisor: 4
+ Difference-cover sample period: 1024
+ Endianness: little
+ Actual local endianness: little
+ Sanity checking: disabled
+ Assertions: disabled
+ Random seed: 0
+ Sizeofs: void*:8, int:4, long:8, size_t:8
+Input files DNA, FASTA:
+ genome_mfa.CT_conversion.fa
+Building a SMALL index
+Reading reference sizes
+ Time reading reference sizes: 00:00:00
+Calculating joined length
+Writing header
+Reserving space for joined string
+Joining reference sequences
+ Time to join reference sequences: 00:00:00
+bmax according to bmaxDivN setting: 189039
+Using parameters --bmax 141780 --dcv 1024
+ Doing ahead-of-time memory usage test
+ Passed! Constructing with these parameters: --bmax 141780 --dcv 1024
+Constructing suffix-array element generator
+Building DifferenceCoverSample
+ Building sPrime
+ Building sPrimeOrder
+ V-Sorting samples
+ V-Sorting samples time: 00:00:00
+ Allocating rank array
+ Ranking v-sort output
+ Ranking v-sort output time: 00:00:00
+ Invoking Larsson-Sadakane on ranks
+ Invoking Larsson-Sadakane on ranks time: 00:00:00
+ Sanity-checking and returning
+Building samples
+Reserving space for 12 sample suffixes
+Generating random suffixes
+QSorting 12 sample offsets, eliminating duplicates
+QSorting sample offsets, eliminating duplicates time: 00:00:00
+Multikey QSorting 12 samples
+ (Using difference cover)
+ Multikey QSorting samples time: 00:00:00
+Calculating bucket sizes
+Splitting and merging
+ Splitting and merging time: 00:00:00
+Avg bucket size: 756159 (target: 141779)
+Converting suffix-array elements to index image
+Allocating ftab, absorbFtab
+Entering Ebwt loop
+Getting block 1 of 1
+ No samples; assembling all-inclusive block
+ Sorting block of length 756159 for bucket 1
+ (Using difference cover)
+ Sorting block time: 00:00:00
+Returning block of 756160 for bucket 1
+Exited Ebwt loop
+fchr[A]: 0
+fchr[C]: 235897
+fchr[G]: 235897
+fchr[T]: 386401
+fchr[$]: 756159
+Exiting Ebwt::buildToDisk()
+Returning from initFromVector
+Wrote 4446745 bytes to primary EBWT file: BS_CT.1.bt2
+Wrote 189044 bytes to secondary EBWT file: BS_CT.2.bt2
+Re-opening _in1 and _in2 as input streams
+Returning from Ebwt constructor
+Headers:
+ len: 756159
+ bwtLen: 756160
+ sz: 189040
+ bwtSz: 189040
+ lineRate: 6
+ offRate: 4
+ offMask: 0xfffffff0
+ ftabChars: 10
+ eftabLen: 20
+ eftabSz: 80
+ ftabLen: 1048577
+ ftabSz: 4194308
+ offsLen: 47260
+ offsSz: 189040
+ lineSz: 64
+ sideSz: 64
+ sideBwtSz: 48
+ sideBwtLen: 192
+ numSides: 3939
+ numLines: 3939
+ ebwtTotLen: 252096
+ ebwtTotSz: 252096
+ color: 0
+ reverse: 0
+Total time for call to driver() for forward index: 00:00:00
+Reading reference sizes
+ Time reading reference sizes: 00:00:00
+Calculating joined length
+Writing header
+Reserving space for joined string
+Joining reference sequences
+ Time to join reference sequences: 00:00:00
+ Time to reverse reference sequence: 00:00:00
+bmax according to bmaxDivN setting: 189039
+Using parameters --bmax 141780 --dcv 1024
+ Doing ahead-of-time memory usage test
+ Passed! Constructing with these parameters: --bmax 141780 --dcv 1024
+Constructing suffix-array element generator
+Building DifferenceCoverSample
+ Building sPrime
+ Building sPrimeOrder
+ V-Sorting samples
+ V-Sorting samples time: 00:00:00
+ Allocating rank array
+ Ranking v-sort output
+ Ranking v-sort output time: 00:00:00
+ Invoking Larsson-Sadakane on ranks
+ Invoking Larsson-Sadakane on ranks time: 00:00:00
+ Sanity-checking and returning
+Building samples
+Reserving space for 12 sample suffixes
+Generating random suffixes
+QSorting 12 sample offsets, eliminating duplicates
+QSorting sample offsets, eliminating duplicates time: 00:00:00
+Multikey QSorting 12 samples
+ (Using difference cover)
+ Multikey QSorting samples time: 00:00:00
+Calculating bucket sizes
+Splitting and merging
+ Splitting and merging time: 00:00:00
+Avg bucket size: 756159 (target: 141779)
+Converting suffix-array elements to index image
+Allocating ftab, absorbFtab
+Entering Ebwt loop
+Getting block 1 of 1
+ No samples; assembling all-inclusive block
+ Sorting block of length 756159 for bucket 1
+ (Using difference cover)
+ Sorting block time: 00:00:00
+Returning block of 756160 for bucket 1
+Exited Ebwt loop
+fchr[A]: 0
+fchr[C]: 235897
+fchr[G]: 235897
+fchr[T]: 386401
+fchr[$]: 756159
+Exiting Ebwt::buildToDisk()
+Returning from initFromVector
+Wrote 4446745 bytes to primary EBWT file: BS_CT.rev.1.bt2
+Wrote 189044 bytes to secondary EBWT file: BS_CT.rev.2.bt2
+Re-opening _in1 and _in2 as input streams
+Returning from Ebwt constructor
+Headers:
+ len: 756159
+ bwtLen: 756160
+ sz: 189040
+ bwtSz: 189040
+ lineRate: 6
+ offRate: 4
+ offMask: 0xfffffff0
+ ftabChars: 10
+ eftabLen: 20
+ eftabSz: 80
+ ftabLen: 1048577
+ ftabSz: 4194308
+ offsLen: 47260
+ offsSz: 189040
+ lineSz: 64
+ sideSz: 64
+ sideBwtSz: 48
+ sideBwtLen: 192
+ numSides: 3939
+ numLines: 3939
+ ebwtTotLen: 252096
+ ebwtTotSz: 252096
+ color: 0
+ reverse: 1
+Total time for backward call to driver() for mirror index: 00:00:01
+Settings:
+ Output files: "BS_GA.*.bt2"
+ Line rate: 6 (line is 64 bytes)
+ Lines per side: 1 (side is 64 bytes)
+ Offset rate: 4 (one in 16)
+ FTable chars: 10
+ Strings: unpacked
+ Max bucket size: default
+ Max bucket size, sqrt multiplier: default
+ Max bucket size, len divisor: 4
+ Difference-cover sample period: 1024
+ Endianness: little
+ Actual local endianness: little
+ Sanity checking: disabled
+ Assertions: disabled
+ Random seed: 0
+ Sizeofs: void*:8, int:4, long:8, size_t:8
+Input files DNA, FASTA:
+ genome_mfa.GA_conversion.fa
+Building a SMALL index
+Reading reference sizes
+ Time reading reference sizes: 00:00:00
+Calculating joined length
+Writing header
+Reserving space for joined string
+Joining reference sequences
+ Time to join reference sequences: 00:00:00
+bmax according to bmaxDivN setting: 189039
+Using parameters --bmax 141780 --dcv 1024
+ Doing ahead-of-time memory usage test
+ Passed! Constructing with these parameters: --bmax 141780 --dcv 1024
+Constructing suffix-array element generator
+Building DifferenceCoverSample
+ Building sPrime
+ Building sPrimeOrder
+ V-Sorting samples
+ V-Sorting samples time: 00:00:00
+ Allocating rank array
+ Ranking v-sort output
+ Ranking v-sort output time: 00:00:00
+ Invoking Larsson-Sadakane on ranks
+ Invoking Larsson-Sadakane on ranks time: 00:00:00
+ Sanity-checking and returning
+Building samples
+Reserving space for 12 sample suffixes
+Generating random suffixes
+QSorting 12 sample offsets, eliminating duplicates
+QSorting sample offsets, eliminating duplicates time: 00:00:00
+Multikey QSorting 12 samples
+ (Using difference cover)
+ Multikey QSorting samples time: 00:00:00
+Calculating bucket sizes
+Splitting and merging
+ Splitting and merging time: 00:00:00
+Avg bucket size: 756159 (target: 141779)
+Converting suffix-array elements to index image
+Allocating ftab, absorbFtab
+Entering Ebwt loop
+Getting block 1 of 1
+ No samples; assembling all-inclusive block
+ Sorting block of length 756159 for bucket 1
+ (Using difference cover)
+ Sorting block time: 00:00:00
+Returning block of 756160 for bucket 1
+Exited Ebwt loop
+fchr[A]: 0
+fchr[C]: 386401
+fchr[G]: 533276
+fchr[T]: 533276
+fchr[$]: 756159
+Exiting Ebwt::buildToDisk()
+Returning from initFromVector
+Wrote 4446745 bytes to primary EBWT file: BS_GA.1.bt2
+Wrote 189044 bytes to secondary EBWT file: BS_GA.2.bt2
+Re-opening _in1 and _in2 as input streams
+Returning from Ebwt constructor
+Headers:
+ len: 756159
+ bwtLen: 756160
+ sz: 189040
+ bwtSz: 189040
+ lineRate: 6
+ offRate: 4
+ offMask: 0xfffffff0
+ ftabChars: 10
+ eftabLen: 20
+ eftabSz: 80
+ ftabLen: 1048577
+ ftabSz: 4194308
+ offsLen: 47260
+ offsSz: 189040
+ lineSz: 64
+ sideSz: 64
+ sideBwtSz: 48
+ sideBwtLen: 192
+ numSides: 3939
+ numLines: 3939
+ ebwtTotLen: 252096
+ ebwtTotSz: 252096
+ color: 0
+ reverse: 0
+Total time for call to driver() for forward index: 00:00:00
+Reading reference sizes
+ Time reading reference sizes: 00:00:00
+Calculating joined length
+Writing header
+Reserving space for joined string
+Joining reference sequences
+ Time to join reference sequences: 00:00:00
+ Time to reverse reference sequence: 00:00:00
+bmax according to bmaxDivN setting: 189039
+Using parameters --bmax 141780 --dcv 1024
+ Doing ahead-of-time memory usage test
+ Passed! Constructing with these parameters: --bmax 141780 --dcv 1024
+Constructing suffix-array element generator
+Building DifferenceCoverSample
+ Building sPrime
+ Building sPrimeOrder
+ V-Sorting samples
+ V-Sorting samples time: 00:00:00
+ Allocating rank array
+ Ranking v-sort output
+ Ranking v-sort output time: 00:00:00
+ Invoking Larsson-Sadakane on ranks
+ Invoking Larsson-Sadakane on ranks time: 00:00:00
+ Sanity-checking and returning
+Building samples
+Reserving space for 12 sample suffixes
+Generating random suffixes
+QSorting 12 sample offsets, eliminating duplicates
+QSorting sample offsets, eliminating duplicates time: 00:00:00
+Multikey QSorting 12 samples
+ (Using difference cover)
+ Multikey QSorting samples time: 00:00:00
+Calculating bucket sizes
+Splitting and merging
+ Splitting and merging time: 00:00:00
+Avg bucket size: 756159 (target: 141779)
+Converting suffix-array elements to index image
+Allocating ftab, absorbFtab
+Entering Ebwt loop
+Getting block 1 of 1
+ No samples; assembling all-inclusive block
+ Sorting block of length 756159 for bucket 1
+ (Using difference cover)
+ Sorting block time: 00:00:00
+Returning block of 756160 for bucket 1
+Exited Ebwt loop
+fchr[A]: 0
+fchr[C]: 386401
+fchr[G]: 533276
+fchr[T]: 533276
+fchr[$]: 756159
+Exiting Ebwt::buildToDisk()
+Returning from initFromVector
+Wrote 4446745 bytes to primary EBWT file: BS_GA.rev.1.bt2
+Wrote 189044 bytes to secondary EBWT file: BS_GA.rev.2.bt2
+Re-opening _in1 and _in2 as input streams
+Returning from Ebwt constructor
+Headers:
+ len: 756159
+ bwtLen: 756160
+ sz: 189040
+ bwtSz: 189040
+ lineRate: 6
+ offRate: 4
+ offMask: 0xfffffff0
+ ftabChars: 10
+ eftabLen: 20
+ eftabSz: 80
+ ftabLen: 1048577
+ ftabSz: 4194308
+ offsLen: 47260
+ offsSz: 189040
+ lineSz: 64
+ sideSz: 64
+ sideBwtSz: 48
+ sideBwtLen: 192
+ numSides: 3939
+ numLines: 3939
+ ebwtTotLen: 252096
+ ebwtTotSz: 252096
+ color: 0
+ reverse: 1
+Total time for backward call to driver() for mirror index: 00:00:01
+Running bismark with: 'bismark --bam --temp_dir /tmp/tmp0n2gudqm -o /tmp/tmp0n2gudqm/results --quiet --gzip --fastq -L 20 -D 15 -R 2 --un --ambiguous /tmp/tmpfw7btd9x -1 input1.fq_1.fq,input2.fq_1.fq -2 input1.fq_2.fq,input2.fq_2.fq -I 0 -X 500'
+Bowtie 2 seems to be working fine (tested command 'bowtie2 --version' [2.3.5])
+Output format is BAM (default)
+Alignments will be written out in BAM format. Samtools found here: '/usr/local/bin/samtools'
+Reference genome folder provided is /tmp/tmpfw7btd9x/ (absolute path is '/tmp/tmpfw7btd9x/)'
+FastQ format specified
+
+Input files to be analysed (in current folder '/tmp/tmpfl_1r4y7/job_working_directory/000/17/working'):
+input1.fq_1.fq
+input1.fq_2.fq
+input2.fq_1.fq
+input2.fq_2.fq
+Library is assumed to be strand-specific (directional), alignments to strands complementary to the original top or bottom strands will be ignored (i.e. not performed!)
+Created output directory /tmp/tmp0n2gudqm/results/!
+
+Output will be written into the directory: /tmp/tmp0n2gudqm/results/
+
+Using temp directory: /tmp/tmp0n2gudqm
+Temporary files will be written into the directory: /tmp/tmp0n2gudqm/
+Setting parallelization to single-threaded (default)
+
+Summary of all aligner options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet
+Current working directory is: /tmp/tmpfl_1r4y7/job_working_directory/000/17/working
+
+Now reading in and storing sequence information of the genome specified in: /tmp/tmpfw7btd9x/
+
+chr chrY_JH584300_random (182347 bp)
+chr chrY_JH584301_random (259875 bp)
+chr chrY_JH584302_random (155838 bp)
+chr chrY_JH584303_random (158099 bp)
+
+Single-core mode: setting pid to 1
+
+Paired-end alignments will be performed
+=======================================
+
+The provided filenames for paired-end alignments are input1.fq_1.fq and input1.fq_2.fq
+Input files are in FastQ format
+Writing a C -> T converted version of the input file input1.fq_1.fq to /tmp/tmp0n2gudqm/input1.fq_1.fq_C_to_T.fastq.gz
+
+Created C -> T converted version of the FastQ file input1.fq_1.fq (1000 sequences in total)
+
+Writing a G -> A converted version of the input file input1.fq_2.fq to /tmp/tmp0n2gudqm/input1.fq_2.fq_G_to_A.fastq.gz
+
+Created G -> A converted version of the FastQ file input1.fq_2.fq (1000 sequences in total)
+
+Input files are input1.fq_1.fq_C_to_T.fastq.gz and input1.fq_2.fq_G_to_A.fastq.gz (FastQ)
+Now running 2 instances of Bowtie 2 against the bisulfite genome of /tmp/tmpfw7btd9x/ with the specified options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet
+
+Now starting a Bowtie 2 paired-end alignment for CTread1GAread2CTgenome (reading in sequences from /tmp/tmp0n2gudqm/input1.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input1.fq_2.fq_G_to_A.fastq.gz, with the options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet --norc))
+Found first alignment:
+1_1/1 77 * 0 0 * * 0 0 TTGTATATATTAGATAAATTAATTTTTTTTGTTTGTATGTTAAATTTTTTAATTAATTTATTAATATTTTGTGAATTTTTAGATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP
+1_1/2 141 * 0 0 * * 0 0 TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP
+Now starting a Bowtie 2 paired-end alignment for CTread1GAread2GAgenome (reading in sequences from /tmp/tmp0n2gudqm/input1.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input1.fq_2.fq_G_to_A.fastq.gz, with the options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet --nofw))
+Found first alignment:
+1_1/1 77 * 0 0 * * 0 0 TTGTATATATTAGATAAATTAATTTTTTTTGTTTGTATGTTAAATTTTTTAATTAATTTATTAATATTTTGTGAATTTTTAGATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP
+1_1/2 141 * 0 0 * * 0 0 TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP
+
+>>> Writing bisulfite mapping results to input1.fq_1_bismark_bt2_pe.bam <<<
+
+Unmapped sequences will be written to input1.fq_1.fq_unmapped_reads_1.fq.gz and input1.fq_2.fq_unmapped_reads_2.fq.gz
+Ambiguously mapping sequences will be written to input1.fq_1.fq_ambiguous_reads_1.fq.gz and input1.fq_2.fq_ambiguous_reads_2.fq.gz
+
+Reading in the sequence files input1.fq_1.fq and input1.fq_2.fq
+Processed 1000 sequences in total
+
+
+Successfully deleted the temporary files /tmp/tmp0n2gudqm/input1.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input1.fq_2.fq_G_to_A.fastq.gz
+
+Final Alignment report
+======================
+Sequence pairs analysed in total: 1000
+Number of paired-end alignments with a unique best hit: 0
+Mapping efficiency: 0.0%
+
+Sequence pairs with no alignments under any condition: 1000
+Sequence pairs did not map uniquely: 0
+Sequence pairs which were discarded because genomic sequence could not be extracted: 0
+
+Number of sequence pairs with unique best (first) alignment came from the bowtie output:
+CT/GA/CT: 0 ((converted) top strand)
+GA/CT/CT: 0 (complementary to (converted) top strand)
+GA/CT/GA: 0 (complementary to (converted) bottom strand)
+CT/GA/GA: 0 ((converted) bottom strand)
+
+Number of alignments to (merely theoretical) complementary strands being rejected in total: 0
+
+Final Cytosine Methylation Report
+=================================
+Total number of C's analysed: 0
+
+Total methylated C's in CpG context: 0
+Total methylated C's in CHG context: 0
+Total methylated C's in CHH context: 0
+Total methylated C's in Unknown context: 0
+
+Total unmethylated C's in CpG context: 0
+Total unmethylated C's in CHG context: 0
+Total unmethylated C's in CHH context: 0
+Total unmethylated C's in Unknown context: 0
+
+Can't determine percentage of methylated Cs in CpG context if value was 0
+Can't determine percentage of methylated Cs in CHG context if value was 0
+Can't determine percentage of methylated Cs in CHH context if value was 0
+Can't determine percentage of methylated Cs in unknown context (CN or CHN) if value was 0
+
+
+Bismark completed in 0d 0h 0m 6s
+
+====================
+Bismark run complete
+====================
+
+Single-core mode: setting pid to 1
+
+Paired-end alignments will be performed
+=======================================
+
+The provided filenames for paired-end alignments are input2.fq_1.fq and input2.fq_2.fq
+Input files are in FastQ format
+Writing a C -> T converted version of the input file input2.fq_1.fq to /tmp/tmp0n2gudqm/input2.fq_1.fq_C_to_T.fastq.gz
+
+Created C -> T converted version of the FastQ file input2.fq_1.fq (1000 sequences in total)
+
+Writing a G -> A converted version of the input file input2.fq_2.fq to /tmp/tmp0n2gudqm/input2.fq_2.fq_G_to_A.fastq.gz
+
+Created G -> A converted version of the FastQ file input2.fq_2.fq (1000 sequences in total)
+
+Input files are input2.fq_1.fq_C_to_T.fastq.gz and input2.fq_2.fq_G_to_A.fastq.gz (FastQ)
+Now running 2 instances of Bowtie 2 against the bisulfite genome of /tmp/tmpfw7btd9x/ with the specified options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet
+
+Now starting a Bowtie 2 paired-end alignment for CTread1GAread2CTgenome (reading in sequences from /tmp/tmp0n2gudqm/input2.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input2.fq_2.fq_G_to_A.fastq.gz, with the options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet --norc))
+Found first alignment:
+1_1/1 77 * 0 0 * * 0 0 TTGTATATATTAGATAAATTAATTTTTTTTGTTTGTATGTTAAATTTTTTAATTAATTTATTAATATTTTGTGAATTTTTAGATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP
+1_1/2 141 * 0 0 * * 0 0 TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP
+Now starting a Bowtie 2 paired-end alignment for CTread1GAread2GAgenome (reading in sequences from /tmp/tmp0n2gudqm/input2.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input2.fq_2.fq_G_to_A.fastq.gz, with the options: -q -L 20 -D 15 -R 2 --score-min L,0,-0.2 --ignore-quals --no-mixed --no-discordant --dovetail --minins 0 --maxins 500 --quiet --nofw))
+Found first alignment:
+1_1/1 77 * 0 0 * * 0 0 TTGTATATATTAGATAAATTAATTTTTTTTGTTTGTATGTTAAATTTTTTAATTAATTTATTAATATTTTGTGAATTTTTAGATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP
+1_1/2 141 * 0 0 * * 0 0 TTATATATATTAAATAAATTAATTTTTTTTATTTATATATTAAATTTTTTAATTAATTTATTAATATTTTATAAATTTTTAAATA AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAEEAEEEEEE YT:Z:UP
+
+>>> Writing bisulfite mapping results to input2.fq_1_bismark_bt2_pe.bam <<<
+
+Unmapped sequences will be written to input2.fq_1.fq_unmapped_reads_1.fq.gz and input2.fq_2.fq_unmapped_reads_2.fq.gz
+Ambiguously mapping sequences will be written to input2.fq_1.fq_ambiguous_reads_1.fq.gz and input2.fq_2.fq_ambiguous_reads_2.fq.gz
+
+Reading in the sequence files input2.fq_1.fq and input2.fq_2.fq
+Processed 1000 sequences in total
+
+
+Successfully deleted the temporary files /tmp/tmp0n2gudqm/input2.fq_1.fq_C_to_T.fastq.gz and /tmp/tmp0n2gudqm/input2.fq_2.fq_G_to_A.fastq.gz
+
+Final Alignment report
+======================
+Sequence pairs analysed in total: 1000
+Number of paired-end alignments with a unique best hit: 0
+Mapping efficiency: 0.0%
+
+Sequence pairs with no alignments under any condition: 1000
+Sequence pairs did not map uniquely: 0
+Sequence pairs which were discarded because genomic sequence could not be extracted: 0
+
+Number of sequence pairs with unique best (first) alignment came from the bowtie output:
+CT/GA/CT: 0 ((converted) top strand)
+GA/CT/CT: 0 (complementary to (converted) top strand)
+GA/CT/GA: 0 (complementary to (converted) bottom strand)
+CT/GA/GA: 0 ((converted) bottom strand)
+
+Number of alignments to (merely theoretical) complementary strands being rejected in total: 0
+
+Final Cytosine Methylation Report
+=================================
+Total number of C's analysed: 0
+
+Total methylated C's in CpG context: 0
+Total methylated C's in CHG context: 0
+Total methylated C's in CHH context: 0
+Total methylated C's in Unknown context: 0
+
+Total unmethylated C's in CpG context: 0
+Total unmethylated C's in CHG context: 0
+Total unmethylated C's in CHH context: 0
+Total unmethylated C's in Unknown context: 0
+
+Can't determine percentage of methylated Cs in CpG context if value was 0
+Can't determine percentage of methylated Cs in CHG context if value was 0
+Can't determine percentage of methylated Cs in CHH context if value was 0
+Can't determine percentage of methylated Cs in unknown context (CN or CHN) if value was 0
+
+
+Bismark completed in 0d 0h 0m 8s
+
+====================
+Bismark run complete
+====================
+
+Merging bams with: '['samtools', 'merge', '-@', '1', '-f', '/tmp/tmp0n2gudqm/results/tmpnqe_dadr', '/tmp/tmp0n2gudqm/results/input1.fq_1_bismark_bt2_pe.bam', '/tmp/tmp0n2gudqm/results/input2.fq_1_bismark_bt2_pe.bam']'