Mercurial > repos > cmonjeau > stacks
diff STACKS_denovomap.py @ 0:d6ba40f6c824
first commit
author | cmonjeau |
---|---|
date | Mon, 24 Aug 2015 09:29:12 +0000 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Galaxy wrapper that builds and runs a ``denovo_map.pl`` command line.

The script stages the input files listed in Galaxy config files into a local
``inputs`` directory, runs ``denovo_map.pl`` with its results written to
``job_outputs``, then hands the log, the catalog files and the remaining
result files (optionally zipped) back to the paths given on the command line.
"""

import sys
import re
import os
import tempfile
import shutil
import subprocess
import glob
import argparse
from os.path import basename
import zipfile
import tarfile
import gzip
from galaxy.datatypes.checkers import *
from stacks import *


# Value-less options accepted by this wrapper (all optional strings).
_OPTION_FLAGS = (
    '-p', '-b', '-r', '-s', '-O', '-m', '-P', '-M', '-N', '-n', '-t', '-H',
    '--bound_low', '--bound_high', '--alpha',
    '--logfile', '--compress_output',
    '--catalogsnps', '--catalogalleles', '--catalogtags',
    # additional outputs
    '--total_output', '--tags_output', '--snps_output',
    '--alleles_output', '--matches_output',
)

# Result categories that get their own archive in "categories" mode.
_CATEGORIES = ('tags', 'snps', 'alleles', 'matches')


def _parse_args():
    """Declare every wrapper option and return the parsed namespace."""
    parser = argparse.ArgumentParser()
    for flag in _OPTION_FLAGS:
        parser.add_argument(flag)
    return parser.parse_args()


def _stage_inputs(config_path):
    """Stage the files described by a Galaxy config file into ``inputs``.

    The four helpers come from the star imports above (presumably the
    project's ``stacks`` module -- TODO confirm). Returns the table of staged
    files produced by the config parser.
    """
    # parse config files
    tab_files = galaxy_config_to_tabfiles_for_STACKS(config_path)
    # check if zipped files are in the table and change the table content
    extract_compress_files_from_tabfiles(tab_files, 'inputs')
    # check file extensions (important to have .fq or .fasta files)
    check_fastq_extension_and_add(tab_files, 'inputs')
    # create symlinks into the input dir
    create_symlinks_from_tabfiles(tab_files, 'inputs')
    return tab_files


def _staged_file_args(flag, tab_files):
    """Return ``[flag, path, ...]`` for every staged entry that is a file."""
    args = []
    for key in tab_files:
        path = 'inputs/' + key
        # skip directories created by a decompression
        if os.path.isfile(path):
            args.extend([flag, path])
    return args


def _parent_args(paired):
    """Return the ``-p`` arguments for the files currently in ``inputs``.

    When ``paired`` is true only every second file (in sorted order) is
    passed, as the original script did for paired-end data.
    """
    fastq_files = sorted(
        path for path in glob.glob('inputs/*') if os.path.isfile(path)
    )
    if paired:
        fastq_files = fastq_files[::2]
    args = []
    for path in fastq_files:
        args.extend(['-p', path])
    return args


def _tuning_args(options):
    """Translate the optional tuning parameters into command-line arguments."""
    args = ['-S', '-b', '1', '-T', '4', '-o', 'job_outputs/']

    if options.O:
        args.extend(['-O', options.O])

    # '-1' is the wrapper's "not set" marker for these numeric options.
    for flag, value in (('-m', options.m), ('-P', options.P),
                        ('-M', options.M), ('-N', options.N),
                        ('-n', options.n)):
        if value and value != '-1':
            args.extend([flag, value])

    if options.t == 'true':
        args.append('-t')
    if options.H == 'true':
        args.append('-H')

    # SNP model: each bound is forwarded only when it was actually supplied,
    # so a missing --bound_high can no longer inject None into the command.
    if options.bound_low:
        args.extend(['--bound_low', options.bound_low])
    if options.bound_high:
        args.extend(['--bound_high', options.bound_high])
    if options.alpha:
        args.extend(['--alpha', options.alpha])
    return args


def _build_command(options):
    """Stage all inputs and return the full ``denovo_map.pl`` argument list."""
    cmd_line = ['denovo_map.pl']

    # if genetic map
    if options.p:
        _stage_inputs(options.p)
        # Must run before any other staging: every file present in
        # ``inputs`` at this point is treated as a parent.
        cmd_line.extend(_parent_args(options.b == 'true'))

        # if genetic map with progeny files
        # NOTE(review): indentation was lost in the reviewed source; progeny
        # handling is assumed to be nested under the parents branch -- confirm.
        if options.r:
            cmd_line.extend(_staged_file_args('-r', _stage_inputs(options.r)))

    # if population is checked
    if options.s:
        cmd_line.extend(_staged_file_args('-s', _stage_inputs(options.s)))

    cmd_line.extend(_tuning_args(options))
    return cmd_line


def _output_category(name):
    """Return the archive category of a result file, or None if it has none.

    Files whose name contains ``batch`` are never archived by category.
    """
    if 'batch' in name:
        return None
    for category in _CATEGORIES:
        if name.endswith(category + '.tsv'):
            return category
    return None


def _collect_outputs(options):
    """Copy the catalogs and dispatch the result files found in the cwd.

    Must be called with ``job_outputs`` as the current directory.
    """
    catalogs = (
        ('catalog.snps.tsv', options.catalogsnps),
        ('catalog.alleles.tsv', options.catalogalleles),
        ('catalog.tags.tsv', options.catalogtags),
    )
    for name in glob.glob('*'):
        for suffix, destination in catalogs:
            if destination and name.endswith(suffix):
                shutil.copy(name, destination)

    # Snapshot taken before any archive is created, so the temporary zip
    # files never end up inside themselves.
    list_files = glob.glob('*')

    if options.compress_output == 'total':
        # The archive has to be closed before it is moved: the zip central
        # directory is only written on close.
        with zipfile.ZipFile('total.zip.temp', 'w',
                             allowZip64=True) as archive:
            for name in list_files:
                archive.write(name)
        shutil.move('total.zip.temp', options.total_output)

    elif options.compress_output == 'categories':
        archives = {}
        try:
            for category in _CATEGORIES:
                archives[category] = zipfile.ZipFile(
                    category + '.zip.temp', 'w', allowZip64=True)
            for name in list_files:
                category = _output_category(name)
                if category is None:
                    shutil.move(name, '../galaxy_outputs')
                else:
                    archives[category].write(name)
                    os.remove(name)
        finally:
            # Close every archive that was opened, even on error, so the
            # files are complete before being moved.
            for archive in archives.values():
                archive.close()

        shutil.move('tags.zip.temp', options.tags_output)
        shutil.move('snps.zip.temp', options.snps_output)
        shutil.move('alleles.zip.temp', options.alleles_output)
        shutil.move('matches.zip.temp', options.matches_output)

    else:
        # no compression
        for name in list_files:
            shutil.move(name, '../galaxy_outputs')


def __main__():
    """Run the wrapper: parse options, run denovo_map.pl, collect outputs.

    Exits with a non-zero status when the program cannot be launched, when it
    reports a failure, or when it did not produce its log file.
    """
    options = _parse_args()

    # create working directories
    os.mkdir('inputs')
    os.mkdir('job_outputs')
    os.mkdir('galaxy_outputs')

    cmd_line = _build_command(options)

    # launch the command line
    print("[CMD_LINE] : " + ' '.join(cmd_line))
    # Flush so the echoed command appears before the child's own output.
    sys.stdout.flush()

    try:
        return_code = subprocess.call(cmd_line)
    except OSError as err:
        sys.stderr.write('Unable to launch denovo_map.pl: %s\n' % err)
        sys.exit(1)

    # postprocesses: the log is handed back first so it is available even
    # when the run itself failed.
    try:
        shutil.move('job_outputs/denovo_map.log', options.logfile)
    except (IOError, OSError, shutil.Error):
        sys.stderr.write('Error in denovo_map execution; Please read the additional output (stdout)\n')
        sys.exit(1)

    if return_code != 0:
        sys.stderr.write('Error in denovo_map execution; Please read the additional output (stdout)\n')
        sys.exit(return_code if return_code > 0 else 1)

    # go inside the outputs dir
    os.chdir('job_outputs')
    _collect_outputs(options)


if __name__ == '__main__':
    __main__()