view cnv-sim.py @ 11:b8c966f6e302 draft

Deleted selected files
author ahosny
date Thu, 18 Aug 2016 18:41:44 -0400
parents 4a4d2b78eb55
children e0f5a71e94ed
line wrap: on
line source

#!/usr/bin/python

__author__ = 'Abdelrahman Hosny'

import os.path
import datetime
import argparse
import shutil

from cnvsim.fileio import *
from cnvsim.exome_simulator import *
from cnvsim.genome_simulator import *

def log(message):
    """Print *message* to stdout behind a ``[CNV SIM <timestamp>]`` prefix.

    The timestamp is the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS``.

    :param message: text to log; it is concatenated to the prefix, so it
        must be a string.
    """
    prefix = '[CNV SIM {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] "
    # A single parenthesised argument is valid both as the Python 2 print
    # statement and as the Python 3 print function, and prints the same text.
    print(prefix + message)

def _readable_file(path):
    """argparse ``type`` callback: verify *path* can be opened for reading.

    The file is opened and immediately closed, so no handle is leaked; the
    path string itself is returned unchanged.

    :raises argparse.ArgumentTypeError: if the file cannot be opened, which
        argparse reports as a normal usage error (exit status 2).
    """
    try:
        with open(path):
            pass
    except (IOError, OSError) as err:
        raise argparse.ArgumentTypeError("can't open '{}': {}".format(path, err))
    return path


def main():
    """Parse the command line and launch the requested CNV simulation.

    Builds two dictionaries from the parsed arguments -- the general
    simulation parameters and the parameters used to randomly generate a CNV
    list -- and passes both to ``simulate_genome_cnv`` or
    ``simulate_exome_cnv`` depending on the ``simulation_type`` positional
    argument. Both functions are expected to come from the ``cnvsim`` star
    imports at the top of the file.

    Input files (genome, target, CNV list) are validated by
    ``_readable_file``; an unreadable path ends the program with an argparse
    usage error instead of a traceback.
    """
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("simulation_type", type=str, choices=['genome', 'exome'],
                        help="simulate copy number variations in whole genome or exome regions")
    parser.add_argument("genome", type=_readable_file,
                        help="path to the reference genome file in FASTA format ")
    # NOTE(review): target is optional even for 'exome'; whether
    # simulate_exome_cnv copes with target_file=None is not visible here.
    parser.add_argument("target", type=_readable_file, nargs='?', default=None,
                        help="path to the target regions file in BED format (if using exome)")

    parser.add_argument("-o", "--output_dir_name", type=str, default="test",
                        help="a name to be used to create the output directory (overrides existing directory with the same name).")
    parser.add_argument("-n", "--n_reads", type=int, default=10000,
                        help="total number of reads without variations")
    parser.add_argument("-l", "--read_length", type=int, default=100,
                        help="read length (bp)")
    parser.add_argument("--cnv_list", type=_readable_file, default=None,
                        help="path to a CNV list file in BED format chr | start | end | variation. If not passed, it is randomly generated using CNV list parameters below")

    cnv_sim_group = parser.add_argument_group('CNV list parameters', "parameters to be used if CNV list is not passed")
    cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=30,
                               help="number of CNV regions to be randomly generated")
    cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.30,
                               help="percentage of amplifications in range [0.0: 1.0].")
    cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.20,
                               help="percentage of deletions in range [0.0: 1.0].")
    # NOTE(review): these are counts but are parsed as float (the integer
    # defaults are passed through untouched); kept as-is because the
    # consumers of these values are not visible here -- confirm whether
    # type=int is intended.
    cnv_sim_group.add_argument("-min", "--minimum", type=float, default=3,
                               help="minimum number of amplifications/deletions introduced")
    cnv_sim_group.add_argument("-max", "--maximum", type=float, default=10,
                               help="maximum number of amplifications/deletions introduced")

    args = parser.parse_args()

    # The output directory is resolved relative to the current working
    # directory; the temporary directory lives directly inside it.
    output_dir = os.path.join(os.getcwd(), args.output_dir_name)

    simulation_parameters = {
        'type': args.simulation_type,
        'genome_file': args.genome,
        'target_file': args.target,        # None when not supplied
        'output_dir': output_dir,
        'number_of_reads': args.n_reads,
        'read_length': args.read_length,
        'cnv_list_file': args.cnv_list,    # None when not supplied
        'tmp_dir': os.path.join(output_dir, "tmp"),
    }

    cnv_list_parameters = {
        'regions_count': args.regions_count,
        'amplifications': args.amplifications,
        'deletions': args.deletions,
        'minimum_variations': args.minimum,
        'maximum_variations': args.maximum,
    }

    if simulation_parameters['type'] == 'genome':
        simulate_genome_cnv(simulation_parameters, cnv_list_parameters)
    else:
        simulate_exome_cnv(simulation_parameters, cnv_list_parameters)


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if "__main__" == __name__:
    main()