#!/usr/bin/python
# Provenance: reconstructed from an HG changeset patch that added this file.
#   User      ahosny
#   Date      1470511534 14400 (Sat Aug 06 15:25:34 2016 -0400)
#   Node ID   4a4d2b78eb55301e491362299e1b85aaee0d700c
#   Parent    066299035353fa978d48937df7b210e86d771ab6
#   Message   Main Python Code
#   File      cnv-sim.py (new file; diff -r 066299035353 -r 4a4d2b78eb55)
"""Command-line front end for CNV Sim.

Parses the command line, packs the options into two plain dictionaries
(`simulation_parameters` and `cnv_list_parameters`) and hands them to
`simulate_genome_cnv` or `simulate_exome_cnv` depending on the requested
simulation type.
"""

__author__ = 'Abdelrahman Hosny'

import os.path
import datetime
import argparse
import shutil

# NOTE(review): simulate_genome_cnv / simulate_exome_cnv are expected to come
# from these wildcard imports -- the exact defining module is not visible here.
from cnvsim.fileio import *
from cnvsim.exome_simulator import *
from cnvsim.genome_simulator import *


def log(message):
    """Print `message` to stdout prefixed with a '[CNV SIM <timestamp>] ' tag."""
    # Single-argument parenthesised form behaves identically on Python 2 and 3.
    print('[CNV SIM {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message)


def _existing_file(path):
    """argparse `type=` callable: return `path` if it is a readable regular file.

    Replaces the Python 2-only `type=file`, which opened (and never closed) a
    handle just to read back its `.name`, and which surfaced a missing file as
    an uncaught IOError traceback instead of a usage error.

    Raises:
        argparse.ArgumentTypeError: if `path` is not a readable regular file.
    """
    if not os.path.isfile(path) or not os.access(path, os.R_OK):
        raise argparse.ArgumentTypeError("cannot read file: '{}'".format(path))
    return path


def _build_parser():
    """Create the argument parser describing every CNV Sim command-line option."""
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument("simulation_type", type=str, choices=['genome', 'exome'],
                        help="simulate copy number variations in whole genome or exome regions")
    parser.add_argument("genome", type=_existing_file,
                        help="path to the reference genome file in FASTA format ")
    # NOTE(review): the target file is optional at the parser level even for
    # 'exome'; whether the exome simulator copes with None is not visible here.
    parser.add_argument("target", type=_existing_file, nargs='?', default=None,
                        help="path to the target regions file in BED format (if using exome)")

    parser.add_argument("-o", "--output_dir_name", type=str, default="test",
                        help="a name to be used to create the output directory (overrides existing directory with the same name).")
    parser.add_argument("-n", "--n_reads", type=int, default=10000,
                        help="total number of reads without variations")
    parser.add_argument("-l", "--read_length", type=int, default=100,
                        help="read length (bp)")
    parser.add_argument("--cnv_list", type=_existing_file, default=None,
                        help="path to a CNV list file in BED format chr | start | end | variation. If not passed, it is randomly generated using CNV list parameters below")

    cnv_sim_group = parser.add_argument_group('CNV list parameters', "parameters to be used if CNV list is not passed")
    cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=30,
                               help="number of CNV regions to be randomly generated")
    cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.30,
                               help="percentage of amplifications in range [0.0: 1.0].")
    cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.20,
                               help="percentage of deletions in range [0.0: 1.0].")
    cnv_sim_group.add_argument("-min", "--minimum", type=float, default=3,
                               help="minimum number of amplifications/deletions introduced")
    cnv_sim_group.add_argument("-max", "--maximum", type=float, default=10,
                               help="maximum number of amplifications/deletions introduced")
    return parser


def _validate(parser, args):
    """Reject option values that contradict the ranges stated in the help text.

    Calls `parser.error` (prints usage and exits with status 2) on failure.
    """
    for option in ('amplifications', 'deletions'):
        value = getattr(args, option)
        # Written as `not (lo <= v <= hi)` so that NaN is rejected as well.
        if not 0.0 <= value <= 1.0:
            parser.error("--{} must be in range [0.0: 1.0], got {}".format(option, value))
    if args.minimum > args.maximum:
        parser.error("--minimum ({}) must not exceed --maximum ({})".format(args.minimum, args.maximum))


def main():
    """Parse the command line and run the requested genome or exome simulation."""
    parser = _build_parser()
    args = parser.parse_args()
    _validate(parser, args)

    output_dir = os.path.join(os.getcwd(), args.output_dir_name)

    # File arguments are already plain path strings (or None when omitted).
    simulation_parameters = {
        'type': args.simulation_type,
        'genome_file': args.genome,
        'target_file': args.target,
        'output_dir': output_dir,
        'number_of_reads': args.n_reads,
        'read_length': args.read_length,
        'cnv_list_file': args.cnv_list,
        'tmp_dir': os.path.join(output_dir, "tmp"),
    }

    cnv_list_parameters = {
        'regions_count': args.regions_count,
        'amplifications': args.amplifications,
        'deletions': args.deletions,
        'minimum_variations': args.minimum,
        'maximum_variations': args.maximum,
    }

    if simulation_parameters['type'] == 'genome':
        simulate_genome_cnv(simulation_parameters, cnv_list_parameters)
    else:
        simulate_exome_cnv(simulation_parameters, cnv_list_parameters)


if __name__ == '__main__':
    main()