Mercurial > repos > ahosny > cnvsim
comparison cnv-sim.py @ 13:e0f5a71e94ed draft
Uploaded
author | ahosny |
---|---|
date | Wed, 07 Sep 2016 09:32:24 -0400 |
parents | 4a4d2b78eb55 |
children |
comparison
equal
deleted
inserted
replaced
12:12eb1e77bcfa | 13:e0f5a71e94ed |
---|---|
9 | 9 |
10 from cnvsim.fileio import * | 10 from cnvsim.fileio import * |
11 from cnvsim.exome_simulator import * | 11 from cnvsim.exome_simulator import * |
12 from cnvsim.genome_simulator import * | 12 from cnvsim.genome_simulator import * |
13 | 13 |
14 class CapitalisedHelpFormatter(argparse.HelpFormatter): | |
15 def add_usage(self, usage, actions, groups, prefix=None): | |
16 if prefix is None: | |
17 prefix = 'Usage: ' | |
18 return super(CapitalisedHelpFormatter, self).add_usage(usage, actions, groups, prefix) | |
19 | |
14 def log(message): | 20 def log(message): |
15 print '[CNV SIM {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message | 21 print '[CNV SIM {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now()) + "] " + message |
16 | 22 |
23 | |
17 def main(): | 24 def main(): |
18 parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) | 25 parser = argparse.ArgumentParser(add_help=True, formatter_class=CapitalisedHelpFormatter, \ |
26 description='Generates NGS short reads that encompass copy number variations in whole genome and targeted exome sequencing') | |
27 parser._positionals.title = 'Positional arguments' | |
28 parser._optionals.title = 'Optional arguments' | |
29 parser.add_argument('-v', '--version', action='version', version = 'CNV-Sim v0.9.2', help = "Show program's version number and exit.") | |
30 | |
19 parser.add_argument("simulation_type", type=str, choices=['genome', 'exome'], \ | 31 parser.add_argument("simulation_type", type=str, choices=['genome', 'exome'], \ |
20 help="simulate copy number variations in whole genome or exome regions") | 32 help="simulate copy number variations in whole genome or exome regions") |
21 parser.add_argument("genome", type=file, \ | 33 parser.add_argument("genome", type=file, \ |
22 help="path to the referece genome file in FASTA format ") | 34 help="path to the referece genome file in FASTA format ") |
23 parser.add_argument("target", type=file, nargs='?', default=None, \ | 35 parser.add_argument("target", type=file, nargs='?', default=None, \ |
24 help="path to the target regions file in BED format (if using exome)") | 36 help="path to the target regions file in BED format (if using exome)") |
25 | 37 |
26 parser.add_argument("-o", "--output_dir_name",type=str, default="test", \ | 38 parser.add_argument("-o", "--output_dir_name",type=str, default="simulation_output", \ |
27 help="a name to be used to create the output directory (overrides existing directory with the same name).") | 39 help="a name to be used to create the output directory (overrides existing directory with the same name).") |
28 parser.add_argument("-n", "--n_reads", type=int, default=10000, \ | 40 parser.add_argument("-n", "--n_reads", type=int, default=10000, \ |
29 help="total number of reads without variations") | 41 help="total number of reads without variations") |
30 parser.add_argument("-l", "--read_length", type=int, default=100, \ | 42 parser.add_argument("-l", "--read_length", type=int, default=100, \ |
31 help="read length (bp)") | 43 help="read length (bp)") |
32 parser.add_argument("--cnv_list", type=file, default=None, \ | 44 parser.add_argument("--cnv_list", type=file, default=None, \ |
33 help="path to a CNV list file in BED format chr | start | end | variation. If not passed, it is randomly generated using CNV list parameters below") | 45 help="path to a CNV list file in BED format chr | start | end | variation. If not passed, it is randomly generated using CNV list parameters below") |
34 | 46 |
35 cnv_sim_group = parser.add_argument_group('CNV list parameters', "parameters to be used if CNV list is not passed") | 47 cnv_sim_group = parser.add_argument_group('CNV list parameters', "parameters to be used if CNV list is not passed") |
36 cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=30, \ | 48 cnv_sim_group.add_argument("-g", "--regions_count", type=int, default=20, \ |
37 help="number of CNV regions to be randomly generated") | 49 help="number of CNV regions to be generated randomly") |
38 cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.30, \ | 50 cnv_sim_group.add_argument("-r_min", "--region_minimum_length", type=int, default=1000, \ |
51 help="minimum length of each CNV region") | |
52 cnv_sim_group.add_argument("-r_max", "--region_maximum_length", type=int, default=100000, \ | |
53 help="maximum length of each CNV region") | |
54 cnv_sim_group.add_argument("-a", "--amplifications", type=float, default=0.50, \ | |
39 help="percentage of amplifications in range [0.0: 1.0].") | 55 help="percentage of amplifications in range [0.0: 1.0].") |
40 cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.20, \ | 56 cnv_sim_group.add_argument("-d", "--deletions", type=float, default=0.50, \ |
41 help="percentage of deletions in range [0.0: 1.0].") | 57 help="percentage of deletions in range [0.0: 1.0].") |
42 cnv_sim_group.add_argument("-min", "--minimum", type=float, default=3, \ | 58 cnv_sim_group.add_argument("-cn_min", "--copy_number_minimum", type=float, default=3, \ |
43 help="minimum number of amplifications/deletions introduced") | 59 help="minimum level of variations (copy number) introduced") |
44 cnv_sim_group.add_argument("-max", "--maximum", type=float, default=10, \ | 60 cnv_sim_group.add_argument("-cn_max", "--copy_number_maximum", type=float, default=10, \ |
45 help="maximum number of amplifications/deletions introduced") | 61 help="maximum level of variation (copy number) introduced") |
46 | 62 |
47 args = parser.parse_args() | 63 args = parser.parse_args() |
48 | 64 |
49 simulation_parameters = {} | 65 simulation_parameters = {} |
50 simulation_parameters['type'] = args.simulation_type | 66 simulation_parameters['type'] = args.simulation_type |
62 simulation_parameters['cnv_list_file'] = None | 78 simulation_parameters['cnv_list_file'] = None |
63 simulation_parameters['tmp_dir'] = os.path.join(os.getcwd(), args.output_dir_name , "tmp") | 79 simulation_parameters['tmp_dir'] = os.path.join(os.getcwd(), args.output_dir_name , "tmp") |
64 | 80 |
65 cnv_list_parameters = {} | 81 cnv_list_parameters = {} |
66 cnv_list_parameters['regions_count'] = args.regions_count | 82 cnv_list_parameters['regions_count'] = args.regions_count |
83 cnv_list_parameters['minimum_length'] = args.region_minimum_length | |
84 cnv_list_parameters['maximum_length'] = args.region_maximum_length | |
67 cnv_list_parameters['amplifications'] = args.amplifications | 85 cnv_list_parameters['amplifications'] = args.amplifications |
68 cnv_list_parameters['deletions'] = args.deletions | 86 cnv_list_parameters['deletions'] = args.deletions |
69 cnv_list_parameters['minimum_variations'] = args.minimum | 87 cnv_list_parameters['minimum_variations'] = args.copy_number_minimum |
70 cnv_list_parameters['maximum_variations'] = args.maximum | 88 cnv_list_parameters['maximum_variations'] = args.copy_number_maximum |
89 | |
90 if cnv_list_parameters['amplifications'] + cnv_list_parameters['deletions'] != 1.0: | |
91 log("ERROR: percentage of amplifications + percentage of deletions must be equal to 1.0") | |
92 exit() | |
71 | 93 |
72 if simulation_parameters['type'] == 'genome': | 94 if simulation_parameters['type'] == 'genome': |
73 simulate_genome_cnv(simulation_parameters, cnv_list_parameters) | 95 simulate_genome_cnv(simulation_parameters, cnv_list_parameters) |
74 else: | 96 else: |
75 simulate_exome_cnv(simulation_parameters, cnv_list_parameters) | 97 simulate_exome_cnv(simulation_parameters, cnv_list_parameters) |