0
|
function CFG = configure_rdiff(CFG)
% CFG = configure_rdiff(CFG)
%
% Fills the configuration struct CFG with the rDiff settings defined below
% and returns it. Fields of the incoming CFG that are not assigned in this
% file are passed through unchanged.

%%% rDiff parameters %%%

% File names of the BAM files to be considered.
CFG.BAM_FILES = { ...
    'condition_A_replicate_1.bam', ...
    'condition_A_replicate_2.bam', ...
    'condition_B_replicate_1.bam', ...
    'condition_B_replicate_2.bam'};

% Names of the experiments. The file names are used if the entries are empty.
CFG.NAMES = {'A1', 'A2', 'B1', 'B2'};

% Directory that contains the BAM files.
CFG.data_dir = '';

% Sample to which each BAM file belongs (one entry per BAM file).
CFG.SAMPLES = [1, 1, 2, 2];

% Location of the gene structure.
CFG.genes_path = '';

% Output directory.
CFG.out_base = '';

% Output directory for temporary files.
CFG.out_base_temp = '';

% Length of the reads.
CFG.sequenced_length = 75;

% Prefix for the chromosome name when getting reads from the BAM files.
CFG.chr_prefix = '';

%%% Read filters %%%

% Minimal read length.
CFG.min_read_length = 30;

%%% Parameters for gene expression estimation %%%

% Count the number of reads (CFG.estimate_gene_expression = 1 for yes);
% otherwise give the files for the expression.
CFG.estimate_gene_expression = 1;

% Files to use for the gene expression. They must be tab-delimited files in
% which each line contains the gene name followed by the expression.
% NOTE(review): the original comment refers to these as CFG.GENE_EXPR_FILES,
% but the fields assigned here are the CFG.Counts_* fields -- confirm which
% name the pipeline reads.
CFG.Counts_gene_expression = '';
CFG.Counts_rDiff_parametric = '';
CFG.Counts_rDiff_nonparametric = '';

%%% Parameters for variance function %%%

% Parametric form of the variance function for sample 1:
%   sigma = a + bx + cx^2
% Set CFG.predefined_variance_function1 = [] to disable it, or to [a,b,c]
% to use it. If [a,b,c] is given, the other parameters for the variance
% function estimation are ignored for sample 1.
CFG.predefined_variance_function1 = [];

% Parametric form of the variance function for sample 2:
%   sigma = a + bx + cx^2
% Set CFG.predefined_variance_function2 = [] to disable it, or to [a,b,c]
% to use it. If [a,b,c] is given, the other parameters for the variance
% function estimation are ignored for sample 2.
CFG.predefined_variance_function2 = [];

% Compute the variance function for sample 1 (1 = yes, 0 = use the
% precomputed variance function saved under CFG.variance_function_1).
CFG.compute_variance_function_1 = 1;
CFG.variance_function_1 = '';
CFG.save_variance_function_1 = 'variance_function_1.mat';

% Compute the variance function for sample 2 (1 = yes, 0 = use the
% precomputed variance function saved under CFG.variance_function_2).
CFG.compute_variance_function_2 = 1;
CFG.variance_function_2 = '';
CFG.save_variance_function_2 = 'variance_function_2.mat';

% Number of points to subsample for the variance function estimate of
% rDiff.nonparametric.
CFG.rDiff_nonparametric_subsample_variance_estimation = 10000;

% Subsample the mean-variance pairs to increase the speed of the local
% regression. CFG.variance_samplebins is the number of bins to use and
% CFG.variance_samples_per_bin is how many samples are drawn per bin.
CFG.variance_samplebins = 100;
CFG.variance_samples_per_bin = 500;

%%% Testing parameters %%%

% Subsample reads down to CFG.rDiff_subsample to increase speed (set
% CFG.rDiff_subsample to 0 if no subsampling shall be done).
CFG.rDiff_subsample = 10000;

% Clip CFG.bases_to_clip bases at the end of the reads.
% NOTE(review): the original wording was "the first ... bases at the end of
% the reads" -- confirm which read end(s) are clipped.
CFG.bases_to_clip = 3;

% Number of bootstraps for the nonparametric test.
CFG.bootstraps = 1000;

% Number of bins for variance matching.
CFG.nr_of_slices = 10;

% Tests to perform.
CFG.perform_nonparametric = 0;
CFG.perform_parametric = 0;
CFG.perform_mmd = 0;
CFG.perform_poisson = 0;

%%%%% rproc settings %%%%%

% 1: cluster submission, 0: run locally.
CFG.use_rproc = 0;

if CFG.use_rproc
    % Cluster submission: job count, memory request and rproc options.
    CFG.rproc_num_jobs = 100;
    CFG.rproc_memreq = 8000;
    CFG.rproc_par.priority = 55;
    CFG.rproc_par.resubmit = 3;
    CFG.rproc_par.mem_req_resubmit = [24000, 40000, 60000];
    CFG.rproc_par.time_req_resubmit = [60, 100, 90] * 60;
    CFG.rproc_par.express = 0;
    CFG.rproc_par.immediately_bg = 0;
    CFG.rproc_par.immediately = 0;
    CFG.rproc_par.arch = 64;
    CFG.rproc_par.identifier = '';
    CFG.rproc_par.verbosity = 0;
    CFG.rproc_time = 15 * 60; % mins
else
    % Local execution: a single job, no rproc options are set.
    CFG.rproc_num_jobs = 1;
end