Mercurial > repos > bornea > prohits_dotplot_generator
comparison Dotplot_Release/dotplot.bash @ 12:f48b1312b6dd draft
Uploaded
author | bornea |
---|---|
date | Wed, 16 Mar 2016 12:09:43 -0400 |
parents | bc752a05f16d |
children |
comparison
equal
deleted
inserted
replaced
11:89783b79ef25 | 12:f48b1312b6dd |
---|---|
#!/bin/bash
#
# dotplot.bash -- command-line driver that validates its options and then runs
# the helper programs shipped next to it to produce dotplot output.

# Absolute path of the directory containing this script; every helper program
# and data file below is addressed relative to it.
#
# It is resolved with cd + pwd inside a command substitution, so the caller's
# working directory and directory stack are left untouched.  `readlink -e` is
# avoided because it is a GNU-only option.
# NOTE(review): portability is presumably why the earlier readlink variant was
# commented out -- confirm.
#
# "$0" is quoted so an install path containing whitespace resolves correctly,
# and a failure aborts instead of silently falling back to the current
# directory.
SCRIPTPATH=$(cd -- "$(dirname -- "$0")" > /dev/null && pwd) || {
  printf 'Unable to determine the script directory from %s\n' "$0" >&2
  exit 1
}
7 | |
#######################################
# Print the option summary on stderr and terminate the script.
# Arguments: none (reads $0 for the program name)
# Outputs:   usage text on stderr, nothing on stdout
# Returns:   never returns; exits with status 1
#
# The program name is passed to printf as an argument rather than being part
# of the format string, so a '%' or backslash in the script path cannot
# corrupt the output.
# NOTE(review): the -t text says the secondary cutoff "must be less than the
# primary", yet the recommended values are 0.025 (secondary) and 0.01
# (primary) -- confirm the intended wording.
#######################################
usage() {
  {
    printf 'Usage: %s\n' "$0"
    cat <<'EOF'
[-f <saint_file_name.txt>]
[-i <0 for SaintExpress format, 1 for other>]
[-c <clustering to perform. Options: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering; see options -b and -p)>]
[-n <clustering type to be performed if option -c is set to "h">]
[-d <distance metric to use if option -c is set to "h">]
[-b <list of bait proteins in display order (see option -c n)>]
[-p <list of prey proteins in display order (see option -c n). Set this to "all" if you want to include all preys and cluster them>]
[-s <primary FDR cutoff [0-1, recommended=0.01]>]
[-t <secondary FDR cutoff [must be less than the primary, recommended=0.025]>]
[-x <spectral count minimum. Only preys with >= this will be used>]
[-m <maximum spectral count>]
[-N <normalization, 0 for no (default), 1 for yes, 2 for normalization based on significant preys counts (prey FDR <= option -t)>]
[-C <FDR cutoff for normalization if using option -N 2 (default is -t)>]
EOF
  } 1>&2
  exit 1
}
23 | |
# Defaults for the optional settings; the matching options override them.
N=0           # -N normalization mode
n="ward"      # -n hierarchical clustering method
d="canberra"  # -d distance metric
x=0           # -x spectral count minimum
i=0           # -i input format flag

#######################################
# Parse the command-line options into the global variables named after each
# option letter (f, i, s, t, x, m, c, n, d, b, p, N, C).
# Globals:   writes the variables listed above; resets and advances OPTIND
# Arguments: the script's command-line arguments
# Outputs:   a one-line diagnostic on stderr for a bad option, followed by
#            the usage text
# Returns:   0 on success; a bad option ends the script through usage
#######################################
parse_options() {
  local opt
  OPTIND=1
  # The leading ':' selects getopts' silent mode: a missing argument is
  # reported as ':' and an unknown option as '?', with the offending letter
  # in OPTARG either way.
  while getopts ":f:i:s:t:x:m:c:n:d:b:p:N:C:" opt; do
    case "${opt}" in
      [fistxmcndbpNC])
        # Every option stores its argument in the variable of the same name.
        printf -v "${opt}" '%s' "${OPTARG}"
        ;;
      :)
        printf 'Option -%s requires an argument\n' "${OPTARG}" >&2
        usage
        ;;
      *)
        printf 'Unknown option -%s\n' "${OPTARG}" >&2
        usage
        ;;
    esac
  done
}

parse_options "$@"
shift $((OPTIND-1))
76 | |
#######################################
# Derive the stem used to name this run's output directory and intermediate
# files from the input file name.
# Arguments: $1 - input file name, optionally with leading directories
# Outputs:   the stem on stdout (no trailing newline)
#
# Directories are dropped first and only then is everything from the first
# '.' removed.  Cutting at the first dot of the whole path turned
# "./saint.txt" into an empty stem and left slashes in the stem for
# "dir/saint.txt".  Bare file names give the same stem as before.
#######################################
output_stem() {
  local base=${1##*/}
  printf '%s' "${base%%.*}"
}

filename=$(output_stem "${f}")

# Echo the effective settings so they appear in the run log.
printf '%s\n' "Saint input file = ${f}"
printf '%s\n' "Primary FDR cutoff = ${s}"
printf '%s\n' "Secondary FDR cutoff for dotplot = ${t}"
printf '%s\n' "Minimum spectral count for significant preys = ${x}"
printf '%s\n' "Maximum spectral count for dot plot = ${m}"

# -f, -s, -t, -m and -c have no defaults; without all of them show the usage
# text (which also ends the script).
if [[ -z "${f}" || -z "${s}" || -z "${t}" || -z "${m}" || -z "${c}" ]]; then
  usage
fi
87 | |
# With -i 1 the input is run through SaintConvert.pl first, and the rest of
# the script then reads mockSaintExpress.txt instead of the original file.
# NOTE(review): mockSaintExpress.txt is presumably what SaintConvert.pl writes
# into the working directory -- confirm against that script.
# Paths are quoted so names with spaces survive, and a failed conversion stops
# the run instead of continuing with a file that may not exist.
if [[ "${i}" == 1 ]]; then
  "${SCRIPTPATH}/SaintConvert.pl" -i "${f}" || {
    printf 'SaintConvert.pl failed to convert %s\n' "${f}" >&2
    exit 1
  }
  f="mockSaintExpress.txt"
fi
92 | |
#######################################
# Succeed when $1 is numerically smaller than $2.
# Arguments: $1, $2 - decimal numbers (already validated by the caller)
# Returns:   0 when $1 < $2, non-zero otherwise
#
# awk performs the comparison so fractional values work; the integer-only
# `[ ... -lt ... ]` test errors out on them.
#######################################
num_lt() {
  awk -v lhs="$1" -v rhs="$2" 'BEGIN { exit !(lhs + 0 < rhs + 0) }'
}

# Sanity-check the spectral count bounds: -x (minimum) must be below -m
# (maximum).  Both values are first required to be plain decimal numbers;
# previously a non-integer made the test command fail and the whole check was
# silently skipped.
number_re='^-?([0-9]+([.][0-9]*)?|[.][0-9]+)$'
if ! [[ "${x}" =~ ${number_re} && "${m}" =~ ${number_re} ]]; then
  printf 'spectral count minimum (%s) and maximum (%s) must both be numbers\n' "${x}" "${m}" >&2
  exit 1
fi

if ! num_lt "${x}" "${m}"; then
  echo "spectral count minimum (${x}) cannot be greater than or equal to the maximum (${m})" >&2
  exit 1
elif num_lt "${x}" 0; then
  # A negative minimum is corrected rather than rejected.
  echo "spectral count minimum (${x}) cannot be less than 0. Setting to 0 and continuing"
  x=0
fi
100 | |
###Check for normalization

# -N 1 runs Normalization.R on the input.  -N 2 runs Normalization_sigpreys.R
# with an FDR cutoff taken from -C, or from the secondary cutoff -t when -C
# was not given.  After either one the script continues from norm_saint.txt.
# NOTE(review): norm_saint.txt is presumably written by the R scripts into the
# working directory -- confirm.
# Any other value of -N leaves the input untouched.  Arguments are quoted so
# file names with spaces survive, and a failing normalizer stops the run.
case "${N}" in
  1)
    printf "\nNormalization is being performed\n"
    "${SCRIPTPATH}/Normalization.R" "${f}" || {
      printf 'Normalization.R failed on %s\n' "${f}" >&2
      exit 1
    }
    f="norm_saint.txt"
    ;;
  2)
    printf "\nNormalization is being performed\n"
    C=${C:-${t}}
    "${SCRIPTPATH}/Normalization_sigpreys.R" "${f}" "${C}" || {
      printf 'Normalization_sigpreys.R failed on %s\n' "${f}" >&2
      exit 1
    }
    f="norm_saint.txt"
    ;;
esac
115 | |
116 | |
###Check for clustering etc

# Values accepted for -n (clustering method) and -d (distance metric).
cluster_methods=(average centroid complete mcquitty median single ward)
distance_metrics=(binary canberra euclidean manhattan maximum minkowski)

#######################################
# Succeed when $1 is identical to one of the remaining arguments.
#######################################
is_one_of() {
  local wanted=$1 candidate
  shift
  for candidate in "$@"; do
    if [[ "${candidate}" == "${wanted}" ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Report $1 as an invalid clustering method on stderr and end the script.
# The value is passed as a printf argument so a '%' in it is printed verbatim.
#######################################
die_bad_method() {
  printf "\n%s is not a valid Hierarchical clustering method.\n" "$1" >&2
  printf "Choose one of \"average\", \"centroid\", \"complete\", \"mcquitty\", \"median\", \"single\" or \"ward\"\n\n" >&2
  exit 1
}

#######################################
# Report $1 as an invalid distance metric on stderr and end the script.
#######################################
die_bad_metric() {
  printf "\n%s is not a valid Hierarchical clustering distance metric.\n" "$1" >&2
  printf "Choose one of \"binary\", \"canberra\", \"euclidean\", \"manhattan\", \"maximum\" or \"minkowski\"\n\n" >&2
  exit 1
}

# 1 when preys are clustered although baits are not (-c n together with -p all).
p_c=0

# Hierarchical clustering: check the method first, then the distance metric.
# An empty value (only possible when -n "" / -d "" is passed explicitly, as
# both have defaults) falls back to the default with an explanation.
if [[ "${c}" == "h" ]]; then
  if [[ -z "${n}" ]]; then
    printf "\nHierarchical clustering was selected (-c = h), but no clustering method (-n) was chosen.\n"
    printf "The input parameter -n must be set to one of \"average\", \"centroid\", \"complete\", \"mcquitty\",\n"
    printf "\"median\", \"single\" or \"ward\". \"ward\" will be selected as default.\n\n"
    n="ward"
  elif is_one_of "${n}" "${cluster_methods[@]}"; then
    printf "\nHierarchical clustering (method = %s) will be performed\n\n" "${n}"
  else
    die_bad_method "${n}"
  fi

  if [[ -z "${d}" ]]; then
    printf "\nHierarchical clustering was selected (-c = h), but no distance metric (-d) was chosen.\n"
    printf "The input parameter -d must be set to one of \"binary\", \"canberra\", \"euclidean\",\n"
    printf "\"manhattan\", \"maximum\" or \"minkowski\". \"canberra\" will be selected as default.\n\n"
    d="canberra"
  elif is_one_of "${d}" "${distance_metrics[@]}"; then
    printf "\nHierarchical clustering (distance metric = %s) will be performed\n\n" "${d}"
  else
    die_bad_metric "${d}"
  fi
fi

# No clustering: both a bait list and a prey list are mandatory.
if [[ "${c}" == "n" ]]; then
  if [[ -z "${b}" ]]; then
    printf "\n\"No Clustering\" option was selected (-c = n), but no bait list was included (option -b).\n" >&2
    printf "Bait list must be in .txt format.\n\n" >&2
    exit 1
  elif [[ -z "${p}" ]]; then
    printf "\n\"No Clustering\" option was selected (-c = n), but no prey list was included (option -p).\n" >&2
    printf "Prey list must be in .txt format.\n\n" >&2
    exit 1
  elif [[ "${p}" == "all" ]]; then
    printf "\n\"No Clustering\" option was selected (-c = n) for baits, but preys will still be clustered.\n"
    printf "using \"ward\" and \"canberra\" as defaults or options as supplied on command line.\n\n"
    p="empty"
    p_c=1
    # Honour -n / -d when they were supplied, as the message above promises;
    # they used to be overwritten with the defaults unconditionally.  The
    # values are validated here because the -c h checks do not run in this mode.
    n=${n:-ward}
    d=${d:-canberra}
    is_one_of "${n}" "${cluster_methods[@]}" || die_bad_method "${n}"
    is_one_of "${d}" "${distance_metrics[@]}" || die_bad_metric "${d}"
  fi
fi
166 | |
167 | |
###Check number of baits

# BaitCheck.pl's stdout is captured as the bait count.
# NOTE(review): it presumably prints a single integer -- confirm against the
# script.
bait_n=$(perl "${SCRIPTPATH}/BaitCheck.pl" -i "${f}")
# Strip stray whitespace explicitly; the comparison below used to rely on
# unquoted word splitting for that, which turned an empty result into a
# `[ == 2 ]` syntax error.
bait_n=${bait_n//[[:space:]]/}
printf '%s\n' "Number of baits = ${bait_n}"
printf "\n\n"

# With exactly two baits a biclustering request is downgraded to hierarchical
# clustering using the "ward" method.
if [[ "${c}" == "b" && "${bait_n}" == 2 ]]; then
  printf "\nWarning only 2 baits are present. Biclustering will not be performed.\n"
  printf "Hierarchical clustering (method = ward) will be performed instead.\n\n"
  c="h"
  n="ward"
fi
180 | |
181 | |
###Generate plots

# Results go to Output_<stem>; intermediate data is kept in its TempData_<stem>
# subdirectory.
out_dir="Output_${filename}"
tmp_dir="${out_dir}/TempData_${filename}"

#######################################
# Move every item named in $2... into directory $1.
# One mv per item, so a missing item is reported without stopping the rest.
#######################################
move_into() {
  local dest=$1 item
  shift
  for item in "$@"; do
    mv -- "${item}" "${dest}"
  done
}

# All expansions are quoted so file names containing spaces reach the helper
# programs as single arguments.  `mkdir -p` creates both directory levels and
# does not fail when a previous run already created them.
case "${c}" in
  b)
    printf "\nBiclustering will be performed\n\n"
    "${SCRIPTPATH}/Step1_data_reformating.R" "${f}" "${s}" "${filename}"
    "${SCRIPTPATH}/Step2_data_filtering.R" "${filename}_matrix.txt" "${x}" "${filename}"
    # GSL_RNG_SEED is set in Step3's environment only.
    # NOTE(review): presumably a fixed seed for reproducible clustering -- confirm.
    GSL_RNG_SEED=123 "${SCRIPTPATH}/Step3_nestedcluster" "${filename}.dat" "${SCRIPTPATH}/biclust_param.txt"
    "${SCRIPTPATH}/Step4_biclustering.R" "${filename}.dat"

    "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
    "${SCRIPTPATH}/R_dotPlot.R" "${s}" "${t}" "${m}"

    mkdir -p "${tmp_dir}"
    move_into "${tmp_dir}" \
      bait_lists Clusters MCMCparameters NestedClusters NestedMu NestedSigma2 \
      OPTclusters "${filename}_matrix.txt" "${filename}.dat" SC_data.txt \
      FDR_data.txt clustered_matrix.txt singletons.txt bait2bait_matrix.txt \
      baitClusters clusteredData
    move_into "${out_dir}" dotplot.pdf bait2bait.pdf estimated.pdf stats.pdf
    cp "${SCRIPTPATH}/legend.pdf" "${out_dir}"
    ;;
  h)
    "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
    "${SCRIPTPATH}/R_dotPlot_hc.R" "${s}" "${t}" "${m}" "${n}" "${d}" "${SCRIPTPATH}"

    mkdir -p "${tmp_dir}"
    move_into "${out_dir}" dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf bait2bait.pdf
    move_into "${tmp_dir}" SC_data.txt FDR_data.txt
    cp "${SCRIPTPATH}/legend.pdf" "${out_dir}"
    ;;
  n)
    "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
    # Log the exact plotting command before running it.
    echo "$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH"
    "${SCRIPTPATH}/R_dotPlot_nc.R" "${s}" "${t}" "${m}" "${b}" "${p_c}" "${p}" "${n}" "${d}" "${SCRIPTPATH}"

    mkdir -p "${tmp_dir}"
    move_into "${out_dir}" dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf
    move_into "${tmp_dir}" SC_data.txt FDR_data.txt
    cp "${SCRIPTPATH}/legend.pdf" "${out_dir}"
    ;;
  *)
    printf -- "-c must be one of [b, h, n]: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering)\n" >&2
    exit 1
    ;;
esac

# Keep the normalized input with the other intermediate files when
# normalization was requested (-N 1 or -N 2).
if [[ "${N}" == "1" || "${N}" == "2" ]]; then
  mv -- norm_saint.txt "${tmp_dir}"
fi
252 |