#!/bin/bash
#
# Driver for the SAINT dot-plot workflow: parses the command-line options,
# optionally converts and normalizes the input file, and then runs the helper
# scripts that sit in the same directory as this file.

# Absolute path of the directory holding this script; every helper script is
# invoked relative to it. The lookup runs inside a command substitution, so
# the caller's working directory is never changed, and "$0" is quoted so an
# install path containing spaces still resolves correctly.
SCRIPTPATH=$(cd -- "$(dirname -- "$0")" > /dev/null 2>&1 && pwd) || {
  printf 'Cannot determine the directory containing %s\n' "$0" >&2
  exit 1
}

#######################################
# Print the command-line synopsis and terminate the script.
# Globals:   none
# Arguments: none (the program name is taken from $0)
# Outputs:   the synopsis, written to stderr
# Returns:   never returns; exits with status 1
#######################################
usage() {
  # $0 is passed as an argument rather than embedded in the format string,
  # so a '%' or backslash in the script path cannot corrupt the output.
  printf 'Usage: %s\n' "$0" >&2
  printf '%s\n' \
    '[-f <saint_file_name.txt>]' \
    '[-i <0 for SaintExpress format, 1 for other>]' \
    '[-c <clustering to perform. Options: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering; see options -b and -p)>]' \
    '[-n <clustering type to be performed if option -c is set to "h">]' \
    '[-d <distance metric to use if option -c is set to "h">]' \
    '[-b <list of bait proteins in display order (see option -c n)>]' \
    '[-p <list of prey proteins in display order (see option -c n). Set this to "all" if you want to include all preys and cluster them>]' \
    '[-s <primary FDR cutoff [0-1, recommended=0.01]>]' \
    '[-t <secondary FDR cutoff [must be less than the primary, recommended=0.025]>]' \
    '[-x <spectral count minimum. Only preys with >= this will be used>]' \
    '[-m <maximum spectral count>]' \
    '[-N <normalization, 0 for no (default), 1 for yes, 2 for normalization based on significant preys counts (prey FDR <= option -t)>]' \
    '[-C <FDR cutoff for normalization if using option -N 2 (default is -t)>]' \
    >&2
  exit 1
}

# Defaults for options that may be omitted on the command line.
N=0          # -N: normalization mode (0 = no normalization)
n="ward"     # -n: hierarchical clustering method
d="canberra" # -d: distance metric for hierarchical clustering
x=0          # -x: spectral count minimum
i=0          # -i: input format (0 = SaintExpress)

# Options without a default start out empty, so that same-named variables
# inherited from the environment cannot be mistaken for command-line input.
f="" s="" t="" m="" c="" b="" p="" C=""

#######################################
# Parse the command-line options into the same-named global variables.
# Globals:   f i s t x m c n d b p N C (written), OPTIND (advanced by getopts)
# Arguments: the script's positional parameters ("$@")
# Outputs:   a diagnostic on stderr for a malformed option, followed by usage
# Returns:   0 on success; does not return on a malformed option (usage exits)
#######################################
parse_options() {
  local o
  # The leading ':' selects silent error reporting: getopts then yields ':'
  # for a missing argument and '?' for an unknown option, with the offending
  # option letter in OPTARG.
  while getopts ":f:i:s:t:x:m:c:n:d:b:p:N:C:" o; do
    case "${o}" in
      [fistxmcndbpNC])
        # Every option stores its argument in the variable named after it.
        printf -v "${o}" '%s' "${OPTARG}"
        ;;
      :)
        printf 'Option -%s requires an argument.\n' "${OPTARG}" >&2
        usage
        ;;
      *)
        printf 'Unknown option: -%s\n' "${OPTARG}" >&2
        usage
        ;;
    esac
  done
}

parse_options "$@"
shift $((OPTIND-1))

#######################################
# Derive the stem used to name intermediate files and the Output_* directory.
# The directory part is dropped first and the extension is cut at the first
# dot of the base name only; cutting at the first dot of the whole path would
# turn "./in.txt" into an empty stem and keep directories inside the stem.
# Arguments: $1 - input file name, optionally with a directory part
# Outputs:   the stem on stdout (no trailing newline)
#######################################
output_stem() {
  local base=${1##*/}
  local stem=${base%%.*}
  # A dot-file such as ".saint" would otherwise produce an empty stem.
  [ -n "${stem}" ] || stem=${base}
  printf '%s' "${stem}"
}

filename=$(output_stem "${f}")
echo "Saint input file = ${f}"
echo "Primary FDR cutoff = ${s}"
echo "Secondary FDR cutoff for dotplot = ${t}"
echo "Minimum spectral count for significant preys = ${x}"
echo "Maximum spectral count for dot plot = ${m}"

# -f, -s, -t, -m and -c are mandatory; show the synopsis if any is missing.
if [[ -z "${f}" || -z "${s}" || -z "${t}" || -z "${m}" || -z "${c}" ]]; then
  usage
fi

#######################################
# Convert the input with SaintConvert.pl when -i 1 was given, then continue
# with the converted file.
# Globals:   i, SCRIPTPATH (read); f (read, then pointed at the converted file)
# Outputs:   a diagnostic on stderr if the conversion fails
# Returns:   0 normally; exits 1 when the converter reports failure, instead
#            of carrying on with a file that may not have been produced
#######################################
convert_input_if_needed() {
  if [ "${i}" == 1 ]; then
    if ! "${SCRIPTPATH}/SaintConvert.pl" -i "${f}"; then
      printf 'Conversion of %s with SaintConvert.pl failed\n' "${f}" >&2
      exit 1
    fi
    f="mockSaintExpress.txt"
  fi
}

#######################################
# Check the spectral count minimum against the maximum.
# Globals:   m (read); x (read, reset to 0 when negative)
# Outputs:   an error on stderr when x >= m; a notice on stdout when x < 0
# Returns:   0 normally; exits 1 when the minimum is not below the maximum
#######################################
check_spectral_count_range() {
  # Single-bracket tests are used on purpose: inside [[ ]] the -ge/-lt
  # operators evaluate their operands arithmetically, so an empty or
  # non-numeric value would silently count as 0.
  if [ "${x}" -ge "${m}" ]; then
    echo "spectral count minimum (${x}) cannot be greater than or equal to the maximum (${m})" >&2
    exit 1
  elif [ "${x}" -lt 0 ]; then
    echo "spectral count minimum (${x}) cannot be less than 0. Setting to 0 and continuing"
    x=0
  fi
}

convert_input_if_needed
check_spectral_count_range

###Check for normalization

#######################################
# Run the normalization step selected with -N and continue with its output.
#   N=1: Normalization.R <input>
#   N=2: Normalization_sigpreys.R <input> <FDR cutoff>; the cutoff is -C and
#        falls back to the secondary FDR cutoff (-t) when -C was not given
#   any other value: nothing is done
# Globals:   N, t, SCRIPTPATH (read); C (read, defaulted for N=2);
#            f (read, then pointed at norm_saint.txt)
# Outputs:   a progress notice on stdout; a diagnostic on stderr on failure
# Returns:   0 normally; exits 1 when the normalization script fails, instead
#            of carrying on with a norm_saint.txt that may not exist
#######################################
normalize_input() {
  case "${N}" in
    1)
      printf "\nNormalization is being performed\n"
      if ! "${SCRIPTPATH}/Normalization.R" "${f}"; then
        printf 'Normalization of %s failed\n' "${f}" >&2
        exit 1
      fi
      f="norm_saint.txt"
      ;;
    2)
      printf "\nNormalization is being performed\n"
      C=${C:-${t}}
      if ! "${SCRIPTPATH}/Normalization_sigpreys.R" "${f}" "${C}"; then
        printf 'Normalization of %s failed\n' "${f}" >&2
        exit 1
      fi
      f="norm_saint.txt"
      ;;
  esac
}

normalize_input

###Check for clustering etc

# Succeed when $1 is one of the accepted hierarchical clustering methods.
is_valid_hclust_method() {
  case "$1" in
    average|centroid|complete|mcquitty|median|single|ward) return 0 ;;
    *) return 1 ;;
  esac
}

# Succeed when $1 is one of the accepted distance metrics.
is_valid_distance_metric() {
  case "$1" in
    binary|canberra|euclidean|manhattan|maximum|minkowski) return 0 ;;
    *) return 1 ;;
  esac
}

# Report an unsupported clustering method ($1) on stderr and exit 1.
# The value is passed as a printf argument, never as part of the format.
die_invalid_method() {
  printf '\n%s is not a valid Hierarchical clustering method.\n' "$1" >&2
  printf 'Choose one of "average", "centroid", "complete", "mcquitty", "median", "single" or "ward"\n\n' >&2
  exit 1
}

# Report an unsupported distance metric ($1) on stderr and exit 1.
die_invalid_metric() {
  printf '\n%s is not a valid Hierarchical clustering distance metric.\n' "$1" >&2
  printf 'Choose one of "binary", "canberra", "euclidean", "manhattan", "maximum" or "minkowski"\n\n' >&2
  exit 1
}

#######################################
# Validate the options that belong to the chosen clustering mode (-c).
#   -c h: -n and -d must name a supported method / metric; an empty value
#         falls back to "ward" / "canberra" with a notice.
#   -c n: -b and -p are required. "-p all" switches on prey clustering
#         (p_c=1, p="empty"), which uses -n / -d as supplied on the command
#         line, or "ward" / "canberra" when they are empty.
# Globals:   c, b (read); n, d, p (read, may be defaulted); p_c (written)
# Outputs:   notices on stdout; fatal diagnostics on stderr
# Returns:   0 when the options are usable; exits 1 otherwise
#######################################
check_clustering_options() {
  p_c=0

  if [ "${c}" == "h" ]; then
    if [ -z "${n}" ]; then
      printf '\nHierarchical clustering was selected (-c = h), but no clustering method (-n) was chosen.\n'
      printf 'The input parameter -n must be set to one of "average", "centroid", "complete", "mcquitty",\n'
      printf '"median", "single" or "ward". "ward" will be selected as default.\n\n'
      n="ward"
    elif is_valid_hclust_method "${n}"; then
      printf '\nHierarchical clustering (method = %s) will be performed\n\n' "${n}"
    else
      die_invalid_method "${n}"
    fi

    if [ -z "${d}" ]; then
      printf '\nHierarchical clustering was selected (-c = h), but no distance metric (-d) was chosen.\n'
      printf 'The input parameter -d must be set to one of "binary", "canberra", "euclidean",\n'
      printf '"manhattan", "maximum" or "minkowski". "canberra" will be selected as default.\n\n'
      d="canberra"
    elif is_valid_distance_metric "${d}"; then
      printf '\nHierarchical clustering (distance metric = %s) will be performed\n\n' "${d}"
    else
      die_invalid_metric "${d}"
    fi
  elif [ "${c}" == "n" ]; then
    if [ -z "${b}" ]; then
      printf '\n"No Clustering" option was selected (-c = n), but no bait list was included (option -b).\n' >&2
      printf 'Bait list must be in .txt format.\n\n' >&2
      exit 1
    elif [ -z "${p}" ]; then
      printf '\n"No Clustering" option was selected (-c = n), but no prey list was included (option -p).\n' >&2
      printf 'Prey list must be in .txt format.\n\n' >&2
      exit 1
    elif [ "${p}" == "all" ]; then
      printf '\n"No Clustering" option was selected (-c = n) for baits, but preys will still be clustered.\n'
      printf 'using "ward" and "canberra" as defaults or options as supplied on command line.\n\n'
      p="empty"
      p_c=1
      # Keep a -n / -d given on the command line, as the notice above
      # promises; only fill in the defaults when the value is empty.
      n=${n:-ward}
      d=${d:-canberra}
      is_valid_hclust_method "${n}" || die_invalid_method "${n}"
      is_valid_distance_metric "${d}" || die_invalid_metric "${d}"
    fi
  fi
}

check_clustering_options

###Check number of baits

#######################################
# Count the baits in the input with BaitCheck.pl and fall back from
# biclustering to hierarchical clustering (method "ward") when the reported
# count is exactly 2.
# Globals:   SCRIPTPATH, f (read); bait_n (written); c, n (may be rewritten)
# Outputs:   the bait count and, on fallback, a warning, both on stdout
# Returns:   0
#######################################
check_bait_count() {
  bait_n=$(perl "${SCRIPTPATH}/BaitCheck.pl" -i "${f}")
  # Drop any whitespace around the reported number so that the quoted
  # comparison below still matches output such as " 2".
  bait_n=${bait_n//[[:space:]]/}
  echo "Number of baits = ${bait_n}"
  printf "\n\n"

  # bait_n is quoted: when BaitCheck.pl prints nothing the test is simply
  # false instead of a "[" syntax error.
  if [ "${c}" == "b" ] && [ "${bait_n}" == 2 ]; then
    printf "\nWarning only 2 baits are present. Biclustering will not be performed.\n"
    printf "Hierarchical clustering (method = ward) will be performed instead.\n\n"
    c="h"
    n="ward"
  fi
  return 0
}

check_bait_count

###Generate plots

# Final plots go to out_dir; intermediate data files go to tmp_dir.
out_dir="Output_${filename}"
tmp_dir="${out_dir}/TempData_${filename}"

# Create the output tree. -p builds both levels in one call and does not
# complain when they are left over from an earlier run.
make_output_dirs() {
  mkdir -p -- "${tmp_dir}" || exit 1
}

# Move every argument after the first into the directory named by $1.
# One mv per item, so a missing item is reported without stopping the rest.
move_into() {
  local dest=$1 item
  shift
  for item in "$@"; do
    mv -- "${item}" "${dest}"
  done
}

# The exit statuses of the analysis steps below are intentionally not
# checked: the pipeline stays best-effort and each tool reports its own
# failure.
case "${c}" in
  b)
    printf "\nBiclustering will be performed\n\n"
    "${SCRIPTPATH}/Step1_data_reformating.R" "${f}" "${s}" "${filename}"
    "${SCRIPTPATH}/Step2_data_filtering.R" "${filename}_matrix.txt" "${x}" "${filename}"
    # Fixed GSL_RNG_SEED for the nested clustering step.
    GSL_RNG_SEED=123 "${SCRIPTPATH}/Step3_nestedcluster" "${filename}.dat" "${SCRIPTPATH}/biclust_param.txt"
    "${SCRIPTPATH}/Step4_biclustering.R" "${filename}.dat"

    "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
    "${SCRIPTPATH}/R_dotPlot.R" "${s}" "${t}" "${m}"

    make_output_dirs
    move_into "${tmp_dir}" \
      bait_lists Clusters MCMCparameters NestedClusters NestedMu \
      NestedSigma2 OPTclusters "${filename}_matrix.txt" "${filename}.dat" \
      SC_data.txt FDR_data.txt clustered_matrix.txt singletons.txt \
      bait2bait_matrix.txt baitClusters clusteredData
    move_into "${out_dir}" dotplot.pdf bait2bait.pdf estimated.pdf stats.pdf
    cp -- "${SCRIPTPATH}/legend.pdf" "${out_dir}"
    ;;
  h)
    "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
    "${SCRIPTPATH}/R_dotPlot_hc.R" "${s}" "${t}" "${m}" "${n}" "${d}" "${SCRIPTPATH}"

    make_output_dirs
    move_into "${out_dir}" \
      dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf bait2bait.pdf
    move_into "${tmp_dir}" SC_data.txt FDR_data.txt
    cp -- "${SCRIPTPATH}/legend.pdf" "${out_dir}"
    ;;
  n)
    "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
    # Echo the command line before running it.
    echo "$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH"
    # Each value is quoted so an empty one keeps its position in the
    # argument list instead of shifting the arguments that follow it.
    "${SCRIPTPATH}/R_dotPlot_nc.R" "${s}" "${t}" "${m}" "${b}" "${p_c}" "${p}" "${n}" "${d}" "${SCRIPTPATH}"

    make_output_dirs
    move_into "${out_dir}" dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf
    move_into "${tmp_dir}" SC_data.txt FDR_data.txt
    cp -- "${SCRIPTPATH}/legend.pdf" "${out_dir}"
    ;;
  *)
    printf -- '-c must be one of [b, h, n]: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering)\n' >&2
    exit 1
    ;;
esac

# Keep the normalized input next to the other intermediate files.
case "${N}" in
  1|2)
    mv -- norm_saint.txt "${tmp_dir}"
    ;;
esac