0
#!/bin/bash
# Resolve the absolute directory that contains this script. The helper
# programs invoked later (SaintConvert.pl, the R scripts, legend.pdf, ...) are
# addressed relative to it.
#
# The lookup runs in a command-substitution subshell, so the caller's working
# directory is never changed. Everything is quoted so an install path that
# contains spaces still resolves. CDPATH is cleared for the cd so a relative
# directory name cannot be redirected (or echoed) by the user's CDPATH.
SCRIPTPATH=$(CDPATH='' cd -- "$(dirname -- "$0")" && pwd) || {
  printf 'Cannot determine the directory containing %s\n' "$0" >&2
  exit 1
}
+ − 7
# Print the command-line synopsis on stderr and terminate the script with
# exit status 1. Takes no arguments and never returns to the caller.
usage() {
  # The script name is passed as a printf argument rather than spliced into
  # the format string, so a '%' or backslash in $0 is printed literally.
  printf 'Usage: %s\n' "$0" >&2
  # Quoted heredoc delimiter: the option summary is emitted verbatim.
  cat >&2 <<'EOF'
[-f <saint_file_name.txt>]
[-i <0 for SaintExpress format, 1 for other>]
[-c <clustering to perform. Options: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering; see options -b and -p)>]
[-n <clustering type to be performed if option -c is set to "h">]
[-d <distance metric to use if option -c is set to "h">]
[-b <list of bait proteins in display order (see option -c n)>]
[-p <list of prey proteins in display order (see option -c n). Set this to "all" if you want to include all preys and cluster them>]
[-s <primary FDR cutoff [0-1, recommended=0.01]>]
[-t <secondary FDR cutoff [must be less than the primary, recommended=0.025]>]
[-x <spectral count minimum. Only preys with >= this will be used>]
[-m <maximum spectral count>]
[-N <normalization, 0 for no (default), 1 for yes, 2 for normalization based on significant preys counts (prey FDR <= option -t)>]
[-C <FDR cutoff for normalization if using option -N 2 (default is -t)>]
EOF
  exit 1
}
+ − 23
# Fallback values for the options that have one; every other option variable
# stays unset until its flag is seen on the command line.
i=0
x=0
n="ward"
d="canberra"
N=0

# Copy each recognised flag's argument into the variable of the same name.
# The leading ':' puts getopts in silent mode, so both an unknown flag and a
# flag with a missing argument land in the catch-all arm, which calls usage.
while getopts ":f:i:s:t:x:m:c:n:d:b:p:N:C:" opt; do
  case "${opt}" in
    f) f=${OPTARG} ;;
    i) i=${OPTARG} ;;
    s) s=${OPTARG} ;;
    t) t=${OPTARG} ;;
    x) x=${OPTARG} ;;
    m) m=${OPTARG} ;;
    c) c=${OPTARG} ;;
    n) n=${OPTARG} ;;
    d) d=${OPTARG} ;;
    b) b=${OPTARG} ;;
    p) p=${OPTARG} ;;
    N) N=${OPTARG} ;;
    C) C=${OPTARG} ;;
    *) usage ;;
  esac
done
# Discard the options that were consumed above.
shift $((OPTIND - 1))
+ − 76
# Prefix used for every intermediate file and for the Output_<prefix>
# directory: the input file's base name up to its first dot. The directory
# part is removed first; otherwise an input such as ./data/saint.txt would
# produce an empty prefix (everything from the leading '.' is cut) and
# data/saint.txt would put a '/' inside the output directory name.
filename=${f##*/}
filename=${filename%%.*}

# Echo the run parameters so they appear in the log.
echo "Saint input file = ${f}"
echo "Primary FDR cutoff = ${s}"
echo "Secondary FDR cutoff for dotplot = ${t}"
echo "Minimum spectral count for significant preys = ${x}"
echo "Maximum spectral count for dot plot = ${m}"

# -f, -s, -t, -m and -c have no defaults; show the usage text if any is missing.
if [ -z "${f}" ] || [ -z "${s}" ] || [ -z "${t}" ] || [ -z "${m}" ] || [ -z "${c}" ]; then
  usage
fi
+ − 87
# With -i 1 the input is first run through SaintConvert.pl and the rest of the
# pipeline reads mockSaintExpress.txt instead (presumably the file the
# converter writes into the working directory; confirm against SaintConvert.pl).
if [ "${i}" == 1 ]; then
  # Stop here if the conversion fails, rather than continuing with a missing
  # or stale mockSaintExpress.txt.
  if ! "${SCRIPTPATH}/SaintConvert.pl" -i "${f}"; then
    printf 'SaintConvert.pl failed for input %s\n' "${f}" >&2
    exit 1
  fi
  f="mockSaintExpress.txt"
fi
+ − 92
# Guard the spectral-count window: abort when the minimum (-x) reaches or
# exceeds the maximum (-m); a negative minimum is reset to 0 with a notice.
if [ "${x}" -ge "${m}" ]; then
  printf '%s\n' "spectral count minimum (${x}) cannot be greater than or equal to the maximum (${m})"
  exit 1
fi
if [ "${x}" -lt 0 ]; then
  printf '%s\n' "spectral count minimum (${x}) cannot be less than 0. Setting to 0 and continuing"
  x=0
fi
+ − 100
###Check for normalization

# -N 1 runs Normalization.R on the input. -N 2 runs Normalization_sigpreys.R
# with an FDR cutoff C that falls back to the secondary cutoff (-t) when -C
# was not given. Either way the pipeline continues on norm_saint.txt
# (presumably the file those R scripts write; confirm against the scripts).
# A failing script aborts the run so later steps never read a missing or
# stale norm_saint.txt.
if [ "${N}" == 1 ]; then
  printf "\nNormalization is being performed\n"
  if ! "${SCRIPTPATH}/Normalization.R" "${f}"; then
    printf 'Normalization.R failed for input %s\n' "${f}" >&2
    exit 1
  fi
  f="norm_saint.txt"
elif [ "${N}" == 2 ]; then
  printf "\nNormalization is being performed\n"
  C=${C:-${t}}
  if ! "${SCRIPTPATH}/Normalization_sigpreys.R" "${f}" "${C}"; then
    printf 'Normalization_sigpreys.R failed for input %s\n' "${f}" >&2
    exit 1
  fi
  f="norm_saint.txt"
fi
+ − 115
+ − 116
###Check for clustering etc

# Hierarchical clustering (-c h) needs a linkage method in n. An empty value
# falls back to "ward"; a value outside the accepted list ends the run.
# The user-supplied value is always handed to printf as an argument, never as
# part of the format string, so a '%' or backslash in it is printed literally.
if [ "${c}" == "h" ]; then
  case "${n}" in
    "")
      printf "\nHierarchical clustering was selected (-c = h), but no clustering method (-n) was chosen.\n"
      printf "The input parameter -n must be set to one of \"average\", \"centroid\", \"complete\", \"mcquitty\",\n"
      printf "\"median\", \"single\" or \"ward\". \"ward\" will be selected as default.\n\n"
      n="ward"
      ;;
    average | centroid | complete | mcquitty | median | single | ward)
      printf '\nHierarchical clustering (method = %s) will be performed\n\n' "${n}"
      ;;
    *)
      printf '\n%s is not a valid Hierarchical clustering method.\n' "${n}"
      printf "Choose one of \"average\", \"centroid\", \"complete\", \"mcquitty\", \"median\", \"single\" or \"ward\"\n\n"
      exit 1
      ;;
  esac
fi
+ − 133
# p_c is passed to R_dotPlot_nc.R further down; it is switched to 1 only when
# preys are to be clustered in "no clustering" mode (-c n -p all).
p_c=0

# Hierarchical clustering (-c h) needs a distance metric in d. An empty value
# falls back to "canberra"; a value outside the accepted list ends the run.
# The user-supplied value is handed to printf as an argument, never as part
# of the format string, so a '%' or backslash in it is printed literally.
if [ "${c}" == "h" ]; then
  case "${d}" in
    "")
      printf "\nHierarchical clustering was selected (-c = h), but no distance metric (-d) was chosen.\n"
      printf "The input parameter -d must be set to one of \"binary\", \"canberra\", \"euclidean\",\n"
      printf "\"manhattan\", \"maximum\" or \"minkowski\". \"canberra\" will be selected as default.\n\n"
      d="canberra"
      ;;
    binary | canberra | euclidean | manhattan | maximum | minkowski)
      printf '\nHierarchical clustering (distance metric = %s) will be performed\n\n' "${d}"
      ;;
    *)
      printf '\n%s is not a valid Hierarchical clustering distance metric.\n' "${d}"
      printf "Choose one of \"binary\", \"canberra\", \"euclidean\", \"manhattan\", \"maximum\" or \"minkowski\"\n\n"
      exit 1
      ;;
  esac
fi
+ − 149
# "No clustering" mode (-c n) needs a bait list (-b) and a prey list (-p).
# -p all keeps the supplied bait order but asks for the preys to be clustered:
# p becomes the placeholder "empty" and p_c is set to 1 for R_dotPlot_nc.R.
if [ "${c}" == "n" ]; then
  if [ -z "${b}" ]; then
    printf "\n\"No Clustering\" option was selected (-c = n), but no bait list was included (option -b).\n"
    printf "Bait list must be in .txt format.\n\n"
    exit 1
  elif [ -z "${p}" ]; then
    printf "\n\"No Clustering\" option was selected (-c = n), but no prey list was included (option -p).\n"
    printf "Prey list must be in .txt format.\n\n"
    exit 1
  elif [ "${p}" == "all" ]; then
    printf "\n\"No Clustering\" option was selected (-c = n) for baits, but preys will still be clustered.\n"
    printf "using \"ward\" and \"canberra\" as defaults or options as supplied on command line.\n\n"
    p="empty"
    p_c=1
    # Honour -n / -d when they were supplied, as the message above states;
    # fall back to ward / canberra only when the value is empty.
    n=${n:-ward}
    d=${d:-canberra}
    # The method and metric are otherwise only validated for -c h, so check
    # them here before they reach the prey clustering.
    case "${n}" in
      average | centroid | complete | mcquitty | median | single | ward) ;;
      *)
        printf '\n%s is not a valid Hierarchical clustering method.\n' "${n}"
        printf "Choose one of \"average\", \"centroid\", \"complete\", \"mcquitty\", \"median\", \"single\" or \"ward\"\n\n"
        exit 1
        ;;
    esac
    case "${d}" in
      binary | canberra | euclidean | manhattan | maximum | minkowski) ;;
      *)
        printf '\n%s is not a valid Hierarchical clustering distance metric.\n' "${d}"
        printf "Choose one of \"binary\", \"canberra\", \"euclidean\", \"manhattan\", \"maximum\" or \"minkowski\"\n\n"
        exit 1
        ;;
    esac
  fi
fi
+ − 166
+ − 167
###Check number of baits

# BaitCheck.pl prints the number of baits found in the input file. The run
# stops if the helper fails, because no later step can succeed on an input
# that could not be read.
if ! bait_n=$(perl "${SCRIPTPATH}/BaitCheck.pl" -i "${f}"); then
  printf 'BaitCheck.pl failed for input %s\n' "${f}" >&2
  exit 1
fi
echo "Number of baits = ${bait_n}"
printf "\n\n"

# With exactly two baits, biclustering is replaced by hierarchical clustering
# using the ward method. bait_n is quoted so an empty value compares as
# "not 2" instead of producing a test syntax error.
if [ "${c}" == "b" ] && [ "${bait_n}" == 2 ]; then
  printf "\nWarning only 2 baits are present. Biclustering will not be performed.\n"
  printf "Hierarchical clustering (method = ward) will be performed instead.\n\n"
  c="h"
  n="ward"
fi
+ − 180
+ − 181
###Generate plots

# Final artefacts go to Output_<prefix>; intermediate data goes to its
# TempData_<prefix> subdirectory.
out_dir="Output_${filename}"
tmp_dir="${out_dir}/TempData_${filename}"

# move_into DEST FILE...
# Move each FILE into directory DEST, one mv per file, so a missing file is
# reported by mv but does not stop the remaining moves.
move_into() {
  local dest=$1
  shift
  local item
  for item in "$@"; do
    mv -- "${item}" "${dest}"
  done
}

# Every expansion below is quoted so paths and option values containing
# spaces reach the helper programs as single arguments. mkdir -p creates both
# directory levels and does not complain when they exist from an earlier run.
case "${c}" in
  b)
    printf "\nBiclustering will be performed\n\n"
    "${SCRIPTPATH}/Step1_data_reformating.R" "${f}" "${s}" "${filename}"
    "${SCRIPTPATH}/Step2_data_filtering.R" "${filename}_matrix.txt" "${x}" "${filename}"
    # Fixed GSL_RNG_SEED, presumably so the nested clustering is reproducible.
    GSL_RNG_SEED=123 "${SCRIPTPATH}/Step3_nestedcluster" "${filename}.dat" "${SCRIPTPATH}/biclust_param.txt"
    "${SCRIPTPATH}/Step4_biclustering.R" "${filename}.dat"

    "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
    "${SCRIPTPATH}/R_dotPlot.R" "${s}" "${t}" "${m}"

    mkdir -p "${tmp_dir}"
    move_into "${tmp_dir}" \
      bait_lists Clusters MCMCparameters NestedClusters NestedMu NestedSigma2 \
      OPTclusters "${filename}_matrix.txt" "${filename}.dat" SC_data.txt \
      FDR_data.txt clustered_matrix.txt singletons.txt bait2bait_matrix.txt \
      baitClusters clusteredData
    move_into "${out_dir}" dotplot.pdf bait2bait.pdf estimated.pdf stats.pdf
    cp "${SCRIPTPATH}/legend.pdf" "${out_dir}"
    ;;
  h)
    "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
    "${SCRIPTPATH}/R_dotPlot_hc.R" "${s}" "${t}" "${m}" "${n}" "${d}" "${SCRIPTPATH}"

    mkdir -p "${tmp_dir}"
    move_into "${out_dir}" dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf bait2bait.pdf
    move_into "${tmp_dir}" SC_data.txt FDR_data.txt
    cp "${SCRIPTPATH}/legend.pdf" "${out_dir}"
    ;;
  n)
    "${SCRIPTPATH}/SOFD.pl" -i "${f}" -s "${s}" -x "${x}"
    # Log the exact plotting command before running it.
    echo "$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH"
    "${SCRIPTPATH}/R_dotPlot_nc.R" "${s}" "${t}" "${m}" "${b}" "${p_c}" "${p}" "${n}" "${d}" "${SCRIPTPATH}"

    mkdir -p "${tmp_dir}"
    move_into "${out_dir}" dotplot.pdf heatmap_borders.pdf heatmap_no_borders.pdf
    move_into "${tmp_dir}" SC_data.txt FDR_data.txt
    cp "${SCRIPTPATH}/legend.pdf" "${out_dir}"
    ;;
  *)
    # '--' stops printf from reading the leading "-c" as an option.
    printf -- "-c must be one of [b, h, n]: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering)\n"
    exit 1
    ;;
esac
+ − 248
+ − 249 if [ "${N}" == "1" ] || [ "${N}" == "2" ]; then
+ − 250 mv norm_saint.txt Output_${filename}/TempData_${filename}
+ − 251 fi
+ − 252