Mercurial > repos > bornea > prohits_dotplot_generator
view Dotplot_Release/dotplot.bash @ 20:3555f2c0d325 draft
Uploaded
author | bornea |
---|---|
date | Thu, 14 Apr 2016 16:32:36 -0400 |
parents | bc752a05f16d |
children |
line wrap: on
line source
#!/bin/bash #SCRIPT=$(readlink -e $0) #SCRIPTPATH=`dirname $SCRIPT` pushd `dirname $0` > /dev/null SCRIPTPATH=`pwd` popd > /dev/null usage() { printf "Usage: $0 [-f <saint_file_name.txt>] [-i <0 for SaintExpress format, 1 for other>] [-c <clustering to perform. Options: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering; see options -b and -p)>] [-n <clustering type to be performed if option -c is set to \"h\">] [-d <distance metric to use if option -c is set to \"h\">] [-b <list of bait proteins in display order (see option -c n)>] [-p <list of prey proteins in display order (see option -c n). Set this to \"all\" if you want to include all preys and cluster them>] [-s <primary FDR cutoff [0-1, recommended=0.01]>] [-t <secondary FDR cutoff [must be less than the primary, recommended=0.025]> [-x <spectral count minimum. Only preys with >= this will be used]> [-m <maximum spectral count>] [-N <normalization, 0 for no (default), 1 for yes, 2 for normalization based on significant preys counts (prey FDR <= option -t)>] [-C <FDR cutoff for normalization if using option -N 2 (deafult is -t)>]\n" 1>&2; exit 1; } N=0 n="ward" d="canberra" x=0 i=0 while getopts ":f:i:s:t:x:m:c:n:d:b:p:N:C:" o; do case "${o}" in f) f=${OPTARG} ;; i) i=${OPTARG} ;; s) s=${OPTARG} ;; t) t=${OPTARG} ;; x) x=${OPTARG} ;; m) m=${OPTARG} ;; c) c=${OPTARG} ;; n) n=${OPTARG} ;; d) d=${OPTARG} ;; b) b=${OPTARG} ;; p) p=${OPTARG} ;; N) N=${OPTARG} ;; C) C=${OPTARG} ;; *) usage ;; esac done shift $((OPTIND-1)) filename=${f%%.*} echo "Saint input file = ${f}" echo "Primary FDR cutoff = ${s}" echo "Secondary FDR cutoff for dotplot = ${t}" echo "Minimum spectral count for significant preys = ${x}" echo "Maximum spectral count for dot plot = ${m}" if [ -z "${f}" ] || [ -z "${s}" ] || [ -z "${t}" ] || [ -z "${m}" ] || [ -z "${c}" ]; then usage fi if [ "${i}" == 1 ]; then $SCRIPTPATH/SaintConvert.pl -i ${f} f="mockSaintExpress.txt" fi if [ "${x}" -ge "${m}" ]; then echo "spectral count minimum (${x}) cannot be greater than or equal to the maximum (${m})" exit 1; elif [ "${x}" -lt 0 ]; then echo "spectral count minimum (${x}) cannot be less than 0. Setting to 0 and continuing" x=0 fi ###Check for normalization if [ "${N}" == 1 ]; then printf "\nNormalization is being performed\n" $SCRIPTPATH/Normalization.R ${f} f="norm_saint.txt" elif [ "${N}" == 2 ]; then printf "\nNormalization is being performed\n" if [ -z "${C}" ]; then C=${t} fi $SCRIPTPATH/Normalization_sigpreys.R ${f} ${C} f="norm_saint.txt" fi ###Check for clustering etc if [ "${c}" == "h" ] && [ -z "${n}" ]; then printf "\nHierarchial clustering was selected (-c = h), but no clustering method (-n) was chosen.\n" printf "The input parameter -n must be set to one of \"average\", \"centroid\", \"complete\", \"mcquitty\",\n" printf "\"median\", \"single\" or \"ward\". \"ward\" will be selected as default.\n\n" n="ward" elif [ "${c}" == "h" ] && [ -n "${n}" ]; then if [ "${n}" == "average" ] || [ "${n}" == "centroid" ] || [ "${n}" == "complete" ] || [ "${n}" == "mcquitty" ] || [ "${n}" == "median" ] || [ "${n}" == "single" ] || [ "${n}" == "ward" ]; then printf "\nHierarchical clustering (method = ${n}) will be performed\n\n" else printf "\n${n} is not a valid Hierarchical clustering method.\n" printf "Choose one of \"average\", \"centroid\", \"complete\", \"mcquitty\", \"median\", \"single\" or \"ward\"\n\n" exit 1 fi fi p_c=0 if [ "${c}" == "h" ] && [ -z "${d}" ]; then printf "\nHierarchial clustering was selected (-c = h), but no distance metric (-d) was chosen.\n" printf "The input parameter -d must be set to one of \"binary\", \"canberra\", \"euclidean\",\n" printf "\"manhattan\", \"maximum\" or \"minkowski\". \"canberra\" will be selected as default.\n\n" d="canberra" elif [ "${c}" == "h" ] && [ -n "${d}" ]; then if [ "${d}" == "binary" ] || [ "${d}" == "canberra" ] || [ "${d}" == "euclidean" ] || [ "${d}" == "manhattan" ] || [ "${d}" == "maximum" ] || [ "${d}" == "minkowski" ]; then printf "\nHierarchical clustering (distance metric = ${d}) will be performed\n\n" else printf "\n${d} is not a valid Hierarchical clustering distance metric.\n" printf "Choose one of \"binary\", \"canberra\", \"euclidean\", \"manhattan\", \"maximum\" or \"minkowski\"\n\n" exit 1 fi fi if [ "${c}" == "n" ] && [ -z "${b}" ]; then printf "\n\"No Clustering\" option was selected (-c = n), but no bait list was included (option -b).\n" printf "Bait list must be in .txt formart.\n\n" exit 1 elif [ "${c}" == "n" ] && [ -z "${p}" ]; then printf "\n\"No Clustering\" option was selected (-c = n), but no prey list was included (option -p).\n" printf "Prey list must be in .txt formart.\n\n" exit 1 elif [ "${c}" == "n" ] && [ "${p}" == "all" ]; then printf "\n\"No Clustering\" option was selected (-c = n) for baits, but preys will still be clustered.\n" printf "using \"ward\" and \"canberra\" as defaults or options as supplied on command line.\n\n" p="empty" p_c=1 n="ward" d="canberra" fi ###Check number of baits bait_n=$(perl $SCRIPTPATH/BaitCheck.pl -i ${f}) echo "Number of baits = "$bait_n printf "\n\n" if [ "${c}" == "b" ] && [ $bait_n == 2 ]; then printf "\nWarning only 2 baits are present. Biclustering will not performed.\n" printf "Hierarchical clustering (method = ward) will be performed instead.\n\n" c="h" n="ward" fi ###Generate plots if [ "${c}" == "b" ]; then printf "\nBiclustering will be performed\n\n" $SCRIPTPATH/Step1_data_reformating.R ${f} ${s} ${filename} $SCRIPTPATH/Step2_data_filtering.R ${filename}_matrix.txt ${x} ${filename} GSL_RNG_SEED=123 $SCRIPTPATH/Step3_nestedcluster ${filename}.dat $SCRIPTPATH/biclust_param.txt $SCRIPTPATH/Step4_biclustering.R ${filename}.dat $SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x} $SCRIPTPATH/R_dotPlot.R ${s} ${t} ${m} mkdir Output_${filename} mkdir Output_${filename}/TempData_${filename} mv bait_lists Output_${filename}/TempData_${filename} mv Clusters Output_${filename}/TempData_${filename} mv MCMCparameters Output_${filename}/TempData_${filename} mv NestedClusters Output_${filename}/TempData_${filename} mv NestedMu Output_${filename}/TempData_${filename} mv NestedSigma2 Output_${filename}/TempData_${filename} mv OPTclusters Output_${filename}/TempData_${filename} mv ${filename}_matrix.txt Output_${filename}/TempData_${filename} mv ${filename}.dat Output_${filename}/TempData_${filename} mv SC_data.txt Output_${filename}/TempData_${filename} mv FDR_data.txt Output_${filename}/TempData_${filename} mv clustered_matrix.txt Output_${filename}/TempData_${filename} mv singletons.txt Output_${filename}/TempData_${filename} mv bait2bait_matrix.txt Output_${filename}/TempData_${filename} mv baitClusters Output_${filename}/TempData_${filename} mv clusteredData Output_${filename}/TempData_${filename} mv dotplot.pdf Output_${filename} mv bait2bait.pdf Output_${filename} mv estimated.pdf Output_${filename} mv stats.pdf Output_${filename} cp $SCRIPTPATH/legend.pdf Output_${filename} elif [ "${c}" == "h" ]; then $SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x} $SCRIPTPATH/R_dotPlot_hc.R ${s} ${t} ${m} ${n} ${d} $SCRIPTPATH mkdir Output_${filename} mkdir Output_${filename}/TempData_${filename} mv dotplot.pdf Output_${filename} mv heatmap_borders.pdf Output_${filename} mv heatmap_no_borders.pdf Output_${filename} mv bait2bait.pdf Output_${filename} mv SC_data.txt Output_${filename}/TempData_${filename} mv FDR_data.txt Output_${filename}/TempData_${filename} cp $SCRIPTPATH/legend.pdf Output_${filename} elif [ "${c}" == "n" ]; then $SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x} echo "$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH" $SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH mkdir Output_${filename} mkdir Output_${filename}/TempData_${filename} mv dotplot.pdf Output_${filename} mv heatmap_borders.pdf Output_${filename} mv heatmap_no_borders.pdf Output_${filename} mv SC_data.txt Output_${filename}/TempData_${filename} mv FDR_data.txt Output_${filename}/TempData_${filename} cp $SCRIPTPATH/legend.pdf Output_${filename} else printf -- "-c must be one of [b, h, n]: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering>\n" exit 1; fi if [ "${N}" == "1" ] || [ "${N}" == "2" ]; then mv norm_saint.txt Output_${filename}/TempData_${filename} fi