Mercurial > repos > vipints > rdiff
diff rDiff/bin/rdiff @ 0:0f80a5141704
version 0.3 uploaded
author | vipints |
---|---|
date | Thu, 14 Feb 2013 23:38:36 -0500 |
parents | |
children | 233c30f91d66 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rDiff/bin/rdiff Thu Feb 14 23:38:36 2013 -0500 @@ -0,0 +1,186 @@ +#/bin/bash +# rDiff wrapper script to start the interpreter with the correct list of arguments + +set -e + + +function usage () { +echo " +Usage: rdiff [-OPTION VALUE] + +Options: + + -h Display this help + -o The output directory for the results + -d Directory where the bam-files are + -a Comma separated list of bam-files for sample 1 (No spaces between the files: File1.bam,File2.bam,...) + -b Comma separated list of bam-files for sample 2 (No spaces between the files: File1.bam,File2.bam,...) + -g Path to GFF3 gene structure + -L Read length used for rDiff.parametric (Default: 75) + -m Method to use for testing: param for rDiff.parametric (Default) + nonparam for rDiff.nonparametric + poisson for rDiff.poisson + mmd for rDiff.mmd + +Advanced options: + + -M Minimal read length required (Default: 30) + -e Estimate the gene expression and counts in alternative regions (Yes (Default):1, No: 0) + -E Only estimate the gene expression do not perform testing (Yes:1, No (Default): 0) + -A Path to variance function for sample 1 (Experimental) + -B Path to variance function for sample 2 (Experimental) + -S Filename under which variance function for sample 1 will be saved (Experimental) + -T Filename under which variance function for sample 2 will be saved (Experimental) + -P Parameters of variance function for sample 1 of the form a+b*x+b*x^2 (Given as: a,b,c) (Experimental) + -Q Parameters of variance function for sample 2 of the form a+b*x+b*x^2 (Given as: a,b,c) (Experimental) + -y Use only the gene start and stop for the rDiff.nonparametric variance function estimation (Yes:1, No (Default): 0) (Experimental) + -s Number of reads per gene to which to downsample (Default: 10000) + -C Number of bases to clip from each end of each read (Default: 3) + -p Number of permutations performed for rDiff.nonparametric (Default: 1000) + -x Merge sample 1 and sample 2 for variance function estimation (Yes:1, No (Default): 0) + +" + exit 0 +} + +CURR_DIR=`dirname $0` +. $CURR_DIR/rdiff_config.sh + +PROG=`basename $0` +DIR=`dirname $0` + + +#check if arguments are given +[[ ! $1 ]] && { usage; } + + +#Initializing defaults +RDIFF_RES_DIR="." +RDIFF_INPUT_DIR="." +BAM_INPUT1="" +BAM_INPUT2="" +GFF_INPUT="" +READ_LENGTH="75" +MIN_READ_LENGTH="30" +EST_GENE_EXPR="1" +ONLY_GENE_EXPR="0" +VAR_PATH1="" +VAR_PATH2="" +SAVE_VAR1="" +SAVE_VAR2="" +PRED_VAR1="" +PRED_VAR2="" +ONLY_GENE_START="0" +SUBSAMPLE="10000" +CLIP="3" +BOOTSTRAP="1000" +TEST_METH_NAME="param" + +#Parsing command line arguments +while getopts ":d:o:a:b:g:L:m:e:E:A:B:S:T:P:Q:s:C:p:m:hx:y:" opt; do + case $opt in + o ) RDIFF_RES_DIR="$OPTARG" ;; + d ) RDIFF_INPUT_DIR="$OPTARG";; + a ) BAM_INPUT1="$OPTARG";; + b ) BAM_INPUT2="$OPTARG";; + g ) GFF_INPUT="$OPTARG";; + L ) READ_LENGTH="$OPTARG";; + M ) MIN_READ_LENGTH="$OPTARG";; + e ) EST_GENE_EXPR="$OPTARG";; + E ) ONLY_GENE_EXPR="$OPTARG";; + A ) VAR_PATH1="$OPTARG";; + B ) VAR_PATH2="$OPTARG";; + S ) SAVE_VAR1="$OPTARG";; + T ) SAVE_VAR2="$OPTARG";; + P ) PRED_VAR1="$OPTARG";; + Q ) PRED_VAR2="$OPTARG";; + y ) ONLY_GENE_START="$OPTARG";; + s ) SUBSAMPLE="$OPTARG";; + C ) CLIP="$OPTARG";; + p ) BOOTSTRAP="$OPTARG";; + m ) TEST_METH_NAME="$OPTARG";; + x ) MERGE_SAMPLE="$OPTARG";; + h ) usage ;exit ;; + \?) echo "Unkown parameter: $OPTARG" + usage ;; + + esac +done + +#Perform baic checks of variables + +errorFlag=true #No errors ecountered + +if [ -z "$BAM_INPUT1" ]; then + echo "Please provide bamfiles for sample 1" >&2 + errorFlag=false +fi +if [ -z "$BAM_INPUT2" ]; then + echo "Please provide bamfiles for sample 2" >&2 + errorFlag=false +fi +if [ -z "$GFF_INPUT" ]; then + echo "Please provide genome annotation file in GFF" >&2 + errorFlag=false +fi + +if [ ! $errorFlag ] +then + echo "Error" >&2 + exit 1 +fi + + +#Generate parameter vector +PARAM_VECT="RDIFF_RES_DIR:$RDIFF_RES_DIR;" +PARAM_VECT="$PARAM_VECT""RDIFF_INPUT_DIR:$RDIFF_INPUT_DIR;" +PARAM_VECT="$PARAM_VECT""BAM_INPUT1:$BAM_INPUT1;" +PARAM_VECT="$PARAM_VECT""BAM_INPUT2:$BAM_INPUT2;" +PARAM_VECT="$PARAM_VECT""READ_LENGTH:$READ_LENGTH;" +PARAM_VECT="$PARAM_VECT""MIN_READ_LENGTH:$MIN_READ_LENGTH;" +PARAM_VECT="$PARAM_VECT""EST_GENE_EXPR:$EST_GENE_EXPR;" +PARAM_VECT="$PARAM_VECT""ONLY_GENE_EXPR:$ONLY_GENE_EXPR;" +PARAM_VECT="$PARAM_VECT""VAR_PATH1:$VAR_PATH1;" +PARAM_VECT="$PARAM_VECT""VAR_PATH2:$VAR_PATH2;" +PARAM_VECT="$PARAM_VECT""SAVE_VAR1:$SAVE_VAR1;" +PARAM_VECT="$PARAM_VECT""SAVE_VAR2:$SAVE_VAR2;" +PARAM_VECT="$PARAM_VECT""PRED_VAR1:$PRED_VAR1;" +PARAM_VECT="$PARAM_VECT""PRED_VAR2:$PRED_VAR2;" +PARAM_VECT="$PARAM_VECT""ONLY_GENE_START:$ONLY_GENE_START;" +PARAM_VECT="$PARAM_VECT""SUBSAMPLE:$SUBSAMPLE;" +PARAM_VECT="$PARAM_VECT""CLIP:$CLIP;" +PARAM_VECT="$PARAM_VECT""BOOTSTRAP:$BOOTSTRAP;" +PARAM_VECT="$PARAM_VECT""TEST_METH_NAME:$TEST_METH_NAME;" +PARAM_VECT="$PARAM_VECT""MERGE_SAMPLE:$MERGE_SAMPLE;" + + + +echo %%%%%%%%%%%%%%%%%%%%%%% +echo % 1. Data preparation % +echo %%%%%%%%%%%%%%%%%%%%%%% +echo + +#Check wether results directory exists +#if [ -d $RDIFF_RES_DIR ] +#then +# echo "Writting results to: $RDIFF_RES_DIR" +#else +# mkdir $RDIFF_RES_DIR +#fi + + +echo 1a. load the genome annotation in GFF3 format, create an annotation object #\[Log file in ${RDIFF_RES_DIR}}/elegans-gff2anno.log\] +export PYTHONPATH=$PYTHONPATH:$RDIFF_PYTHON_PATH:${SCIPY_PATH} +${RDIFF_PYTHON_PATH} -W ignore::RuntimeWarning ${DIR}/../tools/ParseGFF.py ${GFF_INPUT} ${RDIFF_RES_DIR}/genes.mat #> ${RDIFF_RES_DIR}}/elegans-gff2anno.log +${DIR}/../bin/genes_cell2struct ${RDIFF_RES_DIR}/genes.mat + + +PARAM_VECT="$PARAM_VECT""GFF_INPUT:${RDIFF_RES_DIR}/genes.mat;" + +echo %%%%%%%%%%%%%%%%%%%%%%%%%%% +echo % 2. Differential testing % +echo %%%%%%%%%%%%%%%%%%%%%%%%%%% +echo + +exec ${DIR}/start_interpreter.sh ${PROG} "$PARAM_VECT" +