Mercurial > repos > vipints > deseq_hts
changeset 9:e27b4f7811c2 draft
Updated DESeq version 1.12
author | vipints <vipin@cbio.mskcc.org> |
---|---|
date | Tue, 08 Oct 2013 08:09:28 -0400 |
parents | 2b3bb3348076 |
children | 2fe512c7bfdf |
files | deseq-hts_1.0/README deseq-hts_1.0/bin/deseq_config.sh deseq-hts_1.0/bin/deseq_config.sh.sample deseq-hts_1.0/bin/start_interpreter.sh deseq-hts_1.0/galaxy/deseq.xml deseq-hts_1.0/setup_deseq-hts.sh deseq-hts_1.0/src/deseq-hts.sh deseq-hts_1.0/src/deseq_config.m deseq-hts_1.0/src/difftest_deseq.R |
diffstat | 9 files changed, 99 insertions(+), 60 deletions(-) [+] |
line wrap: on
line diff
--- a/deseq-hts_1.0/README Wed Jun 27 15:38:39 2012 -0400 +++ b/deseq-hts_1.0/README Tue Oct 08 08:09:28 2013 -0400 @@ -1,48 +1,52 @@ ---------------------------------------------------- -DESeq-hts: A Galaxy wrapper for DESeq version 1.6.1 ---------------------------------------------------- +---------------------------------------------------- +DESeq: A Galaxy wrapper for DESeq version 1.12.1 +---------------------------------------------------- Description: +------------ DESeq can be used as a web service embedded in a Galaxy instance. We call it as DESeq-hts. -Requirements: +Requirements: +------------- MATLAB/OCTAVE and Python :- Preprocessing of sequencing reads and GFF files - R, Bio-conductor package :- Required for DESEQ + R, Bio-conductor package :- Required for DESeq SCIPY, NUMPY :- for python - SAMTOOLS :- Read processing + SAMTOOLS :- Sequencing read processing Contents: - [src] +--------- + ./src All relevant scripts for DESeq-hts are located in the subdirectory - src. src/deseq.sh is the main script to start DESeq-hts. The - preprocessing of BAM and GFF file start before the R DESEQ script. + src. src/deseq-hts.sh is the main script to start DESeq-hts. The + preprocessing of BAM and GFF file start before the R DESeq script. Please follow the shell script to understand the details. - [galaxy] + ./galaxy Galaxy tool configuration file can be found galaxy folder. Please make necessary editing for .xml file and remaining .sh files and perform few tests. - [setup_deseq-hts.sh] + ./setup_deseq-hts.sh Setup script for DESeq-hts. - [mex] + ./mex matlab executable files. - [bin] + ./bin Contains deseq_config.sh file which is used for the configuration of DESeq-hts. According to your platform, the default file will be changed. - [test_data] - This subsirectory contains all data for running a functional test in + ./test_data + This sub-directory contains all data for running a functional test in Galaxy framework. You may need to move these test files into the test-data directory. - [tools] + ./tools A python based GFF parsing program. Also contains small utils programs. Getting started: +---------------- Check for all requirements first, then a) Run ./setup_deseq-hts.sh and setup paths and configuration options for DESeq-hts. @@ -57,6 +61,7 @@ c) Edit the Galaxy tool configuration file to adjust the path if necessary. Licenses: +--------- If **DESeq** is used to obtain results for scientific publications it should be cited as [1]. This wrapper program (DESeq-hts) is free software; you can redistribute it and/or modify it @@ -65,11 +70,21 @@ Written (W) 2009-2012 Jonas Behr, Regina Bohnert, Andre Kahles, Gunnar Raetsch, Vipin T. Sreedharan Copyright (C) 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany and - 2012 cBio Memorial Sloan Kettering Cancer Center, New York City, USA. + 2013 cBio Memorial Sloan Kettering Cancer Center, New York City, USA. References: +----------- [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`. +Contributions: +-------------- + 15 Aug. 2013 + Philippe Moncuquet + Bioinformatics Analyst, Bioinformatics Core, CSIRO Mathematics, Informatics and Statistics + + Extended the DESeq result. + Contact: - vipin@cbio.mskcc.org +-------- + support [at] oqtans.org
--- a/deseq-hts_1.0/bin/deseq_config.sh Wed Jun 27 15:38:39 2012 -0400 +++ b/deseq-hts_1.0/bin/deseq_config.sh Tue Oct 08 08:09:28 2013 -0400 @@ -1,6 +1,5 @@ #!/bin/bash -# Copyright (C) 2010-2012 Max Planck Society -export DESEQ_VERSION=1.6.0 +export DESEQ_VERSION=1.12.1 export DESEQ_PATH= export DESEQ_SRC_PATH=$DESEQ_PATH/src export DESEQ_BIN_PATH=$DESEQ_PATH/bin
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_1.0/bin/deseq_config.sh.sample Tue Oct 08 08:09:28 2013 -0400 @@ -0,0 +1,17 @@ +#!/bin/bash +export DESEQ_VERSION=1.10.1 +export DESEQ_PATH=/home/galaxy/software/deseq_hts/ +export DESEQ_SRC_PATH=$DESEQ_PATH/src +export DESEQ_BIN_PATH=$DESEQ_PATH/bin +export INTERPRETER=octave +export MATLAB_BIN_PATH= +export MATLAB_MEX_PATH= +export MATLAB_INCLUDE_DIR= +export OCTAVE_BIN_PATH=/home/galaxy/software/octave/source/octave-3.6.3/octave +export OCTAVE_MKOCT=/home/galaxy/software/bin/mkoctfile +export SAMTOOLS_DIR=/home/galaxy/software/samtools-0.1.17/ +export PYTHON_PATH=/usr/bin/python +export SCIPY_PATH=/home/galaxy/software/lib/python2.6/site-packages/ +export R_PATH=/home/galaxy/software/bin/R +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/oqtansTools/oqtans_dep/octave-3.6.2_64/lib/octave/3.6.2/ +export ENVIRONMENT=galaxy
--- a/deseq-hts_1.0/bin/start_interpreter.sh Wed Jun 27 15:38:39 2012 -0400 +++ b/deseq-hts_1.0/bin/start_interpreter.sh Tue Oct 08 08:09:28 2013 -0400 @@ -5,11 +5,11 @@ . `dirname $0`/deseq_config.sh -export MATLAB_RETURN_FILE=`tempfile` +export MATLAB_RETURN_FILE=`mktemp` if [ "$INTERPRETER" == 'octave' ]; then - echo exit | ${OCTAVE_BIN_PATH} --eval "global SHELL_INTERPRETER_INVOKE; SHELL_INTERPRETER_INVOKE=1; addpath $DESEQ_SRC_PATH; deseq_config; $1($2); exit;" || (echo starting Octave failed; rm -f $MATLAB_RETURN_FILE; exit -1) ; + echo exit | ${OCTAVE_BIN_PATH} --no-window-system --silent --eval "global SHELL_INTERPRETER_INVOKE; SHELL_INTERPRETER_INVOKE=1; addpath $DESEQ_SRC_PATH; deseq_config; $1($2); exit;" || (echo starting Octave failed; rm -f $MATLAB_RETURN_FILE; exit -1) ; fi if [ "$INTERPRETER" == 'matlab' ];
--- a/deseq-hts_1.0/galaxy/deseq.xml Wed Jun 27 15:38:39 2012 -0400 +++ b/deseq-hts_1.0/galaxy/deseq.xml Tue Oct 08 08:09:28 2013 -0400 @@ -1,7 +1,10 @@ -<tool id="deseq-hts" name="DESeq" version="1.6.1"> - <description>Determines differentially expressed transcripts from read alignments</description> - <command> -deseq-hts/src/deseq-hts.sh $anno_input_selected $deseq_out $deseq_out.extra_files_path/gene_map.mat +<tool id="deseq-hts" name="DESeq" version="1.12.1"> + <description> Determines differentially expressed transcripts from read alignments</description> + <requirements> + <requirement type="package" version="0.1">oqtans</requirement> + </requirements> + <command interpreter="bash"> +./../src/deseq-hts.sh $anno_input_selected $deseq_out $deseq_out.extra_files_path/gene_map.mat #for $i in $replicate_groups #for $j in $i.replicates $j.bam_alignment:#slurp @@ -19,8 +22,8 @@ </inputs> <outputs> - <data format="txt" name="deseq_out" label="DESeq result"/> - <data format="txt" name="Log_File" label="DESeq log file"/> + <data format="txt" name="deseq_out" label="${tool.name} on ${on_string}: Differential Expression"/> + <data format="txt" name="Log_File" label="${tool.name} on ${on_string}: log"/> </outputs> <tests> @@ -41,19 +44,15 @@ **What it does** -`DESeq` is a tool for differential expression testing of RNA-Seq data. - +DESeq_ is a tool for differential expression testing of RNA-Seq data. -**Inputs** - -`DESeq` requires three input files to run: +.. _DESeq: http://bioconductor.org/packages/release/bioc/html/DESeq.html -1. Annotation file in GFF3, containing the necessary information about the transcripts that are to be quantified. -2. The BAM alignment files grouped into replicate groups, each containing several replicates. BAM files store the read alignments in a compressed format. They can be generated using the `SAM-to-BAM` tool in the NGS: SAM Tools section. (The script will also work with only two groups containing only a single replicate each. However, this analysis has less statistical power and is therefor not recommended.) +`DESeq` requires: -**Output** +Genome annotation file in GFF3, containing the necessary information about the transcripts that are to be quantified. -`DESeq` generates a text file containing the gene name and the p-value. +The BAM alignment files grouped into replicate groups, each containing several replicates. BAM files store the read alignments, The program will also work with only two groups containing only a single replicate each. However, this analysis has less statistical power and is therefore not recommended! ------ @@ -100,7 +99,7 @@ ------ -DESeq-hts Wrapper Version 0.3 (Feb 2012) +DESeq-hts Wrapper Version 0.5 (Aug 2013) </help> </tool>
--- a/deseq-hts_1.0/setup_deseq-hts.sh Wed Jun 27 15:38:39 2012 -0400 +++ b/deseq-hts_1.0/setup_deseq-hts.sh Tue Oct 08 08:09:28 2013 -0400 @@ -1,7 +1,8 @@ #!/bin/bash set -e -. ./bin/deseq_config.sh +DIR=`dirname $0` +. ${DIR}/./bin/deseq_config.sh echo ========================================== echo DESeq-hts setup script \(DESeq version $DESEQ_VERSION\) @@ -15,7 +16,6 @@ fi echo '=>' Setting DESeq-hts base directory to \"$DESEQ_PATH\" echo - echo SAMTools directory \(currently set to \"$SAMTOOLS_DIR\", system version used if left empty\) read SAMTOOLS_DIR if [ "$SAMTOOLS_DIR" == "" ];
--- a/deseq-hts_1.0/src/deseq-hts.sh Wed Jun 27 15:38:39 2012 -0400 +++ b/deseq-hts_1.0/src/deseq-hts.sh Tue Oct 08 08:09:28 2013 -0400 @@ -5,7 +5,7 @@ # the Free Software Foundation; either version 3 of the License, or # (at your option) any later version. # -# Copyright (C) 2009-2012 Max Planck Society +# Copyright (C) 2009-2013 Max Planck Society & Memorial Sloan-Kettering Cancer Center # set -e @@ -16,7 +16,7 @@ . ${DIR}/../bin/deseq_config.sh echo -echo ${PROG}: FML http://galaxy.fml.mpg.de Galaxy wrapper for the DESeq version $DESEQ_VERSION. +echo ${PROG}: Oqtans http://galaxy.cbio.mskcc.org Galaxy wrapper for the DESeq version $DESEQ_VERSION. echo echo DESeq performs differential expression testing from RNA-Seq measurements. echo @@ -35,14 +35,12 @@ echo %%%%%%%%%%%%%%%%%%%%%%% echo echo load the genome annotation in GFF3 format and create an annotation object -echo export PYTHONPATH=$PYTHONPATH:${SCIPY_PATH} ${PYTHON_PATH} ${DIR}/../tools/ParseGFF.py ${ANNO_INPUT} ${GENES_FN} -${DIR}/../bin/genes_cell2struct ${GENES_FN} 2>&1 +${DIR}/../bin/genes_cell2struct ${GENES_FN} echo echo genome annotation stored in $GENES_FN -echo echo %%%%%%%%%%%%%%%%%%%% echo % 2. Read counting % echo %%%%%%%%%%%%%%%%%%%% @@ -68,22 +66,18 @@ tmpfile=`mktemp --tmpdir=/tmp` echo "${DIR}/../bin/get_read_counts ${GENES_FN} $tmpfile $@" -${DIR}/../bin/get_read_counts ${GENES_FN} $tmpfile "$@" 2>&1 +${DIR}/../bin/get_read_counts ${GENES_FN} $tmpfile "$@" -echo echo %%%%%%%%%%%%%%%%%%%%%%%%%%% echo % 3. Differential testing % echo %%%%%%%%%%%%%%%%%%%%%%%%%%% echo - echo testing genes for differential expression using given alignments echo "cat ${DIR}/../src/difftest_deseq.R | $R_PATH --slave --args $tmpfile ${DESEQ_RES_FILE} $#" -cat ${DIR}/../src/difftest_deseq.R | $R_PATH --slave --args $tmpfile ${DESEQ_RES_FILE} $# 2> /dev/null +cat ${DIR}/../src/difftest_deseq.R | $R_PATH --slave --args $tmpfile ${DESEQ_RES_FILE} $# rm $tmpfile ${tmpfile}_COUNTS.tab ${tmpfile}_CONDITIONS.tab -echo echo %%%%%%%% echo % Done % echo %%%%%%%% -echo
--- a/deseq-hts_1.0/src/deseq_config.m Wed Jun 27 15:38:39 2012 -0400 +++ b/deseq-hts_1.0/src/deseq_config.m Tue Oct 08 08:09:28 2013 -0400 @@ -30,6 +30,7 @@ % switch off a few expected warnings addpath(sprintf('%s/tools', DESEQ_PATH)); +engine=''; lserve=license; if ~isequal(lserve, 'GNU General Public License'), engine='matlab';
--- a/deseq-hts_1.0/src/difftest_deseq.R Wed Jun 27 15:38:39 2012 -0400 +++ b/deseq-hts_1.0/src/difftest_deseq.R Tue Oct 08 08:09:28 2013 -0400 @@ -1,4 +1,5 @@ -library( DESeq ) +### load DESeq package +suppressMessages(require("DESeq")) ### get arguments 1: INFILE, 2: OUTFILE 3:SIZE args <- commandArgs() @@ -31,26 +32,39 @@ { cds <- estimateDispersions( cds ) } else { - writeLines("\nYou did not enter any replicates! - The results may be less valuable without replicates!\n") + writeLines("\n***You did not enter any replicates! - The results may be less valuable without replicates!***\n") cds <- estimateDispersions( cds, method='blind', sharingMode='fit-only') } experiments <- levels(conds) -res<-c() +res_1<-c() +res_2<-c() +res_3<-c() +res_4<-c() +res_5<-c() +res_6<-c() +res_7<-c() +res_8<-c() table_col_names<-c() + for (i in 1:(length(experiments)-1)) { for( j in (i+1):(length(experiments))) { print(c(i,j)) tempres <- nbinomTest(cds,experiments[i],experiments[j]) - res = cbind(res,tempres[,7]) - #res = cbind(res,tempres[,8]) - table_col_names = cbind(table_col_names,paste('cond_', experiments[i], '_vs._cond_', experiments[j], sep='')) + res_1 = cbind(res_1,tempres[,1]) + res_2 = cbind(res_2,tempres[,2]) + res_3 = cbind(res_3,tempres[,3]) + res_4 = cbind(res_4,tempres[,4]) + res_5 = cbind(res_5,tempres[,5]) + res_6 = cbind(res_6,tempres[,6]) + res_7 = cbind(res_7,tempres[,7]) + res_8 = cbind(res_8,tempres[,8]) + table_col_names = cbind(table_col_names,paste('cond_', experiments[i], '_vs._cond_', experiments[j], sep='', 'test')) } } -DiffTable<-res -rownames(DiffTable)<-rownames(countsTable) -colnames(DiffTable)<-table_col_names +DiffTable<-cbind(res_1,res_2,res_3,res_4,res_5,res_6,res_7,res_8) +colnames(DiffTable)<-c('feature ID', 'base mean', 'base mean A', 'base mean B', 'fold change', 'log2 fold change','p value', 'adjusted p value') write.table(DiffTable, file = OUTFILE, quote = FALSE, sep ="\t", eol ="\n", na = "1.000", dec = ".", row.names = TRUE,col.names =TRUE)