# HG changeset patch
# User pavlo-lutsik
# Date 1373060365 14400
# Node ID 39b78c48d06431b3997be0a1f44688620bec7bc7
# Parent 16f6ef2b325113bdcee9b2b6cf3374097693c4fa
Uploaded

diff -r 16f6ef2b3251 -r 39b78c48d064 RnBeadsGalaxy.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/RnBeadsGalaxy.R Fri Jul 05 17:39:25 2013 -0400
@@ -0,0 +1,236 @@
+#msg.file<-file("RnBeads.messages.out", open="w")
+#sink(file=msg.file)
+
+## Galaxy command-line wrapper for RnBeads: parses the options passed by the
+## Galaxy tool, writes the auto-generated tool snippets, assembles the data
+## source for the selected input type and runs rnb.run.analysis().
+
+## add the RnBeads dependencies if we are on a cloud share-instance
+## NOTE(review): the check looks for /mnt/Rsitelibrary but the path that is
+## added is /mnt/galaxy/Rsitelibrary - confirm which location is intended.
+if("Rsitelibrary" %in% list.files("/mnt")){
+  .libPaths("/mnt/galaxy/Rsitelibrary")
+}
+
+if(!'wordcloud' %in% rownames(installed.packages())){
+  install.packages('wordcloud',repos='http://cran.us.r-project.org')
+}
+
+suppressWarnings(suppressPackageStartupMessages(library(RnBeads)))
+suppressWarnings(suppressPackageStartupMessages(library(getopt)))
+
+## Option names and declared types, taken from the RnBeads internals.
+## NOTE(review): element 28 is dropped by position - confirm which option
+## that is for the installed RnBeads version.
+#all.opts<-names(rnb.options())
+opt.class<-RnBeads:::OPTION.TYPES[-28]
+all.opts<-names(opt.class)
+#all.opts<-paste("--", all.opts, sep="")
+#all.opts<-gsub("\\.([a-z])", "\\U\\1", all.opts, perl=TRUE)
+all.opts<-gsub("\\.","-", all.opts)
+#opt.class<-sapply(rnb.options(), class)
+
+## getopt specification with one row per RnBeads option; logical options get
+## mask 2, all other options mask 1.
+rnb.opt.spec<-data.frame(
+  Long=all.opts,
+  Short=as.character(seq_along(all.opts)),
+  Mask=c(1,2)[as.integer((opt.class=="logical"))+1],
+  Type=opt.class)
+
+### automated xml file preparation
+## NOTE(review): the sprintf() templates below carry fewer conversion
+## specifications than arguments; presumably the XML markup was lost when this
+## patch was rendered - restore them from the original repository.
+xml.strings<-apply(rnb.opt.spec,1, function(row){
+
+  opt.lab<-gsub("-", ".", row[1])
+  opt.def.val<-rnb.getOption(opt.lab)
+  opt.name<-gsub("-([0-9a-z])", "\\U\\1", row[1], perl=TRUE)
+  tf.opt<-"\t\t\t\n\t\t\t"
+  opt.lab<-paste(opt.lab, gsub("\\."," ", row[4]), sep=", ")
+  if(row[4]=="logical"){
+    opt.type<-'select'
+    ## isTRUE() also covers NULL and NA defaults
+    opt.def.val<-if(isTRUE(opt.def.val)) "1" else "0"
+    string<-sprintf("\t\t\n%s\n\t\t\n", opt.name, opt.type, opt.lab, opt.def.val, tf.opt)
+  }else{
+    opt.type<-'text'
+    ## defaults may be vectors, so avoid a vector-valued condition here
+    if(!is.null(opt.def.val) && !identical(as.character(opt.def.val), ""))
+      opt.def.val<-paste(opt.def.val, collapse=",") else
+      opt.def.val<-""
+    string<-sprintf("\t\t\n", opt.name, opt.type, opt.lab, opt.def.val)
+  }
+  string
+})
+
+cat(xml.strings, sep="", file="automated.settings.xml.txt")
+
+## Cheetah snippet that forwards every option from the Galaxy form to the
+## command line
+opt.def.strings<-apply(rnb.opt.spec,1, function(row){
+
+  opt.name<-gsub("-([0-9a-z])", "\\U\\1", row[1], perl=TRUE)
+  opt.long<-row[1]
+
+  if(row[4]=="logical"){
+    def.string<-sprintf("#if str( $options.%s ) == \"True\"\n\t--%s\n#end if\n", opt.name, opt.long)
+  }else{
+    def.string<-sprintf("#if str( $options.%s ) != \"\"\n\t--%s=\"$options.%s\" \n#end if\n", opt.name, opt.long, opt.name)
+  }
+  def.string
+
+})
+cat(opt.def.strings, sep="", file="automated.option.assignments.txt")
+
+## Translate the RnBeads option classes into getopt storage modes. Vector-valued
+## options arrive as one comma-separated string, so they are declared as
+## character here and converted after splitting (casting e.g. "1,2" to integer
+## inside getopt would yield NA).
+opt.types<-as.character(rnb.opt.spec$Type)
+opt.types[grepl("\\.vector$", opt.types)]<-"character"
+rnb.opt.spec$Type<-gsub("numeric", "double", opt.types)
+rnb.opt.spec<-rbind(data.frame(
+    Long=c("data-type", "pheno", "idat-dir","idat-files","bed-files", "gs-report", "geo-series", "betas", "pvals","output-file", "report-dir"),
+    Short=c("d","s","a","i","f","g","e","b","p","r","o"),
+    Mask=c(1,2,2,2,2,2,2,2,2,1,1),
+    Type=rep("character", 11)),
+  rnb.opt.spec)
+
+opts<-getopt(as.matrix(rnb.opt.spec))
+#opts<-getopt(as.matrix(rnb.opt.spec), opt=list("--data-type=idats","--report-dir=dir", "--idats=file1\tfile2"))
+print(opts)
+
+## Copies the files named in a comma-separated string into target.dir and gives
+## the copies a .bed extension instead of .dat; returns the source file paths.
+stage.dat.files<-function(file.string, target.dir){
+  if(is.null(file.string))
+    stop("No input files were supplied")
+  files<-strsplit(gsub(" ","", file.string), ",")[[1]]
+  files<-files[files!=""]
+  dir.create(target.dir, showWarnings=FALSE, recursive=TRUE)
+  file.copy(files, target.dir)
+  for(dat.file in list.files(target.dir, full.names=TRUE)){
+    ## only touch the extension of the file name, never the directory part
+    bed.name<-sub("\\.dat$", ".bed", basename(dat.file))
+    file.rename(dat.file, file.path(dirname(dat.file), bed.name))
+  }
+  files
+}
+
+data.kind<-opts[["data-type"]]
+if(is.null(data.kind))
+  stop("The --data-type option is required")
+
+if(data.kind=="idatDir"){
+
+  data.type<-"idat.dir"
+  data.source<-list()
+  data.source[["idat.dir"]]<-opts[["idat-dir"]]
+  data.source[["sample.sheet"]]<-opts[["pheno"]]
+
+}else if(data.kind=="idatFiles"){
+
+  ## NOTE(review): this branch stages the uploaded IDAT files exactly like the
+  ## BED branch (directory suffix "_beds", .dat renamed to .bed, list element
+  ## "bed.dir") while reporting data.type "idat.dir"; kept as is - confirm
+  ## what rnb.run.analysis() expects here.
+  data.type<-"idat.dir"
+  bed.dir<-sprintf("%s_beds",opts[["report-dir"]])
+  stage.dat.files(opts[["idat-files"]], bed.dir)
+  data.source<-list()
+  data.source[["bed.dir"]]<-bed.dir
+  data.source[["sample.sheet"]]<-opts[["pheno"]]
+
+}else if(data.kind=="GS.report"){
+
+  data.type<-"GS.report"
+  data.source<-opts[["gs-report"]]
+
+}else if(data.kind=="GEO"){
+
+  data.type<-"GEO"
+  data.source<-opts[["geo-series"]]
+
+}else if(data.kind=="data.files"){
+
+  data.type<-"data.files"
+  data.source<-c(opts[["pheno"]], opts[["betas"]])
+  if(!is.null(opts[["pvals"]]))
+    data.source<-c(data.source, opts[["pvals"]])
+
+}else if(data.kind=="bed.dir"){
+
+  data.type<-"bed.dir"
+  bed.dir<-sprintf("%s_beds",opts[["report-dir"]])
+  files<-stage.dat.files(opts[["bed-files"]], bed.dir)
+  data.source<-list()
+  data.source[["bed.dir"]]<-bed.dir
+
+  logger.start(fname="NA")
+  sample.sheet<-rnb.read.annotation(opts[["pheno"]])
+  logger.close()
+  if(length(files) < nrow(sample.sheet))
+    stop("Not all bed files are present")
+  if(length(files) > nrow(sample.sheet))
+    stop("More bed files were supplied than samples are annotated")
+
+  cn<-colnames(sample.sheet)
+  sample.sheet<-cbind(sample.sheet, sub("\\.dat$", ".bed", basename(files)))
+  colnames(sample.sheet)<-c(cn, "BED_files")
+  data.source[["sample.sheet"]]<-sample.sheet
+
+}else{
+  stop(sprintf("Unsupported data type: %s", data.kind))
+}
+
+if("logging" %in% names(opts)){ # TODO create a cleaner way of checking whether the full options set was supplied
+
+  for(on in names(opt.class)){
+    ## option names use dots in RnBeads and dashes on the command line
+    getoptname<-gsub("\\.", "-", on)
+    if(!getoptname %in% names(opts))
+      next
+    oc<-opt.class[[on]]
+    if(oc=="logical"){
+      ov<-TRUE
+    }else{
+      ov<-gsub("\"", "", as.character(opts[[getoptname]]))
+      if(grepl("\\.vector$", oc))
+        ov<-strsplit(ov, ",")[[1]]
+      if(oc %in% c("integer","integer.vector")){
+        ov<-as.integer(ov)
+      }else if(oc %in% c("numeric","numeric.vector")){
+        ov<-as.numeric(ov)
+      }
+    }
+    ## set the option by name without going through eval(parse())
+    opt.arg<-list(ov)
+    names(opt.arg)<-on
+    do.call(rnb.options, opt.arg)
+  }
+
+  ## logical options that were not passed on the command line are switched off
+  logical.opts<-names(opt.class[opt.class=="logical"])
+  logical.opts.false<-logical.opts[!logical.opts %in% gsub("-",".",names(opts))]
+  for(on in logical.opts.false){
+    opt.arg<-list(FALSE)
+    names(opt.arg)<-on
+    do.call(rnb.options, opt.arg)
+  }
+}
+
+print(rnb.options())
+
+#report.out.dir<-sprintf("%s_rnbReport", tempdir())
+report.out.dir<-opts[["report-dir"]]
+print("Starting RnBeads with the following inputs:")
+print(data.source)
+print(report.out.dir)
+print(data.type)
+rnb.run.analysis(data.source=data.source, dir.report=report.out.dir, data.type=data.type)
+
+#sink(file=NULL)
+#flush(msg.file)
+#close(msg.file)
\ No newline at end of file
diff -r 16f6ef2b3251 -r 39b78c48d064 install.rnbeads.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/install.rnbeads.R Fri Jul 05 17:39:25 2013 -0400
@@ -0,0 +1,1 @@
+source("http://rnbeads.mpi-inf.mpg.de/install.R")
\ No newline at end of file
diff -r 16f6ef2b3251 -r 39b78c48d064 rnbeads.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rnbeads.xml Fri Jul 05 17:39:25 2013 -0400
@@ -0,0 +1,506 @@
+
+
+ Performs RnBeads analysis for the selected set of
+
+ R_SCRIPTS_PATH
+ R
+
+ rnbeads_galaxy_wrapper.sh
+ --report-dir="$html_file.extra_files_path"
+ --output-file="$html_file"
+ #if str( $inputDataSelector.dataType ) == "idats"
+ --pheno="$inputDataSelector.sampleAnnotations"
+ #end if
+ #if str( $inputDataSelector.dataType ) != "idats"
+ #pass
+ #else if str( $inputDataSelector.idatSelector.idatSource ) == "history"
+ --data-type="idatFiles"
+ #set $idatList=""
+ #for $input_file in $inputDataSelector.idatSelector.idatSeries:
+ #set $idatList+=str(
$input_file.idatFile ) + #set $idatList+="," + #end for + --idat-files="$idatList" + #else: + --data-type="idatDir" + --idat-dir="$inputDataSelector.idatSelector.idatRepo.fields.path" + #end if + #if str( $inputDataSelector.dataType ) == "gsreport" + --data-type="GS.report" + --gs-report="$inputDataSelector.gsReportFile" + #end if + #if str( $inputDataSelector.dataType ) == "geo" + --data-type="GEO" + --geo-series=$inputDataSelector.geoSeries + #end if + #if str( $inputDataSelector.dataType ) == "tables" + --data-type="data.files" + --pheno="$inputDataSelector.sampleAnnotations" + #end if + #if str( $inputDataSelector.dataType ) == "tables" + --data-type="data.files" + --pheno="$inputDataSelector.sampleAnnotations" + --betas="$inputDataSelector.betaTable" + --pvals="$inputDataSelector.pvalTable" + #end if + #if str( $inputDataSelector.dataType ) == "bed" + --data-type="bed.dir" + --pheno="$inputDataSelector.sampleAnnotations" + #set $bedList="" + #for $input_file in $inputDataSelector.bedSeries: + #set $bedList+=str( $input_file.bedFile ) + #set $bedList+="," + #end for + --bed-files="$bedList" + #end if + #if str( $options.optionSet ) == "full" + #if str( $options.analysisName ) != "" + --analysis-name="$options.analysisName" + #end if + #if str( $options.logging ) == "True" + --logging + #end if + #if str( $options.email ) != "" + --email="$options.email" + #end if + #if str( $options.assembly ) != "" + --assembly="$options.assembly" + #end if + #if str( $options.columnsPairing ) != "" + --columns-pairing="$options.columnsPairing" + #end if + #if str( $options.analyzeSites ) == "True" + --analyze-sites + #end if + #if str( $options.regionTypes ) != "" + --region-types="$options.regionTypes" + #end if + #if str( $options.identifiersColumn ) != "" + --identifiers-column="$options.identifiersColumn" + #end if + #if str( $options.pointsCategory ) != "" + --points-category="$options.pointsCategory" + #end if + #if str( $options.colorsCategory ) != "" + 
--colors-category="$options.colorsCategory" + #end if + #if str( $options.colorsGradient ) != "" + --colors-gradient="$options.colorsGradient" + #end if + #if str( $options.minGroupSize ) != "" + --min-group-size="$options.minGroupSize" + #end if + #if str( $options.maxGroupCount ) != "" + --max-group-count="$options.maxGroupCount" + #end if + #if str( $options.gzLargeFiles ) == "True" + --gz-large-files + #end if + #if str( $options.strandSpecific ) == "True" + --strand-specific + #end if + #if str( $options.replicateIdColumn ) != "" + --replicate-id-column="$options.replicateIdColumn" + #end if + #if str( $options.loadingNormalization ) == "True" + --loading-normalization + #end if + #if str( $options.loadingDefaultDataType ) != "" + --loading-default-data-type="$options.loadingDefaultDataType" + #end if + #if str( $options.loadingTableSeparator ) != "" + --loading-table-separator="$options.loadingTableSeparator" + #end if + #if str( $options.loadingBedStyle ) != "" + --loading-bed-style="$options.loadingBedStyle" + #end if + #if str( $options.loadingBedColumns ) != "" + --loading-bed-columns="$options.loadingBedColumns" + #end if + #if str( $options.loadingBedFrameShift ) != "" + --loading-bed-frame-shift="$options.loadingBedFrameShift" + #end if + #if str( $options.normalizationMethod ) != "" + --normalization-method="$options.normalizationMethod" + #end if + #if str( $options.normalizationBackgroundMethod ) != "" + --normalization-background-method="$options.normalizationBackgroundMethod" + #end if + #if str( $options.qc ) == "True" + --qc + #end if + #if str( $options.qcBoxplots ) == "True" + --qc-boxplots + #end if + #if str( $options.qcBarplots ) == "True" + --qc-barplots + #end if + #if str( $options.qcSnpHeatmap ) == "True" + --qc-snp-heatmap + #end if + #if str( $options.qcSnpHeatmap ) == "True" + --qc-snp-heatmap + #end if + #if str( $options.qcSnpBoxplot ) == "True" + --qc-snp-boxplot + #end if + #if str( $options.qcSnpBarplot ) == "True" + 
--qc-snp-barplot + #end if + #if str( $options.qcSampleBatchSize ) != "" + --qc-sample-batch-size="$options.qcSampleBatchSize" + #end if + #if str( $options.filteringContextRemoval ) != "" + --filtering-context-removal="$options.filteringContextRemoval" + #end if + #if str( $options.filteringSnp ) == "True" + --filtering-snp + #end if + #if str( $options.filteringSnpFrequency ) != "" + --filtering-snp-frequency="$options.filteringSnpFrequency" + #end if + #if str( $options.filteringSnpAccepted ) != "" + --filtering-snp-accepted="$options.filteringSnpAccepted" + #end if + #if str( $options.filteringSexChromosomesRemoval ) == "True" + --filtering-sex-chromosomes-removal + #end if + #if str( $options.filteringMissingValueQuantile ) != "" + --filtering-missing-value-quantile="$options.filteringMissingValueQuantile" + #end if + #if str( $options.filteringCoverageThreshold ) != "" + --filtering-coverage-threshold="$options.filteringCoverageThreshold" + #end if + #if str( $options.filteringLowCoverageMasking ) == "True" + --filtering-low-coverage-masking + #end if + #if str( $options.filteringHighCoverageOutliers ) == "True" + --filtering-high-coverage-outliers + #end if + #if str( $options.filteringGreedycut ) == "True" + --filtering-greedycut + #end if + #if str( $options.filteringGreedycutPvalueThreshold ) != "" + --filtering-greedycut-pvalue-threshold="$options.filteringGreedycutPvalueThreshold" + #end if + #if str( $options.filteringGreedycutRcTies ) != "" + --filtering-greedycut-rc-ties="$options.filteringGreedycutRcTies" + #end if + #if str( $options.filteringDeviationThreshold ) != "" + --filtering-deviation-threshold="$options.filteringDeviationThreshold" + #end if + #if str( $options.batch ) == "True" + --batch + #end if + #if str( $options.batchDreductionColumns ) != "" + --batch-dreduction-columns="$options.batchDreductionColumns" + #end if + #if str( $options.batchPrincipalComponents ) != "" + --batch-principal-components="$options.batchPrincipalComponents" + 
#end if + #if str( $options.batchCorrelationColumns ) != "" + --batch-correlation-columns="$options.batchCorrelationColumns" + #end if + #if str( $options.batchCorrelationPvalueThreshold ) != "" + --batch-correlation-pvalue-threshold="$options.batchCorrelationPvalueThreshold" + #end if + #if str( $options.batchCorrelationPermutations ) != "" + --batch-correlation-permutations="$options.batchCorrelationPermutations" + #end if + #if str( $options.batchCorrelationQc ) == "True" + --batch-correlation-qc + #end if + #if str( $options.profiles ) == "True" + --profiles + #end if + #if str( $options.profilesBetaDistribution ) == "True" + --profiles-beta-distribution + #end if + #if str( $options.profilesIntersample ) == "True" + --profiles-intersample + #end if + #if str( $options.profilesDeviationPlots ) == "True" + --profiles-deviation-plots + #end if + #if str( $options.profilesColumns ) != "" + --profiles-columns="$options.profilesColumns" + #end if + #if str( $options.profilesClustering ) == "True" + --profiles-clustering + #end if + #if str( $options.profilesClusteringTopProbes ) != "" + --profiles-clustering-top-probes="$options.profilesClusteringTopProbes" + #end if + #if str( $options.regionProfilesTypes ) != "" + --region-profiles-types="$options.regionProfilesTypes" + #end if + #if str( $options.differential ) == "True" + --differential + #end if + #if str( $options.differentialPermutations ) != "" + --differential-permutations="$options.differentialPermutations" + #end if + #if str( $options.differentialComparisonColumns ) != "" + --differential-comparison-columns="$options.differentialComparisonColumns" + #end if + #if str( $options.differentialComparisonColumnsAllPairwise ) != "" + --differential-comparison-columns-all-pairwise="$options.differentialComparisonColumnsAllPairwise" + #end if + #if str( $options.differentialEnrichment ) == "True" + --differential-enrichment + #end if + #if str( $options.exportToUcsc ) != "" + 
--export-to-ucsc="$options.exportToUcsc" + #end if + #if str( $options.exportToBed ) == "True" + --export-to-bed + #end if + #if str( $options.exportToCsv ) == "True" + --export-to-csv + #end if + #if str( $options.exportTypes ) != "" + --export-types="$options.exportTypes" + #end if + #if str( $options.colors3Gradient ) != "" + --colors-3-gradient="$options.colors3Gradient" + #end if + #if str( $options.loggingMemory ) == "True" + --logging-memory + #end if + #end if + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + RnBeads is an R-package for comprehensive analysis of Illumina Infinium HumanMethylation450 BeadChip data. It extends previous approaches for this type of data analysis by high throughput capabilities, as well as presenting results in a comprehensive, highly interpretable fashion. 
+
+
+
\ No newline at end of file
diff -r 16f6ef2b3251 -r 39b78c48d064 rnbeads_galaxy_wrapper.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rnbeads_galaxy_wrapper.sh Fri Jul 05 17:39:25 2013 -0400
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Galaxy wrapper for RnBeads: runs RnBeadsGalaxy.R with the arguments it was
+# given, captures stdout/stderr and then either relays them on stderr (exit 3)
+# or appends a short report to the file named by --output-file (exit 0).
+
+# Locate --output-file by name instead of relying on its position.
+outfile=""
+for arg in "$@"
+do
+	case "$arg" in
+		--output-file=*) outfile="${arg#--output-file=}" ;;
+	esac
+done
+if [ -z "$outfile" ]
+then
+	echo "Error: --output-file was not supplied" >&2
+	exit 3
+fi
+echo "$outfile"
+
+# Unpredictable scratch files that are removed when the script exits.
+stdout_log=$(mktemp /tmp/rnbeads_stdout.XXXXXXXX) || exit 3
+stderr_log=$(mktemp /tmp/rnbeads_stderr.XXXXXXXX) || exit 3
+trap 'rm -f "$stdout_log" "$stderr_log"' EXIT
+
+script_dir=$(dirname "$(readlink -f "$0")")
+# "$@" keeps arguments that contain blanks intact
+Rscript --no-save "$script_dir/RnBeadsGalaxy.R" "$@" > "$stdout_log" 2> "$stderr_log"
+rscript_status=$?
+#Rscript --no-save \$R_SCRIPTS_PATH/RnBeadsGalaxy.R $*
+#outdir=`echo $* | sed -e "s/.*--report-dir=\(.*\)[[:blank:]].*/\1/g"`
+
+# A run counts as failed when Rscript exits non-zero or reports an error.
+errl=$(grep -c -e "[Ee]rror" "$stderr_log")
+if [ "$rscript_status" -ne 0 ] || [ "$errl" -gt 0 ]
+then
+	cat "$stdout_log" >&2
+	#cat $outdir/analysis.log >&2
+	cat "$stderr_log" >&2
+	exit 3
+else
+	# NOTE(review): the report strings look like HTML whose tags were lost when
+	# this patch was rendered - restore the markup from the original repository.
+	echo "" >> "$outfile"
+	echo "RnBeads report" >> "$outfile"
+	echo "
+
+Output was generated during the execution:
+" >> "$outfile"
+	echo "
+" >> "$outfile"
+	sed -e "s/$//g" "$stdout_log" >> "$outfile"
+	echo "
+" >> "$outfile"
+	echo "
+
+" >> "$outfile"
+	echo "" >> "$outfile"
+	exit 0
+fi
\ No newline at end of file
diff -r 16f6ef2b3251 -r 39b78c48d064 rnbeads_repos.txt.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/rnbeads_repos.txt.sample Fri Jul 05 17:39:25 2013 -0400
@@ -0,0 +1,1 @@
+repo1 Repository1 /data/iscan/idats
\ No newline at end of file
diff -r 16f6ef2b3251 -r 39b78c48d064 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Fri Jul 05 17:39:25 2013 -0400
@@ -0,0 +1,6 @@
+
+
+ id, name, value
+
+
+
\ No newline at end of file
diff -r 16f6ef2b3251 -r 39b78c48d064 tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Fri Jul 05 17:39:25 2013 -0400
@@ -0,0 +1,22 @@
+
+
+
+ $REPOSITORY_INSTALL_DIR
+
+
+
+
+ http://CRAN.R-project.org/src/base/R-3/R-3.0.0.tar.gz
+ ./configure --prefix=$INSTALL_DIR --with-x=no
+ make
+
+ $INSTALL_DIR/bin
+
+ R --no-save <<< "install.packages('getopt', repos='http://cran.us.r-project.org'); source('http://rnbeads.mpi-inf.mpg.de/install.R')"
+
+
+
+ You need a FORTRAN compiler or perhaps f2c in addition to a C compiler to build R.
+
+
+
\ No newline at end of file