Previous changeset 6:744905986eb9 (2016-03-15) Next changeset 8:969fe96dbc7d (2016-03-15) |
Commit message:
Uploaded |
added:
toolshed_version/Dotplot_Release.tar.gz toolshed_version/GNU Free Documentation License.txt toolshed_version/ProHits_DotPlot_Generator.py toolshed_version/ProHits_DotPlot_Generator.xml toolshed_version/test_files.tar toolshed_version/test_files/SC_SAINT_list.txt toolshed_version/tool_dependencies.xml |
removed:
Dotplot_Release/BaitCheck.pl Dotplot_Release/Normalization.R Dotplot_Release/Normalization_sigpreys.R Dotplot_Release/R_dotPlot.R Dotplot_Release/R_dotPlot_hc.R Dotplot_Release/R_dotPlot_nc.R Dotplot_Release/SOFD.pl Dotplot_Release/SaintConvert.pl Dotplot_Release/Step1_data_reformating.R Dotplot_Release/Step2_data_filtering.R Dotplot_Release/Step3_nestedcluster Dotplot_Release/Step4_biclustering.R Dotplot_Release/biclust.tar.gz Dotplot_Release/biclust_param.txt Dotplot_Release/dotplot.bash Dotplot_Release/pheatmap_j.R |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/BaitCheck.pl --- a/Dotplot_Release/BaitCheck.pl Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,45 +0,0 @@ -#!/usr/bin/perl - -# 27/04/2014 - -if($#ARGV==0){ - print "This program checks the number of baits in a Saint Output File.\n"; - print "\nusage:\n $0\n-i [csv saint output file]]\n\n"; - die; -} -else{ - $i=0; - $cutoff=0.01; - while($i<=$#ARGV){ - if($ARGV[$i] eq '-i'){ - $i++; - $ifile=$ARGV[$i]; - } - else{ - die "\Incorrect program usage\n\n"; - } - $i++; - } -} - -$file=''; -open(IFILE,"<$ifile") || die "$ifile can't be opened: $!"; -{ local $/=undef; $file=<IFILE>; } -@lines=split /[\r\n]+/, $file; -foreach $line (@lines) { - if($line =~ /^Bait/){ - } - elsif($line =~ /^([^\t]+)/){ - if($1 ne $bait[$baitn]){ - $baitn++; - $bait[$baitn]=$1; - } - } - else{ - } -} -close(IFILE); - -print $baitn; - - |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/Normalization.R --- a/Dotplot_Release/Normalization.R Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,44 +0,0 @@ -#!/usr/bin/env Rscript - -args <- commandArgs(trailingOnly = TRUE) - -#this programs normalizes a saint input file based on the spectral counts of all preys - -d = read.delim(args[1], header=T, sep="\t", as.is=T) - -baitn = 1 -curr_bait <- d$Bait[1] -s <- vector() -s[1] = 0 -for(i in 1:length(d$Bait)){ - if(curr_bait != d$Bait[i]){ - baitn <- baitn + 1 - curr_bait <- d$Bait[i] - s[baitn] <- d$AvgSpec[i] - } - else{ - s[baitn] <- s[baitn] + d$AvgSpec[i] - } -} - -med.s = median(s) -s = s / med.s - -d_n <- d -baitn = 1 -curr_bait <- d_n$Bait[1] -for(i in 1:length(d_n$Bait)){ - if(curr_bait != d_n$Bait[i]){ - baitn <- baitn + 1 - curr_bait <- d_n$Bait[i] - d_n$AvgSpec[i] <- d_n$AvgSpec[i]/s[baitn] - } - else{ - d_n$AvgSpec[i] <- d_n$AvgSpec[i]/s[baitn] - } -} - -#print normalized data to file - -write.table(d_n, file = "norm_saint.txt", sep="\t", quote=F, row.names=F) - |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/Normalization_sigpreys.R --- a/Dotplot_Release/Normalization_sigpreys.R Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,46 +0,0 @@ -#!/usr/bin/env Rscript - -#this programs normalizes a saint input file based on the spectral counts of "signficant" preys -# that is, preys with an FDR <= the secondary cutoff as supplied to the dotplot script - -args <- commandArgs(trailingOnly = TRUE) - -d = read.delim(args[1], header=T, as.is=T) -d <- d[d$BFDR <= as.numeric(args[2]),] - -baitn = 1 -curr_bait <- d$Bait[1] -s <- vector() -s[1] = 0 -for(i in 1:length(d$Bait)){ - if(curr_bait != d$Bait[i]){ - baitn <- baitn + 1 - curr_bait <- d$Bait[i] - s[baitn] <- d$AvgSpec[i] - } - else{ - s[baitn] <- s[baitn] + d$AvgSpec[i] - } -} - -med.s = median(s) -s = s / med.s - -d_n <- d -baitn = 1 -curr_bait <- d_n$Bait[1] -for(i in 1:length(d_n$Bait)){ - if(curr_bait != d_n$Bait[i]){ - baitn <- baitn + 1 - curr_bait <- d_n$Bait[i] - d_n$AvgSpec[i] <- d_n$AvgSpec[i]/s[baitn] - } - else{ - d_n$AvgSpec[i] <- d_n$AvgSpec[i]/s[baitn] - } -} - -#print normalized data to file - -write.table(d_n, file = "norm_saint.txt", sep="\t", quote=F, row.names=F) - |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/R_dotPlot.R --- a/Dotplot_Release/R_dotPlot.R Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,83 +0,0 @@ -#!/usr/bin/env Rscript - -args <- commandArgs(trailingOnly = TRUE) - -library('latticeExtra') -library('colorRamps') - -data.file <- read.table("SC_data.txt", sep="\t", header=TRUE, row.names=1) ### import spectral count data -data.file2 <- read.table("FDR_data.txt", sep="\t", header=TRUE, row.names=1) ### import FDR count data -data.file3 <- read.table("clustered_matrix.txt", sep="\t", header=TRUE, row.names=1) ### import clustered matrix -data.file4 <- scan("singletons.txt", what="", sep="\n", strip.white=T) ### import singleton data - -#setting parameters - -Sfirst=as.numeric(args[1]) #first FDR cutoff -Ssecond=as.numeric(args[2]) #second FDR cutoff -maxp=as.integer(args[3]) #maximum value for a spectral count - -#calculate column and row lengths - -#determine bait and prey ordering - -bait_levels=names(data.file3) -prey_levels=c(rownames(data.file3),data.file4) - -x_ord=factor(row.names(data.file),levels=prey_levels) -y_ord=factor(names(data.file),levels=bait_levels) - -df<-data.frame(y=rep(y_ord,nrow(data.file)) - ,x=rep(x_ord, each=ncol(data.file)) - ,z1=as.vector(t(data.file)) # Circle color - ,z2=as.vector(t(data.file/apply(data.file,1,max))) # Circle size - ,z3=as.vector(t(data.file2)) # FDR -) - -df$z1[df$z1>maxp] <- maxp #maximum value for spectral count -df$z2[df$z2==0] <- NA -df$z3[df$z3>Ssecond] <- 0.05*maxp -df$z3[df$z3<=Ssecond & df$z3>Sfirst] <- 0.5*maxp -df$z3[df$z3<=Sfirst] <- 1*maxp -df$z4 <- df$z1 -df$z4[df$z4==0] <- 0 -df$z4[df$z4>0] <- 2.5 - -# The labeling for the colorkey - -labelat = c(0, maxp) -labeltext = c(0, maxp) - -# color scheme to use - -nmb.colors<-maxp -z.colors<-grey(rev(seq(0,0.9,0.9/nmb.colors))) #grayscale color scale - -#plot - -pl <- levelplot(z1~x*y, data=df - ,col.regions =z.colors #terrain.colors(100) - ,scales = list(x = list(rot = 90), y=list(cex=0.8), tck=0) # rotates X,Y labels and changes scale - ,colorkey = FALSE - ,xlab="Prey", ylab="Bait" - ,panel=function(x,y,z,...,col.regions){ - print(x) - z.c<-df$z1[ (df$x %in% as.character(x)) & (df$y %in% y)] - z.2<-df$z2[ (df$x %in% as.character(x)) & (df$y %in% y)] - z.3<-df$z3 - z.4<-df$z4 - panel.xyplot(x,y - ,as.table=TRUE - ,pch=21 # point type to use (circles in this case) - ,cex=((z.2-min(z.2,na.rm=TRUE))/(max(z.2,na.rm=TRUE)-min(z.2,na.rm=TRUE)))*3 #circle size - ,fill=z.colors[floor((z.c-min(z.c,na.rm=TRUE))*nmb.colors/(max(z.c,na.rm=TRUE)-min(z.c,na.rm=TRUE)))+1] # circle colors - ,col=z.colors[1+z.3] # border colors - ,lex=z.4 #border thickness - ) - } - #,main="Fold change" # graph main title - ) -if(ncol(data.file) > 4) ht=3.5+(0.36*((ncol(data.file)-1)-4)) else ht=3.5 -if(nrow(data.file) > 20) wd=8.25+(0.29*(nrow(data.file)-20)) else wd=5+(0.28*(nrow(data.file)-10)) -pdf("dotplot.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2) -print(pl) -dev.off() |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/R_dotPlot_hc.R --- a/Dotplot_Release/R_dotPlot_hc.R Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,125 +0,0 @@ -#!/usr/bin/env Rscript - -args <- commandArgs(trailingOnly = TRUE) - -pheatmapj_loc <- paste(args[6],"pheatmap_j.R",sep="/") -heatmap2j_loc <- paste(args[6],"heatmap_2j.R",sep="/") - -library('latticeExtra') -library('RColorBrewer') -library('grid') -library(reshape2) -library('gplots') -library('gtools') -source(pheatmapj_loc) -source(heatmap2j_loc) - -data.file <- read.table("SC_data.txt", sep="\t", header=TRUE, row.names=1) ### import spectral count data -data.file2 <- read.table("FDR_data.txt", sep="\t", header=TRUE, row.names=1) ### import FDR count data - -#setting parameters - -Sfirst=as.numeric(args[1]) #first FDR cutoff -Ssecond=as.numeric(args[2]) #second FDR cutoff -maxp=as.integer(args[3]) #maximum value for a spectral count -methd <- args[4] -dist_methd <- args[5] - -#determine bait and prey ordering - -dist_bait <- dist(as.matrix(t(data.file)), method= dist_methd) # "euclidean", "maximum", "manhattan", "canberra", "binary" or "minkowski" -dist_prey <- dist(as.matrix(data.file), method= dist_methd) - -if(methd == "ward"){ - dist_bait <- dist_bait^2 #comment out this line and the next if not using Ward's method of clustering - dist_prey <- dist_prey^2 -} - -hc_bait <- hclust(dist_bait, method = methd) # method = "average", "single", "complete", "ward", "mcquitty", "median" or "centroid" -hc_prey <- hclust(dist_prey, method = methd) - -data.file = data.file[hc_prey$order, , drop = FALSE] -data.file = data.file[, hc_bait$order, drop = FALSE] -data.file2 = data.file2[hc_prey$order, , drop = FALSE] -data.file2 = data.file2[, hc_bait$order, drop = FALSE] - -x_ord=factor(row.names(data.file), levels=row.names(data.file)) -y_ord=factor(names(data.file[1,]), levels=names(data.file[1,])) - -df<-data.frame(y=rep(y_ord, nrow(data.file)) - ,x=rep(x_ord, each=ncol(data.file)) - ,z1=as.vector(t(data.file)) # Circle color - ,z2=as.vector(t(data.file/apply(data.file,1,max))) # Circle size - ,z3=as.vector(t(data.file2)) # FDR -) - -df$z1[df$z1>maxp] <- maxp #maximum value for spectral count -df$z2[df$z2==0] <- NA -df$z3[df$z3>Ssecond] <- 0.05*maxp -df$z3[df$z3<=Ssecond & df$z3>Sfirst] <- 0.5*maxp -df$z3[df$z3<=Sfirst] <- 1*maxp -df$z4 <- df$z1 -df$z4[df$z4==0] <- 0 -df$z4[df$z4>0] <- 2.5 - -# The labeling for the colorkey - -labelat = c(0, maxp) -labeltext = c(0, maxp) - -# color scheme to use - -nmb.colors<-maxp -z.colors<-grey(rev(seq(0,0.9,0.9/nmb.colors))) #grayscale color scale - -#plot dotplot - -pl <- levelplot(z1~x*y, data=df - ,col.regions =z.colors #terrain.colors(100) - ,scales = list(x = list(rot = 90), y=list(cex=0.8), tck=0) # rotates X,Y labels and changes scale - ,colorkey = FALSE - #,colorkey = list(space="bottom", width=1.5, height=0.3, labels=list(at = labelat, labels = labeltext)) #put colorkey at top with my labeling scheme - ,xlab="Prey", ylab="Bait" - ,panel=function(x,y,z,...,col.regions){ - print(x) - z.c<-df$z1[ (df$x %in% as.character(x)) & (df$y %in% y)] - z.2<-df$z2[ (df$x %in% as.character(x)) & (df$y %in% y)] - z.3<-df$z3 - z.4<-df$z4 - panel.xyplot(x,y - ,as.table=TRUE - ,pch=21 # point type to use (circles in this case) - ,cex=((z.2-min(z.2,na.rm=TRUE))/(max(z.2,na.rm=TRUE)-min(z.2,na.rm=TRUE)))*3 #circle size - ,fill=z.colors[floor((z.c-min(z.c,na.rm=TRUE))*nmb.colors/(max(z.c,na.rm=TRUE)-min(z.c,na.rm=TRUE)))+1] # circle colors - ,col=z.colors[1+z.3] # border colors - ,lex=z.4 #border thickness - ) - } - #,main="Fold change" # graph main title - ) -if(ncol(data.file) > 4) ht=3.5+(0.36*((ncol(data.file)-1)-4)) else ht=3.5 -if(nrow(data.file) > 20) wd=8.25+(0.29*(nrow(data.file) -20)) else wd=5.7+(0.28*(nrow(data.file) -10)) -pdf("dotplot.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2) -print(pl) -dev.off() - -#plot bait vs prey heatmap - -heat_df <- acast(df, y~x, value.var="z1") -heat_df <- apply(heat_df, 2, rev) - -if(ncol(data.file) > 4) ht=3.5+(0.1*((ncol(data.file)-1)-4)) else ht=3.5 -if(nrow(data.file) > 20) wd=8.25+(0.1*(nrow(data.file)-20)) else wd=5+(0.1*(nrow(data.file)-10)) -pdf("heatmap_borders.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2) -pheatmap_j(heat_df, scale="none", border_color="black", border_width = 0.1, cluster_rows=FALSE, cluster_cols=FALSE, col=colorRampPalette(c("#FFFFFF", brewer.pal(9,"Blues")))(100)) -dev.off() - -pdf("heatmap_no_borders.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2) -pheatmap_j(heat_df, scale="none", border_color=NA, cluster_rows=FALSE, cluster_cols=FALSE, col=colorRampPalette(c("#FFFFFF", brewer.pal(9,"Blues")))(100)) -dev.off() - -#plot bait vs bait heatmap using dist matrix -dist_bait <- dist_bait/max(dist_bait) -pdf("bait2bait.pdf", onefile = FALSE, paper = "special") -heatmap_2j(as.matrix(dist_bait), trace="none", scale="none", density.info="none", col=rev(colorRampPalette(c("#FFFFFF", brewer.pal(9,"Blues")))(100)), xMin=0, xMax=1, margins=c(1.5*max(nchar(rownames(as.matrix(dist_bait)))),1.5*max(nchar(colnames(as.matrix(dist_bait)))))) -dev.off() |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/R_dotPlot_nc.R --- a/Dotplot_Release/R_dotPlot_nc.R Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,140 +0,0 @@ -#!/usr/bin/env Rscript - -args <- commandArgs(trailingOnly = TRUE) - -pheatmapj_loc <- paste(args[9],"pheatmap_j.R",sep="/") - -library('latticeExtra') -library('RColorBrewer') -library('grid') -library(reshape2) -source(pheatmapj_loc) - -data.file <- read.table("SC_data.txt", sep="\t", header=TRUE, row.names=1) ### import spectral count data -data.file2 <- read.table("FDR_data.txt", sep="\t", header=TRUE, row.names=1) ### import FDR count data -bait_l <- scan(args[4], what="") ### import bait list -if(args[5] == 0) prey_l <- scan(args[6], what="") ### import prey list -methd <- args[7] -dist_methd <- args[8] - -#setting parameters - -Sfirst=as.numeric(args[1]) #first FDR cutoff -Ssecond=as.numeric(args[2]) #second FDR cutoff -maxp=as.integer(args[3]) #maximum value for a spectral count - -#extract only needed data - -if(args[5] == 0){ - remove <- vector() - remove <- prey_l[prey_l %in% row.names(data.file)] - prey_l <- prey_l[prey_l %in% remove] - remove <- bait_l[bait_l %in% names(data.file)] - bait_l <- bait_l[bait_l %in% remove] - data.file <- data.file[prey_l, bait_l] - data.file2 <- data.file2[prey_l, bait_l] -} else{ - remove <- vector() - remove <- bait_l[bait_l %in% names(data.file)] - bait_l <- bait_l[bait_l %in% remove] - data.file <- data.file[, bait_l] - data.file2 <- data.file2[, bait_l] - prey_keep = apply(data.file2, 1, function(x) sum(x<=Sfirst) >= 1) - data.file <- data.file[prey_keep,] - data.file2 <- data.file2[prey_keep,] -} - -#determine bait and prey ordering - -y_ord=factor(names(data.file[1,]),levels=bait_l) - -if(args[5] == 0){ - x_ord=factor(rownames(data.file),levels=prey_l) -} else { - - data.file <- data.file[which(rowSums(data.file) > 0),] - dist_prey <- dist(as.matrix(data.file), method= dist_methd) - - if(methd == "ward"){ - dist_prey <- dist_prey^2 - } - - hc_prey <- hclust(dist_prey, method = methd) - - data.file = data.file[hc_prey$order, , drop = FALSE] - data.file2 = data.file2[hc_prey$order, , drop = FALSE] - - x_ord=factor(row.names(data.file), levels=row.names(data.file)) -} - -df<-data.frame(y=rep(y_ord, nrow(data.file)) - ,x=rep(x_ord, each=ncol(data.file)) - ,z1=as.vector(t(data.file)) # Circle color - ,z2=as.vector(t(data.file/apply(data.file,1,max))) # Circle size - ,z3=as.vector(t(data.file2)) # FDR -) - -df$z1[df$z1>maxp] <- maxp #maximum value for spectral count -df$z2[df$z2==0] <- NA -df$z3[df$z3>Ssecond] <- 0.05*maxp -df$z3[df$z3<=Ssecond & df$z3>Sfirst] <- 0.5*maxp -df$z3[df$z3<=Sfirst] <- 1*maxp -df$z4 <- df$z1 -df$z4[df$z4==0] <- 0 -df$z4[df$z4>0] <- 2.5 - -# The labeling for the colorkey - -labelat = c(0, maxp) -labeltext = c(0, maxp) - -# color scheme to use - -nmb.colors<-maxp -z.colors<-grey(rev(seq(0,0.9,0.9/nmb.colors))) #grayscale color scale - -#plot dotplot - -pl <- levelplot(z1~x*y, data=df - ,col.regions =z.colors #terrain.colors(100) - ,scales = list(x = list(rot = 90), y=list(cex=0.8), tck=0) # rotates X,Y labels and changes scale - ,colorkey = FALSE - #,colorkey = list(space="bottom", width=1.5, height=0.3, labels=list(at = labelat, labels = labeltext)) #put colorkey at top with my labeling scheme - ,xlab="Prey", ylab="Bait" - ,panel=function(x,y,z,...,col.regions){ - print(x) - z.c<-df$z1[ (df$x %in% as.character(x)) & (df$y %in% y)] - z.2<-df$z2[ (df$x %in% as.character(x)) & (df$y %in% y)] - z.3<-df$z3 - z.4<-df$z4 - panel.xyplot(x,y - ,as.table=TRUE - ,pch=21 # point type to use (circles in this case) - ,cex=((z.2-min(z.2,na.rm=TRUE))/(max(z.2,na.rm=TRUE)-min(z.2,na.rm=TRUE)))*3 #circle size - ,fill=z.colors[floor((z.c-min(z.c,na.rm=TRUE))*nmb.colors/(max(z.c,na.rm=TRUE)-min(z.c,na.rm=TRUE)))+1] # circle colors - ,col=z.colors[1+z.3] # border colors - ,lex=z.4 #border thickness - ) - } - #,main="Fold change" # graph main title - ) -if(ncol(data.file) > 4) ht=3.5+(0.36*((ncol(data.file)-1)-4)) else ht=3.5 -if(nrow(data.file) > 20) wd=8.25+(0.29*(nrow(data.file)-20)) else wd=5.7+(0.28*(nrow(data.file)-10)) -pdf("dotplot.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2) -print(pl) -dev.off() - -#plot heatmap - -heat_df <- acast(df, y~x, value.var="z1") -heat_df <- apply(heat_df, 2, rev) - -if(ncol(data.file) > 4) ht=3.5+(0.1*((ncol(data.file)-1)-4)) else ht=3.5 -if(nrow(data.file) > 20) wd=8.25+(0.1*(nrow(data.file)-20)) else wd=5+(0.1*(nrow(data.file)-10)) -pdf("heatmap_borders.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2) -pheatmap_j(heat_df, scale="none", border_color="black", border_width = 0.1, cluster_rows=FALSE, cluster_cols=FALSE, col=colorRampPalette(c("#FFFFFF", brewer.pal(9,"Blues")))(100)) -dev.off() - -pdf("heatmap_no_borders.pdf", onefile = FALSE, paper = "special", height = ht, width = wd, pointsize = 2) -pheatmap_j(heat_df, scale="none", border_color=NA, cluster_rows=FALSE, cluster_cols=FALSE, col=colorRampPalette(c("#FFFFFF", brewer.pal(9,"Blues")))(100)) -dev.off() |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/SOFD.pl --- a/Dotplot_Release/SOFD.pl Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,112 +0,0 @@ -#!/usr/bin/perl - -# 17/12/2013 - -if($#ARGV==0){ - print "This program takes the Saint Output File and produces two matrices.\n"; - print "One has average spectral counts and the other FDR scores,\n"; - print "\nusage:\n $0\n-i [csv saint output file]\n-s [FDR cutoff, default=0.01]\n\n"; - die; -} -else{ - $i=0; - $cutoff=0.01; - $spec_cutoff=0; - while($i<=$#ARGV){ - if($ARGV[$i] eq '-i'){ - $i++; - $ifile=$ARGV[$i]; - } - elsif($ARGV[$i] eq '-s'){ - $i++; - if($ARGV[$i]>1 || $ARGV[$i]<0){ - die "\nFDR cutoff must be between 0 and 1 \n\n"; - } - $cutoff=$ARGV[$i]; - } - elsif($ARGV[$i] eq '-x'){ - $i++; - if($ARGV[$i]<0){ - die "\nAvgSpec cutoff must be > 0 \n\n"; - } - $spec_cutoff=$ARGV[$i]; - } - else{ - die "\Incorrect program usage\n\n"; - } - $i++; - } -} - -$baitn=0, $bait[0]=xxxx, $sig_preysn=0; -$file=''; -open(IFILE,"<$ifile") || die "$ifile can't be opened: $!"; -{ local $/=undef; $file=<IFILE>; } -@lines=split /[\r\n]+/, $file; -foreach $line (@lines) { - if($line =~ /^Bait/){ - } - elsif($line =~ /^([^\t]+)\t[^\t]+\t([^\t]+)\t[^\t]+\t[\d]+\t([\d\.]+)\t[\d]+\t[^\t]+\t[^\t]+\t[^\t]+\t[^\t]+\t[^\t]+\t([^\t]+)\t[^\t]+\t([^\t]+)\t/){ - if($1 ne $bait[$baitn]){ - $baitn++; - $bait[$baitn]=$1; - $preyn[$baitn]=0; - } - $preyn[$baitn]++; - $preys[$baitn][$preyn[$baitn]]=$2; - $avgspec[$baitn][$preyn[$baitn]]=$3; - $saint[$baitn][$preyn[$baitn]]=$4; - $fdr[$baitn][$preyn[$baitn]]=$5; - if($5 <= $cutoff && $3 >= $spec_cutoff){ - $check_prey=0; - for($i=1; $i<=$sig_preysn; $i++){ - if($sig_preys[$i] eq $2){ - $check_prey=1; - } - } - if($check_prey==0){ - $sig_preysn++; - $sig_preys[$sig_preysn]=$2; - } - } - } - else{ - } -} -close(IFILE); - -open(SC_FILE, ">SC_data.txt"); -open(FDR_FILE, ">FDR_data.txt"); - -for($i=1; $i<=$baitn; $i++){ - print SC_FILE "\t$bait[$i]"; - print FDR_FILE "\t$bait[$i]"; -} -print SC_FILE "\n"; -print FDR_FILE "\n"; -for($i=1; $i<=$sig_preysn; $i++){ - print SC_FILE "$sig_preys[$i]"; - print FDR_FILE "$sig_preys[$i]"; - for($j=1; $j<=$baitn; $j++){ - $krem=0; - for($k=1; $k<=$preyn[$j]; $k++){; - if($preys[$j][$k] eq $sig_preys[$i]){ - $krem=$k; - last; - } - } - if($krem != 0){ - print SC_FILE "\t$avgspec[$j][$krem]"; - print FDR_FILE "\t$fdr[$j][$krem]"; - } - else{ - print SC_FILE "\t0"; - print FDR_FILE "\t1"; - } - } - print SC_FILE "\n"; - print FDR_FILE "\n"; -} -close(SC_FILE); -close(FDR_FILE); - |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/SaintConvert.pl --- a/Dotplot_Release/SaintConvert.pl Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,52 +0,0 @@ -#!/usr/bin/perl - -# 17/12/2013 - -if($#ARGV==0){ - print "This program takes non-SaintExpress formatted data and converts it to look like it.\n"; - print "\nusage:\n $0\n-i [csv saint output file]\n\n"; - die; -} -else{ - $i=0; - while($i<=$#ARGV){ - if($ARGV[$i] eq '-i'){ - $i++; - $ifile=$ARGV[$i]; - } - else{ - die "\Incorrect program usage\n\n"; - } - $i++; - } -} - -$i=0; -$file=''; -open(IFILE,"<$ifile") || die "$ifile can't be opened: $!"; -{ local $/=undef; $file=<IFILE>; } -@lines=split /[\r\n]+/, $file; -foreach $line (@lines) { - if($line =~ /^Bait/){ - } - elsif($line =~ /^([^\t]+)\t([^\t]+)\t([^\t]+)\t([^\t]+)/){ - $bait[$i]=$1; - $prey[$i]=$2; - $spec[$i]=$3; - $fdr[$i]=$4; - $i++; - } - else{ - } -} -close(IFILE); -$line_count=$i; - -open(OFILE, ">mockSaintExpress.txt"); -print OFILE "Bait\tPrey\tPreyGene\tSpec\tSpecSum\tAvgSpec\tNumReplicates\tctrlCounts\tAvgP\tMaxP\tTopoAvgP\tTopoMaxP\tSaintScore\tFoldChange\tBFDR\tboosted_by\n"; - -for($i=0; $i<$line_count; $i++){ - print OFILE "$bait[$i]\t111\t$prey[$i]\t111\t111\t$spec[$i]\t111\t111\t111\t111\t111\t111\t111\t111\t$fdr[$i]\t111\n"; -} -close(OFILE); - |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/Step1_data_reformating.R --- a/Dotplot_Release/Step1_data_reformating.R Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,41 +0,0 @@ -#!/usr/bin/env Rscript - -args <- commandArgs(trailingOnly = TRUE) - -d = read.delim(args[1], header=T, sep="\t", as.is=T) - -### Select Prey interactions were at least one Bait > Probability Threshold - -preylist=unique(c(d$PreyGene[d$BFDR <= as.numeric(args[2])])) -pid = d$PreyGene %in% preylist -d = d[pid,] - -bb = unique(d$Bait) -pp = unique(d$PreyGene) - -nbait = length(bb) -nprey = length(pp) - -### Reformat the SAINToutput data into a spreadsheet -mat = matrix(0, nprey, nbait) - -n = nrow(d) -mb = match(d$Bait, bb) -mp = match(d$PreyGene, pp) - -### Using the AvgSpec for the spectral counts -for(i in 1:n) { - mat[mp[i],mb[i]] = d$AvgSpec[i] -} - -rownames(mat) = pp -colnames(mat) = bb - -outfile <- paste(c(args[3]), "matrix.txt", sep="_") -### The following file is the outcome of running this step. -write.table(mat, outfile, sep="\t", quote=F) - - - - - |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/Step2_data_filtering.R --- a/Dotplot_Release/Step2_data_filtering.R Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,28 +0,0 @@ -#!/usr/bin/env Rscript - -args <- commandArgs(trailingOnly = TRUE) - -d = read.delim(args[1], header=T, as.is=T) - -d2 = d -d2s = d - -ss_cutoff <- as.numeric(args[2]) -### Here I'm only going to take the preys which appeared in at least 2 baits with >args[2] counts -id = apply(d, 1, function(x) sum(x>ss_cutoff) >= 2) -id2 = apply(d, 1, function(x) sum(x>ss_cutoff) < 2) -d2 = d2[id, ] -d2s = d2s[id2, 0] -max.d2 = max(as.numeric(as.matrix(d2))) -d2 = d2 / max.d2 * 10 - -d3 = data.frame(PROT = rownames(d2), d2) - -outfile <- paste(c(args[3]), "dat", sep=".") - -### The following file is the outcome of running this step. -write.table(d3, outfile, sep="\t", quote=F, row.names=F) -### This is the final input file for nested cluster algorithm - -write.table(d2s, "singletons.txt", quote=F) - |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/Step3_nestedcluster |
b |
Binary file Dotplot_Release/Step3_nestedcluster has changed |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/Step4_biclustering.R --- a/Dotplot_Release/Step4_biclustering.R Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,201 +0,0 @@ -#!/usr/bin/env Rscript - -args <- commandArgs(trailingOnly = TRUE) - -d = read.delim(args[1], header=T, sep="\t", as.is=T, row.names=1) - -clusters = read.delim("Clusters", header=T, sep="\t", as.is=T)[,-1] -clusters = data.frame(Bait=colnames(clusters), Cluster=as.numeric(clusters[1,])) -nested.clusters = read.delim("NestedClusters", header=F, sep="\t", as.is=T)[1:dim(d)[1],] -nested.phi = read.delim("NestedMu", header=F, sep="\t", as.is=T)[1:dim(d)[1],] -nested.phi2 = read.delim("NestedSigma2", header=F, sep="\t", as.is=T)[1:dim(d)[1],] -mcmc = read.delim("MCMCparameters", header=F, sep="\t", as.is=T) - -### distance between bait using phi (also reorder cluster names) -### report nested clusters with positive counts only -### rearrange rows and columns of the raw data matrix according to the back-tracking algorithm - -recursivePaste = function(x) { - n = length(x) - x = x[order(x)] - y = x[1] - if(n > 1) { - for(i in 2:n) y = paste(y, x[i], sep="-") - } - y -} - -calcDist = function(x, y) { - if(length(x) != length(y)) stop("different length\n") - else res = sum(abs(x-y)) - res -} - - -#clusters, nested.clusters, nested.phi, d - -bcl = clusters -pcl = nested.clusters -phi = nested.phi -phi2 = nested.phi2 -dat = d - - -## bipartite graph -make.graphlet = function(b,p,s) { - g = NULL - g$b = b - g$p = p - g$s = as.numeric(s) - g -} - -make.hub = function(b,p) { - g = NULL - g$b = b - g$p = p - g -} - -jaccard = function(x,y) { - j = length(intersect(x,y)) / length(union(x,y)) - j -} - -merge.graphlets = function(x, y) { - g = NULL - g$b = union(x$b, y$b) - g$p = union(x$p, y$p) - g$s1 = rep(0,length(g$p)) - g$s2 = rep(0,length(g$p)) - g$s1[match(x$p, g$p)] = x$s - g$s2[match(y$p, g$p)] = y$s - g$s = apply(cbind(g$s1, g$s2), 1, max) - g -} - -summarizeDP = function(bcl, pcl, phi, phi2, dat, hub.size=0.5, ...) { - pcl = as.matrix(pcl) - phi = as.matrix(phi) - phi2 = as.matrix(phi2) - dat = as.matrix(dat) - rownames(phi) = rownames(dat) - rownames(phi2) = rownames(dat) - - ubcl = unique(as.numeric(bcl$Cluster)) - n = length(ubcl) - pcl = pcl[,ubcl] - phi = phi[,ubcl] - phi2 = phi2[,ubcl] - phi[phi < 0.05] = 0 - - bcl$Cluster = match(as.numeric(bcl$Cluster), ubcl) - colnames(pcl) = colnames(phi) = colnames(phi2) = paste("CL", 1:n, sep="") - - ## remove non-reproducible mean values - nprey = dim(dat)[1]; nbait = dim(dat)[2] - preys = rownames(dat); baits = colnames(dat) - n = length(unique(bcl$Cluster)) - for(j in 1:n) { - id = c(1:nbait)[bcl$Cluster == j] - for(k in 1:nprey) { - do.it = ifelse(mean(as.numeric(dat[k,id]) > 0) <= 0.5,TRUE,FALSE) - if(do.it) { - phi[k,j] = 0 - } - } - } - - ## create bipartite graphs (graphlets) - gr = NULL - for(j in 1:n) { - id = c(1:nbait)[bcl$Cluster == j] - id2 = c(1:nprey)[phi[,j] > 0] - gr[[j]] = make.graphlet(baits[id], preys[id2], phi[id2,j]) - } - - ## intersecting preys between graphlets - gr2 = NULL - cur = 1 - for(i in 1:n) { - for(j in 1:n) { - if(i != j) { - combine = jaccard(gr[[i]]$p, gr[[j]]$p) >= 0.75 - if(combine) { - gr2[[cur]] = merge.graphlets(gr[[i]], gr[[j]]) - cur = cur + 1 - } - } - } - } - - old.phi = phi - phi = phi[, bcl$Cluster] - phi2 = phi2[, bcl$Cluster] - ## find hub preys - proceed = apply(old.phi, 1, function(x) sum(x>0) >= 2) - h = NULL - cur = 1 - for(k in 1:nprey) { - if(proceed[k]) { - id = as.numeric(phi[k,]) > 0 - if(mean(id) >= hub.size) { - h[[cur]] = make.hub(baits[id], preys[k]) - cur = cur + 1 - } - } - } - nhub = cur - 1 - - res = list(data=dat, baitCL=bcl, phi=phi, phi2=phi2, gr = gr, gr2 = gr2, hub = h) - res -} - -res = summarizeDP(clusters, nested.clusters, nested.phi, nested.phi2, d) - -write.table(res$baitCL[order(res$baitCL$Cluster),], "baitClusters", sep="\t", quote=F, row.names=F) -write.table(res$data, "clusteredData", sep="\t", quote=F) - -##### SOFT -library(gplots) -tmpd = res$data -tmpm = res$phi -colnames(tmpm) = paste(colnames(res$data), colnames(tmpm)) - -pdf("estimated.pdf", height=25, width=8) -my.hclust<-hclust(dist(tmpd)) -my.dend<-as.dendrogram(my.hclust) -tmp.res = heatmap.2(tmpm, Rowv=my.dend, Colv=T, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4) -#tmp.res = heatmap.2(tmpm, Rowv=T, Colv=T, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4) -tmpd = tmpd[rev(tmp.res$rowInd),tmp.res$colInd] -write.table(tmpd, "clustered_matrix.txt", sep="\t", quote=F) -heatmap.2(tmpd, Rowv=F, Colv=F, trace="n", col=rev(heat.colors(10)), breaks=seq(0,.5,by=0.05), margins=c(10,10), keysize=0.8, cexRow=0.4) -dev.off() - - -### Statistical Plots -dd = dist(1-cor((res$phi), method="pearson")) -dend = as.dendrogram(hclust(dd, "ave")) -#plot(dend) - -pdf("bait2bait.pdf") -tmp = res$phi -colnames(tmp) = paste(colnames(res$phi), res$baitCL$Bait, sep="_") - -###dd = cor(tmp[,-26]) ### This line is only for Chris' data (one bait has all zeros in the estimated parameters) -dd = cor(tmp) ### This line is only for Chris' data (one bait has all zeros in the estimated parameters) - -write.table(dd, "bait2bait_matrix.txt", sep="\t", quote=F) -heatmap.2(as.matrix(dd), trace="n", breaks=seq(-1,1,by=0.1), col=(greenred(20)), cexRow=0.7, cexCol=0.7) -dev.off() - -tmp = mcmc[,2] -ymax = max(tmp) -ymin = min(tmp) -pdf("stats.pdf", height=12, width=12) - -plot(mcmc[mcmc[,4]=="G",3], type="s", xlab="Iterations", ylab="Number of Clusters", main="") -plot(mcmc[,2], type="l", xlab="Iterations", ylab="Log-Likelihood", main="", ylim=c(ymin,ymax)) - -dev.off() - |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/biclust.tar.gz |
b |
Binary file Dotplot_Release/biclust.tar.gz has changed |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/biclust_param.txt --- a/Dotplot_Release/biclust_param.txt Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,12 +0,0 @@ -np 10 -nb 100 -a 1.0 -b 1.0 -lambda 0.0 -nu 25.0 -alpha 1.0 -rho 1.0 -gamma 1.0 -nburn 5000 -niter 10000 - |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/dotplot.bash --- a/Dotplot_Release/dotplot.bash Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,252 +0,0 @@\n-#!/bin/bash\n-#SCRIPT=$(readlink -e $0)\n-#SCRIPTPATH=`dirname $SCRIPT`\n-pushd `dirname $0` > /dev/null\n-SCRIPTPATH=`pwd`\n-popd > /dev/null\n-\n-usage() { printf "Usage: $0 \n-[-f <saint_file_name.txt>]\n-[-i <0 for SaintExpress format, 1 for other>]\n-[-c <clustering to perform. Options: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering; see options -b and -p)>]\n-[-n <clustering type to be performed if option -c is set to \\"h\\">]\n-[-d <distance metric to use if option -c is set to \\"h\\">]\n-[-b <list of bait proteins in display order (see option -c n)>]\n-[-p <list of prey proteins in display order (see option -c n). Set this to \\"all\\" if you want to include all preys and cluster them>]\n-[-s <primary FDR cutoff [0-1, recommended=0.01]>]\n-[-t <secondary FDR cutoff [must be less than the primary, recommended=0.025]>\n-[-x <spectral count minimum. Only preys with >= this will be used]>\n-[-m <maximum spectral count>]\n-[-N <normalization, 0 for no (default), 1 for yes, 2 for normalization based on significant preys counts (prey FDR <= option -t)>]\n-[-C <FDR cutoff for normalization if using option -N 2 (deafult is -t)>]\\n"\n-1>&2; exit 1; }\n-\n-N=0\n-n="ward"\n-d="canberra"\n-x=0\n-i=0\n-while getopts ":f:i:s:t:x:m:c:n:d:b:p:N:C:" o; do\n- case "${o}" in\n- f)\n- f=${OPTARG}\n- ;;\n- i)\n-\t i=${OPTARG}\n- ;;\n- s)\n- s=${OPTARG}\n- ;;\n-\tt)\n- t=${OPTARG}\n- ;;\n- x)\n-\t x=${OPTARG}\n- ;;\n-\tm)\n- m=${OPTARG}\n- ;;\n-\tc)\n- c=${OPTARG}\n-\t ;;\n-\tn)\n-\t n=${OPTARG}\n-\t ;;\n-\td)\n-\t d=${OPTARG}\n-\t ;;\n-\tb)\n- b=${OPTARG}\n-\t ;;\n-\tp)\n-\t p=${OPTARG}\n-\t ;;\n-\tN)\n-\t N=${OPTARG}\n-\t ;;\n-\tC)\n-\t C=${OPTARG}\n-\t ;;\n- *)\n- usage\n- ;;\n- esac\n-done\n-shift $((OPTIND-1))\n-\n-filename=${f%%.*}\n-echo "Saint input file = ${f}"\n-echo "Primary FDR cutoff = ${s}"\n-echo "Secondary FDR cutoff for dotplot = ${t}"\n-echo "Minimum spectral count for significant preys = ${x}"\n-echo "Maximum spectral count for dot plot = ${m}"\n-\n-if [ -z "${f}" ] || [ -z "${s}" ] || [ -z "${t}" ] || [ -z "${m}" ] || [ -z "${c}" ]; then\n- usage\n-fi\n-\n-if [ "${i}" == 1 ]; then\n-\t$SCRIPTPATH/SaintConvert.pl -i ${f}\n-\tf="mockSaintExpress.txt"\n-fi\n-\n-if [ "${x}" -ge "${m}" ]; then\n-\techo "spectral count minimum (${x}) cannot be greater than or equal to the maximum (${m})"\n-\texit 1;\n-elif [ "${x}" -lt 0 ]; then\n-\techo "spectral count minimum (${x}) cannot be less than 0. Setting to 0 and continuing"\n-\tx=0\n-fi\n-\n-###Check for normalization\n-\n-if [ "${N}" == 1 ]; then\n-\tprintf "\\nNormalization is being performed\\n"\n-\t$SCRIPTPATH/Normalization.R ${f}\n-\tf="norm_saint.txt"\n-elif [ "${N}" == 2 ]; then\n-\tprintf "\\nNormalization is being performed\\n"\n-\tif [ -z "${C}" ]; then\n-\t\tC=${t}\n-\tfi\n-\t$SCRIPTPATH/Normalization_sigpreys.R ${f} ${C}\n-\tf="norm_saint.txt"\n-fi\n-\n-\n-###Check for clustering etc\n-\n-if [ "${c}" == "h" ] && [ -z "${n}" ]; then\n-\tprintf "\\nHierarchial clustering was selected (-c = h), but no clustering method (-n) was chosen.\\n"\n-\tprintf "The input parameter -n must be set to one of \\"average\\", \\"centroid\\", \\"complete\\", \\"mcquitty\\",\\n"\n-\tprintf "\\"median\\", \\"single\\" or \\"ward\\". \\"ward\\" will be selected as default.\\n\\n"\n-\tn="ward"\n-elif [ "${c}" == "h" ] && [ -n "${n}" ]; then\n-\tif [ "${n}" == "average" ] || [ "${n}" == "centroid" ] || [ "${n}" == "complete" ] || [ "${n}" == "mcquitty" ] || [ "${n}" == "median" ] || [ "${n}" == "single" ] || [ "${n}" == "ward" ]; then\n-\t\tprintf "\\nHierarchical clustering (method = ${n}) will be performed\\n\\n"\n-\telse\n-\t\tprintf "\\n${n} is not a valid Hierarchical clustering method.\\n"\n-\t\tprintf "Choose one of \\"average\\", \\"centroid\\", \\"complete\\", \\"mcquitty\\", \\"median\\", \\"single\\" or \\"ward\\"\\n\\n"\n-\t\texit 1\n-\tfi\n-fi\n-\n-p_c=0\n-if [ "${c}" == "h" ] && [ -z "${d}" ]; then\n-\tprintf "'..b'${p}" ]; then\n-\tprintf "\\n\\"No Clustering\\" option was selected (-c = n), but no prey list was included (option -p).\\n"\n-\tprintf "Prey list must be in .txt formart.\\n\\n"\n-\texit 1\n-elif [ "${c}" == "n" ] && [ "${p}" == "all" ]; then\n-\tprintf "\\n\\"No Clustering\\" option was selected (-c = n) for baits, but preys will still be clustered.\\n"\n-\tprintf "using \\"ward\\" and \\"canberra\\" as defaults or options as supplied on command line.\\n\\n"\n-\tp="empty"\n-\tp_c=1\n-\tn="ward"\n-\td="canberra"\n-fi\n-\n-\n-###Check number of baits\n-\n-bait_n=$(perl $SCRIPTPATH/BaitCheck.pl -i ${f})\n-echo "Number of baits = "$bait_n\n-printf "\\n\\n"\n-\n-if [ "${c}" == "b" ] && [ $bait_n == 2 ]; then\n-\tprintf "\\nWarning only 2 baits are present. Biclustering will not performed.\\n"\n-\tprintf "Hierarchical clustering (method = ward) will be performed instead.\\n\\n"\n-\tc="h"\n-\tn="ward"\n-fi\n-\n-\n-###Generate plots\n-\n-if [ "${c}" == "b" ]; then\n-\tprintf "\\nBiclustering will be performed\\n\\n"\n-\t$SCRIPTPATH/Step1_data_reformating.R ${f} ${s} ${filename}\n-\t$SCRIPTPATH/Step2_data_filtering.R ${filename}_matrix.txt ${x} ${filename}\n-\tGSL_RNG_SEED=123 $SCRIPTPATH/Step3_nestedcluster ${filename}.dat $SCRIPTPATH/biclust_param.txt\n-\t$SCRIPTPATH/Step4_biclustering.R ${filename}.dat\n-\n-\t$SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x}\n-\t$SCRIPTPATH/R_dotPlot.R ${s} ${t} ${m}\n-\tmkdir Output_${filename}\n-\tmkdir Output_${filename}/TempData_${filename}\n-\tmv bait_lists Output_${filename}/TempData_${filename}\n-\tmv Clusters Output_${filename}/TempData_${filename}\n-\tmv MCMCparameters Output_${filename}/TempData_${filename}\n-\tmv NestedClusters Output_${filename}/TempData_${filename}\n-\tmv NestedMu Output_${filename}/TempData_${filename}\n-\tmv NestedSigma2 Output_${filename}/TempData_${filename}\n-\tmv OPTclusters Output_${filename}/TempData_${filename}\n-\tmv ${filename}_matrix.txt Output_${filename}/TempData_${filename}\n-\tmv ${filename}.dat Output_${filename}/TempData_${filename}\n-\tmv SC_data.txt Output_${filename}/TempData_${filename}\n-\tmv FDR_data.txt Output_${filename}/TempData_${filename}\n-\tmv clustered_matrix.txt Output_${filename}/TempData_${filename}\n-\tmv singletons.txt Output_${filename}/TempData_${filename}\n-\tmv bait2bait_matrix.txt Output_${filename}/TempData_${filename}\n-\tmv baitClusters Output_${filename}/TempData_${filename}\n-\tmv clusteredData Output_${filename}/TempData_${filename}\n-\tmv dotplot.pdf Output_${filename}\n-\tmv bait2bait.pdf Output_${filename} \n-\tmv estimated.pdf Output_${filename} \n-\tmv stats.pdf Output_${filename}\n-\tcp $SCRIPTPATH/legend.pdf Output_${filename}\n-elif [ "${c}" == "h" ]; then\n-\n-\t$SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x}\n-\t$SCRIPTPATH/R_dotPlot_hc.R ${s} ${t} ${m} ${n} ${d} $SCRIPTPATH\n-\n-\tmkdir Output_${filename}\n-\tmkdir Output_${filename}/TempData_${filename}\n-\tmv dotplot.pdf Output_${filename}\n-\tmv heatmap_borders.pdf Output_${filename}\n-\tmv heatmap_no_borders.pdf Output_${filename}\n-\tmv bait2bait.pdf Output_${filename}\n-\tmv SC_data.txt Output_${filename}/TempData_${filename}\n-\tmv FDR_data.txt Output_${filename}/TempData_${filename}\n-\tcp $SCRIPTPATH/legend.pdf Output_${filename}\n-elif [ "${c}" == "n" ]; then\n-\t\n-\t$SCRIPTPATH/SOFD.pl -i ${f} -s ${s} -x ${x}\n-\techo "$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH"\n-\t$SCRIPTPATH/R_dotPlot_nc.R ${s} ${t} ${m} ${b} $p_c ${p} ${n} ${d} $SCRIPTPATH\n-\n-\tmkdir Output_${filename}\n-\tmkdir Output_${filename}/TempData_${filename}\n-\tmv dotplot.pdf Output_${filename}\n-\tmv heatmap_borders.pdf Output_${filename}\n-\tmv heatmap_no_borders.pdf Output_${filename}\n-\tmv SC_data.txt Output_${filename}/TempData_${filename}\n-\tmv FDR_data.txt Output_${filename}/TempData_${filename}\n-\tcp $SCRIPTPATH/legend.pdf Output_${filename}\n-else\n-\tprintf -- "-c must be one of [b, h, n]: b (biclustering), h (hierarchical), n (none, requires input text files for bait and prey ordering>\\n"\n-\texit 1;\n-fi\n-\n-if [ "${N}" == "1" ] || [ "${N}" == "2" ]; then\n-\tmv norm_saint.txt Output_${filename}/TempData_${filename}\n-fi\n-\n' |
b |
diff -r 744905986eb9 -r 59e57834c366 Dotplot_Release/pheatmap_j.R --- a/Dotplot_Release/pheatmap_j.R Tue Mar 15 15:27:13 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,719 +0,0 @@\n-lo = function(rown, coln, nrow, ncol, cellheight = NA, cellwidth = NA, treeheight_col, treeheight_row, legend, annotation, annotation_colors, annotation_legend, main, fontsize, fontsize_row, fontsize_col, ...){\n-\t# Get height of colnames and length of rownames\n-\tif(!is.null(coln[1])){\n-\t\tlongest_coln = which.max(strwidth(coln, units = \'in\'))\n-\t\tgp = list(fontsize = fontsize_col, ...)\n-\t\tcoln_height = unit(1, "grobheight", textGrob(coln[longest_coln], rot = 90, gp = do.call(gpar, gp))) + unit(5, "bigpts")\n-\t}\n-\telse{\n-\t\tcoln_height = unit(5, "bigpts")\n-\t}\n-\t\n-\tif(!is.null(rown[1])){\n-\t\tlongest_rown = which.max(strwidth(rown, units = \'in\'))\n-\t\tgp = list(fontsize = fontsize_row, ...)\n-\t\trown_width = unit(1, "grobwidth", textGrob(rown[longest_rown], gp = do.call(gpar, gp))) + unit(10, "bigpts")\n-\t}\n-\telse{\n-\t\trown_width = unit(5, "bigpts")\n-\t}\n-\t\n-\tgp = list(fontsize = fontsize, ...)\n-\t# Legend position\n-\tif(!is.na(legend[1])){\n-\t\tlongest_break = which.max(nchar(names(legend)))\n-\t\tlongest_break = unit(1.1, "grobwidth", textGrob(as.character(names(legend))[longest_break], gp = do.call(gpar, gp)))\n-\t\ttitle_length = unit(1.1, "grobwidth", textGrob("Scale", gp = gpar(fontface = "bold", ...)))\n-\t\tlegend_width = unit(12, "bigpts") + longest_break * 1.2\n-\t\tlegend_width = max(title_length, legend_width)\n-\t}\n-\telse{\n-\t\tlegend_width = unit(0, "bigpts")\n-\t}\n-\t\n-\t# Set main title height\n-\tif(is.na(main)){\n-\t\tmain_height = unit(0, "npc")\n-\t}\n-\telse{\n-\t\tmain_height = unit(1.5, "grobheight", textGrob(main, gp = gpar(fontsize = 1.3 * fontsize, ...)))\n-\t}\n-\t\n-\t# Column annotations\n-\tif(!is.na(annotation[[1]][1])){\n-\t\t# Column annotation height \n-\t\tannot_height = unit(ncol(annotation) * (8 + 2) + 2, "bigpts")\n-\t\t# Width of the correponding legend\n-\t\tlongest_ann = which.max(nchar(as.matrix(annotation)))\n-\t\tannot_legend_width = unit(1.2, "grobwidth", textGrob(as.matrix(annotation)[longest_ann], gp = gpar(...))) + unit(12, "bigpts")\n-\t\tif(!annotation_legend){\n-\t\t\tannot_legend_width = unit(0, "npc")\n-\t\t}\n-\t}\n-\telse{\n-\t\tannot_height = unit(0, "bigpts")\n-\t\tannot_legend_width = unit(0, "bigpts")\n-\t}\n-\t\n-\t# Tree height\n-\ttreeheight_col = unit(treeheight_col, "bigpts") + unit(5, "bigpts")\n-\ttreeheight_row = unit(treeheight_row, "bigpts") + unit(5, "bigpts") \n-\t\n-\t# Set cell sizes\n-\tif(is.na(cellwidth)){\n-\t\tmatwidth = unit(1, "npc") - rown_width - legend_width - treeheight_row - annot_legend_width\n-\t}\n-\telse{\n-\t\tmatwidth = unit(cellwidth * ncol, "bigpts")\n-\t}\n-\t\n-\tif(is.na(cellheight)){\n-\t\tmatheight = unit(1, "npc") - main_height - coln_height - treeheight_col - annot_height\n-\t}\n-\telse{\n-\t\tmatheight = unit(cellheight * nrow, "bigpts")\n-\t}\t\n-\t\n-\t\n-\t# Produce layout()\n-\tpushViewport(viewport(layout = grid.layout(nrow = 5, ncol = 5, widths = unit.c(treeheight_row, matwidth, rown_width, legend_width, annot_legend_width), heights = unit.c(main_height, treeheight_col, annot_height, matheight, coln_height)), gp = do.call(gpar, gp)))\n-\t\n-\t# Get cell dimensions\n-\tpushViewport(vplayout(4, 2))\n-\tcellwidth = convertWidth(unit(0:1, "npc"), "bigpts", valueOnly = T)[2] / ncol\n-\tcellheight = convertHeight(unit(0:1, "npc"), "bigpts", valueOnly = T)[2] / nrow\n-\tupViewport()\n-\t\n-\t# Return minimal cell dimension in bigpts to decide if borders are drawn\n-\tmindim = min(cellwidth, cellheight) \n-\treturn(mindim)\n-}\n-\n-draw_dendrogram = function(hc, horizontal = T){\n-\th = hc$height / max(hc$height) / 1.05\n-\tm = hc$merge\n-\to = hc$order\n-\tn = length(o)\n-\n-\tm[m > 0] = n + m[m > 0] \n-\tm[m < 0] = abs(m[m < 0])\n-\n-\tdist = matrix(0, nrow = 2 * n - 1, ncol = 2, dimnames = list(NULL, c("x", "y"))) \n-\tdist[1:n, 1] = 1 / n / 2 + (1 / n) * (match(1:n, o) - 1)\n-\n-\tfor(i in 1:nrow(m)){\n-\t\tdist[n + i, 1] = (dist[m[i, 1], 1] + dist[m[i, 2], 1]) / 2\n-\t\tdist[n + i, 2] = h[i]\n-\t}\n-\t\n-\tdraw_connection = function(x1, x2, y1, y2, y){\n-\t\tgrid.lines(x = c(x1, x1), y = c(y1, y))\n-\t\tgrid.lines(x = c(x2, x2), y = c(y2, y))\n-\t\tgrid.lines(x = c(x1, x2), y = c(y, y))\n-\t}\n-\t\n-\tif(horizontal){\n-'..b'ing_distance_cols = dcols)\n-#\'\n-#\' @export\n-pheatmap_j = function(mat, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100), kmeans_k = NA, breaks = NA, border_color = "grey60", border_width = 1, cellwidth = NA, cellheight = NA, scale = "none", cluster_rows = TRUE, cluster_cols = TRUE, clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean", clustering_method = "complete", treeheight_row = ifelse(cluster_rows, 50, 0), treeheight_col = ifelse(cluster_cols, 50, 0), legend = TRUE, legend_breaks = NA, legend_labels = NA, annotation = NA, annotation_colors = NA, annotation_legend = TRUE, drop_levels = TRUE, show_rownames = T, show_colnames = T, main = NA, fontsize = 10, fontsize_row = fontsize, fontsize_col = fontsize, display_numbers = F, number_format = "%.2f", fontsize_number = 0.8 * fontsize, filename = NA, width = NA, height = NA, ...){\n-\t\n-\t# Preprocess matrix\n-\tmat = as.matrix(mat)\n-\tif(scale != "none"){\n-\t\tmat = scale_mat(mat, scale)\n-\t\tif(is.na(breaks)){\n-\t\t\tbreaks = generate_breaks(mat, length(color), center = T)\n-\t\t}\n-\t}\n-\t\n-\t\n-\t# Kmeans\n-\tif(!is.na(kmeans_k)){\n-\t\t# Cluster data\n-\t\tkm = kmeans(mat, kmeans_k, iter.max = 100)\n-\t\tmat = km$centers\n-\n-\t\t# Compose rownames\n-\t\tt = table(km$cluster)\n-\t\trownames(mat) = sprintf("cl%s_size_%d", names(t), t)\n-\t}\n-\telse{\n-\t\tkm = NA\n-\t}\n-\t\n-\t# Do clustering\n-\tif(cluster_rows){\n-\t\ttree_row = cluster_mat(mat, distance = clustering_distance_rows, method = clustering_method)\n-\t\tmat = mat[tree_row$order, , drop = FALSE]\n-\t}\n-\telse{\n-\t\ttree_row = NA\n-\t\ttreeheight_row = 0\n-\t}\n-\t\n-\tif(cluster_cols){\n-\t\ttree_col = cluster_mat(t(mat), distance = clustering_distance_cols, method = clustering_method)\n-\t\tmat = mat[, tree_col$order, drop = FALSE]\n-\t}\n-\telse{\n-\t\ttree_col = NA\n-\t\ttreeheight_col = 0\n-\t}\n-\t\n-\t# Format numbers to be displayed in cells \n-\tif(display_numbers){\n-\t\tfmat = matrix(sprintf(number_format, mat), nrow = nrow(mat), ncol = ncol(mat))\n-\t\tattr(fmat, "draw") = TRUE\n-\t}\n-\telse{\n-\t\tfmat = matrix(NA, nrow = nrow(mat), ncol = ncol(mat))\n-\t\tattr(fmat, "draw") = FALSE\n-\t}\n-\t\n-\t\n-\t# Colors and scales\n-\tif(!is.na(legend_breaks[1]) & !is.na(legend_labels[1])){\n-\t\tif(length(legend_breaks) != length(legend_labels)){\n-\t\t\tstop("Lengths of legend_breaks and legend_labels must be the same")\n-\t\t}\n-\t}\n-\t\n-\t\n-\tif(is.na(breaks[1])){\n- breaks = generate_breaks(as.vector(mat), length(color))\n- }\n- if (legend & is.na(legend_breaks[1])) {\n- legend = grid.pretty(range(as.vector(breaks)))\n-\t\t\tnames(legend) = legend\n- }\n-\telse if(legend & !is.na(legend_breaks[1])){\n-\t\tlegend = legend_breaks[legend_breaks >= min(breaks) & legend_breaks <= max(breaks)]\n-\t\t\n-\t\tif(!is.na(legend_labels[1])){\n-\t\t\tlegend_labels = legend_labels[legend_breaks >= min(breaks) & legend_breaks <= max(breaks)]\n-\t\t\tnames(legend) = legend_labels\n-\t\t}\n-\t\telse{\n-\t\t\tnames(legend) = legend\n-\t\t}\n-\t}\n- else {\n- legend = NA\n- }\n-\tmat = scale_colours(mat, col = color, breaks = breaks)\n-\t\n-\t# Preparing annotation colors\n-\tif(!is.na(annotation[[1]][1])){\n-\t\tannotation = annotation[colnames(mat), , drop = F]\n-\t\tannotation_colors = generate_annotation_colours(annotation, annotation_colors, drop = drop_levels)\n-\t}\n-\t\n-\tif(!show_rownames){\n-\t\trownames(mat) = NULL\n-\t}\n-\t\n-\tif(!show_colnames){\n-\t\tcolnames(mat) = NULL\n-\t}\n-\t\n-\t# Draw heatmap\n-\theatmap_motor(mat, border_color = border_color, border_width = border_width, cellwidth = cellwidth, cellheight = cellheight, treeheight_col = treeheight_col, treeheight_row = treeheight_row, tree_col = tree_col, tree_row = tree_row, filename = filename, width = width, height = height, breaks = breaks, color = color, legend = legend, annotation = annotation, annotation_colors = annotation_colors, annotation_legend = annotation_legend, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col, fmat = fmat, fontsize_number = fontsize_number, ...)\n-\t\n-\tinvisible(list(tree_row = tree_row, tree_col = tree_col, kmeans = km))\n-}\n-\n-\n' |
b |
diff -r 744905986eb9 -r 59e57834c366 toolshed_version/Dotplot_Release.tar.gz |
b |
Binary file toolshed_version/Dotplot_Release.tar.gz has changed |
b |
diff -r 744905986eb9 -r 59e57834c366 toolshed_version/GNU Free Documentation License.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolshed_version/GNU Free Documentation License.txt Tue Mar 15 15:30:44 2016 -0400 |
b |
b'@@ -0,0 +1,451 @@\n+\r\n+ GNU Free Documentation License\r\n+ Version 1.3, 3 November 2008\r\n+\r\n+\r\n+ Copyright (C) 2000, 2001, 2002, 2007, 2008 Free Software Foundation, Inc.\r\n+ <http://fsf.org/>\r\n+ Everyone is permitted to copy and distribute verbatim copies\r\n+ of this license document, but changing it is not allowed.\r\n+\r\n+0. PREAMBLE\r\n+\r\n+The purpose of this License is to make a manual, textbook, or other\r\n+functional and useful document "free" in the sense of freedom: to\r\n+assure everyone the effective freedom to copy and redistribute it,\r\n+with or without modifying it, either commercially or noncommercially.\r\n+Secondarily, this License preserves for the author and publisher a way\r\n+to get credit for their work, while not being considered responsible\r\n+for modifications made by others.\r\n+\r\n+This License is a kind of "copyleft", which means that derivative\r\n+works of the document must themselves be free in the same sense. It\r\n+complements the GNU General Public License, which is a copyleft\r\n+license designed for free software.\r\n+\r\n+We have designed this License in order to use it for manuals for free\r\n+software, because free software needs free documentation: a free\r\n+program should come with manuals providing the same freedoms that the\r\n+software does. But this License is not limited to software manuals;\r\n+it can be used for any textual work, regardless of subject matter or\r\n+whether it is published as a printed book. We recommend this License\r\n+principally for works whose purpose is instruction or reference.\r\n+\r\n+\r\n+1. APPLICABILITY AND DEFINITIONS\r\n+\r\n+This License applies to any manual or other work, in any medium, that\r\n+contains a notice placed by the copyright holder saying it can be\r\n+distributed under the terms of this License. Such a notice grants a\r\n+world-wide, royalty-free license, unlimited in duration, to use that\r\n+work under the conditions stated herein. The "Document", below,\r\n+refers to any such manual or work. Any member of the public is a\r\n+licensee, and is addressed as "you". You accept the license if you\r\n+copy, modify or distribute the work in a way requiring permission\r\n+under copyright law.\r\n+\r\n+A "Modified Version" of the Document means any work containing the\r\n+Document or a portion of it, either copied verbatim, or with\r\n+modifications and/or translated into another language.\r\n+\r\n+A "Secondary Section" is a named appendix or a front-matter section of\r\n+the Document that deals exclusively with the relationship of the\r\n+publishers or authors of the Document to the Document\'s overall\r\n+subject (or to related matters) and contains nothing that could fall\r\n+directly within that overall subject. (Thus, if the Document is in\r\n+part a textbook of mathematics, a Secondary Section may not explain\r\n+any mathematics.) The relationship could be a matter of historical\r\n+connection with the subject or with related matters, or of legal,\r\n+commercial, philosophical, ethical or political position regarding\r\n+them.\r\n+\r\n+The "Invariant Sections" are certain Secondary Sections whose titles\r\n+are designated, as being those of Invariant Sections, in the notice\r\n+that says that the Document is released under this License. If a\r\n+section does not fit the above definition of Secondary then it is not\r\n+allowed to be designated as Invariant. The Document may contain zero\r\n+Invariant Sections. If the Document does not identify any Invariant\r\n+Sections then there are none.\r\n+\r\n+The "Cover Texts" are certain short passages of text that are listed,\r\n+as Front-Cover Texts or Back-Cover Texts, in the notice that says that\r\n+the Document is released under this License. A Front-Cover Text may\r\n+be at most 5 words, and a Back-Cover Text may be at most 25 words.\r\n+\r\n+A "Transparent" copy of the Document means a machine-readable copy,\r\n+represented in a format whose specification is available to the\r\n+general public, that is suitable for revising the document\r\n+straightforwardly with generic '..b' copy of some or all of the same material does\r\n+not give you any rights to use it.\r\n+\r\n+\r\n+10. FUTURE REVISIONS OF THIS LICENSE\r\n+\r\n+The Free Software Foundation may publish new, revised versions of the\r\n+GNU Free Documentation License from time to time. Such new versions\r\n+will be similar in spirit to the present version, but may differ in\r\n+detail to address new problems or concerns. See\r\n+http://www.gnu.org/copyleft/.\r\n+\r\n+Each version of the License is given a distinguishing version number.\r\n+If the Document specifies that a particular numbered version of this\r\n+License "or any later version" applies to it, you have the option of\r\n+following the terms and conditions either of that specified version or\r\n+of any later version that has been published (not as a draft) by the\r\n+Free Software Foundation. If the Document does not specify a version\r\n+number of this License, you may choose any version ever published (not\r\n+as a draft) by the Free Software Foundation. If the Document\r\n+specifies that a proxy can decide which future versions of this\r\n+License can be used, that proxy\'s public statement of acceptance of a\r\n+version permanently authorizes you to choose that version for the\r\n+Document.\r\n+\r\n+11. RELICENSING\r\n+\r\n+"Massive Multiauthor Collaboration Site" (or "MMC Site") means any\r\n+World Wide Web server that publishes copyrightable works and also\r\n+provides prominent facilities for anybody to edit those works. A\r\n+public wiki that anybody can edit is an example of such a server. A\r\n+"Massive Multiauthor Collaboration" (or "MMC") contained in the site\r\n+means any set of copyrightable works thus published on the MMC site.\r\n+\r\n+"CC-BY-SA" means the Creative Commons Attribution-Share Alike 3.0 \r\n+license published by Creative Commons Corporation, a not-for-profit \r\n+corporation with a principal place of business in San Francisco, \r\n+California, as well as future copyleft versions of that license \r\n+published by that same organization.\r\n+\r\n+"Incorporate" means to publish or republish a Document, in whole or in \r\n+part, as part of another Document.\r\n+\r\n+An MMC is "eligible for relicensing" if it is licensed under this \r\n+License, and if all works that were first published under this License \r\n+somewhere other than this MMC, and subsequently incorporated in whole or \r\n+in part into the MMC, (1) had no cover texts or invariant sections, and \r\n+(2) were thus incorporated prior to November 1, 2008.\r\n+\r\n+The operator of an MMC Site may republish an MMC contained in the site\r\n+under CC-BY-SA on the same site at any time before August 1, 2009,\r\n+provided the MMC is eligible for relicensing.\r\n+\r\n+\r\n+ADDENDUM: How to use this License for your documents\r\n+\r\n+To use this License in a document you have written, include a copy of\r\n+the License in the document and put the following copyright and\r\n+license notices just after the title page:\r\n+\r\n+ Copyright (c) YEAR YOUR NAME.\r\n+ Permission is granted to copy, distribute and/or modify this document\r\n+ under the terms of the GNU Free Documentation License, Version 1.3\r\n+ or any later version published by the Free Software Foundation;\r\n+ with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts.\r\n+ A copy of the license is included in the section entitled "GNU\r\n+ Free Documentation License".\r\n+\r\n+If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts,\r\n+replace the "with...Texts." line with this:\r\n+\r\n+ with the Invariant Sections being LIST THEIR TITLES, with the\r\n+ Front-Cover Texts being LIST, and with the Back-Cover Texts being LIST.\r\n+\r\n+If you have Invariant Sections without Cover Texts, or some other\r\n+combination of the three, merge those two alternatives to suit the\r\n+situation.\r\n+\r\n+If your document contains nontrivial examples of program code, we\r\n+recommend releasing these examples in parallel under your choice of\r\n+free software license, such as the GNU General Public License,\r\n+to permit their use in free software.\r\n' |
b |
diff -r 744905986eb9 -r 59e57834c366 toolshed_version/ProHits_DotPlot_Generator.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolshed_version/ProHits_DotPlot_Generator.py Tue Mar 15 15:30:44 2016 -0400 |
[ |
@@ -0,0 +1,56 @@ +####################################################################################### +# Python-code: Dotplot Runner +# Author: Adam L Borne +# Contributers: Paul A Stewart, Brent Kuenzi +####################################################################################### +# This script runs the dotplot program found at http://prohitstools.mshri.on.ca/. +####################################################################################### +# Copyright (C) Adam Borne. +# Permission is granted to copy, distribute and/or modify this document +# under the terms of the GNU Free Documentation License, Version 1.3 +# or any later version published by the Free Software Foundation; +# with no Invariant Sections, no Front-Cover Texts, and no Back-Cover Texts. +# A copy of the license is included in the section entitled "GNU +# Free Documentation License". +####################################################################################### +## REQUIRED INPUT ## + +# 1) list_file: SaintExpress output file. +# 2) FDR1: Primary false discovery rate. (default = 0.01) +# 3) FDR2: Secondary false discovery rate. (default = 0.025) +# 4) spec_max: Maximum spectral count. (default = 50) +####################################################################################### +import os +import os +import sys + + +FDR1 = sys.argv[2] +FDR2 = sys.argv[3] +spec_max = sys.argv[4] +os.rename(sys.argv[1], "saint_input.txt") +ins_path = sys.argv[9] + +dirs_list = [] +for (dirpath, dirnames, filename) in os.walk(str(ins_path)): + dirs_list.extend(dirnames) + break +if r"Dotplot_Release" in dirs_list: + pass +else: + cmd = r"tar -xvf /Dotplot_Release.tar.gz " + str(ins_path) + "/Dotplot_Release.tar.gz" + os.system(cmd) + + +cmd = (str(ins_path) + r"Dotplot_Release/dotplot.bash -f saint_input.txt" + r" -c b -s " + str(FDR1) + + r" -t " + str(FDR2) + " -m " + str(spec_max)) +os.system(cmd) + +cmd1 = r"cp -a ./Output_saint_input/. ." +os.system(cmd1) + +os.rename("saint_input.txt", str(sys.argv[1])) +os.rename('dotplot.pdf', str(sys.argv[5])) +os.rename('bait2bait.pdf', str(sys.argv[6])) +os.rename('estimated.pdf', str(sys.argv[7])) +os.rename('stats.pdf', str(sys.argv[8])) \ No newline at end of file |
b |
diff -r 744905986eb9 -r 59e57834c366 toolshed_version/ProHits_DotPlot_Generator.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolshed_version/ProHits_DotPlot_Generator.xml Tue Mar 15 15:30:44 2016 -0400 |
b |
@@ -0,0 +1,31 @@ +<tool id="ProHits_DotPlot_Generator" name="ProHits DotPlot Generator"> + <description></description> + <command interpreter="python">ProHits_DotPlot_Generator.py $saint_file $FDR1 $FDR2 $spec_max $dotplot $heatmap $estimated $stats</command> + <inputs> + <param format="txt" name="saint_file" type="data" label="SAINT Output"/> + <param type="float" name="FDR1" label="False Discovery Rate 1" value="0.01"/> + <param type="float" name="FDR2" label="False Discovery Rate 2" value="0.025"/> + <param type="integer" name="spec_max" label="Maximum spectral count" value="50"/> + </inputs> + <outputs> + <data format="pdf" name="dotplot" label="DotPlot" /> + <data format="pdf" name="heatmap" label="Heatmap" /> + <data format="pdf" name="estimated" label="Estimated" /> + <data format="pdf" name="stats" label="Stats" /> + </outputs> + <stdio> + <regex match="error" + source="stdout" + level="fatal" + description="Unknown error"/> + </stdio> + + <tests> + <test> + <param name="input" value="fa_gc_content_input.fa"/> + <output name="out_file1" file="fa_gc_content_output.txt"/> + </test> + </tests> + <help> + </help> +</tool> \ No newline at end of file |
b |
diff -r 744905986eb9 -r 59e57834c366 toolshed_version/test_files.tar |
b |
Binary file toolshed_version/test_files.tar has changed |
b |
diff -r 744905986eb9 -r 59e57834c366 toolshed_version/test_files/SC_SAINT_list.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolshed_version/test_files/SC_SAINT_list.txt Tue Mar 15 15:30:44 2016 -0400 |
b |
b'@@ -0,0 +1,473 @@\n+Bait\tPrey\tPreyGene\tSpec\tSpecSum\tAvgSpec\tNumReplicates\tctrlCounts\tAvgP\tMaxP\tTopoAvgP\tTopoMaxP\tSaintScore\tlogOddsScore\tFoldChange\tBFDR\tboosted_by\n+WT_EGFR\tEGFR_HUMAN\tEGFR\t67|52|45|55\t219\t54.75\t4\t0|0|0|0\t1.00\t1.00\t1.00\t1.00\t1.00\t68.87\t547.50\t0.00\t\n+WT_EGFR\tERBB2_HUMAN\tERBB2\t4|5|3|5\t17\t4.25\t4\t0|0|0|0\t1.00\t1.00\t1.00\t1.00\t1.00\t6.28\t42.50\t0.00\t\n+WT_EGFR\tP85B_HUMAN\tPIK3R2\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+WT_EGFR\tP55G_HUMAN\tPIK3R3\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+WT_EGFR\tP85A_HUMAN\tPIK3R1\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+WT_EGFR\tK2C1_HUMAN\tKRT1\t3|4|6|5\t18\t4.50\t4\t10|10|7|7\t0.00\t0.00\t0.00\t0.00\t0.00\t-27.06\t0.53\t0.51\t\n+WT_EGFR\tK1C10_HUMAN\tKRT10\t2|0|3|3\t8\t2.00\t4\t1|2|4|3\t0.01\t0.02\t0.01\t0.02\t0.01\t-9.56\t0.80\t0.49\t\n+WT_EGFR\tK1C16_HUMAN\tKRT16\t0|0|2|1\t3\t0.75\t4\t1|1|2|2\t0.03\t0.12\t0.03\t0.12\t0.03\t-5.35\t0.50\t0.47\t\n+WT_EGFR\tGRB2_HUMAN\tGRB2\t2|0|2|2\t6\t1.50\t4\t0|0|0|0\t0.74\t0.99\t0.74\t0.99\t0.74\t-0.11\t15.00\t0.05\t\n+WT_EGFR\tGRP78_HUMAN\tHSPA5\t16|14|11|12\t53\t13.25\t4\t0|0|0|0\t1.00\t1.00\t1.00\t1.00\t1.00\t19.16\t132.50\t0.00\t\n+WT_EGFR\tK22E_HUMAN\tKRT2\t2|3|6|4\t15\t3.75\t4\t4|4|1|1\t0.15\t0.50\t0.15\t0.50\t0.15\t-5.61\t1.50\t0.44\t\n+WT_EGFR\tK2C8_HUMAN\tKRT8\t1|1|2|2\t6\t1.50\t4\t0|0|0|0\t0.49\t0.99\t0.49\t0.99\t0.49\t2.40\t15.00\t0.19\t\n+WT_EGFR\tK2C5_HUMAN\tKRT5\t1|1|3|2\t7\t1.75\t4\t1|1|2|0\t0.29\t0.74\t0.29\t0.74\t0.29\t-1.74\t1.75\t0.26\t\n+WT_EGFR\tK2C4_HUMAN\tKRT4\t0|0|0|0\t0\t0.00\t4\t0|1|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.65\t0.00\t0.51\t\n+WT_EGFR\tK2C6B_HUMAN\tKRT6B\t1|2|4|4\t11\t2.75\t4\t2|2|2|1\t0.25\t0.47\t0.25\t0.47\t0.25\t-4.44\t1.57\t0.42\t\n+WT_EGFR\tK2C7_HUMAN\tKRT7\t0|0|0|2\t2\t0.50\t4\t0|0|0|0\t0.25\t0.99\t0.25\t0.99\t0.25\t-0.11\t5.00\t0.28\t\n+WT_EGFR\tHSP7C_HUMAN\tHSPA8\t15|10|12|11\t48\t12.00\t4\t0|0|1|1\t1.00\t1.00\t1.00\t1.00\t1.00\t12.83\t24.00\t0.00\t\n+WT_EGFR\tPK3CA_HUMAN\tPIK3CA\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+WT_EGFR\tERBB3_HUMAN\tERBB3\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+WT_EGFR\tADT2_HUMAN\tSLC25A5\t5|3|5|5\t18\t4.50\t4\t0|0|0|0\t1.00\t1.00\t1.00\t1.00\t1.00\t6.28\t45.00\t0.00\t\n+WT_EGFR\tADT3_HUMAN\tSLC25A6\t5|4|5|5\t19\t4.75\t4\t0|0|0|0\t1.00\t1.00\t1.00\t1.00\t1.00\t8.02\t47.50\t0.00\t\n+WT_EGFR\tK1C9_HUMAN\tKRT9\t1|1|1|0\t3\t0.75\t4\t0|0|1|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.65\t3.00\t0.51\t\n+WT_EGFR\tTBA1A_HUMAN\tTUBA1A\t5|3|5|3\t16\t4.00\t4\t0|0|0|0\t1.00\t1.00\t1.00\t1.00\t1.00\t6.28\t40.00\t0.00\t\n+WT_EGFR\tTBA4A_HUMAN\tTUBA4A\t3|2|3|1\t9\t2.25\t4\t0|0|0|0\t0.75\t1.00\t0.75\t1.00\t0.75\t2.40\t22.50\t0.02\t\n+WT_EGFR\tARHG5_HUMAN\tARHGEF5\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+WT_EGFR\tPK3CB_HUMAN\tPIK3CB\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+WT_EGFR\tIRS1_HUMAN\tIRS1\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+WT_EGFR\tTBB5_HUMAN\tTUBB\t8|6|5|2\t21\t5.25\t4\t0|0|0|0\t1.00\t1.00\t1.00\t1.00\t1.00\t4.43\t52.50\t0.00\t\n+WT_EGFR\tTBB3_HUMAN\tTUBB3\t2|2|3|1\t8\t2.00\t4\t0|0|0|0\t0.74\t1.00\t0.74\t1.00\t0.74\t2.40\t20.00\t0.05\t\n+WT_EGFR\tHS90A_HUMAN\tHSP90AA1\t3|3|1|2\t9\t2.25\t4\t0|0|0|0\t0.75\t1.00\t0.75\t1.00\t0.75\t2.40\t22.50\t0.02\t\n+WT_EGFR\tHS90B_HUMAN\tHSP90AB1\t4|2|1|2\t9\t2.25\t4\t0|0|0|0\t0.74\t1.00\t0.74\t1.00\t0.74\t2.40\t22.50\t0.04\t\n+WT_EGFR\t1433E_HUMAN\tYWHAE\t1|0|0|0\t1\t0.25\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t2.50\t0.51\t\n+WT_EGFR\t1433T_HUMAN\tYWHAQ\t1|0|0|0\t1\t0.25\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t2.50\t0.51\t\n+WT_EGFR\tLPPRC_HUMAN\tLRPPRC\t1|3|3|3\t10\t2.50\t4\t0|0|0|0\t0.75\t1.00\t0.75\t1.00\t0.75\t2.40\t25.00\t0.00\t\n+WT_EGFR\tERRFI_HUMAN\tERRFI1\t1|2|1|1\t5\t1.25\t4\t0|0|0|0\t0.25\t0.99\t0.25\t0.99\t0.25\t2.40\t12.50\t0.28\t\n+WT_EGFR\tAP2A1_HUMAN\tAP2A1\t1|1|1|1\t4\t1.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t2.40\t10.00\t0.51\t\n+WT_EGFR\tGRB7_HUMAN\tGRB7\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+WT_EGFR\tSTAT3_HUMAN\tSTAT3\t4|5|3|3\t15\t3.75\t4\t0|0|0|0\t1.00\t1.00\t1.00\t1.00\t1.00\t6.28\t37.50\t0.00\t\n+WT_EGFR\tANXA2_HUMAN\tANXA2\t3|4|0|1\t8\t2.00\t4\t0|0|0|0\t0.50\t1.00\t0.50\t1.00\t0.50\t-0.11\t20.00\t0.10\t\n+WT_EGFR\tPTPRA_HUMAN\tPTPRA\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+WT_EGFR\tC1QBP_HUMAN\tC1QBP\t0|0|0|0\t0\t0.00\t4\t7|'..b'\t0|0|1|1\t0.00\t0.00\t0.00\t0.00\t0.00\t-1.55\t1.00\t0.51\t\n+ER_P85B\tPK3CA_HUMAN\tPIK3CA\t3|3|6|6\t18\t4.50\t4\t0|0|0|0\t1.00\t1.00\t1.00\t1.00\t1.00\t6.28\t45.00\t0.00\t\n+ER_P85B\tERBB3_HUMAN\tERBB3\t1|0|2|1\t4\t1.00\t4\t0|0|0|0\t0.25\t0.99\t0.25\t0.99\t0.25\t-0.11\t10.00\t0.28\t\n+ER_P85B\tADT2_HUMAN\tSLC25A5\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tADT3_HUMAN\tSLC25A6\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tK1C9_HUMAN\tKRT9\t1|0|1|0\t2\t0.50\t4\t0|0|1|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.65\t2.00\t0.51\t\n+ER_P85B\tTBA1A_HUMAN\tTUBA1A\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tTBA4A_HUMAN\tTUBA4A\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tARHG5_HUMAN\tARHGEF5\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tPK3CB_HUMAN\tPIK3CB\t1|1|4|2\t8\t2.00\t4\t0|0|0|0\t0.50\t1.00\t0.50\t1.00\t0.50\t2.40\t20.00\t0.14\t\n+ER_P85B\tIRS1_HUMAN\tIRS1\t2|3|3|4\t12\t3.00\t4\t0|0|0|0\t1.00\t1.00\t1.00\t1.00\t1.00\t4.43\t30.00\t0.00\t\n+ER_P85B\tTBB5_HUMAN\tTUBB\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tTBB3_HUMAN\tTUBB3\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tHS90A_HUMAN\tHSP90AA1\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tHS90B_HUMAN\tHSP90AB1\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\t1433E_HUMAN\tYWHAE\t0|0|1|1\t2\t0.50\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t5.00\t0.51\t\n+ER_P85B\t1433T_HUMAN\tYWHAQ\t0|0|1|1\t2\t0.50\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t5.00\t0.51\t\n+ER_P85B\tLPPRC_HUMAN\tLRPPRC\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tERRFI_HUMAN\tERRFI1\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tAP2A1_HUMAN\tAP2A1\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tGRB7_HUMAN\tGRB7\t1|1|1|0\t3\t0.75\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t7.50\t0.51\t\n+ER_P85B\tSTAT3_HUMAN\tSTAT3\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tANXA2_HUMAN\tANXA2\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tPTPRA_HUMAN\tPTPRA\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tC1QBP_HUMAN\tC1QBP\t0|0|0|0\t0\t0.00\t4\t7|5|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-11.03\t0.00\t0.51\t\n+ER_P85B\tSHC1_HUMAN\tSHC1\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tPTN11_HUMAN\tPTPN11\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tPON2_HUMAN\tPON2\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tMET_HUMAN\tMET\t0|0|1|1\t2\t0.50\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t5.00\t0.51\t\n+ER_P85B\tCRKL_HUMAN\tCRKL\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tCBL_HUMAN\tCBL\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tPHLA2_HUMAN\tPHLDA2\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tRT27_HUMAN\tMRPS27\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tM2OM_HUMAN\tSLC25A11\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tSTAT1_HUMAN\tSTAT1\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tSLIRP_HUMAN\tSLIRP\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tPK3CD_HUMAN\tPIK3CD\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tGRP75_HUMAN\tHSPA9\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tAIFM1_HUMAN\tAIFM1\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tPHB_HUMAN\tPHB\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tHNRPK_HUMAN\tHNRNPK\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tTMM33_HUMAN\tTMEM33\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n+ER_P85B\tRT34_HUMAN\tMRPS34\t0|0|0|0\t0\t0.00\t4\t0|0|0|0\t0.00\t0.00\t0.00\t0.00\t0.00\t-0.11\t0.00\t0.51\t\n' |
b |
diff -r 744905986eb9 -r 59e57834c366 toolshed_version/tool_dependencies.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/toolshed_version/tool_dependencies.xml Tue Mar 15 15:30:44 2016 -0400 |
b |
@@ -0,0 +1,6 @@ +<?xml version="1.0"?> +<tool_dependency> + <set_environment version="1.0"> + <environment_variable name="INSTALL_RUN_PATH" action="set_to">$REPOSITORY_INSTALL_DIR</environment_variable> + </set_environment> +</tool_dependency> \ No newline at end of file |