annotate reformatPlatesTabularToLinear.pl @ 0:3ddf2607e3a2 draft default tip

Uploaded
author pmac
date Wed, 01 Jun 2016 03:57:23 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
1 ###############################################################################
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
2 # This script converts plate data from tabular to linear format.
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
3 #
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
4 # Args:
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
5 # input file:
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
6 # a text file containing a set of tabular data in vertical layout in either 384/96 well format,
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
7 # typically generated from synergy or cellomics software.
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
8 #
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
9 # Returns:
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
10 # For each input file, a linear version of tabular data is returned with
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
11 # "Well" column inserted as first column and "Table_<count>" for all subsequent
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
12 # columns where <count> is ordinal number of tables in file.
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
13 #
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
14 # Author: jason ellul
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
15 ###############################################################################
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
16
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
17 use strict;
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
18 use warnings;
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
19 use IO::Handle;
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
20 use File::Temp qw/ tempfile tempdir /;
3ddf2607e3a2 Uploaded
pmac
parents:
diff changeset
# Escape a string so it can be embedded inside a double-quoted R string
# literal. Backslashes and double quotes are the characters that would
# otherwise end or alter the literal (e.g. Windows-style paths).
sub _r_string_escape {
    my ($text) = @_;
    $text =~ s/(["\\])/\\$1/g;
    return $text;
}

# check to make sure having correct input and output files
my $usage = "usage: reformatPlatesTabularToLinear.pl [TABULAR.in] [TABULAR.out] \n";
die $usage unless @ARGV == 2;

# get the input arguments
my $tabularPlateTable = $ARGV[0];
my $linearPlateTable  = $ARGV[1];

# Fail early if the input cannot be read or the output cannot be written.
# The handles are closed again straight away: R does the actual reading and
# writing, and the output file must not be held open here while R writes it.
# (Opening the output creates/truncates it, so it exists even if R fails.)
open(my $in_fh, "<", $tabularPlateTable)
    or die("Could not open file $tabularPlateTable: $!\n");
close($in_fh);
open(my $out_fh, ">", $linearPlateTable)
    or die("Could not open file $linearPlateTable: $!\n");
close($out_fh)
    or die("Could not close file $linearPlateTable: $!\n");

# The paths are pasted into R source code below, so escape them for R.
my $r_input  = _r_string_escape($tabularPlateTable);
my $r_output = _r_string_escape($linearPlateTable);

# Name of the generated R script. It is written to the current working
# directory (not a temp directory), so relative input/output paths keep
# resolving the same way for R as they do for this script.
my $r_script = "reformatPlatesTabularToLinear.r";

# R script that finds every plate table in the input (384-well layout first,
# then 96-well) and writes one row per well with one column per table.
# NOTE: this is an interpolating heredoc, so "\\" below is a single backslash
# in the generated R code.
my $r_code = <<"END_R";
#options(show.error.messages = FALSE);

#read the plates table
tables <- scan("$r_input", sep="\\n", what="character", quiet = TRUE);

# if there are any lines which, once all tabs/spaces are removed, amount to an empty line then remove them
blank <- which(gsub("\\t|\\\\s", "", tables) == "");
if(length(blank) > 0) tables <- tables[-blank];

# search for occurrences of the below column header line in the tables data.
colheads <- grep("^\\t 1 \\t 2 \\t 3 \\t 4 \\t 5 \\t 6 \\t 7 \\t 8 \\t 9 \\t 10 \\t 11 \\t 12 \\t 13 \\t 14 \\t 15 \\t 16 \\t 17 \\t 18 \\t 19 \\t 20 \\t 21 \\t 22 \\t 23 \\t 24 ", tables);
# if not found we assume the tables are 96-well
if(length(colheads) == 0) {
    platetype <- 96;
    colheads <- grep("^\\t 1 \\t 2 \\t 3 \\t 4 \\t 5 \\t 6 \\t 7 \\t 8 \\t 9 \\t 10 \\t 11 \\t 12 ", tables);
    nc <- 12;
    nr <- 8;
} else {
    # else dealing with 384-well
    platetype <- 384;
    nc <- 24;
    nr <- 16;
}
# without any header line there is nothing to linearize; fail with a clear message
if(length(colheads) == 0) stop("no 96-well or 384-well column header line found in the input");

# set up the structure of the output matrix
linearized.data <- matrix(NA, nrow=platetype, ncol=length(colheads)+1);

# generate the well column
well.name <- NULL;
for(i in LETTERS[1:nr]) {
    for(j in c("01", "02", "03", "04", "05", "06", "07", "08", "09", 10:nc)) {
        well.name <- c(well.name , paste(i, j, sep=""));
    }
}

# set up the column names for the output matrix
# (the first name is created with one leading character that is stripped again below)
colnames(linearized.data) <- c("\\\\#Well", paste("Table", seq_along(colheads), sep="_"));
linearized.data[, "\\\\#Well"] <- well.name;
colnames(linearized.data)[1] <- sub("^.", "", colnames(linearized.data)[1]);

for(i in seq_along(colheads)) {
    for(j in 1:nr) {
        # for each row of current table split the data by tab.
        tab.row <- strsplit(tables[colheads[i]+j], "\\t");
        # assign the current row from the current table
        # the min part of code takes account for table rows which may not have the full set of values expected
        linearized.data[((j-1)*nc+1):((j*nc)-(nc-min(nc+1, length(tab.row[[1]]))+1)), i+1] <- tab.row[[1]][2:min(length(tab.row[[1]]),(nc+1))];
    }
}
linearized.data <- as.data.frame(linearized.data, stringsAsFactors=FALSE);
# ensure all columns excluding first one are numeric
#for(i in 2:ncol(linearized.data)) {
#    linearized.data[,i] <- as.numeric(linearized.data[,i]);
#}

#save the linear plate data
write.table(linearized.data, file="$r_output", quote=F, sep="\\t", row.names=F);
#eof
END_R

open(my $r_fh, ">", $r_script) or die "Cannot open $r_script: $!\n\n";
print {$r_fh} $r_code          or die "Cannot write $r_script: $!\n\n";
# buffered write errors only surface at close, so check it
close($r_fh)                   or die "Cannot close $r_script: $!\n\n";

# Run the generated script; R's standard output is kept next to it as a log.
my $r_log = "$r_script.out";
system("R --no-restore --no-save --no-readline < $r_script > $r_log");
if ($? == -1) {
    die "Could not run R: $!\n";
}
elsif ($? != 0) {
    # previously a failed R run went unnoticed and left an empty output file
    die "R exited abnormally (wait status $?); see $r_log for its output\n";
}