Mercurial > repos > pmac > reformatplatestabulartolinear
comparison reformatPlatesTabularToLinear.pl @ 0:3ddf2607e3a2 draft default tip
Uploaded
| author | pmac |
|---|---|
| date | Wed, 01 Jun 2016 03:57:23 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3ddf2607e3a2 |
|---|---|
| 1 ############################################################################### | |
| 2 # This script converts plate data from tabular to linear format. | |
| 3 # | |
| 4 # Args: | |
| 5 # input file: | |
| 6 # a text file containing a set of tabular data in vertical layout in either 384/96 well format, | |
| 7 # typically generated from synergy or cellomics software. | |
| 8 # | |
| 9 # Returns: | |
| 10 # For each input file, a linear version of the tabular data is returned with a | |
| 11 # "Well" column inserted as the first column and "Table_<count>" for all subsequent | |
| 12 # columns, where <count> is the ordinal number of the table in the file. | |
| 13 # | |
| 14 # Author: jason ellul | |
| 15 ############################################################################### | |
| 16 | |
| 17 use strict; | |
| 18 use warnings; | |
| 19 use IO::Handle; | |
| 20 use File::Temp qw/ tempfile tempdir /; | |
| 21 my $tdir = tempdir( CLEANUP => 1 ); # scratch directory, removed automatically at exit | |
| 22 | |
| 23 # exactly two arguments are required: the tabular input file and the linear output file | |
| 24 my $usage = "usage: reformatPlatesTabularToLinear.pl [TABULAR.in] [TABULAR.out] \n"; | |
| 25 die $usage unless @ARGV == 2; | |
| 26 | |
| 27 # input path (tabular plate layout) and output path (linear table) | |
| 28 my $tabularPlateTable = $ARGV[0]; | |
| 29 my $linearPlateTable = $ARGV[1]; | |
| 30 | |
| 31 # open input for reading and output for writing ('>' creates or truncates the output); report the OS error on failure | |
| 32 open (INPUT, "<", $tabularPlateTable) || die("Could not open file $tabularPlateTable: $! \n"); | |
| 33 open (OUTPUT1, ">", $linearPlateTable) || die("Could not open file $linearPlateTable: $! \n"); | |
| 34 | |
| 35 # Name of the generated R script; it is created in the current working directory. | |
| 36 my $r_script = "reformatPlatesTabularToLinear.r"; | |
| 37 | |
| 38 # Escape backslashes and double quotes so both paths remain valid R string literals. | |
| 39 (my $r_input = $tabularPlateTable) =~ s/(["\\])/\\$1/g; | |
| 40 (my $r_output = $linearPlateTable) =~ s/(["\\])/\\$1/g; | |
| 41 | |
| 42 # Write the R script that turns the plate tables of the input file into one linear table. | |
| 43 open(my $r_fh, ">", $r_script) or die "Cannot open $r_script: $! \n\n"; | |
| 44 print $r_fh " | |
| 45 #options(show.error.messages = FALSE); | |
| 46 #read the plates table | |
| 47 tables <- scan(\"$r_input\", sep=\"\\n\", what=\"character\", quiet = TRUE); | |
| 48 | |
| 49 # if there are any lines which when all tabs/spaces are removed amount to an empty line then remove these lines | |
| 50 if(length(which(gsub(\"\\t|\\\\s\", \"\", tables) == \"\")) > 0) tables <- tables[-which(gsub(\"\\t|\\\\s\", \"\", tables) == \"\")]; | |
| 51 | |
| 52 # search for occurrences of the below column header line in the tables data. | |
| 53 colheads <- grep(\"^\\t 1 \\t 2 \\t 3 \\t 4 \\t 5 \\t 6 \\t 7 \\t 8 \\t 9 \\t 10 \\t 11 \\t 12 \\t 13 \\t 14 \\t 15 \\t 16 \\t 17 \\t 18 \\t 19 \\t 20 \\t 21 \\t 22 \\t 23 \\t 24 \", tables); | |
| 54 # if not found we assume the tables are 96-well | |
| 55 if(length(colheads) == 0) { | |
| 56 platetype <- 96; | |
| 57 colheads <- grep(\"^\\t 1 \\t 2 \\t 3 \\t 4 \\t 5 \\t 6 \\t 7 \\t 8 \\t 9 \\t 10 \\t 11 \\t 12 \", tables); | |
| 58 nc <- 12; | |
| 59 nr <- 8; | |
| 60 } else { | |
| 61 # else dealing with 384-well | |
| 62 platetype <- 384; | |
| 63 nc <- 24; | |
| 64 nr <- 16; | |
| 65 } | |
| 66 if(length(colheads) == 0) stop(\"no 96- or 384-well column header line found in input\"); # nothing to convert | |
| 67 # set up the structure of the output matrix | |
| 68 linearized.data <- matrix(NA, nrow=platetype, ncol=length(colheads)+1); | |
| 69 | |
| 70 # generate the well column | |
| 71 well.name <- NULL; | |
| 72 for(i in LETTERS[1:nr]) { | |
| 73 for(j in c(\"01\", \"02\", \"03\", \"04\", \"05\", \"06\", \"07\", \"08\", \"09\", 10:nc)) { | |
| 74 well.name <- c(well.name , paste(i, j, sep=\"\")); | |
| 75 } | |
| 76 } | |
| 77 | |
| 78 # set up the column names for the output matrix | |
| 79 colnames(linearized.data) <- c(\"\\\\#Well\", paste(\"Table\", 1:length(colheads), sep=\"_\")); | |
| 80 linearized.data[, \"\\\\#Well\"] <- well.name; | |
| 81 colnames(linearized.data)[1] <- sub(\"^.\", \"\", colnames(linearized.data)[1]); | |
| 82 | |
| 83 for(i in seq_along(colheads)) { | |
| 84 for(j in 1:nr) { | |
| 85 # split the current table row by tab and drop the leading row label | |
| 86 row.vals <- strsplit(tables[colheads[i]+j], \"\\t\")[[1]][-1]; | |
| 87 # rows may hold fewer than nc values (or none at all): copy only what is present | |
| 88 n.vals <- min(nc, length(row.vals)); | |
| 89 if(n.vals > 0) linearized.data[(j-1)*nc + seq_len(n.vals), i+1] <- row.vals[seq_len(n.vals)]; | |
| 90 } | |
| 91 } | |
| 92 linearized.data <- as.data.frame(linearized.data, stringsAsFactors=FALSE); | |
| 93 # ensure all columns excluding first one are numeric | |
| 94 #for(i in 2:ncol(linearized.data)) { | |
| 95 # linearized.data[,i] <- as.numeric(linearized.data[,i]); | |
| 96 #} | |
| 97 | |
| 98 #save the linear plate data | |
| 99 write.table(linearized.data, file=\"$r_output\", quote=F, sep=\"\\t\", row.names=F); | |
| 100 #eof\n"; | |
| 101 | |
| 102 close($r_fh) or die "Cannot write $r_script: $! \n"; | |
| 103 | |
| 104 system("R --no-restore --no-save --no-readline < $r_script > $r_script.out") == 0 # R's console output goes to <script>.out | |
| 105 or die "R failed (status $?) while running $r_script, see $r_script.out \n"; # do not report success on a failed conversion | |
| 106 #close the input and output files | |
| 107 close(OUTPUT1); | |
| 108 close(INPUT); | |
