Mercurial > repos > jfb > promap
view promap/promap.R @ 0:ec1bb9a71b77 draft default tip
Uploaded
author | jfb |
---|---|
date | Thu, 19 Apr 2018 19:40:39 -0400 |
parents | |
children |
line wrap: on
line source
# options(error = quote({ # dump.frames(to.file=T, dumpto='last.dump') # load('last.dump.rda') # print(last.dump) # q() # })) #symdiff <- function( x, y) { setdiff( union(x, y), intersect(x, y))} # options(error = quote({dump.frames(to.file=TRUE); q()})) CoulambicOutput<-"CoulambicOutput.csv" LJoutput<-"LJOutput.csv" args <- commandArgs() #print(args) myinput <- gsub("--in=", "", args[grepl("--in=", args)]) input<-lapply(myinput, function(x){read.fwf(file = x, widths=c(4,5,4,6,6,4,5,6,12,12,12))}) #input<-list(read.fwf(file = "1-finedata.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12)),read.fwf(file = "2-finedata.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12))) #print("hi") #myinput <- gsub("--in=", "", args[grepl("--in=", args)]) #input<-list(read.fwf(file = "1-finedata.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12)),read.fwf(file = "2-finedata.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12))) #input<-lapply(args[6:7], function(x){read.fwf(file = x, widths=c(4,5,4,6,6,4,5,6,12,12,12))}) if(1==0){ #theinputs<-lapply(input,function(x){as.data.frame(read.fwf(file=x,widths=c(4,5,4,6,6,4,5,6,12,12,12)))}) #promap org # Finedata1<-read.fwf(file = "testytestytest.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12)) # Finedata2<-read.fwf(file = "testytestytest.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12)) #View(Finedata1) #these text files unfortunately since read at fixed width they include factors and blank spaces #this code #what this code does is first turn factors into characters, and then #trim the whitespace from those characters and add the characters back in. So long as the data is always #done this way, then it should work #what is in the text file: the colomns are these #AA1 AA# Atom Atom# AA2 AA2# Atom2 Atom2# Dist Coul LJ #So first I trim off white space using twimws, after finding which of the columns contain characters as opposed to numbers, #so I trimws from the character columns. Then I put those trimmed columns back in. I only care about the following columns because #they are the only ones that get trimmed: column 1 (AA1), column 3 (atom 1) column 5 (AA2) column 7 (atom 2) #I do this for every fine data file } CHANGE<-input[1] CHANGE<-as.data.frame(CHANGE) emptystarter<-matrix(nrow=nrow(CHANGE),ncol=ncol(CHANGE)) ListOfMine<-list(emptystarter,emptystarter) #this for loop is simply to properly trim the whitespace out of the fixed width file, that is ALL IT DOES for (y in 1:length(input)) { ForToBeRearranging<-input[y] ForToBeRearranging<-as.data.frame(ForToBeRearranging) j<-sapply(ForToBeRearranging, is.factor) ForToBeRearranging[j]<-lapply(ForToBeRearranging[j], as.character) sight <- sapply(ForToBeRearranging,is.character) #ForToBeRearranging <- cbind.data.frame(sapply(ForToBeRearranging[,sight],trimws,which="both"),ForToBeRearranging[,!sight]) trimmed<-sapply(ForToBeRearranging[,sight],trimws,which="both") ForToBeRearranging[,1]<-trimmed[,1] ForToBeRearranging[,3]<-trimmed[,2] ForToBeRearranging[,5]<-trimmed[,3] ForToBeRearranging[,7]<-trimmed[,4] # if (exists("ListOfMine")){ListOfMine[y]<-ForToBeRearranging} # if (!exists("ListOfMine")){ListOfMine<-ForToBeRearranging} ListOfMine[y]<-list(ForToBeRearranging) } if (1==0){ # j<-sapply(Finedata1, is.factor) # Finedata1[j]<-lapply(Finedata1[j], as.character) # sight <- sapply(Finedata1,is.character) # #Finedata1 <- cbind.data.frame(sapply(Finedata1[,sight],trimws,which="both"),Finedata1[,!sight]) # trimmed<-sapply(Finedata1[,sight],trimws,which="both") # Finedata1[,1]<-trimmed[,1] # Finedata1[,3]<-trimmed[,2] # Finedata1[,5]<-trimmed[,3] # Finedata1[,7]<-trimmed[,4] # # # j<-sapply(Finedata2, is.factor) # Finedata2[j]<-lapply(Finedata2[j], as.character) # sight <- sapply(Finedata2,is.character) # #Finedata2 <- cbind.data.frame(sapply(Finedata2[,sight],trimws,which="both"),Finedata2[,!sight]) # trimmed<-sapply(Finedata2[,sight],trimws,which="both") # Finedata2[,1]<-trimmed[,1] # Finedata2[,3]<-trimmed[,2] # Finedata2[,5]<-trimmed[,3] # Finedata2[,7]<-trimmed[,4] #################################################################### #PAGE ONE this is the average page, output chain 1, chain 2, average dist, average colambic, #2SD+, 2SD-, MOE, then the same for LJ values #################################################################### #on and it's average between the two chains # # Distances<-cbind(Finedata1[,9],Finedata2[,9]) # AverageDistance<-apply(Distances, 1, mean) # # Coulambic<-cbind(Finedata1[,10],Finedata2[,10]) # AverageCoulombic<-apply(Coulambic, 1, mean) # # LenJones<-cbind(Finedata1[11],Finedata2[11]) # AverageLJ<-apply(LenJones,1,mean) # # numerology<-Finedata1[,6] # # TotalEnergy<-AverageLJ+AverageCoulombic # AllNumbers<-cbind.data.frame(numerology,TotalEnergy) # # TrueNumbers<-AllNumbers[order(AllNumbers$numerology,decreasing = FALSE),] # head(TrueNumbers) # plot(TrueNumbers) # TrueNumbers[TrueNumbers$numerology==7,] ################################################################################# #SO WHAT DOES IT NEED TO DO NOW #SEPARATE THE COUL FROM JL, THEN FIND IN THE NUMEROUS CHAINS, ARE THERE ANY WITH TEH SAME AA NAME, AA# AND ATOM ID (ID MEANING O, C, OH) ################################################################################# #maybe I need to do all this for distance as well? # # LJ1x<-Finedata1[11] # LJ2x<-Finedata2[11] # LJ1<-which(LJ1x<=(LJval)) # LJ2<-which(LJ2x<=(LJval)) # COUL1x<-Finedata1[10] # COUL2x<-Finedata2[10] # COUL1<-which(COUL1x<=Coulval) # COUL2<-which(COUL2x<=Coulval) # AA1namenumberID<-Finedata1[,c(1,2,3,5,6,7)] # AA2namenumberID<-Finedata2[,c(1,2,3,5,6,7)] # AA1values<-apply(AA1namenumberID,1,paste,collapse=" ") # AA2values<-apply(AA1namenumberID,1,paste,collapse=" ") # AA1valuesCOUL<-AA1values[COUL1] # AA2valuesCOUL<-AA2values[COUL2] # AA1valuesLJ<-AA1values[LJ1] # AA2valuesLJ<-AA2values[LJ2] } #AA1 AA# Atom Atom# AA2 AA2# Atom2 Atom2# Dist Coul LJ #take the amino acid names, amino acid numbers, and atoms and hold them in a vector #take the LJ interactions from both files, find which of them are greater than a criteria value #take the coulambic interactions from both files, find which of them are greater than a criteria value LJs<-list() Couls<-list() AAIDs<-list() LJval=-.1 Coulval=-.3 AALJs<-list() AAcoul<-list() combinedCoul<-c() CombinedLJ<-c() UnsharedLJData<-list() UnsharedLJPositions<-list() UnsharedCoulData<-list() UnsharedCoulPositions<-list() for (z in 1:length(ListOfMine)) { DamnThatDataIsFine<-ListOfMine[z] DamnThatDataIsFine<-as.data.frame(DamnThatDataIsFine) ThisLJ<-DamnThatDataIsFine[11] ThisCoul<-DamnThatDataIsFine[10] ThisLJ<-which(ThisLJ<=LJval) ThisCoul<-which(ThisCoul<=Coulval) #what this does is extract the data, take the LJs and Couls and see which ones are greater than a given value LJs[z]<-list(ThisLJ) Couls[z]<-list(ThisCoul) #save those which are greater, save their POSITIONS ThisAAID<-DamnThatDataIsFine[,c(1,2,3,5,6,7)] ThisAAID<-as.data.frame(ThisAAID) AA1values<-apply(ThisAAID,1,paste,collapse=" ") #take the IDs and paste them together so they can be comapred #paste the vectors together so they can be compared AAnewvalues<-AA1values AA1valuesCOUL<-AAnewvalues[ThisCoul] #find the amino acid names and numbers plus atom names for all the values that were greater than that coulambic value AA1valuesLJ<-AAnewvalues[ThisLJ] #do same as above for LJ AALJs[z]<-list(AA1valuesLJ) AAcoul[z]<-list(AA1valuesCOUL) if(z==1){ previousLJ<-AA1valuesLJ previousCoul<-AA1valuesCOUL } if (z>1){ combinedCoul<-intersect(AA1valuesCOUL,previousCoul) CombinedLJ<-intersect(AA1valuesLJ,previousLJ) previousCoul<-combinedCoul previousLJ<-CombinedLJ } #these z ifs create the intersection to find only which LJ and Coul vals were shared between the things, then I just need to extract the #positional values of those #SO THESE ARE THE FULL NAMES OF THE SHARED VALUES, NO THEIR POSITIONAL MATRIX VALUES if (z==length(ListOfMine)){ for (w in 1:length(ListOfMine)) { #setdif(x,y) means "what does x have that y doesn't?" #in this loop I am taking the set difference of each input set, the difference between them and the union set created in the previous if ThisData<-ListOfMine[w] ThisData<-as.data.frame(ThisData) ComparableData<-ThisData[,c(1,2,3,5,6,7)] ComparableData<-apply(ComparableData,1,paste,collapse=" ") #so I think I'm going to do this the stupid but easy way. I will perform the setdif function. Then I will perform a function to see #which positions are the different ones, then save those positions #currently this includes BOTH THOSE THAT WERE < THE VALUE AND THOSE THAT WERE >VALUE BUT UNSHARED TheseUnsharedCoul<-setdiff(ComparableData,combinedCoul) TheseUnsharedLJ<-setdiff(ComparableData,CombinedLJ) CoulUnsharedPositions<-match(TheseUnsharedCoul,ComparableData) LJUnsharedPositions<-match(TheseUnsharedLJ,ComparableData) UnsharedLJData[[w]]<-ThisData[LJUnsharedPositions,c(1,2,3,5,6,7,9,11)] UnsharedLJPositions[[w]]<-LJUnsharedPositions UnsharedCoulData[[w]]<-ThisData[CoulUnsharedPositions,c(1,2,3,5,6,7,9,10)] UnsharedCoulPositions[[w]]<-CoulUnsharedPositions #AA1 AA# Atom Atom# AA2 AA2# Atom2 Atom2# Dist Coul LJ } } } ListOfLJ<-matrix(data=NA, nrow=length(CombinedLJ),ncol=length(ListOfMine)) ListOfCouls<-matrix(data=NA, nrow=length(combinedCoul),ncol=length(ListOfMine)) #this is a list of LJ vectors, IE the number of coulambs for that significant LJ interaction #so what I have done is create the vector which stores the shared, signif LJ and Coul values, now I need to extract the numbers from those values for (b in 1:length(ListOfMine)) { DamnThatDataIsFine<-ListOfMine[b] DamnThatDataIsFine<-as.data.frame(DamnThatDataIsFine) ThisAAID<-DamnThatDataIsFine[,c(1,2,3,5,6,7)] ThisAAID<-as.data.frame(ThisAAID) AA1values<-apply(ThisAAID,1,paste,collapse=" ") LJforthisloop<-which(AA1values %in% CombinedLJ) #find the indexes of the shared LJ values LJEnergies<-DamnThatDataIsFine[LJforthisloop,11] #LJs are in the 11th column CoulambsForThisLoop<-which(AA1values %in% combinedCoul) CoulEnergies<-DamnThatDataIsFine[CoulambsForThisLoop,10] #same as above, Couls are in 10th column ListOfLJ[,b]<-LJEnergies ListOfCouls[,b]<-CoulEnergies } Coulmeans<-apply(ListOfCouls,1,mean) LJmeans<-apply(ListOfLJ,1,mean) CoulSD<-apply(ListOfCouls,1,sd) LJSD<-apply(ListOfLJ, 1, sd) #take the mean and SD if(1==0){ # whichweresharedCOUL<-list() # CoulNumbers<-list() # whichweresharedLJ<-list() # LJnumbers<-list() # # # # for (a in 1:length(ListOfMine)) { # thislist<-ListOfMine[a] # thislist<-as.data.frame(thislist) # # #sharedcouls<-which(AAcoul[1] %in% CombinedCoul) # #sharedcouls<-match(AAcoul[1],CombinedCoul) # TheseCouls<-(AAcoul[a]) # names(TheseCouls)<-Couls[a] # # sharedcouls<-TheseCouls[TheseCouls %in% CombinedCoul] # sharedcouls<-as.data.frame(sharedcouls) # CoulToTheTouch<-thislist[sharedcouls,10] # # sharedLJs<-which(AALJs %in% CombinedLJ) # LJtothetouch<-thislist[sharedLJs,11] # # # } # # Ctobemeaned<-cbind(Finedata2[whichwereshared2coul,10],Finedata1[whichwereshared1coul,10]) # cmean<-apply(Ctobemeaned,1,mean) # csd<-apply(Ctobemeaned,1,sd) # # LJtobemeaned<-cbind(Finedata2[whichwereshared2LJ,11],Finedata1[whichwereshared1LJ,11]) # LJmean<-apply(LJtobemeaned,1,mean) # LJsd<-apply(LJtobemeaned,1,sd) #take the mean and SD of the values that were extracted from both input files OutputCoul<-cbind(Finedata1[whichwereshared1coul,c(1,2,3,5,6,7,10)],Finedata2[whichwereshared2coul,10],cmean,csd) OutputLJ<-cbind(Finedata1[whichwereshared1LJ,c(1,2,3,5,6,7,11)],Finedata2[whichwereshared2LJ,11],LJmean,LJsd) #create output } #THERE ARE TWO OUTPUTS, ONE FOR COUL AND ONE FOR LJ #here I create the output files, they will have everything with the colnames as seen below asdf<-1:10 OutputCoul<-data.frame(matrix(asdf,ncol = 10)) OutputLJ<-data.frame(matrix(asdf,ncol = 10)) colnames(OutputCoul)<-c("First Amino Acid", "First AA number", "First AA atom", "Second Amino Acid", "Second AA number", "Second AA atom", "First Coulambic Value", "Second Coulambic Value", "Coulambic Mean", "Coulambic SD") colnames(OutputLJ)<-c("First Amino Acid", "First AA number", "First AA atom", "Second Amino Acid", "Second AA number", "Second AA atom", "First LJ Value", "Second LJ Value", "LJ mean", "LJ SD") #so what I need to do is stringsplit the AA IDs, then add them to the table, then add the values, the means and the SDs, for both coul and LJ for (e in 1:length(combinedCoul)) { myCOUL<-combinedCoul[e] myCOUL<-unlist(strsplit(myCOUL, split="\\s+")) outputhere<-c(myCOUL,ListOfCouls[e,],Coulmeans[e],CoulSD[e]) OutputCoul<-rbind(OutputCoul,outputhere) } OutputCoul<-OutputCoul[2:nrow(OutputCoul),] for (e in 1:length(CombinedLJ)) { myLJ<-CombinedLJ[e] myLJ<-unlist(strsplit(myLJ, split="\\s+")) outputhere<-c(myLJ,ListOfLJ[e,],LJmeans[e],LJSD[e]) OutputLJ<-rbind(OutputLJ,outputhere) } OutputLJ<-OutputLJ[2:nrow(OutputLJ),] write.table(x=OutputCoul, file=CoulambicOutput, sep=",",row.names = FALSE, col.names = TRUE) write.table(x=OutputLJ, file=LJoutput, sep=",",row.names = FALSE, col.names = TRUE) # UnsharedLJData<-list() # UnsharedLJPositions<-list() # # UnsharedCoulData<-list() # UnsharedCoulPositions<-list() name<-"UnsharedCoulambicData.csv" name2<-"UnsharedLJData.csv" Header<-rep(" ",times=8) CoulHead<-c("First Amino Acid","First AA Number","First Atom Name","Second Amino Acid","Second AA Number","Second Atom Name","Distance","Coulambic Value") LJHead<-c("First Amino Acid","First AA Number","First Atom Name","Second Amino Acid","Second AA Number","Second Atom Name","Distance","LJ Value") CoulHead<-rbind(Header,CoulHead) LJHead<-rbind(Header,LJHead) write.table(x=CoulHead,file = name,row.names = FALSE,col.names = FALSE,append = TRUE,sep = ",") write.table(x=LJHead,file = name2,row.names = FALSE,col.names = FALSE,append = TRUE,sep = ",") for (g in 1:length(UnsharedCoulData)) { Seperator<-"Input Number " Seperator<-paste0(Seperator,g) write.table(x=Seperator,file = name,sep = ",",row.names = FALSE,col.names = FALSE,append = TRUE) write.table(x=Seperator,file = name2,sep = ",",row.names = FALSE,col.names = FALSE,append = TRUE) LJdata<-UnsharedLJData[g] LJdata<-as.data.frame(LJdata) CoulData<-UnsharedCoulData[g] CoulData<-as.data.frame(CoulData) write.table(x=CoulData,file = name,sep = ",",row.names = FALSE,col.names = FALSE,append = TRUE) write.table(x=LJdata,file = name2,sep = ",",row.names = FALSE,col.names = FALSE,append = TRUE) }