view promap/promap.R @ 0:ec1bb9a71b77 draft default tip

Uploaded
author jfb
date Thu, 19 Apr 2018 19:40:39 -0400
parents
children
line wrap: on
line source

# options(error = quote({
#   dump.frames(to.file=T, dumpto='last.dump')
#   load('last.dump.rda')
#   print(last.dump)
#   q()
# }))

#symdiff <- function( x, y) { setdiff( union(x, y), intersect(x, y))}

# options(error = quote({dump.frames(to.file=TRUE); q()}))

CoulambicOutput<-"CoulambicOutput.csv"
LJoutput<-"LJOutput.csv"

args <- commandArgs()
#print(args)

myinput <- gsub("--in=", "", args[grepl("--in=", args)])
input<-lapply(myinput, function(x){read.fwf(file = x, widths=c(4,5,4,6,6,4,5,6,12,12,12))})

#input<-list(read.fwf(file = "1-finedata.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12)),read.fwf(file = "2-finedata.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12)))

#print("hi")

#myinput <- gsub("--in=", "", args[grepl("--in=", args)])
#input<-list(read.fwf(file = "1-finedata.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12)),read.fwf(file = "2-finedata.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12)))
#input<-lapply(args[6:7], function(x){read.fwf(file = x, widths=c(4,5,4,6,6,4,5,6,12,12,12))})


if(1==0){
#theinputs<-lapply(input,function(x){as.data.frame(read.fwf(file=x,widths=c(4,5,4,6,6,4,5,6,12,12,12)))})


#promap org
# Finedata1<-read.fwf(file = "testytestytest.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12))
# Finedata2<-read.fwf(file = "testytestytest.txt", widths=c(4,5,4,6,6,4,5,6,12,12,12))
#View(Finedata1)
#these text files unfortunately since read at fixed width they include factors and blank spaces
#this code
#what this code does is first turn factors into characters, and then 
#trim the whitespace from those characters and add the characters back in.  So long as the data is always
#done this way, then it should work

#what is in the text file: the colomns are these
#AA1 AA# Atom Atom# AA2 AA2# Atom2 Atom2# Dist Coul LJ
#So first I trim off white space using twimws, after finding which of the columns contain characters as opposed to numbers,
#so I trimws from the character columns.  Then I put those trimmed columns back in.  I only care about the following columns because
#they are the only ones that get trimmed: column 1 (AA1), column 3 (atom 1) column 5 (AA2) column 7 (atom 2)
#I do this for every fine data file
}
CHANGE<-input[1]
CHANGE<-as.data.frame(CHANGE)
emptystarter<-matrix(nrow=nrow(CHANGE),ncol=ncol(CHANGE))

ListOfMine<-list(emptystarter,emptystarter)

#this for loop is simply to properly trim the whitespace out of the fixed width file, that is ALL IT DOES
for (y in 1:length(input)) {
  ForToBeRearranging<-input[y]
  ForToBeRearranging<-as.data.frame(ForToBeRearranging)
  j<-sapply(ForToBeRearranging, is.factor)
  ForToBeRearranging[j]<-lapply(ForToBeRearranging[j], as.character)
  sight <- sapply(ForToBeRearranging,is.character)
  #ForToBeRearranging <- cbind.data.frame(sapply(ForToBeRearranging[,sight],trimws,which="both"),ForToBeRearranging[,!sight])
  trimmed<-sapply(ForToBeRearranging[,sight],trimws,which="both")
  ForToBeRearranging[,1]<-trimmed[,1]
  ForToBeRearranging[,3]<-trimmed[,2]
  ForToBeRearranging[,5]<-trimmed[,3]
  ForToBeRearranging[,7]<-trimmed[,4]
  # if (exists("ListOfMine")){ListOfMine[y]<-ForToBeRearranging}
  # if (!exists("ListOfMine")){ListOfMine<-ForToBeRearranging}
  ListOfMine[y]<-list(ForToBeRearranging)
}

if (1==0){
# j<-sapply(Finedata1, is.factor)
# Finedata1[j]<-lapply(Finedata1[j], as.character)
# sight <- sapply(Finedata1,is.character)
# #Finedata1 <- cbind.data.frame(sapply(Finedata1[,sight],trimws,which="both"),Finedata1[,!sight])
# trimmed<-sapply(Finedata1[,sight],trimws,which="both")
# Finedata1[,1]<-trimmed[,1]
# Finedata1[,3]<-trimmed[,2]
# Finedata1[,5]<-trimmed[,3]
# Finedata1[,7]<-trimmed[,4]
# 
# 
# j<-sapply(Finedata2, is.factor)
# Finedata2[j]<-lapply(Finedata2[j], as.character)
# sight <- sapply(Finedata2,is.character)
# #Finedata2 <- cbind.data.frame(sapply(Finedata2[,sight],trimws,which="both"),Finedata2[,!sight])
# trimmed<-sapply(Finedata2[,sight],trimws,which="both")
# Finedata2[,1]<-trimmed[,1]
# Finedata2[,3]<-trimmed[,2]
# Finedata2[,5]<-trimmed[,3]
# Finedata2[,7]<-trimmed[,4]

####################################################################
#PAGE ONE this is the average page, output chain 1, chain 2, average dist, average colambic,
#2SD+, 2SD-, MOE, then the same for LJ values
####################################################################
#on and it's average between the two chains
# 
# Distances<-cbind(Finedata1[,9],Finedata2[,9])
# AverageDistance<-apply(Distances, 1, mean)
# 
# Coulambic<-cbind(Finedata1[,10],Finedata2[,10])
# AverageCoulombic<-apply(Coulambic, 1, mean)
# 
# LenJones<-cbind(Finedata1[11],Finedata2[11])
# AverageLJ<-apply(LenJones,1,mean)
# 
# numerology<-Finedata1[,6]
# 
# TotalEnergy<-AverageLJ+AverageCoulombic
# AllNumbers<-cbind.data.frame(numerology,TotalEnergy)
# 
# TrueNumbers<-AllNumbers[order(AllNumbers$numerology,decreasing = FALSE),]
# head(TrueNumbers)
# plot(TrueNumbers)
# TrueNumbers[TrueNumbers$numerology==7,]
#################################################################################
#SO WHAT DOES IT NEED TO DO NOW
#SEPARATE THE COUL FROM JL, THEN FIND IN THE NUMEROUS CHAINS, ARE THERE ANY WITH TEH SAME AA NAME, AA# AND ATOM ID (ID MEANING O, C, OH)
#################################################################################
#maybe I need to do all this for distance as well?
# 
# LJ1x<-Finedata1[11]
# LJ2x<-Finedata2[11]
# LJ1<-which(LJ1x<=(LJval))
# LJ2<-which(LJ2x<=(LJval))
# COUL1x<-Finedata1[10]
# COUL2x<-Finedata2[10]
# COUL1<-which(COUL1x<=Coulval)
# COUL2<-which(COUL2x<=Coulval)
# AA1namenumberID<-Finedata1[,c(1,2,3,5,6,7)]
# AA2namenumberID<-Finedata2[,c(1,2,3,5,6,7)]
# AA1values<-apply(AA1namenumberID,1,paste,collapse=" ")
# AA2values<-apply(AA1namenumberID,1,paste,collapse=" ")
# AA1valuesCOUL<-AA1values[COUL1]
# AA2valuesCOUL<-AA2values[COUL2]
# AA1valuesLJ<-AA1values[LJ1]
# AA2valuesLJ<-AA2values[LJ2]
}


#AA1 AA# Atom Atom# AA2 AA2# Atom2 Atom2# Dist Coul LJ
#take the amino acid names, amino acid numbers, and atoms and hold them in a vector
#take the LJ interactions from both files, find which of them are greater than a criteria value
#take the coulambic interactions from both files, find which of them are greater than a criteria value
LJs<-list()
Couls<-list()
AAIDs<-list()

LJval=-.1
Coulval=-.3
AALJs<-list()
AAcoul<-list()

combinedCoul<-c()
CombinedLJ<-c()

UnsharedLJData<-list()
UnsharedLJPositions<-list()

UnsharedCoulData<-list()
UnsharedCoulPositions<-list()

for (z in 1:length(ListOfMine)) {
  DamnThatDataIsFine<-ListOfMine[z]
  DamnThatDataIsFine<-as.data.frame(DamnThatDataIsFine)
  ThisLJ<-DamnThatDataIsFine[11]
  ThisCoul<-DamnThatDataIsFine[10]
  ThisLJ<-which(ThisLJ<=LJval)
  ThisCoul<-which(ThisCoul<=Coulval)
  
  #what this does is extract the data, take the LJs and Couls and see which ones are greater than a given value
  
  LJs[z]<-list(ThisLJ)
  Couls[z]<-list(ThisCoul)
  #save those which are greater, save their POSITIONS
  
  ThisAAID<-DamnThatDataIsFine[,c(1,2,3,5,6,7)]
  ThisAAID<-as.data.frame(ThisAAID)
  AA1values<-apply(ThisAAID,1,paste,collapse=" ")
  #take the IDs and paste them together so they can be comapred
  #paste the vectors together so they can be compared
  
  AAnewvalues<-AA1values

  AA1valuesCOUL<-AAnewvalues[ThisCoul]
  #find the amino acid names and numbers plus atom names for all the values that were greater than that coulambic value
  AA1valuesLJ<-AAnewvalues[ThisLJ]
  #do same as above for LJ
  
  AALJs[z]<-list(AA1valuesLJ)
  AAcoul[z]<-list(AA1valuesCOUL)
  if(z==1){
    previousLJ<-AA1valuesLJ
    previousCoul<-AA1valuesCOUL
  }
  if (z>1){
    combinedCoul<-intersect(AA1valuesCOUL,previousCoul)
    CombinedLJ<-intersect(AA1valuesLJ,previousLJ)
    
    previousCoul<-combinedCoul
    previousLJ<-CombinedLJ
  }
  #these z ifs create the intersection to find only which LJ and Coul vals were shared between the things, then I just need to extract the
  #positional values of those
  #SO THESE ARE THE FULL NAMES OF THE SHARED VALUES, NO THEIR POSITIONAL MATRIX VALUES
  if (z==length(ListOfMine)){
    for (w in 1:length(ListOfMine)) {
      #setdif(x,y) means "what does x have that y doesn't?"
      #in this loop I am taking the set difference of each input set, the difference between them and the union set created in the previous if
      ThisData<-ListOfMine[w]
      ThisData<-as.data.frame(ThisData)
      ComparableData<-ThisData[,c(1,2,3,5,6,7)]
      ComparableData<-apply(ComparableData,1,paste,collapse=" ")
      
      #so I think I'm going to do this the stupid but easy way.  I will perform the setdif function.  Then I will perform a function to see 
      #which positions are the different ones, then save those positions
      #currently this includes BOTH THOSE THAT WERE < THE VALUE AND THOSE THAT WERE >VALUE BUT UNSHARED
      TheseUnsharedCoul<-setdiff(ComparableData,combinedCoul)
      TheseUnsharedLJ<-setdiff(ComparableData,CombinedLJ)
      
      CoulUnsharedPositions<-match(TheseUnsharedCoul,ComparableData)
      LJUnsharedPositions<-match(TheseUnsharedLJ,ComparableData)
      
      UnsharedLJData[[w]]<-ThisData[LJUnsharedPositions,c(1,2,3,5,6,7,9,11)]
      UnsharedLJPositions[[w]]<-LJUnsharedPositions
      
      UnsharedCoulData[[w]]<-ThisData[CoulUnsharedPositions,c(1,2,3,5,6,7,9,10)]
      UnsharedCoulPositions[[w]]<-CoulUnsharedPositions
      #AA1 AA# Atom Atom# AA2 AA2# Atom2 Atom2# Dist Coul LJ
    }
  }
}


ListOfLJ<-matrix(data=NA, nrow=length(CombinedLJ),ncol=length(ListOfMine))
ListOfCouls<-matrix(data=NA, nrow=length(combinedCoul),ncol=length(ListOfMine))
#this is a list of LJ vectors, IE the number of coulambs for that significant LJ interaction
#so what I have done is create the vector which stores the shared, signif LJ and Coul values, now I need to extract the numbers from those values

for (b in 1:length(ListOfMine)) {
  DamnThatDataIsFine<-ListOfMine[b]
  DamnThatDataIsFine<-as.data.frame(DamnThatDataIsFine)
  
  ThisAAID<-DamnThatDataIsFine[,c(1,2,3,5,6,7)]
  ThisAAID<-as.data.frame(ThisAAID)
  AA1values<-apply(ThisAAID,1,paste,collapse=" ")
  
  LJforthisloop<-which(AA1values %in% CombinedLJ)
  #find the indexes of the shared LJ values
  
  LJEnergies<-DamnThatDataIsFine[LJforthisloop,11]
  #LJs are in the 11th column
  
  CoulambsForThisLoop<-which(AA1values %in% combinedCoul)
  CoulEnergies<-DamnThatDataIsFine[CoulambsForThisLoop,10]
  #same as above, Couls are in 10th column
  
  ListOfLJ[,b]<-LJEnergies
  ListOfCouls[,b]<-CoulEnergies
}

Coulmeans<-apply(ListOfCouls,1,mean)
LJmeans<-apply(ListOfLJ,1,mean)

CoulSD<-apply(ListOfCouls,1,sd)
LJSD<-apply(ListOfLJ, 1, sd)
#take the mean and SD

if(1==0){
# whichweresharedCOUL<-list()
# CoulNumbers<-list()
# whichweresharedLJ<-list()
# LJnumbers<-list()
# 
# 
# 
# for (a in 1:length(ListOfMine)) {
#   thislist<-ListOfMine[a]
#   thislist<-as.data.frame(thislist)
#   
#   #sharedcouls<-which(AAcoul[1] %in% CombinedCoul)
#   #sharedcouls<-match(AAcoul[1],CombinedCoul)
#   TheseCouls<-(AAcoul[a])
#   names(TheseCouls)<-Couls[a]
#   
#   sharedcouls<-TheseCouls[TheseCouls %in% CombinedCoul]
#   sharedcouls<-as.data.frame(sharedcouls)
#   CoulToTheTouch<-thislist[sharedcouls,10]
#   
#   sharedLJs<-which(AALJs %in% CombinedLJ)
#   LJtothetouch<-thislist[sharedLJs,11]
#   
#   
# }
  # 
  # Ctobemeaned<-cbind(Finedata2[whichwereshared2coul,10],Finedata1[whichwereshared1coul,10])
  # cmean<-apply(Ctobemeaned,1,mean)
  # csd<-apply(Ctobemeaned,1,sd)
  # 
  # LJtobemeaned<-cbind(Finedata2[whichwereshared2LJ,11],Finedata1[whichwereshared1LJ,11])
  # LJmean<-apply(LJtobemeaned,1,mean)
  # LJsd<-apply(LJtobemeaned,1,sd)
  #take the mean and SD of the values that were extracted from both input files
  
  OutputCoul<-cbind(Finedata1[whichwereshared1coul,c(1,2,3,5,6,7,10)],Finedata2[whichwereshared2coul,10],cmean,csd)
  OutputLJ<-cbind(Finedata1[whichwereshared1LJ,c(1,2,3,5,6,7,11)],Finedata2[whichwereshared2LJ,11],LJmean,LJsd)
  
  #create output
  
}

#THERE ARE TWO OUTPUTS, ONE FOR COUL AND ONE FOR LJ
#here I create the output files, they will have everything with the colnames as seen below

asdf<-1:10
OutputCoul<-data.frame(matrix(asdf,ncol = 10))
OutputLJ<-data.frame(matrix(asdf,ncol = 10))

colnames(OutputCoul)<-c("First Amino Acid", "First AA number", "First AA atom", "Second Amino Acid", "Second AA number", "Second AA atom",
                        "First Coulambic Value", "Second Coulambic Value", "Coulambic Mean", "Coulambic SD")
colnames(OutputLJ)<-c("First Amino Acid", "First AA number", "First AA atom", "Second Amino Acid", "Second AA number", "Second AA atom",
                      "First LJ Value", "Second LJ Value", "LJ mean", "LJ SD")

#so what I need to do is stringsplit the AA IDs, then add them to the table, then add the values, the means and the SDs, for both coul and LJ
for (e in 1:length(combinedCoul)) {
  myCOUL<-combinedCoul[e]
  myCOUL<-unlist(strsplit(myCOUL, split="\\s+"))
  outputhere<-c(myCOUL,ListOfCouls[e,],Coulmeans[e],CoulSD[e])
  OutputCoul<-rbind(OutputCoul,outputhere)
}
OutputCoul<-OutputCoul[2:nrow(OutputCoul),]

for (e in 1:length(CombinedLJ)) {
  myLJ<-CombinedLJ[e]
  myLJ<-unlist(strsplit(myLJ, split="\\s+"))
  outputhere<-c(myLJ,ListOfLJ[e,],LJmeans[e],LJSD[e])
  OutputLJ<-rbind(OutputLJ,outputhere)
}
OutputLJ<-OutputLJ[2:nrow(OutputLJ),]

write.table(x=OutputCoul, file=CoulambicOutput, sep=",",row.names = FALSE, col.names = TRUE)
write.table(x=OutputLJ, file=LJoutput, sep=",",row.names = FALSE, col.names = TRUE)




# UnsharedLJData<-list()
# UnsharedLJPositions<-list()
# 
# UnsharedCoulData<-list()
# UnsharedCoulPositions<-list()



name<-"UnsharedCoulambicData.csv"
name2<-"UnsharedLJData.csv"


Header<-rep(" ",times=8)
CoulHead<-c("First Amino Acid","First AA Number","First Atom Name","Second Amino Acid","Second AA Number","Second Atom Name","Distance","Coulambic Value")
LJHead<-c("First Amino Acid","First AA Number","First Atom Name","Second Amino Acid","Second AA Number","Second Atom Name","Distance","LJ Value")

CoulHead<-rbind(Header,CoulHead)
LJHead<-rbind(Header,LJHead)

write.table(x=CoulHead,file = name,row.names = FALSE,col.names = FALSE,append = TRUE,sep = ",")
write.table(x=LJHead,file = name2,row.names = FALSE,col.names = FALSE,append = TRUE,sep = ",")

for (g in 1:length(UnsharedCoulData)) {
  Seperator<-"Input Number "
  Seperator<-paste0(Seperator,g)
  
  write.table(x=Seperator,file = name,sep = ",",row.names = FALSE,col.names = FALSE,append = TRUE)
  write.table(x=Seperator,file = name2,sep = ",",row.names = FALSE,col.names = FALSE,append = TRUE)
  
  LJdata<-UnsharedLJData[g]
  LJdata<-as.data.frame(LJdata)
  
  CoulData<-UnsharedCoulData[g]
  CoulData<-as.data.frame(CoulData)
  
  write.table(x=CoulData,file = name,sep = ",",row.names = FALSE,col.names = FALSE,append = TRUE)
  write.table(x=LJdata,file = name2,sep = ",",row.names = FALSE,col.names = FALSE,append = TRUE)
}