comparison pre_process_protein_name_set.R @ 21:9e0a894d2676 draft

Uploaded
author bornea
date Thu, 19 Nov 2015 13:38:20 -0500
parents e21be0412789
children
comparison
equal deleted inserted replaced
20:e21be0412789 21:9e0a894d2676
24 library(stringr) 24 library(stringr)
25 library(mygene) 25 library(mygene)
26 library(VennDiagram) 26 library(VennDiagram)
27 ##### 27 #####
28 #data 28 #data
29 main <- function(peptides_file) { 29 main <- function(peptides_file, db_path) {
30 peptides_file = read.delim(peptides_file,header=TRUE,stringsAsFactors=FALSE,fill=TRUE) 30 peptides_file = read.delim(peptides_file,header=TRUE,stringsAsFactors=FALSE,fill=TRUE)
31 peptides_txt = peptides_file 31 peptides_txt = peptides_file
32 intensity_columns = names(peptides_txt[,str_detect(names(peptides_txt),"Intensity\\.*")]) #Pulls out all columns with Intensity in their names. 32 intensity_columns = names(peptides_txt[,str_detect(names(peptides_txt),"Intensity\\.*")]) #Pulls out all columns with Intensity in their names.
33 intensity_columns = intensity_columns[2:length(intensity_columns)] #Removes the first column that does not have a bait. 33 intensity_columns = intensity_columns[2:length(intensity_columns)] #Removes the first column that does not have a bait.
34 peptides_txt_mapped = as.data.frame(map_peptides_proteins(peptides_txt)) #This function as below sets every line to a 1 to 1 intensity to each possible protein. 34 peptides_txt_mapped = as.data.frame(map_peptides_proteins(peptides_txt)) #This function as below sets every line to a 1 to 1 intensity to each possible protein.
35 peptides_txt_mapped$Uniprot = str_extract(peptides_txt_mapped$mapped_protein, "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") #Pulls out just the Uniprot accession from the mapped protein string. 35 peptides_txt_mapped$Uniprot = str_extract(peptides_txt_mapped$mapped_protein, "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") #Pulls out just the Uniprot accession from the mapped protein string.
36 peptides_txt_mapped = subset(peptides_txt_mapped,!is.na(Uniprot)) #removes reverse sequences and any that didn't match a uniprot accession 36 peptides_txt_mapped = subset(peptides_txt_mapped,!is.na(Uniprot)) #removes reverse sequences and any that didn't match a uniprot accession
37 columns_comb = c("Uniprot", intensity_columns) 37 columns_comb = c("Uniprot", intensity_columns)
38 peptides_mapped_intensity = subset(peptides_txt_mapped, select = columns_comb) #Subsets out only the needed columns for Tukeys (Uniprot IDs and baited intensities) 38 peptides_mapped_intensity = subset(peptides_txt_mapped, select = columns_comb) #Subsets out only the needed columns for Tukeys (Uniprot IDs and baited intensities)
39 swissprot_fasta = scan("/home/philip/galaxy/tools/Moffitt_Tools/uniprot_names.txt",what="character") 39 swissprot_fasta = scan(db_path, what="character")
40 peptides_txt_mapped_log2 = peptides_mapped_intensity 40 peptides_txt_mapped_log2 = peptides_mapped_intensity
41 # Takes the log2 of the intensities. 41 # Takes the log2 of the intensities.
42 for (i in intensity_columns) { 42 for (i in intensity_columns) {
43 peptides_txt_mapped_log2[,i] = log2(subset(peptides_txt_mapped_log2, select = i)) 43 peptides_txt_mapped_log2[,i] = log2(subset(peptides_txt_mapped_log2, select = i))
44 } 44 }
93 } 93 }
94 return(Tukeys_df) 94 return(Tukeys_df)
95 } 95 }
96 96
97 args <- commandArgs(trailingOnly = TRUE) 97 args <- commandArgs(trailingOnly = TRUE)
98 main(args[1]) 98 main(args[1], args[2])