Mercurial > repos > bornea > saint_preproc
comparison pre_process_protein_name_set.R @ 21:9e0a894d2676 draft
Uploaded
author | bornea |
---|---|
date | Thu, 19 Nov 2015 13:38:20 -0500 |
parents | e21be0412789 |
children |
comparison
equal
deleted
inserted
replaced
20:e21be0412789 | 21:9e0a894d2676 |
---|---|
24 library(stringr) | 24 library(stringr) |
25 library(mygene) | 25 library(mygene) |
26 library(VennDiagram) | 26 library(VennDiagram) |
27 ##### | 27 ##### |
28 #data | 28 #data |
29 main <- function(peptides_file) { | 29 main <- function(peptides_file, db_path) { |
30 peptides_file = read.delim(peptides_file,header=TRUE,stringsAsFactors=FALSE,fill=TRUE) | 30 peptides_file = read.delim(peptides_file,header=TRUE,stringsAsFactors=FALSE,fill=TRUE) |
31 peptides_txt = peptides_file | 31 peptides_txt = peptides_file |
32 intensity_columns = names(peptides_txt[,str_detect(names(peptides_txt),"Intensity\\.*")]) #Pulls out all columns with Intensity in their names. | 32 intensity_columns = names(peptides_txt[,str_detect(names(peptides_txt),"Intensity\\.*")]) #Pulls out all columns with Intensity in their names. |
33 intensity_columns = intensity_columns[2:length(intensity_columns)] #Removes the first column that does not have a bait. | 33 intensity_columns = intensity_columns[2:length(intensity_columns)] #Removes the first column that does not have a bait. |
34 peptides_txt_mapped = as.data.frame(map_peptides_proteins(peptides_txt)) #This function as below sets every line to a 1 to 1 intensity to each possible protein. | 34 peptides_txt_mapped = as.data.frame(map_peptides_proteins(peptides_txt)) #This function as below sets every line to a 1 to 1 intensity to each possible protein. |
35 peptides_txt_mapped$Uniprot = str_extract(peptides_txt_mapped$mapped_protein, "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") #Pulls out just Uniprot id from the script. | 35 peptides_txt_mapped$Uniprot = str_extract(peptides_txt_mapped$mapped_protein, "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") #Pulls out just Uniprot id from the script. |
36 peptides_txt_mapped = subset(peptides_txt_mapped,!is.na(Uniprot)) #removes reverse sequences and any that didn't match a uniprot accession | 36 peptides_txt_mapped = subset(peptides_txt_mapped,!is.na(Uniprot)) #removes reverse sequences and any that didn't match a uniprot accession |
37 columns_comb = c("Uniprot", intensity_columns) | 37 columns_comb = c("Uniprot", intensity_columns) |
38 peptides_mapped_intensity = subset(peptides_txt_mapped, select = columns_comb) #Subsets out only the needed columns for Tukeys (Uniprot IDs and baited intensities) | 38 peptides_mapped_intensity = subset(peptides_txt_mapped, select = columns_comb) #Subsets out only the needed columns for Tukeys (Uniprot IDs and baited intensities) |
39 swissprot_fasta = scan("/home/philip/galaxy/tools/Moffitt_Tools/uniprot_names.txt",what="character") | 39 swissprot_fasta = scan(db_path, what="character") |
40 peptides_txt_mapped_log2 = peptides_mapped_intensity | 40 peptides_txt_mapped_log2 = peptides_mapped_intensity |
41 # Takes the log2 of the intensities. | 41 # Takes the log2 of the intensities. |
42 for (i in intensity_columns) { | 42 for (i in intensity_columns) { |
43 peptides_txt_mapped_log2[,i] = log2(subset(peptides_txt_mapped_log2, select = i)) | 43 peptides_txt_mapped_log2[,i] = log2(subset(peptides_txt_mapped_log2, select = i)) |
44 } | 44 } |
93 } | 93 } |
94 return(Tukeys_df) | 94 return(Tukeys_df) |
95 } | 95 } |
96 | 96 |
97 args <- commandArgs(trailingOnly = TRUE) | 97 args <- commandArgs(trailingOnly = TRUE) |
98 main(args[1]) | 98 main(args[1], args[2]) |