saint_preproc: pre_process_protein_name

comparison pre_process_protein_name_set.R @ 21:9e0a894d2676 draft

Uploaded

author	bornea
date	Thu, 19 Nov 2015 13:38:20 -0500
parents	e21be0412789
children

comparison

equal deleted inserted replaced

-:e21be0412789
+:9e0a894d2676
 library(stringr)
 library(mygene)
 library(VennDiagram)
 #####
 #data
-main <- function(peptides_file) {
+main <- function(peptides_file, db_path) {
 	peptides_file = read.delim(peptides_file,header=TRUE,stringsAsFactors=FALSE,fill=TRUE)
 peptides_txt = peptides_file
 	intensity_columns = names(peptides_txt[,str_detect(names(peptides_txt),"Intensity\\.*")]) #Pulls out the names of all columns with Intensity in their name.
 	intensity_columns = intensity_columns[2:length(intensity_columns)] #Removes the first column that does not have a bait.
 	peptides_txt_mapped = as.data.frame(map_peptides_proteins(peptides_txt)) #This function as below sets every line to a 1 to 1 intensity to each possible protein.
 	peptides_txt_mapped$Uniprot = str_extract(peptides_txt_mapped$mapped_protein, "[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") #Pulls out just the Uniprot accession from the mapped_protein string.
 	peptides_txt_mapped = subset(peptides_txt_mapped,!is.na(Uniprot)) #removes reverse sequences and any that didn't match a uniprot accession
 	columns_comb = c("Uniprot", intensity_columns)
 	peptides_mapped_intensity = subset(peptides_txt_mapped, select = columns_comb) #Subsets out only the needed columns for Tukeys (Uniprot IDs and baited intensities)
-	swissprot_fasta = scan("/home/philip/galaxy/tools/Moffitt_Tools/uniprot_names.txt",what="character")
+	swissprot_fasta = scan(db_path, what="character")
 	peptides_txt_mapped_log2 = peptides_mapped_intensity
 # Takes the log2 of the intensities.
 	for (i in intensity_columns) {
 		peptides_txt_mapped_log2[,i] = log2(subset(peptides_txt_mapped_log2, select = i))
 	}
 }
 return(Tukeys_df)
 }
 args <- commandArgs(trailingOnly = TRUE)
-main(args[1])
+main(args[1], args[2])

Mercurial > repos > bornea > saint_preproc

comparison pre_process_protein_name_set.R @ 21:9e0a894d2676 draft