# HG changeset patch # User bornea # Date 1461705363 14400 # Node ID 63008bdf576e9cb96af88fbeaa3364818dc2786c # Parent 761e1ad2b1303a7ce46dbc4b61d4a9f631ca57f4 Uploaded diff -r 761e1ad2b130 -r 63008bdf576e pre_process_protein_name_set.R --- a/pre_process_protein_name_set.R Tue Apr 26 16:21:13 2016 -0400 +++ b/pre_process_protein_name_set.R Tue Apr 26 17:16:03 2016 -0400 @@ -80,8 +80,10 @@ mapped_protein_uniprotonly = str_extract(peptides_txt_mapped_log2$Uniprot,"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") mapped_protein_uniprot_accession = str_extract(peptides_txt_mapped_log2$Uniprot,"[OPQ][0-9][A-Z0-9]{3}[0-9](-[0-9]+)?|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}(-[0-9]+)?|[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") peptides_txt_mapped_log2$mapped_protein = mapped_protein_uniprotonly + names_db = str_extract(swissprot_fasta,"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") + names_db = names_db[!is.na(names_db)] # Runs the Tukey function returning completed table. - peptides_txt_mapped_log2 = subset(peptides_txt_mapped_log2,mapped_protein %in% swissprot_fasta) + peptides_txt_mapped_log2 = subset(peptides_txt_mapped_log2,mapped_protein %in% names_db) if (nrow(peptides_txt_mapped_log2) == 0) { print("Uniprot Database does not have any of the proteins in the peptides file") quit()