Mercurial > repos > davidvanzessen > shm_csr
comparison check_unique_id.r @ 56:ee807645b224 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Mon, 17 Jul 2017 10:44:40 -0400 |
| parents | |
| children | cb779a45537b |
comparison
equal
deleted
inserted
replaced
| 55:6cd12c71c3d3 | 56:ee807645b224 |
|---|---|
| 1 # Make the 'Sequence ID' column unique across a set of tab-separated files. | |
| 2 # | |
| 3 # Usage: Rscript check_unique_id.r SUMMARY_FILE [OTHER_FILE ...] | |
| 4 # | |
| 5 # The first argument must be the summary file: its 'Sequence ID' column is | |
| 6 # checked for repeated values. If any ID occurs more than once, every file | |
| 7 # named on the command line (the summary file included) is rewritten in place | |
| 8 # with the 'Sequence number' value appended to 'Sequence ID', joined by "_". | |
| 9 # If all IDs are already unique, no file is touched. | |
| 10 | |
| 11 args <- commandArgs(trailingOnly = TRUE) | |
| 12 | |
| 13 if (length(args) < 1) { | |
| 14   stop("Usage: check_unique_id.r SUMMARY_FILE [OTHER_FILE ...]") | |
| 15 } | |
| 16 | |
| 17 id_col <- "Sequence ID" | |
| 18 number_col <- "Sequence number" | |
| 19 | |
| 20 # Read a tab-separated file with a header row. check.names = FALSE keeps the | |
| 21 # header text verbatim, so columns are called "Sequence ID", not "Sequence.ID". | |
| 22 read_tsv <- function(path) { | |
| 23   read.table(path, header = TRUE, sep = "\t", fill = TRUE, | |
| 24              stringsAsFactors = FALSE, quote = "", check.names = FALSE) | |
| 25 } | |
| 26 | |
| 27 summary_tbl <- read_tsv(args[1]) | |
| 28 | |
| 29 if (!(number_col %in% names(summary_tbl))) { | |
| 30   stop("First argument doesn't contain the 'Sequence number' column") | |
| 31 } | |
| 32 if (!(id_col %in% names(summary_tbl))) { | |
| 33   stop("First argument doesn't contain the 'Sequence ID' column") | |
| 34 } | |
| 35 | |
| 36 # The column has to be looked up by its exact name: `$Sequence.ID` would | |
| 37 # return NULL here and the duplicate check could never succeed. | |
| 38 id_counts <- table(summary_tbl[[id_col]]) | |
| 39 has_duplicates <- any(id_counts > 1) | |
| 40 | |
| 41 if (has_duplicates) { | |
| 42   print("Sequence.ID is not unique for every sequence, adding sequence number to IDs") | |
| 43   for (current_file in args) { | |
| 44     print(paste("Appending 'Sequence number' column to 'Sequence ID' column in", current_file)) | |
| 45     current <- read_tsv(current_file) | |
| 46     # Matrix-style indexing errors out if a file lacks either column. | |
| 47     current[, id_col] <- paste(current[, id_col], current[, number_col], sep = "_") | |
| 48     write.table(x = current, file = current_file, quote = FALSE, sep = "\t", | |
| 49                 na = "", row.names = FALSE, col.names = TRUE) | |
| 50   } | |
| 51 } | |
