# HG changeset patch # User davidvanzessen # Date 1483013145 18000 # Node ID ca2512e1e3ab1dbf153622e4abb6719a1f42e166 # Parent a24f8c93583a59e55e7579fb262c517270eca446 Uploaded diff -r a24f8c93583a -r ca2512e1e3ab merge_and_filter.r --- a/merge_and_filter.r Thu Dec 22 09:39:27 2016 -0500 +++ b/merge_and_filter.r Thu Dec 29 07:05:45 2016 -0500 @@ -47,8 +47,8 @@ filtering.steps[,2] = as.character(filtering.steps[,2]) #filtering.steps[,3] = as.numeric(filtering.steps[,3]) -print("summary files columns") -print(names(summ)) +#print("summary files columns") +#print(names(summ)) summ = merge(summ, gene_identification, by="Sequence.ID") @@ -171,7 +171,6 @@ if(filter.unique != "no"){ clmns = names(result) - if(empty.region.filter == "leader"){ result$unique.def = paste(result$FR1.IMGT.seq, result$CDR1.IMGT.seq, result$FR2.IMGT.seq, result$CDR2.IMGT.seq, result$FR3.IMGT.seq, result$CDR3.IMGT.seq) } else if(empty.region.filter == "FR1"){ @@ -185,6 +184,7 @@ if(filter.unique == "remove"){ result = result[duplicated(result$unique.def) | duplicated(result$unique.def, fromLast=T),] } + result$unique.def = paste(result$unique.def, gsub(",.*", "", result$best_match)) #keep the unique sequences that are in multiple classes, gsub so the unmatched don't have a class after it result = result[!duplicated(result$unique.def),] @@ -194,16 +194,21 @@ filtering.steps = rbind(filtering.steps, c("After filter unique sequences", nrow(result))) +print(paste("Number of sequences in result after unique filtering:", nrow(result))) + if(nrow(summ) == 0){ stop("No data remaining after filter") } result$best_match_class = gsub(",.*", "", result$best_match) #gsub so the unmatched don't have a class after it -result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":")) +#result$past = "" +#cls = unlist(strsplit(unique.type, ",")) +#for (i in 1:nrow(result)){ +# result[i,"past"] = paste(result[i,cls], collapse=":") +#} - - +result$past = do.call(paste, c(result[unlist(strsplit(unique.type, ","))], sep = ":")) result.matched = result[!grepl("unmatched", result$best_match),] result.unmatched = result[grepl("unmatched", result$best_match),] diff -r a24f8c93583a -r ca2512e1e3ab shm_csr.py --- a/shm_csr.py Thu Dec 22 09:39:27 2016 -0500 +++ b/shm_csr.py Thu Dec 29 07:05:45 2016 -0500 @@ -38,7 +38,7 @@ cdr1LengthDic = {} cdr2LengthDic = {} -with open(infile, 'r') as i: +with open(infile, 'ru') as i: for line in i: if first: linesplt = line.split("\t") @@ -80,6 +80,8 @@ IDlist += [ID] +print mutationList, linecount + AALength = (int(max(mutationList, key=lambda i: int(i[4]) if i[4] else 0)[4]) + 1) # [4] is the position of the AA mutation, None if silent if AALength < 60: AALength = 64 @@ -173,7 +175,7 @@ aggctatIndex = 0 atagcctIndex = 0 first = True -with open(infile, 'r') as i: +with open(infile, 'ru') as i: for line in i: if first: linesplt = line.split("\t") diff -r a24f8c93583a -r ca2512e1e3ab wrapper.sh --- a/wrapper.sh Thu Dec 22 09:39:27 2016 -0500 +++ b/wrapper.sh Thu Dec 29 07:05:45 2016 -0500 @@ -43,6 +43,7 @@ cat "`find $PWD/files/ -name "1_*"`" > $PWD/summary.txt cat "`find $PWD/files/ -name "3_*"`" > $PWD/sequences.txt +cat "`find $PWD/files/ -name "4_*"`" > $PWD/gapped_aa.txt cat "`find $PWD/files/ -name "5_*"`" > $PWD/aa.txt cat "`find $PWD/files/ -name "6_*"`" > $PWD/junction.txt cat "`find $PWD/files/ -name "7_*"`" > $PWD/mutationanalysis.txt @@ -64,7 +65,7 @@ echo "---------------- merge_and_filter.r ----------------" echo "---------------- merge_and_filter.r ----------------
" >> $log -Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $PWD/aa.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1 +Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt "$PWD/gapped_aa.txt" $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1 if [[ "$fast" == "no" ]] ; then