# HG changeset patch
# User davidvanzessen
# Date 1489498216 14400
# Node ID b8ac74723ab0cf991053ea251ec9dbe9efa7675a
# Parent ca2512e1e3ab1dbf153622e4abb6719a1f42e166
Uploaded
diff -r ca2512e1e3ab -r b8ac74723ab0 merge_and_filter.r
--- a/merge_and_filter.r Thu Dec 29 07:05:45 2016 -0500
+++ b/merge_and_filter.r Tue Mar 14 09:30:16 2017 -0400
@@ -36,6 +36,11 @@
colnames(gene_identification) = c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")
}
+print("Summary analysis files columns")
+print(names(summ))
+
+
+
input.sequence.count = nrow(summ)
print(paste("Number of sequences in summary file:", input.sequence.count))
@@ -70,31 +75,37 @@
filtering.steps = rbind(filtering.steps, c("After functionality filter", nrow(summ)))
-#print("mutation analysis files columns")
-#print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])]))
+print("mutation analysis files columns")
+print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])]))
result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID")
print(paste("Number of sequences after merging with mutation analysis file:", nrow(result)))
-#print("mutation stats files columns")
-#print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])]))
+print("mutation stats files columns")
+print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])]))
result = merge(result, mutationstats[,!(names(mutationstats) %in% names(result)[-1])], by="Sequence.ID")
print(paste("Number of sequences after merging with mutation stats file:", nrow(result)))
-#print("hotspots files columns")
-#print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])]))
+print("hotspots files columns")
+print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])]))
result = merge(result, hotspots[,!(names(hotspots) %in% names(result)[-1])], by="Sequence.ID")
print(paste("Number of sequences after merging with hotspots file:", nrow(result)))
+print("sequences files columns")
+print(c("FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT"))
+
sequences = sequences[,c("Sequence.ID", "FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")]
names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
result = merge(result, sequences, by="Sequence.ID", all.x=T)
+print("AA sequences files columns")
+print("CDR3.IMGT")
+
AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")]
names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA")
result = merge(result, AAs, by="Sequence.ID", all.x=T)
diff -r ca2512e1e3ab -r b8ac74723ab0 wrapper.sh
--- a/wrapper.sh Thu Dec 29 07:05:45 2016 -0500
+++ b/wrapper.sh Tue Mar 14 09:30:16 2017 -0400
@@ -421,22 +421,30 @@
if [[ "$fast" == "no" ]] ; then
+
+
echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------
" >> $log
tmp="$PWD"
mkdir $outdir/baseline
+ echo "
${header_substring}