changeset 41:b8ac74723ab0 draft

Uploaded
author davidvanzessen
date Tue, 14 Mar 2017 09:30:16 -0400
parents ca2512e1e3ab
children 1cf60ae234b4
files merge_and_filter.r wrapper.sh
diffstat 2 files changed, 26 insertions(+), 7 deletions(-) [+]
line wrap: on
line diff
--- a/merge_and_filter.r	Thu Dec 29 07:05:45 2016 -0500
+++ b/merge_and_filter.r	Tue Mar 14 09:30:16 2017 -0400
@@ -36,6 +36,11 @@
 	colnames(gene_identification) = c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")
 }
 
+print("Summary analysis files columns")
+print(names(summ))
+
+
+
 input.sequence.count = nrow(summ)
 print(paste("Number of sequences in summary file:", input.sequence.count))
 
@@ -70,31 +75,37 @@
 
 filtering.steps = rbind(filtering.steps, c("After functionality filter", nrow(summ)))
 
-#print("mutation analysis files columns")
-#print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])]))
+print("mutation analysis files columns")
+print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])]))
 
 result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID")
 
 print(paste("Number of sequences after merging with mutation analysis file:", nrow(result)))
 
-#print("mutation stats files columns")
-#print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])]))
+print("mutation stats files columns")
+print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])]))
 
 result = merge(result, mutationstats[,!(names(mutationstats) %in% names(result)[-1])], by="Sequence.ID")
 
 print(paste("Number of sequences after merging with mutation stats file:", nrow(result)))
 
-#print("hotspots files columns")
-#print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])]))
+print("hotspots files columns")
+print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])]))
 
 result = merge(result, hotspots[,!(names(hotspots) %in% names(result)[-1])], by="Sequence.ID")
 
 print(paste("Number of sequences after merging with hotspots file:", nrow(result)))
 
+print("sequences files columns")
+print(c("FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT"))
+
 sequences = sequences[,c("Sequence.ID", "FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")]
 names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
 result = merge(result, sequences, by="Sequence.ID", all.x=T)
 
+print("AA sequences files columns")
+print("CDR3.IMGT")
+
 AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")]
 names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA")
 result = merge(result, AAs, by="Sequence.ID", all.x=T)
--- a/wrapper.sh	Thu Dec 29 07:05:45 2016 -0500
+++ b/wrapper.sh	Tue Mar 14 09:30:16 2017 -0400
@@ -421,22 +421,30 @@
 
 if [[ "$fast" == "no" ]] ; then
 
+    
+
 	echo "---------------- baseline ----------------"
 	echo "---------------- baseline ----------------<br />" >> $log
 	tmp="$PWD"
 
 	mkdir $outdir/baseline
 	
+	echo "<center><h1>BASELINe</h1>" >> $output
+	header_substring="Based on CDR1, FR2, CDR2, FR3 (27:27:38:55:65:104:-)"
+	
 	baseline_boundaries="27:27:38:55:65:104:-"
 	
 	if [[ "${empty_region_filter}" == "leader" ]] ; then
 		baseline_boundaries="1:26:38:55:65:104:-"
+		header_substring="Based on FR1, CDR1, FR2, CDR2, FR3 (1:26:38:55:65:104:-)"
 	fi
+	
+	echo "<p>${header_substring}</p></center>" >> $output
 
 	mkdir $outdir/baseline/IGA_IGG_IGM
 	if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then
 		cd $outdir/baseline/IGA_IGG_IGM
-		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"	
+		bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"
 	else
 		echo "No sequences" > "$outdir/baseline.txt"
 	fi