Repository 'shm_csr'
hg clone https://toolshed.g2.bx.psu.edu/repos/davidvanzessen/shm_csr

Changeset 41:b8ac74723ab0 (2017-03-14)
Previous changeset 40:ca2512e1e3ab (2016-12-29) Next changeset 42:1cf60ae234b4 (2017-03-28)
Commit message:
Uploaded
modified:
merge_and_filter.r
wrapper.sh
b
diff -r ca2512e1e3ab -r b8ac74723ab0 merge_and_filter.r
--- a/merge_and_filter.r Thu Dec 29 07:05:45 2016 -0500
+++ b/merge_and_filter.r Tue Mar 14 09:30:16 2017 -0400
[
@@ -36,6 +36,11 @@
  colnames(gene_identification) = c("Sequence.ID", "chunk_hit_percentage", "nt_hit_percentage", "start_locations", "best_match")
 }
 
+print("Summary analysis files columns")
+print(names(summ))
+
+
+
 input.sequence.count = nrow(summ)
 print(paste("Number of sequences in summary file:", input.sequence.count))
 
@@ -70,31 +75,37 @@
 
 filtering.steps = rbind(filtering.steps, c("After functionality filter", nrow(summ)))
 
-#print("mutation analysis files columns")
-#print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])]))
+print("mutation analysis files columns")
+print(names(mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])]))
 
 result = merge(summ, mutationanalysis[,!(names(mutationanalysis) %in% names(summ)[-1])], by="Sequence.ID")
 
 print(paste("Number of sequences after merging with mutation analysis file:", nrow(result)))
 
-#print("mutation stats files columns")
-#print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])]))
+print("mutation stats files columns")
+print(names(mutationstats[,!(names(mutationstats) %in% names(result)[-1])]))
 
 result = merge(result, mutationstats[,!(names(mutationstats) %in% names(result)[-1])], by="Sequence.ID")
 
 print(paste("Number of sequences after merging with mutation stats file:", nrow(result)))
 
-#print("hotspots files columns")
-#print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])]))
+print("hotspots files columns")
+print(names(hotspots[,!(names(hotspots) %in% names(result)[-1])]))
 
 result = merge(result, hotspots[,!(names(hotspots) %in% names(result)[-1])], by="Sequence.ID")
 
 print(paste("Number of sequences after merging with hotspots file:", nrow(result)))
 
+print("sequences files columns")
+print(c("FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT"))
+
 sequences = sequences[,c("Sequence.ID", "FR1.IMGT", "CDR1.IMGT", "FR2.IMGT", "CDR2.IMGT", "FR3.IMGT", "CDR3.IMGT")]
 names(sequences) = c("Sequence.ID", "FR1.IMGT.seq", "CDR1.IMGT.seq", "FR2.IMGT.seq", "CDR2.IMGT.seq", "FR3.IMGT.seq", "CDR3.IMGT.seq")
 result = merge(result, sequences, by="Sequence.ID", all.x=T)
 
+print("sequences files columns")
+print("CDR3.IMGT")
+
 AAs = AAs[,c("Sequence.ID", "CDR3.IMGT")]
 names(AAs) = c("Sequence.ID", "CDR3.IMGT.AA")
 result = merge(result, AAs, by="Sequence.ID", all.x=T)
b
diff -r ca2512e1e3ab -r b8ac74723ab0 wrapper.sh
--- a/wrapper.sh Thu Dec 29 07:05:45 2016 -0500
+++ b/wrapper.sh Tue Mar 14 09:30:16 2017 -0400
[
@@ -421,22 +421,30 @@
 
 if [[ "$fast" == "no" ]] ; then
 
+    
+
  echo "---------------- baseline ----------------"
  echo "---------------- baseline ----------------<br />" >> $log
  tmp="$PWD"
 
  mkdir $outdir/baseline
 
+ echo "<center><h1>BASELINe</h1>" >> $output
+ header_substring="Based on CDR1, FR2, CDR2, FR3 (27:27:38:55:65:104:-)"
+
  baseline_boundaries="27:27:38:55:65:104:-"
 
  if [[ "${empty_region_filter}" == "leader" ]] ; then
  baseline_boundaries="1:26:38:55:65:104:-"
+ header_substring="Based on FR1, CDR1, FR2, CDR2, FR3 (1:26:38:55:65:104:-)"
  fi
+
+ echo "<p>${header_substring}</p></center>" >> $output
 
  mkdir $outdir/baseline/IGA_IGG_IGM
  if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then
  cd $outdir/baseline/IGA_IGG_IGM
- bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"
+ bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "${baseline_boundaries}" $outdir/new_IMGT.txz "IGA_IGG_IGM_IGE" "$dir/baseline/IMGTVHreferencedataset20161215.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"
  else
  echo "No sequences" > "$outdir/baseline.txt"
  fi