Repository 'prisca'
hg clone https://toolshed.g2.bx.psu.edu/repos/davidvanzessen/prisca

Changeset 1:75853bceec00 (2017-01-17)
Previous changeset 0:ed6885c85660 (2016-08-31) Next changeset 2:7ffd0fba8cf4 (2017-09-18)
Commit message:
Uploaded
modified:
ALL.xml
RScript.r
wrapper.sh
b
diff -r ed6885c85660 -r 75853bceec00 ALL.xml
--- a/ALL.xml Wed Aug 31 05:31:47 2016 -0400
+++ b/ALL.xml Tue Jan 17 07:24:44 2017 -0500
b
@@ -8,8 +8,8 @@
  <param name="min_freq" type="text" label="Minimum Frequency, between 0 and 100 in percentage" value='0'/>
  <param name="min_cells" type="text" label="Minimum cell count" value='0'/>
  <param name="merge_on" type="select" label="Merge On">
- <option value="Clone_Sequence">Clone_Sequence</option>
- <option value="V_J_CDR3">V+J+CDR3</option>
+ <option value="Clone_Sequence">Clone_Sequence</option>
+ <option value="V_J_CDR3">V+J+CDR3</option>
  </param>
  </inputs>
  <outputs>
@@ -40,6 +40,7 @@
 | CDR3_Sense_Sequence              | The CDR3 sequence region.                    |
 +----------------------------------+----------------------------------------------+
 
+It will handle patients with one, two or three samples.
 
 And generate a detailed HTML report on the sequences found in individual samples and in both samples.
  </help>
b
diff -r ed6885c85660 -r 75853bceec00 RScript.r
--- a/RScript.r Wed Aug 31 05:31:47 2016 -0400
+++ b/RScript.r Tue Jan 17 07:24:44 2017 -0500
[
b'@@ -34,7 +34,19 @@\n \r\n dat = dat[dat$Frequency >= min_freq,]\r\n \r\n-triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]\r\n+patient.sample.counts = data.frame(data.table(dat)[, list(count=.N), by=c("Patient", "Sample")])\r\n+patient.sample.counts = data.frame(data.table(patient.sample.counts)[, list(count=.N), by=c("Patient")])\r\n+\r\n+print("Found the following patients with number of samples:")\r\n+print(patient.sample.counts)\r\n+\r\n+patient.sample.counts.pairs = patient.sample.counts[patient.sample.counts$count %in% 1:2,]\r\n+patient.sample.counts.triplets = patient.sample.counts[patient.sample.counts$count == 3,]\r\n+\r\n+\r\n+\r\n+triplets = dat[dat$Patient %in% patient.sample.counts.triplets$Patient,]\r\n+dat = dat[dat$Patient %in% patient.sample.counts.pairs$Patient,]\r\n \r\n cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)\r\n \r\n@@ -475,21 +487,21 @@\n   print(plt)\r\n   dev.off()\r\n }\r\n-\r\n-cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)\r\n+if(length(patients) > 0){\r\n+\tcat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)\r\n \r\n-interval = intervalFreq\r\n-intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n-product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n-lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)\r\n+\tinterval = intervalFreq\r\n+\tintervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n+\tproduct = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), 
"J_Segments"=rep(J_Segments, each=length(interval)))\r\n+\tlapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)\r\n \r\n-cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)\r\n+\tcat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)\r\n \r\n-interval = intervalReads\r\n-intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n-product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n-lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")\r\n-\r\n+\tinterval = intervalReads\r\n+\tintervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n+\tproduct = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n+\tlapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")\r\n+}\r\n if(nrow(single_patients) > 0){\r\n \tscales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))\r\n \tp = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))\r\n@@ -525,551 +537,532 @@\n patient.merge.list.second = list()\r\n \r\n tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){\r\n-  onShort = "reads"\r\n-  if(on == "Frequency"){\r\n-    onShort = "freq"\r\n-  }\r\n-  onx = paste(on, ".x", sep="")\r\n-  ony = paste(on, ".y", sep="")\r\n-  onz = paste(on, ".z", sep="")\r\n-  
type="triplet"\r\n-  \r\n-  threshholdIndex = which(colnames(product) == "interval")\r\n-  V_SegmentIndex = which(colnames(product) == "V_Segments")\r\n-  J_SegmentIndex = w'..b'eg_BM",]\r\n-  three = triplets[triplets$Sample == "24062_reg_BM",]\r\n-  tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)\r\n-  \r\n-  one = triplets[triplets$Sample == "16278_Left",]\r\n-  two = triplets[triplets$Sample == "26402_Left",]\r\n-  three = triplets[triplets$Sample == "26759_Left",]\r\n-  tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)\r\n-  \r\n-  one = triplets[triplets$Sample == "16278_Right",]\r\n-  two = triplets[triplets$Sample == "26402_Right",]\r\n-  three = triplets[triplets$Sample == "26759_Right",]\r\n-  tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)\r\n-  \r\n-  cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)\r\n+\tinterval = intervalReads\r\n+\tintervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n+\tproduct = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n \r\n-  interval = intervalFreq\r\n-  intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n-  product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n-  \r\n-  one = triplets[triplets$Sample == "14696_reg_BM",]\r\n-  two = triplets[triplets$Sample == 
"24536_reg_BM",]\r\n-  three = triplets[triplets$Sample == "24062_reg_BM",]\r\n-  tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)\r\n-  \r\n-  one = triplets[triplets$Sample == "16278_Left",]\r\n-  two = triplets[triplets$Sample == "26402_Left",]\r\n-  three = triplets[triplets$Sample == "26759_Left",]\r\n-  tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)\r\n-  \r\n-  one = triplets[triplets$Sample == "16278_Right",]\r\n-  two = triplets[triplets$Sample == "26402_Right",]\r\n-  three = triplets[triplets$Sample == "26759_Right",]\r\n-  tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)\r\n+\ttriplets = split(triplets, triplets$Patient, drop=T)\r\n+\tprint(nrow(triplets))\r\n+\tfor(triplet in triplets){\r\n+\t\tsamples = unique(triplet$Sample)\r\n+\t\tone = triplet[triplet$Sample == samples[1],]\r\n+\t\ttwo = triplet[triplet$Sample == samples[2],]\r\n+\t\tthree = triplet[triplet$Sample == samples[3],]\r\n+\t\t\r\n+\t\tprint(paste(nrow(triplet), nrow(one), nrow(two), nrow(three)))\r\n+\t\ttripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="normalized_read_count", T)\r\n+\t}\r\n+\r\n+\tcat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)\r\n+\r\n+\tinterval = intervalFreq\r\n+\tintervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n+\tproduct = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n+\r\n+\tfor(triplet in triplets){\r\n+\t\tsamples = unique(triplet$Sample)\r\n+\t\tone = 
triplet[triplet$Sample == samples[1],]\r\n+\t\ttwo = triplet[triplet$Sample == samples[2],]\r\n+\t\tthree = triplet[triplet$Sample == samples[3],]\r\n+\t\ttripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="Frequency", F)\r\n+\t}\r\n } else {\r\n   cat("", file="triplets.txt")\r\n }\r\n'
b
diff -r ed6885c85660 -r 75853bceec00 wrapper.sh
--- a/wrapper.sh Wed Aug 31 05:31:47 2016 -0400
+++ b/wrapper.sh Tue Jan 17 07:24:44 2017 -0500
b
@@ -46,7 +46,7 @@
  oldLocus=""
  sample1="$(echo ${sample1} | tr -d '\r' | tr -d '\n')"
  sample2="$(echo ${sample2} | tr -d '\r' | tr -d '\n')"
- tail -n+2 ${patient}_freq.txt | sed "s/>//" > tmp.txt
+ tail -n+2 "${patient}_freq.txt" | sed "s/>//" > tmp.txt
  echo "<div class='tabber'>" >> "$html"
  echo "<div class='tabbertab' title='Data frequency'>" >> "$html"
  echo "<table><tr><td style='vertical-align:top;'>" >> "$html"
@@ -99,7 +99,7 @@
  echo "<a href='${patient}_percent_freq.png'><img src='${patient}_percent_freq.png' width='1280' height='720' /></a></div>" >> "$html"
  echo "${scatterplot_tab}</tr></table></div>" >> "$html"
 
- tail -n+2 ${patient}_reads.txt | sed "s/>//" > tmp.txt
+ tail -n+2 "${patient}_reads.txt" | sed "s/>//" > tmp.txt
  echo "<div class='tabbertab' title='Data reads'>" >> "$html"
  echo "<table><tr><td style='vertical-align:top;'>" >> "$html"
  echo "<table border = 1 class='result_table summary_table' id='summary_table_${patient}_reads'>" >> "$html"
@@ -189,9 +189,9 @@
  echo "$patient"
  html="${patient}.html"
  echo "<tr><td><a href='${patient}.html'>$patient</a></td></tr>" >> "index.html"
- echo "$header" > $html
+ echo "$header" > "$html"
  oldLocus=""
- tail -n+2 ${patient}_freq.txt | sed "s/>//" > tmp.txt
+ tail -n+2 "${patient}_freq.txt" | sed "s/>//" > tmp.txt
  echo "<div class='tabber'>" >> "$html"
  echo "<div class='tabbertab' title='Data frequency'>" >> "$html"
  echo "<table><tr><td style='vertical-align:top;'>" >> "$html"
@@ -261,7 +261,7 @@
  echo "<a href='${patient}_freq_indiv_all.png'><img src='${patient}_freq_indiv_all.png' width='1280' height='720' /></a><br /></div>" >> "$html"
  echo "${scatterplot_tab}</tr></table></div>" >> "$html"
 
- tail -n+2 ${patient}_reads.txt | sed "s/>//" > tmp.txt
+ tail -n+2 "${patient}_reads.txt" | sed "s/>//" > tmp.txt
  echo "<div class='tabbertab' title='Data reads'>" >> "$html"
  echo "<table><tr><td style='vertical-align:top;'>" >> "$html"
  echo "<table border = 1 class='result_table summary_table' id='summary_table_${patient}_reads'>" >> "$html"