Previous changeset 0:ed6885c85660 (2016-08-31) Next changeset 2:7ffd0fba8cf4 (2017-09-18) |
Commit message:
Uploaded |
modified:
ALL.xml RScript.r wrapper.sh |
b |
diff -r ed6885c85660 -r 75853bceec00 ALL.xml --- a/ALL.xml Wed Aug 31 05:31:47 2016 -0400 +++ b/ALL.xml Tue Jan 17 07:24:44 2017 -0500 |
b |
@@ -8,8 +8,8 @@ <param name="min_freq" type="text" label="Minimum Frequency, between 0 and 100 in percentage" value='0'/> <param name="min_cells" type="text" label="Minimum cell count" value='0'/> <param name="merge_on" type="select" label="Merge On"> - <option value="Clone_Sequence">Clone_Sequence</option> - <option value="V_J_CDR3">V+J+CDR3</option> + <option value="Clone_Sequence">Clone_Sequence</option> + <option value="V_J_CDR3">V+J+CDR3</option> </param> </inputs> <outputs> @@ -40,6 +40,7 @@ | CDR3_Sense_Sequence | The CDR3 sequence region. | +----------------------------------+----------------------------------------------+ +It will handle patients with one, two or three samples. And generate a detailed HTML report on the sequences found in indiviual samples and in both samples. </help> |
b |
diff -r ed6885c85660 -r 75853bceec00 RScript.r --- a/RScript.r Wed Aug 31 05:31:47 2016 -0400 +++ b/RScript.r Tue Jan 17 07:24:44 2017 -0500 |
[ |
b'@@ -34,7 +34,19 @@\n \r\n dat = dat[dat$Frequency >= min_freq,]\r\n \r\n-triplets = dat[grepl("VanDongen_cALL_14696", dat$Patient) | grepl("(16278)|(26402)|(26759)", dat$Sample),]\r\n+patient.sample.counts = data.frame(data.table(dat)[, list(count=.N), by=c("Patient", "Sample")])\r\n+patient.sample.counts = data.frame(data.table(patient.sample.counts)[, list(count=.N), by=c("Patient")])\r\n+\r\n+print("Found the following patients with number of samples:")\r\n+print(patient.sample.counts)\r\n+\r\n+patient.sample.counts.pairs = patient.sample.counts[patient.sample.counts$count %in% 1:2,]\r\n+patient.sample.counts.triplets = patient.sample.counts[patient.sample.counts$count == 3,]\r\n+\r\n+\r\n+\r\n+triplets = dat[dat$Patient %in% patient.sample.counts.triplets$Patient,]\r\n+dat = dat[dat$Patient %in% patient.sample.counts.pairs$Patient,]\r\n \r\n cat("<tr><td>Normalizing to lowest cell count within locus</td></tr>", file=logfile, append=T)\r\n \r\n@@ -475,21 +487,21 @@\n print(plt)\r\n dev.off()\r\n }\r\n-\r\n-cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)\r\n+if(length(patients) > 0){\r\n+\tcat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)\r\n \r\n-interval = intervalFreq\r\n-intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n-product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n-lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)\r\n+\tinterval = intervalFreq\r\n+\tintervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n+\tproduct = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n+\tlapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="Frequency", appendtxt=T)\r\n \r\n-cat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)\r\n+\tcat("<tr><td>Starting Cell Count analysis</td></tr>", file=logfile, append=T)\r\n \r\n-interval = intervalReads\r\n-intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n-product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n-lapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")\r\n-\r\n+\tinterval = intervalReads\r\n+\tintervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n+\tproduct = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n+\tlapply(patients, FUN=patientCountOnColumn, product = product, interval=interval, on="normalized_read_count")\r\n+}\r\n if(nrow(single_patients) > 0){\r\n \tscales = 10^(0:6) #(0:ceiling(log10(max(scatterplot_locus_data$normalized_read_count))))\r\n \tp = ggplot(single_patients, aes(Rearrangement, normalized_read_count)) + scale_y_log10(breaks=scales,labels=as.character(scales)) + expand_limits(y=c(0,1000000))\r\n@@ -525,551 +537,532 @@\n patient.merge.list.second = list()\r\n \r\n tripletAnalysis <- function(patient1, label1, patient2, label2, patient3, label3, product, interval, on, appendTriplets= FALSE){\r\n- onShort = "reads"\r\n- if(on == "Frequency"){\r\n- onShort = "freq"\r\n- }\r\n- onx = paste(on, ".x", sep="")\r\n- ony = paste(on, ".y", sep="")\r\n- onz = paste(on, ".z", sep="")\r\n- type="triplet"\r\n- \r\n- threshholdIndex = which(colnames(product) == "interval")\r\n- V_SegmentIndex = which(colnames(product) == "V_Segments")\r\n- J_SegmentIndex = w'..b'eg_BM",]\r\n- three = triplets[triplets$Sample == "24062_reg_BM",]\r\n- tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="normalized_read_count", T)\r\n- \r\n- one = triplets[triplets$Sample == "16278_Left",]\r\n- two = triplets[triplets$Sample == "26402_Left",]\r\n- three = triplets[triplets$Sample == "26759_Left",]\r\n- tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="normalized_read_count", T)\r\n- \r\n- one = triplets[triplets$Sample == "16278_Right",]\r\n- two = triplets[triplets$Sample == "26402_Right",]\r\n- three = triplets[triplets$Sample == "26759_Right",]\r\n- tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="normalized_read_count", T)\r\n- \r\n- cat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)\r\n+\tinterval = intervalReads\r\n+\tintervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n+\tproduct = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n \r\n- interval = intervalFreq\r\n- intervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n- product = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n- \r\n- one = triplets[triplets$Sample == "14696_reg_BM",]\r\n- two = triplets[triplets$Sample == "24536_reg_BM",]\r\n- three = triplets[triplets$Sample == "24062_reg_BM",]\r\n- tripletAnalysis(one, "14696_1_Trio", two, "14696_2_Trio", three, "14696_3_Trio", product=product, interval=interval, on="Frequency", F)\r\n- \r\n- one = triplets[triplets$Sample == "16278_Left",]\r\n- two = triplets[triplets$Sample == "26402_Left",]\r\n- three = triplets[triplets$Sample == "26759_Left",]\r\n- tripletAnalysis(one, "16278_Left_Trio", two, "26402_Left_Trio", three, "26759_Left_Trio", product=product, interval=interval, on="Frequency", F)\r\n- \r\n- one = triplets[triplets$Sample == "16278_Right",]\r\n- two = triplets[triplets$Sample == "26402_Right",]\r\n- three = triplets[triplets$Sample == "26759_Right",]\r\n- tripletAnalysis(one, "16278_Right_Trio", two, "26402_Right_Trio", three, "26759_Right_Trio", product=product, interval=interval, on="Frequency", F)\r\n+\ttriplets = split(triplets, triplets$Patient, drop=T)\r\n+\tprint(nrow(triplets))\r\n+\tfor(triplet in triplets){\r\n+\t\tsamples = unique(triplet$Sample)\r\n+\t\tone = triplet[triplet$Sample == samples[1],]\r\n+\t\ttwo = triplet[triplet$Sample == samples[2],]\r\n+\t\tthree = triplet[triplet$Sample == samples[3],]\r\n+\t\t\r\n+\t\tprint(paste(nrow(triplet), nrow(one), nrow(two), nrow(three)))\r\n+\t\ttripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="normalized_read_count", T)\r\n+\t}\r\n+\r\n+\tcat("<tr><td>Starting Frequency analysis</td></tr>", file=logfile, append=T)\r\n+\r\n+\tinterval = intervalFreq\r\n+\tintervalOrder = data.frame("interval"=paste(">", interval, sep=""), "intervalOrder"=1:length(interval))\r\n+\tproduct = data.frame("Titles"=rep(Titles, each=length(interval)), "interval"=rep(interval, times=10), "V_Segments"=rep(V_Segments, each=length(interval)), "J_Segments"=rep(J_Segments, each=length(interval)))\r\n+\r\n+\tfor(triplet in triplets){\r\n+\t\tsamples = unique(triplet$Sample)\r\n+\t\tone = triplet[triplet$Sample == samples[1],]\r\n+\t\ttwo = triplet[triplet$Sample == samples[2],]\r\n+\t\tthree = triplet[triplet$Sample == samples[3],]\r\n+\t\ttripletAnalysis(one, one[1,"uniqueID"], two, two[1,"uniqueID"], three, three[1,"uniqueID"], product=product, interval=interval, on="Frequency", F)\r\n+\t}\r\n } else {\r\n cat("", file="triplets.txt")\r\n }\r\n' |
b |
diff -r ed6885c85660 -r 75853bceec00 wrapper.sh --- a/wrapper.sh Wed Aug 31 05:31:47 2016 -0400 +++ b/wrapper.sh Tue Jan 17 07:24:44 2017 -0500 |
b |
@@ -46,7 +46,7 @@ oldLocus="" sample1="$(echo ${sample1} | tr -d '\r' | tr -d '\n')" sample2="$(echo ${sample2} | tr -d '\r' | tr -d '\n')" - tail -n+2 ${patient}_freq.txt | sed "s/>//" > tmp.txt + tail -n+2 "${patient}_freq.txt" | sed "s/>//" > tmp.txt echo "<div class='tabber'>" >> "$html" echo "<div class='tabbertab' title='Data frequency'>" >> "$html" echo "<table><tr><td style='vertical-align:top;'>" >> "$html" @@ -99,7 +99,7 @@ echo "<a href='${patient}_percent_freq.png'><img src='${patient}_percent_freq.png' width='1280' height='720' /></a></div>" >> "$html" echo "${scatterplot_tab}</tr></table></div>" >> "$html" - tail -n+2 ${patient}_reads.txt | sed "s/>//" > tmp.txt + tail -n+2 "${patient}_reads.txt" | sed "s/>//" > tmp.txt echo "<div class='tabbertab' title='Data reads'>" >> "$html" echo "<table><tr><td style='vertical-align:top;'>" >> "$html" echo "<table border = 1 class='result_table summary_table' id='summary_table_${patient}_reads'>" >> "$html" @@ -189,9 +189,9 @@ echo "$patient" html="${patient}.html" echo "<tr><td><a href='${patient}.html'>$patient</a></td></tr>" >> "index.html" - echo "$header" > $html + echo "$header" > "$html" oldLocus="" - tail -n+2 ${patient}_freq.txt | sed "s/>//" > tmp.txt + tail -n+2 "${patient}_freq.txt" | sed "s/>//" > tmp.txt echo "<div class='tabber'>" >> "$html" echo "<div class='tabbertab' title='Data frequency'>" >> "$html" echo "<table><tr><td style='vertical-align:top;'>" >> "$html" @@ -261,7 +261,7 @@ echo "<a href='${patient}_freq_indiv_all.png'><img src='${patient}_freq_indiv_all.png' width='1280' height='720' /></a><br /></div>" >> "$html" echo "${scatterplot_tab}</tr></table></div>" >> "$html" - tail -n+2 ${patient}_reads.txt | sed "s/>//" > tmp.txt + tail -n+2 "${patient}_reads.txt" | sed "s/>//" > tmp.txt echo "<div class='tabbertab' title='Data reads'>" >> "$html" echo "<table><tr><td style='vertical-align:top;'>" >> "$html" echo "<table border = 1 class='result_table summary_table' id='summary_table_${patient}_reads'>" >> "$html" |