Previous changeset: 24:d5d203d38c8a (2017-02-01) | Next changeset: 26:28fbbdfd7a87 (2017-02-13)
Commit message:
Uploaded
modified:
report_clonality/RScript.r
report_clonality/r_wrapper.sh
diff -r d5d203d38c8a -r 94765af0db1f report_clonality/RScript.r
--- a/report_clonality/RScript.r	Wed Feb 01 09:48:38 2017 -0500
+++ b/report_clonality/RScript.r	Thu Feb 09 07:20:09 2017 -0500
@@ -158,7 +158,7 @@
 #write the complete dataset that is left over, will be the input if 'none' for clonaltype and 'no' for filterproductive
 write.table(PRODF, "allUnique.txt", sep="\t",quote=F,row.names=F,col.names=T)
 #write.table(PRODF, "allUnique.csv", sep=",",quote=F,row.names=F,col.names=T)
-write.table(UNPROD, "allUnproductive.csv", sep=",",quote=F,row.names=F,col.names=T)
+write.table(UNPROD, "allUnproductive.txt", sep="\t",quote=F,row.names=F,col.names=T)
 
 print("SAMPLE TABLE:")
 print(table(PRODF$Sample))
@@ -697,6 +697,7 @@
 }
 
 bak = PRODF
+bakun = UNPROD
 
 imgtcolumns = c("X3V.REGION.trimmed.nt.nb","P3V.nt.nb", "N1.REGION.nt.nb", "P5D.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "P3D.nt.nb", "N2.REGION.nt.nb", "P5J.nt.nb", "X5J.REGION.trimmed.nt.nb", "X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb")
 if(all(imgtcolumns %in% colnames(inputdata)))
@@ -724,8 +725,15 @@
 PRODF.with.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) > 2,]
 PRODF.no.D = PRODF[nchar(PRODF$Top.D.Gene, keepNA=F) < 4,]
 
+ UNPROD.with.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) > 2,]
+ UNPROD.no.D = UNPROD[nchar(UNPROD$Top.D.Gene, keepNA=F) < 4,]
+ 
 num_median = function(x, na.rm=T) { as.numeric(median(x, na.rm=na.rm)) }
- 
+
+ print("---- table prod.with.d cdr3.length ----")
+ print(table(PRODF.with.D$CDR3.Length, useNA="ifany"))
+ print(median(PRODF.with.D$CDR3.Length, na.rm=T))
+
 newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, 
 VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
 P1=mean(.SD$P3V.nt.nb, na.rm=T),
@@ -740,7 +748,7 @@
 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
- Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
+ Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))),
 by=c("Sample")])
 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
 write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
@@ -759,12 +767,16 @@
 			Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
 			Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
 			Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
-			Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
+			Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))),
 by=c("Sample")])
 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
 write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
 
- newData = data.frame(data.table(PRODF.with.D)[,list(unique=.N, 
+ print("---- table unprod.with.d cdr3.length ----")
+ print(table(UNPROD.with.D$CDR3.Length, useNA="ifany"))
+ print(median(UNPROD.with.D$CDR3
[...]
b"), with=F], na.rm=T)),
@@ -842,12 +853,12 @@
 Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
 Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
 Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
- Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
+ Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
 by=c("Sample")])
 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
 write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
 
- newData = data.frame(data.table(PRODF.no.D)[,list(unique=.N, 
+ newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N, 
 VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
 P1=num_median(.SD$P3V.nt.nb, na.rm=T),
 N1=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
@@ -856,13 +867,14 @@
 Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
 Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb"), with=F], na.rm=T)),
 Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
- Median.CDR3.l=as.double(median(.SD$CDR3.Length))),
+ Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
 			by=c("Sample")])
 newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
 write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
 }
 
 PRODF = bak
+UNPROD = bakun
 
 
 # ---------------------- D reading frame ----------------------
@@ -939,3 +951,54 @@
 median.aa.l = data.frame(data.table(PRODF)[, list(median=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T), na.rm=T))), by=c("Sample")])
 write.table(median.aa.l, "AAMedianBySample.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
 
+
+#generate the "Sequences that are present in more than one replicate" dataset
+clonaltype.in.replicates = inputdata
+clonaltype = unlist(strsplit(clonaltype, ","))
+clonaltype = clonaltype[-which(clonaltype == "Sample")]
+
+clonaltype.in.replicates$clonaltype = do.call(paste, c(clonaltype.in.replicates[clonaltype], sep = ":"))
+clonaltype.in.replicates = clonaltype.in.replicates[,c("clonaltype","Replicate", "ID", "Sequence", "Sample")]
+
+print(head(clonaltype.in.replicates))
+
+clonaltype.counts = data.frame(table(clonaltype.in.replicates$clonaltype))
+names(clonaltype.counts) = c("clonaltype", "coincidence")
+
+print(head(clonaltype.counts))
+
+clonaltype.counts = clonaltype.counts[clonaltype.counts$coincidence > 1,]
+
+head(clonaltype.counts)
+
+clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$clonaltype %in% clonaltype.counts$clonaltype,]
+clonaltype.in.replicates = merge(clonaltype.in.replicates, clonaltype.counts, by="clonaltype")
+print(head(clonaltype.in.replicates))
+clonaltype.in.replicates = clonaltype.in.replicates[order(clonaltype.in.replicates$clonaltype),c("coincidence","clonaltype", "Sample", "Replicate", "ID", "Sequence")]
+
+write.table(clonaltype.in.replicates, "clonaltypes_replicates.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=T)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r d5d203d38c8a -r 94765af0db1f report_clonality/r_wrapper.sh
--- a/report_clonality/r_wrapper.sh	Wed Feb 01 09:48:38 2017 -0500
+++ b/report_clonality/r_wrapper.sh	Thu Feb 09 07:20:09 2017 -0500
@@ -396,6 +396,7 @@
 echo "<tr><td colspan='2' style='background-color:#E0E0E0;'>Clonality</td></tr>" >> $outputFile
 echo "<tr><td>The dataset used to calculate clonality score (Unique based on clonaltype, $clonalType)</td><td><a href='clonalityComplete.txt'>Download</a></td></tr>" >> $outputFile
+echo "<tr><td>Sequences that are present in more than one replicate</td><td><a href='clonaltypes_replicates.txt'>Download</a></td></tr>" >> $outputFile
 echo "</table>" >> $outputFile
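The added table row links the report to clonaltypes_replicates.txt, the file produced by the new block at the end of the RScript.r diff above: build a clonaltype key from the selected columns, count how often each key occurs, and keep only the keys seen more than once. The sketch below re-creates that logic on an invented toy data frame (the sample, replicate, and gene values are made up, and the key columns are chosen arbitrarily); it illustrates the approach and is not the tool's own code:

# Invented example input: one sample with three replicates and a few clones.
df <- data.frame(
  Sample     = "S1",
  Replicate  = c(1, 1, 2, 2, 3),
  ID         = paste0("seq", 1:5),
  Sequence   = c("CARDY", "CARGG", "CARDY", "CARTT", "CARDY"),
  Top.V.Gene = c("IGHV1", "IGHV3", "IGHV1", "IGHV4", "IGHV1"),
  stringsAsFactors = FALSE
)

# Build the clonaltype key from the chosen columns (a made-up pair here;
# the real script derives them from its 'clonaltype' parameter minus "Sample").
key.cols <- c("Top.V.Gene", "Sequence")
df$clonaltype <- do.call(paste, c(df[key.cols], sep = ":"))

# Count how often each clonaltype occurs and keep the ones seen more than once.
counts <- data.frame(table(df$clonaltype))
names(counts) <- c("clonaltype", "coincidence")
counts <- counts[counts$coincidence > 1, ]

# Attach the coincidence count to the recurring clonaltypes and order by key,
# giving the same shape as clonaltypes_replicates.txt.
out <- merge(df[df$clonaltype %in% counts$clonaltype, ], counts, by = "clonaltype")
out <- out[order(out$clonaltype),
           c("coincidence", "clonaltype", "Sample", "Replicate", "ID", "Sequence")]
print(out)
# write.table(out, "clonaltypes_replicates.txt", sep = "\t",
#             quote = FALSE, na = "-", row.names = FALSE, col.names = TRUE)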