# HG changeset patch
# User davidvanzessen
# Date 1492611661 14400
# Node ID b6936fb52ab9702aafeb965ad1421d77c4745287
# Parent f37e072affc01eb38c6411833177cb50b8437174
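Uploaded: compute Median.CDR3.l as median(as.numeric(CDR3.Length), na.rm=T) in all junction analysis tables, remove the debug print() calls for mean/median N, restrict the "present in more than one replicate" dataset to productive sequences, and rename the Median.CDR3 report column header to CDR3.Length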
diff -r f37e072affc0 -r b6936fb52ab9 report_clonality/RScript.r
--- a/report_clonality/RScript.r Wed Apr 19 08:05:01 2017 -0400
+++ b/report_clonality/RScript.r Wed Apr 19 10:21:01 2017 -0400
@@ -768,7 +768,7 @@
Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
- Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))),
+ Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
by=c("Sample")])
newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
write.table(newData, "junctionAnalysisProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
@@ -787,7 +787,7 @@
Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
- Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length, na.rm=T)))),
+ Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
by=c("Sample")])
newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
write.table(newData, "junctionAnalysisProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
@@ -806,7 +806,7 @@
Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
Total.N=mean(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
- Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
+ Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
by=c("Sample")])
newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
write.table(newData, "junctionAnalysisUnProd_mean_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
@@ -825,7 +825,7 @@
Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5D.REGION.trimmed.nt.nb", "X3D.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
Total.N=num_median(rowSums(.SD[,c("N.REGION.nt.nb", "N1.REGION.nt.nb", "N2.REGION.nt.nb", "N3.REGION.nt.nb", "N4.REGION.nt.nb"), with=F], na.rm=T)),
Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5D.nt.nb", "P3D.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
- Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
+ Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
by=c("Sample")])
newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
write.table(newData, "junctionAnalysisUnProd_median_wD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
@@ -841,7 +841,7 @@
Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T),
Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
- Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
+ Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
by=c("Sample")])
newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
write.table(newData, "junctionAnalysisProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
@@ -855,13 +855,11 @@
Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T),
Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
- Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
+ Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
by=c("Sample")])
newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
write.table(newData, "junctionAnalysisProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
- print(paste("mean N:", mean(UNPROD.no.D$N.REGION.nt.nb, na.rm=T)))
-
newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N,
VH.DEL=mean(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
P1=mean(.SD$P3V.nt.nb, na.rm=T),
@@ -871,12 +869,11 @@
Total.Del=mean(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
Total.N=mean(.SD$N.REGION.nt.nb, na.rm=T),
Total.P=mean(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
- Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
+ Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
by=c("Sample")])
newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
write.table(newData, "junctionAnalysisUnProd_mean_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
- print(paste("median N:", num_median(UNPROD.no.D$N.REGION.nt.nb, na.rm=T)))
newData = data.frame(data.table(UNPROD.no.D)[,list(unique=.N,
VH.DEL=num_median(.SD$X3V.REGION.trimmed.nt.nb, na.rm=T),
@@ -887,7 +884,7 @@
Total.Del=num_median(rowSums(.SD[,c("X3V.REGION.trimmed.nt.nb", "X5J.REGION.trimmed.nt.nb"), with=F], na.rm=T)),
Total.N=num_median(.SD$N.REGION.nt.nb, na.rm=T),
Total.P=num_median(rowSums(.SD[,c("P3V.nt.nb", "P5J.nt.nb"), with=F], na.rm=T)),
- Median.CDR3.l=as.double(as.numeric(median(.SD$CDR3.Length, na.rm=T)))),
+ Median.CDR3.l=as.double(median(as.numeric(.SD$CDR3.Length), na.rm=T))),
by=c("Sample")])
newData[,sapply(newData, is.numeric)] = round(newData[,sapply(newData, is.numeric)],1)
write.table(newData, "junctionAnalysisUnProd_median_nD.txt" , sep="\t",quote=F,na="-",row.names=F,col.names=F)
@@ -927,7 +924,6 @@
ggsave("DReadingFrame.pdf", D.REGION.reading.frame)
-
# ---------------------- AA composition in CDR3 ----------------------
AACDR3 = PRODF[,c("Sample", "CDR3.Seq")]
@@ -977,6 +973,7 @@
#generate the "Sequences that are present in more than one replicate" dataset
clonaltype.in.replicates = inputdata
+clonaltype.in.replicates = clonaltype.in.replicates[clonaltype.in.replicates$Functionality %in% c("productive (see comment)","productive"),]
clonaltype.in.replicates = na.omit(clonaltype.in.replicates)
clonaltype = unlist(strsplit(clonaltype, ","))
@@ -1023,5 +1020,3 @@
-
-
diff -r f37e072affc0 -r b6936fb52ab9 report_clonality/r_wrapper.sh
--- a/report_clonality/r_wrapper.sh Wed Apr 19 08:05:01 2017 -0400
+++ b/report_clonality/r_wrapper.sh Wed Apr 19 10:21:01 2017 -0400
@@ -273,28 +273,28 @@
echo "" >> $outputFile
echo "
Unique rearrangements with a V, D and J gene assigned
" >> $outputFile
- echo " Productive meanDonor | Number of sequences | V.DEL | P1 | N1 | P2 | DEL.D | D.DEL | P3 | N2 | P4 | DEL.J | Total.Del | Total.N | Total.P | Median.CDR3 |
" >> $outputFile
+ echo " Productive meanDonor | Number of sequences | V.DEL | P1 | N1 | P2 | DEL.D | D.DEL | P3 | N2 | P4 | DEL.J | Total.Del | Total.N | Total.P | CDR3.Length |
" >> $outputFile
while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median
do
echo "$Sample | $unique | $VDEL | $P1 | $N1 | $P2 | $DELD | $DDEL | $P3 | $N2 | $P4 | $DELJ | $TotalDel | $TotalN | $TotalP | $median |
" >> $outputFile
done < $outputDir/junctionAnalysisProd_mean_wD.txt
echo "
" >> $outputFile
- echo " Unproductive meanDonor | Number of sequences | V.DEL | P1 | N1 | P2 | DEL.D | D.DEL | P3 | N2 | P4 | DEL.J | Total.Del | Total.N | Total.P | Median.CDR3 |
" >> $outputFile
+ echo " Unproductive meanDonor | Number of sequences | V.DEL | P1 | N1 | P2 | DEL.D | D.DEL | P3 | N2 | P4 | DEL.J | Total.Del | Total.N | Total.P | CDR3.Length |
" >> $outputFile
while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median
do
echo "$Sample | $unique | $VDEL | $P1 | $N1 | $P2 | $DELD | $DDEL | $P3 | $N2 | $P4 | $DELJ | $TotalDel | $TotalN | $TotalP | - |
" >> $outputFile
done < $outputDir/junctionAnalysisUnProd_mean_wD.txt
echo "
" >> $outputFile
- echo " Productive medianDonor | Number of sequences | V.DEL | P1 | N1 | P2 | DEL.D | D.DEL | P3 | N2 | P4 | DEL.J | Total.Del | Total.N | Total.P | Median.CDR3 |
" >> $outputFile
+ echo " Productive medianDonor | Number of sequences | V.DEL | P1 | N1 | P2 | DEL.D | D.DEL | P3 | N2 | P4 | DEL.J | Total.Del | Total.N | Total.P | CDR3.Length |
" >> $outputFile
while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median
do
echo "$Sample | $unique | $VDEL | $P1 | $N1 | $P2 | $DELD | $DDEL | $P3 | $N2 | $P4 | $DELJ | $TotalDel | $TotalN | $TotalP | $median |
" >> $outputFile
done < $outputDir/junctionAnalysisProd_median_wD.txt
echo "
" >> $outputFile
- echo " Unproductive medianDonor | Number of sequences | V.DEL | P1 | N1 | P2 | DEL.D | D.DEL | P3 | N2 | P4 | DEL.J | Total.Del | Total.N | Total.P | Median.CDR3 |
" >> $outputFile
+ echo " Unproductive medianDonor | Number of sequences | V.DEL | P1 | N1 | P2 | DEL.D | D.DEL | P3 | N2 | P4 | DEL.J | Total.Del | Total.N | Total.P | CDR3.Length |
" >> $outputFile
while read Sample unique VDEL P1 N1 P2 DELD DDEL P3 N2 P4 DELJ TotalDel TotalN TotalP median
do
echo "$Sample | $unique | $VDEL | $P1 | $N1 | $P2 | $DELD | $DDEL | $P3 | $N2 | $P4 | $DELJ | $TotalDel | $TotalN | $TotalP | - |
" >> $outputFile
@@ -303,28 +303,28 @@
# again for no-d
echo "Unique rearrangements with only a V and J gene assigned
" >> $outputFile
- echo " Productive meanDonor | Number of sequences | V.DEL | P1 | N | P2 | DEL.J | Total.Del | Total.N | Total.P | Median.CDR3 |
" >> $outputFile
+ echo " Productive meanDonor | Number of sequences | V.DEL | P1 | N | P2 | DEL.J | Total.Del | Total.N | Total.P | CDR3.Length |
" >> $outputFile
while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median
do
echo "$Sample | $unique | $VDEL | $P1 | $N1 | $P2 | $DELJ | $TotalDel | $TotalN | $TotalP | $median |
" >> $outputFile
done < $outputDir/junctionAnalysisProd_mean_nD.txt
echo "
" >> $outputFile
- echo " Unproductive meanDonor | Number of sequences | V.DEL | P1 | N | P2 | DEL.J | Total.Del | Total.N | Total.P | Median.CDR3 |
" >> $outputFile
+ echo " Unproductive meanDonor | Number of sequences | V.DEL | P1 | N | P2 | DEL.J | Total.Del | Total.N | Total.P | CDR3.Length |
" >> $outputFile
while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median
do
echo "$Sample | $unique | $VDEL | $P1 | $N1 | $P2 | $DELJ | $TotalDel | $TotalN | $TotalP | - |
" >> $outputFile
done < $outputDir/junctionAnalysisUnProd_mean_nD.txt
echo "
" >> $outputFile
- echo " Productive medianDonor | Number of sequences | V.DEL | P1 | N | P2 | DEL.J | Total.Del | Total.N | Total.P | Median.CDR3 |
" >> $outputFile
+ echo " Productive medianDonor | Number of sequences | V.DEL | P1 | N | P2 | DEL.J | Total.Del | Total.N | Total.P | CDR3.Length |
" >> $outputFile
while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median
do
echo "$Sample | $unique | $VDEL | $P1 | $N1 | $P2 | $DELJ | $TotalDel | $TotalN | $TotalP | $median |
" >> $outputFile
done < $outputDir/junctionAnalysisProd_median_nD.txt
echo "
" >> $outputFile
- echo " Unproductive medianDonor | Number of sequences | V.DEL | P1 | N | P2 | DEL.J | Total.Del | Total.N | Total.P | Median.CDR3 |
" >> $outputFile
+ echo " Unproductive medianDonor | Number of sequences | V.DEL | P1 | N | P2 | DEL.J | Total.Del | Total.N | Total.P | CDR3.Length |
" >> $outputFile
while read Sample unique VDEL P1 N1 P2 DELJ TotalDel TotalN TotalP median
do
echo "$Sample | $unique | $VDEL | $P1 | $N1 | $P2 | $DELJ | $TotalDel | $TotalN | $TotalP | - |
" >> $outputFile