# Annotated source of igblast/igblast.r @ 54:81b3eb11ed2c (draft)
#
# Commit message: "planemo upload commit 6c0195cc4de6a34e2c46d875be4fc2157a21cdf6"
# Author: rhpvorderman
# Date: Tue, 16 Nov 2021 15:42:32 +0000
# Parents: 124b7fd92a3e
# Children: (none)
#
# Viewer options: ignore whitespace changes (everywhere / within whitespace /
# at end of lines).
# Annotation columns: rev, line, source.
# Annotation record: rev 54, changeset 81b3eb11ed2c
# ("planemo upload commit 6c0195cc4de6a34e2c46d875be4fc2157a21cdf6"),
# author rhpvorderman, parents: 52 (viewer links: diff, changeset).
# Reshape a tab-separated IgBLAST result table into the 16-column layout
# written at the end of this script, emitting one output row per read.
#
# Usage: Rscript igblast.r <infile> <outfile>
#   infile   tab-separated table with a header row. Must provide the columns
#            X.reads_count (as named after read.table() has processed the
#            header), vSegment, dSegment, jSegment, cdr1aa, cdr2aa, cdr3aa and
#            cdr3nt. inFrame and noStop are used when present.
#   outfile  path of the tab-separated table to write.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop("usage: igblast.r <infile> <outfile>", call. = FALSE)
}

infile <- args[1]
outfile <- args[2]

blasted <- read.table(infile, header = TRUE, sep = "\t", fill = TRUE,
                      stringsAsFactors = FALSE, comment.char = "")

# Fail early with a readable message instead of an obscure subsetting error
# further down.
required_cols <- c("X.reads_count", "vSegment", "dSegment", "jSegment",
                   "cdr1aa", "cdr2aa", "cdr3aa", "cdr3nt")
absent_cols <- setdiff(required_cols, names(blasted))
if (length(absent_cols) > 0) {
  stop("input table lacks required column(s): ",
       paste(absent_cols, collapse = ", "), call. = FALSE)
}

# TRUE where a flag column equals `value` (TRUE or FALSE).
# read.table() turns a column that holds only true/false tokens into a logical
# vector, in which case comparing against the string "true" never matches;
# both representations are therefore handled here. NA counts as "no match".
flag_is <- function(x, value) {
  if (is.logical(x)) {
    !is.na(x) & x == value
  } else {
    !is.na(x) & x == tolower(as.character(value))
  }
}

n_rows <- nrow(blasted)

# seq_len()/rep() instead of 1:n and a bare scalar so that a header-only input
# (zero rows) is processed rather than aborting.
blasted$ID <- seq_len(n_rows)
blasted$VDJ.Frame <- rep("Out-of-frame", n_rows)

in_frame <- flag_is(blasted$inFrame, TRUE)
blasted$VDJ.Frame[in_frame & flag_is(blasted$noStop, FALSE)] <-
  "In-frame with stop codon"
blasted$VDJ.Frame[in_frame & flag_is(blasted$noStop, TRUE)] <- "In-frame"

blasted$Top.V.Gene <- blasted$vSegment
blasted$Top.D.Gene <- blasted$dSegment
blasted$Top.J.Gene <- blasted$jSegment
blasted$CDR1.Seq <- blasted$cdr1aa
blasted$CDR1.Length <- nchar(blasted$CDR1.Seq)
blasted$CDR2.Seq <- blasted$cdr2aa
blasted$CDR2.Length <- nchar(blasted$CDR2.Seq)
blasted$CDR3.Seq <- blasted$cdr3aa
blasted$CDR3.Length <- nchar(blasted$CDR3.Seq)
blasted$CDR3.Seq.DNA <- blasted$cdr3nt
blasted$CDR3.Length.DNA <- nchar(blasted$CDR3.Seq.DNA)
blasted$Strand <- rep("+/-", n_rows)
blasted$CDR3.Found.How <- rep("found", n_rows)

# An empty or missing CDR3 nucleotide sequence means no CDR3 was found.
cdr3_absent <- is.na(blasted$cdr3nt) | blasted$cdr3nt == ""
blasted$CDR3.Found.How[cdr3_absent] <- "NOT_FOUND"

blasted$AA.JUNCTION <- blasted$CDR3.Seq

# Output columns, in output order, under their internal (dotted) names.
out_cols <- c("ID", "VDJ.Frame", "Top.V.Gene", "Top.D.Gene", "Top.J.Gene",
              "CDR1.Seq", "CDR1.Length", "CDR2.Seq", "CDR2.Length",
              "CDR3.Seq", "CDR3.Length", "CDR3.Seq.DNA", "CDR3.Length.DNA",
              "Strand", "CDR3.Found.How", "AA.JUNCTION")

# Diagnostic kept from the original script, where it was an auto-printed
# top-level expression: lists expected columns that are absent. Note that
# "Functionality" is listed here but is never created by this script.
expected_cols <- c("X.reads_count", out_cols[-length(out_cols)],
                   "Functionality", "AA.JUNCTION")
print(expected_cols[!(expected_cols %in% names(blasted))])

read_counts <- blasted[["X.reads_count"]]
blasted <- blasted[, out_cols, drop = FALSE]
# The published headers are the internal names with dots replaced by spaces.
names(blasted) <- gsub(".", " ", out_cols, fixed = TRUE)

# Duplicate each row according to its read count, then renumber the rows.
if (n_rows > 0) {
  blasted <- blasted[rep(seq_len(n_rows), read_counts), , drop = FALSE]
}
blasted$ID <- seq_len(nrow(blasted))

write.table(blasted, outfile, quote = FALSE, sep = "\t",
            row.names = FALSE, col.names = TRUE)