Mercurial > repos > petr-novak > dante_ltr
comparison clean_ltr.R @ 3:6ae4a341d1f3 draft
"planemo upload commit 8bd6029a4de4a8f5031a5cc71303bb06217cc88a"
author | petr-novak |
---|---|
date | Tue, 03 May 2022 12:38:12 +0000 |
parents | 7b0bbe7477c4 |
children | 93d35ae65e1b |
comparison
equal
deleted
inserted
replaced
2:f131886ea194 | 3:6ae4a341d1f3 |
---|---|
106 TE_DLTP_info$seqs_representative, | 106 TE_DLTP_info$seqs_representative, |
107 word_size = word_size | 107 word_size = word_size |
108 ) | 108 ) |
109 | 109 |
110 TE_DLT_plus_DLP_info_multiplicity <- verify_based_on_multiplicity(TE_DLT_plus_DLP_info) | 110 TE_DLT_plus_DLP_info_multiplicity <- verify_based_on_multiplicity(TE_DLT_plus_DLP_info) |
111 | |
112 # create additional library from rank 2 verified by multiplicity | 111 # create additional library from rank 2 verified by multiplicity |
113 id_for_additional_library <- setdiff( | 112 id_for_additional_library <- setdiff( |
114 TE_DLT_plus_DLP_info_multiplicity$id_ok_mp_verified, | 113 TE_DLT_plus_DLP_info_multiplicity$id_ok_mp_verified, |
115 TE_DLT_plus_DLP_info_DLTP_verified$id_ok_verified) | 114 TE_DLT_plus_DLP_info_DLTP_verified$id_ok_verified) |
116 | 115 |
132 }else { | 131 }else { |
133 seq_representative <- TE_DLTP_info$seqs_representative | 132 seq_representative <- TE_DLTP_info$seqs_representative |
134 } | 133 } |
135 } | 134 } |
136 | 135 |
137 | |
138 # TE rank 3 | 136 # TE rank 3 |
139 TE_DL_info_DLTP_verified <- compare_TE_datasets( | 137 TE_DL_info_DLTP_verified <- compare_TE_datasets( |
140 s_te$DL, | 138 s_te$DL, |
141 TE_DLTP_info$seqs_representative, min_coverage = 0.98, | 139 TE_DLTP_info$seqs_representative, min_coverage = 0.98, |
142 ncpus = ncpus | 140 ncpus = ncpus |
167 c1 <- g$ID %in% ID[id_of_good_te] | 165 c1 <- g$ID %in% ID[id_of_good_te] |
168 c2 <- sapply(g$Parent, function(x)ifelse(length(x) == 0, "", x)) %in% ID[id_of_good_te] | 166 c2 <- sapply(g$Parent, function(x)ifelse(length(x) == 0, "", x)) %in% ID[id_of_good_te] |
169 | 167 |
170 gff_out <- g[c1 | c2] | 168 gff_out <- g[c1 | c2] |
171 | 169 |
172 | 170 gff_te <- gff_out[gff_out$type %in% "transposable_element"] |
173 writeXStringSet(seq_representative, paste0(opt$output, "_RM_lib.fasta")) | 171 gff_5ltr <- gff_out[gff_out$LTR %in% "5LTR"] |
172 gff_3ltr <- gff_out[gff_out$LTR %in% "3LTR"] | |
173 full_te <- getSeqNamed(s, gff_te) | |
174 ltr5 <- getSeqNamed(s, gff_5ltr) | |
175 ltr3 <- getSeqNamed(s, gff_3ltr) | |
176 inc <- gff_te$Rank != "DL" | |
177 | |
178 writeXStringSet(seq_representative, paste0(opt$output, "_RM_lib_non_redundant.fasta")) | |
179 writeXStringSet(full_te, paste0(opt$output, "_RM_lib_full_TE.fasta")) | |
180 writeXStringSet(ltr5, paste0(opt$output, "_RM_lib_5LTR.fasta")) | |
181 writeXStringSet(ltr3, paste0(opt$output, "_RM_lib_3LTR.fasta")) | |
182 | |
174 export(gff_out, paste0(opt$output, "_clean.gff3"), format = "gff3") | 183 export(gff_out, paste0(opt$output, "_clean.gff3"), format = "gff3") |
175 | 184 |
185 lv <- sort(unique(gff_te$Final_Classification)) | |
186 te_count <- table(factor(gff_te$Final_Classification, levels=lv)) | |
187 | |
188 pdf(paste0(opt$output, "_summary.pdf"), width = 13, height=8, pointsize = 10) | |
189 par(mfrow=c(1,2), mar=c(5,7,2,0), las=1) | |
190 boxplot(width(gff_te) ~ factor(gff_te$Final_Classification, levels=lv), | |
191 horizontal = TRUE, xlab="length[bp]", ylab="", | |
192 names = paste0(gsub("^.+[|]", "", lv), " (", te_count, ")"), | |
193 main = 'Full TE', at = seq_along(lv)*4 | |
194 ) | |
195 boxplot(width(gff_te[inc]) ~ factor(gff_te$Final_Classification[inc], levels=lv), | |
196 horizontal = TRUE, xlab="length[bp]", ylab="", | |
197 names = rep("", length(lv)), | |
198 main = 'Full TE', at = seq_along(lv)*4-1, add=TRUE, col=2 | |
199 ) | |
200 par(mar=c(5,0,2,7)) | |
201 boxplot(width(gff_5ltr) ~ factor(gff_5ltr$Final_Classification, levels=lv), | |
202 horizontal = TRUE, xlab="length[bp]", ylab="", | |
203 names = rep("", length(lv)), | |
204 main = "5'LTR", at = seq_along(lv)*4 | |
205 ) | |
206 boxplot(width(gff_5ltr[inc]) ~ factor(gff_5ltr$Final_Classification[inc], levels=lv), | |
207 horizontal = TRUE, xlab="length[bp]", ylab="", | |
208 names = rep("", length(lv)), | |
209 main = "5'LTR", at = seq_along(lv)*4-1, add=TRUE, col=2 | |
210 ) | |
211 legend('bottomright', col=c("grey","2"), legend=c("All TE", "TE with PBS/TSD"), pch=15) | |
212 dev.off() |