Mercurial > repos > ecology > retrieve_bold
annotate retrieve_bold.R @ 0:0608a259f2da draft default tip
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
author | ecology |
---|---|
date | Fri, 21 Jun 2024 08:55:50 +0000 |
parents | |
children |
rev | line source |
---|---|
0
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
1 #!/bin/Rscript |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
2 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
3 library(bold) |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
4 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
# Command-line arguments: args[1] is the path of the taxon list file (read
# further down); args[2] to args[6] each carry zero or more marker codes as a
# comma-separated string, with the literal "None" standing for "nothing
# selected" (it is filtered out below).
args <- commandArgs(trailingOnly = TRUE)

# Indexing past the end of `args` yields NA, which paste() would turn into the
# literal string "NA" and thereby into a bogus marker; drop absent arguments
# before joining.
marker_args <- args[2:6]
marker_args <- marker_args[!is.na(marker_args)]

raw_marker_list <- paste(marker_args, collapse = ",")
marker_list_W_none <- unique(
  trimws(strsplit(raw_marker_list, ",", fixed = TRUE)[[1]])
)

# Remove the "None" placeholder and blank fragments (e.g. from "a,,b"); a blank
# marker would otherwise match records whose marker code is empty.
marker_list <- marker_list_W_none[
  marker_list_W_none != "None" & nzchar(marker_list_W_none)
]
cat("researched marker(s):", marker_list, "\n\n")
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
11 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
12 # Function that searches BOLD for a taxon and downloads its available sequences (get_fasta). NOTE: the subtaxa-retrieval function (get_subtaxa) this comment originally referred to is not defined in this file. |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
#' Download BOLD records for one taxon and write them to a FASTA file
#'
#' Queries BOLD through `bold_seqspec()`, keeps the records whose `markercode`
#' is one of `arg_mark` and that carry a nucleotide sequence, and writes them
#' to `filename` as two-line FASTA entries. Each header has the form
#' `>processid|taxon|markercode`, followed by `|genbank_accession` when the
#' record has one. `taxon` in the header is the most specific non-empty rank
#' among species, genus, family, order, class and phylum.
#'
#' Records are written grouped by marker, in the order the markers appear in
#' `arg_mark`. Nothing is written when no record qualifies; a message is
#' printed instead.
#'
#' @param taxon Taxon name passed to `bold_seqspec(taxon = )`.
#' @param filename Path of the FASTA file to write.
#' @param arg_mark Character vector of marker codes to keep.
#' @return `NULL`, invisibly; the function is called for its side effects
#'   (console messages and the FASTA file).
get_fasta <- function(taxon, filename, arg_mark) {
  # Empty strings are how missing fields show up in the BOLD table; treat them
  # as NA so that a single is.na() test covers both cases.
  blank_to_na <- function(v) {
    v <- as.character(v)
    v[!is.na(v) & v == ""] <- NA_character_
    v
  }

  bold_res <- bold_seqspec(taxon = taxon)

  # NOTE(review): bold_seqspec() appears to return a non-data-frame value (NA)
  # when BOLD has nothing for the query -- confirm against the bold package
  # documentation. Without this guard the `$` below fails on such a value.
  if (!is.data.frame(bold_res)) {
    cat("no records were returned by BOLD for", taxon, "\n")
    return(invisible(NULL))
  }
  cat(taxon, "marker list:", unique(bold_res$markercode), "\n")

  # %in% never yields NA, so records with a missing marker code are excluded
  # rather than turned into all-NA rows (which `==` inside `[` would do).
  hits <- bold_res[bold_res$markercode %in% arg_mark, , drop = FALSE]
  # Keep the records grouped by marker in `arg_mark` order; order() is stable,
  # so the BOLD row order is preserved within each marker.
  hits <- hits[order(match(hits$markercode, arg_mark)), , drop = FALSE]
  # A record without nucleotides would be written as the literal sequence "NA".
  has_sequence <- !is.na(blank_to_na(hits$nucleotides))
  hits <- hits[has_sequence, , drop = FALSE]

  if (nrow(hits) == 0) {
    cat(
      "no sequences were found with selected marker(s) for", taxon,
      "see existing marker list above", "\n"
    )
    return(invisible(NULL))
  }

  # Taxon label: first non-missing rank, from most to least specific.
  rank_columns <- c(
    "species_name", "genus_name", "family_name",
    "order_name", "class_name", "phylum_name"
  )
  tax_label <- rep(NA_character_, nrow(hits))
  for (rank in rank_columns) {
    rank_values <- blank_to_na(hits[[rank]])
    fill <- is.na(tax_label) & !is.na(rank_values)
    tax_label[fill] <- rank_values[fill]
  }

  # The GenBank accession is an optional trailing header field.
  accession <- blank_to_na(hits$genbank_accession)
  accession_suffix <- ifelse(is.na(accession), "", paste0("|", accession))

  headers <- paste0(
    paste(blank_to_na(hits$processid), tax_label, hits$markercode, sep = "|"),
    accession_suffix
  )

  # rbind() stacks headers over sequences; as.vector() reads the matrix column
  # by column, which interleaves them as header, sequence, header, sequence...
  fasta_lines <- as.vector(rbind(paste0(">", headers), hits$nucleotides))
  writeLines(fasta_lines, filename)
  invisible(NULL)
}
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
40 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
41 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
42 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
# Taxa to query: one taxon name per line of the file given as args[1].
# Handing the path straight to readLines() lets it open and close the file
# itself, instead of leaving behind a connection object created with file().
taxlist <- trimws(readLines(as.character(args[1])))
# Blank lines are not taxa; drop them rather than querying BOLD with an empty
# name and writing a file called "bold.fasta".
taxlist <- taxlist[nzchar(taxlist)]

# Iterate over the names themselves so that an empty list runs zero times
# (1:length(x) would iterate over 1 and 0 in that case).
for (taxon in taxlist) {
  cat("Processing ", taxon, "\n")
  # A failure for one taxon is reported and must not stop the remaining ones.
  tryCatch(
    get_fasta(taxon, paste0(taxon, "bold", ".fasta"), marker_list),
    error = function(e) {
      cat("ERROR :", conditionMessage(e), "\n")
    }
  )
}
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
52 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
53 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
54 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
55 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
56 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
57 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
58 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
59 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
60 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
61 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
62 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
63 |
0608a259f2da
planemo upload for repository https://github.com/wpearman1996/MARES_database_pipeline/tree/master commit 853190e24a7acfe02bbfb1c392ac55f9b9a9e7da
ecology
parents:
diff
changeset
|
64 |