Mercurial > repos > dlalgroup > simtext_app
annotate pubmed_by_queries.R @ 2:d7b190591e63 draft default tip
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
| author | dlalgroup | 
|---|---|
| date | Thu, 24 Sep 2020 05:44:58 +0000 | 
| parents | 34ed44f3f85c | 
| children | 
| rev | line source | 
|---|---|
| 0 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 1 #!/usr/bin/env Rscript | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 2 #tool: pubmed_by_queries | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 3 # | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 4 #This tool uses a set of search queries to download a defined number of abstracts or PMIDs per search query from PubMed. | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 5 #PubMed's search rules and syntax apply. | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 6 # | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 7 #Input: Tab-delimited table with search queries in a column starting with "ID_", e.g. "ID_gene" if search queries are genes. | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 8 # | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 9 # Output: Input table with additional columns with PMIDs or abstracts (--abstract) from PubMed. | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 10 # | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 11 #Usage: $ pubmed_by_queries.R [-h] [-i INPUT] [-o OUTPUT] [-n NUMBER] [-a] [-k KEY] [--install_packages] | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 12 # | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 13 # optional arguments: | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 14 # -h, --help show this help message and exit | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 15 # -i INPUT, --input INPUT input file name. add path if file is not in working directory | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 16 # -o OUTPUT, --output OUTPUT output file name. [default "pubmed_by_queries_output"] | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 17 # -n NUMBER, --number NUMBER number of PMIDs or abstracts to save per ID [default "5"] | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 18 # -a, --abstract if abstracts instead of PMIDs should be retrieved use --abstract | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 19 # -k KEY, --key KEY if NCBI API key is available, add it to speed up the fetching of pubmed data | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 20 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 21 if ( '--install_packages' %in% commandArgs()) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 22 print('Installing packages') | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 23 if (!require('argparse')) install.packages('argparse',repo="http://cran.rstudio.com/"); | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 24 if (!require('easyPubMed')) install.packages('easyPubMed',repo="http://cran.rstudio.com/"); | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 25 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 26 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 27 suppressPackageStartupMessages(library("argparse")) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 28 suppressPackageStartupMessages(library("easyPubMed")) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 29 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 30 parser <- ArgumentParser() | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 31 parser$add_argument("-i", "--input", | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 32 help = "input fie name. add path if file is not in working directory") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 33 parser$add_argument("-o", "--output", default="pubmed_by_queries_output", | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 34 help = "output file name. [default \"%(default)s\"]") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 35 parser$add_argument("-n", "--number", type="integer", default=5, | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 36 help="Number of PMIDs (or abstracts) to save per ID. [default \"%(default)s\"]") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 37 parser$add_argument("-a", "--abstract", action="store_true", default=FALSE, | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 38 help="if abstracts instead of PMIDs should be retrieved use --abstracts ") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 39 parser$add_argument("-k", "--key", type="character", | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 40 help="if ncbi API key is available, add it to speed up the download of pubmed data") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 41 parser$add_argument("--install_packages", action="store_true", default=FALSE, | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 42 help="If you want to auto install missing required packages.") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 43 args <- parser$parse_args() | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 44 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 45 MAX_WEB_TRIES = 100 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 46 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 47 data = read.delim(args$input, stringsAsFactors=FALSE) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 48 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 49 id_col_index <- grep("ID_", names(data)) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 50 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 51 pubmed_data_in_table <- function(data, row, query, number, key, abstract){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 52 if (is.null(query)){print(data)} | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 53 pubmed_search <- get_pubmed_ids(query, api_key = key) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 54 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 55 if(as.numeric(pubmed_search$Count) == 0){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 56 cat("No PubMed result for the following query: ", query, "\n") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 57 return(data) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 58 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 59 } else if (abstract == FALSE) { # fetch PMIDs | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 60 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 61 myPubmedURL <- paste("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?", | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 62 "db=pubmed&retmax=", number, "&term=", pubmed_search$OriginalQuery, "&usehistory=n", sep = "") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 63 # get ids | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 64 idXML <- c() | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 65 for (i in 1:MAX_WEB_TRIES){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 66 tryCatch({ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 67 IDconnect <- suppressWarnings(url(myPubmedURL, open = "rb", encoding = "UTF8")) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 68 idXML <- suppressWarnings(readLines(IDconnect, warn = FALSE, encoding = "UTF8")) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 69 suppressWarnings(close(IDconnect)) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 70 break | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 71 }, error = function(e) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 72 print(paste('Error getting URL, sleeping',2*i,'seconds.')) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 73 print(e) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 74 Sys.sleep(time = 2*i) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 75 }) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 76 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 77 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 78 PMIDs = c() | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 79 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 80 for (i in 1:length(idXML)) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 81 if (grepl("^<Id>", idXML[i])) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 82 pmid <- custom_grep(idXML[i], tag = "Id", format = "char") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 83 PMIDs <- c(PMIDs, as.character(pmid[1])) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 84 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 85 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 86 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 87 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 88 if(length(PMIDs)>0){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 89 data[row,sapply(1:length(PMIDs),function(i){paste0("PMID_",i)})] <- PMIDs | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 90 cat(length(PMIDs)," PMIDs for ",query, " are added in the table.", "\n") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 91 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 92 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 93 return(data) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 94 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 95 } else if (abstract == TRUE) { # fetch abstracts and title text | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 96 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 97 efetch_url = paste("https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?", | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 98 "db=pubmed&WebEnv=", pubmed_search$WebEnv, "&query_key=", pubmed_search$QueryKey, | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 99 "&retstart=", 0, "&retmax=", number, | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 100 "&rettype=", "null","&retmode=", "xml", sep = "") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 101 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 102 api_key <- pubmed_search$APIkey | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 103 if (!is.null(api_key)) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 104 efetch_url <- paste(efetch_url, "&api_key=", api_key, sep = "") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 105 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 106 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 107 # initialize | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 108 out.data <- NULL | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 109 try_num <- 1 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 110 t_0 <- Sys.time() | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 111 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 112 # Try to fetch results | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 113 while(is.null(out.data)) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 114 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 115 # Timing check: kill at 3 min | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 116 if (try_num > 1){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 117 Sys.sleep(time = 2*try_num) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 118 cat("Problem to receive PubMed data or error is received. Please wait. Try number:",try_num,"\n") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 119 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 120 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 121 t_1 <- Sys.time() | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 122 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 123 if(as.numeric(difftime(t_1, t_0, units = "mins")) > 3){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 124 message("Killing the request! Something is not working. Please, try again later","\n") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 125 return(data) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 126 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 127 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 128 # ENTREZ server connect | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 129 out.data <- tryCatch({ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 130 tmpConnect <- suppressWarnings(url(efetch_url, open = "rb", encoding = "UTF8")) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 131 suppressWarnings(readLines(tmpConnect, warn = FALSE, encoding = "UTF8")) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 132 }, error = function(e) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 133 print(e) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 134 }, finally = { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 135 try(suppressWarnings(close(tmpConnect)), silent = TRUE) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 136 }) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 137 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 138 # Check if error | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 139 if (!is.null(out.data) && | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 140 class(out.data) == "character" && | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 141 grepl("<ERROR>", substr(paste(utils::head(out.data, n = 100), collapse = ""), 1, 250))) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 142 out.data <- NULL | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 143 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 144 try_num <- try_num + 1 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 145 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 146 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 147 if (is.null(out.data)) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 148 message("Killing the request! Something is not working. Please, try again later","\n") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 149 return(data) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 150 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 151 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 152 # process xml data | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 153 xml_data <- paste(out.data, collapse = "") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 154 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 155 # articles to list | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 156 xml_data <- strsplit(xml_data, "<PubmedArticle(>|[[:space:]]+?.*>)")[[1]][-1] | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 157 xml_data <- sapply(xml_data, function(x) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 158 #trim extra stuff at the end of the record | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 159 if (!grepl("</PubmedArticle>$", x)) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 160 x <- sub("(^.*</PubmedArticle>).*$", "\\1", x) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 161 # Rebuild XML structure and proceed | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 162 x <- paste("<PubmedArticle>", x) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 163 gsub("[[:space:]]{2,}", " ", x)}, | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 164 USE.NAMES = FALSE, simplify = TRUE) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 165 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 166 #titles | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 167 titles = sapply(xml_data, function(x){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 168 x = custom_grep(x, tag="ArticleTitle", format="char") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 169 x <- gsub("</{0,1}i>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 170 x <- gsub("</{0,1}b>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 171 x <- gsub("</{0,1}sub>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 172 x <- gsub("</{0,1}exp>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 173 if (length(x) > 1){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 174 x <- paste(x, collapse = " ", sep = " ") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 175 } else if (length(x) < 1) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 176 x <- NA | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 177 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 178 x | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 179 }, | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 180 USE.NAMES = FALSE, simplify = TRUE) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 181 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 182 # abstracts | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 183 abstract.text = sapply(xml_data, function(x){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 184 custom_grep(x, tag="AbstractText", format="char")}, | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 185 USE.NAMES = FALSE, simplify = TRUE) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 186 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 187 abstracts <- sapply(abstract.text, function(x){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 188 if (length(x) > 1){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 189 x <- paste(x, collapse = " ", sep = " ") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 190 x <- gsub("</{0,1}i>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 191 x <- gsub("</{0,1}b>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 192 x <- gsub("</{0,1}sub>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 193 x <- gsub("</{0,1}exp>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 194 } else if (length(x) < 1) { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 195 x <- NA | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 196 } else { | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 197 x <- gsub("</{0,1}i>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 198 x <- gsub("</{0,1}b>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 199 x <- gsub("</{0,1}sub>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 200 x <- gsub("</{0,1}exp>", "", x, ignore.case = T) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 201 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 202 x | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 203 }, | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 204 USE.NAMES = FALSE, simplify = TRUE) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 205 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 206 #add title to abstracts | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 207 if (length(titles) == length(abstracts)){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 208 abstracts = paste(titles, abstracts) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 209 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 210 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 211 #add abstracts to data frame | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 212 if(length(abstracts)>0){ | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 213 data[row,sapply(1:length(abstracts),function(i){paste0("ABSTRACT_",i)})] <- abstracts | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 214 cat(length(abstracts)," abstracts for ",query, " are added in the table.", "\n") | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 215 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 216 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 217 return(data) | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 218 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 219 } | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 220 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
# Main driver: run one PubMed lookup per row of the input table and write the
# result table to the output file.
#
# pubmed_data_in_table() receives the current table and returns it (the
# visible code paths of that function all end in `return(data)`), so the table
# is threaded through the loop by reassignment.
#
# seq_len() is used instead of 1:nrow(data) so that an empty table produces
# zero iterations rather than iterating over c(1, 0).
for (i in seq_len(nrow(data))) {
  data <- tryCatch(
    pubmed_data_in_table(data = data,
                         row = i,
                         query = data[i, id_col_index],
                         number = args$number,
                         key = args$key,
                         abstract = args$abstract),
    error = function(e) {
      print('main error')
      print(e)
      # Pause before moving on to the next query.
      Sys.sleep(5)
      # The handler's value becomes the value of tryCatch(). Sys.sleep()
      # returns NULL, which previously overwrote `data` and discarded every
      # result collected so far. Return the table unchanged instead so a
      # single failing query only leaves its own row unfilled.
      data
    })
}

# Write the table as tab-separated text with a header row and no row names.
write.table(data, args$output, append = FALSE, sep = '\t', row.names = FALSE, col.names = TRUE)
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 235 | 
| 
34ed44f3f85c
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
 dlalgroup parents: diff
changeset | 236 | 
