# HG changeset patch
# User frogs
# Date 1629714070 0
# Node ID 7403d6c4f510228148b675251ca01d7ba503fdbb
"planemo upload for repository https://github.com/geraldinepascal/FROGS-wrappers/ commit 2024a13846ea6f9bd94ae62e3b2a5a3aba8cd304"
diff -r 000000000000 -r 7403d6c4f510 data_manager/FROGS_data_manager.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/FROGS_data_manager.py Mon Aug 23 10:21:10 2021 +0000
@@ -0,0 +1,199 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (C) 2021 INRA
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# Module metadata.
+# NOTE(review): the licence header above reads "Copyright (C) 2021 INRA" while
+# __copyright__ below says "2020 INRAE" -- confirm which one is intended.
+__author__ = 'David Christiany Migale Jouy en Josas / Maria Bernard - Sigenae Jouy en Josas'
+__copyright__ = 'Copyright (C) 2020 INRAE'
+__license__ = 'GNU General Public License'
+__version__ = '3.2.3'
+__email__ = 'frogs-support@inrae.fr'
+__status__ = 'prod'
+
+# import json
+import argparse
+import os
+# import sys
+import tarfile
+import time
+import urllib
+
+from galaxy.util.json import from_json_string, to_json_string
+
+import requests
+
+# GALAXY_database=~/galaxy/galaxy-20.09/database
+# FROGS_data_manager.py --database=frogs_db_data --all_dbs=false \
+# --date=0 --amplicons=16S --bases=SILVA --filters=Pintail100 \
+# --only_last_versions=true \
+# --tool_data=/home/maria/galaxy/galaxy-20.09/tool-data \
+# --output $GALAXY_database/objects/e/7/7/dataset_e7766c39-8f36-450c-adf5-3e4ee8d5c562.dat
+
+
+def get_args():
+    """Parse the command line of the FROGS data manager.
+
+    Every option is an optional plain string: no type conversion is applied
+    and an option that is not given is left to ``None``.
+
+    :return: the ``argparse.Namespace`` built by ``parse_args()``.
+    """
+    # One tuple of flags per option, registered in this order.
+    option_flags = (
+        ("-d", "--database"),
+        ("--all_dbs",),
+        ("--date",),
+        ("--amplicons",),
+        ("--bases",),
+        ("--filters",),
+        ("--only_last_versions",),
+        ("--tool_data",),
+        ("-o", "--output"),
+    )
+    cli = argparse.ArgumentParser()
+    for flags in option_flags:
+        cli.add_argument(*flags)
+    return cli.parse_args()
+
+
+def _add_data_table_entry(data_manager_dict, data_table_entry, data_table):
+    """Append one entry to a data table inside the data manager dictionary.
+
+    The ``'data_tables'`` mapping and the list for ``data_table`` are created
+    when they do not exist yet.
+
+    :param data_manager_dict: dictionary updated in place.
+    :param data_table_entry: entry appended to the table.
+    :param data_table: name of the data table receiving the entry.
+    :return: the same ``data_manager_dict`` object.
+    """
+    tables = data_manager_dict.setdefault('data_tables', {})
+    tables.setdefault(data_table, []).append(data_table_entry)
+    return data_manager_dict
+
+
+def keep_only_last_version(db_index):
+    """Keep the first index row met for each database type.
+
+    The type key is columns 1 to 3 of a row joined with ``"_"``, or columns
+    1 and 2 only when column 3 is an empty string.
+
+    NOTE(review): "last version" presumably relies on the index listing the
+    newest release of each type first -- confirm against the index file.
+
+    :param db_index: iterable of rows (sequences of strings).
+    :return: list of the retained rows, in order of first appearance.
+    """
+    first_seen = {}
+    for row in db_index:
+        key_fields = row[1:3] if row[3] == "" else row[1:4]
+        # setdefault leaves an already recorded row untouched.
+        first_seen.setdefault("_".join(key_fields), row)
+    return list(first_seen.values())
+
+
+def _split_option(raw):
+    """Return the lower-cased, stripped, non-empty items of a comma-separated option."""
+    return [item.strip().lower() for item in (raw or "").split(",") if item.strip()]
+
+
+def frogs_sources(data_manager_dict, target_directory):
+    """Download the selected FROGS databases and register them in ``data_manager_dict``.
+
+    The selection options are read from the module-level ``args`` namespace.
+    The FROGS database index is fetched and filtered, then every database whose
+    directory is not already present under ``<tool_data>/frogs_db/`` is
+    downloaded and unpacked into ``frogs_db_<YYYYMMDD>`` inside
+    ``target_directory``. One ``frogs_db`` data table entry is added per
+    downloaded database.
+
+    Index columns used here: 0 is compared with the date as an integer, 1 is
+    a comma-separated amplicon list, 2 the base, 3 the filter, 5 the entry
+    value and 6 the archive link; column 4 is only used to build the display
+    name.
+
+    :param data_manager_dict: dictionary updated in place with the new entries.
+    :param target_directory: existing directory receiving the databases; it
+        becomes the current working directory.
+    :raises requests.HTTPError: if the index cannot be fetched.
+    """
+    # Imported here because a bare ``import urllib`` does not guarantee that
+    # the ``urllib.request`` submodule is loaded.
+    from urllib.request import urlretrieve
+
+    # variables
+    filter_dbs = args.all_dbs == "false"
+    amplicons_list = []
+    bases_list = []
+    filters_list = []
+    bottom_date = 0
+    if filter_dbs:
+        amplicons_list = _split_option(args.amplicons)
+        bases_list = _split_option(args.bases)
+        filters_list = _split_option(args.filters)
+        bottom_date = int(args.date or 0)
+    tool_data_path = args.tool_data
+
+    # get frogs database index
+    frogs_db_index_link = "http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv"
+    with requests.Session() as session:
+        response = session.get(frogs_db_index_link)
+        # Fail loudly instead of parsing an HTTP error page as the index.
+        response.raise_for_status()
+        decoded_content = response.content.decode('utf-8')
+    # Skip the header line and any blank line.
+    rows = [row.split("\t") for row in decoded_content.splitlines()[1:] if row.strip()]
+    db_index = [[row[0], row[1].lower(), row[2].lower(), row[3].lower()] + row[4:] for row in rows]
+
+    # filter databases
+    if filter_dbs:
+        # filter by amplicons
+        if amplicons_list:
+            db_index = [row for row in db_index
+                        if any(amplicon in amplicons_list for amplicon in row[1].split(','))]
+        # filter by base
+        if bases_list:
+            db_index = [row for row in db_index if row[2] in bases_list]
+        # filter by filters
+        if filters_list:
+            db_index = [row for row in db_index if row[3] in filters_list]
+        # filter by date
+        if bottom_date != 0:
+            db_index = [row for row in db_index if int(row[0]) >= bottom_date]
+    if args.only_last_versions == "true":
+        # keep only last version
+        db_index = keep_only_last_version(db_index)
+
+    # get frogs dbs
+    os.chdir(target_directory)
+    dir_name = "frogs_db_" + time.strftime("%Y%m%d")
+    os.mkdir(dir_name)
+    archive = "tmp.tar.gz"
+    known_dbs = set()
+    for row in db_index:
+        value = row[5]
+        if "_" not in row[4]:
+            name = value.replace("_", " ")
+        else:
+            name = value.replace(row[4], "").replace("_", " ") + row[4]
+        link = row[6]
+        name_dir = link.replace(".tar.gz", "").split("/")[-1]
+        file_path = tool_data_path + "/frogs_db/" + name_dir
+        if os.path.exists(file_path):
+            # already in the frogs_db directory: nothing to download or register
+            continue
+
+        # download frogs db
+        urlretrieve(link, archive)
+
+        # unzip frogs db, then drop the archive whether or not extraction worked
+        # NOTE: the archive comes from a remote server and extractall() trusts
+        # the member paths it contains.
+        try:
+            with tarfile.open(archive) as tar:
+                tar.extractall(dir_name)
+        finally:
+            os.remove(archive)
+
+        # get fasta file path: the new database is the entry that appeared in
+        # dir_name since the previous extraction
+        extracted = set(os.listdir(dir_name))
+        new_db = dir_name + "/" + "".join(extracted.difference(known_dbs))
+        fasta = "".join(entry for entry in os.listdir(new_db) if entry.endswith('.fasta'))
+        known_dbs = extracted
+        path = os.path.join(target_directory, new_db + '/' + fasta)
+
+        data_table_entry = dict(name=name, value=value, path=path)
+        _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_db")
+
+# def HVL_sources(data_manager_dict,target_directory):
+# HVL_dir = "http://genoweb.toulouse.inra.fr/frogs_databanks/HVL/ITS/UNITE_s_7.1_20112016"
+# os.chdir(target_directory)
+# for link in [HVL_dir + "/Unite_s_7.1_20112016_ITS1.fasta",HVL_dir + "/Unite_s_7.1_20112016_ITS2.fasta"]:
+# file_name=link.split("/")[-1].replace('.fasta',"_"+time.strftime("%Y-%m-%d")+".fasta")
+# dl_file = urllib.URLopener()
+# dl_file.retrieve(link,file_name)
+
+# #get fasta file path
+# path = os.path.join(target_directory,file_name)
+# if link.endswith('ITS1.fasta'):
+# name = "UNITE 7.1 ITS1 " + time.strftime("%Y-%m-%d")
+# elif link.endswith('ITS2.fasta'):
+# name = "UNITE 7.1 ITS2 " + time.strftime("%Y-%m-%d")
+# value=file_name.replace('.fasta','')
+
+# data_table_entry = dict(name = name, value = value, path=path)
+# _add_data_table_entry(data_manager_dict, data_table_entry, "frogs_HVL_db")
+
+
+def main():
+    """Run the data manager from the command line.
+
+    The file given with ``--output`` is first read as JSON to find the
+    ``extra_files_path`` of the first output dataset, which is used as the
+    download directory. The same file is then overwritten with the JSON
+    description of the data table entries that were added.
+    """
+    # get args from command line; frogs_sources() reads this module-level name
+    global args
+    args = get_args()
+
+    # Extract json file params
+    data_manager_dict = {}
+    filename = args.output
+    with open(filename) as params_file:
+        params = from_json_string(params_file.read())
+    target_directory = params['output_data'][0]['extra_files_path']
+    # Create missing parent directories too and accept an existing directory.
+    os.makedirs(target_directory, exist_ok=True)
+
+    # if args.database=="frogs_db_data":
+    frogs_sources(data_manager_dict, target_directory)
+    # elif args.database=="HVL_db_data":
+    #     HVL_sources(data_manager_dict,target_directory)
+
+    # save info to json file
+    with open(filename, 'wt') as out_file:
+        out_file.write(to_json_string(data_manager_dict))
+
+
+# Run the data manager only when this file is executed as a script.
+if __name__ == "__main__":
+ main()
diff -r 000000000000 -r 7403d6c4f510 data_manager/FROGS_data_manager.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager/FROGS_data_manager.xml Mon Aug 23 10:21:10 2021 +0000
@@ -0,0 +1,75 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 0|[1-2]{1}[0-9]{3}[0-1]{1}[0-9]{1}([0-2]{1}[0-9]{1}|3[0-1]{1})
+
+
+
+
+
+
+
+
+
+
+
+
+
+ .. image:: static/images/FROGS_logo.png
+ :height: 144
+ :width: 110
+
+ The FROGS data manager downloads preformatted databases for the FROGS Affiliation OTU tool.
+
+ All databases are available at http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/.
+
+ This tool is based on the `FROGS_databases.tsv.txt <http://genoweb.toulouse.inra.fr/frogs_databanks/assignation/FROGS_databases.tsv>`_ index, which we update each time a new database is formatted.
+
+ .. image:: static/images/FROGS_db.png
+ :height: 157
+ :width: 961
+
+ You may download all databases, but you may (and should) filter the desired databases on different criteria:
+
+ - on a date, to download only the most recently formatted databases
+ - on an amplicon type
+ - on a base name
+ - optionally on a filter name; this may be the case, for example, for the 16S SILVA database, for which we provide reference sequences with a pintail score above a threshold
+
+ The last option allows you to download only the latest version of the previously selected databases; we have provided reference databases since 2016 with, for example, around one version of SILVA per year.
+
+ **Contact**
+
+ Contacts: frogs-support@inrae.fr
+
+ Repositories: https://github.com/geraldinepascal/FROGS, https://github.com/geraldinepascal/FROGS-wrappers
+
+ Website: http://frogs.toulouse.inrae.fr/
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 7403d6c4f510 data_manager_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/data_manager_conf.xml Mon Aug 23 10:21:10 2021 +0000
@@ -0,0 +1,19 @@
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 7403d6c4f510 static/images/FROGS_db.png
Binary file static/images/FROGS_db.png has changed
diff -r 000000000000 -r 7403d6c4f510 static/images/FROGS_logo.png
Binary file static/images/FROGS_logo.png has changed
diff -r 000000000000 -r 7403d6c4f510 tool-data/frogs_db.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/frogs_db.loc.sample Mon Aug 23 10:21:10 2021 +0000
@@ -0,0 +1,53 @@
+# Copyright (C) 2014 INRA
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+#
+#This is a sample file that enables the FROGS_affiliations_OTU tool to use a taxonomy database for
+#taxonomy affiliation. You will need to create or download the Blast+ index and train your database
+#for the RDP classifier to obtain these data files.
+#download link : http://genoweb.toulouse.inra.fr/frogs_databanks/assignation
+#Finally you will need to create a frogs_db.loc file similar to this one in your galaxy
+#tool-data directory. The frogs_db.loc file has this format (longer white space characters are
+#TAB characters):
+#
+#<name>	<value>	<path>
+#
+#First column will be the visible name in galaxy.
+#So, for example, if you had 16S silva 128 indexed stored in
+#/galaxy_databanks/16S/silva_128/
+#then the frogs_db.loc entry would look like this:
+#
+#silva 128 16S /galaxy_databanks/16S/silva_128/silva_128_16S.fasta
+#
+#and your /galaxy_databanks/16S/silva_128/ directory
+#would contain index files:
+#
+#-rw-r--r-- 1 mbernard FROGS 8097966 5 déc. 16:56 bergeyTrainingTree.xml
+#-rw-r--r-- 1 mbernard FROGS 1572981589 5 déc. 16:56 genus_wordConditionalProbList.txt
+#-rw-r--r-- 1 mbernard FROGS 1654 5 déc. 16:56 LICENCE.txt
+#-rw-r--r-- 1 mbernard FROGS 1072228 5 déc. 16:56 logWordPrior.txt
+#-rw-r--r-- 1 mbernard FROGS 940834335 5 déc. 16:56 silva_128_16S.fasta
+#-rw-r--r-- 1 mbernard FROGS 152606489 5 déc. 16:56 silva_128_16S.fasta.nhr
+#-rw-r--r-- 1 mbernard FROGS 6918588 5 déc. 16:56 silva_128_16S.fasta.nin
+#-rw-r--r-- 1 mbernard FROGS 205320030 5 déc. 16:56 silva_128_16S.fasta.nsq
+#-rw-r--r-- 1 mbernard FROGS 281 5 déc. 16:56 silva_128_16S.fasta.properties
+#-rw-r--r-- 1 mbernard FROGS 3420464 5 déc. 16:56 silva_128_16S.tax
+#-rw-r--r-- 1 mbernard FROGS 964048 5 déc. 16:57 wordConditionalProbIndexArr.txt
+#
+#
+#
+#
+# EXAMPLE FOR TEST :
+#ITS1_test ITS1_test ${__HERE__}/frogs_db_data/ITS1.rdp.fasta
diff -r 000000000000 -r 7403d6c4f510 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Aug 23 10:21:10 2021 +0000
@@ -0,0 +1,8 @@
+
+
+
+