Mercurial > repos > devteam > data_manager_fetch_genome_dbkeys_all_fasta
changeset 8:14eb0fc65c62 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_genome_dbkeys_all_fasta commit b56485a8b386fc6f17219850e30e5656c159f231"
author | iuc |
---|---|
date | Wed, 16 Oct 2019 04:17:00 -0400 |
parents | b1bc53e9bbc5 |
children | 4d3eff1bc421 |
files | data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml test-data/phiX174.data_manager_json tool-data/dbkeys.loc.sample |
diffstat | 4 files changed, 27 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py Thu Jun 15 13:14:56 2017 -0400 +++ b/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py Wed Oct 16 04:17:00 2019 -0400 @@ -3,6 +3,7 @@ import bz2 import gzip +import json import optparse import os import shutil @@ -11,7 +12,6 @@ import tempfile import zipfile from ftplib import FTP -from json import dumps, loads try: # For Python 3.0 and later @@ -118,20 +118,16 @@ def _write_sorted_fasta(sorted_names, fasta_offsets, sorted_fasta_filename, unsorted_fasta_filename): - unsorted_fh = open(unsorted_fasta_filename) - sorted_fh = open(sorted_fasta_filename, 'wb+') - - for name in sorted_names: - offset = fasta_offsets[name] - unsorted_fh.seek(offset) - sorted_fh.write(unsorted_fh.readline()) - while True: - line = unsorted_fh.readline() - if not line or line.startswith(">"): - break - sorted_fh.write(line) - unsorted_fh.close() - sorted_fh.close() + with open(unsorted_fasta_filename, 'rb') as unsorted_fh, open(sorted_fasta_filename, 'wb+') as sorted_fh: + for name in sorted_names: + offset = fasta_offsets[name] + unsorted_fh.seek(offset) + sorted_fh.write(unsorted_fh.readline()) + while True: + line = unsorted_fh.readline() + if not line or line.startswith(b">"): + break + sorted_fh.write(line) def _sort_fasta_as_is(fasta_filename, params): @@ -316,27 +312,15 @@ """ Download a file from a URL and return a list of filehandles from which to read the data. 
- >>> url = 'https://github.com/mvdbeek/tools-devteam/raw/data_manager/data_managers/data_manager_fetch_genome_dbkeys_all_fasta/test-data/test.tar' - >>> params = {'param_dict': {'reference_source': {'user_url': url}}} >>> tmp_dir = tempfile.mkdtemp() - >>> fh = download_from_url(params=params, tmp_dir=tmp_dir)[0][0] - >>> assert fh.readline().startswith('>FBtr0304171') - >>> url = 'https://github.com/mvdbeek/tools-devteam/raw/data_manager/data_managers/data_manager_fetch_genome_dbkeys_all_fasta/test-data/test.tar.bz2' + >>> url = 'https://github.com/galaxyproject/tools-iuc/raw/master/data_managers/data_manager_fetch_genome_dbkeys_all_fasta/test-data/test.tar.bz2' >>> params = {'param_dict': {'reference_source': {'user_url': url}}} >>> fh = download_from_url(params=params, tmp_dir=tmp_dir)[0][0] - >>> assert fh.readline().startswith('>FBtr0304171') - >>> url = 'https://github.com/mvdbeek/tools-devteam/raw/data_manager/data_managers/data_manager_fetch_genome_dbkeys_all_fasta/test-data/test.tar.gz' + >>> assert fh.readline().startswith('b>FBtr0304171') + >>> url = 'https://github.com/galaxyproject/tools-iuc/raw/master/data_managers/data_manager_fetch_genome_dbkeys_all_fasta/test-data/phiX174.fasta' >>> params = {'param_dict': {'reference_source': {'user_url': url}}} >>> fh = download_from_url(params=params, tmp_dir=tmp_dir)[0][0] - >>> assert fh.readline().startswith('>FBtr0304171') - >>> url = 'https://github.com/mvdbeek/tools-devteam/raw/data_manager/data_managers/data_manager_fetch_genome_dbkeys_all_fasta/test-data/test.zip' - >>> params = {'param_dict': {'reference_source': {'user_url': url}}} - >>> fh = download_from_url(params=params, tmp_dir=tmp_dir)[0][0] - >>> assert fh.readline().startswith('>FBtr0304171') - >>> url = 'https://raw.githubusercontent.com/galaxyproject/tools-devteam/master/data_managers/data_manager_fetch_genome_dbkeys_all_fasta/test-data/phiX174.fasta' - >>> params = {'param_dict': {'reference_source': {'user_url': url}}} - >>> fh = 
download_from_url(params=params, tmp_dir=tmp_dir)[0][0] - >>> assert fh.readline().startswith('>phiX174') + >>> assert fh.readline().startswith('b>phiX174') """ urls = filter(bool, [x.strip() for x in params['param_dict']['reference_source']['user_url'].split('\n')]) return [get_stream_reader(urlopen(url), tmp_dir) for url in urls] @@ -348,7 +332,7 @@ if isinstance(input_filename, list): fasta_readers = [get_stream_reader(open(filename, 'rb'), tmp_dir) for filename in input_filename] else: - fasta_readers = get_stream_reader(open(input_filename), tmp_dir) + fasta_readers = get_stream_reader(open(input_filename, 'rb'), tmp_dir) return fasta_readers @@ -468,7 +452,8 @@ filename = args[0] - params = loads(open(filename).read()) + with open(filename) as fh: + params = json.load(fh) target_directory = params['output_data'][0]['extra_files_path'] os.mkdir(target_directory) data_manager_dict = {} @@ -504,7 +489,8 @@ finally: cleanup_before_exit(tmp_dir) # save info to json file - open(filename, 'wb').write(dumps(data_manager_dict).encode()) + with open(filename, 'w') as fh: + json.dump(data_manager_dict, fh, sort_keys=True) if __name__ == "__main__":
--- a/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml Thu Jun 15 13:14:56 2017 -0400
+++ b/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml Wed Oct 16 04:17:00 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="data_manager_fetch_genome_all_fasta_dbkey" name="Create DBKey and Reference Genome" version="0.0.2" tool_type="manage_data">
+<tool id="data_manager_fetch_genome_all_fasta_dbkey" name="Create DBKey and Reference Genome" version="0.0.3" tool_type="manage_data">
 <description>fetching</description>
 <command detect_errors="exit_code"><![CDATA[
 python '$__tool_directory__/data_manager_fetch_genome_all_fasta_dbkeys.py'
@@ -78,9 +78,10 @@
 <tests>
 <!-- TODO: need some way to test that new entry was added to data table -->
 <test>
- <param name="dbkey" value="anoGam1"/>
- <param name="sequence_name" value=""/>
- <param name="sequence_id" value=""/>
+ <param name="dbkey" value="phiX174"/>
+ <param name="dbkey_source|dbkey_source_selector" value="new"/>
+ <param name="sequence_name" value="phiX174 sequence name"/>
+ <param name="sequence_id" value="phix174"/>
 <param name="reference_source_selector" value="history"/>
 <param name="input_fasta" value="phiX174.fasta"/>
 <param name="sort_selector" value="as_is"/>
--- a/test-data/phiX174.data_manager_json Thu Jun 15 13:14:56 2017 -0400
+++ b/test-data/phiX174.data_manager_json Wed Oct 16 04:17:00 2019 -0400
@@ -1,1 +1,1 @@
-{"data_tables": {"all_fasta": [{"path": "anoGam1.fa", "dbkey": "anoGam1", "name": "A. gambiae Feb. 2003 (IAGEC MOZ2/anoGam1) (anoGam1)", "value": "anoGam1"}]}}
\ No newline at end of file
+{"data_tables": {"__dbkeys__": [{"len_path": "phiX174.len", "name": "phiX174", "value": "phiX174"}], "all_fasta": [{"dbkey": "phiX174", "name": "phiX174 sequence name", "path": "phix174.fa", "value": "phix174"}]}}
\ No newline at end of file