Previous changeset 3:86fa71e9b427 (2016-08-26) Next changeset 5:a246b4f11133 (2017-04-05) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_genome_dbkeys_all_fasta commit 8652f36a3a3838dca989426961561e81432acf4f |
modified:
data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml |
removed:
test-data/test.tar test-data/test.tar.gz test-data/test.zip |
b |
diff -r 86fa71e9b427 -r 60994ca04177 data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py --- a/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py Fri Aug 26 12:46:47 2016 -0400 +++ b/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py Tue Apr 04 17:13:31 2017 -0400 |
[ |
b'@@ -1,35 +1,36 @@\n #!/usr/bin/env python\n-#Dan Blankenberg\n+# Dan Blankenberg\n \n-import sys\n+import bz2\n+import gzip\n+import optparse\n import os\n-import tempfile\n import shutil\n-import optparse\n-from ftplib import FTP\n+import sys\n import tarfile\n+import tempfile\n import zipfile\n-import gzip\n-import bz2\n+from ftplib import FTP\n+from json import dumps, loads\n+\n try:\n # For Python 3.0 and later\n- from urllib.request import urlopen\n from io import BytesIO as StringIO\n from io import UnsupportedOperation\n+ from urllib.request import urlopen\n except ImportError:\n- # Fall back to Python 2\'s urllib2\n+ # Fall back to Python 2 imports\n+ from StringIO import StringIO\n from urllib2 import urlopen\n- from StringIO import StringIO\n+\n UnsupportedOperation = AttributeError\n-from json import loads, dumps\n+\n+CHUNK_SIZE = 2 ** 20 # 1mb\n \n \n-CHUNK_SIZE = 2**20 # 1mb\n-\n-\n-def cleanup_before_exit( tmp_dir ):\n- if tmp_dir and os.path.exists( tmp_dir ):\n- shutil.rmtree( tmp_dir )\n+def cleanup_before_exit(tmp_dir):\n+ if tmp_dir and os.path.exists(tmp_dir):\n+ shutil.rmtree(tmp_dir)\n \n \n def stop_err(msg):\n@@ -37,20 +38,20 @@\n sys.exit(1)\n \n \n-def get_dbkey_dbname_id_name( params, dbkey_description=None ):\n+def get_dbkey_dbname_id_name(params, dbkey_description=None):\n dbkey = params[\'param_dict\'][\'dbkey_source\'][\'dbkey\']\n- #TODO: ensure sequence_id is unique and does not already appear in location file\n+ # TODO: ensure sequence_id is unique and does not already appear in location file\n sequence_id = params[\'param_dict\'][\'sequence_id\']\n if not sequence_id:\n- sequence_id = dbkey #uuid.uuid4() generate and use an uuid instead?\n- \n+ sequence_id = dbkey # uuid.uuid4() generate and use an uuid instead?\n+\n if params[\'param_dict\'][\'dbkey_source\'][\'dbkey_source_selector\'] == \'new\':\n dbkey_name = params[\'param_dict\'][\'dbkey_source\'][\'dbkey_name\']\n if not dbkey_name:\n dbkey_name = dbkey\n else:\n dbkey_name = None\n- \n+\n sequence_name = params[\'param_dict\'][\'sequence_name\']\n if not sequence_name:\n sequence_name = dbkey_description\n@@ -59,146 +60,146 @@\n return dbkey, dbkey_name, sequence_id, sequence_name\n \n \n-def _get_files_in_ftp_path( ftp, path ):\n+def _get_files_in_ftp_path(ftp, path):\n path_contents = []\n- ftp.retrlines( \'MLSD %s\' % ( path ), path_contents.append )\n- return [ line.split( \';\' )[ -1 ].lstrip() for line in path_contents ]\n+ ftp.retrlines(\'MLSD %s\' % (path), path_contents.append)\n+ return [line.split(\';\')[-1].lstrip() for line in path_contents]\n \n \n-def _get_stream_readers_for_tar( fh, tmp_dir ):\n- fasta_tar = tarfile.open( fileobj=fh, mode=\'r:*\' )\n+def _get_stream_readers_for_tar(fh, tmp_dir):\n+ fasta_tar = tarfile.open(fileobj=fh, mode=\'r:*\')\n return [x for x in [fasta_tar.extractfile(member) for member in fasta_tar.getmembers()] if x]\n \n \n-def _get_stream_readers_for_zip( fh, tmp_dir ):\n+def _get_stream_readers_for_zip(fh, tmp_dir):\n """\n Unpacks all archived files in a zip file.\n Individual files will be concatenated (in _stream_fasta_to_file)\n """\n- fasta_zip = zipfile.ZipFile( fh, \'r\' )\n+ fasta_zip = zipfile.ZipFile(fh, \'r\')\n rval = []\n for member in fasta_zip.namelist():\n- fasta_zip.extract( member, tmp_dir )\n- rval.append( open( os.path.join( tmp_dir, member ), \'rb\' ) )\n+ fasta_zip.extract(member, tmp_dir)\n+ rval.append(open(os.path.join(tmp_dir, member), \'rb\'))\n return rval\n \n \n-def _get_stream_readers_for_gzip( fh, tmp_dir ):\n- return [ gzip.GzipFile( fileobj=fh, mode=\'rb\') ]\n+def _get_stream_readers_for_gzip(fh, tmp_dir):\n+ return [gzip.GzipFile(fileobj=fh, mode=\'rb\')]\n \n \n-def _get_stream_readers_for_bz2( fh, tmp_dir ):\n- return [ bz2.BZ2File( fh.name, \'rb\') ]\n+def _get_stream_readers_for_bz2(fh, tmp_dir):\n+ return [bz2.BZ2File(fh.name, \'rb\')]\n \n \n-def sort_fasta( fasta_file'..b'd_from_ucsc, ncbi=download_from_ncbi, url=download_from_url, history=download_from_history, directory=copy_from_directory )\n-\n-SORTING_METHODS = dict( as_is=_sort_fasta_as_is, lexicographical=_sort_fasta_lexicographical, gatk=_sort_fasta_gatk, custom=_sort_fasta_custom )\n+REFERENCE_SOURCE_TO_DOWNLOAD = dict(ucsc=download_from_ucsc, ncbi=download_from_ncbi, url=download_from_url, history=download_from_history,\n+ directory=copy_from_directory)\n+SORTING_METHODS = dict(as_is=_sort_fasta_as_is, lexicographical=_sort_fasta_lexicographical, gatk=_sort_fasta_gatk, custom=_sort_fasta_custom)\n \n \n def main():\n- #Parse Command Line\n parser = optparse.OptionParser()\n- parser.add_option( \'-d\', \'--dbkey_description\', dest=\'dbkey_description\', action=\'store\', type="string", default=None, help=\'dbkey_description\' )\n+ parser.add_option(\'-d\', \'--dbkey_description\', dest=\'dbkey_description\', action=\'store\', type="string", default=None, help=\'dbkey_description\')\n (options, args) = parser.parse_args()\n- \n+\n filename = args[0]\n- \n- params = loads( open( filename ).read() )\n- target_directory = params[ \'output_data\' ][0][\'extra_files_path\']\n- os.mkdir( target_directory )\n+\n+ params = loads(open(filename).read())\n+ target_directory = params[\'output_data\'][0][\'extra_files_path\']\n+ os.mkdir(target_directory)\n data_manager_dict = {}\n- \n- dbkey, dbkey_name, sequence_id, sequence_name = get_dbkey_dbname_id_name( params, dbkey_description=options.dbkey_description ) \n- \n- if dbkey in [ None, \'\', \'?\' ]:\n- raise Exception( \'"%s" is not a valid dbkey. You must specify a valid dbkey.\' % ( dbkey ) )\n+\n+ dbkey, dbkey_name, sequence_id, sequence_name = get_dbkey_dbname_id_name(params, dbkey_description=options.dbkey_description)\n+\n+ if dbkey in [None, \'\', \'?\']:\n+ raise Exception(\'"%s" is not a valid dbkey. You must specify a valid dbkey.\' % (dbkey))\n \n # Create a tmp_dir, in case a zip file needs to be uncompressed\n tmp_dir = tempfile.mkdtemp()\n- #Fetch the FASTA\n+ # Fetch the FASTA\n try:\n- REFERENCE_SOURCE_TO_DOWNLOAD[ params[\'param_dict\'][\'reference_source\'][\'reference_source_selector\'] ]( data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir )\n+ reference_source = params[\'param_dict\'][\'reference_source\'][\'reference_source_selector\']\n+ fasta_readers = REFERENCE_SOURCE_TO_DOWNLOAD[reference_source](data_manager_dict=data_manager_dict,\n+ params=params,\n+ target_directory=target_directory,\n+ dbkey=dbkey,\n+ dbkey_name=dbkey_name,\n+ sequence_id=sequence_id,\n+ sequence_name=sequence_name,\n+ tmp_dir=tmp_dir)\n+ if fasta_readers:\n+ add_fasta_to_table(data_manager_dict=data_manager_dict,\n+ fasta_readers=fasta_readers,\n+ target_directory=target_directory,\n+ dbkey=dbkey,\n+ dbkey_name=dbkey_name,\n+ sequence_id=sequence_id,\n+ sequence_name=sequence_name,\n+ params=params)\n+\n finally:\n cleanup_before_exit(tmp_dir)\n- #save info to json file\n- open( filename, \'wb\' ).write( dumps( data_manager_dict ).encode() )\n- \n+ # save info to json file\n+ open(filename, \'wb\').write(dumps(data_manager_dict).encode())\n+\n+\n if __name__ == "__main__":\n main()\n' |
b |
diff -r 86fa71e9b427 -r 60994ca04177 data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml --- a/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml Fri Aug 26 12:46:47 2016 -0400 +++ b/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml Tue Apr 04 17:13:31 2017 -0400 |
[ |
@@ -1,11 +1,12 @@ <tool id="data_manager_fetch_genome_all_fasta_dbkey" name="Create DBKey and Reference Genome" version="0.0.2" tool_type="manage_data"> <description>fetching</description> - <command><![CDATA[ - python "$__tool_directory__"/data_manager_fetch_genome_all_fasta_dbkeys.py "${out_file}" + <command detect_errors="exit_code"><![CDATA[ + python '$__tool_directory__/data_manager_fetch_genome_all_fasta_dbkeys.py' + '${out_file}' #if str( $dbkey_source.dbkey_source_selector ) == 'existing': - --dbkey_description ${ dbkey_source.dbkey.get_display_text() } + --dbkey_description '${ dbkey_source.dbkey.get_display_text() }' #else - --dbkey_description "${ dbkey_source.dbkey_name or $dbkey_source.dbkey }" + --dbkey_description '${ dbkey_source.dbkey_name or $dbkey_source.dbkey }' #end if ]]></command> <inputs> @@ -18,12 +19,12 @@ <param name="dbkey" type="genomebuild" label="DBKEY to assign to data" /> </when> <when value="new"> - <param type="text" name="dbkey" value="" label="dbkey" optional="False" /> - <param type="text" name="dbkey_name" value="" label="Display name for dbkey" /> + <param name="dbkey" type="text" value="" optional="false" label="dbkey" /> + <param name="dbkey_name" type="text" value="" label="Display name for dbkey" /> </when> </conditional> - <param type="text" name="sequence_name" value="" label="Name of sequence" /> - <param type="text" name="sequence_id" value="" label="ID for sequence" /> + <param name="sequence_name" type="text" value="" label="Name of sequence" /> + <param name="sequence_id" type="text" value="" label="ID for sequence" /> <conditional name="reference_source"> <param name="reference_source_selector" type="select" label="Choose the source for the reference genome"> <option value="ucsc" selected="True">UCSC</option> @@ -33,20 +34,20 @@ <option value="directory">Directory on Server</option> </param> <when value="ucsc"> - <param type="text" name="requested_dbkey" value="" label="UCSC's DBKEY for source FASTA" optional="False" /> + <param name="requested_dbkey" type="text" value="" optional="false" label="UCSC's DBKEY for source FASTA" /> </when> <when value="ncbi"> - <param type="text" name="requested_identifier" value="" label="NCBI identifier/accession" help="Identifiers (e.g 667699573) or accessions (e.g AC020606.7) may be used" optional="False" /> + <param name="requested_identifier" type="text" value="" optional="false" label="NCBI identifier/accession" help="Identifiers (e.g 667699573) or accessions (e.g AC020606.7) may be used" /> </when> <when value="url"> - <param type="text" area="True" name="user_url" value="http://" label="URLs" optional="False" /> + <param name="user_url" type="text" area="True" value="http://" optional="false" label="URLs" /> </when> <when value="history"> - <param name="input_fasta" type="data" format="fasta" label="FASTA File" multiple="False" optional="False" /> + <param name="input_fasta" type="data" format="fasta" label="FASTA file" /> </when> <when value="directory"> - <param type="text" name="fasta_filename" value="" label="Full path to FASTA File on disk" optional="False" /> - <param type="boolean" name="create_symlink" truevalue="create_symlink" falsevalue="copy_file" label="Create symlink to original data instead of copying" checked="False" /> + <param name="fasta_filename" type="text" value="" optional="false" label="Full path to FASTA file on disk" /> + <param name="create_symlink" type="boolean" truevalue="create_symlink" falsevalue="copy_file" label="Create symlink to original data instead of copying" /> </when> </conditional> <conditional name="sorting"> @@ -56,15 +57,12 @@ <option value="gatk">GATK</option> <option value="custom">Custom</option> </param> - <when value="as_is"> - </when> - <when value="lexicographical"> - </when> - <when value="gatk"> - </when> + <when value="as_is" /> + <when value="lexicographical" /> + <when value="gatk" /> <when value="custom"> <repeat name="sequence_identifiers" title="Sequence Identifiers" min="1" default="1"> - <param type="text" name="identifier" value="" label="Sequence Identifier" optional="False" /> + <param name="identifier" type="text" value="" optional="false" label="Sequence Identifier" /> </repeat> <param name="handle_not_listed_selector" type="select" label="How to handle non-specified Identifiers"> <option value="discard" selected="True">Discard</option> @@ -82,7 +80,6 @@ <test> <param name="dbkey" value="anoGam1"/> <param name="sequence_name" value=""/> - <param name="sequence_desc" value=""/> <param name="sequence_id" value=""/> <param name="reference_source_selector" value="history"/> <param name="input_fasta" value="phiX174.fasta"/> @@ -97,11 +94,8 @@ ------ - - .. class:: infomark **Notice:** If you leave name, description, or id blank, it will be generated automatically. - </help> </tool> |
b |
diff -r 86fa71e9b427 -r 60994ca04177 test-data/test.tar |
b |
Binary file test-data/test.tar has changed |
b |
diff -r 86fa71e9b427 -r 60994ca04177 test-data/test.tar.gz |
b |
Binary file test-data/test.tar.gz has changed |
b |
diff -r 86fa71e9b427 -r 60994ca04177 test-data/test.zip |
b |
Binary file test-data/test.zip has changed |