Repository 'data_manager_fetch_genome_dbkeys_all_fasta'
hg clone https://toolshed.g2.bx.psu.edu/repos/devteam/data_manager_fetch_genome_dbkeys_all_fasta

Changeset 4:60994ca04177 (2017-04-04)
Previous changeset 3:86fa71e9b427 (2016-08-26) Next changeset 5:a246b4f11133 (2017-04-05)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_fetch_genome_dbkeys_all_fasta commit 8652f36a3a3838dca989426961561e81432acf4f
modified:
data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py
data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml
removed:
test-data/test.tar
test-data/test.tar.gz
test-data/test.zip
b
diff -r 86fa71e9b427 -r 60994ca04177 data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py
--- a/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py Fri Aug 26 12:46:47 2016 -0400
+++ b/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py Tue Apr 04 17:13:31 2017 -0400
[
b'@@ -1,35 +1,36 @@\n #!/usr/bin/env python\n-#Dan Blankenberg\n+# Dan Blankenberg\n \n-import sys\n+import bz2\n+import gzip\n+import optparse\n import os\n-import tempfile\n import shutil\n-import optparse\n-from ftplib import FTP\n+import sys\n import tarfile\n+import tempfile\n import zipfile\n-import gzip\n-import bz2\n+from ftplib import FTP\n+from json import dumps, loads\n+\n try:\n     # For Python 3.0 and later\n-    from urllib.request import urlopen\n     from io import BytesIO as StringIO\n     from io import UnsupportedOperation\n+    from urllib.request import urlopen\n except ImportError:\n-    # Fall back to Python 2\'s urllib2\n+    # Fall back to Python 2 imports\n+    from StringIO import StringIO\n     from urllib2 import urlopen\n-    from StringIO import StringIO\n+\n     UnsupportedOperation = AttributeError\n-from json import loads, dumps\n+\n+CHUNK_SIZE = 2 ** 20  # 1mb\n \n \n-CHUNK_SIZE = 2**20  # 1mb\n-\n-\n-def cleanup_before_exit( tmp_dir ):\n-    if tmp_dir and os.path.exists( tmp_dir ):\n-        shutil.rmtree( tmp_dir )\n+def cleanup_before_exit(tmp_dir):\n+    if tmp_dir and os.path.exists(tmp_dir):\n+        shutil.rmtree(tmp_dir)\n \n \n def stop_err(msg):\n@@ -37,20 +38,20 @@\n     sys.exit(1)\n \n \n-def get_dbkey_dbname_id_name( params, dbkey_description=None ):\n+def get_dbkey_dbname_id_name(params, dbkey_description=None):\n     dbkey = params[\'param_dict\'][\'dbkey_source\'][\'dbkey\']\n-    #TODO: ensure sequence_id is unique and does not already appear in location file\n+    # TODO: ensure sequence_id is unique and does not already appear in location file\n     sequence_id = params[\'param_dict\'][\'sequence_id\']\n     if not sequence_id:\n-        sequence_id = dbkey #uuid.uuid4() generate and use an uuid instead?\n-    \n+        sequence_id = dbkey  # uuid.uuid4() generate and use an uuid instead?\n+\n     if params[\'param_dict\'][\'dbkey_source\'][\'dbkey_source_selector\'] == \'new\':\n         dbkey_name = params[\'param_dict\'][\'dbkey_source\'][\'dbkey_name\']\n         if not dbkey_name:\n             dbkey_name = dbkey\n     else:\n         dbkey_name = None\n-    \n+\n     sequence_name = params[\'param_dict\'][\'sequence_name\']\n     if not sequence_name:\n         sequence_name = dbkey_description\n@@ -59,146 +60,146 @@\n     return dbkey, dbkey_name, sequence_id, sequence_name\n \n \n-def _get_files_in_ftp_path( ftp, path ):\n+def _get_files_in_ftp_path(ftp, path):\n     path_contents = []\n-    ftp.retrlines( \'MLSD %s\' % ( path ), path_contents.append )\n-    return [ line.split( \';\' )[ -1 ].lstrip() for line in path_contents ]\n+    ftp.retrlines(\'MLSD %s\' % (path), path_contents.append)\n+    return [line.split(\';\')[-1].lstrip() for line in path_contents]\n \n \n-def _get_stream_readers_for_tar( fh, tmp_dir ):\n-    fasta_tar = tarfile.open( fileobj=fh, mode=\'r:*\' )\n+def _get_stream_readers_for_tar(fh, tmp_dir):\n+    fasta_tar = tarfile.open(fileobj=fh, mode=\'r:*\')\n     return [x for x in [fasta_tar.extractfile(member) for member in fasta_tar.getmembers()] if x]\n \n \n-def _get_stream_readers_for_zip( fh, tmp_dir ):\n+def _get_stream_readers_for_zip(fh, tmp_dir):\n     """\n     Unpacks all archived files in a zip file.\n     Individual files will be concatenated (in _stream_fasta_to_file)\n     """\n-    fasta_zip = zipfile.ZipFile( fh, \'r\' )\n+    fasta_zip = zipfile.ZipFile(fh, \'r\')\n     rval = []\n     for member in fasta_zip.namelist():\n-        fasta_zip.extract( member, tmp_dir )\n-        rval.append( open( os.path.join( tmp_dir, member ), \'rb\' ) )\n+        fasta_zip.extract(member, tmp_dir)\n+        rval.append(open(os.path.join(tmp_dir, member), \'rb\'))\n     return rval\n \n \n-def _get_stream_readers_for_gzip( fh, tmp_dir ):\n-    return [ gzip.GzipFile( fileobj=fh, mode=\'rb\') ]\n+def _get_stream_readers_for_gzip(fh, tmp_dir):\n+    return [gzip.GzipFile(fileobj=fh, mode=\'rb\')]\n \n \n-def _get_stream_readers_for_bz2( fh, tmp_dir ):\n-    return [ bz2.BZ2File( fh.name, \'rb\') ]\n+def _get_stream_readers_for_bz2(fh, tmp_dir):\n+    return [bz2.BZ2File(fh.name, \'rb\')]\n \n \n-def sort_fasta( fasta_file'..b'd_from_ucsc, ncbi=download_from_ncbi, url=download_from_url, history=download_from_history, directory=copy_from_directory )\n-\n-SORTING_METHODS = dict( as_is=_sort_fasta_as_is, lexicographical=_sort_fasta_lexicographical, gatk=_sort_fasta_gatk, custom=_sort_fasta_custom )\n+REFERENCE_SOURCE_TO_DOWNLOAD = dict(ucsc=download_from_ucsc, ncbi=download_from_ncbi, url=download_from_url, history=download_from_history,\n+                                    directory=copy_from_directory)\n+SORTING_METHODS = dict(as_is=_sort_fasta_as_is, lexicographical=_sort_fasta_lexicographical, gatk=_sort_fasta_gatk, custom=_sort_fasta_custom)\n \n \n def main():\n-    #Parse Command Line\n     parser = optparse.OptionParser()\n-    parser.add_option( \'-d\', \'--dbkey_description\', dest=\'dbkey_description\', action=\'store\', type="string", default=None, help=\'dbkey_description\' )\n+    parser.add_option(\'-d\', \'--dbkey_description\', dest=\'dbkey_description\', action=\'store\', type="string", default=None, help=\'dbkey_description\')\n     (options, args) = parser.parse_args()\n-    \n+\n     filename = args[0]\n-    \n-    params = loads( open( filename ).read() )\n-    target_directory = params[ \'output_data\' ][0][\'extra_files_path\']\n-    os.mkdir( target_directory )\n+\n+    params = loads(open(filename).read())\n+    target_directory = params[\'output_data\'][0][\'extra_files_path\']\n+    os.mkdir(target_directory)\n     data_manager_dict = {}\n-    \n-    dbkey, dbkey_name, sequence_id, sequence_name = get_dbkey_dbname_id_name( params, dbkey_description=options.dbkey_description ) \n-    \n-    if dbkey in [ None, \'\', \'?\' ]:\n-        raise Exception( \'"%s" is not a valid dbkey. You must specify a valid dbkey.\' % ( dbkey ) )\n+\n+    dbkey, dbkey_name, sequence_id, sequence_name = get_dbkey_dbname_id_name(params, dbkey_description=options.dbkey_description)\n+\n+    if dbkey in [None, \'\', \'?\']:\n+        raise Exception(\'"%s" is not a valid dbkey. You must specify a valid dbkey.\' % (dbkey))\n \n     # Create a tmp_dir, in case a zip file needs to be uncompressed\n     tmp_dir = tempfile.mkdtemp()\n-    #Fetch the FASTA\n+    # Fetch the FASTA\n     try:\n-        REFERENCE_SOURCE_TO_DOWNLOAD[ params[\'param_dict\'][\'reference_source\'][\'reference_source_selector\'] ]( data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir )\n+        reference_source = params[\'param_dict\'][\'reference_source\'][\'reference_source_selector\']\n+        fasta_readers = REFERENCE_SOURCE_TO_DOWNLOAD[reference_source](data_manager_dict=data_manager_dict,\n+                                                                       params=params,\n+                                                                       target_directory=target_directory,\n+                                                                       dbkey=dbkey,\n+                                                                       dbkey_name=dbkey_name,\n+                                                                       sequence_id=sequence_id,\n+                                                                       sequence_name=sequence_name,\n+                                                                       tmp_dir=tmp_dir)\n+        if fasta_readers:\n+            add_fasta_to_table(data_manager_dict=data_manager_dict,\n+                               fasta_readers=fasta_readers,\n+                               target_directory=target_directory,\n+                               dbkey=dbkey,\n+                               dbkey_name=dbkey_name,\n+                               sequence_id=sequence_id,\n+                               sequence_name=sequence_name,\n+                               params=params)\n+\n     finally:\n         cleanup_before_exit(tmp_dir)\n-    #save info to json file\n-    open( filename, \'wb\' ).write( dumps( data_manager_dict ).encode() )\n-        \n+    # save info to json file\n+    open(filename, \'wb\').write(dumps(data_manager_dict).encode())\n+\n+\n if __name__ == "__main__":\n     main()\n'
b
diff -r 86fa71e9b427 -r 60994ca04177 data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml
--- a/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml Fri Aug 26 12:46:47 2016 -0400
+++ b/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml Tue Apr 04 17:13:31 2017 -0400
[
@@ -1,11 +1,12 @@
 <tool id="data_manager_fetch_genome_all_fasta_dbkey" name="Create DBKey and Reference Genome" version="0.0.2" tool_type="manage_data">
     <description>fetching</description>
-    <command><![CDATA[
-       python "$__tool_directory__"/data_manager_fetch_genome_all_fasta_dbkeys.py "${out_file}"
+    <command detect_errors="exit_code"><![CDATA[
+       python '$__tool_directory__/data_manager_fetch_genome_all_fasta_dbkeys.py'
+       '${out_file}'
        #if str( $dbkey_source.dbkey_source_selector ) == 'existing':
-       --dbkey_description ${ dbkey_source.dbkey.get_display_text() }
+           --dbkey_description '${ dbkey_source.dbkey.get_display_text() }'
        #else
-       --dbkey_description "${ dbkey_source.dbkey_name or $dbkey_source.dbkey }"
+           --dbkey_description '${ dbkey_source.dbkey_name or $dbkey_source.dbkey }'
        #end if
     ]]></command>
     <inputs>
@@ -18,12 +19,12 @@
                 <param name="dbkey" type="genomebuild" label="DBKEY to assign to data" />
             </when>
             <when value="new">
-                <param type="text" name="dbkey" value="" label="dbkey" optional="False" />
-                <param type="text" name="dbkey_name" value="" label="Display name for dbkey" />
+                <param name="dbkey" type="text" value="" optional="false" label="dbkey" />
+                <param name="dbkey_name" type="text" value="" label="Display name for dbkey" />
             </when>
         </conditional>
-        <param type="text" name="sequence_name" value="" label="Name of sequence" />
-        <param type="text" name="sequence_id" value="" label="ID for sequence" />
+        <param name="sequence_name" type="text" value="" label="Name of sequence" />
+        <param name="sequence_id" type="text" value="" label="ID for sequence" />
         <conditional name="reference_source">
             <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
                 <option value="ucsc" selected="True">UCSC</option>
@@ -33,20 +34,20 @@
                 <option value="directory">Directory on Server</option>
             </param>
             <when value="ucsc">
-                <param type="text" name="requested_dbkey" value="" label="UCSC's DBKEY for source FASTA" optional="False" />
+                <param name="requested_dbkey" type="text" value="" optional="false" label="UCSC's DBKEY for source FASTA" />
             </when>
             <when value="ncbi">
-                <param type="text" name="requested_identifier" value="" label="NCBI identifier/accession" help="Identifiers (e.g 667699573) or accessions (e.g AC020606.7) may be used" optional="False" />
+                <param name="requested_identifier" type="text" value="" optional="false" label="NCBI identifier/accession" help="Identifiers (e.g 667699573) or accessions (e.g AC020606.7) may be used" />
             </when>
             <when value="url">
-                <param type="text" area="True" name="user_url" value="http://" label="URLs" optional="False" />
+                <param name="user_url" type="text" area="True" value="http://" optional="false" label="URLs" />
             </when>
             <when value="history">
-                <param name="input_fasta" type="data" format="fasta" label="FASTA File" multiple="False" optional="False" />
+                <param name="input_fasta" type="data" format="fasta" label="FASTA file" />
             </when>
             <when value="directory">
-                <param type="text" name="fasta_filename" value="" label="Full path to FASTA File on disk" optional="False" />
-                <param type="boolean" name="create_symlink" truevalue="create_symlink" falsevalue="copy_file" label="Create symlink to original data instead of copying" checked="False" />
+                <param name="fasta_filename" type="text" value="" optional="false" label="Full path to FASTA file on disk" />
+                <param name="create_symlink" type="boolean" truevalue="create_symlink" falsevalue="copy_file" label="Create symlink to original data instead of copying" />
             </when>
         </conditional>
         <conditional name="sorting">
@@ -56,15 +57,12 @@
                 <option value="gatk">GATK</option>
                 <option value="custom">Custom</option>
             </param>
-            <when value="as_is">
-            </when>
-            <when value="lexicographical">
-            </when>
-            <when value="gatk">
-            </when>
+            <when value="as_is" />
+            <when value="lexicographical" />
+            <when value="gatk" />
             <when value="custom">
                 <repeat name="sequence_identifiers" title="Sequence Identifiers" min="1" default="1">
-                    <param type="text" name="identifier" value="" label="Sequence Identifier" optional="False" />
+                    <param name="identifier" type="text" value="" optional="false" label="Sequence Identifier" />
                 </repeat>
                 <param name="handle_not_listed_selector" type="select" label="How to handle non-specified Identifiers">
                     <option value="discard" selected="True">Discard</option>
@@ -82,7 +80,6 @@
         <test>
             <param name="dbkey" value="anoGam1"/>
             <param name="sequence_name" value=""/>
-            <param name="sequence_desc" value=""/>
             <param name="sequence_id" value=""/>
             <param name="reference_source_selector" value="history"/>
             <param name="input_fasta" value="phiX174.fasta"/>
@@ -97,11 +94,8 @@
 
 ------
 
-
-
 .. class:: infomark
 
 **Notice:** If you leave name, description, or id blank, it will be generated automatically.
-
     </help>
 </tool>
b
diff -r 86fa71e9b427 -r 60994ca04177 test-data/test.tar
b
Binary file test-data/test.tar has changed
b
diff -r 86fa71e9b427 -r 60994ca04177 test-data/test.tar.gz
b
Binary file test-data/test.tar.gz has changed
b
diff -r 86fa71e9b427 -r 60994ca04177 test-data/test.zip
b
Binary file test-data/test.zip has changed