Previous changeset 2:776bb1b478a0 (2015-10-14) Next changeset 4:60994ca04177 (2017-04-04) |
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_fetch_genome_dbkeys_all_fasta commit 9df6291d7d65c5274c4fd35775108eae58ea6c23 |
modified:
data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml |
added:
test-data/test.tar test-data/test.tar.bz2 test-data/test.tar.gz test-data/test.zip |
b |
diff -r 776bb1b478a0 -r 86fa71e9b427 data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py --- a/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py Wed Oct 14 13:46:35 2015 -0400 +++ b/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.py Fri Aug 26 12:46:47 2016 -0400 |
[ |
b'@@ -6,27 +6,37 @@\n import tempfile\n import shutil\n import optparse\n-import urllib2\n-#import uuid\n from ftplib import FTP\n import tarfile\n import zipfile\n import gzip\n import bz2\n-\n+try:\n+ # For Python 3.0 and later\n+ from urllib.request import urlopen\n+ from io import BytesIO as StringIO\n+ from io import UnsupportedOperation\n+except ImportError:\n+ # Fall back to Python 2\'s urllib2\n+ from urllib2 import urlopen\n+ from StringIO import StringIO\n+ UnsupportedOperation = AttributeError\n from json import loads, dumps\n \n \n-CHUNK_SIZE = 2**20 #1mb\n+CHUNK_SIZE = 2**20 # 1mb\n+\n \n def cleanup_before_exit( tmp_dir ):\n if tmp_dir and os.path.exists( tmp_dir ):\n shutil.rmtree( tmp_dir )\n \n+\n def stop_err(msg):\n sys.stderr.write(msg)\n sys.exit(1)\n- \n+\n+\n def get_dbkey_dbname_id_name( params, dbkey_description=None ):\n dbkey = params[\'param_dict\'][\'dbkey_source\'][\'dbkey\']\n #TODO: ensure sequence_id is unique and does not already appear in location file\n@@ -48,28 +58,38 @@\n sequence_name = dbkey\n return dbkey, dbkey_name, sequence_id, sequence_name\n \n+\n def _get_files_in_ftp_path( ftp, path ):\n path_contents = []\n ftp.retrlines( \'MLSD %s\' % ( path ), path_contents.append )\n return [ line.split( \';\' )[ -1 ].lstrip() for line in path_contents ]\n \n-def _get_stream_readers_for_tar( file_obj, tmp_dir ):\n- fasta_tar = tarfile.open( fileobj=file_obj, mode=\'r:*\' )\n- return filter( lambda x: x is not None, [ fasta_tar.extractfile( member ) for member in fasta_tar.getmembers() ] )\n+\n+def _get_stream_readers_for_tar( fh, tmp_dir ):\n+ fasta_tar = tarfile.open( fileobj=fh, mode=\'r:*\' )\n+ return [x for x in [fasta_tar.extractfile(member) for member in fasta_tar.getmembers()] if x]\n+\n \n-def _get_stream_readers_for_zip( file_obj, tmp_dir ):\n- fasta_zip = zipfile.ZipFile( file_obj, \'r\' )\n+def _get_stream_readers_for_zip( fh, tmp_dir ):\n+ """\n+ Unpacks all archived files in a zip file.\n+ Individual files will be concatenated (in _stream_fasta_to_file)\n+ """\n+ fasta_zip = zipfile.ZipFile( fh, \'r\' )\n rval = []\n for member in fasta_zip.namelist():\n fasta_zip.extract( member, tmp_dir )\n rval.append( open( os.path.join( tmp_dir, member ), \'rb\' ) )\n return rval\n \n-def _get_stream_readers_for_gzip( file_obj, tmp_dir ):\n- return [ gzip.GzipFile( fileobj=file_obj, mode=\'rb\' ) ]\n+\n+def _get_stream_readers_for_gzip( fh, tmp_dir ):\n+ return [ gzip.GzipFile( fileobj=fh, mode=\'rb\') ]\n \n-def _get_stream_readers_for_bz2( file_obj, tmp_dir ):\n- return [ bz2.BZ2File( file_obj.name, \'rb\' ) ]\n+\n+def _get_stream_readers_for_bz2( fh, tmp_dir ):\n+ return [ bz2.BZ2File( fh.name, \'rb\') ]\n+\n \n def sort_fasta( fasta_filename, sort_method, params ):\n if sort_method is None:\n@@ -77,6 +97,7 @@\n assert sort_method in SORTING_METHODS, ValueError( "%s is not a valid sorting option." % sort_method )\n return SORTING_METHODS[ sort_method ]( fasta_filename, params )\n \n+\n def _move_and_index_fasta_for_sorting( fasta_filename ):\n unsorted_filename = tempfile.NamedTemporaryFile().name\n shutil.move( fasta_filename, unsorted_filename )\n@@ -94,6 +115,7 @@\n current_order = map( lambda x: x[1], sorted( map( lambda x: ( x[1], x[0] ), fasta_offsets.items() ) ) )\n return ( unsorted_filename, fasta_offsets, current_order )\n \n+\n def _write_sorted_fasta( sorted_names, fasta_offsets, sorted_fasta_filename, unsorted_fasta_filename ):\n unsorted_fh = open( unsorted_fasta_filename )\n sorted_fh = open( sorted_fasta_filename, \'wb+\' )\n@@ -110,6 +132,7 @@\n unsorted_fh.close()\n sorted_fh.close()\n \n+\n def _sort_fasta_as_is( fasta_filename, params ):\n return\n \n@@ -121,6 +144,7 @@\n else:\n _write_sorted_fasta( sorted_names, fasta_offsets, fasta_filename, unsorted_filename ) \n \n+\n def _sort_fasta_gatk( fasta_filename, params ):\n #This method was added by reviewer request.\n ( unsorted_filename, fasta_offs'..b'am[0]\n- \n- if isinstance( fasta_stream, list ):\n- last_char = None\n- for fh in fasta_stream:\n- if last_char not in [ None, \'\\n\', \'\\r\' ]:\n- fasta_writer.write( \'\\n\' )\n+ with open( fasta_filename, \'wb+\' ) as fasta_writer:\n+\n+ if isinstance( fasta_stream, list ) and len( fasta_stream ) == 1:\n+ fasta_stream = fasta_stream[0]\n+\n+ if isinstance( fasta_stream, list ):\n+ last_char = None\n+ for fh in fasta_stream:\n+ if last_char not in [ None, \'\\n\', \'\\r\', b\'\\n\', b\'\\r\' ]:\n+ fasta_writer.write( b\'\\n\' )\n+ while True:\n+ data = fh.read( CHUNK_SIZE )\n+ if data:\n+ fasta_writer.write( data )\n+ last_char = data[-1]\n+ else:\n+ break\n+ if close_stream:\n+ fh.close()\n+ else:\n while True:\n- data = fh.read( CHUNK_SIZE )\n+ data = fasta_stream.read( CHUNK_SIZE )\n if data:\n fasta_writer.write( data )\n- last_char = data[-1]\n else:\n break\n if close_stream:\n- fh.close()\n- else:\n- while True:\n- data = fasta_stream.read( CHUNK_SIZE )\n- if data:\n- fasta_writer.write( data )\n- else:\n- break\n- if close_stream:\n- fasta_stream.close()\n- \n- fasta_writer.close()\n- \n+ fasta_stream.close()\n+\n sort_fasta( fasta_filename, params[\'param_dict\'][\'sorting\'][\'sort_selector\'], params )\n \n dbkey_dict = None\n@@ -335,6 +368,7 @@\n \n return [ ( \'__dbkeys__\', dbkey_dict ), ( \'all_fasta\', dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_filename ) ) ]\n \n+\n def compute_fasta_length( fasta_file, out_file, keep_first_word=False ):\n \n infile = fasta_file\n@@ -367,6 +401,7 @@\n out.write( "%s\\t%d\\n" % ( fasta_title[ 1: ], seq_len ) )\n out.close()\n \n+\n def _create_symlink( input_filename, target_directory, dbkey, dbkey_name, sequence_id, sequence_name ):\n fasta_base_filename = "%s.fa" % sequence_id\n fasta_filename = os.path.join( target_directory, fasta_base_filename )\n@@ -382,12 +417,11 @@\n return [ ( \'__dbkeys__\', dbkey_dict ), ( \'all_fasta\', dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_filename ) ) ]\n \n \n-\n-\n REFERENCE_SOURCE_TO_DOWNLOAD = dict( ucsc=download_from_ucsc, ncbi=download_from_ncbi, url=download_from_url, history=download_from_history, directory=copy_from_directory )\n \n SORTING_METHODS = dict( as_is=_sort_fasta_as_is, lexicographical=_sort_fasta_lexicographical, gatk=_sort_fasta_gatk, custom=_sort_fasta_custom )\n \n+\n def main():\n #Parse Command Line\n parser = optparse.OptionParser()\n@@ -405,11 +439,16 @@\n \n if dbkey in [ None, \'\', \'?\' ]:\n raise Exception( \'"%s" is not a valid dbkey. You must specify a valid dbkey.\' % ( dbkey ) )\n- \n+\n+ # Create a tmp_dir, in case a zip file needs to be uncompressed\n+ tmp_dir = tempfile.mkdtemp()\n #Fetch the FASTA\n- REFERENCE_SOURCE_TO_DOWNLOAD[ params[\'param_dict\'][\'reference_source\'][\'reference_source_selector\'] ]( data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name )\n- \n+ try:\n+ REFERENCE_SOURCE_TO_DOWNLOAD[ params[\'param_dict\'][\'reference_source\'][\'reference_source_selector\'] ]( data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir )\n+ finally:\n+ cleanup_before_exit(tmp_dir)\n #save info to json file\n- open( filename, \'wb\' ).write( dumps( data_manager_dict ) )\n+ open( filename, \'wb\' ).write( dumps( data_manager_dict ).encode() )\n \n-if __name__ == "__main__": main()\n+if __name__ == "__main__":\n+ main()\n' |
b |
diff -r 776bb1b478a0 -r 86fa71e9b427 data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml --- a/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml Wed Oct 14 13:46:35 2015 -0400 +++ b/data_manager/data_manager_fetch_genome_all_fasta_dbkeys.xml Fri Aug 26 12:46:47 2016 -0400 |
[ |
b'@@ -1,86 +1,77 @@\n-<tool id="data_manager_fetch_genome_all_fasta_dbkey" name="Create DBKey and Reference Genome" version="0.0.1" tool_type="manage_data">\n+<tool id="data_manager_fetch_genome_all_fasta_dbkey" name="Create DBKey and Reference Genome" version="0.0.2" tool_type="manage_data">\n <description>fetching</description>\n- <command interpreter="python">data_manager_fetch_genome_all_fasta_dbkeys.py "${out_file}" \n- #if str( $dbkey_source.dbkey_source_selector ) == \'existing\':\n- --dbkey_description ${ dbkey_source.dbkey.get_display_text() }\n- #else\n- --dbkey_description "${ dbkey_source.dbkey_name or $dbkey_source.dbkey }"\n- #end if\n- \n- </command>\n+ <command><![CDATA[\n+ python "$__tool_directory__"/data_manager_fetch_genome_all_fasta_dbkeys.py "${out_file}"\n+ #if str( $dbkey_source.dbkey_source_selector ) == \'existing\':\n+ --dbkey_description ${ dbkey_source.dbkey.get_display_text() }\n+ #else\n+ --dbkey_description "${ dbkey_source.dbkey_name or $dbkey_source.dbkey }"\n+ #end if\n+ ]]></command>\n <inputs>\n <conditional name="dbkey_source">\n- <param name="dbkey_source_selector" type="select" label="Use existing dbkey or create a new one.">\n- <option value="existing" selected="True">Existing</option>\n- <option value="new">New</option>\n- </param>\n- <when value="existing">\n- <param name="dbkey" type="genomebuild" label="DBKEY to assign to data" />\n- </when>\n- <when value="new">\n- <param type="text" name="dbkey" value="" label="dbkey" optional="False" />\n- <param type="text" name="dbkey_name" value="" label="Display name for dbkey" />\n- </when>\n+ <param name="dbkey_source_selector" type="select" label="Use existing dbkey or create a new one.">\n+ <option value="existing" selected="True">Existing</option>\n+ <option value="new">New</option>\n+ </param>\n+ <when value="existing">\n+ <param name="dbkey" type="genomebuild" label="DBKEY to assign to data" />\n+ </when>\n+ <when value="new">\n+ <param type="text" name="dbkey" value="" label="dbkey" optional="False" />\n+ <param type="text" name="dbkey_name" value="" label="Display name for dbkey" />\n+ </when>\n </conditional>\n- \n <param type="text" name="sequence_name" value="" label="Name of sequence" />\n <param type="text" name="sequence_id" value="" label="ID for sequence" />\n <conditional name="reference_source">\n- <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">\n- <option value="ucsc" selected="True">UCSC</option>\n- <option value="ncbi">NCBI</option>\n- <option value="url">URL</option>\n- <option value="history">History</option>\n- <option value="directory">Directory on Server</option>\n- </param>\n- <when value="ucsc">\n- <param type="text" name="requested_dbkey" value="" label="UCSC\'s DBKEY for source FASTA" optional="False" />\n- </when>\n- <when value="ncbi">\n- <param type="text" name="requested_identifier" value="" label="NCBI identifier" optional="False" />\n- </when>\n- <when value="url">\n- <param type="text" area="True" name="user_url" value="http://" label="URLs" optional="False" />\n- </when>\n- <when value="history">\n- <param name="input_fasta" type="data" format="fasta" label="FASTA File" multiple="False" optional="False" />\n- </when>\n- <when value="directory">\n- <param type="text" name="fasta_filename" value="" label="Full path to FASTA File on disk" optional="False" />\n- <param type="boolean" name="create_symlink" truevalue="create_symlink" falseval'..b'value="url">URL</option>\n+ <option value="history">History</option>\n+ <option value="directory">Directory on Server</option>\n+ </param>\n+ <when value="ucsc">\n+ <param type="text" name="requested_dbkey" value="" label="UCSC\'s DBKEY for source FASTA" optional="False" />\n+ </when>\n+ <when value="ncbi">\n+ <param type="text" name="requested_identifier" value="" label="NCBI identifier/accession" help="Identifiers (e.g 667699573) or accessions (e.g AC020606.7) may be used" optional="False" />\n+ </when>\n+ <when value="url">\n+ <param type="text" area="True" name="user_url" value="http://" label="URLs" optional="False" />\n+ </when>\n+ <when value="history">\n+ <param name="input_fasta" type="data" format="fasta" label="FASTA File" multiple="False" optional="False" />\n+ </when>\n+ <when value="directory">\n+ <param type="text" name="fasta_filename" value="" label="Full path to FASTA File on disk" optional="False" />\n+ <param type="boolean" name="create_symlink" truevalue="create_symlink" falsevalue="copy_file" label="Create symlink to original data instead of copying" checked="False" />\n+ </when>\n </conditional>\n <conditional name="sorting">\n- <param name="sort_selector" type="select" label="Sort by chromosome name">\n- <option value="as_is" selected="True">As is</option>\n- <option value="lexicographical">Lexicographical</option>\n- <option value="gatk">GATK</option>\n- <option value="custom">Custom</option>\n- </param>\n- <when value="as_is">\n- </when>\n- <when value="lexicographical">\n- </when>\n- <when value="gatk">\n- </when>\n- <when value="custom">\n- <repeat name="sequence_identifiers" title="Sequence Identifiers" min="1" default="1">\n- <param type="text" name="identifier" value="" label="Sequence Identifier" optional="False" />\n- </repeat>\n- <conditional name="handle_not_listed">\n+ <param name="sort_selector" type="select" label="Sort by chromosome name">\n+ <option value="as_is" selected="True">As is</option>\n+ <option value="lexicographical">Lexicographical</option>\n+ <option value="gatk">GATK</option>\n+ <option value="custom">Custom</option>\n+ </param>\n+ <when value="as_is">\n+ </when>\n+ <when value="lexicographical">\n+ </when>\n+ <when value="gatk">\n+ </when>\n+ <when value="custom">\n+ <repeat name="sequence_identifiers" title="Sequence Identifiers" min="1" default="1">\n+ <param type="text" name="identifier" value="" label="Sequence Identifier" optional="False" />\n+ </repeat>\n <param name="handle_not_listed_selector" type="select" label="How to handle non-specified Identifiers">\n <option value="discard" selected="True">Discard</option>\n <option value="keep_append">Keep and Append</option>\n <option value="keep_prepend">Keep and Prepend</option>\n </param>\n- <when value="discard">\n- </when>\n- <when value="keep_append">\n- </when>\n- <when value="keep_prepend">\n- </when>\n- </conditional>\n- </when>\n+ </when>\n </conditional>\n </inputs>\n <outputs>\n@@ -110,7 +101,7 @@\n \n .. class:: infomark\n \n-**Notice:** If you leave name, description, or id blank, it will be generated automatically. \n+**Notice:** If you leave name, description, or id blank, it will be generated automatically.\n \n </help>\n </tool>\n' |
b |
diff -r 776bb1b478a0 -r 86fa71e9b427 test-data/test.tar |
b |
Binary file test-data/test.tar has changed |
b |
diff -r 776bb1b478a0 -r 86fa71e9b427 test-data/test.tar.bz2 |
b |
Binary file test-data/test.tar.bz2 has changed |
b |
diff -r 776bb1b478a0 -r 86fa71e9b427 test-data/test.tar.gz |
b |
Binary file test-data/test.tar.gz has changed |
b |
diff -r 776bb1b478a0 -r 86fa71e9b427 test-data/test.zip |
b |
Binary file test-data/test.zip has changed |