changeset 1:ac850912d386 draft

Uploaded
author devteam
date Tue, 03 Feb 2015 10:01:30 -0500
parents 2ebc856bce29
children cca219f2b212
files .shed.yml data_manager/data_manager_fetch_genome_all_fasta.py
diffstat 2 files changed, 18 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Tue Feb 03 10:01:30 2015 -0500
@@ -0,0 +1,4 @@
+# repository published to https://toolshed.g2.bx.psu.edu/repos/devteam/data_manager_fetch_genome_all_fasta
+owner: devteam
+name: data_manager_fetch_genome_all_fasta
+    
\ No newline at end of file
--- a/data_manager/data_manager_fetch_genome_all_fasta.py	Fri Mar 28 14:14:24 2014 -0400
+++ b/data_manager/data_manager_fetch_genome_all_fasta.py	Tue Feb 03 10:01:30 2015 -0500
@@ -14,7 +14,7 @@
 import gzip
 import bz2
 
-from galaxy.util.json import from_json_string, to_json_string
+from json import loads, dumps
 
 
 CHUNK_SIZE = 2**20 #1mb
@@ -48,7 +48,7 @@
 
 def _get_stream_readers_for_tar( file_obj, tmp_dir ):
     fasta_tar = tarfile.open( fileobj=file_obj, mode='r:*' )
-    return [ fasta_tar.extractfile( member ) for member in fasta_tar.getmembers() ]
+    return filter( lambda x: x is not None, [ fasta_tar.extractfile( member ) for member in fasta_tar.getmembers() ] )
 
 def _get_stream_readers_for_zip( file_obj, tmp_dir ):
     fasta_zip = zipfile.ZipFile( file_obj, 'r' )
@@ -170,7 +170,6 @@
 
 def download_from_ucsc( data_manager_dict, params, target_directory, dbkey, sequence_id, sequence_name ):
     UCSC_FTP_SERVER = 'hgdownload.cse.ucsc.edu'
-    UCSC_CHROM_FA_FILENAME = 'chromFa'
     UCSC_DOWNLOAD_PATH = '/goldenPath/%s/bigZips/'
     COMPRESSED_EXTENSIONS = [ ( '.tar.gz', _get_stream_readers_for_tar ), ( '.tar.bz2', _get_stream_readers_for_tar ), ( '.zip', _get_stream_readers_for_zip ), ( '.fa.gz', _get_stream_readers_for_gzip ), ( '.fa.bz2', _get_stream_readers_for_bz2 ) ]
     
@@ -179,6 +178,8 @@
         email = 'anonymous@example.com'
 
     ucsc_dbkey = params['param_dict']['reference_source']['requested_dbkey'] or dbkey
+    UCSC_CHROM_FA_FILENAMES = [ '%s.chromFa' % ucsc_dbkey, 'chromFa' ]
+    
     ftp = FTP( UCSC_FTP_SERVER )
     ftp.login( 'anonymous', email )
     
@@ -188,9 +189,13 @@
     ucsc_file_name = None
     get_stream_reader = None
     ext = None
-    for ext, get_stream_reader in COMPRESSED_EXTENSIONS:
-        if "%s%s" % ( UCSC_CHROM_FA_FILENAME, ext ) in path_contents:
-            ucsc_file_name = "%s%s%s" % ( ucsc_path, UCSC_CHROM_FA_FILENAME, ext )
+    ucsc_chrom_fa_filename = None
+    for ucsc_chrom_fa_filename in UCSC_CHROM_FA_FILENAMES:
+        for ext, get_stream_reader in COMPRESSED_EXTENSIONS:
+            if "%s%s" % ( ucsc_chrom_fa_filename, ext ) in path_contents:
+                ucsc_file_name = "%s%s%s" % ( ucsc_path, ucsc_chrom_fa_filename, ext )
+                break
+        if ucsc_file_name:
             break
     
     if not ucsc_file_name:
@@ -198,7 +203,7 @@
     
     
     tmp_dir = tempfile.mkdtemp( prefix='tmp-data-manager-ucsc-' )
-    ucsc_fasta_filename = os.path.join( tmp_dir, "%s%s" % ( UCSC_CHROM_FA_FILENAME, ext ) )
+    ucsc_fasta_filename = os.path.join( tmp_dir, "%s%s" % ( ucsc_chrom_fa_filename, ext ) )
     
     fasta_base_filename = "%s.fa" % sequence_id
     fasta_filename = os.path.join( target_directory, fasta_base_filename )
@@ -331,7 +336,7 @@
     
     filename = args[0]
     
-    params = from_json_string( open( filename ).read() )
+    params = loads( open( filename ).read() )
     target_directory = params[ 'output_data' ][0]['extra_files_path']
     os.mkdir( target_directory )
     data_manager_dict = {}
@@ -345,6 +350,6 @@
     REFERENCE_SOURCE_TO_DOWNLOAD[ params['param_dict']['reference_source']['reference_source_selector'] ]( data_manager_dict, params, target_directory, dbkey, sequence_id, sequence_name )
     
     #save info to json file
-    open( filename, 'wb' ).write( to_json_string( data_manager_dict ) )
+    open( filename, 'wb' ).write( dumps( data_manager_dict ) )
         
 if __name__ == "__main__": main()