Mercurial > repos > devteam > data_manager_fetch_genome_all_fasta
changeset 1:ac850912d386 draft
Uploaded
author | devteam |
---|---|
date | Tue, 03 Feb 2015 10:01:30 -0500 |
parents | 2ebc856bce29 |
children | cca219f2b212 |
files | .shed.yml data_manager/data_manager_fetch_genome_all_fasta.py |
diffstat | 2 files changed, 18 insertions(+), 9 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml Tue Feb 03 10:01:30 2015 -0500
@@ -0,0 +1,4 @@
+# repository published to https://toolshed.g2.bx.psu.edu/repos/devteam/data_manager_fetch_genome_all_fasta
+owner: devteam
+name: data_manager_fetch_genome_all_fasta
+
\ No newline at end of file
--- a/data_manager/data_manager_fetch_genome_all_fasta.py Fri Mar 28 14:14:24 2014 -0400 +++ b/data_manager/data_manager_fetch_genome_all_fasta.py Tue Feb 03 10:01:30 2015 -0500 @@ -14,7 +14,7 @@ import gzip import bz2 -from galaxy.util.json import from_json_string, to_json_string +from json import loads, dumps CHUNK_SIZE = 2**20 #1mb @@ -48,7 +48,7 @@ def _get_stream_readers_for_tar( file_obj, tmp_dir ): fasta_tar = tarfile.open( fileobj=file_obj, mode='r:*' ) - return [ fasta_tar.extractfile( member ) for member in fasta_tar.getmembers() ] + return filter( lambda x: x is not None, [ fasta_tar.extractfile( member ) for member in fasta_tar.getmembers() ] ) def _get_stream_readers_for_zip( file_obj, tmp_dir ): fasta_zip = zipfile.ZipFile( file_obj, 'r' ) @@ -170,7 +170,6 @@ def download_from_ucsc( data_manager_dict, params, target_directory, dbkey, sequence_id, sequence_name ): UCSC_FTP_SERVER = 'hgdownload.cse.ucsc.edu' - UCSC_CHROM_FA_FILENAME = 'chromFa' UCSC_DOWNLOAD_PATH = '/goldenPath/%s/bigZips/' COMPRESSED_EXTENSIONS = [ ( '.tar.gz', _get_stream_readers_for_tar ), ( '.tar.bz2', _get_stream_readers_for_tar ), ( '.zip', _get_stream_readers_for_zip ), ( '.fa.gz', _get_stream_readers_for_gzip ), ( '.fa.bz2', _get_stream_readers_for_bz2 ) ] @@ -179,6 +178,8 @@ email = 'anonymous@example.com' ucsc_dbkey = params['param_dict']['reference_source']['requested_dbkey'] or dbkey + UCSC_CHROM_FA_FILENAMES = [ '%s.chromFa' % ucsc_dbkey, 'chromFa' ] + ftp = FTP( UCSC_FTP_SERVER ) ftp.login( 'anonymous', email ) @@ -188,9 +189,13 @@ ucsc_file_name = None get_stream_reader = None ext = None - for ext, get_stream_reader in COMPRESSED_EXTENSIONS: - if "%s%s" % ( UCSC_CHROM_FA_FILENAME, ext ) in path_contents: - ucsc_file_name = "%s%s%s" % ( ucsc_path, UCSC_CHROM_FA_FILENAME, ext ) + ucsc_chrom_fa_filename = None + for ucsc_chrom_fa_filename in UCSC_CHROM_FA_FILENAMES: + for ext, get_stream_reader in COMPRESSED_EXTENSIONS: + if "%s%s" % ( ucsc_chrom_fa_filename, ext ) in 
path_contents: + ucsc_file_name = "%s%s%s" % ( ucsc_path, ucsc_chrom_fa_filename, ext ) + break + if ucsc_file_name: break if not ucsc_file_name: @@ -198,7 +203,7 @@ tmp_dir = tempfile.mkdtemp( prefix='tmp-data-manager-ucsc-' ) - ucsc_fasta_filename = os.path.join( tmp_dir, "%s%s" % ( UCSC_CHROM_FA_FILENAME, ext ) ) + ucsc_fasta_filename = os.path.join( tmp_dir, "%s%s" % ( ucsc_chrom_fa_filename, ext ) ) fasta_base_filename = "%s.fa" % sequence_id fasta_filename = os.path.join( target_directory, fasta_base_filename ) @@ -331,7 +336,7 @@ filename = args[0] - params = from_json_string( open( filename ).read() ) + params = loads( open( filename ).read() ) target_directory = params[ 'output_data' ][0]['extra_files_path'] os.mkdir( target_directory ) data_manager_dict = {} @@ -345,6 +350,6 @@ REFERENCE_SOURCE_TO_DOWNLOAD[ params['param_dict']['reference_source']['reference_source_selector'] ]( data_manager_dict, params, target_directory, dbkey, sequence_id, sequence_name ) #save info to json file - open( filename, 'wb' ).write( to_json_string( data_manager_dict ) ) + open( filename, 'wb' ).write( dumps( data_manager_dict ) ) if __name__ == "__main__": main()