Previous changeset 2:c57bd7f3fb46 (2018-07-10) Next changeset 4:4b10131cb66d (2018-10-09) |
Commit message:
Uploaded |
modified:
data_manager/data_manager_fetch_gff.py data_manager/data_manager_fetch_gff.xml data_manager_conf.xml |
b |
diff -r c57bd7f3fb46 -r cb0fa3584aeb data_manager/data_manager_fetch_gff.py --- a/data_manager/data_manager_fetch_gff.py Tue Jul 10 10:55:47 2018 -0400 +++ b/data_manager/data_manager_fetch_gff.py Tue Oct 09 14:32:48 2018 -0400 |
[ |
b'@@ -93,116 +93,6 @@\n return [ bz2.BZ2File( fh.name, \'rb\') ]\n \n \n-def sort_fasta( fasta_filename, sort_method, params ):\n- if sort_method is None:\n- return\n- assert sort_method in SORTING_METHODS, ValueError( "%s is not a valid sorting option." % sort_method )\n- return SORTING_METHODS[ sort_method ]( fasta_filename, params )\n-\n-\n-def _move_and_index_fasta_for_sorting( fasta_filename ):\n- unsorted_filename = tempfile.NamedTemporaryFile().name\n- shutil.move( fasta_filename, unsorted_filename )\n- fasta_offsets = {}\n- unsorted_fh = open( unsorted_filename )\n- while True:\n- offset = unsorted_fh.tell()\n- line = unsorted_fh.readline()\n- if not line:\n- break\n- if line.startswith( ">" ):\n- line = line.split( None, 1 )[0][1:]\n- fasta_offsets[ line ] = offset\n- unsorted_fh.close()\n- current_order = map( lambda x: x[1], sorted( map( lambda x: ( x[1], x[0] ), fasta_offsets.items() ) ) )\n- return ( unsorted_filename, fasta_offsets, current_order )\n-\n-\n-def _write_sorted_fasta( sorted_names, fasta_offsets, sorted_fasta_filename, unsorted_fasta_filename ):\n- unsorted_fh = open( unsorted_fasta_filename )\n- sorted_fh = open( sorted_fasta_filename, \'wb+\' )\n- \n- for name in sorted_names:\n- offset = fasta_offsets[ name ]\n- unsorted_fh.seek( offset )\n- sorted_fh.write( unsorted_fh.readline() )\n- while True:\n- line = unsorted_fh.readline()\n- if not line or line.startswith( ">" ):\n- break\n- sorted_fh.write( line )\n- unsorted_fh.close()\n- sorted_fh.close()\n-\n-\n-def _sort_fasta_as_is( fasta_filename, params ):\n- return\n-\n-def _sort_fasta_lexicographical( fasta_filename, params ):\n- ( unsorted_filename, fasta_offsets, current_order ) = _move_and_index_fasta_for_sorting( fasta_filename )\n- sorted_names = sorted( fasta_offsets.keys() )\n- if sorted_names == current_order:\n- shutil.move( unsorted_filename, fasta_filename )\n- else:\n- _write_sorted_fasta( sorted_names, fasta_offsets, fasta_filename, unsorted_filename ) \n-\n-\n-def _sort_fasta_gatk( fasta_filename, params ):\n- #This method was added by reviewer request.\n- ( unsorted_filename, fasta_offsets, current_order ) = _move_and_index_fasta_for_sorting( fasta_filename )\n- sorted_names = map( str, range( 1, 23 ) ) + [ \'X\', \'Y\' ]\n- #detect if we have chrN, or just N\n- has_chr = False\n- for chrom in sorted_names:\n- if "chr%s" % chrom in current_order:\n- has_chr = True\n- break\n- \n- if has_chr:\n- sorted_names = map( lambda x: "chr%s" % x, sorted_names)\n- sorted_names.insert( 0, "chrM" )\n- else:\n- sorted_names.insert( 0, "MT" )\n- sorted_names.extend( map( lambda x: "%s_random" % x, sorted_names ) )\n- \n- existing_sorted_names = []\n- for name in sorted_names:\n- if name in current_order:\n- existing_sorted_names.append( name )\n- for name in current_order:\n- #TODO: confirm that non-canonical names do not need to be sorted specially\n- if name not in existing_sorted_names:\n- existing_sorted_names.append( name )\n- \n- if existing_sorted_names == current_order:\n- shutil.move( unsorted_filename, fasta_filename )\n- else:\n- _write_sorted_fasta( existing_sorted_names, fasta_offsets, fasta_filename, unsorted_filename )\n-\n-\n-def _sort_fasta_custom( fasta_filename, params ):\n- ( unsorted_filename, fasta_offsets, current_order ) = _move_and_index_fasta_for_sorting( fasta_filename )\n- sorted_names = []\n- for id_repeat in params[\'param_dict\'][\'sorting\'][\'sequence_identifiers\']:\n- sorted_names.append( id_repeat[ \'identifier\' ] )\n- handle_not_listed = params[\'param_dict\'][\'sorting\'][\'handle_not_listed_selector\']\n- if handle_not_listed.startswith( \'keep\' ):\n- add_list = []\n- for name in current_order:\n- if '..b't)\n- return "ftp://%s%s" % (UCSC_FTP_SERVER, ucsc_file_name)\n-\n- raise Exception(\'Unable to determine filename for UCSC Genome for %s: %s\' % (ucsc_dbkey, path_contents))\n \n def add_fasta_to_table(data_manager_dict, fasta_readers, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, params):\n for data_table_name, data_table_entry in _stream_fasta_to_file( fasta_readers, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, params ):\n@@ -274,20 +135,6 @@\n _add_data_table_entry( data_manager_dict, data_table_entry, data_table_name )\n \n \n-def download_from_ucsc( data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir ):\n- url = _get_ucsc_download_address(params, dbkey)\n- fasta_readers = get_stream_reader(urlopen(url), tmp_dir)\n- add_fasta_to_table(data_manager_dict, fasta_readers, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, params)\n-\n-\n-def download_from_ncbi( data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir ):\n- NCBI_DOWNLOAD_URL = \'http://togows.dbcls.jp/entry/ncbi-nucleotide/%s.fasta\' #FIXME: taken from dave\'s genome manager...why some japan site?\n- requested_identifier = params[\'param_dict\'][\'reference_source\'][\'requested_identifier\']\n- url = NCBI_DOWNLOAD_URL % requested_identifier\n- fasta_readers = get_stream_reader(urlopen(url), tmp_dir)\n- add_fasta_to_table(data_manager_dict, fasta_readers, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, params)\n-\n-\n def download_from_url( data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir ):\n urls = filter( bool, map( lambda x: x.strip(), params[\'param_dict\'][\'reference_source\'][\'user_url\'].split( \'\\n\' ) ) )\n fasta_readers = [ get_stream_reader(urlopen( url ), tmp_dir) for url in urls ]\n@@ -295,7 +142,6 @@\n \n \n def download_from_history( data_manager_dict, params, target_directory, dbkey, dbkey_name, sequence_id, sequence_name, tmp_dir ):\n- #TODO: allow multiple FASTA input files\n input_filename = params[\'param_dict\'][\'reference_source\'][\'input_fasta\']\n if isinstance( input_filename, list ):\n fasta_readers = [ get_stream_reader(open(filename, \'rb\'), tmp_dir) for filename in input_filename ]\n@@ -405,22 +251,19 @@\n return [ ( DATA_TABLE_NAME, dict( value=sequence_id, dbkey=dbkey, name=sequence_name, path=fasta_base_filename ) ) ]\n \n \n-REFERENCE_SOURCE_TO_DOWNLOAD = dict( ucsc=download_from_ucsc, ncbi=download_from_ncbi, url=download_from_url, history=download_from_history, directory=copy_from_directory )\n-\n-SORTING_METHODS = dict( as_is=_sort_fasta_as_is, lexicographical=_sort_fasta_lexicographical, gatk=_sort_fasta_gatk, custom=_sort_fasta_custom )\n+#REFERENCE_SOURCE_TO_DOWNLOAD = dict( ucsc=download_from_ucsc, ncbi=download_from_ncbi, url=download_from_url, history=download_from_history, directory=copy_from_directory )\n+REFERENCE_SOURCE_TO_DOWNLOAD = dict( url=download_from_url, history=download_from_history, directory=copy_from_directory )\n+#SORTING_METHODS = dict( as_is=_sort_fasta_as_is, lexicographical=_sort_fasta_lexicographical, gatk=_sort_fasta_gatk, custom=_sort_fasta_custom )\n \n \n def main():\n #Parse Command Line\n parser = optparse.OptionParser()\n parser.add_option( \'-d\', \'--dbkey_description\', dest=\'dbkey_description\', action=\'store\', type="string", default=None, help=\'dbkey_description\' )\n- parser.add_option( \'-t\', \'--type\', dest=\'file_type\', action=\'store\', type=\'string\', default=None, help=\'file_type\')\n (options, args) = parser.parse_args()\n \n filename = args[0]\n- global DATA_TABLE_NAME\n- if options.file_type == \'representative\':\n- DATA_TABLE_NAME= \'representative_gff\'\n+ #global DATA_TABLE_NAME\n params = loads( open( filename ).read() )\n target_directory = params[ \'output_data\' ][0][\'extra_files_path\']\n os.mkdir( target_directory )\n' |
b |
diff -r c57bd7f3fb46 -r cb0fa3584aeb data_manager/data_manager_fetch_gff.xml --- a/data_manager/data_manager_fetch_gff.xml Tue Jul 10 10:55:47 2018 -0400 +++ b/data_manager/data_manager_fetch_gff.xml Tue Oct 09 14:32:48 2018 -0400 |
[ |
@@ -2,16 +2,10 @@ <description>fetching</description> <command><![CDATA[ python "$__tool_directory__"/data_manager_fetch_gff.py "${out_file}" - --type $file_type --dbkey_description ${ dbkey.get_display_text() } ]]></command> <inputs> - <param name="file_type" type="select" label="GFF file with only one representative transcript per gene (for htseq-count use) or full features file"> - <option value="representative">Representative GFF</option> - <option value="full">GFF with complete features</option> - </param> - <param name="dbkey" type="genomebuild" label="DBKEY to assign to data" /> <param type="text" name="sequence_name" value="" label="Name of sequence" /> <param type="text" name="sequence_id" value="" label="ID for sequence" /> |
b |
diff -r c57bd7f3fb46 -r cb0fa3584aeb data_manager_conf.xml --- a/data_manager_conf.xml Tue Jul 10 10:55:47 2018 -0400 +++ b/data_manager_conf.xml Tue Oct 09 14:32:48 2018 -0400 |
b |
@@ -16,20 +16,5 @@ </column> </output> </data_table> - <data_table name="representative_gff"> - <output> - <column name="value" /> - <column name="dbkey" /> - <column name="name" /> - <column name="path" output_ref="out_file"> - <move type="file"> - <source>${path}</source> - <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">${dbkey}/representative_gff/${path}</target> - </move> - <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/${dbkey}/representative_gff/${path}</value_translation> - <value_translation type="function">abspath</value_translation> - </column> - </output> - </data_table> </data_manager> </data_managers> |