Mercurial > repos > xuebing > sharplabtool
comparison tools/data_source/microbial_import_code.py @ 0:9071e359b9a3
Uploaded
| author | xuebing |
|---|---|
| date | Fri, 09 Mar 2012 19:37:19 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:9071e359b9a3 |
|---|---|
| 1 | |
def load_microbial_data( GALAXY_DATA_INDEX_DIR, sep='\t' ):
    """Parse microbial_data.loc into a kingdom -> org_num -> organism mapping.

    The file "<GALAXY_DATA_INDEX_DIR>/microbial_data.loc" is read line by
    line. Blank lines and lines starting with '#' are ignored. The first
    field of every other line selects the record type (case-insensitive):

    ORG  : org_num, name, kingdom, group, chromosomes, info_url, link_site
    CHR  : org_num, chr_acc, name, length, gi, gb, info_url
    DATA : uid, org_num, chr_acc, feature, filetype, path

    Lines of any other type, lines with too few fields, and DATA lines that
    refer to a chromosome not yet declared by a CHR line are skipped.

    Parameters:
        GALAXY_DATA_INDEX_DIR: directory that contains microbial_data.loc.
        sep: field separator used in the file (a tab by default).

    Returns:
        dict mapping kingdom -> org_num -> organism dict. Each organism dict
        has the keys 'name', 'kingdom', 'group', 'chromosomes', 'info_url',
        'link_site' and 'chrs'; 'chrs' maps chr_acc to a dict with 'name',
        'length', 'gi', 'gb', 'info_url' and, once a DATA line was seen,
        'data' (uid -> {'filetype', 'path', 'feature'}).

    Raises:
        IOError / OSError: if the file cannot be opened.
    """
    # FIXME: this function is duplicated in the DynamicOptions class. It is used here only to
    # set data.name in exec_after_process().
    orgs = {}

    filename = "%s/microbial_data.loc" % GALAXY_DATA_INDEX_DIR
    # 'with' guarantees the handle is closed even if parsing fails.
    with open( filename ) as loc_file:
        for line in loc_file:
            line = line.rstrip( '\r\n' )
            if not line or line.startswith( '#' ):
                continue
            fields = line.split( sep )
            info_type = fields[0].upper()
            if info_type == "ORG":
                #ORG 12521 Clostridium perfringens SM101 bacteria Firmicutes CP000312,CP000313,CP000314,CP000315 http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?db=genomeprj&cmd=Retrieve&dopt=Overview&list_uids=12521
                if len( fields ) < 8:
                    continue  # not enough fields, go to next line
                org_num, name, kingdom, group, chromosomes, info_url, link_site = fields[1:8]
                # Keep chromosomes already collected for this organism.
                org = orgs.setdefault( org_num, { 'chrs': {} } )
                org[ 'name' ] = name
                org[ 'kingdom' ] = kingdom
                org[ 'group' ] = group
                org[ 'chromosomes' ] = chromosomes
                org[ 'info_url' ] = info_url
                org[ 'link_site' ] = link_site
            elif info_type == "CHR":
                #CHR 12521 CP000315 Clostridium perfringens phage phiSM101, complete genome 38092 110684521 CP000315.1
                if len( fields ) < 8:
                    continue  # not enough fields, go to next line
                org_num, chr_acc, name, length, gi, gb, info_url = fields[1:8]
                org = orgs.setdefault( org_num, { 'chrs': {} } )
                # A repeated CHR line replaces the earlier record for that accession.
                org[ 'chrs' ][ chr_acc ] = {
                    'name': name,
                    'length': length,
                    'gi': gi,
                    'gb': gb,
                    'info_url': info_url,
                }
            elif info_type == "DATA":
                #DATA 12521_12521_CDS 12521 CP000315 CDS bed /home/djb396/alignments/playground/bacteria/12521/CP000315.CDS.bed
                if len( fields ) < 7:
                    continue  # not enough fields, go to next line
                uid, org_num, chr_acc, feature, filetype, path = fields[1:7]
                org = orgs.setdefault( org_num, { 'chrs': {} } )
                chrom = org[ 'chrs' ].get( chr_acc )
                if chrom is None:
                    # No CHR line has declared this chromosome yet; there is
                    # nothing to attach the data to, so the line is skipped.
                    continue
                chrom.setdefault( 'data', {} )[ uid ] = {
                    'filetype': filetype,
                    'path': path,
                    'feature': feature,
                }

    microbe_info = {}
    for org_num, org in orgs.items():
        # Organisms that only appeared in CHR/DATA lines have no kingdom and
        # cannot be grouped; skipping them avoids a KeyError here.
        if 'kingdom' not in org:
            continue
        microbe_info.setdefault( org[ 'kingdom' ], {} )[ org_num ] = org
    return microbe_info
| 81 | |
| 82 #post processing, set build for data and add additional data to history | |
| 83 from galaxy import datatypes, config, jobs, tools | |
| 84 from shutil import copyfile | |
| 85 | |
def _microbial_dataset_label( microbe_info, kingdom, org, chrom, description ):
    """Return the ' (<feature> for <organism name>:<chrom>)' suffix used in dataset names."""
    org_info = microbe_info[kingdom][org]
    feature = org_info['chrs'][chrom]['data'][description]['feature']
    return " (" + feature + " for " + org_info['name'] + ":" + chrom + ")"


def exec_after_process(app, inp_data, out_data, param_dict, tool, stdout, stderr):
    """Post-process the tool output: name/type the first dataset and add extra ones.

    The tool's stdout is scanned line by line for tab-separated directives:

    #File1   description  chrom  dbkey  file_type
        Renames the first output dataset, sets its dbkey and info, changes
        its datatype to file_type and refreshes metadata and peek.
    #NewFile description  chrom  dbkey  filepath  file_type
        Creates a new HistoryDatasetAssociation in the same history, copies
        the permissions of the first output dataset onto it and copies
        filepath into its file. If the copy fails the new dataset is put in
        the error state with an explanatory info message.

    Dataset names are built from the records returned by
    load_microbial_data() for app.config.tool_data_path, looked up by the
    'kingdom' and 'org' entries of param_dict.

    Parameters:
        app: application object; its config, model, datatypes_registry and
            security_agent attributes are used.
        inp_data: unused.
        out_data: mapping of output names to datasets; only the first item
            is used.
        param_dict: tool parameters; 'kingdom' and 'org' are read.
        tool: unused.
        stdout: text written by the tool, containing the directives above.
        stderr: unused.

    Returns:
        None.

    Raises:
        KeyError: if kingdom/org/chrom/description from the directives are
            not present in the loaded microbial data.
        IndexError: if a directive line has fewer fields than listed above.
    """
    # list() keeps the lookup working where dict.items() is not indexable.
    base_dataset = list( out_data.items() )[0][1]
    history = base_dataset.history
    if history is None:
        print( "unknown history!" )
        return
    kingdom = param_dict.get( 'kingdom', None )
    #group = param_dict.get( 'group', None )
    org = param_dict.get( 'org', None )

    #if not (kingdom or group or org):
    if not (kingdom or org):
        # NOTE(review): processing continues after this message, so a later
        # directive will fail on the lookup - confirm whether an early
        # return is wanted here.
        print( "Parameters are not available." )
    #workflow passes galaxy.tools.parameters.basic.UnvalidatedValue instead of values
    if isinstance( kingdom, tools.parameters.basic.UnvalidatedValue ):
        kingdom = kingdom.value
    if isinstance( org, tools.parameters.basic.UnvalidatedValue ):
        org = org.value

    microbe_info = load_microbial_data( app.config.tool_data_path, sep='\t' )
    # Name of the first dataset before it is decorated; reused as the prefix
    # of every additional dataset's name.
    basic_name = ""
    for line in stdout.split( "\n" ):
        fields = line.split( "\t" )
        if fields[0] == "#File1":
            description = fields[1]
            chrom = fields[2]
            dbkey = fields[3]
            file_type = fields[4]
            data = base_dataset
            data.set_size()
            basic_name = data.name
            data.name = data.name + _microbial_dataset_label( microbe_info, kingdom, org, chrom, description )
            data.dbkey = dbkey
            data.info = data.name
            data = app.datatypes_registry.change_datatype( data, file_type )
            data.init_meta()
            data.set_peek()
            app.model.context.add( data )
            app.model.context.flush()
        elif fields[0] == "#NewFile":
            description = fields[1]
            chrom = fields[2]
            dbkey = fields[3]
            filepath = fields[4]
            file_type = fields[5]
            newdata = app.model.HistoryDatasetAssociation( create_dataset = True, sa_session = app.model.context ) #This import should become a library
            newdata.set_size()
            newdata.extension = file_type
            newdata.name = basic_name + _microbial_dataset_label( microbe_info, kingdom, org, chrom, description )
            app.model.context.add( newdata )
            app.model.context.flush()
            app.security_agent.copy_dataset_permissions( base_dataset.dataset, newdata.dataset )
            history.add_dataset( newdata )
            app.model.context.add( history )
            app.model.context.flush()
            try:
                copyfile( filepath, newdata.file_name )
                newdata.info = newdata.name
                newdata.state = jobs.JOB_OK
            except Exception:
                # Best effort: any failure to copy is reported on the dataset
                # itself, but interpreter-exit signals are no longer swallowed.
                newdata.info = "The requested file is missing from the system."
                newdata.state = jobs.JOB_ERROR
            newdata.dbkey = dbkey
            newdata.init_meta()
            newdata.set_peek()
            app.model.context.flush()
