comparison predict.py @ 3:302332b914ef draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/VirHunter commit 58587e05f604590c70550e13fc51b7425e916ed4
author iuc
date Sat, 14 Jan 2023 21:08:33 +0000
parents ea2cccb9f73e
children
comparison
equal deleted inserted replaced
2:ea2cccb9f73e 3:302332b914ef
151 ds_path=ts, 151 ds_path=ts,
152 nn_weights_path=weights, 152 nn_weights_path=weights,
153 length=l_, 153 length=l_,
154 use_10=use_10 154 use_10=use_10
155 ) 155 )
156 print(df)
157 df = predict_rf( 156 df = predict_rf(
158 df=df, 157 df=df,
159 rf_weights_path=weights, 158 rf_weights_path=weights,
160 length=l_, 159 length=l_,
161 use_10=use_10 160 use_10=use_10
166 dfs_cont.append(df) 165 dfs_cont.append(df)
167 # print('prediction finished') 166 # print('prediction finished')
168 df_500 = dfs_fr[0][(dfs_fr[0]['length'] >= limit) & (dfs_fr[0]['length'] < 1500)] 167 df_500 = dfs_fr[0][(dfs_fr[0]['length'] >= limit) & (dfs_fr[0]['length'] < 1500)]
169 df_1000 = dfs_fr[1][(dfs_fr[1]['length'] >= 1500)] 168 df_1000 = dfs_fr[1][(dfs_fr[1]['length'] >= 1500)]
170 df = pd.concat([df_1000, df_500], ignore_index=True) 169 df = pd.concat([df_1000, df_500], ignore_index=True)
171 pred_fr = Path(out_path, 'predicted_fragments.csv') 170 pred_fr = Path(out_path, 'predicted_fragments.tsv')
172 df.to_csv(pred_fr) 171 df.to_csv(pred_fr, sep='\t')
173 172
174 df_500 = dfs_cont[0][(dfs_cont[0]['length'] >= limit) & (dfs_cont[0]['length'] < 1500)] 173 df_500 = dfs_cont[0][(dfs_cont[0]['length'] >= limit) & (dfs_cont[0]['length'] < 1500)]
175 df_1000 = dfs_cont[1][(dfs_cont[1]['length'] >= 1500)] 174 df_1000 = dfs_cont[1][(dfs_cont[1]['length'] >= 1500)]
176 df = pd.concat([df_1000, df_500], ignore_index=True) 175 df = pd.concat([df_1000, df_500], ignore_index=True)
177 pred_contigs = Path(out_path, 'predicted.csv') 176 pred_contigs = Path(out_path, 'predicted.tsv')
178 df.to_csv(pred_contigs) 177 df.to_csv(pred_contigs, sep='\t')
179 178
180 if return_viral: 179 if return_viral:
181 viral_ids = list(df[df["decision"] == "virus"]["id"]) 180 viral_ids = list(df[df["decision"] == "virus"]["id"])
182 seqs_ = list(SeqIO.parse(ts, "fasta")) 181 seqs_ = list(SeqIO.parse(ts, "fasta"))
183 viral_seqs = [s_ for s_ in seqs_ if s_.id in viral_ids] 182 viral_seqs = [s_ for s_ in seqs_ if s_.id in viral_ids]