sklearn_build_pipeline: model_prediction.py comparison

comparison model_prediction.py @ 15:3f3c6dc38f3e draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5b2ac730ec6d3b762faa9034eddd19ad1b347476"

author	bgruening
date	Mon, 16 Dec 2019 05:39:20 -0500
parents	653be9c354ec
children	4de3d598c116

comparison

Legend: equal, deleted, inserted, replaced

-:c33145a815ee
+:3f3c6dc38f3e
 import argparse
 import json
 import numpy as np
 import pandas as pd
-import tabix
 import warnings
 from scipy.io import mmread
 from sklearn.pipeline import Pipeline
-from galaxy_ml.externals.selene_sdk.sequences import Genome
 from galaxy_ml.utils import (load_model, read_columns,
 get_module, try_get_attr)
 N_JOBS = int(__import__('os').environ.get('GALAXY_SLOTS', 1))
 options['blacklist_regions'] = None
 pred_data_generator = klass(
 ref_genome_path=ref_seq, vcf_path=vcf_path, **options)
-pred_data_generator.fit()
+pred_data_generator.set_processing_attrs()
 variants = pred_data_generator.variants
-# TODO : remove the following block after galaxy-ml v0.7.13
-blacklist_tabix = getattr(pred_data_generator.reference_genome_,
-'_blacklist_tabix', None)
-clean_variants = []
-if blacklist_tabix:
-start_radius = pred_data_generator.start_radius_
-end_radius = pred_data_generator.end_radius_
-for chrom, pos, name, ref, alt, strand in variants:
-center = pos + len(ref) // 2
-start = center - start_radius
-end = center + end_radius
-if isinstance(pred_data_generator.reference_genome_, Genome):
-if "chr" not in chrom:
-chrom = "chr" + chrom
-if "MT" in chrom:
-chrom = chrom[:-1]
-try:
-rows = blacklist_tabix.query(chrom, start, end)
-found = 0
-for row in rows:
-found = 1
-break
-if found:
-continue
-except tabix.TabixError:
-pass
-clean_variants.append((chrom, pos, name, ref, alt, strand))
-else:
-clean_variants = variants
-setattr(pred_data_generator, 'variants', clean_variants)
-variants = np.array(clean_variants)
 # predict 1600 sample at once then write to file
 gen_flow = pred_data_generator.flow(batch_size=1600)
 file_writer = open(outfile_predict, 'w')
 header_row = '\t'.join(['chrom', 'pos', 'name', 'ref',

Mercurial > repos > bgruening > sklearn_build_pipeline

comparison model_prediction.py @ 15:3f3c6dc38f3e draft