Mercurial > repos > bgruening > sklearn_build_pipeline

diff model_prediction.py @ 15:3f3c6dc38f3e draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 5b2ac730ec6d3b762faa9034eddd19ad1b347476"
author: bgruening
date: Mon, 16 Dec 2019 05:39:20 -0500
parents: 653be9c354ec
children: 4de3d598c116
--- a/model_prediction.py	Thu Nov 07 05:42:25 2019 -0500
+++ b/model_prediction.py	Mon Dec 16 05:39:20 2019 -0500
@@ -2,13 +2,11 @@
 import json
 import numpy as np
 import pandas as pd
-import tabix
 import warnings
 
 from scipy.io import mmread
 from sklearn.pipeline import Pipeline
 
-from galaxy_ml.externals.selene_sdk.sequences import Genome
 from galaxy_ml.utils import (load_model, read_columns,
                              get_module, try_get_attr)
 
@@ -138,45 +136,10 @@
         pred_data_generator = klass(
             ref_genome_path=ref_seq, vcf_path=vcf_path, **options)
 
-        pred_data_generator.fit()
+        pred_data_generator.set_processing_attrs()
 
         variants = pred_data_generator.variants
-        # TODO : remove the following block after galaxy-ml v0.7.13
-        blacklist_tabix = getattr(pred_data_generator.reference_genome_,
-                                  '_blacklist_tabix', None)
-        clean_variants = []
-        if blacklist_tabix:
-            start_radius = pred_data_generator.start_radius_
-            end_radius = pred_data_generator.end_radius_
 
-            for chrom, pos, name, ref, alt, strand in variants:
-                center = pos + len(ref) // 2
-                start = center - start_radius
-                end = center + end_radius
-
-                if isinstance(pred_data_generator.reference_genome_, Genome):
-                    if "chr" not in chrom:
-                        chrom = "chr" + chrom
-                    if "MT" in chrom:
-                        chrom = chrom[:-1]
-                try:
-                    rows = blacklist_tabix.query(chrom, start, end)
-                    found = 0
-                    for row in rows:
-                        found = 1
-                        break
-                    if found:
-                        continue
-                except tabix.TabixError:
-                    pass
-
-                clean_variants.append((chrom, pos, name, ref, alt, strand))
-        else:
-            clean_variants = variants
-
-        setattr(pred_data_generator, 'variants', clean_variants)
-
-        variants = np.array(clean_variants)
         # predict 1600 sample at once then write to file
         gen_flow = pred_data_generator.flow(batch_size=1600)
author	bgruening
date	Mon, 16 Dec 2019 05:39:20 -0500
parents	653be9c354ec
children	4de3d598c116