diff read2mut.py @ 1:3556001ff2db draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 60dc8db809909edf44d662683b1f392b9d5964bf"
author iuc
date Wed, 04 Dec 2019 16:21:17 -0500
parents 8d29173d49a9
children 3f1dbd2c59bf
line wrap: on
line diff
--- a/read2mut.py	Wed Nov 20 17:47:35 2019 -0500
+++ b/read2mut.py	Wed Dec 04 16:21:17 2019 -0500
@@ -23,7 +23,6 @@
 from __future__ import division
 
 import argparse
-import itertools
 import json
 import operator
 import os
@@ -89,7 +88,7 @@
 
     # 1. read mut file
     with open(file1, 'r') as mut:
-        mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string')
+        mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)
 
     # 2. load dicts
     with open(json_file, "r") as f:
@@ -122,7 +121,7 @@
         mut_read_pos_dict[chrom_stop_pos] = {}
         reads_dict[chrom_stop_pos] = {}
 
-        for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000):
+        for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000):
             if pileupcolumn.reference_pos == stop_pos - 1:
                 count_alt = 0
                 count_ref = 0
@@ -219,13 +218,7 @@
 
     whole_array = []
     for k in pure_tags_dict.values():
-        if len(k) != 0:
-            keys = k.keys()
-            if len(keys) > 1:
-                for k1 in keys:
-                    whole_array.append(k1)
-            else:
-                whole_array.append(keys[0])
+        whole_array.extend(k.keys())
 
     # 7. output summary with threshold
     workbook = xlsxwriter.Workbook(outfile)
@@ -623,14 +616,14 @@
                                     half1_mate2 = array2_half2
                                     half2_mate2 = array2_half
                                 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's"
-                                dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2])
+                                dist = np.array([sum(map(operator.ne, half1_mate1, c)) for c in half1_mate2])
                                 min_index = np.where(dist == dist.min())  # get index of min HD
                                 # get all "b's" of the tag or all "a's" of the tag with minimum HD
                                 min_tag_half2 = half2_mate2[min_index]
                                 min_tag_array2 = array2[min_index]  # get whole tag with min HD
                                 min_value = dist.min()
                                 # calculate HD of "b" to all "b's" or "a" to all "a's"
-                                dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e))
+                                dist_second_half = np.array([sum(map(operator.ne, half2_mate1, e))
                                                              for e in min_tag_half2])
 
                                 dist2 = dist_second_half.max()