Mercurial > repos > iuc > variant_analyzer

--- a/mut2read.py	Wed Nov 20 17:47:35 2019 -0500
+++ b/mut2read.py	Wed Dec 04 16:21:17 2019 -0500
@@ -63,7 +63,7 @@

     # read mut file
     with open(file1, 'r') as mut:
-        mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string')
+        mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)

     # read dcs bam file
     # pysam.index(file2)
@@ -86,7 +86,7 @@

         dcs_len = []

-        for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=100000000):
+        for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=100000000):

             if pileupcolumn.reference_pos == stop_pos - 1:
                 count_alt = 0
--- a/mut2read.xml	Wed Nov 20 17:47:35 2019 -0500
+++ b/mut2read.xml	Wed Dec 04 16:21:17 2019 -0500
@@ -1,14 +1,10 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<tool id="mut2read" name="DCS mutations to tags/reads:" version="1.0.0" profile="19.01">
+<tool id="mut2read" name="DCS mutations to tags/reads:" version="1.0.1" profile="19.01">
     <description>Extracts all tags that carry a mutation in the duplex consensus sequence (DCS)</description>
     <macros>
         <import>va_macros.xml</import>
     </macros>
-	<requirements>
-        <requirement type="package" version="2.7">python</requirement>
-        <requirement type="package" version="1.4.0">matplotlib</requirement>
-        <requirement type="package" version="0.15">pysam</requirement>
-    </requirements>
+    <expand macro="requirements"/>
     <command><![CDATA[
         ln -s '$file2' bam_input.bam &&
         ln -s '${file2.metadata.bam_index}' bam_input.bam.bai &&
--- a/mut2sscs.py	Wed Nov 20 17:47:35 2019 -0500
+++ b/mut2sscs.py	Wed Dec 04 16:21:17 2019 -0500
@@ -56,7 +56,7 @@

     # 1. read mut file
     with open(file1, 'r') as mut:
-        mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string')
+        mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)

     # 2 read SSCS bam file
     # pysam.index(file2)
@@ -76,7 +76,7 @@
         ref = mut_array[m, 9]
         alt = mut_array[m, 10]

-        for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000):
+        for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000):
             if pileupcolumn.reference_pos == stop_pos - 1:
                 count_alt = 0
                 count_ref = 0
--- a/mut2sscs.xml	Wed Nov 20 17:47:35 2019 -0500
+++ b/mut2sscs.xml	Wed Dec 04 16:21:17 2019 -0500
@@ -1,14 +1,10 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<tool id="mut2sscs" name="DCS mutations to SSCS stats:" version="1.0.0" profile="19.01">
+<tool id="mut2sscs" name="DCS mutations to SSCS stats:" version="1.0.1" profile="19.01">
     <description>Extracts all tags from the single stranded consensus sequence (SSCS) bam file that carry a mutation at the same position a mutation is called in the duplex consensus sequence (DCS) and calculates their frequencies</description>
     <macros>
         <import>va_macros.xml</import>
     </macros>
-    <requirements>
-        <requirement type="package" version="2.7">python</requirement>
-        <requirement type="package" version="1.4.0">matplotlib</requirement>
-        <requirement type="package" version="0.15">pysam</requirement>
-    </requirements>
+    <expand macro="requirements"/>
     <command><![CDATA[
         ln -s '$file2' bam_input.bam &&
         ln -s '${file2.metadata.bam_index}' bam_input.bam.bai &&
--- a/read2mut.py	Wed Nov 20 17:47:35 2019 -0500
+++ b/read2mut.py	Wed Dec 04 16:21:17 2019 -0500
@@ -23,7 +23,6 @@
 from __future__ import division

 import argparse
-import itertools
 import json
 import operator
 import os
@@ -89,7 +88,7 @@

     # 1. read mut file
     with open(file1, 'r') as mut:
-        mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string')
+        mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str)

     # 2. load dicts
     with open(json_file, "r") as f:
@@ -122,7 +121,7 @@
         mut_read_pos_dict[chrom_stop_pos] = {}
         reads_dict[chrom_stop_pos] = {}

-        for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000):
+        for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000):
             if pileupcolumn.reference_pos == stop_pos - 1:
                 count_alt = 0
                 count_ref = 0
@@ -219,13 +218,7 @@

     whole_array = []
     for k in pure_tags_dict.values():
-        if len(k) != 0:
-            keys = k.keys()
-            if len(keys) > 1:
-                for k1 in keys:
-                    whole_array.append(k1)
-            else:
-                whole_array.append(keys[0])
+        whole_array.extend(k.keys())

     # 7. output summary with threshold
     workbook = xlsxwriter.Workbook(outfile)
@@ -623,14 +616,14 @@
                                     half1_mate2 = array2_half2
                                     half2_mate2 = array2_half
                                 # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's"
-                                dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2])
+                                dist = np.array([sum(map(operator.ne, half1_mate1, c)) for c in half1_mate2])
                                 min_index = np.where(dist == dist.min())  # get index of min HD
                                 # get all "b's" of the tag or all "a's" of the tag with minimum HD
                                 min_tag_half2 = half2_mate2[min_index]
                                 min_tag_array2 = array2[min_index]  # get whole tag with min HD
                                 min_value = dist.min()
                                 # calculate HD of "b" to all "b's" or "a" to all "a's"
-                                dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e))
+                                dist_second_half = np.array([sum(map(operator.ne, half2_mate1, e))
                                                              for e in min_tag_half2])

                                 dist2 = dist_second_half.max()
--- a/read2mut.xml	Wed Nov 20 17:47:35 2019 -0500
+++ b/read2mut.xml	Wed Dec 04 16:21:17 2019 -0500
@@ -1,15 +1,12 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<tool id="read2mut" name="Call specific mutations in reads:" version="1.0.0" profile="19.01">
+<tool id="read2mut" name="Call specific mutations in reads:" version="1.0.1" profile="19.01">
     <description>Looks for reads with mutation at known positions and calculates frequencies and stats.</description>
     <macros>
         <import>va_macros.xml</import>
     </macros>
-    <requirements>
-        <requirement type="package" version="2.7">python</requirement>
-        <requirement type="package" version="1.4.0">matplotlib</requirement>
-        <requirement type="package" version="0.15">pysam</requirement>
+    <expand macro="requirements">
         <requirement type="package" version="1.1.0">xlsxwriter</requirement>
-    </requirements>
+    </expand>
     <command><![CDATA[
         ln -s '$file2' bam_input.bam &&
         ln -s '${file2.metadata.bam_index}' bam_input.bam.bai &&
--- a/va_macros.xml	Wed Nov 20 17:47:35 2019 -0500
+++ b/va_macros.xml	Wed Dec 04 16:21:17 2019 -0500
@@ -1,13 +1,20 @@
 <macros>
     <xml name="citation">
-    <citations>
-        <citation type="bibtex">
-            @misc{duplex,
-            author = {Povysil, Gundula and Heinzl, Monika and Salazar, Renato and Stoler, Nicholas and Nekrutenko, Anton and Tiemann-Boege, Irene},
-            year = {2019},
-            title = {{Variant Analyzer: a quality control for variant calling in duplex sequencing data (manuscript)}}
-         }
-        </citation>
-    </citations>
-</xml>
-</macros>
\ No newline at end of file
+        <citations>
+            <citation type="bibtex">
+@misc{duplex,
+    author = {Povysil, Gundula and Heinzl, Monika and Salazar, Renato and Stoler, Nicholas and Nekrutenko, Anton and Tiemann-Boege, Irene},
+    year = {2019},
+    title = {{Variant Analyzer: a quality control for variant calling in duplex sequencing data (manuscript)}}
+ }
+           </citation>
+        </citations>
+    </xml>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="3.1.2">matplotlib</requirement>
+            <requirement type="package" version="0.15">pysam</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+</macros>