Mercurial > repos > iuc > variant_analyzer
changeset 1:3556001ff2db draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/variant_analyzer commit 60dc8db809909edf44d662683b1f392b9d5964bf"
author | iuc |
---|---|
date | Wed, 04 Dec 2019 16:21:17 -0500 |
parents | 8d29173d49a9 |
children | 3f1dbd2c59bf |
files | mut2read.py mut2read.xml mut2sscs.py mut2sscs.xml read2mut.py read2mut.xml va_macros.xml |
diffstat | 7 files changed, 34 insertions(+), 45 deletions(-) |
line wrap: on
line diff
--- a/mut2read.py Wed Nov 20 17:47:35 2019 -0500 +++ b/mut2read.py Wed Dec 04 16:21:17 2019 -0500 @@ -63,7 +63,7 @@ # read mut file with open(file1, 'r') as mut: - mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') + mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) # read dcs bam file # pysam.index(file2) @@ -86,7 +86,7 @@ dcs_len = [] - for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=100000000): + for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=100000000): if pileupcolumn.reference_pos == stop_pos - 1: count_alt = 0
--- a/mut2read.xml Wed Nov 20 17:47:35 2019 -0500 +++ b/mut2read.xml Wed Dec 04 16:21:17 2019 -0500 @@ -1,14 +1,10 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="mut2read" name="DCS mutations to tags/reads:" version="1.0.0" profile="19.01"> +<tool id="mut2read" name="DCS mutations to tags/reads:" version="1.0.1" profile="19.01"> <description>Extracts all tags that carry a mutation in the duplex consensus sequence (DCS)</description> <macros> <import>va_macros.xml</import> </macros> - <requirements> - <requirement type="package" version="2.7">python</requirement> - <requirement type="package" version="1.4.0">matplotlib</requirement> - <requirement type="package" version="0.15">pysam</requirement> - </requirements> + <expand macro="requirements"/> <command><![CDATA[ ln -s '$file2' bam_input.bam && ln -s '${file2.metadata.bam_index}' bam_input.bam.bai &&
--- a/mut2sscs.py Wed Nov 20 17:47:35 2019 -0500 +++ b/mut2sscs.py Wed Dec 04 16:21:17 2019 -0500 @@ -56,7 +56,7 @@ # 1. read mut file with open(file1, 'r') as mut: - mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') + mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) # 2 read SSCS bam file # pysam.index(file2) @@ -76,7 +76,7 @@ ref = mut_array[m, 9] alt = mut_array[m, 10] - for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000): + for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000): if pileupcolumn.reference_pos == stop_pos - 1: count_alt = 0 count_ref = 0
--- a/mut2sscs.xml Wed Nov 20 17:47:35 2019 -0500 +++ b/mut2sscs.xml Wed Dec 04 16:21:17 2019 -0500 @@ -1,14 +1,10 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="mut2sscs" name="DCS mutations to SSCS stats:" version="1.0.0" profile="19.01"> +<tool id="mut2sscs" name="DCS mutations to SSCS stats:" version="1.0.1" profile="19.01"> <description>Extracts all tags from the single stranded consensus sequence (SSCS) bam file that carry a mutation at the same position a mutation is called in the duplex consensus sequence (DCS) and calculates their frequencies</description> <macros> <import>va_macros.xml</import> </macros> - <requirements> - <requirement type="package" version="2.7">python</requirement> - <requirement type="package" version="1.4.0">matplotlib</requirement> - <requirement type="package" version="0.15">pysam</requirement> - </requirements> + <expand macro="requirements"/> <command><![CDATA[ ln -s '$file2' bam_input.bam && ln -s '${file2.metadata.bam_index}' bam_input.bam.bai &&
--- a/read2mut.py Wed Nov 20 17:47:35 2019 -0500 +++ b/read2mut.py Wed Dec 04 16:21:17 2019 -0500 @@ -23,7 +23,6 @@ from __future__ import division import argparse -import itertools import json import operator import os @@ -89,7 +88,7 @@ # 1. read mut file with open(file1, 'r') as mut: - mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype='string') + mut_array = np.genfromtxt(mut, skip_header=1, delimiter='\t', comments='#', dtype=str) # 2. load dicts with open(json_file, "r") as f: @@ -122,7 +121,7 @@ mut_read_pos_dict[chrom_stop_pos] = {} reads_dict[chrom_stop_pos] = {} - for pileupcolumn in bam.pileup(chrom.tobytes(), stop_pos - 2, stop_pos, max_depth=1000000000): + for pileupcolumn in bam.pileup(chrom, stop_pos - 2, stop_pos, max_depth=1000000000): if pileupcolumn.reference_pos == stop_pos - 1: count_alt = 0 count_ref = 0 @@ -219,13 +218,7 @@ whole_array = [] for k in pure_tags_dict.values(): - if len(k) != 0: - keys = k.keys() - if len(keys) > 1: - for k1 in keys: - whole_array.append(k1) - else: - whole_array.append(keys[0]) + whole_array.extend(k.keys()) # 7. output summary with threshold workbook = xlsxwriter.Workbook(outfile) @@ -623,14 +616,14 @@ half1_mate2 = array2_half2 half2_mate2 = array2_half # calculate HD of "a" in the tag to all "a's" or "b" in the tag to all "b's" - dist = np.array([sum(itertools.imap(operator.ne, half1_mate1, c)) for c in half1_mate2]) + dist = np.array([sum(map(operator.ne, half1_mate1, c)) for c in half1_mate2]) min_index = np.where(dist == dist.min()) # get index of min HD # get all "b's" of the tag or all "a's" of the tag with minimum HD min_tag_half2 = half2_mate2[min_index] min_tag_array2 = array2[min_index] # get whole tag with min HD min_value = dist.min() # calculate HD of "b" to all "b's" or "a" to all "a's" - dist_second_half = np.array([sum(itertools.imap(operator.ne, half2_mate1, e)) + dist_second_half = np.array([sum(map(operator.ne, half2_mate1, e)) for e in min_tag_half2]) dist2 = dist_second_half.max()
--- a/read2mut.xml Wed Nov 20 17:47:35 2019 -0500 +++ b/read2mut.xml Wed Dec 04 16:21:17 2019 -0500 @@ -1,15 +1,12 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="read2mut" name="Call specific mutations in reads:" version="1.0.0" profile="19.01"> +<tool id="read2mut" name="Call specific mutations in reads:" version="1.0.1" profile="19.01"> <description>Looks for reads with mutation at known positions and calculates frequencies and stats.</description> <macros> <import>va_macros.xml</import> </macros> - <requirements> - <requirement type="package" version="2.7">python</requirement> - <requirement type="package" version="1.4.0">matplotlib</requirement> - <requirement type="package" version="0.15">pysam</requirement> + <expand macro="requirements"> <requirement type="package" version="1.1.0">xlsxwriter</requirement> - </requirements> + </expand> <command><![CDATA[ ln -s '$file2' bam_input.bam && ln -s '${file2.metadata.bam_index}' bam_input.bam.bai &&
--- a/va_macros.xml Wed Nov 20 17:47:35 2019 -0500 +++ b/va_macros.xml Wed Dec 04 16:21:17 2019 -0500 @@ -1,13 +1,20 @@ <macros> <xml name="citation"> - <citations> - <citation type="bibtex"> - @misc{duplex, - author = {Povysil, Gundula and Heinzl, Monika and Salazar, Renato and Stoler, Nicholas and Nekrutenko, Anton and Tiemann-Boege, Irene}, - year = {2019}, - title = {{Variant Analyzer: a quality control for variant calling in duplex sequencing data (manuscript)}} - } - </citation> - </citations> -</xml> -</macros> \ No newline at end of file + <citations> + <citation type="bibtex"> +@misc{duplex, + author = {Povysil, Gundula and Heinzl, Monika and Salazar, Renato and Stoler, Nicholas and Nekrutenko, Anton and Tiemann-Boege, Irene}, + year = {2019}, + title = {{Variant Analyzer: a quality control for variant calling in duplex sequencing data (manuscript)}} + } + </citation> + </citations> + </xml> + <xml name="requirements"> + <requirements> + <requirement type="package" version="3.1.2">matplotlib</requirement> + <requirement type="package" version="0.15">pysam</requirement> + <yield/> + </requirements> + </xml> +</macros>