view read2mut.xml @ 76:56f271641828 draft

planemo upload for repository commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Fri, 26 Mar 2021 07:49:15 +0000
parents 6ccff403db8a
children fdfe9a919ff7
line wrap: on
line source

<?xml version="1.0" encoding="UTF-8"?>
<tool id="read2mut" name="Call specific mutations in reads:" version="2.1.4" profile="19.01">
    <description>Looks for reads with mutation at known positions and calculates frequencies and stats.</description>
    <!-- NOTE(review): the "requirements" macro is neither defined nor imported in
         the visible part of this file; confirm that a macros import exists,
         otherwise this expand cannot be resolved. The nested requirement is
         handed to the macro as its yield content. -->
    <expand macro="requirements">
        <requirement type="package" version="1.1.0">xlsxwriter</requirement>
    </expand>
    <!-- Links the BAM file and its index under fixed names so that both sit side
         by side in the working directory, then runs the analysis script with the
         values chosen in the tool form.
         NOTE(review): the script name read2mut.py is restored from the tool id;
         confirm it matches the file shipped in the tool directory. -->
    <command><![CDATA[
        ln -s '$file2' bam_input.bam &&
        ln -s '${file2.metadata.bam_index}' bam_input.bam.bai &&
        python '$__tool_directory__/read2mut.py'
        --mutFile '$file1'
        --bamFile bam_input.bam
        --inputJson '$file3'
        --sscsJson '$file4'
        --thresh '$thresh'
        --phred '$phred'
        --trim '$trim'
        $chimera_correction
        --softclipping_dist '$softclipping_dist'
        --reads_threshold '$reads_threshold'
        --outputFile '$output_xlsx'
        --outputFile_csv '$outputFile_csv'
        --outputFile2 '$output_xlsx2'
        --outputFile3 '$output_xlsx3'
    ]]></command>
    <!-- Input datasets and thresholds offered in the tool form. The boolean
         chimera_correction expands to its truevalue (a bare command line flag)
         when checked and to an empty string otherwise. -->
    <inputs>
        <param name="file1" type="data" format="vcf" label="DCS Mutation File" optional="false" help="VCF file with DCS mutations. See Help section below for a detailed explanation."/>
        <param name="file2" type="data" format="bam" label="BAM File of raw reads" optional="false" help="BAM file with aligned raw reads of selected tags."/>
        <param name="file3" type="data" format="json" label="JSON File with DCS tag stats" optional="false" help="JSON file generated by DCS mutations to tags/reads"/>
        <param name="file4" type="data" format="json" label="JSON File with SSCS tag stats" optional="false" help="JSON file generated by DCS mutations to SSCS stats."/>
        <param name="thresh" type="integer" label="Tag count threshold" value="0" help="Integer threshold for displaying mutations. Only mutations occuring in DCS of less than thresh tags are displayed. Default of 0 displays all."/>
        <param name="phred" type="integer" label="Phred quality score threshold" min="0" max="41" value="20" help="Integer threshold for Phred quality score. Only reads higher than this threshold are considered. Default = 20."/>
        <param name="trim" type="integer" label="Trimming threshold" value="10" help="Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10."/>
        <param name="chimera_correction" type="boolean" label="Apply chimera correction?" truevalue="--chimera_correction" falsevalue="" checked="False" help="Count chimeric variants and correct the variant frequencies."/>
        <!-- The help text states the same default as the value attribute (15). -->
        <param name="softclipping_dist" type="integer" label="Distance between artifact and softclipping of the reads" min="1" value="15" help="Count mutation as an artifact if mutation lies within this parameter away from the softclipping part of the reads. Default = 15"/>
        <param name="reads_threshold" type="float" label="Minimum percentage of softclipped reads in a family" min="0.0" max="1.0" value="1.0" help="Float number which specifies the minimum percentage of softclipped reads in a family to be considered in the softclipping tiers. Default: 1.0, means all reads of a family have to be softclipped."/>
    </inputs>
    <!-- Three XLSX workbooks plus one CSV; each label starts with the tool name
         followed by the input datasets the job ran on. -->
    <outputs>
        <data name="output_xlsx" format="xlsx" label="${tool.name} on ${on_string}: XLSX summary"/>
        <data name="outputFile_csv" format="csv" label="${tool.name} on ${on_string}: CSV summary"/>
        <data name="output_xlsx2" format="xlsx" label="${tool.name} on ${on_string}: XLSX allele frequencies"/>
        <data name="output_xlsx3" format="xlsx" label="${tool.name} on ${on_string}: XLSX tiers"/>
    </outputs>
    <!-- Single regression test that runs the tool on the bundled test data and
         compares every output with its expected file, tolerating up to 10
         differing lines. -->
    <tests>
        <test>
            <param name="file1" value="FreeBayes_test.vcf"/>
            <param name="file2" value="Interesting_Reads_test.trim.bam"/>
            <param name="file3" value="tag_count_dict_test.json"/>
            <param name="file4" value="SSCS_counts_test.json"/>
            <param name="thresh" value="0"/>
            <param name="phred" value="20"/>
            <param name="trim" value="10"/>
            <!-- NOTE(review): no value is given here; presumably the unchecked
                 default applies. Confirm against the expected outputs. -->
            <param name="chimera_correction"/>
            <param name="softclipping_dist" value="15"/>
            <param name="reads_threshold" value="1.0"/>
            <output name="output_xlsx" file="Variant_Analyzer_summary_test.xlsx" decompress="true" lines_diff="10"/>
            <output name="outputFile_csv" file="Variant_Analyzer_summary_test.csv" decompress="true" lines_diff="10"/>
            <output name="output_xlsx2" file="Variant_Analyzer_allele_frequencies_test.xlsx" decompress="true" lines_diff="10"/>
            <output name="output_xlsx3" file="Variant_Analyzer_tiers_test.xlsx" decompress="true" lines_diff="10"/>
        </test>
    </tests>
    <help> <![CDATA[
**What it does**

Takes a VCF file with mutations, a BAM file of aligned raw reads, and JSON files 
created by the tools **DCS mutations to tags/reads** and **DCS mutations to SSCS stats** 
as input and calculates frequencies and stats for DCS mutations based on information 
from the raw reads.


**Dataset 1:** VCF file with duplex consensus sequence (DCS) mutations. E.g. 
generated by the `FreeBayes <https://github.com/freebayes/freebayes>`_ or `LoFreq <https://csb5.github.io/lofreq/>`_ variant caller.

**Dataset 2:** BAM file of aligned raw reads. This file can be obtained by the 
tool `Map with BWA-MEM <https://github.com/lh3/bwa>`_.

**Dataset 3:** JSON file generated by the **DCS mutations to tags/reads** tool 
containing dictionaries of the tags of reads containing mutations 
in the DCS.

**Dataset 4:** JSON file generated by the **DCS mutations to SSCS stats** tool 
containing stats of tags that carry a mutation in the SSCS at the same position 
where a mutation is called in the DCS.


The outputs are three XLSX files containing frequencies and stats for DCS mutations based 
on information from the raw reads, and a CSV file containing the summary information without color-coding. In addition, a tier-based 
classification is provided based on the amount of support for a true variant call.

    ]]></help>
    <!-- Citation entries come from the "citation" macro, which is defined outside
         the visible part of this file. The help CDATA section and element are
         closed above so that this expand is parsed as markup, not help text. -->
    <expand macro="citation" />