view mut2read.xml @ 88:63e4e5d9a98f draft

planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8-dirty
author mheinzl
date Sat, 22 Apr 2023 10:06:51 +0000
parents 97bd9c7a1b44
children
line wrap: on
line source

<?xml version="1.0" encoding="UTF-8"?>
<tool id="mut2read" name="DCS mutations to tags/reads:" version="3.2.0" profile="19.01">
    <!-- One-line summary of what the tool extracts. -->
    <description>Extracts all tags that carry a mutation in the duplex consensus sequence (DCS)</description>
    <!-- Shared macros are pulled in from va_macros.xml; the "requirements"
         macro expanded below is not defined in this file. -->
    <macros>
        <import>va_macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Command template, executed in three chained steps:
          1. Symlink the BAM input (file2) to the relative name bam_input.bam.
          2. Symlink that dataset's bam_index metadata file to
             bam_input.bam.bai, i.e. next to the linked BAM.
             NOTE(review): presumably needed so the script can locate the
             index by file name; confirm against mut2read.py.
          3. Run mut2read.py from the tool directory with the VCF (file1), the
             linked BAM, the families file (file3) and both output paths.
        The refalttiers placeholder expands to the boolean parameter's
        truevalue or falsevalue, so it is either the extra flag or nothing.
    -->
    <command><![CDATA[
        ln -s '$file2' bam_input.bam &&
        ln -s '${file2.metadata.bam_index}' bam_input.bam.bai &&
        python '$__tool_directory__/mut2read.py' 
        --mutFile '$file1'
        --bamFile bam_input.bam
        --familiesFile '$file3'
        $refalttiers
        --outputFastq '$output_fastq' 
        --outputJson '$output_json'
    ]]>
    </command>
    <inputs>
        <!-- VCF with the DCS mutation calls; handed to the script as the mutation file. -->
        <param name="file1"
               type="data"
               format="vcf"
               optional="false"
               label="DCS Mutation File"
               help="VCF file with DCS mutations. See the Help section below for a detailed explanation."/>

        <!-- Aligned DCS reads; the command links this dataset and its index before use. -->
        <param name="file2"
               type="data"
               format="bam"
               optional="false"
               label="DCS BAM File"
               help="BAM file with aligned DCS reads."/>

        <!-- Aligned families table; handed to the script as the families file. -->
        <param name="file3"
               type="data"
               format="tabular"
               optional="false"
               label="Aligned Families File"
               help="TABULAR file with aligned families."/>

        <!-- Off by default. When checked, its truevalue is inserted into the
             command line; when unchecked, the empty falsevalue is inserted. -->
        <param name="refalttiers"
               type="boolean"
               checked="False"
               truevalue="--refalttiers"
               falsevalue=""
               label="Extract tiers also for reference allele"
               help="Extracts tier information for the alternate and reference allele. Note that this will increase the running time of the tool. Otherwise only the tier information for the alternate allele is extracted."/>
    </inputs>
    <!-- Two result datasets; their paths are passed to the script through the
         outputFastq and outputJson arguments of the command. -->
    <outputs>
        <data name="output_fastq" format="fastq" label="${tool.name} on ${on_string}: FASTQ"/>
        <data name="output_json" format="json" label="${tool.name} on ${on_string}: JSON"/>
    </outputs>
    <!-- Single test case: the three inputs are supplied and refalttiers is not
         set, so its default (unchecked) applies. Each output names an expected
         result file.
         NOTE(review): no test exercises the refalttiers switch; consider
         adding one once expected outputs for that mode are available. -->
    <tests>
        <test>
            <param name="file1" value="FreeBayes_test.vcf"/>
            <param name="file2" value="DCS_test.bam"/>
            <param name="file3" value="Aligned_Families_test.tabular"/>
            <output name="output_fastq" file="Interesting_Reads_test.fastq"/>
            <output name="output_json" file="tag_count_dict_test.json"/>
        </test>
    </tests>
    <help> <![CDATA[
**What it does**

Takes a VCF file with mutations, a BAM file of aligned DCS reads, and a
tabular file with aligned families as input. It prints all tags of reads that
carry a mutation or have the reference allele to a user-specified output file
and creates a FASTQ file of the reads of tags with a mutation (and, optionally,
the reference allele).

**Input** 

**Dataset 1:** VCF file with duplex consensus sequence (DCS) mutations, e.g.
generated by the `FreeBayes <https://arxiv.org/abs/1207.3907>`_ or `LoFreq <https://academic.oup.com/nar/article/40/22/11189/1152727>`_ variant caller.


**Dataset 2:** BAM file of aligned DCS reads. This file can be obtained by the 
tool `Map with BWA-MEM <https://arxiv.org/abs/1303.3997>`_.

**Dataset 3:** Tabular file with reads as produced by the
**Du Novo: Align families** tool of the `Du Novo Analysis Pipeline
<https://doi.org/10.1186/s13059-016-1039-4>`_.

**Output**

The output is a JSON file containing dictionaries of the tags of reads containing mutations
in the DCS and a FASTQ file of all reads of these tags.

    ]]> 
    </help>
    <!-- Expands the "citation" macro, which is not defined in this file
         (see the va_macros.xml import in the macros section). -->
    <expand macro="citation" />
</tool>