view bamutil_diff.xml @ 0:2cafa8420c04 draft default tip

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/bamutil/ commit c1945909ca200610f128577b68a82d9228905f3d-dirty"
author jjohnson
date Fri, 26 Mar 2021 13:16:53 +0000
parents
children
line wrap: on
line source

<tool id="bamutil_diff" name="BamUtil diff" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" python_template_version="3.5">
    <description>two coordinate sorted SAM/BAM files</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Assembles the bamUtil "bam diff" invocation; failure is detected from the exit code only.

        * Both inputs (in1, in2) are always passed, single-quoted.
        * The "fields" conditional decides what is compared:
            - default: nothing is added
            - all:     the "all" flag is added
            - select:  each boolean expands to its truevalue (or to an empty string when
                       unchecked), followed by the tag option chosen in the nested
                       "samtags" conditional (every tag, a user-supplied list, or none).
        * posDiff and onlyDiffs come straight from the matching parameters.
        * recPoolSize is hard-coded to -1.
          NOTE(review): presumably this means "no limit" - confirm against the bamUtil docs.
        * The output file name is the literal value of the "output_as" select
          (diff.txt, diff.bam or diff.sam).
    -->
    <command detect_errors="exit_code"><![CDATA[
    bam diff 
    --in1 '$in1'
    --in2 '$in2'
    #if $fields.choice == 'all':
        --all
    #elif $fields.choice == 'select':
        $fields.flag
        $fields.mapQual
        $fields.mate
        $fields.isize
        $fields.seq
        $fields.baseQual
        $fields.noCigar
        $fields.noPos
        #if $fields.samtags.tagchoice == 'everyTag':
            --everyTag
        #elif $fields.samtags.tagchoice == 'specify':
            --tags '$fields.samtags.tags'
        #end if
    #end if
    --posDiff $posDiff
    --recPoolSize -1
    $onlyDiffs
    --params
    --noPhoneHome
    --out $output_as
    ]]></command>
    <inputs>
        <!-- The two alignment files to compare; both SAM and BAM are accepted. -->
        <param argument="--in1" type="data" format="sam,bam" label="Input SAM/BAM 1"/>
        <param argument="--in2" type="data" format="sam,bam" label="Input SAM/BAM 2"/>
        <param argument="--posDiff" type="integer" value="100000" min="0" label="max base pair difference between possibly matching records"/>
        <param argument="--onlyDiffs" type="boolean" truevalue="--onlyDiffs" falsevalue="" checked="false" label="only print the fields that differ"/>
        <!--
            Which record fields are compared.
            "default" and "all" need no further parameters; "select" exposes one boolean
            per comparison flag plus a nested choice for tag comparison.
        -->
        <conditional name="fields">
            <param name="choice" type="select" label="BAM fields to diff">
                <option value="default" selected="true">Read Name, Flag Fragment bit, Position, Cigar</option>
                <option value="all">Diff all the SAM/BAM fields</option>
                <option value="select">Select SAM/BAM fields to diff</option>
            </param>
            <when value="default"/>
            <when value="all"/>
            <when value="select">
                <!-- Each boolean's truevalue is the literal flag placed on the command line. -->
                <param argument="--flag" type="boolean" truevalue="--flag" falsevalue="" checked="false" label="diff the flags."/>
                <param argument="--mapQual" type="boolean" truevalue="--mapQual" falsevalue="" checked="false" label="diff the mapping qualities."/>
                <param argument="--mate" type="boolean" truevalue="--mate" falsevalue="" checked="false" label="diff the mate chrom/pos."/>
                <param argument="--isize" type="boolean" truevalue="--isize" falsevalue="" checked="false" label="diff the insert sizes."/>
                <param argument="--seq" type="boolean" truevalue="--seq" falsevalue="" checked="false" label="diff the sequence bases."/>
                <param argument="--baseQual" type="boolean" truevalue="--baseQual" falsevalue="" checked="false" label="diff the base qualities."/>
                <param argument="--noCigar" type="boolean" truevalue="--noCigar" falsevalue="" checked="false" label="do not diff the cigars."/>
                <param argument="--noPos" type="boolean" truevalue="--noPos" falsevalue="" checked="false" label="do not diff the positions."/>
                <conditional name="samtags">
                    <param name="tagchoice" type="select" label="Tags to diff">
                        <option value="none">Do not diff tags</option>
                        <option value="everyTag">Diff every tag</option>
                        <option value="specify">Specify tags to diff</option>
                    </param>
                    <when value="none"/>
                    <when value="everyTag"/>
                    <when value="specify">
                        <!-- The validator only admits a comma-separated list of two-character tags, each followed by ":" and one type letter. -->
                        <param argument="--tags" type="text" label="diff the specified Tags formatted as Tag:Type,Tag:Type,Tag:Type...">
                            <validator type="regex" message="SAM 2-char Tag:type">^([A-Za-z][A-Za-z0-9]:[AifZHB])(,[A-Za-z][A-Za-z0-9]:[AifZHB])*$</validator>
                        </param>
                    </when>
                </conditional>
            </when>
        </conditional>
        <!-- The option values are used verbatim as the output file name on the command line. -->
        <param name="output_as" type="select" label="Output format">
            <option value="diff.txt">ASCII text diff file</option>
            <option value="diff.bam">BAM files: diff, only_in_file1, only_in_file2</option>
            <option value="diff.sam">SAM files: diff, only_in_file1, only_in_file2</option>
        </param>
    </inputs>
    <outputs>
        <!-- BAM mode: the record diffs plus one file per input holding the records found only there. -->
        <data name="diff_bam"
              format="bam"
              from_work_dir="diff.bam"
              label="${tool.name} on ${on_string}: diff.bam">
            <filter>output_as == 'diff.bam'</filter>
        </data>
        <data name="diff_only1_bam"
              format="bam"
              from_work_dir="diff_only1_*.bam"
              label="${tool.name} on ${on_string} only in: ${in1.element_identifier}">
            <filter>output_as == 'diff.bam'</filter>
        </data>
        <data name="diff_only2_bam"
              format="bam"
              from_work_dir="diff_only2_*.bam"
              label="${tool.name} on ${on_string} only in: ${in2.element_identifier}">
            <filter>output_as == 'diff.bam'</filter>
        </data>

        <!-- SAM mode: same three files as BAM mode, in SAM format. -->
        <data name="diff_sam"
              format="sam"
              from_work_dir="diff.sam"
              label="${tool.name} on ${on_string}: diff.sam">
            <filter>output_as == 'diff.sam'</filter>
        </data>
        <data name="diff_only1_sam"
              format="sam"
              from_work_dir="diff_only1_*.sam"
              label="${tool.name} on ${on_string} only in: ${in1.element_identifier}">
            <filter>output_as == 'diff.sam'</filter>
        </data>
        <data name="diff_only2_sam"
              format="sam"
              from_work_dir="diff_only2_*.sam"
              label="${tool.name} on ${on_string} only in: ${in2.element_identifier}">
            <filter>output_as == 'diff.sam'</filter>
        </data>

        <!-- Text mode: a single plain-text diff file. -->
        <data name="diff_txt"
              format="txt"
              from_work_dir="diff.txt"
              label="${tool.name} on ${on_string}: diff.txt">
            <filter>output_as == 'diff.txt'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test-1: default field comparison of in1.sam against in2.sam, written as a text diff. -->
        <test>
            <param name="in1" ftype="sam" value="in1.sam"/>
            <param name="in2" ftype="sam" value="in2.sam"/>
            <param name="posDiff" value="100000"/>
            <param name="onlyDiffs" value="true"/>
            <conditional name="fields">
                <param name="choice" value="default"/>
            </conditional>
            <param name="output_as" value="diff.txt"/>
            <!--
                A single output element carries both checks: the comparison against the
                expected diff.txt file and the targeted content assertions. Declaring the
                same output name twice is redundant and ambiguous.
            -->
            <output name="diff_txt" file="diff.txt">
                <assert_contents>
                    <has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687" />
                    <has_text_matching expression="&lt;\t1a3\t74M74N1M" />
                    <has_text_matching expression="&gt;\ta3\t74M66N1M" />
                </assert_contents>
            </output>
        </test>

        <!--
            Test-2: "select" mode comparing the flag, the sequence and the AS:i / MD:Z tags
            of in1.sam against in2.sam, with SAM output. Checks which read names appear in
            the diff file and in each of the two only-in-one-input files.
        -->
        <test>
            <param name="in1" ftype="sam" value="in1.sam"/>
            <param name="in2" ftype="sam" value="in2.sam"/>
            <param name="posDiff" value="100000"/>
            <param name="onlyDiffs" value="true"/>
            <conditional name="fields">
                <param name="choice" value="select"/>
                <param name="flag" value="true"/>
                <param name="seq" value="true"/>
                <conditional name="samtags">
                    <param name="tagchoice" value="specify"/>
                    <param name="tags" value="AS:i,MD:Z"/>
                </conditional>
            </conditional>
            <param name="output_as" value="diff.sam"/>
            <output name="diff_sam">
                <assert_contents>
                    <has_text text="NB500964:249:HHLFNBGX7:4:12608:21020:10228" />
                    <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" />
                    <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" />
                </assert_contents>
            </output>
            <output name="diff_only1_sam">
                <assert_contents>
                    <has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" />
                    <not_has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" />
                    <has_text text="TGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCACCG" />
                    <has_text text="AS:i:0" />
                    <has_text text="MD:Z:75" />
                </assert_contents>
            </output>
            <output name="diff_only2_sam">
                <assert_contents>
                    <has_text text="NB500964:249:HHLFNBGX7:4:11510:10074:3541" />
                    <not_has_text text="NB500964:249:HHLFNBGX7:1:12312:5087:3846" />
                    <has_text text="ATCTGTCACCCCATTGATCGCCAGGGTTGATTCGGCTGATCTGGCTGGCTAGGCGGGTGTCCCCTTCCTCCCTCA" />
                    <has_text text="AS:i:0" />
                    <has_text text="MD:Z:75" />
                </assert_contents>
            </output>
        </test>
        <!--
            Test-3: default field comparison of in1.sam against in3.sam, written as a text
            diff. Asserts that read NB500964:249:HHLFNBGX7:3:21407:1974:9687 is not reported.
        -->
        <test>
            <param name="in1" ftype="sam" value="in1.sam"/>
            <param name="in2" ftype="sam" value="in3.sam"/>
            <param name="posDiff" value="100000"/>
            <param name="onlyDiffs" value="true"/>
            <conditional name="fields">
                <param name="choice" value="default"/>
            </conditional>
            <param name="output_as" value="diff.txt"/>
            <output name="diff_txt">
                <assert_contents>
                    <not_has_text text="NB500964:249:HHLFNBGX7:3:21407:1974:9687" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**bamUtil diff**

The diff option on the bamUtil executable prints the difference between two coordinate sorted SAM/BAM files. This can be used to compare the outputs of running a SAM/BAM through different tools/versions of tools.
The diff tool compares records that have the same Read Name and Fragment (from the flag). If a matching ReadName & Fragment is not found, the record is considered to be different.
diff assumes the files are coordinate sorted and uses this assumption for determining how long to store a record before determining that the other file does not contain a matching ReadName/Fragment. If the files are not coordinate sorted, this logic does not work.
By default, just the chromosome/position and cigar are compared for each record.
Note: The headers are not compared.

Options are available to compare::

 - all fields
 - flags
 - mapping quality
 - mate chromosome/position
 - insert size
 - sequence
 - base quality
 - specified tags
 - all tags
 - turn off position comparison
 - turn off cigar comparison

**Inputs**
  Two BAM or SAM alignment files

**Outputs**
  Choice of 2 Output Formats:

  ::

    **Diff Format**
    There are 2 types of differences.
    ReadName/Fragment combo is in one file, but not in the other file within the window set by recPoolSize & posDiff
    ReadName/Fragment combo is in both files, but at least one of the specified fields to diff is different
    Each difference output consists of 2 or 3 lines. If the record only appears in one of the files, the diff is 2 lines; if it appears in both files, the diff is 3 lines.
    The first line of the difference output is just the read name.
    The 2nd and 3rd line (if present) begin with either a '<' or a '>'. If the record is from the first file (--in1), it begins with a '<'. If the record is from the 2nd file (--in2), it begins with a '>'.
    The 2nd line is the flag followed by the diff'd fields from one of the records.
    The 3rd line (if a matching record was found) is the flag followed by the diff'd fields from the matching record.
    The diff'd record lines are tab separated, and are in the following order if --onlyDiffs is not specified::

      - '<' or '>'
      - flag
      - chrom:pos (chromosome name ':' 1 based position) - if --noPos is not specified
      - cigar - if --noCigar is not specified
      - mapping quality - if --mapQual or --all is specified
      - mate chrom:pos (chromosome name ':' 1 based position) - if --mate or --all is specified
      - insert size - if --isize or --all is specified
      - sequence - if --seq or --all is specified
      - base quality - if --baseQual or --all is specified
      - tag:type:value - for each tag:type specified in --tags or for every tag if --all or --everyTag specified


    **BAM Format**
    In SAM/BAM format there will be 3 output files::

      1. the specified name with record diffs
      2. specified name with _only1_<in1>.sam/bam with records only in the in1 file
      3. specified name with _only2_<in2>.sam/bam with records only in the in2 file

    Records that are identical in the two files are not written in any of these output files.
    When a record is found in both input files, but a difference is found, the record from the first file is written with additional tags to indicate the values from the second file, using the following tags::

      - ZF - Flag
      - ZP - Chromosome:1-based Position
      - ZC - Cigar
      - ZM - Mapping Quality
      - ZN - Chromosome:1-based Mate Position
      - ZI - Insert Size
      - ZS - Sequence
      - ZQ - Base Quality
      - ZT - Tags

    If --onlyDiffs is not specified, all fields that were compared will be printed in the tags. If --onlyDiffs is specified, then only the differing compared fields will be printed in the tags.




https://genome.sph.umich.edu/wiki/BamUtil:_diff

    ]]></help>
    <expand macro="citations" />
</tool>