changeset 2:3aa9416cc3b3 draft

Uploaded
author jackcurragh
date Fri, 27 May 2022 11:33:45 +0000
parents 79c0c6042954
children 48e1c3a73761
files bedtools_genomecov/genomeCoverageBed.xml bedtools_genomecov/macros.xml bedtools_genomecov/test-data/genomeCoverageBed1.bed bedtools_genomecov/test-data/genomeCoverageBed1.len bedtools_genomecov/test-data/genomeCoverageBed_result1.bed bedtools_genomecov/tool_data_table_conf.xml.sample bedtools_genomecov/tool_data_table_conf.xml.test
diffstat 7 files changed, 478 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bedtools_genomecov/genomeCoverageBed.xml	Fri May 27 11:33:45 2022 +0000
@@ -0,0 +1,170 @@
+<tool id="bedtools_genomecoveragebed" name="BedTools Genome Coverage" version="@TOOL_VERSION@" profile="@PROFILE@">
+    <description>Compute Read Coverage Over An Entire Genome</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="bio_tools" />
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <command><![CDATA[
+bedtools genomecov
+@GENOME_FILE_COVERAGE@
+
+$split
+$strand
+
+#if str($report.report_select) == "bg":
+    #if $report.zero_regions:
+        $report.zero_regions
+    #else:
+        -bg
+    #end if
+
+    #if str($report.scale):
+        -scale $report.scale
+    #end if
+#else:
+    #if str($report.max):
+        -max $report.max
+    #end if
+#end if
+$d
+$dz
+$five
+$three
+> '$output'
+    ]]></command>
+    <inputs>
+        <conditional name="input_type">
+            <param name="input_type_select" type="select" label="Input type">
+                <option value="bed">@STD_BEDTOOLS_INPUT_LABEL@</option>
+                <option value="bam" selected='true'>BAM</option>
+            </param>
+            <when value="bed">
+                <param name="input" argument="-i" type="data" format="@STD_BEDTOOLS_INPUTS@" label="@STD_BEDTOOLS_INPUT_LABEL@ file" />
+                <expand macro="input_conditional_genome_file" />
+            </when>
+            <when value="bam">
+                <param name="input" argument="-ibam" type="data" format="bam" label="BAM file" />
+            </when>
+        </conditional>
+        <conditional name="report"> <!-- Output type selector; the chosen branch decides which report flags the command template emits -->
+            <param name="report_select" type="select" label="Output type">
+                <option value="bg" selected="true">BedGraph coverage file</option>
+                <option value="hist">Data suitable for Histogram</option>
+            </param>
+            <when value="bg"> <!-- command emits -bga when zero_regions is checked, otherwise -bg, followed by -scale -->
+                <param name="zero_regions" argument="-bga" type="boolean" truevalue="-bga" falsevalue="" checked="false"
+                    label="Report regions with zero coverage" help="If set, regions without any coverage will also be reported" />
+                <param argument="-scale" type="float" value="1.0"
+                    label="Scale the coverage by a constant factor"
+                    help="Each bedGraph coverage value is multiplied by this factor before being reported. Useful for normalizing coverage by, e.g., reads per million (RPM)." />
+            </when>
+            <when value="hist"> <!-- command emits -max with the value given here -->
+                <param argument="-max" type="integer" value="0" label="Specify max depth"
+                    help="Combine all positions with a depth >= max into a single bin in the histogram" />
+            </when>
+        </conditional>
+        <expand macro="split" />
+        <param argument="-strand" type="select" label="Calculate coverage based on">
+            <option value="">both strands combined</option>
+            <option value="-strand +">positive strand only</option>
+            <option value="-strand -">negative strand only</option>
+        </param>
+
+        <param argument="-d" type="boolean" truevalue="-d" falsevalue="" checked="false"
+            label="Report the depth at each genome position with 1-based coordinates" />
+        <param argument="-dz" type="boolean" truevalue="-dz" falsevalue="" checked="false"
+            label="Report the depth at each genome position with 0-based coordinates" />
+        <param name="five" argument="-5" type="boolean" truevalue="-5" falsevalue="" checked="false"
+            label="Calculate coverage of 5’ positions" help="Instead of entire interval" />
+        <param name="three" argument="-3" type="boolean" truevalue="-3" falsevalue="" checked="false"
+            label="Calculate coverage of 3’ positions" help="Instead of entire interval" />
+    </inputs>
+    <outputs>
+        <data name="output" format="bedgraph">
+            <change_format>
+                <when input="report.report_select" value="hist" format="tabular" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_type_select" value="bed" />
+            <param name="input" value="genomeCoverageBed1.bed" ftype="bed" />
+            <param name="genome_file_opts_selector" value="hist" />
+            <param name="genome" value="genomeCoverageBed1.len" ftype="tabular" />
+            <param name="report_select" value="hist" />
+            <output name="output" file="genomeCoverageBed_result1.bed" ftype="tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+This tool calculates the genome-wide coverage of intervals defined in a BAM or BED file and reports them in BedGraph format.
+
+.. image:: $PATH_TO_IMAGES/genomecov-glyph.png
+
+.. class:: warningmark
+
+The input BED or BAM file must be sorted by chromosome name (but doesn't necessarily have to be sorted by start position).
+
+-----
+
+**Example 1**
+
+Input (BED format)-
+Overlapping, un-sorted intervals::
+
+    chr1    140 176
+    chr1    100 130
+    chr1    120 147
+
+
+Output (BedGraph format)-
+Sorted, non-overlapping intervals, with coverage value on the 4th column::
+
+    chr1    100 120 1
+    chr1    120 130 2
+    chr1    130 140 1
+    chr1    140 147 2
+    chr1    147 176 1
+
+-----
+
+**Example 2 - with ZERO-Regions selected (assuming hg19)**
+
+Input (BED format)-
+Overlapping, un-sorted intervals::
+
+    chr1    140 176
+    chr1    100 130
+    chr1    120 147
+
+
+Histogram output will contain five columns:
+
+    * 1. Chromosome name (or 'genome' for whole-genome coverage)
+    * 2. Coverage depth
+    * 3. The number of bases on chromosome (or genome) with depth equal to column 2.
+    * 4. The size of chromosome (or entire genome) in base pairs
+    * 5. The fraction of bases on chromosome (or entire genome) with depth equal to column 2.
+
+**Example Output**::
+
+    chr2L       0           1379895     23011544    0.0599653
+    chr2L       1           837250      23011544    0.0363839
+    chr2L       2           904442      23011544    0.0393038
+    chr2L       3           913723      23011544    0.0397072
+    chr2L       4           952166      23011544    0.0413778
+    chr2L       5           967763      23011544    0.0420555
+    chr2L       6           986331      23011544    0.0428624
+    chr2L       7           998244      23011544    0.0433801
+    chr2L       8           995791      23011544    0.0432735
+    chr2L       9           996398      23011544    0.0432999
+
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations" />
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bedtools_genomecov/macros.xml	Fri May 27 11:33:45 2022 +0000
@@ -0,0 +1,266 @@
+<macros>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="@TOOL_VERSION@">bedtools</requirement>
+            <yield/>
+        </requirements>
+    </xml>
+    <xml name="bio_tools">
+        <xrefs>
+            <xref type="bio.tools">bedtools</xref>
+        </xrefs>
+    </xml>
+    <token name="@TOOL_VERSION@">2.30.0</token>
+    <token name="@SAMTOOLS_VERSION@">1.9</token>
+    <token name="@STD_BEDTOOLS_INPUTS@">bed,bedgraph,gff,vcf,encodepeak</token>
+    <token name="@STD_BEDTOOLS_INPUT_LABEL@">BED/bedGraph/GFF/VCF/EncodePeak</token>
+    <token name="@PROFILE@">20.05</token>
+    <xml name="stdio">
+        <stdio>
+            <!-- Any exit code other than zero (positive or negative) is an error -->
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <!-- In case the return code has not been set properly, check stderr for error markers too -->
+            <regex match="Error:" />
+            <regex match="Exception:" />
+        </stdio>
+        <version_command>bedtools --version</version_command>
+    </xml>
+    <xml name="reciprocal">
+        <param name="reciprocal" argument="-r" type="select" label="Require that the fraction of overlap be reciprocal for A and B" help="In other words, if -f is 0.90 and -r is used, this requires that B overlap at least 90% of A and that A also overlaps at least 90% of B.">
+            <option value="" selected="true">No</option>
+            <option value="-r">Yes</option>
+        </param>
+    </xml>
+    <xml name="fraction" token_name="" token_argument="" token_label="" token_help="" >
+        <param name="@NAME@" argument="@ARGUMENT@" type="float" min="0" max="1" optional="true" label="@LABEL@" help="@HELP@" />
+    </xml>
+    <xml name="overlap" token_name="overlap" token_argument="-f" token_fracof="A">
+        <expand macro="fraction" name="@NAME@" argument="@ARGUMENT@" label="Minimum overlap required as a fraction of @FRACOF@" help="Default is 1E-9, i.e. 1bp."/>
+    </xml>
+    <token name="@OVERLAP@"><![CDATA[
+        #if str($overlap):
+            -f $overlap
+        #end if
+    ]]></token>
+    <xml name="strand2">
+        <param name="strand" type="select" label="Calculation based on strandedness?">
+            <option value="" selected="true">Overlaps on either strand</option>
+            <option value="-s">Only overlaps occurring on the **same** strand.</option>
+            <option value="-S">Only overlaps occurring on the **opposite** strand.</option>
+        </param>
+    </xml>
+    <xml name="seed">
+        <conditional name="seed">
+            <param name="seed_choose" type="select" label="Choose Seed?">
+                <option value="False" selected="true">Random Shuffling</option>
+                <option value="True">Choose fixed seed</option>
+            </param>
+            <when value="True">
+                <param argument="-seed" type="integer" value="12345" label="Enter Seed" />
+            </when>
+            <when value="False" />
+        </conditional>
+    </xml>
+    <xml name="split"> <!-- Boolean toggle that contributes the -split flag when checked -->
+        <param argument="-split" type="boolean" truevalue="-split" falsevalue="" checked="false"
+            label="Treat split/spliced BAM or BED12 entries as distinct BED intervals when computing coverage."
+            help="If set, the coverage will be calculated based on the spliced intervals only. For BAM files, this inspects the CIGAR N operation to infer the blocks for computing coverage. For BED12 files, this inspects the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12). If this option is not set, coverage will be calculated based on the interval's START/END coordinates, and would include introns in the case of RNAseq data." />
+    </xml>
+    <xml name="input_conditional_genome_file" token_optional="false" token_help="">
+        <conditional name="genome_file_opts">
+            <param name="genome_file_opts_selector" type="select" label="Genome file" help="@HELP@">
+                <option value="loc" selected="true">Locally installed Genome file</option>
+                <option value="hist">Genome file from your history</option>
+            </param>
+            <when value="loc">
+                <param name="genome" type="select" optional="@OPTIONAL@" multiple="false" label="Genome file">
+                    <options from_data_table="__dbkeys__" />
+                </param>
+            </when>
+            <when value="hist">
+                <param name="genome" type="data" optional="@OPTIONAL@" format="tabular" label="Genome file" />
+            </when>
+        </conditional>
+    </xml>
+    <token name="@GENOME_FILE@">
+#if $genome_file_opts.genome
+    -g 
+    #if $genome_file_opts.genome_file_opts_selector == "loc":
+        '$genome_file_opts.genome.fields.len_path'
+    #elif $genome_file_opts.genome_file_opts_selector == "hist":
+        '$genome_file_opts.genome'
+    #end if
+#end if
+    </token>
+    <token name="@GENOME_FILE_MAKEWINDOWS@">
+#if $type.type_select == "genome":
+    #if $type.genome_file_opts.genome_file_opts_selector == "loc":
+        -g '$type.genome_file_opts.genome.fields.len_path'
+    #elif $type.genome_file_opts.genome_file_opts_selector == "hist":
+        -g '$type.genome_file_opts.genome'
+    #end if
+#end if
+    </token>
+    <token name="@GENOME_FILE_UNION@">
+#if $empty.empty_selector == "-empty":
+    #if $empty.genome_file_opts.genome_file_opts_selector == "loc":
+        -g '$empty.genome_file_opts.genome.fields.len_path'
+    #elif $empty.genome_file_opts.genome_file_opts_selector == "hist":
+        -g '$empty.genome_file_opts.genome'
+    #end if
+#end if
+    </token>
+    <token name="@GENOME_FILE_COVERAGE@">
+#if $input_type.input_type_select == "bam":
+    -ibam '$input_type.input'
+#else:
+    -i '$input_type.input'
+    #if $input_type.genome_file_opts.genome_file_opts_selector == "loc":
+        -g '$input_type.genome_file_opts.genome.fields.len_path'
+    #elif $input_type.genome_file_opts.genome_file_opts_selector == "hist":
+        -g '$input_type.genome_file_opts.genome'
+    #end if
+#end if
+    </token>
+    <xml name="closest_D_option">
+        <param argument="-iu" type="boolean" truevalue="-iu" falsevalue="" checked="false"
+            label="Ignore features in B that are upstream of features in A"
+            help="This option requires -D and follows its orientation rules for determining what is 'upstream'" />
+
+        <param argument="-id" type="boolean" truevalue="-id" falsevalue="" checked="false"
+            label="Ignore features in B that are downstream of features in A"
+            help="This option requires -D and follows its orientation rules for determining what is 'downstream'" />
+
+        <param argument="-fu" type="boolean" truevalue="-fu" falsevalue="" checked="false"
+            label="Choose first from features in B that are upstream of features in A"
+            help="This option requires -D and follows its orientation rules for determining what is 'upstream'" />
+
+        <param argument="-fd" type="boolean" truevalue="-fd" falsevalue="" checked="false"
+            label="Choose first from features in B that are downstream of features in A"
+            help="This option requires -D and follows its orientation rules for determining what is 'downstream'" />
+    </xml>
+    <xml name="addition"> <!-- Choice between one symmetric extension (b) and separate start/end extensions (l, r) -->
+        <conditional name="addition">
+            <param name="addition_select" type="select" label="Choose what you want to do">
+                <option value="b" selected="true">Increase the @STD_BEDTOOLS_INPUT_LABEL@ entry by the same number of base pairs in each direction.</option>
+                <option value="lr">Increase by Start Coordinate and End Coordinate</option>
+            </param>
+            <when value="b">
+                <param name="b" value="1" label="Number of base pairs" type="integer" />
+            </when>
+            <when value="lr">
+                <param name="l" type="integer" value="0" label="The number of base pairs to subtract from the start coordinate" />
+                <param name="r" type="integer" value="0" label="The number of base pairs to add to the end coordinate" />
+            </when>
+        </conditional>
+    </xml>
+    <xml name="print_header">
+        <param argument="-header" type="boolean" truevalue="-header" falsevalue="" checked="false"
+            label="Print the header from the A file prior to results" />
+    </xml>
+    <!-- TODO this is currently not used, but we should make use of it -->
+    <xml name="genome_validator">
+        <validator type="unspecified_build" />
+        <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." />
+    </xml>
+
+    <!-- ToDo column_picker -->
+    <xml name="choose_columns">
+        <param name="cols" argument="-c" type="text" value=""
+            label="Specify the column(s) that should be summarized"
+            help="Comma separated">
+            <sanitizer invalid_char="">
+                <valid initial="string.digits"><add value=","/></valid>
+            </sanitizer>
+        </param>
+    </xml>
+
+    <token name="@C_AND_O_ARGUMENT@">
+        #set $col = list()
+        #set $op = list()
+        #for $item in $c_and_o_argument_repeat:
+            #silent $col.append( str($item.col) )
+            #silent $op.append( str($item.operation) )
+        #end for
+        #if $col:
+            -c #echo ','.join($col)#
+            -o #echo ','.join($op)#
+        #end if
+    </token>
+
+    <xml name="c_and_o_argument">
+        <repeat name="c_and_o_argument_repeat" title="Applying operations to columns from merged intervals" min="0">
+            <yield />
+            <expand macro="choose_operations">
+                <expand macro="math_options" />
+                <expand macro="additional_math_options" />
+            </expand>
+        </repeat>
+    </xml>
+
+    <xml name="choose_operations">
+        <param name="operation" type="select" label="Specify the operation">
+            <yield />
+        </param>
+    </xml>
+
+    <xml name="math_options">
+        <option value="sum" selected="true">Sum - numeric only</option>
+        <option value="min">Min - numeric only</option>
+        <option value="max">Max - numeric only</option>
+        <option value="absmin">AbsMin - numeric only</option>
+        <option value="absmax">AbsMax - numeric only</option>
+        <option value="mean">Mean - numeric only</option>
+        <option value="median">Median - numeric only</option>
+        <option value="mode">Mode - numeric only</option>
+        <option value="antimode">Antimode - numeric only</option>
+        <option value="collapse">collapse (i.e., print a comma separated list) - numeric or text</option>
+    </xml>
+    <xml name="additional_math_options"> <!-- Extra operation choices; each value is passed verbatim to -o by @C_AND_O_ARGUMENT@ -->
+        <option value="count">Count - numeric or text</option>
+        <option value="count_distinct">Count Distinct - numeric or text</option>
+        <option value="distinct">distinct (i.e., print a comma separated list) - numeric or text</option>
+        <option value="concat">concat (i.e., print a comma separated list) - numeric or text</option>
+    </xml>
+    <xml name="sorted">
+        <!-- -sorted -g  -->
+        <param argument="-sorted" type="boolean" truevalue="-sorted" falsevalue="" checked="false"
+            label="For coordinate sorted input file the more efficient sweeping algorithm is enabled."/>
+    </xml>
+    <token name="@SORTED@">
+<![CDATA[
+$sorted
+#if str($sorted) != '':
+    #if str($reduce_or_iterate.reduce_or_iterate_selector) == 'iterate' and $reduce_or_iterate.inputB.is_of_type('bam'):
+        -g <(samtools view -H '$reduce_or_iterate.inputB' | tr ':' '\t' | grep SN | cut -f 3,5)
+    #else if str($reduce_or_iterate.reduce_or_iterate_selector) == 'reduce' and str($reduce_or_iterate.inputB) != 'None' and $reduce_or_iterate.inputB[0].is_of_type('bam'):
+        -g <(samtools view -H '$reduce_or_iterate.inputB[0]' | tr ':' '\t' | grep SN | cut -f 3,5)
+    #end if
+#end if
+]]>
+    </token>
+    <token name="@REFERENCES@">
+<![CDATA[
+------
+
+This tool is part of the `bedtools package`_ from the `Quinlan laboratory`_.
+
+.. _bedtools package: https://github.com/arq5x/bedtools2
+.. _Quinlan laboratory: http://quinlanlab.org
+
+
+**Citation**
+
+If you use this tool in Galaxy, please cite:
+
+Bjoern A. Gruening (2014), `Galaxy wrapper <https://github.com/bgruening/galaxytools>`_
+]]>
+    </token>
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1093/bioinformatics/btq033</citation>
+            <yield />
+        </citations>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bedtools_genomecov/test-data/genomeCoverageBed1.bed	Fri May 27 11:33:45 2022 +0000
@@ -0,0 +1,3 @@
+chr1	10	20
+chr1	20	30
+chr2	0	500
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bedtools_genomecov/test-data/genomeCoverageBed1.len	Fri May 27 11:33:45 2022 +0000
@@ -0,0 +1,2 @@
+chr1	1000
+chr2	500
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bedtools_genomecov/test-data/genomeCoverageBed_result1.bed	Fri May 27 11:33:45 2022 +0000
@@ -0,0 +1,3 @@
+chr1	0	1000	1000	1
+chr2	0	500	500	1
+genome	0	1500	1500	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bedtools_genomecov/tool_data_table_conf.xml.sample	Fri May 27 11:33:45 2022 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <!-- Locations of all sam indexes under genome directory -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+    <!-- Locations of all gff files with annotations of genome builds -->
+    <table name="all_gff" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_gff.loc" />
+    </table>
+    <!-- Locations of dbkeys and len files under genome directory -->
+    <table name="__dbkeys__" comment_char="#">
+        <columns>value, name, len_path</columns>
+        <file path="tool-data/dbkeys.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bedtools_genomecov/tool_data_table_conf.xml.test	Fri May 27 11:33:45 2022 +0000
@@ -0,0 +1,17 @@
+<tables>
+    <!-- Locations of all sam indexes under genome directory -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/fasta_indexes.loc" />
+    </table>
+    <!-- Locations of all gff files with annotations of genome builds -->
+    <table name="all_gff" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/all_gff.loc" />
+    </table>
+    <!-- Locations of dbkeys and len files under genome directory -->
+    <table name="__dbkeys__" comment_char="#">
+        <columns>value, name, len_path</columns>
+        <file path="${__HERE__}/test-data/dbkeys.loc" />
+    </table>
+</tables>