Mercurial > repos > jackcurragh > ribogalaxy_genomecov_bedtools
changeset 2:3aa9416cc3b3 draft
Uploaded
author | jackcurragh |
---|---|
date | Fri, 27 May 2022 11:33:45 +0000 |
parents | 79c0c6042954 |
children | 48e1c3a73761 |
files | bedtools_genomecov/genomeCoverageBed.xml bedtools_genomecov/macros.xml bedtools_genomecov/test-data/genomeCoverageBed1.bed bedtools_genomecov/test-data/genomeCoverageBed1.len bedtools_genomecov/test-data/genomeCoverageBed_result1.bed bedtools_genomecov/tool_data_table_conf.xml.sample bedtools_genomecov/tool_data_table_conf.xml.test |
diffstat | 7 files changed, 478 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- File: bedtools_genomecov/genomeCoverageBed.xml -->
<!--
    Galaxy wrapper for "bedtools genomecov".

    Inputs  : a BAM file, or a BED-like interval file together with a genome
              (chromosome sizes) file taken from a data table or the history.
    Output  : one dataset; bedgraph by default, switched to tabular when the
              histogram report type is selected.
    Macros  : tokens (@TOOL_VERSION@, @PROFILE@, @GENOME_FILE_COVERAGE@,
              @REFERENCES@, ...) and the expanded blocks come from macros.xml.
-->
<tool id="bedtools_genomecoveragebed" name="BedTools Genome Coverage" version="@TOOL_VERSION@" profile="@PROFILE@">
    <description>Compute Read Coverage Over An Entire Genome</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools" />
    <expand macro="requirements" />
    <expand macro="stdio" />
    <command><![CDATA[
bedtools genomecov
## Emits "-ibam <input>" for BAM, or "-i <input> -g <genome file>" otherwise.
@GENOME_FILE_COVERAGE@

$split
$strand

#if str($report.report_select) == "bg":
    ## zero_regions carries "-bga" when checked and is empty otherwise.
    #if $report.zero_regions:
        $report.zero_regions
    #else:
        -bg
    #end if

    #if str($report.scale):
        -scale $report.scale
    #end if
#else:
    #if str($report.max):
        -max $report.max
    #end if
#end if
$d
$dz
$five
$three
> '$output'
    ]]></command>
    <inputs>
        <conditional name="input_type">
            <param name="input_type_select" type="select" label="Input type">
                <option value="bed">@STD_BEDTOOLS_INPUT_LABEL@</option>
                <option value="bam" selected="true">BAM</option>
            </param>
            <when value="bed">
                <param name="input" argument="-i" type="data" format="@STD_BEDTOOLS_INPUTS@" label="@STD_BEDTOOLS_INPUT_LABEL@ file" />
                <!-- Interval input needs chromosome sizes; BAM input does not get this conditional. -->
                <expand macro="input_conditional_genome_file" />
            </when>
            <when value="bam">
                <param name="input" argument="-ibam" type="data" format="bam" label="BAM file" />
            </when>
        </conditional>
        <conditional name="report">
            <param name="report_select" type="select" label="Output type">
                <option value="bg" selected="true">BedGraph coverage file</option>
                <option value="hist">Data suitable for Histogram</option>
            </param>
            <when value="bg">
                <param name="zero_regions" argument="-bga" type="boolean" truevalue="-bga" falsevalue="" checked="false"
                    label="Report regions with zero coverage"
                    help="If set, regions without any coverage will also be reported" />
                <param argument="-scale" type="float" value="1.0"
                    label="Scale the coverage by a constant factor"
                    help="Each bedGraph coverage value is multiplied by this factor before being reported. Useful for normalizing coverage by, e.g., reads per million (RPM)." />
            </when>
            <when value="hist">
                <!--
                    NOTE(review): the default of 0 is never empty, so the command above always
                    passes "-max 0" in histogram mode. The shipped expected test output (a single
                    depth-0 bin per chromosome) looks consistent with that. Confirm this default
                    is intended before changing it, and regenerate the test data if it changes.
                -->
                <param argument="-max" type="integer" value="0" label="Specify max depth"
                    help="Combine all positions with a depth >= max into a single bin in the histogram" />
            </when>
        </conditional>
        <expand macro="split" />
        <param argument="-strand" type="select" label="Calculate coverage based on">
            <option value="">both strands combined</option>
            <option value="-strand +">positive strand only</option>
            <option value="-strand -">negative strand only</option>
        </param>

        <!--
            NOTE(review): -d / -dz are emitted independently of the "Output type" selection,
            and -5 / -3 independently of each other. bedtools presumably rejects some of these
            combinations (e.g. -d together with -bg); verify against the bedtools genomecov
            documentation and consider validating the combination.
        -->
        <param argument="-d" type="boolean" truevalue="-d" falsevalue="" checked="false"
            label="Report the depth at each genome position with 1-based coordinates" />
        <param argument="-dz" type="boolean" truevalue="-dz" falsevalue="" checked="false"
            label="Report the depth at each genome position with 0-based coordinates" />
        <param name="five" argument="-5" type="boolean" truevalue="-5" falsevalue="" checked="false"
            label="Calculate coverage of 5’ positions" help="Instead of entire interval" />
        <param name="three" argument="-3" type="boolean" truevalue="-3" falsevalue="" checked="false"
            label="Calculate coverage of 3’ positions" help="Instead of entire interval" />
    </inputs>
    <outputs>
        <data name="output" format="bedgraph">
            <!-- Histogram output is a plain table, not a bedGraph. -->
            <change_format>
                <when input="report.report_select" value="hist" format="tabular" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- BED input + genome file from the history, histogram report with default options. -->
        <test>
            <param name="input_type_select" value="bed" />
            <param name="input" value="genomeCoverageBed1.bed" ftype="bed" />
            <param name="genome_file_opts_selector" value="hist" />
            <param name="genome" value="genomeCoverageBed1.len" ftype="tabular" />
            <param name="report_select" value="hist" />
            <output name="output" file="genomeCoverageBed_result1.bed" ftype="tabular" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool calculates the genome-wide coverage of intervals defined in a BAM or BED file and reports it either in BedGraph format or as a table suitable for drawing a histogram.

.. image:: $PATH_TO_IMAGES/genomecov-glyph.png

.. class:: warningmark

The input BED or BAM file must be sorted by chromosome name (but doesn't necessarily have to be sorted by start position).

-----

**Example 1**

Input (BED format)-
Overlapping, un-sorted intervals::

    chr1 140 176
    chr1 100 130
    chr1 120 147


Output (BedGraph format)-
Sorted, non-overlapping intervals, with coverage value on the 4th column::

    chr1 100 120 1
    chr1 120 130 2
    chr1 130 140 1
    chr1 140 147 2
    chr1 147 176 1

-----

**Example 2 - with ZERO-Regions selected (assuming hg19)**

Input (BED format)-
Overlapping, un-sorted intervals::

    chr1 140 176
    chr1 100 130
    chr1 120 147


Output (BedGraph format)-
The intervals of Example 1 plus the regions without any coverage, reported with a coverage value of 0 (only the chr1 records are shown)::

    chr1 0 100 0
    chr1 100 120 1
    chr1 120 130 2
    chr1 130 140 1
    chr1 140 147 2
    chr1 147 176 1
    chr1 176 249250621 0

-----

**Histogram output**

When the output type *Data suitable for Histogram* is selected, the output will contain five columns:

    * 1. Chromosome name (or 'genome' for whole-genome coverage)
    * 2. Coverage depth
    * 3. The number of bases on chromosome (or genome) with depth equal to column 2.
    * 4. The size of chromosome (or entire genome) in base pairs
    * 5. The fraction of bases on chromosome (or entire genome) with depth equal to column 2.

**Example Output**::

    chr2L 0 1379895 23011544 0.0599653
    chr2L 1 837250 23011544 0.0363839
    chr2L 2 904442 23011544 0.0393038
    chr2L 3 913723 23011544 0.0397072
    chr2L 4 952166 23011544 0.0413778
    chr2L 5 967763 23011544 0.0420555
    chr2L 6 986331 23011544 0.0428624
    chr2L 7 998244 23011544 0.0433801
    chr2L 8 995791 23011544 0.0432735
    chr2L 9 996398 23011544 0.0432999


@REFERENCES@
    ]]></help>
    <expand macro="citations" />
</tool>
<!-- File: bedtools_genomecov/macros.xml -->
<!--
    Shared Galaxy macros for the bedtools wrappers: version tokens, common
    requirement/stdio/citation blocks, reusable parameters and the Cheetah
    snippets that turn those parameters into command-line arguments.
    Tokens (@NAME@) are substituted textually; <xml> macros are inserted via
    <expand macro="..."/> in the tool file.
-->
<macros>
    <!-- Tool dependency; <yield/> lets a tool append further requirements. -->
    <xml name="requirements">
        <requirements>
            <requirement type="package" version="@TOOL_VERSION@">bedtools</requirement>
            <yield/>
        </requirements>
    </xml>
    <xml name="bio_tools">
        <xrefs>
            <xref type="bio.tools">bedtools</xref>
        </xrefs>
    </xml>
    <token name="@TOOL_VERSION@">2.30.0</token>
    <token name="@SAMTOOLS_VERSION@">1.9</token>
    <token name="@STD_BEDTOOLS_INPUTS@">bed,bedgraph,gff,vcf,encodepeak</token>
    <token name="@STD_BEDTOOLS_INPUT_LABEL@">BED/bedGraph/GFF/VCF/EncodePeak</token>
    <token name="@PROFILE@">20.05</token>
    <xml name="stdio">
        <stdio>
            <!-- Anything other than zero is an error -->
            <exit_code range="1:" />
            <exit_code range=":-1" />
            <!-- In case the return code has not been set properly check stderr too -->
            <regex match="Error:" />
            <regex match="Exception:" />
        </stdio>
        <version_command>bedtools --version</version_command>
    </xml>
    <xml name="reciprocal">
        <param name="reciprocal" argument="-r" type="select" label="Require that the fraction of overlap be reciprocal for A and B" help="In other words, if -f is 0.90 and -r is used, this requires that B overlap at least 90% of A and that A also overlaps at least 90% of B.">
            <option value="" selected="true">No</option>
            <option value="-r">Yes</option>
        </param>
    </xml>
    <!-- Generic optional fraction in [0, 1]; name, argument, label and help are supplied by the caller. -->
    <xml name="fraction" token_name="" token_argument="" token_label="" token_help="" >
        <param name="@NAME@" argument="@ARGUMENT@" type="float" min="0" max="1" optional="true" label="@LABEL@" help="@HELP@" />
    </xml>
    <xml name="overlap" token_name="overlap" token_argument="-f" token_fracof="A">
        <expand macro="fraction" name="@NAME@" argument="@ARGUMENT@" label="Minimum overlap required as a fraction of @FRACOF@" help="Default is 1E-9, i.e. 1bp."/>
    </xml>
    <!-- Emits "-f <value>" only when the optional overlap parameter is set. -->
    <token name="@OVERLAP@"><![CDATA[
        #if str($overlap):
            -f $overlap
        #end if
    ]]></token>
    <xml name="strand2">
        <param name="strand" type="select" label="Calculation based on strandedness?">
            <option value="" selected="true">Overlaps on either strand</option>
            <option value="-s">Only overlaps occurring on the **same** strand.</option>
            <option value="-S">Only overlaps occurring on the **opposite** strand.</option>
        </param>
    </xml>
    <xml name="seed">
        <conditional name="seed">
            <param name="seed_choose" type="select" label="Choose Seed?">
                <option value="False" selected="true">Random Shuffling</option>
                <option value="True">Choose fixed seed</option>
            </param>
            <when value="True">
                <param argument="-seed" type="integer" value="12345" label="Enter Seed" />
            </when>
            <when value="False" />
        </conditional>
    </xml>
    <xml name="split">
        <param argument="-split" type="boolean" truevalue="-split" falsevalue="" checked="false"
            label="Treat split/spliced BAM or BED12 entries as distinct BED intervals when computing coverage."
            help="If set, the coverage will be calculated based on the spliced intervals only. For BAM files, this inspects the CIGAR N operation to infer the blocks for computing coverage. For BED12 files, this inspects the BlockCount, BlockStarts, and BlockEnds fields (i.e., columns 10,11,12). If this option is not set, coverage will be calculated based on the interval's START/END coordinates, and would include introns in the case of RNAseq data." />
    </xml>
    <!-- Genome (chromosome sizes) file, chosen from the __dbkeys__ data table or from the history. -->
    <xml name="input_conditional_genome_file" token_optional="false" token_help="">
        <conditional name="genome_file_opts">
            <param name="genome_file_opts_selector" type="select" label="Genome file" help="@HELP@">
                <option value="loc" selected="true">Locally installed Genome file</option>
                <option value="hist">Genome file from your history</option>
            </param>
            <when value="loc">
                <param name="genome" type="select" optional="@OPTIONAL@" multiple="false" label="Genome file">
                    <options from_data_table="__dbkeys__" />
                </param>
            </when>
            <when value="hist">
                <param name="genome" type="data" optional="@OPTIONAL@" format="tabular" label="Genome file" />
            </when>
        </conditional>
    </xml>
    <!-- "-g <genome file>" for a top-level genome_file_opts conditional; nothing when no genome is set. -->
    <token name="@GENOME_FILE@">
#if $genome_file_opts.genome
    -g
    #if $genome_file_opts.genome_file_opts_selector == "loc":
        '$genome_file_opts.genome.fields.len_path'
    #elif $genome_file_opts.genome_file_opts_selector == "hist":
        '$genome_file_opts.genome'
    #end if
#end if
    </token>
    <!-- Same as above, for a genome_file_opts conditional nested in $type (only when type_select is "genome"). -->
    <token name="@GENOME_FILE_MAKEWINDOWS@">
#if $type.type_select == "genome":
    #if $type.genome_file_opts.genome_file_opts_selector == "loc":
        -g '$type.genome_file_opts.genome.fields.len_path'
    #elif $type.genome_file_opts.genome_file_opts_selector == "hist":
        -g '$type.genome_file_opts.genome'
    #end if
#end if
    </token>
    <!-- Same as above, for a genome_file_opts conditional nested in $empty (only when empty_selector is "-empty"). -->
    <token name="@GENOME_FILE_UNION@">
#if $empty.empty_selector == "-empty":
    #if $empty.genome_file_opts.genome_file_opts_selector == "loc":
        -g '$empty.genome_file_opts.genome.fields.len_path'
    #elif $empty.genome_file_opts.genome_file_opts_selector == "hist":
        -g '$empty.genome_file_opts.genome'
    #end if
#end if
    </token>
    <!-- Input arguments for genomecov: "-ibam <bam>" or "-i <intervals> -g <genome file>". -->
    <token name="@GENOME_FILE_COVERAGE@">
#if $input_type.input_type_select == "bam":
    -ibam '$input_type.input'
#else:
    -i '$input_type.input'
    #if $input_type.genome_file_opts.genome_file_opts_selector == "loc":
        -g '$input_type.genome_file_opts.genome.fields.len_path'
    #elif $input_type.genome_file_opts.genome_file_opts_selector == "hist":
        -g '$input_type.genome_file_opts.genome'
    #end if
#end if
    </token>
    <xml name="closest_D_option">
        <param argument="-iu" type="boolean" truevalue="-iu" falsevalue="" checked="false"
            label="Ignore features in B that are upstream of features in A"
            help="This option requires -D and follows its orientation rules for determining what is 'upstream'" />

        <param argument="-id" type="boolean" truevalue="-id" falsevalue="" checked="false"
            label="Ignore features in B that are downstream of features in A"
            help="This option requires -D and follows its orientation rules for determining what is 'downstream'" />

        <param argument="-fu" type="boolean" truevalue="-fu" falsevalue="" checked="false"
            label="Choose first from features in B that are upstream of features in A"
            help="This option requires -D and follows its orientation rules for determining what is 'upstream'" />

        <param argument="-fd" type="boolean" truevalue="-fd" falsevalue="" checked="false"
            label="Choose first from features in B that are downstream of features in A"
            help="This option requires -D and follows its orientation rules for determining what is 'downstream'" />
    </xml>
    <xml name="addition">
        <conditional name="addition">
            <param name="addition_select" type="select" label="Choose what you want to do">
                <option value="b" selected="true">Increase the @STD_BEDTOOLS_INPUT_LABEL@ entry by the same number base pairs in each direction.</option>
                <option value="lr">Increase by Start Coordinate and End Coordinate</option>
            </param>
            <when value="b">
                <param name="b" value="1" label="Number of base pairs" type="integer" />
            </when>
            <when value="lr">
                <param name="l" type="integer" value="0" label="The number of base pairs to subtract from the start coordinate" />
                <param name="r" type="integer" value="0" label="The number of base pairs to add to the end coordinate" />
            </when>
        </conditional>
    </xml>
    <xml name="print_header">
        <param argument="-header" type="boolean" truevalue="-header" falsevalue="" checked="false"
            label="Print the header from the A file prior to results" />
    </xml>
    <!-- TODO this is currently not used, but we should make use of it -->
    <xml name="genome_validator">
        <validator type="unspecified_build" />
        <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." />
    </xml>

    <!-- ToDo column_picker -->
    <xml name="choose_columns">
        <param name="cols" argument="-c" type="text" value=""
            label="Specify the column(s) that should be summarized"
            help="Comma separated">
            <!-- Only digits and commas survive sanitizing. -->
            <sanitizer invalid_char="">
                <valid initial="string.digits"><add value=","/></valid>
            </sanitizer>
        </param>
    </xml>

    <!-- Collects the column/operation pairs of the repeat into "-c c1,c2 -o op1,op2"; emits nothing when the repeat is empty. -->
    <token name="@C_AND_O_ARGUMENT@">
        #set $col = list()
        #set $op = list()
        #for $item in $c_and_o_argument_repeat:
            #silent $col.append( str($item.col) )
            #silent $op.append( str($item.operation) )
        #end for
        #if $col:
            -c #echo ','.join($col)#
            -o #echo ','.join($op)#
        #end if
    </token>

    <xml name="c_and_o_argument">
        <repeat name="c_and_o_argument_repeat" title="Applying operations to columns from merged intervals" min="0">
            <yield />
            <expand macro="choose_operations">
                <expand macro="math_options" />
                <expand macro="additional_math_options" />
            </expand>
        </repeat>
    </xml>

    <xml name="choose_operations">
        <param name="operation" type="select" label="Specify the operation">
            <yield />
        </param>
    </xml>

    <xml name="math_options">
        <option value="sum" selected="true">Sum - numeric only</option>
        <option value="min">Min - numeric only</option>
        <option value="max">Max - numeric only</option>
        <option value="absmin">AbsMin - numeric only</option>
        <option value="absmax">AbsMax - numeric only</option>
        <option value="mean">Mean - numeric only</option>
        <option value="median">Median - numeric only</option>
        <option value="mode">Mode - numeric only</option>
        <option value="antimode">Antimode - numeric only</option>
        <option value="collapse">collapse (i.e., print a comma separated list) - numeric or text</option>
    </xml>
    <xml name="additional_math_options">
        <option value="count">Count - numeric or text</option>
        <!-- The value is passed verbatim to "-o"; bedtools names this operation count_distinct. -->
        <option value="count_distinct">Count Distinct - numeric or text</option>
        <option value="distinct">distinct (i.e., print a comma separated list) - numeric or text</option>
        <option value="concat">concat (i.e., print a comma separated list) - numeric or text</option>
    </xml>
    <xml name="sorted">
        <!-- -sorted -g -->
        <param argument="-sorted" type="boolean" truevalue="-sorted" falsevalue="" checked="false"
            label="For coordinate sorted input file the more efficient sweeping algorithm is enabled."/>
    </xml>
    <!--
        Emits the -sorted flag and, when the (first) B input is BAM, a genome file built on
        the fly from the SN lines of the BAM header.
        NOTE(review): this snippet calls samtools, which the "requirements" macro above does
        not declare; a tool using @SORTED@ presumably has to add it through <yield/>.
    -->
    <token name="@SORTED@">
<![CDATA[
$sorted
#if str($sorted) != '':
    #if str($reduce_or_iterate.reduce_or_iterate_selector) == 'iterate' and $reduce_or_iterate.inputB.is_of_type('bam'):
        -g <(samtools view -H '$reduce_or_iterate.inputB' | tr ':' '\t' | grep SN | cut -f 3,5)
    #else if str($reduce_or_iterate.reduce_or_iterate_selector) == 'reduce' and str($reduce_or_iterate.inputB) != 'None' and $reduce_or_iterate.inputB[0].is_of_type('bam'):
        -g <(samtools view -H '$reduce_or_iterate.inputB[0]' | tr ':' '\t' | grep SN | cut -f 3,5)
    #end if
#end if
]]>
    </token>
    <!-- reStructuredText footer appended to each tool's help section. -->
    <token name="@REFERENCES@">
<![CDATA[
------

This tool is part of the `bedtools package`_ from the `Quinlan laboratory`_.

.. _bedtools package: https://github.com/arq5x/bedtools2
.. _Quinlan laboratory: http://quinlanlab.org


**Citation**

If you use this tool in Galaxy, please cite:

Bjoern A. Gruening (2014), `Galaxy wrapper <https://github.com/bgruening/galaxytools>`_
]]>
    </token>
    <xml name="citations">
        <citations>
            <citation type="doi">10.1093/bioinformatics/btq033</citation>
            <yield />
        </citations>
    </xml>
</macros>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bedtools_genomecov/test-data/genomeCoverageBed1.bed Fri May 27 11:33:45 2022 +0000 @@ -0,0 +1,3 @@ +chr1 10 20 +chr1 20 30 +chr2 0 500
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bedtools_genomecov/test-data/genomeCoverageBed1.len Fri May 27 11:33:45 2022 +0000 @@ -0,0 +1,2 @@ +chr1 1000 +chr2 500
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bedtools_genomecov/test-data/genomeCoverageBed_result1.bed Fri May 27 11:33:45 2022 +0000 @@ -0,0 +1,3 @@ +chr1 0 1000 1000 1 +chr2 0 500 500 1 +genome 0 1500 1500 1
<!-- File: bedtools_genomecov/tool_data_table_conf.xml.sample -->
<!--
    Sample data table configuration: declares the three data tables and the
    .loc file under tool-data/ that each one is read from.
-->
<tables>
    <!-- fasta_indexes: four columns, backed by fasta_indexes.loc -->
    <table name="fasta_indexes" comment_char="#">
        <columns>value, dbkey, name, path</columns>
        <file path="tool-data/fasta_indexes.loc" />
    </table>

    <!-- all_gff: four columns, backed by all_gff.loc -->
    <table name="all_gff" comment_char="#">
        <columns>value, dbkey, name, path</columns>
        <file path="tool-data/all_gff.loc" />
    </table>

    <!-- __dbkeys__: three columns (len_path is the one the genome file macros read), backed by dbkeys.loc -->
    <table name="__dbkeys__" comment_char="#">
        <columns>value, name, len_path</columns>
        <file path="tool-data/dbkeys.loc" />
    </table>
</tables>
<!-- File: bedtools_genomecov/tool_data_table_conf.xml.test -->
<!--
    Test data table configuration: same three tables as the sample
    configuration, but each .loc file is looked up in the test-data directory
    relative to ${__HERE__}.
-->
<tables>
    <!-- fasta_indexes: four columns, backed by test-data/fasta_indexes.loc -->
    <table name="fasta_indexes" comment_char="#">
        <columns>value, dbkey, name, path</columns>
        <file path="${__HERE__}/test-data/fasta_indexes.loc" />
    </table>

    <!-- all_gff: four columns, backed by test-data/all_gff.loc -->
    <table name="all_gff" comment_char="#">
        <columns>value, dbkey, name, path</columns>
        <file path="${__HERE__}/test-data/all_gff.loc" />
    </table>

    <!-- __dbkeys__: three columns, backed by test-data/dbkeys.loc -->
    <table name="__dbkeys__" comment_char="#">
        <columns>value, name, len_path</columns>
        <file path="${__HERE__}/test-data/dbkeys.loc" />
    </table>
</tables>