view drop_reads_that_doNot_overlap_after_BEDtools_intersect.xml @ 0:e979cb57a5d5 draft default tip

"planemo upload for repository https://github.com/McIntyre-Lab/BayesASE/tree/main/galaxy commit 9b70598ef46a73632d9e0fa0c6ce6776fb5e9d6a"
author malex
date Thu, 14 Jan 2021 21:51:36 +0000
parents
children
line wrap: on
line source

<tool id="base_remove_nonoverlapping_reads" name="Remove reads"  version="21.1.13">
    <description>that do not overlap with genic features for BayesASE (using awk)</description>
    <!-- Shared definitions are imported from macros.xml; the "requirements" macro expanded below is presumably defined there (not visible in this file). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <!--
        Step 1 writes every row of BEDINT whose 4th whitespace separated field is not "." to BED3.
        Step 2 writes a header plus one row (dataset name, rows with an overlap, total rows) to summary.

        The summary is computed from the unfiltered BEDINT so that total_number_rows is the input row
        count; reading the filtered BED3 would make both counts identical.
        The dataset name is handed to awk with -v instead of being spliced into the awk program text,
        so a double quote in the name can no longer break the script.
        No OFS is set in step 1 because the rows are printed unmodified, which never uses OFS.
        The two steps are chained with the shell AND operator and the job fails on a non-zero exit code.
    -->
    <command detect_errors="exit_code">
<![CDATA[
        awk '$4 != "."' '$BEDINT' > '$BED3' &&
        awk -v fq_name='$BEDINT.element_identifier' 'BEGIN{OFS="\t"; print "fqName", "number_overlapping_rows", "total_number_rows"} $4 != "." {overlapping++} END{print fq_name, overlapping + 0, NR}' '$BEDINT' > '$summary'
]]>
    </command>
    <!-- Single input dataset: the table produced by a bedtools loj intersection (see the help text of the parameter). -->
    <inputs>
        <param
            name="BEDINT"
            type="data"
            format="tabular"
            label="Output from Bedtools Intersect Intervals"
            help="Input BED file made from loj intersection between BED files with location of unique reads and features of interest"/>
    </inputs>
    <!-- BED3 receives the filtered rows; summary receives the row counts written by the second awk call. -->
    <outputs>
        <data
            name="BED3"
            format="tabular"
            label="${tool.name} on ${on_string}: Multi-column BED file with overlapping reads"/>
        <data
            name="summary"
            format="tabular"
            label="${tool.name} on ${on_string}: output summary file"/>
    </outputs>
    <!--
        Runs the tool on the bundled intersect output and checks both outputs.
        ftype is "tabular" to agree with the format declared on the input parameter and on the outputs.
        The summary is checked by content (header names and column count) rather than against a fixture.
    -->
    <tests>
        <test>
            <param name="BEDINT" ftype="tabular" value="align_and_counts_test_data/bedtools_intersect_intervals_BASE_test_data.bed"/>
            <output name="BED3" ftype="tabular" file="align_and_counts_test_data/drop_nonintersecting_reads_for_BASE.tabular" />
            <output name="summary" ftype="tabular">
                <assert_contents>
                    <has_text text="fqName" />
                    <has_text text="number_overlapping_rows" />
                    <has_text text="total_number_rows" />
                    <has_n_columns n="3" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
    **Tool Description**

The purpose of this tool is to remove reads from the input BED file that do not intersect with any genic features.
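The input to this tool is generated by the **BedTools Intersect Intervals** tool and may contain rows where a read does not intersect with any genic feature.  These rows contain a “.” in the fourth column (labelled Name in the example below), which is the column this tool tests, as well as in the ThickStart column.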
The resulting output BED file contains locations of unique reads that overlap with regions of genic features.

-----------------------------------------------------------------------------------------------

**Inputs**
    - One input dataset is required.

**BED File [REQUIRED]**

 - A BED file resulting from the intersection of a SAM file in BED format and a BED file of genic features.
   This input BED file can be generated with the **BedTools Intersect Intervals** tool.

Example input BED file:

    +-------------+---------+---------+---------+---------+------------+------------+
    |   Chrom     |  Start  |   End   |   Name  |  Score  | Strand     |ThickStart  |
    +=============+=========+=========+=========+=========+============+============+
    |      X      |  2190   |  2245   |   X     |    1    | 2300       |l(1)G0196   |
    +-------------+---------+---------+---------+---------+------------+------------+
    |      2R     |  1502   |  1834   |   2R    |     50  | 1900       | Mapmodulin |
    +-------------+---------+---------+---------+---------+------------+------------+
    |      2R     |  1621   |  1680   |    .    |  -1     |-1          |.           |
    +-------------+---------+---------+---------+---------+------------+------------+


**Outputs**
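    The tool outputs a BED file containing regions of reads that overlap with locations of genic features of interest, and a tab-separated summary file with the columns fqName, number_overlapping_rows and total_number_rows.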

Example output BED file:

    +---------------+---------+---------+---------+---------+------------+------------+
    |   Chrom       |  Start  |   End   |   Name  |  Score  | Strand     |ThickStart  |
    +===============+=========+=========+=========+=========+============+============+
    |      X        |  2190   |  2245   |   X     |    1    |    2300    |l(1)G0196   |
    +---------------+---------+---------+---------+---------+------------+------------+
    |      2R       |  1502   |  1834   |   2R    |     50  |1900        |Mapmodulin  |
    +---------------+---------+---------+---------+---------+------------+------------+
    ]]></help>
    <!--
        BibTeX separates individual authors with the word "and"; a comma separated list is not split
        into separate people, so the author field uses "and" between names.
        NOTE(review): journal and year still hold placeholder values; update them once the reference is final.
    -->
    <citations>
        <citation type="bibtex">@ARTICLE{Miller20BASE,
            author = {Brecca Miller and Alison M. Morse and Elyse Borgert and Zihao Liu and Kelsey Sinclair and Gavin Gamble and Fei Zou and Jeremy Newman and Luis Leon Novello and Fabio Marroni and Lauren M. McIntyre},
            title = {Testcrosses are an efficient strategy for identifying cis regulatory variation: Bayesian analysis of allele imbalance among conditions (BASE)},
            journal = {????},
            year = {submitted for publication}
            }</citation>
    </citations>
</tool>