diff drop_reads_that_doNot_overlap_after_BEDtools_intersect.xml @ 0:e979cb57a5d5 draft default tip

"planemo upload for repository https://github.com/McIntyre-Lab/BayesASE/tree/main/galaxy commit 9b70598ef46a73632d9e0fa0c6ce6776fb5e9d6a"
author malex
date Thu, 14 Jan 2021 21:51:36 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/drop_reads_that_doNot_overlap_after_BEDtools_intersect.xml	Thu Jan 14 21:51:36 2021 +0000
@@ -0,0 +1,77 @@
+<tool id="base_remove_nonoverlapping_reads" name="Remove reads"  version="21.1.13">
+    <description>that do not overlap with genic features for BayesASE (using awk)</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command><![CDATA[
+        ## Step 1: keep only rows whose 4th column is not "." (reads that overlap a feature); "&&" stops the job if this fails.
+        awk -v OFS='   ' '$4 !="."' '$BEDINT' > '$BED3' &&
+        ## Step 2: summarise the UNFILTERED input, so overlapping rows (a) and total rows (NR) are counted independently.
+        awk -v a=0 'BEGIN{print "fqName\tnumber_overlapping_rows\ttotal_number_rows"} {if($4 !=".") a++} END{print "$BEDINT.element_identifier""\t"a"\t"NR }' '$BEDINT' > '$summary'
+    ]]></command>
+    <inputs>
+        <param name="BEDINT" type="data" format="tabular" label="Output from Bedtools Intersect Intervals" help="Input BED file made from loj intersection between BED files with location of unique reads and features of interest" />
+    </inputs>
+    <outputs>
+      <data name="BED3" format="tabular" label="${tool.name} on ${on_string}: Multi-column BED file with overlapping reads" />
+      <data name="summary" format="tabular" label="${tool.name} on ${on_string}: output summary file" /> 
+    </outputs>
+    <tests>
+        <test> <!-- ftype names the concrete datatype: the BEDINT input and the BED3 output are both declared format="tabular" -->
+            <param name="BEDINT" ftype="tabular" value="align_and_counts_test_data/bedtools_intersect_intervals_BASE_test_data.bed"/>
+            <output name="BED3" ftype="tabular" file="align_and_counts_test_data/drop_nonintersecting_reads_for_BASE.tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+    **Tool Description**
+
+The purpose of this tool is to remove reads from the input BED file that do not intersect with any genic features.
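+The input into this tool is generated by the **BedTools Intersect Intervals** tool and may contain rows where a read does not intersect with any genic features.  These rows contain a “.” in the fourth (Name) column, which is the column this tool tests; in the example below their ThickStart column is “.” as well.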
+The resulting output BED file contains locations of unique reads that overlap with regions of genic features.
+
+-----------------------------------------------------------------------------------------------
+
+**Inputs**
+    - One input dataset is required.
+
+**BED File [REQUIRED]**
+
+ - A BED file resulting from the intersection of a SAM file in BED format and a BED file of genic features.
+   This input BED file can be generated with the **BedTools Intersect Intervals** tool.
+
+Example input BED file:
+
+    +-------------+---------+---------+---------+---------+------------+------------+
+    |   Chrom     |  Start  |   End   |   Name  |  Score  | Strand     |ThickStart  |
+    +=============+=========+=========+=========+=========+============+============+
+    |      X      |  2190   |  2245   |   X     |    1    | 2300       |l(1)G0196   |
+    +-------------+---------+---------+---------+---------+------------+------------+
+    |      2R     |  1502   |  1834   |   2R    |     50  | 1900       | Mapmodulin |
+    +-------------+---------+---------+---------+---------+------------+------------+
+    |      2R     |  1621   |  1680   |    .    |  -1     |-1          |.           |
+    +-------------+---------+---------+---------+---------+------------+------------+
+
+
+**Outputs**
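+    The tool outputs a BED file containing regions of reads that overlap with locations of genic features of interest, plus a tab-delimited summary file with the columns fqName, number_overlapping_rows and total_number_rows.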
+
+Example output BED file:
+
+    +---------------+---------+---------+---------+---------+------------+------------+
+    |   Chrom       |  Start  |   End   |   Name  |  Score  | Strand     |ThickStart  |
+    +===============+=========+=========+=========+=========+============+============+
+    |      X        |  2190   |  2245   |   X     |    1    |    2300    |l(1)G0196   |
+    +---------------+---------+---------+---------+---------+------------+------------+
+    |      2R       |  1502   |  1834   |   2R    |     50  |1900        |Mapmodulin  |
+    +---------------+---------+---------+---------+---------+------------+------------+
+    ]]></help>
+    <citations>
+            <citation type="bibtex">@ARTICLE{Miller20BASE,
+            author = {Brecca Miller and Alison M. Morse and Elyse Borgert and Zihao Liu and Kelsey Sinclair and Gavin Gamble and Fei Zou and Jeremy Newman and Luis Leon Novello and Fabio Marroni and Lauren M. McIntyre},
+            title = {Testcrosses are an efficient strategy for identifying cis regulatory variation: Bayesian analysis of allele imbalance among conditions (BASE)},
+            journal = {????},
+            year = {submitted for publication}
+            }</citation>
+        </citations>
+</tool>