changeset 0:e94194f13332 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ucsc_tools/maftools commit 09c8e251f8309b85d189c8052dfbc7b8e108976a
author iuc
date Fri, 27 Jun 2025 13:23:30 +0000
parents
children f8af4725edb6
files mafFilter.xml test-data/componentFilter.txt test-data/filter_in.maf test-data/gorGor3.bed test-data/hg38.bed test-data/mafIn.maf test-data/malformed.maf test-data/panTro4.bed test-data/ref.2bit test-data/ref.fa test-data/speciesFilter.txt
diffstat 11 files changed, 335 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mafFilter.xml	Fri Jun 27 13:23:30 2025 +0000
@@ -0,0 +1,255 @@
+<tool id="ucsc_mafFilter" name="mafFilter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.2" license="MIT">
+    <description>Filter MAF files based on various criteria</description>
+    <macros>
+        <token name="@VERSION_SUFFIX@">0</token> <!-- wrapper revision, appended after "+galaxy" in the tool version attribute -->
+        <token name="@TOOL_VERSION@">469</token> <!-- also used as the version of the ucsc-maffilter requirement -->
+    </macros>
+    <requirements> <!-- one package dependency, pinned through the @TOOL_VERSION@ token -->
+        <requirement version="@TOOL_VERSION@" type="package">ucsc-maffilter</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Optional numbers are compared via str() so an explicit 0 is still passed on; minFactor is read through its factor_options conditional.
+        mafFilter $tolerate
+        #if str($minCol) != '':
+            -minCol='$minCol'
+        #end if
+        #if str($minRow) != '':
+            -minRow='$minRow'
+        #end if
+        #if str($maxRow) != '':
+            -maxRow='$maxRow'
+        #end if
+        #if $factor_options.factor_enabled == "yes":
+            -factor -minFactor='$factor_options.minFactor'
+        #end if
+        #if str($minScore) != '':
+            -minScore='$minScore'
+        #end if
+        #if $reject:
+            -reject='$rejected_maf'
+        #end if
+        #if $needComp:
+            -needComp='$needComp'
+        #end if
+        $overlap
+        #if $componentFilter:
+            -componentFilter='$componentFilter'
+        #end if
+        #if $speciesFilter:
+            -speciesFilter='$speciesFilter'
+        #end if
+        '$input_maf' > '$output_maf'
+    ]]></command>
+    <inputs>
+        <param name="input_maf" type="data" format="maf" label="Input MAF file" help="Select the MAF file to filter."/>
+        <param argument="-tolerate" type="boolean" truevalue="-tolerate" falsevalue="" checked="false" label="Tolerate bad input" help="Ignore bad input rather than aborting."/>
+        <param argument="-minCol" type="integer" optional="true" value="1" min="1" label="Minimum columns" help="Filter out blocks with fewer than this number of columns."/>
+        <param argument="-minRow" type="integer" optional="true" value="2" min="1" label="Minimum rows" help="Filter out blocks with fewer than this number of rows."/>
+        <param argument="-maxRow" type="integer" optional="true" value="100" min="1" label="Maximum rows" help="Filter out blocks with more than this number of rows."/> <!-- Test 4 (maxRow=4) expects 25 lines, which only fits if the 4-row block is kept -->
+        <conditional name="factor_options">
+            <param name="factor_enabled" type="select" label="Enable factor" help="Enable factor for score filtering">
+                <option value="yes">Yes</option>
+                <option value="no" selected="true">No</option>
+            </param>
+            <when value="yes">
+                <param argument="-factor" type="boolean" truevalue="-factor" falsevalue="" label="Use factor-based score filtering" help="Filter out scores below -minFactor * (ncol^2) * nrow"/> <!-- NOTE(review): the command template emits -factor whenever factor_enabled is "yes" and never reads this checkbox -->
+                <param argument="-minFactor" type="integer" value="5" min="0" label="Minimum factor" help="Factor to use with factor-based score filtering"/>
+            </when>
+            <when value="no"/>
+        </conditional>
+        <param argument="-minScore" type="float" label="Minimum score" help="Minimum allowed score (alternative to factor-based filtering)" optional="true"/>
+        <param argument="-reject" type="boolean" truevalue="-reject" falsevalue="" checked="false" label="Save Rejected blocks to an output file" help="Select if you like to save rejected blocks to a file"/> <!-- plain two-state checkbox like -tolerate and -overlap; it also drives the filter on the rejected_maf output -->
+        <param argument="-needComp" type="text" label="Required species component" help="All alignments must have this species as one of the components" optional="true"/>
+        <param argument="-overlap" type="boolean" truevalue="-overlap" falsevalue="" checked="false" label="Reject overlapping blocks" help="Reject overlapping blocks in reference (assumes ordered blocks)."/>
+        <param argument="-componentFilter" type="data" format="txt" label="Component filter file" help="Filter out blocks without a component listed in this file." optional="true"/>
+        <param argument="-speciesFilter" type="data" format="txt" label="Species filter file" help="Filter out blocks without a species listed in this file." optional="true"/>
+    </inputs>
+    <outputs>
+        <data format="maf" name="output_maf" label="${tool.name} on ${on_string}: Filtered MAF output"/> <!-- receives the stdout redirect of the command -->
+        <data format="maf" name="rejected_maf" label="${tool.name} on ${on_string}: Rejected blocks">
+            <filter>reject</filter> <!-- dataset is only created when the reject checkbox is ticked -->
+        </data>
+    </outputs>
+    <tests>
+        <!-- Test 1: -tolerate on malformed.maf, whose final dog.chr6 row is truncated; that row must not reach the output -->
+        <test expect_num_outputs="1">
+            <param name="tolerate" value="true"/>
+            <param name="input_maf" value="malformed.maf"/>
+            <output ftype="maf" name="output_maf">
+                <assert_contents>
+                    <not_has_text text="s dog.chr6"/>
+                    <has_n_lines n="28"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 2: -minCol=10 must drop the 5-column chr2 block; negative checks use the single-space field layout of the output (compare the has_line checks in Test 4) -->
+        <test expect_num_outputs="1">
+            <param name="input_maf" value="filter_in.maf"/>
+            <param name="minCol" value="10"/>
+            <output name="output_maf" ftype="maf">
+                <assert_contents>
+                    <has_n_lines n="29"/>
+                    <not_has_text text="a score=500.0"/>
+                    <not_has_text text="s human.chr2 150 5 + 1000 ACGTA"/>
+                    <not_has_text text="s mouse.chr2 250 5 + 2000 ACGT-"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 3: -minRow=3 must drop the two 2-row blocks (chr2, chr4); negative checks use the single-space field layout of the output (compare the has_line checks in Test 4) -->
+        <test expect_num_outputs="1">
+            <param name="input_maf" value="filter_in.maf"/>
+            <param name="minRow" value="3"/>
+            <output name="output_maf" ftype="maf">
+                <assert_contents>
+                    <has_n_lines n="25"/>
+                    <not_has_text text="a score=500.0"/>
+                    <not_has_text text="s human.chr2 150 5 + 1000 ACGTA"/>
+                    <not_has_text text="s mouse.chr2 250 5 + 2000 ACGT-"/>
+                    <not_has_text text="a score=50.0"/>
+                    <not_has_text text="s human.chr4 110 10 + 1000 ACGTACGTAC"/>
+                    <not_has_text text="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 4: -maxRow=4; the 2-row chr4 block has to survive and 25 output lines are expected -->
+        <test expect_num_outputs="1">
+            <param name="maxRow" value="4"/>
+            <param name="input_maf" value="filter_in.maf"/>
+            <output ftype="maf" name="output_maf">
+                <assert_contents>
+                    <has_line line="a score=50.000000"/>
+                    <has_line line="s human.chr4 110 10 + 1000 ACGTACGTAC"/>
+                    <has_line line="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/>
+                    <has_n_lines n="25"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 5: -factor with -minFactor=5; every score in filter_in.maf is positive, so no block is expected to fall below the negative factor threshold -->
+        <test expect_num_outputs="1">
+            <conditional name="factor_options">
+                <param name="factor_enabled" value="yes"/>
+                <param name="minFactor" value="5"/>
+            </conditional>
+            <param name="input_maf" value="filter_in.maf"/>
+            <output ftype="maf" name="output_maf">
+                <assert_contents>
+                    <has_n_lines n="33"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 6: -minScore=500 must drop the score=50 chr4 block; negative checks use the single-space field layout of the output (compare the has_line checks in Test 4) -->
+        <test expect_num_outputs="1">
+            <param name="input_maf" value="filter_in.maf"/>
+            <param name="minScore" value="500"/>
+            <output name="output_maf" ftype="maf">
+                <assert_contents>
+                    <has_n_lines n="23"/>
+                    <not_has_text text="a score=50.0"/>
+                    <not_has_text text="s human.chr4 110 10 + 1000 ACGTACGTAC"/>
+                    <not_has_text text="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 7: -reject with -minScore=500; the chr4 block must be absent from the filtered output (same single-space layout as the has_line checks below) and present in the rejected output -->
+        <test expect_num_outputs="2">
+            <param name="input_maf" value="filter_in.maf"/>
+            <param name="minScore" value="500"/>
+            <param name="reject" value="true"/>
+            <output name="output_maf" ftype="maf">
+                <assert_contents>
+                    <has_n_lines n="23"/>
+                    <not_has_text text="a score=50.0"/>
+                    <not_has_text text="s human.chr4 110 10 + 1000 ACGTACGTAC"/>
+                    <not_has_text text="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/>
+                </assert_contents>
+            </output>
+            <output name="rejected_maf" ftype="maf">
+                <assert_contents>
+                    <has_n_lines n="11"/>
+                    <has_line line="a score=50.000000"/>
+                    <has_line line="s human.chr4 110 10 + 1000 ACGTACGTAC"/>
+                    <has_line line="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 8: -needComp=human.chr1; the chr1 block is expected in the output, 6 lines in total -->
+        <test expect_num_outputs="1">
+            <param name="needComp" value="human.chr1"/>
+            <param name="input_maf" value="filter_in.maf"/>
+            <output ftype="maf" name="output_maf">
+                <assert_contents>
+                    <has_text text="a score=1000.000000"/>
+                    <has_text text="s human.chr1 100 10 + 1000 ACGTACGTAC"/>
+                    <has_text text="s mouse.chr1 200 10 + 2000 ACGTACGTAC"/>
+                    <has_text text="s dog.chr1   300 10 + 3000 ACGTACGTAC"/>
+                    <has_n_lines n="6"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 9: -componentFilter with componentFilter.txt (human.chr1, mouse.chr1, dog.chr1); the chr1 block is expected in the output, 6 lines in total -->
+        <test expect_num_outputs="1">
+            <param name="componentFilter" value="componentFilter.txt"/>
+            <param name="input_maf" value="filter_in.maf"/>
+            <output ftype="maf" name="output_maf">
+                <assert_contents>
+                    <has_text text="a score=1000.000000"/>
+                    <has_text text="s human.chr1 100 10 + 1000 ACGTACGTAC"/>
+                    <has_text text="s mouse.chr1 200 10 + 2000 ACGTACGTAC"/>
+                    <has_text text="s dog.chr1   300 10 + 3000 ACGTACGTAC"/>
+                    <has_n_lines n="6"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test 10: -speciesFilter with speciesFilter.txt (dog); the dog-free chr2 and chr4 blocks must be dropped; negative checks use the single-space field layout of the output (compare the has_line checks in Test 4) -->
+        <test expect_num_outputs="1">
+            <param name="input_maf" value="filter_in.maf"/>
+            <param name="speciesFilter" value="speciesFilter.txt"/>
+            <output name="output_maf" ftype="maf">
+                <assert_contents>
+                    <has_n_lines n="25"/>
+                    <has_text text="a score=1000.000000"/>
+                    <not_has_text text="a score=500.0"/>
+                    <not_has_text text="s human.chr2 150 5 + 1000 ACGTA"/>
+                    <not_has_text text="s mouse.chr2 250 5 + 2000 ACGT-"/>
+                    <not_has_text text="a score=50.0"/>
+                    <not_has_text text="s human.chr4 110 10 + 1000 ACGTACGTAC"/>
+                    <not_has_text text="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+**mafFilter**
+
+Filters MAF (Multiple Alignment Format) files based on specified criteria. The filtered output is written to a new MAF file, and optionally, rejected blocks are saved to a separate file.
+
+**Options:**
+
+- **Tolerate bad input**: Ignore bad input instead of aborting.
+- **Minimum columns**: Filter out blocks with fewer than the specified number of columns (default: 1).
+- **Minimum rows**: Filter out blocks with fewer than the specified number of rows (default: 2).
+- **Maximum rows**: Filter out blocks with more than the specified number of rows (default: 100).
+- **Factor-based score filtering**: Filter out scores below `-minFactor * (ncol^2) * nrow`.
+- **Minimum factor**: Factor to use with factor-based score filtering (default: 5).
+- **Minimum score**: Minimum allowed score (alternative to factor-based filtering).
+- **Save rejected blocks**: When selected, the blocks that were filtered out are written to a second MAF output dataset.
+- **Required species component**: All alignments must include the specified species as a component.
+- **Reject overlapping blocks**: Reject overlapping blocks in the reference (assumes ordered blocks).
+- **Component filter file**: Filter out blocks without a component listed in the provided file.
+- **Species filter file**: Filter out blocks without a species listed in the provided file.
+
+    ]]></help>
+    <citations> <!-- single hand-written BibTeX entry; it carries no DOI, URL or year. NOTE(review): consider citing a published UCSC reference instead -->
+        <citation type="bibtex">
+        @misc{mafFilter,
+        author = {Kent UCSC},
+        title = {mafFilter: A tool for filtering MAF files},
+        note = {Tool for filtering Multiple Alignment Format files}
+}
+        </citation>
+    </citations>
+    <creator> <!-- person and organisation credited for this tool definition -->
+        <person familyName="Momin" givenName="Saim" url="https://github.com/SaimMomin12"/>
+        <organization url="https://galaxyproject.org/eu/" name="Galaxy Europe"/>
+    </creator>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/componentFilter.txt	Fri Jun 27 13:23:30 2025 +0000
@@ -0,0 +1,3 @@
+human.chr1
+mouse.chr1
+dog.chr1
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filter_in.maf	Fri Jun 27 13:23:30 2025 +0000
@@ -0,0 +1,32 @@
+##maf version=1 scoring=example
+a score=1000.0
+s human.chr1  100 10 + 1000 ACGTACGTAC
+s mouse.chr1  200 10 + 2000 ACGTACGTAC
+s dog.chr1    300 10 + 3000 ACGTACGTAC
+
+a score=500.0
+s human.chr2  150 5 + 1000 ACGTA
+s mouse.chr2  250 5 + 2000 ACGT-
+
+a score=200.0
+s human.chr3  200 15 + 1000 ACGTACGTACGTACG
+s cat.chr3    350 15 + 4000 ACGTACGTACGTACG
+s dog.chr3    450 15 + 5000 ACGTACGTACGTACG
+s rat.chr3    550 15 + 6000 ACGTACGTACGTACG
+
+a score=50.0
+s human.chr4  110 10 + 1000 ACGTACGTAC
+s mouse.chr4  210 10 + 2000 ACGTACGTAC
+
+a score=3000.0
+s human.chr5  105 10 + 1000 ACGTACGTAC
+s mouse.chr5  205 10 + 2000 ACGTACGTAC
+s dog.chr5    305 10 + 3000 ACGTACGTAC
+s cat.chr5    405 10 + 4000 ACGTACGTAC
+s rat.chr5    505 10 + 5000 ACGTACGTAC
+s cow.chr5    605 10 + 6000 ACGTACGTAC
+
+a score=600.0
+s human.chr6  100 10 + 1000 ACGTACGTAC
+s mouse.chr6  200 10 + 2000 ACGTACGTAC
+s dog.chr6    300 10 + 3000 ACGTACGTAC
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/gorGor3.bed	Fri Jun 27 13:23:30 2025 +0000
@@ -0,0 +1,2 @@
+chr7    1006    1007
+chr7    1013    1014
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg38.bed	Fri Jun 27 13:23:30 2025 +0000
@@ -0,0 +1,2 @@
+chr7    1005    1006
+chr7    1010    1012
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/mafIn.maf	Fri Jun 27 13:23:30 2025 +0000
@@ -0,0 +1,5 @@
+##maf version=1 scoring=blastz
+a score=1234
+s hg38.chr7     1000 20 + 248956422 ACGTACGTACGTACGTACGT
+s panTro4.chr7  1000 20 + 159345973 ACGTAC-TAC-TACGTACGT
+s gorGor3.chr7  1000 20 + 174310764 A-GTACGTAC-TACG-AC-T
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/malformed.maf	Fri Jun 27 13:23:30 2025 +0000
@@ -0,0 +1,32 @@
+##maf version=1 scoring=example
+a score=1000.0
+s human.chr1  100 10 + 1000 ACGTACGTAC
+s mouse.chr1  200 10 + 2000 ACGTACGTAC
+s dog.chr1    300 10 + 3000 ACGTACGTAC
+
+a score=500.0
+s human.chr2  150 5 + 1000 ACGTA
+s mouse.chr2  250 5 + 2000 ACGT-
+
+a score=200.0
+s human.chr3  200 15 + 1000 ACGTACGTACGTACG
+s cat.chr3    350 15 + 4000 ACGTACGTACGTACG
+s dog.chr3    450 15 + 5000 ACGTACGTACGTACG
+s rat.chr3    550 15 + 6000 ACGTACGTACGTACG
+
+a score=50.0
+s human.chr4  110 10 + 1000 ACGTACGTAC
+s mouse.chr4  210 10 + 2000 ACGTACGTAC
+
+a score=3000.0
+s human.chr5  105 10 + 1000 ACGTACGTAC
+s mouse.chr5  205 10 + 2000 ACGTACGTAC
+s dog.chr5    305 10 + 3000 ACGTACGTAC
+s cat.chr5    405 10 + 4000 ACGTACGTAC
+s rat.chr5    505 10 + 5000 ACGTACGTAC
+s cow.chr5    605 10 + 6000 ACGTACGTAC
+
+a score=600.0
+s human.chr6  100 10 + 1000 ACGTACGTAC
+s mouse.chr6  200 10 + 2000 ACGTACGTAC
+s dog.chr6    
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/panTro4.bed	Fri Jun 27 13:23:30 2025 +0000
@@ -0,0 +1,1 @@
+chr7    1007    1008
\ No newline at end of file
Binary file test-data/ref.2bit has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/ref.fa	Fri Jun 27 13:23:30 2025 +0000
@@ -0,0 +1,2 @@
+>chr7
+NNNNNACGTACGTACGTACGTNNNNNTGCACTGCACTGCACTGCANNNNN
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/speciesFilter.txt	Fri Jun 27 13:23:30 2025 +0000
@@ -0,0 +1,1 @@
+dog
\ No newline at end of file