Mercurial > repos > iuc > ucsc_maffilter
changeset 0:e94194f13332 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ucsc_tools/maftools commit 09c8e251f8309b85d189c8052dfbc7b8e108976a
| author | iuc |
|---|---|
| date | Fri, 27 Jun 2025 13:23:30 +0000 |
| parents | |
| children | f8af4725edb6 |
| files | mafFilter.xml test-data/componentFilter.txt test-data/filter_in.maf test-data/gorGor3.bed test-data/hg38.bed test-data/mafIn.maf test-data/malformed.maf test-data/panTro4.bed test-data/ref.2bit test-data/ref.fa test-data/speciesFilter.txt |
| diffstat | 11 files changed, 335 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mafFilter.xml Fri Jun 27 13:23:30 2025 +0000 @@ -0,0 +1,255 @@ +<tool id="ucsc_mafFilter" name="mafFilter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.2" license="MIT"> + <description>Filter MAF files based on various criteria</description> + <macros> + <token name="@TOOL_VERSION@">469</token> + <token name="@VERSION_SUFFIX@">0</token> + </macros> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">ucsc-maffilter</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + mafFilter + $tolerate + #if $minCol + -minCol='$minCol' + #end if + #if $minRow + -minRow='$minRow' + #end if + #if $maxRow + -maxRow='$maxRow' + #end if + #if $factor_options.factor_enabled == "yes": + -factor -minFactor='$minFactor' + #end if + #if $minScore + -minScore='$minScore' + #end if + #if $reject + -reject='$rejected_maf' + #end if + #if $needComp + -needComp='$needComp' + #end if + $overlap + #if $componentFilter + -componentFilter='$componentFilter' + #end if + #if $speciesFilter + -speciesFilter='$speciesFilter' + #end if + '$input_maf' > '$output_maf' + ]]></command> + <inputs> + <param name="input_maf" type="data" format="maf" label="Input MAF file" help="Select the MAF file to filter."/> + <param argument="-tolerate" type="boolean" truevalue="-tolerate" falsevalue="" checked="false" label="Tolerate bad input" help="Ignore bad input rather than aborting."/> + <param argument="-minCol" type="integer" optional="true" value="1" min="1" label="Minimum columns" help="Filter out blocks with fewer than this number of columns."/> + <param argument="-minRow" type="integer" optional="true" value="2" min="1" label="Minimum rows" help="Filter out blocks with fewer than this number of rows."/> + <param argument="-maxRow" type="integer" optional="true" value="100" min="1" label="Maximum rows" help="Filter out blocks with more than or equal to this number of rows."/> + <conditional name="factor_options"> + <param name="factor_enabled" type="select" label="Enable factor" help="Enable factor for score filtering"> + <option value="yes">Yes</option> + <option value="no" selected="true">No</option> + </param> + <when value="yes"> + <param argument="-factor" type="boolean" truevalue="-factor" falsevalue="" label="Use factor-based score filtering" help="Filter out scores below -minFactor * (ncol^2) * nrow"/> + <param argument="-minFactor" type="integer" value="5" min="0" label="Minimum factor" help="Factor to use with factor-based score filtering"/> + </when> + <when value="no"/> + </conditional> + <param argument="-minScore" type="float" label="Minimum score" help="Minimum allowed score (alternative to factor-based filtering)" optional="true"/> + <param argument="-reject" type="boolean" truevalue="-reject" falsevalue="" optional="true" label="Save Rejected blocks to an output file" help="Select if you like to save rejected blocks to a file"/> + <param argument="-needComp" type="text" label="Required species component" help="All alignments must have this species as one of the components" optional="true"/> + <param argument="-overlap" type="boolean" truevalue="-overlap" falsevalue="" checked="false" label="Reject overlapping blocks" help="Reject overlapping blocks in reference (assumes ordered blocks)."/> + <param argument="-componentFilter" type="data" format="txt" label="Component filter file" help="Filter out blocks without a component listed in this file." optional="true"/> + <param argument="-speciesFilter" type="data" format="txt" label="Species filter file" help="Filter out blocks without a species listed in this file." optional="true"/> + </inputs> + <outputs> + <data name="output_maf" format="maf" label="${tool.name} on ${on_string}: Filtered MAF output"/> + <data name="rejected_maf" format="maf" label="${tool.name} on ${on_string}: Rejected blocks"> + <filter>reject</filter> + </data> + </outputs> + <tests> + <!-- Test 1: Testing -tolerate option on malformed MAF File --> + <test expect_num_outputs="1"> + <param name="input_maf" value="malformed.maf"/> + <param name="tolerate" value="true"/> + <output name="output_maf" ftype="maf"> + <assert_contents> + <has_n_lines n="28"/> + <not_has_text text="s dog.chr6"/> + </assert_contents> + </output> + </test> + <!-- Test 2: Testing -minCol option on MAF File --> + <test expect_num_outputs="1"> + <param name="input_maf" value="filter_in.maf"/> + <param name="minCol" value="10"/> + <output name="output_maf" ftype="maf"> + <assert_contents> + <has_n_lines n="29"/> + <not_has_text text="a score=500.0"/> + <not_has_text text="s human.chr2 150 5 + 1000 ACGTA"/> + <not_has_text text="s mouse.chr2 250 5 + 2000 ACGT-"/> + </assert_contents> + </output> + </test> + <!-- Test 3: Testing -minRow option on MAF File --> + <test expect_num_outputs="1"> + <param name="input_maf" value="filter_in.maf"/> + <param name="minRow" value="3"/> + <output name="output_maf" ftype="maf"> + <assert_contents> + <has_n_lines n="25"/> + <not_has_text text="a score=500.0"/> + <not_has_text text="s human.chr2 150 5 + 1000 ACGTA"/> + <not_has_text text="s mouse.chr2 250 5 + 2000 ACGT-"/> + <not_has_text text="a score=50.0"/> + <not_has_text text="s human.chr4 110 10 + 1000 ACGTACGTAC"/> + <not_has_text text="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/> + </assert_contents> + </output> + </test> + <!-- Test 4: Testing -maxRow option on MAF File --> + <test expect_num_outputs="1"> + <param name="input_maf" value="filter_in.maf"/> + <param name="maxRow" value="4"/> + <output name="output_maf" ftype="maf"> + <assert_contents> + <has_n_lines n="25"/> + <has_line line="a score=50.000000"/> + <has_line line="s human.chr4 110 10 + 1000 ACGTACGTAC"/> + <has_line line="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/> + </assert_contents> + </output> + </test> + <!-- Test 5: Testing -factor and -minFactor option on MAF File --> + <test expect_num_outputs="1"> + <param name="input_maf" value="filter_in.maf"/> + <conditional name="factor_options"> + <param name="factor_enabled" value="yes"/> + <param name="minFactor" value="5"/> + </conditional> + <output name="output_maf" ftype="maf"> + <assert_contents> + <has_n_lines n="33"/> + </assert_contents> + </output> + </test> + <!-- Test 6: Testing -minScore option on MAF File --> + <test expect_num_outputs="1"> + <param name="input_maf" value="filter_in.maf"/> + <param name="minScore" value="500"/> + <output name="output_maf" ftype="maf"> + <assert_contents> + <has_n_lines n="23"/> + <not_has_text text="a score=50.0"/> + <not_has_text text="s human.chr4 110 10 + 1000 ACGTACGTAC"/> + <not_has_text text="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/> + </assert_contents> + </output> + </test> + <!-- Test 7: Testing -reject option on MAF File --> + <test expect_num_outputs="2"> + <param name="input_maf" value="filter_in.maf"/> + <param name="minScore" value="500"/> + <param name="reject" value="true"/> + <output name="output_maf" ftype="maf"> + <assert_contents> + <has_n_lines n="23"/> + <not_has_text text="a score=50.0"/> + <not_has_text text="s human.chr4 110 10 + 1000 ACGTACGTAC"/> + <not_has_text text="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/> + </assert_contents> + </output> + <output name="rejected_maf" ftype="maf"> + <assert_contents> + <has_n_lines n="11"/> + <has_line line="a score=50.000000"/> + <has_line line="s human.chr4 110 10 + 1000 ACGTACGTAC"/> + <has_line line="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/> + </assert_contents> + </output> + </test> + <!-- Test 8: Testing -needComp option on MAF File --> + <test expect_num_outputs="1"> + <param name="input_maf" value="filter_in.maf"/> + <param name="needComp" value="human.chr1"/> + <output name="output_maf" ftype="maf"> + <assert_contents> + <has_n_lines n="6"/> + <has_text text="a score=1000.000000"/> + <has_text text="s human.chr1 100 10 + 1000 ACGTACGTAC"/> + <has_text text="s mouse.chr1 200 10 + 2000 ACGTACGTAC"/> + <has_text text="s dog.chr1 300 10 + 3000 ACGTACGTAC"/> + </assert_contents> + </output> + </test> + <!-- Test 9: Testing -componentFilter option on MAF File --> + <test expect_num_outputs="1"> + <param name="input_maf" value="filter_in.maf"/> + <param name="componentFilter" value="componentFilter.txt"/> + <output name="output_maf" ftype="maf"> + <assert_contents> + <has_n_lines n="6"/> + <has_text text="a score=1000.000000"/> + <has_text text="s human.chr1 100 10 + 1000 ACGTACGTAC"/> + <has_text text="s mouse.chr1 200 10 + 2000 ACGTACGTAC"/> + <has_text text="s dog.chr1 300 10 + 3000 ACGTACGTAC"/> + </assert_contents> + </output> + </test> + <!-- Test 10: Testing -speciesFiler option on MAF File --> + <test expect_num_outputs="1"> + <param name="input_maf" value="filter_in.maf"/> + <param name="speciesFilter" value="speciesFilter.txt"/> + <output name="output_maf" ftype="maf"> + <assert_contents> + <has_n_lines n="25"/> + <has_text text="a score=1000.000000"/> + <not_has_text text="a score=500.0"/> + <not_has_text text="s human.chr2 150 5 + 1000 ACGTA"/> + <not_has_text text="s mouse.chr2 250 5 + 2000 ACGT-"/> + <not_has_text text="a score=50.0"/> + <not_has_text text="s human.chr4 110 10 + 1000 ACGTACGTAC"/> + <not_has_text text="s mouse.chr4 210 10 + 2000 ACGTACGTAC"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +**mafFilter** + +Filters MAF (Multiple Alignment Format) files based on specified criteria. The filtered output is written to a new MAF file, and optionally, rejected blocks are saved to a separate file. + +**Options:** + +- **Tolerate bad input**: Ignore bad input instead of aborting. +- **Minimum columns**: Filter out blocks with fewer than the specified number of columns (default: 1). +- **Minimum rows**: Filter out blocks with fewer than the specified number of rows (default: 2). +- **Maximum rows**: Filter out blocks with more than or equal to the specified number of rows (default: 100). +- **Factor-based score filtering**: Filter out scores below `-minFactor * (ncol^2) * nrow`. +- **Minimum factor**: Factor to use with factor-based score filtering (default: 5). +- **Minimum score**: Minimum allowed score (alternative to factor-based filtering). +- **Rejected blocks output file**: Save rejected blocks to the specified file. +- **Required species component**: All alignments must include the specified species as a component. +- **Reject overlapping blocks**: Reject overlapping blocks in the reference (assumes ordered blocks). +- **Component filter file**: Filter out blocks without a component listed in the provided file. +- **Species filter file**: Filter out blocks without a species listed in the provided file. + + ]]></help> + <citations> + <citation type="bibtex"> + @misc{mafFilter, + author = {Kent UCSC}, + title = {mafFilter: A tool for filtering MAF files}, + note = {Tool for filtering Multiple Alignment Format files} +} + </citation> + </citations> + <creator> + <person givenName="Saim" familyName="Momin" url="https://github.com/SaimMomin12"/> + <organization name="Galaxy Europe" url="https://galaxyproject.org/eu/"/> + </creator> +</tool> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/componentFilter.txt Fri Jun 27 13:23:30 2025 +0000 @@ -0,0 +1,3 @@ +human.chr1 +mouse.chr1 +dog.chr1 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filter_in.maf Fri Jun 27 13:23:30 2025 +0000 @@ -0,0 +1,32 @@ +##maf version=1 scoring=example +a score=1000.0 +s human.chr1 100 10 + 1000 ACGTACGTAC +s mouse.chr1 200 10 + 2000 ACGTACGTAC +s dog.chr1 300 10 + 3000 ACGTACGTAC + +a score=500.0 +s human.chr2 150 5 + 1000 ACGTA +s mouse.chr2 250 5 + 2000 ACGT- + +a score=200.0 +s human.chr3 200 15 + 1000 ACGTACGTACGTACG +s cat.chr3 350 15 + 4000 ACGTACGTACGTACG +s dog.chr3 450 15 + 5000 ACGTACGTACGTACG +s rat.chr3 550 15 + 6000 ACGTACGTACGTACG + +a score=50.0 +s human.chr4 110 10 + 1000 ACGTACGTAC +s mouse.chr4 210 10 + 2000 ACGTACGTAC + +a score=3000.0 +s human.chr5 105 10 + 1000 ACGTACGTAC +s mouse.chr5 205 10 + 2000 ACGTACGTAC +s dog.chr5 305 10 + 3000 ACGTACGTAC +s cat.chr5 405 10 + 4000 ACGTACGTAC +s rat.chr5 505 10 + 5000 ACGTACGTAC +s cow.chr5 605 10 + 6000 ACGTACGTAC + +a score=600.0 +s human.chr6 100 10 + 1000 ACGTACGTAC +s mouse.chr6 200 10 + 2000 ACGTACGTAC +s dog.chr6 300 10 + 3000 ACGTACGTAC \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/gorGor3.bed Fri Jun 27 13:23:30 2025 +0000 @@ -0,0 +1,2 @@ +chr7 1006 1007 +chr7 1013 1014 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/hg38.bed Fri Jun 27 13:23:30 2025 +0000 @@ -0,0 +1,2 @@ +chr7 1005 1006 +chr7 1010 1012 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/mafIn.maf Fri Jun 27 13:23:30 2025 +0000 @@ -0,0 +1,5 @@ +##maf version=1 scoring=blastz +a score=1234 +s hg38.chr7 1000 20 + 248956422 ACGTACGTACGTACGTACGT +s panTro4.chr7 1000 20 + 159345973 ACGTAC-TAC-TACGTACGT +s gorGor3.chr7 1000 20 + 174310764 A-GTACGTAC-TACG-AC-T \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/malformed.maf Fri Jun 27 13:23:30 2025 +0000 @@ -0,0 +1,32 @@ +##maf version=1 scoring=example +a score=1000.0 +s human.chr1 100 10 + 1000 ACGTACGTAC +s mouse.chr1 200 10 + 2000 ACGTACGTAC +s dog.chr1 300 10 + 3000 ACGTACGTAC + +a score=500.0 +s human.chr2 150 5 + 1000 ACGTA +s mouse.chr2 250 5 + 2000 ACGT- + +a score=200.0 +s human.chr3 200 15 + 1000 ACGTACGTACGTACG +s cat.chr3 350 15 + 4000 ACGTACGTACGTACG +s dog.chr3 450 15 + 5000 ACGTACGTACGTACG +s rat.chr3 550 15 + 6000 ACGTACGTACGTACG + +a score=50.0 +s human.chr4 110 10 + 1000 ACGTACGTAC +s mouse.chr4 210 10 + 2000 ACGTACGTAC + +a score=3000.0 +s human.chr5 105 10 + 1000 ACGTACGTAC +s mouse.chr5 205 10 + 2000 ACGTACGTAC +s dog.chr5 305 10 + 3000 ACGTACGTAC +s cat.chr5 405 10 + 4000 ACGTACGTAC +s rat.chr5 505 10 + 5000 ACGTACGTAC +s cow.chr5 605 10 + 6000 ACGTACGTAC + +a score=600.0 +s human.chr6 100 10 + 1000 ACGTACGTAC +s mouse.chr6 200 10 + 2000 ACGTACGTAC +s dog.chr6 \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/panTro4.bed Fri Jun 27 13:23:30 2025 +0000 @@ -0,0 +1,1 @@ +chr7 1007 1008 \ No newline at end of file
