Mercurial > repos > yating-l > rename_scaffolds

<tool id="rename_scaffold" name="rename the scaffolds" version="2.3">
    <!-- One-line summary of the tool: scaffold names are renamed or truncated so that none exceeds 31 characters. -->
    <description>a Galaxy tool to rename or truncate the scaffold names in the target genome so that they won't exceed 31 characters</description>
<!-- Any exit code of 1 or above marks the job as failed. -->
<stdio>
    <exit_code range="1:" level="fatal"/>
</stdio>
<!--
    Runs rename.py from the tool directory with four positional arguments:
    input FASTA, selected mode (truncate or rename), output FASTA, name mapping file.
    Every substitution is single-quoted so that paths containing spaces or
    shell metacharacters are passed to the script as one argument each.
-->
<command><![CDATA[
    python '$__tool_directory__/rename.py' '$input' '$manipulate_selector' '$output' '$index'
]]></command>
<inputs>
    <!-- FASTA file whose scaffold names are processed; the label keeps the form from showing the bare parameter name. -->
    <param name="input" type="data" format="fasta" label="Target genome (FASTA)"/>
    <!-- Processing mode; its value is passed to the script and also reused in the label of the genome output. -->
    <param name="manipulate_selector" type="select" label="Choose whether you want to rename the scaffolds or truncate the scaffold names">
        <option value="truncate" selected="true">Truncate the scaffold names if they exceed 31 characters</option>
        <option value="rename">Rename the scaffold names</option>
    </param>
</inputs>
<outputs>
    <!-- Processed genome. The label appends "d" to the selected mode value, giving "renamed" or "truncated". -->
    <data name="output"
          format="fasta"
          label="${tool.name} on ${on_string}: ${manipulate_selector}d target genome"/>
    <!-- Name mapping file produced alongside the genome. -->
    <data name="index"
          format="csv"
          label="${tool.name} on ${on_string}: name mapping"/>
</outputs>
<tests>
    <test>
        <!-- Rename mode on Dbia3.fa: checks both the renamed genome and the name mapping. -->
        <param name="input" value="Dbia3.fa" />
        <param name="manipulate_selector" value="rename" />
        <output name="output" file="Dbia3_renamed.fa"/>
        <output name="index" file="renamed_Dbia3_name_mapping.csv"/>
    </test>
    <test>
        <!-- Truncate mode on Dbia3.fa: the expected genome is the input file itself,
             presumably because none of its scaffold names needs changing (confirm against the fixture). -->
        <param name="input" value="Dbia3.fa" />
        <param name="manipulate_selector" value="truncate" />
        <output name="output" file="Dbia3.fa"/>
        <output name="index" file="truncated_Dbia3_name_mapping.csv"/>
    </test>
    <test>
        <!-- Rename mode on an input containing non-ASCII characters. -->
        <param name="input" value="sequence_with_noascii.fa" />
        <param name="manipulate_selector" value="rename" />
        <output name="output" file="renamed_sequence_with_noascii.fa" />
        <output name="index" file="renamed_noascii_name_mapping.csv"/>
    </test>
    <test>
        <!-- Truncate mode on an input containing non-ASCII characters. -->
        <param name="input" value="sequence_with_noascii.fa" />
        <param name="manipulate_selector" value="truncate" />
        <output name="output" file="truncated_sequence_with_noascii.fa" />
        <output name="index" file="truncated_noascii_name_mapping.csv"/>
    </test>
    <test expect_failure="true">
        <!-- Truncate mode on an input with non-ASCII characters whose processed names conflict.
             The job is expected to fail, so no outputs are compared. -->
        <param name="input" value="sequence_with_noascii_name_conflict.fa" />
        <param name="manipulate_selector" value="truncate" />
    </test>
    <test>
        <!-- Truncate mode on an input whose sequence names contain a tab character. -->
        <param name="input" value="sequence_with_tab.fa" />
        <param name="manipulate_selector" value="truncate" />
        <output name="output" file="fixed_reference_with_tab.fasta" />
        <output name="index" file="truncated_noascii_with_tab_name_mapping.csv"/>
    </test>
</tests>
<!-- Help text (reStructuredText). States the limit as "at most 31 characters" to match the description and the truncate option. -->
<help><![CDATA[
This tool changes the scaffold names in the target genome so that every sequence name is at most 31 characters long. It offers two modes, and both also output a name mapping file:

- **Rename**: rename all scaffolds to scaffold_1, scaffold_2, ..., scaffold_N.

- **Truncate**: truncate the scaffold names that are more than 31 characters and replace each invalid character (non-ASCII, '\\t', '\\n', '\\x0b', '\\x0c', '\\r') with '_'.
]]></help>
<!-- No citations are registered for this tool. -->
<citations/>
</tool>
author	yating-l
date	Tue, 31 Jul 2018 15:06:14 -0400
parents	2d143f0ac727
children