view column_maker.xml @ 3:be25c075ed54 draft

"planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/column_maker commit 2b17bdfc47ca4d7f1a584216c4bd61a7050df7ea"
author devteam
date Thu, 04 Jun 2020 05:03:46 -0400
parents 464b9305180e
children 6e8d94597139
line wrap: on
line source

<tool id="Add_a_column1" name="Compute" version="1.3.0">
    <description>an expression on every row</description>
    <!--
        Packages the job environment must provide.
        python runs column_maker.py and sed does the header handling in the
        command section below.
        NOTE(review): numpy is presumably imported by column_maker.py, which
        is not part of this file; confirm before changing the pin.
    -->
    <requirements>
        <requirement type="package" version="2.7.13">python</requirement>
        <requirement type="package" version="4.4">sed</requirement>
        <requirement type="package" version="1.14">numpy</requirement>
    </requirements>
    <!--
        Command outline:
          1. With "Skip a header line" set to yes, line 1 of the input is
             written to "header" with a TAB and the new column name appended,
             and the remaining lines are written to "data". Otherwise
             "header" is an empty file and "data" is a symlink to the input.
          2. column_maker.py is called with: data, the output path, the
             expression, the rounding flag, the column count and column types
             from the input metadata, and the scientific notation flag.
          3. "header" and "column_maker_output" are concatenated into the
             output dataset.

        The TAB is inserted directly through the \t escape of GNU sed (sed is
        a declared requirement). The earlier approach appended a "%"
        placeholder and then translated every "%" of the header line to a
        TAB, which also split existing header names containing "%"
        (for example "GC%") into two columns.
    -->
    <command detect_errors="aggressive"><![CDATA[
        #if $header_lines_conditional.header_lines_select == "yes":
            sed -n '1p' '$input' | sed 's|$|\t${header_lines_conditional.header_new_column_name}|' > header &&
            sed '1d' '$input' > data &&
        #else:
            touch header &&
            ln -s '$input' data &&
        #end if

        python '$__tool_directory__/column_maker.py'
            data column_maker_output
            "$cond"
            $round
            ${input.metadata.columns}
            "${input.metadata.column_types}"
            $avoid_scientific_notation &&
        cat header column_maker_output > '$out_file1'
    ]]></command>
    <!--
        User-facing parameters. Every name here is referenced from the
        command template, so renaming one requires updating the command
        and the tests as well.
    -->
    <inputs>
        <!-- Expression passed to column_maker.py; columns are written as c1, c2, ... (see help). -->
        <param name="cond" type="text" value="c3-c2" label="Add expression"/>
        <!-- Tabular dataset that receives the new column. -->
        <param format="tabular" name="input" type="data" label="as a new column to" help="Dataset missing? See TIP below"/>
        <!-- Passed to the script verbatim as "no" or "yes". -->
        <param name="round" type="select" label="Round result?">
            <option value="no">NO</option>
            <option value="yes">YES</option>
        </param>
        <!-- When "yes", the first line is treated as a header and gets the given new column name appended. -->
        <conditional name="header_lines_conditional">
            <param name="header_lines_select" type="select" label="Skip a header line" help="# characters are already considered as comments and kept" >
                <option value="no" >no</option>
                <option value="yes" >yes</option>
            </param>
            <when value="no">
            </when>
            <when value="yes">
                <param name="header_new_column_name" type="text" value="New Column" label="The new column name" />
            </when>
        </conditional>
        <!-- Passed to the script verbatim as "no" or "yes"; the first option ("no") is the default. -->
        <param name="avoid_scientific_notation" type="select" label="Avoid scientific notation" help="If yes, use fully expanded decimal representation when writing new columns (use only if expression produces decimal numbers).">
            <option value="no">no</option>
            <option value="yes">yes</option>
        </param>
    </inputs>
    <!-- Single output; its datatype (format_source) and metadata (metadata_source) are taken from the input dataset. -->
    <outputs>
        <data format_source="input" name="out_file1" metadata_source="input"/>
    </outputs>
    <tests>
        <!-- Default expression (c3-c2) on a BED file, no rounding. -->
        <test>
            <param name="cond" value="c3-c2"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="no"/>
            <output name="out_file1" file="column_maker_out1.interval"/>
        </test>
        <!-- Multiplication on an interval file, no rounding. -->
        <test>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.interval"/>
            <param name="round" value="no"/>
            <output name="out_file1" file="column_maker_out2.interval"/>
        </test>
        <!-- Same expression on an input with a header line; the header gets the new column name appended. -->
        <test>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.header.tsv"/>
            <param name="round" value="no"/>
            <conditional name="header_lines_conditional">
                <param name="header_lines_select" value="yes" />
                <param name="header_new_column_name" value="value1_again" />
            </conditional>
            <output name="out_file1" file="column_maker_out2.header.tsv"/>
        </test>
        <!-- Same expression with rounding enabled. -->
        <test>
            <param name="cond" value="c4*1"/>
            <param name="input" value="1.interval"/>
            <param name="round" value="yes"/>
            <output name="out_file1" file="column_maker_out3.interval"/>
        </test>
        <!-- Very small float with default settings: scientific notation (1e-13) is expected in the output. -->
        <test>
            <param name="cond" value="float(.0000000000001)"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="no"/>
            <output name="out_file1">
                <assert_contents>
                    <has_text text="CCDS10397" />
                    <has_text text="1e-13" />
                </assert_contents>
            </output>
        </test>
        <!-- Same value with avoid_scientific_notation=yes: the expanded decimal is expected and 1e-13 must be absent. -->
        <test>
            <param name="cond" value="float(.0000000000001)"/>
            <param name="input" value="1.bed"/>
            <param name="round" value="no"/>
            <param name="avoid_scientific_notation" value="yes"/>
            <output name="out_file1">
                <assert_contents>
                    <has_text text="CCDS10397" />
                    <has_text text=".0000000000001" />
                    <not_has_text text="1e-13" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>

.. class:: infomark

**TIP:** If your data is not TAB delimited, use *Text Manipulation-&gt;Convert*

-----

**What it does**

This tool computes an expression for every row of a dataset and appends the result as a new column (field).

- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file

- **c3-c2** will add a length column to the dataset if **c2** and **c3** are start and end position

-----

**Example**

If this is your input::

   chr1  151077881  151077918  2  200  -
   chr1  151081985  151082078  3  500  +

computing "c4*c5" will produce::

   chr1  151077881  151077918  2  200  -   400.0
   chr1  151081985  151082078  3  500  +  1500.0

if, at the same time, "Round result?" is set to **YES** results will look like this::

   chr1  151077881  151077918  2  200  -   400
   chr1  151081985  151082078  3  500  +  1500

You can also use this tool to evaluate expressions. For example, computing "c3>=c2" for Input will result in the following::

   chr1  151077881  151077918  2  200  -  True
   chr1  151081985  151082078  3  500  +  True

or computing "type(c2)==type('')" for Input will return::

   chr1  151077881  151077918  2  200  -  False
   chr1  151081985  151082078  3  500  +  False


The following built-in functions are available::

  abs | all | any | bin | bool | chr | ceil | cmp | complex

  divmod | exp | float | log | floor | hex | int | len | long

  max | min | oct | ord | pow | range | reversed

  round | sorted | sqrt | str | sum | type | unichr | unicode

    </help>
    <citations />
</tool>