view datamash-transpose.xml @ 5:374cb875d38a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/datamash commit 9edd054251bc74685d627360bb0fbe8ea60fa6a2
author iuc
date Thu, 23 Mar 2023 20:47:02 +0000
parents ba0c40780ecf
children a513e3fbb4c5
line wrap: on
line source

<tool id="datamash_transpose" name="Transpose" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>rows/columns in a tabular file</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <edam_topics>
        <edam_topic>topic_3570</edam_topic> <!-- Pure math / linear algebra -->
    </edam_topics>
    <edam_operations>
         <!-- <edam_operation>operation_1234</edam_operation> -->
    </edam_operations>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command><![CDATA[
        ## Transpose the rows/columns of $in_file into $out_file with datamash.
        ##
        ## Inputs up to size_threshold_MB are transposed in one datamash call.
        ## Larger inputs are split into row chunks, each chunk is transposed
        ## separately, and the transposed chunks are pasted side by side.
        ##
        ## Shell statements are terminated with ';' or chained with '&&', so the
        ## rendered script does not rely on line breaks being preserved.
        #import os
        #set file_size_MB = os.path.getsize(str($in_file)) / (1024 * 1024)
        #set size_threshold_MB = 1024
        #if $file_size_MB <= $size_threshold_MB:
            ## Paths are single-quoted so that whitespace or shell metacharacters
            ## in them cannot break the redirections.
            datamash transpose @FIELD_SEPARATOR@ < '$in_file' > '$out_file'
        #else:
            ## Input matrix is very big: divide and conquer.
            ## If the input file is very big, datamash runs out of memory (much
            ## earlier than file size ~ available RAM).
            ## Split into manageable chunks of row vectors, transpose the chunks
            ## and juxtapose the resulting column vector chunks.
            #set num_chunks = 1 + int(file_size_MB/size_threshold_MB)
            echo "Huge matrix detected, processing in $num_chunks chunks." &&
            ## 'l/N' splits into N pieces without breaking lines, so every chunk
            ## holds only complete rows.
            split -n l/$num_chunks '$in_file' split_input_ &&
            ## Iterate over the glob directly instead of parsing 'ls' output.
            ## The glob is expanded once, before any *_transposed file exists.
            for chunk in split_input_*; do
                datamash transpose @FIELD_SEPARATOR@ < "\$chunk" > "\${chunk}_transposed";
            done &&
            ## The glob expands in sorted order, matching the suffix order that
            ## split assigns, so the chunks are pasted back in row order.
            ## NOTE(review): paste joins with TAB by default - confirm this
            ## matches whatever separator @FIELD_SEPARATOR@ selects.
            paste split_input_*_transposed > '$out_file'
        #end if
    ]]></command>
    <expand macro="inputs_outputs"/>
    <tests>
        <!-- Transpose the bundled sample input and compare the result
             with the stored expected output. -->
        <test expect_num_outputs="1">
            <param name="in_file" value="datamash_transpose_input.txt"/>
            <output file="datamash_transpose_output.txt" name="out_file"/>
        </test>
        <!-- Test for transposing an extremely big input matrix.
             Disabled to keep the repository size reasonable.
             For manual testing, download a pathological input and the
             matching expected output from:
             https://usegalaxy.eu/u/tunc/h/very-big-scrna-matrix
        -->
        <!--
        <test>
            <param name="in_file" value="big.tabular"/>
            <output file="transposed_big.tabular" name="out_file"/>
        </test>
        -->
        <!-- Inverse check for the disabled test above:
             transpose(transpose(A)) = A -->
        <!--
        <test>
            <param name="in_file" value="transposed_big.tabular"/>
            <output file="big.tabular" name="out_file"/>
        </test>
        -->
    </tests>
    <help>
<![CDATA[
@HELP_HEADER@

**Syntax**

This tool transposes (swaps) rows/columns in a tabular input file.

-----

**Example**

Input file::

    Genes   NOX1  DcP  HH
    Sample  A1    A2   B3
    Counts  514   542  490

Output file::

    Genes  Sample  Counts
    NOX1   A1      514
    DcP    A2      542
    HH     B3      490

@HELP_FOOTER@
]]>
    </help>
    <expand macro="citation"/>
</tool>