Mercurial > repos > iuc > trycycler_partition

<tool id='trycycler_partition' name='Trycycler partition' version='@TOOL_VERSION@' profile='20.01'>
    <!-- Short tagline for the tool. -->
    <description>assign the reads to the clusters</description>
    <!-- Shared definitions are pulled in from macros.xml; the macros expanded in this
         file (edam_ontology, requirements, citations) are presumably defined there, as
         are the @TOOL_VERSION@ and @PIPELINE@ tokens. TODO confirm against macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro='edam_ontology' />
    <expand macro='requirements' />
    <!-- Command used to report the version of the underlying trycycler executable. -->
    <version_command>trycycler --version</version_command>
    <!-- Stages every input cluster as selected_cluster/cluster_<number>/2_all_seqs.fasta,
         runs trycycler partition over all staged clusters, then moves each resulting
         4_reads.fastq into partitions/ so it can be collected as an output element. -->
    <command detect_errors='exit_code'><![CDATA[
        ## The rendered template is executed as a single shell command line (the
        ## multi-line trycycler call below relies on this), so every statement has
        ## to be chained explicitly with '&&'.
        #import re
        mkdir -p 'partitions' &&
        ## Cluster numbers are derived once here and reused when collecting results.
        #set $cluster_numbers = []
        #for $i in $input_cluster
            ## The cluster number is the text after the last '_' of the element
            ## identifier, e.g. 'reconciled_cluster_01.fasta' gives '01'.
            #set $number = str($i.element_identifier).split('_')[-1]
            ## Remove only a literal '.fasta' suffix. str.strip('.fasta') would strip
            ## any of the characters '.', 'f', 'a', 's', 't' from both ends instead.
            #if $number.endswith('.fasta')
                #set $number = $number[:-6]
            #end if
            ## Element identifiers are user-controlled: keep only characters that are
            ## safe inside a single-quoted shell path.
            #set $number = re.sub('[^\w\-.]', '_', $number)
            #silent $cluster_numbers.append($number)
            #set $fullpath = 'selected_cluster/cluster_' + $number
            mkdir -p '$fullpath' &&
            ln -s '${i}' '$fullpath/2_all_seqs.fasta' &&
        #end for
        trycycler partition --cluster_dir 'selected_cluster'/cluster_*
                --reads '$reads'
                --min_aligned_len $min_aligned_len
                --min_read_cov $min_read_cov
                --threads \${GALAXY_SLOTS:-2} &&
        ## Move each cluster's reads into the directory scanned by discover_datasets.
        #for $number in $cluster_numbers
            mv 'selected_cluster/cluster_${number}/4_reads.fastq' 'partitions/partition_${number}.fastq' &&
        #end for
        ## Terminates the trailing '&&' chain left by the loop above.
        echo 'bye!'
    ]]>    </command>
    <inputs>
        <!-- One or more cluster FASTA datasets. The command template derives each
             cluster's number from the dataset's element identifier. -->
        <param name='input_cluster'
               type='data'
               format='fasta'
               multiple='true'
               label='Cluster datasets'
               help='Clustered contigs (multiple FASTA files)' />
        <!-- A single long-read dataset, plain or gzip-compressed FASTQ. -->
        <param name='reads'
               type='data'
               format='fastq,fastq.gz'
               label='Long-read datasets'
               help='Long reads (FASTQ format) used to generate the assemblies' />
        <!-- Minimum number of aligned bases for a read to be kept (500 to 3500). -->
        <param argument='--min_aligned_len'
               type='integer'
               min='500'
               max='3500'
               value='1000'
               label='Min bases aligned'
               help='Reads with less than this many bases aligned (default = 1000) will be ignored.' />
        <!-- Minimum percentage of the read length covered by alignments (0 to 100). -->
        <param argument='--min_read_cov'
               type='integer'
               min='0'
               max='100'
               value='90'
               label='Min read length covered by alignments'
               help='Reads with less than this percentage of their length covered by alignments (default = 90.0) will be ignored.' />
    </inputs>
    <outputs>
        <!-- List collection built from the files the command moves into the
             'partitions' directory (partition_<number>.fastq). -->
        <collection name='partitions'
                    type='list'
                    label='${tool.name} on ${on_string}'>
            <discover_datasets pattern='__designation_and_ext__'
                               format='fastq'
                               directory='partitions' />
        </collection>
    </outputs>
    <!-- All tests use the same single cluster (reconciled_cluster_01.fasta) and the same
         reads (reads.fastq.gz); each expects a one-element collection named partition_01. -->
    <tests>
        <!-- Default thresholds: min_aligned_len=1000, min_read_cov=90. -->
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta' />
            <param name='reads' value='reads.fastq.gz' />
            <param name='min_aligned_len' value='1000' />
            <param name='min_read_cov' value='90' />
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_01.fastq' ftype='fastq' />
            </output_collection>
        </test>
        <!-- Stricter thresholds: min_aligned_len=1200, min_read_cov=95; compared
             against partition_02.fastq. -->
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta' />
            <param name='reads' value='reads.fastq.gz' />
            <param name='min_aligned_len' value='1200' />
            <param name='min_read_cov' value='95' />
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_02.fastq' ftype='fastq' />
            </output_collection>
        </test>
        <!-- Shorter alignment length with higher coverage: min_aligned_len=900,
             min_read_cov=93; compared against partition_03.fastq. -->
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta' />
            <param name='reads' value='reads.fastq.gz' />
            <param name='min_aligned_len' value='900' />
            <param name='min_read_cov' value='93' />
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_03.fastq' ftype='fastq' />
            </output_collection>
        </test>
        <!-- NOTE(review): same inputs and parameters as the first test, but compared
             against partition_04.fastq rather than partition_01.fastq. Confirm this
             is intended and that the two expected files agree. -->
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta' />
            <param name='reads' value='reads.fastq.gz' />
            <param name='min_aligned_len' value='1000' />
            <param name='min_read_cov' value='90' />
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_04.fastq' ftype='fastq' />
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

.. class:: infomark

**Purpose**

The *Trycycler partition* step splits the reads between the different clusters, i.e. each read is assigned to whichever cluster it best aligns to and saved into a file for that cluster. This step is run once for your entire genome (i.e. not on a per-cluster basis).

----

.. class:: infomark

**Input**

This tool requires as input the set of clusters considered valuable, and the long-read dataset used previously.

----

.. class:: infomark

**Output**

After **Trycycler partition** completes, it will generate a file per cluster, each of which contains its share of the total reads.


----

.. class:: infomark

@PIPELINE@
    ]]>    </help>
    <expand macro='citations' />
</tool>
author	iuc
date	Wed, 06 Nov 2024 13:32:36 +0000
parents	4688ae3b49b5
children