diff trycycler_partition.xml @ 0:8fcec9049d68 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/trycycler commit 9d7c4277b0f96aacd466f2d497e08edcca3fa238"
author iuc
date Thu, 11 Feb 2021 19:23:50 +0000
parents
children 4688ae3b49b5
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/trycycler_partition.xml	Thu Feb 11 19:23:50 2021 +0000
@@ -0,0 +1,120 @@
+<tool id='trycycler_partition' name='Trycycler partition' version='@TOOL_VERSION@' profile='21.01'>
+    <description>assign the reads to the clusters</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro='edam_ontology'/>
+    <expand macro='requirements'/>
+    <version_command>trycycler --version</version_command>
+    <command detect_errors='exit_code'><![CDATA[
+        mkdir -p 'partitions'
+        #for $i in $input_cluster
+            #set $name = str($i.element_identifier)
+            #set $number = (str($name).split('_')[-1]).strip('.fasta')
+            #set $fullpath = '_'.join(['selected_cluster/cluster',str($number)])
+            mkdir -p $fullpath/ &&
+            ln -s '${i}' '$fullpath/2_all_seqs.fasta' &&
+        #end for
+        trycycler partition --cluster_dir 'selected_cluster'/cluster_*
+                --reads '$reads'
+                --min_aligned_len $min_aligned_len
+                --min_read_cov $min_read_cov
+                --threads \${GALAXY_SLOTS:-2} &&
+        #for $i in $input_cluster
+            #set $name = str($i.element_identifier)
+            #set $number = (str($name).split('_')[-1]).strip('.fasta')
+            #set $fullpath = '_'.join(['selected_cluster/cluster',str($number)])
+            mv '$fullpath/4_reads.fastq' 'partitions/partition_${number}.fastq' &&
+        #end for
+        echo 'bye!'
+    ]]></command>
+    <inputs>
+        <param name='input_cluster' type='data' 
+            format='fasta' multiple='true' label='Cluster datasets' 
+            help='Clustered contigs (multiple FASTA files)' />
+        <param name='reads' type='data' 
+            format='fastq,fastq.gz' label='Long-read datasets' 
+            help='Long reads (FASTQ format) used to generate the assemblies' />
+        <param argument='--min_aligned_len' type='integer' min='500' max='3500' 
+            value='1000' label='Min bases aligned' 
+            help='Reads with less than this many bases aligned (default = 1000) will be ignored.' />
+        <param argument='--min_read_cov' type='integer' min='0' max='100' 
+            value='90' label='Min read length covered by alignments' 
+            help='Reads with less than this percentage of their length covered by alignments (default = 90.0) will be ignored.' />
+    </inputs>
+    <outputs>
+        <collection name='partitions' type='list' label='${tool.name} on ${on_string}'>
+            <discover_datasets pattern='__designation_and_ext__' format='fastq' directory='partitions'/>        
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name='input_cluster' value='reconciled_cluster_01.fasta'/>
+            <param name='reads' value='reads.fastq.gz'/>
+            <param name='min_aligned_len' value='1000'/>
+            <param name='min_read_cov' value='90'/>
+            <output_collection name='partitions' type='list' count='1'>
+                <element name='partition_01' file='partition_01.fastq' ftype='fastq'/>
+            </output_collection>
+        </test>
+        <test>
+            <param name='input_cluster' value='reconciled_cluster_01.fasta'/>
+            <param name='reads' value='reads.fastq.gz'/>
+            <param name='min_aligned_len' value='1200'/>
+            <param name='min_read_cov' value='95'/>
+            <output_collection name='partitions' type='list' count='1'>
+                <element name='partition_01' file='partition_02.fastq' ftype='fastq'/>
+            </output_collection>
+        </test>
+        <test>
+            <param name='input_cluster' value='reconciled_cluster_01.fasta'/>
+            <param name='reads' value='reads.fastq.gz'/>
+            <param name='min_aligned_len' value='900'/>
+            <param name='min_read_cov' value='93'/>
+            <output_collection name='partitions' type='list' count='1'>
+                <element name='partition_01' file='partition_03.fastq' ftype='fastq'/>
+            </output_collection>
+        </test>
+        <test>
+            <param name='input_cluster' value='reconciled_cluster_01.fasta'/>
+            <param name='reads' value='reads.fastq.gz'/>
+            <param name='min_aligned_len' value='1000'/>
+            <param name='min_read_cov' value='90'/>
+            <output_collection name='partitions' type='list' count='1'>
+                <element name='partition_01' file='partition_04.fastq' ftype='fastq'/>
+            </output_collection>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. class:: infomark
+
+**Purpose**
+                    
+The *Trycycler partition* split the reads between the different clusters, i.e. each read will be assigned to whichever cluster it best aligns and saved into a file for that cluster. This step is run once for your entire genome (i.e. not on a per-cluster basis).
+
+----
+                    
+.. class:: infomark
+                    
+**Input**
+
+This tool requires as input the set of clustered considered valuable, and the long-read dataset used previously.
+
+----
+                    
+.. class:: infomark
+                    
+**Output**
+                    
+After **Trycycler partition** completes, if will generate a file per cluster, each of which contains its share of the total reads.
+
+
+----                                                                                                    
+                                                                                                        
+.. class:: infomark
+
+@PIPELINE@
+    ]]></help>
+    <expand macro='citations'/>
+</tool>