Mercurial > repos > iuc > trycycler_partition
view trycycler_partition.xml @ 7:7673a6c920b2 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/trycycler commit 9da13f57f93e69fd463f6245af45674fe011b861
author | iuc |
---|---|
date | Wed, 06 Nov 2024 13:32:36 +0000 |
parents | 4688ae3b49b5 |
children |
line wrap: on
line source
<tool id='trycycler_partition' name='Trycycler partition' version='@TOOL_VERSION@' profile='20.01'>
    <description>assign the reads to the clusters</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro='edam_ontology' />
    <expand macro='requirements' />
    <version_command>trycycler --version</version_command>
    <!--
        Command outline:
          1. Stage every input cluster FASTA as selected_cluster/cluster_<label>/2_all_seqs.fasta,
             where <label> is the last '_'-separated token of the element identifier.
          2. Run `trycycler partition` once over all staged cluster directories.
          3. Move each cluster's 4_reads.fastq to partitions/partition_<label>.fastq so that
             the output collection can discover it.
        Every step is chained with '&&' so that a failure stops the job with a non-zero exit code.
    -->
    <command detect_errors='exit_code'><![CDATA[
        #import re
        ## Derive the cluster label once per input and reuse it for staging and collection.
        #set $clusters = []
        #for $i in $input_cluster
            ## Last '_'-separated token of the identifier, without a trailing FASTA extension.
            ## (str.strip('.fasta') would strip a character set from both ends, not the suffix.)
            #set $label = re.sub('\.(fasta|fa|fna)\Z', '', str($i.element_identifier).split('_')[-1])
            ## Element identifiers are user-controlled: keep only characters that are safe
            ## inside a quoted shell path.
            #set $number = re.sub('[^\w\-]', '_', $label)
            #silent $clusters.append(($i, $number))
        #end for

        mkdir -p 'partitions'
        #for $i, $number in $clusters
            #set $fullpath = '_'.join(['selected_cluster/cluster', str($number)])
            && mkdir -p '$fullpath'
            && ln -s '${i}' '$fullpath/2_all_seqs.fasta'
        #end for

        && trycycler partition
            --cluster_dir selected_cluster/cluster_*
            --reads '$reads'
            --min_aligned_len $min_aligned_len
            --min_read_cov $min_read_cov
            --threads \${GALAXY_SLOTS:-2}

        #for $i, $number in $clusters
            #set $fullpath = '_'.join(['selected_cluster/cluster', str($number)])
            && mv '$fullpath/4_reads.fastq' 'partitions/partition_${number}.fastq'
        #end for
    ]]></command>
    <inputs>
        <param name='input_cluster' type='data' format='fasta' multiple='true'
               label='Cluster datasets'
               help='Clustered contigs (multiple FASTA files)' />
        <param name='reads' type='data' format='fastq,fastq.gz'
               label='Long-read datasets'
               help='Long reads (FASTQ format) used to generate the assemblies' />
        <param argument='--min_aligned_len' type='integer' min='500' max='3500' value='1000'
               label='Min bases aligned'
               help='Reads with less than this many bases aligned (default = 1000) will be ignored.' />
        <param argument='--min_read_cov' type='integer' min='0' max='100' value='90'
               label='Min read length covered by alignments'
               help='Reads with less than this percentage of their length covered by alignments (default = 90.0) will be ignored.' />
    </inputs>
    <outputs>
        <!-- One FASTQ per cluster, discovered from the partitions/ directory filled by the command. -->
        <collection name='partitions' type='list' label='${tool.name} on ${on_string}'>
            <discover_datasets pattern='__designation_and_ext__' format='fastq' directory='partitions' />
        </collection>
    </outputs>
    <tests>
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta' />
            <param name='reads' value='reads.fastq.gz' />
            <param name='min_aligned_len' value='1000' />
            <param name='min_read_cov' value='90' />
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_01.fastq' ftype='fastq' />
            </output_collection>
        </test>
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta' />
            <param name='reads' value='reads.fastq.gz' />
            <param name='min_aligned_len' value='1200' />
            <param name='min_read_cov' value='95' />
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_02.fastq' ftype='fastq' />
            </output_collection>
        </test>
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta' />
            <param name='reads' value='reads.fastq.gz' />
            <param name='min_aligned_len' value='900' />
            <param name='min_read_cov' value='93' />
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_03.fastq' ftype='fastq' />
            </output_collection>
        </test>
        <!-- NOTE(review): same parameter values as the first test but a different expected
             file (partition_04.fastq vs partition_01.fastq); confirm this is intended. -->
        <test>
            <param name='input_cluster' value='reconciled_cluster_01.fasta' />
            <param name='reads' value='reads.fastq.gz' />
            <param name='min_aligned_len' value='1000' />
            <param name='min_read_cov' value='90' />
            <output_collection name='partitions' type='list' count='1'>
                <element name='partition_01' file='partition_04.fastq' ftype='fastq' />
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**Purpose**

The *Trycycler partition* tool splits the reads between the different clusters, i.e. each read will be assigned to whichever cluster it best aligns to and saved into a file for that cluster. This step is run once for your entire genome (i.e. not on a per-cluster basis).

----

.. class:: infomark

**Input**

This tool requires as input the set of clusters considered valuable, and the long-read dataset used previously.

----

.. class:: infomark

**Output**

After **Trycycler partition** completes, it will generate one file per cluster, each of which contains its share of the total reads.

----

.. class:: infomark

@PIPELINE@
    ]]></help>
    <expand macro='citations' />
</tool>