Mercurial > repos > iuc > links

diff links.xml @ 0:7acd9ade2dd1 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/links commit f15e5bb67b7a212ecd8c98e816c80e22d3b7a0cb"
author: iuc
date: Wed, 23 Feb 2022 08:25:55 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/links.xml	Wed Feb 23 08:25:55 2022 +0000
@@ -0,0 +1,191 @@
+<tool id="links" name="LINKS" version="@TOOL_VERSION@+galaxy+@VERSION_SUFFIX@" python_template_version="3.5" profile="20.09">
+    <description> - scaffold genome assemblies with long reads</description>
+    <xrefs>
+        <xref type="bio.tools">links</xref>
+    </xrefs>
+    <macros>
+        <token name="@TOOL_VERSION@">2.0.1</token>
+        <token name="@VERSION_SUFFIX@">1</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">links</requirement> 
+    </requirements>
+    <version_command><![CDATA[
+        LINKS | grep "LINKS v" | cut -d' ' -f4
+    ]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+
+        ## set up file paths
+
+        #set sequences_fn = 'sequences.' + $f.ext
+        ln -s '$f' '$sequences_fn'
+        &&
+        #for $n, $read in enumerate($reads):
+            #set read_fn = 'reads' + str($n) + '.' + $read.ext
+            ln -s '$read' '$read_fn'
+            &&
+            printf '%s\n' '$read_fn' >> reads.fof
+            &&
+        #end for
+
+        ## RUN LINKS       
+
+        LINKS
+        -f '$sequences_fn'
+        -s reads.fof
+        -b links_output
+
+        #if str($scaffolding.k):
+            -k '$scaffolding.k'
+        #end if
+
+        #if str($scaffolding.d):
+            -d '$scaffolding.d'
+        #end if
+
+        #if str($scaffolding.t):
+            -t '$scaffolding.t'
+        #end if
+
+        #if str($advanced.a):
+            -a '$advanced.a'
+        #end if
+
+        #if str($advanced.e):
+            -e '$advanced.e'
+        #end if
+
+        #if str($advanced.o):
+            -o '$advanced.o'
+        #end if
+
+        #if str($advanced.l):
+            -l '$advanced.l'
+        #end if
+
+        #if str($advanced.z):
+            -z '$advanced.z'
+        #end if
+
+        #if str($advanced.p):
+            -p '$advanced.p'
+        #end if
+
+    ]]></command>
+    <inputs>
+        <param argument="-f" type="data" format="fasta,fasta.gz" label="Contigs" help="Sequences to scaffold" />
+        <param name="reads" type="data" format="fasta,fasta.gz,fastq,fastq.gz" label="Reads" help="Long sequence reads for scaffolding" multiple="true" />
+        <section name="scaffolding" title="Scaffolding options" expanded="False">
+            <param argument="-k" type="integer" value="15" optional="true" label="k-mer length" />
+            <param argument="-d" type="text" value="4000" optional="true" label="Distance" help="Distance between k-mer pairs. Multiple distances are separated by comma, e.g. 500,1000,2000,3000"/>
+            <param argument="-t" type="text" value="2" optional="true" label="Step" help="Step of sliding window when extracting k-mer pairs from long reads. Multiple steps are separated by comma, e.g. 10,5" />
+        </section>
+        <section name="advanced" title="Advanced options" expanded="False">
+            <param argument="-a" type="float" value="0.3" optional="true" label="Maximum link ratio" help="Maximum link ratio between two best contig pairs. Higher values lead to least accurate scaffolding" />
+            <param argument="-e" type="float" value="0.1" optional="true" label="Error" help="Error (%) allowed on -d distance"/>
+            <param argument="-l" type="integer" value="5" optional="true" label="Number of links" help="Minimum number of links (k-mer pairs) to compute 
+            scaffold"/>
+            <param argument="-o" type="integer" value="0" optional="true" label="Offset" help="Offset position for extracting k-mer pairs"/>
+            <param argument="-p" type="float" value="0.001" optional="true" label="Bloom filter false positive rate" help="Increase to prevent memory allocation errors"/>
+            <param argument="-z" type="integer" value="500" optional="true" label="Minimum contig length" help="Minimum contig length to consider for scaffolding"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="scaffolds" format="fasta" from_work_dir="links_output.scaffolds.fa" label="${tool.name} on ${on_string} (scaffolds)" />
+        <data name="correspondence" format="tabular" from_work_dir="links_output.assembly_correspondence.tsv" label="${tool.name} on ${on_string} (Correspondence file)" />
+        <data name="gv" format="graph_dot" from_work_dir="links_output.gv" label="${tool.name} on ${on_string} (Graph)" />
+    </outputs>
+    <tests>
+    <!-- Basic test -->
+        <test>
+            <param name="f" value="used_contigs.fasta.gz"/>
+            <param name="reads" value="reads.fasta.gz"/>
+            <output name="scaffolds" ftype="fasta" file="single_readfile.scaffolds.fa"/>
+            <output name="correspondence" ftype="tabular" file="single_readfile.assembly_correspondence.tsv"/>
+            <output name="gv">
+                <assert_contents>
+                    <has_text text="node [shape = circle]" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test multiple readfile input -->
+        <test>
+            <param name="f" value="used_contigs.fasta.gz"/>
+            <param name="reads" value="reads1.fasta.gz,reads2.fasta.gz"/>
+            <output name="scaffolds" ftype="fasta" file="multi_readfile.scaffolds.fa"/>
+            <output name="correspondence" ftype="tabular" file="multi_readfile.assembly_correspondence.tsv"/>
+        </test>
+        <!-- Test scaffolding parameters -->
+        <test>
+            <param name="f" value="used_contigs.fasta.gz"/>
+            <param name="reads" value="reads.fasta.gz"/>
+            <param name="k" value="19" />
+            <param name="d" value="500,1000,2000,3000" />
+            <param name="t" value="10,5" />
+            <output name="scaffolds" ftype="fasta" file="scaffolding.scaffolds.fa"/>
+            <output name="correspondence" ftype="tabular" file="scaffolding.assembly_correspondence.tsv"/>
+        </test>
+        <!-- Test advanced parameters -->
+        <test>
+            <param name="f" value="used_contigs.fasta.gz"/>
+            <param name="reads" value="reads.fasta.gz"/>
+            <output name="scaffolds" ftype="fasta" file="advanced.scaffolds.fa"/>
+            <param name="a" value="0.2" />
+            <param name="e" value="0.05" />
+            <param name="l" value="10" />
+            <param name="o" value="1" />
+            <param name="z" value="600" />
+            <output name="scaffolds" ftype="fasta" file="advanced.scaffolds.fa"/>
+            <output name="correspondence" ftype="tabular" file="advanced.assembly_correspondence.tsv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. figure:: https://github.com/warrenlr/links/raw/master/links-logo.png
+   :alt: links-logo
+   :width: 200px
+
+`LINKS <https://github.com/bcgsc/LINKS>`__ is a genomics application for
+scaffolding genome assemblies with long reads, such as those produced by
+Oxford Nanopore Technologies Ltd. It can be used to scaffold
+high-quality draft genome assemblies with any long sequences (eg. ONT
+reads, PacBio reads, other draft genomes, etc).
+
+**Input**:
+
+**LINKS does not use quality information in the Reads, so it is
+recommended to filter the long reads first**.
+
+Provide a ``fasta`` or ``fasta.gz`` dataset containing the Contigs, and
+a ``fastq``, ``fastq.gz``, ``fasta`` or ``fasta.gz`` dataset of Reads to
+be used for scaffolding.
+
+**Use cases**:
+
+-  *Use long reads to scaffold a draft assembly* by providing the draft
+   assembly to the Contigs parameter and the long reads to the Reads
+   parameter.
+-  *Use a reference assembly to scaffold a draft assembly* by providing
+   the reference assembly to the Reads parameter. For example, you could
+   use a species’s reference genome to scaffold a genome that was
+   assembled for another individual.
+
+**How it works**:
+
+LINKS uses *k*-mer pairs from the Reads to identify candidate pairs of
+Contigs, then uses the number of spanning *k*-mer pairs and the mean
+distance between them to build scaffolds.
+
+You can control the distance between *k*-mer pairs, the length of the
+*k*-mers and the minimum number of *k*-mer pairs between contigs. See
+the `LINKS readme on
+GitHub <https://github.com/bcgsc/LINKS#how-it-works>`__ for information
+on setting advanced parameters.
+
+
+
+    ]]></help>
+    <citations>
+      <citation type="doi">10.1186/s13742-015-0076-3</citation>
+    </citations>
+</tool>
\ No newline at end of file