Mercurial > repos > iuc > links
diff links.xml @ 0:7acd9ade2dd1 draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/links commit f15e5bb67b7a212ecd8c98e816c80e22d3b7a0cb"
author | iuc |
---|---|
date | Wed, 23 Feb 2022 08:25:55 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/links.xml Wed Feb 23 08:25:55 2022 +0000
@@ -0,0 +1,198 @@
+<tool id="links" name="LINKS" version="@TOOL_VERSION@+galaxy+@VERSION_SUFFIX@" python_template_version="3.5" profile="20.09">
+    <!-- Galaxy wrapper for LINKS: links the inputs into the working directory,
+         lists the read files in reads.fof and runs LINKS with the chosen options. -->
+    <description> - scaffold genome assemblies with long reads</description>
+    <xrefs>
+        <xref type="bio.tools">links</xref>
+    </xrefs>
+    <macros>
+        <token name="@TOOL_VERSION@">2.0.1</token>
+        <token name="@VERSION_SUFFIX@">1</token>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">links</requirement>
+    </requirements>
+    <version_command><![CDATA[
+        LINKS | grep "LINKS v" | cut -d' ' -f4
+    ]]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+
+        ## set up file paths
+        ## link names keep the Galaxy datatype extension of each input
+
+        #set sequences_fn = 'sequences.' + $f.ext
+        ln -s '$f' '$sequences_fn'
+        &&
+        ## every linked read file is appended to reads.fof, which is passed to LINKS via -s
+        #for $n, $read in enumerate($reads):
+            #set read_fn = 'reads' + str($n) + '.' + $read.ext
+            ln -s '$read' '$read_fn'
+            &&
+            printf '%s\n' '$read_fn' >> reads.fof
+            &&
+        #end for
+
+        ## RUN LINKS
+        ## each optional flag is emitted only when its parameter has a value
+
+        LINKS
+        -f '$sequences_fn'
+        -s reads.fof
+        -b links_output
+
+        #if str($scaffolding.k):
+            -k '$scaffolding.k'
+        #end if
+
+        #if str($scaffolding.d):
+            -d '$scaffolding.d'
+        #end if
+
+        #if str($scaffolding.t):
+            -t '$scaffolding.t'
+        #end if
+
+        #if str($advanced.a):
+            -a '$advanced.a'
+        #end if
+
+        #if str($advanced.e):
+            -e '$advanced.e'
+        #end if
+
+        #if str($advanced.o):
+            -o '$advanced.o'
+        #end if
+
+        #if str($advanced.l):
+            -l '$advanced.l'
+        #end if
+
+        #if str($advanced.z):
+            -z '$advanced.z'
+        #end if
+
+        #if str($advanced.p):
+            -p '$advanced.p'
+        #end if
+
+    ]]></command>
+    <inputs>
+        <param argument="-f" type="data" format="fasta,fasta.gz" label="Contigs" help="Sequences to scaffold" />
+        <param name="reads" type="data" format="fasta,fasta.gz,fastq,fastq.gz" label="Reads" help="Long sequence reads for scaffolding" multiple="true" />
+        <section name="scaffolding" title="Scaffolding options" expanded="False">
+            <param argument="-k" type="integer" value="15" optional="true" label="k-mer length" />
+            <param argument="-d" type="text" value="4000" optional="true" label="Distance" help="Distance between k-mer pairs. Multiple distances are separated by comma, e.g. 500,1000,2000,3000">
+                <validator type="regex" message="Enter one integer or a comma-separated list of integers, e.g. 500,1000,2000,3000">^([0-9]+(,[0-9]+)*)?$</validator>
+            </param>
+            <param argument="-t" type="text" value="2" optional="true" label="Step" help="Step of sliding window when extracting k-mer pairs from long reads. Multiple steps are separated by comma, e.g. 10,5">
+                <validator type="regex" message="Enter one integer or a comma-separated list of integers, e.g. 10,5">^([0-9]+(,[0-9]+)*)?$</validator>
+            </param>
+        </section>
+        <section name="advanced" title="Advanced options" expanded="False">
+            <param argument="-a" type="float" value="0.3" optional="true" label="Maximum link ratio" help="Maximum link ratio between two best contig pairs. Higher values lead to less accurate scaffolding" />
+            <param argument="-e" type="float" value="0.1" optional="true" label="Error" help="Error (%) allowed on -d distance"/>
+            <param argument="-l" type="integer" value="5" optional="true" label="Number of links" help="Minimum number of links (k-mer pairs) to compute scaffold"/>
+            <param argument="-o" type="integer" value="0" optional="true" label="Offset" help="Offset position for extracting k-mer pairs"/>
+            <param argument="-p" type="float" value="0.001" optional="true" label="Bloom filter false positive rate" help="Increase to prevent memory allocation errors"/>
+            <param argument="-z" type="integer" value="500" optional="true" label="Minimum contig length" help="Minimum contig length to consider for scaffolding"/>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="scaffolds" format="fasta" from_work_dir="links_output.scaffolds.fa" label="${tool.name} on ${on_string} (scaffolds)" />
+        <data name="correspondence" format="tabular" from_work_dir="links_output.assembly_correspondence.tsv" label="${tool.name} on ${on_string} (Correspondence file)" />
+        <data name="gv" format="graph_dot" from_work_dir="links_output.gv" label="${tool.name} on ${on_string} (Graph)" />
+    </outputs>
+    <tests>
+        <!-- Basic test -->
+        <test>
+            <param name="f" value="used_contigs.fasta.gz"/>
+            <param name="reads" value="reads.fasta.gz"/>
+            <output name="scaffolds" ftype="fasta" file="single_readfile.scaffolds.fa"/>
+            <output name="correspondence" ftype="tabular" file="single_readfile.assembly_correspondence.tsv"/>
+            <output name="gv">
+                <assert_contents>
+                    <has_text text="node [shape = circle]" />
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Test multiple readfile input -->
+        <test>
+            <param name="f" value="used_contigs.fasta.gz"/>
+            <param name="reads" value="reads1.fasta.gz,reads2.fasta.gz"/>
+            <output name="scaffolds" ftype="fasta" file="multi_readfile.scaffolds.fa"/>
+            <output name="correspondence" ftype="tabular" file="multi_readfile.assembly_correspondence.tsv"/>
+        </test>
+        <!-- Test scaffolding parameters -->
+        <test>
+            <param name="f" value="used_contigs.fasta.gz"/>
+            <param name="reads" value="reads.fasta.gz"/>
+            <param name="k" value="19" />
+            <param name="d" value="500,1000,2000,3000" />
+            <param name="t" value="10,5" />
+            <output name="scaffolds" ftype="fasta" file="scaffolding.scaffolds.fa"/>
+            <output name="correspondence" ftype="tabular" file="scaffolding.assembly_correspondence.tsv"/>
+        </test>
+        <!-- Test advanced parameters -->
+        <test>
+            <param name="f" value="used_contigs.fasta.gz"/>
+            <param name="reads" value="reads.fasta.gz"/>
+            <param name="a" value="0.2" />
+            <param name="e" value="0.05" />
+            <param name="l" value="10" />
+            <param name="o" value="1" />
+            <param name="z" value="600" />
+            <output name="scaffolds" ftype="fasta" file="advanced.scaffolds.fa"/>
+            <output name="correspondence" ftype="tabular" file="advanced.assembly_correspondence.tsv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+.. figure:: https://github.com/warrenlr/links/raw/master/links-logo.png
+   :alt: links-logo
+   :width: 200px
+
+`LINKS <https://github.com/bcgsc/LINKS>`__ is a genomics application for
+scaffolding genome assemblies with long reads, such as those produced by
+Oxford Nanopore Technologies Ltd. It can be used to scaffold
+high-quality draft genome assemblies with any long sequences (eg. ONT
+reads, PacBio reads, other draft genomes, etc).
+
+**Input**:
+
+**LINKS does not use quality information in the Reads, so it is
+recommended to filter the long reads first**.
+
+Provide a ``fasta`` or ``fasta.gz`` dataset containing the Contigs, and
+a ``fastq``, ``fastq.gz``, ``fasta`` or ``fasta.gz`` dataset of Reads to
+be used for scaffolding.
+
+**Use cases**:
+
+- *Use long reads to scaffold a draft assembly* by providing the draft
+  assembly to the Contigs parameter and the long reads to the Reads
+  parameter.
+- *Use a reference assembly to scaffold a draft assembly* by providing
+  the reference assembly to the Reads parameter. For example, you could
+  use a species’s reference genome to scaffold a genome that was
+  assembled for another individual.
+
+**How it works**:
+
+LINKS uses *k*-mer pairs from the Reads to identify candidate pairs of
+Contigs, then uses the number of spanning *k*-mer pairs and the mean
+distance between them to build scaffolds.
+
+You can control the distance between *k*-mer pairs, the length of the
+*k*-mers and the minimum number of *k*-mer pairs between contigs. See
+the `LINKS readme on
+GitHub <https://github.com/bcgsc/LINKS#how-it-works>`__ for information
+on setting advanced parameters.
+
+
+
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1186/s13742-015-0076-3</citation>
+    </citations>
+</tool>
\ No newline at end of file