Mercurial > repos > ecology > aligned_to_consensus

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/consalign.xml	Tue Apr 25 10:05:29 2023 +0000
@@ -0,0 +1,67 @@
+<tool id="aligned_to_consensus" name="Consensus sequence from aligned FASTA" version="@VERSION@">
+    <description></description>
+    <macros>
+        <import>consalign_macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="0.1.4">r-bioseq</requirement>
+        <requirement type="package" version="1.0.14">r-ptxqc</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        Rscript
+         '$__tool_directory__/consensus_from_alignments.R'
+         '$input'
+         '$seqtype'
+         '$method'
+         '$gaps'
+         '$outog'
+         '$output'
+    ]]>
+    </command>
+    <inputs>
+        <param name="input" type="data" format="fasta" label="Input fasta file with at least two sequences"/>
+        <param name="seqtype" type="select" label="Type of sequence" multiple="false" optional="false" display="radio">
+            <option value="DNA" selected="true">DNA</option>
+            <option value="RNA">RNA</option>
+            <option value="AA">Amino Acid</option>
+        </param>
+        <param name="method" type="select" label="Consensus method when differences are found between sequences" multiple="false" optional="false" display="radio">
+            <option value="chr_ambiguity" selected="true">Use an ambiguous nucleotide</option>
+            <option value="chr_majority">Use the most frequent nucleotide</option>
+        </param>
+        <param name="gaps" type="boolean" checked="no" label="Take account of gaps?" help="/!\ If gaps are at the same place in all the sequences, do not select 'No': you will face an error"/>
+        <param name="outog" type="boolean" checked="no" label="Output consensus sequence AND original sequences in the FASTA file?" help="If 'Yes', the original sequences are written after the consensus sequence in the output FASTA file"/>
+    </inputs>
+    <outputs>
+        <data name="output" from_work_dir="output.fasta" format="fasta" label="Consensus sequence of ${input.display_name}"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="test_file.fasta"/>
+            <param name="seqtype" value="DNA"/>
+            <param name="method" value="chr_ambiguity"/>
+            <param name="gaps" value="false"/>
+            <param name="outog" value="false"/>
+            <output name="output">
+                <assert_contents>
+                    <has_n_lines n="2"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+=====================================================
+Get consensus sequence from several aligned sequences
+=====================================================
+
+This tool uses the seq_consensus function from the bioseq package. **Warning:** if gaps are at the same place in all the sequences, do not select 'No' for the gaps parameter,
+otherwise you will face an error.
+
+Input can be any fasta file containing at least two aligned sequences.
+
+Output is a FASTA file containing the computed consensus sequence (along with original sequences if you want to).
+    ]]></help>
+    <citations>
+        <citation type="doi">10.1111/2041-210X.13490</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/consalign_macros.xml	Tue Apr 25 10:05:29 2023 +0000
@@ -0,0 +1,3 @@
+<macros>
+    <token name="@VERSION@">1.0.0</token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/consensus_from_alignments.R	Tue Apr 25 10:05:29 2023 +0000
@@ -0,0 +1,43 @@
+#Rscript
+
+################################################################################
+##      Extract consensus sequence from aligned forward and reverse fasta     ##
+################################################################################
+## Usage: Rscript consensus_from_alignments.R <fasta> <seqtype> <method> <gaps> <outog> [<output>, unused]
+
+#####Packages
+library(bioseq, quietly = TRUE)
+
+##Load arguments; all five are read below, so a partial command line is rejected up front
+args <- commandArgs(trailingOnly = TRUE)
+if (length(args) < 5) {
+    stop("This tool needs five arguments: input fasta, sequence type, method, gaps, outog")
+}
+fasta_f <- args[1]
+seq_type <- args[2]
+meth_choice <- args[3]
+gaps_tf <- as.logical(args[4])
+out_og <- as.logical(args[5])
+## as.logical() gives NA for text it cannot interpret, which would break the calls below
+if (is.na(gaps_tf) || is.na(out_og)) {
+    stop("The gaps and outog arguments must be 'true' or 'false'")
+}
+
+## Read input file; a consensus needs at least two sequences of identical length
+seq_l <- bioseq::read_fasta(fasta_f, type = seq_type)
+if (bioseq::seq_nseq(seq_l) < 2) {
+    stop("Only one sequence in the file, at least two aligned sequences are needed to compute a consensus")
+}
+if (length(unique(bioseq::seq_nchar(seq_l))) > 1) {
+    stop("Sequences have different lengths, please provide aligned sequences")
+}
+
+##Consensus sequence, named "consensus_" + PTXQC::LCS folded over the input sequence names
+seq_con <- bioseq::seq_consensus(seq_l, method = meth_choice, gaps = gaps_tf)
+if (bioseq::seq_nseq(seq_con) > 1) {
+    stop("Consensus hasn't worked for an unknown reason, double-check your input file and the parameters you chose")
+}
+names(seq_con) <- paste0("consensus_", Reduce(PTXQC::LCS, names(seq_l)))
+##Create output: consensus first, followed by the original sequences when requested
+seq_out <- if (out_og) c(seq_con, seq_l) else seq_con
+bioseq::write_fasta(seq_out, file = "output.fasta", line_length = Inf, block_length = Inf)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/out_file.fasta	Tue Apr 25 10:05:29 2023 +0000
@@ -0,0 +1,2 @@
+>consensus_L
+TTTGTGTACCCCCASDGGAT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/test_file.fasta	Tue Apr 25 10:05:29 2023 +0000
@@ -0,0 +1,6 @@
+>LAL
+TTTGTGTACC-CCACAGG--
+>L0L
+--TGTGT-CCCCCAGTGGAT
+>LEL
+-TTGTGT-CCCCCAGGGGA-