Mercurial > repos > ecology > aligned_to_consensus
changeset 0:0ccbe1c20fc3 draft default tip
planemo upload for repository https://github.com/ColineRoyaux/Galaxy_tool_projects/tree/main/consensus_from_alignments commit ecc21de8f368c6a95c57d4e6511ed42af9e72a66
author | ecology |
---|---|
date | Tue, 25 Apr 2023 10:05:29 +0000 |
parents | |
children | |
files | consalign.xml consalign_macros.xml consensus_from_alignments.R test-data/out_file.fasta test-data/test_file.fasta |
diffstat | 5 files changed, 121 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- File: consalign.xml
     Galaxy tool wrapper that runs consensus_from_alignments.R on an aligned
     FASTA file and collects the consensus sequence it writes. -->
<tool id="aligned_to_consensus" name="Consensus sequence from aligned FASTA" version="@VERSION@">
    <description></description>
    <macros>
        <!-- Supplies the @VERSION@ token used in the tool version above. -->
        <import>consalign_macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="0.1.4">r-bioseq</requirement>
        <requirement type="package" version="1.0.14">r-ptxqc</requirement>
    </requirements>
    <!-- The script reads its first five positional arguments. '$output' is
         passed as a sixth argument, but the script always writes to
         "output.fasta" in the working directory, which is collected through
         from_work_dir in the outputs section below. -->
    <command detect_errors="exit_code"><![CDATA[
        Rscript
            '$__tool_directory__/consensus_from_alignments.R'
            '$input'
            '$seqtype'
            '$method'
            '$gaps'
            '$outog'
            '$output'
        ]]>
    </command>
    <inputs>
        <param name="input" type="data" format="fasta" label="Input fasta file with at least two sequences"/>
        <!-- Forwarded as the "type" argument of bioseq::read_fasta. -->
        <param name="seqtype" type="select" label="Type of sequence" multiple="false" optional="false" display="radio">
            <option value="DNA" selected="true">DNA</option>
            <option value="RNA">RNA</option>
            <option value="AA">Amino Acid</option>
        </param>
        <!-- Forwarded as the "method" argument of bioseq::seq_consensus. -->
        <param name="method" type="select" label="Consensus method when differences are found between sequences" multiple="false" optional="false" display="radio">
            <option value="chr_ambiguity" selected="true">Use an ambiguous nucleotide</option>
            <option value="chr_majority">Use the most frequent nucleotide</option>
        </param>
        <!-- Forwarded as the "gaps" argument of bioseq::seq_consensus. -->
        <param name="gaps" type="boolean" checked="no" label="Take account of gaps?" help="/!\ If gaps are at the same place in all the sequences don't check 'no', you'll face an error"/>
        <!-- When true the script writes the consensus followed by the input
             sequences; when false it writes the consensus only. -->
        <param name="outog" type="boolean" checked="no" label="Output consensus sequence AND original sequences in the FASTA file?" help="If checked, the output FASTA file contains the consensus sequence followed by the original input sequences; otherwise it contains only the consensus sequence."/>
    </inputs>
    <outputs>
        <data name="output" from_work_dir="output.fasta" format="fasta" label="Consensus sequence of ${input.display_name}"/>
    </outputs>
    <tests>
        <!-- Consensus only: one header line plus one unwrapped sequence line. -->
        <test>
            <param name="input" value="test_file.fasta"/>
            <param name="seqtype" value="DNA"/>
            <param name="method" value="chr_ambiguity"/>
            <param name="gaps" value="false"/>
            <param name="outog" value="false"/>
            <output name="output">
                <assert_contents>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
=====================================================
Get consensus sequence from several aligned sequences
=====================================================

This tool uses the seq_consensus function from the bioseq package. @WARNING@ If gaps are at the same place in all the sequences don't check 'no' in the gaps parameter,
you'll face an error.

Input can be any fasta file containing at least two aligned sequences.

Output is a FASTA file containing the computed consensus sequence (along with original sequences if you want to).
    ]]></help>
    <citations>
        <citation type="doi">10.1111/2041-210X.13490</citation>
    </citations>
</tool>
<!-- File: consalign_macros.xml
     Shared tokens imported by consalign.xml. -->
<macros>
    <!-- Tool version, substituted into the version attribute of the tool. -->
    <token name="@VERSION@">1.0.0</token>
    <!-- Referenced in the help section of consalign.xml; without this
         definition the placeholder would be displayed literally. -->
    <token name="@WARNING@">**Warning:**</token>
</macros>
#Rscript
# File: consensus_from_alignments.R

################################################################################
##   Extract consensus sequence from aligned forward and reverse fasta        ##
################################################################################

# Reads an aligned FASTA file, computes one consensus sequence with
# bioseq::seq_consensus and writes the result to "output.fasta" in the current
# working directory.
#
# Positional command-line arguments:
#   1. path to the input FASTA file
#   2. sequence type, forwarded as `type` to bioseq::read_fasta
#   3. consensus method, forwarded as `method` to bioseq::seq_consensus
#   4. logical flag, forwarded as `gaps` to bioseq::seq_consensus
#   5. logical flag: TRUE writes the consensus followed by the input sequences,
#      FALSE writes the consensus only
# Any additional argument is ignored.

##### Packages
library(bioseq, quietly = TRUE)

## Load arguments
args <- commandArgs(trailingOnly = TRUE)

# All five arguments are read below; with fewer, the flags would be NA and the
# script would fail later with an unrelated error.
if (length(args) < 5) {
  stop(
    "This tool needs five arguments: input FASTA file, sequence type, ",
    "consensus method, gaps (TRUE/FALSE) and output original sequences ",
    "(TRUE/FALSE)"
  )
}

fasta_f <- args[1]
seq_type <- args[2]
meth_choice <- args[3]
gaps_tf <- as.logical(args[4])
out_og <- as.logical(args[5])

# as.logical() returns NA for strings it cannot interpret; reject them here
# rather than letting `if (out_og)` fail further down.
if (is.na(gaps_tf) || is.na(out_og)) {
  stop(
    "The 'gaps' and 'output original sequences' arguments must be TRUE or ",
    "FALSE"
  )
}

## Read input file
seq_l <- bioseq::read_fasta(fasta_f, type = seq_type)

# A consensus needs at least two sequences of identical length.
if (bioseq::seq_nseq(seq_l) < 2) {
  stop("Only one sequence in the file, at least two aligned sequences are needed to compute a consensus")
}
if (length(unique(bioseq::seq_nchar(seq_l))) > 1) {
  stop("Sequences have different lengths, please provide aligned sequences")
}

## Consensus sequence
seq_con <- bioseq::seq_consensus(seq_l, method = meth_choice, gaps = gaps_tf)

# Exactly one consensus sequence is expected.
if (bioseq::seq_nseq(seq_con) > 1) {
  stop("Consensus hasn't worked for an unknown reason, double-check your input file and the parameters you chose")
}

# Name the consensus after the input sequence names, folded pairwise through
# PTXQC::LCS (presumably the longest common substring -- confirm in PTXQC docs).
names(seq_con) <- paste0("consensus_", Reduce(PTXQC::LCS, names(seq_l)))

## Create output
# Consensus first, then the original sequences when requested.
seq_out <- if (out_og) c(seq_con, seq_l) else seq_con

# line_length = Inf and block_length = Inf are passed as in the original call;
# the wrapper's test expects a consensus-only output to be exactly two lines.
bioseq::write_fasta(
  seq_out,
  file = "output.fasta",
  line_length = Inf,
  block_length = Inf
)