Mercurial > repos > iuc > nextclade

--- a/macros.xml	Wed Aug 31 16:40:29 2022 +0000
+++ b/macros.xml	Sat Oct 08 20:03:50 2022 +0000
@@ -1,6 +1,6 @@
 <macros>
     <!-- same version number is used for nextclade and nextalign releases, even though they are distinct tools -->
-    <token name="@TOOL_VERSION@">2.4.0</token>
+    <token name="@TOOL_VERSION@">2.7.0</token>
     <xml name="citations">
         <citations>
             <citation type="bibtex">@online{nextclade,
@@ -10,10 +10,9 @@
                 urldate = {2021-03-26}
                 }
             </citation>
-            <yield />
+            <yield/>
         </citations>
     </xml>
-
     <!--
         command
     -->
@@ -24,7 +23,6 @@
         ln -f -s '$reference_source.ref_file.fields.path' reference.fa &&
     #end if
 ]]></token>
-
     <token name="@QUERY_FASTA@"><![CDATA[
     #if $input_fasta.is_of_type('fasta.gz')
         #set $query = 'query.fa.gz'
@@ -36,7 +34,6 @@
     <!--
         inputs
     -->
-
     <xml name="reference">
         <conditional name="reference_source">
             <param name="reference_source_selector" type="select" label="Choose the source for the reference genome">
@@ -56,7 +53,6 @@
             </when>
         </conditional>
     </xml>
-
     <!--
         help
     -->
@@ -64,5 +60,11 @@
 Nextclade is a tool that identifies differences between your sequences and a reference sequence, uses these differences to assign your sequences to clades, and reports potential sequence quality issues in your data.
 You can use the tool to analyze sequences before you upload them to a database, or if you want to assign Nextstrain clades to a set of sequences.
 ]]></token>
-
+    <xml name="column_metadata" tokens="dataset_name" token_extra_columns="">
+        <!-- Emits one <when> branch that sets the column_names metadata for the dataset (i.e. database) named by @DATASET_NAME@. Dataset-specific columns are passed in @EXTRA_COLUMNS@ (empty by default) and are spliced in right after "clade"; a non-empty value must end with a comma because the token is followed directly by qc.overallScore. -->
+        <!-- NOTE: the tool assumes that the columns of each dataset remain static; this assumption might become incorrect with future dataset releases. -->
+        <when value="@DATASET_NAME@">
+            <action name="column_names" type="metadata" default="seqName,clade,@EXTRA_COLUMNS@qc.overallScore,qc.overallStatus,totalSubstitutions,totalDeletions,totalInsertions,totalFrameShifts,totalAminoacidSubstitutions,totalAminoacidDeletions,totalAminoacidInsertions,totalMissing,totalNonACGTNs,totalPcrPrimerChanges,substitutions,deletions,insertions,privateNucMutations.reversionSubstitutions,privateNucMutations.labeledSubstitutions,privateNucMutations.unlabeledSubstitutions,privateNucMutations.totalReversionSubstitutions,privateNucMutations.totalLabeledSubstitutions,privateNucMutations.totalUnlabeledSubstitutions,privateNucMutations.totalPrivateSubstitutions,frameShifts,aaSubstitutions,aaDeletions,aaInsertions,missing,nonACGTNs,pcrPrimerChanges,alignmentScore,alignmentStart,alignmentEnd,coverage,qc.missingData.missingDataThreshold,qc.missingData.score,qc.missingData.status,qc.missingData.totalMissing,qc.mixedSites.mixedSitesThreshold,qc.mixedSites.score,qc.mixedSites.status,qc.mixedSites.totalMixedSites,qc.privateMutations.cutoff,qc.privateMutations.excess,qc.privateMutations.score,qc.privateMutations.status,qc.privateMutations.total,qc.snpClusters.clusteredSNPs,qc.snpClusters.score,qc.snpClusters.status,qc.snpClusters.totalSNPs,qc.frameShifts.frameShifts,qc.frameShifts.totalFrameShifts,qc.frameShifts.frameShiftsIgnored,qc.frameShifts.totalFrameShiftsIgnored,qc.frameShifts.score,qc.frameShifts.status,qc.stopCodons.stopCodons,qc.stopCodons.totalStopCodons,qc.stopCodons.score,qc.stopCodons.status,isReverseComplement,failedGenes,warnings,errors"/>
+        </when>
+    </xml>
 </macros>
--- a/nextclade.xml	Wed Aug 31 16:40:29 2022 +0000
+++ b/nextclade.xml	Sat Oct 08 20:03:50 2022 +0000
@@ -68,18 +68,19 @@
         #end if
     ]]></command>
     <inputs>
-        <param argument="--input-fasta" type="data" format="fasta,fasta.gz" label="FASTA file with input sequences" />
+        <param argument="--input-fasta" type="data" format="fasta,fasta.gz" label="FASTA file with input sequences"/>
         <param name="organism" type="select" label="Organism">
             <option value="sars-cov-2" selected="true">SARS-CoV-2</option>
             <option value="sars-cov-2-no-recomb">SARS-CoV-2 without recombinants</option>
+            <option value="sars-cov-2-21L">SARS-CoV-2 relative to BA.2</option>
             <option value="flu_h1n1pdm_ha">Influenza A H1N1pdm HA</option>
             <option value="flu_h3n2_ha">Influenza A H3N2 HA</option>
             <option value="flu_vic_ha">Influenza B Victoria HA</option>
             <option value="flu_yam_ha">Influenza B Yamagata HA</option>
-            <option value="MPXV">Monkeypox (All Clades)</option>
+            <option value="MPXV">Monkeypox (All Clades - MPXV)</option>
             <option value="hMPXV">Human Monkeypox (hMPXV)</option>
-            <option value="hMPXV_B1">Human Monkeypox Clade B.1</option>
-        </param>
+            <option value="hMPXV_B1">Human Monkeypox Clade B.1 (hMPXV_B1)</option>
+        </param>
         <conditional name="db">
             <param name="source" type="select" label="Version of database to use">
                 <option value="cached" selected="true">Use specific database version cached on this Galaxy server</option>
@@ -88,17 +89,17 @@
             <when value="cached">
                 <param name="release" label="Cached nextclade database release" type="select">
                     <options from_data_table="nextclade">
-                        <column name="value" index="0" />
-                        <column name="description" index="2" />
-                        <column name="date" index="4" />
-                        <column name="path" index="5" />
-                        <filter type="sort_by" column="4" />
-                        <filter type="param_value" ref="organism" column="1" />
-                        <filter type="regexp" column="3" value="@COMPATIBILITY_SPEC@" />
+                        <column name="value" index="0"/>
+                        <column name="description" index="2"/>
+                        <column name="date" index="4"/>
+                        <column name="path" index="5"/>
+                        <filter type="sort_by" column="4"/>
+                        <filter type="param_value" ref="organism" column="1"/>
+                        <filter type="regexp" column="3" value="@COMPATIBILITY_SPEC@"/>
                     </options>
                 </param>
             </when>
-            <when value="download"></when>
+            <when value="download"/>
         </conditional>
         <param name="outputs" type="select" label="Output options" help="Output reports and optionally tree" multiple="true">
             <option value="report_json">JSON format report</option>
@@ -106,31 +107,41 @@
             <option value="output_tree">Auspice v2 tree file (JSON format)</option>
             <option value="output_fasta">Aligned sequences (FASTA format)</option>
         </param>
-        <param name="include_header" type="boolean" label="Include header line in output file"
-            truevalue="true" falsevalue="false" />
+        <param name="include_header" type="boolean" label="Include header line in output file" truevalue="true" falsevalue="false"/>
         <conditional name="adv">
-            <param name="advanced_options" type="select" label="Use advanced options" >
+            <param name="advanced_options" type="select" label="Use advanced options">
                 <option value="yes">Yes</option>
                 <option value="no" selected="true">No</option>
             </param>
             <when value="yes">
-                <param argument="--input-qc-config" type="data" label="Quality Control (QC) Config file" format="json" optional="true" help="QC config json file containing custom QC configuration" />
-                <param argument="--input-root-seq" type="data" label="Custom root sequence" format="txt" optional="true" help="Text file containing custom root sequence" />
-                <param argument="--input-tree" type="data" label="Custom reference tree" format="json" optional="true" help="Auspice JSON v2 file containing custom reference tree" />
-                <param argument="--input-gene-map" type="data" label="Custom gene map" format="json" optional="true" help="JSON file containing custom gene map. Gene map (sometimes also called 'gene annotations') is used to resolve aminoacid changes in genes" />
-                <param argument="--input-pcr-primers" type="data" label="Custom PCR primer sites" format="json" optional="true" help="CSV file containing a list of custom PCR primer sites. These are used to report mutations in these sites" />
-                <param argument="--input-virus-properties" type="data" label="Configuration and data specific to a pathogen" format="json" optional="true" help="For more info on the virus properties JSON file, see the Nextclade documentation." />
-                <param argument="--include-reference" truevalue="--include-reference" falsevalue="" type="boolean" label="Include reference sequence in FASTA alignment output" help="The FASTA alignment is an optional output of nextclade. Select this option to include the reference sequence in that alignment file" />
+                <param argument="--input-qc-config" type="data" label="Quality Control (QC) Config file" format="json" optional="true" help="QC config json file containing custom QC configuration"/>
+                <param argument="--input-root-seq" type="data" label="Custom root sequence" format="txt" optional="true" help="Text file containing custom root sequence"/>
+                <param argument="--input-tree" type="data" label="Custom reference tree" format="json" optional="true" help="Auspice JSON v2 file containing custom reference tree"/>
+                <param argument="--input-gene-map" type="data" label="Custom gene map" format="gff3" optional="true" help="GFF3 file containing custom gene map. Gene map (sometimes also called 'gene annotations') is used to resolve aminoacid changes in genes"/> <!-- nextclade 2.x reads the gene map as GFF3, not JSON -->
+                <param argument="--input-pcr-primers" type="data" label="Custom PCR primer sites" format="csv" optional="true" help="CSV file containing a list of custom PCR primer sites. These are used to report mutations in these sites"/> <!-- format now matches the help text: the primer list is a CSV file, not JSON -->
+                <param argument="--input-virus-properties" type="data" label="Configuration and data specific to a pathogen" format="json" optional="true" help="For more info on the virus properties JSON file, see the Nextclade documentation."/>
+                <param argument="--include-reference" truevalue="--include-reference" falsevalue="" type="boolean" label="Include reference sequence in FASTA alignment output" help="The FASTA alignment is an optional output of nextclade. Select this option to include the reference sequence in that alignment file"/>
             </when>
             <when value="no">
             </when>
         </conditional>
     </inputs>
     <outputs>
-        <data name="report_tsv" format="tabular" label="${tool.name} on ${on_string} (TSV report)">
+        <data name="report_tsv" format="tabular" label="${tool.name} on ${on_string} (TSV report)">
             <filter>outputs and "report_tsv" in outputs</filter>
             <actions>
-                <action name="column_names" type="metadata" default="seqName,clade,Nextclade_pango,qc.overallScore,qc.overallStatus,totalSubstitutions,totalDeletions,totalInsertions,totalFrameShifts,totalAminoacidSubstitutions,totalAminoacidDeletions,totalAminoacidInsertions,totalMissing,totalNonACGTNs,totalPcrPrimerChanges,substitutions,deletions,insertions,privateNucMutations.reversionSubstitutions,privateNucMutations.labeledSubstitutions,privateNucMutations.unlabeledSubstitutions,privateNucMutations.totalReversionSubstitutions,privateNucMutations.totalLabeledSubstitutions,privateNucMutations.totalUnlabeledSubstitutions,privateNucMutations.totalPrivateSubstitutions,frameShifts,aaSubstitutions,aaDeletions,aaInsertions,missing,nonACGTNs,pcrPrimerChanges,alignmentScore,alignmentStart,alignmentEnd,qc.missingData.missingDataThreshold,qc.missingData.score,qc.missingData.status,qc.missingData.totalMissing,qc.mixedSites.mixedSitesThreshold,qc.mixedSites.score,qc.mixedSites.status,qc.mixedSites.totalMixedSites,qc.privateMutations.cutoff,qc.privateMutations.excess,qc.privateMutations.score,qc.privateMutations.status,qc.privateMutations.total,qc.snpClusters.clusteredSNPs,qc.snpClusters.score,qc.snpClusters.status,qc.snpClusters.totalSNPs,qc.frameShifts.frameShifts,qc.frameShifts.totalFrameShifts,qc.frameShifts.frameShiftsIgnored,qc.frameShifts.totalFrameShiftsIgnored,qc.frameShifts.score,qc.frameShifts.status,qc.stopCodons.stopCodons,qc.stopCodons.totalStopCodons,qc.stopCodons.score,qc.stopCodons.status,isReverseComplement,failedGenes,warnings,errors" />
+                <conditional name="organism"> <!-- one column_metadata expansion per organism option: each yields a <when> branch that sets column_names for that dataset; extra_columns lists the dataset-specific columns and must end with a comma -->
+                    <expand macro="column_metadata" dataset_name="sars-cov-2" extra_columns="Nextclade_pango,"/>
+                    <expand macro="column_metadata" dataset_name="sars-cov-2-no-recomb" extra_columns="Nextclade_pango,"/>
+                    <expand macro="column_metadata" dataset_name="sars-cov-2-21L" extra_columns="Nextclade_pango,partiallyAliased,immune_escape,ace2_binding,"/>
+                    <expand macro="column_metadata" dataset_name="flu_h1n1pdm_ha"/>
+                    <expand macro="column_metadata" dataset_name="flu_h3n2_ha"/>
+                    <expand macro="column_metadata" dataset_name="flu_vic_ha"/>
+                    <expand macro="column_metadata" dataset_name="flu_yam_ha"/>
+                    <expand macro="column_metadata" dataset_name="MPXV" extra_columns="outbreak,lineage,"/>
+                    <expand macro="column_metadata" dataset_name="hMPXV" extra_columns="outbreak,lineage,"/>
+                    <expand macro="column_metadata" dataset_name="hMPXV_B1" extra_columns="outbreak,lineage,"/>
+                </conditional>
             </actions>
         </data>
         <data name="report_json" format="json" label="${tool.name} on ${on_string} (JSON report)">
@@ -145,92 +156,94 @@
     </outputs>
     <tests>
         <test expect_num_outputs="1">
-            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta" />
+            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta"/>
             <conditional name="db">
-                <param name="source" value="download" />
+                <param name="source" value="download"/>
             </conditional>
-            <param name="outputs" value="report_tsv" />
-            <param name="organism" value="sars-cov-2" />
+            <param name="outputs" value="report_tsv"/>
+            <param name="organism" value="sars-cov-2"/>
             <output name="report_tsv">
                 <assert_contents>
-                    <has_n_columns n="66" />
-                    <has_text text="20A" />
+                    <has_n_columns n="67"/>
+                    <has_text text="20A"/>
                 </assert_contents>
             </output>
         </test>
         <test expect_num_outputs="3">
-            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta" />
+            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta"/>
             <conditional name="db">
-                <param name="source" value="download" />
+                <param name="source" value="download"/>
             </conditional>
-            <param name="outputs" value="report_tsv,report_json,output_tree" />
-            <param name="organism" value="sars-cov-2" />
+            <param name="outputs" value="report_tsv,report_json,output_tree"/>
+            <param name="organism" value="sars-cov-2"/>
             <output name="report_tsv">
                 <assert_contents>
-                    <has_n_columns n="66" />
-                    <has_text text="20A" />
+                    <has_n_columns n="67"/>
+                    <has_text text="20A"/>
                 </assert_contents>
+                <metadata name="column_names" value="seqName,clade,Nextclade_pango,qc.overallScore,qc.overallStatus,totalSubstitutions,totalDeletions,totalInsertions,totalFrameShifts,totalAminoacidSubstitutions,totalAminoacidDeletions,totalAminoacidInsertions,totalMissing,totalNonACGTNs,totalPcrPrimerChanges,substitutions,deletions,insertions,privateNucMutations.reversionSubstitutions,privateNucMutations.labeledSubstitutions,privateNucMutations.unlabeledSubstitutions,privateNucMutations.totalReversionSubstitutions,privateNucMutations.totalLabeledSubstitutions,privateNucMutations.totalUnlabeledSubstitutions,privateNucMutations.totalPrivateSubstitutions,frameShifts,aaSubstitutions,aaDeletions,aaInsertions,missing,nonACGTNs,pcrPrimerChanges,alignmentScore,alignmentStart,alignmentEnd,coverage,qc.missingData.missingDataThreshold,qc.missingData.score,qc.missingData.status,qc.missingData.totalMissing,qc.mixedSites.mixedSitesThreshold,qc.mixedSites.score,qc.mixedSites.status,qc.mixedSites.totalMixedSites,qc.privateMutations.cutoff,qc.privateMutations.excess,qc.privateMutations.score,qc.privateMutations.status,qc.privateMutations.total,qc.snpClusters.clusteredSNPs,qc.snpClusters.score,qc.snpClusters.status,qc.snpClusters.totalSNPs,qc.frameShifts.frameShifts,qc.frameShifts.totalFrameShifts,qc.frameShifts.frameShiftsIgnored,qc.frameShifts.totalFrameShiftsIgnored,qc.frameShifts.score,qc.frameShifts.status,qc.stopCodons.stopCodons,qc.stopCodons.totalStopCodons,qc.stopCodons.score,qc.stopCodons.status,isReverseComplement,failedGenes,warnings,errors"/>
             </output>
             <output name="report_json">
                 <assert_contents>
-                    <has_text text='"pos": 240,' />
+                    <has_text text="&quot;pos&quot;: 240,"/>
                 </assert_contents>
             </output>
             <output name="output_tree">
                 <assert_contents>
-                    <has_text text='"title": "QC Status"' />
+                    <has_text text="&quot;title&quot;: &quot;QC Status&quot;"/>
                 </assert_contents>
             </output>
         </test>
         <test expect_num_outputs="1">
-            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta" />
+            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta"/>
             <conditional name="db">
-                <param name="source" value="download" />
+                <param name="source" value="download"/>
             </conditional>
-            <param name="outputs" value="report_tsv" />
-            <param name="organism" value="sars-cov-2" />
+            <param name="outputs" value="report_tsv"/>
+            <param name="organism" value="sars-cov-2"/>
             <conditional name="adv">
-                <param name="advanced_options" value="yes" />
-                <param name="input_qc_config" value="strict_qc.json" ftype="json" />
+                <param name="advanced_options" value="yes"/>
+                <param name="input_qc_config" value="strict_qc.json" ftype="json"/>
             </conditional>
             <output name="report_tsv">
                 <assert_contents>
-                    <has_n_columns n="66" />
-                    <has_text text="mediocre" />
+                    <has_n_columns n="67"/>
+                    <has_text text="mediocre"/>
                 </assert_contents>
             </output>
         </test>
         <test expect_num_outputs="2">
-            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta" />
+            <param name="input_fasta" value="sarscov2_1.fasta" ftype="fasta"/>
             <conditional name="db">
-                <param name="source" value="download" />
+                <param name="source" value="download"/>
             </conditional>
-            <param name="outputs" value="report_tsv,output_fasta" />
-            <param name="organism" value="sars-cov-2" />
+            <param name="outputs" value="report_tsv,output_fasta"/>
+            <param name="organism" value="sars-cov-2"/>
             <conditional name="adv">
-                <param name="advanced_options" value="yes" />
-                <param name="include_reference" value="true" />
+                <param name="advanced_options" value="yes"/>
+                <param name="include_reference" value="true"/>
             </conditional>
             <output name="report_tsv">
                 <assert_contents>
-                    <has_n_columns n="66" />
-                    <has_text text="mediocre" />
+                    <has_n_columns n="67"/>
+                    <has_text text="mediocre"/>
                 </assert_contents>
             </output>
-            <output name="output_fasta" value="output_alignment.fasta" ftype="fasta" />
+            <output name="output_fasta" value="output_alignment.fasta" ftype="fasta"/>
         </test>
         <test expect_num_outputs="1">
-            <param name="input_fasta" value="mpxv.fasta.gz" ftype="fasta.gz" />
+            <param name="input_fasta" value="mpxv.fasta.gz" ftype="fasta.gz"/>
             <conditional name="db">
-                <param name="source" value="download" />
+                <param name="source" value="download"/>
             </conditional>
-            <param name="outputs" value="report_tsv" />
-            <param name="organism" value="MPXV" />
+            <param name="outputs" value="report_tsv"/>
+            <param name="organism" value="MPXV"/>
             <output name="report_tsv">
                 <assert_contents>
-                    <has_n_columns n="67" />
-                    <has_text text="hMPXV-1" />
+                    <has_n_columns n="68"/>
+                    <has_text text="hMPXV-1"/>
                 </assert_contents>
+                <metadata name="column_names" value="seqName,clade,outbreak,lineage,qc.overallScore,qc.overallStatus,totalSubstitutions,totalDeletions,totalInsertions,totalFrameShifts,totalAminoacidSubstitutions,totalAminoacidDeletions,totalAminoacidInsertions,totalMissing,totalNonACGTNs,totalPcrPrimerChanges,substitutions,deletions,insertions,privateNucMutations.reversionSubstitutions,privateNucMutations.labeledSubstitutions,privateNucMutations.unlabeledSubstitutions,privateNucMutations.totalReversionSubstitutions,privateNucMutations.totalLabeledSubstitutions,privateNucMutations.totalUnlabeledSubstitutions,privateNucMutations.totalPrivateSubstitutions,frameShifts,aaSubstitutions,aaDeletions,aaInsertions,missing,nonACGTNs,pcrPrimerChanges,alignmentScore,alignmentStart,alignmentEnd,coverage,qc.missingData.missingDataThreshold,qc.missingData.score,qc.missingData.status,qc.missingData.totalMissing,qc.mixedSites.mixedSitesThreshold,qc.mixedSites.score,qc.mixedSites.status,qc.mixedSites.totalMixedSites,qc.privateMutations.cutoff,qc.privateMutations.excess,qc.privateMutations.score,qc.privateMutations.status,qc.privateMutations.total,qc.snpClusters.clusteredSNPs,qc.snpClusters.score,qc.snpClusters.status,qc.snpClusters.totalSNPs,qc.frameShifts.frameShifts,qc.frameShifts.totalFrameShifts,qc.frameShifts.frameShiftsIgnored,qc.frameShifts.totalFrameShiftsIgnored,qc.frameShifts.score,qc.frameShifts.status,qc.stopCodons.stopCodons,qc.stopCodons.totalStopCodons,qc.stopCodons.score,qc.stopCodons.status,isReverseComplement,failedGenes,warnings,errors"/>
             </output>
         </test>
     </tests>
@@ -261,5 +274,5 @@
 .. _Nextclade: https://github.com/nextstrain/nextclade

     ]]></help>
-    <expand macro="citations" />
+    <expand macro="citations"/>
 </tool>