changeset 10:480d9e9d156b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit dccd4c0668ec8ce1b1c8fd9cef4dcf3a99dc78b3
author iuc
date Fri, 27 Oct 2023 19:44:40 +0000
parents a62c4a11a67d
children f803a68fc9e5
files README.md README.rst ena_upload.xml extract_tables.py samples_macros.xml test-data/metadata_test_nonviral.xlsx test-data/metadata_test_nonviral_1_run.xlsx tool-data/geographic_location_1.loc.sample tool-data/host_sex_1.loc.sample tool-data/instrument_model.loc.sample tool-data/library_selection.loc.sample tool-data/library_source.loc.sample tool-data/library_strategy.loc.sample tool-data/platform.loc.sample tool-data/study_type.loc.sample tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 17 files changed, 319 insertions(+), 297 deletions(-) [+]
line wrap: on
line diff
--- a/README.md	Thu Nov 10 15:18:00 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-# ena-upload-wrapper
-Galaxy wrapper for ena-cli-upload
-
-This tool is shipped in a ready to use Galaxy container found [here](https://github.com/ELIXIR-Belgium/ena-upload-container).
-
-## Setting up credentials on Galaxy
-
-The admin of the server can set up global credentials through a file with the format:
-```
-username: webin_id
-password: webin_password
-```
-
-The path to this file must be exported as an environment variable called $GALAXY_ENA_SECRETS
-
-Alternatively, the admin can enable users to set their own credentials for this tool.
-To enable it, make sure the file `config/user_preferences_extra_conf.yml` has the following section:
-
-```
-    ena_webin_account:
-        description: Your ENA Webin account details
-        inputs:
-            - name: webin_id
-              label: ENA Webin ID
-              type: text
-              required: False
-            - name: password
-              label: Password
-              type:  password
-              required: False
-```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Fri Oct 27 19:44:40 2023 +0000
@@ -0,0 +1,42 @@
+ENA-upload-cli wrapper
+======================
+
+Galaxy wrapper of the
+`ena-upload-cli <https://github.com/usegalaxy-eu/ena-upload-cli>`__.
+Templates that can be used as input for the metadata can be found
+`here <https://github.com/ELIXIR-Belgium/ENA-metadata-templates>`__.
+
+This tool is shipped in a ready to use Galaxy container found
+`here <https://github.com/ELIXIR-Belgium/ena-upload-container>`__.
+
+Setting up credentials on Galaxy
+--------------------------------
+
+The admin of the server can set up global credentials through a file
+with the format:
+
+.. code-block:: yaml
+
+   username: webin_id
+   password: webin_password
+
+The path to this file must be exported as an environment variable called
+``$GALAXY_ENA_SECRETS``.
+
+Alternatively, the admin can enable users to set their own credentials
+for this tool. To enable it, make sure the file
+``config/user_preferences_extra_conf.yml`` has the following section:
+
+.. code-block:: yaml
+
+       ena_webin_account:
+           description: Your ENA Webin account details
+           inputs:
+               - name: webin_id
+                 label: ENA Webin ID
+                 type: text
+                 required: False
+               - name: password
+                 label: Password
+                 type:  password
+                 required: False
--- a/ena_upload.xml	Thu Nov 10 15:18:00 2022 +0000
+++ b/ena_upload.xml	Fri Oct 27 19:44:40 2023 +0000
@@ -1,10 +1,11 @@
-<tool id="ena_upload" name="ENA Upload tool" version="@VERSION@" profile="20.01" license="MIT">
+<tool id="ena_upload" name="ENA Upload tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05" license="MIT">
     <macros>
-        <token name="@VERSION@">0.6.1</token>
+        <token name="@TOOL_VERSION@">0.6.3</token>
+        <token name="@VERSION_SUFFIX@">0</token>
         <import>samples_macros.xml</import>
     </macros>
     <requirements>
-        <requirement type="package" version="@VERSION@">ena-upload-cli</requirement>
+        <requirement type="package" version="@TOOL_VERSION@">ena-upload-cli</requirement>
     </requirements>
     <stdio>
         <regex match="Oops" source="stderr" level="fatal"/>
@@ -47,7 +48,7 @@
 #if $action_options.input_format_conditional.input_format == "excel_tables":
     ln -s '$action_options.input_format_conditional.xlsx_file' ./xlsx_input.xlsx &&
 #end if
-#if $action_options.test_submit_parameters.dry_run == "false" and $action_options.test_submit == "False":
+#if $action_options.test_submit_parameters.dry_run == "False" and $action_options.test_submit == "False":
     webin_id=`grep 'username' $credentials`;
     if [ "\$webin_id" = "" ]; then
       ## No credentials in user defined preferences    
@@ -57,7 +58,7 @@
           credentials_path=\${GALAXY_ENA_SECRETS};     
           webin_id=`grep 'username' \$GALAXY_ENA_SECRETS`;
           if [ "\$webin_id" = "" ]; then
-              echo "No global credentials defined. Check your GALAXY_ENA_SECRETS file or set your credentials via: User -> Preferences -> Manage Information";
+                echo "No global credentials defined. Check your GALAXY_ENA_SECRETS file or set your credentials via: User -> Preferences -> Manage Information";
               exit 1;
           fi;
       #else:
@@ -80,7 +81,7 @@
           #for $run in $experiment.rep_runs:
             #for $file in $run.upload_files:
                 #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $file.element_identifier)
-                #if $action_options.input_format_conditional.add_extension == "true":
+                #if $action_options.input_format_conditional.add_extension == "True":
                     #set $extension = '.fastq'
                 #else
                     #set $extension = ''
@@ -90,7 +91,7 @@
                     #set $safename_reads_file = $safename_reads_file + $extension + '.gz'
                     gzip -c '$file' > $safename_reads_file &&
                 #else:
-                    #if $action_options.input_format_conditional.add_extension == "true":
+                    #if $action_options.input_format_conditional.add_extension == "True":
                         #if $file.is_of_type('fastq.gz', 'fastqsanger.gz'):
                             #set $compression = '.gz' 
                         #elif $file.is_of_type('fastqsanger.bz2', 'fastq.bz2'):
@@ -148,14 +149,14 @@
             #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $file.element_identifier)
             #if $file.is_of_type('fastq', 'fastqsanger'):
                 ## always compress add the gz extension
-                #if $action_options.input_format_conditional.run_input_format_conditional.add_extension == "true":
+                #if $action_options.input_format_conditional.run_input_format_conditional.add_extension == "True":
                     #set $safename_reads_file = $safename_reads_file + '.fastq.gz'
                 #else
                     #set $safename_reads_file = $safename_reads_file + '.gz'
-                #end if   
+                #end if
                 gzip -c '$file' > $safename_reads_file &&
             #else
-                #if $action_options.input_format_conditional.run_input_format_conditional.add_extension == "true":
+                #if $action_options.input_format_conditional.run_input_format_conditional.add_extension == "True":
                     #if $file.is_of_type('fastq.gz', 'fastqsanger.gz'):
                         #set $extension = '.fastq.gz'
                     #elif $file.is_of_type('fastqsanger.bz2', 'fastq.bz2'):
@@ -172,7 +173,7 @@
 
 #if $action_options.action == "add":
 ena-upload-cli
-    --tool 'ena-upload-cli v@VERSION@ @ Galaxy'
+    --tool 'ena-upload-cli v@TOOL_VERSION@ @ Galaxy'
     --action '$action_options.action'
     --center '$action_options.center'
     --secret \${credentials_path}
@@ -180,7 +181,7 @@
     #for $dataset in $files_to_upload:
         '$dataset'
     #end for
-#if $action_options.test_submit_parameters.dry_run == "true":
+#if $action_options.test_submit_parameters.dry_run == "True":
     --draft
 #end if
 #if $action_options.input_format_conditional.input_format != "excel_tables":
@@ -196,17 +197,17 @@
         --checklist $action_options.input_format_conditional.checklist_id
 #else:
     #if $action_options.input_format_conditional.input_format == "build_tables":
-        #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "true":
+        #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "True":
           --checklist ERC000033
         #end if
     #else:
           --checklist $action_options.input_format_conditional.checklist_id
     #end if
 #end if
-#if $action_options.idempotent == "true":
+#if $action_options.idempotent == "True":
     --auto_action
 #end if
-#if $action_options.test_submit_parameters.submit_dev == "true":
+#if $action_options.test_submit_parameters.submit_dev == "True":
     -d
 #end if
     >> '$output';
@@ -216,7 +217,7 @@
 
 #if $action_options.action == "modify":
     ena-upload-cli
-    --tool 'ena-upload-cli v@VERSION@ @ Galaxy'
+    --tool 'ena-upload-cli v@TOOL_VERSION@ @ Galaxy'
     --action '$action_options.action'
     --center '$action_options.center'
     --secret \${credentials_path}
@@ -224,7 +225,7 @@
     #for $dataset in $files_to_upload:
         '$dataset'
     #end for
-#if $action_options.test_submit_parameters.dry_run == "true":
+#if $action_options.test_submit_parameters.dry_run == "True":
     --draft
 #end if
 #if $action_options.input_format_conditional.input_format != "excel_tables":
@@ -241,7 +242,7 @@
         --checklist $action_options.input_format_conditional.checklist_id
 #else:
     #if $action_options.input_format_conditional.input_format == "build_tables":
-        #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "true":
+        #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "True":
           --checklist ERC000033
         #end if
     #else:
@@ -251,7 +252,7 @@
     >> '$output';
 #end if
 
-#if $action_options.test_submit_parameters.dry_run == "false":
+#if $action_options.test_submit_parameters.dry_run == "False":
     echo -e 'center_name\t$action_options.center' >> '$output';
     echo -e 'action_option\t$action_options.action' >> '$output';
 #end if
@@ -305,12 +306,10 @@
     
 $experiments.append({'title':str($experiment.experiment_title),'experiment_design':str($experiment.experiment_design),'library_strategy':str($experiment.library_strategy),'library_source':str($experiment.library_source),'library_selection':str($experiment.library_selection),'library_layout':str($experiment.library_layout),'insert_size':str($experiment.insert_size),'library_construction_protocol':str($experiment.library_construction_protocol),'platform':str($experiment.platform),'instrument_model':str($experiment.instrument_model),'runs':$runs})
       #end for
-      #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "true":
-
+      #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "True":
 $samples.append({'title':str($sample.sample_title),'description':str($sample.sample_description),'tax_name':str($sample.scientific_name),'tax_id':str($sample.tax_id),'collection_date':str($sample.collection_date),'geo_location':str($sample.geo_location_country),'host_common_name':str($sample.host_common_name),'host_subject_id':str($sample.host_subject_id),'host_health_state':str($sample.host_health_state),'host_sex':str($sample.host_sex),'host_scientific_name':str($sample.host_scientific_name),'collector_name':str($sample.collector_name),'collecting_institution':str($sample.collecting_institution),'isolate':str($sample.isolate),'experiments':$experiments})
       #else:
-
-$samples.append({'title':str($sample.sample_title),'description':str($sample.sample_description),'tax_name':str($sample.scientific_name),'tax_id':str($sample.tax_id),'experiments':$experiments})
+$samples.append({'title':str($sample.sample_title),'description':str($sample.sample_description),'tax_name':str($sample.scientific_name),'tax_id':str($sample.tax_id),'collection_date':str($sample.collection_date),'geo_location':str($sample.geo_location_country),'experiments':$experiments})
       #end if
     #end for
     $studies.append({'title':str($study.study_title),'type':str($study.study_type),'abstract':str($study.study_abstract),'pubmed_id':str($study.study_pubmed_id),'samples':$samples})
@@ -326,7 +325,7 @@
                 <option value="modify">Modify metadata</option>
             </param>
             <when value="add">
-                <param type="boolean" name="idempotent" checked="False" label="Idempotent submissions: only add the elements that were not submitted before" help="NOTE: this feature is based on a beta parameter of the CLI that checks the remote ENA repository for entries using the alias. This remote detection can have false positives, i.e assumes that it's present in the repository but it's not"/>
+                <param type="boolean" name="idempotent" checked="False" label="Idempotent submissions: only add the elements that were not submitted before" help="NOTE: this feature is based on a beta parameter of the CLI that checks the remote ENA repository for entries using the alias. This remote detection can have false positives, i.e. it may assume that an entry is present in the repository when it is not"/>
                 <expand macro="test_submit_section"/>    
                 <param name="test_submit" type="hidden" value="False" />
                 <expand macro="table_inputs_macro" />
@@ -355,19 +354,19 @@
     </outputs>
     <tests>
        <!--Test 1:  excel input of VIRAL samples -->
-        <test>
+        <test expect_num_outputs="5">
             <conditional name="action_options">
                 <param name="action" value="add"/>
                 <section name="test_submit_parameters">
-                    <param name="submit_dev" value="false" />
-                    <param name="dry_run" value="true" />
+                    <param name="submit_dev" value="False" />
+                    <param name="dry_run" value="True" />
                 </section>
                 <conditional name="input_format_conditional">
                     <param name="input_format" value="excel_tables"/>
                     <param name="checklist_id" value="ERC000033"/>
                     <param name="xlsx_file" value="metadata_test_viral.xlsx"/>
                     <conditional name="run_input_format_conditional">
-                        <param name="add_extension" value="false"/>
+                        <param name="add_extension" value="False"/>
                         <param name="run_input_format" value="multiple_selection_list"/>
                         <param name="data" value="C030_exp5_clean.fastq,C053_exp5_clean.fastq,C026_exp5_clean.fastq,C067_exp5_clean.fastq"/>
                     </conditional>
@@ -405,19 +404,19 @@
             </output>
         </test>
         <!--Test 2: excel input of VIRAL samples with extended columns-->
-        <test>
+        <test expect_num_outputs="5">
             <conditional name="action_options">
                 <param name="action" value="add"/>
                 <section name="test_submit_parameters">
-                    <param name="submit_dev" value="false" />
-                    <param name="dry_run" value="true" />
+                    <param name="submit_dev" value="False" />
+                    <param name="dry_run" value="True" />
                 </section>
                 <conditional name="input_format_conditional">
                     <param name="input_format" value="excel_tables"/>
                     <param name="checklist_id" value="ERC000033"/>
                     <param name="xlsx_file" value="metadata_test_viral_optional_columns.xlsx"/>
                     <conditional name="run_input_format_conditional">
-                        <param name="add_extension" value="false"/>
+                        <param name="add_extension" value="False"/>
                         <param name="run_input_format" value="multiple_selection_list"/>
                         <param name="data" value="C030_exp5_clean.fastq,C053_exp5_clean.fastq,C026_exp5_clean.fastq,C067_exp5_clean.fastq"/>
                     </conditional>
@@ -465,19 +464,19 @@
             </output>
         </test>
         <!--Test 3: excel input of NON-VIRAL samples-->
-        <test>
+        <test expect_num_outputs="5">
             <conditional name="action_options">
                 <param name="action" value="add"/>
                 <section name="test_submit_parameters">
-                    <param name="submit_dev" value="false" />
-                    <param name="dry_run" value="true" />
+                    <param name="submit_dev" value="False" />
+                    <param name="dry_run" value="True" />
                 </section>
                 <conditional name="input_format_conditional">
                     <param name="input_format" value="excel_tables"/>
                     <param name="checklist_id" value="ERC000011"/>
                     <param name="xlsx_file" value="metadata_test_nonviral_1_run.xlsx"/>
                     <conditional name="run_input_format_conditional">
-                        <param name="add_extension" value="true"/>
+                        <param name="add_extension" value="True"/>
                         <param name="run_input_format" value="multiple_selection_list"/>
                         <param name="data" value="sample_no_extension"/>
                     </conditional>
@@ -501,8 +500,8 @@
             <output name="samples_table_out">
                 <assert_contents>
                     <has_n_lines n="2"/>
-                    <has_n_columns n="8"/>
-                    <has_line_matching expression="alias\ttitle\tscientific_name\tsample_description\taccession\tsubmission_date\tstatus\ttaxon_id"/>
+                    <has_n_columns n="10"/>
+                    <has_line_matching expression="s_20201007_026\tC026\tSaccharomyces cerevisiae\tSample from beer\t2020-03-25\tAntarctica\t\t\tADDED\t"/>
                 </assert_contents>
             </output>
             <output name="runs_table_out">
@@ -515,15 +514,15 @@
             </output>
         </test>
         <!--Test 4: failure on excel input of NON-VIRAL samples with runs PAIRED collection -->
-        <test expect_failure="true">
+        <test expect_failure="True">
             <conditional name="action_options">
                 <param name="action" value="add"/>
                 <section name="test_submit_parameters">
-                    <param name="submit_dev" value="false" />
-                    <param name="dry_run" value="false" />
+                    <param name="submit_dev" value="False" />
+                    <param name="dry_run" value="False" />
                 </section>
                 <conditional name="input_format_conditional">
-                    <param name="add_extension" value="true"/>
+                    <param name="add_extension" value="True"/>
                     <param name="input_format" value="excel_tables"/>
                     <param name="checklist_id" value="ERC000011"/>
                     <param name="xlsx_file" value="metadata_test_nonviral.xlsx"/>
@@ -550,16 +549,16 @@
             </assert_command>
         </test>
         <!--Test 4b: failure on excel input of NON-VIRAL samples with runs PAIRED collection + Idempotent submission -->
-        <test expect_failure="true">
+        <test expect_failure="True">
             <conditional name="action_options">
                 <param name="action" value="add"/>
                 <section name="test_submit_parameters">
-                    <param name="submit_dev" value="false" />
-                    <param name="dry_run" value="false" />
+                    <param name="submit_dev" value="False" />
+                    <param name="dry_run" value="False" />
                 </section>
-                <param name="idempotent" value="true"/>
+                <param name="idempotent" value="True"/>
                 <conditional name="input_format_conditional">
-                    <param name="add_extension" value="true"/>
+                    <param name="add_extension" value="True"/>
                     <param name="input_format" value="excel_tables"/>
                     <param name="checklist_id" value="ERC000011"/>
                     <param name="xlsx_file" value="metadata_test_nonviral.xlsx"/>
@@ -587,16 +586,16 @@
             </assert_command>
         </test>
         <!--Test 5: build tables from user input fields NON-VIRAL samples-->
-        <test>
+        <test expect_num_outputs="5">
             <conditional name="action_options">
                 <param name="action" value="add"/>
                 <section name="test_submit_parameters">
-                    <param name="submit_dev" value="false" />
-                    <param name="dry_run" value="true" />
+                    <param name="submit_dev" value="False" />
+                    <param name="dry_run" value="True" />
                 </section>
                 <conditional name="input_format_conditional">
                     <param name="input_format" value="build_tables"/>
-                    <param name="add_extension" value="false"/>
+                    <param name="add_extension" value="False"/>
                     <conditional name="conditional_viral_metadata">
                         <param name="viral_sample" value="False"/>
                         <repeat name="rep_study">
@@ -609,6 +608,8 @@
                                 <param name="sample_description" value="Test Sample description"/>
                                 <param name="scientific_name" value="Test Sample scientific name"/>
                                 <param name="tax_id" value="2697049"/>
+                                <param name="collection_date" value="2020"/>
+                                <param name="geo_location_country" value="Belgium"/>
                                 <repeat name="rep_experiment">
                                     <param name="experiment_title" value="Test experiment title"/>
                                     <param name="experiment_design" value="Test experiment design description"/>
@@ -646,7 +647,7 @@
             <output name="samples_table_out">
                 <assert_contents>
                     <has_n_lines n="2"/>
-                    <has_n_columns n="8"/>
+                    <has_n_columns n="10"/>
                 </assert_contents>
             </output>
             <output name="runs_table_out">
@@ -657,19 +658,21 @@
                 </assert_contents>
             </output>
         </test>
-        <!--Test 6: RUN failing build tables from user input fields NON-VIRAL samples-->
-        <test expect_failure="true">
+        <!--Test 6: build tables from user input fields NON-VIRAL samples, with test_submit set to skip the credentials checks;
+            also tests compression of uncompressed inputs and adding the .gz suffix -->
+        <test expect_failure="False" expect_num_outputs="5">
             <conditional name="action_options">
                 <param name="action" value="add"/>
                 <section name="test_submit_parameters">
-                    <param name="submit_dev" value="true" />
-                    <param name="dry_run" value="false" />
+                    <param name="submit_dev" value="True" />
+                    <param name="dry_run" value="True" />
                 </section>
+                <param name="test_submit" value="True"/>
                 <conditional name="input_format_conditional">
+                    <param name="add_extension" value="False"/>
                     <param name="input_format" value="build_tables"/>
-                    <param name="add_extension" value="true"/>
                     <conditional name="conditional_viral_metadata">
-                        <param name="viral_sample" value="false"/>
+                        <param name="viral_sample" value="False"/>
                         <repeat name="rep_study">
                             <param name="study_title" value="Test study title"/>
                             <param name="study_abstract" value="Test study abstract"/>
@@ -680,56 +683,8 @@
                                 <param name="sample_description" value="Test Sample description"/>
                                 <param name="scientific_name" value="Test Sample scientific name"/>
                                 <param name="tax_id" value="2697049"/>
-                                <repeat name="rep_experiment">
-                                    <param name="experiment_title" value="Test experiment title"/>
-                                    <param name="experiment_design" value="Test experiment design description"/>
-                                    <param name="library_strategy" value="CTS"/>
-                                    <param name="library_source" value="GENOMIC"/>
-                                    <param name="library_selection" value="PCR"/>
-                                    <param name="library_layout" value="SINGLE"/>
-                                    <param name="insert_size" value="150"/>
-                                    <param name="library_construction_protocol" value="Test library construction"/>
-                                    <param name="platform" value="ILLUMINA"/>
-                                    <param name="instrument_model" value="Illumina HiSeq 4000"/>
-                                    <repeat name="rep_runs">
-                                        <param name="upload_files" value="1.fastqsanger.gz,sample.fq" ftype="fastqsanger"/>
-                                    </repeat>
-                                </repeat>
-                            </repeat>
-                        </repeat>
-                    </conditional>
-                </conditional>
-            </conditional>
-            <param name="center" value="Some research center"/>
-            <assert_stdout>
-                <has_text_matching expression="No ENA credentials defined"/>
-            </assert_stdout>
-        </test>
-        <!--Test 7: with submit_test to skip credentials checksRUN failing build tables from user input fields NON-VIRAL samples
-            also tests compression of uncompressed inputs and adding the .gz suffix -->
-        <test expect_failure="false">
-            <conditional name="action_options">
-                <param name="action" value="add"/>
-                <section name="test_submit_parameters">
-                    <param name="submit_dev" value="true" />
-                    <param name="dry_run" value="true" />
-                </section>
-                <param name="test_submit" value="True"/>
-                <conditional name="input_format_conditional">
-                    <param name="add_extension" value="false"/>
-                    <param name="input_format" value="build_tables"/>
-                    <conditional name="conditional_viral_metadata">
-                        <param name="viral_sample" value="false"/>
-                        <repeat name="rep_study">
-                            <param name="study_title" value="Test study title"/>
-                            <param name="study_abstract" value="Test study abstract"/>
-                            <param name="study_type" value="Epigenetics"/>
-                            <param name="study_pubmed_id" value="Test study pubmedID"/>
-                            <repeat name="rep_sample">
-                                <param name="sample_title" value="Test Sample title"/>
-                                <param name="sample_description" value="Test Sample description"/>
-                                <param name="scientific_name" value="Test Sample scientific name"/>
-                                <param name="tax_id" value="2697049"/>
+                                <param name="collection_date" value="2020"/>
+                                <param name="geo_location_country" value="Belgium"/>
                                 <repeat name="rep_experiment">
                                     <param name="experiment_title" value="Test experiment title"/>
                                     <param name="experiment_design" value="Test experiment design description"/>
@@ -759,20 +714,20 @@
                 <not_has_text text="modify" />
             </assert_command>
         </test>
-        <!--Test 8: viral submission - User input metadata - Add extension = False-->
-        <test expect_failure="false">
+        <!--Test 7: viral submission - User input metadata - Add extension = False-->
+        <test expect_failure="False" expect_num_outputs="5">
             <conditional name="action_options">
                 <param name="action" value="add"/>
                 <section name="test_submit_parameters">
-                    <param name="submit_dev" value="false" />
-                    <param name="dry_run" value="true" />
+                    <param name="submit_dev" value="False" />
+                    <param name="dry_run" value="True" />
                 </section>
                 <param name="test_submit" value="True"/>
                 <conditional name="input_format_conditional">
-                    <param name="add_extension" value="false"/>
+                    <param name="add_extension" value="False"/>
                     <param name="input_format" value="build_tables"/>
                     <conditional name="conditional_viral_metadata">
-                        <param name="viral_sample" value="true"/>
+                        <param name="viral_sample" value="True"/>
                         <repeat name="rep_study">
                             <param name="study_title" value="Test study title"/>
                             <param name="study_abstract" value="Test study abstract"/>
@@ -822,17 +777,17 @@
                 <has_text_matching expression="--checklist ERC000033"/>
             </assert_command>
         </test>
-        <!--Test 9: modify option and auto compression - viral submission - User input metadata-->
-        <test expect_failure="false">
+        <!--Test 8: modify option and auto compression - viral submission - User input metadata-->
+        <test expect_failure="False" expect_num_outputs="5">
             <conditional name="action_options">
                 <param name="action" value="modify"/>
                 <section name="test_submit_parameters">
-                    <param name="submit_dev" value="false" />
-                    <param name="dry_run" value="true" />
+                    <param name="submit_dev" value="False" />
+                    <param name="dry_run" value="True" />
                 </section>
                 <param name="test_submit" value="True"/>
                 <conditional name="input_format_conditional">
-                    <param name="add_extension" value="fasle"/>
+                    <param name="add_extension" value="False"/>
                     <param name="input_format" value="build_tables"/>
                     <conditional name="conditional_viral_metadata">
                         <param name="viral_sample" value="True"/>
@@ -869,7 +824,7 @@
                                     <param name="instrument_model" value="Illumina HiSeq 4000"/>
                                     <repeat name="rep_runs">
                                         <param name="run_base_name" value="run_from_hospital_X"/>
-                                        <param name="upload_files" value="1.fastqsanger.gz" ftype="fastqsanger.gz"/>
+                                        <param name="upload_files" value="1.fastqsanger.gz,2.fastqsanger.gz" ftype="fastqsanger.gz"/>
                                     </repeat>
                                 </repeat>
                             </repeat>
@@ -880,7 +835,7 @@
             <param name="center" value="Some research center"/>
             <assert_command>
                 <has_text_matching expression="ena-upload-cli"/>
-                <has_text_matching expression="--data '1.fastqsanger.gz'"/>
+                <has_text_matching expression="--data '1.fastqsanger.gz' '2.fastqsanger.gz'"/>
                 <has_text_matching expression="--action 'modify' --center 'Some research center'"/>
                 <has_text_matching expression="--checklist ERC000033"/>
                 <not_has_text text="add" />
@@ -889,9 +844,9 @@
     </tests>
     <help><![CDATA[
         This is a wrapper for the ENA upload tool in https://github.com/usegalaxy-eu/ena-upload-cli
-        The input metadata can be submitted following the tabular format of the templates in https://github.com/usegalaxy-eu/ena-upload-cli/tree/master/example_tables
-        It is also possible to submit an excel file by following the template in https://github.com/ELIXIR-Belgium/ENA-metadata-templates
+        The input metadata can be submitted following the tabular format of the templates, or their Excel spreadsheet equivalent, in https://github.com/ELIXIR-Belgium/ENA-metadata-templates. This template repository provides ready-to-use sheets for every ENA sample checklist and is updated automatically.
     ]]></help>
     <citations>
+        <citation type="doi">doi:10.1093/bioinformatics/btab421</citation>
     </citations>
 </tool>
--- a/extract_tables.py	Thu Nov 10 15:18:00 2022 +0000
+++ b/extract_tables.py	Fri Oct 27 19:44:40 2023 +0000
@@ -40,21 +40,20 @@
 for study_index, study in enumerate(studies_dict):
     study_alias = 'study_' + str(study_index) + '_' + timestamp
     studies_table.write('\t'.join([study_alias, action, study['title'],
-                                   study['type'], study['abstract'], study['pubmed_id'],
-                                   ]))
-    if "geo_location" in study['samples'][0].keys():           # sample belongs to a viral sample
+                                   study['type'], study['abstract'], study['pubmed_id']]))
+    if "host_subject_id" in study['samples'][0].keys():           # sample belongs to a viral sample
         samples_table.write('\t'.join(['alias', 'status', 'title', 'scientific_name',
                                        'taxon_id', 'sample_description', 'collection date',
                                        'geographic location (country and/or sea)', 'host common name', 'host subject id',
                                        'host health state', 'host sex', 'host scientific name',
-                                       'collector name', 'collecting institution', 'isolate',
-                                       ]) + '\n')
+                                       'collector name', 'collecting institution', 'isolate']) + '\n')
     else:
         samples_table.write('\t'.join(['alias', 'status', 'title', 'scientific_name',
-                                       'taxon_id', 'sample_description']) + '\n')
+                                       'taxon_id', 'sample_description', 'collection date',
+                                       'geographic location (country and/or sea)']) + '\n')
     for sample_index, sample in enumerate(study['samples']):
         sample_alias = 'sample_' + str(sample_index) + '_' + timestamp
-        if "geo_location" in sample.keys():  # sample belongs to a viral sample
+        if "host_subject_id" in sample.keys():  # sample belongs to a viral sample
             if sample['collector_name'] == '':
                 sample['collector_name'] = 'unknown'
             samples_table.write('\t'.join([sample_alias, action, sample['title'],
@@ -64,12 +63,13 @@
                                            sample['host_subject_id'], sample['host_health_state'],
                                            sample['host_sex'], sample['host_scientific_name'],
                                            sample['collector_name'],
-                                           sample['collecting_institution'], sample['isolate'],
+                                           sample['collecting_institution'], sample['isolate']
                                            ]) + '\n')
         else:
             samples_table.write('\t'.join([sample_alias, action, sample['title'],
                                            sample['tax_name'], sample['tax_id'],
-                                           sample['description']]) + '\n')
+                                           sample['description'], sample['collection_date'],
+                                           sample['geo_location']]) + '\n')
         for exp_index, exp in enumerate(sample['experiments']):
             exp_alias = 'experiment_' + str(exp_index) + '.' + str(sample_index) + '_' + timestamp
             lib_alias = 'library_' + str(exp_index) + '_' + str(sample_index)
@@ -79,8 +79,7 @@
                                                exp['library_source'], exp['library_selection'],
                                                exp['library_layout'].lower(), exp['insert_size'],
                                                exp['library_construction_protocol'],
-                                               exp['platform'], exp['instrument_model'],
-                                               ]) + '\n')
+                                               exp['platform'], exp['instrument_model']]) + '\n')
             run_index = 0
             # exp['runs'] is a list of lists
             for (base_run, run_files) in exp['runs']:
--- a/samples_macros.xml	Thu Nov 10 15:18:00 2022 +0000
+++ b/samples_macros.xml	Fri Oct 27 19:44:40 2023 +0000
@@ -44,9 +44,9 @@
         </param>
     </xml>
     <xml name="test_submit_section">
-        <section name="test_submit_parameters" expanded="true" title="Testing options">
-            <param name="submit_dev" type="boolean" truevalue="true" falsevalue="false" label="Submit to test ENA server?" help="By selecting yes the reads will be submitted to the ENA test server. Uploads to test platform will not be public and will be removed in 24hrs. Performing a preliminary test upload is advised to check for errors with metadata structure. You can find the uploads to the test platform at https://wwwdev.ebi.ac.uk/ena/" />
-            <param name="dry_run" type="boolean" truevalue="true" falsevalue="false" label="Print the tables but do not submit the datasets" help="If yes is selected then NO submission will be performed."/>
+        <section name="test_submit_parameters" expanded="True" title="Testing options">
+            <param name="submit_dev" type="boolean" truevalue="True" falsevalue="False" label="Submit to test ENA server?" help="By selecting yes the reads will be submitted to the ENA test server. Uploads to test platform will not be public and will be removed in 24hrs. Performing a preliminary test upload is advised to check for errors with metadata structure. You can find the uploads to the test platform at https://wwwdev.ebi.ac.uk/ena/" />
+            <param name="dry_run" type="boolean" truevalue="True" falsevalue="False" label="Print the tables but do not submit the datasets" help="If yes is selected then NO submission will be performed."/>
         </section>
     </xml>
     <xml name="run_inputs_macro">
@@ -57,7 +57,7 @@
             </param>
             <when value="multiple_selection_list">
                 <param name="add_extension" type="boolean" checked="False" label="Add .fastq.(gz,.bz2) extension to the Galaxy dataset names to match the ones described in the input tables?"/>
-                <param name="data" type="data" format="fastq,fastqsanger,fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="Select individual datasets or a dataset collection" help="Names should match the compressed run's files names defined in the metadata"/>
+                <param name="data" type="data" format="fastq,fastqsanger,fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="True" label="Select individual datasets or a dataset collection" help="Names should match the compressed run's files names defined in the metadata"/>
             </when>
             <when value="paired_list">
                 <param name="paired_end_collection" collection_type="list:paired" type="data_collection" format="fastq,fastqsanger,fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" label="List of paired-end runs files" help="Names should match the compressed run's files names defined in the metadata" />
@@ -68,7 +68,7 @@
         <conditional name="input_format_conditional">
             <param name="input_format" type="select" label="Would you like to submit pregenerated table files or interactively define the input structures?">
                 <option value="excel_tables" selected="True">User generated metadata tables based on Excel template</option>
-                <option value="build_tables" selected="False">Interactive generation of the study structure (recommended for small studies)</option>
+                <option value="build_tables" selected="False">Interactive generation of the study structure (only recommended for small studies)</option>
                 <option value="user_generated_tables" selected="False">User generated tabular files (studies - samples - experiments - runs) </option>
             </param>
             <when value="excel_tables">
@@ -81,19 +81,22 @@
                 <!--<param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" />-->
                 <expand macro="checklist_input_macro"/>
                 <expand macro="run_inputs_macro" />
-                <param name="studies_users_table" type="data" format="tabular" multiple="false" label="Studies table" help="Studies metadata file"/>
-                <param name="samples_users_table" type="data" format="tabular" multiple="false" label="Samples table" help="Samples metadata file"/>
-                <param name="experiments_users_table" type="data" format="tabular" multiple="false" label="Experiments table" help="Experiments metadata file"/>
-                <param name="runs_users_table" type="data" format="tabular" multiple="false" label="Runs table" help="Runs metadata file"/>
+                <param name="studies_users_table" type="data" format="tabular" multiple="False" label="Studies table" help="Studies metadata file"/>
+                <param name="samples_users_table" type="data" format="tabular" multiple="False" label="Samples table" help="Samples metadata file"/>
+                <param name="experiments_users_table" type="data" format="tabular" multiple="False" label="Experiments table" help="Experiments metadata file"/>
+                <param name="runs_users_table" type="data" format="tabular" multiple="False" label="Runs table" help="Runs metadata file"/>
             </when>
             <when value="build_tables">
-                <param name="add_extension" type="boolean" checked="false" label="Add .fastq.(gz.bz2) extension to the Galaxy dataset names to match the ones described in the input tables?"/>
+                <param name="add_extension" type="boolean" checked="False" label="Add .fastq.(gz.bz2) extension to the Galaxy dataset names to match the ones described in the input tables?"/>
                 <conditional name="conditional_viral_metadata">
-                    <param name="viral_sample" type="boolean" truevalue="true" falsevalue="false" label="Does your submission contains viral samples?" />
-                    <when value="true">
+                    <param name="viral_sample" type="select" label="Select your sample type" help="Non-viral interactive submissions lack specific sample checklist attributes and are suboptimal. It's highly recommended to use Excel or TSV templates as input for a more comprehensive submission.">
+                        <option value="True" selected="True">Viral</option>
+                        <option value="False" selected="False">Non viral</option>
+                    </param>
+                    <when value="True">
                         <expand macro="viral_samples" />
                     </when>
-                    <when value="false">
+                    <when value="False">
                         <expand macro="nonviral_samples" />
                     </when>
                 </conditional>
@@ -106,9 +109,9 @@
             <param name="study_title" type="text" optional="False" label="Please provide a short descriptive title for the study"/>
             <param name="study_abstract" type="text" optional="True" label="Please provide an abstract to describe the study in detail"/>
             <param name="study_type" type="select" label="Please select the type of study">
-                    <options from_data_table="study_type">
-                        <column name="value" index="0"/>
-                    </options>
+                <options from_data_table="study_type">
+                    <column name="value" index="0"/>
+                </options>
             </param>
             <param name="study_pubmed_id" type="text" optional="True" value="" label="Please provide the PubMed id if exists (or leave it blank)"/>
             <repeat name="rep_sample" title="Samples associated with this study" min="1" >
@@ -146,17 +149,17 @@
                 <repeat name="rep_experiment" title="Sequencing experiments performed with this sample" min="1" >
                     <param name="experiment_title" type="text" label="Specify an experiment title" />
                     <param name="experiment_design" type="text" label="Describe the experiment design" />
-                    <param name="library_strategy" type="select" label="Library strategy" help="The library strategy specifies the sequencing technique intended for this library">
+                    <param name="library_strategy" type="select" label="Library strategy" help="The library strategy specifies the sequencing technique intended for this library">
                         <options from_data_table="library_strategy">
                             <column name="value" index="0"/>
                         </options>
                     </param>
-                    <param name="library_source" type="select" label="Select library source" help="The library source specifies the type of source material that is being sequenced">
+                    <param name="library_source" type="select" label="Select library source" help="The library source specifies the type of source material that is being sequenced">
                         <options from_data_table="library_source">
                             <column name="value" index="0"/>
                         </options>
                     </param>
-                    <param name="library_selection" type="select" label="Library selection" help="The library selection specifies whether any method was used to select for or against, enrich, or screen the material being sequenced">
+                    <param name="library_selection" type="select" label="Library selection" help="The library selection specifies whether any method was used to select for or against, enrich, or screen the material being sequenced">
                         <options from_data_table="library_selection">
                             <column name="value" index="0"/>
                         </options>
@@ -169,16 +172,9 @@
                     <param name="insert_size" type="integer" value="0" label="Specify the insert size"/>
                     <param name="library_construction_protocol" type="text" label="Please describe the library construction protocol"/>
                     <param name="platform" type="select" label="Select the sequencing platform used">
-                        <option value="LS454">LS454</option>
-                        <option value="ILLUMINA">Illumina</option>
-                        <option value="HELICOS">Helicos</option>
-                        <option value="ABI_SOLID">ABI Solid</option>
-                        <option value="COMPLETE_GENOMICS">Complete Genomics</option>
-                        <option value="BGISEQ">BGI Seq</option>
-                        <option value="OXFORD_NANOPORE">Oxford Nanopore</option>
-                        <option value="PACBIO_SMRT">PacBio</option>
-                        <option value="ION_TORRENT">Ion Torrent</option>
-                        <option value="CAPILLARY">Capillary sequencing</option>
+                        <options from_data_table="platform">
+                            <column name="value" index="0"/>
+                        </options>
                     </param>
                     <param name="instrument_model" type="select" label="Instrument model">
                         <options from_data_table="instrument_model">
@@ -187,75 +183,79 @@
                     </param>
                     <repeat name="rep_runs" title="Runs executed within this experiment" min="1" >
                         <param name="run_base_name" type="text" optional="False" value="" label="Run alias" help="If an alias is not provided it will be generated combining the sample and experiment indexes"/>
-                        <param name="upload_files" type="data" format="fastq,fastqsanger,fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="File(s) associated with this run"/>
+                        <param name="upload_files" type="data" format="fastq,fastqsanger,fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="True" label="File(s) associated with this run"/>
                     </repeat>
                 </repeat>
             </repeat>
         </repeat>
     </xml>
     <xml name="nonviral_samples">
-    <repeat name="rep_study" title="Study" min="1">
-        <param name="study_title" type="text" optional="False" label="Please provide a short descriptive title for the study"/>
-        <param name="study_abstract" type="text" optional="True" label="Please provide an abstract to describe the study in detail"/>
-        <param name="study_type" type="select" label="Please select the type of study">
-                <options from_data_table="study_type">
-                    <column name="value" index="0"/>
-                </options>
-        </param>
-        <param name="study_pubmed_id" type="text" optional="True" value="" label="Please provide the PubMed id if exists (or leave it blank)"/>
-        <repeat name="rep_sample" title="Samples associated with this study" min="1" >
-            <param name="sample_title" type="text" label="Sample title"/>
-            <param name="sample_description" type="text" help="e.g: liver cells" label="Describe the type of sample"/>
-            <param name="scientific_name" type="text" label="Enter the species of the sample" help="e.g Severe acute respiratory syndrome coronavirus 2"/>
-            <param name="tax_id" type="integer" value="0" label="Enter the taxonomic ID corresponding to the sample species" />
-            <repeat name="rep_experiment" title="Sequencing experiments performed with this sample" min="1" >
-                <param name="experiment_title" type="text" label="Specify an experiment title" />
-                <param name="experiment_design" type="text" label="Describe the experiment design" />
-                <param name="library_strategy" type="select" label="Library strategy" help="The library strategy specifies the sequencing technique intended for this library">
-                    <options from_data_table="library_strategy">
+        <repeat name="rep_study" title="Study" min="1">
+            <param name="study_title" type="text" optional="False" label="Please provide a short descriptive title for the study"/>
+            <param name="study_abstract" type="text" optional="True" label="Please provide an abstract to describe the study in detail"/>
+            <param name="study_type" type="select" label="Please select the type of study">
+                    <options from_data_table="study_type">
                         <column name="value" index="0"/>
                     </options>
+            </param>
+            <param name="study_pubmed_id" type="text" optional="True" value="" label="Please provide the PubMed id if exists (or leave it blank)"/>
+            <repeat name="rep_sample" title="Samples associated with this study" min="1" >
+                <param name="sample_title" type="text" label="Sample title"/>
+                <param name="sample_description" type="text" help="e.g: liver cells" label="Describe the type of sample"/>
+                <param name="scientific_name" type="text" label="Enter the species of the sample" help="e.g Severe acute respiratory syndrome coronavirus 2"/>
+                <param name="tax_id" type="integer" value="0" label="Enter the taxonomic ID corresponding to the sample species" />
+                <param name="collection_date" type="text" label="Collection date" optional="True" help="options are: YYYY, YYYY-MM, YYYY-MM-DD, not collected, restricted access or not provided">
+                    <option value="not collected">not collected</option>
+                    <option value="restricted access">restricted access</option>
+                    <option value="not provided">not provided</option>
+                    <validator type="regex"  message="Data format is not valid">(^[0-9]{4}(-[0-9]{2}(-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?Z?([+-][0-9]{1,2})?)?)?)?(/[0-9]{4}(-[0-9]{2}(-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?Z?([+-][0-9]{1,2})?)?)?)?)?$)|(^not collected$)|(^not provided$)|(^restricted access$)</validator>
                 </param>
-                <param name="library_source" type="select" label="Select library source" help="The library source specifies the type of source material that is being sequenced">
-                    <options from_data_table="library_source">
-                        <column name="value" index="0"/>
-                    </options>
-                </param>
-                <param name="library_selection" type="select" label="Library selection" help="The library selection specifies whether any method was used to select for or against, enrich, or screen the material being sequenced">
-                    <options from_data_table="library_selection">
+                <param name="geo_location_country" type="select" label="Select the country where the sample was obtained">
+                    <options from_data_table="geographic_location_1">
                         <column name="value" index="0"/>
                     </options>
                 </param>
-                <param name="library_layout" type="select" label="Library layout">
-                    <options from_data_table="library_layout">
-                        <column name="value" index="0"/>
-                    </options>
-                </param>
-                <param name="insert_size" type="integer" value="0" label="Specify the insert size"/>
-                <param name="library_construction_protocol" type="text" label="Please describe the library construction protocol"/>
-                <param name="platform" type="select" label="Select the sequencing platform used">
-                    <option value="LS454">LS454</option>
-                    <option value="ILLUMINA" selected="True">Illumina</option>
-                    <option value="HELICOS">Helicos</option>
-                    <option value="ABI_SOLID">ABI Solid</option>
-                    <option value="COMPLETE_GENOMICS">Complete Genomics</option>
-                    <option value="BGISEQ">BGI Seq</option>
-                    <option value="OXFORD_NANOPORE">Oxford Nanopore</option>
-                    <option value="PACBIO_SMRT">PacBio</option>
-                    <option value="ION_TORRENT">Ion Torrent</option>
-                    <option value="CAPILLARY">Capillary sequencing</option>
-                </param>
-                <param name="instrument_model" type="select" label="Instrument model">
-                    <options from_data_table="instrument_model">
-                        <column name="value" index="0"/>
-                    </options>
-                </param>
-                <repeat name="rep_runs" title="Runs executed within this experiment" min="1" >
-                    <param name="run_base_name" type="text" optional="False" value="" label="Run alias" help="If an alias is not provided it will be generated combining the sample and experiment indexes"/>
-                    <param name="upload_files" type="data" format="fastq,fastqsanger,fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="File(s) associated with this run"/>
+                <repeat name="rep_experiment" title="Sequencing experiments performed with this sample" min="1" >
+                    <param name="experiment_title" type="text" label="Specify an experiment title" />
+                    <param name="experiment_design" type="text" label="Describe the experiment design" />
+                    <param name="library_strategy" type="select" label="Library strategy" help="The library strategy specifies the sequencing technique intended for this library">
+                        <options from_data_table="library_strategy">
+                            <column name="value" index="0"/>
+                        </options>
+                    </param>
+                    <param name="library_source" type="select" label="Select library source" help="The library source specifies the type of source material that is being sequenced">
+                        <options from_data_table="library_source">
+                            <column name="value" index="0"/>
+                        </options>
+                    </param>
+                    <param name="library_selection" type="select" label="Library selection" help="The library selection specifies whether any method was used to select for or against, enrich, or screen the material being sequenced">
+                        <options from_data_table="library_selection">
+                            <column name="value" index="0"/>
+                        </options>
+                    </param>
+                    <param name="library_layout" type="select" label="Library layout">
+                        <options from_data_table="library_layout">
+                            <column name="value" index="0"/>
+                        </options>
+                    </param>
+                    <param name="insert_size" type="integer" value="0" label="Specify the insert size"/>
+                    <param name="library_construction_protocol" type="text" label="Please describe the library construction protocol"/>
+                    <param name="platform" type="select" label="Select the sequencing platform used">
+                        <options from_data_table="platform">
+                            <column name="value" index="0"/>
+                        </options>
+                    </param>
+                    <param name="instrument_model" type="select" label="Instrument model">
+                        <options from_data_table="instrument_model">
+                            <column name="value" index="0"/>
+                        </options>
+                    </param>
+                    <repeat name="rep_runs" title="Runs executed within this experiment" min="1" >
+                        <param name="run_base_name" type="text" optional="False" value="" label="Run alias" help="If an alias is not provided it will be generated combining the sample and experiment indexes"/>
+                        <param name="upload_files" type="data" format="fastq,fastqsanger,fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="True" label="File(s) associated with this run"/>
+                    </repeat>
                 </repeat>
             </repeat>
         </repeat>
-    </repeat>
     </xml>
 </macros>
Binary file test-data/metadata_test_nonviral.xlsx has changed
Binary file test-data/metadata_test_nonviral_1_run.xlsx has changed
--- a/tool-data/geographic_location_1.loc.sample	Thu Nov 10 15:18:00 2022 +0000
+++ b/tool-data/geographic_location_1.loc.sample	Fri Oct 27 19:44:40 2023 +0000
@@ -1,7 +1,3 @@
-not applicable
-not collected
-not provided
-restricted access
 Afghanistan
 Albania
 Algeria
@@ -277,3 +273,15 @@
 Yemen
 Zambia
 Zimbabwe
+missing: control sample
+missing: data agreement established pre-2023
+missing: endangered species
+missing: human-identifiable
+missing: lab stock
+missing: sample group
+missing: synthetic construct
+missing: third party data
+not applicable
+not collected
+not provided
+restricted access
--- a/tool-data/host_sex_1.loc.sample	Thu Nov 10 15:18:00 2022 +0000
+++ b/tool-data/host_sex_1.loc.sample	Fri Oct 27 19:44:40 2023 +0000
@@ -1,9 +1,17 @@
-other
+female
+hermaphrodite
+male
+missing: control sample
+missing: data agreement established pre-2023
+missing: endangered species
+missing: human-identifiable
+missing: lab stock
+missing: sample group
+missing: synthetic construct
+missing: third party data
+neuter
 not applicable
-not provided
-neuter
 not collected
-female
+not provided
+other
 restricted access
-male
-hermaphrodite
--- a/tool-data/instrument_model.loc.sample	Thu Nov 10 15:18:00 2022 +0000
+++ b/tool-data/instrument_model.loc.sample	Fri Oct 27 19:44:40 2023 +0000
@@ -1,58 +1,75 @@
-minION
-GridION
-PromethION
 454 GS
 454 GS 20
 454 GS FLX
+454 GS FLX Titanium
 454 GS FLX+
-454 GS FLX Titanium
 454 GS Junior
+AB 310 Genetic Analyzer
+AB 3130 Genetic Analyzer
+AB 3130xL Genetic Analyzer
+AB 3500 Genetic Analyzer
+AB 3500xL Genetic Analyzer
+AB 3730 Genetic Analyzer
+AB 3730xL Genetic Analyzer
+AB 5500 Genetic Analyzer
+AB 5500xl Genetic Analyzer
+AB 5500xl-W Genetic Analysis System
+AB SOLiD 3 Plus System
+AB SOLiD 4 System
+AB SOLiD 4hq System
+AB SOLiD PI System
+AB SOLiD System
+AB SOLiD System 2.0
+AB SOLiD System 3.0
+BGISEQ-50
+BGISEQ-500
+Complete Genomics
+DNBSEQ-G400
+DNBSEQ-G400 FAST
+DNBSEQ-G50
+DNBSEQ-T7
+Element AVITI
+GridION
+Helicos HeliScope
+HiSeq X Five
+HiSeq X Ten
 Illumina Genome Analyzer
 Illumina Genome Analyzer II
 Illumina Genome Analyzer IIx
+Illumina HiScanSQ
 Illumina HiSeq 1000
 Illumina HiSeq 1500
 Illumina HiSeq 2000
 Illumina HiSeq 2500
 Illumina HiSeq 3000
 Illumina HiSeq 4000
-Illumina iSeq 100
-Illumina HiScanSQ
-Illumina NextSeq 500
-Illumina NextSeq 550
-Illumina NovaSeq 6000
-Illumina HiSeq X Five
-Illumina HiSeq X Ten
+Illumina HiSeq X
 Illumina MiSeq
 Illumina MiniSeq
-AB SOLiD System
-AB SOLiD System 2.0
-AB SOLiD System 3.0
-AB SOLiD 3 Plus System
-AB SOLiD 4 System
-AB SOLiD 4hq System
-AB SOLiD PI System
-AB 5500 Genetic Analyzer
-AB 5500xl Genetic Analyzer
-AB 5500xl-W Genetic Analysis System
+Illumina NovaSeq 6000
+Illumina NovaSeq X
+Illumina iSeq 100
+Ion GeneStudio S5
+Ion GeneStudio S5 Plus
+Ion GeneStudio S5 Prime
+Ion Torrent Genexus
 Ion Torrent PGM
 Ion Torrent Proton
 Ion Torrent S5
 Ion Torrent S5 XL
-Complete Genomics
+MGISEQ-2000RS
+MinION
+NextSeq 1000
+NextSeq 2000
+NextSeq 500
+NextSeq 550
+Onso
 PacBio RS
 PacBio RS II
+PromethION
+Revio
 Sequel
 Sequel II
-AB 3730xL Genetic Analyzer
-AB 3730 Genetic Analyzer
-AB 3500xL Genetic Analyzer
-AB 3500 Genetic Analyzer
-AB 3130xL Genetic Analyzer
-AB 3130 Genetic Analyzer
-AB 310 Genetic Analyzer
-BGISEQ-500
-DNBSEQ-T7
-DNBSEQ-G400
-DNBSEQ-G50
-DNBSEQ-G400 FAST
+Sequel IIe
+UG 100
+unspecified
--- a/tool-data/library_selection.loc.sample	Thu Nov 10 15:18:00 2022 +0000
+++ b/tool-data/library_selection.loc.sample	Fri Oct 27 19:44:40 2023 +0000
@@ -8,7 +8,14 @@
 size fractionation
 MSLL
 cDNA
+cDNA_randomPriming
+cDNA_oligo_dT
+PolyA
+Oligo-dT
+Inverse rRNA
+Inverse rRNA selection
 ChIP
+ChIP-Seq
 MNase
 DNase
 Hybrid Selection
@@ -20,8 +27,5 @@
 RACE
 MDA
 padlock probes capture method
-Oligo-dT
-Inverse rRNA selection
-ChIP-Seq
 other
 unspecified
--- a/tool-data/library_source.loc.sample	Thu Nov 10 15:18:00 2022 +0000
+++ b/tool-data/library_source.loc.sample	Fri Oct 27 19:44:40 2023 +0000
@@ -1,5 +1,7 @@
 GENOMIC
+GENOMIC SINGLE CELL
 TRANSCRIPTOMIC
+TRANSCRIPTOMIC SINGLE CELL
 METAGENOMIC
 METATRANSCRIPTOMIC
 SYNTHETIC
--- a/tool-data/library_strategy.loc.sample	Thu Nov 10 15:18:00 2022 +0000
+++ b/tool-data/library_strategy.loc.sample	Fri Oct 27 19:44:40 2023 +0000
@@ -3,6 +3,7 @@
 WXS
 RNA-Seq
 ssRNA-seq
+snRNA-seq
 miRNA-Seq
 ncRNA-Seq
 FL-cDNA
@@ -33,6 +34,8 @@
 Synthetic-Long-Read
 Targeted-Capture
 Tethered Chromatin Conformation Capture
+NOMe-Seq
 ChM-Seq
 GBS
+Ribo-Seq
 OTHER
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/platform.loc.sample	Fri Oct 27 19:44:40 2023 +0000
@@ -0,0 +1,13 @@
+LS454
+ILLUMINA
+HELICOS
+ABI_SOLID
+COMPLETE_GENOMICS
+BGISEQ
+OXFORD_NANOPORE
+PACBIO_SMRT
+ION_TORRENT
+CAPILLARY
+DNBSEQ
+ELEMENT
+ULTIMA
--- a/tool-data/study_type.loc.sample	Thu Nov 10 15:18:00 2022 +0000
+++ b/tool-data/study_type.loc.sample	Fri Oct 27 19:44:40 2023 +0000
@@ -12,3 +12,4 @@
 Exome Sequencing
 Pooled Clone Sequencing
 Transcriptome Sequencing
+Other
--- a/tool_data_table_conf.xml.sample	Thu Nov 10 15:18:00 2022 +0000
+++ b/tool_data_table_conf.xml.sample	Fri Oct 27 19:44:40 2023 +0000
@@ -4,10 +4,10 @@
         <table name="library_layout" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="tool-data/library_layout.loc" /></table>
         <table name="library_selection" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="tool-data/library_selection.loc" /></table>
         <table name="instrument_model" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="tool-data/instrument_model.loc" /></table>
+        <table name="platform" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="tool-data/platform.loc" /></table>
         <table name="library_source" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="tool-data/library_source.loc" /></table>
         <table name="library_strategy" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="tool-data/library_strategy.loc" /></table>
         <table name="geographic_location_1" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="tool-data/geographic_location_1.loc" /></table>
         <table name="host_sex_1" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="tool-data/host_sex_1.loc" /></table>
         <table name="host_health_state_1" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="tool-data/host_health_state_1.loc" /></table>
     </tables>
-
--- a/tool_data_table_conf.xml.test	Thu Nov 10 15:18:00 2022 +0000
+++ b/tool_data_table_conf.xml.test	Fri Oct 27 19:44:40 2023 +0000
@@ -4,6 +4,7 @@
     <table name="library_layout" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="${__HERE__}/tool-data/library_layout.loc.sample" /></table>
     <table name="library_selection" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="${__HERE__}/tool-data/library_selection.loc.sample" /></table>
     <table name="instrument_model" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="${__HERE__}/tool-data/instrument_model.loc.sample" /></table>
+    <table name="platform" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="${__HERE__}/tool-data/platform.loc.sample" /></table>
     <table name="library_source" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="${__HERE__}/tool-data/library_source.loc.sample" /></table>
     <table name="library_strategy" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="${__HERE__}/tool-data/library_strategy.loc.sample" /></table>
     <table name="geographic_location_1" comment_char="#" allow_duplicate_entries="False"><columns>value</columns><file path="${__HERE__}/tool-data/geographic_location_1.loc.sample" /></table>