changeset 1:57251c760cab draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit ffea061c1ad6e7291abfe220230dbdbe8d19a2bd"
author iuc
date Fri, 30 Apr 2021 12:09:25 +0000
parents 382518f24d6d
children 9e2df763086c
files ena_upload.xml extract_tables.py process_xlsx.py samples_macros.xml test-data/2.fastqsanger.gz
diffstat 5 files changed, 263 insertions(+), 87 deletions(-) [+]
line wrap: on
line diff
--- a/ena_upload.xml	Sat Nov 28 09:45:44 2020 +0000
+++ b/ena_upload.xml	Fri Apr 30 12:09:25 2021 +0000
@@ -1,24 +1,31 @@
-<tool id="ena_upload" name="ENA Upload tool" version="0.3" profile="20.01" license="MIT">
+<tool id="ena_upload" name="ENA Upload tool" version="0.3.1" profile="20.01" license="MIT">
     <macros>
-        <token name="@VERSION@">0.2.4</token>
+        <token name="@VERSION@">0.2.7</token>
         <import>samples_macros.xml</import>
     </macros>
     <requirements>
         <requirement type="package" version="@VERSION@">ena-upload-cli</requirement>
         <requirement type="package" version="1.2.0">xlrd</requirement>
     </requirements>
+    <stdio>
+        <regex match="Oops" source="stderr" level="fatal"/>
+        <regex match="different file names between command line and RUN table" source="stderr" level="fatal"/>
+    </stdio>
     <command detect_errors="exit_code"><![CDATA[
 mkdir ./submission_files; 
+#set $studies_table_path = './submission_files/studies.tsv'
+#set $samples_table_path =   './submission_files/samples.tsv'
+#set $experiments_table_path = './submission_files/experiments.tsv'
+#set $runs_table_path =  './submission_files/runs.tsv'
+        
+#set $studies_table_path_updated = './submission_files/studies_updated.tsv'
+#set $samples_table_path_updated =   './submission_files/samples_updated.tsv'
+#set $experiments_table_path_updated = './submission_files/experiments_updated.tsv'
+#set $runs_table_path_updated =  './submission_files/runs_updated.tsv'
 
 #set working_dir = os.getcwd()
-#set $dry_run_option = "False"
-#set viral_submission = "False"
 #if $action_options.input_format_conditional.input_format == "build_tables":
   python '$__tool_directory__/extract_tables.py' --action $action_options.action --out_dir ./submission_files --studies $studies_json;
-  #set $studies_table_path = './submission_files/studies.tsv'
-  #set $samples_table_path =   './submission_files/samples.tsv'
-  #set $experiments_table_path = './submission_files/experiments.tsv'
-  #set $runs_table_path =  './submission_files/runs.tsv'
 #end if
 
 #if $action_options.input_format_conditional.input_format == "excel_tables":
@@ -26,35 +33,30 @@
     #if $action_options.input_format_conditional.viral_submission == "true":
         --vir 
     #end if
-    --action '$action_options.action' --form '$action_options.input_format_conditional.xlsx_file' --out_dir ./submission_files ;
-    #set $studies_table_path = './submission_files/studies.tsv'
-    #set $samples_table_path =   './submission_files/samples.tsv'
-    #set $experiments_table_path = './submission_files/experiments.tsv'
-    #set $runs_table_path =  './submission_files/runs.tsv'
-    #if $action_options.input_format_conditional.dry_run == "true":
-      #set $dry_run_option = "True"
-    #end if
+    --action '$action_options.action' --form '$action_options.input_format_conditional.xlsx_file' --out_dir ./submission_files --verbose > '$output';
 #end if
 
 #if $action_options.input_format_conditional.input_format != "user_generated_tables":
-    cp $studies_table_path $studies_table_out;
-    cp $samples_table_path $samples_table_out;
-    cp $experiments_table_path $experiments_table_out;
-    cp $runs_table_path $runs_table_out;
-    #if $action_options.input_format_conditional.dry_run == "true":
-      #set $dry_run_option = "True"
-    #end if
+    cp $studies_table_path $studies_table_out &&
+    cp $samples_table_path $samples_table_out &&
+    cp $experiments_table_path $experiments_table_out &&
+    cp $runs_table_path $runs_table_out &&
+#else:
+    ln -s '$action_options.input_format_conditional.experiments_users_table' $experiments_table_path &&
+    ln -s '$action_options.input_format_conditional.studies_users_table' $studies_table_path &&
+    ln -s '$action_options.input_format_conditional.runs_users_table' $runs_table_path &&
+    ln -s '$action_options.input_format_conditional.samples_users_table' $samples_table_path &&
 #end if
 
 
-#if $dry_run_option == "False" and $action_options.test_submit == "False":
+#if $action_options.test_submit_parameters.dry_run == "false" and $action_options.test_submit == "False":
     webin_id=`grep 'username' $credentials`;
     if [ "\$webin_id" = "" ]; then
       ## No credentials in user defined preferences    
       ## Fallback to global defined credentials (if exist)   
       #import os
       #if os.path.isfile(os.environ.get('GALAXY_ENA_SECRETS', '')):
-          credentials_path=\${ENA_SECRETS};     
+          credentials_path=\${GALAXY_ENA_SECRETS};     
           webin_id=`grep 'username' \$GALAXY_ENA_SECRETS`;
           if [ "\$webin_id" = "" ]; then
               echo "No global credentials defined. Check your GALAXY_ENA_SECRETS file or set your credentials via: User -> Preferences -> Manage Information";
@@ -91,15 +93,41 @@
       #end for
     #end for
 #else:
-    #for $file in $action_options.input_format_conditional.data:
-        #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $file.element_identifier)
-        ln -s '$file' $safename_reads_file &&
-        $files_to_upload.append(str($safename_reads_file))
-    #end for
+    #if $action_options.input_format_conditional.run_input_format_conditional.run_input_format == 'paired_list':
+        #for $pair in $action_options.input_format_conditional.run_input_format_conditional.paired_end_collection:
+            #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $pair.name)
+            #if $pair.forward.is_of_type('fastq.gz', 'fastqsanger.gz'):
+                #set $safename_fwd_reads_file = $safename_reads_file + '_1.fastq.gz'
+            #elif $pair.forward.is_of_type('fastqsanger.bz2', 'fastq.bz2'):
+                #set $safename_fwd_reads_file = $safename_reads_file + '_1.fastq.bz2'
+            #else:
+                #set $safename_fwd_reads_file = $safename_reads_file + '_1.fastq'
+            #end if
+            #if $pair.reverse.is_of_type('fastq.gz', 'fastqsanger.gz'):
+                #set $safename_rev_reads_file = $safename_reads_file + '_2.fastq.gz'
+            #elif $pair.reverse.is_of_type('fastqsanger.bz2', 'fastq.bz2'):
+                #set $safename_rev_reads_file = $safename_reads_file + '_2.fastq.bz2'
+            #else:
+                #set $safename_rev_reads_file = $safename_reads_file + '_2.fastq'
+            #end if
+
+            ln -s '$pair.forward' $safename_fwd_reads_file &&
+            $files_to_upload.append(str($safename_fwd_reads_file))
+            ln -s '$pair.reverse' $safename_rev_reads_file &&
+            $files_to_upload.append(str($safename_rev_reads_file))
+        #end for
+    #end if
+    #if $action_options.input_format_conditional.run_input_format_conditional.run_input_format == 'multiple_selection_list':
+        #for $file in $action_options.input_format_conditional.run_input_format_conditional.data:
+            #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $file.element_identifier)
+            ln -s '$file' $safename_reads_file &&
+            $files_to_upload.append(str($safename_reads_file))
+        #end for
+    #end if
 #end if
 
 
-#if $dry_run_option == "False":
+#if $action_options.test_submit_parameters.dry_run == "false":
 ena-upload-cli
     --tool 'ena-upload-cli v@VERSION@ @ Galaxy'
     --action '$action_options.action'
@@ -109,19 +137,15 @@
     #for $dataset in $files_to_upload:
         '$dataset'
     #end for
+--experiment '$experiments_table_path'
+--study '$studies_table_path'
+--run '$runs_table_path'
+--sample '$samples_table_path'
 #if $action_options.input_format_conditional.input_format == "user_generated_tables":
-    --experiment '$action_options.input_format_conditional.experiments_users_table'
-    --study '$action_options.input_format_conditional.studies_users_table'
-    --run '$action_options.input_format_conditional.runs_users_table'
-    --sample '$action_options.input_format_conditional.samples_users_table'
     #if "$action_options.input_format_conditional.viral_submission" == "true":
         --vir
     #end if
 #else:
-    --experiment '$experiments_table_path'
-    --study '$studies_table_path'
-    --run '$runs_table_path'
-    --sample '$samples_table_path'
     #if $action_options.input_format_conditional.input_format == "build_tables":
         #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "true":
           --vir
@@ -133,12 +157,19 @@
     #end if
 #end if
 
-#if $action_options.submit_dev == "true":
+#if $action_options.test_submit_parameters.submit_dev == "true":
     -d
 #end if
-  > '$output'
+    >> '$output';
+    echo -e 'center_name\t$action_options.center' >> '$output';
+    echo -e 'action_option\t$action_options.action' >> '$output';
+    #if $action_options.input_format_conditional.input_format != "user_generated_tables":
+        cp $studies_table_path_updated $studies_table_out 2>/dev/null;
+        cp $samples_table_path_updated $samples_table_out 2>/dev/null;
+        cp $experiments_table_path_updated $experiments_table_out 2>/dev/null;
+        cp $runs_table_path_updated $runs_table_out 2>/dev/null;
+    #end if
 #else:
-    echo ""
     exit 0;
 #end if
 
@@ -170,9 +201,9 @@
               #set $safename_reads_file = re.sub('[^\w\-_\.]', '_', $file.element_identifier)
               $run_files.append(str($safename_reads_file))
             #end for
-            $runs.append($run_files)
+            $runs.append((str($run.run_base_name),$run_files))
         #end for
-
+    
 $experiments.append({'title':str($experiment.experiment_title),'experiment_design':str($experiment.experiment_design),'library_strategy':str($experiment.library_strategy),'library_source':str($experiment.library_source),'library_selection':str($experiment.library_selection),'library_layout':str($experiment.library_layout),'insert_size':str($experiment.insert_size),'library_construction_protocol':str($experiment.library_construction_protocol),'platform':str($experiment.platform),'instrument_model':str($experiment.instrument_model),'runs':$runs})
       #end for
       #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "true":
@@ -196,17 +227,18 @@
                 <option value="modify">Modify metadata</option>
             </param>
             <when value="add">
-                <param name="submit_dev" type="boolean" label="Submit to test ENA server?" help="By selecting yes the reads will be submitted " />
+                <expand macro="test_submit_section"/>    
                 <param name="test_submit" type="hidden" value="False" />
                 <expand macro="table_inputs_macro" />
             </when>
             <when value="modify">
+                <expand macro="test_submit_section"/>    
                 <expand macro="table_inputs_macro" />
             </when>
         </conditional>
     </inputs>
     <outputs>
-        <data name="output" format="data" label="${tool.name} on ${on_string}: Upload summary"/>
+        <data name="output" format="txt" label="ENA submission receipt"/>
         <data name="studies_table_out" format="tabular" label="Studies table">
             <filter> action_options['input_format_conditional']['input_format'] == "build_tables" or action_options['input_format_conditional']['input_format'] == "excel_tables"</filter>
         </data>
@@ -225,13 +257,18 @@
         <test>
             <conditional name="action_options">
                 <param name="action" value="add"/>
-                <param name="submit_dev" value="False"/>
+                <section name="test_submit_parameters">
+                    <param name="submit_dev" value="false" />
+                    <param name="dry_run" value="true" />
+                </section>
                 <conditional name="input_format_conditional">
                     <param name="input_format" value="excel_tables"/>
                     <param name="viral_submission" value="True"/>
-                    <param name="dry_run" value="True"/>
                     <param name="xlsx_file" value="metadata_test_viral.xlsx"/>
-                    <param name="data" value="sample.fq"/>
+                    <conditional name="run_input_format_conditional">
+                        <param name="run_input_format" value="multiple_selection_list"/>
+                        <param name="data" value="sample.fq"/>
+                    </conditional>
                 </conditional>
             </conditional>
             <param name="center" value="Some research center"/>
@@ -271,13 +308,18 @@
         <test>
             <conditional name="action_options">
                 <param name="action" value="add"/>
-                <param name="submit_dev" value="False"/>
+                <section name="test_submit_parameters">
+                    <param name="submit_dev" value="false" />
+                    <param name="dry_run" value="true" />
+                </section>
                 <conditional name="input_format_conditional">
                     <param name="input_format" value="excel_tables"/>
                     <param name="viral_submission" value="False"/>
-                    <param name="dry_run" value="True"/>
                     <param name="xlsx_file" value="metadata_test_nonviral.xlsx"/>
-                    <param name="data" value="sample.fq"/>
+                    <conditional name="run_input_format_conditional">
+                        <param name="run_input_format" value="multiple_selection_list"/>
+                        <param name="data" value="sample.fq"/>
+                    </conditional>
                 </conditional>
             </conditional>
             <param name="center" value="Some research center"/>
@@ -311,14 +353,50 @@
                 </assert_contents>
             </output>
         </test>
+        <!--Test failure on excel input of NON-VIRAL samples with runs PAIRED collection -->
+        <test expect_failure="true">
+            <conditional name="action_options">
+                <param name="action" value="add"/>
+                <section name="test_submit_parameters">
+                    <param name="submit_dev" value="false" />
+                    <param name="dry_run" value="false" />
+                </section>
+                <conditional name="input_format_conditional">
+                    <param name="input_format" value="excel_tables"/>
+                    <param name="viral_submission" value="False"/>
+                    <param name="xlsx_file" value="metadata_test_nonviral.xlsx"/>
+                    <conditional name="run_input_format_conditional">
+                        <param name="run_input_format" value="paired_list"/>
+                        <param name="paired_end_collection">
+                            <collection type="list:paired">
+                                <element name="paired_run_name">
+                                    <collection type="paired">
+                                        <element name="forward" value="1.fastqsanger.gz" ftype="fastqsanger.gz" />
+                                        <element name="reverse" value="2.fastqsanger.gz" ftype="fastqsanger.gz" />
+                                    </collection>
+                                </element>
+                            </collection>
+                        </param>
+                    </conditional>
+                </conditional>
+            </conditional>
+            <param name="center" value="Some research center"/>
+            <assert_command>
+                <has_text_matching expression="ena-upload-cli"/>
+                <has_text_matching expression="--data 'paired_run_name_1.fastq.gz' 'paired_run_name_2.fastq.gz'"/>
+                <has_text_matching expression="--action 'add' --center 'Some research center'"/>
+            </assert_command>
+        </test>
         <!--Test build tables from user input fields NON-VIRAL samples-->
         <test>
             <conditional name="action_options">
                 <param name="action" value="add"/>
-                <param name="submit_dev" value="False"/>
+                <section name="test_submit_parameters">
+                    <param name="submit_dev" value="false" />
+                    <param name="dry_run" value="true" />
+                </section>
                 <conditional name="input_format_conditional">
                     <param name="input_format" value="build_tables"/>
-                    <param name="dry_run" value="True"/>
                     <conditional name="conditional_viral_metadata">
                         <param name="viral_sample" value="False"/>
                         <repeat name="rep_study">
@@ -383,12 +461,14 @@
         <test expect_failure="true">
             <conditional name="action_options">
                 <param name="action" value="add"/>
-                <param name="submit_dev" value="True"/>
+                <section name="test_submit_parameters">
+                    <param name="submit_dev" value="true" />
+                    <param name="dry_run" value="false" />
+                </section>
                 <conditional name="input_format_conditional">
                     <param name="input_format" value="build_tables"/>
-                    <param name="dry_run" value="False"/>
                     <conditional name="conditional_viral_metadata">
-                        <param name="viral_sample" value="False"/>
+                        <param name="viral_sample" value="false"/>
                         <repeat name="rep_study">
                             <param name="study_title" value="Test study title"/>
                             <param name="study_abstract" value="Test study abstract"/>
@@ -428,13 +508,15 @@
         <test expect_failure="true">
             <conditional name="action_options">
                 <param name="action" value="add"/>
-                <param name="submit_dev" value="True"/>
+                <section name="test_submit_parameters">
+                    <param name="submit_dev" value="true" />
+                    <param name="dry_run" value="false" />
+                </section>
                 <param name="test_submit" value="True"/>
                 <conditional name="input_format_conditional">
                     <param name="input_format" value="build_tables"/>
-                    <param name="dry_run" value="False"/>
                     <conditional name="conditional_viral_metadata">
-                        <param name="viral_sample" value="False"/>
+                        <param name="viral_sample" value="false"/>
                         <repeat name="rep_study">
                             <param name="study_title" value="Test study title"/>
                             <param name="study_abstract" value="Test study abstract"/>
@@ -457,6 +539,7 @@
                                     <param name="platform" value="ILLUMINA"/>
                                     <param name="instrument_model" value="Illumina HiSeq 4000"/>
                                     <repeat name="rep_runs">
+                                        <param name="run_base_name" value="run_from_hospital_X"/>
                                         <param name="upload_files" value="1.fastqsanger.gz,sample.fq" ftype="fastqsanger"/>
                                     </repeat>
                                 </repeat>
@@ -472,18 +555,20 @@
                 <has_text_matching expression="--action 'add' --center 'Some research center'"/>
             </assert_command>
             <assert_stderr>
-                <has_text_matching expression="ENA_upload: error: Oops, the file test_fake_path does not exist"/>
+                <has_text_matching expression="Oops, the file test_fake_path does not exist"/>
             </assert_stderr>
         </test>
         <!--test viral submission - User input metadata-->
         <test expect_failure="true">
             <conditional name="action_options">
                 <param name="action" value="add"/>
-                <param name="submit_dev" value="False"/>
+                <section name="test_submit_parameters">
+                    <param name="submit_dev" value="false" />
+                    <param name="dry_run" value="false" />
+                </section>
                 <param name="test_submit" value="True"/>
                 <conditional name="input_format_conditional">
                     <param name="input_format" value="build_tables"/>
-                    <param name="dry_run" value="False"/>
                     <conditional name="conditional_viral_metadata">
                         <param name="viral_sample" value="True"/>
                         <repeat name="rep_study">
@@ -518,6 +603,7 @@
                                     <param name="platform" value="ILLUMINA"/>
                                     <param name="instrument_model" value="Illumina HiSeq 4000"/>
                                     <repeat name="rep_runs">
+                                        <param name="run_base_name" value="run_from_hospital_X"/>
                                         <param name="upload_files" value="1.fastqsanger.gz,sample.fq" ftype="fastqsanger"/>
                                     </repeat>
                                 </repeat>
@@ -534,7 +620,7 @@
                 <has_text_matching expression="--vir"/>
             </assert_command>
             <assert_stderr>
-                <has_text_matching expression="ENA_upload: error: Oops, the file test_fake_path does not exist"/>
+                <has_text_matching expression="Oops, the file test_fake_path does not exist"/>
             </assert_stderr>
         </test>
     </tests>
--- a/extract_tables.py	Sat Nov 28 09:45:44 2020 +0000
+++ b/extract_tables.py	Fri Apr 30 12:09:25 2021 +0000
@@ -3,6 +3,11 @@
 import pathlib
 from datetime import datetime
 
+"""
+Parse the configfile generated by the Galaxy tool.
+This file is JSON-formatted and should be converted to a set of tabular files.
+"""
+
 FILE_FORMAT = 'fastq'
 
 parser = argparse.ArgumentParser()
@@ -78,11 +83,15 @@
                                                'submission_date_ENA']) + '\n')
             run_index = 0
             # exp['runs'] is a list of lists
-            for run in exp['runs']:
+            for (base_run, run_files) in exp['runs']:
                 run_index += 1
-                run_alias = '.'.join(['run_' + str(run_index), str(exp_index), str(sample_index)]) \
-                            + '_' + timestamp
-                for file_entry in run:
+                if base_run != '':
+                    run_alias = base_run
+                else:
+                    # no alias provided, generate a unique one
+                    run_alias = '_'.join(['run_' + str(run_index), str(exp_index),
+                                          str(sample_index)]) + '_' + timestamp
+                for file_entry in run_files:
                     runs_table.write('\t'.join([run_alias, action, 'ena_run_accession', exp_alias,
                                                 file_entry, FILE_FORMAT, 'file_checksum',
                                                 'submission_date_ENA']) + '\n')
--- a/process_xlsx.py	Sat Nov 28 09:45:44 2020 +0000
+++ b/process_xlsx.py	Fri Apr 30 12:09:25 2021 +0000
@@ -3,7 +3,7 @@
 import sys
 
 import xlrd
-
+import yaml
 
 FILE_FORMAT = 'fastq'
 
@@ -36,15 +36,45 @@
             sheet_col_index = sheet_columns[expected_columns[col]]
             row_dict[expected_columns[col]] = xl_sheet.cell(row_id, sheet_col_index).value
         # should check for duplicate alias/ids?
-        data_dict[xl_sheet.cell(row_id, index_col).value] = row_dict
+        alias = xl_sheet.cell(row_id, index_col).value
+        previous = data_dict.get(alias)
+        if isinstance(previous, list):
+            previous.append(row_dict)  # 3rd+ row for this alias: keep the list flat
+        else:
+            data_dict[alias] = row_dict if previous is None else [previous, row_dict]
     return data_dict
 
 
+def paste_xls2yaml(xlsx_path):
+    """Print the workbook contents to stdout as YAML.
+
+    Every sheet except 'controlled_vocabulary' is dumped as a mapping of
+    row index to {column name: cell value}, between two marker lines.
+    """
+    delimiter = 'YAML -------------'
+    print(delimiter)
+    workbook = xlrd.open_workbook(xlsx_path)
+    workbook_contents = {}
+    for name in workbook.sheet_names():
+        if name != 'controlled_vocabulary':
+            sheet = workbook.sheet_by_name(name)
+            # the first row carries the column names
+            headers = [sheet.cell(0, c).value for c in range(sheet.ncols)]
+            # rows 0 and 1 hold column names and suggestions; data starts at row 2
+            workbook_contents[name] = {
+                r: {headers[c]: sheet.cell(r, c).value for c in range(sheet.ncols)}
+                for r in range(2, sheet.nrows)
+            }
+    yaml.dump(workbook_contents, sys.stdout)
+    print(delimiter)
+
+
 parser = argparse.ArgumentParser()
 parser.add_argument('--form', dest='xlsx_path', required=True)
 parser.add_argument('--out_dir', dest='out_path', required=True)
 parser.add_argument('--action', dest='action', required=True)
 parser.add_argument('--vir', dest='viral_submission', required=False, action='store_true')
+parser.add_argument('--verbose', dest='verbose', required=False, action='store_true')
 args = parser.parse_args()
 
 xl_workbook = xlrd.open_workbook(args.xlsx_path)
@@ -77,9 +107,11 @@
 xl_sheet = xl_workbook.sheet_by_name('ENA_experiment')
 if xl_sheet.nrows < 3:
     raise ValueError('No experiments found in experiments sheet')
-exp_columns = ['alias', 'title', 'study_alias', 'sample_alias', 'design_description', 'library_name',
-               'library_strategy', 'library_source', 'library_selection', 'library_layout',
-               'insert_size', 'library_construction_protocol', 'platform', 'instrument_model']
+exp_columns = ['alias', 'title', 'study_alias', 'sample_alias', 'design_description',
+               'library_name', 'library_strategy', 'library_source', 'library_selection',
+               'library_layout', 'insert_size', 'library_construction_protocol',
+               'platform', 'instrument_model']
+
 experiments_dict = extract_data(xl_sheet, exp_columns)
 
 # PARSE RUNS SHEET
@@ -123,6 +155,8 @@
 # ADD A TIMESTAMP TO THE ALIAS? SEEMS LIKE ENA REQUIRES ALL ENTRIES FOR A WEBIN TO HAVE UNIQUE IDS?
 # dt_oobj = datetime.now(tz=None)
 # timestamp = dt_oobj.strftime("%Y%m%d_%H:%M:%S")
+runs_included = []
+exp_included = []
 for study_alias, study in studies_dict.items():
     # study_alias = study_alias + '_' + timestamp
     studies_table.write('\t'.join([study_alias, action, 'ENA_accession', study['title'],
@@ -162,12 +196,37 @@
                                                exp['library_construction_protocol'],
                                                exp['platform'], exp['instrument_model'],
                                                'submission_date_ENA']) + '\n')
+            exp_included.append(exp_alias)
             for run_alias, run in runs_dict.items():
-                if run['experiment_alias'] == exp_alias:
-                    runs_table.write('\t'.join([run_alias, action, 'ena_run_accession', exp_alias,
-                                                run['file_name'], FILE_FORMAT, 'file_checksum',
-                                                'submission_date_ENA']) + '\n')
+                # a run alias maps to a list of rows when several sheet rows share it
+                # (e.g. the files of a paired run); normalise both shapes to a list
+                if not isinstance(run, list):
+                    runs_list = [run]
+                else:
+                    runs_list = run
+                for run_entry in runs_list:
+                    if run_entry['experiment_alias'] == exp_alias:
+                        runs_table.write('\t'.join([run_alias, action, 'ena_run_accession',
+                                                    exp_alias, run_entry['file_name'],
+                                                    FILE_FORMAT, 'file_checksum',
+                                                    'submission_date_ENA']) + '\n')
+                        runs_included.append(run_alias)  # only written runs count as used
+
+# report runs/experiments listed in the sheets that never made it into the tables
+for run in runs_dict.keys():
+    if run not in runs_included:
+        print(f'The run {run} is listed in the runs section but not associated with any '
+              'used experiment')
+
+for exp in experiments_dict.keys():
+    if exp not in exp_included:
+        print(f'The experiment {exp} is listed in the experiments section but not associated '
+              'with any used sample')
+
 studies_table.close()
 samples_table.close()
 experiments_table.close()
 runs_table.close()
+
+if args.verbose:
+    paste_xls2yaml(args.xlsx_path)
--- a/samples_macros.xml	Sat Nov 28 09:45:44 2020 +0000
+++ b/samples_macros.xml	Fri Apr 30 12:09:25 2021 +0000
@@ -1,5 +1,24 @@
 <macros>
-
+    <xml name="test_submit_section">
+        <section name="test_submit_parameters" expanded="true" title="Testing options">
+            <param name="submit_dev" type="boolean" truevalue="true" falsevalue="false" label="Submit to test ENA server?" help="By selecting yes the reads will be submitted to the ENA test server. Uploads to test platform will not be public and will be removed in 24hrs. Performing a preliminary test upload is advised to check for errors with metadata structure. You can find the uploads to the test platform at https://wwwdev.ebi.ac.uk/ena/" />
+            <param name="dry_run" type="boolean" truevalue="true" falsevalue="false" label="Print the tables but do not submit the datasets" help="If yes is selected then NO submission will be performed."/>
+        </section>
+    </xml>
+    <xml name="run_inputs_macro">
+        <conditional name="run_input_format_conditional">
+            <param name="run_input_format" type="select" label="Select runs input format">
+                <option value="multiple_selection_list" selected="True">Select individual datasets or datasets collection</option>
+                <option value="paired_list" selected="False">Input from a paired collection</option>
+            </param>
+            <when value="multiple_selection_list">
+                <param name="data" type="data" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="Select individual datasets or a dataset collection" help="Names should match the compressed run's files names defined in the metadata"/>
+            </when>
+            <when value="paired_list">
+                <param name="paired_end_collection" collection_type="list:paired" type="data_collection" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" label="List of paired-end runs files" help="Names should match the compressed run's files names defined in the metadata" />
+            </when>
+        </conditional>
+    </xml>
     <xml name="table_inputs_macro">
         <conditional name="input_format_conditional">
             <param name="input_format" type="select" label="Would you like to submit pregenerated table files or interactively define the input structures?">
@@ -9,22 +28,20 @@
             </param>
             <when value="excel_tables">
                 <param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" />
-                <param name="dry_run" type="boolean" label="Print the tables but do not submit the datasets" help="If yes is selected then NO submission will be performed."/>
-                <param name="xlsx_file" type="data" format="xlsx" />
-                <param name="data" type="data" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="Select all datasets to upload" help="Compressed reads files listed in the runs table"/>
+                <param name="xlsx_file" type="data" format="xlsx" label="Select Excel (xlsx) file based on templates" />
+                <expand macro="run_inputs_macro" />
             </when>
             <when value="user_generated_tables">
                 <param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" />
-                <param name="data" type="data" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="Select all datasets to upload" help="Compressed reads files listed in the runs table"/>
+                <expand macro="run_inputs_macro" />
                 <param name="studies_users_table" type="data" format="tabular" multiple="false" label="Studies table" help="Studies metadata file"/>
                 <param name="samples_users_table" type="data" format="tabular" multiple="false" label="Samples table" help="Samples metadata file"/>
                 <param name="experiments_users_table" type="data" format="tabular" multiple="false" label="Experiments table" help="Experiments metadata file"/>
                 <param name="runs_users_table" type="data" format="tabular" multiple="false" label="Runs table" help="Runs metadata file"/>
             </when>
             <when value="build_tables">
-                <param name="dry_run" type="boolean" label="Print the tables but do not submit the datasets" help="If yes is selected then NO submission will be performed."/>
                 <conditional name="conditional_viral_metadata">
-                    <param name="viral_sample" type="boolean" label="Does your submission contains viral samples?" />
+                    <param name="viral_sample" type="boolean" truevalue="true" falsevalue="false" label="Does your submission contain viral samples?" /> <!-- truevalue is spelled out so the boolean selects the when value="true" branch of this conditional -->
                     <when value="true">
                         <expand macro="viral_samples" />
                     </when>
@@ -51,8 +68,11 @@
                 <param name="sample_description" type="text" help="e.g: liver cells" label="Describe the type of sample"/>
                 <param name="scientific_name" type="text" label="Enter the species of the sample" help="e.g Severe acute respiratory syndrome coronavirus 2"/>
                 <param name="tax_id" type="text" label="Enter the taxonomic ID corresponding to the sample species" />
-                <param name="collection_date" type="text" label="Collection date" optional="True" help="options are: YYYY, YYYY/MM, YYYY/MM/DD, not collected, restricted access or leave blank">
-                    <validator type="regex"  message="Data format is not valid">(^[0-9]{4}(-[0-9]{2}(-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?Z?([+-][0-9]{1,2})?)?)?)?(/[0-9]{4}(-[0-9]{2}(-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?Z?([+-][0-9]{1,2})?)?)?)?)?$)|(^not colected$)|(^not provided$)|(^restricted access$)</validator>
+                <param name="collection_date" type="text" label="Collection date" optional="True" help="options are: YYYY, YYYY-MM, YYYY-MM-DD, not collected, restricted access or not provided">
+                    <option value="not collected">not collected</option>
+                    <option value="restricted access">restricted access</option>
+                    <option value="not provided">not provided</option>
+                    <validator type="regex"  message="Data format is not valid">(^[0-9]{4}(-[0-9]{2}(-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?Z?([+-][0-9]{1,2})?)?)?)?(/[0-9]{4}(-[0-9]{2}(-[0-9]{2}(T[0-9]{2}:[0-9]{2}(:[0-9]{2})?Z?([+-][0-9]{1,2})?)?)?)?)?$)|(^not collected$)|(^not provided$)|(^restricted access$)</validator>
                 </param>
                 <param name="geo_location_country" type="select" label="Select the country where the sample was obtained">
                     <options from_data_table="geographic_location_1">
@@ -118,6 +138,7 @@
                         </options>
                     </param>
                     <repeat name="rep_runs" title="Runs executed within this experiment" min="1" >
+                        <param name="run_base_name" type="text" optional="False" value="" label="Run alias" help="If an alias is not provided it will be generated combining the sample and experiment indexes"/> <!-- starts empty; per the help text an empty alias is auto-generated. NOTE(review): confirm optional="False" still admits an empty value -->
                         <param name="upload_files" type="data" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="File(s) associated with this run"/>
                     </repeat>
                 </repeat>
@@ -166,7 +187,7 @@
                 <param name="library_construction_protocol" type="text" label="Please describe the library construction protocol"/>
                 <param name="platform" type="select" label="Select the sequencing platform used">
                     <option value="LS454">LS454</option>
-                    <option value="ILLUMINA">Illumina</option>
+                    <option value="ILLUMINA" selected="True">Illumina</option>
                     <option value="HELICOS">Helicos</option>
                     <option value="ABI_SOLID">ABI Solid</option>
                     <option value="COMPLETE_GENOMICS">Complete Genomics</option>
@@ -182,6 +203,7 @@
                     </options>
                 </param>
                 <repeat name="rep_runs" title="Runs executed within this experiment" min="1" >
+                    <param name="run_base_name" type="text" optional="False" value="" label="Run alias" help="If an alias is not provided it will be generated combining the sample and experiment indexes"/> <!-- starts empty; per the help text an empty alias is auto-generated. NOTE(review): confirm optional="False" still admits an empty value -->
                     <param name="upload_files" type="data" format="fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="true" label="File(s) associated with this run"/>
                 </repeat>
             </repeat>
Binary file test-data/2.fastqsanger.gz has changed