Repository 'ena_upload'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ena_upload

Changeset 11:f803a68fc9e5 (2024-03-23)
Previous changeset 10:480d9e9d156b (2023-10-27) Next changeset 12:29648e5e83a8 (2024-03-27)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit c7f54e6e3b1ffad5631ff7c150449cb79bd3686d
modified:
ena_upload.xml
samples_macros.xml
added:
test-data/ENA_TEST1.R1.fastq.gz
test-data/ENA_TEST2.R1.fastq.gz
test-data/ENA_TEST2.R2.fastq.gz
test-data/simple_test_case_v2.json
b
diff -r 480d9e9d156b -r f803a68fc9e5 ena_upload.xml
--- a/ena_upload.xml Fri Oct 27 19:44:40 2023 +0000
+++ b/ena_upload.xml Sat Mar 23 22:06:45 2024 +0000
[
b'@@ -1,6 +1,7 @@\n-<tool id="ena_upload" name="ENA Upload tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05" license="MIT">\n+<tool id="ena_upload" name="ENA Upload tool" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05" license="MIT">\n+    <description>Submission of (meta)data to the European Nucleotide Archive (ENA)</description>\n     <macros>\n-        <token name="@TOOL_VERSION@">0.6.3</token>\n+        <token name="@TOOL_VERSION@">0.7.1</token>\n         <token name="@VERSION_SUFFIX@">0</token>\n         <import>samples_macros.xml</import>\n     </macros>\n@@ -48,7 +49,10 @@\n #if $action_options.input_format_conditional.input_format == "excel_tables":\n     ln -s \'$action_options.input_format_conditional.xlsx_file\' ./xlsx_input.xlsx &&\n #end if\n-#if $action_options.test_submit_parameters.dry_run == "False" and $action_options.test_submit == "False":\n+#if $action_options.input_format_conditional.input_format == "isa_json":\n+    ln -s \'$action_options.input_format_conditional.isa_json_file\' ./isa_json_input.json &&\n+#end if\n+#if $action_options.test_submit_parameters.dry_run == "False":\n     webin_id=`grep \'username\' $credentials`;\n     if [ "\\$webin_id" = "" ]; then\n       ## No credentials in user defined preferences    \n@@ -171,95 +175,52 @@\n     #end if\n #end if\n \n-#if $action_options.action == "add":\n ena-upload-cli\n     --tool \'ena-upload-cli v@TOOL_VERSION@ @ Galaxy\'\n     --action \'$action_options.action\'\n-    --center \'$action_options.center\'\n+    --center \'$action_options.test_submit_parameters.center\'\n     --secret \\${credentials_path}\n     --data\n-    #for $dataset in $files_to_upload:\n-        \'$dataset\'\n-    #end for\n+#for $dataset in $files_to_upload:\n+    \'$dataset\'\n+#end for\n #if $action_options.test_submit_parameters.dry_run == "True":\n     --draft\n #end if\n-#if $action_options.input_format_conditional.input_format != "excel_tables":\n+#if $action_options.input_format_conditional.input_format == "excel_tables":\n+    --xlsx ./xlsx_input.xlsx \n+#elif $action_options.input_format_conditional.input_format == "user_generated_tables" or $action_options.input_format_conditional.input_format == "build_tables":\n     --experiment \'$experiments_table_path\'\n     --study \'$studies_table_path\'\n     --run \'$runs_table_path\'\n     --sample \'$samples_table_path\'\n-#else\n-    --xlsx ./xlsx_input.xlsx \n+#elif $action_options.input_format_conditional.input_format == "isa_json":\n+    --isa_json ./isa_json_input.json\n+    --isa_assay_stream \'$isa_assay_stream\'\n #end if\n---action add\n-#if $action_options.input_format_conditional.input_format == "user_generated_tables":\n-        --checklist $action_options.input_format_conditional.checklist_id\n+#if $action_options.input_format_conditional.input_format == "build_tables":\n+    #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "True":\n+        --checklist ERC000033\n+    #end if\n+#elif $action_options.input_format_conditional.input_format == "isa_json":\n+    --checklist ERC000011\n #else:\n-    #if $action_options.input_format_conditional.input_format == "build_tables":\n-        #if $action_options.input_format_conditional.conditional_viral_metadata.viral_sample == "True":\n-          --checklist ERC000033\n-        #end if\n-    #else:\n-          --checklist $action_options.input_format_conditional.checklist_id\n-    #end if\n-#end if\n-#if $action_options.idempotent == "True":\n-    --auto_action\n+    --checklist $action_options.input_format_conditional.checklist_id\n #end if\n #if $action_options.test_submit_parameters.submit_dev == "True":\n     -d\n #end if\n-    >> \'$output\';\n-#end if\n-\n+>> \'$output\';\n \n \n-#if $action_options.action == "modify":\n-    ena-upload-cli\n-    --tool \'ena-upload-cli v@TOOL_VERSION@ @ Galaxy\'\n-    --action \'$action_options.action\'\n-    --center \'$action_options.center\'\n-    --secret \\${credentials_path}\n-    --data\n-    #for $dataset in $files_to_upload:\n-        \'$dataset\'\n-    #end for\n-#if $action_options.'..b'          </assert_command>\n         </test>\n+        <!--Test 9: isa JSON input of NON-VIRAL samples-->\n+        <test expect_num_outputs="5">\n+            <conditional name="action_options">\n+                <param name="action" value="add"/>\n+                <section name="test_submit_parameters">\n+                    <param name="center" value="Some research center"/>\n+                    <param name="submit_dev" value="True" />\n+                    <param name="dry_run" value="True" />\n+                </section>\n+                <conditional name="input_format_conditional">\n+                    <param name="input_format" value="isa_json"/>\n+                    <param name="isa_json_file" value="simple_test_case_v2.json"/>\n+                    <param name="isa_assay_stream" value="Ena stream 1"/>\n+                    <conditional name="run_input_format_conditional">\n+                        <param name="add_extension" value="False"/>\n+                        <param name="run_input_format" value="multiple_selection_list"/>\n+                        <param name="data" value="ENA_TEST1.R1.fastq.gz,ENA_TEST2.R1.fastq.gz,ENA_TEST2.R2.fastq.gz"/>\n+                    </conditional>\n+                </conditional>\n+            </conditional>\n+            <output name="experiments_table_out">\n+                <assert_contents>\n+                    <has_n_lines n="4"/>\n+                    <has_n_columns n="17"/>\n+                    <has_line_matching expression="alias\\tstudy_alias\\tsample_alias\\tlibrary_name\\ttitle\\taccession\\tsubmission date\\tstatus\\tdesign_description\\tlibrary_source\\tlibrary_strategy\\tlibrary_selection\\tlibrary_layout\\tinsert_size\\tplatform\\tinstrument_model\\tsubmission_date"/>\n+                </assert_contents>\n+            </output>\n+            <output name="studies_table_out">\n+                <assert_contents>\n+                    <has_n_lines n="2"/>\n+                    <has_n_columns n="9"/>\n+                    <has_line_matching expression="alias\\ttitle\\tstudy_type\\tstudy_abstract\\tnew_study_type\\tpubmed_id\\taccession\\tsubmission_date\\tstatus"/>\n+                </assert_contents>\n+            </output>\n+            <output name="samples_table_out">\n+                <assert_contents>\n+                    <has_n_lines n="3"/>\n+                    <has_n_columns n="11"/>\n+                </assert_contents>\n+            </output>\n+            <output name="runs_table_out">\n+                <assert_contents>\n+                    <has_n_lines n="4"/>\n+                    <has_n_columns n="10"/>\n+                    <has_line_matching expression="alias\\texperiment_alias\\tfile_name\\tfile_type\\tfile checksum\\taccession\\tsubmission date\\tstatus\\tsubmission_date\\tfile_checksum"/>\n+                </assert_contents>\n+            </output>\n+        </test>\n     </tests>\n-    <help><![CDATA[\n-        This is a wrapper for the ENA upload tool in https://github.com/usegalaxy-eu/ena-upload-cli\n-        The input metadata can be submitted following the tabular format of the templates or their excel spreadsheet equivalent in https://github.com/ELIXIR-Belgium/ENA-metadata-templates. This template repo provides ready to use sheets for every ENA sample checklist and is automatically updated.\n-    ]]></help>\n+    <help><![CDATA[This is a wrapper for the ENA upload tool in https://github.com/usegalaxy-eu/ena-upload-cli. The input metadata can be submitted following the tabular format of the templates or their excel spreadsheet equivalent in https://github.com/ELIXIR-Belgium/ENA-metadata-templates. This template repo provides ready to use sheets for every ENA sample checklist and is automatically updated.\n+    \n+        .. class:: warningmark\n+    \n+            The ENA upload tool won\'t work unless you have provided an ENA Webin ID in User > Preferences > Manage Information > ENA Webin account details.]]></help>\n     <citations>\n         <citation type="doi">doi:10.1093/bioinformatics/btab421</citation>\n     </citations>\n'
b
diff -r 480d9e9d156b -r f803a68fc9e5 samples_macros.xml
--- a/samples_macros.xml Fri Oct 27 19:44:40 2023 +0000
+++ b/samples_macros.xml Sat Mar 23 22:06:45 2024 +0000
b
b'@@ -1,6 +1,6 @@\n <macros>\n     <xml name="checklist_input_macro">\n-        <param type="select" name="checklist_id" label="Select the metadata checklist" help="You can find metadata templates for each checklist at: https://github.com/ELIXIR-Belgium/ENA-metadata-templates">\n+        <param type="select" name="checklist_id" label="Select the ENA sample checklist" help="You can find metadata templates for each checklist at: https://github.com/ELIXIR-Belgium/ENA-metadata-templates">\n             <option value="ERC000011">ENA default sample checklist (ERC000011)</option>\n             <option value="ERC000012">GSC MIxS air (ERC000012)</option>\n             <option value="ERC000013">GSC MIxS host associated (ERC000013)</option>\n@@ -44,37 +44,39 @@\n         </param>\n     </xml>\n     <xml name="test_submit_section">\n-        <section name="test_submit_parameters" expanded="True" title="Testing options">\n-            <param name="submit_dev" type="boolean" truevalue="True" falsevalue="False" label="Submit to test ENA server?" help="By selecting yes the reads will be submitted to the ENA test server. Uploads to test platform will not be public and will be removed in 24hrs. Performing a preliminary test upload is advised to check for errors with metadata structure. You can find the uploads to the test platform at https://wwwdev.ebi.ac.uk/ena/" />\n-            <param name="dry_run" type="boolean" truevalue="True" falsevalue="False" label="Print the tables but do not submit the datasets" help="If yes is selected then NO submission will be performed."/>\n+        <section name="test_submit_parameters" expanded="True" title="Submissions options">\n+            <param name="center" type="text" optional="False" label="Affiliation center"/>\n+            <param name="submit_dev" type="boolean" truevalue="True" falsevalue="False" label="Submit to ENA test server" help="Uploads to the test server of ENA will not be made public and will be removed automatically in 24 hours. Performing a preliminary test upload is advised to check for errors with metadata structure. You can find these uploads at https://wwwdev.ebi.ac.uk/ena/." />\n+            <param name="dry_run" type="boolean" truevalue="True" falsevalue="False" label="Create test outputs without submitting (meta)data to ENA" help="If yes is selected then NO submission will be performed."/>\n         </section>\n     </xml>\n     <xml name="run_inputs_macro">\n         <conditional name="run_input_format_conditional">\n-            <param name="run_input_format" type="select" label="Select runs input format">\n-                <option value="multiple_selection_list" selected="True">Select individual datasets or datasets collection</option>\n-                <option value="paired_list" selected="False">Input from a paired collection</option>\n+            <param name="run_input_format" type="select" label="Select input data">\n+                <option value="multiple_selection_list" selected="True">Dataset or dataset collection</option>\n+                <option value="paired_list" selected="False">Paired dataset collection</option>\n             </param>\n             <when value="multiple_selection_list">\n-                <param name="add_extension" type="boolean" checked="False" label="Add .fastq.(gz,.bz2) extension to the Galaxy dataset names to match the ones described in the input tables?"/>\n-                <param name="data" type="data" format="fastq,fastqsanger,fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="True" label="Select individual datasets or a dataset collection" help="Names should match the compressed run\'s files names defined in the metadata"/>\n+                <param name="add_extension" type="boolean" checked="False" label="Add .fastq (.gz, .bz2) extension to the Galaxy dataset names to match the ones described in the input tables?"/>\n+                <param name="data" type="data" format="fastq,fastqsanger,fastqsanger.gz,fastqsanger.bz2,fastq.gz,fastq.bz2" multiple="True" labe'..b'hould match the compressed run\'s files names defined in the metadata" />\n             </when>\n         </conditional>\n     </xml>\n     <xml name="table_inputs_macro">\n         <conditional name="input_format_conditional">\n-            <param name="input_format" type="select" label="Would you like to submit pregenerated table files or interactively define the input structures?">\n-                <option value="excel_tables" selected="True">User generated metadata tables based on Excel template</option>\n-                <option value="build_tables" selected="False">Interactive generation of the study structure (only recommended for small studies)</option>\n-                <option value="user_generated_tables" selected="False">User generated tabular files (studies - samples - experiments - runs) </option>\n+            <param name="input_format" type="select" label="Select the metadata input method">\n+                <option value="excel_tables" selected="True">Excel file</option>\n+                <option value="user_generated_tables" selected="False">Tabular files (studies - samples - experiments - runs)</option>\n+                <option value="isa_json" selected="False">ISA json file</option>\n+                <option value="build_tables" selected="False">Interactively (only recommended for small studies)</option>\n             </param>\n             <when value="excel_tables">\n                 <!--<param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" />-->\n                 <expand macro="checklist_input_macro"/>\n-                <param name="xlsx_file" type="data" format="xlsx" label="Select Excel (xlsx) file based on template" />\n+                <param name="xlsx_file" type="data" format="xlsx" label="Select Excel (.xlsx) file based on template" />\n                 <expand macro="run_inputs_macro" />\n             </when>\n             <when value="user_generated_tables">\n@@ -87,7 +89,7 @@\n                 <param name="runs_users_table" type="data" format="tabular" multiple="False" label="Runs table" help="Runs metadata file"/>\n             </when>\n             <when value="build_tables">\n-                <param name="add_extension" type="boolean" checked="False" label="Add .fastq.(gz.bz2) extension to the Galaxy dataset names to match the ones described in the input tables?"/>\n+                <param name="add_extension" type="boolean" checked="False" label="Add .fastq (.gz, .bz2) extension to the Galaxy dataset names to match the ones described in the input tables?"/>\n                 <conditional name="conditional_viral_metadata">\n                     <param name="viral_sample" type="select" label="Select your sample type" help="Non-viral interactive submissions lack specific sample checklist attributes and are suboptimal. It\'s highly recommended to use Excel or TSV templates as input for a more comprehensive submission.">\n                         <option value="True" selected="True">Viral</option>\n@@ -101,8 +103,13 @@\n                     </when>\n                 </conditional>\n             </when>\n+            <when value="isa_json">\n+                <param name="isa_json_file" type="data" format="json" label="Select ISA json file" />\n+                <param name="isa_assay_stream" type="text" optional="False" help="List the assay_stream values from the comment field in the assays list of the ISA-json you want to submit." label="Specify the ISA json assay stream(s)" />\n+                <expand macro="run_inputs_macro" />\n+            </when>\n         </conditional>\n-        <param name="center" type="text" optional="False" label="Affiliation center"/>\n+        \n     </xml>\n     <xml name="viral_samples">\n         <repeat name="rep_study" title="Study" min="1">\n'
b
diff -r 480d9e9d156b -r f803a68fc9e5 test-data/ENA_TEST1.R1.fastq.gz
b
Binary file test-data/ENA_TEST1.R1.fastq.gz has changed
b
diff -r 480d9e9d156b -r f803a68fc9e5 test-data/ENA_TEST2.R1.fastq.gz
b
Binary file test-data/ENA_TEST2.R1.fastq.gz has changed
b
diff -r 480d9e9d156b -r f803a68fc9e5 test-data/ENA_TEST2.R2.fastq.gz
b
Binary file test-data/ENA_TEST2.R2.fastq.gz has changed
b
diff -r 480d9e9d156b -r f803a68fc9e5 test-data/simple_test_case_v2.json
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/simple_test_case_v2.json Sat Mar 23 22:06:45 2024 +0000
[
b'@@ -0,0 +1,1631 @@\n+{\n+  "identifier": "",\n+  "title": "Test Case ENA upload tool",\n+  "description": "This investigation matches the test case of the ENA upload tool",\n+  "submissionDate": "",\n+  "publicReleaseDate": "",\n+  "ontologySourceReferences": [],\n+  "filename": "Test Case ENA upload tool.txt",\n+  "comments": [\n+    {\n+      "name": "ISAjson export time",\n+      "value": "2023-11-08T16:27:49Z"\n+    },\n+    {\n+      "name": "SEEK Project name",\n+      "value": "Test Project ENA upload Tool"\n+    },\n+    {\n+      "name": "SEEK Project ID",\n+      "value": "http://localhost:3000/single_pages/16"\n+    },\n+    {\n+      "name": "SEEK Investigation ID",\n+      "value": "27"\n+    }\n+  ],\n+  "publications": [],\n+  "people": [\n+    {\n+      "@id": "#people/4",\n+      "lastName": "De Pelseneer",\n+      "firstName": "Kevin",\n+      "midInitials": "",\n+      "email": "kevin.depelseneer@psb.ugent.be",\n+      "phone": "",\n+      "fax": "",\n+      "address": "",\n+      "affiliation": "",\n+      "roles": [\n+        {\n+          "termAccession": "",\n+          "termSource": "",\n+          "annotationValue": ""\n+        }\n+      ],\n+      "comments": [\n+        {\n+          "@id": "",\n+          "value": "",\n+          "name": ""\n+        }\n+      ]\n+    }\n+  ],\n+  "studies": [\n+    {\n+      "identifier": "",\n+      "title": "Study - Test Case ENA",\n+      "description": "",\n+      "submissionDate": "",\n+      "publicReleaseDate": "",\n+      "filename": "Study - Test Case ENA.txt",\n+      "comments": [\n+        {\n+          "@id": "#study_comment/25_13_49",\n+          "name": "ena_sample_alias_prefix",\n+          "value": "sample_alias_"\n+        },\n+        {\n+          "@id": "#study_comment/25_ae7461a0-6081-013c-9ed3-7a163e608de1",\n+          "name": "SEEK Study ID",\n+          "value": "25"\n+        },\n+        {\n+          "@id": "#study_comment/25_ae7467e0-6081-013c-9ed4-7a163e608de1",\n+          "name": "SEEK creation date",\n+          "value": "2023-10-20T11:12:23Z"\n+        }\n+      ],\n+      "publications": [],\n+      "people": [\n+        {\n+          "@id": "#people/4",\n+          "lastName": "De Pelseneer",\n+          "firstName": "Kevin",\n+          "midInitials": "",\n+          "email": "kevin.depelseneer@psb.ugent.be",\n+          "phone": "",\n+          "fax": "",\n+          "address": "",\n+          "affiliation": "",\n+          "roles": [\n+            {\n+              "termAccession": "",\n+              "termSource": "",\n+              "annotationValue": ""\n+            }\n+          ],\n+          "comments": [\n+            {\n+              "@id": "",\n+              "value": "",\n+              "name": ""\n+            }\n+          ]\n+        }\n+      ],\n+      "studyDesignDescriptors": [],\n+      "characteristicCategories": [\n+        {\n+          "@id": "#characteristic_category/geographic_location_(country_and/or_sea)_657",\n+          "characteristicType": {\n+            "annotationValue": "geographic location (country and/or sea)",\n+            "termAccession": "",\n+            "termSource": ""\n+          }\n+        },\n+        {\n+          "@id": "#characteristic_category/taxon_id_658",\n+          "characteristicType": {\n+            "annotationValue": "taxon_id",\n+            "termAccession": "",\n+            "termSource": ""\n+          }\n+        },\n+        {\n+          "@id": "#characteristic_category/title_662",\n+          "characteristicType": {\n+            "annotationValue": "title",\n+            "termAccession": "",\n+            "termSource": ""\n+          }\n+        },\n+        {\n+          "@id": "#characteristic_category/sample_description_663",\n+          "characteristicType": {\n+            "annotationValue": "sample_description",\n+            "termAccession": "",\n+            "termSource": ""\n+          }\n+        },\n+        {\n+          "@id": "#characteristic_category/collection_date_664",\n+          "characteristicType": {\n+            "annotationValue": "collection date",\n+            "termAccessi'..b'ial/227"\n+                }\n+              ],\n+              "outputs": [\n+                {\n+                  "@id": "#data_file/233"\n+                },\n+                {\n+                  "@id": "#data_file/234"\n+                }\n+              ],\n+              "previousProcess": {\n+                "@id": "#process/library_construction/233"\n+              },\n+              "nextProcess": {}\n+            },\n+            {\n+              "@id": "#process/nucleic_acid_sequencing/235",\n+              "name": "",\n+              "executesProtocol": {\n+                "@id": "#protocol/_49"\n+              },\n+              "parameterValues": [],\n+              "performer": "",\n+              "date": "",\n+              "inputs": [\n+                {\n+                  "@id": "#other_material/229"\n+                }\n+              ],\n+              "outputs": [\n+                {\n+                  "@id": "#data_file/235"\n+                }\n+              ],\n+              "previousProcess": {\n+                "@id": "#process/library_construction/235"\n+              },\n+              "nextProcess": {}\n+            }\n+          ],\n+          "dataFiles": [\n+            {\n+              "@id": "#data_file/233",\n+              "name": "RD file 1a",\n+              "type": "Raw Data File",\n+              "comments": [\n+                {\n+                  "name": "file_name",\n+                  "value": "ENA_TEST2.R1.fastq.gz"\n+                },\n+                {\n+                  "name": "file_type",\n+                  "value": "fastq"\n+                },\n+                {\n+                  "name": "file checksum",\n+                  "value": ""\n+                },\n+                {\n+                  "name": "accession",\n+                  "value": ""\n+                },\n+                {\n+                  "name": "submission date",\n+                  "value": ""\n+                },\n+                {\n+                  "name": "status",\n+                  "value": "add"\n+                }\n+              ]\n+            },\n+            {\n+              "@id": "#data_file/234",\n+              "name": "RD file 1b",\n+              "type": "Raw Data File",\n+              "comments": [\n+                {\n+                  "name": "file_name",\n+                  "value": "ENA_TEST2.R2.fastq.gz"\n+                },\n+                {\n+                  "name": "file_type",\n+                  "value": "fastq"\n+                },\n+                {\n+                  "name": "file checksum",\n+                  "value": ""\n+                },\n+                {\n+                  "name": "accession",\n+                  "value": ""\n+                },\n+                {\n+                  "name": "submission date",\n+                  "value": ""\n+                },\n+                {\n+                  "name": "status",\n+                  "value": "add"\n+                }\n+              ]\n+            },\n+            {\n+              "@id": "#data_file/235",\n+              "name": "RD file 3",\n+              "type": "Raw Data File",\n+              "comments": [\n+                {\n+                  "name": "file_name",\n+                  "value": "ENA_TEST1.R1.fastq.gz"\n+                },\n+                {\n+                  "name": "file_type",\n+                  "value": "fastq"\n+                },\n+                {\n+                  "name": "file checksum",\n+                  "value": ""\n+                },\n+                {\n+                  "name": "accession",\n+                  "value": ""\n+                },\n+                {\n+                  "name": "submission date",\n+                  "value": ""\n+                },\n+                {\n+                  "name": "status",\n+                  "value": "add"\n+                }\n+              ]\n+            }\n+          ],\n+          "unitCategories": []\n+        }\n+      ],\n+      "factors": [],\n+      "unitCategories": []\n+    }\n+  ]\n+}\n\\ No newline at end of file\n'