Repository 'ena_upload'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ena_upload

Changeset 7:6f6537780379 (2022-02-24)
Previous changeset 6:4aab5ae907b6 (2021-11-15) Next changeset 8:d147d6455873 (2022-05-04)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ena_upload commit 9961f24acebb17f837238df6541e1af59df1163b"
modified:
ena_upload.xml
extract_tables.py
samples_macros.xml
test-data/metadata_test_viral.xlsx
test-data/metadata_test_viral_optional_columns.xlsx
added:
dump_yaml.py
test-data/C026_exp5_clean.fastq
test-data/C030_exp5_clean.fastq
test-data/C053_exp5_clean.fastq
test-data/C067_exp5_clean.fastq
test-data/metadata_test_nonviral_1_run.xlsx
test-data/sample_no_extension
removed:
check_remote.py
mappings.py
process_xlsx.py
b
diff -r 4aab5ae907b6 -r 6f6537780379 check_remote.py
--- a/check_remote.py Mon Nov 15 11:47:13 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,23 +0,0 @@
-import json
-
-import requests
-
-URL = "https://www.ebi.ac.uk/ena/portal/api/search"
-
-
-def check_remote_entry(entry_type, query_dict, out_format='json'):
-    '''
-    Checks if an entry with that alias exists in the ENA repos
-    entry_type = [study | sample | experiment | run]
-    '''
-    assert entry_type in ['study', 'sample', 'experiment', 'run']
-    params_dict = {}
-    query_str = ' AND '.join(['%s="%s"' % (key, value) for (key, value) in query_dict.items()])
-    params_dict['query'] = query_str
-    params_dict['result'] = 'read_' + entry_type
-    params_dict['fields'] = entry_type + '_alias'
-    params_dict['format'] = out_format
-    response = requests.post(URL, data=params_dict)
-    if response.content != b'':
-        return json.loads(response.content)
-    return []
b
diff -r 4aab5ae907b6 -r 6f6537780379 dump_yaml.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dump_yaml.py Thu Feb 24 18:20:40 2022 +0000
[
@@ -0,0 +1,40 @@
+import sys
+
+import yaml
+
+
+def fetch_table_data(table_path):
+    data_dict = {}
+    with open(table_path) as table_to_load:
+        # load headers
+        headers = table_to_load.readline().strip('\n').split('\t')
+        row_id = 0
+        for line in table_to_load.readlines():
+            # print(line)
+            line_data = line.strip('\n').split('\t')
+            row_dict = {}
+            for col_num in range(len(headers)):
+                col_name = headers[col_num]
+                row_dict[col_name] = line_data[col_num]
+            data_dict[row_id] = row_dict
+            row_id += 1
+        return data_dict
+
+
+all_data_dict = {}
+print('YAML -------------')
+studies_table_path = sys.argv[1]
+table_data = fetch_table_data(studies_table_path)
+all_data_dict['ENA_study'] = table_data
+samples_table_path = sys.argv[2]
+table_data = fetch_table_data(samples_table_path)
+all_data_dict['ENA_sample'] = table_data
+experiments_table_path = sys.argv[3]
+table_data = fetch_table_data(experiments_table_path)
+all_data_dict['ENA_experiment'] = table_data
+runs_table_path = sys.argv[4]
+table_data = fetch_table_data(runs_table_path)
+all_data_dict['ENA_run'] = table_data
+# print(all_data_dict)
+print(yaml.dump(all_data_dict))
+print('YAML -------------')
b
diff -r 4aab5ae907b6 -r 6f6537780379 ena_upload.xml
--- a/ena_upload.xml Mon Nov 15 11:47:13 2021 +0000
+++ b/ena_upload.xml Thu Feb 24 18:20:40 2022 +0000
[
b'@@ -1,11 +1,10 @@\n <tool id="ena_upload" name="ENA Upload tool" version="@VERSION@" profile="20.01" license="MIT">\n     <macros>\n-        <token name="@VERSION@">0.4.3</token>\n+        <token name="@VERSION@">0.5.3</token>\n         <import>samples_macros.xml</import>\n     </macros>\n     <requirements>\n         <requirement type="package" version="@VERSION@">ena-upload-cli</requirement>\n-        <requirement type="package" version="1.2.0">xlrd</requirement>\n     </requirements>\n     <stdio>\n         <regex match="Oops" source="stderr" level="fatal"/>\n@@ -27,30 +26,27 @@\n #if $action_options.input_format_conditional.input_format == "build_tables":\n   python \'$__tool_directory__/extract_tables.py\' --action $action_options.action --out_dir ./submission_files --studies $studies_json;\n #end if\n+        \n+credentials_path=\'test_fake_path\';\n+echo "username: test_user" > \\$credentials_path;\n+echo "password: test_password" >> \\$credentials_path;\n \n-#if $action_options.input_format_conditional.input_format == "excel_tables":\n-    python \'$__tool_directory__/process_xlsx.py\'\n-    #if $action_options.input_format_conditional.viral_submission == "true":\n-        --vir \n-    #end if\n-    #if $action_options.test_submit_parameters.submit_dev == "true":\n-        --dev\n-    #end if\n-    --action \'$action_options.action\' --form \'$action_options.input_format_conditional.xlsx_file\' --out_dir ./submission_files --verbose > \'$output\';\n-#end if\n \n-#if $action_options.input_format_conditional.input_format != "user_generated_tables":\n+#if $action_options.input_format_conditional.input_format == "build_tables":\n     cp $studies_table_path $studies_table_out &&\n     cp $samples_table_path $samples_table_out &&\n     cp $experiments_table_path $experiments_table_out &&\n     cp $runs_table_path $runs_table_out &&\n-#else:\n+#end if\n+#if $action_options.input_format_conditional.input_format == "user_generated_tables":\n     ln -s \'$action_options.input_format_conditional.experiments_users_table\' $experiments_table_path &&\n     ln -s \'$action_options.input_format_conditional.studies_users_table\' $studies_table_path &&\n     ln -s \'$action_options.input_format_conditional.runs_users_table\' $runs_table_path &&\n     ln -s \'$action_options.input_format_conditional.samples_users_table\' $samples_table_path &&\n #end if\n-\n+#if $action_options.input_format_conditional.input_format == "excel_tables":\n+    ln -s \'$action_options.input_format_conditional.xlsx_file\' ./xlsx_input.xlsx &&\n+#end if\n #if $action_options.test_submit_parameters.dry_run == "false" and $action_options.test_submit == "False":\n     webin_id=`grep \'username\' $credentials`;\n     if [ "\\$webin_id" = "" ]; then\n@@ -73,9 +69,6 @@\n     fi;\n #end if\n \n-#if $action_options.test_submit == "True":\n-    credentials_path=\'test_fake_path\';\n-#end if\n \n ## create the list of files to upload and make the symlinks \n #import re      \n@@ -156,7 +149,7 @@\n             #if $file.is_of_type(\'fastq\', \'fastqsanger\'):\n                 ## always compress add the gz extension\n                 #if $action_options.input_format_conditional.run_input_format_conditional.add_extension == "true":\n-                    #set $safename_reads_file = $safename_reads_file + \'fastq.gz\'\n+                    #set $safename_reads_file = $safename_reads_file + \'.fastq.gz\'\n                 #else\n                     #set $safename_reads_file = $safename_reads_file + \'.gz\'\n                 #end if   \n@@ -164,9 +157,9 @@\n             #else\n                 #if $action_options.input_format_conditional.run_input_format_conditional.add_extension == "true":\n                     #if $file.is_of_type(\'fastq.gz\', \'fastqsanger.gz\'):\n-                        #set $extension = \'fastq.gz\'\n+                        #set $extension = \'.fastq.gz\'\n                     #elif $file.is_of_type(\'fastqsanger.bz2\', \'fastq.bz2\'):\n-                        #set $extension = \'fastq.bz2\'\n+                        #set $extension = \'.fastq.bz2\'\n                     #end if\n    '..b'exist"/>\n-            </assert_stderr>\n         </test>\n         <!--Test 9: modify option and auto compression - viral submission - User input metadata-->\n-        <test expect_failure="true">\n+        <test expect_failure="false">\n             <conditional name="action_options">\n                 <param name="action" value="modify"/>\n                 <section name="test_submit_parameters">\n                     <param name="submit_dev" value="false" />\n-                    <param name="dry_run" value="false" />\n+                    <param name="dry_run" value="true" />\n                 </section>\n                 <param name="test_submit" value="True"/>\n                 <conditional name="input_format_conditional">\n-                    <param name="add_extension" value="False"/>\n+                    <param name="add_extension" value="fasle"/>\n                     <param name="input_format" value="build_tables"/>\n                     <conditional name="conditional_viral_metadata">\n                         <param name="viral_sample" value="True"/>\n@@ -793,7 +805,7 @@\n                                 <param name="sample_title" value="Test Sample title"/>\n                                 <param name="sample_description" value="Test Sample description"/>\n                                 <param name="scientific_name" value="Test Sample scientific name"/>\n-                                <param name="tax_id" value="Test Sample tax_id"/>\n+                                <param name="tax_id" value="2697049"/>\n                                 <param name="collection_date" value="2020"/>\n                                 <param name="geo_location_country" value="Belgium"/>\n                                 <param name="host_common_name" value="Human"/>\n@@ -817,7 +829,7 @@\n                                     <param name="instrument_model" value="Illumina HiSeq 4000"/>\n                                     <repeat name="rep_runs">\n                                         <param name="run_base_name" value="run_from_hospital_X"/>\n-                                        <param name="upload_files" value="sample.fq" ftype="fastqsanger"/>\n+                                        <param name="upload_files" value="1.fastqsanger.gz" ftype="fastqsanger.gz"/>\n                                     </repeat>\n                                 </repeat>\n                             </repeat>\n@@ -828,21 +840,17 @@\n             <param name="center" value="Some research center"/>\n             <assert_command>\n                 <has_text_matching expression="ena-upload-cli"/>\n-                <has_text_matching expression="--data \'sample.fq.gz\'"/>\n+                <has_text_matching expression="--data \'1.fastqsanger.gz\'"/>\n                 <has_text_matching expression="--action \'modify\' --center \'Some research center\'"/>\n                 <has_text_matching expression="--checklist ERC000033"/>\n                 <not_has_text text="add" />\n             </assert_command>\n-            <assert_stderr>\n-                <has_text_matching expression="Oops, the file test_fake_path does not exist"/>\n-            </assert_stderr>\n         </test>\n     </tests>\n     <help><![CDATA[\n         This is a wrapper for the ENA upload tool in https://github.com/usegalaxy-eu/ena-upload-cli\n         The input metadata can be submitted following the tabular format of the templates in https://github.com/usegalaxy-eu/ena-upload-cli/tree/master/example_tables\n-        It is also possible to submit an excel file by following the template in https://drive.google.com/file/d/1ncC22--tW2v-EI-te_r86sAZujIPAjlX/view?usp=sharing\n-        For viral submissions a larger set of metadata is required, you can find the template in https://drive.google.com/file/d/1Gx78GKh58PmRjdmJ05DBbpObAL-3oUFX/view?usp=sharing\n+        It is also possible to submit an excel file by following the template in https://github.com/ELIXIR-Belgium/ENA-metadata-templates\n     ]]></help>\n     <citations>\n     </citations>\n'
b
diff -r 4aab5ae907b6 -r 6f6537780379 extract_tables.py
--- a/extract_tables.py Mon Nov 15 11:47:13 2021 +0000
+++ b/extract_tables.py Thu Feb 24 18:20:40 2022 +0000
[
@@ -44,10 +44,10 @@
                                    'ENA_submission_data']))
     if "geo_location" in study['samples'][0].keys():           # sample belongs to a viral sample
         samples_table.write('\t'.join(['alias', 'status', 'accession', 'title', 'scientific_name',
-                                       'taxon_id', 'sample_description', 'collection_date',
-                                       'geographic_location', 'host_common_name', 'host_subject_id',
-                                       'host_health_state', 'host_sex', 'host_scientific_name',
-                                       'collector_name', 'collecting_institution', 'isolate',
+                                       'taxon_id', 'sample_description', 'collection date',
+                                       'geographic location (country and/or sea)', 'host common name', 'host subject id',
+                                       'host health state', 'host sex', 'host scientific name',
+                                       'collector name', 'collecting institution', 'isolate',
                                        'submission_date']) + '\n')
     else:
         samples_table.write('\t'.join(['alias', 'status', 'accession', 'title', 'scientific_name',
b
diff -r 4aab5ae907b6 -r 6f6537780379 mappings.py
--- a/mappings.py Mon Nov 15 11:47:13 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
b
@@ -1,29 +0,0 @@
-
-optional_samples_cols_mapping = {
-    "collection date": "collection_date",
-    "geographic location (latitude)": "geographic_location_latitude",
-    "geographic location (longitude)": "geographic_location_longitude",
-    "geographic location (region)": "geographic_location_region",
-    "sample capture status": "sample_capture_status",
-    "host disease outcome": "host_disease_outcome",
-    "host_age": "host_age",
-    "virus identifier": "virus_identifier",
-    "receipt date": "receipt_date",
-    "definition for seropositive sample": "definition_for_seropositive_sample",
-    "serotype (required for a seropositive sample)": "serotype",
-    "host habitat": "host_habitat",
-    "isolation source host-associated": "isolation_source_host_associated",
-    "host behaviour": "host_behaviour",
-    "isolation source non-host-associated": "isolation_source_non_host_associated",
-    "subject exposure": "subject_exposure",
-    "subject exposure duration": "subject_exposure_duration",
-    "type exposure": "type_exposure",
-    "personal protective equipment": "personal_protective_equipment",
-    "hospitalisation": "hospitalisation",
-    "illness duration": "illness_duration",
-    "illness symptoms": "illness_symptoms",
-    "sample storage conditions": "sample_storage_conditions",
-    "strain": "strain",
-    "host description": "host_description",
-    "gravidity": "gravidity"
-}
b
diff -r 4aab5ae907b6 -r 6f6537780379 process_xlsx.py
--- a/process_xlsx.py Mon Nov 15 11:47:13 2021 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
b'@@ -1,307 +0,0 @@\n-import argparse\n-import pathlib\n-import sys\n-\n-import xlrd\n-import yaml\n-from check_remote import check_remote_entry\n-from mappings import optional_samples_cols_mapping\n-\n-FILE_FORMAT = \'fastq\'\n-\n-\n-def identify_action(entry_type, alias):\n-    \'\'\' define action [\'add\' | \'modify\'] that needs to be perfomed for this entry \'\'\'\n-    query = {entry_type + \'_alias\': alias}\n-    remote_accessions = check_remote_entry(entry_type, query)\n-    if isinstance(remote_accessions, list) and len(remote_accessions) > 0:\n-        print(f\'Found: {entry_type} entry with alias {alias}\')\n-        return \'modify\'\n-    else:\n-        print(f\'No {entry_type} entry found with alias {alias}\')\n-        return \'add\'\n-\n-\n-def extract_data(xl_sheet, expected_columns, optional_cols=None):\n-    """\n-    1. Check that the columns I expect are present in the sheet\n-    (any order and mixed with others, it\'s just a verification that\n-    the user filled the correct template)\n-    2. Fill a dictionary with the rows data indexed by first column in list"""\n-    sheet_columns = {}\n-    if optional_cols is None:\n-        optional_cols = []\n-    optional_cols_loaded = []\n-    for sh_col in range(xl_sheet.ncols):\n-        if (xl_sheet.cell(0, sh_col).value in expected_columns) \\\n-           or (xl_sheet.cell(0, sh_col).value in optional_cols):\n-            if xl_sheet.cell(0, sh_col).value in sheet_columns.keys():\n-                sys.exit("Duplicated columns found")\n-            else:\n-                sheet_columns[xl_sheet.cell(0, sh_col).value] = sh_col\n-                if xl_sheet.cell(0, sh_col).value in optional_cols:\n-                    # store the list of optional cols available\n-                    optional_cols_loaded.append(xl_sheet.cell(0, sh_col).value)\n-    provided_cols = expected_columns + optional_cols_loaded\n-\n-    # check that the required columns are all present\n-    # TODO: revise this for optional columns\n-    for col in range(len(expected_columns)):\n-        assert expected_columns[col] in sheet_columns.keys(), \\\n-            "Expected column %s not found" % expected_columns[col]\n-\n-    # fetch rows in a dict\n-    data_dict = {}\n-    # the first of the expected columns will be the index\n-    index_col = sheet_columns[expected_columns[0]]\n-    # skip first 2 rows: column names + comments rows\n-    for row_id in range(2, xl_sheet.nrows):\n-        row_dict = {}\n-        for col in range(1, len(provided_cols)):\n-            sheet_col_index = sheet_columns[provided_cols[col]]\n-            row_dict[provided_cols[col]] = xl_sheet.cell(row_id, sheet_col_index).value\n-        # should check for duplicate alias/ids?\n-        if xl_sheet.cell(row_id, index_col).value in data_dict.keys():\n-            tmp = data_dict[xl_sheet.cell(row_id, index_col).value]\n-            data_dict[xl_sheet.cell(row_id, index_col).value] = [tmp]\n-            data_dict[xl_sheet.cell(row_id, index_col).value].append(row_dict)\n-        else:\n-            data_dict[xl_sheet.cell(row_id, index_col).value] = row_dict\n-    return data_dict, optional_cols_loaded\n-\n-\n-def paste_xls2yaml(xlsx_path):\n-    print(\'YAML -------------\')\n-    xls = xlrd.open_workbook(xlsx_path)\n-    content_dict = {}\n-    for sheet_name in xls.sheet_names():\n-        if sheet_name == \'controlled_vocabulary\':\n-            continue\n-        xls_sheet = xls.sheet_by_name(sheet_name)\n-        sheet_contents_dict = {}\n-        colnames = []\n-        for col in range(xls_sheet.ncols):\n-            colnames.append(xls_sheet.cell(0, col).value)\n-        # skip first 2 rows (column names and suggestions)\n-        for row_id in range(2, xls_sheet.nrows):\n-            row_dict = {}\n-            for col_id in range(0, xls_sheet.ncols):\n-                row_dict[colnames[col_id]] = xls_sheet.cell(row_id, col_id).value\n-            # should check for duplicate alias/ids?\n-            sheet_contents_dict[row_id] = row_dict\n-        content_dict[sheet_name] = sheet_contents_dict\n-    yaml.dump(content_dic'..b"-' + str(day) + \\\n-                                'T' + str(hour) + ':' + str(minute) + ':' + str(second)\n-                        if optional_col in ('receipt date'):\n-                            # receipt date uses forma: 2008-01-23\n-                            sample[optional_col] = str(year) + '-' + str(month) + '-' + str(day)\n-                # excel stores everything as float so I need to check if\n-                # the value was actually an int and keep it as int\n-                if isinstance(sample[optional_col], float):\n-                    if int(sample[optional_col]) == sample[optional_col]:\n-                        # it is not really a float but an int\n-                        sample[optional_col] = int(sample[optional_col])\n-                samples_row_values.append(str(sample[optional_col]))\n-    samples_table.write('\\t'.join(samples_row_values) + '\\n')\n-\n-    for exp_alias, exp in experiments_dict.items():\n-        # should I check here if any experiment has a study or sample alias that is incorrect?\n-        # (not listed in the samples or study dict)\n-        # process the experiments for this sample\n-        if exp['sample_alias'] == sample_alias:\n-            # check the remote status\n-            if args.dev_submission:\n-                entry_action = args.action\n-            else:\n-                entry_action = identify_action('experiment', exp_alias)\n-            experiments_table.write('\\t'.join([exp_alias, entry_action, 'accession_ena', exp['title'],\n-                                               exp['study_alias'], sample_alias,\n-                                               exp['design_description'], exp['library_name'],\n-                                               exp['library_strategy'], exp['library_source'],\n-                                               exp['library_selection'],\n-                                               exp['library_layout'].lower(),\n-                                               str(int(exp['insert_size'])),\n-                                               exp['library_construction_protocol'],\n-                                               exp['platform'], exp['instrument_model'],\n-                                               'submission_date_ENA']) + '\\n')\n-            exp_included.append(exp_alias)\n-            for run_alias, run in runs_dict.items():\n-                # check that the experiments library_layout is set to paired\n-                # when multiple entries are associated with the same run alias\n-                if not isinstance(run, list):\n-                    runs_list = [run]\n-                else:\n-                    runs_list = run\n-                for run_entry in runs_list:\n-                    if run_entry['experiment_alias'] == exp_alias:\n-                        if args.dev_submission:\n-                            entry_action = args.action\n-                        else:\n-                            entry_action = identify_action('run', run_alias)\n-                        runs_table.write('\\t'.join([run_alias, entry_action, 'ena_run_accession',\n-                                                    exp_alias, run_entry['file_name'],\n-                                                    FILE_FORMAT, '',\n-                                                    'submission_date_ENA']) + '\\n')\n-                runs_included.append(run_alias)\n-\n-# check if any experiment or run was not associated with any sample\n-for run in runs_dict.keys():\n-    if run not in runs_included:\n-        print(f'The run {run} is listed in the runs section but not associated with any \\\n-              used experiment')\n-\n-for exp in experiments_dict.keys():\n-    if exp not in exp_included:\n-        print(f'The experiment {exp} is listed in the experiments section but not associated \\\n-              with any used sample')\n-\n-studies_table.close()\n-samples_table.close()\n-experiments_table.close()\n-runs_table.close()\n-\n-if args.verbose:\n-    paste_xls2yaml(args.xlsx_path)\n"
b
diff -r 4aab5ae907b6 -r 6f6537780379 samples_macros.xml
--- a/samples_macros.xml Mon Nov 15 11:47:13 2021 +0000
+++ b/samples_macros.xml Thu Feb 24 18:20:40 2022 +0000
b
b'@@ -1,4 +1,48 @@\n <macros>\n+    <xml name="checklist_input_macro">\n+        <param type="select" name="checklist_id" label="Select the metadata checklist" help="You can find metadata templates for each checklist at: https://github.com/ELIXIR-Belgium/ENA-metadata-templates">\n+            <option value="ERC000011">ENA default sample checklist (ERC000011)</option>\n+            <option value="ERC000012">GSC MIxS air (ERC000012)</option>\n+            <option value="ERC000013">GSC MIxS host associated (ERC000013)</option>\n+            <option value="ERC000014">GSC MIxS human associated (ERC000014)</option>\n+            <option value="ERC000015">GSC MIxS human gut (ERC000015)</option>\n+            <option value="ERC000016">GSC MIxS human oral (ERC000016)</option>\n+            <option value="ERC000017">GSC MIxS human skin (ERC000017)</option>\n+            <option value="ERC000018">GSC MIxS human vaginal (ERC000018)</option>\n+            <option value="ERC000019">GSC MIxS microbial mat biolfilm (ERC000019)</option>\n+            <option value="ERC000020">GSC MIxS plant associated (ERC000020)</option>\n+            <option value="ERC000021">GSC MIxS sediment (ERC000021)</option>\n+            <option value="ERC000022">GSC MIxS soil (ERC000022)</option>\n+            <option value="ERC000023">GSC MIxS wastewater sludge (ERC000023)</option>\n+            <option value="ERC000024">GSC MIxS water (ERC000024)</option>\n+            <option value="ERC000025">GSC MIxS miscellaneous natural or artificial environment (ERC000025)</option>\n+            <option value="ERC000027">ENA Micro B3 (ERC000027)</option>\n+            <option value="ERC000028">ENA prokaryotic pathogen minimal sample checklist (ERC000028)</option>\n+            <option value="ERC000029">ENA Global Microbial Identifier reporting standard checklist GMI_MDM:1.1 (ERC000029)</option>\n+            <option value="ERC000030">ENA Tara Oceans (ERC000030)</option>\n+            <option value="ERC000031">GSC MIxS built environment (ERC000031)</option>\n+            <option value="ERC000032">ENA Influenza virus reporting standard checklist (ERC000032)</option>\n+            <option value="ERC000033">ENA virus pathogen reporting standard checklist (ERC000033)</option>\n+            <option value="ERC000034">ENA mutagenesis by carcinogen treatment checklist (ERC000034)</option>\n+            <option value="ERC000035">ENA Crop Plant sample enhanced annotation checklist (ERC000035)</option>\n+            <option value="ERC000036">ENA sewage checklist (ERC000036)</option>\n+            <option value="ERC000037">ENA Plant Sample Checklist (ERC000037)</option>\n+            <option value="ERC000038">ENA Shellfish Checklist (ERC000038)</option>\n+            <option value="ERC000039">ENA parasite sample checklist (ERC000039)</option>\n+            <option value="ERC000040">ENA UniEuk_EukBank Checklist (ERC000040)</option>\n+            <option value="ERC000041">ENA Global Microbial Identifier Proficiency Test (GMI PT) checklist (ERC000041)</option>\n+            <option value="ERC000043">ENA Marine Microalgae Checklist (ERC000043)</option>\n+            <option value="ERC000044">COMPARE-ECDC-EFSA pilot human-associated reporting standard (ERC000044)</option>\n+            <option value="ERC000045">COMPARE-ECDC-EFSA pilot food-associated reporting standard (ERC000045)</option>\n+            <option value="ERC000047">GSC MIMAGS (ERC000047)</option>\n+            <option value="ERC000048">GSC MISAGS (ERC000048)</option>\n+            <option value="ERC000049">GSC MIUVIGS (ERC000049)</option>\n+            <option value="ERC000050">ENA binned metagenome (ERC000050)</option>\n+            <option value="ERC000051">PDX Checklist (ERC000051)</option>\n+            <option value="ERC000052">HoloFood Checklist (ERC000052)</option>\n+            <option value="ERC000053">Tree of Life Checklist (ERC000053)</option>\n+        </param>\n+    </xml>\n     <xml name="test_submit_section">\n         <section name="test_submit_parameters" expa'..b'le_inputs_macro">\n         <conditional name="input_format_conditional">\n             <param name="input_format" type="select" label="Would you like to submit pregenerated table files or interactively define the input structures?">\n-                <option value="excel_tables" selected="True">User generated metadata tables based on Excel templates</option>\n+                <option value="excel_tables" selected="True">User generated metadata tables based on Excel template</option>\n                 <option value="build_tables" selected="False">Interactive generation of the study structure (recommended for small studies)</option>\n                 <option value="user_generated_tables" selected="False">User generated tabular files (studies - samples - experiments - runs) </option>\n             </param>\n             <when value="excel_tables">\n-                <param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" />\n-                <param name="xlsx_file" type="data" format="xlsx" label="Select Excel (xlsx) file based on templates" />\n+                <!--<param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" />-->\n+                <expand macro="checklist_input_macro"/>\n+                <param name="xlsx_file" type="data" format="xlsx" label="Select Excel (xlsx) file based on template" />\n                 <expand macro="run_inputs_macro" />\n             </when>\n             <when value="user_generated_tables">\n-                <param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" />\n+                <!--<param name="viral_submission" type="boolean" label="Does your submission data belong to a viral sample?" help="If you select yes then your data will be submitted using the ENA virus pathogen reporting standard checklist (see: https://ena-browser-docs.readthedocs.io/en/latest/help_and_guides/sars-cov-2-submissions.html)" />-->\n+                <expand macro="checklist_input_macro"/>\n                 <expand macro="run_inputs_macro" />\n                 <param name="studies_users_table" type="data" format="tabular" multiple="false" label="Studies table" help="Studies metadata file"/>\n                 <param name="samples_users_table" type="data" format="tabular" multiple="false" label="Samples table" help="Samples metadata file"/>\n@@ -161,7 +207,7 @@\n             <param name="sample_title" type="text" label="Sample title"/>\n             <param name="sample_description" type="text" help="e.g: liver cells" label="Describe the type of sample"/>\n             <param name="scientific_name" type="text" label="Enter the species of the sample" help="e.g Severe acute respiratory syndrome coronavirus 2"/>\n-            <param name="tax_id" type="text" label="Enter the taxonomic ID corresponding to the sample species" />\n+            <param name="tax_id" type="integer" value="0" label="Enter the taxonomic ID corresponding to the sample species" />\n             <repeat name="rep_experiment" title="Sequencing experiments performed with this sample" min="1" >\n                 <param name="experiment_title" type="text" label="Specify an experiment title" />\n                 <param name="experiment_design" type="text" label="Describe the experiment design" />\n'
b
diff -r 4aab5ae907b6 -r 6f6537780379 test-data/C026_exp5_clean.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/C026_exp5_clean.fastq Thu Feb 24 18:20:40 2022 +0000
b
b'@@ -0,0 +1,400 @@\n+@M01368:8:000000000-A3GHV:1:1101:6911:8255/1\n+ATCTGGTTCCTACTTCAGGGCCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATT\n++\n+BCCCCFFFFFFFGGGGGGGGGGGHHHHGHGHHHHHHHHHGGGGGGHHHHGHHHHHHHHHHGHHHHHHGGHGGHHHGHHHHFHHGHHHHHHHHHGHEHEFFGHHEGGCEFGGFHHHBGHHGHHHHGHFHHHGHGHGHGGCDFDDACGGGGGGGAAFFFFFFFFFBAFFFFFB;FFFFFFADDFFFFFFFFFFEFFFFFFFFFFBFFFFFFFFFFFFFFEFFFFFFFFBFEFFFFEFE;DFFFDFBFF/9BFB\n+@M01368:8:000000000-A3GHV:1:1101:14518:9998/1\n+GTTATTATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGGGATAGACCTGTGATCCATCGTGAT\n++\n+AAAAAFFFFFFFGGGGGGGGGGHGGHHHHGHHHHHHHGCGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHGHHGFHFE5BGEEHFGGGHHHHHHHHFBHHGGGGFHGHHFGHHHHGHHHHHHGEGGGGFHFHGEGHHGGCDGDGHGGGDGGHGGCGGGHGHHH/ACDG?.1FGCDCCGCA.CC@CDCHFHGFFGGGEBFGAB//CEFBFGG.:;D;;A0AFFFFFB..:@ABFF//;BFFFFFBF/9D:A//\n+@M01368:8:000000000-A3GHV:1:1101:18422:19051/1\n+GTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACG\n++\n+CCCCCFDDDDDFGGGGGGGGGGHHHHHHHHHHHHHHHHGHHHHHHFHHHHGGGGHHHHHHHHHGHHHHHHHHHHHHGGHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHGCGGGHHHHHHHHHHHHHHHHHHHHHHGFDHGFHCFGGGGFGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF;FFFFFFFFFFFFFFFFFFFFFFFFFFFFEFBFFFFFFFFFF:FFF.\n+@M01368:8:000000000-A3GHV:1:1101:25545:21098/1\n+ATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATAAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGGCTTATTTAAGGGGAACGGGTGGGG\n++\n+33AA?DFD5BDFGGGFEBDGEGHEGHGEGHCEGGHHCHGHHFFHHGFGAGE53FF2FAFFGDE5FFFE5GFBFGAEE1GHHHGHHHEHE3FGHF@GEGEGGHHGG3FAGFFDE?EEE3GFEGFGFGGCG?GHHHFHGGGC@DHFFHD/A<C@EGFDCGGGHFHHHEGFGHBFHG0:CEHFCHGGED.;0CEF.F99B0CFFEEFGGG0FBFBBF0F/FFBDE?/9//9B.FFBFFFFFFBF..A..;@B--\n+@M01368:8:000000000-A3GHV:1:1101:5446:12248/1\n+AATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTAT\n++\n+CCCCDFFFFCCFGGGGGGGGFGHHHHHGGGGHHHHHHHHHHHHHHHHGBGHGGHGGHHHHHHHHHHGHGHGGGGGHHHHHHHHGHHHHHHHHHGGGGGHHHHFFGHHHGGGGGGHHHGFGGHHGGGGHHHHHHGGGGGGHGHHGGGGGGGHGGGGGGHHHHHHHHHHHHHFHGGGHHHHGGGGGG:FE;EGEGGGGG/;?FGGGGGGGFFFFGGFFFFFFFFFBFFFFFFFFFFBFFFFFFEFFFFFEFFF\n+@M01368:8:000000000-A3GHV:1:1101:5861:6452/1\n+ATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTT\n++\n+ABCCCFFFFFFFGGGGGGGGGGHHHHHHHHHHHGHHHHGHHHHHHHHHHHGGGGHHHHHHHHFHHHHHHGGHGHGGHGGHHHHHHHGGHFHHHGGGGGHHHHHHHHHHHHHHHHHHGGGGGHHHHHEGGHHGGGGGGHHHGGGGHGGGGGHHHHHHGGGDCGHHHHGGGGGGGHEFGGGGHGHHHGHGGGFGGGGGGGEGGGGGGG?E0CEFGGGGGFEE9EEFFFFFBFFFFFFFBFFBD.AFFFFFFF0\n+@M01368:8:000000000-A3GHV:1:1102:10403:6021/1\n+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTG\n++\n+>A@AAAAD2ADDFFGGGGGFGGHA?EAEFBEAGHFABFGG5FDF5DB1EEGAFDFB53FF5FH@G5FFEHGHEFHFFHBE333GF43GCGGGGE@0?BFGGB0B?FHGFDGGHHHBFFDEGGHGFFFDFE@<1>@FFFGHHHHFHEFGDABFFGG/@DCE<CG1<GF0/DD000=<DHGBDFDCECE/:AC?-;-;9B/ABBB-AD9BFB99AB?BDFBAD-.9..@;=;;..9..9/9;BEF;A:9/BFF\n+@M01368:8:000000000-A3GHV:1:1102:10677:23253/1\n+CCTTAAATAAGACATCACGATGGATCACAGGTCTATCACC'..b'FHGDF@@?CGFHCEGGGFD.CCC?EGHBHHHFHHFBCFFGEB/CEGGGGDAA.90C9CEBFGGBBF/9.9FBFFFBBFF//99FFFFEABF//99FFEFFFBFF\n+@M01368:8:000000000-A3GHV:1:1113:5741:16959/1\n+TAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGA\n++\n+ABBBBFFFFFFFGGGFGGGGGGHHHGHHGGHBGHGAGFFCAFGHGFFGHHGFHHHHHGGGGGHGHHHHHHHHE3BFFHHHGG0EDF@GHFFGGGHGGGGGGGGGGGGGHHGGEEFHGFHHDDG@DGGGHHGDGGGGGHGG?CF?HHGHHHGHGHHHFFHGGGHHHHGGCD.;0<C;CGGGGEFF/.;0;FFFBF/0;0CFGFFB..9B/;0CBFFBBFFFFBAC?DED9;B9AD;.FFFB/B/;FBA/B//\n+@M01368:8:000000000-A3GHV:1:1114:10130:11959/1\n+CGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTCTGATCTGTCTCTTATACACATCTCCGAGCCCACGAGACTAAGGCGAATCTCGTATGCCGTCTTCTGCTTGCAACAAACACACATCCAGA\n++\n+>A33>FFFFFF@FFFGGGGFGGD4EGGGGGHHGFFGHFGGHHHFEFHECFHHHEHGAGBCCGCCEGGHGHEGFBFHFHHHHGGGHFHGHEGGGFEGEGG??@DBGHGGC-@FFGHFHFHB-??DA-DD@9:BEBG@B?E?A.ABBFBFA??F.FF/BFFB/B9B/9;BF9FF9FFFFFFFFFFFFFF?BB?;9EE;-;DDB9//;B-B@;ABFFEFFFF/-.9A.;//9/BF./B/..9.9:...//////\n+@M01368:8:000000000-A3GHV:1:1114:14540:5315/1\n+CTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGGGGGCTATTTAGGTTTTT\n++\n+AABCCFFFFFFFGGGGGGGGGGHHHHHHHFHHHHGHHGHHGGGHGGHHHHHHHGHHHHHHGGGGGHHFHHHFGHHGGFHHHHHGGGGGHHHGHGGHHHGGGGGGHGHGGGGHHGGGGHHHHHEGDDFGFFFHHGGGGGCDAFCFGFDHHHHGGHGHHHHHHBCGEHHHHGGHG.ACGEHGG0CBFFF:A;BB0;09CGF00CFFFE0AA?//CFFFFFFFFFFFFFFFBEF;A.-=A--:BBFB90;;FE.\n+@M01368:8:000000000-A3GHV:1:1114:15066:16302/1\n+TAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAACGGTTGTTAATTAATTATTGCTTGTAGGACA\n++\n+BBBBAFFBDFBFBGGGGGFGGGBGFFFHGFHHGFFFHGHHHGHHHHFFHHHGHGC?AEFFHEFBFFFGHHHHH1FDGFFHGHGHFEGCGC-<<AHHHGGGGGGGFHH0GHFCCCADGGG?.9/A-???DGGFFF.9F9/EE-;;BBBFFBFFFFFFFFFEFFFFBFFBBFFFFF/BFFBFFFFF-DBFFF;/BFF//BB//9/BEA---9:BFFFFFF/F/.;.:@9.BBFF/;BFF/;/////9/////.\n+@M01368:8:000000000-A3GHV:1:1114:16639:15258/1\n+CCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGCGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTCCAATATTACAGGCGAACATACTTACTAAATTGTGT\n++\n+11>>ABFFFFFFGGCGC1BBF1GHHHBHHHHGFHGH1A100AA/GGEHGFBAA1FFD2FHHHHECG2F1BB/E0FC/A>EE/FGGFGEF1EGBBEHHCGGGHBGEHFHE0B?E--;C?CCGGG@A@GBFBBBB09?@??A-AB--ABE@=-=-9AE9A;FFFFFE=9-//;//;9FF/////;;///9B///;/B////9FFBB;--A@-;/9;9-:-/;;FFFE9BF?@;-9-99/B9F/://///99/;\n+@M01368:8:000000000-A3GHV:1:1114:2404:13066/1\n+TCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCTGTCTATTATACACATC\n++\n+CCCCCFFFFCFFGGGGGGGGGGHHHHHGHHHHHHHHHFFHHHHHGGGGHHHHHHHHFHHHHHHFGGHHGGHGGHHHHHHGHHFHHHHGGGGGGHHHHHHGHHHHHHHHHHGGGGGGGHH?FGHHHGGGGGGHHGGFGGHHGGGGHHHHHFGGGGFGHGHHGGGGGGGHGGGEGGHHGHHHHHHHHHGFBFFDA0FGGGFFGG0:EFGGGGGGGG;AEBF0B0BFFBFFFFFFFFFFFFFFFFFFFFFEFF0\n+@M01368:8:000000000-A3GHV:1:1114:9184:6959/1\n+GGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGACCCTGAAGTAGGAACCAGATGTCGGATACAGTTCACTTTCTGTCTCTTA\n++\n+AABBBFFFCCCBFGGGGGGGGGHHHHHHHHGGGGGGHHHG3FFHHHFGFGGGHHHGGGEHHGGGGHHHHHHGGGGGGHGHGGGGGGGDEGGGGEGGFHHHHHHHHHHHHGGGFGEHHGGFDGGGDFFGFHHHHGFCFHHHHHEFHFHGGFFGHHGGGHHHHDGHHHFHHHFFFFGFGGG.EFGGGGFGEBFGGGFGFGGGGFFBFGGBBFFFFFB/FEFF?///;A::AABBFFFBFFFFFFFFFBFFFF/\n'
b
diff -r 4aab5ae907b6 -r 6f6537780379 test-data/C030_exp5_clean.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/C030_exp5_clean.fastq Thu Feb 24 18:20:40 2022 +0000
b
b'@@ -0,0 +1,400 @@\n+@M01368:8:000000000-A3GHV:1:1101:6911:8255/1\n+ATCTGGTTCCTACTTCAGGGCCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATT\n++\n+BCCCCFFFFFFFGGGGGGGGGGGHHHHGHGHHHHHHHHHGGGGGGHHHHGHHHHHHHHHHGHHHHHHGGHGGHHHGHHHHFHHGHHHHHHHHHGHEHEFFGHHEGGCEFGGFHHHBGHHGHHHHGHFHHHGHGHGHGGCDFDDACGGGGGGGAAFFFFFFFFFBAFFFFFB;FFFFFFADDFFFFFFFFFFEFFFFFFFFFFBFFFFFFFFFFFFFFEFFFFFFFFBFEFFFFEFE;DFFFDFBFF/9BFB\n+@M01368:8:000000000-A3GHV:1:1101:14518:9998/1\n+GTTATTATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGGGATAGACCTGTGATCCATCGTGAT\n++\n+AAAAAFFFFFFFGGGGGGGGGGHGGHHHHGHHHHHHHGCGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHGHHGFHFE5BGEEHFGGGHHHHHHHHFBHHGGGGFHGHHFGHHHHGHHHHHHGEGGGGFHFHGEGHHGGCDGDGHGGGDGGHGGCGGGHGHHH/ACDG?.1FGCDCCGCA.CC@CDCHFHGFFGGGEBFGAB//CEFBFGG.:;D;;A0AFFFFFB..:@ABFF//;BFFFFFBF/9D:A//\n+@M01368:8:000000000-A3GHV:1:1101:18422:19051/1\n+GTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACG\n++\n+CCCCCFDDDDDFGGGGGGGGGGHHHHHHHHHHHHHHHHGHHHHHHFHHHHGGGGHHHHHHHHHGHHHHHHHHHHHHGGHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHGCGGGHHHHHHHHHHHHHHHHHHHHHHGFDHGFHCFGGGGFGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF;FFFFFFFFFFFFFFFFFFFFFFFFFFFFEFBFFFFFFFFFF:FFF.\n+@M01368:8:000000000-A3GHV:1:1101:25545:21098/1\n+ATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATAAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGGCTTATTTAAGGGGAACGGGTGGGG\n++\n+33AA?DFD5BDFGGGFEBDGEGHEGHGEGHCEGGHHCHGHHFFHHGFGAGE53FF2FAFFGDE5FFFE5GFBFGAEE1GHHHGHHHEHE3FGHF@GEGEGGHHGG3FAGFFDE?EEE3GFEGFGFGGCG?GHHHFHGGGC@DHFFHD/A<C@EGFDCGGGHFHHHEGFGHBFHG0:CEHFCHGGED.;0CEF.F99B0CFFEEFGGG0FBFBBF0F/FFBDE?/9//9B.FFBFFFFFFBF..A..;@B--\n+@M01368:8:000000000-A3GHV:1:1101:5446:12248/1\n+AATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTAT\n++\n+CCCCDFFFFCCFGGGGGGGGFGHHHHHGGGGHHHHHHHHHHHHHHHHGBGHGGHGGHHHHHHHHHHGHGHGGGGGHHHHHHHHGHHHHHHHHHGGGGGHHHHFFGHHHGGGGGGHHHGFGGHHGGGGHHHHHHGGGGGGHGHHGGGGGGGHGGGGGGHHHHHHHHHHHHHFHGGGHHHHGGGGGG:FE;EGEGGGGG/;?FGGGGGGGFFFFGGFFFFFFFFFBFFFFFFFFFFBFFFFFFEFFFFFEFFF\n+@M01368:8:000000000-A3GHV:1:1101:5861:6452/1\n+ATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTT\n++\n+ABCCCFFFFFFFGGGGGGGGGGHHHHHHHHHHHGHHHHGHHHHHHHHHHHGGGGHHHHHHHHFHHHHHHGGHGHGGHGGHHHHHHHGGHFHHHGGGGGHHHHHHHHHHHHHHHHHHGGGGGHHHHHEGGHHGGGGGGHHHGGGGHGGGGGHHHHHHGGGDCGHHHHGGGGGGGHEFGGGGHGHHHGHGGGFGGGGGGGEGGGGGGG?E0CEFGGGGGFEE9EEFFFFFBFFFFFFFBFFBD.AFFFFFFF0\n+@M01368:8:000000000-A3GHV:1:1102:10403:6021/1\n+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTG\n++\n+>A@AAAAD2ADDFFGGGGGFGGHA?EAEFBEAGHFABFGG5FDF5DB1EEGAFDFB53FF5FH@G5FFEHGHEFHFFHBE333GF43GCGGGGE@0?BFGGB0B?FHGFDGGHHHBFFDEGGHGFFFDFE@<1>@FFFGHHHHFHEFGDABFFGG/@DCE<CG1<GF0/DD000=<DHGBDFDCECE/:AC?-;-;9B/ABBB-AD9BFB99AB?BDFBAD-.9..@;=;;..9..9/9;BEF;A:9/BFF\n+@M01368:8:000000000-A3GHV:1:1102:10677:23253/1\n+CCTTAAATAAGACATCACGATGGATCACAGGTCTATCACC'..b'FHGDF@@?CGFHCEGGGFD.CCC?EGHBHHHFHHFBCFFGEB/CEGGGGDAA.90C9CEBFGGBBF/9.9FBFFFBBFF//99FFFFEABF//99FFEFFFBFF\n+@M01368:8:000000000-A3GHV:1:1113:5741:16959/1\n+TAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGA\n++\n+ABBBBFFFFFFFGGGFGGGGGGHHHGHHGGHBGHGAGFFCAFGHGFFGHHGFHHHHHGGGGGHGHHHHHHHHE3BFFHHHGG0EDF@GHFFGGGHGGGGGGGGGGGGGHHGGEEFHGFHHDDG@DGGGHHGDGGGGGHGG?CF?HHGHHHGHGHHHFFHGGGHHHHGGCD.;0<C;CGGGGEFF/.;0;FFFBF/0;0CFGFFB..9B/;0CBFFBBFFFFBAC?DED9;B9AD;.FFFB/B/;FBA/B//\n+@M01368:8:000000000-A3GHV:1:1114:10130:11959/1\n+CGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTCTGATCTGTCTCTTATACACATCTCCGAGCCCACGAGACTAAGGCGAATCTCGTATGCCGTCTTCTGCTTGCAACAAACACACATCCAGA\n++\n+>A33>FFFFFF@FFFGGGGFGGD4EGGGGGHHGFFGHFGGHHHFEFHECFHHHEHGAGBCCGCCEGGHGHEGFBFHFHHHHGGGHFHGHEGGGFEGEGG??@DBGHGGC-@FFGHFHFHB-??DA-DD@9:BEBG@B?E?A.ABBFBFA??F.FF/BFFB/B9B/9;BF9FF9FFFFFFFFFFFFFF?BB?;9EE;-;DDB9//;B-B@;ABFFEFFFF/-.9A.;//9/BF./B/..9.9:...//////\n+@M01368:8:000000000-A3GHV:1:1114:14540:5315/1\n+CTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGGGGGCTATTTAGGTTTTT\n++\n+AABCCFFFFFFFGGGGGGGGGGHHHHHHHFHHHHGHHGHHGGGHGGHHHHHHHGHHHHHHGGGGGHHFHHHFGHHGGFHHHHHGGGGGHHHGHGGHHHGGGGGGHGHGGGGHHGGGGHHHHHEGDDFGFFFHHGGGGGCDAFCFGFDHHHHGGHGHHHHHHBCGEHHHHGGHG.ACGEHGG0CBFFF:A;BB0;09CGF00CFFFE0AA?//CFFFFFFFFFFFFFFFBEF;A.-=A--:BBFB90;;FE.\n+@M01368:8:000000000-A3GHV:1:1114:15066:16302/1\n+TAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAACGGTTGTTAATTAATTATTGCTTGTAGGACA\n++\n+BBBBAFFBDFBFBGGGGGFGGGBGFFFHGFHHGFFFHGHHHGHHHHFFHHHGHGC?AEFFHEFBFFFGHHHHH1FDGFFHGHGHFEGCGC-<<AHHHGGGGGGGFHH0GHFCCCADGGG?.9/A-???DGGFFF.9F9/EE-;;BBBFFBFFFFFFFFFEFFFFBFFBBFFFFF/BFFBFFFFF-DBFFF;/BFF//BB//9/BEA---9:BFFFFFF/F/.;.:@9.BBFF/;BFF/;/////9/////.\n+@M01368:8:000000000-A3GHV:1:1114:16639:15258/1\n+CCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGCGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTCCAATATTACAGGCGAACATACTTACTAAATTGTGT\n++\n+11>>ABFFFFFFGGCGC1BBF1GHHHBHHHHGFHGH1A100AA/GGEHGFBAA1FFD2FHHHHECG2F1BB/E0FC/A>EE/FGGFGEF1EGBBEHHCGGGHBGEHFHE0B?E--;C?CCGGG@A@GBFBBBB09?@??A-AB--ABE@=-=-9AE9A;FFFFFE=9-//;//;9FF/////;;///9B///;/B////9FFBB;--A@-;/9;9-:-/;;FFFE9BF?@;-9-99/B9F/://///99/;\n+@M01368:8:000000000-A3GHV:1:1114:2404:13066/1\n+TCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCTGTCTATTATACACATC\n++\n+CCCCCFFFFCFFGGGGGGGGGGHHHHHGHHHHHHHHHFFHHHHHGGGGHHHHHHHHFHHHHHHFGGHHGGHGGHHHHHHGHHFHHHHGGGGGGHHHHHHGHHHHHHHHHHGGGGGGGHH?FGHHHGGGGGGHHGGFGGHHGGGGHHHHHFGGGGFGHGHHGGGGGGGHGGGEGGHHGHHHHHHHHHGFBFFDA0FGGGFFGG0:EFGGGGGGGG;AEBF0B0BFFBFFFFFFFFFFFFFFFFFFFFFEFF0\n+@M01368:8:000000000-A3GHV:1:1114:9184:6959/1\n+GGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGACCCTGAAGTAGGAACCAGATGTCGGATACAGTTCACTTTCTGTCTCTTA\n++\n+AABBBFFFCCCBFGGGGGGGGGHHHHHHHHGGGGGGHHHG3FFHHHFGFGGGHHHGGGEHHGGGGHHHHHHGGGGGGHGHGGGGGGGDEGGGGEGGFHHHHHHHHHHHHGGGFGEHHGGFDGGGDFFGFHHHHGFCFHHHHHEFHFHGGFFGHHGGGHHHHDGHHHFHHHFFFFGFGGG.EFGGGGFGEBFGGGFGFGGGGFFBFGGBBFFFFFB/FEFF?///;A::AABBFFFBFFFFFFFFFBFFFF/\n'
b
diff -r 4aab5ae907b6 -r 6f6537780379 test-data/C053_exp5_clean.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/C053_exp5_clean.fastq Thu Feb 24 18:20:40 2022 +0000
b
b'@@ -0,0 +1,400 @@\n+@M01368:8:000000000-A3GHV:1:1101:6911:8255/1\n+ATCTGGTTCCTACTTCAGGGCCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATT\n++\n+BCCCCFFFFFFFGGGGGGGGGGGHHHHGHGHHHHHHHHHGGGGGGHHHHGHHHHHHHHHHGHHHHHHGGHGGHHHGHHHHFHHGHHHHHHHHHGHEHEFFGHHEGGCEFGGFHHHBGHHGHHHHGHFHHHGHGHGHGGCDFDDACGGGGGGGAAFFFFFFFFFBAFFFFFB;FFFFFFADDFFFFFFFFFFEFFFFFFFFFFBFFFFFFFFFFFFFFEFFFFFFFFBFEFFFFEFE;DFFFDFBFF/9BFB\n+@M01368:8:000000000-A3GHV:1:1101:14518:9998/1\n+GTTATTATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGGGATAGACCTGTGATCCATCGTGAT\n++\n+AAAAAFFFFFFFGGGGGGGGGGHGGHHHHGHHHHHHHGCGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHGHHGFHFE5BGEEHFGGGHHHHHHHHFBHHGGGGFHGHHFGHHHHGHHHHHHGEGGGGFHFHGEGHHGGCDGDGHGGGDGGHGGCGGGHGHHH/ACDG?.1FGCDCCGCA.CC@CDCHFHGFFGGGEBFGAB//CEFBFGG.:;D;;A0AFFFFFB..:@ABFF//;BFFFFFBF/9D:A//\n+@M01368:8:000000000-A3GHV:1:1101:18422:19051/1\n+GTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACG\n++\n+CCCCCFDDDDDFGGGGGGGGGGHHHHHHHHHHHHHHHHGHHHHHHFHHHHGGGGHHHHHHHHHGHHHHHHHHHHHHGGHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHGCGGGHHHHHHHHHHHHHHHHHHHHHHGFDHGFHCFGGGGFGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF;FFFFFFFFFFFFFFFFFFFFFFFFFFFFEFBFFFFFFFFFF:FFF.\n+@M01368:8:000000000-A3GHV:1:1101:25545:21098/1\n+ATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATAAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGGCTTATTTAAGGGGAACGGGTGGGG\n++\n+33AA?DFD5BDFGGGFEBDGEGHEGHGEGHCEGGHHCHGHHFFHHGFGAGE53FF2FAFFGDE5FFFE5GFBFGAEE1GHHHGHHHEHE3FGHF@GEGEGGHHGG3FAGFFDE?EEE3GFEGFGFGGCG?GHHHFHGGGC@DHFFHD/A<C@EGFDCGGGHFHHHEGFGHBFHG0:CEHFCHGGED.;0CEF.F99B0CFFEEFGGG0FBFBBF0F/FFBDE?/9//9B.FFBFFFFFFBF..A..;@B--\n+@M01368:8:000000000-A3GHV:1:1101:5446:12248/1\n+AATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTAT\n++\n+CCCCDFFFFCCFGGGGGGGGFGHHHHHGGGGHHHHHHHHHHHHHHHHGBGHGGHGGHHHHHHHHHHGHGHGGGGGHHHHHHHHGHHHHHHHHHGGGGGHHHHFFGHHHGGGGGGHHHGFGGHHGGGGHHHHHHGGGGGGHGHHGGGGGGGHGGGGGGHHHHHHHHHHHHHFHGGGHHHHGGGGGG:FE;EGEGGGGG/;?FGGGGGGGFFFFGGFFFFFFFFFBFFFFFFFFFFBFFFFFFEFFFFFEFFF\n+@M01368:8:000000000-A3GHV:1:1101:5861:6452/1\n+ATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTT\n++\n+ABCCCFFFFFFFGGGGGGGGGGHHHHHHHHHHHGHHHHGHHHHHHHHHHHGGGGHHHHHHHHFHHHHHHGGHGHGGHGGHHHHHHHGGHFHHHGGGGGHHHHHHHHHHHHHHHHHHGGGGGHHHHHEGGHHGGGGGGHHHGGGGHGGGGGHHHHHHGGGDCGHHHHGGGGGGGHEFGGGGHGHHHGHGGGFGGGGGGGEGGGGGGG?E0CEFGGGGGFEE9EEFFFFFBFFFFFFFBFFBD.AFFFFFFF0\n+@M01368:8:000000000-A3GHV:1:1102:10403:6021/1\n+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTG\n++\n+>A@AAAAD2ADDFFGGGGGFGGHA?EAEFBEAGHFABFGG5FDF5DB1EEGAFDFB53FF5FH@G5FFEHGHEFHFFHBE333GF43GCGGGGE@0?BFGGB0B?FHGFDGGHHHBFFDEGGHGFFFDFE@<1>@FFFGHHHHFHEFGDABFFGG/@DCE<CG1<GF0/DD000=<DHGBDFDCECE/:AC?-;-;9B/ABBB-AD9BFB99AB?BDFBAD-.9..@;=;;..9..9/9;BEF;A:9/BFF\n+@M01368:8:000000000-A3GHV:1:1102:10677:23253/1\n+CCTTAAATAAGACATCACGATGGATCACAGGTCTATCACC'..b'FHGDF@@?CGFHCEGGGFD.CCC?EGHBHHHFHHFBCFFGEB/CEGGGGDAA.90C9CEBFGGBBF/9.9FBFFFBBFF//99FFFFEABF//99FFEFFFBFF\n+@M01368:8:000000000-A3GHV:1:1113:5741:16959/1\n+TAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGA\n++\n+ABBBBFFFFFFFGGGFGGGGGGHHHGHHGGHBGHGAGFFCAFGHGFFGHHGFHHHHHGGGGGHGHHHHHHHHE3BFFHHHGG0EDF@GHFFGGGHGGGGGGGGGGGGGHHGGEEFHGFHHDDG@DGGGHHGDGGGGGHGG?CF?HHGHHHGHGHHHFFHGGGHHHHGGCD.;0<C;CGGGGEFF/.;0;FFFBF/0;0CFGFFB..9B/;0CBFFBBFFFFBAC?DED9;B9AD;.FFFB/B/;FBA/B//\n+@M01368:8:000000000-A3GHV:1:1114:10130:11959/1\n+CGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTCTGATCTGTCTCTTATACACATCTCCGAGCCCACGAGACTAAGGCGAATCTCGTATGCCGTCTTCTGCTTGCAACAAACACACATCCAGA\n++\n+>A33>FFFFFF@FFFGGGGFGGD4EGGGGGHHGFFGHFGGHHHFEFHECFHHHEHGAGBCCGCCEGGHGHEGFBFHFHHHHGGGHFHGHEGGGFEGEGG??@DBGHGGC-@FFGHFHFHB-??DA-DD@9:BEBG@B?E?A.ABBFBFA??F.FF/BFFB/B9B/9;BF9FF9FFFFFFFFFFFFFF?BB?;9EE;-;DDB9//;B-B@;ABFFEFFFF/-.9A.;//9/BF./B/..9.9:...//////\n+@M01368:8:000000000-A3GHV:1:1114:14540:5315/1\n+CTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGGGGGCTATTTAGGTTTTT\n++\n+AABCCFFFFFFFGGGGGGGGGGHHHHHHHFHHHHGHHGHHGGGHGGHHHHHHHGHHHHHHGGGGGHHFHHHFGHHGGFHHHHHGGGGGHHHGHGGHHHGGGGGGHGHGGGGHHGGGGHHHHHEGDDFGFFFHHGGGGGCDAFCFGFDHHHHGGHGHHHHHHBCGEHHHHGGHG.ACGEHGG0CBFFF:A;BB0;09CGF00CFFFE0AA?//CFFFFFFFFFFFFFFFBEF;A.-=A--:BBFB90;;FE.\n+@M01368:8:000000000-A3GHV:1:1114:15066:16302/1\n+TAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAACGGTTGTTAATTAATTATTGCTTGTAGGACA\n++\n+BBBBAFFBDFBFBGGGGGFGGGBGFFFHGFHHGFFFHGHHHGHHHHFFHHHGHGC?AEFFHEFBFFFGHHHHH1FDGFFHGHGHFEGCGC-<<AHHHGGGGGGGFHH0GHFCCCADGGG?.9/A-???DGGFFF.9F9/EE-;;BBBFFBFFFFFFFFFEFFFFBFFBBFFFFF/BFFBFFFFF-DBFFF;/BFF//BB//9/BEA---9:BFFFFFF/F/.;.:@9.BBFF/;BFF/;/////9/////.\n+@M01368:8:000000000-A3GHV:1:1114:16639:15258/1\n+CCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGCGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTCCAATATTACAGGCGAACATACTTACTAAATTGTGT\n++\n+11>>ABFFFFFFGGCGC1BBF1GHHHBHHHHGFHGH1A100AA/GGEHGFBAA1FFD2FHHHHECG2F1BB/E0FC/A>EE/FGGFGEF1EGBBEHHCGGGHBGEHFHE0B?E--;C?CCGGG@A@GBFBBBB09?@??A-AB--ABE@=-=-9AE9A;FFFFFE=9-//;//;9FF/////;;///9B///;/B////9FFBB;--A@-;/9;9-:-/;;FFFE9BF?@;-9-99/B9F/://///99/;\n+@M01368:8:000000000-A3GHV:1:1114:2404:13066/1\n+TCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCTGTCTATTATACACATC\n++\n+CCCCCFFFFCFFGGGGGGGGGGHHHHHGHHHHHHHHHFFHHHHHGGGGHHHHHHHHFHHHHHHFGGHHGGHGGHHHHHHGHHFHHHHGGGGGGHHHHHHGHHHHHHHHHHGGGGGGGHH?FGHHHGGGGGGHHGGFGGHHGGGGHHHHHFGGGGFGHGHHGGGGGGGHGGGEGGHHGHHHHHHHHHGFBFFDA0FGGGFFGG0:EFGGGGGGGG;AEBF0B0BFFBFFFFFFFFFFFFFFFFFFFFFEFF0\n+@M01368:8:000000000-A3GHV:1:1114:9184:6959/1\n+GGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGACCCTGAAGTAGGAACCAGATGTCGGATACAGTTCACTTTCTGTCTCTTA\n++\n+AABBBFFFCCCBFGGGGGGGGGHHHHHHHHGGGGGGHHHG3FFHHHFGFGGGHHHGGGEHHGGGGHHHHHHGGGGGGHGHGGGGGGGDEGGGGEGGFHHHHHHHHHHHHGGGFGEHHGGFDGGGDFFGFHHHHGFCFHHHHHEFHFHGGFFGHHGGGHHHHDGHHHFHHHFFFFGFGGG.EFGGGGFGEBFGGGFGFGGGGFFBFGGBBFFFFFB/FEFF?///;A::AABBFFFBFFFFFFFFFBFFFF/\n'
b
diff -r 4aab5ae907b6 -r 6f6537780379 test-data/C067_exp5_clean.fastq
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/C067_exp5_clean.fastq Thu Feb 24 18:20:40 2022 +0000
b
b'@@ -0,0 +1,400 @@\n+@M01368:8:000000000-A3GHV:1:1101:6911:8255/1\n+ATCTGGTTCCTACTTCAGGGCCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATT\n++\n+BCCCCFFFFFFFGGGGGGGGGGGHHHHGHGHHHHHHHHHGGGGGGHHHHGHHHHHHHHHHGHHHHHHGGHGGHHHGHHHHFHHGHHHHHHHHHGHEHEFFGHHEGGCEFGGFHHHBGHHGHHHHGHFHHHGHGHGHGGCDFDDACGGGGGGGAAFFFFFFFFFBAFFFFFB;FFFFFFADDFFFFFFFFFFEFFFFFFFFFFBFFFFFFFFFFFFFFEFFFFFFFFBFEFFFFEFE;DFFFDFBFF/9BFB\n+@M01368:8:000000000-A3GHV:1:1101:14518:9998/1\n+GTTATTATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGGGATAGACCTGTGATCCATCGTGAT\n++\n+AAAAAFFFFFFFGGGGGGGGGGHGGHHHHGHHHHHHHGCGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHGHHGFHFE5BGEEHFGGGHHHHHHHHFBHHGGGGFHGHHFGHHHHGHHHHHHGEGGGGFHFHGEGHHGGCDGDGHGGGDGGHGGCGGGHGHHH/ACDG?.1FGCDCCGCA.CC@CDCHFHGFFGGGEBFGAB//CEFBFGG.:;D;;A0AFFFFFB..:@ABFF//;BFFFFFBF/9D:A//\n+@M01368:8:000000000-A3GHV:1:1101:18422:19051/1\n+GTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACG\n++\n+CCCCCFDDDDDFGGGGGGGGGGHHHHHHHHHHHHHHHHGHHHHHHFHHHHGGGGHHHHHHHHHGHHHHHHHHHHHHGGHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHGCGGGHHHHHHHHHHHHHHHHHHHHHHGFDHGFHCFGGGGFGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF;FFFFFFFFFFFFFFFFFFFFFFFFFFFFEFBFFFFFFFFFF:FFF.\n+@M01368:8:000000000-A3GHV:1:1101:25545:21098/1\n+ATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATAAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGGCTTATTTAAGGGGAACGGGTGGGG\n++\n+33AA?DFD5BDFGGGFEBDGEGHEGHGEGHCEGGHHCHGHHFFHHGFGAGE53FF2FAFFGDE5FFFE5GFBFGAEE1GHHHGHHHEHE3FGHF@GEGEGGHHGG3FAGFFDE?EEE3GFEGFGFGGCG?GHHHFHGGGC@DHFFHD/A<C@EGFDCGGGHFHHHEGFGHBFHG0:CEHFCHGGED.;0CEF.F99B0CFFEEFGGG0FBFBBF0F/FFBDE?/9//9B.FFBFFFFFFBF..A..;@B--\n+@M01368:8:000000000-A3GHV:1:1101:5446:12248/1\n+AATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTAT\n++\n+CCCCDFFFFCCFGGGGGGGGFGHHHHHGGGGHHHHHHHHHHHHHHHHGBGHGGHGGHHHHHHHHHHGHGHGGGGGHHHHHHHHGHHHHHHHHHGGGGGHHHHFFGHHHGGGGGGHHHGFGGHHGGGGHHHHHHGGGGGGHGHHGGGGGGGHGGGGGGHHHHHHHHHHHHHFHGGGHHHHGGGGGG:FE;EGEGGGGG/;?FGGGGGGGFFFFGGFFFFFFFFFBFFFFFFFFFFBFFFFFFEFFFFFEFFF\n+@M01368:8:000000000-A3GHV:1:1101:5861:6452/1\n+ATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTT\n++\n+ABCCCFFFFFFFGGGGGGGGGGHHHHHHHHHHHGHHHHGHHHHHHHHHHHGGGGHHHHHHHHFHHHHHHGGHGHGGHGGHHHHHHHGGHFHHHGGGGGHHHHHHHHHHHHHHHHHHGGGGGHHHHHEGGHHGGGGGGHHHGGGGHGGGGGHHHHHHGGGDCGHHHHGGGGGGGHEFGGGGHGHHHGHGGGFGGGGGGGEGGGGGGG?E0CEFGGGGGFEE9EEFFFFFBFFFFFFFBFFBD.AFFFFFFF0\n+@M01368:8:000000000-A3GHV:1:1102:10403:6021/1\n+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTG\n++\n+>A@AAAAD2ADDFFGGGGGFGGHA?EAEFBEAGHFABFGG5FDF5DB1EEGAFDFB53FF5FH@G5FFEHGHEFHFFHBE333GF43GCGGGGE@0?BFGGB0B?FHGFDGGHHHBFFDEGGHGFFFDFE@<1>@FFFGHHHHFHEFGDABFFGG/@DCE<CG1<GF0/DD000=<DHGBDFDCECE/:AC?-;-;9B/ABBB-AD9BFB99AB?BDFBAD-.9..@;=;;..9..9/9;BEF;A:9/BFF\n+@M01368:8:000000000-A3GHV:1:1102:10677:23253/1\n+CCTTAAATAAGACATCACGATGGATCACAGGTCTATCACC'..b'FHGDF@@?CGFHCEGGGFD.CCC?EGHBHHHFHHFBCFFGEB/CEGGGGDAA.90C9CEBFGGBBF/9.9FBFFFBBFF//99FFFFEABF//99FFEFFFBFF\n+@M01368:8:000000000-A3GHV:1:1113:5741:16959/1\n+TAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGA\n++\n+ABBBBFFFFFFFGGGFGGGGGGHHHGHHGGHBGHGAGFFCAFGHGFFGHHGFHHHHHGGGGGHGHHHHHHHHE3BFFHHHGG0EDF@GHFFGGGHGGGGGGGGGGGGGHHGGEEFHGFHHDDG@DGGGHHGDGGGGGHGG?CF?HHGHHHGHGHHHFFHGGGHHHHGGCD.;0<C;CGGGGEFF/.;0;FFFBF/0;0CFGFFB..9B/;0CBFFBBFFFFBAC?DED9;B9AD;.FFFB/B/;FBA/B//\n+@M01368:8:000000000-A3GHV:1:1114:10130:11959/1\n+CGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTCTGATCTGTCTCTTATACACATCTCCGAGCCCACGAGACTAAGGCGAATCTCGTATGCCGTCTTCTGCTTGCAACAAACACACATCCAGA\n++\n+>A33>FFFFFF@FFFGGGGFGGD4EGGGGGHHGFFGHFGGHHHFEFHECFHHHEHGAGBCCGCCEGGHGHEGFBFHFHHHHGGGHFHGHEGGGFEGEGG??@DBGHGGC-@FFGHFHFHB-??DA-DD@9:BEBG@B?E?A.ABBFBFA??F.FF/BFFB/B9B/9;BF9FF9FFFFFFFFFFFFFF?BB?;9EE;-;DDB9//;B-B@;ABFFEFFFF/-.9A.;//9/BF./B/..9.9:...//////\n+@M01368:8:000000000-A3GHV:1:1114:14540:5315/1\n+CTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGGGGGCTATTTAGGTTTTT\n++\n+AABCCFFFFFFFGGGGGGGGGGHHHHHHHFHHHHGHHGHHGGGHGGHHHHHHHGHHHHHHGGGGGHHFHHHFGHHGGFHHHHHGGGGGHHHGHGGHHHGGGGGGHGHGGGGHHGGGGHHHHHEGDDFGFFFHHGGGGGCDAFCFGFDHHHHGGHGHHHHHHBCGEHHHHGGHG.ACGEHGG0CBFFF:A;BB0;09CGF00CFFFE0AA?//CFFFFFFFFFFFFFFFBEF;A.-=A--:BBFB90;;FE.\n+@M01368:8:000000000-A3GHV:1:1114:15066:16302/1\n+TAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAACGGTTGTTAATTAATTATTGCTTGTAGGACA\n++\n+BBBBAFFBDFBFBGGGGGFGGGBGFFFHGFHHGFFFHGHHHGHHHHFFHHHGHGC?AEFFHEFBFFFGHHHHH1FDGFFHGHGHFEGCGC-<<AHHHGGGGGGGFHH0GHFCCCADGGG?.9/A-???DGGFFF.9F9/EE-;;BBBFFBFFFFFFFFFEFFFFBFFBBFFFFF/BFFBFFFFF-DBFFF;/BFF//BB//9/BEA---9:BFFFFFF/F/.;.:@9.BBFF/;BFF/;/////9/////.\n+@M01368:8:000000000-A3GHV:1:1114:16639:15258/1\n+CCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGCGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTCCAATATTACAGGCGAACATACTTACTAAATTGTGT\n++\n+11>>ABFFFFFFGGCGC1BBF1GHHHBHHHHGFHGH1A100AA/GGEHGFBAA1FFD2FHHHHECG2F1BB/E0FC/A>EE/FGGFGEF1EGBBEHHCGGGHBGEHFHE0B?E--;C?CCGGG@A@GBFBBBB09?@??A-AB--ABE@=-=-9AE9A;FFFFFE=9-//;//;9FF/////;;///9B///;/B////9FFBB;--A@-;/9;9-:-/;;FFFE9BF?@;-9-99/B9F/://///99/;\n+@M01368:8:000000000-A3GHV:1:1114:2404:13066/1\n+TCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCTGTCTATTATACACATC\n++\n+CCCCCFFFFCFFGGGGGGGGGGHHHHHGHHHHHHHHHFFHHHHHGGGGHHHHHHHHFHHHHHHFGGHHGGHGGHHHHHHGHHFHHHHGGGGGGHHHHHHGHHHHHHHHHHGGGGGGGHH?FGHHHGGGGGGHHGGFGGHHGGGGHHHHHFGGGGFGHGHHGGGGGGGHGGGEGGHHGHHHHHHHHHGFBFFDA0FGGGFFGG0:EFGGGGGGGG;AEBF0B0BFFBFFFFFFFFFFFFFFFFFFFFFEFF0\n+@M01368:8:000000000-A3GHV:1:1114:9184:6959/1\n+GGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGACCCTGAAGTAGGAACCAGATGTCGGATACAGTTCACTTTCTGTCTCTTA\n++\n+AABBBFFFCCCBFGGGGGGGGGHHHHHHHHGGGGGGHHHG3FFHHHFGFGGGHHHGGGEHHGGGGHHHHHHGGGGGGHGHGGGGGGGDEGGGGEGGFHHHHHHHHHHHHGGGFGEHHGGFDGGGDFFGFHHHHGFCFHHHHHEFHFHGGFFGHHGGGHHHHDGHHHFHHHFFFFGFGGG.EFGGGGFGEBFGGGFGFGGGGFFBFGGBBFFFFFB/FEFF?///;A::AABBFFFBFFFFFFFFFBFFFF/\n'
b
diff -r 4aab5ae907b6 -r 6f6537780379 test-data/metadata_test_nonviral_1_run.xlsx
b
Binary file test-data/metadata_test_nonviral_1_run.xlsx has changed
b
diff -r 4aab5ae907b6 -r 6f6537780379 test-data/metadata_test_viral.xlsx
b
Binary file test-data/metadata_test_viral.xlsx has changed
b
diff -r 4aab5ae907b6 -r 6f6537780379 test-data/metadata_test_viral_optional_columns.xlsx
b
Binary file test-data/metadata_test_viral_optional_columns.xlsx has changed
b
diff -r 4aab5ae907b6 -r 6f6537780379 test-data/sample_no_extension
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_no_extension Thu Feb 24 18:20:40 2022 +0000
b
b'@@ -0,0 +1,400 @@\n+@M01368:8:000000000-A3GHV:1:1101:6911:8255/1\n+ATCTGGTTCCTACTTCAGGGCCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATT\n++\n+BCCCCFFFFFFFGGGGGGGGGGGHHHHGHGHHHHHHHHHGGGGGGHHHHGHHHHHHHHHHGHHHHHHGGHGGHHHGHHHHFHHGHHHHHHHHHGHEHEFFGHHEGGCEFGGFHHHBGHHGHHHHGHFHHHGHGHGHGGCDFDDACGGGGGGGAAFFFFFFFFFBAFFFFFB;FFFFFFADDFFFFFFFFFFEFFFFFFFFFFBFFFFFFFFFFFFFFEFFFFFFFFBFEFFFFEFE;DFFFDFBFF/9BFB\n+@M01368:8:000000000-A3GHV:1:1101:14518:9998/1\n+GTTATTATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGGGATAGACCTGTGATCCATCGTGAT\n++\n+AAAAAFFFFFFFGGGGGGGGGGHGGHHHHGHHHHHHHGCGHHHHHHHHHHHHHHHGGGGGHHHHHHHHHGHHGFHFE5BGEEHFGGGHHHHHHHHFBHHGGGGFHGHHFGHHHHGHHHHHHGEGGGGFHFHGEGHHGGCDGDGHGGGDGGHGGCGGGHGHHH/ACDG?.1FGCDCCGCA.CC@CDCHFHGFFGGGEBFGAB//CEFBFGG.:;D;;A0AFFFFFB..:@ABFF//;BFFFFFBF/9D:A//\n+@M01368:8:000000000-A3GHV:1:1101:18422:19051/1\n+GTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACG\n++\n+CCCCCFDDDDDFGGGGGGGGGGHHHHHHHHHHHHHHHHGHHHHHHFHHHHGGGGHHHHHHHHHGHHHHHHHHHHHHGGHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHGCGGGHHHHHHHHHHHHHHHHHHHHHHGFDHGFHCFGGGGFGGFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF;FFFFFFFFFFFFFFFFFFFFFFFFFFFFEFBFFFFFFFFFF:FFF.\n+@M01368:8:000000000-A3GHV:1:1101:25545:21098/1\n+ATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATAAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGGCTTATTTAAGGGGAACGGGTGGGG\n++\n+33AA?DFD5BDFGGGFEBDGEGHEGHGEGHCEGGHHCHGHHFFHHGFGAGE53FF2FAFFGDE5FFFE5GFBFGAEE1GHHHGHHHEHE3FGHF@GEGEGGHHGG3FAGFFDE?EEE3GFEGFGFGGCG?GHHHFHGGGC@DHFFHD/A<C@EGFDCGGGHFHHHEGFGHBFHG0:CEHFCHGGED.;0CEF.F99B0CFFEEFGGG0FBFBBF0F/FFBDE?/9//9B.FFBFFFFFFBF..A..;@B--\n+@M01368:8:000000000-A3GHV:1:1101:5446:12248/1\n+AATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTAT\n++\n+CCCCDFFFFCCFGGGGGGGGFGHHHHHGGGGHHHHHHHHHHHHHHHHGBGHGGHGGHHHHHHHHHHGHGHGGGGGHHHHHHHHGHHHHHHHHHGGGGGHHHHFFGHHHGGGGGGHHHGFGGHHGGGGHHHHHHGGGGGGHGHHGGGGGGGHGGGGGGHHHHHHHHHHHHHFHGGGHHHHGGGGGG:FE;EGEGGGGG/;?FGGGGGGGFFFFGGFFFFFFFFFBFFFFFFFFFFBFFFFFFEFFFFFEFFF\n+@M01368:8:000000000-A3GHV:1:1101:5861:6452/1\n+ATTATGTCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTT\n++\n+ABCCCFFFFFFFGGGGGGGGGGHHHHHHHHHHHGHHHHGHHHHHHHHHHHGGGGHHHHHHHHFHHHHHHGGHGHGGHGGHHHHHHHGGHFHHHGGGGGHHHHHHHHHHHHHHHHHHGGGGGHHHHHEGGHHGGGGGGHHHGGGGHGGGGGHHHHHHGGGDCGHHHHGGGGGGGHEFGGGGHGHHHGHGGGFGGGGGGGEGGGGGGG?E0CEFGGGGGFEE9EEFFFFFBFFFFFFFBFFBD.AFFFFFFF0\n+@M01368:8:000000000-A3GHV:1:1102:10403:6021/1\n+CGCTCCGGGCCCATAACACTTGGGGGTAGCTAAAGTGAACTGTATCCGACATCTGGTTCCTACTTCAGGGTCATAAAACCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTG\n++\n+>A@AAAAD2ADDFFGGGGGFGGHA?EAEFBEAGHFABFGG5FDF5DB1EEGAFDFB53FF5FH@G5FFEHGHEFHFFHBE333GF43GCGGGGE@0?BFGGB0B?FHGFDGGHHHBFFDEGGHGFFFDFE@<1>@FFFGHHHHFHEFGDABFFGG/@DCE<CG1<GF0/DD000=<DHGBDFDCECE/:AC?-;-;9B/ABBB-AD9BFB99AB?BDFBAD-.9..@;=;;..9..9/9;BEF;A:9/BFF\n+@M01368:8:000000000-A3GHV:1:1102:10677:23253/1\n+CCTTAAATAAGACATCACGATGGATCACAGGTCTATCACC'..b'FHGDF@@?CGFHCEGGGFD.CCC?EGHBHHHFHHFBCFFGEB/CEGGGGDAA.90C9CEBFGGBBF/9.9FBFFFBBFF//99FFFFEABF//99FFEFFFBFF\n+@M01368:8:000000000-A3GHV:1:1113:5741:16959/1\n+TAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGA\n++\n+ABBBBFFFFFFFGGGFGGGGGGHHHGHHGGHBGHGAGFFCAFGHGFFGHHGFHHHHHGGGGGHGHHHHHHHHE3BFFHHHGG0EDF@GHFFGGGHGGGGGGGGGGGGGHHGGEEFHGFHHDDG@DGGGHHGDGGGGGHGG?CF?HHGHHHGHGHHHFFHGGGHHHHGGCD.;0<C;CGGGGEFF/.;0;FFFBF/0;0CFGFFB..9B/;0CBFFBBFFFFBAC?DED9;B9AD;.FFFB/B/;FBA/B//\n+@M01368:8:000000000-A3GHV:1:1114:10130:11959/1\n+CGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTCTGATCTGTCTCTTATACACATCTCCGAGCCCACGAGACTAAGGCGAATCTCGTATGCCGTCTTCTGCTTGCAACAAACACACATCCAGA\n++\n+>A33>FFFFFF@FFFGGGGFGGD4EGGGGGHHGFFGHFGGHHHFEFHECFHHHEHGAGBCCGCCEGGHGHEGFBFHFHHHHGGGHFHGHEGGGFEGEGG??@DBGHGGC-@FFGHFHFHB-??DA-DD@9:BEBG@B?E?A.ABBFBFA??F.FF/BFFB/B9B/9;BF9FF9FFFFFFFFFFFFFF?BB?;9EE;-;DDB9//;B-B@;ABFFEFFFF/-.9A.;//9/BF./B/..9.9:...//////\n+@M01368:8:000000000-A3GHV:1:1114:14540:5315/1\n+CTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGGGGGCTATTTAGGTTTTT\n++\n+AABCCFFFFFFFGGGGGGGGGGHHHHHHHFHHHHGHHGHHGGGHGGHHHHHHHGHHHHHHGGGGGHHFHHHFGHHGGFHHHHHGGGGGHHHGHGGHHHGGGGGGHGHGGGGHHGGGGHHHHHEGDDFGFFFHHGGGGGCDAFCFGFDHHHHGGHGHHHHHHBCGEHHHHGGHG.ACGEHGG0CBFFF:A;BB0;09CGF00CFFFE0AA?//CFFFFFFFFFFFFFFFBEF;A.-=A--:BBFB90;;FE.\n+@M01368:8:000000000-A3GHV:1:1114:15066:16302/1\n+TAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGTGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTTCAATATTACAGGCGAACATACTTACTAACGGTTGTTAATTAATTATTGCTTGTAGGACA\n++\n+BBBBAFFBDFBFBGGGGGFGGGBGFFFHGFHHGFFFHGHHHGHHHHFFHHHGHGC?AEFFHEFBFFFGHHHHH1FDGFFHGHGHFEGCGC-<<AHHHGGGGGGGFHH0GHFCCCADGGG?.9/A-???DGGFFF.9F9/EE-;;BBBFFBFFFFFFFFFEFFFFBFFBBFFFFF/BFFBFFFFF-DBFFF;/BFF//BB//9/BEA---9:BFFFFFF/F/.;.:@9.BBFF/;BFF/;/////9/////.\n+@M01368:8:000000000-A3GHV:1:1114:16639:15258/1\n+CCTAAATAGCCCACACGTTCCCCTTAAATAAGACATCACGATGGATCACAGGTCTATCACCCTATTAACCACTCACGGGAGCTCTCCATGCATTTGGTATTTTCGTCTGGGGGGTGCGCACGCGATAGCATTGCGAGACGCTGGAGCCGGAGCACCCTATGTCGCAGTATCTGTCTTTGATTCCTGCCTCATCCTATTATTTATCGCACCTACGTCCAATATTACAGGCGAACATACTTACTAAATTGTGT\n++\n+11>>ABFFFFFFGGCGC1BBF1GHHHBHHHHGFHGH1A100AA/GGEHGFBAA1FFD2FHHHHECG2F1BB/E0FC/A>EE/FGGFGEF1EGBBEHHCGGGHBGEHFHE0B?E--;C?CCGGG@A@GBFBBBB09?@??A-AB--ABE@=-=-9AE9A;FFFFFE=9-//;//;9FF/////;;///9B///;/B////9FFBB;--A@-;/9;9-:-/;;FFFE9BF?@;-9-99/B9F/://///99/;\n+@M01368:8:000000000-A3GHV:1:1114:2404:13066/1\n+TCCTACAAGCATTAATTAATTAACACACTTTAGTAAGTATGTTCGCCTGTAATATTGAACGTAGGTGCGATAAATAATAGGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCTGTCTATTATACACATC\n++\n+CCCCCFFFFCFFGGGGGGGGGGHHHHHGHHHHHHHHHFFHHHHHGGGGHHHHHHHHFHHHHHHFGGHHGGHGGHHHHHHGHHFHHHHGGGGGGHHHHHHGHHHHHHHHHHGGGGGGGHH?FGHHHGGGGGGHHGGFGGHHGGGGHHHHHFGGGGFGHGHHGGGGGGGHGGGEGGHHGHHHHHHHHHGFBFFDA0FGGGFFGG0:EFGGGGGGGG;AEBF0B0BFFBFFFFFFFFFFFFFFFFFFFFFEFF0\n+@M01368:8:000000000-A3GHV:1:1114:9184:6959/1\n+GGATGAGGCAGGAATCAAAGACAGATACTGCGACATAGGGTGCTCCGGCTCCAGCGTCTCGCAATGCTATCGCGTGCACACCCCCCAGACGAAAATACCAAATGCATGGAGAGCTCCCGTGAGTGGTTAATAGGGTGATAGACCTGTGATCCATCGTGATGTCTTATTTAAGGGGAACGTGTGGGCTATTTAGGTTTTATGACCCTGAAGTAGGAACCAGATGTCGGATACAGTTCACTTTCTGTCTCTTA\n++\n+AABBBFFFCCCBFGGGGGGGGGHHHHHHHHGGGGGGHHHG3FFHHHFGFGGGHHHGGGEHHGGGGHHHHHHGGGGGGHGHGGGGGGGDEGGGGEGGFHHHHHHHHHHHHGGGFGEHHGGFDGGGDFFGFHHHHGFCFHHHHHEFHFHGGFFGHHGGGHHHHDGHHHFHHHFFFFGFGGG.EFGGGGFGEBFGGGFGFGGGGFFBFGGBBFFFFFB/FEFF?///;A::AABBFFFBFFFFFFFFFBFFFF/\n'