Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fastq_dl commit 8da9481e027494c5fd881564d01d9e2ab55fe305 |
added:
fastq_dl.xml macros.xml test-data/Metadata_files/DRR011117.tsv test-data/Metadata_files/ERR2651925.tsv test-data/Metadata_files/ERR4319712.tsv test-data/Metadata_files/SRR9678965.tsv test-data/accessions.txt |
b |
diff -r 000000000000 -r 5e7401777990 fastq_dl.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_dl.xml Sat Nov 16 18:43:55 2024 +0000 |
[ |
b'@@ -0,0 +1,326 @@\n+<tool id="fastq_dl" name="fastq-dl" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">\n+ <description>Download FASTQ files from ENA</description>\n+ <macros>\n+ <import>macros.xml</import>\n+ </macros>\n+ <expand macro="requirements"/>\n+ <expand macro="creators"/>\n+ <command detect_errors="aggressive"><![CDATA[\n+ mkdir -p single-end paired-end logs &&\n+ #if str($input_type.select_input_type) == "accession_ids"\n+ IFS=\' \' &&\n+ read -ra accessionsarr <<< "$accessions" &&\n+ @FASTQ_DL_FOR_LOOP@\n+ #elif str($input_type.select_input_type) == "accessions_list"\n+ mapfile -t accessionsarr < "$accessions_file" &&\n+ @FASTQ_DL_FOR_LOOP@\n+ #end if\n+ #if str($only_download_metadata) == ""\n+ &&\n+ find . -maxdepth 1 -name "*_1.fastq.gz" -exec bash -c \'mv "\\$0" "paired-end/\\$(basename "\\$0" | sed "s/_1/_forward/")"\' {} \\; &&\n+ find . -maxdepth 1 -name "*_2.fastq.gz" -exec bash -c \'mv "\\$0" "paired-end/\\$(basename "\\$0" | sed "s/_2/_reverse/")"\' {} \\; &&\n+ find . -maxdepth 1 -name "*_R1.fastq.gz" -exec bash -c \'mv "\\$0" "paired-end/\\$(basename "\\$0" | sed "s/_R1/_forward/")"\' {} \\; &&\n+ find . 
-maxdepth 1 -name "*_R2.fastq.gz" -exec bash -c \'mv "\\$0" "paired-end/\\$(basename "\\$0" | sed "s/_R2/_reverse/")"\' {} \\; &&\n+ mv *.gz single-end > /dev/null 2>&1 || true\n+ #end if\n+ ]]></command>\n+ <inputs>\n+ <conditional name="input_type">\n+ <param name="select_input_type" type="select" label="Select an input type">\n+ <option value="accession_ids">ENA accession IDs</option>\n+ <option value="accessions_list">A list of ENA accession IDs, one per row</option>\n+ </param>\n+ <when value="accession_ids">\n+ <param name="accessions" type="text" label="Accession IDs" help="ENA accessions (Study, Sample, Experiment, Run accession) separated by whitespaces" optional="false" />\n+ </when>\n+ <when value="accessions_list">\n+ <param name="accessions_file" type="data" format="txt" label="Accession IDs File" help="ENA accessions (Study, Sample, Experiment, Run accession) stored in a file. One accession per line" optional="false" />\n+ </when>\n+ </conditional>\n+ <param name="group_by_experiment" type="boolean" label="Group by Experiment" help="Group Runs by experiment accession" truevalue="--group-by-experiment" falsevalue="" />\n+ <param name="group_by_sample" type="boolean" label="Group by Sample" help="Group Runs by sample accession" truevalue="--group-by-sample" falsevalue="" />\n+ <param name="only_download_metadata" type="boolean" label="Only Download Metadata" help="Skip FASTQ download and retrieve metadata only" truevalue="--only-download-metadata" falsevalue="" />\n+ </inputs>\n+ <outputs>\n+ <collection name="metadata" type="list" label="Metadata files">\n+ <discover_datasets pattern="(?P<designation>.+)\\-fastq-run-info.tsv" directory="logs" ext="tsv" />\n+ </collection>\n+ <collection name="single_end_collection" type="list" label="Single-end data">\n+ <filter>only_download_metadata == False</filter>\n+ <discover_datasets pattern="(?P<designation>.+)\\.fastq\\.gz" directory="single-end" ext="fastq.gz" />\n+ </collection>\n+ <collection 
name="paired_end_collection" type="list:paired" label="Paired-end data">\n+ <filter>only_download_metadata == False</filter>\n+ <discover_datasets pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\\.fastq.gz" directory="paired-end" ext="fastq.gz" />\n+ </collection>\n+ </outputs>\n+ <tests>\n+ <!-- #1 Testing single end and paired end using accessions file -->\n+ <test expect_num_outputs="3">\n+ <param name="select_input_type" value="accessions_list" />\n+ <param name="acce'..b'25" file="Metadata_files/ERR2651925.tsv" />\n+ <element name="ERR4319712" file="Metadata_files/ERR4319712.tsv" />\n+ <element name="SRR9678965" file="Metadata_files/SRR9678965.tsv" />\n+ </output_collection>\n+ <output_collection name="single_end_collection" type="list" count="2">\n+ <element name="SAMD00008419">\n+ <assert_contents>\n+ <has_size size="23102" />\n+ </assert_contents>\n+ </element>\n+ <element name="SAMN12272107">\n+ <assert_contents>\n+ <has_size size="2465043" />\n+ </assert_contents>\n+ </element>\n+ </output_collection>\n+ <output_collection name="paired_end_collection" type="list:paired" count="2">\n+ <element name="SAMEA4724129">\n+ <element name="forward">\n+ <assert_contents>\n+ <has_size size="4977454" />\n+ </assert_contents>\n+ </element>\n+ <element name="reverse">\n+ <assert_contents>\n+ <has_size size="6079979" />\n+ </assert_contents>\n+ </element>\n+ </element>\n+ <element name="SAMEA7040559">\n+ <element name="forward">\n+ <assert_contents>\n+ <has_size size="2104680" />\n+ </assert_contents>\n+ </element>\n+ <element name="reverse">\n+ <assert_contents>\n+ <has_size size="2578613" />\n+ </assert_contents>\n+ </element>\n+ </element>\n+ </output_collection>\n+ </test>\n+ </tests>\n+ <help><![CDATA[\n+\n+This tool downloads FASTQ files from the European Nucleotide Archive (ENA) based on a list of ENA accession IDs.\n+You can provide either accession IDs in text format or upload a file containing accession IDs (one per line).\n+The tool also allows you 
to group downloaded data by experiment or sample and can optionally retrieve only metadata\n+without downloading the FASTQ files.\n+\n+Input Types\n+-----------\n+\n+You can select from two types of inputs:\n+\n+1. **ENA Accession IDs (Text Input)**:\n+ - Provide a list of ENA accession IDs (e.g., Study, Sample, Experiment, or Run accessions) separated by whitespace.\n+ \n+2. **Accession IDs File**:\n+ - Provide a file containing a list of ENA accession IDs, one per line.\n+\n+Parameters\n+----------\n+\n+- **Group by Experiment**: \n+ This option groups the downloaded runs by the experiment accession, which can be useful if you need to process \n+ data related to a specific experiment.\n+\n+- **Group by Sample**: \n+ This option groups the downloaded runs by the sample accession.\n+\n+- **Only Download Metadata**: \n+ Select this option if you only want to retrieve metadata without downloading the actual FASTQ files. This is \n+ useful if you need information about the runs but do not need the raw sequence data.\n+\n+Outputs\n+-------\n+\n+The tool generates three types of outputs:\n+\n+1. **Metadata Files**: \n+ This collection contains metadata files for each accession, in `.tsv` format, which provide details about the \n+ corresponding run.\n+\n+2. **Single-End Data**: \n+ If the downloaded FASTQ files contain single-end reads, those files will be placed into a separate collection\n+ in `.fastq.gz` format.\n+\n+3. **Paired-End Data**: \n+ If the downloaded FASTQ files contain paired-end reads, those files will be grouped into pairs (forward and reverse).\n+ The paired files will also be placed in a separate collection and will be in `.fastq.gz` format.\n+\n+ ]]></help>\n+ <expand macro="citations"/>\n+</tool>\n\\ No newline at end of file\n' |
b |
diff -r 000000000000 -r 5e7401777990 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Sat Nov 16 18:43:55 2024 +0000 |
[ |
@@ -0,0 +1,42 @@ +<macros> + <token name="@TOOL_VERSION@">3.0.0</token> + <token name="@VERSION_SUFFIX@">0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">fastq-dl</requirement> + </requirements> + </xml> <!-- The FASTQ_DL_FOR_LOOP token below expects the including command to have filled the bash array "accessionsarr"; it runs fastq-dl once per entry and moves each fastq-run-info.tsv into logs/ under the accession's name. The trailing "|| true" makes every iteration exit 0 even when fastq-dl or mv fails. --> + <token name="@FASTQ_DL_FOR_LOOP@"><![CDATA[ + for accessionid in "\${accessionsarr[@]}"; do + fastq-dl --accession "\$accessionid" + --provider ena + --only-provider + $only_download_metadata + $group_by_experiment + $group_by_sample + && + mv fastq-run-info.tsv logs/"\$accessionid"-fastq-run-info.tsv > /dev/null 2>&1 || true; + done + ]]></token> <!-- NOTE(review): the citation below records fastq-dl version 2.0.2 while the TOOL_VERSION token is 3.0.0; confirm which version should be cited. --> + <xml name="citations"> + <citations> + <citation type="bibtex"> + <![CDATA[ + @software{petit2024fastq-dl, + author = {Robert A. Petit III and Michael B. Hall and Gerry Tonkin-Hill and Jie Zhu and Timothy D. Read}, + title = {{fastq-dl}: efficiently download FASTQ files from SRA or ENA repositories}, + version = {2.0.2}, + year = {2024}, + url = {https://github.com/rpetit3/fastq-dl}, + note = {Accessed: 2024-10-31} + } + ]]> + </citation> + </citations> + </xml> + <xml name="creators"> + <creator> + <person givenName="Rand" familyName="Zoabi" url="https://github.com/RZ9082" identifier="https://orcid.org/0009-0000-2501-8053" /> + </creator> + </xml> +</macros> |
b |
diff -r 000000000000 -r 5e7401777990 test-data/Metadata_files/DRR011117.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Metadata_files/DRR011117.tsv Sat Nov 16 18:43:55 2024 +0000 |
b |
@@ -0,0 +1,2 @@ +run_accession experiment_title sample_accession project_name submission_accession library_min_fragment_size bam_md5 assembly_software library_prep_longitude library_selection pcr_isolation_protocol chip_protocol sequencing_primer_provider serotype environment_feature last_updated submitted_galaxy extraction_protocol germline secondary_project culture_collection submission_tool sra_bytes read_strand rna_purity_280_ratio hi_c_protocol collected_by submitted_ftp restriction_enzyme_target_sequence isolate fastq_bytes instrument_platform variety sequencing_date_format temperature sra_aspera ecotype submitted_aspera sampling_campaign bam_ftp tissue_lib environmental_sample control_experiment sex submitted_md5 checklist fastq_galaxy library_gen_protocol specimen_voucher library_prep_latitude submitted_bytes taxonomic_identity_marker run_date country ncbi_reporting_standard sample_description sra_galaxy sample_prep_interval fastq_md5 secondary_study_accession experimental_protocol read_count study_title bio_material rna_prep_5_protocol host_body_site local_environmental_context assembly_quality collection_date_end sample_capture_status sample_title host_genotype host_phenotype environmental_medium cultivar instrument_model faang_library_selection target_gene bam_bytes library_max_fragment_size experiment_target sequencing_date description nominal_sdev chip_ab_provider environment_material host_tax_id sample_material sample_storage_processing sra_md5 cell_type fastq_ftp disease sample_prep_interval_units broker_name sub_strain base_count library_strategy restriction_site serovar investigation_type location library_source sra_ftp age library_layout experimental_factor sequencing_primer_catalog environment_biome rna_purity_230_ratio dnase_protocol dev_stage library_prep_date_format bam_aspera binning_software datahub rna_integrity_num library_prep_date location_start marine_region aligned file_location sample_collection chip_target nominal_length 
broad_scale_environmental_context sequencing_location status completeness_score lon fastq_aspera tax_lineage host_sex library_pcr_isolation_protocol sample_alias mating_type collection_date_start sub_species contamination_score run_alias restriction_enzyme depth submitted_read_type library_construction_protocol host_growth_conditions collection_date experiment_alias host_gravidity center_name identified_by cell_line sampling_site host library_name tag first_created lat strain experiment_accession scientific_name host_status tax_id study_accession submitted_format submitted_host_sex bisulfite_protocol altitude rt_prep_protocol host_scientific_name bam_galaxy accession secondary_sample_accession sample_storage cage_protocol sampling_platform taxonomic_classification location_end protocol_label elevation salinity sequencing_method sequencing_primer_lot first_public transposase_protocol study_alias +DRR011117 454 GS Junior sequencing: HXVJWSB01__Yaku_0782__ITS3 SAMD00008419 rhizosphere metagenome DRA001010 PCR 2015-06-19 163157 23102 LS454 fasp.sra.ebi.ac.uk:/vol1/drr/DRR011/DRR011117 ftp.sra.ebi.ac.uk/vol1/fastq/DRR011/DRR011117/DRR011117.fastq.gz 1352764800000 Generic HXVJWSB01__Yaku_0782__ITS3 ftp.sra.ebi.ac.uk/vol1/drr/DRR011/DRR011117 b737064403c17493036beee0987a0556 DRP001052 104 Complex community structure of ectomycorrhizal, arbuscular-mycorrhizal and root-endophytic fungi in a mixed subtropical forest of ectomycorrhizal and arbuscular-mycorrhizal plants HXVJWSB01__Yaku_0782__ITS3 454 GS Junior 454 GS Junior sequencing: HXVJWSB01__Yaku_0782__ITS3 105143a627249be0bcc20c7755c1adcf ftp.sra.ebi.ac.uk/vol1/fastq/DRR011/DRR011117/DRR011117.fastq.gz 51498 AMPLICON METAGENOMIC ftp.sra.ebi.ac.uk/vol1/drr/DRR011/DRR011117 SINGLE dcc_metagenome public fasp.sra.ebi.ac.uk:/vol1/fastq/DRR011/DRR011117/DRR011117.fastq.gz 1;2787823;12908;408169;410657;939928 SAMD00008419 DRR011117 We sequenced fungal ITS sequences based on a tag-encoded massively-parallel pyrosequencing. 
For each root sample, the entire ITS region and partial ribosomal large subunit region was amplified using the fungus-specific high-coverage primer ITS1F_KYO2 and the universal primer LR3 (http://www.biology.duke.edu /fungi/mycolab/primers.htm). PCR was conducted using a temperature profile of 95??C for 10 min, followed by 20 cycles at 94??C for 20 s, 50??C for 30 s, 72??C for 120 s and a final extension at 72??C for 7 min with a buffer system of Ampdirect Plus and BIOTAQ HS DNA Polymerase (Shimadzu, Kyoto, Japan). The PCR product of each root sample was subjected to a second PCR step targeting the ITS2 region. The second PCR was conducted with a universal primer ITS3_KYO2 fused with the 454 Adaptor A and each sample-specific molecular ID, and the reverse universal primer LR_KYO1b (5'-MGC WGC ATT CCC AAA CWA-3') fused with the 454 Adaptor B. A buffer system of Taq DNA Polymerase with Standard Taq Buffer (New England BioLabs, Ipswich, MA, USA) was used under a temperature profile of 95??C for 1 min, followed by 40 cycles at 94??C for 20 s, 50??C for 30 s, 72??C for 60 s and a final extension at 72??C for 7 min. The ITS amplicons of the second PCR step were subjected to pyrosequencing. The first 576 and the second 624 samples were sequenced separately using a GS Junior sequencer (Roche). The rbcL amplicons of the first 480 root samples were pooled and purified using ExoSAP-IT (GE Healthcare) and QIAquick PCR Purification Kit (QIAGEN). The sequencing of the first 576 samples was conducted as instructed by the manufacturer. Likewise, the amplicons of the remaining 624 samples were pooled and purified, and then sequenced in the second run. DRX010073 KYOTO_HE HXVJWSB01__Yaku_0782__ITS3 2014-04-28 DRX010073 rhizosphere metagenome 939928 PRJDB2078 DRR011117 DRS009918 2014-04-28 PRJDB2078 |
b |
diff -r 000000000000 -r 5e7401777990 test-data/Metadata_files/ERR2651925.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Metadata_files/ERR2651925.tsv Sat Nov 16 18:43:55 2024 +0000 |
b |
@@ -0,0 +1,2 @@ +run_accession experiment_title sample_accession project_name submission_accession library_min_fragment_size bam_md5 assembly_software library_prep_longitude library_selection pcr_isolation_protocol chip_protocol sequencing_primer_provider serotype environment_feature last_updated submitted_galaxy extraction_protocol germline secondary_project culture_collection submission_tool sra_bytes read_strand rna_purity_280_ratio hi_c_protocol collected_by submitted_ftp restriction_enzyme_target_sequence isolate fastq_bytes instrument_platform variety sequencing_date_format temperature sra_aspera ecotype submitted_aspera sampling_campaign bam_ftp tissue_lib environmental_sample control_experiment sex submitted_md5 checklist fastq_galaxy library_gen_protocol specimen_voucher library_prep_latitude submitted_bytes taxonomic_identity_marker run_date country ncbi_reporting_standard sample_description sra_galaxy sample_prep_interval fastq_md5 secondary_study_accession experimental_protocol read_count study_title bio_material rna_prep_5_protocol host_body_site local_environmental_context assembly_quality collection_date_end sample_capture_status sample_title host_genotype host_phenotype environmental_medium cultivar instrument_model faang_library_selection target_gene bam_bytes library_max_fragment_size experiment_target sequencing_date description nominal_sdev chip_ab_provider environment_material host_tax_id sample_material sample_storage_processing sra_md5 cell_type fastq_ftp disease sample_prep_interval_units broker_name sub_strain base_count library_strategy restriction_site serovar investigation_type location library_source sra_ftp age library_layout experimental_factor sequencing_primer_catalog environment_biome rna_purity_230_ratio dnase_protocol dev_stage library_prep_date_format bam_aspera binning_software datahub rna_integrity_num library_prep_date location_start marine_region aligned file_location sample_collection chip_target nominal_length 
broad_scale_environmental_context sequencing_location status completeness_score lon fastq_aspera tax_lineage host_sex library_pcr_isolation_protocol sample_alias mating_type collection_date_start sub_species contamination_score run_alias restriction_enzyme depth submitted_read_type library_construction_protocol host_growth_conditions collection_date experiment_alias host_gravidity center_name identified_by cell_line sampling_site host library_name tag first_created lat strain experiment_accession scientific_name host_status tax_id study_accession submitted_format submitted_host_sex bisulfite_protocol altitude rt_prep_protocol host_scientific_name bam_galaxy accession secondary_sample_accession sample_storage cage_protocol sampling_platform taxonomic_classification location_end protocol_label elevation salinity sequencing_method sequencing_primer_lot first_public transposase_protocol study_alias library_prep_location rna_prep_3_protocol ph sequencing_longitude tissue_type isolation_source +ERR2651925 Illumina MiSeq paired end sequencing SAMEA4724129 ERA1521386 PCR 2018-11-16 ftp.sra.ebi.ac.uk/vol1/run/ERR265/ERR2651925/HDG2C-2016-10-r2-ITS_ACAGAC_L001_R1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/run/ERR265/ERR2651925/HDG2C-2016-10-r2-ITS_ACAGAC_L001_R2.fastq.gz 14052012 Lydie Kerdraon ftp.sra.ebi.ac.uk/vol1/run/ERR265/ERR2651925/HDG2C-2016-10-r2-ITS_ACAGAC_L001_R1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/run/ERR265/ERR2651925/HDG2C-2016-10-r2-ITS_ACAGAC_L001_R2.fastq.gz 4977454;6079979 ILLUMINA fasp.sra.ebi.ac.uk:/vol1/err/ERR265/005/ERR2651925 fasp.sra.ebi.ac.uk:/vol1/run/ERR265/ERR2651925/HDG2C-2016-10-r2-ITS_ACAGAC_L001_R1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/run/ERR265/ERR2651925/HDG2C-2016-10-r2-ITS_ACAGAC_L001_R2.fastq.gz 37493c0f131917a24acf4c535dc5b6b0;47d07f4bc698f774cc66d5d3111b8e60 ERC000011 ftp.sra.ebi.ac.uk/vol1/fastq/ERR265/005/ERR2651925/ERR2651925_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR265/005/ERR2651925/ERR2651925_2.fastq.gz 4821716;5918907 France ITS1 region 
associated to Crop residues of Wheat (Rotation) ftp.sra.ebi.ac.uk/vol1/err/ERR265/005/ERR2651925 6a1679e27fac1783e3c752cf89a6eb3f;d9795cac381bfcce1acb09a94729a280 ERP109315 49902 Microbial assemblages associated to crop residues 2016-10-31 Crop residues of Wheat (Rotation) Soissons Illumina MiSeq Illumina MiSeq paired end sequencing 1fde860669a32db7f8a9e46b6491fa77 ftp.sra.ebi.ac.uk/vol1/fastq/ERR265/005/ERR2651925/ERR2651925_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR265/005/ERR2651925/ERR2651925_2.fastq.gz 24951000 AMPLICON METAGENOMIC ftp.sra.ebi.ac.uk/vol1/err/ERR265/005/ERR2651925 PAIRED dcc_metagenome 425 public fasp.sra.ebi.ac.uk:/vol1/fastq/ERR265/005/ERR2651925/ERR2651925_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/ERR265/005/ERR2651925/ERR2651925_2.fastq.gz 1;131567;2759;33090;35493;131221;3193;58023;78536;58024;3398;1437183;4447;1437197;4734;38820;4479;359160;147368;1648038;147389;1648030;4564;4565 HDG2C.2016.10.r2.ITS_ACAGAC 2016-09-30 ena-RUN-IRHS-19-06-2018-10:38:11:697-377 PAIRED;PAIRED PCR based protocol 2016-10 ena-EXPERIMENT-IRHS-19-06-2018-10:38:11:697-377 IRHS PCR_ITS1 2018-06-19 ERX2668415 Triticum aestivum 4565 PRJEB27255 FASTQ;FASTQ ERR2651925 ERS2544267 2018-11-30 ena-STUDY-IRHS-12-06-2018-07:39:33:052-533 Crop residues |
b |
diff -r 000000000000 -r 5e7401777990 test-data/Metadata_files/ERR4319712.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Metadata_files/ERR4319712.tsv Sat Nov 16 18:43:55 2024 +0000 |
b |
@@ -0,0 +1,2 @@ +run_accession experiment_title sample_accession project_name submission_accession library_min_fragment_size bam_md5 assembly_software library_prep_longitude library_selection pcr_isolation_protocol chip_protocol sequencing_primer_provider serotype environment_feature last_updated submitted_galaxy extraction_protocol germline secondary_project culture_collection submission_tool sra_bytes read_strand rna_purity_280_ratio hi_c_protocol collected_by submitted_ftp restriction_enzyme_target_sequence isolate fastq_bytes instrument_platform variety sequencing_date_format temperature sra_aspera ecotype submitted_aspera sampling_campaign bam_ftp tissue_lib environmental_sample control_experiment sex submitted_md5 checklist fastq_galaxy library_gen_protocol specimen_voucher library_prep_latitude submitted_bytes taxonomic_identity_marker run_date country ncbi_reporting_standard sample_description sra_galaxy sample_prep_interval fastq_md5 secondary_study_accession experimental_protocol read_count study_title bio_material rna_prep_5_protocol host_body_site local_environmental_context assembly_quality collection_date_end sample_capture_status sample_title host_genotype host_phenotype environmental_medium cultivar instrument_model faang_library_selection target_gene bam_bytes library_max_fragment_size experiment_target sequencing_date description nominal_sdev chip_ab_provider environment_material host_tax_id sample_material sample_storage_processing sra_md5 cell_type fastq_ftp disease sample_prep_interval_units broker_name sub_strain base_count library_strategy restriction_site serovar investigation_type location library_source sra_ftp age library_layout experimental_factor sequencing_primer_catalog environment_biome rna_purity_230_ratio dnase_protocol dev_stage library_prep_date_format bam_aspera binning_software datahub rna_integrity_num library_prep_date location_start marine_region aligned file_location sample_collection chip_target nominal_length 
broad_scale_environmental_context sequencing_location status completeness_score lon fastq_aspera tax_lineage host_sex library_pcr_isolation_protocol sample_alias mating_type collection_date_start sub_species contamination_score run_alias restriction_enzyme depth submitted_read_type library_construction_protocol host_growth_conditions collection_date experiment_alias host_gravidity center_name identified_by cell_line sampling_site host library_name tag first_created lat strain experiment_accession scientific_name host_status tax_id study_accession submitted_format submitted_host_sex bisulfite_protocol altitude rt_prep_protocol host_scientific_name bam_galaxy accession secondary_sample_accession sample_storage cage_protocol sampling_platform taxonomic_classification location_end protocol_label elevation salinity sequencing_method sequencing_primer_lot first_public transposase_protocol study_alias +ERR4319712 Illumina MiSeq paired end sequencing SAMEA7040559 ERA2762825 PCR 2020-07-06 ftp.sra.ebi.ac.uk/vol1/run/ERR431/ERR4319712/STROMAEQ-100_TTCTTG_L001_R1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/run/ERR431/ERR4319712/STROMAEQ-100_TTCTTG_L001_R2.fastq.gz 4994168 ftp.sra.ebi.ac.uk/vol1/run/ERR431/ERR4319712/STROMAEQ-100_TTCTTG_L001_R1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/run/ERR431/ERR4319712/STROMAEQ-100_TTCTTG_L001_R2.fastq.gz 2104680;2578613 ILLUMINA fasp.sra.ebi.ac.uk:/vol1/err/ERR431/002/ERR4319712 fasp.sra.ebi.ac.uk:/vol1/run/ERR431/ERR4319712/STROMAEQ-100_TTCTTG_L001_R1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/run/ERR431/ERR4319712/STROMAEQ-100_TTCTTG_L001_R2.fastq.gz 59daac95e6d090255b2a9937d57300e5;f9ce64d451f71471cc3f636e5fecaf36 ERC000011 ftp.sra.ebi.ac.uk/vol1/fastq/ERR431/002/ERR4319712/ERR4319712_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR431/002/ERR4319712/ERR4319712_2.fastq.gz 2055148;2522395 Equine Gut Microbiome ftp.sra.ebi.ac.uk/vol1/err/ERR431/002/ERR4319712 a4f8e84258f24104cf60e9a1a50511df;49876c41e079a0b4adfa49e22cd1897e ERP122744 16481 Strongyle Infection and Gut 
Microbiota: Profiling of Resistant and Susceptible Horses Over a Grazing Season Equine Gut Microbiome Illumina MiSeq Illumina MiSeq paired end sequencing 8951789af9d5e44cf180f0c72a81fd00 ftp.sra.ebi.ac.uk/vol1/fastq/ERR431/002/ERR4319712/ERR4319712_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR431/002/ERR4319712/ERR4319712_2.fastq.gz 8240500 AMPLICON METAGENOMIC ftp.sra.ebi.ac.uk/vol1/err/ERR431/002/ERR4319712 PAIRED dcc_metagenome 300 public fasp.sra.ebi.ac.uk:/vol1/fastq/ERR431/002/ERR4319712/ERR4319712_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/ERR431/002/ERR4319712/ERR4319712_2.fastq.gz 1;131567;2759;33154;33208;6072;33213;33511;7711;89593;7742;7776;117570;117571;8287;1338369;32523;32524;40674;32525;9347;1437010;314145;9787;9788;9789;9796 STROMAEQ-100 ena-RUN-VETERINARY Faculty-06-07-2020-09:59:55:879-100 PAIRED;PAIRED ena-EXPERIMENT-VETERINARY Faculty-06-07-2020-09:59:55:879-100 VETERINARY Faculty unspecified 2020-07-06 ERX4268079 Equus caballus 9796 PRJEB39250 FASTQ;FASTQ ERR4319712 ERS4804749 2020-07-26 ena-STUDY-VETERINARY Faculty-06-07-2020-10:00:04:543-1358 |
b |
diff -r 000000000000 -r 5e7401777990 test-data/Metadata_files/SRR9678965.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Metadata_files/SRR9678965.tsv Sat Nov 16 18:43:55 2024 +0000 |
b |
@@ -0,0 +1,2 @@ +run_accession experiment_title sample_accession project_name submission_accession library_min_fragment_size bam_md5 assembly_software library_prep_longitude library_selection pcr_isolation_protocol chip_protocol sequencing_primer_provider serotype environment_feature last_updated submitted_galaxy extraction_protocol germline secondary_project culture_collection submission_tool sra_bytes read_strand rna_purity_280_ratio hi_c_protocol collected_by submitted_ftp restriction_enzyme_target_sequence isolate fastq_bytes instrument_platform variety sequencing_date_format temperature sra_aspera ecotype submitted_aspera sampling_campaign bam_ftp tissue_lib environmental_sample control_experiment sex submitted_md5 checklist fastq_galaxy library_gen_protocol specimen_voucher library_prep_latitude submitted_bytes taxonomic_identity_marker run_date country ncbi_reporting_standard sample_description sra_galaxy sample_prep_interval fastq_md5 secondary_study_accession experimental_protocol read_count study_title bio_material rna_prep_5_protocol host_body_site local_environmental_context assembly_quality collection_date_end sample_capture_status sample_title host_genotype host_phenotype environmental_medium cultivar instrument_model faang_library_selection target_gene bam_bytes library_max_fragment_size experiment_target sequencing_date description nominal_sdev chip_ab_provider environment_material host_tax_id sample_material sample_storage_processing sra_md5 cell_type fastq_ftp disease sample_prep_interval_units broker_name sub_strain base_count library_strategy restriction_site serovar investigation_type location library_source sra_ftp age library_layout experimental_factor sequencing_primer_catalog environment_biome rna_purity_230_ratio dnase_protocol dev_stage library_prep_date_format bam_aspera binning_software datahub rna_integrity_num library_prep_date location_start marine_region aligned file_location sample_collection chip_target nominal_length 
broad_scale_environmental_context sequencing_location status completeness_score lon fastq_aspera tax_lineage host_sex library_pcr_isolation_protocol sample_alias mating_type collection_date_start sub_species contamination_score run_alias restriction_enzyme depth submitted_read_type library_construction_protocol host_growth_conditions collection_date experiment_alias host_gravidity center_name identified_by cell_line sampling_site host library_name tag first_created lat strain experiment_accession scientific_name host_status tax_id study_accession submitted_format submitted_host_sex bisulfite_protocol altitude rt_prep_protocol host_scientific_name bam_galaxy accession secondary_sample_accession sample_storage cage_protocol sampling_platform taxonomic_classification location_end protocol_label elevation salinity sequencing_method sequencing_primer_lot first_public transposase_protocol study_alias library_prep_location rna_prep_3_protocol ph sequencing_longitude tissue_type isolation_source +SRR9678965 454 GS FLX sequencing: Amplicon seqencing of Homo sapiens: adult skin surface SAMN12272107 SRA920837 PCR 2019-09-12 3528954 2465043 LS454 fasp.sra.ebi.ac.uk:/vol1/srr/SRR967/005/SRR9678965 ftp.sra.ebi.ac.uk/vol1/fastq/SRR967/005/SRR9678965/SRR9678965.fastq.gz Germany: Duesseldorf Metagenome or environmental Metagenome or environmental sample from human skin metagenome ftp.sra.ebi.ac.uk/vol1/srr/SRR967/005/SRR9678965 6c04efc21529bb1b4bbd2758435dd491 SRP214545 11487 16S rRNA gene profiling of atopic dermatitis and psoriasis patients compared to healthy volunteers 2016-12-31 Metagenome or environmental sample from human skin metagenome 454 GS FLX 454 GS FLX sequencing: Amplicon seqencing of Homo sapiens: adult skin surface 9606 9ba2d40c7ef09e349804bcae64879eae ftp.sra.ebi.ac.uk/vol1/fastq/SRR967/005/SRR9678965/SRR9678965.fastq.gz 5809265 AMPLICON 51.15 N 6.48 E METAGENOMIC ftp.sra.ebi.ac.uk/vol1/srr/SRR967/005/SRR9678965 SINGLE dcc_metagenome 51.15 N 6.48 E public 6.48 
fasp.sra.ebi.ac.uk:/vol1/fastq/SRR967/005/SRR9678965/SRR9678965.fastq.gz 1;2787823;12908;408169;410656;539655 MAARS Cohort 2016-01-01 plate1_3.007.01.sff.fastq 2016 plate1_3.007.01 SUB5942029 Homo sapiens plate1_3.007.01 env_geo:terrestrial 2019-09-12 51.15 SRX6439351 human skin metagenome 539655 PRJNA554499 Homo sapiens SRR9678965 SRS5093728 51.15 N 6.48 E 2019-09-12 PRJNA554499 skin surface |
b |
diff -r 000000000000 -r 5e7401777990 test-data/accessions.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/accessions.txt Sat Nov 16 18:43:55 2024 +0000 |
b |
@@ -0,0 +1,4 @@ +ERR4319712 +DRR011117 +ERR2651925 +SRR9678965 |