- +

+ + + + + + + + + +

- - - not uncompressed - - - not uncompressed - - - uncompressed - + - - uncompressed and file_choices['include'] and "seq-report" in file_choices['include'] + + file_choices['include'] and "seq-report" in file_choices['include'] - - uncompressed and file_choices['include'] and "genome" in file_choices['include'] + + file_choices['include'] and "genome" in file_choices['include'] - - uncompressed and file_choices['include'] and "rna" in file_choices['include'] + + file_choices['include'] and "rna" in file_choices['include'] - - uncompressed and file_choices['include'] and "protein" in file_choices['include'] + + file_choices['include'] and "protein" in file_choices['include'] - - uncompressed and file_choices['include'] and "cds" in file_choices['include'] + + file_choices['include'] and "cds" in file_choices['include'] - uncompressed and file_choices['include'] and "gff3" in file_choices['include'] + file_choices['include'] and "gff3" in file_choices['include'] - uncompressed and file_choices['include'] and "gtf" in file_choices['include'] + file_choices['include'] and "gtf" in file_choices['include'] - uncompressed and file_choices['include'] and "gbff" in file_choices['include'] + file_choices['include'] and "gbff" in file_choices['include'] - + + - - + - - - +

+ +

+ - + + + - - + - - +

+ + +

@@ -174,22 +216,24 @@ + - + - - + - - +

+ + +

@@ -197,6 +241,8 @@ + + @@ -208,21 +254,41 @@ - - +

+ + +

+ + + + + + + + + + + + + + + + + + - - + + @@ -233,12 +299,17 @@ - - +

+ + +

- + + + + @@ -250,7 +321,9 @@ - + + + @@ -258,32 +331,14 @@ - - - - - - - - - - - - - - - - - - - - - - - - +

+ +

+ @@ -293,11 +348,64 @@ - - +

+ + +

+ + + + + + + + + + + + + + + + + + + + + + + + +

+ +

+ + + + + + + + + + + + + + + + + + + + + + + @@ -307,8 +415,11 @@ - - +

+ + +

+ @@ -320,12 +431,10 @@ - - + - @@ -338,15 +447,18 @@ **Download Genome Datasets from NCBI** Download a genome dataset including genome, transcript and protein sequence, annotation and a detailed data report. -Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file. +Genome datasets can be specified by NCBI Assembly or BioProject accession(s) or by taxon. + +The download is a three-step process: -Tthe default genome dataset includes the following files (if available): - * data_report.jsonl (genome assembly and annotation metadata, not always available) - * genomic.fna (genomic sequences) - * rna.fna (transcript sequences) - * protein.faa (protein sequences) - * genomic.gff (genome annotation in gff3 format) - * dataset_catalog.json (a list of files and file types included in the dataset) +1. A "dehydrated" zip file is downloaded which includes the metadata and the download URL +2. The metadata is transformed into a tabular (TSV) file +3. The data is hydrated (the actual data is downloaded) + +The 3rd step can be skipped by unselecting all output types in the `Include` parameter. +This makes it possible to inspect the metadata prior to the actual data download. This also +allows the tool to be used for querying data sets (and their accessions) of interest which +can then be downloaded in a second call using the accessions. 
]]> diff -r a3395b1d871b -r ac24fff14f23 macros.xml --- a/macros.xml Mon Nov 21 11:40:05 2022 +0000 +++ b/macros.xml Fri Dec 02 10:52:48 2022 +0000 @@ -1,5 +1,5 @@ - 14.3 + 14.4 0 21.01 MIT @@ -39,8 +39,10 @@ - - + + + @@ -59,18 +61,45 @@ - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + [0-9]{2}/[0-9]{2}/[0-9]{4} @@ -93,9 +303,9 @@ #end if - + - + diff -r a3395b1d871b -r ac24fff14f23 test-data/geneids.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/geneids.txt Fri Dec 02 10:52:48 2022 +0000 @@ -0,0 +1,2 @@ +2597 +14433