Repository 'ega_download_client'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ega_download_client

Changeset 1:1162bfd9f782 (2022-04-12)
Previous changeset 0:caf213d51d6c (2020-10-30) Next changeset 2:e82175804eb1 (2022-06-14)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pyega3 commit f91c99780efc91e89950ed9494ef9e6f83d198c7"
modified:
pyega3.xml
added:
test-data/filelist.tabular
test-data/filelist2.tabular
test-data/filelist_EGAD00001003338.tabular
b
diff -r caf213d51d6c -r 1162bfd9f782 pyega3.xml
--- a/pyega3.xml Fri Oct 30 22:18:41 2020 +0000
+++ b/pyega3.xml Tue Apr 12 11:36:51 2022 +0000
[
b'@@ -1,9 +1,9 @@\n-<tool id="pyega3" name="EGA Download Client" version="@VERSION@+galaxy0" profile="19.09" >\n+<tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy0" profile="21.01" >\n     <macros>\n-        <token name="@VERSION@">3.4.0</token>\n+        <token name="@TOOL_VERSION@">4.0.0</token>\n     </macros>\n     <requirements>\n-        <requirement type="package" version="@VERSION@">pyega3</requirement>\n+        <requirement type="package" version="@TOOL_VERSION@">pyega3</requirement>\n     </requirements>\n         <command detect_errors="exit_code"><![CDATA[\n #set $username = $__user__.extra_preferences.get(\'ega_account|username\', "")\n@@ -18,9 +18,18 @@\n #elif $action.action_type == "list_dataset_files"\n     pyega3 -cf \'$credentials\'\n       files \'$action.dataset_id\'\n+    &&\n+\n+    ## create file header\n+    echo -e \'File ID\\tStatus\\tBytes\\tCheck sum\\tFile name\' > \'$dataset_file_list\' &&\n+\n+    ## remove timestamps and convert spaces to tabs\n+    grep EGAF pyega3_output.log | sed -e \'s/^\\[.*\\]\\s\\+//g\' | sed \'s/\\s\\+/\\t/g\' >> \'$dataset_file_list\'\n+\n #elif $action.action_type == "download_file"\n-    pyega3 -cf \'$credentials\'\n+    pyega3 -c \\${PYEGA_CONNECTIONS:-30} -cf \'$credentials\'\n       fetch \'$action.file_id\'\n+      --max-retries 10\n       #if $action.range.reference_name\n         --reference-name \'$action.range.reference_name\'\n         #if $action.range.start\n@@ -30,7 +39,30 @@\n           --end $action.range.end\n         #end if\n       #end if\n-      --saveto \'$downloaded_file\'\n+    && mv ${action.file_id} downloads\n+    && rm -f downloads/*.md5  ## checksum validation already performed by pyEGA, cleanup downloads folder\n+\n+#elif $action.action_type == "download_files"\n+    #set file_ids=[x.split(\'\\t\')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines() if x.split(\'\\t\')[int(str($action.id_column))-1].startswith(\'EGAF\') ]\n+    mkdir downloads\n+    #for f in $file_ids\n+      &&\n+      pyega3 -c \\${PYEGA_CONNECTIONS:-30} -cf \'$credentials\'\n+        fetch \'$f\'\n+          --max-retries 10\n+          #if $action.range.reference_name\n+          --reference-name \'$action.range.reference_name\'\n+          #if $action.range.start\n+            --start $action.range.start\n+          #end if\n+          #if $action.range.end\n+            --end $action.range.end\n+          #end if\n+        #end if\n+        --output-dir downloads\n+    #end for\n+    && rm -f downloads/**/*.md5  ## checksum validation already performed by pyEGA, clean up downloads folder\n+\n #end if\n     ]]></command>\n     <configfiles>\n@@ -53,6 +85,7 @@\n                 <option value="list_datasets"> List my authorized datasets </option>\n                 <option value="list_dataset_files"> List files in a datasets </option>\n                 <option value="download_file"> Download a file </option>\n+                <option value="download_files"> Download multiple files (based on a file with IDs) </option>\n             </param>\n             <when value="list_dataset_files">\n                 <param name="dataset_id" type="text" optional="false" label="EGA Dataset Accession ID" help="Identifier starting with \'EGAD\'. For example: EGAD00001003338">\n@@ -65,40 +98,59 @@\n                      <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by \'EGAD\' (datasets) or \'EGAF\' (files)">EGAF[0-9]+</validator>\n                 </param>\n                 <section name="range" title="Request a specific Genomic range?" expanded="false">\n-                <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example \'chr1\', \'1\', or \'chrX\'. If unspecified, all data is returned." />\n-                <param argument="--start" type="integer" optional="true" min="0" label="Start Position" help="0-based, inclusive. Only used if a reference sequence name was specified"/>\n-                <param argument="--end" type="integer" optional="true" min="0" label="End Position" he'..b'] == \'download_files\' </filter>\n+            <discover_datasets pattern="__designation_and_ext__" recurse="true" directory="downloads" />\n+        </collection>\n     </outputs>\n     <tests>\n         <test expect_num_outputs="1"><!-- list datasets with default credentials -->\n             <param name="action_type" value="list_datasets"/>\n             <output name="authorized_datasets" ftype="txt">\n                 <assert_contents>\n-                    <has_text text="pyEGA3 - EGA python client version @VERSION@"/>\n+                    <has_text text="pyEGA3 - EGA python client version @TOOL_VERSION@"/>\n                     <has_text text="EGAD00001003338"/>\n                 </assert_contents>\n             </output>\n         </test>\n-        <test expect_num_outputs="1"><!-- list dataset files with default credentials -->\n+        <test expect_num_outputs="2"><!-- list dataset files with default credentials, and request a log output file -->\n             <param name="action_type" value="list_dataset_files"/>\n             <param name="dataset_id" value="EGAD00001003338"/>\n-            <output name="dataset_file_list" ftype="txt">\n+            <param name="output_log" value="true"/>\n+            <output name="dataset_file_list" file="filelist_EGAD00001003338.tabular"/>\n+            <output name="logfile" ftype="txt">\n                 <assert_contents>\n-                    <has_text text="pyEGA3 - EGA python client version @VERSION@"/>\n+                    <has_text text="pyEGA3 - EGA python client version @TOOL_VERSION@"/>\n                     <has_line_matching expression="^\\[.*\\]\\s+File ID\\s+Status\\s+Bytes\\s+Check sum\\s+File name$"/>\n                     <has_text text="EGAF00001753734"/>\n                 </assert_contents>\n@@ -109,7 +161,7 @@\n             <param name="file_id" value="EGAF00001775036"/>\n             <output name="downloaded_file" md5="3b89b96387db5199fef6ba613f70e27c"/>\n         </test>\n-         <test expect_num_outputs="1"> <!-- download a single file, with genomic range specified -->\n+        <test expect_num_outputs="1"> <!-- download a single file, with genomic range specified -->\n             <param name="action_type" value="download_file"/>\n             <param name="file_id" value="EGAF00001753756"/>\n             <param name="reference_name" value="1"/>\n@@ -117,6 +169,27 @@\n             <param name="end" value="10000"/>\n             <output name="downloaded_file" ftype="bam" md5="e576a38748feec45aa45191f6e902ce2"/>\n         </test>\n+        <test expect_num_outputs="1"> <!-- download multiple files -->\n+            <param name="action_type" value="download_files"/>\n+            <param name="id_table" value="filelist.tabular"/>\n+            <param name="id_column" value="1"/>\n+            <output_collection name="downloaded_file_collection" type="list" count="2">\n+                <element name="ENCFF000VWO.bam" md5="b8ae14d5d1f717ab17d45e8fc36946a0" />\n+                <element name="ENCFF284YOU.bam" md5="3b89b96387db5199fef6ba613f70e27c" />\n+            </output_collection>\n+        </test>\n+        <test expect_num_outputs="1"> <!-- download multiple files, in combination with a genomic range -->\n+            <param name="action_type" value="download_files"/>\n+            <param name="id_table" value="filelist2.tabular"/>\n+            <param name="id_column" value="1"/>\n+            <param name="reference_name" value="1"/>\n+            <param name="start" value="100"/>\n+            <param name="end" value="10000"/>\n+            <output_collection name="downloaded_file_collection" count="2">\n+                <element name="NA19239_genomic_range_1_100_10000" md5="bcdcf18846233cbe5cc8afd95168552c" />\n+                <element name="NA19240_genomic_range_1_100_10000" md5="e576a38748feec45aa45191f6e902ce2" />\n+            </output_collection>\n+        </test>\n     </tests>\n     <help><![CDATA[\n The pyEGA3 download client is a python-based tool for viewing and downloading files from authorized EGA datasets.\n'
b
diff -r caf213d51d6c -r 1162bfd9f782 test-data/filelist.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filelist.tabular Tue Apr 12 11:36:51 2022 +0000
b
@@ -0,0 +1,3 @@
+File ID Status Bytes Check sum File name
+EGAF00001775034 1 5991400 b8ae14d5d1f717ab17d45e8fc36946a0 ENCFF000VWO.bam.bai
+EGAF00001775036 1 4804928 3b89b96387db5199fef6ba613f70e27c ENCFF284YOU.bam.bai
b
diff -r caf213d51d6c -r 1162bfd9f782 test-data/filelist2.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filelist2.tabular Tue Apr 12 11:36:51 2022 +0000
b
@@ -0,0 +1,3 @@
+File ID Status Bytes Check sum File name
+EGAF00001753756 1 140445765831 2413ce93a4b2b50fa0c2ff5bdf97695f NA19240.bam
+EGAF00001753754 1 136016115737 59fbc3828fb878d8e637557ce707d445 NA19239.bam
b
diff -r caf213d51d6c -r 1162bfd9f782 test-data/filelist_EGAD00001003338.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filelist_EGAD00001003338.tabular Tue Apr 12 11:36:51 2022 +0000
b
@@ -0,0 +1,49 @@
+File ID Status Bytes Check sum File name
+EGAF00005000662 1 25444204 274de4071bca5354ff16a1de0116c455 NA19238.chr22.vcf.gz
+EGAF00005000663 1 18596 02fdb6fc68b854f98fef710ff4dee0c1 NA19238.chr22.vcf.gz.tbi
+EGAF00005000664 1 26957200 62b16cc9ce6ceb3ef97b98c99aa6fec5 NA19238.chr22.bcf
+EGAF00005000665 1 14509 7cf0f467fd44dd783ff05cb4662642b6 NA19238.chr22.bcf.csi
+EGAF00005001623 1 214453766 ad7d6e0c05edafd7faed7601f7f3eaba ALL_chr22_20130502_2504Individuals.vcf.gz
+EGAF00005001624 1 36094 4202e9a481aa8103b471531a96665047 ALL_chr22_20130502_2504Individuals.vcf.gz.tbi
+EGAF00005001625 1 186424665 c65ca1a4abd55351598ccbc65ebfa9a6 ALL_chr22_20130502_2504Individuals.bcf
+EGAF00005001626 1 27620 09e3b4724404fc7bb5f9948f80016757 ALL_chr22_20130502_2504Individuals.bcf.csi
+EGAF00005007180 1 1837578063 74d3b803823d3f8b73bd592941f23726 HG01775.GRCh38DH.exome.cram
+EGAF00005007181 1 2938941932 910141b9f4ccbfbf57813dee1a7a3f1d NA18534.GRCh38DH.exome.cram
+EGAF00005007323 1 5719142 388fb466c983d4bec2082941647409f3 ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf.gz
+EGAF00005007324 1 8074 fa37e14805cce3221f6f9d3a4cd749a4 ALL.chrY.phase3_integrated_v2a.20130502.genotypes.vcf.gz.tbi
+EGAF00005007325 1 5527171 395c0d3d454d7c7d61c4f771fbab02fc ALL.chrY.phase3_integrated_v2a.20130502.genotypes.bcf
+EGAF00005007326 1 6251 ae2d2097a8744877d9d20907200cbdcf ALL.chrY.phase3_integrated_v2a.20130502.genotypes.bcf.csi
+EGAF00005007327 1 850737 f3dee64b466efe334b2cac77f5c2f710 HG01775.chrY.vcf.gz
+EGAF00005007328 1 4981 d0e71e5dd7f5279e113c4f0dfd37fc23 HG01775.chrY.vcf.gz.tbi
+EGAF00005007329 1 876313 aaca702e347ae6caa734d44527a49212 HG01775.chrY.bcf
+EGAF00005007330 1 4722 110b493c17210ff3484ed2561a2fe21f HG01775.chrY.bcf.csi
+EGAF00005007331 1 137465 fcf1cc38cd404ea1cdba3975d26f4a8b HG01775.GRCh38DH.exome.cram.crai
+EGAF00005007332 1 229305 56e8de04466aba23ab5acbaf1c087045 NA18534.GRCh38DH.exome.cram.crai
+EGAF00001753734 1 45030910198 040ef7533533a3db67a35b9f454b9269 NA12878.cram
+EGAF00001753735 1 1575103 41fd8741e91924eae19c6baa7893eeb8 NA12878.crai
+EGAF00001753736 1 38215425935 bbc03793c9534a22f77e751d2723cb10 NA12891.cram
+EGAF00001753737 1 1310034 0ab7a2d110740561871ccdca7f15f13b NA12891.crai
+EGAF00001753738 1 38370156211 a7503d228d0851b999b826b736b8dd32 NA12892.cram
+EGAF00001753739 1 1331384 bb569235226b5b9f0578d34d1b52482e NA12892.crai
+EGAF00001753740 1 34823972801 492780f603da2f5f3306c41011e0acd2 NA19238.cram
+EGAF00001753741 1 1195785 3b862e018b0b85db7954cbed2e17b6ba NA19238.crai
+EGAF00001753742 1 44113571936 d963539652de2ea20005d98e934d59c2 NA19239.cram
+EGAF00001753743 1 1514700 be2024ccbf5b3bd9132f6d270a37118c NA19239.crai
+EGAF00001753744 1 48309446909 728bea9317cbab1c98429e43e48f9a83 NA19240.cram
+EGAF00001753745 1 1622405 18e0e7070b6cf4d042c7f9bee15d56bd NA19240.crai
+EGAF00001753746 1 143427187111 11395de33f28ed867170d2dc723cc700 NA12878.bam
+EGAF00001753747 1 8949984 a23a84c89d338796f78e68804c8d2c6c NA12878.bam.bai
+EGAF00001753748 1 4317237247 71a78dfb5258939abab2257a2abd1126 NA12891.bam
+EGAF00001753749 1 9212704 e04dbb7ccbc24ccd853d89b8b066166c NA12891.bai
+EGAF00001753750 1 66145394874 201bded705401615fe5e90988d509656 NA12892.bam
+EGAF00001753751 1 9204720 c1eadd98469fcd3ced4c51a84b3ce307 NA12892.bai
+EGAF00001753752 1 229774247950 0751106bbe1c4c83ec934a5972a4efdf NA19238.bam
+EGAF00001753753 1 9379032 028ab5c73fea03c349e0d73943913141 NA19238.bai
+EGAF00001753754 1 136016115737 59fbc3828fb878d8e637557ce707d445 NA19239.bam
+EGAF00001753755 1 9005792 767fc92be753de8cf570690bd7fbe629 NA19239.bai
+EGAF00001753756 1 140445765831 2413ce93a4b2b50fa0c2ff5bdf97695f NA19240.bam
+EGAF00001753757 1 9018288 351130149989cca43fe8c7382e9d326a NA19240.bai
+EGAF00001770106 1 462139278 ce073afcbc07afa343f2d4e4d07efeda ENCFF000VWO.bam
+EGAF00001770107 1 3551031027 dfef3f355230915418a78da460665d56 ENCFF284YOU.bam
+EGAF00001775034 1 5991400 b8ae14d5d1f717ab17d45e8fc36946a0 ENCFF000VWO.bam.bai
+EGAF00001775036 1 4804928 3b89b96387db5199fef6ba613f70e27c ENCFF284YOU.bam.bai