Repository 'ega_download_client'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/ega_download_client

Changeset 3:8bbe09a52904 (2022-10-27)
Previous changeset 2:e82175804eb1 (2022-06-14) Next changeset 4:9564758e8638 (2022-12-07)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/pyega3 commit c472a64dc5e68fb058b71e7404f180847ba9f6d4
modified:
pyega3.xml
test-data/filelist_EGAD00001003338.tabular
added:
test-data/filelist3.tabular
b
diff -r e82175804eb1 -r 8bbe09a52904 pyega3.xml
--- a/pyega3.xml Tue Jun 14 17:06:07 2022 +0000
+++ b/pyega3.xml Thu Oct 27 15:12:30 2022 +0000
[
b'@@ -1,11 +1,15 @@\n-<tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy1" profile="21.01" >\n+<tool id="pyega3" name="EGA Download Client" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.01" >\n     <macros>\n-        <token name="@TOOL_VERSION@">4.0.0</token>\n+        <token name="@TOOL_VERSION@">4.0.5</token>\n+        <token name="@VERSION_SUFFIX@">1</token>\n     </macros>\n     <requirements>\n         <requirement type="package" version="@TOOL_VERSION@">pyega3</requirement>\n     </requirements>\n-        <command detect_errors="exit_code"><![CDATA[\n+    <version_command><![CDATA[\n+        pyega3 -v |& grep version | cut -d" " -f 10\n+    ]]></version_command>\n+    <command detect_errors="exit_code"><![CDATA[\n #set $username = $__user__.extra_preferences.get(\'ega_account|username\', "")\n #if $username == "":\n     #set $username = "ega-test-data@ebi.ac.uk (default user)"\n@@ -43,9 +47,17 @@\n     && rm -f downloads/*.md5  ## checksum validation already performed by pyEGA, cleanup downloads folder\n \n #elif $action.action_type == "download_files"\n-    #set file_ids=[x.split(\'\\t\')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines() if x.split(\'\\t\')[int(str($action.id_column))-1].startswith(\'EGAF\') ]\n+    #import re\n+    #set file_ids=[x.split(\'\\t\')[int(str($action.id_column))-1] for x in open(str($id_table)).readlines()]\n+    #set outfiles=[x.split(\'\\t\')[int(str($action.file_column))-1].replace(\'\\n\', \'\') for x in open(str($id_table)).readlines()]\n     mkdir downloads\n-    #for f in $file_ids\n+    #for f, o in zip($file_ids, $outfiles)\n+      #if not f.startswith("EGAF")\n+        && >&2 echo "Ignoring \\"$f\\": no EGA file ID"\n+        #continue\n+      #end if\n+      && \n+      echo \'Downloading $f: $o\'\n       &&\n       pyega3 -c \\${PYEGA_CONNECTIONS:-30} -cf \'$credentials\'\n         fetch \'$f\'\n@@ -60,6 +72,16 @@\n           #end if\n         #end if\n         --output-dir downloads\n+      #if re.match(".*vcf(_genomic_range_.*|).gz$", o)\n+        #if $action.range.reference_name or ($action.range.start or $action.range.end)\n+          && mv \'downloads/$f/\'$o[:-3]\'_genomic_range_\'\\$([ -z "$action.range.reference_name" ] && printf "None" || printf "$action.range.reference_name")\'_\'\\$([ -z "$action.range.start" ] && printf "None" || printf "$action.range.reference_name")\'_\'\\$([ -z "$action.range.reference_name" ] && printf "None" || printf "$action.range.end")\'.gz\' \'downloads/$f/\'$o\'.vcf_bgzip\'\n+        #else\n+          && mv \'downloads/$f/$o\' \'downloads/$f/\'$o\'.vcf_bgzip\'\n+        #end if\n+      #end if\n+      #if re.match(".*ped$", o)\n+        && mv \'downloads/$f/$o\' \'downloads/$f/\'$o\'.tabular\'\n+      #end if\n     #end for\n     && rm -f downloads/**/*.md5  ## checksum validation already performed by pyEGA, clean up downloads folder\n \n@@ -95,7 +117,7 @@\n             <when value="list_datasets"/>\n             <when value="download_file">\n                 <param name="file_id" type="text" optional="false" label="EGA File Accession Identifier" help="Identifier starting with \'EGAF\'. For example: EGAF00001753735">\n-                     <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by \'EGAD\' (datasets) or \'EGAF\' (files)">EGAF[0-9]+</validator>\n+                     <validator type="regex" message="EGA Accession ID must be a string of numbers prefixed by \'EGAD\' (datasets) or \'EGAF\' (files)">EGA[DF][0-9]+</validator>\n                 </param>\n                 <section name="range" title="Request a specific Genomic range?" expanded="false">\n                     <param argument="--reference-name" type="text" optional="true" label="Reference Sequence Name" help="For example \'chr1\', \'1\', or \'chrX\'. If unspecified, all data is returned." />\n@@ -106,6 +128,7 @@\n             <when value="download_files">\n                 <param name="id_table" type="data" format="tabular" label="Table with IDs to download" help="A tabular file where one column contains the set of f'..b'-- list dataset files with default credentials, and request a log output file -->\n+        <test expect_num_outputs="2">\n             <param name="action_type" value="list_dataset_files"/>\n             <param name="dataset_id" value="EGAD00001003338"/>\n             <param name="output_log" value="true"/>\n@@ -156,12 +181,14 @@\n                 </assert_contents>\n             </output>\n         </test>\n-        <test expect_num_outputs="1"> <!-- download a single file -->\n+        <!-- download a single file -->\n+        <test expect_num_outputs="1"> \n             <param name="action_type" value="download_file"/>\n             <param name="file_id" value="EGAF00001775036"/>\n             <output name="downloaded_file" md5="3b89b96387db5199fef6ba613f70e27c"/>\n         </test>\n-        <test expect_num_outputs="1"> <!-- download a single file, with genomic range specified -->\n+        <!-- download a single file, with genomic range specified -->\n+        <test expect_num_outputs="1">\n             <param name="action_type" value="download_file"/>\n             <param name="file_id" value="EGAF00001753756"/>\n             <param name="reference_name" value="1"/>\n@@ -169,19 +196,26 @@\n             <param name="end" value="10000"/>\n             <output name="downloaded_file" ftype="bam" md5="e576a38748feec45aa45191f6e902ce2"/>\n         </test>\n-        <test expect_num_outputs="1"> <!-- download multiple files -->\n+        <!-- download multiple files -->\n+        <test expect_num_outputs="1">\n             <param name="action_type" value="download_files"/>\n             <param name="id_table" value="filelist.tabular"/>\n             <param name="id_column" value="1"/>\n+            <param name="file_column" value="5"/>\n             <output_collection name="downloaded_file_collection" type="list" count="2">\n                 <element name="ENCFF000VWO.bam" md5="b8ae14d5d1f717ab17d45e8fc36946a0" />\n                 <element name="ENCFF284YOU.bam" md5="3b89b96387db5199fef6ba613f70e27c" />\n             </output_collection>\n+            <assert_stderr>\n+                <has_text text="Ignoring &quot;File ID&quot;: no EGA file ID"/>\n+            </assert_stderr>\n         </test>\n-        <test expect_num_outputs="1"> <!-- download multiple files, in combination with a genomic range -->\n+        <!-- download multiple files, in combination with a genomic range -->\n+        <test expect_num_outputs="1">\n             <param name="action_type" value="download_files"/>\n             <param name="id_table" value="filelist2.tabular"/>\n             <param name="id_column" value="1"/>\n+            <param name="file_column" value="5"/>\n             <param name="reference_name" value="1"/>\n             <param name="start" value="0"/>\n             <param name="end" value="10000"/>\n@@ -189,6 +223,20 @@\n                 <element name="NA19239_genomic_range_1_0_10000" md5="bcdcf18846233cbe5cc8afd95168552c" />\n                 <element name="NA19240_genomic_range_1_0_10000" md5="e576a38748feec45aa45191f6e902ce2" />\n             </output_collection>\n+            <assert_stderr>\n+                <has_text text="Ignoring &quot;File ID&quot;: no EGA file ID"/>\n+            </assert_stderr>\n+        </test>\n+        <!-- download multiple vcf.gz files -->\n+        <test expect_num_outputs="1">\n+            <param name="action_type" value="download_files"/>\n+            <param name="id_table" value="filelist3.tabular"/>\n+            <param name="id_column" value="1"/>\n+            <param name="file_column" value="5"/>\n+            <output_collection name="downloaded_file_collection" type="list" count="2">\n+                <element name="HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz" md5="51cfb69bf3b9416ff425381a58c18a2b" />\n+                <element name="HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz" md5="ebad4425191a89d3e970c02190a87175" />\n+            </output_collection>\n         </test>\n     </tests>\n     <help><![CDATA[\n'
b
diff -r e82175804eb1 -r 8bbe09a52904 test-data/filelist3.tabular
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filelist3.tabular Thu Oct 27 15:12:30 2022 +0000
b
@@ -0,0 +1,3 @@
+File ID Status Bytes Check sum File name
+EGAF00007243779 1 15340 ebad4425191a89d3e970c02190a87175 HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz
+EGAF00007243775 1 23033 51cfb69bf3b9416ff425381a58c18a2b HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz
\ No newline at end of file
b
diff -r e82175804eb1 -r 8bbe09a52904 test-data/filelist_EGAD00001003338.tabular
--- a/test-data/filelist_EGAD00001003338.tabular Tue Jun 14 17:06:07 2022 +0000
+++ b/test-data/filelist_EGAD00001003338.tabular Thu Oct 27 15:12:30 2022 +0000
b
@@ -19,6 +19,18 @@
 EGAF00005007330 1 4722 110b493c17210ff3484ed2561a2fe21f HG01775.chrY.bcf.csi
 EGAF00005007331 1 137465 fcf1cc38cd404ea1cdba3975d26f4a8b HG01775.GRCh38DH.exome.cram.crai
 EGAF00005007332 1 229305 56e8de04466aba23ab5acbaf1c087045 NA18534.GRCh38DH.exome.cram.crai
+EGAF00007243773 1 194837 ed365c71461eac21a64d2c29e7216e50 HG00096.GRCh38DH__1097r__10.10000-10100__21.5000000-5050000.bam
+EGAF00007243774 1 135144 687996620a153a8dc451ab71701bb290 HG00096.GRCh38DH__1097r__10.10000-10100__21.5000000-5050000.cram
+EGAF00007243775 1 23033 51cfb69bf3b9416ff425381a58c18a2b HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz
+EGAF00007243776 1 1985 245fd6f73095ce4c80202d5815c84528 HG00408.novoBreak__256r__4.100100-10100100__7.200100-9000100.vcf.gz.tbi
+EGAF00007243777 1 122474 eee2883c95d3cd707961c80b6e44ee68 HG01500.GRCh38DH__90r__3.10000-10500__4.10000-10500.bam
+EGAF00007243778 1 112250 98798731624e6e402c800c276556bb95 HG01500.GRCh38DH__90r__3.10000-10500__4.10000-10500.cram
+EGAF00007243779 1 15340 ebad4425191a89d3e970c02190a87175 HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz
+EGAF00007243780 1 1978 73ab82ce05b4f6259256ae0d4eb33e3b HG01890.HGSVC__145r__1.900100-10001000__18.2001000-90001000.vcf.gz.tbi
+EGAF00007243781 1 27104 10bc4d1dc9ca944339da830e4c868857 HG01500.GRCh38DH__90r__3.10000-10500__4.10000-10500.bam.bai
+EGAF00007243782 1 29568 7d6347593fb4ad59cb62cec9c7f97a7a HG00096.GRCh38DH__1097r__10.10000-10100__21.5000000-5050000.bam.bai
+EGAF00007243783 1 83 c43fcb885eba78b62efb4bf5d1cc51bf HG00096.GRCh38DH__1097r__10.10000-10100__21.5000000-5050000.cram.crai
+EGAF00007243784 1 80 3537be5d71b4e6f0a7f9127cf5bda9aa HG01500.GRCh38DH__90r__3.10000-10500__4.10000-10500.cram.crai
 EGAF00001753734 1 45030910198 040ef7533533a3db67a35b9f454b9269 NA12878.cram
 EGAF00001753735 1 1575103 41fd8741e91924eae19c6baa7893eeb8 NA12878.crai
 EGAF00001753736 1 38215425935 bbc03793c9534a22f77e751d2723cb10 NA12891.cram