Repository 'gemini_interactions'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/gemini_interactions

Changeset 5:7028ca3cac1c (2019-01-11)
Previous changeset 4:a6d326ffbb72 (2018-12-14) Next changeset 6:ce6db5020339 (2020-01-24)
Commit message:
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 62ed732cba355e695181924a8ed4cce49ca21c59
modified:
gemini_interactions.xml
gemini_macros.xml
repository_dependencies.xml
test-data/gemini_amend_input.db
test-data/gemini_annotate_result.db
test-data/gemini_auto_dom_input.db
test-data/gemini_auto_rec_input.db
test-data/gemini_comphets_input.db
test-data/gemini_de_novo_input.db
test-data/gemini_is_somatic_result.db
test-data/gemini_load_result1.db
test-data/gemini_load_result2.db
test-data/gemini_versioned_databases.loc
test-data/test-cache/gemini-config.yaml
added:
test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz
test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi
test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz
test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi
test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz
test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi
removed:
test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz
test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz.tbi
test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz
test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz.tbi
b
diff -r a6d326ffbb72 -r 7028ca3cac1c gemini_interactions.xml
--- a/gemini_interactions.xml Fri Dec 14 12:50:32 2018 -0500
+++ b/gemini_interactions.xml Fri Jan 11 17:46:29 2019 -0500
[
@@ -1,60 +1,96 @@
-<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.1">
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@">
     <description>Find genes among variants that are interacting partners</description>
     <macros>
         <import>gemini_macros.xml</import>
         <token name="@BINARY@">interactions</token>
     </macros>
     <expand macro="requirements" />
-    <expand macro="stdio" />
+    <expand macro="stdio">
+        <!-- Fail loudly when the user-specified gene is unknown to gemini -->
+        <regex match="Gene name not found or gene not in interaction file"
+               source="stderr"
+               level="fatal"
+               description="The gene you specified is not defined in the interaction file" />
+    </expand>
     <expand macro="version_command" />
     <command>
 <![CDATA[
-        @PROVIDE_ANNO_DATA@
+        #if str($interactions.source) == 'preinstalled':
+            #set $annotation_databases = $interactions.annotation_databases
+            @PROVIDE_ANNO_DATA@
+        #end if
 
         gemini
-            #if $gene.gene_selector == 'lof':
-                ## lof interactions is a separate program
-                lof_interactions
+            #set $gene = str($gene).strip()
+            #if $gene:
+                interactions -g '$gene'
             #else:
-                ## use normal gemini interactions program
-                @BINARY@
-                -g "${gene.gene}"
+                ## lof interactions is a separate command line tool
+                lof_interactions
             #end if
 
-            -r "${radius}"
+            #if str($interactions.source) == 'history':
+                --edges '${interactions.data}'
+            #end if
+
+            -r $radius
             $variant_mode
-            "${ infile }"
-            > "${ outfile }"
+
+            '$infile'
+            > '$outfile'
 ]]>
     </command>
     <inputs>
         <expand macro="infile" />
-
-        <conditional name="gene">
-            <param name="gene_selector" type="select" label="Studying" help="">
-                <option value="gene">Interesting gene</option>
-                <option value="lof">All loss-of-function variants</option>
+        <conditional name="interactions">
+            <param name="source" type="select" label="Interaction data source"
+            help="This tool requires a catalogue of known protein-protein interactions. Such interaction data, obtained from the Human Protein Reference Database (HPRD), is part of GEMINI's own annotation data, but you can choose to provide your own interactions data instead.">
+                <option value="preinstalled">HPRD interaction data bundled with GEMINI</option>
+                <option value="history">History dataset with interactions</option>
             </param>
-            <when value="gene">
-                <param name="gene" type="text" label="Specify gene name" help="e.g. PTPN22 (-g)" />
+            <when value="preinstalled">
+                <expand macro="annotation_dir" />
+            </when>
+            <when value="history">
+                <param name="data" type="data" format="txt"
+                label="Interactions data"
+                help="You can provide interaction data as a simple text file with one interaction of the form geneA|geneB (e.g., ZFPM2|GATA4) per line." />
             </when>
-            <when value="lof"/>
         </conditional>
-        <expand macro="annotation_dir" />
-        <expand macro="radius" />
-        <expand macro="variant_mode" />
+        <param name="gene" type="text" value=""
+        label="Report affected interaction partners of this particular gene"
+        help="By default, the tool finds all genes affected by loss-of-function variants in your input, then, for every such gene, reports its interaction partners if they are also affected by any variant. If you specify the name of a gene of interest (e.g. PTPN22) here, you get the affected interaction partners of only this particular gene reported, irrespective of whether your gene of interest itself is affected by any variant or not." />
+        <param argument="-r" name="radius" type="integer" value="3" min="0"
+        label="Report interaction partners up to (and including) this order"
+        help="A value of 1, for example, means: report only affected direct interaction partners. A value of 0 restricts the report to just variants in the query gene itself." />
+        <param argument="--var" name="variant_mode" type="select" display="radio"
+        label="Report format for interactions"
+        help="">
+            <option value="">Interaction partners only</option>
+            <option value="--var">Interaction partners and the variants affecting them</option>
+        </param>
     </inputs>
     <outputs>
         <data name="outfile" format="tabular" />
     </outputs>
     <tests>
         <test>
-            <param name="infile" value="gemini_load_result1.db" ftype="gemini.sqlite" />
-            <param name="gene" value="BCL6" />
+            <param name="infile" value="gemini_comphets_input.db" ftype="gemini.sqlite" />
             <param name="radius" value="5" />
             <output name="outfile">
                 <assert_contents>
-                    <has_line_matching expression="sample&#009;gene&#009;order_of_interaction&#009;interacting_gene.*" />
+                    <has_line line="sample&#009;lof_gene&#009;order_of_interaction&#009;interacting_gene" />
+                    <has_n_columns n="4" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <param name="infile" value="gemini_comphets_input.db" ftype="gemini.sqlite" />
+            <param name="radius" value="5" />
+            <param name="variant_mode" value="--var" />
+            <output name="outfile">
+                <assert_contents>
+                    <has_line_matching expression="sample&#009;lof_gene&#009;order_of_interaction&#009;interacting_gene.+" />
                 </assert_contents>
             </output>
         </test>
b
diff -r a6d326ffbb72 -r 7028ca3cac1c gemini_macros.xml
--- a/gemini_macros.xml Fri Dec 14 12:50:32 2018 -0500
+++ b/gemini_macros.xml Fri Jan 11 17:46:29 2019 -0500
[
b'@@ -1,15 +1,12 @@\n <macros>\n     <!-- gemini version to be used -->\n-    <token name="@VERSION@">0.18.1</token>\n+    <token name="@VERSION@">0.20.1</token>\n     <!-- minimal annotation files version required by this version of gemini -->\n-    <token name="@DB_VERSION@">181</token>\n+    <token name="@DB_VERSION@">200</token>\n \n     <xml name="requirements">\n         <requirements>\n             <requirement type="package" version="@VERSION@">gemini</requirement>\n-            <requirement type="package" version="0.2.6">tabix</requirement>\n-            <!-- for conda useage -->\n-            <!--requirement type="package" version="1.3.1">htslib</requirement-->\n             <yield />\n         </requirements>\n     </xml>\n@@ -24,9 +21,17 @@\n             <exit_code range=":-1" />\n             <regex match="Error:" />\n             <regex match="Exception:" />\n+            <yield />\n         </stdio>\n     </xml>\n \n+    <xml name="citations">\n+        <citations>\n+            <citation type="doi">10.1371/journal.pcbi.1003153</citation>\n+            <yield />\n+        </citations>\n+    </xml>\n+\n     <xml name="annotation_dir">\n         <param name="annotation_databases" type="select" label="Choose a gemini annotation source">\n             <options from_data_table="gemini_versioned_databases">\n@@ -36,31 +41,36 @@\n         </param>\n     </xml>\n \n-    <xml name="add_header_column">\n-        <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False" \n-            label="Add a header of column names to the output" help="(--header)"/>\n-    </xml>\n-\n-    <xml name="radius">\n-        <param name="radius" type="integer" value="3" label="Set filter for Breadth-first search (BFS) in the Protein-Protein Interaction network" help="(-r)" >\n-            <validator type="in_range" min="0"/>\n+    <xml name="infile">\n+        <param name="infile" type="data" format="gemini.sqlite" label="GEMINI database" help="Only files with version @VERSION@ are accepted." >\n+            <options options_filter_attribute="metadata.gemini_version" >\n+                <filter type="add_value" value="@VERSION@" />\n+            </options>\n         </param>\n     </xml>\n-    <xml name="variant_mode">\n-        <param name="variant_mode" type="boolean" truevalue="--var" falsevalue="" checked="False" \n-            label="Returns variant info (e.g. impact, biotype) for interacting genes" help="(--var)"/>\n+\n+    <xml name="add_header_column">\n+        <param argument="--header" name="header" type="boolean" truevalue="--header" falsevalue="" checked="True" \n+        label="Add a header of column names to the output" />\n     </xml>\n \n-    <xml name="column_filter">\n+    <xml name="column_filter" token_help="" token_minimalset="variant_id, gene">\n         <conditional name="report">\n-            <param name="report_selector" type="select" label="Columns to include in the report"\n-                help="By default, this tool reports all columns in the variants table. One may choose to report only a subset of the columns.">\n-                <option value="all" selected="True">all</option>\n-                <option value="column_filter">User given columns</option>\n+            <param name="report_selector" type="select"\n+            label="Set of columns to include in the variant report table"\n+            help="@HELP@">\n+                <option value="minimal">Minimal (report only a preconfigured minimal set of columns)</option>\n+                <option value="full">Full (report all columns defined in the GEMINI database variants table)</option>\n+                <option value="custom">Custom (report user-specified columns)</option>\n             </param>\n-            <when value="all"/>\n-            <when value="column_filter">\n-                <param name="columns" type="select" display="checkboxes" multiple="True" label="Choose columns to include in the report" help="(--columns)">\n+            <when value="full" />\n+            <when value="minimal">\n+         '..b'r($report.extra_cols).strip():\n+                #if $cols:\n+                    #set $cols = $cols + \', \' + str($report.extra_cols)\n+                #else:\n+                    #set $cols = str($report.extra_cols)\n+                #end if\n+            #end if\n+            #if not $cols:\n+                #set $cols = "variant_id, gene"\n+            #end if\n         #end if\n     </token>\n \n     <token name="@COLUMN_SELECT@">\n-        #if $report.report_selector != \'all\':\n-            --columns "${report.columns}\n-            #if str($report.extra_cols).strip()\n-                #echo \',\'+\',\'.join(str($report.extra_cols).split()) \n-            #end if\n-            "\n+        @SET_COLS@\n+        #if $cols != "*"\n+            --columns \'$cols\'\n         #end if\n     </token>\n \n-    <xml name="family">\n-        <param name="families" type="text" value="" label="Comma seperated list of families to restrict the analysis to." help="e.g. Family1,Family3 (--families)"/>\n-    </xml>\n-\n-    <xml name="lenient">\n-        <param name="lenient" type="boolean" truevalue="--lenient" falsevalue="" checked="False" label="Loosen the restrictions on family structure"/>\n-    </xml>\n-\n-    <xml name="unaffected">\n-        <param name="allow_unaffected" type="boolean" truevalue="--allow-unaffected" falsevalue="" checked="False" label="Report candidates that also impact samples labeled as unaffected." help="(--allow-unaffected)"/>\n-    </xml>\n-\n-    <xml name="min_kindreds">\n-        <param name="min_kindreds" type="integer" value="1" label="The min. number of kindreds that must have a candidate variant in a gene" help="default: 1 (--min-kindreds)" />\n-    </xml>\n-\n-    <xml name="min_sequence_depth">\n-        <param name="d" type="integer" value="0" min="0" label="The minimum aligned sequence depth (genotype DP) required for each sample"\n-                help="default: 0 (-d)" />\n-    </xml>\n-\n-    <xml name="min_gq">\n-        <param name="min_gq" type="integer" value="0" label="the minimum genotype quality required for each sample in a family" help="default: 0 (--min-gq)">\n-            <validator type="in_range" min="0"/>\n-        </param>\n-    </xml>\n-\n-    <xml name="gt_pl_max">\n-        <param name="gt_pl_max" type="integer" value="-1" min="-1" label="The maximum phred-scaled genotype likelihod (PL) allowed for each sample in a family" help="default: -1 (not set) (--gt-pl-max)" />\n-    </xml>\n-\n-    <xml name="citations">\n-        <citations>\n-            <citation type="doi">10.1371/journal.pcbi.1003153</citation>\n-            <yield />\n-        </citations>\n-    </xml>\n-\n-    <xml name="infile">\n-        <param name="infile" type="data" format="gemini.sqlite" label="GEMINI database" help="Only files with version @VERSION@ are accepted." >\n-            <options options_filter_attribute="metadata.gemini_version" >\n-                <filter type="add_value" value="@VERSION@" />\n-            </options>\n-            <validator type="expression" message="This version of Gemini will only work with Gemini files that are for version @VERSION@.">value is not None and value.metadata.gemini_version == "@VERSION@"</validator>\n-        </param>\n-    </xml>\n-\n+    <token name="@PARSE_REGION_ELEMENTS@"><![CDATA[\n+        #set $region_elements = []\n+        #for $r in $regions:\n+            ## The actual chromosome name needs to be single-quoted\n+            ## in SQL, so we need to quote the single quotes like the\n+            ## sanitize_query macro would if the whole was a parameter.\n+            #set $r_elements = ["chrom = \'\\"\'\\"\'%s\'\\"\'\\"\'" % str($r.chrom).strip()]\n+            #if str($r.start).strip():\n+                #silent $r_elements.append("start >= %d" % int($r.start))\n+            #end if\n+            #if str($r.stop).strip():\n+                #silent $r_elements.append("end <= %d" % int($r.stop))\n+            #end if\n+            #silent $region_elements.append("(%s)" % " AND ".join($r_elements))\n+        #end for\n+    ]]>\n+    </token>\n </macros>\n'
b
diff -r a6d326ffbb72 -r 7028ca3cac1c repository_dependencies.xml
--- a/repository_dependencies.xml Fri Dec 14 12:50:32 2018 -0500
+++ b/repository_dependencies.xml Fri Jan 11 17:46:29 2019 -0500
b
@@ -1,4 +1,4 @@
 <?xml version="1.0" ?>
 <repositories description="This requires the GEMINI data manager definition to install all required annotation databases.">
-    <repository changeset_revision="fe5a9a7d95b0" name="data_manager_gemini_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"/>
+    <repository changeset_revision="f57426daa04d" name="data_manager_gemini_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu"/>
 </repositories>
\ No newline at end of file
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/gemini_amend_input.db
b
Binary file test-data/gemini_amend_input.db has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/gemini_annotate_result.db
b
Binary file test-data/gemini_annotate_result.db has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/gemini_auto_dom_input.db
b
Binary file test-data/gemini_auto_dom_input.db has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/gemini_auto_rec_input.db
b
Binary file test-data/gemini_auto_rec_input.db has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/gemini_comphets_input.db
b
Binary file test-data/gemini_comphets_input.db has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/gemini_de_novo_input.db
b
Binary file test-data/gemini_de_novo_input.db has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/gemini_is_somatic_result.db
b
Binary file test-data/gemini_is_somatic_result.db has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/gemini_load_result1.db
b
Binary file test-data/gemini_load_result1.db has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/gemini_load_result2.db
b
Binary file test-data/gemini_load_result2.db has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/gemini_versioned_databases.loc
--- a/test-data/gemini_versioned_databases.loc Fri Dec 14 12:50:32 2018 -0500
+++ b/test-data/gemini_versioned_databases.loc Fri Jan 11 17:46:29 2019 -0500
b
@@ -1,3 +1,3 @@
 ## GEMINI versioned databases
 #DownloadDate dbkey DBversion Description Path
-1999-01-01 hg19 181 GEMINI annotations (test snapshot) ${__HERE__}/test-cache
+1999-01-01 hg19 200 GEMINI annotations (test snapshot) ${__HERE__}/test-cache
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/test-cache/gemini-config.yaml
--- a/test-data/test-cache/gemini-config.yaml Fri Dec 14 12:50:32 2018 -0500
+++ b/test-data/test-cache/gemini-config.yaml Fri Jan 11 17:46:29 2019 -0500
b
@@ -2,12 +2,14 @@
 versions:
   ALL.wgs.phase3_shapeit2_mvncall_integrated_v5a.20130502.sites.tidy.vcf.gz: 4
   ESP6500SI.all.snps_indels.tidy.v2.vcf.gz: 2
-  ExAC.r0.3.sites.vep.tidy.vcf.gz: 3
+  ExAC.r0.3.sites.vep.tidy.vcf.gz: 4
   GRCh37-gms-mappability.vcf.gz: 2
-  clinvar_20160203.tidy.vcf.gz: 5
+  clinvar_20170130.tidy.vcf.gz: 5
   cosmic-v68-GRCh37.tidy.vcf.gz: 3
-  dbsnp.b141.20140813.hg19.tidy.vcf.gz: 4
+  dbsnp.b147.20160601.tidy.vcf.gz: 1
   detailed_gene_table_v75: 2
   geno2mp.variants.tidy.vcf.gz: 1
+  gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz: 2
   hg19.rmsk.bed.gz: 2
   summary_gene_table_v75: 2
+  whole_genome_SNVs.tsv.compressed.gz: 2
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/clinvar_20160203.tidy.vcf.gz.tbi has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/clinvar_20170130.tidy.vcf.gz.tbi has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/dbsnp.b141.20140813.hg19.tidy.vcf.gz.tbi has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/dbsnp.b147.20160601.tidy.vcf.gz.tbi has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz
b
Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz has changed
b
diff -r a6d326ffbb72 -r 7028ca3cac1c test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi
b
Binary file test-data/test-cache/gemini/data/gnomad.exomes.r2.0.1.sites.no-VEP.nohist.tidy.vcf.gz.tbi has changed