Repository 'gemini'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/gemini

Changeset 0:720cbfb4190d (2014-08-25)
Next changeset 1:d3c4d0208bb2 (2014-08-27)
Commit message:
Imported from capsule None
added:
gemini_annotate.xml
gemini_autosomal_recessive.xml
gemini_burden.xml
gemini_comp_hets.xml
gemini_db_info.xml
gemini_de_novo.xml
gemini_interactions.xml
gemini_load.xml
gemini_lof_sieve.xml
gemini_macros.xml
gemini_pathways.xml
gemini_query.xml
gemini_region.xml
gemini_roh.xml
gemini_stats.xml
gemini_windower.xml
readme.rst
repository_dependencies.xml
tool-data/gemini_databases.loc.sample
tool_data_table_conf.xml.sample
tool_dependencies.xml
b
diff -r 000000000000 -r 720cbfb4190d gemini_annotate.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_annotate.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,96 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <!-- Galaxy wrapper around the gemini "annotate" sub-command -->
+    <description>adding your own custom annotations</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">annotate</token>
+    </macros>
+    <command>
+<![CDATA[
+
+bgzip -c $annotate_source > tabixed.gz &&
+tabix -p bed tabixed.gz &&
+
+        gemini @BINARY@
+            -f tabixed.gz
+            -c $column_name
+            -a $a.a_selector
+            #if $a.a_selector == 'extract':
+                -t $a.column_type
+                -e $a.column_extracts
+                -o $a.operation
+            #end if
+
+            "${ infile }"
+            > "${ outfile }"
+]]>
+
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+        <param name="annotate_source" type="data" format="bed" label="File containing the annotations in BED format" help="(-f)"/>
+        <!-- The column name is restricted to letters, digits and "_"; any other character is replaced by a space. -->
+        <param name="column_name" size="20" type="text" value=""
+            label="The name of the column to be added to the variant table" help="(-c)">
+            <sanitizer invalid_char=" ">
+                <valid initial="string.letters,string.digits">
+                    <add value="_" />
+                </valid>
+            </sanitizer>
+        </param>
+        <conditional name="a">
+            <param name="a_selector" type="select" label="How should the annotation file be used?" help="(-a)">
+                <option value="boolean">Did a variant overlap a region or not? (boolean)</option>
+                <option value="count">How many regions did a variant overlap? (count)</option>
+                <option value="extract" selected="True">Extract specific values from a BED file. (extract)</option>
+            </param>
+            <when value="extract">
+                <!-- These three parameters are only passed to gemini in "extract" mode (options t, e and o). -->
+                <param name="column_extracts" label="Column to extract information from for list annotations"
+                    type="data_column" data_ref="annotate_source" force_select="true" help="(-e)"/>
+
+
+                <param name="column_type" type="select" label="What data type(s) should be used to represent the new values in the database?"
+                    help="(-t)">
+                    <option value="float">Decimal precision number (float)</option>
+                    <option value="integer">Integer number (integer)</option>
+                    <option value="text">Text columns such as “valid”, “yes” (text)</option>
+                </param>
+                <!-- Option values are handed to gemini verbatim, so they must be its operation names: mean, median, min, max, mode, first, last, list, uniq_list. -->
+                <param name="operation" type="select" label="Operation to apply to the extract column values ..."
+                    help="in the event that a variant overlaps multiple annotations in your annotation file. (-o)">
+                    <option value="mean">Compute the average of the (numeric) values</option>
+                    <option value="median">Compute the median of the (numeric) values</option>
+                    <option value="min">Compute the minimum of the (numeric) values</option>
+                    <option value="max">Compute the maximum of the (numeric) values</option>
+                    <option value="mode">Compute the mode (most frequent value) of the (numeric) values</option>
+                    <option value="first">Use the value from the first record in the annotation file</option>
+                    <option value="last">Use the value from the last record in the annotation file</option>
+                    <option value="list">Create a comma-separated list of the observed (text) values</option>
+                    <option value="uniq_list">Create a comma-separated list of non-redundant observed (text) values</option>
+                </param>
+
+            </when>
+            <when value="boolean"/>
+            <when value="count"/>
+        </conditional>
+
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+It is inevitable that researchers will want to enhance the gemini framework with their own, custom annotations. gemini provides a sub-command called annotate for exactly this purpose.
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_autosomal_recessive.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_autosomal_recessive.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,70 @@
+<tool id="gemini_recessive_and_dominant" name="GEMINI autosomal recessive/dominant" version="@VERSION@.0"> <!-- One wrapper for both the autosomal_recessive and autosomal_dominant sub-commands -->
+    <description>Find variants meeting an autosomal recessive/dominant model</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+    </macros>
+    <command>
+<![CDATA[
+        gemini
+
+            #if $rec_or_dom == 'recessive':
+                ## start autosomal_recessive
+                autosomal_recessive
+            #else:
+                ## start autosomal_dominant
+                autosomal_dominant
+            #end if
+
+            #if $report.report_selector != 'all':
+                --columns "${report.columns}"
+            #end if
+
+            #if $filter.filter_selector == 'yes':
+                --filter "${filter.filter}"
+            #end if
+
+            -d $d
+            #if int($min_kindreds) > 0:
+                --min-kindreds $min_kindreds
+            #end if
+
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <!-- rec_or_dom picks the sub-command: autosomal_recessive when "recessive", autosomal_dominant otherwise. -->
+        <param name="rec_or_dom" type="select" label="Autosomal ..." help="">
+            <option value="recessive">recessive</option>
+            <option value="dominant">dominant</option>
+        </param>
+
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+        <expand macro="column_filter" />
+        <expand macro="filter" />
+        <expand macro="min_sequence_depth" />
+        <param name="min_kindreds" size="4" type="integer" value="-1" label="The min. number of kindreds that must have a candidate variant in a gene"
+            help="-1 means default values (--min-kindreds)" /> <!-- only forwarded to gemini when the value is greater than 0 -->
+
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Assuming you have defined the familial relationships between samples when loading your VCF into GEMINI, one can leverage a 
+built-in tool for identifying variants that meet an autosomal recessive or dominant inheritance pattern. 
+The reported variants will be restricted to those variants having the potential to impact the function of affecting protein coding transcripts.
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_burden.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_burden.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,82 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <!-- Galaxy wrapper around the gemini "burden" sub-command -->
+    <description>perform sample-wise gene-level burden calculations</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">burden</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini @BINARY@
+          --cases $cases
+          --controls $controls
+            $save_tscores
+            $nonsynonymous
+            $calpha
+            --permutations $permutations
+            #if float( str($min_aaf) ) >= 0.0:
+                --min-aaf $min_aaf
+            #end if
+            #if float( str($max_aaf) ) >= 0.0:
+                --max-aaf $max_aaf
+            #end if
+            "${ infile }"
+            > "${ outfile }"
+]]>
+
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+        <!-- cases and controls are inserted into the command line unquoted, so each space separated name becomes its own argument. -->
+        <param name="cases" size="30" type="text" value="" label="Space separated list of cases for association testing" help="(--cases)"/>
+        <param name="controls" size="30" type="text" value="" label="Space separated list of controls for association testing" help="(--controls)"/>
+
+        <param name="save_tscores" type="boolean" truevalue="--save_tscores" falsevalue="" checked="False"
+            label="Save the permuted T-scores in the output file" help="(--save_tscores)"/>
+
+        <param name="nonsynonymous" type="boolean" truevalue="--nonsynonymous" falsevalue="" checked="False"
+            label="Count all nonsynonymous variants as contributing burden" help="(--nonsynonymous)"/>
+        <param name="calpha" type="boolean" truevalue="--calpha" falsevalue="" checked="False"
+            label="Run the C-alpha association test" help="(--calpha)"/>
+        <param name="min_aaf" type="float" value="-1" size="5" label="The min. alt. allele frequency for a variant to be included"
+            help="(--min-aaf)">
+            <!--validator type="in_range" min="0.0"/-->
+        </param>
+        <param name="max_aaf" type="float" value="-1" size="5" label="The max. alt. allele frequency for a variant to be included"
+            help="(--max-aaf)">
+            <!--validator type="in_range" min="0.0"/-->
+        </param>
+        <!-- A negative min_aaf or max_aaf (the default is -1) leaves the corresponding option off the command line. -->
+        <param name="permutations" type="integer" value="1000" size="10" label="Number of permutations to run for the C-alpha test"
+            help="(--permutations)">
+            <validator type="in_range" min="0"/>
+        </param>
+
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+The burden tool provides a set of utilities to perform burden summaries on a per-gene, per sample basis.
+By default, it outputs a table of gene-wise counts of all high impact variants in coding regions for each sample::
+
+    $ gemini burden test.burden.db
+    gene    M10475  M10478  M10500  M128215
+    WDR37   2       2       2       2
+    CTBP2   0       0       0       1
+    DHODH   1       0       0       0
+
+@CITATION@
+    </help>
+    <expand macro="citations">
+        <citation type="doi">10.1371/journal.pgen.1001322</citation><!-- c-alpha citation -->
+    </expand>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_comp_hets.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_comp_hets.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,61 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <!-- Galaxy wrapper around the gemini "comp_hets" sub-command -->
+    <description>Identifying potential compound heterozygotes</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">comp_hets</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini @BINARY@
+
+            #if $report.report_selector != 'all':
+                --columns "${report.columns}"
+            #end if
+
+            #if $filter.filter_selector == 'yes':
+                --filter "${filter.filter}"
+            #end if
+            $only_affected
+            $ignore_phasing
+
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+        <expand macro="add_header_column" /> <!-- NOTE(review): the "header" parameter added here is never referenced in the command template above; confirm whether it should be passed to gemini. -->
+        <expand macro="column_filter" />
+        <param name="only_affected" type="boolean" truevalue="--only-affected" falsevalue="" checked="False"
+            label="Report solely those compound heterozygotes impacting a sample labeled as affected" help="(--only-affected)"/>
+        <param name="ignore_phasing" type="boolean" truevalue="--ignore-phasing" falsevalue="" checked="False"
+            label="Ignore phasing when screening for compound hets" help="Candidates are inherently putative. (--ignore-phasing)"/>
+        <expand macro="filter" />
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Many recessive disorders are caused by compound heterozygotes. Unlike canonical recessive sites where the same recessive allele is 
+inherited from both parents at the _same_ site in the gene, compound heterozygotes occur when the individual’s phenotype is caused 
+by two heterozygous recessive alleles at _different_ sites in a particular gene.
+
+So basically, we are looking for two (typically loss-of-function (LoF)) heterozygous variants impacting the same gene at different loci. 
+The complicating factor is that this is _recessive_ and as such, we must also require that the consequential alleles at each heterozygous 
+site were inherited on different chromosomes (one from each parent). As such, in order to use this tool, we require that all variants are phased. 
+Once this has been done, the comp_hets tool will provide a report of candidate compound heterozygotes for each sample/gene.
+
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_db_info.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_db_info.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,36 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <!-- Galaxy wrapper around the gemini "db_info" sub-command -->
+    <description>List the gemini database tables and columns</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">db_info</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini @BINARY@
+            "${ infile }" | tr -s ' ' '\t'
+            > "${ outfile }"
+]]>
+    </command> <!-- the tr step squeezes each run of spaces in the gemini output into a single tab before it is written to the output dataset -->
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Because of the sheer number of annotations that are stored in gemini, there are admittedly too many columns to remember by rote. 
+If you can’t recall the name of particular column, just use the db_info tool. It will report all of the tables and all of the columns / types in each table.
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_de_novo.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_de_novo.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,49 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <!-- Galaxy wrapper around the gemini "de_novo" sub-command -->
+    <description>Identifying potential de novo mutations</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">de_novo</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini @BINARY@
+
+            #if $report.report_selector != 'all':
+                --columns "${report.columns}"
+            #end if
+
+            #if $filter.filter_selector == 'yes':
+                --filter "${filter.filter}"
+            #end if
+            -d $d
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+        <!-- The macros below define the "report", "filter" and "d" parameters read by the command template. -->
+        <expand macro="column_filter" />
+        <expand macro="filter" />
+        <expand macro="min_sequence_depth" />
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Assuming you have defined the familial relationships between samples when loading your VCF into GEMINI, 
+you can use this tool for identifying de novo (a.k.a spontaneous) mutations that arise in offspring.
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_interactions.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_interactions.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,68 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <!-- Galaxy wrapper for the gemini "interactions" and "lof_interactions" sub-commands -->
+    <description>Find genes among variants that are interacting partners</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">interactions</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini 
+            --annotation-dir ${annotation_databases.fields.path}
+            #if $gene.gene_selector == 'lof':
+                ## lof interactions is a separate program
+                lof_interactions
+            #else:
+                ## use normal gemini interactions program
+                @BINARY@
+                -g "${gene.gene}"
+            #end if
+
+            -r "${radius}"
+            $variant_mode
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+        <!-- "gene" mode runs the interactions sub-command with the g option; "lof" mode runs lof_interactions without a gene name. -->
+        <conditional name="gene">
+            <param name="gene_selector" type="select" label="Studying" help="">
+                <option value="gene">Interesting gene</option>
+                <option value="lof">All loss-of-function variants</option>
+            </param>
+            <when value="gene">
+                <param name="gene" type="text" label="Specify gene name" help="e.g. PTPN22 (-g)" />
+            </when>
+            <when value="lof"/>
+        </conditional>
+        <expand macro="annotation_dir" /> <!-- supplies annotation_databases, whose path field feeds the annotation-dir option -->
+        <expand macro="radius" />
+        <expand macro="variant_mode" />
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Integrating the knowledge of the known protein-protein interactions would be useful in explaining variation data.
+Meaning to say that a damaging variant in an interacting partner of a potential protein may be equally interesting as the
+protein itself. We have used the HPRD_ binary interaction data to build a p-p network graph which can be explored by GEMINI.
+
+.. _HPRD: http://www.ncbi.nlm.nih.gov/pubmed/18988627
+
+
+@CITATION@
+    </help>
+    <expand macro="citations">
+        <citation type="doi">10.1093/nar/gkn892</citation><!-- HPRD citation -->
+    </expand>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_load.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_load.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,80 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <!-- Galaxy wrapper around the gemini "load" sub-command; the output dataset is the new sqlite database -->
+    <description>Loading a VCF file into GEMINI</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">load</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini
+            --annotation-dir ${annotation_databases.fields.path}
+            @BINARY@
+            -v "${ infile }"
+            -t $annotation_type
+
+            #if $ped:
+                -p $ped
+            #end if
+
+            $skip_gerp_bp
+            $skip_cadd
+            $skip_gene_tables
+            $no_load_genotypes
+            $no_genotypes
+            $passonly
+            --cores \${GALAXY_SLOTS:-4}
+
+            "${ outfile }"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="vcf" label="VCF file to be loaded in the GEMINI database" />
+
+        <param name="annotation_type" type="select" label="The annotations to be used with the input vcf" help="(-t)">
+            <option value="snpEff">snpEff annotated VCF file</option>
+            <option value="VEP">VEP annotated VCF file</option>
+        </param>
+        <param name="ped" type="data" format="tabular" optional="True" label="Sample information file in PED+ format" help="(-p)" /> <!-- optional: the p option is only emitted when a dataset is selected -->
+        <expand macro="annotation_dir" />
+
+        <param name="skip_gerp_bp" type="boolean" truevalue="--skip-gerp-bp" falsevalue="" checked="False"
+            label="Do not load GERP scores at base pair resolution" help="(--skip-gerp-bp)"/>
+
+        <param name="skip_cadd" type="boolean" truevalue="--skip-cadd" falsevalue="" checked="False"
+            label="Do not load CADD scores" help="(--skip-cadd)"/>
+
+        <param name="skip_gene_tables" type="boolean" truevalue="--skip-gene-tables" falsevalue="" checked="False"
+            label="Do not load gene tables" help="(--skip-gene-tables)"/>
+
+        <param name="no_load_genotypes" type="boolean" truevalue="--no-load-genotypes" falsevalue="" checked="False"
+            label="Genotypes exist in the file, but should not be stored" help="(--no-load-genotypes)"/>
+        <!-- This flag is distinct from no_load_genotypes above: it declares that the VCF carries no genotypes at all. -->
+        <param name="no_genotypes" type="boolean" truevalue="--no-genotypes" falsevalue="" checked="False"
+            label="There are no genotypes in the file" help="e.g. some 1000G VCFs (--no-genotypes)"/>
+
+        <param name="passonly" type="boolean" truevalue="--passonly" falsevalue="" checked="False"
+            label="Keep only variants that pass all filters" help="(--passonly)"/>
+
+    </inputs>
+    <outputs>
+        <data name="outfile" format="sqlite" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Before we can use GEMINI to explore genetic variation, we must first load our VCF file into the GEMINI database framework.
+We expect you to have first annotated the functional consequence of each variant in your VCF using either VEP or snpEff.
+
+http://gemini.readthedocs.org/en/latest/content/loading.html
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_lof_sieve.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_lof_sieve.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,39 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <!-- Galaxy wrapper around the gemini "lof_sieve" sub-command; token names are case-sensitive -->
+    <description>Filter LoF variants by transcript position and type</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">lof_sieve</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini @BINARY@
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Not all candidate LoF variants are created equal. For e.g, a nonsense (stop gain) variant impacting the first 5% of a polypeptide is far 
+more likely to be deleterious than one affecting the last 5%. Assuming you’ve annotated your VCF with snpEff v3.0+, the lof_sieve tool 
+reports the fractional position (e.g. 0.05 for the first 5%) of the mutation in the amino acid sequence. 
+In addition, it also reports the predicted function of the transcript so that one can segregate candidate 
+LoF variants that affect protein_coding transcripts from processed RNA, etc.
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_macros.xml Mon Aug 25 17:15:54 2014 -0400
b
@@ -0,0 +1,118 @@
+<macros> <!-- Shared macros and tokens imported by the GEMINI tool wrappers -->
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="0.10.0">gemini</requirement>
+            <requirement type="package" version="0.1.3">grabix</requirement>
+            <requirement type="package" version="0.2.6">tabix</requirement>
+            <requirement type="package" version="0.1.19">samtools</requirement>
+            <requirement type="package" version="2.19.1">bedtools</requirement>
+            <yield />
+        </requirements>
+    </xml>
+    <!-- Command Galaxy runs to record the installed gemini version. -->
+    <xml name="version_command">
+        <version_command>gemini --version</version_command>
+    </xml>
+    <!-- Error detection: any non-zero exit code, or "Error:" / "Exception:" matched in the job output. -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:" />
+            <exit_code range=":-1" />
+            <regex match="Error:" />
+            <regex match="Exception:" />
+        </stdio>
+    </xml>
+    <!-- Select list of annotation databases, filled from the gemini_databases data table and sorted on column 0. -->
+    <xml name="annotation_dir">
+        <param name="annotation_databases" type="select" label="Choose a gemini annotation database">
+            <options from_data_table="gemini_databases">
+                <filter type="sort_by" column="0" />
+                <validator type="no_options" message="No annotation database is available" />
+            </options>
+        </param>
+    </xml>
+    <!-- Boolean "header" parameter that expands to the header flag when checked. -->
+    <xml name="add_header_column">
+        <param name="header" type="boolean" truevalue="--header" falsevalue="" checked="False"
+            label="Add a header of column names to the output" help="(--header)"/>
+    </xml>
+    <!-- Non-negative integer "radius" parameter, default 3. -->
+    <xml name="radius">
+        <param name="radius" type="integer" value="3" size="5" label="Set filter for Breadth-first search (BFS) in the Protein-Protein Interaction network" help="(-r)" >
+            <validator type="in_range" min="0"/>
+        </param>
+    </xml>
+    <xml name="variant_mode">
+        <param name="variant_mode" type="boolean" truevalue="--var" falsevalue="" checked="False"
+            label="Returns variant info (e.g. impact, biotype) for interacting genes" help="(--var)"/>
+    </xml>
+    <!-- Conditional "report": either all columns, or a checkbox subset exposed as report.columns. -->
+    <xml name="column_filter">
+        <conditional name="report">
+            <param name="report_selector" type="select" label="Columns to include in the report"
+                help="By default, this tool reports all columns in the variants table. One may choose to report only a subset of the columns.">
+                <option value="all" selected="True">all</option>
+                <option value="column_filter">User given columns</option>
+            </param>
+            <when value="all"/>
+            <when value="column_filter">
+                <param name="columns" type="select" display="checkboxes" multiple="True" label="Choose columns to include in the report" help="(--columns)">
+                    <option value="gene">gene</option>
+                    <option value="chrom">chrom</option>
+                    <option value="start">start</option>
+                    <option value="end">end</option>
+                    <option value="ref">ref</option>
+                    <option value="alt">alt</option>
+                    <option value="impact">impact</option>
+                    <option value="impact_severity">impact_severity</option>
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Conditional "filter": an optional free-text SQL constraint exposed as filter.filter. -->
+    <xml name="filter">
+        <conditional name="filter">
+            <param name="filter_selector" type="select" label="Apply additional constraints"
+                help="By default, this tool will report all variants regardless of their putative functional impact. In order to apply additional constraints on the variants returned, you can use this optional filter.">
+                <option value="no">No additional constraints</option>
+                <option value="yes">Apply additional constraints</option>
+            </param>
+            <when value="no"/>
+            <when value="yes">
+                <param name="filter" type="text" size="20" label="Constraints in SQL syntax" help="Conditions applied here will become WHERE clauses in the query issued to the GEMINI database. E.g. alt='G' or impact_severity = 'HIGH'. (--filter)">
+                    <expand macro="sanitize_query" />
+                </param>
+            </when>
+        </conditional>
+    </xml>
+    <!-- Sanitizer that accepts every printable character, so SQL quotes and operators reach gemini unchanged. -->
+    <xml name="sanitize_query">
+        <sanitizer invalid_char="">
+            <valid initial="string.printable">
+            </valid>
+       </sanitizer>
+    </xml>
+    <!-- Non-negative integer "d" parameter (minimum genotype depth), default 0. -->
+    <xml name="min_sequence_depth">
+        <param name="d" type="integer" value="0" size="5" label="The minimum aligned sequence depth (genotype DP) required for each sample"
+                help="default: 0 (-d)">
+            <validator type="in_range" min="0"/>
+        </param>
+    </xml>
+    <!-- Tokens are substituted by exact, case-sensitive name in the importing tool files. -->
+    <token name="@VERSION@">0.10.0</token>
+
+    <token name="@CITATION@">------
+
+**Citation**
+
+If you use GEMINI in your research, please cite the following manuscript:
+
+  </token>
+  <xml name="citations">
+    <citations>
+        <citation type="doi">10.1371/journal.pcbi.1003153</citation>
+        <yield />
+    </citations>
+  </xml>
+</macros>
b
diff -r 000000000000 -r 720cbfb4190d gemini_pathways.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_pathways.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,52 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0"> <!-- Galaxy wrapper around the gemini "pathways" sub-command -->
+    <description>Map genes and variants to KEGG pathways</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">pathways</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini 
+            --annotation-dir ${annotation_databases.fields.path}
+            @BINARY@
+            -v $ensembl
+            $lof
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+        <!-- Passed to gemini as the v option; the validator restricts it to 66 through 71. -->
+        <param name="ensembl" type="integer" value="68" size="5" label="Version of ensembl genes to use" 
+            help="Supported versions: 66 to 71. use versions that match the VEP/snpEff versions of the annotated vcf for correctness. For e.g VEP v2.6 and snpEff v3.1 use Ensembl 68 version of the genomes. (-v)">
+            <validator type="in_range" min="66" max="71"/>
+        </param>
+
+        <param name="lof" type="boolean" truevalue="--lof" falsevalue="" checked="False" 
+            label="Report only pathways with loss-of-function variants" help="(--lof)"/>
+        <expand macro="annotation_dir" /> <!-- supplies annotation_databases, whose path field feeds the annotation-dir option -->
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Mapping genes to biological pathways is useful in understanding the function/role played by a gene. 
+Likewise, genes involved in common pathways is helpful in understanding heterogeneous diseases. 
+We have integrated the KEGG pathway mapping for gene variants, to explain/annotate variation. 
+
+This requires your VCF be annotated with either snpEff/VEP.
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_query.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_query.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,110 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <description>Querying the GEMINI database</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">query</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini @BINARY@
+
+            --in "${in}"
+
+            ## Optional text filters are only emitted when the user entered something.
+            #if $gt_filter.strip():
+                --gt-filter "${gt_filter}"
+            #end if
+
+            #if $sample_filter.strip():
+                --sample-filter "${sample_filter}"
+            #end if
+
+            $show_samples
+            $show_families
+            $family_wise
+            $header
+            $dgidb
+            #if $region.strip():
+                --region "${region}"
+            #end if
+            ## Convert through str() before int() so the check works whether or not the parameter wrapper implements __int__.
+            ## Values <= 0 (the form default is -1) leave --min-kindreds unset.
+            #if int(str($min_kindreds)) > 0:
+                --min-kindreds $min_kindreds
+            #end if
+            ##--format FORMAT       Format of output (JSON, TPED or default) # we will take default for the time being
+            ##   --sample-delim STRING The delimiter to be used with the --show-samples option.
+
+            #if $q.strip():
+                -q "${q}"
+            #end if
+
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+                <!--
+            ##TODO:
+              - -carrier-summary-by-phenotype CARRIER_SUMMARY
+                        Output columns of counts of carriers and non-carriers
+                        stratified by the given sample phenotype column-->
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+
+        <!-- Free-text SQL / filter expressions; each is sanitized by the shared sanitize_query macro -->
+        <param name="q" type="text" area="True" size="5x50" label="The query to be issued to the database" help="(-q)">
+            <expand macro="sanitize_query" />
+        </param>
+        <param name="gt_filter" type="text" area="True" size="5x50" label="Restrictions to apply to genotype values" help="(--gt-filter)">
+            <expand macro="sanitize_query" />
+        </param>
+        <param name="sample_filter" type="text" area="True" size="5x50" label="SQL filter to use to filter the sample table" help="(--sample-filter)">
+            <expand macro="sanitize_query" />
+        </param>
+
+        <param name="show_samples" type="boolean" truevalue="--show-samples" falsevalue="" checked="False" 
+            label="Add a column of all sample names with a variant to each variant" help="(--show-samples)"/>
+
+        <param name="show_families" type="boolean" truevalue="--show-families" falsevalue="" checked="False" 
+            label="Add a column listing all of the families with a variant to each variant" help="(--show-families)"/>
+
+        <param name="family_wise" type="boolean" truevalue="--family-wise" falsevalue="" checked="False" 
+            label="Perform the sample-filter on a family-wise basis" help="(--family-wise)"/>
+
+        <expand macro="add_header_column" />
+
+        <!-- TODO: is there any default values set? -->
+        <!-- The command template only passes this option when the value is greater than 0 -->
+        <param name="min_kindreds" size="4" type="integer" value="-1" label="Minimum number of families for a variant passing a family-wise filter to be in" help="-1 means default values (--min-kindreds)" />
+
+        <param name="dgidb" type="boolean" truevalue="--dgidb" falsevalue="" checked="False" 
+            label="Request drug-gene interaction info from DGIdb" help="(--dgidb)"/>
+
+        <param name="in" type="select" label="A variant must be in either all, none or any samples passing the sample-query filter" help="(--in)">
+            <option value="all">Return a variant if all samples matching the query have the variant. (all)</option>
+            <option value="none">Return a variant if the variant does not appear in any of the matching samples. (none)</option>
+            <option value="any">Return a variant if at least one of the matching samples has the variant. (any)</option>
+            <option value="only">Return a variant if the variant is only in the matching samples and not in any of the non-matching samples. (only)</option>
+        </param>
+
+        <param name="region" size="30" type="text" value="" label="Restrict query to this region" help="e.g. chr1:10-20 (--region)"/>
+
+
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+The real power in the GEMINI framework lies in the fact that all of your genetic variants have been stored in a convenient database in the context of a wealth of genome annotations that facilitate variant interpretation. 
+The expressive power of SQL allows one to pose intricate questions of one’s variation data. This tool offers you an easy way to query your variants!
+
+http://gemini.readthedocs.org/en/latest/content/querying.html
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_region.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_region.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,75 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <description>Extracting variants from specific regions or genes</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">region</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini @BINARY@
+
+            ## Select by coordinates or by gene name; the flag is omitted when the chosen field was left empty.
+            #if $region.region_selector == '--reg':
+                #if str($region.region) != '':
+                    --reg "${region.region}"
+                #end if
+            #else:
+                #if str($region.gene) != '':
+                    --gene "${region.gene}"
+                #end if
+            #end if
+
+            ## Column restriction is only passed when the user did not ask for all columns.
+            #if $report.report_selector != 'all':
+                --columns "${report.columns}"
+            #end if
+
+            #if $filter.filter_selector == 'yes':
+                --filter "${filter.filter}"
+            #end if
+
+            $header
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+
+        <!-- Exactly one of the two text fields below is shown, depending on the selector value -->
+        <conditional name="region">
+            <param name="region_selector" type="select" label="Select by ...?" help="">
+                <option value="--reg">genomic coordinates</option>
+                <option value="--gene">gene name</option>
+            </param>
+            <when value="--reg">
+                <param name="region" type="text"
+                       label="Specify genomic region"
+                       help="e.g. chr1:100-200 (--reg)"/>
+            </when>
+            <when value="--gene">
+                <param name="gene" type="text"
+                       label="Specify gene name"
+                       help="e.g. PTPN22 (--gene)"/>
+            </when>
+        </conditional>
+
+        <!-- Shared parameter groups defined in gemini_macros.xml -->
+        <expand macro="column_filter" />
+        <expand macro="filter" />
+        <expand macro="add_header_column" />
+
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+One often is concerned with variants found solely in a particular gene or genomic region.
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_roh.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_roh.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,106 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <description>Identifying runs of homozygosity</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">roh</token>
+    </macros>
+    <command>
+<![CDATA[
+        gemini @BINARY@
+            --min-snps $min_snps
+            --min-total-depth $min_total_depth
+            --min-gt-depth $min_gt_depth
+            --min-size $min_size
+            --max-hets $max_hets
+            --max-unknowns $max_unknowns
+            ## The sample list is optional; -s is only added when it is non-blank.
+            #if $samples.strip():
+                -s "${samples}"
+            #end if
+            "${infile}"
+            > "${outfile}"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+
+        <!-- Numeric thresholds; each maps to the long option quoted in its help text -->
+        <param name="min_snps" type="integer" value="25" size="5" label="Minimum number of expected homozygous SNPs" help="default: 25 (--min-snps)">
+            <validator type="in_range" min="0"/>
+        </param>
+        <param name="min_total_depth" type="integer" value="20" size="10" label="The minimum overall sequencing depth required for a SNP to be considered" help="default: 20 (--min-total-depth)">
+            <validator type="in_range" min="0"/>
+        </param>
+        <param name="min_gt_depth" type="integer" value="0" size="10" label="The minimum required sequencing depth underlying a given sample's genotype for a SNP to be considered" 
+            help="default: 0 (--min-gt-depth)">
+            <validator type="in_range" min="0"/>
+        </param>
+        <param name="min_size" type="integer" value="100000" size="10" label="Minimum run size in base pairs" help="default: 100000 (--min-size)">
+            <validator type="in_range" min="1"/>
+        </param>
+        <param name="max_hets" type="integer" value="1" size="5" label="Maximum number of allowed hets in the run" help="default: 1 (--max-hets)">
+            <validator type="in_range" min="1"/>
+        </param>
+        <param name="max_unknowns" type="integer" value="3" size="5" label="Maximum number of allowed unknowns in the run" help="default: 3 (--max-unknowns)">
+            <validator type="in_range" min="0"/>
+        </param>
+
+        <!-- Optional; left empty, no -s option is passed -->
+        <param name="samples" size="30" type="text" value="" label="Comma separated list of samples to screen for ROHs" help="e.g S120,S450 (-s)"/>
+
+    </inputs>
+
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+===========================================================================
+``ROH``: Identifying runs of homozygosity
+===========================================================================
+Runs of homozygosity are long stretches of homozygous genotypes that reflect
+segments shared identically by descent and are a result of consanguinity or
+natural selection. Consanguinity elevates the occurrence of rare recessive 
+diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious 
+mutations. Hence, the identification of these runs holds medical value. 
+
+The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data. 
+The tool basically looks at every homozygous position on the chromosome as a possible
+start site for the run and looks for those that could give rise to a potentially long 
+stretch of homozygous genotypes. 
+
+For example, for the genotype run below, allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u),
+the possible roh runs (H) would be:
+
+
+::
+
+ genotype_run = H H H H h H H H H u H H H H H u H H H H H H H h H H H H H h H H H H H
+ roh_run1     = H H H H h H H H H u H H H H H u H H H H H H H
+ roh_run2     =           H H H H u H H H H H u H H H H H H H h H H H H H
+ roh_run3     =                     H H H H H u H H H H H H H h H H H H H
+ roh_run4     =                                 H H H H H H H h H H H H H
+
+roh returned for --min-snps = 20 would be:
+
+::
+
+ roh_run1     = H H H H h H H H H u H H H H H u H H H H H H H
+ roh_run2     =           H H H H u H H H H H u H H H H H H H h H H H H H
+
+
+As you can see, the immediate homozygous position right of a break (h or u) would be the possible 
+start of a new roh run and genotypes to the left of a break are pruned since they cannot 
+be part of a longer run than we have seen before.
+
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_stats.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_stats.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,57 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <description>Compute useful variant statistics</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">stats</token>
+    </macros>
+    <command>
+<![CDATA[
+        ## $stats_type expands to the single statistic flag chosen in the form.
+        gemini @BINARY@
+            $stats_type
+            "${infile}"
+            > "${outfile}"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+
+        <!-- Each option value is inserted verbatim into the command line, so it must be the exact gemini flag -->
+        <param name="stats_type" type="select" label="Studying ..." help="">
+            <option value="--tstv">Compute the transition and transversion ratios for the snps (--tstv)</option>
+            <option value="--tstv-coding">Compute the transition/transversion ratios for the snps in the coding regions (--tstv-coding)</option>
+            <option value="--tstv-noncoding">Compute the transition/transversion ratios for the snps in the non-coding regions (--tstv-noncoding)</option>
+            <option value="--snp-counts">Compute the type and count of the snps (--snp-counts)</option>
+            <option value="--sfs">Calculate the site frequency spectrum of the variants (--sfs)</option>
+            <option value="--mds">Compute the pair-wise genetic distance between each sample (--mds)</option>
+            <option value="--vars-by-sample">Return the total variants per sample, sum of homozygous and heterozygous variants (--vars-by-sample)</option>
+        </param>
+
+    </inputs>
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+The stats tool computes some useful variant statistics for a GEMINI database. For example::
+
+
+    $ gemini stats --summarize "select * from variants where in_dbsnp=1 and chrom='chr1'" my.db
+    sample  total   num_het num_hom_alt
+    M10475  1       1       0
+    M128215 1       1       0
+    M10478  2       2       0
+    M10500  2       1       1
+
+
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d gemini_windower.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_windower.xml Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,64 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <description>Conducting analyses on genome "windows"</description>
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">windower</token>
+    </macros>
+    <command>
+<![CDATA[
+        ## Window size, step size, analysis type and summary operation all come straight from the form.
+        gemini @BINARY@
+            -w $w
+            -s $s
+            -t $window_analysis
+            -o $operation
+            "${infile}"
+            > "${outfile}"
+]]>
+    </command>
+    <expand macro="stdio" />
+    <inputs>
+        <param name="infile" type="data" format="sqlite" label="GEMINI database" />
+
+        <!-- Option values are passed verbatim to -t; the labels spell out what each code stands for -->
+        <param name="window_analysis" type="select" label="The type of window analysis requested?" help="(-t)">
+            <option value="nucl_div">Nucleotide diversity (nucl_div)</option>
+            <option value="hwe">Hardy-Weinberg equilibrium (hwe)</option>
+        </param>
+
+        <param name="operation" type="select" label="The operation that should be applied to the -t values" help="(-o)">
+            <option value="mean">mean</option>
+            <option value="median">median</option>
+            <option value="min">min</option>
+            <option value="max">max</option>
+            <option value="collapse">collapse</option>
+        </param>
+
+        <param name="w" type="integer" value="10000" size="10" label="The window size in bp" 
+            help="(-w)">
+            <validator type="in_range" min="0"/>
+        </param>
+
+        <param name="s" type="integer" value="1000" size="10" label="The step size for the windows in bp" 
+            help="(-s)">
+            <validator type="in_range" min="0"/>
+        </param>
+
+    </inputs>
+
+    <outputs>
+        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+It computes variation metrics across genomic windows (both fixed and sliding).
+
+@CITATION@
+    </help>
+    <expand macro="citations"/>
+</tool>
b
diff -r 000000000000 -r 720cbfb4190d readme.rst
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst Mon Aug 25 17:15:54 2014 -0400
[
@@ -0,0 +1,65 @@
+=========================
+Galaxy wrapper for GEMINI
+=========================
+
+
+GEMINI: a flexible framework for exploring genome variation
+
+GEMINI (GEnome MINIng) is designed to be a flexible framework for exploring genetic variation in the context of 
+the wealth of genome annotations available for the human genome. By placing genetic variants, sample genotypes, 
+and useful genome annotations into an integrated database framework, GEMINI provides a simple, flexible, yet very 
+powerful system for exploring genetic variation for disease and population genetics.
+
+Using the GEMINI framework begins by loading a VCF file into a database. Each variant is automatically 
+annotated by comparing it to several genome annotations from sources such as ENCODE tracks, UCSC tracks, 
+OMIM, dbSNP, KEGG, and HPRD. All of this information is stored in a portable SQLite database that allows 
+one to explore and interpret both coding and non-coding variation using “off-the-shelf” tools or an 
+enhanced SQL engine.
+
+Please also see the original `manuscript <http://www.ploscompbiol.org/article/info%3Adoi%2F10.1371%2Fjournal.pcbi.1003153>`_.
+
+
+============
+Installation
+============
+
+It is recommended to install this wrapper via the `Galaxy Tool Shed`.
+
+.. _`Galaxy Tool Shed`:  https://testtoolshed.g2.bx.psu.edu/view/iuc/gemini
+
+
+=======
+History
+=======
+- 0.9.1: Initial public release
+
+
+====================
+Detailed description
+====================
+
+View the original GEMINI documentation: http://gemini.readthedocs.org/en/latest/index.html
+
+
+===============================
+Wrapper Licence (MIT/BSD style)
+===============================
+
+Permission to use, copy, modify, and distribute this software and its
+documentation with or without modifications and for any purpose and
+without fee is hereby granted, provided that any copyright notices
+appear in all copies and that both those copyright notices and this
+permission notice appear in supporting documentation, and that the
+names of the contributors or copyright holders not be used in
+advertising or publicity pertaining to distribution of the software
+without specific prior permission.
+
+THE CONTRIBUTORS AND COPYRIGHT HOLDERS OF THIS SOFTWARE DISCLAIM ALL
+WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL THE
+CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT
+OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+OR PERFORMANCE OF THIS SOFTWARE.
+
b
diff -r 000000000000 -r 720cbfb4190d repository_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml Mon Aug 25 17:15:54 2014 -0400
b
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<!-- Declares a dependency on the data_manager_gemini_database_downloader repository (owner iuc), pinned to one changeset. -->
+<repositories description="This requires the GEMINI data manager definition to install all required annotation databases.">
+  <repository changeset_revision="fd9e9ac5ecb3" name="data_manager_gemini_database_downloader" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+</repositories>
b
diff -r 000000000000 -r 720cbfb4190d tool-data/gemini_databases.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/gemini_databases.loc.sample Mon Aug 25 17:15:54 2014 -0400
b
@@ -0,0 +1,3 @@
+## GEMINI databases
+#Version	dbkey	Description	Path
+#08_08_2014	hg19	Database (08-08-2014)	/path/to/gemini/annotation/data
b
diff -r 000000000000 -r 720cbfb4190d tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Mon Aug 25 17:15:54 2014 -0400
b
@@ -0,0 +1,7 @@
+<tables>
+    <!-- GEMINI annotation databases: rows are read from tool-data/gemini_databases.loc; lines starting with '#' are comments -->
+    <table name="gemini_databases" comment_char="#">
+        <!-- Column order of each row in the .loc file -->
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/gemini_databases.loc" />
+    </table>
+</tables>
+
b
diff -r 000000000000 -r 720cbfb4190d tool_dependencies.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml Mon Aug 25 17:15:54 2014 -0400
b
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!-- External packages used by the GEMINI wrappers; each one is resolved from a Tool Shed repository pinned to a changeset. -->
+<tool_dependency>
+    <package name="grabix" version="0.1.3">
+        <repository changeset_revision="0714d88bd854" name="package_grabix_0_1_3" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="tabix" version="0.2.6">
+        <repository changeset_revision="3d6beba7393e" name="package_tabix_0_2_6" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="samtools" version="0.1.19">
+        <repository changeset_revision="923adc89c666" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="bedtools" version="2.19.1">
+        <repository changeset_revision="fb3a854c7104" name="package_bedtools_2_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+    <package name="gemini" version="0.10.0">
+        <repository changeset_revision="42c72725f879" name="package_gemini_0_10_0" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>