diff gemini_roh.xml @ 0:91b4db3e6df4 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
author iuc
date Thu, 18 Feb 2016 08:57:56 -0500
parents
children ce54eb6fd5f9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gemini_roh.xml	Thu Feb 18 08:57:56 2016 -0500
@@ -0,0 +1,110 @@
+<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
+    <description>Identifying runs of homozygosity</description>
+    <macros>
+        <import>gemini_macros.xml</import>
+        <token name="@BINARY@">roh</token>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version_command" />
+    <command>
+<![CDATA[
+        gemini @BINARY@
+            --min-snps $min_snps
+            --min-total-depth $min_total_depth
+            --min-gt-depth $min_gt_depth
+            --min-size $min_size
+            --max-hets $max_hets
+            --max-unknowns $max_unknowns
+            #if $samples.strip():
+                -s "${samples}"
+            #end if
+            "${ infile }"
+            > "${ outfile }"
+]]>
+    </command>
+    <inputs>
+        <expand macro="infile" />
+        <!-- Numeric thresholds; each one is passed to the command-line option named in its help text. -->
+        <param name="min_snps" type="integer" value="25" label="Minimum number of expected homozygous SNPs" help="default: 25 (--min-snps)">
+            <validator type="in_range" min="0"/>
+        </param>
+        <param name="min_total_depth" type="integer" value="20" label="The minimum overall sequencing depth required for a SNP to be considered" help="default: 20 (--min-total-depth)">
+            <validator type="in_range" min="0"/>
+        </param>
+        <param name="min_gt_depth" type="integer" value="0" label="The minimum required sequencing depth underlying a given sample's genotype for a SNP to be considered"
+            help="default: 0 (--min-gt-depth)">
+            <validator type="in_range" min="0"/>
+        </param>
+        <param name="min_size" type="integer" value="100000" label="Minimum run size in base pairs" help="default: 100000 (--min-size)">
+            <validator type="in_range" min="1"/>
+        </param>
+        <param name="max_hets" type="integer" value="1" label="Maximum number of allowed hets in the run" help="default: 1 (--max-hets)">
+            <validator type="in_range" min="1"/>
+        </param>
+        <param name="max_unknowns" type="integer" value="3" label="Maximum number of allowed unknowns in the run" help="default: 3 (--max-unknowns)">
+            <validator type="in_range" min="0"/>
+        </param>
+        <!-- Optional sample filter; the command template only emits -s when this value is non-blank. -->
+        <param name="samples" type="text" value="" label="Comma separated list of samples to screen for ROHs" help="e.g. S120,S450 (-s)"/>
+
+    </inputs>
+    <!-- The command redirects standard output into this tabular dataset. -->
+    <outputs>
+        <data name="outfile" format="tabular" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="infile" value="gemini_burden_input.db" ftype="gemini.sqlite" />
+            <param name="min_snps" value="3" />
+            <param name="min_size" value="10" />
+            <param name="min_total_depth" value="0" />
+            <output name="outfile" file="gemini_roh_result.tabular" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**What it does**
+
+===========================================================================
+``ROH``: Identifying runs of homozygosity
+===========================================================================
+Runs of homozygosity are long stretches of homozygous genotypes that reflect
+segments shared identically by descent and are a result of consanguinity or
+natural selection. Consanguinity elevates the occurrence of rare recessive
+diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious
+mutations. Hence, the identification of these runs holds medical value.
+
+The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data.
+The tool basically looks at every homozygous position on the chromosome as a possible
+start site for the run and looks for those that could give rise to a potentially long
+stretch of homozygous genotypes.
+
+For example, allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u),
+the possible roh runs (H) would be:
+
+
+::
+
+	genotype_run = H H H H h H H H H u H H H H H u H H H H H H H h H H H H H h H H H H H
+	roh_run1     = H H H H h H H H H u H H H H H u H H H H H H H
+	roh_run2     =           H H H H u H H H H H u H H H H H H H h H H H H H
+	roh_run3     =                     H H H H H u H H H H H H H h H H H H H
+	roh_run4     =                                 H H H H H H H h H H H H H
+
+roh returned for --min-snps = 20 would be:
+
+::
+
+	roh_run1     = H H H H h H H H H u H H H H H u H H H H H H H
+	roh_run2     =           H H H H u H H H H H u H H H H H H H h H H H H H
+
+
+As you can see, the immediate homozygous position right of a break (h or u) would be the possible
+start of a new roh run and genotypes to the left of a break are pruned since they cannot
+be part of a longer run than we have seen before.
+
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>