Mercurial > repos > iuc > gemini
diff gemini_roh.xml @ 0:720cbfb4190d draft
Imported from capsule None
author | iuc |
---|---|
date | Mon, 25 Aug 2014 17:15:54 -0400 |
parents | |
children | 93bb0cfacefb |
line wrap: on
line diff
<!--
  Galaxy tool wrapper for "gemini roh" (runs of homozygosity).
  Recovered from the Mercurial diff that adds gemini_roh.xml as a new file
  (changeset 0:720cbfb4190d, Mon Aug 25 17:15:54 2014 -0400).
-->
<tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.0">
    <description>Identifying runs of homozygosity</description>
    <!-- The expanded macros (requirements, version_command, stdio, citations) and the
         @VERSION@ / @CITATION@ tokens are not defined in this file; presumably they come
         from the imported gemini_macros.xml (verify against that file). -->
    <expand macro="requirements" />
    <expand macro="version_command" />
    <macros>
        <import>gemini_macros.xml</import>
        <!-- @BINARY@ selects the gemini sub-command and is reused in the tool id and name. -->
        <token name="@BINARY@">roh</token>
    </macros>
    <command>
<![CDATA[
        gemini @BINARY@
            --min-snps $min_snps
            --min-total-depth $min_total_depth
            --min-gt-depth $min_gt_depth
            --min-size $min_size
            --max-hets $max_hets
            --max-unknowns $max_unknowns
            ## Only pass -s when the user actually entered a sample list.
            #if $samples.strip() != '':
                -s "${samples}"
            #end if
            "${ infile }"
            > "${ outfile }"
]]>
    </command>
    <expand macro="stdio" />
    <inputs>
        <param name="infile" type="data" format="sqlite" label="GEMINI database" />

        <param name="min_snps" type="integer" value="25" size="5"
               label="Minimum number of expected homozygous SNPs"
               help="default: 25 (--min-snps)">
            <validator type="in_range" min="0"/>
        </param>
        <param name="min_total_depth" type="integer" value="20" size="10"
               label="The minimum overall sequencing depth required for a SNP to be considered"
               help="default: 20 (--min-total-depth)">
            <validator type="in_range" min="0"/>
        </param>
        <param name="min_gt_depth" type="integer" value="0" size="10"
               label="The minimum required sequencing depth underlying a given sample's genotype for a SNP to be considered"
               help="default: 0 (--min-gt-depth)">
            <validator type="in_range" min="0"/>
        </param>
        <param name="min_size" type="integer" value="100000" size="10"
               label="Minimum run size in base pairs"
               help="default: 100000 (--min-size)">
            <validator type="in_range" min="1"/>
        </param>
        <!-- NOTE(review): min="1" rejects a value of 0 (no heterozygous calls allowed in a run);
             confirm against the gemini roh documentation whether 0 should be accepted. -->
        <param name="max_hets" type="integer" value="1" size="5"
               label="Maximum number of allowed hets in the run"
               help="default: 1 (--max-hets)">
            <validator type="in_range" min="1"/>
        </param>
        <param name="max_unknowns" type="integer" value="3" size="5"
               label="Maximum number of allowed unknowns in the run"
               help="default: 3 (--max-unknowns)">
            <validator type="in_range" min="0"/>
        </param>

        <!-- Optional: when left empty, the -s option is omitted from the command line. -->
        <param name="samples" size="30" type="text" value=""
               label="Comma separated list of samples to screen for ROHs"
               help="e.g. S120,S450 (-s)"/>

    </inputs>

    <outputs>
        <data name="outfile" format="tabular" label="${tool.name} on ${on_string}" />
    </outputs>
    <tests>
        <!-- TODO: this test case is empty; it needs a GEMINI database input and an
             expected output before it can verify anything. -->
        <test>
        </test>
    </tests>
    <help>

**What it does**

===========================================================================
``ROH``: Identifying runs of homozygosity
===========================================================================
Runs of homozygosity are long stretches of homozygous genotypes that reflect
segments shared identically by descent and are a result of consanguinity or
natural selection. Consanguinity elevates the occurrence of rare recessive
diseases (e.g. cystic fibrosis) that represent homozygotes for strongly deleterious
mutations. Hence, the identification of these runs holds medical value.

The 'roh' tool in GEMINI returns runs of homozygosity identified in whole genome data.
The tool basically looks at every homozygous position on the chromosome as a possible
start site for the run and looks for those that could give rise to a potentially long
stretch of homozygous genotypes.

For example, allowing ``1 HET`` genotype (h) and ``2 UKW`` genotypes (u),
the possible roh runs (H) would be:


::

    genotype_run = H H H H h H H H H u H H H H H u H H H H H H H h H H H H H h H H H H H
    roh_run1     = H H H H h H H H H u H H H H H u H H H H H H H
    roh_run2     =           H H H H u H H H H H u H H H H H H H h H H H H H
    roh_run3     =                     H H H H H u H H H H H H H h H H H H H
    roh_run4     =                                 H H H H H H H h H H H H H

roh returned for --min-snps = 20 would be:

::

    roh_run1     = H H H H h H H H H u H H H H H u H H H H H H H
    roh_run2     =           H H H H u H H H H H u H H H H H H H h H H H H H


As you can see, the immediate homozygous position right of a break (h or u) would be the possible
start of a new roh run and genotypes to the left of a break are pruned since they cannot
be part of a longer run than we have seen before.


@CITATION@
    </help>
    <expand macro="citations"/>
</tool>