diff discover_familial_relationships.py @ 31:a631c2f6d913

Update to Miller Lab devshed revision 3c4110ffacc3
author Richard Burhans <burhans@bx.psu.edu>
date Fri, 20 Sep 2013 13:25:27 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/discover_familial_relationships.py	Fri Sep 20 13:25:27 2013 -0400
@@ -0,0 +1,140 @@
+#!/usr/bin/env python
+"""Report pairwise familial relationships for the individuals of a population.
+
+Steps, each consuming the files left in the working directory by the previous:
+  1. kinship_prep   reads the SNP table; expected to write kinship.map,
+                    kinship.ped and kinship.dat
+  2. king --kinship reads those three files; expected to write king.kin
+  3. this script    reads king.kin and writes one tab-separated row per pair:
+                    ID1, ID2, kinship coefficient, relationship label
+
+Usage:
+  discover_familial_relationships.py input input_type ind_arg pop_input output
+"""
+
+import sys
+import gd_util
+
+from Population import Population
+
+################################################################################
+
+USAGE = ('Usage: discover_familial_relationships.py '
+         '<input> <input_type> <ind_arg> <pop_input> <output>')
+
+# Columns this script expects in king.kin, in order.
+KIN_COLUMNS = ['FID', 'ID1', 'ID2', 'N_SNP', 'Z0', 'Phi', 'HetHet', 'IBS0',
+               'Kinship', 'Error']
+KIN_HEADER = '\t'.join(KIN_COLUMNS)
+ID1_COL = KIN_COLUMNS.index('ID1')
+ID2_COL = KIN_COLUMNS.index('ID2')
+KINSHIP_COL = KIN_COLUMNS.index('Kinship')
+
+
+def relationship_label(kinship):
+    """Map a kinship coefficient to a relationship label.
+
+    Returns '' when the coefficient is below the 3rd-degree cutoff.  The
+    top cutoff is exclusive (> 0.354); the others are inclusive.
+    """
+    if kinship > 0.354:
+        return 'duplicate or MZ twin'
+    if kinship >= 0.177:
+        return '1st degree relatives'
+    if kinship >= 0.0884:
+        return '2nd degree relatives'
+    if kinship >= 0.0442:
+        return '3rd degree relatives'
+    return ''
+
+################################################################################
+
+if len(sys.argv) != 6:
+    gd_util.die(USAGE)
+
+snp_table, input_type, ind_arg, pop_input, output = sys.argv[1:]
+
+p_total = Population()
+p_total.from_wrapped_dict(ind_arg)
+
+p1 = Population()
+p1.from_population_file(pop_input)
+if not p_total.is_superset(p1):
+    gd_util.die('There is an individual in the population that is not in the SNP table')
+
+################################################################################
+
+prog = 'kinship_prep'
+
+# Numeric options are passed as strings: an argv list handed to subprocess
+# must contain only strings, and whether run_program converts its arguments
+# is not visible from this script.
+args = [ prog ]
+args.append(snp_table)  # a Galaxy SNP table
+args.append('0')        # required number of reads for each individual to use a SNP
+args.append('0')        # required genotype quality for each individual to use a SNP
+args.append('0')        # minimum spacing between SNPs on the same scaffold
+
+for tag in p1.tag_list():
+    if input_type == 'gd_genotype':
+        # Tags look like "column:name".  For gd_genotype input the column
+        # index is lowered by 2 before being handed to kinship_prep
+        # (NOTE(review): presumably a column-layout offset -- confirm).
+        column, name = tag.split(':')
+        tag = '{0}:{1}'.format(int(column) - 2, name)
+    args.append(tag)
+
+gd_util.run_program(prog, args)
+
+# kinship.map
+# kinship.ped
+# kinship.dat
+
+################################################################################
+
+prog = 'king'
+
+args = [
+    prog,
+    '-d', 'kinship.dat',
+    '-p', 'kinship.ped',
+    '-m', 'kinship.map',
+    '--kinship',
+]
+
+gd_util.run_program(prog, args)
+
+# king.kin
+
+################################################################################
+
+with open('king.kin') as fh:
+    header = fh.readline().rstrip('\r\n')
+    if header != KIN_HEADER:
+        gd_util.die('king.kin: unexpected header line: {0!r}'.format(header))
+
+    # The output is only opened once the header has been validated.
+    with open(output, 'w') as ofh:
+        for line_number, line in enumerate(fh, 2):
+            elems = line.rstrip('\r\n').split('\t')
+            if len(elems) != len(KIN_COLUMNS):
+                gd_util.die(
+                    'king.kin line {0}: expected {1} tab-separated fields, '
+                    'found {2}'.format(line_number, len(KIN_COLUMNS), len(elems)))
+
+            kinship_text = elems[KINSHIP_COL]
+            try:
+                kinship = float(kinship_text)
+            except ValueError:
+                gd_util.die(
+                    'king.kin line {0}: kinship value {1!r} is not a '
+                    'number'.format(line_number, kinship_text))
+
+            row = [elems[ID1_COL], elems[ID2_COL], kinship_text,
+                   relationship_label(kinship)]
+            ofh.write('\t'.join(row) + '\n')
+
+################################################################################
+
+sys.exit(0)
+