annotate check_bcfile.py @ 2:d033e1ccb386 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
author iuc
date Thu, 16 Jul 2020 07:31:44 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
d033e1ccb386 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
d033e1ccb386 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
2
d033e1ccb386 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
3 import argparse
d033e1ccb386 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
4 import sys
d033e1ccb386 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
5
d033e1ccb386 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
def read_barcodes(handle):
    """Parse barcode rows from an iterable of text lines.

    Lines starting with ``#`` are skipped as comments. Blank and
    whitespace-only lines are skipped as well.

    :param handle: iterable yielding lines (e.g. an open text file)
    :return: list of rows, each row being the list of whitespace-separated
        fields of one line
    """
    rows = []
    for line in handle:
        if line.startswith("#"):
            continue
        fields = line.split()
        # Lines read from a file keep their trailing newline, so checking
        # len(line) == 0 never matches; an empty split result is what
        # identifies a blank line.
        if not fields:
            continue
        rows.append(fields)
    return rows


def check_barcodes(barcodes):
    """Validate parsed barcode rows, exiting with a message on failure.

    Checks, in order: that there is more than one row, that all rows have
    the same number of columns, and that no sample occurs twice.

    :param barcodes: list of rows as returned by :func:`read_barcodes`
    :raises SystemExit: with an explanatory message if a check fails
    """
    # NOTE(review): a file with exactly one row is also reported as
    # "empty"; threshold and message are kept unchanged on purpose.
    if len(barcodes) <= 1:
        sys.exit("barcode file is empty")

    # check that all lines have the same number of columns
    ncol = len(barcodes[0])
    if any(len(bc) != ncol for bc in barcodes):
        sys.exit("barcode file has inconsistent number of columns")

    # The last column is treated as a sample name as soon as one row has a
    # last field containing anything other than the letters A, C, G, T
    # (either case).
    isname = any(bc[-1].strip("ATCGatcg") for bc in barcodes)

    names = set()
    for bc in barcodes:
        # Without a name column the joined fields identify the sample.
        n = bc[-1] if isname else '-'.join(bc)
        if n in names:
            sys.exit("duplicate sample %s in barcode file" % n)
        names.add(n)


def main():
    """Read the barcode file named on the command line and validate it."""
    parser = argparse.ArgumentParser()
    parser.add_argument('bcfile', help='barcode file')
    args = parser.parse_args()

    with open(args.bcfile, "r") as fh:
        barcodes = read_barcodes(fh)

    check_barcodes(barcodes)


if __name__ == "__main__":
    main()