annotate check_bcfile.py @ 2:43e3eeb2e0ec draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
author iuc
date Wed, 15 Jul 2020 17:42:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
2
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
3 import argparse
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
4 import sys
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
5
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
6 parser = argparse.ArgumentParser()
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
7 parser.add_argument('bcfile', help='barcode file')
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
8 args = parser.parse_args()
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
9
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
10 barcodes = []
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
11
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
12 with open(args.bcfile, "r") as fh:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
13 for line in fh:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
14 if len(line) == 0:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
15 continue
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
16 if line.startswith("#"):
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
17 continue
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
18 barcodes.append(line.split())
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
19
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
20 if len(barcodes) <= 1:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
21 sys.exit("barcode file is empty")
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
22
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
23 # check that all lines have the same number of columns
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
24 ncol = None
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
25 for bc in barcodes:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
26 if ncol is None:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
27 ncol = len(bc)
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
28 elif ncol != len(bc):
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
29 sys.exit("barcode file has inconsistent number of columns")
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
30
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
31 isname = False
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
32 for bc in barcodes:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
33 if len(bc[-1].strip("ATCGatcg")) > 0:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
34 isname = True
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
35 break
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
36
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
37 names = set()
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
38 for bc in barcodes:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
39 if isname:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
40 n = bc[-1]
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
41 else:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
42 n = '-'.join(bc)
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
43 if n in names:
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
44 sys.exit("duplicate sample %s in barcode file" % n)
43e3eeb2e0ec "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit f55e2407891a3c1f73f14a77b7ddadcd6f5eb1f8"
iuc
parents:
diff changeset
45 names.add(n)