Mercurial > repos > iuc > anndata_manipulate
annotate loompy_to_tsv.py @ 7:43cb7b5a6fe7 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit 033c2d20a0c73cd84c5c3aee73ae224183a9c1c2"
author | iuc |
---|---|
date | Wed, 22 Jul 2020 12:21:13 -0400 |
parents | 6db1b06e6bbb |
children | 3d748954434b |
rev | line source |
---|---|
3
6db1b06e6bbb
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
6db1b06e6bbb
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
iuc
parents:
diff
changeset
|
2 |
6db1b06e6bbb
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
iuc
parents:
diff
changeset
|
3 """Converts a loompy file to tsv file(s). Each layer becomes a new file.""" |
6db1b06e6bbb
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
iuc
parents:
diff
changeset
|
4 |
6db1b06e6bbb
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
iuc
parents:
diff
changeset
|
5 import argparse |
6db1b06e6bbb
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
iuc
parents:
diff
changeset
|
6 |
6db1b06e6bbb
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
iuc
parents:
diff
changeset
|
7 import loompy |
6db1b06e6bbb
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
iuc
parents:
diff
changeset
|
8 |
6db1b06e6bbb
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
iuc
parents:
diff
changeset
|
def attribute_lines(names, columns):
    """Render an attribute table as a list of TSV lines.

    Args:
        names: attribute names, written tab-separated as the header line.
        columns: one list of already-stringified values per attribute, in
            the same order as ``names``.

    Returns:
        A list of newline-terminated strings: the header followed by one
        line per record (the i-th value of every attribute).
    """
    lines = ["\t".join(names) + "\n"]
    # zip(*columns) transposes per-attribute lists into per-record tuples.
    # Joining (rather than appending "\t" and stripping it afterwards) keeps
    # empty trailing values, so every row stays aligned with the header, and
    # an attribute-less table simply yields no data rows.
    lines.extend("\t".join(record) + "\n" for record in zip(*columns))
    return lines


def layer_output_names(layer_names):
    """Return the output path for each layer, in order.

    The layer named "" is written to ``output/mainmatrix.tsv``; any other
    layer is written to ``output/<name>.tsv``. Should a name occur more than
    once, later occurrences get the number of earlier occurrences appended
    (``<name>1.tsv``, ``<name>2.tsv``, ...) so no file is overwritten.

    Args:
        layer_names: iterable of layer names.

    Returns:
        A list of relative file paths, one per entry of ``layer_names``.
    """
    seen = {}
    paths = []
    for name in layer_names:
        name = str(name)
        repeats = seen.get(name, 0)
        seen[name] = repeats + 1
        if repeats:
            paths.append("output/" + name + str(repeats) + ".tsv")
        elif name == "":
            paths.append("output/mainmatrix.tsv")
        else:
            paths.append("output/" + name + ".tsv")
    return paths


def main():
    """Convert a loom file to TSV files.

    Writes ``attributes/col_attr.tsv``, ``attributes/row_attr.tsv`` and one
    ``output/*.tsv`` per layer. The ``attributes`` and ``output`` directories
    are not created here and must already exist in the working directory.
    """
    parser = argparse.ArgumentParser(description="Loompy file converter flags")
    parser.add_argument('--version', action='version', version='%(prog)s 0.1.0',
                        help="Displays tool version")
    parser.add_argument("-f", "--file", required=True,
                        help="loom file to import")
    args = parser.parse_args()

    # The context manager closes the underlying file even if writing fails.
    with loompy.connect(args.file) as loompyfile:
        # Column attribute output
        col_attributes = list(loompyfile.ca.keys())
        allcols = [[str(c) for c in loompyfile.ca[col]]
                   for col in col_attributes]
        with open("attributes/col_attr.tsv", "w") as colout:
            colout.writelines(attribute_lines(col_attributes, allcols))

        # Row attribute output
        row_attributes = list(loompyfile.ra.keys())
        allrows = [[str(r) for r in loompyfile.ra[row]]
                   for row in row_attributes]
        with open("attributes/row_attr.tsv", "w") as rowout:
            rowout.writelines(attribute_lines(row_attributes, allrows))

        # One output file per layer; each layer is loaded, written and then
        # released so only a single matrix is held in memory at a time.
        layers = list(loompyfile.layers.keys())
        for layer, outputname in zip(layers, layer_output_names(layers)):
            matrix = loompyfile[layer][:, :].astype(str)
            with open(outputname, "w") as outputmatrix:
                outputmatrix.writelines(
                    "\t".join(line) + "\n" for line in matrix)


if __name__ == "__main__":
    main()