annotate loompy_to_tsv.py @ 8:e98619de2776 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit 656ce7ff596a8870b77848469e85b406c7bd9344
author iuc
date Sun, 12 Nov 2023 16:44:29 +0000
parents 4b0adaa31c95
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c8e4d0b9ae8c "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
iuc
parents:
diff changeset
#!/usr/bin/env python

"""Converts a loompy file to tsv file(s). Each layer becomes a new file.

Column and row attributes are written to ``attributes/col_attr.tsv`` and
``attributes/row_attr.tsv``.  Every layer is written to
``output/<layer name>.tsv``; the layer whose name is the empty string is
written to ``output/mainmatrix.tsv``.  The ``attributes`` and ``output``
directories are not created here and must already exist.
"""

import argparse

import loompy


def _write_attribute_table(path, attributes):
    """Write one attribute collection as a tab-separated table.

    The first line holds the attribute names; each following line holds the
    ``str()`` of one entry of every attribute, in the same order as the
    header.

    :param path: destination file, opened for writing (truncates).
    :param attributes: mapping-like object exposing ``keys()`` and item
        access by name (``loompyfile.ca`` or ``loompyfile.ra``).
    """
    names = list(attributes.keys())
    columns = [[str(value) for value in attributes[name]] for name in names]
    with open(path, "w") as handle:
        handle.write("\t".join(names) + "\n")
        # zip(*columns) yields nothing when there are no attributes, so an
        # attribute-less file produces a header-only table instead of
        # failing.  Joining (rather than appending "\t" and stripping it
        # afterwards) keeps empty trailing values as empty fields, so every
        # row has as many fields as the header.
        for record in zip(*columns):
            handle.write("\t".join(record) + "\n")


def _layer_output_name(layers, index):
    """Return the output path for the layer at ``index`` in ``layers``.

    :param layers: list of all layer names, in output order.
    :param index: position of the layer being written.
    :returns: ``output/mainmatrix.tsv`` for the empty layer name,
        ``output/<name>.tsv`` otherwise; if the same name already occurred
        earlier in ``layers``, the number of earlier occurrences is appended
        to the name so that files do not overwrite each other.
    """
    name = layers[index]
    repeats = layers[:index].count(name)
    if repeats:  # Different output names if layers have same names somehow
        return "output/" + str(name) + str(repeats) + ".tsv"
    if name == "":  # Empty layer name
        return "output/mainmatrix.tsv"
    return "output/" + str(name) + ".tsv"  # Usual case


def _write_layer(path, layer):
    """Write a single layer as a tab-separated matrix, one matrix row per line.

    :param path: destination file, opened for writing (truncates).
    :param layer: object returned by ``loompyfile[layer_name]``; it is read
        completely with ``layer[:, :]`` and converted to strings.
    """
    matrix = layer[:, :].astype(str)
    with open(path, "w") as handle:
        for row in matrix:
            handle.write("\t".join(row) + "\n")


def main():
    """Parse the command line and convert the given loom file."""
    parser = argparse.ArgumentParser(description="Loompy file converter flags")
    parser.add_argument('--version', action='version', version='%(prog)s 0.1.0',
                        help="Displays tool version")
    # Without a file there is nothing to convert; let argparse report it
    # instead of passing None on to loompy.
    parser.add_argument("-f", "--file", required=True,
                        help="loom file to import")
    args = parser.parse_args()

    # The context manager closes the loom connection even if writing fails.
    with loompy.connect(args.file, mode="r") as loompyfile:
        # Create column and row attribute output
        _write_attribute_table("attributes/col_attr.tsv", loompyfile.ca)
        _write_attribute_table("attributes/row_attr.tsv", loompyfile.ra)

        # Build output files for each layer, one at a time so that only a
        # single layer is held in memory as strings.
        layers = list(loompyfile.layers.keys())
        for index, layer in enumerate(layers):
            _write_layer(_layer_output_name(layers, index), loompyfile[layer])


if __name__ == "__main__":
    main()