comparison tsv_to_loompy.py @ 3:b5c7ba11401d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit dc9d19d1f902f3ed54009cd0e68c8518c284b856"
author iuc
date Mon, 06 Jan 2020 13:45:13 -0500
parents
children
comparison
equal deleted inserted replaced
2:e175d4067b00 3:b5c7ba11401d
1 #!/usr/bin/env python
2 """This module converts a tsv file into a binary loom file"""
3
4 import argparse
5 import os
6
7 import loompy
8 import numpy as np
9
# Command-line interface.
# --rowfile, --colfile and --files are mandatory: the conversion below opens
# all of them unconditionally, so a call that omits one is rejected here with
# a usage message instead of failing later with a TypeError/IndexError.
parser = argparse.ArgumentParser(description="Loompy file converter flags")
parser.add_argument('--VERSION', action='version', version='%(prog)s 0.1.0',
                    help="Displays tool version")
parser.add_argument('--rowfile', '-r', required=True,
                    help="File of row attributes & values")
parser.add_argument('--colfile', '-c', required=True,
                    help="File of column attributes and values")
# Optional: the script falls back to a default name when this is not given.
parser.add_argument('--output', '-o', help="Output file name")
# At least one matrix file is needed because the first one becomes the main
# layer of the loom file.
parser.add_argument('--files', '-f', nargs='+', required=True,
                    help="Input tsv files. First file becomes main layer.")
args = parser.parse_args()
20
# Copy the parsed options into the module-level names used by the rest of
# the script.
rowsfile = args.rowfile
colsfile = args.colfile
alldata = args.files
# Fall back to a fixed output name when none (or an empty one) was supplied.
filename = args.output or "converted.loom"

# Accumulators filled in while the input files are read:
#   rowdict / coldict  - attribute name -> list of values
#   alayers            - one flat list of floats per input matrix file
#   layernames         - layer name for each entry of alayers
rowdict, coldict = {}, {}
alayers, layernames = [], []
32
# Build the row-attribute table from the row file.
# The first line holds the tab-separated attribute names; every following
# line holds one value per attribute, giving
#     attribute name -> [value of that attribute on each data line]
with open(rowsfile, "r") as rows:
    # Only spaces and line terminators are trimmed. A bare strip() would also
    # remove leading/trailing tabs, dropping empty first/last cells and
    # shifting the remaining values under the wrong attribute.
    row_attributes = rows.readline().strip(" \r\n").split("\t")
    for x in row_attributes:
        rowdict[x] = []
    for line in rows:
        line = line.strip(" \r\n")
        if not line:
            # A blank line carries no values; skipping it keeps every
            # attribute list the same length.
            continue
        # Indexing (rather than zip) keeps the IndexError for a line that
        # has more fields than the header.
        for position, value in enumerate(line.split("\t")):
            rowdict[row_attributes[position]].append(value)
# Build the column-attribute table from the column file.
# The first line holds the tab-separated attribute names; every following
# line holds one value per attribute, giving
#     attribute name -> [value of that attribute on each data line]
# Double quotes and spaces are removed from every line before splitting.
with open(colsfile, "r") as cols:
    header = cols.readline().replace('\"', "").replace(' ', "")
    # Only line terminators are trimmed. A bare strip() would also remove
    # leading/trailing tabs, dropping empty first/last cells and shifting the
    # remaining values under the wrong attribute.
    col_attributes = header.strip("\r\n").split("\t")
    for x in col_attributes:
        coldict[x] = []
    for line in cols:
        line = line.replace('\"', "").replace(' ', "").strip("\r\n")
        if not line:
            # A blank line carries no values; skipping it keeps every
            # attribute list the same length.
            continue
        # Indexing (rather than zip) keeps the IndexError for a line that
        # has more fields than the header.
        for position, value in enumerate(line.split("\t")):
            coldict[col_attributes[position]].append(value)
# Dimensions of every loom layer: the number of values recorded for the
# first row attribute and for the first column attribute respectively.
rowshape = len(list(rowdict.values())[0])
colshape = len(list(coldict.values())[0])
66
# Read every input matrix file.
# For each file, its layer name is appended to `layernames` and its whole
# matrix, flattened row by row into one list of floats, to `alayers`.
for cfile in alldata:
    # The layer name is the file name without directory and extension.
    # splitext() is used instead of chopping a fixed four characters so names
    # without a ".tsv"-length extension are not mangled.
    base = os.path.basename(cfile)
    stem, ext = os.path.splitext(base)
    # splitext() treats a dot-file such as ".tsv" as all stem and no
    # extension; such a file has no usable name.
    layer = "" if (base.startswith(".") and not ext) else stem
    if layer == "":
        raise Exception("Please only use named files")
    layernames.append(layer)
    with open(cfile, "r") as tsv:
        cmatrix = []
        for line in tsv:
            line = line.strip()
            if not line:
                # Blank lines hold no cells; float('') would raise on them.
                continue
            cmatrix.extend(float(cell) for cell in line.split("\t"))
        alayers.append(cmatrix)
83
# Normalise the output name first, so that the stale-file check below looks
# at the path that will actually be written.
if not filename.endswith(".loom"):
    filename = filename + ".loom"
# Loompy cannot overwrite existing files. If a file with the target name is
# already present, it must be deleted before the new one is created.
if os.path.isfile(filename):
    os.remove(filename)
# To create the file properly, the first row and column attributes must be
# added separately in the form of individual dictionaries
row_attrs = {row_attributes[0]: np.asarray(rowdict[row_attributes[0]])}
col_attrs = {col_attributes[0]: np.asarray(coldict[col_attributes[0]])}
# The first input file is the main layer; its flat value list is reshaped
# to (number of row values, number of column values).
matrix = np.asarray(alayers[0]).astype(float).reshape(rowshape, colshape)
# Creation of initial loom file
loompy.create(filename, matrix, row_attrs, col_attrs)
# Re-open the loom file and store every row attribute, every column
# attribute, and then each input matrix after the first as a named layer.
with loompy.connect(filename) as loomfile:
    for attr_name in row_attributes:
        loomfile.ra[attr_name] = rowdict[attr_name]
    for attr_name in col_attributes:
        loomfile.ca[attr_name] = coldict[attr_name]
    # Index 0 is the main layer and is already in the file, so start at 1.
    for z, values in enumerate(alayers[1:], start=1):
        matrix = np.asarray(values).astype(float)
        loomfile[layernames[z]] = matrix.reshape(rowshape, colshape)