Mercurial > repos > recetox > matchms_fingerprint_similarity
comparison formatter.py @ 0:84af792d3a78 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
author | recetox |
---|---|
date | Tue, 27 Jun 2023 14:27:04 +0000 |
parents | |
children | df85b26201d1 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:84af792d3a78 |
---|---|
1 import click | |
2 from matchms.importing import scores_from_json | |
3 from pandas import DataFrame | |
4 | |
5 | |
def scores_to_dataframe(scores):
    """Unpack a matchms scores object into a single long-format dataframe.

    Every stored (row, col) pair becomes one output row holding the compound
    name of the query spectrum, the compound name of the reference spectrum
    and all score values recorded for that pair.

    Args:
        scores (matchms.scores): Scores object. Its ``scores`` attribute must
            expose ``row``, ``col``, ``data`` and ``score_names``; ``queries``
            is indexed by ``col`` and ``references`` by ``row``.

    Returns:
        DataFrame: One row per stored pair with the columns ``query``,
            ``reference`` and one column per name in ``score_names``.
    """
    sparse = scores.scores
    columns = ['query', 'reference', *sparse.score_names]

    # Gather every row first and construct the frame in one go instead of
    # enlarging it row by row through ``.loc``, which is slow for many pairs.
    records = [
        [
            scores.queries[col].metadata['compound_name'],
            scores.references[row].metadata['compound_name'],
            *values,
        ]
        for row, col, values in zip(sparse.row, sparse.col, sparse.data)
    ]

    return DataFrame(records, columns=columns)
22 | |
23 | |
def load_data(scores_filename: str) -> DataFrame:
    """Read serialized matchms scores from disk and convert them to a dataframe.

    Args:
        scores_filename (str): Path to json file with serialized scores.

    Returns:
        DataFrame: Result of ``scores_to_dataframe`` applied to the loaded scores.
    """
    return scores_to_dataframe(scores_from_json(scores_filename))
37 | |
38 | |
# Command-line entry point: load the serialized scores passed via --sf and
# write the resulting table to the path passed via --o.
@click.group(invoke_without_command=True)
@click.option('--sf', 'scores_filename', type=click.Path(exists=True), required=True)
@click.option('--o', 'output_filename', type=click.Path(writable=True), required=True)
def cli(scores_filename, output_filename):
    table = load_data(scores_filename)
    # Tab-separated output without the dataframe index column.
    table.to_csv(output_filename, sep="\t", index=False)


# Run the command only when the file is executed as a script.
if __name__ == '__main__':
    cli()