comparison formatter.py @ 5:a177ac3c752c draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
author recetox
date Tue, 27 Jun 2023 14:25:01 +0000
parents
children 13de8005adba
comparison
equal deleted inserted replaced
4:357df6c47d92 5:a177ac3c752c
1 import click
2 from matchms.importing import scores_from_json
3 from pandas import DataFrame
4
5
def scores_to_dataframe(scores):
    """Flatten the stored entries of a matchms scores object into one long-format table.

    Every stored entry yields one row: the compound name of the query
    spectrum, the compound name of the reference spectrum, and then each
    score value recorded for that pair.

    Args:
        scores (matchms.scores): Object exposing ``queries``, ``references`` and a
            ``scores`` container with ``score_names``, ``row``, ``col`` and ``data``.

    Returns:
        DataFrame: A single table with columns ``query``, ``reference`` and one
            column per entry of ``score_names``; one row per stored entry.
            The table is empty (columns only) when nothing is stored.

    Raises:
        KeyError: Presumably, when a spectrum's metadata lacks ``compound_name``
            (assumes ``metadata`` behaves like a dict -- confirm against matchms).
    """
    stored = scores.scores
    columns = ['query', 'reference', *stored.score_names]

    # Gather plain rows first and build the frame in one go: enlarging a
    # DataFrame via ``.loc[i] = ...`` copies the data on every insertion.
    rows = [
        [
            scores.queries[col].metadata['compound_name'],
            scores.references[row].metadata['compound_name'],
            *values,
        ]
        for row, col, values in zip(stored.row, stored.col, stored.data)
    ]

    return DataFrame(rows, columns=columns)
22
23
def load_data(scores_filename: str) -> DataFrame:
    """Read serialized scores from a json file and return them as a table.

    Args:
        scores_filename (str): Path to json file with serialized scores.

    Returns:
        DataFrame: The table produced by ``scores_to_dataframe`` for the
            scores deserialized from ``scores_filename``.
    """
    # Deserialize, then flatten straight away; no intermediate name is needed.
    return scores_to_dataframe(scores_from_json(scores_filename))
37
38
# Command-line entry point: loads the scores file given via --sf and writes the
# resulting table to the path given via --o as tab-separated text without the
# index column. Both options are required; --sf must point to an existing path.
# NOTE: documented with comments on purpose -- click shows a function docstring
# as the command's help text, so adding one would change the --help output.
@click.group(invoke_without_command=True)
@click.option('--sf', 'scores_filename', type=click.Path(exists=True), required=True)
@click.option('--o', 'output_filename', type=click.Path(writable=True), required=True)
def cli(scores_filename, output_filename):
    load_data(scores_filename).to_csv(output_filename, sep="\t", index=False)
46
47
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    cli()