annotate formatter.py @ 3:caf007467c84 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 0ea5c86ceabe9f24dd61b399127b0d19a0ae2657
author recetox
date Fri, 01 Dec 2023 11:18:46 +0000
parents 107186a6fcec
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
1 import click
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
2 from matchms.importing import scores_from_json
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
3 from pandas import DataFrame
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
4
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
5
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
def scores_to_dataframe(scores):
    """Flatten a matchms scores object into a single long-format dataframe.

    Every stored entry of ``scores.scores`` becomes one row holding the query
    compound name, the reference compound name and all score values recorded
    for that pair.

    Args:
        scores (matchms.Scores): Object exposing ``queries``, ``references``
            and a ``scores`` container with ``row``, ``col``, ``data`` and
            ``score_names`` attributes.

    Returns:
        DataFrame: Columns ``query`` and ``reference`` followed by one column
            per entry in ``scores.scores.score_names``.

    Raises:
        KeyError: If the metadata mapping of a referenced spectrum has no
            ``compound_name`` entry.
    """
    sparse = scores.scores
    queries = scores.queries
    references = scores.references

    # ``row`` indexes into the references and ``col`` into the queries; the
    # entry of ``data`` at the same position carries that pair's score values.
    records = [
        [
            queries[col].metadata['compound_name'],
            references[row].metadata['compound_name'],
            *values,
        ]
        for row, col, values in zip(sparse.row, sparse.col, sparse.data)
    ]

    return DataFrame(records, columns=['query', 'reference', *sparse.score_names])
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
24
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
25
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
def load_data(scores_filename: str) -> DataFrame:
    """Read serialized matchms scores from disk and return them as a table.

    Args:
        scores_filename (str): Path to the JSON file with serialized scores.

    Returns:
        DataFrame: Long-format table built by ``scores_to_dataframe`` from the
            deserialized scores.
    """
    # Deserialize first, then flatten; composing the two calls avoids
    # rebinding one name to two different types.
    return scores_to_dataframe(scores_from_json(scores_filename))
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
39
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
40
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
@click.group(invoke_without_command=True)
@click.option('--sf', 'scores_filename', type=click.Path(exists=True), required=True)
@click.option('--o', 'output_filename', type=click.Path(writable=True), required=True)
def cli(scores_filename, output_filename):
    """Convert a matchms scores JSON file into a tab-separated table.

    Args:
        scores_filename (str): Path to an existing JSON file with serialized
            scores (``--sf``).
        output_filename (str): Path the tab-separated output is written to
            (``--o``).
    """
    # NOTE(review): declared as a group although no subcommands are visible
    # here; ``invoke_without_command=True`` makes the callback run regardless.
    result = load_data(scores_filename)
    # Tab-separated output without the positional dataframe index column.
    result.to_csv(output_filename, sep="\t", index=False)
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
48
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
49
107186a6fcec planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
recetox
parents:
diff changeset
# Run the command-line interface only when executed as a script, so that
# importing this module has no side effects.
if __name__ == '__main__':
    cli()