Mercurial > repos > recetox > matchms_add_key
comparison formatter.py @ 0:ea00a749ec1f draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
| author | recetox |
|---|---|
| date | Thu, 12 Oct 2023 13:26:04 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ea00a749ec1f |
|---|---|
| 1 import click | |
| 2 from matchms.importing import scores_from_json | |
| 3 from pandas import DataFrame | |
| 4 | |
| 5 | |
def scores_to_dataframe(scores) -> DataFrame:
    """Flatten a matchms scores object into one long-format dataframe.

    Each stored score entry becomes a single row holding the compound name of
    the query spectrum, the compound name of the reference spectrum and one
    value per score name.

    Args:
        scores (matchms.scores): matchms.scores object. Its ``scores``
            attribute must expose parallel ``row``, ``col`` and ``data``
            sequences together with ``score_names``; every query and
            reference is expected to carry a ``compound_name`` metadata entry.

    Returns:
        DataFrame: Columns ``query`` and ``reference`` followed by one column
            per entry of ``score_names``.
    """
    sparse_scores = scores.scores

    # ``row`` indexes the references and ``col`` indexes the queries; the
    # matching ``data`` record holds one value per score name and is unpacked
    # into separate columns.
    records = [
        [
            scores.queries[col].metadata['compound_name'],
            scores.references[row].metadata['compound_name'],
            *values,
        ]
        for row, col, values in zip(sparse_scores.row, sparse_scores.col, sparse_scores.data)
    ]

    return DataFrame(records, columns=['query', 'reference', *sparse_scores.score_names])
| 24 | |
| 25 | |
def load_data(scores_filename: str) -> DataFrame:
    """Read serialized matchms scores from a json file and tabulate them.

    Args:
        scores_filename (str): Path to json file with serialized scores.

    Returns:
        DataFrame: Table built by ``scores_to_dataframe`` from the loaded scores.
    """
    loaded_scores = scores_from_json(scores_filename)
    return scores_to_dataframe(loaded_scores)
| 39 | |
| 40 | |
@click.group(invoke_without_command=True)
@click.option('--sf', 'scores_filename', type=click.Path(exists=True), required=True,
              help='Path to the json file with serialized scores.')
@click.option('--o', 'output_filename', type=click.Path(writable=True), required=True,
              help='Path of the tab-separated file to write.')
def cli(scores_filename, output_filename):
    """Convert a serialized scores json file into a tab-separated table."""
    result = load_data(scores_filename)
    # Tab-separated output; the positional dataframe index is not written.
    result.to_csv(output_filename, sep="\t", index=False)
| 48 | |
| 49 | |
# Invoke the command-line interface only when this file is run as a script.
if __name__ == "__main__":
    cli()
