Repository 'matchms_formatter'
hg clone https://toolshed.g2.bx.psu.edu/repos/recetox/matchms_formatter

Changeset 27:402620029a7a (2024-04-22)
Previous changeset 26:48ade5cc847f (2024-04-16) Next changeset 28:8bd942dcf1ad (2024-05-30)
Commit message:
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit c626c8db7ba4dd30f85f7086e16e1e2413e36bd8
modified:
matchms_formatter.xml
removed:
formatter.py
b
diff -r 48ade5cc847f -r 402620029a7a formatter.py
--- a/formatter.py Tue Apr 16 11:25:14 2024 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
[
@@ -1,51 +0,0 @@
-import click
-from matchms.importing import scores_from_json
-from pandas import DataFrame
-
-
-def scores_to_dataframe(scores):
-    """Unpack a matchms scores object into a single long-format dataframe.
-
-    Every stored (row, col) entry of ``scores.scores`` becomes one output row:
-    the 'compound_name' metadata of the query at ``col``, the 'compound_name'
-    metadata of the reference at ``row``, then the values stored for that entry.
-
-    Args:
-        scores (matchms.scores): matchms.scores object.
-
-    Returns:
-        DataFrame: One row per stored score entry with the columns 'query',
-            'reference' and one column per name in ``scores.scores.score_names``.
-    """
-    data = []
-
-    # row indexes scores.references and col indexes scores.queries; i is the
-    # position of the matching entry in scores.scores.data.
-    # NOTE(review): data[i] is star-unpacked, so each entry is presumably a
-    # record holding one value per score name - confirm against matchms.
-    # NOTE(review): metadata is indexed directly with 'compound_name';
-    # presumably this fails when a spectrum lacks that entry - confirm.
-    for i, (row, col) in enumerate(zip(scores.scores.row, scores.scores.col)):
-        data.append([scores.queries[col].metadata['compound_name'], scores.references[row].metadata['compound_name'], *scores.scores.data[i]])
-
-    dataframe = DataFrame(data, columns=['query', 'reference', *scores.scores.score_names])
-
-    return dataframe
-
-
-def load_data(scores_filename: str) -> DataFrame:
-    """Load serialized matchms scores from a json file as a long-format table.
-
-    Reads the file with ``scores_from_json`` and converts the result with
-    ``scores_to_dataframe``; no joining with other data takes place here.
-
-    Args:
-        scores_filename (str): Path to json file with serialized scores.
-
-    Returns:
-        DataFrame: Long-format table with the columns 'query', 'reference'
-            and one column per score name.
-    """
-    scores = scores_from_json(scores_filename)
-    # The name is reused: from here on 'scores' is the dataframe, not the matchms object.
-    scores = scores_to_dataframe(scores)
-
-    return scores
-
-
-# Command line entry point: --sf is the scores json file (must exist) and
-# --o is the path the formatted table is written to; both are required.
-# NOTE(review): declared as a click group although no subcommands are visible
-# in this file; invoke_without_command=True lets the body run without one.
-@click.group(invoke_without_command=True)
-@click.option('--sf', 'scores_filename', type=click.Path(exists=True), required=True)
-@click.option('--o', 'output_filename', type=click.Path(writable=True), required=True)
-def cli(scores_filename, output_filename):
-    # Comments are used instead of a docstring here because click would show a
-    # docstring as the command's help text.
-    result = load_data(scores_filename)
-    # Written tab-separated and without the dataframe index column.
-    result.to_csv(output_filename, sep="\t", index=False)
-    # The trailing pass is a no-op.
-    pass
-
-
-# Run the command only when the file is executed as a script.
-if __name__ == '__main__':
-    cli()
b
diff -r 48ade5cc847f -r 402620029a7a matchms_formatter.xml
--- a/matchms_formatter.xml Tue Apr 16 11:25:14 2024 +0000
+++ b/matchms_formatter.xml Mon Apr 22 08:40:39 2024 +0000
[
@@ -1,4 +1,4 @@
-<tool id="matchms_formatter" name="matchms scores formatter" version="@TOOL_VERSION@+galaxy3" profile="21.09">
+<tool id="matchms_formatter" name="matchms scores formatter" version="@TOOL_VERSION@+galaxy4" profile="21.09">
     <description>reformat scores object of matchms to long format table</description>
 
     <macros>
@@ -13,12 +13,11 @@
 
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
-        <requirement type="package" version="8.0.1">click</requirement>
         <requirement type="package" version="1.1.4">pandas</requirement>
     </requirements>
 
     <command detect_errors="aggressive"><![CDATA[
-        sh ${matchms_formatter_cli}
+        python3 '${formatter}'
     ]]></command>
     
     <environment_variables>
@@ -29,15 +28,26 @@
     </environment_variables>
 
     <configfiles>
-        <configfile name="matchms_formatter_cli">
-            python3 ${__tool_directory__}/formatter.py \
-            --sf '$scores' \
-            --o '$output'
+        <configfile name="formatter">
+## Script body of the 'formatter' configfile; the tool command runs it with python3.
+## The scores, key and output placeholders refer to the tool's inputs and output
+## of the same names declared further down in this tool file.
+from matchms.importing import scores_from_json
+from pandas import DataFrame
+
+scores = scores_from_json('$scores')
+data = []
+
+## One table row per stored score entry: the query's value for the chosen key,
+## the reference's value for the same key, then the values stored for that entry.
+## row indexes scores.references and col indexes scores.queries.
+for i, (row, col) in enumerate(zip(scores.scores.row, scores.scores.col)):
+        data.append([scores.queries[col].metadata['$key'], scores.references[row].metadata['$key'], *scores.scores.data[i]])
+
+result = DataFrame(data, columns=['query', 'reference', *scores.scores.score_names])
+
+## Written tab-separated and without the dataframe index column.
+result.to_csv('$output', sep="\t", index=False)
+
         </configfile>
     </configfiles>
 
     <inputs>
         <param label="Scores object" name="scores" type="data" format="json" help="matchms Scores json file." />
+        <param label="Key" name="key" type="text" value="compound_name" help="Name of the key to use. Default is 'compound_name'." />
     </inputs>
     <outputs>
         <data label="${tool.name} on ${on_string}" name="output" format="tsv"/>
@@ -46,11 +56,13 @@
     <tests>
         <test>
             <param name="scores" value="formatter/fill2_trunc_scores_with_metadata_match.json" ftype="json"/>
+            <param name="key" value="compound_name"/>
             <output name="output" value="formatter/fill2_formatted.tsv" ftype="tsv"
                 checksum="md5$4f0d83da381b8a403d807d26a9dd0f34"/>
         </test>
         <test>
             <param name="scores" value="similarity/scores_test4_out.json" ftype="json"/>
+            <param name="key" value="compound_name"/>
             <output name="output" file="formatter/test4_formatted.tsv" ftype="tsv"/>
         </test>
     </tests>