changeset 0:a21347be425d draft default tip

planemo upload for repository https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/xls2tsv commit 2e75a14496fa80104e76b307289c58b9b7013ae1
author ufz
date Mon, 16 Dec 2024 20:56:29 +0000
parents
children
files test-data/excel_test.xlsx test-data/output_sheet_1.tsv test-data/output_sheet_2.tsv xlsx2tsv.py xlsx2tsv.xml
diffstat 5 files changed, 90 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file test-data/excel_test.xlsx has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_sheet_1.tsv	Mon Dec 16 20:56:29 2024 +0000
@@ -0,0 +1,4 @@
+column0	column1
+test1	value1
+test2	value2
+test3	value3
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/output_sheet_2.tsv	Mon Dec 16 20:56:29 2024 +0000
@@ -0,0 +1,4 @@
+column2	column3
+test4	value4
+test5	value5
+test6	value6
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xlsx2tsv.py	Mon Dec 16 20:56:29 2024 +0000
@@ -0,0 +1,32 @@
+import argparse
+
+import pandas as pd
+
+
+def convert_xlsx_to_tsv(input_file, sheet_name, output):
+    try:
+        # Read the specified sheet and convert them to tsv
+        df = pd.read_excel(input_file, sheet_name=sheet_name)
+        df.to_csv(output, sep='\t', index=False)
+        print(f"Extracted sheet '{sheet_name}' from {input_file}")
+
+    except Exception as e:
+        print(f"Failed to convert sheet '{sheet_name}' from {input_file}: {e}")
+
+
+def main():
+    parser = argparse.ArgumentParser(description="Convert specific sheets from a single .xlsx file to .tsv format in the same directory.")
+    parser.add_argument("--input-file", type=str, required=True, help="Path to the input .xlsx file.")
+    parser.add_argument("--sheet-names", type=str, required=True, help="Comma-separated list of sheet names to convert.")
+    parser.add_argument("--output", type=str, default="extracted_sheet.tsv", required=False, help="Suffix for the tsv file")
+    args = parser.parse_args()
+
+    # Convert sheet names from str to list
+    sheet_names = args.sheet_names
+
+    # Call the conversion function with the provided arguments
+    convert_xlsx_to_tsv(args.input_file, sheet_names, args.output)
+
+
+# Run the conversion only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/xlsx2tsv.xml	Mon Dec 16 20:56:29 2024 +0000
@@ -0,0 +1,50 @@
+<tool id="xlsx2tsv" name="Excel to Tabular" version="0.1.0+galaxy0" license="MIT" profile = "23.0">
+    <!-- Tool wrapper that runs xlsx2tsv.py to export one sheet of an Excel file as a tabular dataset. -->
+    <description>with pandas</description>
+    <requirements>
+        <requirement type="package" version="2.2.1">pandas</requirement>
+        <requirement type="package" version="3.1.5">openpyxl</requirement>
+    </requirements>
+    <!-- The script receives the input dataset, the sheet name and the output dataset path as single-quoted arguments. -->
+    <command detect_errors="aggressive"><![CDATA[
+    python '$__tool_directory__/xlsx2tsv.py'
+    --input-file '$input_file'
+    --sheet-names '$sheet_names'
+    --output '$output'
+    ]]></command>
+    <inputs>
+        <!-- NOTE(review): excel.xls is accepted here, but openpyxl is the only Excel engine listed in the requirements; pandas normally reads .xls files through xlrd. Confirm that .xls inputs actually work. -->
+        <param name="input_file" type="data" format="excel.xls,xlsx" optional="false" label="Input excel file" help="Input XLS/XLSX file"/>
+        <!-- Passed to the script as one value; the tests below each use a single sheet name. -->
+        <param name="sheet_names" type="text" optional="false" label="Name of the excel sheet" help="Excel sheet to convert to tsv"/>
+    </inputs>
+    <outputs>
+        <data name="output" format="tabular"/>
+    </outputs>
+    <!-- Each test converts one sheet of excel_test.xlsx, compares the result with the expected TSV file, and checks the header text and the column count. -->
+    <tests>
+        <test>
+            <param name="input_file" value="excel_test.xlsx"/>
+            <param name="sheet_names" value="Sheet1"/>
+            <output name="output" value="output_sheet_1.tsv" ftype="tabular">
+                <assert_contents>
+                        <has_text text="column0"/>
+                        <has_n_columns n="2"/>
+                    </assert_contents>
+                </output>
+        </test>
+        <test>
+            <param name="input_file" value="excel_test.xlsx"/>
+            <param name="sheet_names" value="Sheet2"/>
+            <output name="output" value="output_sheet_2.tsv" ftype="tabular">
+                <assert_contents>
+                    <has_text text="column2"/>
+                    <has_n_columns n="2"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+Description
+-----------
+Extract a sheet from XLS/XLSX file to a tabular file
+    </help>
+    <citations>
+        <citation type="doi">10.5281/zenodo.13819579</citation>
+    </citations>
+</tool>
\ No newline at end of file