Mercurial > repos > ufz > xlsx2tsv
changeset 0:a21347be425d draft default tip
planemo upload for repository https://github.com/Helmholtz-UFZ/galaxy-tools/tree/main/tools/xls2tsv commit 2e75a14496fa80104e76b307289c58b9b7013ae1
author | ufz |
---|---|
date | Mon, 16 Dec 2024 20:56:29 +0000 |
parents | |
children | |
files | test-data/excel_test.xlsx test-data/output_sheet_1.tsv test-data/output_sheet_2.tsv xlsx2tsv.py xlsx2tsv.xml |
diffstat | 5 files changed, 90 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_sheet_1.tsv Mon Dec 16 20:56:29 2024 +0000 @@ -0,0 +1,4 @@ +column0 column1 +test1 value1 +test2 value2 +test3 value3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/output_sheet_2.tsv Mon Dec 16 20:56:29 2024 +0000 @@ -0,0 +1,4 @@ +column2 column3 +test4 value4 +test5 value5 +test6 value6
"""xlsx2tsv.py: convert one sheet of an Excel workbook to a tab-separated file."""
import argparse
import sys

import pandas as pd


def convert_xlsx_to_tsv(input_file, sheet_name, output):
    """Write one sheet of an Excel workbook to ``output`` as a TSV file.

    Args:
        input_file: Path to the Excel workbook to read.
        sheet_name: Name of the sheet to extract; passed unchanged to
            ``pandas.read_excel``.
        output: Path of the tab-separated file to write.

    Returns:
        True if the sheet was read and written, False if any step failed.
        Failures are reported on stderr rather than raised, so existing
        callers that do not expect an exception keep working.
    """
    try:
        df = pd.read_excel(input_file, sheet_name=sheet_name)
        df.to_csv(output, sep='\t', index=False)
    except Exception as e:
        # Top-level boundary of a command-line tool: report every failure
        # (missing file, unknown sheet, missing engine, unwritable output)
        # on stderr and signal it to the caller instead of hiding it.
        print(
            f"Failed to convert sheet '{sheet_name}' from {input_file}: {e}",
            file=sys.stderr,
        )
        return False

    print(f"Extracted sheet '{sheet_name}' from {input_file}")
    return True


def main(argv=None):
    """Parse command-line arguments and run the conversion.

    Args:
        argv: Optional list of argument strings. When None, argparse reads
            ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 when the conversion fails, so the calling
            process can detect the failure from the exit code.
    """
    parser = argparse.ArgumentParser(
        description="Convert one sheet of an Excel file to .tsv format."
    )
    parser.add_argument("--input-file", type=str, required=True, help="Path to the input .xlsx file.")
    # The flag keeps its plural name for compatibility with existing callers,
    # but the value is used as a single sheet name.
    parser.add_argument("--sheet-names", type=str, required=True, help="Name of the sheet to convert.")
    parser.add_argument("--output", type=str, default="extracted_sheet.tsv", required=False, help="Path of the output .tsv file.")
    args = parser.parse_args(argv)

    if not convert_xlsx_to_tsv(args.input_file, args.sheet_names, args.output):
        sys.exit(1)


if __name__ == "__main__":
    main()
<!-- xlsx2tsv.xml: Galaxy tool wrapper that runs xlsx2tsv.py on one sheet of an Excel dataset. -->
<tool id="xlsx2tsv" name="Excel to Tabular" version="0.1.0+galaxy0" license="MIT" profile="23.0">
    <description>with pandas</description>
    <requirements>
        <requirement type="package" version="2.2.1">pandas</requirement>
        <!-- openpyxl reads .xlsx workbooks -->
        <requirement type="package" version="3.1.5">openpyxl</requirement>
        <!-- xlrd is the engine pandas needs for legacy .xls workbooks, which the input param also accepts -->
        <requirement type="package" version="2.0.1">xlrd</requirement>
    </requirements>
    <command detect_errors="aggressive"><![CDATA[
        python '$__tool_directory__/xlsx2tsv.py'
        --input-file '$input_file'
        --sheet-names '$sheet_names'
        --output '$output'
    ]]></command>
    <inputs>
        <param name="input_file" type="data" format="excel.xls,xlsx" optional="false" label="Input excel file" help="Input XLS/XLSX file"/>
        <param name="sheet_names" type="text" optional="false" label="Name of the excel sheet" help="Excel sheet to convert to tsv"/>
    </inputs>
    <outputs>
        <data name="output" format="tabular"/>
    </outputs>
    <tests>
        <!-- Each test extracts one sheet of the same workbook and compares it with the stored TSV. -->
        <test>
            <param name="input_file" value="excel_test.xlsx"/>
            <param name="sheet_names" value="Sheet1"/>
            <output name="output" value="output_sheet_1.tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="column0"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="input_file" value="excel_test.xlsx"/>
            <param name="sheet_names" value="Sheet2"/>
            <output name="output" value="output_sheet_2.tsv" ftype="tabular">
                <assert_contents>
                    <has_text text="column2"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
Description
-----------
Extract a sheet from XLS/XLSX file to a tabular file
    </help>
    <citations>
        <citation type="doi">10.5281/zenodo.13819579</citation>
    </citations>
</tool>