view bionumeric_converter.py @ 0:b000a3130db8 draft

planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
author nml
date Mon, 18 Mar 2019 13:15:57 -0400
parents
children 07dfb8fd47f4
line wrap: on
line source

#!/usr/bin/env python

# Import the dependencies needed
import argparse

import pandas as pd

# Define the main function:


def main():
    """Command-line entry point.

    Reads the TSV file named by ``-f/--filename``, passes it through
    ``comma_remover`` and then ``qc_shortener``, and writes the resulting
    table as CSV (without the index column) to the path given by
    ``-o/--output`` (``output.csv`` when omitted).
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        '-f', '--filename',
        required=True,
        help='Specify your tsv input')
    cli.add_argument(
        '-o', '--output',
        default='output.csv',
        help='Specify output name')
    options = cli.parse_args()

    # Pipeline: load + strip commas -> shorten long QC messages -> save.
    cleaned = comma_remover(options.filename)
    shortened = qc_shortener(cleaned)
    shortened.to_csv(options.output, index=False)

# Remove comma function:


def comma_remover(tsv_file):
    """Load a tab-separated file and swap every comma in its cells for '/'.

    Args:
        tsv_file: Path or file-like object handed straight to
            ``pandas.read_csv`` with a tab separator.

    Returns:
        A new DataFrame in which each ',' found in a cell value has been
        replaced by '/'. The substitution is a regex replace over the whole
        table, not only a single column; column names are left as read.
    """
    table = pd.read_csv(tsv_file, sep='\t')
    # regex=True turns this into a substring substitution rather than a
    # whole-cell match, so commas embedded in longer text are replaced too.
    return table.replace(to_replace=',', value='/', regex=True)

# Shorten QC results:


def qc_shortener(df, max_length=150):
    """Shorten over-long entries of the ``qc_message`` column.

    A cell whose string form is longer than ``max_length`` characters and
    contains a ``'|'`` is replaced by
    ``"Truncated after first '|' : "`` followed by the text that precedes
    the first ``'|'``. All other cells are left exactly as they were.

    Args:
        df: DataFrame holding a ``qc_message`` column. The column is
            reassigned on this same object.
        max_length: Longest message (in characters) that is kept verbatim.
            Defaults to 150.

    Returns:
        The same DataFrame object, with ``qc_message`` updated.

    Raises:
        KeyError: If ``df`` has no ``qc_message`` column.
    """
    prefix = "Truncated after first '|' : "

    def _shorten(value):
        message = str(value)
        if len(message) <= max_length:
            return value
        head, separator, _ = message.partition('|')
        if not separator:
            # No '|' to cut at: keep the message intact rather than
            # chopping its last character and mislabelling it as truncated.
            return value
        return prefix + head

    # Each cell is derived once from its own original value, so rows that
    # share the same long message cannot be truncated a second time via the
    # '|' that appears inside the prefix itself.
    df['qc_message'] = df['qc_message'].map(_shorten)
    return df


# Run the converter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()