# HG changeset patch # User nml # Date 1552929357 14400 # Node ID b000a3130db8eb882b31d766cfa050271bc53e71 planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a diff -r 000000000000 -r b000a3130db8 bionumeric_convert.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bionumeric_convert.xml Mon Mar 18 13:15:57 2019 -0400 @@ -0,0 +1,40 @@ + + compliant results + + pandas + + + + + + + + + + + + + + + + + + \ No newline at end of file diff -r 000000000000 -r b000a3130db8 bionumeric_converter.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bionumeric_converter.py Mon Mar 18 13:15:57 2019 -0400 @@ -0,0 +1,55 @@ +#!/usr/bin/env python + +# Import dependencies needed +import argparse + +import pandas as pd + +# Define the main function: + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + '-f', + '--filename', + required=True, + help='Specify your tsv input') + parser.add_argument( + '-o', + '--output', + default='output.csv', + help='Specify output name') + args = parser.parse_args() + tsv_file = args.filename + out_name = args.output + + no_comma_tsv = comma_remover(tsv_file) + df = qc_shortener(no_comma_tsv) + df.to_csv(out_name, index=False) + +# Remove comma function: + + +def comma_remover(tsv_file): + # Create a table from the tsv file as an input into the dataframe. 
+ df = pd.read_csv(tsv_file, sep='\t') + # Change all commas to / in every column of the table (not only the QC message) + no_comma_tsv = df.replace(',', '/', regex=True) + return no_comma_tsv + +# Shorten QC results: + + +def qc_shortener(df): + for count in df.index: + message = str(df.at[count, 'qc_message']) + if len(message) > 150: + results = message.find('|') + new_message = "Truncated after first '|' : " + message[0:results] + df['qc_message'] = df['qc_message'].replace(message, new_message) + return df + + +if __name__ == '__main__': + main() diff -r 000000000000 -r b000a3130db8 test-data/Output.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Output.csv Mon Mar 18 13:15:57 2019 -0400 @@ -0,0 +1,2 @@ +sample,scheme,scheme_version,subtype,all_subtypes,tiles_matching_subtype,are_subtypes_consistent,inconsistent_subtypes,n_tiles_matching_all,n_tiles_matching_all_expected,n_tiles_matching_positive,n_tiles_matching_positive_expected,n_tiles_matching_subtype,n_tiles_matching_subtype_expected,file_path,avg_tile_coverage,qc_status,qc_message +2019C-111,heidelberg,0.5.0,2.2.3.1.2,2; 2.2; 2.2.3; 2.2.3.1; 2.2.3.1.2,2.2.3.1.2,True,,202,202,14,14,3,3,['2019C-111_1.fastq'/ '2019C-111_2.fastq'],30.07,PASS,Truncated after first '|' : This is a trial to the cut /off/ system as this data all passed the checks. 
diff -r 000000000000 -r b000a3130db8 test-data/results.tab --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/results.tab Mon Mar 18 13:15:57 2019 -0400 @@ -0,0 +1,2 @@ +sample scheme scheme_version subtype all_subtypes tiles_matching_subtype are_subtypes_consistent inconsistent_subtypes n_tiles_matching_all n_tiles_matching_all_expected n_tiles_matching_positive n_tiles_matching_positive_expected n_tiles_matching_subtype n_tiles_matching_subtype_expected file_path avg_tile_coverage qc_status qc_message +2019C-111 heidelberg 0.5.0 2.2.3.1.2 2; 2.2; 2.2.3; 2.2.3.1; 2.2.3.1.2 2.2.3.1.2 True 202 202 14 14 3 3 ['2019C-111_1.fastq', '2019C-111_2.fastq'] 30.070 PASS This is a trial to the cut ,off, system as this data all passed the checks. | I will attemp to get 150 characters into here in a way that is not awful and sounds decent. We can try counting the letters and as of now, it should be ok!