# HG changeset patch # User nml # Date 1557766755 14400 # Node ID 07dfb8fd47f498a0721d99e16e095efba3e44280 # Parent b000a3130db8eb882b31d766cfa050271bc53e71 planemo upload commit e0d65bf0850ce95ffb89982e61f2136fcf0359ee diff -r b000a3130db8 -r 07dfb8fd47f4 bionumeric_convert.xml --- a/bionumeric_convert.xml Mon Mar 18 13:15:57 2019 -0400 +++ b/bionumeric_convert.xml Mon May 13 12:59:15 2019 -0400 @@ -1,38 +1,41 @@ - + compliant results pandas - + - + diff -r b000a3130db8 -r 07dfb8fd47f4 bionumeric_converter.py --- a/bionumeric_converter.py Mon Mar 18 13:15:57 2019 -0400 +++ b/bionumeric_converter.py Mon May 13 12:59:15 2019 -0400 @@ -14,7 +14,7 @@ '-f', '--filename', required=True, - help='Specify your tsv input') + help='Specify your biohansel tsv or other tabular separated input') parser.add_argument( '-o', '--output', @@ -24,30 +24,27 @@ tsv_file = args.filename out_name = args.output - no_comma_tsv = comma_remover(tsv_file) - df = qc_shortener(no_comma_tsv) - df.to_csv(out_name, index=False) - -# Remove comma function: - + df_input = pd.read_csv(tsv_file, sep='\t') -def comma_remover(tsv_file): - # Create a table from the tsv file as an input into the dataframe. - df = pd.read_csv(tsv_file, sep='\t') - # Change all commas to / in the QC message - no_comma_tsv = df.replace(',', '/', regex=True) - return no_comma_tsv + df_no_comma = df_input.replace(',', '/', regex=True) + df = qc_shortener(df_no_comma) + df.to_csv(out_name, index=False) # Shorten QC results: +def splittingstrings(string, length): + return (string[0+i:length+i] for i in range(0, len(string), length)) + + def qc_shortener(df): - for count in df.index: - message = str(df.at[count, 'qc_message']) + for i, row in df.iterrows(): + message = str(row['qc_message']) if len(message) > 150: - results = message.find('|') - new_message = "Truncated after first '|' : " + message[0:results] - df['qc_message'] = df['qc_message'].replace(message, new_message) + message_list = list(splittingstrings(message, 150)) + df.at[i, 'qc_message'] = message_list[0] + for val in range(1, len(message_list)): + df.at[i, 'qc_message_{}'.format(val)] = message_list[val] return df diff -r b000a3130db8 -r 07dfb8fd47f4 test-data/Biohansel_Bionumerics.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/Biohansel_Bionumerics.csv Mon May 13 12:59:15 2019 -0400 @@ -0,0 +1,4 @@ +sample,subtype,avg_tile_coverage,qc_status,qc_message,qc_message_1 +SRR1645238,1.3,43.345,PASS,, +SRR1753252,1.1,32.33,PASS,FAIL: This is a test of the cut off system. The data is good and as such I have to manually type this message in to get it to cut off. I am adding in ,5 comas ///// +SRR1928313,1.1.1,555.11,PASS,, diff -r b000a3130db8 -r 07dfb8fd47f4 test-data/Output.csv --- a/test-data/Output.csv Mon Mar 18 13:15:57 2019 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,2 +0,0 @@ -sample,scheme,scheme_version,subtype,all_subtypes,tiles_matching_subtype,are_subtypes_consistent,inconsistent_subtypes,n_tiles_matching_all,n_tiles_matching_all_expected,n_tiles_matching_positive,n_tiles_matching_positive_expected,n_tiles_matching_subtype,n_tiles_matching_subtype_expected,file_path,avg_tile_coverage,qc_status,qc_message -2019C-111,heidelberg,0.5.0,2.2.3.1.2,2; 2.2; 2.2.3; 2.2.3.1; 2.2.3.1.2,2.2.3.1.2,True,,202,202,14,14,3,3,['2019C-111_1.fastq'/ '2019C-111_2.fastq'],30.07,PASS,Truncated after first '|' : This is a trial to the cut /off/ system as this data all passed the checks. diff -r b000a3130db8 -r 07dfb8fd47f4 test-data/results.tab --- a/test-data/results.tab Mon Mar 18 13:15:57 2019 -0400 +++ b/test-data/results.tab Mon May 13 12:59:15 2019 -0400 @@ -1,2 +1,4 @@ -sample scheme scheme_version subtype all_subtypes tiles_matching_subtype are_subtypes_consistent inconsistent_subtypes n_tiles_matching_all n_tiles_matching_all_expected n_tiles_matching_positive n_tiles_matching_positive_expected n_tiles_matching_subtype n_tiles_matching_subtype_expected file_path avg_tile_coverage qc_status qc_message -2019C-111 heidelberg 0.5.0 2.2.3.1.2 2; 2.2; 2.2.3; 2.2.3.1; 2.2.3.1.2 2.2.3.1.2 True 202 202 14 14 3 3 ['2019C-111_1.fastq', '2019C-111_2.fastq'] 30.070 PASS This is a trial to the cut ,off, system as this data all passed the checks. | I will attemp to get 150 characters into here in a way that is not awful and sounds decent. We can try counting the letters and as of now, it should be ok! +sample subtype avg_tile_coverage qc_status qc_message +SRR1645238 1.3 43.345 PASS +SRR1753252 1.1 32.33 PASS "FAIL: This is a test of the cut off system. The data is good and as such I have to manually type this message in to get it to cut off. I am adding in 5 comas ,,,,," +SRR1928313 1.1.1 555.11 PASS