Repository 'biohansel_bionumeric_converter'
hg clone https://toolshed.g2.bx.psu.edu/repos/nml/biohansel_bionumeric_converter

Changeset 0:b000a3130db8 (2019-03-18)
Next changeset 1:07dfb8fd47f4 (2019-05-13)
Commit message:
planemo upload commit e5e384ce6c90f595e8d397a7c45ca9c17d4a3e2a
added:
bionumeric_convert.xml
bionumeric_converter.py
test-data/Output.csv
test-data/results.tab
b
diff -r 000000000000 -r b000a3130db8 bionumeric_convert.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bionumeric_convert.xml Mon Mar 18 13:15:57 2019 -0400
[
@@ -0,0 +1,40 @@
+<!-- Galaxy wrapper around bionumeric_converter.py: takes one tabular BioHansel
+     result file and produces a comma-separated copy of it. -->
+<tool id="bionumeric_convert" name="biohansel2bionumerics" version="0.1.0">
+    <description>compliant results</description>
+    <requirements>
+        <requirement type="package" version="0.24.1">pandas</requirement>
+    </requirements>
+    <!-- The script is started through the interpreter of the resolved
+         environment so it does not depend on its executable bit, and the
+         tool directory is quoted in case its path contains spaces. -->
+    <command detect_errors="exit_code"><![CDATA[
+        python '$__tool_directory__/bionumeric_converter.py' -f '$Input' -o '$output'
+    ]]></command>
+    <inputs>
+        <param type="data" name="Input" format="tabular"/>
+    </inputs>
+    <outputs>
+        <!-- The script writes straight to the path passed with -o, so the
+             dataset is not collected from the working directory. -->
+        <data name="output" format="csv" label="Output.csv"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="Input" value="results.tab"/>
+            <output name="output" value="Output.csv"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+        **What it does**
+
+        This tool is a supplementary script that takes *only* BioHansel output data and converts it into a format compatible with bionumerics.
+
+        **How to run it**
+
+        1. Input any of your BioHansel output files (tech_results.tab, match_results.tab, and results.tab)
+        2. Click Execute
+
+        **Specific modifications done on the data**
+
+        1. Converts all commas in the output to "/"
+        2. Shortens BioHansel qc_messages if they are over 150 characters
+        3. Converts the .tab file to a .csv file
+
+    ]]></help>
+    <citations>
+    </citations>
+</tool>
\ No newline at end of file
b
diff -r 000000000000 -r b000a3130db8 bionumeric_converter.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/bionumeric_converter.py Mon Mar 18 13:15:57 2019 -0400
[
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+
+# Import dependencies needed
+import argparse
+
+import pandas as pd
+
+# Define the main function:
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        '-f',
+        '--filename',
+        required=True,
+        help='Specify your tsv input')
+    parser.add_argument(
+        '-o',
+        '--output',
+        default='output.csv',
+        help='Specify output name')
+    args = parser.parse_args()
+    tsv_file = args.filename
+    out_name = args.output
+
+    no_comma_tsv = comma_remover(tsv_file)
+    df = qc_shortener(no_comma_tsv)
+    df.to_csv(out_name, index=False)
+
+# Remove comma function:
+
+
+def comma_remover(tsv_file):
+    # Create a table from the tsv file as an input into the dataframe.
+    df = pd.read_csv(tsv_file, sep='\t')
+    # Change all commas to / in the QC message
+    no_comma_tsv = df.replace(',', '/', regex=True)
+    return no_comma_tsv
+
+# Shorten QC results:
+
+
+def qc_shortener(df):
+    for count in df.index:
+        message = str(df.at[count, 'qc_message'])
+        if len(message) > 150:
+            results = message.find('|')
+            new_message = "Truncated after first '|' : " + message[0:results]
+            df['qc_message'] = df['qc_message'].replace(message, new_message)
+    return df
+
+
+# Run the conversion only when this file is executed as a script.
+if __name__ == '__main__':
+    main()
b
diff -r 000000000000 -r b000a3130db8 test-data/Output.csv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Output.csv Mon Mar 18 13:15:57 2019 -0400
[
@@ -0,0 +1,2 @@
+sample,scheme,scheme_version,subtype,all_subtypes,tiles_matching_subtype,are_subtypes_consistent,inconsistent_subtypes,n_tiles_matching_all,n_tiles_matching_all_expected,n_tiles_matching_positive,n_tiles_matching_positive_expected,n_tiles_matching_subtype,n_tiles_matching_subtype_expected,file_path,avg_tile_coverage,qc_status,qc_message
+2019C-111,heidelberg,0.5.0,2.2.3.1.2,2; 2.2; 2.2.3; 2.2.3.1; 2.2.3.1.2,2.2.3.1.2,True,,202,202,14,14,3,3,['2019C-111_1.fastq'/ '2019C-111_2.fastq'],30.07,PASS,Truncated after first '|' : This is a trial to the cut /off/ system as this data all passed the checks. 
b
diff -r 000000000000 -r b000a3130db8 test-data/results.tab
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/results.tab Mon Mar 18 13:15:57 2019 -0400
[
@@ -0,0 +1,2 @@
+sample scheme scheme_version subtype all_subtypes tiles_matching_subtype are_subtypes_consistent inconsistent_subtypes n_tiles_matching_all n_tiles_matching_all_expected n_tiles_matching_positive n_tiles_matching_positive_expected n_tiles_matching_subtype n_tiles_matching_subtype_expected file_path avg_tile_coverage qc_status qc_message
+2019C-111 heidelberg 0.5.0 2.2.3.1.2 2; 2.2; 2.2.3; 2.2.3.1; 2.2.3.1.2 2.2.3.1.2 True 202 202 14 14 3 3 ['2019C-111_1.fastq', '2019C-111_2.fastq'] 30.070 PASS This is a trial to the cut ,off, system as this data all passed the checks. | I will attemp to get 150 characters into here in a way that is not awful and sounds decent. We can try counting the letters and as of now, it should be ok!