changeset 1:07dfb8fd47f4 draft default tip

planemo upload commit e0d65bf0850ce95ffb89982e61f2136fcf0359ee
author nml
date Mon, 13 May 2019 12:59:15 -0400
parents b000a3130db8
children
files bionumeric_convert.xml bionumeric_converter.py test-data/Biohansel_Bionumerics.csv test-data/Output.csv test-data/results.tab
diffstat 5 files changed, 37 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/bionumeric_convert.xml	Mon Mar 18 13:15:57 2019 -0400
+++ b/bionumeric_convert.xml	Mon May 13 12:59:15 2019 -0400
@@ -1,38 +1,41 @@
-<tool id="bionumeric_convert" name="biohansel2bionumerics" version="0.1.0">
+<tool id="bionumeric_convert" name="biohansel2bionumerics" version="0.2.0">
     <description>compliant results</description>
     <requirements>
         <requirement type="package" version="0.24.1">pandas</requirement>
     </requirements>
     <command detect_errors="exit_code"><![CDATA[
-        $__tool_directory__/bionumeric_converter.py -f '$Input' -o '$output'
+        $__tool_directory__/bionumeric_converter.py -f '$Input' -o '$Output'
     ]]></command>
     <inputs>
         <param type="data" name="Input" format="tabular"/>
     </inputs>
     <outputs>
-        <data name="output" format="csv" from_work_dir="output" label="Output.csv"/>
+        <data name="Output" format="csv" from_work_dir="output" label="Biohansel_Bionumerics"/>
     </outputs>
     <tests>
         <test>
             <param name="Input" value="results.tab"/>
-            <output name="output" value="Output.csv"/>
+            <output name="Output" value="Biohansel_Bionumerics.csv"/>
         </test>
     </tests>
     <help><![CDATA[
         **What it does**
 
-        This tool is a supplementary script that takes *only* BioHansel output data and converts it into a format compatible with bionumerics.
+        This tool is a supplementary script that takes Biohansel output data and converts it into a format compatible with Bionumerics.
 
-        **How to run it**
+        **Inputs:**
+
+        - *Individual* output or *Collection* of outputs for any of the three Biohansel results files (tech_results.tab, match_results.tab, or results.tab)
 
-        1. Input any of your BioHansel output files (tech_results.tab, match_results.tab, and results.tab)
-        2. Click Execute
+        **Outputs:**
 
-        **Specific modifications done on the data**
+        - A .CSV file or a collection of .CSV files called "*Output*" that can be renamed and downloaded as required.
+
+        **Specific modifications done to the data**
 
         1. Converts all commas in the output to "/"
-        2. Shortens BioHansel qc_messages if they are over 150 characters
-        3. Converts the .tab file to a .csv file
+        2. Splits Biohansel qc_message column into multiple columns if the message is longer than 150 characters
+        3. Converts the .tab or .tsv file to a .csv file
 
     ]]></help>
     <citations>
--- a/bionumeric_converter.py	Mon Mar 18 13:15:57 2019 -0400
+++ b/bionumeric_converter.py	Mon May 13 12:59:15 2019 -0400
@@ -14,7 +14,7 @@
         '-f',
         '--filename',
         required=True,
-        help='Specify your tsv input')
+        help='Specify your biohansel tsv or other tabular separated input')
     parser.add_argument(
         '-o',
         '--output',
@@ -24,30 +24,27 @@
     tsv_file = args.filename
     out_name = args.output
 
-    no_comma_tsv = comma_remover(tsv_file)
-    df = qc_shortener(no_comma_tsv)
-    df.to_csv(out_name, index=False)
-
-# Remove comma function:
-
+    df_input = pd.read_csv(tsv_file, sep='\t')
 
-def comma_remover(tsv_file):
-    # Create a table from the tsv file as an input into the dataframe.
-    df = pd.read_csv(tsv_file, sep='\t')
-    # Change all commas to / in the QC message
-    no_comma_tsv = df.replace(',', '/', regex=True)
-    return no_comma_tsv
+    df_no_comma = df_input.replace(',', '/', regex=True)
+    df = qc_shortener(df_no_comma)
+    df.to_csv(out_name, index=False)
 
 # Shorten QC results:
 
 
+def splittingstrings(string, length):
+    return (string[0+i:length+i] for i in range(0, len(string), length))
+
+
 def qc_shortener(df):
-    for count in df.index:
-        message = str(df.at[count, 'qc_message'])
+    for i, row in df.iterrows():
+        message = str(row['qc_message'])
         if len(message) > 150:
-            results = message.find('|')
-            new_message = "Truncated after first '|' : " + message[0:results]
-            df['qc_message'] = df['qc_message'].replace(message, new_message)
+            message_list = list(splittingstrings(message, 150))
+            df.at[i, 'qc_message'] = message_list[0]
+            for val in range(1, len(message_list)):
+                df.at[i, 'qc_message_{}'.format(val)] = message_list[val]
     return df
 
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Biohansel_Bionumerics.csv	Mon May 13 12:59:15 2019 -0400
@@ -0,0 +1,4 @@
+sample,subtype,avg_tile_coverage,qc_status,qc_message,qc_message_1
+SRR1645238,1.3,43.345,PASS,,
+SRR1753252,1.1,32.33,PASS,FAIL: This is a test of the cut off system. The data is good and as such I have to manually type this message in to get it to cut off. I am adding in ,5 comas /////
+SRR1928313,1.1.1,555.11,PASS,,
--- a/test-data/Output.csv	Mon Mar 18 13:15:57 2019 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-sample,scheme,scheme_version,subtype,all_subtypes,tiles_matching_subtype,are_subtypes_consistent,inconsistent_subtypes,n_tiles_matching_all,n_tiles_matching_all_expected,n_tiles_matching_positive,n_tiles_matching_positive_expected,n_tiles_matching_subtype,n_tiles_matching_subtype_expected,file_path,avg_tile_coverage,qc_status,qc_message
-2019C-111,heidelberg,0.5.0,2.2.3.1.2,2; 2.2; 2.2.3; 2.2.3.1; 2.2.3.1.2,2.2.3.1.2,True,,202,202,14,14,3,3,['2019C-111_1.fastq'/ '2019C-111_2.fastq'],30.07,PASS,Truncated after first '|' : This is a trial to the cut /off/ system as this data all passed the checks. 
--- a/test-data/results.tab	Mon Mar 18 13:15:57 2019 -0400
+++ b/test-data/results.tab	Mon May 13 12:59:15 2019 -0400
@@ -1,2 +1,4 @@
-sample	scheme	scheme_version	subtype	all_subtypes	tiles_matching_subtype	are_subtypes_consistent	inconsistent_subtypes	n_tiles_matching_all	n_tiles_matching_all_expected	n_tiles_matching_positive	n_tiles_matching_positive_expected	n_tiles_matching_subtype	n_tiles_matching_subtype_expected	file_path	avg_tile_coverage	qc_status	qc_message
-2019C-111	heidelberg	0.5.0	2.2.3.1.2	2; 2.2; 2.2.3; 2.2.3.1; 2.2.3.1.2	2.2.3.1.2	True		202	202	14	14	3	3	['2019C-111_1.fastq', '2019C-111_2.fastq']	30.070	PASS	This is a trial to the cut ,off, system as this data all passed the checks. | I will attemp to get 150 characters into here in a way that is not awful and sounds decent. We can try counting the letters and as of now, it should be ok!
+sample	subtype	avg_tile_coverage	qc_status	qc_message
+SRR1645238	1.3	43.345	PASS
+SRR1753252	1.1	32.33	PASS	"FAIL: This is a test of the cut off system. The data is good and as such I have to manually type this message in to get it to cut off. I am adding in 5 comas ,,,,,"
+SRR1928313	1.1.1	555.11	PASS
:i maīhF|mG}2QS8)HjO]W6\+ژd>nLP*k]ȋyLkdnfLRCNӍ%Tauh>[}rh> {5\?pɴrA/݀uv:lx<VnI%.rmN&ڽlѥmUt,o8)HAǛlne;r\kAZV <(J)^SsL՚ mu=|e3b2)nd%R}-/G ,x٬3!_K%mwr6D9SlSDgzf0N^$#VlOGND3-hVGÙ넭WJҠWKۧKTʜޏө{Cs`>nh/f`,ȰXޓ:/ #C0 xS1V\f,0TÕd,^88r6P_fk(ywUu|${ nR0psϮ%"<`Wwep|XEfaIOWTֳm+ iVVmP2GYް3AI)=, l*S -{K?V/Z]!P>w;mȶS2Q*׊B ds ^^`uf3Ҹ~& DkDtԧ-fFq~|j}% L{VdN&Go |R^gA]52T8FԟoY3:zmĒk=mEfRz)踄۶T:`ƛ׶kMS{׶NvN3=m+x }M潭*}-d/ u'G?d2v rqvw_@8m^i0]`3n'4ΎӭȂs2c+U. G^ (qXz_=/ So?^TmW5d? rm#!z'6Y|;g)Tw uP=2-~(HڪUnο-.W5z,2lpUU׍pUlO$Q3c>kG^ Bm`͗k3>|22{ۃ>]=j]BAe:<|YQڪxinZuk[j/m+b2*B-74pfJdk~`hC"QH 8\F3}O9Uҩ8঄ Uʺ:Y fkhPftagr>!\qԵlJpl!E'̓u9^Bz FV lsZuWyUnҔ $}~X/lnj'A&\SwN5 Ɗ+/m-$&ˠps\h꠮Mܒ-$`,}?2QⵐD$ڃw^ӝ#(1p DiD}J໣-Me|]g"xMw*TwB^5'"%!;Z!I >ă& L7&ӂx$"% Aejj0OhNt*%_`3C&ÝmA뙥)=P `RSuS>!F^~C9)U.q(Pft)o }GGۤ( wAmJY'=Whh*Օhn''_VyLů)ڼae= j2=#걋Ez6WN8~y.3~,%\9O0s9uh2A>J=/|8M KzJZ^eZu ~Q]kUwo;Qb^f(VPgA[ mԈurü+WT}-T?lkCu2E rfM ,"]uOfnQh.>W5^]V p &;I?0ÁxazޓX^W>tExmm_fmo0=r2Å FX1-†?ّM`gmJp 6[0=D+)xAUrs A&rÅ ^ꉧNm^;ߠE-lB3H<^Q |J]$Zע6L1o2/W'X)N z3/3ڄzyœc&% F Az<7粰 Eb2C!&ړ2]OMz|)ZHԤA83]`le@{nʿ{woݞu/ˌn25:|2r^#xOz}/2r"!6lՉ&a3NS`U=I=>z 0A^vlNϸF^Kt 16<}<2cbO7Z'=g}:G~2kا!)QTU]B:LDžs" >h_Nq#kJ!VM`ؗmwC{FV E)ّ@I9*ی,"