changeset 0:f75e2ac7b6cd draft

Uploaded
author greg
date Tue, 21 Apr 2020 10:17:20 -0400
parents
children 895d18fcfebe
files .shed.yml test-data/BCG_Danish_Human_UK_SRR9596061 test-data/BCG_Danish_Human_UK_SRR9596061.fastq test-data/Dassie_Dassie_ZA_SRR3745455 test-data/Dassie_Dassie_ZA_SRR3745455.fastq test-data/Mbov_Cattle_NI_SRR10993937 test-data/Mbov_Cattle_NI_SRR10993937.fastq vsnp_sample_names.xml
diffstat 8 files changed, 133 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Tue Apr 21 10:17:20 2020 -0400
@@ -0,0 +1,11 @@
+name: vsnp_sample_names
+owner: greg
+description: |
+  Contains a tool that outputs a unique portion of the input file name.
+homepage_url: https://github.com/USDA-VS/vSNP
+long_description: |
+  Contains a tool that outputs a unique portion of the input file name.
+remote_repository_url: https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_sample_names
+type: unrestricted
+categories:
+  - Sequence Analysis
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/BCG_Danish_Human_UK_SRR9596061	Tue Apr 21 10:17:20 2020 -0400
@@ -0,0 +1,1 @@
+BCG_Danish_Human_UK_SRR9596061
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/BCG_Danish_Human_UK_SRR9596061.fastq	Tue Apr 21 10:17:20 2020 -0400
@@ -0,0 +1,14 @@
+@SRR9596061.1 1 length=100
+GACCCCGGTTCAGGCTTCACCACAGTGTGGAACGCGGTCGTCTCCGAACTTAACGGCGACCCTAAGGTTGACGACGGACCCAGCAGTGATGCTAATCTCA
++SRR9596061.1 1 length=100
+??<?BA@D8@F<F<;C?CFGAFAGI?@9:D<?B?@:A;';;1(=A=4;<;AC>AB>@B;755?<<>C:>?C@############################
+@SRR9596061.2 2 length=100
+GCTGAGATTAGCATCACTGCTGGGTCCGTCGTCAACCTTAGGGTCGCCGTTAAGTTCGGAGACGACCGCGTTCCACACTGTGGTGAAGCCTGAACCGGGG
++SRR9596061.2 2 length=100
+?<<++A2=?ADDDEEI9A?>AECEA9CFEEEC??:BDIIEB9?9?@ACDA;;CCCDDD/;=<????;=?>;;034+>>>>AA93<>::A8>2<>A?>9;9
+@SRR9596061.3 3 length=100
+GCTCGATTTCGTTTTGGACAAAGCTGCTCGGCACGGATAACAGAGCAAAACCCTCGACGATGGTCAATGGCTTGACTAAATTGAGCCAAGCCATTTCGCG
++SRR9596061.3 3 length=100
+????D?DDB<D<AAE?1+<+<F9FF3E<91?:)?D@((0.BC4)8@@CD).;@C=A############################################
+@SRR9596061.4 4 length=100
+GAACTTAACGGCGACCCTAAGGTTGACGACGGACCCAGCAGTGATGCTAATCTCAGCGCTCCGCTGACCCCTCAGCAAAGGGCTTGGCTCAATCTCGTCC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Dassie_Dassie_ZA_SRR3745455	Tue Apr 21 10:17:20 2020 -0400
@@ -0,0 +1,1 @@
+Dassie_Dassie_ZA_SRR3745455
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Dassie_Dassie_ZA_SRR3745455.fastq	Tue Apr 21 10:17:20 2020 -0400
@@ -0,0 +1,8 @@
+@SRR3745455.1 1 length=251
+ATACAAGACCGAGCTGATCAAACCCGGCAAGCCCTGGCGGTCCATCGAGGATGTCGAGTTGGCCACCGCGCGCTGGGTCGACTGGTTCAACCATCGCCGCCTCTACCAGTACTGCGGCGACGTCCCGCCGGTCGAACTCGAGGCTGCCTACTACGCTCAACGCCAGAGACCAGCCGCCGGCTGAGGTCTCAGATCAGAGAGTCTCCGGACTCACCGGGGCGGTTCAGGTTGTGATTTGATCAGGCATGCCG
++SRR3745455.1 1 length=251
+CCCBCFFFFFCCGGGGGGGGGGGFGGGGGGGHGHHAHHGGGGGHHGGHGGGHHHHHHGHFHGHGFGHGGGFFGGGCGHHGGGGGGHGHHHHHGHHGGFFGGGGGHHHHHHHHHHGGF?CCGGCGGGFGGGGGGFGFFFFDFFFFFFEFFFFFFBFFFFDFFBFFFFFFFBEFFFEBF=B;DCFFFFFBFFFFBFFFBFBFFFFFF0.-DDAFFFFF;--@DF99.00:;;/::0BFFFBFE00;/EFFFF-
+@SRR3745455.2 2 length=251
+GTCTTCGGTGCGGTAGCGCCGAATGTCCAACGGAGAGTGCAAAATACCATGAAGCTCATTCTCACGGCCGATGTCGATCACCTCGGGTCCATCGGCGACACTGTCGAGGTCAAGGACGGGTATGGCCGTAACTTTCTGCTCCCGCGCGGCCTGGCGATCGTCGCCTCGCGCGGAGCCCAGAAGCAGGCTGACGAGATCCGCCGGGCCCGCGAAACCAAAAGCGTACGCGACCTAGAGCACGCCAACGAGAT
++SRR3745455.2 2 length=251
+AAAAAFAB>A?DGEEGGGGGFGGGHGHHFGFGGFGGGFFHFFHHHHHHFHHHHHHHFHFGHHHHHEGGGGGGGHHGFGHEGFFAF?CGGHFFFGGGGGCGGHFDGGCC/CHGHGFHHGGG@DHHFHHGGGGCHHHHHBGGHHGDGC?BCAGGEGGGGFEC9ADC?.ADFFFFFAFFFE?E./BBFFFDFFFEDFBFFF/;.;@@;ACFFF-=;-BFEEEFFFDF?--.;9-9AFBFB/BFFF.D>ED.9..
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Mbov_Cattle_NI_SRR10993937	Tue Apr 21 10:17:20 2020 -0400
@@ -0,0 +1,1 @@
+Mbov_Cattle_NI_SRR10993937
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/Mbov_Cattle_NI_SRR10993937.fastq	Tue Apr 21 10:17:20 2020 -0400
@@ -0,0 +1,6 @@
+@M00963:204:000000000-B953L:1:1102:18443:7739 1:N:0:16
+GCACCTGAGATTGACACCGCCCGCGAATACGAAGTAACCGCCGAATACCAGTCCTGGCGGGCCGTCTAGGGAAGCGACGCAGCATTACTTACGGTCGGCGTCGGGATAGGCCCGGCCATCCCCCTCGGGTGGCTCACGTTAGCGAACCGGCACCCGGACAAACCTGGGGCGGCCGCGAACCCAGCCCCTGCGGGGCTAACAACACGGTCCGCGCCCACCGCCGCCCCCCCGTCAACCGAGCCAATACCACA
++
+AAAAAFFFBFFFG11G11E1A0EGCGG?/1A//AFD2AAE/A/A>/FFGBCGHBFGHFGG////>/0/21?BGA///><</ACGDF1?111=<.0.------;---.;////.-:@--/9....-;@A@-@--9//;9--;9;---@-9-9-9--9--9---;9/9-------@-----;@-----;A--9---9-9-/;BB9-;9-9B-9-------------@----;-;A--/---9;@/-//99//;
+@M00963:204:000000000-B953L:1:1102:15355:7851 1:N:0:16
+GTTCGGCGCGCCGGTGGGTCCTGATCCGCTGGCGGGTTTCGTCAACGACAGCGGCCTGCGACCCCGATATCCCGCGATTGAGATCTACCGGGTGAGCGCGCCCGCCAACCCCGGGGCGCCCTACTTCGCCGCGACCGACCAGCTCGCCCGCGTCGACGGCGGACCCGAGGTCCTGCTGCGGCTGGACGAACGACGCCGGCTGCAGGGCCAGCCCCCCCTGGGGCCGGTGCTGATGACCGCGGACGCCCGAG
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vsnp_sample_names.xml	Tue Apr 21 10:17:20 2020 -0400
@@ -0,0 +1,91 @@
+<tool id="vsnp_sample_names" name="vSNP: sample names" version="1.0.0">
+    <description></description>
+    <command detect_errors="exit_code"><![CDATA[
+#import os
+#import re
+#set output_dir = 'output'
+mkdir -p '$output_dir'
+#if str($input_type_cond.input_type) == "single":
+    ## Single input (a lone read or one of a pair): keep the identifier up to the first "." and then up to the first "_".
+    ## The rendered lines run as one shell command line (hence the && in the loop below), so echo must be chained with &&.
+    #set sample_name = $os.path.basename($input_type_cond.read.element_identifier)
+    #if $sample_name.find(".") > 0:
+        #set sample_name = $sample_name.split(".")[0]
+    #end if
+    #if $sample_name.find("_") > 0:
+        #set sample_name = $sample_name.split("_")[0]
+    #end if
+    && echo '$sample_name' > '$output'
+#else:
+    #for $i in $input_type_cond.reads_collection:
+        #set sample_name = $os.path.basename($i.element_identifier)
+        #if $sample_name.find(".") > 0:
+            #set sample_name = $sample_name.split(".")[0]
+        #end if
+        #set output_file = $os.path.join($output_dir, $sample_name)
+        && echo '$sample_name' > '$output_file'
+    #end for
+#end if
+]]></command>
+    <inputs>
+        <conditional name="input_type_cond">
+            <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
+                <option value="single" selected="true">Single files</option>
+                <option value="collection">Collections of files</option>
+            </param>
+            <when value="single">
+                <param name="read" type="data" format="fastqsanger.gz,fastqsanger" label="Sample file"/>
+            </when>
+            <when value="collection">
+                <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of sample files"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output" format="txt">
+            <filter>input_type_cond['input_type'] == 'single'</filter>
+        </data>
+        <collection name="output__collection" type="list">
+            <discover_datasets pattern="__name__" directory="output" format="txt" />
+            <filter>input_type_cond['input_type'] == 'collection'</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input_type" value="collection"/>
+            <param name="reads_collection">
+                <collection type="list">
+                    <element name="BCG_Danish_Human_UK_SRR9596061.fastq" value="BCG_Danish_Human_UK_SRR9596061.fastq" dbkey="89"/>
+                    <element name="Dassie_Dassie_ZA_SRR3745455.fastq" value="Dassie_Dassie_ZA_SRR3745455.fastq" dbkey="89"/>
+                    <element name="Mbov_Cattle_NI_SRR10993937.fastq" value="Mbov_Cattle_NI_SRR10993937.fastq" dbkey="89"/>
+                </collection>
+            </param>
+            <output_collection name="output__collection" type="list">
+                <element name="BCG_Danish_Human_UK_SRR9596061" file="BCG_Danish_Human_UK_SRR9596061" ftype="txt"/>
+                <element name="Dassie_Dassie_ZA_SRR3745455" file="Dassie_Dassie_ZA_SRR3745455" ftype="txt"/>
+                <element name="Mbov_Cattle_NI_SRR10993937" file="Mbov_Cattle_NI_SRR10993937" ftype="txt"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+**What it does**
+
+Accepts one or more sample files and writes a unique portion of each file name (the text before the first "." and, for single files, also before the first "_") to the output file(s).  These
+text files can then be used as workflow parameter values for the Read Group Identifier parameter in the bwa-mem tool.
+
+**Required Options**
+
+ * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads) based on the selected option.
+    </help>
+    <citations>
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {1. Stuber T},
+            title = {Manuscript in preparation},
+            year = {None},
+            url = {https://github.com/USDA-VS/vSNP},}
+        </citation>
+    </citations>
+</tool>
+