Mercurial > repos > greg > vsnp_sample_names
changeset 3:fb3defef50e5 draft
Uploaded
author | greg |
---|---|
date | Sun, 03 Jan 2021 15:54:36 +0000 |
parents | a56648c94fd3 |
children | 67e3fa4bbc53 |
files | macros.xml vsnp_sample_names.xml |
diffstat | 2 files changed, 93 insertions(+), 60 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Sun Jan 03 15:54:36 2021 +0000
@@ -0,0 +1,24 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <token name="@WRAPPER_VERSION@">1.0</token>
+    <token name="@PROFILE@">19.09</token>
+    <xml name="param_reference_source">
+        <param name="reference_source" type="select" label="Choose the source for the reference genome">
+            <option value="cached" selected="true">locally cached</option>
+            <option value="history">from history</option>
+        </param>
+    </xml>
+    <xml name="citations">
+        <citations>
+            <citation type="bibtex">
+                @misc{None,
+                journal = {None},
+                author = {1. Stuber T},
+                title = {Manuscript in preparation},
+                year = {None},
+                url = {https://github.com/USDA-VS/vSNP},}
+            </citation>
+        </citations>
+    </xml>
+</macros>
+
--- a/vsnp_sample_names.xml Tue Oct 27 18:26:58 2020 +0000
+++ b/vsnp_sample_names.xml Sun Jan 03 15:54:36 2021 +0000
@@ -1,91 +1,100 @@
-<tool id="vsnp_sample_names" name="vSNP: sample names" version="1.0.0">
+<tool id="vsnp_sample_names" name="vSNP: sample names" version="@WRAPPER_VERSION@.1" profile="@PROFILE@">
     <description></description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
     <command detect_errors="exit_code"><![CDATA[
 #import os
 #import re
-#set output_dir = 'output'
-mkdir -p $output_dir
-#if str($input_type_cond.input_type) == "single":
-    ## We may have a single read or a pair, but in
-    ## either case we want the same base file name.
-    #set sample_name = $os.path.basename($input_type_cond.read.element_identifier)
-    #if $sample_name.find(".") > 0:
-        #set sample_name = $sample_name.split(".")[0]
-    #end if
-    #if $sample_name.find("_") > 0:
-        #set sample_name = $sample_name.split("_")[0]
+
+## The sample name is derived from the read1 (forward) file name only.
+
+#if str($input_type_cond.input_type) in ['single', 'pair']:
+    #set read1 = $input_type_cond.read1
+    #set sample_name = re.sub('[^\s\w\-]', '_', str($read1.element_identifier))
+#else:
+    #set read1 = $input_type_cond.reads_collection['forward']
+    #set sample_name = re.sub('[^\s\w\-]', '_', str($read1.name))
+#end if
+
+#if $sample_name.find('_R1') >0:
+    ## Something like CMC_20E1_R1.fastq.gz
+    #set sample_name = $sample_name.split('_R1')[0]
+#else if $sample_name.find(".") > 0:
+    #if $read1.is_of_type('fastqsanger.gz'):
+        ## Something like my_sample.fastq.gz. NOTE(review): re.sub above already replaces '.' with '_' - confirm this branch is reachable.
+        #set sample_name = '.'.join($sample_name.split('.')[0:-2])
+    #else:
+        ## Something like my_sample.fastq
+        #set sample_name = $os.path.splitext($sample_name)[0]
     #end if
-    && echo '$sample_name' > '$output'
-#else:
-    #for $i in $input_type_cond.reads_collection:
-        #set sample_name = $os.path.basename($i.element_identifier)
-        #if $sample_name.find(".") > 0:
-            #set sample_name = $sample_name.split(".")[0]
-        #end if
-        #set output_file = $os.path.join($output_dir, $sample_name)
-        && echo '$sample_name' > '$output_file'
-    #end for
+#else if $sample_name.find("_") > 0:
+    #if $read1.is_of_type('fastqsanger.gz'):
+        ## Something like my_sample_fastq_gz
+        #set sample_name = '_'.join($sample_name.split('_')[0:-2])
+    #else:
+        ## Something like my_sample_fastq
+        #set sample_name = "_".join($sample_name.split("_")[0:-1])
+    #end if
 #end if
+echo '$sample_name' > '$output'
     ]]></command>
     <inputs>
         <conditional name="input_type_cond">
             <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
-                <option value="single" selected="true">Single files</option>
-                <option value="collection">Collections of files</option>
+                <option value="single" selected="true">Single dataset</option>
+                <option value="pair">Dataset pair</option>
+                <option value="paired">List of dataset pairs</option>
             </param>
             <when value="single">
-                <param name="read" type="data" format="fastqsanger.gz,fastqsanger" label="Sample file"/>
+                <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
             </when>
-            <when value="collection">
-                <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="list" label="Collection of sample files"/>
+            <when value="paired">
+                <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/>
+            </when>
+            <when value="pair">
+                <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/>
+                <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/>
             </when>
         </conditional>
     </inputs>
     <outputs>
-        <data name="output" format="txt">
-            <filter>input_type_cond['input_type'] == 'single'</filter>
-        </data>
-        <collection name="output__collection" type="list">
-            <discover_datasets pattern="__name__" directory="output" format="txt" />
-            <filter>input_type_cond['input_type'] == 'collection'</filter>
-        </collection>
+        <data name="output" format="txt"/>
     </outputs>
     <tests>
+        <!-- Single files -->
         <test>
-            <param name="input_type" value="collection"/>
+            <param name="input_type" value="single"/>
+            <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/>
+            <output name="output" file="sample_names.txt" ftype="txt"/>
+        </test>
+        <!-- Paired reads -->
+        <test>
+            <param name="input_type" value="paired"/>
             <param name="reads_collection">
-                <collection type="list">
-                    <element name="BCG_Danish_Human_UK_SRR9596061.fastq" value="BCG_Danish_Human_UK_SRR9596061.fastq" dbkey="89"/>
-                    <element name="Dassie_Dassie_ZA_SRR3745455.fastq" value="Dassie_Dassie_ZA_SRR3745455.fastq" dbkey="89"/>
-                    <element name="Mbov_Cattle_NI_SRR10993937.fastq" value="Mbov_Cattle_NI_SRR10993937.fastq" dbkey="89"/>
+                <collection type="paired">
+                    <element name="forward" value="CMC_20E1_R1.fastq.gz"/>
+                    <element name="reverse" value="CMC_20E1_R2.fastq.gz"/>
                 </collection>
             </param>
-            <output_collection name="output__collection" type="list">
-                <element name="BCG_Danish_Human_UK_SRR9596061" file="BCG_Danish_Human_UK_SRR9596061" ftype="txt"/>
-                <element name="Dassie_Dassie_ZA_SRR3745455" file="Dassie_Dassie_ZA_SRR3745455" ftype="txt"/>
-                <element name="Mbov_Cattle_NI_SRR10993937" file="Mbov_Cattle_NI_SRR10993937" ftype="txt"/>
-            </output_collection>
+            <output name="output" file="sample_names.txt" ftype="txt"/>
+        </test>
+        <!-- Paired reads in separate datasets -->
+        <test>
+            <param name="input_type" value="pair"/>
+            <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/>
+            <param name="read2" value="CMC_20E1_R2.fastq.gz" dbkey="89"/>
+            <output name="output" file="sample_names.txt" ftype="txt"/>
         </test>
     </tests>
     <help>
 **What it does**
 
-Accepts one or more sample files and extracts a unique portion of the file name as the content of the output file(s). These
-text files are then used as workflow parameter values for the Read Group Identifier parameter in the bwa-mem tool.
-
-**Required Options**
-
- * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single or paired fastqsanger reads or collections of fastqsanger reads) based on the selected option.
+Accepts fastqsanger sample files, extracts a unique portion of the file name as the sample name, and writes it to
+the output. The output text file can be consumed by the **Parse parameter value** expression tool to provide workflow
+parameter values to the **Read group identifier (ID)** and the **Sample name identifier (SM)** parameters in the
+**Map with BWA-MEM** tool.
 
     </help>
-    <citations>
-        <citation type="bibtex">
-            @misc{None,
-            journal = {None},
-            author = {1. Stuber T},
-            title = {Manuscript in preparation},
-            year = {None},
-            url = {https://github.com/USDA-VS/vSNP},}
-        </citation>
-    </citations>
+    <expand macro="citations"/>
 </tool>