Mercurial > repos > greg > sample_names
diff sample_names.xml @ 0:54380a6171b8 draft default tip
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/misc/sample_names commit b935f36cfe430263564503ebb71f78dc79315acb"
author | greg |
---|---|
date | Mon, 22 Nov 2021 00:30:39 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sample_names.xml Mon Nov 22 00:30:39 2021 +0000 @@ -0,0 +1,102 @@ +<tool id="sample_names" name="Extract sample names" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>from input file names</description> + <macros> + <import>macros.xml</import> + </macros> + <command detect_errors="exit_code"><![CDATA[ +#import difflib +#import re + +#if $input_type_cond.input_type == 'single': + #set read1 = $input_type_cond.read1 + #set sample_name = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) +#else if $input_type_cond.input_type == 'pair': + #set read1 = $input_type_cond.read1 + #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) + #set read2 = $input_type_cond.read2 + #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) + #set matches = difflib.SequenceMatcher(None, read1_identifier, read2_identifier).get_matching_blocks() + #set match = $matches[0] + #set sample_name = re.sub('[^\s\w\-]', '_', str($read1_identifier[match.a:match.a + match.size])) +#else: + #set read1_name = $input_type_cond.reads_collection['forward'].name + #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1_name)) + #set read2_name = $input_type_cond.reads_collection['reverse'].name + #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2_name)) + #set matches = difflib.SequenceMatcher(None, read1_identifier, read2_identifier).get_matching_blocks() + #set match = $matches[0] + #set sample_name = re.sub('[^\s\w\-]', '_', str($read1_identifier[match.a:match.a + match.size])) +#end if + +echo '$sample_name' > '$output' +]]></command> + <inputs> + <conditional name="input_type_cond"> + <param name="input_type" type="select" label="Choose the category of the files to be analyzed"> + <option value="single" selected="true">Single dataset</option> + <option value="pair">Dataset pair</option> + <option value="paired">List of dataset pairs</option> + </param> + <when value="single"> + <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> + </when> + <when value="pair"> + <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> + <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> + </when> + <when value="paired"> + <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> + </when> + </conditional> + </inputs> + <outputs> + <data name="output" format="txt"/> + </outputs> + <tests> + <!-- Single files --> + <test> + <param name="input_type" value="single"/> + <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/> + <output name="output" file="sample_names.txt" ftype="txt"/> + </test> + <!-- Paired reads in separate datasets --> + <test> + <param name="input_type" value="pair"/> + <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/> + <param name="read2" value="CMC_20E1_R2.fastq.gz" dbkey="89"/> + <output name="output" file="sample_names2.txt" ftype="txt"/> + </test> + <!-- Collection of Paired reads --> + <test> + <param name="input_type" value="paired"/> + <param name="reads_collection"> + <collection type="paired"> + <element name="forward" value="CMC_20E1_R1.fastq.gz"/> + <element name="reverse" value="CMC_20E1_R2.fastq.gz"/> + </collection> + </param> + <output name="output" file="sample_names3.txt" ftype="txt"/> + </test> + <!-- Collection of Paired reads --> + <test> + <param name="input_type" value="paired"/> + <param name="reads_collection"> + <collection type="paired"> + <element name="forward" value="SRR14085881_forward"/> + <element name="reverse" value="SRR14085881_reverse"/> + </collection> + </param> + <output name="output" file="sample_names4.txt" ftype="txt"/> + </test> + </tests> + <help> +**What it does** + +Accepts fastqsanger sample files, extracts a unique portion of the file name as the sample name, and writes it to +the output. The output text file can be consumed by the **Parse parameter value** expression tool to provide workflow +parameter values to the **Read group identifier (ID)** and the **Sample name identifier (SM)** parameters in the +**Map with BWA-MEM** tool. + </help> + <expand macro="citations"/> +</tool> +