Mercurial > repos > greg > vsnp_sample_names
changeset 7:e1cb13d6a82c draft
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_sample_names commit f85ae0f073297ca7f5dfa22ae3a24f5fc2e6bad6"
author | greg |
---|---|
date | Fri, 03 Sep 2021 17:21:52 +0000 |
parents | 7514b1e55372 |
children | 4f43f163c408 |
files | test-data/SRR14085881_forward test-data/SRR14085881_reverse test-data/sample_names2.txt vsnp_sample_names.xml |
diffstat | 4 files changed, 37 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sample_names2.txt Fri Sep 03 17:21:52 2021 +0000 @@ -0,0 +1,1 @@ +SRR14085881
--- a/vsnp_sample_names.xml Wed Aug 04 12:46:56 2021 +0000 +++ b/vsnp_sample_names.xml Fri Sep 03 17:21:52 2021 +0000 @@ -4,16 +4,24 @@ <import>macros.xml</import> </macros> <command detect_errors="exit_code"><![CDATA[ +#import difflib #import os #import re -#set sample_name_read2 = None - -#if $input_type_cond.input_type in ['single', 'pair']: +#if $input_type_cond.input_type == 'single': + #set read1 = $input_type_cond.read1 + #set sample_name = re.sub('[^\s\w\-\\.]', '_', str($read1.element_identifier)) +#else if $input_type_cond.input_type == 'pair': #set read1 = $input_type_cond.read1 - #set sample_name = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) + #set read1_identifier = re.sub('[^\s\w\-]', '_', str($read1.element_identifier)) + #set read2 = $input_type_cond.read2 + #set read2_identifier = re.sub('[^\s\w\-]', '_', str($read2.element_identifier)) + #set matches = difflib.SequenceMatcher(None, read1_identifier, read2_identifier).get_matching_blocks() + #set match = $matches[0] + #set sample_name = re.sub('[^\s\w\-]', '_', str($read1_identifier[match.a:match.a + match.size])) #else: - #set read1_filename = $input_type_cond.reads_collection['forward'].name + #set read1 = $input_type_cond.reads_collection['forward'] + #set read1_filename = $read1.name #set sample_name = re.sub('[^\s\w\-]', '_', str($read1_filename)) #end if @@ -21,7 +29,7 @@ ## Something like CMC_20E1_R1.fastq.gz #set sample_name = $sample_name.split('_R1')[0] #else if $sample_name.find(".") > 0: - #if $read1.is_of_type('fastqsanger.gz'): + #if $read1.is_of_type('fastqsanger.gz') and $sample_name.endswith('gz'): ## Something like my_sample.fastq.gz #set sample_name = '.'.join($sample_name.split('.')[0:-2]) #else: @@ -29,7 +37,7 @@ #set sample_name = $os.path.splitext($sample_name)[0] #end if #else if $sample_name.find("_") > 0: - #if $read1.is_of_type('fastqsanger.gz'): + #if $read1.is_of_type('fastqsanger.gz') and $sample_name.endswith('gz'): ## Something like my_sample_fastq_gz #set sample_name = '_'.join($sample_name.split('_')[0:-2]) #else: @@ -37,6 +45,7 @@ #set sample_name = "_".join($sample_name.split("_")[0:-1]) #end if #end if + echo '$sample_name' > '$output' ]]></command> <inputs> @@ -49,13 +58,13 @@ <when value="single"> <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> </when> - <when value="paired"> - <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> - </when> <when value="pair"> <param name="read1" type="data" format="fastqsanger.gz,fastqsanger" label="Read1 fastq file"/> <param name="read2" type="data" format="fastqsanger.gz,fastqsanger" label="Read2 fastq file"/> </when> + <when value="paired"> + <param name="reads_collection" type="data_collection" format="fastqsanger,fastqsanger.gz" collection_type="paired" label="Collection of fastqsanger paired read files"/> + </when> </conditional> </inputs> <outputs> @@ -68,7 +77,14 @@ <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/> <output name="output" file="sample_names.txt" ftype="txt"/> </test> - <!-- Paired reads --> + <!-- Paired reads in separate datasets --> + <test> + <param name="input_type" value="pair"/> + <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/> + <param name="read2" value="CMC_20E1_R2.fastq.gz" dbkey="89"/> + <output name="output" file="sample_names.txt" ftype="txt"/> + </test> + <!-- Collection of Paired reads --> <test> <param name="input_type" value="paired"/> <param name="reads_collection"> @@ -79,12 +95,16 @@ </param> <output name="output" file="sample_names.txt" ftype="txt"/> </test> - <!-- Paired reads in separate datasets --> + <!-- Collection of Paired reads --> <test> - <param name="input_type" value="pair"/> - <param name="read1" value="CMC_20E1_R1.fastq.gz" dbkey="89"/> - <param name="read2" value="CMC_20E1_R2.fastq.gz" dbkey="89"/> - <output name="output" file="sample_names.txt" ftype="txt"/> + <param name="input_type" value="paired"/> + <param name="reads_collection"> + <collection type="paired"> + <element name="forward" value="SRR14085881_forward"/> + <element name="reverse" value="SRR14085881_reverse"/> + </collection> + </param> + <output name="output" file="sample_names2.txt" ftype="txt"/> </test> </tests> <help>