diff convert_extract_sequence_file.xml @ 0:01c2b74b3a21 draft

planemo upload commit 23ef4b1699065b4f6200c58328bfecfb33dd7fd1-dirty
author bebatut
date Tue, 26 Apr 2016 08:18:18 -0400
parents
children 158642ce204f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/convert_extract_sequence_file.xml	Tue Apr 26 08:18:18 2016 -0400
@@ -0,0 +1,249 @@
+<tool id="convert_extract_sequence_file" name="Convert/ Extract information" version="1.0.0">
+
+	<description>from a sequence file, with possible constraints</description>
+
+    <macros>
+        <xml name="extraction_option">
+            <param name="to_extract" type="select" display="checkboxes" multiple="true" label="Information to extract" help="">
+                <option value="id">Identifiant</option>
+                <option value="length">Length</option>
+                <validator type="no_options" message="Select at least one information to extract"/>
+            </param>
+        </xml>
+
+        <xml name="extraction_test">
+            <param name='specific_extraction' type="select" label="Extract specific information?" help="If no is selected, a sequence file is generated. If yes, a text file containing the wanted information is generated">
+                <option value="True">Yes</option>
+                <option value="False" selected="true">No</option>
+            </param>
+        </xml>
+    </macros>
+
+	<requirements>
+  	</requirements>
+
+    <stdio>
+    </stdio>
+
+    <version_command>
+    </version_command>
+
+  	<!--<command>-->
+    <command><![CDATA[
+  		python $__tool_directory__/convert_extract_sequence_file.py 
+      		--input $sequence_file_format.sequence_file
+
+            --custom_extraction_type $sequence_file_format.extraction.specific_extraction
+
+            #if $sequence_file_format.extraction.specific_extraction == "True":
+                --to_extract "{$sequence_file_format.extraction.to_extract}"
+                --output_information $information_file
+            #else if $sequence_file_format.format=="fastq":
+                --split $sequence_file_format.extraction.split.split_test
+                #if $sequence_file_format.extraction.split.split_test :
+                    --quality_format $sequence_file_format.extraction.split.quality_format
+                    --output_sequence $fasta_sequence_file_from_fastq
+                    --output_quality $quality_file
+                #else:
+                    --output_sequence $fastq_sequence_file
+                #end if
+            #else:
+                --output_sequence $fasta_sequence_file
+            #end if
+                
+    		#if $constraints.constrained_extraction == "True" :
+                #for $i, $constrain in enumerate( $constraints.constraint_definition )
+                	#set info_to_constrain=$constrain.constrained_information['info_to_constrain']             
+                    #if $info_to_constrain in ("id"):
+                    	--constraint "$info_to_constrain:
+                    	${constrain.constrained_information.constraint_type.type}:
+                    	${constrain.constrained_information.constraint_type.value}"
+                    #else:
+                    	#for $j, $sub_constrain in enumerate( $constrain.constrained_information.constraint_definition )
+                    		--constraint "$info_to_constrain:
+                    		${sub_constrain.type}:
+                    		${sub_constrain.value}"
+                        #end for
+                    #end if
+                #end for
+            #end if
+
+            --report $report_filepath
+            --format $sequence_file_format.format
+        ]]>
+  	</command>
+
+  	<inputs>
+        <conditional name="sequence_file_format">
+        	<param name="format" type="select" display="radio" 
+                label="Format of the sequence file" help="">
+	            <option value="fasta">Fasta</option>
+                <option value="fastq">FastQ</option>
+	        </param>
+            <when value="fastq">
+                <param name="sequence_file" type="data" format="fastq" 
+                    label="Sequence file" help=""/>
+                <conditional name="extraction">
+                    <expand macro="extraction_test"/> 
+
+                    <when value="True">
+                        <expand macro="extraction_option"/> 
+                    </when>
+                    <when value="False">
+                        <conditional name="split">
+                            <param name='split_test' type="select" label="Split file into sequence and quality files?" help="If yes is selected, a fasta and a quality file are generated. If no, a fastq file is generated">
+                                <option value="True" selected="true">Yes</option>
+                                <option value="False">No</option>
+                            </param>
+
+                            <when value="True">
+                                <param name="quality_format" type="select" display="radio" label="Coding of quality scores?" help="">
+                                    <option value="sanger" selected="true">Sanger (Phred+33)</option>
+                                    <option value="solexa">Solexa (Solexa+64) </option>
+                                    <option value="illumina_1_3">Illumina 1.3+ (Phred+64) </option>
+                                    <option value="illumina_1_5">Illumina 1.5+ (Phred+64) </option>
+                                    <option value="illumina_1_8">Illumina 1.8+ (Phred+33) </option>
+                                </param>
+                            </when>
+                            <when value="False" />
+                        </conditional> 
+                    </when>
+                </conditional> 
+            </when>
+
+            <when value="fasta">
+                <param name="sequence_file" type="data" format="fasta" 
+                    label="Sequence file" help=""/>
+                <conditional name="extraction">
+                    <expand macro="extraction_test"/> 
+
+                    <when value="True">
+                        <expand macro="extraction_option"/>
+                    </when>
+                    <when value="False" />
+                </conditional> 
+            </when>
+        </conditional>
+        
+        <conditional name="constraints">
+            <param name='constrained_extraction' type='select' label="Constrain extraction?" help="">
+                <option value="True">Yes</option>
+                <option value="False" selected="true">No</option>
+            </param>
+
+            <when value="True">
+                <repeat name="constraint_definition" title="Constraints on sequences" min="1">
+                    <conditional name="constrained_information">
+                        <param name="info_to_constrain" type="select" label="Information to constrain" help="">
+                            <option value="id">Identifiant</option>
+                            <option value="length">Length</option>
+                        </param>
+                        <when value="id">
+                            <conditional name="constraint_type">
+                                <param name="type" type="select" display="radio" label="Type of constraint" help="">
+                                    <option value="equal">Equal a value</option>
+                                    <option value="in">In a list</option>
+                                    <option value="not_in">Not in a list</option>
+                                </param>
+                                <when value="equal">
+                                    <param name="value" type="text" size="200" label="Equal to" help=""/>
+                                        <validator type="empty_field" message="Give a value"/>
+                                </when>
+                                <when value="in">
+                                    <param format="txt" name="value" type="data" label="List of constraint" help="Text file with a value per line and nothing else"/>
+                                        <validator type="unspecified_build" message="Select a file"/>
+                                </when>
+                                <when value="not_in">
+                                    <param format="txt" name="value" type="data" label="List of constraint" help="Text file with a value per line and nothing else"/>
+                                        <validator type="unspecified_build" message="Select a file"/>
+                                </when>
+                            </conditional>
+                        </when>  
+                        <when value="length">
+                            <repeat name="constraint_definition" title="Constraint on sequence length" min="1">
+                                <param name="type" type="select" label="Type of constraint" help="">
+                                    <option value="equal">Equal to </option>
+                                    <option value="lower">Lower than </option>
+                                    <option value="strictly_lower">Strictly lower than </option>
+                                    <option value="greater">Greater than </option>
+                                    <option value="strictly_greater">Strictly greater than </option>
+                                </param>
+                                <param name="value" type="integer" min="0" max="3000" value="100" label="Value" help=""/>
+                            </repeat>
+                        </when>  
+                    </conditional>
+                </repeat>
+            </when>
+            <when value="False" />
+        </conditional> />
+  	</inputs>
+
+  	<outputs>
+        <data format="txt" name="information_file" 
+            label="${tool.name} on ${on_string}: Information">
+            <filter>((sequence_file_format['extraction']['specific_extraction'] == "True" ))</filter>
+        </data>
+
+        <data format="fasta" name="fasta_sequence_file"
+            label="${tool.name} on ${on_string}: Extracted sequences" >
+            <filter>((sequence_file_format['format'] == 'fasta' and not sequence_file_format['extraction']['specific_extraction']== "True" ))</filter>
+        </data>
+
+        <data format="fastq" name="fastq_sequence_file"
+            label="${tool.name} on ${on_string}: Extracted sequences">
+            <filter>((sequence_file_format['format'] == 'fastq' and sequence_file_format['extraction']['specific_extraction'] == "False" and sequence_file_format['extraction']['split']['split_test'] == "False" ))</filter>
+        </data>
+
+        <data format="qual" name="quality_file" 
+            label="${tool.name} on ${on_string}: Extracted quality">
+            <filter>((sequence_file_format['format'] == 'fastq' and sequence_file_format['extraction']['specific_extraction'] == "False" and sequence_file_format['extraction']['split']['split_test'] == "True" ))</filter>
+        </data>
+
+        <data format="fasta" name="fasta_sequence_file_from_fastq"
+            label="${tool.name} on ${on_string}: Extracted sequences">
+            <filter>((sequence_file_format['format'] == 'fastq' and sequence_file_format['extraction']['specific_extraction'] == "False" and sequence_file_format['extraction']['split']['split_test'] == "True" ))</filter>
+        </data>
+
+        <data format="txt" name="report_filepath" 
+            label="${tool.name} on ${on_string}: Report"/>
+  	</outputs>
+
+  	<tests>
+        <test>
+            <param name="format" value="fasta"/>
+            <param name="sequence_file" value="input_sequence_file.fasta"/>
+            <param name="specific_extraction" value="True" />
+            <param name="to_extract" value="length" />
+            <param name="constrained_extraction" value="False" />
+            <output name="information_file" file="information_lenght_fasta_output.txt"/>
+            <output name="report_filepath" file="report_length_fasta_output.txt"/>
+        </test>
+        <test>
+            <param name="format" value="fastq"/>
+            <param name="sequence_file" value="input_sequence_file.fastq"/>
+            <param name="specific_extraction" value="False" />
+            <param name="split_test" value="True" />
+            <param name="quality_format" value="illumina_1_3" />
+            <param name="constrained_extraction" value="False" />
+            <output name="quality_file" file="extracted_quality_illumina_1_3_fastq_output.qual"/>
+            <output name="fasta_sequence_file_from_fastq" file="extracted_sequences_illumina_1_3_fastq_output.fasta"/>
+            <output name="report_filepath" file="report_illumina_1_3_fastq_output.txt"/>
+        </test>
+  	</tests>
+
+  	<help><![CDATA[
+
+**What it does**
+
+This tool extracts information (sequences, id, length, ...) from sequence files or convert a FastQ file to Fasta file.
+
+Some constraints could be added to extraction/conversion. For example, only sequences with more than 30 bp could be extracted. Or, a sequences whose the identifiant is in a list. 
+
+The input is a sequence file in fasta or fastq format. The tool generates different outputs given the chosen parameters.
+]]>
+  	</help>
+
+    <citations>
+    </citations>
+</tool>
+