comparison repex_full_clustering.xml @ 2:968f0867acc5 draft

documentation and help update
author petr-novak
date Mon, 03 Feb 2020 02:34:46 -0500
parents e2b8e71b85b9
children 67964b619af8
comparison
equal deleted inserted replaced
1:e6fb0f2b2097 2:968f0867acc5
4 <regex match="Traceback" source="stderr" level="fatal" description="Unknown error" /> 4 <regex match="Traceback" source="stderr" level="fatal" description="Unknown error" />
5 <regex match="error" source="stderr" level="fatal" description="Unknown error" /> 5 <regex match="error" source="stderr" level="fatal" description="Unknown error" />
6 <regex match="Warning" source="stderr" level="warning" description="Unknown error" /> 6 <regex match="Warning" source="stderr" level="warning" description="Unknown error" />
7 <exit_code range="1:" level="fatal" description="Error" /> 7 <exit_code range="1:" level="fatal" description="Error" />
8 </stdio> 8 </stdio>
9 <description>Improved version of repeat discovery and characterization using graph based sequence clustering</description> 9 <description>Improved version of repeat discovery and characterization using graph-based sequence clustering</description>
10 <requirements> 10 <requirements>
11 <requirement type="package">last</requirement> 11 <requirement type="package">last</requirement>
12 <requirement type="package">imagemagick</requirement> 12 <requirement type="package">imagemagick</requirement>
13 <requirement type="package">mafft</requirement> 13 <requirement type="package">mafft</requirement>
14 <requirement type="package">blast</requirement> 14 <requirement type="package">blast</requirement>
77 cp *.fasta ${ReportFile.files_path}/ 2>>$log &amp;&amp; rm -r ../tarean_output || : 77 cp *.fasta ${ReportFile.files_path}/ 2>>$log &amp;&amp; rm -r ../tarean_output || :
78 78
79 </command> 79 </command>
80 <inputs> 80 <inputs>
81 <param name="FastaFile" label="NGS reads" type="data" format="fasta" 81 <param name="FastaFile" label="NGS reads" type="data" format="fasta"
82 help="Input file must contain fasta-formatted NGS reads. If paired end reads are used, reads must be interlaced and all pairs must be complete. Example of input data format is provided in the help below. "/> 82 help="Input file must contain FASTA-formatted NGS reads. Illumina paired-end reads are recommended."/>
83 <param name="paired" type="boolean" truevalue="--paired" falsevalue="" checked="True" label="Paired-end reads" help="Check if you are using pair reads and input sequences contain both read mates and left mates alternate with their right mates" /> 83 <param name="paired" type="boolean" truevalue="--paired" falsevalue="" checked="True" label="Paired-end reads" help="If paired-end reads are used, left- and right-hand reads must be interlaced and all pairs must be complete. Example of the correct format is provided in the help below." />
84 84
85 <param name="sample" label="Sample size" type="integer" value="500000" min="10000"/> 85 <param name="sample" label="Sample size" type="integer" value="500000" min="10000"/>
86 <param name="taxon" label="Select taxon and protein domain database version (REXdb)" type="select" help="Reference database of transposable element protein domains - REXdb - is used for annotation of repeats"> 86 <param name="taxon" label="Select taxon and protein domain database version (REXdb)" type="select" help="Reference database of transposable element protein domains - REXdb - is used for annotation of repeats">
87 <option value="VIRIDIPLANTAE3.0" selected="true">Viridiplantae version 3.0 </option> 87 <option value="VIRIDIPLANTAE3.0" selected="true">Viridiplantae version 3.0 </option>
88 <option value="VIRIDIPLANTAE2.2" selected="true">Viridiplantae version 2.2</option> 88 <option value="VIRIDIPLANTAE2.2" selected="true">Viridiplantae version 2.2</option>
96 <when value="false"> 96 <when value="false">
97 <!-- pass --> 97 <!-- pass -->
98 </when> 98 </when>
99 <when value="true"> 99 <when value="true">
100 <conditional name="comparative"> 100 <conditional name="comparative">
101 <param name="options_comparative" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Perform comparative analysis" help="Use this option when you want to compare sequences multiple groups"/> 101 <param name="options_comparative" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Perform comparative analysis" help="Use this option to analyze multiple samples simultaneously"/>
102 <when value="false"> 102 <when value="false">
103 <!-- do nothing here --> 103 <!-- do nothing here -->
104 </when> 104 </when>
105 <when value="true"> 105 <when value="true">
106 <param name="prefix_length" label="Group code length" type="integer" value="3" min="1" max="10" help="For comparative analysis, sequences are from individial groups distinguished by sample code which must be used as prefix for sequence name. See example below."/> 106 <param name="prefix_length" label="Group code length" type="integer" value="3" min="1" max="10" help="For comparative analysis, reads from different samples are distinguished by sample codes included as prefix to the read names. See example below."/>
107 </when> 107 </when>
108 </conditional> 108 </conditional>
109 109
110 <conditional name="blastx"> 110 <conditional name="blastx">
111 <param name="options_blastx" type="select" label="Select parameters for protein domain search"> 111 <param name="options_blastx" type="select" label="Select parameters for protein domain search">
114 <option value="DIAMOND" selected="false">diamond program (the least sensitive, fastest)</option> 114 <option value="DIAMOND" selected="false">diamond program (the least sensitive, fastest)</option>
115 </param> 115 </param>
116 </conditional> 116 </conditional>
117 117
118 <conditional name="options"> 118 <conditional name="options">
119 <param name="options" type="select" label="Similarity search options" help="Different similarity search parameters are used depending on the used input data to adjust search to differences in length and error rate"> 119 <param name="options" type="select" label="Similarity search options" help="Different similarity search parameters are used depending on the input data to adjust for differences in read length and error rate">
120 <option value="ILLUMINA" selected="true">Illumina reads, read length 100nt or more </option> 120 <option value="ILLUMINA" selected="true">Illumina reads, read length 100nt or more </option>
121 <option value="ILLUMINA_SHORT" selected="false">Illumina reads, shorter than 100nt (Do not use reads shorter than 50nt!) </option> 121 <option value="ILLUMINA_SHORT" selected="false">Illumina reads, shorter than 100nt (Do not use reads shorter than 50nt!) </option>
122 <option value="ILLUMINA_DUST_OFF" selected="false">Illumina reads, no masking of low complexity repeats </option> 122 <option value="ILLUMINA_DUST_OFF" selected="false">Illumina reads, no masking of low complexity repeats </option>
123 <option value="OXFORD_NANOPORE" selected="false"> 123 <option value="OXFORD_NANOPORE" selected="false">
124 Pseudo short reads simulated from Oxford Nanopore data (experimental feature) 124 Pseudo short reads simulated from Oxford Nanopore data (experimental feature)
130 <param name="options_custom_library" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Use custom repeat database"/> 130 <param name="options_custom_library" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Use custom repeat database"/>
131 <when value="false"> 131 <when value="false">
132 <!-- do nothing here --> 132 <!-- do nothing here -->
133 </when> 133 </when>
134 <when value="true"> 134 <when value="true">
135 <param name="library" format="fasta" type="data" label="Custom library of repeats" help="Library of repeats as DNA sequences in fasta format. The required format for IDs in a custom library is : '>repeatname#class/subclass'"/> 135 <param name="library" format="fasta" type="data" label="Custom repeat database" help="The database should contain DNA sequences in FASTA format. The required format for sequence IDs is : '>repeatname#class/subclass'"/>
136 </when> 136 </when>
137 </conditional> 137 </conditional>
138 <param name="size_threshold" label="Cluster size threshold for detailed analysis" type="float" value="0.01" min="0.0001" max="100" help ="Minimal size (as percentage of input reads) of the smallest cluster which is analyzed, cluster with less than 20 reads are not considered at all."/> 138 <param name="size_threshold" label="Cluster size threshold for detailed analysis" type="float" value="0.01" min="0.0001" max="100" help ="Minimal size (as percentage of input reads) of the smallest cluster which is analyzed; clusters with less than 20 reads are not considered."/>
139 <param name="automatic_filtering" label="Perform automatic filtering of abundant satellite repeats" help="Automatic filtering tries to identify the most abundant tandem repeats and remove such sequences partially from analysis. Removal of abundant tandem repeat can enable to analyze higher proportion of other less abundant repeats." type="boolean" truevalue="--automatic_filtering" falsevalue="" checked="false"/> 139 <param name="automatic_filtering" label="Perform automatic filtering of abundant satellite repeats" help="Automatic filtering identifies the most abundant tandem repeats and partially removes their reads from the analysis. This enables to analyze higher proportions of other less abundant repeats." type="boolean" truevalue="--automatic_filtering" falsevalue="" checked="false"/>
140 <param name="keep_names" label="Keep original sequences names" type="boolean" truevalue="--keep_names" falsevalue="" checked="false" help="By default sequence are relabeled using integers. If you want to keep original names, use this option."/> 140 <param name="keep_names" label="Keep original read names" type="boolean" truevalue="--keep_names" falsevalue="" checked="false" help="By default, reads are renamed using integers. Use this option to keep original names."/>
141 <param name="assembly_min_cluster_size" type="integer" label="min cluster size for assembly" value="5" min="2" max="100"/> 141 <param name="assembly_min_cluster_size" type="integer" label="Minimal cluster size for assembly" value="5" min="2" max="100"/>
142 </when> 142 </when>
143 </conditional> 143 </conditional>
144 144
145 <conditional name="queue_definition"> 145 <conditional name="queue_definition">
146 <param name="queue_select" type="select" label="Select queue"> 146 <param name="queue_select" type="select" label="Select queue">
147 <option value="basic_fast_queue">basic &amp; fast</option> 147 <option value="basic_fast_queue">basic (max runtime 2 days, 4 GB RAM)</option>
148 <option value="long_slow_queue">long &amp; slow</option> 148 <option value="long_slow_queue">long (max runtime 2 weeks, 64 GB RAM)</option>
149 <option value="extra_long_slow_queue">extra long &amp; slow</option> 149 <option value="extra_long_slow_queue">extra long (max runtime 4 weeks, 64 GB RAM)</option>
150 </param> 150 </param>
151 <when value="basic_fast_queue"> 151 <when value="basic_fast_queue">
152 <param name="queue_specification" type="text" label="Modify parameters (optional)" 152 <param name="queue_specification" type="text" label="Modify parameters (optional)"
153 value="-l select=1:ncpus=10:mem=32gb:scratch_local=50gb -l walltime=48:00:00 -q elixirre@pbs.elixir-czech.cz -v TAREAN_MAX_MEM=4000000,TAREAN_CPU=4" /> 153 value="-l select=1:ncpus=10:mem=32gb:scratch_local=50gb -l walltime=48:00:00 -q elixirre@pbs.elixir-czech.cz -v TAREAN_MAX_MEM=4000000,TAREAN_CPU=4" />
154 </when> 154 </when>