nanocompore_sampcomp: sampcomp.xml comparison

comparison sampcomp.xml @ 0:557cf45ff2c8 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/nanocompore commit 8fa5ff35b45c2b046c7f4800410cf39cb89a299a"

author	iuc
date	Tue, 05 May 2020 06:57:10 -0400
parents
children	c43f4b80f5a9

comparison

equal deleted inserted replaced

--1:000000000000
+:557cf45ff2c8
+<?xml version="1.0"?>
+<tool id="nanocompore_sampcomp" name="SampComp" version="@TOOL_VERSION@+@WRAPPER_VERSION@">
+    <!-- Short tagline displayed after the tool name -->
+    <description>to compare Nanopolished datasets</description>
+    <!-- Shared tokens and macros (requirements, citations, help tokens) are imported from macros.xml -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!-- Command used to report the version of the wrapped program -->
+    <version_command><![CDATA[nanocompore --version]]></version_command>
+<command detect_errors="exit_code"><![CDATA[
+## initialize
+## requires a minimum of 3 threads: use GALAXY_SLOTS when it is larger than 3, otherwise 3
+threads=\$((3 > \${GALAXY_SLOTS:-3} ? 3 : \${GALAXY_SLOTS:-3})) &&
+## same name pattern required: link every replicate as sample_<condition>_<n>.tsv
+## and its index next to it with the additional .idx suffix
+#for $i, $current in enumerate($file1_rep)
+ln -s '$current.file' 'sample_1_${i}.tsv' &&
+ln -s '$current.index' 'sample_1_${i}.tsv.idx' &&
+#end for
+#for $i, $current in enumerate($file2_rep)
+ln -s '$current.file' 'sample_2_${i}.tsv' &&
+ln -s '$current.index' 'sample_2_${i}.tsv.idx' &&
+#end for
+## comma-separated lists of the linked file names, one list per condition
+#set files1 = ','.join(['sample_1_' + str(item) + '.tsv' for item in range(len($file1_rep))])
+#set files2 = ','.join(['sample_2_' + str(item) + '.tsv' for item in range(len($file2_rep))])
+## run
+nanocompore sampcomp
+## required
+--label1 '$label1'
+--file_list1 '$files1'
+--label2 '$label2'
+--file_list2 '$files2'
+--fasta '$fasta'
+## optional
+#if $ap.bed
+--bed '$ap.bed'
+#end if
+--max_invalid_kmers_freq $ap.max_invalid_kmers_freq
+--min_coverage $ap.min_coverage
+--min_ref_length $ap.min_ref_length
+--comparison_methods '$ap.comparison_methods'
+--sequence_context $ap.sequence_context
+--sequence_context_weights '$ap.sequence_context_weights'
+--pvalue_thr $ap.pvalue_thr
+$ap.logit
+$ap.allow_warnings
+--outpath 'results'
+--nthreads \$threads
+--log_level debug
+## bundle the database files into a single archive only when the database output
+## was requested, so a missing database file cannot fail a job that does not need it
+#if 'db' in str($ap.out).split(',')
+&& tar -cf 'results/db.tar' 'results/out_SampComp.db.bak' 'results/out_SampComp.db.dir' 'results/out_SampComp.db.dat'
+#end if
+]]></command>
+<inputs>
+    <!-- First condition: a label plus one or more NanopolishComp file/index pairs -->
+    <param argument="--label1" type="text" value="Condition 1" label="Set label of first condition"/>
+    <repeat name="file1_rep" min="1" title="First condition files">
+        <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list1)"/>
+        <param name="index" type="data" format="tabular" label="Select index file"/>
+    </repeat>
+    <!-- Second condition: same layout as the first one -->
+    <param argument="--label2" type="text" value="Condition 2" label="Set label of second condition"/>
+    <repeat name="file2_rep" min="1" title="Second condition files">
+        <param name="file" type="data" format="tabular" label="Select NanopolishComp file" help="(--file_list2)"/>
+        <param name="index" type="data" format="tabular" label="Select index file"/>
+    </repeat>
+    <param argument="--fasta" type="data" format="fasta" label="Select mapping file" help="Transcriptome FASTA reference; has to be the same as the one used at the mapping step."/>
+    <section name="ap" title="Advanced parameters">
+        <param argument="--bed" type="data" format="bed" optional="true" label="Select mapping file with annotation of transcriptome"/>
+        <param argument="--max_invalid_kmers_freq" type="float" value="0.1" min="0.0" max="1.0" label="Set max frequency of invalid kmers"/>
+        <param argument="--min_coverage" type="integer" value="30" min="0" label="Set minimum coverage required in each condition to do the comparison"/>
+        <param argument="--min_ref_length" type="integer" value="100" min="0" label="Set minimum length of a reference transcript to include it in the analysis"/>
+        <!-- A multiple select is optional by default in Galaxy; require at least one
+             method so the command line never receives an empty selection -->
+        <param argument="--comparison_methods" type="select" multiple="true" optional="false" label="Select comparison methods">
+            <option value="GMM" selected="true">GMM</option>
+            <option value="KS" selected="true">KS</option>
+            <option value="TT">TT</option>
+            <option value="MW">MW</option>
+        </param>
+        <param argument="--sequence_context" type="integer" value="0" min="0" max="4" label="Set sequence context for combining p-values"/>
+        <param argument="--sequence_context_weights" type="select" label="Select type of weights to use for combining p-values">
+            <option value="uniform" selected="true">Uniform</option>
+            <option value="harmonic">Harmonic</option>
+        </param>
+        <param argument="--pvalue_thr" type="float" value="0.05" min="0.0" max="1.0" label="Set adjusted p-value threshold for reporting significant sites"/>
+        <param argument="--logit" type="boolean" truevalue="--logit" falsevalue="" label="Use logistic regression testing also when all conditions have replicates?"/>
+        <!-- Checking this box passes the allow_warnings flag, i.e. warnings are tolerated instead of raising an error -->
+        <param argument="--allow_warnings" type="boolean" truevalue="--allow_warnings" falsevalue="" label="Allow runtime warnings during the ANOVA tests without raising an error?"/>
+        <!-- Selects which datasets are created; each choice is checked by an output filter -->
+        <param name="out" type="select" multiple="true" optional="false" label="Select output file(s)">
+            <option value="results" selected="true">Results</option>
+            <option value="shift" selected="true">Shift stats</option>
+            <option value="db" selected="true">Database (*.db.dir, *.db.bak, *.db.dat)</option>
+            <option value="log">Log</option>
+        </param>
+    </section>
+</inputs>
+<outputs>
+    <!-- Each dataset is collected from the "results" directory and is only created
+         when its key is selected in the "out" parameter of the "ap" section -->
+    <data name="out_results" format="tabular" label="${tool.name} on ${on_string}: Results" from_work_dir="results/out_nanocompore_results.tsv">
+        <filter>'results' in ap['out']</filter>
+    </data>
+    <data name="out_shift" format="tabular" label="${tool.name} on ${on_string}: Shift stats" from_work_dir="results/out_nanocompore_shift_stats.tsv">
+        <filter>'shift' in ap['out']</filter>
+    </data>
+    <!-- Archive of the database files written by the tar step of the command -->
+    <data name="out_db" format="tar" label="${tool.name} on ${on_string}: Database" from_work_dir="results/db.tar">
+        <filter>'db' in ap['out']</filter>
+    </data>
+    <data name="out_log" format="txt" label="${tool.name} on ${on_string}: log" from_work_dir="results/out_SampComp.log">
+        <filter>'log' in ap['out']</filter>
+    </data>
+</outputs>
+<tests>
+    <!-- #1: default parameters; the default output selection yields three datasets
+         (results, shift stats, database archive) -->
+    <test expect_num_outputs="3">
+        <repeat name="file1_rep">
+            <param name="file" value="sample1.tsv"/>
+            <param name="index" value="sample1.tsv.idx"/>
+        </repeat>
+        <repeat name="file2_rep">
+            <param name="file" value="sample2.tsv"/>
+            <param name="index" value="sample2.tsv.idx"/>
+        </repeat>
+        <param name="fasta" value="reference.fa"/>
+        <output name="out_results">
+            <assert_contents>
+                <has_n_lines n="3"/>
+                <has_text_matching expression="pos&#09;chr.+"/>
+                <has_text_matching expression="22102&#09;NA.+"/>
+            </assert_contents>
+        </output>
+        <output name="out_shift">
+            <assert_contents>
+                <has_n_lines n="3"/>
+                <has_text_matching expression="ref\_id&#09;pos.+"/>
+                <has_text_matching expression="chr&#09;22102.+"/>
+            </assert_contents>
+        </output>
+        <output name="out_db">
+            <assert_contents>
+                <has_size value="5408256"/>
+            </assert_contents>
+        </output>
+    </test>
+    <!-- #2: custom labels, every advanced parameter set explicitly and all four
+         outputs requested, including the log -->
+    <test expect_num_outputs="4">
+        <param name="label1" value="C1"/>
+        <repeat name="file1_rep">
+            <param name="file" value="sample1.tsv"/>
+            <param name="index" value="sample1.tsv.idx"/>
+        </repeat>
+        <param name="label2" value="C2"/>
+        <repeat name="file2_rep">
+            <param name="file" value="sample2.tsv"/>
+            <param name="index" value="sample2.tsv.idx"/>
+        </repeat>
+        <param name="fasta" value="reference.fa"/>
+        <section name="ap">
+            <param name="max_invalid_kmers_freq" value="0.2"/>
+            <param name="min_coverage" value="31"/>
+            <param name="min_ref_length" value="101"/>
+            <param name="comparison_methods" value="GMM,KS,TT,MW"/>
+            <param name="sequence_context" value="1"/>
+            <param name="sequence_context_weights" value="harmonic"/>
+            <param name="pvalue_thr" value="0.06"/>
+            <param name="logit" value="true"/>
+            <param name="allow_warnings" value="true"/>
+            <param name="out" value="results,shift,db,log"/>
+        </section>
+        <output name="out_results">
+            <assert_contents>
+                <has_n_lines n="3"/>
+                <has_text_matching expression="pos&#09;chr.+"/>
+                <has_text_matching expression="22102&#09;NA.+"/>
+            </assert_contents>
+        </output>
+        <output name="out_shift">
+            <assert_contents>
+                <has_n_lines n="3"/>
+                <has_text_matching expression="ref\_id&#09;pos.+"/>
+                <has_text_matching expression="chr&#09;22102.+"/>
+            </assert_contents>
+        </output>
+        <output name="out_db">
+            <assert_contents>
+                <has_size value="5410304"/>
+            </assert_contents>
+        </output>
+        <output name="out_log">
+            <assert_contents>
+                <has_n_lines n="31"/>
+                <has_text_matching expression=".+package\_name.+"/>
+            </assert_contents>
+        </output>
+    </test>
+</tests>
+<help><![CDATA[
+.. class:: infomark
+**What it does**
+@WID@
+SampComp provides a very flexible analysis framework with a few mandatory options and many optional parameters.
+First, SampComp parses the sample eventalign collapse files and then the observed results are piled-up per reference at position level. In a second step, positions are compared using various statistical methods and the statistics are stored in a shelve DBM database containing the results for all positions with sufficient coverage.
+**Input**
+SampComp requires sample files obtained with NanopolishComp EventalignCollapse as explained before (see data preparation) for both the control and the experimental conditions. 2 conditions are expected and at least 2 replicates per condition are highly recommended.
+A transcriptome FASTA reference file is required to extract kmer sequences during the analyses. The reference has to be the same as the one used at the mapping step.
+Optionally, a BED file containing the genome annotations corresponding to the transcriptome fasta file can be provided. In that case Nanocompore will also convert the transcript coordinates into the genome space.
+**Output**
+The database object returned by Sampcomp is a Python GDBM object database indexed by reference id and can be used with SampCompDB.
+.. class:: infomark
+**References**
+@REFERENCES@
+]]></help>
+<expand macro="citations"/>
+</tool>

Mercurial > repos > iuc > nanocompore_sampcomp

comparison sampcomp.xml @ 0:557cf45ff2c8 draft