Mercurial > repos > iuc > nanopolishcomp_eventaligncollapse
diff eventaligncollapse.xml @ 0:6847a625db4d draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ commit e96954b0926211a1da3292c00aada6d366a2b25d"
author | iuc |
---|---|
date | Thu, 30 Apr 2020 05:56:29 -0400 |
parents | |
children | ad011fc670d6 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/eventaligncollapse.xml Thu Apr 30 05:56:29 2020 -0400
@@ -0,0 +1,166 @@
+<?xml version="1.0"?>
+<tool id="nanopolishcomp_eventaligncollapse" name="Eventalign Collapse" version="@TOOL_VERSION@+@WRAPPER_VERSION@">
+    <description>by kmers rather than by event</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <stdio></stdio>
+    <version_command>NanopolishComp --version</version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        ## initialize
+        ## requires a minimum of 3 threads: use GALAXY_SLOTS (default 3) but never less than 3
+        threads=\$((3 > \${GALAXY_SLOTS:-3} ? 3 : \${GALAXY_SLOTS:-3})) &&
+
+        ## run; $s expands to '-s' only when writing raw samples was requested, otherwise to nothing
+        NanopolishComp
+        Eventalign_collapse
+        -i '$i'
+        $s
+        -r $r
+        -f
+        #for $current in $f
+            $current
+        #end for
+        -o 'results'
+        -p 'out'
+        -t \$threads
+        -v
+    ]]></command>
+    <inputs>
+        <param argument="-i" type="data" format="tabular" label="Select nanopolish eventalign file"/>
+        <param argument="-s" type="boolean" truevalue="-s" falsevalue="" label="Should raw samples be written?" help="You need to run nanopolish eventalign with --samples option to make use of this feature."/>
+        <param argument="-r" type="integer" value="0" min="0" label="Set maximum number of reads to parse" help="Use 0 to deactivate this option."/>
+        <param argument="-f" type="select" multiple="true" label="Select statistical fields to compute" help="You need to run nanopolish eventalign with --samples option to make use of this feature.">
+            <option value="mean" selected="true">Mean</option>
+            <option value="std">Std</option>
+            <option value="median" selected="true">Median</option>
+            <option value="mad">Mad</option>
+            <option value="num_signals" selected="true">Number of signals</option>
+        </param>
+        <param name="out" type="select" multiple="true" label="Select output file(s)" help="">
+            <option value="eventalign_collapse" selected="true">Eventalign Collapse</option>
+            <option value="index" selected="true">Index</option>
+            <option value="log">Log</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="out_eventalign_collapse" format="tabular" from_work_dir="results/out_eventalign_collapse.tsv" label="${tool.name} on ${on_string}: Eventalign Collapse">
+            <filter>'eventalign_collapse' in out</filter>
+        </data>
+        <data name="out_index" format="tabular" from_work_dir="results/out_eventalign_collapse.tsv.idx" label="${tool.name} on ${on_string}: Index">
+            <filter>'index' in out</filter>
+        </data>
+        <data name="out_log" format="txt" from_work_dir="results/out_eventalign_collapse.log" label="${tool.name} on ${on_string}: log">
+            <filter>'log' in out</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- #1 default parameters, all three outputs requested -->
+        <test expect_num_outputs="3">
+            <param name="i" value="sample.tsv"/>
+            <param name="out" value="eventalign_collapse,index,log"/>
+            <output name="out_eventalign_collapse">
+                <assert_contents>
+                    <has_n_lines n="236"/>
+                    <has_text_matching expression="ref_pos\tref_kmer"/>
+                    <has_text_matching expression="22102\tGGAAA"/>
+                </assert_contents>
+            </output>
+            <output name="out_index">
+                <assert_contents>
+                    <has_n_lines n="60"/>
+                    <has_text_matching expression="ref_id\tref_start"/>
+                    <has_text_matching expression="chr\t22102"/>
+                </assert_contents>
+            </output>
+            <output name="out_log">
+                <assert_contents>
+                    <has_n_lines n="13"/>
+                    <has_line line="General options:"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- #2 raw samples enabled, read limit set, every statistical field selected, no log output -->
+        <test expect_num_outputs="2">
+            <param name="i" value="sample.tsv"/>
+            <param name="s" value="true"/>
+            <param name="r" value="10"/>
+            <param name="f" value="mean,std,median,mad,num_signals"/>
+            <param name="out" value="eventalign_collapse,index"/>
+            <output name="out_eventalign_collapse">
+                <assert_contents>
+                    <has_n_lines n="236"/>
+                    <has_text_matching expression="ref_pos\tref_kmer"/>
+                    <has_text_matching expression="22102\tGGAAA"/>
+                </assert_contents>
+            </output>
+            <output name="out_index">
+                <assert_contents>
+                    <has_n_lines n="60"/>
+                    <has_text_matching expression="ref_id\tref_start"/>
+                    <has_text_matching expression="chr\t22102"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+@WID@
+
+Eventalign_collapse collapses the raw file generated by nanopolish eventalign by kmers rather than by event.
+
+**Input**
+
+A nanopolish eventalign tabular output file.
+
+**Output**
+
+Contrary to nanopolish eventalign output text file, in Eventalign_collapse the reads are separated by a hashtag header containing the read_id and ref_id. This reduces the redundancy and makes it easier to find the start and end of a read.
+
+::
+
+    Example : #7ef1d7b9-5824-4382-b23b-78d82c07ebbd YHR055C.
+
+The main data file contains the following fields:
+
+- ref_pos: Position of the kmer on the reference sequence.
+- ref_kmer: Sequence of the reference kmers.
+- num_events: Number of events for this kmer before collapsing.
+- dwell_time: dwell time for this kmer in seconds
+- NNNNN_dwell_time: dwell time of events for this kmer with a model sequence "NNNNN" (events ignored by nanopolish HMM).
+- mismatch_dwell_time: dwell time of events for this kmer with a model sequence different from the reference kmer
+- start_idx: Only if nanopolish eventalign called with --signal_idx. Start coordinate on original raw signal in fast5 file
+- end_idx: Only if nanopolish eventalign called with --signal_idx. End coordinate on original raw signal in fast5 file
+- mean: Only if nanopolish eventalign called with --samples. Mean of the normalised signal values provided by Nanopolish eventalign
+- median: Only if nanopolish eventalign called with --samples. Median of the normalised signal values provided by Nanopolish eventalign
+- std: Only if nanopolish eventalign called with --samples. Standard deviation of the normalised signal values provided by Nanopolish eventalign
+- mad: Only if nanopolish eventalign called with --samples. Median absolute deviation of the normalised signal values provided by Nanopolish eventalign
+- num_signals: Only if nanopolish eventalign called with --samples. Number of raw signal points.
+- samples: Only if nanopolish eventalign called with --samples and Eventalign_collapse called with --write_samples. List of normalised signal intensity values for this kmer
+
+In addition Eventalign_collapse also generates a useful index file containing read-level information. It contains the following fields:
+
+- read_id: Name or index of the read
+- ref_id: Name of the reference sequence the read was aligned on (contig)
+- ref_start: Start coordinate of the alignment on the reference sequence
+- ref_end: End coordinate of the alignment on the reference sequence
+- dwell_time: Cumulative dwell time in seconds for the entire resquiggled sequence
+- kmers: Overall number of resquiggled kmers
+- NNNNN_kmers: Number of resquiggled kmers containing at least 1 event for which the model sequence was "NNNNN"
+- mismatching_kmers: Number of resquiggled kmers containing at least 1 event for which the model sequence diverged from the reference sequence
+- missing_kmers: Number of skipped/missing reference positions in nanopolish output
+- byte_offset: Number of characters before the start of the sequence in the main output file. This can be used in conjunction with file.seek() to directly access the start of a read. An example is provided in the Usage notebook.
+- byte_len: Length of characters after byte_offset to the end of the read, excluding the last newline. This can be used in conjunction with read() to read all the text chunk corresponding to the read.
+
+.. class:: infomark
+
+**References**
+
+@REFERENCES@
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file