Mercurial > repos > jowong > kwip
changeset 0:6c38443b46ac draft
planemo upload
author | jowong |
---|---|
date | Thu, 22 Nov 2018 09:47:44 -0500 |
parents | |
children | 8217df2fd8c5 |
files | kwip.xml kwip_postprocess.py kwip_postprocess.xml tool_dependencies.xml |
diffstat | 4 files changed, 129 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!-- kwip.xml: Galaxy tool wrapper that runs kWIP on a collection of hashed-read datasets
     and collects the kernel and distance matrices it writes. -->
<tool id="kwip" name="kwip" version="1.2.9">
    <description>Calculates k-mer weighted inner product, a de novo estimator of genetic similarity</description>
    <requirements>
        <requirement type="package" version="0.2.0">kwip</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Copy every collection element into the working directory under
        ## "<element identifier>.ct.gz" so that the names kwip sees are the
        ## element identifiers rather than Galaxy's internal dataset paths.
        ## Paths and identifiers are single-quoted so that names containing
        ## spaces or shell metacharacters stay one argument.
        #for $input in $inputs
            cp '$input' '${input.element_identifier}.ct.gz' &&
        #end for
        kwip
            -t $advanced.thread
            ## Expands to "--unweighted" when the box is checked, else to nothing.
            $advanced.unweighted
            -k kwip.kernel
            -d kwip.dist
            #for $input in $inputs
                '${input.element_identifier}.ct.gz'
            #end for
        ## Remove the staged copies once kwip has finished successfully.
        #for $input in $inputs
            && rm '${input.element_identifier}.ct.gz'
        #end for
    ]]></command>
    <inputs>
        <param name="inputs" format="data" type="data_collection" label="inputs" help="Specify dataset with hashed reads"/>
        <section name="advanced" title="Advanced options" expanded="false">
            <!-- No explicit name: Galaxy derives the parameter name "unweighted" from the argument attribute. -->
            <param type="boolean" argument="--unweighted" label="Unweighted Inner Product" checked="false" truevalue="--unweighted" falsevalue="" help="Use the unweighted inner product kernel. (default: False)"/>
            <param name="thread" type="integer" value="1" min="1" label="Threads" help="Number of simultaneous threads to execute (default: 1)"/>
        </section>
    </inputs>
    <outputs>
        <data name="kwip_kernel" label="kWip kernel" format="txt" type="data" from_work_dir="kwip.kernel"/>
        <data name="kwip_distance" label="kWip distance" format="txt" type="data" from_work_dir="kwip.dist"/>
    </outputs>
    <tests>
    </tests>
    <help><![CDATA[
        usage: USAGE: kwip [options] hashes

        kWip Options.

        optional arguments:
        -t, --threads Number of threads to utilise. [default N_CPUS]
        -k, --kernel Output file for the kernel matrix. [default None]
        -d, --distance Output file for the distance matrix. [default stdout]
        -U, --unweighted Use the unweighted inner product kernel. [default off]
        -w, --weights Bin weight vector file (input, or output w/ -C).
        -C, --calc-weights Calculate only the bin weight vector, not kernel matrix.
        -h, --help Print this help message.
        -V, --version Print the version string.
        -v, --verbose Increase verbosity. May or may not actually do anything.
        -q, --quiet Execute silently but for errors.

        Each sample's oxli Countgraph should be specified after arguments:
        kwip [options] sample1.ct sample2.ct ... sampleN.ct

    ]]></help>
    <citations>
        <citation type="bibtex">
@misc{githubkWIP,
  author = {Murray, Kevin},
  year = {2015},
  title = {kWIP},
  publisher = {GitHub},
  journal = {GitHub repository},
  url = {https://github.com/kdmurray91/kWIP},
}</citation>
    </citations>
</tool>
#!/usr/bin/env python
"""kwip_postprocess.py: strip FASTQ file-name suffixes from kWIP Galaxy output.

Reads the file given with -i/--input line by line, removes every occurrence of
"_1.fastq", "_2.fastq" or ".fastq" (each optionally followed by ".gz") and
writes the result to 'kwip_postprocess_output.txt' in the current working
directory.
"""

import sys
import argparse as ap
import re

# File written into the current working directory; the Galaxy wrapper picks it
# up by this exact name, so it must not change.
OUTPUT_FILENAME = 'kwip_postprocess_output.txt'

# Optional read-pair marker (_1 / _2), a literal ".fastq", then any number of
# literal ".gz" extensions. The dots are escaped so that only real file
# extensions are removed (an unescaped "." would match any character).
FASTQ_SUFFIX = re.compile(r'(?:_[12])?\.fastq(?:\.gz)*')


def strip_fastq_suffix(line):
    """Return ``line`` with every FASTQ file-name suffix removed.

    :param line: one line of text (may contain several names).
    :returns: the same text without "_1.fastq", "_2.fastq" or ".fastq"
              suffixes and without any ".gz" extensions that follow them.
    """
    return FASTQ_SUFFIX.sub('', line)


def build_parser():
    """Create the command-line parser (single required option: -i/--input)."""
    parser = ap.ArgumentParser(prog='kwip_postprocess', conflict_handler='resolve',
                               description="Postprocess galaxy kWIP output")
    # Named input_group rather than "input" so the builtin is not shadowed.
    input_group = parser.add_argument_group('Input', '')
    input_group.add_argument('-i', '--input', nargs=1, required=True, help="kWIP galaxy OUTPUT")
    return parser


def main(argv=None):
    """Run the post-processing step.

    :param argv: argument list without the program name; defaults to
                 ``sys.argv[1:]``.
    :returns: 0 on success, 1 when called without any arguments (after
              printing the usage line).
    """
    if argv is None:
        argv = sys.argv[1:]

    parser = build_parser()

    # sys.argv always contains the program name, so "no arguments given" means
    # the list after the program name is empty.
    if not argv:
        parser.print_usage()
        return 1

    args = parser.parse_args(argv)

    # nargs=1 makes args.input a one-element list.
    with open(args.input[0]) as kwip_output:
        with open(OUTPUT_FILENAME, 'w') as output:
            for line in kwip_output:
                output.write(strip_fastq_suffix(line))
    return 0


if __name__ == '__main__':
    sys.exit(main())
<!-- kwip_postprocess.xml: Galaxy tool wrapper that runs kwip_postprocess.py on one kWIP output dataset. -->
<tool id="kwip_postprocess" name="kWIP Postprocess" version="1.0.0">
    <description>Postprocess kWIP galaxy output</description>
    <!-- The script is called through an explicit "python" command and located via
         $__tool_directory__ instead of the deprecated interpreter attribute.
         The dataset path is quoted so it is always passed as a single argument. -->
    <command detect_errors="exit_code"><![CDATA[
        python '$__tool_directory__/kwip_postprocess.py' -i '$kwip_output'
    ]]></command>
    <inputs>
        <param name="kwip_output" type="data" format="txt" label="kWIP output"/>
    </inputs>
    <outputs>
        <!-- The script writes this file name into the job working directory. -->
        <data name="output" format="txt" from_work_dir="kwip_postprocess_output.txt"/>
    </outputs>
    <help>
This tool processes the kwip galaxy output such that it is in line with the command line
    </help>
    <citations>
    </citations>
</tool>
<?xml version="1.0"?>
<!-- tool_dependencies.xml: Tool Shed installation recipe for the kwip 0.2.0 package. -->
<tool_dependency>
    <package name="kwip" version="0.2.0">
        <install version="1.0">
            <actions_group>
                <!-- Platform-specific step: fetch the 0.2.0 binary release archive and
                     move its contents into the install directory. -->
                <actions architecture="x86_64" os="linux">
                    <action type="download_by_url">https://github.com/kdmurray91/kWIP/releases/download/0.2.0/kwip-binaries_0.2.0.tar.gz</action>
                    <action type="move_directory_files">
                        <source_directory>.</source_directory>
                        <destination_directory>$INSTALL_DIR</destination_directory>
                    </action>
                </actions>
                <!-- Put the installed bin directory at the front of PATH. -->
                <action type="set_environment">
                    <environment_variable action="prepend_to" name="PATH">$INSTALL_DIR/bin</environment_variable>
                </action>
            </actions_group>
        </install>
        <readme>
<![CDATA[
kWIP works by decomposing sequencing reads to short k-mers, hashing these k-mers and performing pairwise distance calculation between these sample k-mer hashes. We use khmer from the DIB lab, UC Davis to hash sequencing reads. KWIP calculates the distance between samples in a computationally efficient manner, and generates a distance matrix which may be used by downstream tools. The power of kWIP comes from the weighting applied across different hash values, which decreases the effect of erroneous, rare or over-abundant k-mers while focusing on k-mers which give the most insight into the similarity of samples.
]]>
        </readme>
    </package>
</tool_dependency>