Mercurial > repos > john-mccallum > dnaclust
changeset 0:84be1fe7e34a draft
Uploaded
author | john-mccallum |
---|---|
date | Thu, 06 Sep 2012 23:05:22 -0400 |
parents | |
children | 8d31c21e4f28 |
files | count_cluster_size.xml cut_dnaclust.xml dnaclust.xml dnaclust2tab.awk dnaclust2tab.xml fastaselect.xml |
diffstat | 6 files changed, 132 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/count_cluster_size.xml Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<!-- Galaxy tool wrapper: reports the size of every cluster listed in a DNAclust cluster file. -->
+<tool id="count_clustersize_1" name="count_clustersize">
+  <description>Get cluster size from DNAclust output</description>
+  <!-- Emits one row per input line: the first field and the field count (NF).
+       OFS is assigned once in BEGIN so the two output columns are tab separated. -->
+  <command>awk 'BEGIN {OFS="\t"} {print $1, NF}' $inputClusterFile > $outputfile</command>
+  <inputs>
+    <param format="tabular" name="inputClusterFile" type="data" label=" Cluster input file from DNAclust"/>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outputfile" />
+  </outputs>
+<help>
+Returns the number of members in a cluster by counting columns from DNA clust output
+
+</help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cut_dnaclust.xml Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<!-- Galaxy tool wrapper: trims DNAclust cluster lines so that only clusters reaching a minimum depth remain. -->
+<tool id="cutClust_1" name="cutClust">
+  <description>Remove clusters below a certain depth</description>
+  <!-- Keeps fields depth..end of each line, deletes lines left empty, then sorts and removes duplicates. -->
+  <command>cut -f $depth- $inputFile | sed '/^$/d' | sort | uniq > $outputfile</command>
+  <inputs>
+    <param format="tabular" name="inputFile" type="data" label="Columnar Input File" help="tabular file of DNAclust clusters" />
+    <!-- integer with min="1": cut numbers fields from 1, and free text should not be interpolated into the command. -->
+    <param name="depth" size="10" type="integer" min="1" value="50" label="Minimum depth for cutoff" />
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outputfile" />
+  </outputs>
+<help>
+.. class:: infomark
+
+**TIP**
+
+::
+
+  This tool simply cuts off columns from the left
+  e.g. set to 2 to remove singletons, 50 to remove clusters with less than 50 reads
+</help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dnaclust.xml Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<!-- Galaxy tool wrapper: runs dnaclust on a FASTA dataset and captures the cluster listing it writes to stdout. -->
+<tool id="dnaclust_1" name="dnaclust">
+  <description>Cluster sequences into OTUs using DNAclust </description>
+  <command> dnaclust -s $similarity -i $inputFastaFile > $outputfile </command>
+  <inputs>
+    <param format="fasta" name="inputFastaFile" type="data" label="Fasta input file"/>
+    <!-- float bounded to 0..1 so only a numeric similarity fraction is passed to -s (default 0.99). -->
+    <param name="similarity" size="10" type="float" min="0" max="1" value="0.99" label="similarity between center and cluster sequences" />
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outputfile" />
+  </outputs>
+
+<help>
+
+.. class:: infomark
+
+**TIP**
+
+see the DNACLUST documentation at http://dnaclust.sourceforge.net/
+
+
+
+Each line will contain the ids of the sequences in each cluster, and the first id of each line is the cluster representative.
+
+Example: To cluster a set of 16S rRNA fragments at 0.98 similarity use::
+
+  ./dnaclust file.fasta -l -s 0.98 > clusters
+
+You can optionally specify a k-mer length for the filter. The longer k-mers use more memory. Also the filter will be more specific with longer k-mers. The default log_4(median length) should be good for most cases.
+</help>
+</tool>
+
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dnaclust2tab.awk Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,10 @@
+#!/bin/awk -f
+# Flatten tab-separated cluster lines (first field = cluster id) into
+# two-column rows: <first field> TAB <member>, one row per later field.
+BEGIN { FS = OFS = "\t" }
+{
+    # Loop through NF inclusive so the last member is not dropped; empty
+    # fields (e.g. produced by a trailing tab) are skipped, not printed.
+    for (i = 2; i <= NF; i++)
+        if ($i != "") print $1, $i
+}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dnaclust2tab.xml Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<!-- Galaxy tool wrapper: runs dnaclust2tab.awk to turn DNAclust cluster lines into two-column rows. -->
+<tool id="dnaclust2tab_1" name="dnaclust2tab">
+  <description>Convert dnaclust to tabular</description>
+  <!-- interpreter="awk -f" runs the script from this tool's directory, so it need not be executable or on PATH. -->
+  <command interpreter="awk -f">dnaclust2tab.awk $inputFile > $outputfile</command>
+  <inputs>
+    <param format="tabular" name="inputFile" type="data" label="Columnar Input File" help="tabular file of DNAclust clusters" />
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outputfile" />
+  </outputs>
+<help>
+.. class:: infomark
+
+**TIP**
+
+This tool collapses dnaclust output into 2 column tabular form
+
+</help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastaselect.xml Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<!-- Galaxy tool wrapper: pipes a DNAclust cluster file into fastaselect to pull sequences out of a FASTA dataset. -->
+<tool id="fastaselect_1" name="fastaselectclust">
+  <description>Get Fasta file of cluster centres from DNAclust output</description>
+  <!-- The cluster dataset is streamed to fastaselect on stdin. It must be written as $inputClusterFile;
+       without the $ cat looks for a literal file called inputClusterFile instead of the selected dataset. -->
+  <command>cat $inputClusterFile | fastaselect -c -f $inputFastaFile > $outputfile </command>
+  <inputs>
+    <param format="fasta" name="inputFastaFile" type="data" label="Fasta input file"/>
+    <param format="tabular" name="inputClusterFile" type="data" label=" Cluster input file from DNAclust"/>
+  </inputs>
+  <outputs>
+    <data format="fasta" name="outputfile" />
+  </outputs>
+
+<help>
+
+This tool returns a fasta file containing a subset of sequences from an input multi-fasta file and a list of identifiers.
+
+It was developed as part of the DNACLUST package http://dnaclust.sourceforge.net/ for use in retrieving cluster centres but is handy for any extraction of a sequence subset from Galaxy tabular output
+
+</help>
+</tool>
+