# HG changeset patch
# User john-mccallum
# Date 1346987122 14400
# Node ID 84be1fe7e34a9e3f4949eaac2d73afb5b07d1a7a
Uploaded
diff -r 000000000000 -r 84be1fe7e34a count_cluster_size.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/count_cluster_size.xml Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,18 @@
+
+
+ Get cluster size from DNAclust output
+ awk 'OFS="\t" {print$1, NF}' $inputClusterFile > $outputfile
+
+
+
+
+
+
+
+
+
+Returns the number of members in each cluster by counting the columns in DNAclust output
+
+
+
+
diff -r 000000000000 -r 84be1fe7e34a cut_dnaclust.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/cut_dnaclust.xml Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,25 @@
+
+
+ Remove clusters below a certain depth
+ cut -f $depth- $inputFile | sed '/^$/d' | sort | uniq > $outputfile
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP**
+
+::
+
+This tool simply cuts off columns from the left,
+e.g. set the depth to 2 to remove singletons, or to 50 to remove clusters with fewer than 50 reads
+
+
+
+
diff -r 000000000000 -r 84be1fe7e34a dnaclust.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dnaclust.xml Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,34 @@
+
+
+ Cluster sequences into OTUs using DNAclust
+ dnaclust -s $similarity -i $inputFastaFile > $outputfile
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP**
+
+see the DNACLUST documentation at http://dnaclust.sourceforge.net/
+
+
+
+Each line will contain the ids of the sequences in each cluster, and the first id of each line is the cluster representative.
+
+Example: To cluster a set of 16S rRNA fragments at 0.98 similarity use:
+./dnaclust file.fasta -l -s 0.98 > clusters
+
+You can optionally specify a k-mer length for the filter. Longer k-mers use more memory, and the filter will also be more specific with longer k-mers. The default, log_4(median length), should be good for most cases.
+
+
+
diff -r 000000000000 -r 84be1fe7e34a dnaclust2tab.awk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/dnaclust2tab.awk Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,10 @@
+#!/bin/awk -f
+BEGIN {
+ FS="\t"
+ OFS="\t"
+}
+{
+OTU = $1
+{for (i=2;i
+
+ Convert dnaclust to tabular
+ dnaclust2tab.awk $inputFile > $outputfile
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP**
+
+This tool collapses dnaclust output into two-column tabular form
+
+
+
+
+
diff -r 000000000000 -r 84be1fe7e34a fastaselect.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastaselect.xml Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,24 @@
+
+
+ Get Fasta file of cluster centres from DNAclust output
+ cat $inputClusterFile | fastaselect -c -f $inputFastaFile > $outputfile
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool returns a fasta file containing the subset of sequences from an input multi-fasta file that match a list of identifiers.
+
+It was developed as part of the DNACLUST package http://dnaclust.sourceforge.net/ for use in retrieving cluster centres but is handy for any extraction of a sequence subset from Galaxy tabular output
+
+
+
+