changeset 0:84be1fe7e34a draft

Uploaded
author john-mccallum
date Thu, 06 Sep 2012 23:05:22 -0400
parents
children 8d31c21e4f28
files count_cluster_size.xml cut_dnaclust.xml dnaclust.xml dnaclust2tab.awk dnaclust2tab.xml fastaselect.xml
diffstat 6 files changed, 132 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/count_cluster_size.xml	Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<tool id="count_clustersize_1" name="count_clustersize">
+  <description>Get cluster size from DNAclust output</description>
+  <!-- For every input line, print its first field, a tab, then its field count (NF). -->
+  <!-- OFS is set once in BEGIN rather than as a side effect of a always-true pattern. -->
+  <command>awk 'BEGIN { OFS = "\t" } { print $1, NF }' "$inputClusterFile" > "$outputfile"</command>
+  <inputs>
+    <param format="tabular" name="inputClusterFile" type="data" label=" Cluster input file from DNAclust"/>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outputfile" />
+  </outputs>
+
+<help>
+Returns the number of members in each cluster by counting the columns on each line of DNAclust output.
+</help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cut_dnaclust.xml	Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,25 @@
+<?xml version="1.0"?>
+<tool id="cutClust_1" name="cutClust">
+  <description>Remove clusters below a certain depth</description>
+  <!-- Keep the columns from "depth" to the end of each line, drop lines left empty, then sort and de-duplicate. -->
+  <command>cut -f ${depth}- "$inputFile" | sed '/^$/d' | sort | uniq > "$outputfile"</command>
+  <inputs>
+    <param format="tabular" name="inputFile" type="data" label="Columnar Input File" help="tabular file of DNAclust clusters" />
+    <!-- integer (not free text) so that only a valid cut field number can reach the shell command -->
+    <param name="depth" size="10" type="integer" min="1" value="50" label="Minimum depth for cutoff" />
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outputfile" />
+  </outputs>
+
+<help>
+.. class:: infomark
+
+**TIP**
+
+This tool simply cuts off columns from the left,
+e.g. set to 2 to remove singletons, or 50 to remove clusters with fewer than 50 reads.
+</help>
+
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dnaclust.xml	Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,34 @@
+<?xml version="1.0"?>
+<tool id="dnaclust_1" name="dnaclust">
+  <description>Cluster sequences  into OTUs using DNAclust </description>
+  <!-- Runs dnaclust on the FASTA input at the chosen similarity; stdout is redirected into the output dataset. -->
+  <command>dnaclust -s $similarity -i "$inputFastaFile" > "$outputfile"</command>
+  <inputs>
+    <param format="fasta" name="inputFastaFile" type="data" label="Fasta input file"/>
+    <!-- float (not free text) so that only a number can be interpolated into the shell command -->
+    <param name="similarity" size="10" type="float" min="0" max="1" value="0.99" label="similarity between center and cluster sequences" />
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outputfile" />
+  </outputs>
+
+<help>
+
+.. class:: infomark
+
+**TIP**
+
+see the  DNACLUST documentation at http://dnaclust.sourceforge.net/
+
+
+Each line will contain the ids of the sequences in each cluster, and the first id of each line is the cluster representative.
+
+Example: To cluster a set of 16S rRNA fragments at 0.98 similarity use::
+
+    ./dnaclust file.fasta -l -s 0.98 > clusters
+
+You can optionally specify a k-mer length for the filter. The longer k-mers use more memory.  Also the filter will be more specific with longer k-mers. The default log_4(median length) should be good for
+most cases.
+</help>
+</tool>
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dnaclust2tab.awk	Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,10 @@
+#!/bin/awk -f
+# Flatten tab-separated cluster lines into two columns:
+# the first field of the line (OTU id), then one row per remaining field.
+BEGIN { FS = OFS = "\t" }
+{
+  otu = $1
+  for (i = 2; i <= NF; i++)        # <= NF: the previous i < NF never emitted the last field
+    if ($i != "")                  # skip empty fields, e.g. one produced by a trailing tab
+      print otu, $i
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dnaclust2tab.xml	Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<tool id="dnaclust2tab_1" name="dnaclust2tab">
+  <description>Convert dnaclust to tabular</description>
+  <!-- interpreter makes Galaxy run the awk script from this tool's directory instead of requiring it on PATH -->
+  <command interpreter="awk -f">dnaclust2tab.awk "$inputFile" > "$outputfile"</command>
+  <inputs>
+    <param format="tabular" name="inputFile" type="data" label="Columnar Input File" help="tabular file of DNAclust clusters" />
+  </inputs>
+  <outputs>
+    <data format="tabular" name="outputfile" />
+  </outputs>
+<help>
+.. class:: infomark
+
+**TIP**
+
+This tool converts dnaclust output into 2 column tabular form: the first id of each line paired with each of the other ids on that line.
+
+</help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastaselect.xml	Thu Sep 06 23:05:22 2012 -0400
@@ -0,0 +1,24 @@
+<?xml version="1.0"?>
+<tool id="fastaselect_1" name="fastaselectclust">
+  <description>Get Fasta file of cluster centres from DNAclust output</description>
+  <!-- The cluster file is streamed to fastaselect on stdin. It has to be written as a template -->
+  <!-- variable (with the leading dollar sign) so that Galaxy substitutes the dataset path. -->
+  <command>cat "$inputClusterFile" | fastaselect -c -f "$inputFastaFile" > "$outputfile"</command>
+  <inputs>
+    <param format="fasta" name="inputFastaFile" type="data" label="Fasta input file"/>
+    <param format="tabular" name="inputClusterFile" type="data" label=" Cluster input file from DNAclust"/>
+
+  </inputs>
+  <outputs>
+    <data format="fasta" name="outputfile" />
+  </outputs>
+
+<help>
+
+This tool returns a fasta file containing a subset of sequences from an input multi-fasta file and a list of identifiers.
+
+It was developed as part of the DNACLUST package http://dnaclust.sourceforge.net/  for use in retrieving cluster centres but is handy for any extraction of a sequence subset from Galaxy tabular output
+
+</help>
+</tool>
+