changeset 10:ca56c4ab0edf default tip

Incorporate samtools invocation
author mwoodbri
date Thu, 27 Oct 2011 19:01:46 -0400
parents e3b8eefe6586
children
files mmseq.sh mmseq.xml mmseq/mmseq.sh mmseq/mmseq.xml
diffstat 4 files changed, 60 insertions(+), 60 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mmseq.sh	Thu Oct 27 19:01:46 2011 -0400
@@ -0,0 +1,10 @@
+#!/bin/sh
+PREFIX=temp  # basename shared by every intermediate file written to the current directory
+FA=$(grep -P "index\t$2\t" "$3/sam_fa_indices.loc" | cut -f3)  # column 3 of the "index" row whose 2nd column is build $2
+samtools view -bt "$FA.fai" -o "$PREFIX.bam" "$1" 2> /dev/null  # SAM input ($1) to BAM, using the .fai next to the reference
+samtools sort -n "$PREFIX.bam" "$PREFIX.namesorted" 2> /dev/null  # sort by read name; writes $PREFIX.namesorted.bam
+bam2hits "$FA" "$PREFIX.namesorted.bam" > "$PREFIX.hits" 2> /dev/null  # hits file is captured from stdout
+mmseq "$PREFIX.hits" "$PREFIX" > /dev/null  # produces the three $PREFIX.*mmseq files moved below
+mv "$PREFIX.mmseq" "$4"  # quoted so a path with spaces stays a single argument
+mv "$PREFIX.identical.mmseq" "$5"
+mv "$PREFIX.gene.mmseq" "$6"
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mmseq.xml	Thu Oct 27 19:01:46 2011 -0400
@@ -0,0 +1,50 @@
+<tool id="mmseq" name="MMSEQ">
+  <description>Haplotype and isoform specific expression estimation using multi-mapping RNA-seq reads</description>
+  <command interpreter="bash">
+    mmseq.sh
+      '${alignments_sam}'
+      '${alignments_sam.metadata.dbkey}'
+      '${GALAXY_DATA_INDEX_DIR}'
+      '${transcripts}'
+      '${identical_transcripts}'
+      '${genes}'
+  </command> <!-- arguments are single-quoted so each path reaches mmseq.sh as one shell word -->
+  <inputs>
+    <param name="alignments_sam" type="data" format="sam" metadata_name="dbkey" label="SAM file of reads aligned to reference transcripts">
+       <validator type="unspecified_build"/>
+       <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1"
+                  message="Sequences are not currently available for the specified build." line_startswith="index"/> <!-- message kept on one line so it carries no stray whitespace -->
+    </param>
+  </inputs>
+  <outputs> <!-- listed in the same order as the output arguments in the command element -->
+    <data name="transcripts" format="tabular" label="MMSEQ: transcript expression estimates"/>
+    <data name="identical_transcripts" format="tabular" label="MMSEQ: amalgamated transcript expression estimates"/>
+    <data name="genes" format="tabular" label="MMSEQ: gene expression estimates"/>
+  </outputs>
+  <help>
+
+**About MMSEQ**
+
+MMSEQ_ is a novel statistical RNA-seq analysis method for estimating haplotype, isoform and gene specific expression. It deconvolves the mapping of reads to multiple transcripts (isoforms or haplotype-specific isoforms). It can take into account non-uniform read generation and works with paired-end reads. Please cite: Turro, E.; Su, S-Y.; Goncalves, A.; Coin, L.J.M.; Richardson, S. and Lewin, A. (2011). Haplotype and isoform specific expression estimation using multi-mapping RNA-seq reads. Genome Biology. 12:R13.
+
+.. _MMSEQ: http://www.bgx.org.uk/software/mmseq.html
+
+--------
+
+**Input formats**
+
+This tool accepts a SAM file of reads aligned to reference transcripts (for example, the output of a Bowtie alignment). The SAM file is converted to BAM and sorted by read name with SAMtools automatically before MMSEQ is run.
+
+--------
+
+**Outputs**
+
+MMSEQ generates three output files with expectation maximization (EM) and Gibbs sampling (GS) expression estimates with associated Monte Carlo standard errors (MCSE) tabulated in each one of them (Turro et al., 2011).
+
+The first file provides estimates at the transcript/haplo-isoform level. The second file provides aggregate estimates for sets of transcripts that have been amalgamated due to having identical sequences (and so indistinguishable expression levels). The third file aggregates transcript estimates into genes, thus providing gene level estimates. Homozygous transcripts are aggregated together, while heterozygous transcripts are aggregated separately to produce 'haplo-gene' level estimates.	
+
+Out of the three outputs, the amalgamated estimates are recommended for use as the individual transcript estimates exhibit high variability and anti-correlation, but the total expression of two identical transcripts can be well estimated. 
+
+
+  </help>
+</tool>
--- a/mmseq/mmseq.sh	Thu Oct 27 19:00:59 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-#!/bin/sh
-PREFIX=temp
-FA=$(grep -P "index\t$2\t" $3/sam_fa_indices.loc | cut -f3)
-samtools view -bt $FA.fai -o $PREFIX.bam $1 2> /dev/null
-samtools sort -n $PREFIX.bam $PREFIX.namesorted 2> /dev/null
-bam2hits $FA $PREFIX.namesorted.bam > $PREFIX.hits 2> /dev/null
-mmseq $PREFIX.hits $PREFIX > /dev/null
-mv $PREFIX.mmseq $4
-mv $PREFIX.identical.mmseq $5
-mv $PREFIX.gene.mmseq $6
\ No newline at end of file
--- a/mmseq/mmseq.xml	Thu Oct 27 19:00:59 2011 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-<tool id="mmseq" name="MMSEQ">
-  <description>Haplotype and isoform specific expression estimation using multi-mapping RNA-seq reads</description>
-  <command interpreter="bash">
-    mmseq.sh
-      $alignments_sam
-      ${alignments_sam.metadata.dbkey}
-      ${GALAXY_DATA_INDEX_DIR}
-      $transcripts
-      $identical_transcripts
-      $genes
-  </command>
-  <inputs>
-    <param name="alignments_sam" type="data" format="sam" metadata_name="dbkey" label="SAM file of reads aligned to reference transcripts">
-       <validator type="unspecified_build"/>
-       <validator type="dataset_metadata_in_file" filename="sam_fa_indices.loc" metadata_name="dbkey" metadata_column="1" message="Sequences are 
-not currently available for the specified build." line_startswith="index"/>
-    </param>
-  </inputs>
-  <outputs>
-    <data format="tabular" name="transcripts" label="MMSEQ: transcript expression estimates"/>
-    <data format="tabular" name="identical_transcripts" label="MMSEQ: amalgamated transcript expression estimates"/>
-    <data format="tabular" name="genes" label="MMSEQ: gene expression estimates"/>
-  </outputs>
-  <help>
-
-**About MMSEQ**
-
-MMSEQ_ is a novel statistical RNA-seq analysis method for estimating haplotype, isoform and gene specific expression. It deconvolves the mapping of reads to multiple transcripts (isoforms or haplotype-specific isoforms). It can take into account non-uniform read generation and works with paired-end reads. Please cite: Turro, E.; Su, S-Y.; Goncalves, A.; Coin, L.J.M.; Richardson, S. and A., Lewin(2011). Haplotype and isoform specific expression estimation using multi-mapping RNA-seq reads. Genome Biology. 12:R13.
-
-.. _MMSEQ: http://www.bgx.org.uk/software/mmseq.html
-
---------
-
-**Input formats**
-
-MMSEQ accepts sorted BAM file as input. The SAM files obtained as a result of Bowtie alignment can be converted to BAM files and sorted using the SAMTools.
-
---------
-
-**Outputs**
-
-MMSEQ generates three output files with expectation maximization (EM) and Gibbs sampling (GS) expression estimates with associated Monte Carlo standard errors (MCSE) tabulated in each one of them (Turro et al., 2011).
-
-The first file provides estimates at the transcript/haplo-isoform level. The second file provides aggregate estimates for sets of transcripts that have been amalgamated due to having identical sequences (and so indistinguishable expression levels). The third file aggregates transcript estimates into genes, thus providing gene level estimates. Homozygous transcripts are aggregated together, while heterozygous transcripts are aggregated separately to produce 'haplo-gene' level estimates.	
-
-Out of the three outputs, the amalgamated estimates are recommended for use as the individual transcript estimates exhibit high variability and anti-correlation, but the total expression of two identical transcripts can be well estimated. 
-
-
-  </help>
-</tool>