changeset 12:61c47a1d6a7a

Add a test to check if a valid FASTA file is used (ribocount)
author Vimalkumar Velayudhan <vimal@biotechcoder.com>
date Wed, 19 Aug 2015 11:11:37 +0100
parents 7571f5c89090
children 8a87a2c44d09
files riboplot.egg-info/SOURCES.txt riboplot/ribocore.py tests/test_ribocount.py tests/test_riboplot.py
diffstat 4 files changed, 52 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/riboplot.egg-info/SOURCES.txt	Wed Aug 19 10:14:52 2015 +0100
+++ b/riboplot.egg-info/SOURCES.txt	Wed Aug 19 11:11:37 2015 +0100
@@ -30,7 +30,9 @@
 riboplot.egg-info/dependency_links.txt
 riboplot.egg-info/not-zip-safe
 riboplot.egg-info/top_level.txt
+tests/.test_ribocount.py.swp
 tests/__init__.py
+tests/test_ribocount.py
 tests/test_riboplot.py
 tests/.ropeproject/config.py
 tests/.ropeproject/globalnames
@@ -45,5 +47,7 @@
 tests/data/ribocount.html
 tests/data/ribocount_index.html
 tests/data/riboplot.html
+tests/data/unrelated.fna
+tests/data/unrelated.fna.fai
 tests/data/zebrafish.fna
 tests/data/zebrafish.fna.fai
\ No newline at end of file
--- a/riboplot/ribocore.py	Wed Aug 19 10:14:52 2015 +0100
+++ b/riboplot/ribocore.py	Wed Aug 19 11:11:37 2015 +0100
@@ -97,6 +97,14 @@
     return True
 
 
+def get_fasta_record(fasta):
+    """Return a single transcript name from a valid fasta file"""
+    f = pysam.FastaFile(fasta)
+    transcript = f.references[0]
+    f.close()
+    return transcript
+
+
 def get_fasta_records(fasta, transcripts):
     """Return list of transcript records from the given fasta file.
     Each record will be of the form {'sequence_id': {'sequence': 'AAA', 'length': 3}}
@@ -162,19 +170,26 @@
         create_bam_index(ribo_file)
 
     # Is FASTA file valid?
+    fasta_valid = False
     try:
         fasta_valid = is_fasta_valid(transcriptome_fasta)
     except IOError:
         log.error('Transcriptome FASTA file is not valid')
         raise
 
-    if fasta_valid and transcript_name:
-        try:
-            get_fasta_records(transcriptome_fasta, [transcript_name])
-        except IOError:
-            log.error('Transcriptome FASTA file is not valid')
-            raise
+    if fasta_valid:
+        if transcript_name:
+            try:
+                get_fasta_records(transcriptome_fasta, [transcript_name])
+            except IOError:
+                log.error('Could not get FASTA sequence of "{}" from transcriptome FASTA file'.format(transcript_name))
+                raise
+        else:
+            # ribocount doesn't have a transcript option so we get the first
+            # sequence name from the fasta file
+            transcript_name =  get_fasta_record(transcriptome_fasta)
 
+        # check if transcript also exists in BAM
         with pysam.AlignmentFile(ribo_file, 'rb') as bam_file:
             if transcript_name not in bam_file.references:
                 msg = 'Transcript "{}" does not exist in BAM file'.format(transcript_name)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test_ribocount.py	Wed Aug 19 11:11:37 2015 +0100
@@ -0,0 +1,25 @@
+import os
+import logging
+import unittest
+
+from riboplot import ribocore, riboplot, ribocount
+
+# use testing configuration
+CONFIG = ribocount.CONFIG = riboplot.config.TestingConfig()
+logging.disable(logging.CRITICAL)
+
+RIBO_FILE = os.path.join(CONFIG.DATA_DIR, '5hRPFsorted.bam')
+RNA_FILE = os.path.join(CONFIG.DATA_DIR, '5hmRNAsorted.bam')
+TRANSCRIPT_NAME = 'gi|148357119|ref|NM_001098396.1|'
+TRANSCRIPTOME_FASTA = os.path.join(CONFIG.DATA_DIR, 'zebrafish.fna')
+TRANSCRIPTOME_FASTA_MINUS1 = os.path.join(CONFIG.DATA_DIR, 'zebrafish_minus1.fna')
+UNRELATED_FASTA = os.path.join(CONFIG.DATA_DIR, 'unrelated.fna')
+
+
+class RiboCountTestCase(unittest.TestCase):
+
+    def test_unrelated_fasta_file(self):
+        """If an unrelated fasta file is used, raise an error"""
+        parser = ribocount.create_parser()
+        args = parser.parse_args(['-b', RIBO_FILE, '-f', UNRELATED_FASTA])
+        self.assertRaises(ribocore.ArgumentError, ribocount.main, args)
--- a/tests/test_riboplot.py	Wed Aug 19 10:14:52 2015 +0100
+++ b/tests/test_riboplot.py	Wed Aug 19 11:11:37 2015 +0100
@@ -118,6 +118,7 @@
         args = parser.parse_args(['-b', RIBO_FILE,  '-f', TRANSCRIPTOME_FASTA, '-t', TRANSCRIPT_NAME, '-n', TRANSCRIPTOME_FASTA])
         self.assertRaises(ValueError, ribocore.check_optional_arguments, ribo_file=args.ribo_file, rna_file=args.rna_file)
 
+
 class RiboPlotTestCase(unittest.TestCase):
 
     def test_get_codon_positions(self):
@@ -146,7 +147,7 @@
 
     def test_transcript_with_no_counts(self):
         """If the transcript has no ribocounts, no plot should be produced."""
-        transcript = 'gi|62955616|ref|NM_001017822.1|' # has no reads
+        transcript = 'gi|62955616|ref|NM_001017822.1|'  # has no reads
         output_dir = tempfile.mkdtemp()
         parser = riboplot.create_parser()
         args = parser.parse_args(['-b', RIBO_FILE, '-f', TRANSCRIPTOME_FASTA, '-t', transcript, '-o', output_dir])
@@ -154,13 +155,3 @@
         for fname in ('riboplot.png', 'riboplot.svg', 'RiboCounts.csv'):
             self.assertFalse(os.path.exists(os.path.join(output_dir, fname)))
         shutil.rmtree(output_dir)
-
-    @unittest.skip('todo')
-    def test_get_ribo_counts(self):
-        """Get RiboSeq read counts"""
-        pass
-
-    @unittest.skip('todo')
-    def test_write_ribo_counts(self):
-        """Write RiboSeq read counts as CSV."""
-        pass