# HG changeset patch # User Vimalkumar Velayudhan # Date 1439979097 -3600 # Node ID 61c47a1d6a7ae0d264f38586b2feca143b97e277 # Parent 7571f5c89090956ccc2832f76c71a3b8f3871de5 Add a test to check if a valid FASTA file is used (ribocount) diff -r 7571f5c89090 -r 61c47a1d6a7a riboplot.egg-info/SOURCES.txt --- a/riboplot.egg-info/SOURCES.txt Wed Aug 19 10:14:52 2015 +0100 +++ b/riboplot.egg-info/SOURCES.txt Wed Aug 19 11:11:37 2015 +0100 @@ -30,7 +30,9 @@ riboplot.egg-info/dependency_links.txt riboplot.egg-info/not-zip-safe riboplot.egg-info/top_level.txt +tests/.test_ribocount.py.swp tests/__init__.py +tests/test_ribocount.py tests/test_riboplot.py tests/.ropeproject/config.py tests/.ropeproject/globalnames @@ -45,5 +47,7 @@ tests/data/ribocount.html tests/data/ribocount_index.html tests/data/riboplot.html +tests/data/unrelated.fna +tests/data/unrelated.fna.fai tests/data/zebrafish.fna tests/data/zebrafish.fna.fai \ No newline at end of file diff -r 7571f5c89090 -r 61c47a1d6a7a riboplot/ribocore.py --- a/riboplot/ribocore.py Wed Aug 19 10:14:52 2015 +0100 +++ b/riboplot/ribocore.py Wed Aug 19 11:11:37 2015 +0100 @@ -97,6 +97,14 @@ return True +def get_fasta_record(fasta): + """Return a single transcript name from a valid fasta file""" + f = pysam.FastaFile(fasta) + transcript = f.references[0] + f.close() + return transcript + + def get_fasta_records(fasta, transcripts): """Return list of transcript records from the given fasta file. Each record will be of the form {'sequence_id': {'sequence': 'AAA', 'length': 3}} @@ -162,19 +170,26 @@ create_bam_index(ribo_file) # Is FASTA file valid? + fasta_valid = False try: fasta_valid = is_fasta_valid(transcriptome_fasta) except IOError: log.error('Transcriptome FASTA file is not valid') raise - if fasta_valid and transcript_name: - try: - get_fasta_records(transcriptome_fasta, [transcript_name]) - except IOError: - log.error('Transcriptome FASTA file is not valid') - raise + if fasta_valid: + if transcript_name: + try: + get_fasta_records(transcriptome_fasta, [transcript_name]) + except IOError: + log.error('Could not get FASTA sequence of "{}" from transcriptome FASTA file'.format(transcript_name)) + raise + else: + # ribocount doesn't have a transcript option so we get the first + # sequence name from the fasta file + transcript_name = get_fasta_record(transcriptome_fasta) + # check if transcript also exists in BAM with pysam.AlignmentFile(ribo_file, 'rb') as bam_file: if transcript_name not in bam_file.references: msg = 'Transcript "{}" does not exist in BAM file'.format(transcript_name) diff -r 7571f5c89090 -r 61c47a1d6a7a tests/test_ribocount.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tests/test_ribocount.py Wed Aug 19 11:11:37 2015 +0100 @@ -0,0 +1,25 @@ +import os +import logging +import unittest + +from riboplot import ribocore, riboplot, ribocount + +# use testing configuration +CONFIG = ribocount.CONFIG = riboplot.config.TestingConfig() +logging.disable(logging.CRITICAL) + +RIBO_FILE = os.path.join(CONFIG.DATA_DIR, '5hRPFsorted.bam') +RNA_FILE = os.path.join(CONFIG.DATA_DIR, '5hmRNAsorted.bam') +TRANSCRIPT_NAME = 'gi|148357119|ref|NM_001098396.1|' +TRANSCRIPTOME_FASTA = os.path.join(CONFIG.DATA_DIR, 'zebrafish.fna') +TRANSCRIPTOME_FASTA_MINUS1 = os.path.join(CONFIG.DATA_DIR, 'zebrafish_minus1.fna') +UNRELATED_FASTA = os.path.join(CONFIG.DATA_DIR, 'unrelated.fna') + + +class RiboCountTestCase(unittest.TestCase): + + def test_unrelated_fasta_file(self): + """If an unrelated fasta file is used, raise an error""" + parser = ribocount.create_parser() + args = parser.parse_args(['-b', RIBO_FILE, '-f', UNRELATED_FASTA]) + self.assertRaises(ribocore.ArgumentError, ribocount.main, args) diff -r 7571f5c89090 -r 61c47a1d6a7a tests/test_riboplot.py --- a/tests/test_riboplot.py Wed Aug 19 10:14:52 2015 +0100 +++ b/tests/test_riboplot.py Wed Aug 19 11:11:37 2015 +0100 @@ -118,6 +118,7 @@ args = parser.parse_args(['-b', RIBO_FILE, '-f', TRANSCRIPTOME_FASTA, '-t', TRANSCRIPT_NAME, '-n', TRANSCRIPTOME_FASTA]) self.assertRaises(ValueError, ribocore.check_optional_arguments, ribo_file=args.ribo_file, rna_file=args.rna_file) + class RiboPlotTestCase(unittest.TestCase): def test_get_codon_positions(self): @@ -146,7 +147,7 @@ def test_transcript_with_no_counts(self): """If the transcript has no ribocounts, no plot should be produced.""" - transcript = 'gi|62955616|ref|NM_001017822.1|' # has no reads + transcript = 'gi|62955616|ref|NM_001017822.1|' # has no reads output_dir = tempfile.mkdtemp() parser = riboplot.create_parser() args = parser.parse_args(['-b', RIBO_FILE, '-f', TRANSCRIPTOME_FASTA, '-t', transcript, '-o', output_dir]) @@ -154,13 +155,3 @@ for fname in ('riboplot.png', 'riboplot.svg', 'RiboCounts.csv'): self.assertFalse(os.path.exists(os.path.join(output_dir, fname))) shutil.rmtree(output_dir) - - @unittest.skip('todo') - def test_get_ribo_counts(self): - """Get RiboSeq read counts""" - pass - - @unittest.skip('todo') - def test_write_ribo_counts(self): - """Write RiboSeq read counts as CSV.""" - pass