Mercurial > repos > vimalkumarvelayudhan > riboplot
view riboplot/ribocore.py @ 7:096c6bbf4a04
Bugfix: (ribocounts) Read length was sent as rnafile argument in check_optional_arguments
author | Vimalkumar Velayudhan <vimal@biotechcoder.com> |
---|---|
date | Thu, 13 Aug 2015 15:03:20 +0100 |
parents | 2ffa8172dce1 |
children | 7571f5c89090 |
line wrap: on
line source
"""Common functions. """ import pysam import logging import subprocess class ArgumentError(Exception): """Raised when invalid arguments are sent in the command line.""" pass class RiboPlotError(Exception): """General errors relating to riboplot.""" pass class RiboCountError(Exception): """General errors relating to ribocount.""" pass class RNACountsError(Exception): """For errors related to RNA Coverage generation using bedtools. """ pass def is_bam_valid(bam_file): """Check if bam file is valid. Raises a ValueError if pysam cannot read the file. #TODO: pysam does not differentiate between BAM and SAM """ try: f = pysam.AlignmentFile(bam_file) except ValueError: raise else: f.close() return True def bam_has_index(bam_file): """Check if bam file has an index. Returns True/False.""" has_index = None with pysam.AlignmentFile(bam_file, 'rb') as bam_fileobj: try: bam_fileobj.fetch(bam_fileobj.references[0]) except ValueError as err: if err.message == 'fetch called on bamfile without index': bam_fileobj.close() has_index = False else: has_index = True return has_index def create_bam_index(bam_file): """Create an index for the given BAM file.""" pysam.index(bam_file) def get_bam_read_lengths(bam_file): """For a given BAM file, return an unique list of read lengths present.""" read_lengths = [] with pysam.AlignmentFile(bam_file, 'rb') as bam_fileobj: for record in bam_fileobj: read_lengths.append(record.query_length) return set(read_lengths) def is_fasta_valid(fasta_file): """Check if fasta file is valid. Raises a ValueError if pysam cannot read the file. #TODO: pysam does not differentiate between BAM and SAM """ try: f = pysam.FastaFile(fasta_file) except IOError: raise else: f.close() return True def get_fasta_records(fasta, transcripts): """Return list of transcript records from the given fasta file. Each record will be of the form {'sequence_id': {'sequence': 'AAA', 'length': 3}} trascripts should be provided as a list of sequence id's. """ records = {} f = pysam.FastaFile(fasta) for transcript in transcripts: try: sequence, length = f.fetch(transcript), f.get_reference_length(transcript) except KeyError: raise ArgumentError('Transcript "{}" does not exist in FASTA file "{}"'.format(transcript, fasta)) records[transcript] = {'sequence': sequence, 'length': length} f.close() return records def get_ribo_counts(ribo_fileobj, transcript_name, read_length=0): """Get total reads and read counts in all 3 frames for the give transcript from input BAM file (indexed). """ read_counts = {} total_reads = 0 for record in ribo_fileobj.fetch(transcript_name): if read_length: if record.query_length != read_length: continue total_reads += 1 position = record.pos + 1 try: read_counts[position] except KeyError: read_counts[position] = {1: 0, 2: 0, 3: 0} rem = position % 3 if rem == 0: read_counts[position][3] += 1 else: read_counts[position][rem] += 1 return read_counts, total_reads def check_required_arguments(ribo_file, transcriptome_fasta, transcript_name=None): """Check required arguments of both riboplot and ribocount.""" # Is this a valid BAM file? i.e., can pysam read it? try: is_bam_valid(ribo_file) except ValueError: logging.error('The given RiboSeq BAM file is not valid') raise # Does the BAM file have an index? If not, create it. if not bam_has_index(ribo_file): logging.info('Creating an index for the BAM file...') create_bam_index(ribo_file) # Is FASTA file valid? try: fasta_valid = is_fasta_valid(transcriptome_fasta) except IOError: logging.error('This FASTA file is not valid -> {}'.format(transcriptome_fasta)) if fasta_valid and transcript_name: try: get_fasta_records(transcriptome_fasta, [transcript_name]) except IOError: logging.error('This FASTA file is not valid -> {}'.format(transcriptome_fasta)) raise except ArgumentError: # does transcript exist in fasta? raise with pysam.AlignmentFile(ribo_file, 'rb') as bam_file: if transcript_name not in bam_file.references: raise ArgumentError('Transcript "{}" does not exist in BAM file'.format(transcript_name)) def check_optional_arguments(ribo_file, read_length=None, read_offset=None, rna_file=None): """Check all optional arguments.""" if rna_file: try: subprocess.check_output(['bedtools', '--version']) except OSError: logging.error('Could not find bedtools in PATH. bedtools is ' 'required for generating RNA coverage plot.') raise # Is this a valid BAM file? i.e., can pysam read it? try: is_bam_valid(rna_file) except ValueError: logging.error('The given RNASeq BAM file is not valid') raise # If read_length is given, it must be a positive integer or reads of that # length must exist in the BAM file if read_length: if read_length < 0: raise ArgumentError('Read length must be a positive value') bam_read_lengths = get_bam_read_lengths(ribo_file) if read_length not in bam_read_lengths: raise ArgumentError('Reads of the length "{}" does not exist in the BAM file'.format(read_length)) # If read_offset is given, it must be a positive integer if read_offset: if read_offset < 0: raise ArgumentError('Read offset must be 0 or greater')