# HG changeset patch # User vipints # Date 1381234534 14400 # Node ID 2fe512c7bfdfc527147305cb9d1686c1faff1b5f # Parent e27b4f7811c23f6bc5488829f082aecfaeb95b9b DESeq2 version 1.0.19 added to the repo diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/README Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,82 @@ +------------------------------------------------------ +DESeq2-hts: A Galaxy wrapper for DESeq2 version 1.0.19 +------------------------------------------------------ + +Description: +------------ + DESeq2 can be used as a web service embedded in a Galaxy instance. + We call this wrapper DESeq2-hts. + +Requirements: +------------- + MATLAB/OCTAVE and Python :- Preprocessing of sequencing reads and GFF files + R, Bioconductor package :- Required for DESeq2 + SCIPY, NUMPY :- for python + SAMTOOLS :- Sequencing read processing + +Contents: +--------- + ./src + All relevant scripts for DESeq2-hts are located in the subdirectory + src. src/deseq2-hts.sh is the main script to start DESeq2-hts. The + preprocessing of the BAM and GFF files runs before the R DESeq2 script. + Please follow the shell script to understand the details. + + ./galaxy + The Galaxy tool configuration file can be found in the galaxy folder. Please + make the necessary edits to the .xml file and the remaining .sh files and + run a few tests. + + ./setup.sh + Setup script for DESeq2-hts. + + ./mex + MATLAB executable (mex) files. + + ./bin + Contains deseq2_config.sh file which is used for the configuration of + DESeq2-hts. According to your platform, the default file will be changed. + + ./test_data + This sub-directory contains all data for running a functional test in + the Galaxy framework. You may need to move these test files into the test-data + directory. + + ./tools + A Python-based GFF parsing program. Also contains small utility programs. 
+ +Getting started: +---------------- + Check for all requirements first, then + + a) Run ./setup.sh and set up paths and configuration options for DESeq2-hts. + + b) Inside the mex folder run make to create the platform-dependent .mex files + cd mex + make [interpreter] + make octave for octave + make matlab for matlab + make all for octave and matlab + + c) Edit the Galaxy tool configuration file to adjust the path if necessary. + +Licenses: +--------- + If **DESeq2** is used to obtain results for scientific publications it should be cited as [1]. + + This wrapper program (DESeq2-hts) is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the Free Software Foundation; + either version 3 of the License, or (at your option) any later version. + + Written (W) 2009-2012 Jonas Behr, Regina Bohnert, Andre Kahles, Gunnar Raetsch, Vipin T. Sreedharan + Copyright (C) 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany and + 2013 cBio Memorial Sloan Kettering Cancer Center, New York City, USA. + +References: +----------- + [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`. 
+ +Contact: +-------- + vipin [at] cbio.mskcc.org + diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/bin/deseq2_config.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/bin/deseq2_config.sh Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,18 @@ +#!/bin/bash +export DESEQ2_VERSION=1.0.19 +export DESEQ2_PATH= +export DESEQ2_SRC_PATH=$DESEQ2_PATH/src +export DESEQ2_BIN_PATH=$DESEQ2_PATH/bin +export INTERPRETER= +export MATLAB_BIN_PATH= +export MATLAB_MEX_PATH= +export MATLAB_INCLUDE_DIR= +export OCTAVE_BIN_PATH= +export OCTAVE_MKOCT= +export SAMTOOLS_DIR= +export PYTHON_PATH=/usr/bin/python +export SCIPY_PATH= +export R_PATH= +export LD_LIBRARY_PATH= +export ENVIRONMENT=galaxy + diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/bin/deseq2_config.sh.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/bin/deseq2_config.sh.sample Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,17 @@ +#!/bin/bash +export DESEQ2_VERSION=1.0.19 +export DESEQ2_PATH=/home/galaxy/software/DESeq2-hts/ +export DESEQ2_SRC_PATH=$DESEQ2_PATH/src +export DESEQ2_BIN_PATH=$DESEQ2_PATH/bin +export INTERPRETER=octave +export MATLAB_BIN_PATH= +export MATLAB_MEX_PATH= +export MATLAB_INCLUDE_DIR= +export OCTAVE_BIN_PATH=/home/galaxy/software/octave/source/octave-3.6.3/octave +export OCTAVE_MKOCT=/home/galaxy/software/bin/mkoctfile +export SAMTOOLS_DIR=/home/galaxy/software/samtools-0.1.19/ +export PYTHON_PATH=/usr/bin/python +export SCIPY_PATH=/home/galaxy/software/lib/python2.6/site-packages/ +export R_PATH=/home/galaxy/software/bin/R +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/oqtansTools/oqtans_dep/octave-3.6.2_64/lib/octave/3.6.2/ +export ENVIRONMENT=galaxy diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/bin/genarglist.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/bin/genarglist.sh Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,13 @@ +#/bin/bash +## +# Copyright (C) 2009-2013 Max Planck Society and Memorial Sloan Kettering Cancer Center +## +until [ -z 
$1 ] ; do + if [ $# != 1 ]; + then + echo -n "'$1', " + else + echo -n "'$1'" + fi + shift +done diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/bin/genes_cell2struct --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/bin/genes_cell2struct Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,7 @@ +#!/bin/bash +# deseq-hts wrapper script to start the interpreter with the correct list of arguments +# Copyright (C) 2010-2012 Max Planck Society +set -e +PROG=`basename $0` +DIR=`dirname $0` +exec ${DIR}/start_interpreter.sh ${PROG} "`${DIR}/genarglist.sh $@`" diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/bin/get_read_counts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/bin/get_read_counts Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,7 @@ +#!/bin/bash +# deseq-hts wrapper script to start the interpreter with the correct list of arguments +# Copyright (C) 2010-2012 Max Planck Society +set -e +PROG=`basename $0` +DIR=`dirname $0` +exec ${DIR}/start_interpreter.sh ${PROG} "`${DIR}/genarglist.sh $@`" diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/bin/start_interpreter.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/bin/start_interpreter.sh Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,26 @@ +#/bin/bash +## +# Copyright (C) 2009-2013 Max Planck Society and Memorial Sloan Kettering Cancer Center +## + +set -e + +. 
`dirname $0`/deseq2_config.sh + +export MATLAB_RETURN_FILE=`mktemp` + +if [ "$INTERPRETER" == 'octave' ]; +then + echo exit | ${OCTAVE_BIN_PATH} --no-window-system --silent --eval "global SHELL_INTERPRETER_INVOKE; SHELL_INTERPRETER_INVOKE=1; addpath $DESEQ2_SRC_PATH; deseq_config; $1($2); exit;" || (echo starting Octave failed; rm -f $MATLAB_RETURN_FILE; exit -1) ; +fi + +if [ "$INTERPRETER" == 'matlab' ]; +then + echo exit | ${MATLAB_BIN_PATH} -nodisplay -r "global SHELL_INTERPRETER_INVOKE; SHELL_INTERPRETER_INVOKE=1; addpath $DESEQ2_SRC_PATH; deseq_config; $1($2); exit;" || (echo starting Matlab failed; rm -f $MATLAB_RETURN_FILE; exit -1) ; +fi + +test -f $MATLAB_RETURN_FILE || exit 0 +ret=`cat $MATLAB_RETURN_FILE` ; +rm -f $MATLAB_RETURN_FILE +exit $ret + diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/galaxy/deseq2.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/galaxy/deseq2.xml Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,106 @@ + + Differential gene expression analysis based on the negative binomial distribution + +./../src/deseq2-hts.sh $anno_input_selected $deseq_out $deseq_out.extra_files_path/gene_map.mat +$distype +#for $i in $replicate_groups +#for $j in $i.replicates +$j.bam_alignment:#slurp +#end for + +#end for + >> $Log_File + + + + + + + + + + + + + + + + + + + + + + + + ./deseq2-hts.sh ../test_data/deseq_c_elegans_WS200-I-regions.gff3 ../test_data/deseq_c_elegans_WS200-I-regions_deseq.txt ../test_data/genes.mat ../test_data/deseq_c_elegans_WS200-I-regions-SRX001872.bam ../test_data/deseq_c_elegans_WS200-I-regions-SRX001875.bam + + + + + + + + + + +.. class:: infomark + +**What it does** + +DESeq2_ Estimate variance-mean dependence in count data from high-throughput sequencing assays and test for differential expression based on a model using the negative binomial distribution. + +.. 
_DESeq2: http://bioconductor.org/packages/2.12/bioc/html/DESeq2.html + +`DESeq2` requires: + +A genome annotation in GFF format, containing the necessary information about the transcripts that are to be quantified. + +The BAM alignment files grouped into replicate groups, each containing several replicates. BAM files store the read alignments. The program will also work with only two groups containing only a single replicate each. However, this analysis has less statistical power and is therefore not recommended! + +------ + +**Licenses** + +If **DESeq2** is used to obtain results for scientific publications it +should be cited as [1]_. + +**References** + +.. [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`_. + +.. _Differential expression analysis for sequence count data: http://dx.doi.org/10.1186/gb-2010-11-10-r106 + +------ + +.. class:: infomark + +**About formats** + +**GFF/GTF format** General Feature Format/Gene Transfer Format is a format for describing genes and other features associated with DNA, RNA and protein sequences. GFF3 lines have nine tab-separated fields: + +1. seqid - The name of a chromosome or scaffold. +2. source - The program that generated this feature. +3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon". +4. start - The starting position of the feature in the sequence. The first base is numbered 1. +5. stop - The ending position of the feature (inclusive). +6. score - A score between 0 and 1000. If there is no score value, enter ".". +7. strand - Valid entries include '+', '-', or '.' (for don't know/care). +8. phase - If the feature is a coding exon, the phase should be a number between 0 and 2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'. +9. attributes - All lines with the same group are linked together into a single item. 
+ +For more information see http://www.sequenceontology.org/gff3.shtml + +**BAM format** The Sequence Alignment/Map (SAM) format is a +tab-delimited text format that stores large nucleotide sequence +alignments. BAM is the binary version of a SAM file that allows for +fast and intensive data processing. The format specification and the +description of SAMtools can be found on +http://samtools.sourceforge.net/. + +------ + +DESeq2-hts Wrapper Version 0.2 (Aug 2013) + + + diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/mex/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/mex/Makefile Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,38 @@ +include ../bin/deseq2_config.sh + +MEX=${MATLAB_MEX_PATH} +MKOCTFILE=${OCTAVE_MKOCT} +MATLAB_INCL=${MATLAB_INCLUDE_DIR} +SAMDIR=${SAMTOOLS_DIR} + + +all: get_reads.mex get_bam_properties.mex interval_overlap.mex get_reads.mexa64 get_bam_properties.mexa64 interval_overlap.mexa64 +octave: get_reads.mex get_bam_properties.mex interval_overlap.mex +matlab: get_reads.mexa64 get_bam_properties.mexa64 interval_overlap.mexa64 + + +get_reads.mexa64: get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp + rm -f *.o + ${MEX} -g -O get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp -I${SAMDIR} -L${SAMDIR} -lbam -lz -lcurses -I$(MATLAB_INCL) + +get_bam_properties.mexa64: get_bam_properties.cpp + rm -f *.o + ${MEX} -g -O get_bam_properties.cpp -I$(MATLAB_INCL) + +interval_overlap.mexa64: interval_overlap.cpp + ${MEX} -g -O interval_overlap.cpp -I$(MATLAB_INCL) + +get_reads.mex: get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp + rm -f *.o + ${MKOCTFILE} -g --mex get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp -I${SAMDIR} -L${SAMDIR} -lbam -lz -lcurses + +get_bam_properties.mex: get_bam_properties.cpp + rm -f *.o + ${MKOCTFILE} -g --mex get_bam_properties.cpp + +interval_overlap.mex: interval_overlap.cpp + rm -f *.o + ${MKOCTFILE} -g --mex interval_overlap.cpp + +clean: + rm -f *.o *.mexa64 *.mex 
diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/mex/get_bam_properties.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/mex/get_bam_properties.cpp Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,216 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2009-2011 Regina Bohnert +* Copyright (C) 2009-2011 Max Planck Society +*/ + + +#include +#include +#include +#include +#include + +#include + using std::vector; +#include + using std::string; +#include + using std::find; + using std::min; + +#include + + +char *get_string(const mxArray *prhs); + +typedef unsigned int uint32_t; +typedef unsigned char uint8_t; + +/* + * [read_len num_reads] = get_bam_properties(fname, path_samtools, contig_name) + * + * -- input -- + * prhs[0] file name of paired reads in BAM format (sorted by read id) + * prhs[1] path to samtools + * prhs[2] contig name + * + * -- output -- + * plhs[0] length of read + * plhs[1] number of unique reads +*/ +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { + // checks for the right number of arguments + if (nrhs !=3 || nlhs > 2) { + mexErrMsgTxt("number of input and output args should be 3 and 2\nUSAGE:\n [read_len, num_reads] = get_bam_properties(fname, path_samtools, contig_name)\n"); + return; + } + + signal(SIGCHLD, SIG_IGN); // avoid zombies + + // read input arguments + char *fname = get_string(prhs[0]); + char *path_samtools = get_string(prhs[1]); + char *contig_name = get_string(prhs[2]); + char command[10000]; + + sprintf(command, "%s./samtools view %s %s 2>/dev/null", path_samtools, fname, contig_name); + //printf("%s\n", command); + + // get number of unique reads + int status; + uint32_t num_unique_reads = 0; + char command2[10000]; + sprintf(command2, "%s | cut -f 1 | sort 
-u | wc -l", command); + FILE* fp = popen(command2, "r"); + if (fp == NULL) { + mexErrMsgTxt("Error using popen\n"); + } + int num_scans = 1; + num_scans = fscanf(fp, "%d", &num_unique_reads); + if (num_scans != 1) { + rewind(fp); + char ret[1000]; + fgets(ret, 1000, fp); + fprintf(stdout, "%s", ret); + mexErrMsgTxt("Could not determine number of reads\n"); + } + status = pclose(fp); + //printf("%i", num_unique_reads); + + // select reads for given positions and strand + int num_rows_selected = min((int) num_unique_reads, 100); + sprintf(command, "%s | head -n %i | cut -f 1-11", command, num_rows_selected); + fp = popen(command, "r"); + if (fp == NULL) { + mexErrMsgTxt("Error using popen\n"); + } + /* SAM format + 1: read id, 2: flag, 3: reference name, 4: start (1-based, incl.), 5: mapping quality, + 6: CIGAR, 7: mate reference name, 8: mate start (1-based, incl.), 9: insert size, 10: read, 11: quality + 12+: additional tags + */ + uint32_t read_idx = 0, row_idx = 0, num_col = 0; + uint32_t flag = 0, start_pos = 0, map_score = 0, mate_end_pos = 0, num_matches = 0, num_del = 0, num_ins = 0, ins_size = 0; + char ri [1000], read_contig_name [1000], cg [1000], mate_read_id [1000], read [1000], read_qual [1000]; + string last_read_id; + vector block_lengths, block_starts; + vector read_ids; + vector::iterator it; + + uint32_t read_len = 0; + bool empty_line = true; + int num_rows = 0; + while(empty_line && num_rows < num_rows_selected) { + num_col = fscanf(fp, "%s\t%i\t%s\t%i\t%i\t%s\t%s\t%i\t%i\t%s\t%s", &ri, &flag, &read_contig_name, &start_pos, &map_score, &cg, &mate_read_id, &mate_end_pos, &ins_size, &read, &read_qual); + if (num_col != 11) { + mexErrMsgTxt("error reading SAM line\n"); + } + + string cigar = (string) cg; + // ignore lines with reads w/o mapping information + if (start_pos == 0 || cigar.compare("*")==0) { + continue; + } + // parse CIGAR + uint last_c = 0; + string last_str; + num_matches = 0; + char *end = NULL; + uint32_t tmp_nm = 0, tmp_nd = 0, 
tmp_ni = 0; + uint32_t last_block_start = 0, last_block_length = 0, last_intron_len = 0; + block_lengths.clear(); block_starts.clear(); + + for (uint c = 0; c < cigar.size(); c++) { + switch (cigar[c]) { + case 'M': + last_str = cigar.substr(last_c, c-last_c); + tmp_nm = strtoul(last_str.c_str(), &end, 10); + if (*end != '\0') + mexErrMsgTxt("error: number of mismatches\n"); + end = NULL; + last_block_length += tmp_nm; + num_matches += tmp_nm; + last_c = c + 1; + break; + case 'I': + last_str = cigar.substr(last_c, c-last_c); + tmp_ni = strtoul(last_str.c_str(), &end, 10); + if (*end != '\0') + mexErrMsgTxt("error: number of insertions\n"); + end = NULL; + num_ins += tmp_ni; + last_c = c + 1; + break; + case 'D': + last_str = cigar.substr(last_c, c-last_c); + tmp_nd = strtoul(last_str.c_str(), &end, 10); + if (*end != '\0') + mexErrMsgTxt("error: number of deletions\n"); + end = NULL; + num_del += tmp_nd; + last_block_length += tmp_nd; + last_c = c + 1; + break; + case 'N': + last_str = cigar.substr(last_c, c-last_c); + last_intron_len = strtoul(last_str.c_str(), &end, 10); + end = NULL; + last_c = c + 1; + break; + case 'S': + break; + case 'H': + break; + case 'P': + break; + default: + break; + } + if (cigar[c] == 'N' || c==cigar.size()-1) { + block_starts.push_back(last_block_start); + last_block_start = last_block_start + last_block_length + last_intron_len; + last_intron_len = 0; + block_lengths.push_back(last_block_length); + last_block_length = 0; + } + } + read_len = 0; + for (uint n = 0; n < block_lengths.size(); n++) { + read_len += block_lengths[n]; + } + empty_line = false; + } // end of stream parsing + + status = pclose(fp); + + if (empty_line) + mexErrMsgTxt("Could not determine read length\n"); + + plhs[0] = mxCreateDoubleScalar((double) read_len); + plhs[1] = mxCreateDoubleScalar((double) num_unique_reads); + + return; +} + + +char *get_string(const mxArray *prhs) { + char *buf; + int buflen; + if (!prhs) + mexErrMsgTxt("get_string called with 
NULL pointer arg"); + if (!mxIsChar(prhs)) + mexErrMsgTxt("input is not a string"); + if (mxGetM(prhs) != 1) + mexErrMsgTxt("input is not a row vector"); + buflen = mxGetN(prhs) + 1; + buf = (char*) malloc(buflen); + /* copy the string from prhs into buf and add terminating NULL char */ + if (mxGetString(prhs, buf, buflen)) + mexErrMsgTxt("not enough space"); + return buf; +} diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/mex/get_reads.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/mex/get_reads.cpp Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,293 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#include +#include +#include +#include +#include +#include + using std::vector; +#include "get_reads_direct.h" +#include "mex_input.h" +#include "read.h" + +#define MAXLINE 10000 + +/* + * input: + * 1 bam file + * 2 chromosome + * 3 region start (1-based index) + * 4 region end (1-based index) + * 5 strand (either '+' or '-' or '0') + * [6] collapse flag: if true the reads are collapsed to a coverage track + * [7] subsample percentage: percentage of reads to be subsampled (in per mill) + * [8] intron length filter + * [9] exon length filter + * [10] mismatch filter + * [11] bool: use mapped reads for coverage + * [12] bool: use spliced reads for coverage + * [13] return maxminlen + * [14] return pair coverage + * + * output: + * 1 coverage + * [2] intron cell array + * [3] pair coverage + * [4] pair list + * + * example call: + * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30); + */ +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) { + + if 
(nrhs<5 || nrhs>14 || (nlhs<1 || nlhs>4)) { + fprintf(stderr, "usage: [x [introns] [pair]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches], [mapped], [spliced], [maxminlen], [pair]);\n"); + return; + } + + /* obligatory arguments + * **********************/ + char *fname = get_string(prhs[0]); + //fprintf(stdout, "arg1: %s\n", fname); + char *chr = get_string(prhs[1]); + //fprintf(stdout, "arg2: %s\n", chr); + int from_pos = get_int(prhs[2]); + //fprintf(stdout, "arg3: %d\n", from_pos); + int to_pos = get_int(prhs[3]); + //fprintf(stdout, "arg4: %d\n", to_pos); + char *strand = get_string(prhs[4]); + //fprintf(stdout, "arg5: %s\n", strand); + + if (from_pos>to_pos) + mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n"); + + if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0') + mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0"); + + /* optional arguments + * ******************/ + int collapse = 0; + if (nrhs>=6) + collapse = get_int(prhs[5]); + + int subsample = 1000; + if (nrhs>=7) + subsample = get_int(prhs[6]); + + int intron_len_filter = 1e9; + if (nrhs>=8) + intron_len_filter = get_int(prhs[7]); + + int exon_len_filter = -1; + if (nrhs>=9) + exon_len_filter = get_int(prhs[8]); + + int filter_mismatch = 1e9; + if (nrhs>=10) + filter_mismatch = get_int(prhs[9]); + + int mapped = 1; + if (nrhs>=11) + mapped = get_int(prhs[10]); + + int spliced = 1; + if (nrhs>=12) + spliced = get_int(prhs[11]); + + int maxminlen = 0; + if (nrhs>=13) + maxminlen = get_int(prhs[12]); + + int pair_cov = 0; + if (nrhs>=14) + pair_cov = get_int(prhs[13]); + + /* call function to get reads + * **************************/ + char region[MAXLINE]; + sprintf(region, "%s:%i-%i", chr, from_pos, to_pos); + + vector all_reads; + + get_reads_from_bam(fname, region, &all_reads, strand[0], subsample); + + /* filter reads + * **************/ + int left = 0; + int right = 0; + + vector reads; + for (int i=0; 
ileft) + left++; + if (all_reads[i]->right) + right++; + if (all_reads[i]->max_intron_len()min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch) + reads.push_back(all_reads[i]); + } + + + /* prepare output + * **************/ + int num_rows = reads.size(); + int num_pos = to_pos-from_pos+1; + + if (pair_cov==1 && nlhs>=3) { + // sort reads by read_id + printf("\n\nleft:%i right:%i \n\n", left, right); + sort(reads.begin(), reads.end(), CRead::compare_by_read_id); + } + + // read coverages collapsed + if (collapse) { + plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL); + uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]); + if (num_pos>0 && mask_ret==NULL) + mexErrMsgTxt("Error allocating memory\n"); + if (mapped && spliced) { + for (int i=0; iget_coverage(from_pos, to_pos, mask_ret); + } + } else { + for (int i=0; iblock_starts.size(); + if ((num_exons==1 && mapped) || (num_exons>1 && spliced)) + reads[i]->get_coverage(from_pos, to_pos, mask_ret); + } + } + } + // reads not collapsed + else { + uint32_t nzmax = 0; // maximal number of nonzero elements + int len = to_pos-from_pos+1; + for (uint i=0; iblock_starts.size(); n++) { + uint32_t from, to; + if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0) + from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos; + else + from = 0; + if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0) + to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n]; + else + to = 0; + for (int bp=from; bp0 && mask_ret==NULL) + mexErrMsgTxt("Error allocating memory\n"); + uint32_t mask_ret_c = 0; // counter + for (uint i=0; iget_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i); + } + if (mask_ret_c!=2*nzmax) + mexErrMsgTxt("Error filling index arrays for sparse matrix\n"); + } + // introns + if (maxminlen==0 && nlhs>=2) { + vector intron_list; + for (int i=0; iget_introns(&intron_list); + } + + plhs[1] 
= mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL); + uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]); + for (int p = 0; p=2) { + vector intron_starts; + vector intron_ends; + vector block_len1; + vector block_len2; + for (int i=0; iget_introns(&intron_starts, &intron_ends, &block_len1, &block_len2); + } + + plhs[1] = mxCreateNumericMatrix(4, intron_starts.size(), mxINT32_CLASS, mxREAL); + uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]); + for (int p = 0; p=3) { + plhs[2] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL); + uint32_t *p_pair_map = (uint32_t*) mxGetData(plhs[2]); + if (num_pos>0 && p_pair_map==NULL) + mexErrMsgTxt("Error allocating memory\n"); + + vector pair_ids; + + int take_cnt = 0; + int discard_cnt = 0; + // find consecutive reads with the same id + for (int i=0; i<((int) reads.size())-1; i++) { + int j = i+1; + while(jread_id, reads[j]->read_id) == 0) { + if ((reads[i]->left && reads[j]->right) || (reads[j]->left && reads[i]->right) && (reads[i]->reverse != reads[j]->reverse)) { + if (reads[i]->get_last_position()==-1 || reads[j]->get_last_position()==-1) + break; + if (reads[i]->get_last_position()start_pos && reads[j]->start_pos-reads[i]->get_last_position()<60000) { + int from = std::max(0, reads[i]->get_last_position()-from_pos); + int to = std::min(num_pos-1, reads[j]->start_pos-from_pos); + pair_ids.push_back(i); + pair_ids.push_back(j); + for (int k=from; kstart_pos>reads[j]->get_last_position() && reads[j]->get_last_position()-reads[i]->start_pos<60000) { + int from = std::max(0, reads[j]->get_last_position()-from_pos); + int to = std::min(num_pos-1, reads[i]->start_pos-from_pos); + pair_ids.push_back(i); + pair_ids.push_back(j); + for (int k=from; k=4) { + plhs[3] = mxCreateNumericMatrix(2, pair_ids.size()/2, mxUINT32_CLASS, mxREAL); + uint32_t *pair_ids_ret = (uint32_t*) mxGetData(plhs[3]); + if (pair_ids.size()>0 && pair_ids_ret==NULL) + mexErrMsgTxt("Error allocating memory\n"); + 
for (int i=0; i +#include +#include "sam.h" +#include "get_reads_direct.h" + +#include + using std::vector; +#include + using std::string; + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + int beg, end; + samfile_t *in; +} tmpstruct_t; + +typedef struct { + uint64_t u, v; +} pair64_t; + +static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b) +{ + uint32_t rbeg = b->core.pos; + uint32_t rend = b->core.n_cigar? bam_calend(&b->core, bam1_cigar(b)) : b->core.pos + 1; + return (rend > beg && rbeg < end); +} + +pair64_t * get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int* cnt_off); + + int bam_fetch_reads(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_header_t* header, vector* reads, char strand); + +// callback for bam_plbuf_init() +static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) +{ + //tmpstruct_t *tmp = (tmpstruct_t*)data; + //if ((int)pos >= tmp->beg && (int)pos < tmp->end) + // printf("%s\t%d\t%d\n", tmp->in->header->target_name[tid], pos + 1, n); + return 0; +} +#ifdef __cplusplus +} +#endif +int parse_sam_line(char* line, CRead* read); +//int set_strand(char c); +//void parse_cigar(bam1_t* b, CRead* read); +void parse_cigar(bam1_t* b, CRead* read, bam_header_t* header); + + +int get_reads_from_bam(char* filename, char* region, vector* reads, char strand, int lsubsample) +{ + subsample = lsubsample; + //set_strand(strand); + + srand (time(NULL)); + //srand (1234); + tmpstruct_t tmp; + tmp.in = samopen(filename, "rb", 0); + if (tmp.in == 0) { + fprintf(stderr, "Fail to open BAM file %s\n", filename); + return 1; + } + int ref; + bam_index_t *idx; + bam_plbuf_t *buf; + idx = bam_index_load(filename); // load BAM index + if (idx == 0) { + fprintf(stderr, "BAM indexing file is not available.\n"); + return 1; + } + bam_parse_region(tmp.in->header, region, &ref, + &tmp.beg, &tmp.end); // parse the region + if (ref < 0) { + 
fprintf(stderr, "Invalid region %s\n", region); + return 1; + } + + buf = bam_plbuf_init(pileup_func, &tmp); // initialize pileup + + bam_fetch_reads(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, buf, tmp.in->header, reads, strand); + //fprintf(stdout, "intron_list: %d \n", intron_list->size()); + + bam_plbuf_push(0, buf); // finalize pileup + bam_index_destroy(idx); + bam_plbuf_destroy(buf); + samclose(tmp.in); + return 0; +} + + +int bam_fetch_reads(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_header_t* header, vector* reads, char strand) +{ + int n_off; + pair64_t *off = get_chunk_coordinates(idx, tid, beg, end, &n_off); + if (off == 0) return 0; + { + // retrive alignments + uint64_t curr_off; + int i, ret, n_seeks; + n_seeks = 0; i = -1; curr_off = 0; + bam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t)); + for (;;) { + if (curr_off == 0 || curr_off >= off[i].v) { // then jump to the next chunk + if (i == n_off - 1) break; // no more chunks + if (i >= 0) assert(curr_off == off[i].v); // otherwise bug + if (i < 0 || off[i].v != off[i+1].u) { // not adjacent chunks; then seek + bam_seek(fp, off[i+1].u, SEEK_SET); + curr_off = bam_tell(fp); + ++n_seeks; + } + ++i; + } + if ((ret = bam_read1(fp, b)) > 0) { + curr_off = bam_tell(fp); + if (b->core.tid != tid || b->core.pos >= end) break; // no need to proceed + else if (is_overlap(beg, end, b)) + { + int rr = rand(); + if ((rr%1000 < subsample)) + { + CRead* read = new CRead(); + parse_cigar(b, read, header); + + if (strand == '0' || strand==read->strand[0] || read->strand[0]=='0') + { + read->left = (b->core.flag & left_flag_mask) >0; + read->right = (b->core.flag & right_flag_mask) >0; + read->reverse = (b->core.flag & reverse_flag_mask) >0; + reads->push_back(read); + } + else + { + delete read; + } + //else if (read->strand[0]=='0'&&((b->core.flag & g_flag_off) >0)) + //{ + // //fprintf(stdout, "(-)-strand; read->strand[0]==0, num_exons: %i \n", read->block_starts.size()); + // // this 
flag means that the read has been reversed for alignment + // // flag bit set and (-)-strand requested + // reads->push_back(read); + //} + //else if (read->strand[0]=='0'&&(g_flag_on>0&&(b->core.flag & g_flag_on)==0)) + //{ + // //fprintf(stdout, "(+)-strand; read->strand[0]==0, num_exons: %i \n", read->block_starts.size()); + // // (+)-strand requested and flag bit not set + // reads->push_back(read); + //} + } + } + } else break; // end of file + } +// fprintf(stderr, "[bam_fetch] # seek calls: %d\n", n_seeks); + bam_destroy1(b); + } + free(off); + return 0; +} + +void parse_cigar(bam1_t* b, CRead* read, bam_header_t* header) +{ + read->start_pos = b->core.pos+1; + read->set_strand('0'); + read->read_id = new char[100]; + sprintf(read->read_id, "%s\0", bam1_qname(b)); + + for (int k = 0; k < b->core.n_cigar; ++k) + { + int op = bam1_cigar(b)[k] & BAM_CIGAR_MASK; // operation + int l = bam1_cigar(b)[k] >> BAM_CIGAR_SHIFT; // length + //fprintf(stdout, "op:%d l:%d\n", op, l); + if (op == BAM_CMATCH) + { + if (k==0) + { + read->block_lengths.push_back(l); + read->block_starts.push_back(0); + } + else + { + int op_prev = bam1_cigar(b)[k-1] & BAM_CIGAR_MASK; + int l_prev = bam1_cigar(b)[k-1] >> BAM_CIGAR_SHIFT; + if (op_prev==BAM_CREF_SKIP)// intron before + { + if (read->block_lengths.size()>=1) + { + int last_block_start = (*(read->block_starts.end()-1)); + int intron_start = last_block_start+(*(read->block_lengths.end()-1)); + read->block_lengths.push_back(l); + read->block_starts.push_back(intron_start+l_prev); + } + else + { + // start of first block was not a match + read->block_lengths.push_back(l); + read->block_starts.push_back(0); + } + } + else + { + if (read->block_lengths.size()>=1 && op == BAM_CDEL)// if it is an insertion then the matching block is not inreased + (*(read->block_lengths.end()-1))+=l; + else + { + //char *samline = bam_format1(header, b); + //printf("header: %s \n", samline); + } + } + } + } + else if (op == BAM_CDEL) + { + if (k>0 && 
read->block_lengths.size()>=1) + (*(read->block_lengths.end()-1))+=l; + } + else if (op == BAM_CREF_SKIP)//intron + {} + else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) + {} + } + // parse auxiliary data + uint8_t* s = bam1_aux(b); + uint8_t* end = b->data + b->data_len; + while (s < end) + { + uint8_t type, key[2]; + key[0] = s[0]; key[1] = s[1]; + s += 2; type = *s; ++s; + //fprintf(stdout, "\n%c%c:%c\n", key[0], key[1], type); + if (type == 'A') + { + if ( key[0] =='X' && key[1] == 'S') + { + read->set_strand((char) *s); + } + ++s; + } + else if (type == 'C') + { + if ( key[0] =='H' && key[1] == '0') + { + uint8_t matches = *s; + read->matches = (int) matches; + } + if ( key[0] =='N' && key[1] == 'M') + { + uint8_t mismatches = *s; + read->mismatches = (int) mismatches; + } + if ( key[0] =='H' && key[1] == 'I') + { + uint8_t mai = *s; + read->multiple_alignment_index = (int) mai; + } + + ++s; + } + else if (type == 'c') { ++s; } + else if (type == 'S') { s += 2; } + else if (type == 's') { s += 2; } + else if (type == 'I') { s += 4; } + else if (type == 'i') { s += 4; } + else if (type == 'f') { s += 4; } + else if (type == 'd') { s += 8; } + else if (type == 'Z') { ++s; } + else if (type == 'H') { ++s; } + } +} + +//int set_strand(char c) +//{ +// if (c=='+') +// { +// char* fl = (char*) "0x0010"; +// g_flag_on = strtol(fl, 0, 0); +// g_flag_off = 0; +// } +// else if (c=='-') +// { +// char* fl = (char*) "0x0010"; +// g_flag_off = strtol(fl, 0, 0); +// g_flag_on = 0; +// } +// return 0; +//} + diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/mex/get_reads_direct.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/mex/get_reads_direct.h Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,29 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. 
+* +* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#ifndef __GET_READS_DIRECT_H__ +#define __GET_READS_DIRECT_H__ + +#include + using std::vector; +#include "read.h" + +//static int g_flag_on = 0, g_flag_off = 0; +static int left_flag_mask = strtol((char*) "0x40", 0, 0); +static int right_flag_mask = strtol((char*) "0x80", 0, 0); +static int reverse_flag_mask = strtol((char*) "0x10", 0, 0); + +static int subsample = 1000; +//static int collapse = 0; + +int get_reads_from_bam(char* filename, char* region, vector* reads, char strand, int lsubsample); + +#endif diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/mex/interval_overlap.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/mex/interval_overlap.cpp Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,217 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. 
+* +* Written (W) 2010-2011 Jonas Behr +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + using std::vector; +#include + using std::sort; + using std::min; + using std::max; + +typedef struct { + int start; + int stop; + int idx; + int set_id; +} interval_t; + +bool compare (interval_t i, interval_t j) +{ + return (i.start= 1); +} +bool leftOf(interval_t a, interval_t b) +{ + return (a.stop < b.start); +} + +void scan(interval_t f, vector* Wf, interval_t g, vector* Wg, vector* overlap) +{ + vector::iterator i; + i=Wg->begin(); + while (iend()) + { + interval_t g2 = *i; + if (leftOf(g2,f)) + { + Wg->erase(i);// inefficient if Wg is large + // this moves all elements, therefore i is not incremented + } + else if (overlaps(g2,f)) + { + if (g2.set_id==1) + { + //printf("overlap: [%i | %i, %i] [%i | %i, %i]\n", g2.idx, g2.start, g2.stop, f.idx, f.start, f.stop); + overlap->push_back(g2.idx); + overlap->push_back(f.idx); + } + else if (f.set_id==1) + { + //printf("overlap: [%i | %i, %i] [%i | %i, %i]\n", f.idx, f.start, f.stop, g2.idx, g2.start, g2.stop); + overlap->push_back(f.idx); + overlap->push_back(g2.idx); + } + i++; + } + else + { + printf("never happens??\n"); + i++; + } + } + if (!leftOf(f, g)) + { + Wf->push_back(f); + //printf("push: [%i, %i] size:%i\n", f.start, f.stop, Wf->size()); + } +} + +/* + * prhs[0] first interval set starts + * prhs[1] first interval set stops + * prhs[2] second interval set starts + * prhs[3] second interval set stops + * + * return: + * plhs[0] one based index in first interval set overlapping with a interval in the second set + * plhs[1] corresponding index in the second set + * +*/ +void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) +{ + if (nrhs!=4) + mexErrMsgTxt("Expected 4 arguments: starts1, stops1, starts2, stops2 
\n"); + if (nlhs!=2) + mexErrMsgTxt("Expected 2 output arguments \n"); + + int num_intervals1 = mxGetNumberOfElements(prhs[0]); + assert(num_intervals1 == mxGetNumberOfElements(prhs[1])); + int num_intervals2 = mxGetNumberOfElements(prhs[2]); + assert(num_intervals2 == mxGetNumberOfElements(prhs[3])); + + //printf("num_intervals1: %i\n", num_intervals1); + //printf("num_intervals2: %i\n", num_intervals2); + + double* starts1 = mxGetPr(prhs[0]); + double* stops1 = mxGetPr(prhs[1]); + double* starts2 = mxGetPr(prhs[2]); + double* stops2 = mxGetPr(prhs[3]); + + vector intervals1; + for (int i=0; i::max(); + i.stop = std::numeric_limits::max(); + i.set_id = std::numeric_limits::max(); + i.idx = std::numeric_limits::max(); + intervals1.push_back(i); + + //printf("num_intervals1: %i\n", intervals1.size()); + vector intervals2; + for (int i=0; i overlap; + vector Wx; + vector Wy; + vector::iterator x = intervals1.begin(); + vector::iterator y = intervals2.begin(); + while (x::iterator x; + //vector::iterator y; + //if (it1>intervals1.end()) + // x = inf_interval(); + //else + // x = it1; + //if (it2>intervals2.end()) + // y = inf_interval(); + //else + // y=it2; + + if (x->start <= y->start) + { + scan(*x, &Wx, *y, &Wy, &overlap); + x++; + } + else + { + if (y<=intervals2.end()) + { + scan(*y, &Wy, *x, &Wx, &overlap); + y++; + } + } + } + + plhs[0] = mxCreateDoubleMatrix(1, overlap.size()/2, mxREAL); + double* idx1 = mxGetPr(plhs[0]); + + plhs[1] = mxCreateDoubleMatrix(1, overlap.size()/2, mxREAL); + double* idx2 = mxGetPr(plhs[1]); + + for (int i=0; i +#include +#include "mex_input.h" + +char *get_string(const mxArray *prhs) { + char *buf; + int buflen; + if (!prhs) + mexErrMsgTxt("get_string called with NULL pointer arg"); + if (!mxIsChar(prhs)) + mexErrMsgTxt("input is not a string"); + if (mxGetM(prhs) != 1) + mexErrMsgTxt("input is not a row vector"); + buflen = mxGetN(prhs) + 1; + buf = (char*) malloc(buflen); + /* copy the string from prhs into buf and add 
terminating NULL char */ + if (mxGetString(prhs, buf, buflen)) + mexErrMsgTxt("not enough space"); + return buf; +} + +bool get_bool(const mxArray *prhs) +{ + const int M = mxGetM(prhs); + const int N = mxGetN(prhs); + double *f = (double*) mxGetPr(prhs); + + if (!prhs) + mexErrMsgTxt("Arg is NULL pointer"); + if (M != 1 || N != 1) + mexErrMsgTxt("Arg is not a scalar"); + if (f[0] != 0) + return true; + return false; +} + +int get_int(const mxArray *prhs) +{ + const int M = mxGetM(prhs); + const int N = mxGetN(prhs); + double *f = (double*) mxGetPr(prhs); + + if (!prhs) + mexErrMsgTxt("Arg is NULL pointer"); + if (M != 1 || N != 1) + mexErrMsgTxt("Arg is not a scalar"); + + return (int) f[0]; +} diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/mex/mex_input.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/mex/mex_input.h Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,20 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. +* +* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#include +#include + +#ifndef __MEX_INPUT_h__ +#define __MEX_INPUT_h__ + char *get_string(const mxArray *prhs); + bool get_bool(const mxArray *prhs); + int get_int(const mxArray *prhs); +#endif diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/mex/read.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/mex/read.cpp Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,214 @@ +/* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 3 of the License, or +* (at your option) any later version. 
+* +* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch +* Copyright (C) 2010-2011 Max Planck Society +*/ + + +#include "read.h" + +CRead::CRead() { + read_id = NULL; + sam_line = NULL; + start_pos = 0; + matches = 0; + mismatches = 0; + multiple_alignment_index = 0; + strand = NULL; + left = false; + right = false; + reverse = false; +} + +CRead::~CRead() { + delete[] read_id; + delete[] sam_line; + delete[] strand; +} + +/* + * Augments 'coverage' array at the positions covered by the read in the queried interval. + */ +void CRead::get_coverage(int p_start_pos, int p_end_pos, uint32_t* coverage) +{ + // block1 block2 + // |=====|======|============|===========|======|====| + // ^ ^ ^ + // p_start_pos | p_end_pos + // start_pos + // |0000001111111111111000000000000111111100000| + // *coverage + int len = p_end_pos-p_start_pos+1; + for (uint32_t n = 0; n < block_starts.size(); n++) { + int32_t from, to; + from = block_starts[n]+start_pos-p_start_pos; + to = block_starts[n]+start_pos-p_start_pos+block_lengths[n]; + if (from < 0) + from = 0; + if (to < 0) + continue; + else if (to > len) + to = len; + for (int bp=from; bp0) // this if for some reason zero in case of softclips + return start_pos+block_starts.back()+block_lengths.back(); + return -1; +} + +/* + * Adds the column indices (= positions) covered by the read to 'reads' array in current row (= read). + * These indices can be used to build up a sparse matrix of reads x positions. 
+ */ +void CRead::get_reads_sparse(int p_start_pos, int p_end_pos, double* reads, uint32_t & reads_c, uint32_t row_idx) { + int len = p_end_pos-p_start_pos+1; + for (uint32_t n = 0; n < block_starts.size(); n++) { + uint32_t from, to; + if (block_starts[n]+start_pos-p_start_pos >= 0) + from = block_starts[n]+start_pos-p_start_pos; + else + from = 0; + if (block_starts[n]+start_pos-p_start_pos+block_lengths[n] >= 0) + to = block_starts[n]+start_pos-p_start_pos+block_lengths[n]; + else + to = 0; + for (int bp=from; bp* acc_pos) +{ + if (strand[0]=='+') + { + for (int k=1;kpush_back(start_pos+block_starts[k]-1); + } + else if (strand[0]=='-') + { + for (int k=1;kpush_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2); + } +} + +void CRead::get_don_splice_sites(vector* don_pos) +{ + + if (strand[0]=='+') + { + for (int k=1;kpush_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2); + } + else if (strand[0]=='-') + { + for (int k=1;kpush_back(start_pos+block_starts[k]-1); + } +} + +int CRead::min_exon_len() +{ + int min = 1e8; + for (int k=0;kmax) + max = block_starts[k]-(block_starts[k-1]+block_lengths[k-1]); + return max; +} + +/* + * Adds start and end of introns in the read consecutively to the 'introns' vector. 
+ */ +void CRead::get_introns(vector* introns) +{ + for (int i=1; ipush_back(istart); + introns->push_back(iend); + //fprintf(stdout, "%i intron: %d->%d\n", i, istart, iend); + } +} +void CRead::get_introns(vector* intron_starts, vector* intron_ends, vector* block_len1, vector* block_len2) +{ + for (int i=1; ipush_back(istart); + intron_ends->push_back(iend); + block_len1->push_back(block_lengths[i-1]) ; + block_len2->push_back(block_lengths[i]) ; + } +} + +bool CRead::operator==(const CRead& read) const +{ + if (block_starts.size()!=read.block_starts.size()) + return false; + if (block_lengths.size()!=read.block_lengths.size()) + return false; + if (start_pos!=read.start_pos) + return false; + if (strand[0] != read.strand[0]) + return false; + for (int i=0; i +#include +#include +#include + using std::vector; + + +class CRead { + public: + /** constructor + */ + CRead(); + ~CRead(); + + vector block_starts; + vector block_lengths; + char* read_id; + char* sam_line; + int start_pos; + char * strand; + int matches; + int mismatches; + int multiple_alignment_index; + bool left; + bool right; + bool reverse; + + void get_coverage(int p_start_pos, int p_end_pos, uint32_t* coverage); + int get_last_position(); + void get_reads_sparse(int p_start_pos, int p_end_pos, double* reads, uint32_t & reads_c, uint32_t row_idx); + void get_introns(vector* introns); + void get_introns(vector* intron_starts, vector* intron_ends, vector* block_len1, vector* block_len2); + void get_acc_splice_sites(vector* acc_pos); + void get_don_splice_sites(vector* acc_pos); + int max_intron_len(); + int min_exon_len(); + bool operator==(const CRead& read) const; + void print(); + void set_strand(char s); + int get_mismatches(); + static bool compare_by_read_id(const CRead* read1, const CRead* read2) + { + if (!read1->read_id) + return true; + if (!read2->read_id) + return false; + + int cnt1=0; + while (read1->read_id[cnt1]!='\0') + cnt1++; + int cnt2 = 0; + while (read2->read_id[cnt2]!='\0') + 
cnt2++; + + return std::lexicographical_compare(read1->read_id,read1->read_id+cnt1,read2->read_id,read2->read_id+cnt2); + }; +}; +#endif diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/setup_deseq2-hts.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/setup_deseq2-hts.sh Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,192 @@ +#!/bin/bash +set -e + +DIR=`dirname $0` +. ${DIR}/./bin/deseq2_config.sh + +echo ========================================== +echo DESeq2-hts setup script \(DESeq2 version $DESEQ2_VERSION\) +echo ========================================== +echo +echo DESeq2-hts base directory \(currently set to \"$DESEQ2_PATH\", suggest to set to \"`pwd`\", used if left empty\) +read DESEQ2_PATH +if [ "$DESEQ2_PATH" == "" ]; +then + DESEQ2_PATH=`pwd` +fi +echo '=>' Setting DESeq2-hts base directory to \"$DESEQ2_PATH\" +echo +echo SAMTools directory \(currently set to \"$SAMTOOLS_DIR\", system version used if left empty\) +read SAMTOOLS_DIR +if [ "$SAMTOOLS_DIR" == "" ]; +then + if [ "$(which samtools)" != "" ] ; + then + SAMTOOLS_DIR=$(dirname $(which samtools)) + else + echo samtools not found + exit -1 ; + fi +fi +echo '=>' Setting SAMTools directory to \"$SAMTOOLS_DIR\" +echo + +echo Path to the python binary \(currently set to \"$PYTHON_PATH\", system version used, if left empty\) +read PYTHON_PATH +if [ "$PYTHON_PATH" == "" ]; +then + PYTHON_PATH=`which python` + if [ "$PYTHON_PATH" == "" ]; + then + echo python not found + exit -1 + fi +fi +echo '=>' Setting Python path to \"$PYTHON_PATH\" +echo + +echo Path to the R binary \(currently set to \"$R_PATH\", system version used, if left empty\) +read R_PATH +if [ "$R_PATH" == "" ]; +then + R_PATH=`which R` + if [ "$R_PATH" == "" ]; + then + echo R not found + exit -1 + fi +fi +echo '=>' Setting R path to \"$R_PATH\" +echo + +echo Path to Scipy library files \(currently set to \"$SCIPY_PATH\", system version is used if left empty\) +read SCIPY_PATH +echo '=>' Setting Scipy path to 
\"$SCIPY_PATH\" +echo + +echo Which interpreter should be used \(\"octave\" or \"matlab\"\) +read INTERPRETER +if [ "$INTERPRETER" != 'octave' -a "$INTERPRETER" != 'matlab' ]; +then + echo Unrecognized choice: \"$INTERPRETER\" + echo Aborting + false +fi +echo '=>' Setting interpreter to \"$INTERPRETER\" +echo + +if [ "$INTERPRETER" == 'octave' ]; +then + echo Please enter the full path to octave \(currently set to \"$OCTAVE_BIN_PATH\", system version used, if left empty\) + read OCTAVE_BIN_PATH + if [ "$OCTAVE_BIN_PATH" == "" ]; + then + OCTAVE_BIN_PATH=`which octave` + if [ "$OCTAVE_BIN_PATH" == "" ]; + then + echo octave not found + exit -1 + fi + fi + echo '=>' Setting octave\'s path to \"$OCTAVE_BIN_PATH\" + echo + echo Please enter the full path to mkoctfile \(currently set to \"$OCTAVE_MKOCT\", system version used, if left empty\) + read OCTAVE_MKOCT + if [ "$OCTAVE_MKOCT" == "" ]; + then + OCTAVE_MKOCT=`which mkoctfile` + if [ "$OCTAVE_MKOCT" == "" ]; + then + OCTAVE_MKOCT=$(dirname $OCTAVE_BIN_PATH)/mkoctfile + if [ ! -f OCTAVE_MKOCT ]; + then + echo mkoctfile not found + exit -1 + fi + fi + fi + echo '=>' Setting mkoctfile\'s path to \"$OCTAVE_MKOCT\" + echo + MATLAB_BIN_PATH= +fi +if [ "$INTERPRETER" == 'matlab' ]; +then + echo Please enter the full path to matlab \(currently set to \"$MATLAB_BIN_PATH\", system version used, if left empty\) + read MATLAB_BIN_PATH + if [ "${MATLAB_BIN_PATH}" == "" ]; + then + MATLAB_BIN_PATH=`which matlab` + if [ "$MATLAB_BIN_PATH" == "" ]; + then + echo matlab not found + exit -1 + fi + fi + if [ ! 
-f $MATLAB_BIN_PATH ]; + then + echo matlab not found + exit -1 + fi + echo '=>' Setting matlab\'s path to \"$MATLAB_BIN_PATH\" + echo + echo Please enter the full path to mex binary \(currently set to \"$MATLAB_MEX_PATH\", system version used if left empty\) + read MATLAB_MEX_PATH + if [ "$MATLAB_MEX_PATH" == "" ]; + then + MATLAB_MEX_PATH=`which mex` + if [ "$MATLAB_MEX_PATH" == "" ]; + then + echo mex not found + exit -1 + fi + fi + if [ ! -f "$MATLAB_MEX_PATH" ]; + then + echo mex not found + exit -1 + fi + echo '=>' Setting mex\' path to \"$MATLAB_MEX_PATH\" + echo + echo Please enter the full path to the matlab include directory \(currently set to \"$MATLAB_INCLUDE_DIR\", system version used, if left empty\) + read MATLAB_INCLUDE_DIR + if [ "$MATLAB_INCLUDE_DIR" == "" ]; + then + MATLAB_INCLUDE_DIR=$(dirname $MATLAB_BIN_PATH)/../extern/include + fi + if [ ! -d "$MATLAB_INCLUDE_DIR" ]; + then + echo matlab include dir not found + exit -1 + fi + echo '=>' Setting matlab\'s include directory to \"$MATLAB_INCLUDE_DIR\" + echo + OCTAVE_BIN_PATH= +fi + +cp -p bin/deseq2_config.sh bin/deseq2_config.sh.bk +grep -v -e OCTAVE_BIN_PATH -e OCTAVE_MKOCT -e MATLAB_BIN_PATH -e MATLAB_MEX_PATH -e MATLAB_INCLUDE_DIR \ + -e DESEQ2_PATH -e DESEQ2_SRC_PATH -e DESEQ2_BIN_PATH \ + -e INTERPRETER -e SAMTOOLS_DIR -e PYTHON_PATH -e SCIPY_PATH -e R_PATH -e $DESEQ2_VERSION bin/deseq2_config.sh.bk \ + > bin/deseq2_config.sh +echo +echo +echo generating config file + +echo export DESEQ2_VERSION=$DESEQ2_VERSION >> bin/deseq2_config.sh +echo export DESEQ2_PATH=$DESEQ2_PATH >> bin/deseq2_config.sh +echo export DESEQ2_SRC_PATH=${DESEQ2_PATH}/src >> bin/deseq2_config.sh +echo export DESEQ2_BIN_PATH=${DESEQ2_PATH}/bin >> bin/deseq2_config.sh +echo export INTERPRETER=$INTERPRETER >> bin/deseq2_config.sh +echo export MATLAB_BIN_PATH=$MATLAB_BIN_PATH >> bin/deseq2_config.sh +echo export MATLAB_MEX_PATH=$MATLAB_MEX_PATH >> bin/deseq2_config.sh +echo export MATLAB_INCLUDE_DIR=$MATLAB_INCLUDE_DIR >> 
bin/deseq2_config.sh +echo export OCTAVE_BIN_PATH=$OCTAVE_BIN_PATH >> bin/deseq2_config.sh +echo export OCTAVE_MKOCT=$OCTAVE_MKOCT >> bin/deseq2_config.sh +echo export SAMTOOLS_DIR=$SAMTOOLS_DIR >> bin/deseq2_config.sh +echo export PYTHON_PATH=$PYTHON_PATH >> bin/deseq2_config.sh +echo export SCIPY_PATH=$SCIPY_PATH >> bin/deseq2_config.sh +echo export R_PATH=$R_PATH >> bin/deseq2_config.sh + +echo +echo Done. +echo diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/src/deseq2-hts.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/src/deseq2-hts.sh Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,88 @@ +#/bin/bash +## +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# Copyright (C) 2009-2013 Max Planck Society & Memorial Sloan-Kettering Cancer Center +## + +set -e + +PROG=`basename $0` +DIR=`dirname $0` + +. ${DIR}/../bin/deseq2_config.sh + +echo +echo ${PROG}: Oqtans http://galaxy.cbio.mskcc.org Galaxy wrapper for the DESeq2 version $DESEQ2_VERSION. +echo +echo DESeq2 performs differential gene expression analysis based on the negative binomial distribution. +echo + +ANNO_INPUT=${1} +shift +DESEQ_RES_FILE=${1} +shift +GENES_FN=${1} +shift + +mkdir -p `dirname $GENES_FN` +touch ${GENES_FN} + +echo %%%%%%%%%%%%%%%%%%%%%%% +echo % 1. Data preparation % +echo %%%%%%%%%%%%%%%%%%%%%%% +echo +echo load the genome annotation in GFF3 format and create an annotation object +${PYTHON_PATH} ${DIR}/../tools/GFFParser.py ${ANNO_INPUT} ${GENES_FN} +echo +echo genome annotation stored in $GENES_FN +echo + +FITTYP=${1} +shift + +echo %%%%%%%%%%%%%%%%%%%% +echo % 2. Read counting % +echo %%%%%%%%%%%%%%%%%%%% +echo +echo checking for BAM index... +for REPLICATE_GROUP in $@ +do + IFS=':' + for BAM_FILE in ${REPLICATE_GROUP} + do + echo + if [ ! 
-f ${BAM_FILE}.bai ] + then + echo "Indexing $BAM_FILE" + ${SAMTOOLS_DIR}/samtools index $BAM_FILE + else + echo "$BAM_FILE already indexed" + fi + echo + done +done +echo counting reads overlapping exons using given alignments... +tmpfile=`mktemp --tmpdir=/tmp` + +echo "${DIR}/../bin/get_read_counts ${GENES_FN} $tmpfile $@" +${DIR}/../bin/get_read_counts ${GENES_FN} $tmpfile "$@" +echo counted reads map to each feature. +echo + +echo %%%%%%%%%%%%%%%%%%%%%%%%%%% +echo % 3. Differential testing % +echo %%%%%%%%%%%%%%%%%%%%%%%%%%% +echo +echo testing genes for differential expression using given read alignments + + +echo "cat ${DIR}/../src/difftest_deseq2.R | $R_PATH --slave --args $tmpfile ${DESEQ_RES_FILE} $#" +(cat ${DIR}/../src/difftest_deseq2.R | $R_PATH --slave --args ${FITTYP} $tmpfile ${DESEQ_RES_FILE} 2>&1 || (echo R script execution failed 1>&2)) + +echo %%%%%%%% +echo % Done % +echo %%%%%%%% diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/src/deseq_config.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/src/deseq_config.m Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,55 @@ +function deseq_config +% DESEQ2_CONFIG Sets a few global variables with system dependent paths. +% +% +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. 
+% +% Written (W) 2009-2011 Regina Bohnert, Gunnar Raetsch +% Copyright (C) 2009-2011 Max Planck Society +% + +% paths +global DESEQ2_PATH DESEQ2_SRC_PATH + +% interpreter paths +global INTERPRETER MATLAB_BIN_PATH OCTAVE_BIN_PATH + +% SAMTools path +global SAMTOOLS_DIR + +% configuration (adapt to the user's configuration) +DESEQ2_PATH = getenv('DESEQ2_PATH'); +DESEQ2_SRC_PATH = getenv('DESEQ2_SRC_PATH'); +INTERPRETER = getenv('INTERPRETER'); +MATLAB_BIN_PATH = getenv('MATLAB_BIN_PATH'); +OCTAVE_BIN_PATH = getenv('OCTAVE_BIN_PATH'); +SAMTOOLS_DIR = getenv('SAMTOOLS_DIR'); + +% switch off a few expected warnings +addpath(sprintf('%s/tools', DESEQ2_PATH)); +engine=''; +lserve=license; +if ~isequal(lserve, 'GNU General Public License'), + engine='matlab'; +else + engine='octave'; +end; +if isequal(engine, 'octave'), + warning('off', 'Octave:precedence-change'); + warning('off', 'Octave:function-name-clash'); + warning('off', ''); + warning('off', 'Octave:num-to-str'); + warning('off', 'Octave:function-name-clash'); + warning('off', 'Octave:divide-by-zero'); + warning('off', 'Octave:future-time-stamp'); + warning('off', 'Octave:assign-as-truth-value'); +else + warning('off', 'MATLAB:typeaheadBufferOverflow'); +end + +% make sure no process stops with a debug prompt +global g_ignore_keyboard +g_ignore_keyboard = 1; diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/src/difftest_deseq2.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/src/difftest_deseq2.R Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,78 @@ +### load DESeq package +suppressMessages(require("DESeq2")) + +### get arguments 1: INFILE, 2: OUTFILE 3:SIZE +args <- commandArgs() +FITTYP<-args[4] +INFILE<-args[5] +OUTFILE<-args[6] + +INFILE_COUNTS=c(paste(INFILE, "_COUNTS.tab", sep="")) +INFILE_CONDS=c(paste(INFILE, "_CONDITIONS.tab", sep="")) + +### read count data from file +countsTable <- read.delim( INFILE_COUNTS, header=TRUE, stringsAsFactors=TRUE) +condsTable <- read.delim( INFILE_CONDS, 
header=TRUE, stringsAsFactors=TRUE) + +tagnames <- countsTable[-(1:2), 1] +#print(tagnames) + +## use gene IDs as row names +rownames( countsTable ) <- countsTable$gene +countsTable <- countsTable[ , -1 ] +#print(countsTable) + +conditions<-factor( condsTable[ , 2] ) +#print(conditions) + +## unique condition to define the pair of tests +uniq_conds <- unique(conditions) +#print(uniq_conds) + +## all possible pairs of conditions +pw_tests <- list() +for(i in 1:(length(uniq_conds)-1)) { + for(j in (i+1):length(uniq_conds)) { + pw_tests[[length(pw_tests)+1]] <- c(uniq_conds[i], uniq_conds[j]) + } +} +#print(pw_tests) + +tab <- NULL +## testing all possible pairs of conditions +for(i in 1:length(pw_tests)) { + ## header name + test_pair_name <- c(paste(pw_tests[[i]][1], "__vs__", pw_tests[[i]][2], sep="")) + #print(test_pair_name) + ## colnames respective to the test pair + sub.data <- subset(condsTable, (conditions %in% c(pw_tests[[i]][1],pw_tests[[i]][2]))) + #print(sub.data) + #print(sub.data[[1]]) # sample file name + #print(sub.data[[2]]) # condition + #print(sub.data[[3]]) # replicates + colData <- data.frame(row.names=sub.data[[1]], condition=sub.data[[2]], libType=sub.data[[3]]) + #print(colData) + #print(countsTable[(sub.data[[1]])]) + dds <- DESeqDataSetFromMatrix(countData=countsTable[(sub.data[[1]])], colData=colData, design=~condition) + colData(dds)$condition <- factor(colData(dds)$condition, levels=unique(sub.data[[2]])) + dds <- DESeq(dds, fitType=FITTYP) + ## concatenate the results + tested_pairs <- results(dds) + #print(typeof(tested_pairs)) + + colnames(tested_pairs) <- paste(test_pair_name, colnames(tested_pairs), sep=":") + #print(colnames(tested_pairs)) + #print(tested_pairs) + + tab_tmp <- tested_pairs[tagnames,] + if(is.null(tab)) { + tab<- as.data.frame(tab_tmp) + } + else tab<- cbind(tab, as.data.frame(tab_tmp)) +} +## TODO cbind creates a X character to the string start place. 
+colnames(tab) <- gsub("^X", "", colnames(tab)) +## adding gene names to the row +tab <- cbind(Feature=row.names(tab_tmp), tab) +## priting the result +write.table(tab, OUTFILE, quote=F, sep="\t", row.names=F) diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/src/genes_cell2struct.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/src/genes_cell2struct.m Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,28 @@ +function genes_cell2struct(anno_fname) +% GENES_CELL2STRUCT Converts genes stored as a cell to struct. +% +% genes_cell2struct(anno_fname) +% +% -- input -- +% anno_fname: name of file where genes as cell are stored +% +% -- output -- +% genes as a struct + +load(anno_fname, 'genes'); +if iscell(genes) + genes_cell = genes; + clear genes; + for g = 1:length(genes_cell), + gene = genes_cell{g}; + for e = 1:length(gene.exons) + gene.exons{e} = double(gene.exons{e}); + end + gene.exons = reshape(gene.exons, 1, length(gene.exons)); + gene.id = double(gene.id); + gene.start = double(gene.start); + gene.stop = double(gene.stop); + genes(g) = gene; + end +save(anno_fname, 'genes'); +end diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/src/get_read_counts.m --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/src/get_read_counts.m Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,172 @@ +function get_read_counts(anno_dir, outfile, varargin) +% +% -- input -- +% anno_dir: directory of genes +% outfile: output file +% varargin: list of BAM files (at least two) + +% DESeq paths +global DESEQ2_PATH DESEQ2_SRC_PATH + +% interpreter paths +global INTERPRETER MATLAB_BIN_PATH OCTAVE_BIN_PATH + +% SAMTools path +global SAMTOOLS_DIR + +%%%% paths +addpath(sprintf('%s/tools', DESEQ2_PATH)); +addpath(sprintf('%s/mex', DESEQ2_PATH)); +addpath(sprintf('%s', DESEQ2_SRC_PATH)); + +deseq_config; + +%%% read list of replicate groups from variable length argument list +rg_list = cell(1,size(varargin, 2)); +file_list = cell(); +file_cond_ids = []; +file_rep_ids = []; +for 
idx = 1:size(varargin, 2) + rg_list(idx) = varargin(idx); +end +idx = strmatch('', rg_list, 'exact'); +rg_list(idx) = []; +for idx = 1:length(rg_list), + items = separate(rg_list{idx}, ':'); + for idx2 = 1:length(items) + if isempty(deblank(items{idx2})), + continue; + end; + file_list{end + 1} = items{idx2}; + file_cond_ids(end + 1) = idx; + file_rep_ids(end + 1) = idx2; + end; +end; +clear idx idx2; + +%%%%% adapt to number of input arguments +file_num = length(file_list); +RESULTS = cell(1, file_num); + +%%%% get annotation file +load(sprintf('%s', anno_dir)); + +%%%%% mask overlapping gene regions -> later not counted +[genes] = mask_dubl(genes,0); + +%%%% remove genes with no annotated exons or where no +idx = find(arrayfun(@(x)(~isempty(x.exons)*~isempty(x.start)*~isempty(x.stop)), genes)); +fprintf('removed %i of %i genes, which had either no exons annotated or lacked a start or stop position\n', size(genes, 2) - size(idx, 2), size(genes, 2)) +genes = genes(idx); +clear idx; + +%%%% check if genes have field chr_num +if ~isfield(genes, 'chr_num') + chrms = unique({genes(:).chr}); + for i = 1:length(genes) + genes(i).chr_num = strmatch(genes(i).chr, chrms, 'exact'); + end; +end; + +%%%% iterate over all given bam files +for f_idx = 1:file_num + expr1_bam = fullfile('', file_list{f_idx}); + STAT = cell(size(genes, 2),1); + for i=1:size(genes,2) + RESULT = cell(1,7); + gene = genes(i); + RESULT{4} = f_idx; + RESULT{1} = gene.name; + if isempty(gene.exons) + RESULT{2} = inf; + RESULT{3} = inf; + RESULT{5} = [inf,inf]; + STAT{i} = RESULT; + continue; + elseif or(isempty(gene.start),isempty(gene.stop)) + RESULT{2} = inf; + RESULT{3} = inf; + RESULT{5} = [inf,inf]; + STAT{i} = RESULT; + continue; + end + if ~isempty(gene.chr_num), + [mask1, read_intron_list] = get_reads(expr1_bam, gene.chr, gene.start, gene.stop, '0'); + clear read_intron_list; + else + mask1 = []; + end; + + if isempty(mask1) + reads1 = zeros(0,gene.stop-gene.start+1); + else + reads1 = 
sparse(mask1(1,:)',mask1(2,:)',ones(size(mask1,2),1),max(mask1(1,:)),gene.stop-gene.start+1); + end + if ~isempty(reads1); + [reads1,FLAG] = remove_reads_from_other_genes(reads1,gene); + end + L = size(reads1); + RESULT{2}=[size(reads1,1)]; % number of all reads falling in that gene + EXON_IDX=zeros(1,gene.stop-gene.start+1); + for t=1:size(gene.transcripts,2) + for e=1:size(gene.exons{t},1) + EXON_IDX((gene.exons{t}(e,1)-gene.start+1):(gene.exons{t}(e,2)-gene.start+1))=1; + end + end + reads1 = reads1(sum(reads1(:,find(EXON_IDX)),2)>0,:); + L1 = sum(EXON_IDX); + RESULT{3}=[size(reads1,1)]; % number of reads overlapping to exons + RESULT{5}=[L, L1]; % size of reads1, number of exonic positions + % old and weighted poisson new ,weighted regions reads and + % unexplained reads + clear reads1; + STAT{i} = RESULT; + end; + RESULTS{f_idx} = STAT; +end; + +S=size(genes,2); +READCOUNTS_ALL=zeros(S, file_num); +READCOUNTS_EXON=zeros(S, file_num); +LENGTH_ALL=zeros(S,file_num); +LEN_EXON=zeros(S, file_num); + +for j=1:file_num, + for i=1:S + T=RESULTS{j}{i}; + if isempty(T) + continue + else + READCOUNTS_ALL(i,j)=T{2}; + READCOUNTS_EXON(i,j)=T{3}; + LENGTH_ALL(i,j)=T{5}(1); + LEN_EXON(i,j)=T{5}(2); + end + end +end + +%%%%% write results for all bam files +fid_conditions = fopen(sprintf('%s_CONDITIONS.tab', outfile), 'w'); +fid_counts = fopen(sprintf('%s_COUNTS.tab', outfile) ,'w'); +fprintf(fid_counts,'gene'); +fprintf(fid_conditions, 'file\tcondition\treplicate\n'); +for j = 1:length(file_list) + fname = file_list{j} ; + fname = separate(fname, '/'); + fname = fname{end}; + fname = strrep(fname, '.bam', '') ; + fprintf(fid_counts,'\t%s', fname); + fprintf(fid_conditions, '%s\t%i\t%i\n', fname, file_cond_ids(j), file_rep_ids(j)); +end; +fprintf(fid_counts,'\n') ; + +for i = 1:size(genes,2) + fprintf(fid_counts,'%s',genes(i).name); + for j = 1:length(file_list), + fprintf(fid_counts,'\t%i', READCOUNTS_EXON(i,j)); + end + fprintf(fid_counts,'\n'); +end +fclose(fid_counts); 
+fclose(fid_conditions);
+exit;
diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/src/mask_dubl.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_2.0/src/mask_dubl.m Tue Oct 08 08:15:34 2013 -0400
@@ -0,0 +1,89 @@
+function [new_genes]=mask_dubl(genes,THRESH);
+% MASK_DUBL  Record, for every gene, the regions it shares with other genes.
+%
+%   new_genes = mask_dubl(genes, THRESH)
+%
+%   genes     : struct array (genes are enumerated along dimension 2) with
+%               the fields chr, start, stop, transcripts and exons;
+%               exons{t} holds the [start stop] rows of transcript t.
+%   THRESH    : not used; kept so that existing callers keep working.
+%   new_genes : copy of genes with the extra field non_unique_regions: the
+%               [start stop] rows of the other genes on the same chromosome
+%               that intersect the start..stop span of the gene ([] when
+%               there are none).
+%
+%   Genes without a start or stop position are left out of the comparison
+%   instead of aborting the whole run.
+
+new_genes=genes;
+NUM_GENES=size(genes,2);
+
+% map every chromosome name to a numeric id
+[CHROMOSOMES,TEMP,CHR_IDX]=unique({genes.chr});
+
+% one row per gene: [gene index, start, stop, chromosome id]
+INFO=zeros(NUM_GENES,4);
+for i=1:NUM_GENES
+  if isempty(genes(i).start) || isempty(genes(i).stop)
+    % chromosome id 0 matches no chromosome, so the gene is never compared
+    INFO(i,:)=[i,0,0,0];
+  else
+    INFO(i,:)=[i,genes(i).start,genes(i).stop,CHR_IDX(i)];
+  end
+end
+
+for chr=1:length(CHROMOSOMES)
+  % genes of this chromosome, ordered by start position
+  GENES_ON_CHR=INFO(INFO(:,4)==chr,:);
+  [TEMP,POS]=sort(GENES_ON_CHR(:,2));
+  GENES_ON_CHR=GENES_ON_CHR(POS,:);
+  STARTS=GENES_ON_CHR(:,2);
+  STOPS=GENES_ON_CHR(:,3);
+  for i=1:size(GENES_ON_CHR,1)
+    % candidates: from the first gene ending at or after our start up to
+    % the last gene beginning at or before our stop, gene i itself excluded
+    MIN_START=find(STOPS>=STARTS(i),1,'first');
+    MAX_STOP=find(STARTS<=STOPS(i),1,'last');
+    NEIGHBOURS=[];
+    if ~isempty(MIN_START)
+      NEIGHBOURS=[NEIGHBOURS,MIN_START:(i-1)];
+    end
+    if ~isempty(MAX_STOP)
+      NEIGHBOURS=[NEIGHBOURS,(i+1):MAX_STOP];
+    end
+
+    EXONS=[];
+    for CURR=NEIGHBOURS
+      EXONS=[EXONS;gene_regions(genes(GENES_ON_CHR(CURR,1)))];
+    end
+
+    % keep only intervals that intersect gene i; the emptiness check
+    % avoids indexing the columns of [] when nothing was collected
+    if ~isempty(EXONS)
+      EXONS=EXONS(EXONS(:,2)>=STARTS(i) & EXONS(:,1)<=STOPS(i),:);
+    end
+    new_genes(GENES_ON_CHR(i,1)).non_unique_regions=EXONS;
+  end
+end
+
+% make sure the field exists even if every gene was left out above
+if NUM_GENES>0 && ~isfield(new_genes,'non_unique_regions')
+  new_genes(1).non_unique_regions=[];
+end
+
+
+function REGIONS=gene_regions(GENE)
+% Intervals occupied by GENE: the exons of every transcript, or the whole
+% start..stop span where transcripts or exons are not annotated.
+REGIONS=[];
+if isempty(GENE.transcripts)
+  REGIONS=[GENE.start,GENE.stop];
+  return;
+end
+for tra=1:size(GENE.transcripts,2)
+  if isempty(GENE.exons)
+    REGIONS=[REGIONS;GENE.start,GENE.stop];
+  else
+    REGIONS=[REGIONS;GENE.exons{tra}];
+  end
+end
diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/src/remove_reads_from_other_genes.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_2.0/src/remove_reads_from_other_genes.m Tue Oct 08 08:15:34 2013 -0400
@@ -0,0 +1,33 @@
+function [READS_OUT,FLAG]=remove_reads_from_other_genes(READS,GENE)
+% REMOVE_READS_FROM_OTHER_GENES  Drop reads that could come from other genes.
+%
+%   [READS_OUT, FLAG] = remove_reads_from_other_genes(READS, GENE)
+%
+%   READS     : matrix with one row per read and one column per position of
+%               GENE (column k stands for position GENE.start+k-1).
+%   GENE      : struct with the fields start and stop and, optionally,
+%               non_unique_regions ([start stop] rows).
+%   READS_OUT : READS without the rows whose entries all lie inside the
+%               non-unique regions.
+%   FLAG      : 1 if GENE has the field non_unique_regions (filter applied),
+%               0 otherwise (READS is returned unchanged).
+
+READS_OUT=READS;
+FLAG=0;
+if ~isfield(GENE,'non_unique_regions')
+  return;
+end
+FLAG=1;
+
+% mark the positions of GENE that are covered by a non-unique region
+SHARED=false(1,GENE.stop-GENE.start+1);
+for i=1:size(GENE.non_unique_regions,1)
+  FIRST=max(GENE.non_unique_regions(i,1),GENE.start)-GENE.start+1;
+  LAST=min(GENE.non_unique_regions(i,2),GENE.stop)-GENE.start+1;
+  SHARED(FIRST:LAST)=true;
+end
+
+% a read is dropped when its row sum over the shared positions equals its
+% row sum over all positions, i.e. nothing of it lies outside those regions
+ONLY_SHARED=sum(READS(:,SHARED),2)==sum(READS,2);
+READS_OUT=READS(~ONLY_SHARED,:);
diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001872.bam
Binary file deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001872.bam has changed
diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001872.bam.bai
Binary file deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001872.bam.bai has changed
diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001875.bam
Binary file deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001875.bam has changed
diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001875.bam.bai
Binary file
deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001875.bam.bai has changed diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions.gff3 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions.gff3 Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,747 @@ +##gff-version 3 +##Seqid Source Type Start End Score Phase Attributes +I Coding_transcript gene 11495 16831 . + . ID=Gene:Gene:Y74C9A.2.2 +I Coding_transcript mRNA 11495 16793 . + . ID=Transcript:Gene:Y74C9A.2.2.1;Parent=Gene:Gene:Y74C9A.2.2 +I Coding_transcript five_prime_UTR 11495 11561 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript three_prime_UTR 16702 16793 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1 +I Coding_transcript mRNA 11499 16790 . + . ID=Transcript:Gene:Y74C9A.2.2.2;Parent=Gene:Gene:Y74C9A.2.2 +I Coding_transcript five_prime_UTR 11499 11557 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript three_prime_UTR 16586 16790 . 
+ . ID=three_prime_UTR:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2 +I Coding_transcript mRNA 11499 16831 . + . ID=Transcript:Gene:Y74C9A.2.2.3;Parent=Gene:Gene:Y74C9A.2.2 +I Coding_transcript five_prime_UTR 11499 11561 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript three_prime_UTR 16586 16831 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3 +I Coding_transcript mRNA 11505 16790 . + . ID=Transcript:Gene:Y74C9A.2.2.4;Parent=Gene:Gene:Y74C9A.2.2 +I Coding_transcript five_prime_UTR 11505 11561 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript five_prime_UTR 11623 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript three_prime_UTR 16586 16790 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4 +I Coding_transcript mRNA 11618 16804 . + . ID=Transcript:Gene:Y74C9A.2.2.5;Parent=Gene:Gene:Y74C9A.2.2 +I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5 +I Coding_transcript CDS 11641 11689 . 
+ 0 ID=CDS:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5 +I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5 +I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5 +I Coding_transcript three_prime_UTR 16586 16804 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5 +I Coding_transcript gene 47472 49416 . + . ID=Gene:Gene:Y48G1C.12 +I Coding_transcript mRNA 47472 49416 . + . ID=Transcript:Gene:Y48G1C.12.1;Parent=Gene:Gene:Y48G1C.12 +I Coding_transcript CDS 47472 47610 . + 0 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1 +I Coding_transcript CDS 47696 47858 . + 2 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1 +I Coding_transcript CDS 48348 48530 . + 1 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1 +I Coding_transcript CDS 49251 49416 . + 1 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1 +I Coding_transcript gene 71858 81071 . + . ID=Gene:Gene:Y48G1C.2.1 +I Coding_transcript mRNA 71858 81071 . + . ID=Transcript:Gene:Y48G1C.2.1.1;Parent=Gene:Gene:Y48G1C.2.1 +I Coding_transcript five_prime_UTR 71858 71932 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript five_prime_UTR 72511 72590 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript five_prime_UTR 72647 72648 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 72649 72897 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 72941 73017 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 73605 73766 . + 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 74625 74808 . + 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 75171 75490 . 
+ 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 75951 76112 . + 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 76949 77153 . + 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 77713 77799 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 79313 79447 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript CDS 80306 80344 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript three_prime_UTR 80345 81071 . + . ID=three_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1 +I Coding_transcript mRNA 71878 81063 . + . ID=Transcript:Gene:Y48G1C.2.1.2;Parent=Gene:Gene:Y48G1C.2.1 +I Coding_transcript five_prime_UTR 71878 71932 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript five_prime_UTR 72511 72590 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript five_prime_UTR 72647 72648 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 72649 72897 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 72941 73017 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 73605 73766 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 74625 74808 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 75171 75490 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 75951 76112 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 76949 77153 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 77713 77799 . 
+ 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 79313 79447 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript CDS 80306 80344 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript three_prime_UTR 80345 80561 . + . ID=three_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript three_prime_UTR 80814 81063 . + . ID=three_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2 +I Coding_transcript mRNA 72511 80344 . + . ID=Transcript:Gene:Y48G1C.2.1.3;Parent=Gene:Gene:Y48G1C.2.1 +I Coding_transcript five_prime_UTR 72511 72590 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript five_prime_UTR 72647 72648 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 72649 72897 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 72941 73017 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 73605 73766 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 74625 74808 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 75171 75490 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 75951 76112 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 76949 77153 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 77713 77799 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 79313 79447 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript CDS 80306 80344 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3 +I Coding_transcript gene 86841 90607 . + . 
ID=Gene:Gene:Y48G1C.11 +I Coding_transcript mRNA 86841 90607 . + . ID=Transcript:Gene:Y48G1C.11.1;Parent=Gene:Gene:Y48G1C.11 +I Coding_transcript CDS 86841 86904 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 87034 87223 . + 2 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 87520 87734 . + 1 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 88268 88500 . + 2 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 88566 88706 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 89372 89584 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript CDS 90419 90607 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1 +I Coding_transcript gene 91380 92877 . + . ID=Gene:Gene:Y48G1C.9.1 +I Coding_transcript mRNA 91380 92654 . + . ID=Transcript:Gene:Y48G1C.9.1.1;Parent=Gene:Gene:Y48G1C.9.1 +I Coding_transcript five_prime_UTR 91380 91408 . + . ID=five_prime_UTR:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1 +I Coding_transcript CDS 91409 91594 . + 0 ID=CDS:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1 +I Coding_transcript CDS 92523 92648 . + 0 ID=CDS:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1 +I Coding_transcript three_prime_UTR 92649 92654 . + . ID=three_prime_UTR:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1 +I Coding_transcript mRNA 91404 92877 . + . ID=Transcript:Gene:Y48G1C.9.1.2;Parent=Gene:Gene:Y48G1C.9.1 +I Coding_transcript five_prime_UTR 91404 91408 . + . ID=five_prime_UTR:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2 +I Coding_transcript CDS 91409 91594 . + 0 ID=CDS:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2 +I Coding_transcript CDS 92523 92648 . + 0 ID=CDS:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2 +I Coding_transcript three_prime_UTR 92649 92877 . + . 
ID=three_prime_UTR:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2 +I Coding_transcript gene 113807 114681 . + . ID=Gene:Gene:F53G12.8 +I Coding_transcript mRNA 113807 114681 . + . ID=Transcript:Gene:F53G12.8.1;Parent=Gene:Gene:F53G12.8 +I Coding_transcript CDS 113807 113863 . + 0 ID=CDS:Gene:F53G12.8.1;Parent=Transcript:Gene:F53G12.8.1 +I Coding_transcript CDS 114085 114423 . + 0 ID=CDS:Gene:F53G12.8.1;Parent=Transcript:Gene:F53G12.8.1 +I Coding_transcript CDS 114544 114681 . + 0 ID=CDS:Gene:F53G12.8.1;Parent=Transcript:Gene:F53G12.8.1 +I Coding_transcript gene 115739 117438 . + . ID=Gene:Gene:F53G12.7 +I Coding_transcript mRNA 115739 117438 . + . ID=Transcript:Gene:F53G12.7.1;Parent=Gene:Gene:F53G12.7 +I Coding_transcript CDS 115739 115915 . + 0 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1 +I Coding_transcript CDS 116432 116666 . + 0 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1 +I Coding_transcript CDS 116719 116974 . + 2 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1 +I Coding_transcript CDS 117086 117401 . + 1 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1 +I Coding_transcript three_prime_UTR 117402 117438 . + . ID=three_prime_UTR:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1 +I Coding_transcript gene 127297 134263 . + . ID=Gene:Gene:F53G12.5b +I Coding_transcript mRNA 127297 134263 . + . ID=Transcript:Gene:F53G12.5b.1;Parent=Gene:Gene:F53G12.5b +I Coding_transcript CDS 127297 127336 . + 0 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 127385 127436 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 128697 128896 . + 1 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 128945 129124 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 129176 129333 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 132103 132553 . 
+ 0 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 132612 132693 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript CDS 133414 133582 . + 1 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript three_prime_UTR 133583 134263 . + . ID=three_prime_UTR:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1 +I Coding_transcript mRNA 128693 133809 . + . ID=Transcript:Gene:F53G12.5b.2;Parent=Gene:Gene:F53G12.5b +I Coding_transcript five_prime_UTR 128693 128697 . + . ID=five_prime_UTR:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 128698 128896 . + 0 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 128945 129124 . + 2 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 129167 129333 . + 2 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 132103 132553 . + 0 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 132612 132693 . + 2 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript CDS 133414 133582 . + 1 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript three_prime_UTR 133583 133809 . + . ID=three_prime_UTR:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2 +I Coding_transcript mRNA 128697 133582 . + . ID=Transcript:Gene:F53G12.5b.3;Parent=Gene:Gene:F53G12.5b +I Coding_transcript five_prime_UTR 128697 128697 . + . ID=five_prime_UTR:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 128698 128896 . + 0 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 128945 129124 . + 2 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 129167 129333 . + 2 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 132103 132553 . 
+ 0 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 132612 132693 . + 2 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript CDS 133414 133582 . + 1 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3 +I Coding_transcript gene 134337 137282 . + . ID=Gene:Gene:F53G12.4 +I Coding_transcript mRNA 134337 137282 . + . ID=Transcript:Gene:F53G12.4.1;Parent=Gene:Gene:F53G12.4 +I Coding_transcript five_prime_UTR 134337 134353 . + . ID=five_prime_UTR:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript CDS 134354 134428 . + 0 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript CDS 134506 134581 . + 0 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript CDS 135549 135898 . + 2 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript CDS 136235 136712 . + 0 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript CDS 137209 137282 . + 2 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1 +I Coding_transcript gene 137845 144565 . + . ID=Gene:Gene:F53G12.3 +I Coding_transcript mRNA 137845 144565 . + . ID=Transcript:Gene:F53G12.3.1;Parent=Gene:Gene:F53G12.3 +I Coding_transcript CDS 137845 137886 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 138017 138143 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 138193 138351 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 138393 138782 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 138829 139032 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 139080 139331 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 139378 139669 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 139769 139982 . 
+ 1 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 140136 140292 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 140371 140496 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 140554 140870 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 140916 141213 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 141681 141854 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 141900 142023 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 142067 142230 . + 1 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 142278 142477 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 142547 142671 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 142729 142939 . + 1 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 143007 143684 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 144265 144396 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript CDS 144440 144565 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1 +I Coding_transcript gene 173412 175988 . + . ID=Gene:Gene:F56C11.6b +I Coding_transcript mRNA 173412 175932 . + . ID=Transcript:Gene:F56C11.6b.1;Parent=Gene:Gene:F56C11.6b +I Coding_transcript five_prime_UTR 173412 173508 . + . ID=five_prime_UTR:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript five_prime_UTR 173561 173725 . + . ID=five_prime_UTR:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript five_prime_UTR 173775 173873 . + . ID=five_prime_UTR:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 173874 174312 . 
+ 0 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 174605 174832 . + 2 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 174878 175053 . + 2 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 175097 175241 . + 0 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 175579 175708 . + 2 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript CDS 175755 175932 . + 1 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1 +I Coding_transcript mRNA 173412 175988 . + . ID=Transcript:Gene:F56C11.6b.2;Parent=Gene:Gene:F56C11.6b +I Coding_transcript five_prime_UTR 173412 173421 . + . ID=five_prime_UTR:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 173422 173508 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 173561 173725 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 173775 174312 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 174605 174832 . + 2 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 174878 175053 . + 2 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 175097 175241 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 175579 175708 . + 2 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript CDS 175755 175932 . + 1 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript three_prime_UTR 175933 175988 . + . ID=three_prime_UTR:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2 +I Coding_transcript gene 178538 182159 . + . ID=Gene:Gene:F56C11.5b +I Coding_transcript mRNA 178538 182159 . + . ID=Transcript:Gene:F56C11.5b.1;Parent=Gene:Gene:F56C11.5b +I Coding_transcript five_prime_UTR 178538 178566 . 
+ . ID=five_prime_UTR:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript CDS 178567 178620 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript CDS 180600 180698 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript CDS 181251 181514 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript CDS 181663 181767 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript CDS 181814 181966 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript three_prime_UTR 181967 182159 . + . ID=three_prime_UTR:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1 +I Coding_transcript mRNA 180394 181966 . + . ID=Transcript:Gene:F56C11.5b.2;Parent=Gene:Gene:F56C11.5b +I Coding_transcript CDS 180394 180465 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2 +I Coding_transcript CDS 180600 180698 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2 +I Coding_transcript CDS 181251 181514 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2 +I Coding_transcript CDS 181663 181767 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2 +I Coding_transcript CDS 181814 181966 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2 +I Coding_transcript gene 216005 219099 . + . ID=Gene:Gene:Y48G1BL.1 +I Coding_transcript mRNA 216005 219099 . + . ID=Transcript:Gene:Y48G1BL.1.1;Parent=Gene:Gene:Y48G1BL.1 +I Coding_transcript five_prime_UTR 216005 216092 . + . ID=five_prime_UTR:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript five_prime_UTR 216145 216180 . + . ID=five_prime_UTR:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript CDS 216181 216277 . + 0 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript CDS 216331 216910 . 
+ 2 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript CDS 218110 218205 . + 1 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript CDS 218772 218955 . + 1 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript three_prime_UTR 218956 219099 . + . ID=three_prime_UTR:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1 +I Coding_transcript gene 291216 305461 . + . ID=Gene:Gene:C53D5.1c.1 +I Coding_transcript mRNA 291216 305081 . + . ID=Transcript:Gene:C53D5.1c.1.1;Parent=Gene:Gene:C53D5.1c.1 +I Coding_transcript five_prime_UTR 291216 291308 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript five_prime_UTR 295793 295851 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript five_prime_UTR 302596 302671 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 302672 302705 . + 0 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1 +I Coding_transcript mRNA 291216 305461 . + . ID=Transcript:Gene:C53D5.1c.1.2;Parent=Gene:Gene:C53D5.1c.1 +I Coding_transcript five_prime_UTR 291216 291248 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 291249 291308 . + 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 295793 295851 . 
+ 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 302596 302705 . + 1 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript three_prime_UTR 305082 305461 . + . ID=three_prime_UTR:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2 +I Coding_transcript mRNA 295364 305081 . + . ID=Transcript:Gene:C53D5.1c.1.3;Parent=Gene:Gene:C53D5.1c.1 +I Coding_transcript five_prime_UTR 295364 295367 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 295368 295421 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 295793 295851 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 302596 302705 . + 1 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3 +I Coding_transcript mRNA 302596 305081 . + . 
ID=Transcript:Gene:C53D5.1c.1.4;Parent=Gene:Gene:C53D5.1c.1 +I Coding_transcript five_prime_UTR 302596 302671 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 302672 302705 . + 0 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4 +I Coding_transcript gene 347484 348360 . + . ID=Gene:Gene:Y48G1A.2 +I Coding_transcript mRNA 347484 348360 . + . ID=Transcript:Gene:Y48G1A.2.1;Parent=Gene:Gene:Y48G1A.2 +I Coding_transcript five_prime_UTR 347484 347577 . + . ID=five_prime_UTR:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript CDS 347578 347584 . + 0 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript CDS 347633 347709 . + 2 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript CDS 347765 347986 . + 0 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript CDS 348045 348152 . + 0 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript three_prime_UTR 348153 348360 . + . ID=three_prime_UTR:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1 +I Coding_transcript gene 364522 368511 . + . ID=Gene:Gene:R119.7 +I Coding_transcript mRNA 364522 368511 . + . ID=Transcript:Gene:R119.7.1;Parent=Gene:Gene:R119.7 +I Coding_transcript CDS 364522 364682 . + 0 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript CDS 364739 365194 . 
+ 1 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript CDS 365534 365599 . + 1 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript CDS 365647 365963 . + 1 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript CDS 366656 367212 . + 2 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript CDS 367749 367943 . + 0 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript three_prime_UTR 367944 368511 . + . ID=three_prime_UTR:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1 +I Coding_transcript gene 382836 388540 . + . ID=Gene:Gene:R119.6 +I Coding_transcript mRNA 382836 388540 . + . ID=Transcript:Gene:R119.6.1;Parent=Gene:Gene:R119.6 +I Coding_transcript CDS 382836 382916 . + 0 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 382992 383067 . + 0 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 383124 383377 . + 2 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 383432 383567 . + 0 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 384628 385228 . + 2 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 386079 386339 . + 1 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 388049 388153 . + 1 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript CDS 388380 388437 . + 1 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript three_prime_UTR 388438 388540 . + . ID=three_prime_UTR:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1 +I Coding_transcript gene 488619 489908 . + . ID=Gene:Gene:W04C9.2 +I Coding_transcript mRNA 488619 489908 . + . ID=Transcript:Gene:W04C9.2.1;Parent=Gene:Gene:W04C9.2 +I Coding_transcript CDS 488619 488726 . + 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1 +I Coding_transcript CDS 488771 488836 . 
+ 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1 +I Coding_transcript CDS 488885 488947 . + 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1 +I Coding_transcript CDS 489703 489774 . + 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1 +I Coding_transcript three_prime_UTR 489775 489908 . + . ID=three_prime_UTR:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1 +I Coding_transcript gene 489987 496153 . + . ID=Gene:Gene:W04C9.1 +I Coding_transcript mRNA 489987 496153 . + . ID=Transcript:Gene:W04C9.1.1;Parent=Gene:Gene:W04C9.1 +I Coding_transcript five_prime_UTR 489987 490053 . + . ID=five_prime_UTR:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 490054 490205 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 491200 491305 . + 1 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 491353 491562 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 491613 491727 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 491839 492163 . + 2 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 492228 492354 . + 1 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 492870 493559 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 494263 494346 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 494395 494709 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript CDS 495831 496070 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript three_prime_UTR 496071 496153 . + . ID=three_prime_UTR:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1 +I Coding_transcript gene 534461 535347 . + . ID=Gene:Gene:Y65B4BR.8 +I Coding_transcript mRNA 534461 535347 . + . 
ID=Transcript:Gene:Y65B4BR.8.1;Parent=Gene:Gene:Y65B4BR.8 +I Coding_transcript CDS 534461 534572 . + 0 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1 +I Coding_transcript CDS 534621 534727 . + 2 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1 +I Coding_transcript CDS 534775 534973 . + 0 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1 +I Coding_transcript CDS 535034 535197 . + 2 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1 +I Coding_transcript three_prime_UTR 535198 535347 . + . ID=three_prime_UTR:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1 +I Coding_transcript gene 2931462 2947153 . + . ID=Gene:Gene:Y71F9AM.4a +I Coding_transcript mRNA 2931462 2947153 . + . ID=Transcript:Gene:Y71F9AM.4a.1;Parent=Gene:Gene:Y71F9AM.4a +I Coding_transcript CDS 2931462 2931549 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2931617 2931783 . + 2 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2932810 2932974 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2933904 2934137 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2935626 2935717 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2935988 2936131 . + 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2936561 2936882 . + 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2937759 2937919 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2938443 2938677 . + 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2942825 2943021 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2943072 2943204 . 
+ 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2944077 2944223 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2944275 2944382 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript CDS 2946747 2946938 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript three_prime_UTR 2946939 2947153 . + . ID=three_prime_UTR:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1 +I Coding_transcript mRNA 2935624 2944611 . + . ID=Transcript:Gene:Y71F9AM.4a.2;Parent=Gene:Gene:Y71F9AM.4a +I Coding_transcript five_prime_UTR 2935624 2935717 . + . ID=five_prime_UTR:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript five_prime_UTR 2935988 2936042 . + . ID=five_prime_UTR:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2936043 2936131 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2936561 2936882 . + 1 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2937759 2937919 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2938443 2938677 . + 1 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2942825 2943021 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2943072 2943204 . + 1 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2944077 2944223 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript CDS 2944275 2944424 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript three_prime_UTR 2944425 2944611 . + . ID=three_prime_UTR:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2 +I Coding_transcript gene 2929778 2931362 . + . 
ID=Gene:Gene:Y71F9AM.5b +I Coding_transcript mRNA 2929778 2931362 . + . ID=Transcript:Gene:Y71F9AM.5b.1;Parent=Gene:Gene:Y71F9AM.5b +I Coding_transcript five_prime_UTR 2929778 2929840 . + . ID=five_prime_UTR:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript CDS 2929841 2929885 . + 0 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript CDS 2929947 2930071 . + 0 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript CDS 2930175 2930367 . + 1 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript CDS 2931224 2931256 . + 0 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript three_prime_UTR 2931257 2931362 . + . ID=three_prime_UTR:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1 +I Coding_transcript mRNA 2929785 2931344 . + . ID=Transcript:Gene:Y71F9AM.5b.2;Parent=Gene:Gene:Y71F9AM.5b +I Coding_transcript five_prime_UTR 2929785 2929840 . + . ID=five_prime_UTR:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript CDS 2929841 2929885 . + 0 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript CDS 2929947 2930089 . + 0 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript CDS 2930175 2930367 . + 1 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript CDS 2931224 2931256 . + 0 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript three_prime_UTR 2931257 2931344 . + . ID=three_prime_UTR:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2 +I Coding_transcript gene 537125 542200 . + . ID=Gene:Gene:Y65B4BR.4b +I Coding_transcript mRNA 537125 541634 . + . ID=Transcript:Gene:Y65B4BR.4b.1;Parent=Gene:Gene:Y65B4BR.4b +I Coding_transcript five_prime_UTR 537125 537140 . + . ID=five_prime_UTR:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 537141 537246 . 
+ 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 537306 537563 . + 2 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 537609 537838 . + 2 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 538705 538914 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 539456 539730 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 539804 540011 . + 1 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 540067 540387 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 540726 540986 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 541067 541288 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript CDS 541347 541634 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1 +I Coding_transcript mRNA 537125 542200 . + . ID=Transcript:Gene:Y65B4BR.4b.2;Parent=Gene:Gene:Y65B4BR.4b +I Coding_transcript five_prime_UTR 537125 537140 . + . ID=five_prime_UTR:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 537141 537246 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 537306 537563 . + 2 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 537609 537838 . + 2 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 538705 538914 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 539456 539730 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 539798 540011 . + 1 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 540067 540387 . 
+ 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 540726 540986 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 541067 541288 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript CDS 541347 541634 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript three_prime_UTR 541635 542200 . + . ID=three_prime_UTR:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2 +I Coding_transcript gene 562739 565184 . + . ID=Gene:Gene:Y65B4BR.1 +I Coding_transcript mRNA 562739 565184 . + . ID=Transcript:Gene:Y65B4BR.1.1;Parent=Gene:Gene:Y65B4BR.1 +I Coding_transcript five_prime_UTR 562739 562765 . + . ID=five_prime_UTR:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 562766 562872 . + 0 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 562923 563122 . + 1 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 563167 563333 . + 2 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 563377 563460 . + 0 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 563510 563583 . + 0 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript CDS 564612 565113 . + 1 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript three_prime_UTR 565114 565184 . + . ID=three_prime_UTR:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1 +I Coding_transcript gene 618210 621466 . + . ID=Gene:Gene:F56A6.1b +I Coding_transcript mRNA 618210 621424 . + . ID=Transcript:Gene:F56A6.1b.1;Parent=Gene:Gene:F56A6.1b +I Coding_transcript CDS 618210 618360 . + 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 618418 618671 . + 2 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 618716 618915 . 
+ 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 618962 619021 . + 1 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 619072 619190 . + 1 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 619250 619692 . + 2 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 619741 620027 . + 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 620073 620469 . + 1 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript CDS 620516 621175 . + 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript three_prime_UTR 621176 621424 . + . ID=three_prime_UTR:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1 +I Coding_transcript mRNA 618210 621466 . + . ID=Transcript:Gene:F56A6.1b.2;Parent=Gene:Gene:F56A6.1b +I Coding_transcript CDS 618210 618360 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 618418 618671 . + 2 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 618716 618915 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 618962 619021 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 619072 619190 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 619250 619692 . + 2 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 619741 620027 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 620073 620469 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 620516 621171 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript CDS 621218 621314 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript three_prime_UTR 621315 621466 . + . 
ID=three_prime_UTR:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2 +I Coding_transcript gene 720014 731077 . + . ID=Gene:Gene:Y18H1A.6 +I Coding_transcript mRNA 720014 731077 . + . ID=Transcript:Gene:Y18H1A.6.1;Parent=Gene:Gene:Y18H1A.6 +I Coding_transcript five_prime_UTR 720014 720080 . + . ID=five_prime_UTR:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 720081 720256 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 720301 720435 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 720489 720602 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 720647 720787 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 720831 720942 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 722106 722204 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 722247 722358 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 724573 724769 . + 2 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 725112 725228 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 725289 725395 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 726504 726873 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 730284 730535 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript CDS 730839 730940 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript three_prime_UTR 730941 731077 . + . ID=three_prime_UTR:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1 +I Coding_transcript gene 763627 770707 . + . ID=Gene:Gene:T06A4.3a +I Coding_transcript mRNA 763627 770707 . + . 
ID=Transcript:Gene:T06A4.3a.1;Parent=Gene:Gene:T06A4.3a +I Coding_transcript five_prime_UTR 763627 763676 . + . ID=five_prime_UTR:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 763677 763826 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 765140 765276 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 765434 765647 . + 1 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 766050 766151 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 766328 766447 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 766529 766643 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 767585 767736 . + 2 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 768167 768226 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 768276 768450 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 768502 768630 . + 2 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript CDS 770464 770627 . + 2 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript three_prime_UTR 770628 770707 . + . ID=three_prime_UTR:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1 +I Coding_transcript mRNA 763677 768919 . + . ID=Transcript:Gene:T06A4.3a.2;Parent=Gene:Gene:T06A4.3a +I Coding_transcript CDS 763677 763826 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 765140 765276 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 765434 765647 . + 1 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 766050 766151 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 766328 766447 . 
+ 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 766529 766643 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 767585 767736 . + 2 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 768167 768226 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 768276 768450 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 768502 768630 . + 2 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript CDS 768801 768814 . + 2 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript three_prime_UTR 768815 768919 . + . ID=three_prime_UTR:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2 +I Coding_transcript gene 770894 776356 . + . ID=Gene:Gene:T06A4.1b +I Coding_transcript mRNA 770894 776346 . + . ID=Transcript:Gene:T06A4.1b.1;Parent=Gene:Gene:T06A4.1b +I Coding_transcript five_prime_UTR 770894 770968 . + . ID=five_prime_UTR:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 770969 771060 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 771453 771568 . + 1 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 771808 771913 . + 2 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 771970 772156 . + 1 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 773166 773387 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 773437 773548 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 773702 773989 . + 2 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 774970 775066 . + 2 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 775112 775574 . 
+ 1 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript CDS 776166 776303 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript three_prime_UTR 776304 776346 . + . ID=three_prime_UTR:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1 +I Coding_transcript mRNA 770969 776356 . + . ID=Transcript:Gene:T06A4.1b.2;Parent=Gene:Gene:T06A4.1b +I Coding_transcript CDS 770969 771060 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 771453 771568 . + 1 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 771808 771913 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 771970 772156 . + 1 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 773166 773387 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 773437 773548 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 773702 773989 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 774970 775066 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 775112 775389 . + 1 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 775471 775574 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript CDS 776166 776303 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript three_prime_UTR 776304 776356 . + . ID=three_prime_UTR:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2 +I Coding_transcript gene 853461 854133 . + . ID=Gene:Gene:Y95B8A.8 +I Coding_transcript mRNA 853461 854133 . + . ID=Transcript:Gene:Y95B8A.8.1;Parent=Gene:Gene:Y95B8A.8 +I Coding_transcript CDS 853461 853489 . + 0 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1 +I Coding_transcript CDS 853563 853628 . 
+ 1 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1 +I Coding_transcript CDS 853699 853771 . + 1 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1 +I Coding_transcript CDS 853830 853932 . + 0 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1 +I Coding_transcript CDS 853988 854133 . + 2 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1 +I Coding_transcript gene 858489 871831 . + . ID=Gene:Gene:Y95B8A.7 +I Coding_transcript mRNA 858489 871831 . + . ID=Transcript:Gene:Y95B8A.7.1;Parent=Gene:Gene:Y95B8A.7 +I Coding_transcript five_prime_UTR 858489 858568 . + . ID=five_prime_UTR:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 858569 858837 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 861488 861921 . + 1 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 864406 864821 . + 2 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 865733 865936 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 866765 866860 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 868612 868723 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 870196 870326 . + 2 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript CDS 871656 871820 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript three_prime_UTR 871821 871831 . + . ID=three_prime_UTR:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1 +I Coding_transcript gene 882920 890209 . + . ID=Gene:Gene:Y95B8A.6a.2 +I Coding_transcript mRNA 882920 889792 . + . ID=Transcript:Gene:Y95B8A.6a.2.1;Parent=Gene:Gene:Y95B8A.6a.2 +I Coding_transcript five_prime_UTR 882920 883166 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript five_prime_UTR 884906 885042 . + . 
ID=five_prime_UTR:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript five_prime_UTR 886509 886608 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript CDS 886609 886709 . + 0 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript CDS 887882 888060 . + 1 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript CDS 888152 888348 . + 2 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript CDS 889676 889792 . + 0 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1 +I Coding_transcript mRNA 886213 890209 . + . ID=Transcript:Gene:Y95B8A.6a.2.2;Parent=Gene:Gene:Y95B8A.6a.2 +I Coding_transcript five_prime_UTR 886213 886608 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript CDS 886609 886709 . + 0 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript CDS 887882 888060 . + 1 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript CDS 888152 888348 . + 2 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript CDS 889676 889792 . + 0 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript three_prime_UTR 889793 890209 . + . ID=three_prime_UTR:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2 +I Coding_transcript mRNA 887841 889870 . + . ID=Transcript:Gene:Y95B8A.6a.2.3;Parent=Gene:Gene:Y95B8A.6a.2 +I Coding_transcript five_prime_UTR 887841 888153 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3 +I Coding_transcript CDS 888154 888348 . + 0 ID=CDS:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3 +I Coding_transcript CDS 889676 889792 . + 0 ID=CDS:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3 +I Coding_transcript three_prime_UTR 889793 889870 . + . 
ID=three_prime_UTR:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3 +I Coding_transcript gene 897288 905906 . + . ID=Gene:Gene:Y95B8A.5 +I Coding_transcript mRNA 897288 905906 . + . ID=Transcript:Gene:Y95B8A.5.1;Parent=Gene:Gene:Y95B8A.5 +I Coding_transcript five_prime_UTR 897288 897291 . + . ID=five_prime_UTR:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 897292 897357 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 898181 898351 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 900615 900802 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 903105 903192 . + 1 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 903255 903331 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 903384 903513 . + 1 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 904613 904846 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript CDS 905736 905855 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript three_prime_UTR 905856 905906 . + . ID=three_prime_UTR:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1 +I Coding_transcript gene 1003891 1010767 . + . ID=Gene:Gene:C54G6.2 +I Coding_transcript mRNA 1003891 1010767 . + . ID=Transcript:Gene:C54G6.2.1;Parent=Gene:Gene:C54G6.2 +I Coding_transcript CDS 1003891 1004091 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1004703 1005062 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1006029 1006324 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1006461 1006548 . + 1 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1007013 1007136 . 
+ 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1007188 1007273 . + 2 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1008082 1008464 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1009524 1009755 . + 1 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1009820 1009891 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1010207 1010293 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript CDS 1010594 1010767 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1 +I Coding_transcript gene 1018837 1019221 . + . ID=Gene:Gene:Y34D9A.11 +I Coding_transcript mRNA 1018837 1019221 . + . ID=Transcript:Gene:Y34D9A.11.1;Parent=Gene:Gene:Y34D9A.11 +I Coding_transcript five_prime_UTR 1018837 1018844 . + . ID=five_prime_UTR:Gene:Y34D9A.11.1;Parent=Transcript:Gene:Y34D9A.11.1 +I Coding_transcript CDS 1018845 1019159 . + 0 ID=CDS:Gene:Y34D9A.11.1;Parent=Transcript:Gene:Y34D9A.11.1 +I Coding_transcript three_prime_UTR 1019160 1019221 . + . ID=three_prime_UTR:Gene:Y34D9A.11.1;Parent=Transcript:Gene:Y34D9A.11.1 +I Coding_transcript gene 1034474 1040870 . + . ID=Gene:Gene:Y34D9A.3 +I Coding_transcript mRNA 1034474 1040870 . + . ID=Transcript:Gene:Y34D9A.3.1;Parent=Gene:Gene:Y34D9A.3 +I Coding_transcript five_prime_UTR 1034474 1034474 . + . ID=five_prime_UTR:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1034475 1034499 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1034549 1034646 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1034693 1034881 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1034934 1035065 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1035123 1035246 . 
+ 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1035306 1035398 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1037314 1037453 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1037508 1037715 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1039055 1039296 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1039340 1039883 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1040116 1040648 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript CDS 1040700 1040765 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript three_prime_UTR 1040766 1040870 . + . ID=three_prime_UTR:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1 +I Coding_transcript gene 1049596 1050714 . + . ID=Gene:Gene:Y34D9A.6 +I Coding_transcript mRNA 1049596 1050714 . + . ID=Transcript:Gene:Y34D9A.6.1;Parent=Gene:Gene:Y34D9A.6 +I Coding_transcript five_prime_UTR 1049596 1049604 . + . ID=five_prime_UTR:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1 +I Coding_transcript CDS 1049605 1049688 . + 0 ID=CDS:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1 +I Coding_transcript CDS 1050407 1050640 . + 0 ID=CDS:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1 +I Coding_transcript three_prime_UTR 1050641 1050714 . + . ID=three_prime_UTR:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1 +I Coding_transcript gene 1062295 1065271 . + . ID=Gene:Gene:Y34D9A.2 +I Coding_transcript mRNA 1062295 1065271 . + . ID=Transcript:Gene:Y34D9A.2.1;Parent=Gene:Gene:Y34D9A.2 +I Coding_transcript five_prime_UTR 1062295 1062448 . + . ID=five_prime_UTR:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1 +I Coding_transcript CDS 1062449 1062757 . + 0 ID=CDS:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1 +I Coding_transcript CDS 1063602 1064008 . 
+ 0 ID=CDS:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1 +I Coding_transcript CDS 1064656 1064920 . + 1 ID=CDS:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1 +I Coding_transcript three_prime_UTR 1064921 1065271 . + . ID=three_prime_UTR:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1 +I Coding_transcript gene 1068593 1075012 . + . ID=Gene:Gene:R06A10.2.1 +I Coding_transcript mRNA 1068593 1075012 . + . ID=Transcript:Gene:R06A10.2.1.1;Parent=Gene:Gene:R06A10.2.1 +I Coding_transcript five_prime_UTR 1068593 1068997 . + . ID=five_prime_UTR:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1068998 1069090 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1069416 1069577 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1070370 1070489 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1071472 1071569 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1071623 1071751 . + 1 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1071801 1071935 . + 1 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1072675 1072918 . + 1 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript CDS 1074199 1074345 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript three_prime_UTR 1074346 1075012 . + . ID=three_prime_UTR:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1 +I Coding_transcript mRNA 1068613 1074345 . + . ID=Transcript:Gene:R06A10.2.1.2;Parent=Gene:Gene:R06A10.2.1 +I Coding_transcript five_prime_UTR 1068613 1068893 . + . ID=five_prime_UTR:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript five_prime_UTR 1068976 1068997 . + . 
ID=five_prime_UTR:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1068998 1069090 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1069416 1069577 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1070370 1070489 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1071472 1071569 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1071623 1071751 . + 1 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1071801 1071935 . + 1 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1072675 1072918 . + 1 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript CDS 1074199 1074345 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2 +I Coding_transcript gene 1128326 1131739 . + . ID=Gene:Gene:ZK993.1 +I Coding_transcript mRNA 1128326 1131739 . + . ID=Transcript:Gene:ZK993.1.1;Parent=Gene:Gene:ZK993.1 +I Coding_transcript five_prime_UTR 1128326 1128360 . + . ID=five_prime_UTR:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript CDS 1128361 1128428 . + 0 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript CDS 1129212 1129396 . + 1 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript CDS 1129808 1130016 . + 2 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript CDS 1131224 1131289 . + 0 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript CDS 1131475 1131636 . + 0 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript three_prime_UTR 1131637 1131739 . + . ID=three_prime_UTR:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1 +I Coding_transcript gene 1337021 1338121 . + . ID=Gene:Gene:K12C11.6 +I Coding_transcript mRNA 1337021 1338121 . + . 
ID=Transcript:Gene:K12C11.6.1;Parent=Gene:Gene:K12C11.6 +I Coding_transcript CDS 1337021 1337117 . + 0 ID=CDS:Gene:K12C11.6.1;Parent=Transcript:Gene:K12C11.6.1 +I Coding_transcript CDS 1337256 1337373 . + 2 ID=CDS:Gene:K12C11.6.1;Parent=Transcript:Gene:K12C11.6.1 +I Coding_transcript CDS 1337938 1338121 . + 1 ID=CDS:Gene:K12C11.6.1;Parent=Transcript:Gene:K12C11.6.1 +I Coding_transcript gene 1340679 1341259 . + . ID=Gene:Gene:K12C11.2.1 +I Coding_transcript mRNA 1340679 1341259 . + . ID=Transcript:Gene:K12C11.2.1.1;Parent=Gene:Gene:K12C11.2.1 +I Coding_transcript five_prime_UTR 1340679 1340703 . + . ID=five_prime_UTR:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1 +I Coding_transcript CDS 1340704 1340769 . + 0 ID=CDS:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1 +I Coding_transcript CDS 1340826 1340926 . + 0 ID=CDS:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1 +I Coding_transcript CDS 1340977 1341085 . + 1 ID=CDS:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1 +I Coding_transcript three_prime_UTR 1341086 1341259 . + . ID=three_prime_UTR:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1 +I Coding_transcript mRNA 1340704 1341253 . + . ID=Transcript:Gene:K12C11.2.1.2;Parent=Gene:Gene:K12C11.2.1 +I Coding_transcript CDS 1340704 1340769 . + 0 ID=CDS:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2 +I Coding_transcript CDS 1340826 1340926 . + 0 ID=CDS:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2 +I Coding_transcript CDS 1340977 1341085 . + 1 ID=CDS:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2 +I Coding_transcript three_prime_UTR 1341086 1341253 . + . ID=three_prime_UTR:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2 +I Coding_transcript gene 1394570 1402943 . + . ID=Gene:Gene:Y92H12BR.7 +I Coding_transcript mRNA 1394570 1402943 . + . ID=Transcript:Gene:Y92H12BR.7.1;Parent=Gene:Gene:Y92H12BR.7 +I Coding_transcript CDS 1394570 1394781 . 
+ 0 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript CDS 1395760 1395875 . + 1 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript CDS 1395940 1396113 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript CDS 1397517 1397708 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript CDS 1399169 1399348 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript CDS 1402093 1402943 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1 +I Coding_transcript gene 1405945 1413072 . + . ID=Gene:Gene:Y92H12BR.6 +I Coding_transcript mRNA 1405945 1413072 . + . ID=Transcript:Gene:Y92H12BR.6.1;Parent=Gene:Gene:Y92H12BR.6 +I Coding_transcript CDS 1405945 1406373 . + 0 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1 +I Coding_transcript CDS 1408458 1408637 . + 0 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1 +I Coding_transcript CDS 1411442 1411616 . + 0 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1 +I Coding_transcript CDS 1412612 1413033 . + 2 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1 +I Coding_transcript three_prime_UTR 1413034 1413072 . + . ID=three_prime_UTR:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1 +I Coding_transcript gene 1416592 1425131 . + . ID=Gene:Gene:Y92H12BR.3b +I Coding_transcript mRNA 1416592 1424609 . + . ID=Transcript:Gene:Y92H12BR.3b.1;Parent=Gene:Gene:Y92H12BR.3b +I Coding_transcript CDS 1416592 1416738 . + 0 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1 +I Coding_transcript CDS 1416796 1416961 . + 0 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1 +I Coding_transcript CDS 1422689 1422993 . + 2 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1 +I Coding_transcript CDS 1424412 1424609 . 
+ 0 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1 +I Coding_transcript mRNA 1416592 1425131 . + . ID=Transcript:Gene:Y92H12BR.3b.2;Parent=Gene:Gene:Y92H12BR.3b +I Coding_transcript CDS 1416592 1416738 . + 0 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript CDS 1416796 1416961 . + 0 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript CDS 1420212 1421309 . + 2 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript CDS 1422689 1422993 . + 2 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript CDS 1424412 1424609 . + 0 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript three_prime_UTR 1424610 1425131 . + . ID=three_prime_UTR:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2 +I Coding_transcript gene 1483084 1490474 . + . ID=Gene:Gene:F47G6.1 +I Coding_transcript mRNA 1483084 1490474 . + . ID=Transcript:Gene:F47G6.1.1;Parent=Gene:Gene:F47G6.1 +I Coding_transcript five_prime_UTR 1483084 1483106 . + . ID=five_prime_UTR:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1483107 1483236 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1484720 1484877 . + 2 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1484924 1485060 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1485927 1486012 . + 1 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1486057 1486252 . + 2 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1487180 1487411 . + 1 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1487770 1488096 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1488425 1488676 . 
+ 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1489069 1489282 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript CDS 1490282 1490322 . + 2 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript three_prime_UTR 1490323 1490474 . + . ID=three_prime_UTR:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1 +I Coding_transcript gene 1514763 1527350 . + . ID=Gene:Gene:Y92H12A.4 +I Coding_transcript mRNA 1514763 1527350 . + . ID=Transcript:Gene:Y92H12A.4.1;Parent=Gene:Gene:Y92H12A.4 +I Coding_transcript CDS 1514763 1514900 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1514955 1515059 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1515111 1515186 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1515233 1515396 . + 2 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1517534 1518121 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1520159 1520271 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1522175 1522459 . + 1 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1524299 1524644 . + 1 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1525846 1526006 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript CDS 1526978 1527350 . + 1 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1 +I Coding_transcript gene 1610391 1619944 . + . ID=Gene:Gene:Y73E7A.9 +I Coding_transcript mRNA 1610391 1619944 . + . ID=Transcript:Gene:Y73E7A.9.1;Parent=Gene:Gene:Y73E7A.9 +I Coding_transcript CDS 1610391 1610504 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1610564 1610901 . 
+ 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1611487 1611595 . + 1 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1611747 1611828 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1613478 1613896 . + 2 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1616100 1616261 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1617834 1617969 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript CDS 1619628 1619944 . + 2 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1 +I Coding_transcript gene 1629004 1633494 . + . ID=Gene:Gene:Y73E7A.1a.1 +I Coding_transcript mRNA 1629004 1633494 . + . ID=Transcript:Gene:Y73E7A.1a.1.1;Parent=Gene:Gene:Y73E7A.1a.1 +I Coding_transcript five_prime_UTR 1629004 1629022 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript CDS 1629023 1629118 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript CDS 1629164 1629226 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript CDS 1629283 1629410 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript CDS 1630554 1630794 . + 1 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript CDS 1633222 1633365 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript three_prime_UTR 1633366 1633494 . + . ID=three_prime_UTR:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1 +I Coding_transcript mRNA 1629006 1633365 . + . ID=Transcript:Gene:Y73E7A.1a.1.2;Parent=Gene:Gene:Y73E7A.1a.1 +I Coding_transcript five_prime_UTR 1629006 1629022 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript CDS 1629023 1629118 . 
+ 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript CDS 1629164 1629226 . + 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript CDS 1629283 1629410 . + 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript CDS 1630554 1630794 . + 1 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript CDS 1633222 1633365 . + 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2 +I Coding_transcript mRNA 1629010 1633365 . + . ID=Transcript:Gene:Y73E7A.1a.1.3;Parent=Gene:Gene:Y73E7A.1a.1 +I Coding_transcript five_prime_UTR 1629010 1629118 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript five_prime_UTR 1629164 1629225 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript five_prime_UTR 1629281 1629354 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript CDS 1629355 1629410 . + 0 ID=CDS:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript CDS 1630554 1630794 . + 1 ID=CDS:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript CDS 1633222 1633365 . + 0 ID=CDS:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3 +I Coding_transcript gene 1652917 1655337 . + . ID=Gene:Gene:Y71G12B.18 +I Coding_transcript mRNA 1652917 1655337 . + . ID=Transcript:Gene:Y71G12B.18.1;Parent=Gene:Gene:Y71G12B.18 +I Coding_transcript CDS 1652917 1653000 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript CDS 1653438 1653755 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript CDS 1653901 1653981 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript CDS 1654754 1654975 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript CDS 1655026 1655334 . 
+ 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript three_prime_UTR 1655335 1655337 . + . ID=three_prime_UTR:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1 +I Coding_transcript gene 1681814 1685064 . + . ID=Gene:Gene:Y71G12B.16 +I Coding_transcript mRNA 1681814 1685064 . + . ID=Transcript:Gene:Y71G12B.16.1;Parent=Gene:Gene:Y71G12B.16 +I Coding_transcript CDS 1681814 1681899 . + 0 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1 +I Coding_transcript CDS 1682831 1683432 . + 1 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1 +I Coding_transcript CDS 1683480 1683521 . + 2 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1 +I Coding_transcript CDS 1684382 1684450 . + 2 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1 +I Coding_transcript CDS 1684829 1685064 . + 2 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1 diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions.info --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions.info Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,5 @@ +The files contain example data from 50 genes of the C. elegans genome on chromosome I. 
+ +These files are: +* the genome annotation in GFF3 format +* two small set of aligned reads in SAM format (generated with PalMapper) from SRX001872 (http://www.ncbi.nlm.nih.gov/sra/SRX001872?report=full) and SRX001875 (http://www.ncbi.nlm.nih.gov/sra/SRX001875?report=full) diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions_deseq.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/test_data/deseq_c_elegans_WS200-I-regions_deseq.txt Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,51 @@ +deseq_c_elegans_WS200.I.regions.SRX001872_vs._deseq_c_elegans_WS200.I.regions.SRX001875 +Gene:Gene:Y48G1BL.1 0.298162984580380 +Gene:Gene:F53G12.8 0.0100203770664889 +Gene:Gene:F56A6.1b 0.196356415924958 +Gene:Gene:F56C11.6b 0.131729472518535 +Gene:Gene:Y95B8A.7 0.82068108848155 +Gene:Gene:Y34D9A.2 0.0853090936357318 +Gene:Gene:R06A10.2.1 0.0729242320545049 +Gene:Gene:W04C9.2 0.566684334319464 +Gene:Gene:Y48G1C.2.1 0.600568879247855 +Gene:Gene:Y71G12B.18 0.436992089489852 +Gene:Gene:Y48G1C.9.1 0.653680190916246 +Gene:Gene:Y48G1C.12 0.962705668820165 +Gene:Gene:Y92H12A.4 0.71745387890079 +Gene:Gene:Y34D9A.6 0.554353505200952 +Gene:Gene:Y74C9A.2.2 0.193317121924372 +Gene:Gene:Y73E7A.1a.1 1 +Gene:Gene:Y48G1C.11 0.343541645863403 +Gene:Gene:ZK993.1 0.160531710602269 +Gene:Gene:Y73E7A.9 0.505459732270159 +Gene:Gene:Y34D9A.11 0.0963318383936614 +Gene:Gene:Y92H12BR.7 0.640348677865408 +Gene:Gene:Y92H12BR.3b 0.91026935144141 +Gene:Gene:Y65B4BR.8 0.838122463966569 +Gene:Gene:Y71F9AM.5b 0.971282320939981 +Gene:Gene:R119.7 0.0391196294523181 +Gene:Gene:F56C11.5b 0.457982589114528 +Gene:Gene:Y92H12BR.6 0.487521663730281 +Gene:Gene:Y95B8A.6a.2 0.927779341088235 +Gene:Gene:F53G12.7 0.196705810975026 +Gene:Gene:Y48G1A.2 0.453586101344411 +Gene:Gene:C53D5.1c.1 0.391738974840997 +Gene:Gene:Y34D9A.3 0.71006993005907 +Gene:Gene:Y18H1A.6 0.329792105744889 +Gene:Gene:T06A4.1b 0.295665526024382 +Gene:Gene:F53G12.3 0.631494248315874 +Gene:Gene:R119.6 
0.651145530807306 +Gene:Gene:Y95B8A.5 0.512001544838653 +Gene:Gene:Y65B4BR.4b 0.904179315054261 +Gene:Gene:K12C11.2.1 0.364903241182862 +Gene:Gene:F53G12.5b 0.537007770889288 +Gene:Gene:Y95B8A.8 0.59137956260358 +Gene:Gene:T06A4.3a 0.171238758638891 +Gene:Gene:F53G12.4 0.949563214223464 +Gene:Gene:C54G6.2 0.0780422099961616 +Gene:Gene:W04C9.1 0.223549535364483 +Gene:Gene:K12C11.6 0.301231157430176 +Gene:Gene:F47G6.1 0.196694866924542 +Gene:Gene:Y71F9AM.4a 0.728480588565213 +Gene:Gene:Y71G12B.16 0.900935024006703 +Gene:Gene:Y65B4BR.1 0.475324346757848 diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/test_data/genes.mat Binary file deseq-hts_2.0/test_data/genes.mat has changed diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/tools/GFFParser.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq-hts_2.0/tools/GFFParser.py Tue Oct 08 08:15:34 2013 -0400 @@ -0,0 +1,379 @@ +#!/usr/bin/env python +""" +Extract genome annotation from a GFF (a tab delimited format for storing sequence features and annotations) file. + +Requirements: + Numpy :- http://numpy.org/ + Scipy :- http://scipy.org/ + +Copyright (C) + +2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. +2012-2013 Memorial Sloan-Kettering Cancer Center, New York City, USA. 
+""" + +import re +import os +import sys +import urllib +import numpy as np +import scipy.io as sio +from collections import defaultdict +import helper as utils + +def _attribute_tags(col9): + """ + Split the key-value tags from the attribute column, it takes column number 9 from GTF/GFF file + """ + info = defaultdict(list) + is_gff = False + + if not col9: + return is_gff, info + + # trim the line ending semi-colon ucsc may have some white-space + col9 = col9.rstrip(';| ') + # attributes from 9th column + atbs = col9.split(" ; ") + if len(atbs) == 1: + atbs = col9.split("; ") + if len(atbs) == 1: + atbs = col9.split(";") + # check the GFF3 pattern which has key value pairs like: + gff3_pat = re.compile("\w+=") + # sometime GTF have: gene_id uc002zkg.1; + gtf_pat = re.compile("\s?\w+\s") + + key_vals = [] + + if gff3_pat.match(atbs[0]): # gff3 pattern + is_gff = True + key_vals = [at.split('=') for at in atbs] + elif gtf_pat.match(atbs[0]): # gtf pattern + for at in atbs: + key_vals.append(at.strip().split(" ",1)) + else: + # to handle attribute column has only single value + key_vals.append(['ID', atbs[0]]) + # get key, val items + for item in key_vals: + key, val = item + # replace the double qoutes from feature identifier + val = re.sub('"', '', val) + # replace the web formating place holders to plain text format + info[key].extend([urllib.unquote(v) for v in val.split(',') if v]) + + return is_gff, info + +def _spec_features_keywd(gff_parts): + """ + Specify the feature key word according to the GFF specifications + """ + for t_id in ["transcript_id", "transcriptId", "proteinId"]: + try: + gff_parts["info"]["Parent"] = gff_parts["info"][t_id] + break + except KeyError: + pass + for g_id in ["gene_id", "geneid", "geneId", "name", "gene_name", "genename"]: + try: + gff_parts["info"]["GParent"] = gff_parts["info"][g_id] + break + except KeyError: + pass + ## TODO key words + for flat_name in ["Transcript", "CDS"]: + if gff_parts["info"].has_key(flat_name): + # 
parents + if gff_parts['type'] in [flat_name] or re.search(r'transcript', gff_parts['type'], re.IGNORECASE): + if not gff_parts['id']: + gff_parts['id'] = gff_parts['info'][flat_name][0] + #gff_parts["info"]["ID"] = [gff_parts["id"]] + # children + elif gff_parts["type"] in ["intron", "exon", "pseudogenic_exon", "three_prime_UTR", + "coding_exon", "five_prime_UTR", "CDS", "stop_codon", + "start_codon"]: + gff_parts["info"]["Parent"] = gff_parts["info"][flat_name] + break + return gff_parts + +def Parse(ga_file): + """ + Parsing GFF/GTF file based on feature relationship, it takes the input file. + """ + child_map = defaultdict(list) + parent_map = dict() + + ga_handle = utils._open_file(ga_file) + + for rec in ga_handle: + rec = rec.strip('\n\r') + + # skip empty line fasta identifier and commented line + if not rec or rec[0] in ['#', '>']: + continue + # skip the genome sequence + if not re.search('\t', rec): + continue + + parts = rec.split('\t') + assert len(parts) >= 8, rec + + # process the attribute column (9th column) + ftype, tags = _attribute_tags(parts[-1]) + if not tags: # skip the line if no attribute column. + continue + + # extract fields + if parts[1]: + tags["source"] = parts[1] + if parts[7]: + tags["phase"] = parts[7] + + gff_info = dict() + gff_info['info'] = dict(tags) + #gff_info["is_gff3"] = ftype + gff_info['chr'] = parts[0] + + if parts[3] and parts[4]: + gff_info['location'] = [int(parts[3]) , + int(parts[4])] + gff_info['type'] = parts[2] + gff_info['id'] = tags.get('ID', [''])[0] + if parts[6] in ['?', '.']: + parts[6] = None + gff_info['strand'] = parts[6] + + # key word according to the GFF spec. 
+ if not ftype: + gff_info = _spec_features_keywd(gff_info) + + # link the feature relationships + if gff_info['info'].has_key('Parent'): + for p in gff_info['info']['Parent']: + if p == gff_info['id']: + gff_info['id'] = '' + break + rec_category = 'child' + elif gff_info['id']: + rec_category = 'parent' + else: + rec_category = 'record' + + # depends on the record category organize the features + if rec_category == 'child': + for p in gff_info['info']['Parent']: + # create the data structure based on source and feature id + child_map[(gff_info['chr'], gff_info['info']['source'], p)].append( + dict( type = gff_info['type'], + location = gff_info['location'], + strand = gff_info['strand'], + ID = gff_info['id'], + gene_id = gff_info['info'].get('GParent', '') + )) + elif rec_category == 'parent': + parent_map[(gff_info['chr'], gff_info['info']['source'], gff_info['id'])] = dict( + type = gff_info['type'], + location = gff_info['location'], + strand = gff_info['strand'], + name = tags.get('Name', [''])[0]) + elif rec_category == 'record': + #TODO how to handle plain records? 
+ c = 1 + ga_handle.close() + + # depends on file type create parent feature + if not ftype: + parent_map, child_map = _create_missing_feature_type(parent_map, child_map) + + # connecting parent child relations + # // essentially the parent child features are here from any type of GTF/GFF2/GFF3 file + gene_mat = _format_gene_models(parent_map, child_map) + + return gene_mat + +def _format_gene_models(parent_nf_map, child_nf_map): + """ + Genarate GeneObject based on the parsed file contents + + parent_map: parent features with source and chromosome information + child_map: transctipt and exon information are encoded + """ + g_cnt = 0 + gene = np.zeros((len(parent_nf_map),), dtype = utils.init_gene_DE()) + + for pkey, pdet in parent_nf_map.items(): + # considering only gene features + if not re.search(r'gene', pdet.get('type', '')): + continue + # infer the gene start and stop if not there in the + if not pdet.get('location', []): + GNS, GNE = [], [] + # multiple number of transcripts + for L1 in child_nf_map[pkey]: + GNS.append(L1.get('location', [])[0]) + GNE.append(L1.get('location', [])[1]) + GNS.sort() + GNE.sort() + pdet['location'] = [GNS[0], GNE[-1]] + orient = pdet.get('strand', '') + + gene[g_cnt]['id'] = g_cnt +1 + gene[g_cnt]['chr'] = pkey[0] + gene[g_cnt]['source'] = pkey[1] + gene[g_cnt]['name'] = pkey[-1] + gene[g_cnt]['start'] = pdet.get('location', [])[0] + gene[g_cnt]['stop'] = pdet.get('location', [])[1] + gene[g_cnt]['strand'] = orient + + # default value + gene[g_cnt]['is_alt_spliced'] = 0 + if len(child_nf_map[pkey]) > 1: + gene[g_cnt]['is_alt_spliced'] = 1 + + # complete sub-feature for all transcripts + dim = len(child_nf_map[pkey]) + TRS = np.zeros((dim,), dtype=np.object) + EXON = np.zeros((dim,), dtype=np.object) + + # fetching corresponding transcripts + for xq, Lv1 in enumerate(child_nf_map[pkey]): + + TID = Lv1.get('ID', '') + TRS[xq]= np.array([TID]) + + orient = Lv1.get('strand', '') + + # fetching different sub-features + child_feat 
= defaultdict(list) + for Lv2 in child_nf_map[(pkey[0], pkey[1], TID)]: + E_TYP = Lv2.get('type', '') + child_feat[E_TYP].append(Lv2.get('location')) + + # make exon coordinate from cds and utr regions + if not child_feat.get('exon'): + if child_feat.get('CDS'): + exon_cod = utils.make_Exon_cod( orient, + NonetoemptyList(child_feat.get('five_prime_UTR')), + NonetoemptyList(child_feat.get('CDS')), + NonetoemptyList(child_feat.get('three_prime_UTR'))) + child_feat['exon'] = exon_cod + else: + # searching through keys to find a pattern describing exon feature + ex_key_pattern = [k for k in child_feat if k.endswith("exon")] + child_feat['exon'] = child_feat[ex_key_pattern[0]] + # TODO only UTR's + + # make general ascending order of coordinates + if orient == '-': + for etype, excod in child_feat.items(): + if len(excod) > 1: + if excod[0][0] > excod[-1][0]: + excod.reverse() + child_feat[etype] = excod + + # add sub-feature # make array for export to different out + EXON[xq] = np.array(child_feat.get('exon'), np.float64) + + # add sub-features to the parent gene feature + gene[g_cnt]['transcripts'] = TRS + gene[g_cnt]['exons'] = EXON + + gene[g_cnt]['gene_info'] = dict( ID = pkey[-1], + Name = pdet.get('name'), + Source = pkey[1]) + g_cnt += 1 + + ## deleting empty gene records from the main array + for XP, ens in enumerate(gene): + if ens[0]==0: + break + + XQC = range(XP, len(gene)+1) + gene = np.delete(gene, XQC) + + return gene + +def NonetoemptyList(XS): + """ + Convert a None type to empty list + """ + return [] if XS is None else XS + +def _create_missing_feature_type(p_feat, c_feat): + """ + GFF/GTF file defines only child features. This function tries to create + the parent feature from the information provided in the attribute column. + + example: + chr21 hg19_knownGene exon 9690071 9690100 0.000000 + . gene_id "uc002zkg.1"; transcript_id "uc002zkg.1"; + chr21 hg19_knownGene exon 9692178 9692207 0.000000 + . 
gene_id "uc021wgt.1"; transcript_id "uc021wgt.1"; + chr21 hg19_knownGene exon 9711935 9712038 0.000000 + . gene_id "uc011abu.2"; transcript_id "uc011abu.2"; + + This function gets the parsed feature annotations. + """ + child_n_map = defaultdict(list) + for fid, det in c_feat.items(): + # get the details from grand child + GID = STRD = None + SPOS, EPOS = [], [] + TYP = dict() + for gchild in det: + GID = gchild.get('gene_id', [''])[0] + SPOS.append(gchild.get('location', [])[0]) + EPOS.append(gchild.get('location', [])[1]) + STRD = gchild.get('strand', '') + TYP[gchild.get('type', '')] = 1 + SPOS.sort() + EPOS.sort() + + # infer transcript type + transcript_type = 'transcript' + transcript_type = 'mRNA' if TYP.get('CDS', '') or TYP.get('cds', '') else transcript_type + + # gene id and transcript id are same + transcript_id = fid[-1] + if GID == transcript_id: + transcript_id = 'Transcript:' + str(GID) + + # level -1 feature type + p_feat[(fid[0], fid[1], GID)] = dict( type = 'gene', + location = [], ## infer location based on multiple transcripts + strand = STRD, + name = GID ) + # level -2 feature type + child_n_map[(fid[0], fid[1], GID)].append( + dict( type = transcript_type, + location = [SPOS[0], EPOS[-1]], + strand = STRD, + ID = transcript_id, + gene_id = '' )) + # reorganizing the grand child + for gchild in det: + child_n_map[(fid[0], fid[1], transcript_id)].append( + dict( type = gchild.get('type', ''), + location = gchild.get('location'), + strand = gchild.get('strand'), + ID = gchild.get('ID'), + gene_id = '' )) + return p_feat, child_n_map + + +## General instruction to use the above functions: +## Usage: GFFParser.py in.gff3 out.mat + +try: + gff_file = sys.argv[1] + out_mat = sys.argv[2] +except: + print __doc__ + sys.exit(-1) + +## Parse the file accoring to the type and returns the genes informations -- +gene_struct = Parse(gff_file) + +## Write the gene annotations to a matlab struct array format -- +sio.savemat(out_mat, + mdict = dict(genes = 
gene_struct),
+            format = '5',
+            oned_as = 'row')
diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/tools/helper.py
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_2.0/tools/helper.py	Tue Oct 08 08:15:34 2013 -0400
@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+"""
+Common utility functions
+"""
+
+import os
+import re
+import sys
+import gzip
+import bz2
+import numpy
+
+def init_gene_DE():
+    """
+    Initializing the gene structure for DE
+
+    Returns a list of (field name, type) pairs for one gene record. The
+    fields 'exons', 'gene_info' and 'transcripts' carry numpy.dtype itself
+    as their type entry; all other fields use numpy type code strings.
+    """
+    gene_det = [('id', 'f8'),
+                ('chr', 'S15'),
+                ('exons', numpy.dtype),
+                ('gene_info', numpy.dtype),
+                ('is_alt_spliced', 'f8'),
+                ('name', 'S25'),
+                ('source', 'S25'),
+                ('start', 'f8'),
+                ('stop', 'f8'),
+                ('strand', 'S2'),
+                ('transcripts', numpy.dtype)]
+
+    return gene_det
+
+def _open_file(fname):
+    """
+    Open the file (supports .gz .bz2) and returns the handler
+
+    The opener is picked from the file extension alone. Any exception
+    raised while opening ends the program through sys.exit.
+    """
+    try:
+        # look the extension up once instead of once per branch
+        ext = os.path.splitext(fname)[1]
+        if ext == ".gz":
+            FH = gzip.open(fname, 'rb')
+        elif ext == ".bz2":
+            FH = bz2.BZ2File(fname, 'rb')
+        else:
+            FH = open(fname, 'rU')
+    except Exception as error:
+        sys.exit(error)
+    return FH
+
+def _is_adjacent(left_end, right_start):
+    """
+    True when right_start equals left_end or lies one position after it
+    """
+    return right_start - left_end in (0, 1)
+
+def _merge_utr_cds(left_utr, cds_cod, right_utr):
+    """
+    Join UTR and CDS segments into exon cordinates
+
+    left_utr  : [start, end] pairs; only the last one is tested against
+                the first CDS segment
+    cds_cod   : [start, end] pairs; cds_cod[0] is read unconditionally
+    right_utr : [start, end] pairs; only the first one is tested against
+                the last CDS segment
+
+    A UTR segment that touches its neighbouring CDS segment is fused with
+    it into one exon; everything else is copied through in the order
+    left UTR, CDS, right UTR.
+    NOTE(review): presumably every list is sorted by ascending start -
+    confirm against the caller.
+    """
+    exon_pos = []
+    first_start, first_end = cds_cod[0][0], cds_cod[0][1]
+
+    # A missing UTR can never form a junction. The earlier code stood in
+    # (0, 0) for it, which fused a CDS starting at 0 or 1 with a phantom
+    # UTR and reported an exon starting at 0.
+    left_start = 0
+    left_joined = False
+    if left_utr:
+        left_start = left_utr[-1][0]
+        left_joined = _is_adjacent(left_utr[-1][1], first_start)
+
+    if len(cds_cod) == 1:
+        # single CDS segment: it may be fused on one side or on both
+        right_end = 0
+        right_joined = False
+        if right_utr:
+            right_end = right_utr[0][1]
+            right_joined = _is_adjacent(first_end, right_utr[0][0])
+        if left_joined:
+            left_utr = left_utr[:-1]
+        if right_joined:
+            right_utr = right_utr[1:]
+        exon_pos.extend(left_utr)
+        if left_joined and right_joined:
+            exon_pos.append([left_start, right_end])
+        elif left_joined:
+            exon_pos.append([left_start, first_end])
+        elif right_joined:
+            exon_pos.append([first_start, right_end])
+        else:
+            exon_pos.append(cds_cod[0])
+        exon_pos.extend(right_utr)
+        return exon_pos
+
+    # several CDS segments: only the first and the last can be fused
+    if left_joined:
+        left_utr = left_utr[:-1]
+        cds_cod = cds_cod[1:]
+    exon_pos.extend(left_utr)
+    if left_joined:
+        exon_pos.append([left_start, first_end])
+
+    last_start, last_end = cds_cod[-1][0], cds_cod[-1][1]
+    right_end = 0
+    right_joined = False
+    if right_utr:
+        right_end = right_utr[0][1]
+        right_joined = _is_adjacent(last_end, right_utr[0][0])
+    if right_joined:
+        cds_cod = cds_cod[:-1]
+        right_utr = right_utr[1:]
+    exon_pos.extend(cds_cod)
+    if right_joined:
+        exon_pos.append([last_start, right_end])
+    exon_pos.extend(right_utr)
+    return exon_pos
+
+def make_Exon_cod(strand_p, five_p_utr, cds_cod, three_p_utr):
+    """
+    Create exon cordinates from UTR's and CDS region
+
+    strand_p    : '+' or '-'; any other value returns an empty list
+    five_p_utr  : list of [start, end] 5' UTR segments, may be empty
+    cds_cod     : list of [start, end] CDS segments; must not be empty
+                  when strand_p is '+' or '-'
+    three_p_utr : list of [start, end] 3' UTR segments, may be empty
+
+    Returns a new list of exon [start, end] pairs. The argument lists
+    are only sliced, never changed in place; unmerged pairs in the result
+    are the same objects as in the arguments.
+    """
+    # On '+' the 5' UTR is the flank tested against the first CDS segment,
+    # on '-' it is the 3' UTR; the joining rules are otherwise the same.
+    if strand_p == '+':
+        return _merge_utr_cds(five_p_utr, cds_cod, three_p_utr)
+    if strand_p == '-':
+        return _merge_utr_cds(three_p_utr, cds_cod, five_p_utr)
+    return []
diff -r e27b4f7811c2 -r 2fe512c7bfdf deseq-hts_2.0/tools/separate.m
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_2.0/tools/separate.m	Tue Oct 08 08:15:34 2013 -0400
@@ -0,0 +1,7 @@
+function f = separate(str, delim)
+
+f={};
+idx=[0 find(str==delim) length(str)+1];
+for i=1:length(idx)-1
+  f{i}=deblank(str(idx(i)+1:idx(i+1)-1));
+end;