# HG changeset patch
# User vipints
# Date 1336610627 14400
# Node ID 94a108763d9e96cbbc8c780e362c7cba527d9eca
deseq-hts version 1.0 wraps the DESeq 1.6.0
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/README Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,75 @@
+---------------------------------------------------
+DESeq-hts: A Galaxy wrapper for DESeq version 1.6.0
+---------------------------------------------------
+
+Description:
+ DESeq can be used as a web service embedded in a Galaxy instance.
+ We call this wrapper DESeq-hts.
+
+Requirements:
+ MATLAB/OCTAVE and Python :- Preprocessing of sequencing reads and GFF files
+ R, Bio-conductor package :- Required for DESEQ
+ SCIPY, NUMPY :- for python
+ SAMTOOLS :- Read processing
+
+Contents:
+ [src]
+ All relevant scripts for DESeq-hts are located in the subdirectory
+ src. src/deseq-hts.sh is the main script to start DESeq-hts. The
+ preprocessing of the BAM and GFF files runs before the R DESeq script.
+ Please follow the shell script to understand the details.
+
+ [galaxy]
+ The Galaxy tool configuration file can be found in the galaxy folder. Please
+ edit the .xml file and the remaining .sh files as necessary and
+ run a few tests.
+
+ [setup_deseq-hts.sh]
+ Setup script for DESeq-hts.
+
+ [mex]
+ matlab executable files.
+
+ [bin]
+ Contains deseq_config.sh file which is used for the configuration of
+ DESeq-hts. According to your platform, the default file will be changed.
+
+ [test_data]
+ This subdirectory contains all data for running a functional test in
+ Galaxy framework. You may need to move these test files into the test-data
+ directory.
+
+ [tools]
+ A python based GFF parsing program. Also contains small utils programs.
+
+Getting started:
+ Check for all requirements first, then
+
+ a) Run ./setup_deseq-hts.sh and setup paths and configuration options for DESeq-hts.
+
+ b) Inside the mex folder execute the make file to create platform dependent .mex files
+ cd mex
+ make [interpreter]
+ make octave for octave
+ make matlab for matlab
+ make all for octave and matlab
+
+ c) Edit the Galaxy tool configuration file to adjust the path if necessary.
+
+Licenses:
+ If **DESeq** is used to obtain results for scientific publications it should be cited as [1].
+
+ This wrapper program (DESeq-hts) is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by the Free Software Foundation;
+ either version 3 of the License, or (at your option) any later version.
+
+ Written (W) 2009-2012 Jonas Behr, Regina Bohnert, Andre Kahles, Gunnar Raetsch, Vipin T. Sreedharan
+ Copyright (C) 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany and
+ 2012 cBio Memorial Sloan Kettering Cancer Center, New York City, USA.
+
+References:
+ [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`.
+
+Contact:
+ vipin@cbio.mskcc.org
+
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/bin/deseq_config.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/bin/deseq_config.sh Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright (C) 2010-2012 Max Planck Society
+export DESEQ_VERSION=1.6.0
+export DESEQ_PATH=
+export DESEQ_SRC_PATH=$DESEQ_PATH/src
+export DESEQ_BIN_PATH=$DESEQ_PATH/bin
+export INTERPRETER=
+export MATLAB_BIN_PATH=
+export MATLAB_MEX_PATH=
+export MATLAB_INCLUDE_DIR=
+export OCTAVE_BIN_PATH=
+export OCTAVE_MKOCT=
+export SAMTOOLS_DIR=
+export PYTHON_PATH=
+export SCIPY_PATH=
+export R_PATH=
+export LD_LIBRARY_PATH=
+export ENVIRONMENT=galaxy
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/bin/genarglist.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/bin/genarglist.sh Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Copyright (C) 2010-2012 Max Planck Society
+# Prints the arguments as 'a', 'b', ...; testing $# keeps empty and space-containing arguments.
+while [ $# -gt 0 ] ; do
+  if [ $# -ne 1 ];
+  then
+    echo -n "'$1', "
+  else
+    echo -n "'$1'"
+  fi
+  shift
+done
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/bin/genes_cell2struct
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/bin/genes_cell2struct Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,7 @@
+#!/bin/bash
+# deseq-hts wrapper script to start the interpreter with the correct list of arguments
+# Copyright (C) 2010-2012 Max Planck Society (quoting keeps paths and arguments with spaces intact)
+set -e
+PROG=$(basename "$0")
+DIR=$(dirname "$0")
+exec "${DIR}/start_interpreter.sh" "${PROG}" "$("${DIR}/genarglist.sh" "$@")"
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/bin/get_read_counts
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/bin/get_read_counts Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,7 @@
+#!/bin/bash
+# deseq-hts wrapper script to start the interpreter with the correct list of arguments
+# Copyright (C) 2010-2012 Max Planck Society (quoting keeps paths and arguments with spaces intact)
+set -e
+PROG=$(basename "$0")
+DIR=$(dirname "$0")
+exec "${DIR}/start_interpreter.sh" "${PROG}" "$("${DIR}/genarglist.sh" "$@")"
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/bin/start_interpreter.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/bin/start_interpreter.sh Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,24 @@
+#!/bin/bash
+# Copyright (C) 2010-2012 Max Planck Society
+# Usage: start_interpreter.sh FUNCTION ARGLIST -- evaluates FUNCTION(ARGLIST) in the configured interpreter.
+set -e
+
+. "$(dirname "$0")/deseq_config.sh"
+# tempfile(1) is Debian-specific and deprecated: prefer mktemp and keep tempfile as a fallback
+export MATLAB_RETURN_FILE=$(mktemp 2>/dev/null || tempfile)
+
+if [ "$INTERPRETER" = 'octave' ];
+then
+  echo exit | ${OCTAVE_BIN_PATH} --eval "global SHELL_INTERPRETER_INVOKE; SHELL_INTERPRETER_INVOKE=1; addpath $DESEQ_SRC_PATH; deseq_config; $1($2); exit;" || (echo starting Octave failed; rm -f "$MATLAB_RETURN_FILE"; exit -1) ;
+fi
+
+if [ "$INTERPRETER" = 'matlab' ];
+then
+  echo exit | ${MATLAB_BIN_PATH} -nodisplay -r "global SHELL_INTERPRETER_INVOKE; SHELL_INTERPRETER_INVOKE=1; addpath $DESEQ_SRC_PATH; deseq_config; $1($2); exit;" || (echo starting Matlab failed; rm -f "$MATLAB_RETURN_FILE"; exit -1) ;
+fi
+
+test -f "$MATLAB_RETURN_FILE" || exit 0
+ret=$(cat "$MATLAB_RETURN_FILE")
+rm -f "$MATLAB_RETURN_FILE"
+exit $ret
+
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/galaxy/deseq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/galaxy/deseq.xml Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,105 @@
+
+ Determines differentially expressed transcripts from read alignments
+
+deseq-hts/src/deseq-hts.sh $anno_input_selected $deseq_out $deseq_out.extra_files_path/gene_map.mat
+#for $i in $replicate_groups
+#for $j in $i.replicates
+$j.bam_alignment:#slurp
+#end for
+#end for
+ >> $Log_File
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ command:
+ ./deseq-hts.sh ../test_data/deseq_c_elegans_WS200-I-regions.gff3 ../test_data/deseq_c_elegans_WS200-I-regions_deseq.txt ../test_data/genes.mat ../test_data/deseq_c_elegans_WS200-I-regions-SRX001872.bam ../test_data/deseq_c_elegans_WS200-I-regions-SRX001875.bam
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**What it does**
+
+`DESeq` is a tool for differential expression testing of RNA-Seq data.
+
+
+**Inputs**
+
+`DESeq` requires the following input files to run:
+
+1. Annotation file in GFF3, containing the necessary information about the transcripts that are to be quantified.
+2. The BAM alignment files grouped into replicate groups, each containing several replicates. BAM files store the read alignments in a compressed format. They can be generated using the `SAM-to-BAM` tool in the NGS: SAM Tools section. (The script will also work with only two groups containing only a single replicate each. However, this analysis has less statistical power and is therefore not recommended.)
+
+**Output**
+
+`DESeq` generates a text file containing the gene name and the p-value.
+
+------
+
+**Licenses**
+
+If **DESeq** is used to obtain results for scientific publications it
+should be cited as [1]_.
+
+**References**
+
+.. [1] Anders, S and Huber, W (2010): `Differential expression analysis for sequence count data`_.
+
+.. _Differential expression analysis for sequence count data: http://dx.doi.org/10.1186/gb-2010-11-10-r106
+
+------
+
+.. class:: infomark
+
+**About formats**
+
+
+**GFF3 format** General Feature Format is a format for describing genes
+and other features associated with DNA, RNA and protein
+sequences. GFF3 lines have nine tab-separated fields:
+
+1. seqid - The name of a chromosome or scaffold.
+2. source - The program that generated this feature.
+3. type - The name of this type of feature. Some examples of standard feature types are "gene", "CDS", "protein", "mRNA", and "exon".
+4. start - The starting position of the feature in the sequence. The first base is numbered 1.
+5. stop - The ending position of the feature (inclusive).
+6. score - A score between 0 and 1000. If there is no score value, enter ".".
+7. strand - Valid entries include '+', '-', or '.' (for don't know/care).
+8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.
+9. attributes - All lines with the same group are linked together into a single item.
+
+For more information see http://www.sequenceontology.org/gff3.shtml
+
+**SAM/BAM format** The Sequence Alignment/Map (SAM) format is a
+tab-delimited text format that stores large nucleotide sequence
+alignments. BAM is the binary version of a SAM file that allows for
+fast and intensive data processing. The format specification and the
+description of SAMtools can be found on
+http://samtools.sourceforge.net/.
+
+------
+
+DESeq-hts Wrapper Version 0.3 (Feb 2012)
+
+
+
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/mex/Makefile
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/mex/Makefile Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,38 @@
+include ../bin/deseq_config.sh
+
+MEX=${MATLAB_MEX_PATH}
+MKOCTFILE=${OCTAVE_MKOCT}
+MATLAB_INCL=${MATLAB_INCLUDE_DIR}
+SAMDIR=${SAMTOOLS_DIR}
+
+.PHONY: all octave matlab clean
+all: get_reads.mex get_bam_properties.mex interval_overlap.mex get_reads.mexa64 get_bam_properties.mexa64 interval_overlap.mexa64
+octave: get_reads.mex get_bam_properties.mex interval_overlap.mex
+matlab: get_reads.mexa64 get_bam_properties.mexa64 interval_overlap.mexa64
+
+
+get_reads.mexa64: get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp
+ rm -f *.o
+ ${MEX} -g -O get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp -I${SAMDIR} -L${SAMDIR} -lbam -lz -lcurses -I$(MATLAB_INCL)
+
+get_bam_properties.mexa64: get_bam_properties.cpp
+ rm -f *.o
+ ${MEX} -g -O get_bam_properties.cpp -I$(MATLAB_INCL)
+
+interval_overlap.mexa64: interval_overlap.cpp
+ ${MEX} -g -O interval_overlap.cpp -I$(MATLAB_INCL)
+
+get_reads.mex: get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp
+ rm -f *.o
+ ${MKOCTFILE} -g --mex get_reads.cpp get_reads_direct.cpp mex_input.cpp read.cpp -I${SAMDIR} -L${SAMDIR} -lbam -lz -lcurses
+
+get_bam_properties.mex: get_bam_properties.cpp
+ rm -f *.o
+ ${MKOCTFILE} -g --mex get_bam_properties.cpp
+
+interval_overlap.mex: interval_overlap.cpp
+ rm -f *.o
+ ${MKOCTFILE} -g --mex interval_overlap.cpp
+
+clean:
+ rm -f *.o *.mexa64 *.mex
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/mex/get_bam_properties.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/mex/get_bam_properties.cpp Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,216 @@
+/*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 3 of the License, or
+* (at your option) any later version.
+*
+* Written (W) 2009-2011 Regina Bohnert
+* Copyright (C) 2009-2011 Max Planck Society
+*/
+
+
+#include
+#include
+#include
+#include
+#include
+
+#include
+ using std::vector;
+#include
+ using std::string;
+#include
+ using std::find;
+ using std::min;
+
+#include
+
+
+char *get_string(const mxArray *prhs);
+
+typedef unsigned int uint32_t;
+typedef unsigned char uint8_t;
+
+/*
+ * [read_len num_reads] = get_bam_properties(fname, path_samtools, contig_name)
+ *
+ * -- input --
+ * prhs[0] file name of paired reads in BAM format (sorted by read id)
+ * prhs[1] path to samtools
+ * prhs[2] contig name
+ *
+ * -- output --
+ * plhs[0] length of read
+ * plhs[1] number of unique reads
+*/
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
+    // Runs `samtools view` on one contig of a BAM file and returns the read length
+    // (plhs[0]) and the number of unique read ids (plhs[1]); see the usage text below.
+    if (nrhs !=3 || nlhs > 2) {
+        mexErrMsgTxt("number of input and output args should be 3 and 2\nUSAGE:\n [read_len, num_reads] = get_bam_properties(fname, path_samtools, contig_name)\n");
+        return;
+    }
+
+    signal(SIGCHLD, SIG_IGN); // avoid zombies
+
+    // read input arguments
+    char *fname = get_string(prhs[0]);
+    char *path_samtools = get_string(prhs[1]);
+    char *contig_name = get_string(prhs[2]);
+    char command[10000];
+    char command2[10000];
+
+    // snprintf truncates instead of writing past the fixed-size command buffers
+    snprintf(command, sizeof(command), "%s./samtools view %s %s 2>/dev/null", path_samtools, fname, contig_name);
+
+    // get number of unique reads
+    uint32_t num_unique_reads = 0;
+    snprintf(command2, sizeof(command2), "%s | cut -f 1 | sort -u | wc -l", command);
+    FILE* fp = popen(command2, "r");
+    if (fp == NULL) {
+        mexErrMsgTxt("Error using popen\n");
+    }
+    int num_scans = fscanf(fp, "%u", &num_unique_reads);
+    if (num_scans != 1) {
+        // a pipe cannot be rewound; echo whatever output is still pending for diagnosis
+        char ret[1000];
+        if (fgets(ret, sizeof(ret), fp) != NULL)
+            fprintf(stdout, "%s", ret);
+        pclose(fp);
+        mexErrMsgTxt("Could not determine number of reads\n");
+    }
+    pclose(fp);
+
+    // select the first rows (at most 100); the first mapped one determines the read length
+    int num_rows_selected = min((int) num_unique_reads, 100);
+    // build the pipeline in command2: passing command as both destination and argument
+    // of one formatted write is undefined behaviour
+    snprintf(command2, sizeof(command2), "%s | head -n %i | cut -f 1-11", command, num_rows_selected);
+    fp = popen(command2, "r");
+    if (fp == NULL) {
+        mexErrMsgTxt("Error using popen\n");
+    }
+    /* SAM format
+    1: read id, 2: flag, 3: reference name, 4: start (1-based, incl.), 5: mapping quality,
+    6: CIGAR, 7: mate reference name, 8: mate start (1-based, incl.), 9: insert size, 10: read, 11: quality
+    12+: additional tags
+    */
+    int num_col = 0;
+    int flag = 0, start_pos = 0, map_score = 0, mate_end_pos = 0, ins_size = 0; // int matches the %i conversions
+    char ri [1000], read_contig_name [1000], cg [1000], mate_read_id [1000], read [1000], read_qual [1000];
+    vector<uint32_t> block_lengths, block_starts;
+
+    uint32_t read_len = 0;
+    bool empty_line = true;
+    int num_rows = 0;
+    while (empty_line && num_rows < num_rows_selected) {
+        // the field widths keep over-long columns from overflowing the 1000-byte buffers
+        num_col = fscanf(fp, "%999s\t%i\t%999s\t%i\t%i\t%999s\t%999s\t%i\t%i\t%999s\t%999s", ri, &flag, read_contig_name, &start_pos, &map_score, cg, mate_read_id, &mate_end_pos, &ins_size, read, read_qual);
+        if (num_col != 11) {
+            pclose(fp);
+            mexErrMsgTxt("error reading SAM line\n");
+        }
+        num_rows++; // count every row so that skipped rows cannot run past the selection
+
+        string cigar = (string) cg;
+        // ignore lines with reads w/o mapping information
+        if (start_pos == 0 || cigar.compare("*")==0) {
+            continue;
+        }
+        // parse CIGAR
+        size_t last_c = 0; // index of the first digit of the pending operation length
+        string last_str;
+        char *end = NULL;
+        uint32_t tmp_len = 0;
+        uint32_t last_block_start = 0, last_block_length = 0, last_intron_len = 0;
+        block_lengths.clear(); block_starts.clear();
+
+        for (size_t c = 0; c < cigar.size(); c++) {
+            switch (cigar[c]) {
+                case 'M':
+                case '=': // '=' and 'X' consume read and reference exactly like 'M'
+                case 'X':
+                    last_str = cigar.substr(last_c, c-last_c);
+                    tmp_len = strtoul(last_str.c_str(), &end, 10);
+                    if (*end != '\0')
+                        mexErrMsgTxt("error: number of mismatches\n");
+                    last_block_length += tmp_len;
+                    last_c = c + 1;
+                    break;
+                case 'I':
+                    last_str = cigar.substr(last_c, c-last_c);
+                    strtoul(last_str.c_str(), &end, 10); // validated only: insertions add no reference bases
+                    if (*end != '\0')
+                        mexErrMsgTxt("error: number of insertions\n");
+                    last_c = c + 1;
+                    break;
+                case 'D':
+                    last_str = cigar.substr(last_c, c-last_c);
+                    tmp_len = strtoul(last_str.c_str(), &end, 10);
+                    if (*end != '\0')
+                        mexErrMsgTxt("error: number of deletions\n");
+                    last_block_length += tmp_len;
+                    last_c = c + 1;
+                    break;
+                case 'N':
+                    last_str = cigar.substr(last_c, c-last_c);
+                    last_intron_len = strtoul(last_str.c_str(), &end, 10);
+                    last_c = c + 1;
+                    break;
+                case 'S':
+                case 'H':
+                case 'P':
+                    // clipping/padding add nothing to the blocks, but their digits must be
+                    // skipped or the next operation fails to parse (e.g. "5S70M")
+                    last_c = c + 1;
+                    break;
+                default:
+                    break; // a digit of an operation length
+            }
+            if (cigar[c] == 'N' || c==cigar.size()-1) {
+                block_starts.push_back(last_block_start);
+                last_block_start = last_block_start + last_block_length + last_intron_len;
+                last_intron_len = 0;
+                block_lengths.push_back(last_block_length);
+                last_block_length = 0;
+            }
+        }
+        read_len = 0;
+        for (size_t n = 0; n < block_lengths.size(); n++) {
+            read_len += block_lengths[n];
+        }
+        empty_line = false;
+    } // end of stream parsing
+
+    pclose(fp);
+    // get_string returned malloc'ed copies
+    free(fname);
+    free(path_samtools);
+    free(contig_name);
+
+    if (empty_line)
+        mexErrMsgTxt("Could not determine read length\n");
+
+    plhs[0] = mxCreateDoubleScalar((double) read_len);
+    plhs[1] = mxCreateDoubleScalar((double) num_unique_reads);
+
+    return;
+}
+
+
+char *get_string(const mxArray *prhs) {
+    /* Returns a malloc'ed, NUL-terminated copy of a 1-by-N char array; the caller must free() it. */
+    if (!prhs)
+        mexErrMsgTxt("get_string called with NULL pointer arg");
+    if (!mxIsChar(prhs))
+        mexErrMsgTxt("input is not a string");
+    if (mxGetM(prhs) != 1)
+        mexErrMsgTxt("input is not a row vector");
+    int buflen = mxGetN(prhs) + 1;
+    char *buf = (char*) malloc(buflen);
+    if (buf == NULL) /* malloc can fail; mxGetString must not write through a NULL pointer */
+        mexErrMsgTxt("not enough space");
+    if (mxGetString(prhs, buf, buflen)) /* copies the string and adds the terminating NUL */
+        mexErrMsgTxt("not enough space");
+    return buf;
+}
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/mex/get_reads.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/mex/get_reads.cpp Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,293 @@
+/*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 3 of the License, or
+* (at your option) any later version.
+*
+* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch
+* Copyright (C) 2010-2011 Max Planck Society
+*/
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+ using std::vector;
+#include "get_reads_direct.h"
+#include "mex_input.h"
+#include "read.h"
+
+#define MAXLINE 10000
+
+/*
+ * input:
+ * 1 bam file
+ * 2 chromosome
+ * 3 region start (1-based index)
+ * 4 region end (1-based index)
+ * 5 strand (either '+' or '-' or '0')
+ * [6] collapse flag: if true the reads are collapsed to a coverage track
+ * [7] subsample percentage: percentage of reads to be subsampled (in per mill)
+ * [8] intron length filter
+ * [9] exon length filter
+ * [10] mismatch filter
+ * [11] bool: use mapped reads for coverage
+ * [12] bool: use spliced reads for coverage
+ * [13] return maxminlen
+ * [14] return pair coverage
+ *
+ * output:
+ * 1 coverage
+ * [2] intron cell array
+ * [3] pair coverage
+ * [4] pair list
+ *
+ * example call:
+ * [cov introns] = get_reads('polyA_left_I+_el15_mm1_spliced.bam', 'I', 10000, 12000, '-', 1, 30);
+ */
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[]) {
+
+ if (nrhs<5 || nrhs>14 || (nlhs<1 || nlhs>4)) {
+ fprintf(stderr, "usage: [x [introns] [pair]] = get_reads(fname, chr, start, end, strand, [collapse], [subsample], [max intron length], [min exonlength], [max mismatches], [mapped], [spliced], [maxminlen], [pair]);\n");
+ return;
+ }
+
+ /* obligatory arguments
+ * **********************/
+ char *fname = get_string(prhs[0]);
+ //fprintf(stdout, "arg1: %s\n", fname);
+ char *chr = get_string(prhs[1]);
+ //fprintf(stdout, "arg2: %s\n", chr);
+ int from_pos = get_int(prhs[2]);
+ //fprintf(stdout, "arg3: %d\n", from_pos);
+ int to_pos = get_int(prhs[3]);
+ //fprintf(stdout, "arg4: %d\n", to_pos);
+ char *strand = get_string(prhs[4]);
+ //fprintf(stdout, "arg5: %s\n", strand);
+
+ if (from_pos>to_pos)
+ mexErrMsgTxt("Start (arg 3) must be <= end (arg 4)\n");
+
+ if (strand[0]!='+' && strand[0]!='-' && strand[0]!='0')
+ mexErrMsgTxt("Unknown strand (arg 5): either + or - or 0");
+
+ /* optional arguments
+ * ******************/
+ int collapse = 0;
+ if (nrhs>=6)
+ collapse = get_int(prhs[5]);
+
+ int subsample = 1000;
+ if (nrhs>=7)
+ subsample = get_int(prhs[6]);
+
+ int intron_len_filter = 1e9;
+ if (nrhs>=8)
+ intron_len_filter = get_int(prhs[7]);
+
+ int exon_len_filter = -1;
+ if (nrhs>=9)
+ exon_len_filter = get_int(prhs[8]);
+
+ int filter_mismatch = 1e9;
+ if (nrhs>=10)
+ filter_mismatch = get_int(prhs[9]);
+
+ int mapped = 1;
+ if (nrhs>=11)
+ mapped = get_int(prhs[10]);
+
+ int spliced = 1;
+ if (nrhs>=12)
+ spliced = get_int(prhs[11]);
+
+ int maxminlen = 0;
+ if (nrhs>=13)
+ maxminlen = get_int(prhs[12]);
+
+ int pair_cov = 0;
+ if (nrhs>=14)
+ pair_cov = get_int(prhs[13]);
+
+ /* call function to get reads
+ * **************************/
+ char region[MAXLINE];
+ sprintf(region, "%s:%i-%i", chr, from_pos, to_pos);
+
+ vector all_reads;
+
+ get_reads_from_bam(fname, region, &all_reads, strand[0], subsample);
+
+ /* filter reads
+ * **************/
+ int left = 0;
+ int right = 0;
+
+ vector reads;
+ for (int i=0; ileft)
+ left++;
+ if (all_reads[i]->right)
+ right++;
+ if (all_reads[i]->max_intron_len()min_exon_len()>exon_len_filter && all_reads[i]->get_mismatches()<=filter_mismatch)
+ reads.push_back(all_reads[i]);
+ }
+
+
+ /* prepare output
+ * **************/
+ int num_rows = reads.size();
+ int num_pos = to_pos-from_pos+1;
+
+ if (pair_cov==1 && nlhs>=3) {
+ // sort reads by read_id
+ printf("\n\nleft:%i right:%i \n\n", left, right);
+ sort(reads.begin(), reads.end(), CRead::compare_by_read_id);
+ }
+
+ // read coverages collapsed
+ if (collapse) {
+ plhs[0] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL);
+ uint32_t *mask_ret = (uint32_t*) mxGetData(plhs[0]);
+ if (num_pos>0 && mask_ret==NULL)
+ mexErrMsgTxt("Error allocating memory\n");
+ if (mapped && spliced) {
+ for (int i=0; iget_coverage(from_pos, to_pos, mask_ret);
+ }
+ } else {
+ for (int i=0; iblock_starts.size();
+ if ((num_exons==1 && mapped) || (num_exons>1 && spliced))
+ reads[i]->get_coverage(from_pos, to_pos, mask_ret);
+ }
+ }
+ }
+ // reads not collapsed
+ else {
+ uint32_t nzmax = 0; // maximal number of nonzero elements
+ int len = to_pos-from_pos+1;
+ for (uint i=0; iblock_starts.size(); n++) {
+ uint32_t from, to;
+ if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos >= 0)
+ from = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos;
+ else
+ from = 0;
+ if (reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n] >= 0)
+ to = reads[i]->block_starts[n]+reads[i]->start_pos-from_pos+reads[i]->block_lengths[n];
+ else
+ to = 0;
+ for (int bp=from; bp0 && mask_ret==NULL)
+ mexErrMsgTxt("Error allocating memory\n");
+ uint32_t mask_ret_c = 0; // counter
+ for (uint i=0; iget_reads_sparse(from_pos, to_pos, mask_ret, mask_ret_c, i);
+ }
+ if (mask_ret_c!=2*nzmax)
+ mexErrMsgTxt("Error filling index arrays for sparse matrix\n");
+ }
+ // introns
+ if (maxminlen==0 && nlhs>=2) {
+ vector intron_list;
+ for (int i=0; iget_introns(&intron_list);
+ }
+
+ plhs[1] = mxCreateNumericMatrix(2, intron_list.size()/2, mxUINT32_CLASS, mxREAL);
+ uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]);
+ for (int p = 0; p=2) {
+ vector intron_starts;
+ vector intron_ends;
+ vector block_len1;
+ vector block_len2;
+ for (int i=0; iget_introns(&intron_starts, &intron_ends, &block_len1, &block_len2);
+ }
+
+ plhs[1] = mxCreateNumericMatrix(4, intron_starts.size(), mxINT32_CLASS, mxREAL);
+ uint32_t *p_intron_list = (uint32_t*) mxGetData(plhs[1]);
+ for (int p = 0; p=3) {
+ plhs[2] = mxCreateNumericMatrix(1, num_pos, mxUINT32_CLASS, mxREAL);
+ uint32_t *p_pair_map = (uint32_t*) mxGetData(plhs[2]);
+ if (num_pos>0 && p_pair_map==NULL)
+ mexErrMsgTxt("Error allocating memory\n");
+
+ vector pair_ids;
+
+ int take_cnt = 0;
+ int discard_cnt = 0;
+ // find consecutive reads with the same id
+ for (int i=0; i<((int) reads.size())-1; i++) {
+ int j = i+1;
+ while(jread_id, reads[j]->read_id) == 0) {
+ if ((reads[i]->left && reads[j]->right) || (reads[j]->left && reads[i]->right) && (reads[i]->reverse != reads[j]->reverse)) {
+ if (reads[i]->get_last_position()==-1 || reads[j]->get_last_position()==-1)
+ break;
+ if (reads[i]->get_last_position()start_pos && reads[j]->start_pos-reads[i]->get_last_position()<60000) {
+ int from = std::max(0, reads[i]->get_last_position()-from_pos);
+ int to = std::min(num_pos-1, reads[j]->start_pos-from_pos);
+ pair_ids.push_back(i);
+ pair_ids.push_back(j);
+ for (int k=from; kstart_pos>reads[j]->get_last_position() && reads[j]->get_last_position()-reads[i]->start_pos<60000) {
+ int from = std::max(0, reads[j]->get_last_position()-from_pos);
+ int to = std::min(num_pos-1, reads[i]->start_pos-from_pos);
+ pair_ids.push_back(i);
+ pair_ids.push_back(j);
+ for (int k=from; k=4) {
+ plhs[3] = mxCreateNumericMatrix(2, pair_ids.size()/2, mxUINT32_CLASS, mxREAL);
+ uint32_t *pair_ids_ret = (uint32_t*) mxGetData(plhs[3]);
+ if (pair_ids.size()>0 && pair_ids_ret==NULL)
+ mexErrMsgTxt("Error allocating memory\n");
+ for (int i=0; i
+#include
+#include "sam.h"
+#include "get_reads_direct.h"
+
+#include
+ using std::vector;
+#include
+ using std::string;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ int beg, end;
+ samfile_t *in;
+} tmpstruct_t;
+
+typedef struct {
+ uint64_t u, v;
+} pair64_t;
+
+static inline int is_overlap(uint32_t beg, uint32_t end, const bam1_t *b)
+{
+ uint32_t rbeg = b->core.pos;
+ uint32_t rend = b->core.n_cigar? bam_calend(&b->core, bam1_cigar(b)) : b->core.pos + 1;
+ return (rend > beg && rbeg < end);
+}
+
+pair64_t * get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int* cnt_off);
+
+ int bam_fetch_reads(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_header_t* header, vector* reads, char strand);
+
+// callback for bam_plbuf_init()
+static int pileup_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)
+{
+ //tmpstruct_t *tmp = (tmpstruct_t*)data;
+ //if ((int)pos >= tmp->beg && (int)pos < tmp->end)
+ // printf("%s\t%d\t%d\n", tmp->in->header->target_name[tid], pos + 1, n);
+ return 0;
+}
+#ifdef __cplusplus
+}
+#endif
+int parse_sam_line(char* line, CRead* read);
+//int set_strand(char c);
+//void parse_cigar(bam1_t* b, CRead* read);
+void parse_cigar(bam1_t* b, CRead* read, bam_header_t* header);
+
+
+int get_reads_from_bam(char* filename, char* region, vector<CRead*>* reads, char strand, int lsubsample)
+{
+    // Appends the reads of BAM file `filename` that bam_fetch_reads() selects for `region`
+    // and `strand` to `reads`. Returns 0 on success and 1 when the file cannot be
+    // opened, its index cannot be loaded or the region is invalid.
+    subsample = lsubsample;
+    srand (time(NULL));
+    tmpstruct_t tmp;
+    tmp.in = samopen(filename, "rb", 0);
+    if (tmp.in == 0) {
+        fprintf(stderr, "Fail to open BAM file %s\n", filename);
+        return 1;
+    }
+    int ref;
+    bam_index_t *idx = bam_index_load(filename); // load BAM index
+    if (idx == 0) {
+        fprintf(stderr, "BAM indexing file is not available.\n");
+        samclose(tmp.in); // do not leak the open file on this error path
+        return 1;
+    }
+    bam_parse_region(tmp.in->header, region, &ref,
+        &tmp.beg, &tmp.end); // parse the region
+    if (ref < 0) {
+        fprintf(stderr, "Invalid region %s\n", region);
+        bam_index_destroy(idx); // release the index and the file before bailing out
+        samclose(tmp.in);
+        return 1;
+    }
+
+    bam_plbuf_t *buf = bam_plbuf_init(pileup_func, &tmp); // initialize pileup
+
+    bam_fetch_reads(tmp.in->x.bam, idx, ref, tmp.beg, tmp.end, buf, tmp.in->header, reads, strand);
+
+    bam_plbuf_push(0, buf); // finalize pileup
+    bam_index_destroy(idx);
+    bam_plbuf_destroy(buf);
+    samclose(tmp.in);
+    return 0;
+}
+
+
+int bam_fetch_reads(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_header_t* header, vector* reads, char strand)
+{
+ // Appends to `reads` the alignments of reference `tid` that is_overlap() accepts for
+ // the range beg..end. The chunks reported by get_chunk_coordinates() are visited in
+ // order; each accepted alignment is subsampled, parsed by parse_cigar() and kept
+ // when it passes the strand test below. `data` is not used in this function.
+ // Always returns 0.
+ int n_off;
+ pair64_t *off = get_chunk_coordinates(idx, tid, beg, end, &n_off);
+ if (off == 0) return 0;
+ {
+ // retrieve alignments
+ uint64_t curr_off;
+ int i, ret, n_seeks;
+ n_seeks = 0; i = -1; curr_off = 0;
+ // b is one record buffer that every bam_read1() call below refills
+ bam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t));
+ for (;;) {
+ if (curr_off == 0 || curr_off >= off[i].v) { // then jump to the next chunk
+ if (i == n_off - 1) break; // no more chunks
+ if (i >= 0) assert(curr_off == off[i].v); // otherwise bug
+ if (i < 0 || off[i].v != off[i+1].u) { // not adjacent chunks; then seek
+ bam_seek(fp, off[i+1].u, SEEK_SET);
+ curr_off = bam_tell(fp);
+ ++n_seeks;
+ }
+ ++i;
+ }
+ // a non-positive return value of bam_read1() ends the loop
+ if ((ret = bam_read1(fp, b)) > 0) {
+ curr_off = bam_tell(fp);
+ if (b->core.tid != tid || b->core.pos >= end) break; // no need to proceed
+ else if (is_overlap(beg, end, b))
+ {
+ // random subsampling: the read is kept when rand()%1000 < subsample,
+ // so a subsample value of 1000 keeps every read
+ int rr = rand();
+ if ((rr%1000 < subsample))
+ {
+ CRead* read = new CRead();
+ parse_cigar(b, read, header);
+
+ // keep the read if no strand was requested ('0'), if the strands agree,
+ // or if the read's own strand is still the '0' set in parse_cigar()
+ if (strand == '0' || strand==read->strand[0] || read->strand[0]=='0')
+ {
+ // flag masks are defined in get_reads_direct.h (0x40, 0x80, 0x10)
+ read->left = (b->core.flag & left_flag_mask) >0;
+ read->right = (b->core.flag & right_flag_mask) >0;
+ read->reverse = (b->core.flag & reverse_flag_mask) >0;
+ reads->push_back(read);
+ }
+ else
+ {
+ // strand mismatch: discard the parsed read
+ delete read;
+ }
+ }
+ }
+ } else break; // end of file
+ }
+ // n_seeks only counts the seek calls; it is not reported anywhere
+ bam_destroy1(b);
+ }
+ free(off);
+ return 0;
+}
+
+void parse_cigar(bam1_t* b, CRead* read, bam_header_t* header)
+{
+    // Fills `read` from alignment `b`: the 1-based start position, a private copy of
+    // the read name, the aligned blocks (start offsets relative to start_pos and
+    // lengths on the reference) and the XS, H0, NM and HI auxiliary tags.
+    // `header` is not used.
+    read->start_pos = b->core.pos+1;
+    read->set_strand('0');
+    // size the copy from the stored name length (l_qname counts the terminating NUL)
+    // instead of a fixed 100 bytes, which overflowed for long read names
+    read->read_id = new char[b->core.l_qname + 1];
+    sprintf(read->read_id, "%s", bam1_qname(b));
+
+    for (int k = 0; k < b->core.n_cigar; ++k)
+    {
+        int op = bam1_cigar(b)[k] & BAM_CIGAR_MASK; // operation
+        int l = bam1_cigar(b)[k] >> BAM_CIGAR_SHIFT; // length
+        if (op == BAM_CMATCH)
+        {
+            bool after_intron = k>0 && (bam1_cigar(b)[k-1] & BAM_CIGAR_MASK)==BAM_CREF_SKIP;
+            if (read->block_lengths.empty())
+            {
+                // first aligned block; it may follow clipping or an insertion (e.g. 5S70M)
+                read->block_lengths.push_back(l);
+                read->block_starts.push_back(0);
+            }
+            else if (after_intron)
+            {
+                // new block: end of the previous block plus the length of the intron
+                int l_prev = bam1_cigar(b)[k-1] >> BAM_CIGAR_SHIFT;
+                int intron_start = read->block_starts.back()+read->block_lengths.back();
+                read->block_lengths.push_back(l);
+                read->block_starts.push_back(intron_start+l_prev);
+            }
+            else
+            {
+                // match behind an insertion, deletion or clip continues the current block;
+                // the former test `op == BAM_CDEL` could never hold here and dropped it
+                read->block_lengths.back() += l;
+            }
+        }
+        else if (op == BAM_CDEL)
+        {
+            // a deletion covers reference bases, so it extends the current block
+            if (!read->block_lengths.empty())
+                read->block_lengths.back() += l;
+        }
+        // introns (BAM_CREF_SKIP) are handled by the match that follows them; insertions
+        // and clipping (BAM_CINS, BAM_CSOFT_CLIP, ...) do not cover reference bases
+    }
+
+    // parse auxiliary data
+    uint8_t* s = bam1_aux(b);
+    uint8_t* end = b->data + b->data_len;
+    while (end - s > 3) // a field has a two-byte key, a type byte and a payload
+    {
+        uint8_t type, key[2];
+        key[0] = s[0]; key[1] = s[1];
+        s += 2; type = *s; ++s;
+        if (type == 'A')
+        {
+            // a single character; the XS tag sets the strand of the read
+            if ( key[0] =='X' && key[1] == 'S')
+            {
+                read->set_strand((char) *s);
+            }
+            ++s;
+        }
+        else if (type == 'C')
+        {
+            // one unsigned byte; H0, NM and HI are copied into the read
+            if ( key[0] =='H' && key[1] == '0')
+                read->matches = (int) *s;
+            if ( key[0] =='N' && key[1] == 'M')
+                read->mismatches = (int) *s;
+            if ( key[0] =='H' && key[1] == 'I')
+                read->multiple_alignment_index = (int) *s;
+            ++s;
+        }
+        else if (type == 'c') { ++s; }
+        else if (type == 'S') { s += 2; }
+        else if (type == 's') { s += 2; }
+        else if (type == 'I') { s += 4; }
+        else if (type == 'i') { s += 4; }
+        else if (type == 'f') { s += 4; }
+        else if (type == 'd') { s += 8; }
+        else if (type == 'Z' || type == 'H')
+        {
+            // NUL-terminated payload: skip the whole string; advancing a single byte
+            // made the parser read the string contents as further tags
+            while (s < end && *s)
+                ++s;
+            if (s < end) ++s;
+        }
+        else if (type == 'B' && end - s >= 5)
+        {
+            // array: element type, 32-bit little-endian count, then the elements
+            uint8_t sub = s[0];
+            uint32_t n = s[1] | (s[2] << 8) | (s[3] << 16) | ((uint32_t) s[4] << 24);
+            int width = (sub=='c' || sub=='C') ? 1 : (sub=='s' || sub=='S') ? 2 : 4;
+            if ((size_t) n*width > (size_t) (end - s - 5)) break; // truncated array
+            s += 5 + (size_t) n*width;
+        }
+        else
+        {
+            break; // unknown type: payload size unknown, stop instead of misreading
+        }
+    }
+}
+
+//int set_strand(char c)
+//{
+// if (c=='+')
+// {
+// char* fl = (char*) "0x0010";
+// g_flag_on = strtol(fl, 0, 0);
+// g_flag_off = 0;
+// }
+// else if (c=='-')
+// {
+// char* fl = (char*) "0x0010";
+// g_flag_off = strtol(fl, 0, 0);
+// g_flag_on = 0;
+// }
+// return 0;
+//}
+
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/mex/get_reads_direct.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/mex/get_reads_direct.h Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,29 @@
+/*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 3 of the License, or
+* (at your option) any later version.
+*
+* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch
+* Copyright (C) 2010-2011 Max Planck Society
+*/
+
+
+#ifndef __GET_READS_DIRECT_H__
+#define __GET_READS_DIRECT_H__
+
+#include
+ using std::vector;
+#include "read.h"
+
+//static int g_flag_on = 0, g_flag_off = 0;
+static int left_flag_mask = 0x40;    // bit tested for the "left" mate (0x40 is the SAM first-segment FLAG bit)
+static int right_flag_mask = 0x80;   // bit tested for the "right" mate (0x80 is the SAM last-segment FLAG bit)
+static int reverse_flag_mask = 0x10; // bit tested for reverse-strand alignments (SAM FLAG 0x10)
+
+static int subsample = 1000;
+//static int collapse = 0;
+
+int get_reads_from_bam(char* filename, char* region, vector* reads, char strand, int lsubsample);
+
+#endif
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/mex/interval_overlap.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/mex/interval_overlap.cpp Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,217 @@
+/*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 3 of the License, or
+* (at your option) any later version.
+*
+* Written (W) 2010-2011 Jonas Behr
+* Copyright (C) 2010-2011 Max Planck Society
+*/
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+ using std::vector;
+#include
+ using std::sort;
+ using std::min;
+ using std::max;
+
+typedef struct {
+ int start;
+ int stop;
+ int idx;
+ int set_id;
+} interval_t;
+
+bool compare (interval_t i, interval_t j)
+{
+ return (i.start= 1);
+}
+/* True when interval a ends strictly before interval b begins. */
+bool leftOf(interval_t a, interval_t b)
+{
+    const bool a_ends_before_b = b.start > a.stop;
+    return a_ends_before_b;
+}
+
+/*
+ * One step of the sweep over two start-sorted interval lists.
+ *
+ * f        interval currently being processed
+ * Wf       window of still-relevant intervals from f's list; f is appended
+ *          unless it lies completely left of g
+ * g        current head of the other list
+ * Wg       window of still-relevant intervals from the other list; entries
+ *          lying left of f are removed, the rest are tested against f
+ * overlap  flat output list of index pairs; the index of the interval whose
+ *          set_id is 1 is always pushed first
+ *
+ * NOTE(review): the element types vector<interval_t> / vector<int> were
+ * restored from usage (Wg elements are copied into an interval_t, overlap
+ * receives int idx values) -- confirm against the original source.
+ */
+void scan(interval_t f, vector<interval_t>* Wf, interval_t g, vector<interval_t>* Wg, vector<int>* overlap)
+{
+    vector<interval_t>::iterator i = Wg->begin();
+    while (i != Wg->end())
+    {
+        interval_t g2 = *i;
+        if (leftOf(g2,f))
+        {
+            // erase() invalidates i, so continue from the iterator it returns
+            // (inefficient if Wg is large: all later elements are moved)
+            i = Wg->erase(i);
+        }
+        else if (overlaps(g2,f))
+        {
+            if (g2.set_id==1)
+            {
+                overlap->push_back(g2.idx);
+                overlap->push_back(f.idx);
+            }
+            else if (f.set_id==1)
+            {
+                overlap->push_back(f.idx);
+                overlap->push_back(g2.idx);
+            }
+            i++;
+        }
+        else
+        {
+            printf("never happens??\n");
+            i++;
+        }
+    }
+    if (!leftOf(f, g))
+    {
+        Wf->push_back(f);
+    }
+}
+
+/*
+ * prhs[0] first interval set starts
+ * prhs[1] first interval set stops
+ * prhs[2] second interval set starts
+ * prhs[3] second interval set stops
+ *
+ * return:
+ * plhs[0] one based index in first interval set overlapping with a interval in the second set
+ * plhs[1] corresponding index in the second set
+ *
+*/
+void mexFunction(int nlhs, mxArray *plhs[], int nrhs, const mxArray *prhs[])
+{
+ if (nrhs!=4)
+ mexErrMsgTxt("Expected 4 arguments: starts1, stops1, starts2, stops2 \n");
+ if (nlhs!=2)
+ mexErrMsgTxt("Expected 2 output arguments \n");
+
+ int num_intervals1 = mxGetNumberOfElements(prhs[0]);
+ assert(num_intervals1 == mxGetNumberOfElements(prhs[1]));
+ int num_intervals2 = mxGetNumberOfElements(prhs[2]);
+ assert(num_intervals2 == mxGetNumberOfElements(prhs[3]));
+
+ //printf("num_intervals1: %i\n", num_intervals1);
+ //printf("num_intervals2: %i\n", num_intervals2);
+
+ double* starts1 = mxGetPr(prhs[0]);
+ double* stops1 = mxGetPr(prhs[1]);
+ double* starts2 = mxGetPr(prhs[2]);
+ double* stops2 = mxGetPr(prhs[3]);
+
+ vector intervals1;
+ for (int i=0; i::max();
+ i.stop = std::numeric_limits::max();
+ i.set_id = std::numeric_limits::max();
+ i.idx = std::numeric_limits::max();
+ intervals1.push_back(i);
+
+ //printf("num_intervals1: %i\n", intervals1.size());
+ vector intervals2;
+ for (int i=0; i overlap;
+ vector Wx;
+ vector Wy;
+ vector::iterator x = intervals1.begin();
+ vector::iterator y = intervals2.begin();
+ while (x::iterator x;
+ //vector::iterator y;
+ //if (it1>intervals1.end())
+ // x = inf_interval();
+ //else
+ // x = it1;
+ //if (it2>intervals2.end())
+ // y = inf_interval();
+ //else
+ // y=it2;
+
+ if (x->start <= y->start)
+ {
+ scan(*x, &Wx, *y, &Wy, &overlap);
+ x++;
+ }
+ else
+ {
+ if (y<=intervals2.end())
+ {
+ scan(*y, &Wy, *x, &Wx, &overlap);
+ y++;
+ }
+ }
+ }
+
+ plhs[0] = mxCreateDoubleMatrix(1, overlap.size()/2, mxREAL);
+ double* idx1 = mxGetPr(plhs[0]);
+
+ plhs[1] = mxCreateDoubleMatrix(1, overlap.size()/2, mxREAL);
+ double* idx2 = mxGetPr(plhs[1]);
+
+ for (int i=0; i
+#include
+#include "mex_input.h"
+
+/*
+ * Copy a MATLAB char row vector into a newly malloc'ed, NUL-terminated buffer.
+ *
+ * prhs     MEX argument; must be a char array with exactly one row
+ * returns  buffer of mxGetN(prhs)+1 bytes allocated with malloc()
+ *
+ * Every failure is reported through mexErrMsgTxt, which does not return to
+ * the caller.
+ */
+char *get_string(const mxArray *prhs) {
+    char *buf;
+    int buflen;
+    if (!prhs)
+        mexErrMsgTxt("get_string called with NULL pointer arg");
+    if (!mxIsChar(prhs))
+        mexErrMsgTxt("input is not a string");
+    if (mxGetM(prhs) != 1)
+        mexErrMsgTxt("input is not a row vector");
+    buflen = mxGetN(prhs) + 1;
+    buf = (char*) malloc(buflen);
+    if (!buf)
+        mexErrMsgTxt("out of memory");
+    /* copy the string from prhs into buf and add terminating NULL char */
+    if (mxGetString(prhs, buf, buflen))
+    {
+        /* release the buffer first: mexErrMsgTxt never returns here */
+        free(buf);
+        mexErrMsgTxt("not enough space");
+    }
+    return buf;
+}
+
+/*
+ * Interpret a scalar MEX argument as a boolean (non-zero means true).
+ *
+ * The NULL check runs before prhs is inspected. The value is read with
+ * mxGetScalar, so logical and integer scalars are handled as well as doubles.
+ * Errors are reported through mexErrMsgTxt, which does not return.
+ */
+bool get_bool(const mxArray *prhs)
+{
+    if (!prhs)
+        mexErrMsgTxt("Arg is NULL pointer");
+    if (mxGetM(prhs) != 1 || mxGetN(prhs) != 1)
+        mexErrMsgTxt("Arg is not a scalar");
+
+    return mxGetScalar(prhs) != 0;
+}
+
+/*
+ * Return a scalar MEX argument converted to int (fraction truncated by the cast).
+ *
+ * The NULL check runs before prhs is inspected. The value is read with
+ * mxGetScalar, so integer-typed scalars are handled as well as doubles.
+ * Errors are reported through mexErrMsgTxt, which does not return.
+ */
+int get_int(const mxArray *prhs)
+{
+    if (!prhs)
+        mexErrMsgTxt("Arg is NULL pointer");
+    if (mxGetM(prhs) != 1 || mxGetN(prhs) != 1)
+        mexErrMsgTxt("Arg is not a scalar");
+
+    return (int) mxGetScalar(prhs);
+}
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/mex/mex_input.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/mex/mex_input.h Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,20 @@
+/*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 3 of the License, or
+* (at your option) any later version.
+*
+* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch
+* Copyright (C) 2010-2011 Max Planck Society
+*/
+
+
+#include
+#include
+
+#ifndef __MEX_INPUT_h__
+#define __MEX_INPUT_h__
+ char *get_string(const mxArray *prhs);
+ bool get_bool(const mxArray *prhs);
+ int get_int(const mxArray *prhs);
+#endif
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/mex/read.cpp
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/mex/read.cpp Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,214 @@
+/*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 3 of the License, or
+* (at your option) any later version.
+*
+* Written (W) 2010-2011 Jonas Behr, Regina Bohnert, Gunnar Raetsch
+* Copyright (C) 2010-2011 Max Planck Society
+*/
+
+
+#include "read.h"
+
+/*
+ * Builds an empty read: all pointer members are NULL, all counters and the
+ * start position are 0, and the three mate/strand flags are false.
+ */
+CRead::CRead()
+    : read_id(NULL),
+      sam_line(NULL),
+      start_pos(0),
+      strand(NULL),
+      matches(0),
+      mismatches(0),
+      multiple_alignment_index(0),
+      left(false),
+      right(false),
+      reverse(false)
+{
+}
+
+/*
+ * Releases the three character buffers owned by the read.
+ * The constructor initialises all three pointers to NULL, and delete[] on a
+ * NULL pointer is a no-op, so fields that were never assigned are safe.
+ * NOTE(review): the buffers must come from new[]; the allocation sites are
+ * not visible here -- confirm.
+ */
+CRead::~CRead() {
+ delete[] read_id;
+ delete[] sam_line;
+ delete[] strand;
+}
+
+/*
+ * Augments 'coverage' array at the positions covered by the read in the queried interval.
+ */
+void CRead::get_coverage(int p_start_pos, int p_end_pos, uint32_t* coverage)
+{
+ // block1 block2
+ // |=====|======|============|===========|======|====|
+ // ^ ^ ^
+ // p_start_pos | p_end_pos
+ // start_pos
+ // |0000001111111111111000000000000111111100000|
+ // *coverage
+ int len = p_end_pos-p_start_pos+1;
+ for (uint32_t n = 0; n < block_starts.size(); n++) {
+ int32_t from, to;
+ from = block_starts[n]+start_pos-p_start_pos;
+ to = block_starts[n]+start_pos-p_start_pos+block_lengths[n];
+ if (from < 0)
+ from = 0;
+ if (to < 0)
+ continue;
+ else if (to > len)
+ to = len;
+ for (int bp=from; bp0) // this if for some reason zero in case of softclips
+ return start_pos+block_starts.back()+block_lengths.back();
+ return -1;
+}
+
+/*
+ * Adds the column indices (= positions) covered by the read to 'reads' array in current row (= read).
+ * These indices can be used to build up a sparse matrix of reads x positions.
+ */
+void CRead::get_reads_sparse(int p_start_pos, int p_end_pos, double* reads, uint32_t & reads_c, uint32_t row_idx) {
+ int len = p_end_pos-p_start_pos+1;
+ for (uint32_t n = 0; n < block_starts.size(); n++) {
+ uint32_t from, to;
+ if (block_starts[n]+start_pos-p_start_pos >= 0)
+ from = block_starts[n]+start_pos-p_start_pos;
+ else
+ from = 0;
+ if (block_starts[n]+start_pos-p_start_pos+block_lengths[n] >= 0)
+ to = block_starts[n]+start_pos-p_start_pos+block_lengths[n];
+ else
+ to = 0;
+ for (int bp=from; bp* acc_pos)
+{
+ if (strand[0]=='+')
+ {
+ for (int k=1;kpush_back(start_pos+block_starts[k]-1);
+ }
+ else if (strand[0]=='-')
+ {
+ for (int k=1;kpush_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2);
+ }
+}
+
+void CRead::get_don_splice_sites(vector* don_pos)
+{
+
+ if (strand[0]=='+')
+ {
+ for (int k=1;kpush_back(start_pos+block_starts[k-1]+block_lengths[k-1]-2);
+ }
+ else if (strand[0]=='-')
+ {
+ for (int k=1;kpush_back(start_pos+block_starts[k]-1);
+ }
+}
+
+int CRead::min_exon_len()
+{
+ int min = 1e8;
+ for (int k=0;kmax)
+ max = block_starts[k]-(block_starts[k-1]+block_lengths[k-1]);
+ return max;
+}
+
+/*
+ * Adds start and end of introns in the read consecutively to the 'introns' vector.
+ */
+void CRead::get_introns(vector* introns)
+{
+ for (int i=1; ipush_back(istart);
+ introns->push_back(iend);
+ //fprintf(stdout, "%i intron: %d->%d\n", i, istart, iend);
+ }
+}
+void CRead::get_introns(vector* intron_starts, vector* intron_ends, vector* block_len1, vector* block_len2)
+{
+ for (int i=1; ipush_back(istart);
+ intron_ends->push_back(iend);
+ block_len1->push_back(block_lengths[i-1]) ;
+ block_len2->push_back(block_lengths[i]) ;
+ }
+}
+
+bool CRead::operator==(const CRead& read) const
+{
+ if (block_starts.size()!=read.block_starts.size())
+ return false;
+ if (block_lengths.size()!=read.block_lengths.size())
+ return false;
+ if (start_pos!=read.start_pos)
+ return false;
+ if (strand[0] != read.strand[0])
+ return false;
+ for (int i=0; i
+#include
+#include
+#include
+ using std::vector;
+
+
+class CRead {
+ public:
+ /** constructor
+ */
+ CRead();
+ ~CRead();
+
+ vector block_starts;
+ vector block_lengths;
+ char* read_id;
+ char* sam_line;
+ int start_pos;
+ char * strand;
+ int matches;
+ int mismatches;
+ int multiple_alignment_index;
+ bool left;
+ bool right;
+ bool reverse;
+
+ void get_coverage(int p_start_pos, int p_end_pos, uint32_t* coverage);
+ int get_last_position();
+ void get_reads_sparse(int p_start_pos, int p_end_pos, double* reads, uint32_t & reads_c, uint32_t row_idx);
+ void get_introns(vector* introns);
+ void get_introns(vector* intron_starts, vector* intron_ends, vector* block_len1, vector* block_len2);
+ void get_acc_splice_sites(vector* acc_pos);
+ void get_don_splice_sites(vector* acc_pos);
+ int max_intron_len();
+ int min_exon_len();
+ bool operator==(const CRead& read) const;
+ void print();
+ void set_strand(char s);
+ int get_mismatches();
+    /*
+     * "Less than" comparator ordering reads lexicographically by read_id.
+     * Reads without an id sort before reads that have one.
+     *
+     * read2 is tested first so that two id-less reads compare as equivalent
+     * (false in both directions). Returning true for both orders would break
+     * the strict weak ordering that std::sort requires of its comparator.
+     */
+    static bool compare_by_read_id(const CRead* read1, const CRead* read2)
+    {
+        if (!read2->read_id)
+            return false;
+        if (!read1->read_id)
+            return true;
+
+        // lengths of the two NUL-terminated ids
+        int cnt1=0;
+        while (read1->read_id[cnt1]!='\0')
+            cnt1++;
+        int cnt2 = 0;
+        while (read2->read_id[cnt2]!='\0')
+            cnt2++;
+
+        return std::lexicographical_compare(read1->read_id,read1->read_id+cnt1,read2->read_id,read2->read_id+cnt2);
+    };
+};
+#endif
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/setup_deseq-hts.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/setup_deseq-hts.sh Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,192 @@
+#!/bin/bash
+set -e
+
+. ./bin/deseq_config.sh
+
+echo ==========================================
+echo DESeq-hts setup script \(DESeq version $DESEQ_VERSION\)
+echo ==========================================
+echo
+echo DESeq-hts base directory \(currently set to \"$DESEQ_PATH\", suggest to set to \"`pwd`\", used if left empty\)
+read DESEQ_PATH
+if [ "$DESEQ_PATH" == "" ];
+then
+ DESEQ_PATH=`pwd`
+fi
+echo '=>' Setting DESeq-hts base directory to \"$DESEQ_PATH\"
+echo
+
+echo SAMTools directory \(currently set to \"$SAMTOOLS_DIR\", system version used if left empty\)
+read SAMTOOLS_DIR
+if [ "$SAMTOOLS_DIR" == "" ];
+then
+ if [ "$(which samtools)" != "" ] ;
+ then
+ SAMTOOLS_DIR=$(dirname $(which samtools))
+ else
+ echo samtools not found
+ exit -1 ;
+ fi
+fi
+echo '=>' Setting SAMTools directory to \"$SAMTOOLS_DIR\"
+echo
+
+echo Path to the python binary \(currently set to \"$PYTHON_PATH\", system version used, if left empty\)
+read PYTHON_PATH
+if [ "$PYTHON_PATH" == "" ];
+then
+ PYTHON_PATH=`which python`
+ if [ "$PYTHON_PATH" == "" ];
+ then
+ echo python not found
+ exit -1
+ fi
+fi
+echo '=>' Setting Python path to \"$PYTHON_PATH\"
+echo
+
+echo Path to the R binary \(currently set to \"$R_PATH\", system version used, if left empty\)
+read R_PATH
+if [ "$R_PATH" == "" ];
+then
+ R_PATH=`which R`
+ if [ "$R_PATH" == "" ];
+ then
+ echo R not found
+ exit -1
+ fi
+fi
+echo '=>' Setting R path to \"$R_PATH\"
+echo
+
+echo Path to Scipy library files \(currently set to \"$SCIPY_PATH\", system version is used if left empty\)
+read SCIPY_PATH
+echo '=>' Setting Scipy path to \"$SCIPY_PATH\"
+echo
+
+echo Which interpreter should be used \(\"octave\" or \"matlab\"\)
+read INTERPRETER
+if [ "$INTERPRETER" != 'octave' -a "$INTERPRETER" != 'matlab' ];
+then
+ echo Unrecognized choice: \"$INTERPRETER\"
+ echo Aborting
+ false
+fi
+echo '=>' Setting interpreter to \"$INTERPRETER\"
+echo
+
+if [ "$INTERPRETER" == 'octave' ];
+then  # Octave branch: locate octave and mkoctfile, and blank the MATLAB binary setting
+    echo Please enter the full path to octave \(currently set to \"$OCTAVE_BIN_PATH\", system version used, if left empty\)
+    read -r OCTAVE_BIN_PATH
+    if [ "$OCTAVE_BIN_PATH" == "" ];
+    then  # '|| true': a failed lookup must reach the message below instead of tripping 'set -e'
+        OCTAVE_BIN_PATH=`which octave || true`
+        if [ "$OCTAVE_BIN_PATH" == "" ];
+        then
+            echo octave not found
+            exit 1
+        fi
+    fi
+    echo '=>' Setting octave\'s path to \"$OCTAVE_BIN_PATH\"
+    echo
+    echo Please enter the full path to mkoctfile \(currently set to \"$OCTAVE_MKOCT\", system version used, if left empty\)
+    read -r OCTAVE_MKOCT
+    if [ "$OCTAVE_MKOCT" == "" ];
+    then
+        OCTAVE_MKOCT=`which mkoctfile || true`
+        if [ "$OCTAVE_MKOCT" == "" ];
+        then  # fall back to a mkoctfile sitting next to the octave binary
+            OCTAVE_MKOCT=$(dirname "$OCTAVE_BIN_PATH")/mkoctfile
+            if [ ! -f "$OCTAVE_MKOCT" ];
+            then
+                echo mkoctfile not found
+                exit 1
+            fi
+        fi
+    fi
+    echo '=>' Setting mkoctfile\'s path to \"$OCTAVE_MKOCT\"
+    echo
+    MATLAB_BIN_PATH=
+fi
+if [ "$INTERPRETER" == 'matlab' ];
+then  # MATLAB branch: locate matlab, mex and the include directory, and blank the Octave binary setting
+    echo Please enter the full path to matlab \(currently set to \"$MATLAB_BIN_PATH\", system version used, if left empty\)
+    read -r MATLAB_BIN_PATH
+    if [ "${MATLAB_BIN_PATH}" == "" ];
+    then  # '|| true': a failed lookup must reach the message below instead of tripping 'set -e'
+        MATLAB_BIN_PATH=`which matlab || true`
+        if [ "$MATLAB_BIN_PATH" == "" ];
+        then
+            echo matlab not found
+            exit 1
+        fi
+    fi
+    if [ ! -f "$MATLAB_BIN_PATH" ];
+    then
+        echo matlab not found
+        exit 1
+    fi
+    echo '=>' Setting matlab\'s path to \"$MATLAB_BIN_PATH\"
+    echo
+    echo Please enter the full path to mex binary \(currently set to \"$MATLAB_MEX_PATH\", system version used if left empty\)
+    read -r MATLAB_MEX_PATH
+    if [ "$MATLAB_MEX_PATH" == "" ];
+    then
+        MATLAB_MEX_PATH=`which mex || true`
+        if [ "$MATLAB_MEX_PATH" == "" ];
+        then
+            echo mex not found
+            exit 1
+        fi
+    fi
+    if [ ! -f "$MATLAB_MEX_PATH" ];
+    then
+        echo mex not found
+        exit 1
+    fi
+    echo '=>' Setting mex\' path to \"$MATLAB_MEX_PATH\"
+    echo
+    echo Please enter the full path to the matlab include directory \(currently set to \"$MATLAB_INCLUDE_DIR\", system version used, if left empty\)
+    read -r MATLAB_INCLUDE_DIR
+    if [ "$MATLAB_INCLUDE_DIR" == "" ];
+    then  # default: extern/include one level above the directory holding the matlab binary
+        MATLAB_INCLUDE_DIR=$(dirname "$MATLAB_BIN_PATH")/../extern/include
+    fi
+    if [ ! -d "$MATLAB_INCLUDE_DIR" ];
+    then
+        echo matlab include dir not found
+        exit 1
+    fi
+    echo '=>' Setting matlab\'s include directory to \"$MATLAB_INCLUDE_DIR\"
+    echo
+    OCTAVE_BIN_PATH=
+fi
+
+cp -p bin/deseq_config.sh bin/deseq_config.sh.bk
+grep -v -e OCTAVE_BIN_PATH -e OCTAVE_MKOCT -e MATLAB_BIN_PATH -e MATLAB_MEX_PATH -e MATLAB_INCLUDE_DIR \
+    -e DESEQ_PATH -e DESEQ_SRC_PATH -e DESEQ_BIN_PATH \
+    -e INTERPRETER -e SAMTOOLS_DIR -e PYTHON_PATH -e SCIPY_PATH -e R_PATH -e DESEQ_VERSION bin/deseq_config.sh.bk \
+    > bin/deseq_config.sh || [ $? -eq 1 ]  # grep exits 1 when every line was filtered out; only real errors (>1) abort
+echo
+echo
+echo generating config file
+# Re-append the settings; the filter above matches variable names, so each one ends up in the file exactly once.
+echo export DESEQ_VERSION=$DESEQ_VERSION >> bin/deseq_config.sh
+echo export DESEQ_PATH=$DESEQ_PATH >> bin/deseq_config.sh
+echo export DESEQ_SRC_PATH=${DESEQ_PATH}/src >> bin/deseq_config.sh
+echo export DESEQ_BIN_PATH=${DESEQ_PATH}/bin >> bin/deseq_config.sh
+echo export INTERPRETER=$INTERPRETER >> bin/deseq_config.sh
+echo export MATLAB_BIN_PATH=$MATLAB_BIN_PATH >> bin/deseq_config.sh
+echo export MATLAB_MEX_PATH=$MATLAB_MEX_PATH >> bin/deseq_config.sh
+echo export MATLAB_INCLUDE_DIR=$MATLAB_INCLUDE_DIR >> bin/deseq_config.sh
+echo export OCTAVE_BIN_PATH=$OCTAVE_BIN_PATH >> bin/deseq_config.sh
+echo export OCTAVE_MKOCT=$OCTAVE_MKOCT >> bin/deseq_config.sh
+echo export SAMTOOLS_DIR=$SAMTOOLS_DIR >> bin/deseq_config.sh
+echo export PYTHON_PATH=$PYTHON_PATH >> bin/deseq_config.sh
+echo export SCIPY_PATH=$SCIPY_PATH >> bin/deseq_config.sh
+echo export R_PATH=$R_PATH >> bin/deseq_config.sh
+
+echo
+echo Done.
+echo
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/src/deseq-hts.sh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/src/deseq-hts.sh Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,89 @@
+#!/bin/bash
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or
+# (at your option) any later version.
+#
+# Copyright (C) 2009-2012 Max Planck Society
+#
+
+set -e
+
+PROG=`basename "$0"`
+DIR=`dirname "$0"`
+
+. "${DIR}/../bin/deseq_config.sh"
+
+echo
+echo ${PROG}: FML http://galaxy.fml.mpg.de Galaxy wrapper for the DESeq version $DESEQ_VERSION.
+echo
+echo DESeq performs differential expression testing from RNA-Seq measurements.
+echo
+# Arguments: annotation (GFF3), result file, genes file, then one argument per replicate group.
+ANNO_INPUT=${1}
+shift
+DESEQ_RES_FILE=${1}
+shift
+GENES_FN=${1}
+shift
+
+mkdir -p "$(dirname "$GENES_FN")"
+
+echo %%%%%%%%%%%%%%%%%%%%%%%
+echo % 1. Data preparation %
+echo %%%%%%%%%%%%%%%%%%%%%%%
+echo
+echo load the genome annotation in GFF3 format and create an annotation object
+echo
+export PYTHONPATH=$PYTHONPATH:${SCIPY_PATH}
+${PYTHON_PATH} "${DIR}/../tools/ParseGFF.py" "${ANNO_INPUT}" "${GENES_FN}"
+"${DIR}/../bin/genes_cell2struct" "${GENES_FN}" 2>&1
+echo
+echo genome annotation stored in $GENES_FN
+
+echo
+echo %%%%%%%%%%%%%%%%%%%%
+echo % 2. Read counting %
+echo %%%%%%%%%%%%%%%%%%%%
+echo
+# Each remaining argument is one replicate group: BAM files joined by ':'.
+echo counting reads overlapping exons using given alignments
+for REPLICATE_GROUP in "$@"
+do
+    IFS=':' read -ra BAM_FILES <<< "$REPLICATE_GROUP"  # split for this command only; a global IFS=':' would re-split "$@" below
+    for BAM_FILE in "${BAM_FILES[@]}"
+    do
+        echo
+        if [ ! -f "${BAM_FILE}.bai" ]
+        then
+            echo "Indexing $BAM_FILE"
+            ${SAMTOOLS_DIR}/samtools index "$BAM_FILE"
+        else
+            echo "$BAM_FILE already indexed"
+        fi
+        echo
+    done
+done
+tmpfile=`mktemp --tmpdir=/tmp`
+
+echo "${DIR}/../bin/get_read_counts ${GENES_FN} $tmpfile $*"
+"${DIR}/../bin/get_read_counts" "${GENES_FN}" "$tmpfile" "$@" 2>&1
+
+echo
+echo %%%%%%%%%%%%%%%%%%%%%%%%%%%
+echo % 3. Differential testing %
+echo %%%%%%%%%%%%%%%%%%%%%%%%%%%
+echo
+
+echo testing genes for differential expression using given alignments
+
+echo "cat ${DIR}/../src/difftest_deseq.R | $R_PATH --slave --args $tmpfile ${DESEQ_RES_FILE} $#"
+cat "${DIR}/../src/difftest_deseq.R" | $R_PATH --slave --args "$tmpfile" "${DESEQ_RES_FILE}" $# 2> /dev/null
+
+rm "$tmpfile" "${tmpfile}_COUNTS.tab" "${tmpfile}_CONDITIONS.tab"
+echo
+echo %%%%%%%%
+echo % Done %
+echo %%%%%%%%
+echo
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/src/deseq_config.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/src/deseq_config.m Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,54 @@
+function deseq_config
+% DESEQ_CONFIG Fills the global path settings from environment variables.
+%
+%
+% This program is free software; you can redistribute it and/or modify
+% it under the terms of the GNU General Public License as published by
+% the Free Software Foundation; either version 3 of the License, or
+% (at your option) any later version.
+%
+% Written (W) 2009-2011 Regina Bohnert, Gunnar Raetsch
+% Copyright (C) 2009-2011 Max Planck Society
+%
+
+% DESeq paths
+global DESEQ_PATH DESEQ_SRC_PATH
+% interpreter paths
+global INTERPRETER MATLAB_BIN_PATH OCTAVE_BIN_PATH
+% SAMTools path
+global SAMTOOLS_DIR
+global g_ignore_keyboard
+
+% each global takes the value of the environment variable of the same name
+DESEQ_PATH = getenv('DESEQ_PATH');
+DESEQ_SRC_PATH = getenv('DESEQ_SRC_PATH');
+INTERPRETER = getenv('INTERPRETER');
+MATLAB_BIN_PATH = getenv('MATLAB_BIN_PATH');
+OCTAVE_BIN_PATH = getenv('OCTAVE_BIN_PATH');
+SAMTOOLS_DIR = getenv('SAMTOOLS_DIR');
+
+% make <DESEQ_PATH>/tools available on the search path
+addpath(sprintf('%s/tools', DESEQ_PATH));
+
+% the GPL licence string selects the Octave branch; anything else is treated as MATLAB
+running_octave = isequal(license, 'GNU General Public License');
+if running_octave
+    % switch off a few expected warnings (same identifiers, same order)
+    octave_ids = { ...
+        'Octave:precedence-change', ...
+        'Octave:function-name-clash', ...
+        '', ...
+        'Octave:num-to-str', ...
+        'Octave:function-name-clash', ...
+        'Octave:divide-by-zero', ...
+        'Octave:future-time-stamp', ...
+        'Octave:assign-as-truth-value'};
+    for k = 1:numel(octave_ids)
+        warning('off', octave_ids{k});
+    end
+else
+    warning('off', 'MATLAB:typeaheadBufferOverflow');
+end
+
+% make sure no process stops with a debug prompt
+g_ignore_keyboard = 1;
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/src/difftest_deseq.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/src/difftest_deseq.R Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,56 @@
+library( DESeq )
+
+### get arguments (everything after --args) 1: INFILE, 2: OUTFILE 3: SIZE (not used in this script)
+args <- commandArgs(trailingOnly=TRUE)
+INFILE<-args[1]
+OUTFILE<-args[2]
+if (is.na(INFILE) || is.na(OUTFILE)) stop("expected arguments after --args: INFILE OUTFILE [SIZE]")
+INFILE_COUNTS=c(paste(INFILE, "_COUNTS.tab", sep=""))
+INFILE_CONDS=c(paste(INFILE, "_CONDITIONS.tab", sep=""))
+
+### read count data from file
+countsTable <- read.delim( INFILE_COUNTS, header=TRUE, stringsAsFactors=TRUE )
+condsTable <- read.delim( INFILE_CONDS, header=TRUE, stringsAsFactors=TRUE )
+
+### use gene IDs as row names; drop=FALSE keeps a data frame even with a single sample column
+rownames( countsTable ) <- countsTable$gene
+countsTable <- countsTable[ , -1, drop=FALSE ]
+head( countsTable )
+
+conds <- factor( condsTable[ , 2] )
+### the pairwise tests below need at least two conditions
+if (nlevels(conds) < 2) stop("at least two conditions (replicate groups) are required")
+cds <- newCountDataSet( round(countsTable), conds )
+#head( counts(cds) )
+
+cds <- estimateSizeFactors( cds )
+#sizeFactors( cds )
+
+### estimate variance function, use blind only, if no replicates are provided
+if (length(levels(conds)) < length(conds))
+{
+    cds <- estimateDispersions( cds )
+} else {
+    writeLines("\nYou did not enter any replicates! - The results may be less valuable without replicates!\n")
+    cds <- estimateDispersions( cds, method='blind', sharingMode='fit-only')
+}
+experiments <- levels(conds)
+
+res<-c()
+table_col_names<-c()
+for (i in seq_len(length(experiments)-1))
+{
+    for( j in (i+1):(length(experiments)))
+    {
+        print(c(i,j))
+        tempres <- nbinomTest(cds,experiments[i],experiments[j])
+        res = cbind(res,tempres[,7])  # NOTE(review): column 7 is presumably the raw p-value -- confirm against nbinomTest
+        #res = cbind(res,tempres[,8])
+        table_col_names = cbind(table_col_names,paste('cond_', experiments[i], '_vs._cond_', experiments[j], sep=''))
+    }
+}
+
+DiffTable<-res
+rownames(DiffTable)<-rownames(countsTable)
+colnames(DiffTable)<-table_col_names
+write.table(DiffTable, file = OUTFILE, quote = FALSE, sep ="\t", eol ="\n", na = "1.000", dec = ".", row.names = TRUE,col.names =TRUE)
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/src/genes_cell2struct.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/src/genes_cell2struct.m Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,28 @@
+function genes_cell2struct(anno_fname)
+% GENES_CELL2STRUCT Rewrites a cell array of genes as a struct array, in place.
+%
+% genes_cell2struct(anno_fname)
+%
+% -- input --
+% anno_fname: file holding the variable 'genes'
+%
+% -- output --
+% none; when 'genes' is a cell, the file is overwritten with the struct array
+
+load(anno_fname, 'genes');
+if ~iscell(genes)
+    return;   % nothing to convert, the file is left untouched
+end
+cell_genes = genes;
+clear genes;
+for n = 1:length(cell_genes),
+    entry = cell_genes{n};
+    for k = 1:length(entry.exons)
+        entry.exons{k} = double(entry.exons{k});
+    end
+    entry.exons = reshape(entry.exons, 1, length(entry.exons));
+    entry.id = double(entry.id);
+    entry.start = double(entry.start);
+    entry.stop = double(entry.stop);
+    genes(n) = entry; end
+save(anno_fname, 'genes');
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/src/get_read_counts.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/src/get_read_counts.m Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,172 @@
+function get_read_counts(anno_dir, outfile, varargin)
+% GET_READ_COUNTS Counts, per gene and BAM file, the reads overlapping annotated exons.
+% -- input --
+% anno_dir: annotation file (passed to load(); must provide the variable 'genes')
+% outfile: prefix; <outfile>_COUNTS.tab and <outfile>_CONDITIONS.tab are written
+% varargin: replicate groups, each a ':'-separated list of BAM files
+
+% DESeq paths
+global DESEQ_PATH DESEQ_SRC_PATH
+
+% interpreter paths
+global INTERPRETER MATLAB_BIN_PATH OCTAVE_BIN_PATH
+
+% SAMTools path
+global SAMTOOLS_DIR
+
+%%%% configuration first: deseq_config fills the path globals used by addpath below
+deseq_config;
+addpath(sprintf('%s/tools', DESEQ_PATH));
+addpath(sprintf('%s/mex', DESEQ_PATH));
+addpath(sprintf('%s', DESEQ_SRC_PATH));
+
+
+%%% read list of replicate groups from variable length argument list
+rg_list = cell(1,size(varargin, 2));
+file_list = cell();
+file_cond_ids = [];
+file_rep_ids = [];
+for idx = 1:size(varargin, 2)
+    rg_list(idx) = varargin(idx);
+end
+idx = strmatch('', rg_list, 'exact');
+rg_list(idx) = [];
+for idx = 1:length(rg_list),
+    items = separate(rg_list{idx}, ':');
+    for idx2 = 1:length(items)
+        if isempty(deblank(items{idx2})),
+            continue;
+        end;
+        file_list{end + 1} = items{idx2};
+        file_cond_ids(end + 1) = idx;
+        file_rep_ids(end + 1) = idx2;
+    end;
+end;
+clear idx idx2;
+
+%%%%% adapt to number of input arguments
+file_num = length(file_list);
+RESULTS = cell(1, file_num);
+
+%%%% get annotation file
+load(sprintf('%s', anno_dir));
+
+%%%%% mask overlapping gene regions -> later not counted
+[genes] = mask_dubl(genes,0);
+
+%%%% remove genes with no annotated exons or without a start or stop position
+idx = find(arrayfun(@(x)(~isempty(x.exons)*~isempty(x.start)*~isempty(x.stop)), genes));
+fprintf('removed %i of %i genes, which had either no exons annotated or lacked a start or stop position\n', size(genes, 2) - size(idx, 2), size(genes, 2))
+genes = genes(idx);
+clear idx;
+
+%%%% check if genes have field chr_num
+if ~isfield(genes, 'chr_num')
+    chrms = unique({genes(:).chr});
+    for i = 1:length(genes)
+        genes(i).chr_num = strmatch(genes(i).chr, chrms, 'exact');
+    end;
+end;
+
+%%%% iterate over all given bam files
+for f_idx = 1:file_num
+    expr1_bam = fullfile('', file_list{f_idx});
+    STAT = cell(size(genes, 2),1);
+    for i=1:size(genes,2)
+        RESULT = cell(1,7);
+        gene = genes(i);
+        RESULT{4} = f_idx;
+        RESULT{1} = gene.name;
+        if isempty(gene.exons)
+            RESULT{2} = inf;
+            RESULT{3} = inf;
+            RESULT{5} = [inf,inf];
+            STAT{i} = RESULT;
+            continue;
+        elseif or(isempty(gene.start),isempty(gene.stop))
+            RESULT{2} = inf;
+            RESULT{3} = inf;
+            RESULT{5} = [inf,inf];
+            STAT{i} = RESULT;
+            continue;
+        end
+        if ~isempty(gene.chr_num),
+            [mask1, read_intron_list] = get_reads(expr1_bam, gene.chr, gene.start, gene.stop, '0');
+            clear read_intron_list;
+        else
+            mask1 = [];
+        end;
+
+        if isempty(mask1)
+            reads1 = zeros(0,gene.stop-gene.start+1);
+        else
+            reads1 = sparse(mask1(1,:)',mask1(2,:)',ones(size(mask1,2),1),max(mask1(1,:)),gene.stop-gene.start+1);
+        end
+        if ~isempty(reads1);
+            [reads1,FLAG] = remove_reads_from_other_genes(reads1,gene);
+        end
+        L = size(reads1, 2); % number of positions (columns); keeps RESULT{5} a two-element vector
+        RESULT{2}=[size(reads1,1)]; % number of all reads falling in that gene
+        EXON_IDX=zeros(1,gene.stop-gene.start+1);
+        for t=1:size(gene.transcripts,2)
+            for e=1:size(gene.exons{t},1)
+                EXON_IDX((gene.exons{t}(e,1)-gene.start+1):(gene.exons{t}(e,2)-gene.start+1))=1;
+            end
+        end
+        reads1 = reads1(sum(reads1(:,find(EXON_IDX)),2)>0,:);
+        L1 = sum(EXON_IDX);
+        RESULT{3}=[size(reads1,1)]; % number of reads overlapping to exons
+        RESULT{5}=[L, L1]; % number of positions in the gene span, number of exonic positions
+        % RESULT{6} and RESULT{7} stay empty; only RESULT{3} ends up in
+        % the COUNTS table written below
+        clear reads1;
+        STAT{i} = RESULT;
+    end;
+    RESULTS{f_idx} = STAT;
+end;
+
+S=size(genes,2);
+READCOUNTS_ALL=zeros(S, file_num);
+READCOUNTS_EXON=zeros(S, file_num);
+LENGTH_ALL=zeros(S,file_num);
+LEN_EXON=zeros(S, file_num);
+
+for j=1:file_num,
+    for i=1:S
+        T=RESULTS{j}{i};
+        if isempty(T)
+            continue
+        else
+            READCOUNTS_ALL(i,j)=T{2};
+            READCOUNTS_EXON(i,j)=T{3};
+            LENGTH_ALL(i,j)=T{5}(1);
+            LEN_EXON(i,j)=T{5}(2);
+        end
+    end
+end
+
+%%%%% write results for all bam files
+fid_conditions = fopen(sprintf('%s_CONDITIONS.tab', outfile), 'w');
+fid_counts = fopen(sprintf('%s_COUNTS.tab', outfile) ,'w');
+fprintf(fid_counts,'gene');
+fprintf(fid_conditions, 'file\tcondition\treplicate\n');
+for j = 1:length(file_list)
+    fname = file_list{j} ;
+    fname = separate(fname, '/');
+    fname = fname{end};
+    fname = strrep(fname, '.bam', '') ;
+    fprintf(fid_counts,'\t%s', fname);
+    fprintf(fid_conditions, '%s\t%i\t%i\n', fname, file_cond_ids(j), file_rep_ids(j));
+end;
+fprintf(fid_counts,'\n') ;
+
+for i = 1:size(genes,2)
+    fprintf(fid_counts,'%s',genes(i).name);
+    for j = 1:length(file_list),
+        fprintf(fid_counts,'\t%i', READCOUNTS_EXON(i,j));
+    end
+    fprintf(fid_counts,'\n');
+end
+fclose(fid_counts);
+fclose(fid_conditions);
+exit;
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/src/mask_dubl.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/src/mask_dubl.m Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,81 @@
+function [new_genes]=mask_dubl(genes,THRESH);
+% MASK_DUBL Returns genes with an added field non_unique_regions: rows [start stop] taken from other genes on the same chromosome that overlap the gene's span. THRESH is not used.
+CHROMOSOMES={};
+COUNTER=1;
+for i=1:size(genes,2)
+ CHROMOSOMES{COUNTER}=genes(i).chr;
+ COUNTER=COUNTER+1;
+end
+CHROMOSOMES=unique(CHROMOSOMES);
+% INFO rows: [gene index, start, stop, index of the gene's chromosome in CHROMOSOMES]
+
+INFO=zeros(size(genes,2),4);
+for i=1:size(genes,2)
+ CHR_VAL=0;
+ for chr= 1:length(CHROMOSOMES)
+ if strcmp(genes(i).chr,CHROMOSOMES(chr))
+ CHR_VAL=chr;
+ end
+ end
+ INFO(i,:)=[i,genes(i).start,genes(i).stop, CHR_VAL];
+end
+% one chromosome at a time, with its genes sorted by start coordinate
+COUNTER=1;
+new_genes=genes;
+for chr= 1:length(CHROMOSOMES)
+ GENES_ON_CHR=INFO(INFO(:,4)==chr,:);
+ [TEMP,POS]=sort(GENES_ON_CHR(:,2));
+ GENES_ON_CHR=GENES_ON_CHR(POS,:);
+ STARTS=GENES_ON_CHR(:,2);
+ STOPS=GENES_ON_CHR(:,3);
+ for i=1:(size(GENES_ON_CHR,1))
+ MIN_START=find(STOPS>=STARTS(i),1,'first'); % first gene (in start order) that ends at or after this gene's start
+ MAX_STOP=find(STARTS<=STOPS(i),1,'last'); % last gene (in start order) that starts at or before this gene's stop
+ if MIN_START==i % no earlier gene reaches into this one
+ MIN_START=[];
+ end
+ if MAX_STOP==i % no later gene starts inside this one
+ MAX_STOP=[];
+ end
+ EXONS=[];
+ if not (isempty(MIN_START))
+ for CURR=MIN_START:(i-1) % candidate genes sorted before gene i
+ if(not(isempty(genes(GENES_ON_CHR(CURR,1)).transcripts)))
+ for tra=1:size(genes(GENES_ON_CHR(CURR,1)).transcripts,2)
+ if(not(isempty(genes(GENES_ON_CHR(CURR,1)).exons)))
+ EXONS=[EXONS;genes(GENES_ON_CHR(CURR,1)).exons{tra}];
+ else
+ EXONS=[EXONS;genes(GENES_ON_CHR(CURR,1)).start,genes(GENES_ON_CHR(CURR,1)).stop];
+ end
+ end
+ else % no transcripts: use the whole gene span as one region
+ EXONS=[EXONS;genes(GENES_ON_CHR(CURR,1)).start,genes(GENES_ON_CHR(CURR,1)).stop];
+ end
+ end
+ end
+ if not (isempty(MAX_STOP))
+ for CURR=(i+1):MAX_STOP % candidate genes sorted after gene i
+ if(not(isempty(genes(GENES_ON_CHR(CURR,1)).transcripts)))
+ for tra=1:size(genes(GENES_ON_CHR(CURR,1)).transcripts,2)
+ if(not(isempty(genes(GENES_ON_CHR(CURR,1)).exons)))
+ EXONS=[EXONS;genes(GENES_ON_CHR(CURR,1)).exons{tra}];
+ else
+ EXONS=[EXONS;genes(GENES_ON_CHR(CURR,1)).start,genes(GENES_ON_CHR(CURR,1)).stop];
+ end
+ end
+ else % no transcripts: use the whole gene span as one region
+ EXONS=[EXONS;genes(GENES_ON_CHR(CURR,1)).start,genes(GENES_ON_CHR(CURR,1)).stop];
+ end
+
+ end
+ end
+ if not (isempty([MAX_STOP,MIN_START]))
+ EXONS=EXONS(EXONS(:,2)>=STARTS(i),:); % keep only regions that intersect [STARTS(i), STOPS(i)]
+ EXONS=EXONS(EXONS(:,1)<=STOPS(i),:);
+ new_genes(GENES_ON_CHR(i,1)).non_unique_regions=EXONS;
+ else
+ new_genes(GENES_ON_CHR(i,1)).non_unique_regions=[];
+ end
+ end
+ COUNTER=COUNTER+1;
+end
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/src/remove_reads_from_other_genes.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/src/remove_reads_from_other_genes.m Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,21 @@
+function [READS_OUT,FLAG]=remove_reads_from_other_genes(READS,GENE)
+% Removes the rows of READS that could come from other annotated genes:
+% a row is dropped when its row sum equals its sum over the columns covered
+% by GENE.non_unique_regions (clipped to GENE.start..GENE.stop). FLAG is 1
+% if GENE has that field, else 0 and READS is returned unchanged.
+READS_OUT=READS;
+FLAG=0;
+if isfield(GENE,'non_unique_regions')
+    REGIONS=GENE.non_unique_regions;
+    % One entry per gene position; assumes READS columns match - TODO confirm.
+    MASK=zeros(1,GENE.stop-GENE.start+1);
+    for r=1:size(REGIONS,1)
+        FIRST=max(REGIONS(r,1),GENE.start)-GENE.start+1;
+        LAST=min(REGIONS(r,2),GENE.stop)-GENE.start+1;
+        MASK(FIRST:LAST)=1;
+    end
+    IN_REGIONS=sum(READS(:,MASK>0),2);
+    TOTAL=sum(READS,2);
+    READS_OUT=READS(IN_REGIONS~=TOTAL,:);
+    FLAG=1;
+end
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001872.bam
Binary file deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001872.bam has changed
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001872.bam.bai
Binary file deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001872.bam.bai has changed
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001875.bam
Binary file deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001875.bam has changed
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001875.bam.bai
Binary file deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions-SRX001875.bam.bai has changed
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions.gff3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions.gff3 Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,747 @@
+##gff-version 3
+##Seqid Source Type Start End Score Phase Attributes
+I Coding_transcript gene 11495 16831 . + . ID=Gene:Gene:Y74C9A.2.2
+I Coding_transcript mRNA 11495 16793 . + . ID=Transcript:Gene:Y74C9A.2.2.1;Parent=Gene:Gene:Y74C9A.2.2
+I Coding_transcript five_prime_UTR 11495 11561 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1
+I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1
+I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1
+I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1
+I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1
+I Coding_transcript three_prime_UTR 16702 16793 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.1;Parent=Transcript:Gene:Y74C9A.2.2.1
+I Coding_transcript mRNA 11499 16790 . + . ID=Transcript:Gene:Y74C9A.2.2.2;Parent=Gene:Gene:Y74C9A.2.2
+I Coding_transcript five_prime_UTR 11499 11557 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2
+I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2
+I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2
+I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2
+I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2
+I Coding_transcript three_prime_UTR 16586 16790 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.2;Parent=Transcript:Gene:Y74C9A.2.2.2
+I Coding_transcript mRNA 11499 16831 . + . ID=Transcript:Gene:Y74C9A.2.2.3;Parent=Gene:Gene:Y74C9A.2.2
+I Coding_transcript five_prime_UTR 11499 11561 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3
+I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3
+I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3
+I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3
+I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3
+I Coding_transcript three_prime_UTR 16586 16831 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.3;Parent=Transcript:Gene:Y74C9A.2.2.3
+I Coding_transcript mRNA 11505 16790 . + . ID=Transcript:Gene:Y74C9A.2.2.4;Parent=Gene:Gene:Y74C9A.2.2
+I Coding_transcript five_prime_UTR 11505 11561 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4
+I Coding_transcript five_prime_UTR 11623 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4
+I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4
+I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4
+I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4
+I Coding_transcript three_prime_UTR 16586 16790 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.4;Parent=Transcript:Gene:Y74C9A.2.2.4
+I Coding_transcript mRNA 11618 16804 . + . ID=Transcript:Gene:Y74C9A.2.2.5;Parent=Gene:Gene:Y74C9A.2.2
+I Coding_transcript five_prime_UTR 11618 11640 . + . ID=five_prime_UTR:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5
+I Coding_transcript CDS 11641 11689 . + 0 ID=CDS:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5
+I Coding_transcript CDS 14951 15160 . + 2 ID=CDS:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5
+I Coding_transcript CDS 16473 16585 . + 2 ID=CDS:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5
+I Coding_transcript three_prime_UTR 16586 16804 . + . ID=three_prime_UTR:Gene:Y74C9A.2.2.5;Parent=Transcript:Gene:Y74C9A.2.2.5
+I Coding_transcript gene 47472 49416 . + . ID=Gene:Gene:Y48G1C.12
+I Coding_transcript mRNA 47472 49416 . + . ID=Transcript:Gene:Y48G1C.12.1;Parent=Gene:Gene:Y48G1C.12
+I Coding_transcript CDS 47472 47610 . + 0 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1
+I Coding_transcript CDS 47696 47858 . + 2 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1
+I Coding_transcript CDS 48348 48530 . + 1 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1
+I Coding_transcript CDS 49251 49416 . + 1 ID=CDS:Gene:Y48G1C.12.1;Parent=Transcript:Gene:Y48G1C.12.1
+I Coding_transcript gene 71858 81071 . + . ID=Gene:Gene:Y48G1C.2.1
+I Coding_transcript mRNA 71858 81071 . + . ID=Transcript:Gene:Y48G1C.2.1.1;Parent=Gene:Gene:Y48G1C.2.1
+I Coding_transcript five_prime_UTR 71858 71932 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript five_prime_UTR 72511 72590 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript five_prime_UTR 72647 72648 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript CDS 72649 72897 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript CDS 72941 73017 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript CDS 73605 73766 . + 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript CDS 74625 74808 . + 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript CDS 75171 75490 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript CDS 75951 76112 . + 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript CDS 76949 77153 . + 1 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript CDS 77713 77799 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript CDS 79313 79447 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript CDS 80306 80344 . + 0 ID=CDS:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript three_prime_UTR 80345 81071 . + . ID=three_prime_UTR:Gene:Y48G1C.2.1.1;Parent=Transcript:Gene:Y48G1C.2.1.1
+I Coding_transcript mRNA 71878 81063 . + . ID=Transcript:Gene:Y48G1C.2.1.2;Parent=Gene:Gene:Y48G1C.2.1
+I Coding_transcript five_prime_UTR 71878 71932 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript five_prime_UTR 72511 72590 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript five_prime_UTR 72647 72648 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript CDS 72649 72897 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript CDS 72941 73017 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript CDS 73605 73766 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript CDS 74625 74808 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript CDS 75171 75490 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript CDS 75951 76112 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript CDS 76949 77153 . + 1 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript CDS 77713 77799 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript CDS 79313 79447 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript CDS 80306 80344 . + 0 ID=CDS:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript three_prime_UTR 80345 80561 . + . ID=three_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript three_prime_UTR 80814 81063 . + . ID=three_prime_UTR:Gene:Y48G1C.2.1.2;Parent=Transcript:Gene:Y48G1C.2.1.2
+I Coding_transcript mRNA 72511 80344 . + . ID=Transcript:Gene:Y48G1C.2.1.3;Parent=Gene:Gene:Y48G1C.2.1
+I Coding_transcript five_prime_UTR 72511 72590 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript five_prime_UTR 72647 72648 . + . ID=five_prime_UTR:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript CDS 72649 72897 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript CDS 72941 73017 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript CDS 73605 73766 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript CDS 74625 74808 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript CDS 75171 75490 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript CDS 75951 76112 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript CDS 76949 77153 . + 1 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript CDS 77713 77799 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript CDS 79313 79447 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript CDS 80306 80344 . + 0 ID=CDS:Gene:Y48G1C.2.1.3;Parent=Transcript:Gene:Y48G1C.2.1.3
+I Coding_transcript gene 86841 90607 . + . ID=Gene:Gene:Y48G1C.11
+I Coding_transcript mRNA 86841 90607 . + . ID=Transcript:Gene:Y48G1C.11.1;Parent=Gene:Gene:Y48G1C.11
+I Coding_transcript CDS 86841 86904 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1
+I Coding_transcript CDS 87034 87223 . + 2 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1
+I Coding_transcript CDS 87520 87734 . + 1 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1
+I Coding_transcript CDS 88268 88500 . + 2 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1
+I Coding_transcript CDS 88566 88706 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1
+I Coding_transcript CDS 89372 89584 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1
+I Coding_transcript CDS 90419 90607 . + 0 ID=CDS:Gene:Y48G1C.11.1;Parent=Transcript:Gene:Y48G1C.11.1
+I Coding_transcript gene 91380 92877 . + . ID=Gene:Gene:Y48G1C.9.1
+I Coding_transcript mRNA 91380 92654 . + . ID=Transcript:Gene:Y48G1C.9.1.1;Parent=Gene:Gene:Y48G1C.9.1
+I Coding_transcript five_prime_UTR 91380 91408 . + . ID=five_prime_UTR:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1
+I Coding_transcript CDS 91409 91594 . + 0 ID=CDS:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1
+I Coding_transcript CDS 92523 92648 . + 0 ID=CDS:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1
+I Coding_transcript three_prime_UTR 92649 92654 . + . ID=three_prime_UTR:Gene:Y48G1C.9.1.1;Parent=Transcript:Gene:Y48G1C.9.1.1
+I Coding_transcript mRNA 91404 92877 . + . ID=Transcript:Gene:Y48G1C.9.1.2;Parent=Gene:Gene:Y48G1C.9.1
+I Coding_transcript five_prime_UTR 91404 91408 . + . ID=five_prime_UTR:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2
+I Coding_transcript CDS 91409 91594 . + 0 ID=CDS:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2
+I Coding_transcript CDS 92523 92648 . + 0 ID=CDS:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2
+I Coding_transcript three_prime_UTR 92649 92877 . + . ID=three_prime_UTR:Gene:Y48G1C.9.1.2;Parent=Transcript:Gene:Y48G1C.9.1.2
+I Coding_transcript gene 113807 114681 . + . ID=Gene:Gene:F53G12.8
+I Coding_transcript mRNA 113807 114681 . + . ID=Transcript:Gene:F53G12.8.1;Parent=Gene:Gene:F53G12.8
+I Coding_transcript CDS 113807 113863 . + 0 ID=CDS:Gene:F53G12.8.1;Parent=Transcript:Gene:F53G12.8.1
+I Coding_transcript CDS 114085 114423 . + 0 ID=CDS:Gene:F53G12.8.1;Parent=Transcript:Gene:F53G12.8.1
+I Coding_transcript CDS 114544 114681 . + 0 ID=CDS:Gene:F53G12.8.1;Parent=Transcript:Gene:F53G12.8.1
+I Coding_transcript gene 115739 117438 . + . ID=Gene:Gene:F53G12.7
+I Coding_transcript mRNA 115739 117438 . + . ID=Transcript:Gene:F53G12.7.1;Parent=Gene:Gene:F53G12.7
+I Coding_transcript CDS 115739 115915 . + 0 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1
+I Coding_transcript CDS 116432 116666 . + 0 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1
+I Coding_transcript CDS 116719 116974 . + 2 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1
+I Coding_transcript CDS 117086 117401 . + 1 ID=CDS:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1
+I Coding_transcript three_prime_UTR 117402 117438 . + . ID=three_prime_UTR:Gene:F53G12.7.1;Parent=Transcript:Gene:F53G12.7.1
+I Coding_transcript gene 127297 134263 . + . ID=Gene:Gene:F53G12.5b
+I Coding_transcript mRNA 127297 134263 . + . ID=Transcript:Gene:F53G12.5b.1;Parent=Gene:Gene:F53G12.5b
+I Coding_transcript CDS 127297 127336 . + 0 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1
+I Coding_transcript CDS 127385 127436 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1
+I Coding_transcript CDS 128697 128896 . + 1 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1
+I Coding_transcript CDS 128945 129124 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1
+I Coding_transcript CDS 129176 129333 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1
+I Coding_transcript CDS 132103 132553 . + 0 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1
+I Coding_transcript CDS 132612 132693 . + 2 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1
+I Coding_transcript CDS 133414 133582 . + 1 ID=CDS:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1
+I Coding_transcript three_prime_UTR 133583 134263 . + . ID=three_prime_UTR:Gene:F53G12.5b.1;Parent=Transcript:Gene:F53G12.5b.1
+I Coding_transcript mRNA 128693 133809 . + . ID=Transcript:Gene:F53G12.5b.2;Parent=Gene:Gene:F53G12.5b
+I Coding_transcript five_prime_UTR 128693 128697 . + . ID=five_prime_UTR:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2
+I Coding_transcript CDS 128698 128896 . + 0 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2
+I Coding_transcript CDS 128945 129124 . + 2 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2
+I Coding_transcript CDS 129167 129333 . + 2 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2
+I Coding_transcript CDS 132103 132553 . + 0 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2
+I Coding_transcript CDS 132612 132693 . + 2 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2
+I Coding_transcript CDS 133414 133582 . + 1 ID=CDS:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2
+I Coding_transcript three_prime_UTR 133583 133809 . + . ID=three_prime_UTR:Gene:F53G12.5b.2;Parent=Transcript:Gene:F53G12.5b.2
+I Coding_transcript mRNA 128697 133582 . + . ID=Transcript:Gene:F53G12.5b.3;Parent=Gene:Gene:F53G12.5b
+I Coding_transcript five_prime_UTR 128697 128697 . + . ID=five_prime_UTR:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3
+I Coding_transcript CDS 128698 128896 . + 0 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3
+I Coding_transcript CDS 128945 129124 . + 2 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3
+I Coding_transcript CDS 129167 129333 . + 2 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3
+I Coding_transcript CDS 132103 132553 . + 0 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3
+I Coding_transcript CDS 132612 132693 . + 2 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3
+I Coding_transcript CDS 133414 133582 . + 1 ID=CDS:Gene:F53G12.5b.3;Parent=Transcript:Gene:F53G12.5b.3
+I Coding_transcript gene 134337 137282 . + . ID=Gene:Gene:F53G12.4
+I Coding_transcript mRNA 134337 137282 . + . ID=Transcript:Gene:F53G12.4.1;Parent=Gene:Gene:F53G12.4
+I Coding_transcript five_prime_UTR 134337 134353 . + . ID=five_prime_UTR:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1
+I Coding_transcript CDS 134354 134428 . + 0 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1
+I Coding_transcript CDS 134506 134581 . + 0 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1
+I Coding_transcript CDS 135549 135898 . + 2 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1
+I Coding_transcript CDS 136235 136712 . + 0 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1
+I Coding_transcript CDS 137209 137282 . + 2 ID=CDS:Gene:F53G12.4.1;Parent=Transcript:Gene:F53G12.4.1
+I Coding_transcript gene 137845 144565 . + . ID=Gene:Gene:F53G12.3
+I Coding_transcript mRNA 137845 144565 . + . ID=Transcript:Gene:F53G12.3.1;Parent=Gene:Gene:F53G12.3
+I Coding_transcript CDS 137845 137886 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 138017 138143 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 138193 138351 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 138393 138782 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 138829 139032 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 139080 139331 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 139378 139669 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 139769 139982 . + 1 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 140136 140292 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 140371 140496 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 140554 140870 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 140916 141213 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 141681 141854 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 141900 142023 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 142067 142230 . + 1 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 142278 142477 . + 2 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 142547 142671 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 142729 142939 . + 1 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 143007 143684 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 144265 144396 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript CDS 144440 144565 . + 0 ID=CDS:Gene:F53G12.3.1;Parent=Transcript:Gene:F53G12.3.1
+I Coding_transcript gene 173412 175988 . + . ID=Gene:Gene:F56C11.6b
+I Coding_transcript mRNA 173412 175932 . + . ID=Transcript:Gene:F56C11.6b.1;Parent=Gene:Gene:F56C11.6b
+I Coding_transcript five_prime_UTR 173412 173508 . + . ID=five_prime_UTR:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1
+I Coding_transcript five_prime_UTR 173561 173725 . + . ID=five_prime_UTR:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1
+I Coding_transcript five_prime_UTR 173775 173873 . + . ID=five_prime_UTR:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1
+I Coding_transcript CDS 173874 174312 . + 0 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1
+I Coding_transcript CDS 174605 174832 . + 2 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1
+I Coding_transcript CDS 174878 175053 . + 2 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1
+I Coding_transcript CDS 175097 175241 . + 0 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1
+I Coding_transcript CDS 175579 175708 . + 2 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1
+I Coding_transcript CDS 175755 175932 . + 1 ID=CDS:Gene:F56C11.6b.1;Parent=Transcript:Gene:F56C11.6b.1
+I Coding_transcript mRNA 173412 175988 . + . ID=Transcript:Gene:F56C11.6b.2;Parent=Gene:Gene:F56C11.6b
+I Coding_transcript five_prime_UTR 173412 173421 . + . ID=five_prime_UTR:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2
+I Coding_transcript CDS 173422 173508 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2
+I Coding_transcript CDS 173561 173725 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2
+I Coding_transcript CDS 173775 174312 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2
+I Coding_transcript CDS 174605 174832 . + 2 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2
+I Coding_transcript CDS 174878 175053 . + 2 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2
+I Coding_transcript CDS 175097 175241 . + 0 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2
+I Coding_transcript CDS 175579 175708 . + 2 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2
+I Coding_transcript CDS 175755 175932 . + 1 ID=CDS:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2
+I Coding_transcript three_prime_UTR 175933 175988 . + . ID=three_prime_UTR:Gene:F56C11.6b.2;Parent=Transcript:Gene:F56C11.6b.2
+I Coding_transcript gene 178538 182159 . + . ID=Gene:Gene:F56C11.5b
+I Coding_transcript mRNA 178538 182159 . + . ID=Transcript:Gene:F56C11.5b.1;Parent=Gene:Gene:F56C11.5b
+I Coding_transcript five_prime_UTR 178538 178566 . + . ID=five_prime_UTR:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1
+I Coding_transcript CDS 178567 178620 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1
+I Coding_transcript CDS 180600 180698 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1
+I Coding_transcript CDS 181251 181514 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1
+I Coding_transcript CDS 181663 181767 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1
+I Coding_transcript CDS 181814 181966 . + 0 ID=CDS:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1
+I Coding_transcript three_prime_UTR 181967 182159 . + . ID=three_prime_UTR:Gene:F56C11.5b.1;Parent=Transcript:Gene:F56C11.5b.1
+I Coding_transcript mRNA 180394 181966 . + . ID=Transcript:Gene:F56C11.5b.2;Parent=Gene:Gene:F56C11.5b
+I Coding_transcript CDS 180394 180465 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2
+I Coding_transcript CDS 180600 180698 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2
+I Coding_transcript CDS 181251 181514 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2
+I Coding_transcript CDS 181663 181767 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2
+I Coding_transcript CDS 181814 181966 . + 0 ID=CDS:Gene:F56C11.5b.2;Parent=Transcript:Gene:F56C11.5b.2
+I Coding_transcript gene 216005 219099 . + . ID=Gene:Gene:Y48G1BL.1
+I Coding_transcript mRNA 216005 219099 . + . ID=Transcript:Gene:Y48G1BL.1.1;Parent=Gene:Gene:Y48G1BL.1
+I Coding_transcript five_prime_UTR 216005 216092 . + . ID=five_prime_UTR:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1
+I Coding_transcript five_prime_UTR 216145 216180 . + . ID=five_prime_UTR:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1
+I Coding_transcript CDS 216181 216277 . + 0 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1
+I Coding_transcript CDS 216331 216910 . + 2 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1
+I Coding_transcript CDS 218110 218205 . + 1 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1
+I Coding_transcript CDS 218772 218955 . + 1 ID=CDS:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1
+I Coding_transcript three_prime_UTR 218956 219099 . + . ID=three_prime_UTR:Gene:Y48G1BL.1.1;Parent=Transcript:Gene:Y48G1BL.1.1
+I Coding_transcript gene 291216 305461 . + . ID=Gene:Gene:C53D5.1c.1
+I Coding_transcript mRNA 291216 305081 . + . ID=Transcript:Gene:C53D5.1c.1.1;Parent=Gene:Gene:C53D5.1c.1
+I Coding_transcript five_prime_UTR 291216 291308 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1
+I Coding_transcript five_prime_UTR 295793 295851 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1
+I Coding_transcript five_prime_UTR 302596 302671 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1
+I Coding_transcript CDS 302672 302705 . + 0 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1
+I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1
+I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1
+I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1
+I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1
+I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.1;Parent=Transcript:Gene:C53D5.1c.1.1
+I Coding_transcript mRNA 291216 305461 . + . ID=Transcript:Gene:C53D5.1c.1.2;Parent=Gene:Gene:C53D5.1c.1
+I Coding_transcript five_prime_UTR 291216 291248 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2
+I Coding_transcript CDS 291249 291308 . + 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2
+I Coding_transcript CDS 295793 295851 . + 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2
+I Coding_transcript CDS 302596 302705 . + 1 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2
+I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2
+I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2
+I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2
+I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2
+I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2
+I Coding_transcript three_prime_UTR 305082 305461 . + . ID=three_prime_UTR:Gene:C53D5.1c.1.2;Parent=Transcript:Gene:C53D5.1c.1.2
+I Coding_transcript mRNA 295364 305081 . + . ID=Transcript:Gene:C53D5.1c.1.3;Parent=Gene:Gene:C53D5.1c.1
+I Coding_transcript five_prime_UTR 295364 295367 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3
+I Coding_transcript CDS 295368 295421 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3
+I Coding_transcript CDS 295793 295851 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3
+I Coding_transcript CDS 302596 302705 . + 1 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3
+I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3
+I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3
+I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3
+I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3
+I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.3;Parent=Transcript:Gene:C53D5.1c.1.3
+I Coding_transcript mRNA 302596 305081 . + . ID=Transcript:Gene:C53D5.1c.1.4;Parent=Gene:Gene:C53D5.1c.1
+I Coding_transcript five_prime_UTR 302596 302671 . + . ID=five_prime_UTR:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4
+I Coding_transcript CDS 302672 302705 . + 0 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4
+I Coding_transcript CDS 302762 302831 . + 2 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4
+I Coding_transcript CDS 303204 303729 . + 1 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4
+I Coding_transcript CDS 303789 303910 . + 0 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4
+I Coding_transcript CDS 304094 304256 . + 1 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4
+I Coding_transcript CDS 304929 305081 . + 0 ID=CDS:Gene:C53D5.1c.1.4;Parent=Transcript:Gene:C53D5.1c.1.4
+I Coding_transcript gene 347484 348360 . + . ID=Gene:Gene:Y48G1A.2
+I Coding_transcript mRNA 347484 348360 . + . ID=Transcript:Gene:Y48G1A.2.1;Parent=Gene:Gene:Y48G1A.2
+I Coding_transcript five_prime_UTR 347484 347577 . + . ID=five_prime_UTR:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1
+I Coding_transcript CDS 347578 347584 . + 0 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1
+I Coding_transcript CDS 347633 347709 . + 2 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1
+I Coding_transcript CDS 347765 347986 . + 0 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1
+I Coding_transcript CDS 348045 348152 . + 0 ID=CDS:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1
+I Coding_transcript three_prime_UTR 348153 348360 . + . ID=three_prime_UTR:Gene:Y48G1A.2.1;Parent=Transcript:Gene:Y48G1A.2.1
+I Coding_transcript gene 364522 368511 . + . ID=Gene:Gene:R119.7
+I Coding_transcript mRNA 364522 368511 . + . ID=Transcript:Gene:R119.7.1;Parent=Gene:Gene:R119.7
+I Coding_transcript CDS 364522 364682 . + 0 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1
+I Coding_transcript CDS 364739 365194 . + 1 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1
+I Coding_transcript CDS 365534 365599 . + 1 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1
+I Coding_transcript CDS 365647 365963 . + 1 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1
+I Coding_transcript CDS 366656 367212 . + 2 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1
+I Coding_transcript CDS 367749 367943 . + 0 ID=CDS:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1
+I Coding_transcript three_prime_UTR 367944 368511 . + . ID=three_prime_UTR:Gene:R119.7.1;Parent=Transcript:Gene:R119.7.1
+I Coding_transcript gene 382836 388540 . + . ID=Gene:Gene:R119.6
+I Coding_transcript mRNA 382836 388540 . + . ID=Transcript:Gene:R119.6.1;Parent=Gene:Gene:R119.6
+I Coding_transcript CDS 382836 382916 . + 0 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1
+I Coding_transcript CDS 382992 383067 . + 0 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1
+I Coding_transcript CDS 383124 383377 . + 2 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1
+I Coding_transcript CDS 383432 383567 . + 0 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1
+I Coding_transcript CDS 384628 385228 . + 2 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1
+I Coding_transcript CDS 386079 386339 . + 1 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1
+I Coding_transcript CDS 388049 388153 . + 1 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1
+I Coding_transcript CDS 388380 388437 . + 1 ID=CDS:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1
+I Coding_transcript three_prime_UTR 388438 388540 . + . ID=three_prime_UTR:Gene:R119.6.1;Parent=Transcript:Gene:R119.6.1
+I Coding_transcript gene 488619 489908 . + . ID=Gene:Gene:W04C9.2
+I Coding_transcript mRNA 488619 489908 . + . ID=Transcript:Gene:W04C9.2.1;Parent=Gene:Gene:W04C9.2
+I Coding_transcript CDS 488619 488726 . + 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1
+I Coding_transcript CDS 488771 488836 . + 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1
+I Coding_transcript CDS 488885 488947 . + 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1
+I Coding_transcript CDS 489703 489774 . + 0 ID=CDS:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1
+I Coding_transcript three_prime_UTR 489775 489908 . + . ID=three_prime_UTR:Gene:W04C9.2.1;Parent=Transcript:Gene:W04C9.2.1
+I Coding_transcript gene 489987 496153 . + . ID=Gene:Gene:W04C9.1
+I Coding_transcript mRNA 489987 496153 . + . ID=Transcript:Gene:W04C9.1.1;Parent=Gene:Gene:W04C9.1
+I Coding_transcript five_prime_UTR 489987 490053 . + . ID=five_prime_UTR:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript CDS 490054 490205 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript CDS 491200 491305 . + 1 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript CDS 491353 491562 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript CDS 491613 491727 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript CDS 491839 492163 . + 2 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript CDS 492228 492354 . + 1 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript CDS 492870 493559 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript CDS 494263 494346 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript CDS 494395 494709 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript CDS 495831 496070 . + 0 ID=CDS:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript three_prime_UTR 496071 496153 . + . ID=three_prime_UTR:Gene:W04C9.1.1;Parent=Transcript:Gene:W04C9.1.1
+I Coding_transcript gene 534461 535347 . + . ID=Gene:Gene:Y65B4BR.8
+I Coding_transcript mRNA 534461 535347 . + . ID=Transcript:Gene:Y65B4BR.8.1;Parent=Gene:Gene:Y65B4BR.8
+I Coding_transcript CDS 534461 534572 . + 0 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1
+I Coding_transcript CDS 534621 534727 . + 2 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1
+I Coding_transcript CDS 534775 534973 . + 0 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1
+I Coding_transcript CDS 535034 535197 . + 2 ID=CDS:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1
+I Coding_transcript three_prime_UTR 535198 535347 . + . ID=three_prime_UTR:Gene:Y65B4BR.8.1;Parent=Transcript:Gene:Y65B4BR.8.1
+I Coding_transcript gene 2931462 2947153 . + . ID=Gene:Gene:Y71F9AM.4a
+I Coding_transcript mRNA 2931462 2947153 . + . ID=Transcript:Gene:Y71F9AM.4a.1;Parent=Gene:Gene:Y71F9AM.4a
+I Coding_transcript CDS 2931462 2931549 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2931617 2931783 . + 2 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2932810 2932974 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2933904 2934137 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2935626 2935717 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2935988 2936131 . + 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2936561 2936882 . + 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2937759 2937919 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2938443 2938677 . + 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2942825 2943021 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2943072 2943204 . + 1 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2944077 2944223 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2944275 2944382 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript CDS 2946747 2946938 . + 0 ID=CDS:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript three_prime_UTR 2946939 2947153 . + . ID=three_prime_UTR:Gene:Y71F9AM.4a.1;Parent=Transcript:Gene:Y71F9AM.4a.1
+I Coding_transcript mRNA 2935624 2944611 . + . ID=Transcript:Gene:Y71F9AM.4a.2;Parent=Gene:Gene:Y71F9AM.4a
+I Coding_transcript five_prime_UTR 2935624 2935717 . + . ID=five_prime_UTR:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2
+I Coding_transcript five_prime_UTR 2935988 2936042 . + . ID=five_prime_UTR:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2
+I Coding_transcript CDS 2936043 2936131 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2
+I Coding_transcript CDS 2936561 2936882 . + 1 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2
+I Coding_transcript CDS 2937759 2937919 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2
+I Coding_transcript CDS 2938443 2938677 . + 1 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2
+I Coding_transcript CDS 2942825 2943021 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2
+I Coding_transcript CDS 2943072 2943204 . + 1 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2
+I Coding_transcript CDS 2944077 2944223 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2
+I Coding_transcript CDS 2944275 2944424 . + 0 ID=CDS:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2
+I Coding_transcript three_prime_UTR 2944425 2944611 . + . ID=three_prime_UTR:Gene:Y71F9AM.4a.2;Parent=Transcript:Gene:Y71F9AM.4a.2
+I Coding_transcript gene 2929778 2931362 . + . ID=Gene:Gene:Y71F9AM.5b
+I Coding_transcript mRNA 2929778 2931362 . + . ID=Transcript:Gene:Y71F9AM.5b.1;Parent=Gene:Gene:Y71F9AM.5b
+I Coding_transcript five_prime_UTR 2929778 2929840 . + . ID=five_prime_UTR:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1
+I Coding_transcript CDS 2929841 2929885 . + 0 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1
+I Coding_transcript CDS 2929947 2930071 . + 0 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1
+I Coding_transcript CDS 2930175 2930367 . + 1 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1
+I Coding_transcript CDS 2931224 2931256 . + 0 ID=CDS:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1
+I Coding_transcript three_prime_UTR 2931257 2931362 . + . ID=three_prime_UTR:Gene:Y71F9AM.5b.1;Parent=Transcript:Gene:Y71F9AM.5b.1
+I Coding_transcript mRNA 2929785 2931344 . + . ID=Transcript:Gene:Y71F9AM.5b.2;Parent=Gene:Gene:Y71F9AM.5b
+I Coding_transcript five_prime_UTR 2929785 2929840 . + . ID=five_prime_UTR:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2
+I Coding_transcript CDS 2929841 2929885 . + 0 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2
+I Coding_transcript CDS 2929947 2930089 . + 0 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2
+I Coding_transcript CDS 2930175 2930367 . + 1 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2
+I Coding_transcript CDS 2931224 2931256 . + 0 ID=CDS:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2
+I Coding_transcript three_prime_UTR 2931257 2931344 . + . ID=three_prime_UTR:Gene:Y71F9AM.5b.2;Parent=Transcript:Gene:Y71F9AM.5b.2
+I Coding_transcript gene 537125 542200 . + . ID=Gene:Gene:Y65B4BR.4b
+I Coding_transcript mRNA 537125 541634 . + . ID=Transcript:Gene:Y65B4BR.4b.1;Parent=Gene:Gene:Y65B4BR.4b
+I Coding_transcript five_prime_UTR 537125 537140 . + . ID=five_prime_UTR:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1
+I Coding_transcript CDS 537141 537246 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1
+I Coding_transcript CDS 537306 537563 . + 2 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1
+I Coding_transcript CDS 537609 537838 . + 2 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1
+I Coding_transcript CDS 538705 538914 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1
+I Coding_transcript CDS 539456 539730 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1
+I Coding_transcript CDS 539804 540011 . + 1 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1
+I Coding_transcript CDS 540067 540387 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1
+I Coding_transcript CDS 540726 540986 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1
+I Coding_transcript CDS 541067 541288 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1
+I Coding_transcript CDS 541347 541634 . + 0 ID=CDS:Gene:Y65B4BR.4b.1;Parent=Transcript:Gene:Y65B4BR.4b.1
+I Coding_transcript mRNA 537125 542200 . + . ID=Transcript:Gene:Y65B4BR.4b.2;Parent=Gene:Gene:Y65B4BR.4b
+I Coding_transcript five_prime_UTR 537125 537140 . + . ID=five_prime_UTR:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript CDS 537141 537246 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript CDS 537306 537563 . + 2 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript CDS 537609 537838 . + 2 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript CDS 538705 538914 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript CDS 539456 539730 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript CDS 539798 540011 . + 1 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript CDS 540067 540387 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript CDS 540726 540986 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript CDS 541067 541288 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript CDS 541347 541634 . + 0 ID=CDS:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript three_prime_UTR 541635 542200 . + . ID=three_prime_UTR:Gene:Y65B4BR.4b.2;Parent=Transcript:Gene:Y65B4BR.4b.2
+I Coding_transcript gene 562739 565184 . + . ID=Gene:Gene:Y65B4BR.1
+I Coding_transcript mRNA 562739 565184 . + . ID=Transcript:Gene:Y65B4BR.1.1;Parent=Gene:Gene:Y65B4BR.1
+I Coding_transcript five_prime_UTR 562739 562765 . + . ID=five_prime_UTR:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1
+I Coding_transcript CDS 562766 562872 . + 0 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1
+I Coding_transcript CDS 562923 563122 . + 1 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1
+I Coding_transcript CDS 563167 563333 . + 2 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1
+I Coding_transcript CDS 563377 563460 . + 0 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1
+I Coding_transcript CDS 563510 563583 . + 0 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1
+I Coding_transcript CDS 564612 565113 . + 1 ID=CDS:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1
+I Coding_transcript three_prime_UTR 565114 565184 . + . ID=three_prime_UTR:Gene:Y65B4BR.1.1;Parent=Transcript:Gene:Y65B4BR.1.1
+I Coding_transcript gene 618210 621466 . + . ID=Gene:Gene:F56A6.1b
+I Coding_transcript mRNA 618210 621424 . + . ID=Transcript:Gene:F56A6.1b.1;Parent=Gene:Gene:F56A6.1b
+I Coding_transcript CDS 618210 618360 . + 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1
+I Coding_transcript CDS 618418 618671 . + 2 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1
+I Coding_transcript CDS 618716 618915 . + 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1
+I Coding_transcript CDS 618962 619021 . + 1 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1
+I Coding_transcript CDS 619072 619190 . + 1 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1
+I Coding_transcript CDS 619250 619692 . + 2 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1
+I Coding_transcript CDS 619741 620027 . + 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1
+I Coding_transcript CDS 620073 620469 . + 1 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1
+I Coding_transcript CDS 620516 621175 . + 0 ID=CDS:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1
+I Coding_transcript three_prime_UTR 621176 621424 . + . ID=three_prime_UTR:Gene:F56A6.1b.1;Parent=Transcript:Gene:F56A6.1b.1
+I Coding_transcript mRNA 618210 621466 . + . ID=Transcript:Gene:F56A6.1b.2;Parent=Gene:Gene:F56A6.1b
+I Coding_transcript CDS 618210 618360 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2
+I Coding_transcript CDS 618418 618671 . + 2 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2
+I Coding_transcript CDS 618716 618915 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2
+I Coding_transcript CDS 618962 619021 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2
+I Coding_transcript CDS 619072 619190 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2
+I Coding_transcript CDS 619250 619692 . + 2 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2
+I Coding_transcript CDS 619741 620027 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2
+I Coding_transcript CDS 620073 620469 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2
+I Coding_transcript CDS 620516 621171 . + 0 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2
+I Coding_transcript CDS 621218 621314 . + 1 ID=CDS:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2
+I Coding_transcript three_prime_UTR 621315 621466 . + . ID=three_prime_UTR:Gene:F56A6.1b.2;Parent=Transcript:Gene:F56A6.1b.2
+I Coding_transcript gene 720014 731077 . + . ID=Gene:Gene:Y18H1A.6
+I Coding_transcript mRNA 720014 731077 . + . ID=Transcript:Gene:Y18H1A.6.1;Parent=Gene:Gene:Y18H1A.6
+I Coding_transcript five_prime_UTR 720014 720080 . + . ID=five_prime_UTR:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 720081 720256 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 720301 720435 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 720489 720602 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 720647 720787 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 720831 720942 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 722106 722204 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 722247 722358 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 724573 724769 . + 2 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 725112 725228 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 725289 725395 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 726504 726873 . + 1 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 730284 730535 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript CDS 730839 730940 . + 0 ID=CDS:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript three_prime_UTR 730941 731077 . + . ID=three_prime_UTR:Gene:Y18H1A.6.1;Parent=Transcript:Gene:Y18H1A.6.1
+I Coding_transcript gene 763627 770707 . + . ID=Gene:Gene:T06A4.3a
+I Coding_transcript mRNA 763627 770707 . + . ID=Transcript:Gene:T06A4.3a.1;Parent=Gene:Gene:T06A4.3a
+I Coding_transcript five_prime_UTR 763627 763676 . + . ID=five_prime_UTR:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript CDS 763677 763826 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript CDS 765140 765276 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript CDS 765434 765647 . + 1 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript CDS 766050 766151 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript CDS 766328 766447 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript CDS 766529 766643 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript CDS 767585 767736 . + 2 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript CDS 768167 768226 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript CDS 768276 768450 . + 0 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript CDS 768502 768630 . + 2 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript CDS 770464 770627 . + 2 ID=CDS:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript three_prime_UTR 770628 770707 . + . ID=three_prime_UTR:Gene:T06A4.3a.1;Parent=Transcript:Gene:T06A4.3a.1
+I Coding_transcript mRNA 763677 768919 . + . ID=Transcript:Gene:T06A4.3a.2;Parent=Gene:Gene:T06A4.3a
+I Coding_transcript CDS 763677 763826 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript CDS 765140 765276 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript CDS 765434 765647 . + 1 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript CDS 766050 766151 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript CDS 766328 766447 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript CDS 766529 766643 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript CDS 767585 767736 . + 2 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript CDS 768167 768226 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript CDS 768276 768450 . + 0 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript CDS 768502 768630 . + 2 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript CDS 768801 768814 . + 2 ID=CDS:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript three_prime_UTR 768815 768919 . + . ID=three_prime_UTR:Gene:T06A4.3a.2;Parent=Transcript:Gene:T06A4.3a.2
+I Coding_transcript gene 770894 776356 . + . ID=Gene:Gene:T06A4.1b
+I Coding_transcript mRNA 770894 776346 . + . ID=Transcript:Gene:T06A4.1b.1;Parent=Gene:Gene:T06A4.1b
+I Coding_transcript five_prime_UTR 770894 770968 . + . ID=five_prime_UTR:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript CDS 770969 771060 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript CDS 771453 771568 . + 1 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript CDS 771808 771913 . + 2 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript CDS 771970 772156 . + 1 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript CDS 773166 773387 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript CDS 773437 773548 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript CDS 773702 773989 . + 2 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript CDS 774970 775066 . + 2 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript CDS 775112 775574 . + 1 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript CDS 776166 776303 . + 0 ID=CDS:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript three_prime_UTR 776304 776346 . + . ID=three_prime_UTR:Gene:T06A4.1b.1;Parent=Transcript:Gene:T06A4.1b.1
+I Coding_transcript mRNA 770969 776356 . + . ID=Transcript:Gene:T06A4.1b.2;Parent=Gene:Gene:T06A4.1b
+I Coding_transcript CDS 770969 771060 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript CDS 771453 771568 . + 1 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript CDS 771808 771913 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript CDS 771970 772156 . + 1 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript CDS 773166 773387 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript CDS 773437 773548 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript CDS 773702 773989 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript CDS 774970 775066 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript CDS 775112 775389 . + 1 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript CDS 775471 775574 . + 2 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript CDS 776166 776303 . + 0 ID=CDS:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript three_prime_UTR 776304 776356 . + . ID=three_prime_UTR:Gene:T06A4.1b.2;Parent=Transcript:Gene:T06A4.1b.2
+I Coding_transcript gene 853461 854133 . + . ID=Gene:Gene:Y95B8A.8
+I Coding_transcript mRNA 853461 854133 . + . ID=Transcript:Gene:Y95B8A.8.1;Parent=Gene:Gene:Y95B8A.8
+I Coding_transcript CDS 853461 853489 . + 0 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1
+I Coding_transcript CDS 853563 853628 . + 1 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1
+I Coding_transcript CDS 853699 853771 . + 1 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1
+I Coding_transcript CDS 853830 853932 . + 0 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1
+I Coding_transcript CDS 853988 854133 . + 2 ID=CDS:Gene:Y95B8A.8.1;Parent=Transcript:Gene:Y95B8A.8.1
+I Coding_transcript gene 858489 871831 . + . ID=Gene:Gene:Y95B8A.7
+I Coding_transcript mRNA 858489 871831 . + . ID=Transcript:Gene:Y95B8A.7.1;Parent=Gene:Gene:Y95B8A.7
+I Coding_transcript five_prime_UTR 858489 858568 . + . ID=five_prime_UTR:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1
+I Coding_transcript CDS 858569 858837 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1
+I Coding_transcript CDS 861488 861921 . + 1 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1
+I Coding_transcript CDS 864406 864821 . + 2 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1
+I Coding_transcript CDS 865733 865936 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1
+I Coding_transcript CDS 866765 866860 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1
+I Coding_transcript CDS 868612 868723 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1
+I Coding_transcript CDS 870196 870326 . + 2 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1
+I Coding_transcript CDS 871656 871820 . + 0 ID=CDS:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1
+I Coding_transcript three_prime_UTR 871821 871831 . + . ID=three_prime_UTR:Gene:Y95B8A.7.1;Parent=Transcript:Gene:Y95B8A.7.1
+I Coding_transcript gene 882920 890209 . + . ID=Gene:Gene:Y95B8A.6a.2
+I Coding_transcript mRNA 882920 889792 . + . ID=Transcript:Gene:Y95B8A.6a.2.1;Parent=Gene:Gene:Y95B8A.6a.2
+I Coding_transcript five_prime_UTR 882920 883166 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1
+I Coding_transcript five_prime_UTR 884906 885042 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1
+I Coding_transcript five_prime_UTR 886509 886608 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1
+I Coding_transcript CDS 886609 886709 . + 0 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1
+I Coding_transcript CDS 887882 888060 . + 1 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1
+I Coding_transcript CDS 888152 888348 . + 2 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1
+I Coding_transcript CDS 889676 889792 . + 0 ID=CDS:Gene:Y95B8A.6a.2.1;Parent=Transcript:Gene:Y95B8A.6a.2.1
+I Coding_transcript mRNA 886213 890209 . + . ID=Transcript:Gene:Y95B8A.6a.2.2;Parent=Gene:Gene:Y95B8A.6a.2
+I Coding_transcript five_prime_UTR 886213 886608 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2
+I Coding_transcript CDS 886609 886709 . + 0 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2
+I Coding_transcript CDS 887882 888060 . + 1 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2
+I Coding_transcript CDS 888152 888348 . + 2 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2
+I Coding_transcript CDS 889676 889792 . + 0 ID=CDS:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2
+I Coding_transcript three_prime_UTR 889793 890209 . + . ID=three_prime_UTR:Gene:Y95B8A.6a.2.2;Parent=Transcript:Gene:Y95B8A.6a.2.2
+I Coding_transcript mRNA 887841 889870 . + . ID=Transcript:Gene:Y95B8A.6a.2.3;Parent=Gene:Gene:Y95B8A.6a.2
+I Coding_transcript five_prime_UTR 887841 888153 . + . ID=five_prime_UTR:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3
+I Coding_transcript CDS 888154 888348 . + 0 ID=CDS:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3
+I Coding_transcript CDS 889676 889792 . + 0 ID=CDS:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3
+I Coding_transcript three_prime_UTR 889793 889870 . + . ID=three_prime_UTR:Gene:Y95B8A.6a.2.3;Parent=Transcript:Gene:Y95B8A.6a.2.3
+I Coding_transcript gene 897288 905906 . + . ID=Gene:Gene:Y95B8A.5
+I Coding_transcript mRNA 897288 905906 . + . ID=Transcript:Gene:Y95B8A.5.1;Parent=Gene:Gene:Y95B8A.5
+I Coding_transcript five_prime_UTR 897288 897291 . + . ID=five_prime_UTR:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1
+I Coding_transcript CDS 897292 897357 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1
+I Coding_transcript CDS 898181 898351 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1
+I Coding_transcript CDS 900615 900802 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1
+I Coding_transcript CDS 903105 903192 . + 1 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1
+I Coding_transcript CDS 903255 903331 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1
+I Coding_transcript CDS 903384 903513 . + 1 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1
+I Coding_transcript CDS 904613 904846 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1
+I Coding_transcript CDS 905736 905855 . + 0 ID=CDS:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1
+I Coding_transcript three_prime_UTR 905856 905906 . + . ID=three_prime_UTR:Gene:Y95B8A.5.1;Parent=Transcript:Gene:Y95B8A.5.1
+I Coding_transcript gene 1003891 1010767 . + . ID=Gene:Gene:C54G6.2
+I Coding_transcript mRNA 1003891 1010767 . + . ID=Transcript:Gene:C54G6.2.1;Parent=Gene:Gene:C54G6.2
+I Coding_transcript CDS 1003891 1004091 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1
+I Coding_transcript CDS 1004703 1005062 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1
+I Coding_transcript CDS 1006029 1006324 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1
+I Coding_transcript CDS 1006461 1006548 . + 1 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1
+I Coding_transcript CDS 1007013 1007136 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1
+I Coding_transcript CDS 1007188 1007273 . + 2 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1
+I Coding_transcript CDS 1008082 1008464 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1
+I Coding_transcript CDS 1009524 1009755 . + 1 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1
+I Coding_transcript CDS 1009820 1009891 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1
+I Coding_transcript CDS 1010207 1010293 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1
+I Coding_transcript CDS 1010594 1010767 . + 0 ID=CDS:Gene:C54G6.2.1;Parent=Transcript:Gene:C54G6.2.1
+I Coding_transcript gene 1018837 1019221 . + . ID=Gene:Gene:Y34D9A.11
+I Coding_transcript mRNA 1018837 1019221 . + . ID=Transcript:Gene:Y34D9A.11.1;Parent=Gene:Gene:Y34D9A.11
+I Coding_transcript five_prime_UTR 1018837 1018844 . + . ID=five_prime_UTR:Gene:Y34D9A.11.1;Parent=Transcript:Gene:Y34D9A.11.1
+I Coding_transcript CDS 1018845 1019159 . + 0 ID=CDS:Gene:Y34D9A.11.1;Parent=Transcript:Gene:Y34D9A.11.1
+I Coding_transcript three_prime_UTR 1019160 1019221 . + . ID=three_prime_UTR:Gene:Y34D9A.11.1;Parent=Transcript:Gene:Y34D9A.11.1
+I Coding_transcript gene 1034474 1040870 . + . ID=Gene:Gene:Y34D9A.3
+I Coding_transcript mRNA 1034474 1040870 . + . ID=Transcript:Gene:Y34D9A.3.1;Parent=Gene:Gene:Y34D9A.3
+I Coding_transcript five_prime_UTR 1034474 1034474 . + . ID=five_prime_UTR:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1034475 1034499 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1034549 1034646 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1034693 1034881 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1034934 1035065 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1035123 1035246 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1035306 1035398 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1037314 1037453 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1037508 1037715 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1039055 1039296 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1039340 1039883 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1040116 1040648 . + 2 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript CDS 1040700 1040765 . + 0 ID=CDS:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript three_prime_UTR 1040766 1040870 . + . ID=three_prime_UTR:Gene:Y34D9A.3.1;Parent=Transcript:Gene:Y34D9A.3.1
+I Coding_transcript gene 1049596 1050714 . + . ID=Gene:Gene:Y34D9A.6
+I Coding_transcript mRNA 1049596 1050714 . + . ID=Transcript:Gene:Y34D9A.6.1;Parent=Gene:Gene:Y34D9A.6
+I Coding_transcript five_prime_UTR 1049596 1049604 . + . ID=five_prime_UTR:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1
+I Coding_transcript CDS 1049605 1049688 . + 0 ID=CDS:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1
+I Coding_transcript CDS 1050407 1050640 . + 0 ID=CDS:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1
+I Coding_transcript three_prime_UTR 1050641 1050714 . + . ID=three_prime_UTR:Gene:Y34D9A.6.1;Parent=Transcript:Gene:Y34D9A.6.1
+I Coding_transcript gene 1062295 1065271 . + . ID=Gene:Gene:Y34D9A.2
+I Coding_transcript mRNA 1062295 1065271 . + . ID=Transcript:Gene:Y34D9A.2.1;Parent=Gene:Gene:Y34D9A.2
+I Coding_transcript five_prime_UTR 1062295 1062448 . + . ID=five_prime_UTR:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1
+I Coding_transcript CDS 1062449 1062757 . + 0 ID=CDS:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1
+I Coding_transcript CDS 1063602 1064008 . + 0 ID=CDS:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1
+I Coding_transcript CDS 1064656 1064920 . + 1 ID=CDS:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1
+I Coding_transcript three_prime_UTR 1064921 1065271 . + . ID=three_prime_UTR:Gene:Y34D9A.2.1;Parent=Transcript:Gene:Y34D9A.2.1
+I Coding_transcript gene 1068593 1075012 . + . ID=Gene:Gene:R06A10.2.1
+I Coding_transcript mRNA 1068593 1075012 . + . ID=Transcript:Gene:R06A10.2.1.1;Parent=Gene:Gene:R06A10.2.1
+I Coding_transcript five_prime_UTR 1068593 1068997 . + . ID=five_prime_UTR:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1
+I Coding_transcript CDS 1068998 1069090 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1
+I Coding_transcript CDS 1069416 1069577 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1
+I Coding_transcript CDS 1070370 1070489 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1
+I Coding_transcript CDS 1071472 1071569 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1
+I Coding_transcript CDS 1071623 1071751 . + 1 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1
+I Coding_transcript CDS 1071801 1071935 . + 1 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1
+I Coding_transcript CDS 1072675 1072918 . + 1 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1
+I Coding_transcript CDS 1074199 1074345 . + 0 ID=CDS:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1
+I Coding_transcript three_prime_UTR 1074346 1075012 . + . ID=three_prime_UTR:Gene:R06A10.2.1.1;Parent=Transcript:Gene:R06A10.2.1.1
+I Coding_transcript mRNA 1068613 1074345 . + . ID=Transcript:Gene:R06A10.2.1.2;Parent=Gene:Gene:R06A10.2.1
+I Coding_transcript five_prime_UTR 1068613 1068893 . + . ID=five_prime_UTR:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2
+I Coding_transcript five_prime_UTR 1068976 1068997 . + . ID=five_prime_UTR:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2
+I Coding_transcript CDS 1068998 1069090 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2
+I Coding_transcript CDS 1069416 1069577 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2
+I Coding_transcript CDS 1070370 1070489 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2
+I Coding_transcript CDS 1071472 1071569 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2
+I Coding_transcript CDS 1071623 1071751 . + 1 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2
+I Coding_transcript CDS 1071801 1071935 . + 1 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2
+I Coding_transcript CDS 1072675 1072918 . + 1 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2
+I Coding_transcript CDS 1074199 1074345 . + 0 ID=CDS:Gene:R06A10.2.1.2;Parent=Transcript:Gene:R06A10.2.1.2
+I Coding_transcript gene 1128326 1131739 . + . ID=Gene:Gene:ZK993.1
+I Coding_transcript mRNA 1128326 1131739 . + . ID=Transcript:Gene:ZK993.1.1;Parent=Gene:Gene:ZK993.1
+I Coding_transcript five_prime_UTR 1128326 1128360 . + . ID=five_prime_UTR:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1
+I Coding_transcript CDS 1128361 1128428 . + 0 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1
+I Coding_transcript CDS 1129212 1129396 . + 1 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1
+I Coding_transcript CDS 1129808 1130016 . + 2 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1
+I Coding_transcript CDS 1131224 1131289 . + 0 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1
+I Coding_transcript CDS 1131475 1131636 . + 0 ID=CDS:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1
+I Coding_transcript three_prime_UTR 1131637 1131739 . + . ID=three_prime_UTR:Gene:ZK993.1.1;Parent=Transcript:Gene:ZK993.1.1
+I Coding_transcript gene 1337021 1338121 . + . ID=Gene:Gene:K12C11.6
+I Coding_transcript mRNA 1337021 1338121 . + . ID=Transcript:Gene:K12C11.6.1;Parent=Gene:Gene:K12C11.6
+I Coding_transcript CDS 1337021 1337117 . + 0 ID=CDS:Gene:K12C11.6.1;Parent=Transcript:Gene:K12C11.6.1
+I Coding_transcript CDS 1337256 1337373 . + 2 ID=CDS:Gene:K12C11.6.1;Parent=Transcript:Gene:K12C11.6.1
+I Coding_transcript CDS 1337938 1338121 . + 1 ID=CDS:Gene:K12C11.6.1;Parent=Transcript:Gene:K12C11.6.1
+I Coding_transcript gene 1340679 1341259 . + . ID=Gene:Gene:K12C11.2.1
+I Coding_transcript mRNA 1340679 1341259 . + . ID=Transcript:Gene:K12C11.2.1.1;Parent=Gene:Gene:K12C11.2.1
+I Coding_transcript five_prime_UTR 1340679 1340703 . + . ID=five_prime_UTR:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1
+I Coding_transcript CDS 1340704 1340769 . + 0 ID=CDS:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1
+I Coding_transcript CDS 1340826 1340926 . + 0 ID=CDS:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1
+I Coding_transcript CDS 1340977 1341085 . + 1 ID=CDS:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1
+I Coding_transcript three_prime_UTR 1341086 1341259 . + . ID=three_prime_UTR:Gene:K12C11.2.1.1;Parent=Transcript:Gene:K12C11.2.1.1
+I Coding_transcript mRNA 1340704 1341253 . + . ID=Transcript:Gene:K12C11.2.1.2;Parent=Gene:Gene:K12C11.2.1
+I Coding_transcript CDS 1340704 1340769 . + 0 ID=CDS:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2
+I Coding_transcript CDS 1340826 1340926 . + 0 ID=CDS:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2
+I Coding_transcript CDS 1340977 1341085 . + 1 ID=CDS:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2
+I Coding_transcript three_prime_UTR 1341086 1341253 . + . ID=three_prime_UTR:Gene:K12C11.2.1.2;Parent=Transcript:Gene:K12C11.2.1.2
+I Coding_transcript gene 1394570 1402943 . + . ID=Gene:Gene:Y92H12BR.7
+I Coding_transcript mRNA 1394570 1402943 . + . ID=Transcript:Gene:Y92H12BR.7.1;Parent=Gene:Gene:Y92H12BR.7
+I Coding_transcript CDS 1394570 1394781 . + 0 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1
+I Coding_transcript CDS 1395760 1395875 . + 1 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1
+I Coding_transcript CDS 1395940 1396113 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1
+I Coding_transcript CDS 1397517 1397708 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1
+I Coding_transcript CDS 1399169 1399348 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1
+I Coding_transcript CDS 1402093 1402943 . + 2 ID=CDS:Gene:Y92H12BR.7.1;Parent=Transcript:Gene:Y92H12BR.7.1
+I Coding_transcript gene 1405945 1413072 . + . ID=Gene:Gene:Y92H12BR.6
+I Coding_transcript mRNA 1405945 1413072 . + . ID=Transcript:Gene:Y92H12BR.6.1;Parent=Gene:Gene:Y92H12BR.6
+I Coding_transcript CDS 1405945 1406373 . + 0 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1
+I Coding_transcript CDS 1408458 1408637 . + 0 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1
+I Coding_transcript CDS 1411442 1411616 . + 0 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1
+I Coding_transcript CDS 1412612 1413033 . + 2 ID=CDS:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1
+I Coding_transcript three_prime_UTR 1413034 1413072 . + . ID=three_prime_UTR:Gene:Y92H12BR.6.1;Parent=Transcript:Gene:Y92H12BR.6.1
+I Coding_transcript gene 1416592 1425131 . + . ID=Gene:Gene:Y92H12BR.3b
+I Coding_transcript mRNA 1416592 1424609 . + . ID=Transcript:Gene:Y92H12BR.3b.1;Parent=Gene:Gene:Y92H12BR.3b
+I Coding_transcript CDS 1416592 1416738 . + 0 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1
+I Coding_transcript CDS 1416796 1416961 . + 0 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1
+I Coding_transcript CDS 1422689 1422993 . + 2 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1
+I Coding_transcript CDS 1424412 1424609 . + 0 ID=CDS:Gene:Y92H12BR.3b.1;Parent=Transcript:Gene:Y92H12BR.3b.1
+I Coding_transcript mRNA 1416592 1425131 . + . ID=Transcript:Gene:Y92H12BR.3b.2;Parent=Gene:Gene:Y92H12BR.3b
+I Coding_transcript CDS 1416592 1416738 . + 0 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2
+I Coding_transcript CDS 1416796 1416961 . + 0 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2
+I Coding_transcript CDS 1420212 1421309 . + 2 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2
+I Coding_transcript CDS 1422689 1422993 . + 2 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2
+I Coding_transcript CDS 1424412 1424609 . + 0 ID=CDS:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2
+I Coding_transcript three_prime_UTR 1424610 1425131 . + . ID=three_prime_UTR:Gene:Y92H12BR.3b.2;Parent=Transcript:Gene:Y92H12BR.3b.2
+I Coding_transcript gene 1483084 1490474 . + . ID=Gene:Gene:F47G6.1
+I Coding_transcript mRNA 1483084 1490474 . + . ID=Transcript:Gene:F47G6.1.1;Parent=Gene:Gene:F47G6.1
+I Coding_transcript five_prime_UTR 1483084 1483106 . + . ID=five_prime_UTR:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript CDS 1483107 1483236 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript CDS 1484720 1484877 . + 2 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript CDS 1484924 1485060 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript CDS 1485927 1486012 . + 1 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript CDS 1486057 1486252 . + 2 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript CDS 1487180 1487411 . + 1 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript CDS 1487770 1488096 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript CDS 1488425 1488676 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript CDS 1489069 1489282 . + 0 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript CDS 1490282 1490322 . + 2 ID=CDS:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript three_prime_UTR 1490323 1490474 . + . ID=three_prime_UTR:Gene:F47G6.1.1;Parent=Transcript:Gene:F47G6.1.1
+I Coding_transcript gene 1514763 1527350 . + . ID=Gene:Gene:Y92H12A.4
+I Coding_transcript mRNA 1514763 1527350 . + . ID=Transcript:Gene:Y92H12A.4.1;Parent=Gene:Gene:Y92H12A.4
+I Coding_transcript CDS 1514763 1514900 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1
+I Coding_transcript CDS 1514955 1515059 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1
+I Coding_transcript CDS 1515111 1515186 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1
+I Coding_transcript CDS 1515233 1515396 . + 2 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1
+I Coding_transcript CDS 1517534 1518121 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1
+I Coding_transcript CDS 1520159 1520271 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1
+I Coding_transcript CDS 1522175 1522459 . + 1 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1
+I Coding_transcript CDS 1524299 1524644 . + 1 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1
+I Coding_transcript CDS 1525846 1526006 . + 0 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1
+I Coding_transcript CDS 1526978 1527350 . + 1 ID=CDS:Gene:Y92H12A.4.1;Parent=Transcript:Gene:Y92H12A.4.1
+I Coding_transcript gene 1610391 1619944 . + . ID=Gene:Gene:Y73E7A.9
+I Coding_transcript mRNA 1610391 1619944 . + . ID=Transcript:Gene:Y73E7A.9.1;Parent=Gene:Gene:Y73E7A.9
+I Coding_transcript CDS 1610391 1610504 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1
+I Coding_transcript CDS 1610564 1610901 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1
+I Coding_transcript CDS 1611487 1611595 . + 1 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1
+I Coding_transcript CDS 1611747 1611828 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1
+I Coding_transcript CDS 1613478 1613896 . + 2 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1
+I Coding_transcript CDS 1616100 1616261 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1
+I Coding_transcript CDS 1617834 1617969 . + 0 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1
+I Coding_transcript CDS 1619628 1619944 . + 2 ID=CDS:Gene:Y73E7A.9.1;Parent=Transcript:Gene:Y73E7A.9.1
+I Coding_transcript gene 1629004 1633494 . + . ID=Gene:Gene:Y73E7A.1a.1
+I Coding_transcript mRNA 1629004 1633494 . + . ID=Transcript:Gene:Y73E7A.1a.1.1;Parent=Gene:Gene:Y73E7A.1a.1
+I Coding_transcript five_prime_UTR 1629004 1629022 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1
+I Coding_transcript CDS 1629023 1629118 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1
+I Coding_transcript CDS 1629164 1629226 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1
+I Coding_transcript CDS 1629283 1629410 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1
+I Coding_transcript CDS 1630554 1630794 . + 1 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1
+I Coding_transcript CDS 1633222 1633365 . + 0 ID=CDS:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1
+I Coding_transcript three_prime_UTR 1633366 1633494 . + . ID=three_prime_UTR:Gene:Y73E7A.1a.1.1;Parent=Transcript:Gene:Y73E7A.1a.1.1
+I Coding_transcript mRNA 1629006 1633365 . + . ID=Transcript:Gene:Y73E7A.1a.1.2;Parent=Gene:Gene:Y73E7A.1a.1
+I Coding_transcript five_prime_UTR 1629006 1629022 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2
+I Coding_transcript CDS 1629023 1629118 . + 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2
+I Coding_transcript CDS 1629164 1629226 . + 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2
+I Coding_transcript CDS 1629283 1629410 . + 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2
+I Coding_transcript CDS 1630554 1630794 . + 1 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2
+I Coding_transcript CDS 1633222 1633365 . + 0 ID=CDS:Gene:Y73E7A.1a.1.2;Parent=Transcript:Gene:Y73E7A.1a.1.2
+I Coding_transcript mRNA 1629010 1633365 . + . ID=Transcript:Gene:Y73E7A.1a.1.3;Parent=Gene:Gene:Y73E7A.1a.1
+I Coding_transcript five_prime_UTR 1629010 1629118 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3
+I Coding_transcript five_prime_UTR 1629164 1629225 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3
+I Coding_transcript five_prime_UTR 1629281 1629354 . + . ID=five_prime_UTR:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3
+I Coding_transcript CDS 1629355 1629410 . + 0 ID=CDS:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3
+I Coding_transcript CDS 1630554 1630794 . + 1 ID=CDS:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3
+I Coding_transcript CDS 1633222 1633365 . + 0 ID=CDS:Gene:Y73E7A.1a.1.3;Parent=Transcript:Gene:Y73E7A.1a.1.3
+I Coding_transcript gene 1652917 1655337 . + . ID=Gene:Gene:Y71G12B.18
+I Coding_transcript mRNA 1652917 1655337 . + . ID=Transcript:Gene:Y71G12B.18.1;Parent=Gene:Gene:Y71G12B.18
+I Coding_transcript CDS 1652917 1653000 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1
+I Coding_transcript CDS 1653438 1653755 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1
+I Coding_transcript CDS 1653901 1653981 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1
+I Coding_transcript CDS 1654754 1654975 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1
+I Coding_transcript CDS 1655026 1655334 . + 0 ID=CDS:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1
+I Coding_transcript three_prime_UTR 1655335 1655337 . + . ID=three_prime_UTR:Gene:Y71G12B.18.1;Parent=Transcript:Gene:Y71G12B.18.1
+I Coding_transcript gene 1681814 1685064 . + . ID=Gene:Gene:Y71G12B.16
+I Coding_transcript mRNA 1681814 1685064 . + . ID=Transcript:Gene:Y71G12B.16.1;Parent=Gene:Gene:Y71G12B.16
+I Coding_transcript CDS 1681814 1681899 . + 0 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1
+I Coding_transcript CDS 1682831 1683432 . + 1 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1
+I Coding_transcript CDS 1683480 1683521 . + 2 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1
+I Coding_transcript CDS 1684382 1684450 . + 2 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1
+I Coding_transcript CDS 1684829 1685064 . + 2 ID=CDS:Gene:Y71G12B.16.1;Parent=Transcript:Gene:Y71G12B.16.1
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions.info
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions.info Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,5 @@
+The files contain example data from 50 genes of the C. elegans genome on chromosome I.
+
+These files are:
+* the genome annotation in GFF3 format
+* two small sets of aligned reads in SAM format (generated with PalMapper) from SRX001872 (http://www.ncbi.nlm.nih.gov/sra/SRX001872?report=full) and SRX001875 (http://www.ncbi.nlm.nih.gov/sra/SRX001875?report=full)
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions_deseq.txt
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/test_data/deseq_c_elegans_WS200-I-regions_deseq.txt Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,51 @@
+deseq_c_elegans_WS200.I.regions.SRX001872_vs._deseq_c_elegans_WS200.I.regions.SRX001875
+Gene:Gene:Y48G1BL.1 0.298162984580380
+Gene:Gene:F53G12.8 0.0100203770664889
+Gene:Gene:F56A6.1b 0.196356415924958
+Gene:Gene:F56C11.6b 0.131729472518535
+Gene:Gene:Y95B8A.7 0.82068108848155
+Gene:Gene:Y34D9A.2 0.0853090936357318
+Gene:Gene:R06A10.2.1 0.0729242320545049
+Gene:Gene:W04C9.2 0.566684334319464
+Gene:Gene:Y48G1C.2.1 0.600568879247855
+Gene:Gene:Y71G12B.18 0.436992089489852
+Gene:Gene:Y48G1C.9.1 0.653680190916246
+Gene:Gene:Y48G1C.12 0.962705668820165
+Gene:Gene:Y92H12A.4 0.71745387890079
+Gene:Gene:Y34D9A.6 0.554353505200952
+Gene:Gene:Y74C9A.2.2 0.193317121924372
+Gene:Gene:Y73E7A.1a.1 1
+Gene:Gene:Y48G1C.11 0.343541645863403
+Gene:Gene:ZK993.1 0.160531710602269
+Gene:Gene:Y73E7A.9 0.505459732270159
+Gene:Gene:Y34D9A.11 0.0963318383936614
+Gene:Gene:Y92H12BR.7 0.640348677865408
+Gene:Gene:Y92H12BR.3b 0.91026935144141
+Gene:Gene:Y65B4BR.8 0.838122463966569
+Gene:Gene:Y71F9AM.5b 0.971282320939981
+Gene:Gene:R119.7 0.0391196294523181
+Gene:Gene:F56C11.5b 0.457982589114528
+Gene:Gene:Y92H12BR.6 0.487521663730281
+Gene:Gene:Y95B8A.6a.2 0.927779341088235
+Gene:Gene:F53G12.7 0.196705810975026
+Gene:Gene:Y48G1A.2 0.453586101344411
+Gene:Gene:C53D5.1c.1 0.391738974840997
+Gene:Gene:Y34D9A.3 0.71006993005907
+Gene:Gene:Y18H1A.6 0.329792105744889
+Gene:Gene:T06A4.1b 0.295665526024382
+Gene:Gene:F53G12.3 0.631494248315874
+Gene:Gene:R119.6 0.651145530807306
+Gene:Gene:Y95B8A.5 0.512001544838653
+Gene:Gene:Y65B4BR.4b 0.904179315054261
+Gene:Gene:K12C11.2.1 0.364903241182862
+Gene:Gene:F53G12.5b 0.537007770889288
+Gene:Gene:Y95B8A.8 0.59137956260358
+Gene:Gene:T06A4.3a 0.171238758638891
+Gene:Gene:F53G12.4 0.949563214223464
+Gene:Gene:C54G6.2 0.0780422099961616
+Gene:Gene:W04C9.1 0.223549535364483
+Gene:Gene:K12C11.6 0.301231157430176
+Gene:Gene:F47G6.1 0.196694866924542
+Gene:Gene:Y71F9AM.4a 0.728480588565213
+Gene:Gene:Y71G12B.16 0.900935024006703
+Gene:Gene:Y65B4BR.1 0.475324346757848
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/test_data/genes.mat
Binary file deseq-hts_1.0/test_data/genes.mat has changed
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/tools/ParseGFF.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/tools/ParseGFF.py Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,362 @@
+#!/usr/bin/env python
+"""
+Extract genome annotation from a GFF3 (a tab delimited format
+for storing sequence features and annotations:
+http://www.sequenceontology.org/gff3.shtml) file.
+
+Usage: ParseGFF.py in.gff3 out.mat
+
+Requirements:
+ Scipy :- http://scipy.org/
+ Numpy :- http://numpy.org/
+
+Copyright (C) 2010-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany
+"""
+import re, sys, os
+import scipy.io as sio
+import numpy as np
+
+def createExon(strand_p, five_p_utr, cds_cod, three_p_utr):
+ """Create exon coordinates for one transcript from its UTR and CDS regions.
+ five_p_utr, cds_cod and three_p_utr are lists of [start, stop] pairs; a UTR and a CDS whose facing ends differ by 0 or 1 are fused into one exon. Returns the list of exon [start, stop] pairs."""
+ exon_pos = []
+ if strand_p == '+': # forward strand: the last 5' UTR is compared with the first CDS, the first 3' UTR with the last CDS
+ utr5_start, utr5_end = 0, 0
+ if five_p_utr != []:
+ utr5_start, utr5_end = five_p_utr[-1][0], five_p_utr[-1][1]
+ cds_5start, cds_5end = cds_cod[0][0], cds_cod[0][1]
+ jun_exon = []
+ if cds_5start-utr5_end == 0 or cds_5start-utr5_end == 1: # 5' UTR touches the first CDS: junction exon
+ jun_exon = [utr5_start, cds_5end]
+ if len(cds_cod) == 1: # single CDS: the same CDS may join a UTR on either side
+ five_prime_flag = 0
+ if jun_exon != []:
+ five_p_utr = five_p_utr[:-1]
+ five_prime_flag = 1
+ for utr5 in five_p_utr:
+ exon_pos.append(utr5)
+ jun_exon = []
+ utr3_start, utr3_end = 0, 0
+ if three_p_utr != []:
+ utr3_start = three_p_utr[0][0]
+ utr3_end = three_p_utr[0][1]
+ if utr3_start-cds_5end == 0 or utr3_start-cds_5end == 1:
+ jun_exon = [cds_5start, utr3_end]
+ three_prime_flag = 0
+ if jun_exon != []:
+ cds_cod = cds_cod[:-1]
+ three_p_utr = three_p_utr[1:]
+ three_prime_flag = 1
+ if five_prime_flag == 1 and three_prime_flag == 1:
+ exon_pos.append([utr5_start, utr3_end])
+ if five_prime_flag == 1 and three_prime_flag == 0:
+ exon_pos.append([utr5_start, cds_5end])
+ cds_cod = cds_cod[:-1]
+ if five_prime_flag == 0 and three_prime_flag == 1:
+ exon_pos.append([cds_5start, utr3_end])
+ for cds in cds_cod:
+ exon_pos.append(cds)
+ for utr3 in three_p_utr:
+ exon_pos.append(utr3)
+ else:
+ if jun_exon != []:
+ five_p_utr = five_p_utr[:-1]
+ cds_cod = cds_cod[1:]
+ for utr5 in five_p_utr:
+ exon_pos.append(utr5)
+ exon_pos.append(jun_exon) if jun_exon != [] else '' # conditional expression used only for its append side effect
+ jun_exon = []
+ utr3_start, utr3_end = 0, 0
+ if three_p_utr != []:
+ utr3_start = three_p_utr[0][0]
+ utr3_end = three_p_utr[0][1]
+ cds_3start = cds_cod[-1][0]
+ cds_3end = cds_cod[-1][1]
+ if utr3_start-cds_3end == 0 or utr3_start-cds_3end == 1:
+ jun_exon = [cds_3start, utr3_end]
+ if jun_exon != []:
+ cds_cod = cds_cod[:-1]
+ three_p_utr = three_p_utr[1:]
+ for cds in cds_cod:
+ exon_pos.append(cds)
+ exon_pos.append(jun_exon) if jun_exon != [] else '' # conditional expression used only for its append side effect
+ for utr3 in three_p_utr:
+ exon_pos.append(utr3)
+ elif strand_p == '-': # reverse strand: the last 3' UTR is compared with the first CDS, the first 5' UTR with the last CDS
+ utr3_start, utr3_end = 0, 0
+ if three_p_utr != []:
+ utr3_start = three_p_utr[-1][0]
+ utr3_end = three_p_utr[-1][1]
+ cds_3start = cds_cod[0][0]
+ cds_3end = cds_cod[0][1]
+ jun_exon = []
+ if cds_3start-utr3_end == 0 or cds_3start-utr3_end == 1: # 3' UTR touches the first CDS: junction exon
+ jun_exon = [utr3_start, cds_3end]
+ if len(cds_cod) == 1: # single CDS: the same CDS may join a UTR on either side
+ three_prime_flag = 0
+ if jun_exon != []:
+ three_p_utr = three_p_utr[:-1]
+ three_prime_flag = 1
+ for utr3 in three_p_utr:
+ exon_pos.append(utr3)
+ jun_exon = []
+ (utr5_start, utr5_end) = (0, 0)
+ if five_p_utr != []:
+ utr5_start = five_p_utr[0][0]
+ utr5_end = five_p_utr[0][1]
+ if utr5_start-cds_3end == 0 or utr5_start-cds_3end == 1:
+ jun_exon = [cds_3start, utr5_end]
+ five_prime_flag = 0
+ if jun_exon != []:
+ cds_cod = cds_cod[:-1]
+ five_p_utr = five_p_utr[1:]
+ five_prime_flag = 1
+ if three_prime_flag == 1 and five_prime_flag == 1:
+ exon_pos.append([utr3_start, utr5_end])
+ if three_prime_flag == 1 and five_prime_flag == 0:
+ exon_pos.append([utr3_start, cds_3end])
+ cds_cod = cds_cod[:-1]
+ if three_prime_flag == 0 and five_prime_flag == 1:
+ exon_pos.append([cds_3start, utr5_end])
+ for cds in cds_cod:
+ exon_pos.append(cds)
+ for utr5 in five_p_utr:
+ exon_pos.append(utr5)
+ else:
+ if jun_exon != []:
+ three_p_utr = three_p_utr[:-1]
+ cds_cod = cds_cod[1:]
+ for utr3 in three_p_utr:
+ exon_pos.append(utr3)
+ if jun_exon != []:
+ exon_pos.append(jun_exon)
+ jun_exon = []
+ (utr5_start, utr5_end) = (0, 0)
+ if five_p_utr != []:
+ utr5_start = five_p_utr[0][0]
+ utr5_end = five_p_utr[0][1]
+ cds_5start = cds_cod[-1][0]
+ cds_5end = cds_cod[-1][1]
+ if utr5_start-cds_5end == 0 or utr5_start-cds_5end == 1:
+ jun_exon = [cds_5start, utr5_end]
+ if jun_exon != []:
+ cds_cod = cds_cod[:-1]
+ five_p_utr = five_p_utr[1:]
+ for cds in cds_cod:
+ exon_pos.append(cds)
+ if jun_exon != []:
+ exon_pos.append(jun_exon)
+ for utr5 in five_p_utr:
+ exon_pos.append(utr5)
+ return exon_pos
+
+def init_gene():
+ """Return a new gene record with an empty/default value for every field.
+ Keys: chr, exons, gene_info, id, is_alt_spliced, name, source, start, stop, strand, transcripts."""
+ gene_details=dict(chr='',
+ exons=[],
+ gene_info={},
+ id='',
+ is_alt_spliced=0,
+ name='',
+ source='',
+ start='',
+ stop='',
+ strand='',
+ transcripts=[])
+ return gene_details
+
+def FeatureValueFormat(singlegene):
+ """Make feature values compatible with writing to a .mat file.
+ Replaces singlegene['exons'] and singlegene['transcripts'] with 1-D NumPy object arrays (one np.array per entry) and returns singlegene."""
+ comp_exon = np.zeros((len(singlegene['exons']),), dtype=np.object) # NOTE(review): the np.object alias was removed in NumPy 1.24 (builtin object replaces it); confirm the NumPy version in use
+ for i in range(len(singlegene['exons'])):
+ comp_exon[i]= np.array(singlegene['exons'][i])
+ singlegene['exons'] = comp_exon
+ comp_transcripts = np.zeros((len(singlegene['transcripts']),), dtype=np.object) # NOTE(review): same np.object concern as above
+ for i in range(len(singlegene['transcripts'])):
+ comp_transcripts[i] = np.array(singlegene['transcripts'][i])
+ singlegene['transcripts'] = comp_transcripts
+ return singlegene
+
+def CreateGeneModels(genes_cmpt, transcripts_cmpt, exons_cmpt, utr3_cmpt, utr5_cmpt, cds_cmpt):
+ """Create coding/non-coding gene models from parsed GFF objects.
+ Each *_cmpt argument is a dict keyed by a (chromosome, feature ID) tuple; genes without an entry in transcripts_cmpt are not modelled. Returns the list of gene dicts."""
+ gene_counter, gene_models=1, []
+ for gene_entry in genes_cmpt: ## Figure out the genes and transcripts associated feature
+ if gene_entry in transcripts_cmpt:
+ gene=init_gene()
+ gene['id']=gene_counter
+ gene['name']=gene_entry[1]
+ gene['chr']=genes_cmpt[gene_entry]['chr']
+ gene['source']=genes_cmpt[gene_entry]['source']
+ gene['start']=genes_cmpt[gene_entry]['start']
+ gene['stop']=genes_cmpt[gene_entry]['stop']
+ gene['strand']=genes_cmpt[gene_entry]['strand']
+ if not gene['strand'] in ['+', '-']:
+ gene['strand']='.' # Strand info not known replaced with a dot symbol instead of None, ?, . etc.
+ gene['gene_info']=dict(ID=gene_entry[1])
+ if len(transcripts_cmpt[gene_entry])>1:
+ gene['is_alt_spliced'] = 1
+ for tids in transcripts_cmpt[gene_entry]: ## transcript section related tags
+ gene['transcripts'].append(tids['ID'])
+ if len(exons_cmpt) != 0:
+ if (gene['chr'], tids['ID']) in exons_cmpt:
+ exon_cod=[[feat_exon['start'], feat_exon['stop']] for feat_exon in exons_cmpt[(gene['chr'], tids['ID'])]]
+ else: ## build exon coordinates from UTR3, UTR5 and CDS
+ utr5_pos, cds_pos, utr3_pos = [], [], []
+ if (gene['chr'], tids['ID']) in utr5_cmpt:
+ utr5_pos=[[feat_utr5['start'], feat_utr5['stop']] for feat_utr5 in utr5_cmpt[(gene['chr'], tids['ID'])]]
+ if (gene['chr'], tids['ID']) in cds_cmpt:
+ cds_pos=[[feat_cds['start'], feat_cds['stop']] for feat_cds in cds_cmpt[(gene['chr'], tids['ID'])]]
+ if (gene['chr'], tids['ID']) in utr3_cmpt:
+ utr3_pos=[[feat_utr3['start'], feat_utr3['stop']] for feat_utr3 in utr3_cmpt[(gene['chr'], tids['ID'])]]
+ exon_cod=createExon(gene['strand'], utr5_pos, cds_pos, utr3_pos)
+ if gene['strand']=='-':
+ if len(exon_cod) >1:
+ if exon_cod[0][0] > exon_cod[-1][0]: # exons listed in descending order: flip to ascending
+ exon_cod.reverse()
+ if exon_cod:
+ gene['exons'].append(exon_cod)
+ gene=FeatureValueFormat(gene) # prepare for MAT writing
+ gene_counter+=1
+ gene_models.append(gene)
+ return gene_models
+
+def GFFParse(gff_file):
+ """Parse a GFF file and group its features by parent/child relationship.
+ Returns six dicts in the order genes, transcripts, exons, utr3, utr5, cds: genes maps (chromosome, gene ID) to an info dict, the others map (chromosome, parent ID) to a list of feature info dicts."""
+ genes, utr5, exons=dict(), dict(), dict()
+ transcripts, utr3, cds=dict(), dict(), dict()
+ # TODO Include growing key words of different non-coding/coding transcripts
+ features=['mRNA', 'transcript', 'ncRNA', 'miRNA', 'pseudogenic_transcript', 'rRNA', 'snoRNA', 'snRNA', 'tRNA', 'scRNA']
+ gff_handle=open(gff_file, "rU")
+ for gff_line in gff_handle:
+ gff_line=gff_line.strip('\n\r').split('\t')
+ if re.match(r'#|>', gff_line[0]): # skip commented line and fasta identifier line
+ continue
+ if len(gff_line)==1: # skip fasta sequence/empty line if present
+ continue
+ assert len(gff_line)==9, '\t'.join(gff_line) # not found 9 tab-delimited fields in this line
+ if '' in gff_line: # skip this line if there any field with an empty value
+ print 'Skipping..', '\t'.join(gff_line)
+ continue
+ if gff_line[-1][-1]==';': # trim the last ';' character
+ gff_line[-1]=gff_line[-1].strip(';')
+ if gff_line[2] in ['gene', 'pseudogene']:
+ gid, gene_info=None, dict()
+ gene_info['start']=int(gff_line[3])
+ gene_info['stop']=int(gff_line[4])
+ gene_info['chr']=gff_line[0]
+ gene_info['source']=gff_line[1]
+ gene_info['strand']=gff_line[6]
+ for attb in gff_line[-1].split(';'):
+ attb=attb.split('=') # gff attributes are separated by key=value pair
+ if attb[0]=='ID':
+ gid=attb[1]
+ break
+ genes[(gff_line[0], gid)]=gene_info # store gene information based on the chromosome and gene symbol.
+ elif gff_line[2] in features:
+ gid, mrna_info=None, dict()
+ mrna_info['start']=int(gff_line[3])
+ mrna_info['stop']=int(gff_line[4])
+ mrna_info['chr']=gff_line[0]
+ mrna_info['strand']=gff_line[6]
+ for attb in gff_line[-1].split(';'):
+ attb=attb.split('=')
+ if attb[0]=='Parent':
+ gid=attb[1]
+ elif attb[0]=='ID':
+ mrna_info[attb[0]]=attb[1]
+ for fid in gid.split(','): # child may be mapped to multiple parents ex: Parent=AT01,AT01-1-Protein
+ if (gff_line[0], fid) in transcripts:
+ transcripts[(gff_line[0], fid)].append(mrna_info)
+ else:
+ transcripts[(gff_line[0], fid)]=[mrna_info]
+ elif gff_line[2] in ['exon']:
+ tids, exon_info=None, dict()
+ exon_info['start']=int(gff_line[3])
+ exon_info['stop']=int(gff_line[4])
+ exon_info['chr']=gff_line[0]
+ exon_info['strand']=gff_line[6]
+ for attb in gff_line[-1].split(';'):
+ attb=attb.split('=')
+ if attb[0]=='Parent':
+ tids=attb[1]
+ break
+ for tid in tids.split(','):
+ if (gff_line[0], tid) in exons:
+ exons[(gff_line[0], tid)].append(exon_info)
+ else:
+ exons[(gff_line[0], tid)]=[exon_info]
+ elif gff_line[2] in ['five_prime_UTR']:
+ utr5_info, tids=dict(), None
+ utr5_info['start']=int(gff_line[3])
+ utr5_info['stop']=int(gff_line[4])
+ utr5_info['chr']=gff_line[0]
+ utr5_info['strand']=gff_line[6]
+ for attb in gff_line[-1].split(';'):
+ attb=attb.split('=')
+ if attb[0]=='Parent':
+ tids=attb[1]
+ break
+ for tid in tids.split(','):
+ if (gff_line[0], tid) in utr5:
+ utr5[(gff_line[0], tid)].append(utr5_info)
+ else:
+ utr5[(gff_line[0], tid)]=[utr5_info]
+ elif gff_line[2] in ['CDS']:
+ cds_info, tids=dict(), None
+ cds_info['start']=int(gff_line[3])
+ cds_info['stop']=int(gff_line[4])
+ cds_info['chr']=gff_line[0]
+ cds_info['strand']=gff_line[6]
+ for attb in gff_line[-1].split(';'):
+ attb=attb.split('=')
+ if attb[0]=='Parent':
+ tids=attb[1]
+ break
+ for tid in tids.split(','):
+ if (gff_line[0], tid) in cds:
+ cds[(gff_line[0], tid)].append(cds_info)
+ else:
+ cds[(gff_line[0], tid)]=[cds_info]
+ elif gff_line[2] in ['three_prime_UTR']:
+ utr3_info, tids=dict(), None
+ utr3_info['start']=int(gff_line[3])
+ utr3_info['stop']=int(gff_line[4])
+ utr3_info['chr']=gff_line[0]
+ utr3_info['strand']=gff_line[6]
+ for attb in gff_line[-1].split(';'):
+ attb=attb.split('=')
+ if attb[0]=='Parent':
+ tids=attb[1]
+ break
+ for tid in tids.split(','):
+ if (gff_line[0], tid) in utr3:
+ utr3[(gff_line[0], tid)].append(utr3_info)
+ else:
+ utr3[(gff_line[0], tid)]=[utr3_info]
+ gff_handle.close()
+ return genes, transcripts, exons, utr3, utr5, cds
+
+def __main__():
+ """Extract genome feature information from the GFF3 file named by sys.argv[1] and
+ save the resulting gene models to the MAT file named by sys.argv[2] (for the rQuant downstream processing, per the original note).
+ Prints the module docstring and exits with status -1 when an argument is missing."""
+ try:
+ gff_file = sys.argv[1]
+ mat_file = sys.argv[2]
+ except:
+ print __doc__
+ sys.exit(-1)
+ genes, transcripts, exons, utr3, utr5, cds=GFFParse(gff_file)
+ gene_models=CreateGeneModels(genes, transcripts, exons, utr3, utr5, cds)
+ # TODO Write to matlab/octave struct instead of cell arrays.
+ sio.savemat(mat_file,
+ mdict=dict(genes=gene_models),
+ format='5',
+ oned_as='row')
+
+if __name__=='__main__': # run the converter only when this file is executed as a script
+ __main__()
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/tools/determine_engine.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/tools/determine_engine.m Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,10 @@
+function engine = determine_engine()
+% Return 'octave' when license() reports 'GNU General Public License', otherwise 'matlab'.
+lserve=license;
+if ~isequal(lserve, 'GNU General Public License'),
+ engine='matlab';
+else
+ engine='octave';
+end;
+
+return
diff -r 000000000000 -r 94a108763d9e deseq-hts_1.0/tools/separate.m
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq-hts_1.0/tools/separate.m Wed May 09 20:43:47 2012 -0400
@@ -0,0 +1,7 @@
+function f = separate(str, delim)
+% Split str at every position equal to delim and return the pieces, each passed through deblank, as a cell array (assumes delim is a single character - confirm with callers).
+f={};
+idx=[0 find(str==delim) length(str)+1];
+for i=1:length(idx)-1
+ f{i}=deblank(str(idx(i)+1:idx(i+1)-1));
+end;