| Next changeset 1:b4ff32b63fcd (2016-03-29) |
|
Commit message:
Uploaded |
|
added:
ezBAMQC/MANIFEST.in ezBAMQC/Makefile ezBAMQC/README.rst ezBAMQC/doc/CONTACTS ezBAMQC/doc/COPYING ezBAMQC/doc/INSTALL ezBAMQC/doc/THANKS ezBAMQC/doc/bamqc-icon.png ezBAMQC/ezBAMQC ezBAMQC/setup.py ezBAMQC/src/ezBAMQC/Constants.h ezBAMQC/src/ezBAMQC/Coverage_prof.cpp ezBAMQC/src/ezBAMQC/Coverage_prof.h ezBAMQC/src/ezBAMQC/GeneFeatures.cpp ezBAMQC/src/ezBAMQC/GeneFeatures.h ezBAMQC/src/ezBAMQC/InnerDist_prof.cpp ezBAMQC/src/ezBAMQC/InnerDist_prof.h ezBAMQC/src/ezBAMQC/IntervalTree.cpp ezBAMQC/src/ezBAMQC/IntervalTree.h ezBAMQC/src/ezBAMQC/Mappability.cpp ezBAMQC/src/ezBAMQC/Mappability.h ezBAMQC/src/ezBAMQC/ReadDup_prof.cpp ezBAMQC/src/ezBAMQC/ReadDup_prof.h ezBAMQC/src/ezBAMQC/Results.cpp ezBAMQC/src/ezBAMQC/Results.h ezBAMQC/src/ezBAMQC/parseBAM.cpp ezBAMQC/src/ezBAMQC/parseBAM.h ezBAMQC/src/ezBAMQC/rRNA.cpp ezBAMQC/src/ezBAMQC/rRNA.h ezBAMQC/src/ezBAMQC/sam.h ezBAMQC/src/htslib/INSTALL ezBAMQC/src/htslib/LICENSE ezBAMQC/src/htslib/Makefile ezBAMQC/src/htslib/NEWS ezBAMQC/src/htslib/README ezBAMQC/src/htslib/bgzf.c ezBAMQC/src/htslib/bgzip.c ezBAMQC/src/htslib/config.h ezBAMQC/src/htslib/config.mk ezBAMQC/src/htslib/config.mk.in ezBAMQC/src/htslib/configure ezBAMQC/src/htslib/configure.ac ezBAMQC/src/htslib/cram/cram.h ezBAMQC/src/htslib/cram/cram_codecs.c ezBAMQC/src/htslib/cram/cram_codecs.h ezBAMQC/src/htslib/cram/cram_decode.c ezBAMQC/src/htslib/cram/cram_decode.h ezBAMQC/src/htslib/cram/cram_encode.c ezBAMQC/src/htslib/cram/cram_encode.h ezBAMQC/src/htslib/cram/cram_index.c ezBAMQC/src/htslib/cram/cram_index.h ezBAMQC/src/htslib/cram/cram_io.c ezBAMQC/src/htslib/cram/cram_io.h ezBAMQC/src/htslib/cram/cram_samtools.c ezBAMQC/src/htslib/cram/cram_samtools.h ezBAMQC/src/htslib/cram/cram_stats.c ezBAMQC/src/htslib/cram/cram_stats.h ezBAMQC/src/htslib/cram/cram_structs.h ezBAMQC/src/htslib/cram/files.c ezBAMQC/src/htslib/cram/mFILE.c ezBAMQC/src/htslib/cram/mFILE.h ezBAMQC/src/htslib/cram/md5.c ezBAMQC/src/htslib/cram/md5.h ezBAMQC/src/htslib/cram/misc.h 
ezBAMQC/src/htslib/cram/open_trace_file.c ezBAMQC/src/htslib/cram/open_trace_file.h ezBAMQC/src/htslib/cram/os.h ezBAMQC/src/htslib/cram/pooled_alloc.c ezBAMQC/src/htslib/cram/pooled_alloc.h ezBAMQC/src/htslib/cram/rANS_byte.h ezBAMQC/src/htslib/cram/rANS_static.c ezBAMQC/src/htslib/cram/rANS_static.h ezBAMQC/src/htslib/cram/sam_header.c ezBAMQC/src/htslib/cram/sam_header.h ezBAMQC/src/htslib/cram/string_alloc.c ezBAMQC/src/htslib/cram/string_alloc.h ezBAMQC/src/htslib/cram/thread_pool.c ezBAMQC/src/htslib/cram/thread_pool.h ezBAMQC/src/htslib/cram/vlen.c ezBAMQC/src/htslib/cram/vlen.h ezBAMQC/src/htslib/cram/zfio.c ezBAMQC/src/htslib/cram/zfio.h ezBAMQC/src/htslib/faidx.5 ezBAMQC/src/htslib/faidx.c ezBAMQC/src/htslib/hfile.c ezBAMQC/src/htslib/hfile_internal.h ezBAMQC/src/htslib/hfile_irods.c ezBAMQC/src/htslib/hfile_net.c ezBAMQC/src/htslib/hts.c ezBAMQC/src/htslib/htsfile.1 ezBAMQC/src/htslib/htsfile.c ezBAMQC/src/htslib/htslib.mk ezBAMQC/src/htslib/htslib.pc.in ezBAMQC/src/htslib/htslib/bgzf.h ezBAMQC/src/htslib/htslib/faidx.h ezBAMQC/src/htslib/htslib/hfile.h ezBAMQC/src/htslib/htslib/hts.h ezBAMQC/src/htslib/htslib/hts_defs.h ezBAMQC/src/htslib/htslib/kfunc.h ezBAMQC/src/htslib/htslib/khash.h ezBAMQC/src/htslib/htslib/khash_str2int.h ezBAMQC/src/htslib/htslib/klist.h ezBAMQC/src/htslib/htslib/knetfile.h ezBAMQC/src/htslib/htslib/kseq.h ezBAMQC/src/htslib/htslib/ksort.h ezBAMQC/src/htslib/htslib/kstring.h ezBAMQC/src/htslib/htslib/regidx.h ezBAMQC/src/htslib/htslib/sam.h ezBAMQC/src/htslib/htslib/synced_bcf_reader.h ezBAMQC/src/htslib/htslib/tbx.h ezBAMQC/src/htslib/htslib/vcf.h ezBAMQC/src/htslib/htslib/vcf_sweep.h ezBAMQC/src/htslib/htslib/vcfutils.h ezBAMQC/src/htslib/htslib_vars.mk ezBAMQC/src/htslib/kfunc.c ezBAMQC/src/htslib/knetfile.c ezBAMQC/src/htslib/kstring.c ezBAMQC/src/htslib/regidx.c ezBAMQC/src/htslib/sam.5 ezBAMQC/src/htslib/sam.c ezBAMQC/src/htslib/synced_bcf_reader.c ezBAMQC/src/htslib/tabix.1 ezBAMQC/src/htslib/tabix.c 
ezBAMQC/src/htslib/tbx.c ezBAMQC/src/htslib/test/auxf#values.sam ezBAMQC/src/htslib/test/auxf.fa ezBAMQC/src/htslib/test/auxf.fa.fai ezBAMQC/src/htslib/test/c1#bounds.sam ezBAMQC/src/htslib/test/c1#clip.sam ezBAMQC/src/htslib/test/c1#pad1.sam ezBAMQC/src/htslib/test/c1#pad2.sam ezBAMQC/src/htslib/test/c1#pad3.sam ezBAMQC/src/htslib/test/c1.fa ezBAMQC/src/htslib/test/c1.fa.fai ezBAMQC/src/htslib/test/ce#1.sam ezBAMQC/src/htslib/test/ce#2.sam ezBAMQC/src/htslib/test/ce#5.sam ezBAMQC/src/htslib/test/ce#5b.sam ezBAMQC/src/htslib/test/ce#large_seq.sam ezBAMQC/src/htslib/test/ce#tag_depadded.sam ezBAMQC/src/htslib/test/ce#tag_padded.sam ezBAMQC/src/htslib/test/ce#unmap.sam ezBAMQC/src/htslib/test/ce#unmap1.sam ezBAMQC/src/htslib/test/ce#unmap2.sam ezBAMQC/src/htslib/test/ce.fa ezBAMQC/src/htslib/test/ce.fa.fai ezBAMQC/src/htslib/test/compare_sam.pl ezBAMQC/src/htslib/test/fieldarith.c ezBAMQC/src/htslib/test/fieldarith.sam ezBAMQC/src/htslib/test/hfile.c ezBAMQC/src/htslib/test/sam.c ezBAMQC/src/htslib/test/test-regidx.c ezBAMQC/src/htslib/test/test-vcf-api.c ezBAMQC/src/htslib/test/test-vcf-api.out ezBAMQC/src/htslib/test/test-vcf-sweep.c ezBAMQC/src/htslib/test/test-vcf-sweep.out ezBAMQC/src/htslib/test/test.pl ezBAMQC/src/htslib/test/test_view.c ezBAMQC/src/htslib/test/test_view.pl ezBAMQC/src/htslib/test/xx#blank.sam ezBAMQC/src/htslib/test/xx#large_aux.sam ezBAMQC/src/htslib/test/xx#large_aux2.sam ezBAMQC/src/htslib/test/xx#minimal.sam ezBAMQC/src/htslib/test/xx#pair.sam ezBAMQC/src/htslib/test/xx#rg.sam ezBAMQC/src/htslib/test/xx#triplet.sam ezBAMQC/src/htslib/test/xx#unsorted.sam ezBAMQC/src/htslib/test/xx.fa ezBAMQC/src/htslib/test/xx.fa.fai ezBAMQC/src/htslib/vcf.5 ezBAMQC/src/htslib/vcf.c ezBAMQC/src/htslib/vcf_sweep.c ezBAMQC/src/htslib/vcfutils.c ezBAMQC/src/htslib/version.h ezBAMQC/test-data/exp_data/hg19_rRNA.bed ezBAMQC/test-data/exp_data/hg19_refGene.gtf.tar.gz ezBAMQC/test-data/exp_data/treat1.bam ezBAMQC/test-data/exp_data/treat2.bam 
ezBAMQC/test-data/exp_data/treat3.bam ezBAMQC/test-data/output/data/smp0.ReadLen_plot.r ezBAMQC/test-data/output/data/smp0.TransCoverage.r ezBAMQC/test-data/output/data/smp0.clipping_profile.r ezBAMQC/test-data/output/data/smp0.clipping_profile.xls ezBAMQC/test-data/output/data/smp0.geneAbundance.txt ezBAMQC/test-data/output/data/smp0.geneBodyCoverage.txt ezBAMQC/test-data/output/data/smp0.geneBodyCoverage_plot.r ezBAMQC/test-data/output/data/smp0.mapq_profile.r ezBAMQC/test-data/output/data/smp0.mapq_profile.xls ezBAMQC/test-data/output/data/smp0.read_distr.r ezBAMQC/test-data/output/data/smp0.read_distr_pie.r ezBAMQC/test-data/output/data/smp0.readlen_profile.xls ezBAMQC/test-data/output/data/smp0.res.txt ezBAMQC/test-data/output/data/smp1.ReadLen_plot.r ezBAMQC/test-data/output/data/smp1.TransCoverage.r ezBAMQC/test-data/output/data/smp1.clipping_profile.r ezBAMQC/test-data/output/data/smp1.clipping_profile.xls ezBAMQC/test-data/output/data/smp1.geneAbundance.txt ezBAMQC/test-data/output/data/smp1.geneBodyCoverage.txt ezBAMQC/test-data/output/data/smp1.geneBodyCoverage_plot.r ezBAMQC/test-data/output/data/smp1.mapq_profile.r ezBAMQC/test-data/output/data/smp1.mapq_profile.xls ezBAMQC/test-data/output/data/smp1.read_distr.r ezBAMQC/test-data/output/data/smp1.read_distr_pie.r ezBAMQC/test-data/output/data/smp1.readlen_profile.xls ezBAMQC/test-data/output/data/smp1.res.txt ezBAMQC/test-data/output/data/smp2.ReadLen_plot.r ezBAMQC/test-data/output/data/smp2.TransCoverage.r ezBAMQC/test-data/output/data/smp2.clipping_profile.r ezBAMQC/test-data/output/data/smp2.clipping_profile.xls ezBAMQC/test-data/output/data/smp2.geneAbundance.txt ezBAMQC/test-data/output/data/smp2.geneBodyCoverage.txt ezBAMQC/test-data/output/data/smp2.geneBodyCoverage_plot.r ezBAMQC/test-data/output/data/smp2.mapq_profile.r ezBAMQC/test-data/output/data/smp2.mapq_profile.xls ezBAMQC/test-data/output/data/smp2.read_distr.r ezBAMQC/test-data/output/data/smp2.read_distr_pie.r 
ezBAMQC/test-data/output/data/smp2.readlen_profile.xls ezBAMQC/test-data/output/data/smp2.res.txt ezBAMQC/test-data/output/data/smp_correlation.r ezBAMQC/test-data/output/ezBAMQC_output.html ezBAMQC/test-data/output/figs/smp0.TransCoverage.png ezBAMQC/test-data/output/figs/smp0.clipping_profile.png ezBAMQC/test-data/output/figs/smp0.geneBodyCoverage.png ezBAMQC/test-data/output/figs/smp0.mapq_profile.png ezBAMQC/test-data/output/figs/smp0.read_distr.png ezBAMQC/test-data/output/figs/smp0.read_distr_pie.png ezBAMQC/test-data/output/figs/smp0.readlen_profile.png ezBAMQC/test-data/output/figs/smp1.TransCoverage.png ezBAMQC/test-data/output/figs/smp1.clipping_profile.png ezBAMQC/test-data/output/figs/smp1.geneBodyCoverage.png ezBAMQC/test-data/output/figs/smp1.mapq_profile.png ezBAMQC/test-data/output/figs/smp1.read_distr.png ezBAMQC/test-data/output/figs/smp1.read_distr_pie.png ezBAMQC/test-data/output/figs/smp1.readlen_profile.png ezBAMQC/test-data/output/figs/smp2.TransCoverage.png ezBAMQC/test-data/output/figs/smp2.clipping_profile.png ezBAMQC/test-data/output/figs/smp2.geneBodyCoverage.png ezBAMQC/test-data/output/figs/smp2.mapq_profile.png ezBAMQC/test-data/output/figs/smp2.read_distr.png ezBAMQC/test-data/output/figs/smp2.read_distr_pie.png ezBAMQC/test-data/output/figs/smp2.readlen_profile.png ezBAMQC/test-data/output/figs/smp_corr.png ezBAMQC/test-data/output/figs/smp_cov.png ezBAMQC/test-data/output/figs/smp_qual.png ezBAMQC/test-data/output/figs/smp_reproducibility.png ezBAMQC/test-data/output/figs/smp_var.png |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/MANIFEST.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/MANIFEST.in Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,20 @@ + +# BASIC INCLUDES +include setup.py +include ezBAMQC +include README.rst + +# DOC INCLUDES +include doc/* + +# SRC ezBAMQC INCLUDES +include src/ezBAMQC/* + +# SRC HTSLIB INCLUDES +include src/htslib/* + +#SRC HTSLIB HEADER INCLUDES +include src/htslib/htslib/* + +#SRC HTSLIB CRAM INCLUDES +include src/htslib/cram/* |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/Makefile Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,133 @@ +# Makefile for ezBAMQC, utilities for the Sequence Alignment/Map format. +# +# Version 0.6.5 +# +# Copyright (C) 2015 Bioinformatics Shared Resource, CSHL. +# Portions copyright (C) 2015 Cold Spring Harbor Laboratory. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +CC = g++ +CPPFLAGS = $(DFLAGS) $(INCLUDES) +CFLAGS = -g -fpermissive -Wall -O9 -O3 -std=c++11 -fPIC +LDFLAGS = -O9 -fpermissive +LDLIBS = +DFLAGS= -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_CURSES_LIB=1 +LOBJS= src/ezBAMQC/GeneFeatures.o src/ezBAMQC/rRNA.o src/ezBAMQC/IntervalTree.o src/ezBAMQC/InnerDist_prof.o \ + src/ezBAMQC/Results.o src/ezBAMQC/Mappability.o src/ezBAMQC/Coverage_prof.o src/ezBAMQC/parseBAM.o + +INCLUDES= -I$(HTSDIR) +LIBCURSES= -lcurses # -lXCurses + +prefix = /usr/local +exec_prefix = $(prefix) +bindir = $(exec_prefix)/bin +mandir = $(prefix)/share/man +man1dir = $(mandir)/man1 + +MKDIR_P = mkdir -p +#INSTALL = install -p +#INSTALL_PROGRAM = $(INSTALL) +#INSTALL_DATA = $(INSTALL) -m 644 +#INSTALL_DIR = $(MKDIR_P) -m 755 + + +PROGRAMS = libBAMqc.so + + +all: $(PROGRAMS) + + +# Adjust $(HTSDIR) to point to your top-level htslib directory +HTSDIR = ./src/htslib +include $(HTSDIR)/htslib.mk +HTSLIB = $(HTSDIR)/libhts.a +BGZIP = $(HTSDIR)/bgzip + + +PACKAGE_VERSION = 0.5 + + +.SUFFIXES: .cpp .o + +.cpp.o: + $(CC) $(CFLAGS) $(CPPFLAGS) -c -o $@ $< + + +#lib:libbam.a + +#libbam.a:$(LOBJS) +# $(AR) -csru $@ $(LOBJS) + +#libBAMqc.so: $(LOBJS) $(HTSLIB) +# $(CC) -pthread $(LDFLAGS) -o $@ $(AOBJS) $(HTSLIB) $(LDLIBS) $(LIBCURSES) -lm -lz + +libBAMqc.so: $(LOBJS) $(HTSLIB) +# $(CC) -shared -Wl,-soname, libBAMqc.so.$(PACKAGE_VERSION) -lpthread $(LDFLAGS) -o $@ $(LOBJS) $(HTSLIB) $(LDLIBS) -lz -lm + $(CC) -shared -lpthread $(LDFLAGS) -o $@ $(LOBJS) $(HTSLIB) $(LDLIBS) -lz -lm +# ln -sf $@ libBAMqc.so.$(PACKAGE_VERSION) + +Constants_h = src/ezBAMQC/Constants.h +IntervalTree_h = src/ezBAMQC/IntervalTree.h $(Constants_h) +GeneFeatures_h = src/ezBAMQC/GeneFeatures.h $(Constants_h) +rRNA_h = src/ezBAMQC/rRNA.h $(IntervalTree_h) $(GeneFeatures_h) +Results_h = src/ezBAMQC/Resualts.h +Mappability_h = src/ezBAMQC/Mappability.h $(Constants_h) +InnerDist_prof_h = src/ezBAMQC/InnerDist_prof.h $(GeneFeatures_h) +Coverage_prof_h = src/ezBAMQC/Coverage_prof.h 
$(GeneFeatures_h) +parseBAM_h = src/ezBAMQC/parseBAM.h + + +IntervalTree.o: src/ezBAMQC/IntervalTree.cpp $(IntervalTree_h) +GeneFeatures.o: src/ezBAMQC/GeneFeatures.cpp $(GeneFeatures_h) +rRNA.o: src/ezBAMQC/rRNA.cpp $(rRNA_h) +Results.o: src/ezBAMQC/Results.cpp $(Results_h) +Mappability.o: src/ezBAMQC/Mappability.cpp $(Mappability_h) $(htslib_sam_h) +InnerDist_prof.o: src/ezBAMQC/InnerDist_prof.cpp $(InnerDist_prof_h) $(htslib_sam_h) +Coverage_prof.o: src/ezBAMQC/Coverage_prof.cpp $(Coverage_prof_h) +parseBAM.o: src/ezBAMQC/parseBAM.cpp $(parseBAM_h) $(htslib_sam_h) $(GeneFeatures_h) $(rRNA_h) $(Mappability_h) $(Coverage_prof_h) $(InnerDist_prof_h) + + +#install: $(PROGRAMS) $(BUILT_MISC_PROGRAMS) +# $(INSTALL_DIR) $(DESTDIR)$(bindir) $(DESTDIR)$(man1dir) +# $(INSTALL_PROGRAM) $(PROGRAMS) $(MISC_PROGRAMS) $(DESTDIR)$(bindir) +# $(INSTALL_DATA) samtools.1 $(DESTDIR)$(man1dir) + + +mostlyclean: + -rm -f src/*.o + +clean: mostlyclean + -rm -f $(PROGRAMS) + +distclean: clean + -rm -f TAGS + +clean-all: clean + + +tags: + ctags -f TAGS *.[ch] misc/*.[ch] + + +force: + + +.PHONY: all clean clean-all distclean force +.PHONY: mostlyclean tags |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/README.rst Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,184 @@ +.. image:: https://raw.githubusercontent.com/mhammell-laboratory/bamqc/master/doc/bamqc-icon.png + :width: 200 px + :alt: generated at codeology.braintreepayments.com/mhammell-laboratory/bamqc + :align: right + :target: http://codeology.braintreepayments.com/mhammell-laboratory/bamqc + +===== +ezBAMQC +===== +*"ezBAMQC, a tool to check the quality of mapped next generation sequencing files."* + +:Description: + + ezBAMQC is a tool to check the quality of either one or many mapped next-generation-sequencing datasets. It conducts comprehensive evaluations of aligned sequencing data from multiple aspects including: clipping profile, mapping quality distribution, mapped read length distribution, genomic/transcriptomic mapping distribution, inner distance distribution (for paired-end reads), ribosomal RNA contamination, transcript 5’ and 3’ end bias, transcription dropout rate, sample correlations, sample reproducibility, sample variations. It outputs a set of tables and plots and one HTML page that contains a summary of the results. Many metrics are designed for RNA-seq data specifically, but ezBAMQC can be applied to any mapped sequencing dataset such as RNA-seq, CLIP-seq, GRO-seq, ChIP-seq, DNA-seq and so on. :: + +:Links: + + `Github Page <https://github.com/mhammell-laboratory/bamqc>`_ + + `Pypi Page <https://pypi.python.org/pypi/ezBAMQC>`_ + + `MHammell Lab <http://hammelllab.labsites.cshl.edu/software>`_ + +:Authors: + Ying Jin, David Molik, and Molly Hammell + +:Version: 0.6.5 + +:Contact: + Ying Jin (yjin@cshl.edu) + +Installation guide for ezBAMQC for from source installs +===================================================== + +When installing ezBAMQC there are several options, but the main point is: since ezBAMQC uses C++ STD 11 you'll need a version of GCC that can support that, this useally means 4.8 or 4.9. beyond that, you'll need Python, R and Corrplot for interfacing with the C code. 
+ +:Installation: + `Source Code <https://github.com/mhammell-laboratory/ezBAMQC/releases>`_ + + `Pypi <https://pypi.python.org/pypi?:action=display&name=ezBAMQC>`_ + +:Prerequisites: + * `python2.7 <https://www.python.org/download/releases/2.7/>`_ + * `R <https://www.r-project.org/>`_ + * `corrplot <https://cran.r-project.org/web/packages/corrplot/>`_ + * `GCC 4.8.1 or greater <https://gcc.gnu.org/gcc-4.8/>`_ GCC 4.9.1 or greater is recommended for PyPi install + +:Notes: + * While there are multiple methods of installing the prerequisites it may help to look at (if using a yum based linux distro):* + * `Devtoolset-3 <https://access.redhat.com/documentation/en-US/Red_Hat_Developer_Toolset/3/html/User_Guide/sect-Red_Hat_Developer_Toolset-Install.html>`_ for GCC compilers + * `IUS <https://ius.io/>`_ for Python2.7 + * `Software Collections <https://www.softwarecollections.org/>`_ for collections of software (like devtoolset 3 or python) + * `rpmfinder <https://www.rpmfind.net/>`_ for searching rpms across multiple systems + +Setup +===== + +1) Make sure that the GCC compiler is in your PATH: + +:: + + export PATH=/path/to/gcc:$PATH + +2) Make sure that python2.7 is in your PYTHONPATH: + +:: + + export PYTHONPATH=/path/to/python2.7/site-packages:$PYTHONPATH + +3) There are three methods of installation of ezBAMQC, from source, from setup.py, and from pypi, once prerequisites are set up. + +From Source +~~~~~~~~~~~ + +1) Download source + +2) Unpack tarball and go to the directory of the package: + +:: + + tar xvfz bamqc-0.6.6.tar.gz + + cd bamqc-0.6.6 + +3) Run make: + +:: + + make + +From Setup.py +~~~~~~~~~~~~~ + +:: + + python2.7 setup.py install + +From Pypi +~~~~~~~~~ + +:: + + pip2.7 install BAMqc + +Usage +===== + +:: + + ezBAMQC [-h] -i alignment_files [alignment_files ...] 
-r [refgene] + [-f [attrID]] [--rRNA [rRNA]] -o [dir] [--stranded [stranded]] + [-q [mapq]] [-l labels [labels ...]] [-t NUMTHREADS] + +optional arguments: + +:: + + -h, --help show this help message and exit. + -i, --inputFile alignment files. Could be multiple SAM/BAM files separated by space. Required. + -r, --refgene gene annotation file in GTF format. Required + -f the read summation at which feature level in the GTF file. DEFAULT: gene_id. + --rRNA rRNA coordinates in BED format. + -o, --outputDir output directory. Required. + --stranded strandness of the library? + yes : sense stranded + reverse : reverse stranded + no : not stranded + DEFAULT: yes. + -q, --mapq Minimum mapping quality (phred scaled) for an alignment to be called uniquely mapped. DEFAULT:30 + -l, --label Labels of input files. DEFAULT:smp1 smp2 ... + -t, --threads Number of threads to use. DEFAULT:1 + +Example: + +:: + + ezBAMQC -i test-data/exp_data/treat1.bam test-data/exp_data/treat2.bam test-data/exp_data/treat3.bam -r test-data/exp_data/hg19_refGene.gtf -q 30 --rRNA test-data/exp_data/hg19_rRNA.bed -o exp_output2 + + Please find the example output from folder test-data. + +FAQ +==== +Q: Why use ezBAMQC? + +A: ezBAMQC is efficient and easy to use. With one command line, it reports a comprehensive evaluation of the data with a set of plots and tables. The ability to assess multiple samples together with high efficiency makes it especially useful in cases where there are a large number of samples from the same condition, genotype, or treatment. ezBAMQC was written in C++ and supports multithreading. A mouse RNA-seq sample with 120M alignments can be done in 8 minutes with 5 threads. + +Q: Why does the total number of reads reported by ezBAMQC not match samtools flagstat? + +A: The difference is because of non-uniquely mapped reads or multiply aligned reads (multi-reads). 
Samtools flagstat counts each multiple alignment as a different read, but ezBAMQC counts reads according to the read ID, i.e., each individual read will be counted once regardless of whether it is a uniquely mapped read or a multi-read. + +Q: What are "Low Quality Reads"? + +A: Reads marked as qc fail according to SAM format or reads with mapping quality lower than the value set by the option -q will be considered as "Low Quality Reads". + +Q: How does the setting of option -q alter the results? + +A: Reads with low quality, i.e., those that did not pass the -q cutoff, are only counted in Total Reads, Mapped Reads, and the Mappability by mapping quality plot. The rest of the report does not include low quality reads. + +Q: Are multi-reads (non-uniquely mapped reads) considered in Read distribution and gene quantification? + +A: No. Only uniquely mapped reads are counted. + + +Acknowledgements +================ + +#) Samtools contributors +#) Users' valuable feedback + +Copying & Distribution +====================== + +ezBAMQC is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but *WITHOUT ANY WARRANTY*; without even the implied warranty of +*MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE*. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with ezBAMQC. If not, see `this website <http://www.gnu.org/licenses/>`_ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/doc/CONTACTS --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/doc/CONTACTS Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,1 @@ +Ying Jin: yjin@cshl.edu |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/doc/COPYING --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/doc/COPYING Thu Mar 24 17:12:52 2016 -0400 |
| b |
| b'@@ -0,0 +1,619 @@\n+ GNU GENERAL PUBLIC LICENSE\n+ Version 3, 29 June 2007\n+\n+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>\n+ Everyone is permitted to copy and distribute verbatim copies\n+ of this license document, but changing it is not allowed.\n+\n+ Preamble\n+\n+ The GNU General Public License is a free, copyleft license for\n+software and other kinds of works.\n+\n+ The licenses for most software and other practical works are designed\n+to take away your freedom to share and change the works. By contrast,\n+the GNU General Public License is intended to guarantee your freedom to\n+share and change all versions of a program--to make sure it remains free\n+software for all its users. We, the Free Software Foundation, use the\n+GNU General Public License for most of our software; it applies also to\n+any other work released this way by its authors. You can apply it to\n+your programs, too.\n+\n+ When we speak of free software, we are referring to freedom, not\n+price. Our General Public Licenses are designed to make sure that you\n+have the freedom to distribute copies of free software (and charge for\n+them if you wish), that you receive source code or can get it if you\n+want it, that you can change the software or use pieces of it in new\n+free programs, and that you know you can do these things.\n+\n+ To protect your rights, we need to prevent others from denying you\n+these rights or asking you to surrender the rights. Therefore, you have\n+certain responsibilities if you distribute copies of the software, or if\n+you modify it: responsibilities to respect the freedom of others.\n+\n+ For example, if you distribute copies of such a program, whether\n+gratis or for a fee, you must pass on to the recipients the same\n+freedoms that you received. You must make sure that they, too, receive\n+or can get the source code. 
And you must show them these terms so they\n+know their rights.\n+\n+ Developers that use the GNU GPL protect your rights with two steps:\n+(1) assert copyright on the software, and (2) offer you this License\n+giving you legal permission to copy, distribute and/or modify it.\n+\n+ For the developers\' and authors\' protection, the GPL clearly explains\n+that there is no warranty for this free software. For both users\' and\n+authors\' sake, the GPL requires that modified versions be marked as\n+changed, so that their problems will not be attributed erroneously to\n+authors of previous versions.\n+\n+ Some devices are designed to deny users access to install or run\n+modified versions of the software inside them, although the manufacturer\n+can do so. This is fundamentally incompatible with the aim of\n+protecting users\' freedom to change the software. The systematic\n+pattern of such abuse occurs in the area of products for individuals to\n+use, which is precisely where it is most unacceptable. Therefore, we\n+have designed this version of the GPL to prohibit the practice for those\n+products. If such problems arise substantially in other domains, we\n+stand ready to extend this provision to those domains in future versions\n+of the GPL, as needed to protect the freedom of users.\n+\n+ Finally, every program is threatened constantly by software patents.\n+States should not allow patents to restrict development and use of\n+software on general-purpose computers, but in those that do, we wish to\n+avoid the special danger that patents applied to a free program could\n+make it effectively proprietary. To prevent this, the GPL assures that\n+patents cannot be used to render the program non-free.\n+\n+ The precise terms and conditions for copying, distribution and\n+modification follow.\n+\n+ TERMS AND CONDITIONS\n+\n+ 0. 
Definitions.\n+\n+ "This License" refers to version 3 of the GNU General Public License.\n+\n+ "Copyright" also means copyright-like laws that apply to other kinds of\n+works, such as semiconductor masks.\n+\n+ "The Program" refers to a'..b' If you cannot convey a\n+covered work so as to satisfy simultaneously your obligations under this\n+License and any other pertinent obligations, then as a consequence you may\n+not convey it at all. For example, if you agree to terms that obligate you\n+to collect a royalty for further conveying from those to whom you convey\n+the Program, the only way you could satisfy both those terms and this\n+License would be to refrain entirely from conveying the Program.\n+\n+ 13. Use with the GNU Affero General Public License.\n+\n+ Notwithstanding any other provision of this License, you have\n+permission to link or combine any covered work with a work licensed\n+under version 3 of the GNU Affero General Public License into a single\n+combined work, and to convey the resulting work. The terms of this\n+License will continue to apply to the part which is the covered work,\n+but the special requirements of the GNU Affero General Public License,\n+section 13, concerning interaction through a network will apply to the\n+combination as such.\n+\n+ 14. Revised Versions of this License.\n+\n+ The Free Software Foundation may publish revised and/or new versions of\n+the GNU General Public License from time to time. Such new versions will\n+be similar in spirit to the present version, but may differ in detail to\n+address new problems or concerns.\n+\n+ Each version is given a distinguishing version number. If the\n+Program specifies that a certain numbered version of the GNU General\n+Public License "or any later version" applies to it, you have the\n+option of following the terms and conditions either of that numbered\n+version or of any later version published by the Free Software\n+Foundation. 
If the Program does not specify a version number of the\n+GNU General Public License, you may choose any version ever published\n+by the Free Software Foundation.\n+\n+ If the Program specifies that a proxy can decide which future\n+versions of the GNU General Public License can be used, that proxy\'s\n+public statement of acceptance of a version permanently authorizes you\n+to choose that version for the Program.\n+\n+ Later license versions may give you additional or different\n+permissions. However, no additional obligations are imposed on any\n+author or copyright holder as a result of your choosing to follow a\n+later version.\n+\n+ 15. Disclaimer of Warranty.\n+\n+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY\n+APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT\n+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY\n+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,\n+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n+PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM\n+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\n+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n+\n+ 16. Limitation of Liability.\n+\n+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\n+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\n+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\n+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\n+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\n+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\n+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\n+SUCH DAMAGES.\n+\n+ 17. 
Interpretation of Sections 15 and 16.\n+\n+ If the disclaimer of warranty and limitation of liability provided\n+above cannot be given local legal effect according to their terms,\n+reviewing courts shall apply local law that most closely approximates\n+an absolute waiver of all civil liability in connection with the\n+Program, unless a warranty or assumption of liability accompanies a\n+copy of the Program in return for a fee.\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/doc/INSTALL --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/doc/INSTALL Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,74 @@ + +Installation guide for ezBAMQC for from source installs +===================================================== + +When installing ezBAMQC there are several options, but the main point is: since ezBAMQC uses C++ STD 11 you'll need a version of GCC that can support that, this useally means 4.8 or 4.9. beyond that, you'll need Python, R and Corrplot for interfacing with the C code. + +:Intallation: + `Source Code <https://github.com/mhammell-laboratory/bamqc/archive/0.6.4.tar.gz>`_ + + `Pypi <https://pypi.python.org/pypi?:action=display&name=BAMQC&version=0.6.4>`_ + +:Prerequisites: + * `python2.7 <https://www.python.org/download/releases/2.7/>`_ + * `R <https://www.r-project.org/>`_ + * `corrplot <https://cran.r-project.org/web/packages/corrplot/>`_ + * `GCC 4.8.1 or greater <https://gcc.gnu.org/gcc-4.8/>`_ GCC 4.9.1 or greater is recomended for PyPi install + +:Notes: + * While there are multiple methods of installing the prerequistes it may help to look at (if using a yum based linux distro):* + * `Devtoolset-3 <https://access.redhat.com/documentation/en-US/Red_Hat_Developer_Toolset/3/html/User_Guide/sect-Red_Hat_Developer_Toolset-Install.html>`_ for GCC compilers + * `IUS <https://ius.io/>`_ for Python2.7 + * `Software Collections <https://www.softwarecollections.org/>`_ for collections of software (like devtoolset 3 or python) + * `rpmfinder <https://www.rpmfind.net/>`_ for searching rpms across mutliple systems + +Setup +===== + +1) Make sure that the GCC comiler is in your PATH: + +:: + + export PATH=/path/to/gcc:$PATH + +2) Make sure that python2.7 is in your PYTHONPATH: + +:: + + export PYTHONPATH=/path/to/python2.7/site-packages:$PYTHONPATH + +3) There are three methods of installation of ezBAMQC, from source, from setup.py, and from pypi, once prequistes are setup. 
+ +From Source +~~~~~~~~~~~ + +1) Download source + +2) Unpack tarball and go to the directory of the package: + +:: + + tar xvfz bamqc-0.6.4.tar.gz + + cd bamqc-0.6.4 + +3) Run make: + +:: + + make + +From Setup.py +~~~~~~~~~~~~~ + +:: + + python2.7 setup.py install + +From Pypi +~~~~~~~~~ + +:: + + pip2.7 install BAMqc + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/doc/THANKS --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/doc/THANKS Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,3 @@ +Acknowledgements go to: +1) Samtools contributors +2) Users' valuable feedback |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/doc/bamqc-icon.png |
| b |
| Binary file ezBAMQC/doc/bamqc-icon.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/ezBAMQC --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/ezBAMQC Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,1832 @@\n+#!/usr/bin/env python2.7\n+\n+\'\'\'\n+\n+Created on July 24, 2015\n+\n+\n+\n+@author: Ying Jin\n+\n+@contact: yjin@cshl.edu\n+\n+@status: \n+\n+@version: 0.6.6\n+\n+\'\'\'\n+\n+import argparse, subprocess,traceback\n+\n+import sys, os, time, string, re\n+\n+import warnings, logging\n+\n+import collections\n+\n+import math, copy\n+\n+import sets\n+\n+from time import strftime\n+\n+from datetime import datetime\n+\n+import ctypes\n+\n+import multiprocessing,threading,Queue\n+\n+\n+\n+def locate(name, path):\n+\n+ for root, dirs, files in os.walk(path):\n+\n+ if name in files:\n+\n+ return os.path.join(root, name)\n+\n+\n+\n+def locBAMqc(loc):\n+\n+ for p in os.environ[loc].split(os.pathsep):\n+\n+ potential_file = locate(\'libBAMqc.so\',p)\n+\n+ if potential_file:\n+\n+ return potential_file\n+\n+\n+\n+in_path = locBAMqc(\'PATH\')\n+\n+in_pythonpath = locBAMqc(\'PYTHONPATH\')\n+\n+in_local = locate(\'libBAMqc.so\',\'./\')\n+\n+\n+\n+if in_local:\n+\n+ so=ctypes.CDLL(in_local)\n+\n+elif in_path:\n+\n+ so=ctypes.CDLL(in_path)\n+\n+elif in_pythonpath:\n+\n+ so=ctypes.CDLL(in_pythonpath)\n+\n+else:\n+\n+ print "can not find libBAMqc.so, you\'re not setup correctly, exiting\\n"\n+\n+ sys.exit()\n+\n+\n+\n+if sys.version_info[0] != 2 or sys.version_info[1] != 7:\n+\n+ print >>sys.stderr, "\\nYou are using python" + str(sys.version_info[0]) + \'.\' + str(sys.version_info[1]) + " ezBAMQC needs python2.7!\\n"\n+\n+ sys.exit()\n+\n+ \n+\n+\n+\n+class pyResults :\n+\n+ \n+\n+ def __init__(self):\n+\n+ self.filename = ""\n+\n+ self.is_pairEnd = False\n+\n+ self.clipping_plot_file = ""\n+\n+ self.mapq_plot_file = ""\n+\n+ self.mapq_file = ""\n+\n+ self.read_cov_plot_file = ""\n+\n+ self.trans_cov_plot_file = ""\n+\n+ self.insert_plot_file = ""\n+\n+ self.insert_file = ""\n+\n+ self.read_dist_plot_file1 = ""\n+\n+ self.read_dist_plot_file2 = ""\n+\n+ self.read_dup_plot_file = ""\n+\n+ self.readLen_plot_file = ""\n+\n+ self.geneCount_file = ""\n+\n+ \n+\n+ 
self.seqDeDup_percent = 0\n+\n+ self.posDeDup_percent = 0\n+\n+ \n+\n+ self.no_clipping = False\n+\n+ self.no_rRNA = False\n+\n+ \n+\n+ \n+\n+ self.total_reads = 0\n+\n+ self.uniq_mapped_reads = 0\n+\n+ self.multi_mapped_reads = 0\n+\n+ self.unmapped_reads = 0\n+\n+ self.low_qual = 0\n+\n+ self.low_qual_read1 = 0\n+\n+ self.low_qual_read2 = 0\n+\n+ self.pcr_dup = 0\n+\n+ \n+\n+ self.unmapped_read1 = 0\n+\n+ self.unmapped_read2 = 0\n+\n+ self.mapped_read1 = 0\n+\n+ self.mapped_read2 = 0 \n+\n+ self.forward_read = 0\n+\n+ self.reverse_read = 0\n+\n+ self.paired_reads = 0\n+\n+ \n+\n+ self.mapped_plus_minus = 0\n+\n+ self.mapped_plus_plus = 0\n+\n+ self.mapped_minus_plus = 0\n+\n+ self.mapped_minus_minus = 0\n+\n+ \n+\n+ self.ins_read = 0\n+\n+ self.del_read = 0\n+\n+ \n+\n+ self.noSplice = 0\n+\n+ self.splice = 0\n+\n+ self.paired_diff_chrom = 0\n+\n+ \n+\n+ self.rRNA_read = 0\n+\n+ self.intron_read = 0\n+\n+ self.cds_exon_read = 0\n+\n+ self.utr_5_read = 0\n+\n+ self.utr_3_read = 0\n+\n+ self.intergenic_up1kb_read = 0\n+\n+ self.intergenic_down1kb_read = 0\n+\n+ self.intergenic_read = 0\n+\n+\n+\n+\n+\n+def read_in_res(cur_data_dir,label):\n+\n+ res = pyResults()\n+\n+ fname = cur_data_dir+label+\'.res.txt\'\n+\n+ \n+\n+ try :\n+\n+ if os.path.exists(fname) :\n+\n+ f = open(fname,\'r\')\n+\n+ for line in f :\n+\n+ line = line.strip()\n+\n+ (name,value) = line.split(\'\\t\')\n+\n+ if name == "rRNA_read" :\n+\n+ res.rRNA_read = int(value)\n+\n+ if name == "low_qual_read1" :\n+\n+ res.low_qual_read1 = int(value)\n+\n+ if name == "low_qual_read2" :\n+\n+ res.low_qual_read2 = int(value)\n+\n+ '..b'ting insert size distribution. DEFAULT:-250\')\n+\n+ #parser.add_argument(\'-u\', \'--upperBound\', metavar = \'ub\', dest=\'ub\', nargs = \'?\', default=250, type=int,\n+\n+ # help = \'Upper bound for plotting insert size distribution. 
DEFAULT:250\')\n+\n+ #parser.add_argument(\'-s\', \'--stepSize\', metavar = \'stepsize\', dest=\'step_size\', nargs = \'?\', default=5, type=int,\n+\n+ # help = \'Step size for plotting insert size distribution. DEFAULT:5\')\n+\n+ parser.add_argument(\'-l\',\'--label\',metavar = \'labels\', dest = \'labels\', nargs = \'+\', \n+\n+ help = \'Labels of input files. DEFAULT:smp1 smp2 ...\')\n+\n+ #parser.add_argument(\'-p\', \'--processes\', dest=\'numProc\', default=1, type=int,help=\'Number of processes to use .DEFAULT:1\')\n+\n+ parser.add_argument(\'-t\', \'--threads\', dest=\'numThreads\', default=1, type=int,help=\'Number of threads to use .DEFAULT:1\')\n+\n+\n+\n+ return parser\n+\n+\n+\n+\n+\n+\n+\n+def read_opts(parser):\n+\n+ \'\'\' object parser contains parsed options \'\'\'\n+\n+ \n+\n+ args = parser.parse_args()\n+\n+ args.numProc = 1\n+\n+ # logging object\n+\n+ logging.basicConfig(level=20,\n+\n+ format=\'%(levelname)-5s @ %(asctime)s: %(message)s \',\n+\n+ datefmt=\'%a, %d %b %Y %H:%M:%S\',\n+\n+ stream=sys.stderr,\n+\n+ filemode="w"\n+\n+ )\n+\n+ \n+\n+ #treatment files\n+\n+ if args.labels is not None :\n+\n+ if len(args.labels) >0 and len(args.ifiles) != len(args.labels) :\n+\n+ logging.error("Number of labels does not match with the number of samples.\\n")\n+\n+ sys.exit(1)\n+\n+ \n+\n+ \n+\n+ if args.labels is None :\n+\n+ args.labels = []\n+\n+ \n+\n+ for i in range(len(args.ifiles)) :\n+\n+ if not os.path.isfile(args.ifiles[i]) :\n+\n+ logging.error("No such file: %s !\\n" % (args.ifiles[i]))\n+\n+ sys.exit(1)\n+\n+ if len(args.labels) < len(args.ifiles) : \n+\n+ args.labels.append("smp"+str(i))\n+\n+ \n+\n+\n+\n+# if args.trIdx is None :\n+\n+# logging.warning("Trancsriptome index file is not available.\\n")\n+\n+#\n+\n+# else :\n+\n+# if not os.path.isfile(args.trIdx) :\n+\n+# logging.error("No such file : %s !\\n" %(args.trIdx))\n+\n+# sys.exit(1)\n+\n+\n+\n+ if args.stranded not in [\'yes\', \'no\', \'reverse\'] :\n+\n+ logging.error("Does 
not support such stranded value: %s !\\n" % (args.stranded))\n+\n+ sys.exit(1)\n+\n+ \n+\n+ if args.mapq is None :\n+\n+ args.mapq = 30\n+\n+ \n+\n+ if args.rRNA_model is not None and args.rRNA_model != "":\n+\n+ if not os.path.isfile(args.rRNA_model) :\n+\n+ logging.error("No such file : %s \\n" %(args.rRNA_model))\n+\n+ sys.exit(1)\n+\n+\n+\n+ if args.rRNA_model is None :\n+\n+ args.rRNA_model = ""\n+\n+\n+\n+ if args.attrID is None or args.attrID == "":\n+\n+ logging.error("please specify the read summation at which feature level in the GTF file\\n")\n+\n+ sys.exit(1)\n+\n+\n+\n+ if args.ref_gene_model is None :\n+\n+ logging.error("reference gene model is required.\\n")\n+\n+ sys.exit(1)\n+\n+ else :\n+\n+ if not os.path.isfile(args.ref_gene_model) :\n+\n+ logging.error("No such file : %s !\\n" %(args.ref_gene_model))\n+\n+ sys.exit(1)\n+\n+ \n+\n+ # logging alias\n+\n+ args.critical = logging.critical\n+\n+ args.error = logging.error\n+\n+ args.warn = logging.warning\n+\n+ args.debug = logging.debug\n+\n+ args.info = logging.info \n+\n+ \n+\n+ return args \n+\n+\n+\n+\n+\n+if __name__ == \'__main__\':\n+\n+ try:\n+\n+ start_time = time.time()\n+\n+ main()\n+\n+ end_time = time.time()\n+\n+ sys.stderr.write("Elapsed time was " + str(round((end_time - start_time) / 60, 2)) + " minutes.\\n")\n+\n+ except KeyboardInterrupt:\n+\n+ sys.stderr.write("User interrupt !\\n")\n+\n+ sys.exit(0)\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/setup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/setup.py Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,226 @@\n+#!/usr/bin/env python2.7\n+# Setup for ezBAMQC, utilities for the Sequence Alignment/Map format.\n+#\n+# Copyright (C) 2015 Bioinformatics Shared Resource, CSHL.\n+# Portions copyright (C) 2015 Cold Spring Harbor Laboratory.\n+#\n+# Permission is hereby granted, free of charge, to any person obtaining a copy\n+# of this software and associated documentation files (the "Software"), to deal\n+# in the Software without restriction, including without limitation the rights\n+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+# copies of the Software, and to permit persons to whom the Software is\n+# furnished to do so, subject to the following conditions:\n+#\n+# The above copyright notice and this permission notice shall be included in\n+# all copies or substantial portions of the Software.\n+#\n+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL\n+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+# DEALINGS IN THE SOFTWARE.\n+\n+import argparse\n+import sys, os, glob, fnmatch\n+\n+## Added 10 Jan 2008\n+from distutils.core import setup, Extension\n+import distutils.command.install_data\n+\n+## Code borrowed from wxPython\'s setup and config files\n+## Thanks to Robin Dunn for the suggestion.\n+## I am not 100% sure what\'s going on, but it works!\n+def opj(*args):\n+ path = os.path.join(*args)\n+ return os.path.normpath(path)\n+\n+## Added 10 Jan 2008\n+# Specializations of some distutils command classes\n+class wx_smart_install_data(distutils.command.install_data.install_data):\n+ """need to change self.install_dir to the actual library dir"""\n+ def run(self):\n+ install_cmd = self.get_finalized_command(\'install\')\n+ self.install_dir = getattr(install_cmd, \'install_lib\')\n+ return distutils.command.install_data.install_data.run(self)\n+\n+def find_data_files(srcdir, *wildcards, **kw):\n+ # get a list of all files under the srcdir matching wildcards,\n+ # returned in a format to be used for install_data\n+ def walk_helper(arg, dirname, files):\n+ if \'.svn\' in dirname:\n+ return\n+ names = []\n+ lst, wildcards = arg\n+ for wc in wildcards:\n+ wc_name = opj(dirname, wc)\n+ for f in files:\n+ filename = opj(dirname, f)\n+\n+ if fnmatch.fnmatch(filename, wc_name) and not os.path.isdir(filename):\n+ names.append(filename)\n+ if names:\n+ lst.append( (dirname, names ) )\n+\n+ file_list = []\n+ recursive = kw.get(\'recursive\', True)\n+ if recursive:\n+ os.path.walk(srcdir, walk_helper, (file_list, wildcards))\n+ else:\n+ walk_helper((file_list, wildcards),\n+ srcdir,\n+ [os.path.basename(f) for f in glob.glob(opj(srcdir, \'*\'))])\n+ return file_list\n+\n+## This is a list of files to install, and where:\n+## Make 
sure the MANIFEST.in file points to all the right \n+## directories too.\n+files = find_data_files(\'ezBAMQC/\', \'*.*\')\n+\n+from distutils.core import setup\n+\n+def readme():\n+\twith open(\'README.rst\') as f:\n+\t\treturn f.read()\n+\n+if sys.version_info[0] != 2 or sys.version_info[1] < 7:\n+\tprint >> sys.stderr, "ERROR: ezBAMQC requires Python 2.7"\n+\tsys.exit()\n+\n+BAMQC_HEADER = [\n+ \'src/bamqc/Constants.h\',\n+ \'src/bamqc/Coverage_prof.h\',\n+ \'src/bamqc/GeneFeatures.h\',\n+ \'src/bamqc/InnerDist_prof.h\',\n+ \'src/bamqc/IntervalTree.h\',\n+ \'src/bamqc/Mappability.h\',\n+ \'src/bamqc/parseBAM.h\',\n+ \'src/bamqc/ReadDup_prof.h\',\n+ \'src/bamqc/Results.h\',\n+ \'src/bamqc/rRNA.h\'\n+]\n+\n+BAMQC_SOURCE = [\n+ \'src/bamqc/Coverage_prof.cpp\',\n+ \'src/bamqc/GeneFeatures.cpp\',\n+ '..b'.cpp\',\n+ \'src/bamqc/IntervalTree.cpp\',\n+ \'src/bamqc/Mappability.cpp\',\n+ \'src/bamqc/parseBAM.cpp\',\n+ \'src/bamqc/ReadDup_prof.cpp\',\n+ \'src/bamqc/Results.cpp\',\n+ \'src/bamqc/rRNA.cpp\'\n+]\n+\n+###TODO HAVE TO SPLIT INTO TWO AND MAKE THE A FILE\n+HTSLIB_PUBLIC_HEADERS = [\n+\t\'src/htslib/bgzf.h\',\n+\t\'src/htslib/faidx.h\',\n+\t\'src/htslib/hfile.h\',\n+\t\'src/htslib/hts.h\',\n+\t\'src/htslib/hts_defs.h\',\n+\t\'src/htslib/khash.h\',\n+\t\'src/htslib/klist.h\',\n+\t\'src/htslib/knetfile.h\',\n+\t\'src/htslib/kseq.h\',\n+\t\'src/htslib/ksort.h\',\n+\t\'src/htslib/kstring.h\',\n+\t\'src/htslib/regidx.h\',\n+\t\'src/htslib/sam.h\',\n+\t\'src/htslib/synced_bcf_reader.h\',\n+\t\'src/htslib/tbx.h\',\n+\t\'src/htslib/vcf.h\',\n+\t\'src/htslib/vcf_sweep.h\',\n+\t\'src/htslib/vcfutils.h\'\n+]\n+\n+\n+HTSLIB = [\n+\t\'src/htslib/bgzf.c\',\n+\t\'src/htslib/faidx.c\',\n+\t\'src/htslib/hfile.c\',\n+\t\'src/htslib/hfile_net.c\',\n+\t\'src/htslib/hts.c\',\n+ 
\'src/htslib/kfunc.c\',\n+\t\'src/htslib/knetfile.c\',\n+\t\'src/htslib/kstring.c\',\n+\t\'src/htslib/regidx.c\',\n+\t\'src/htslib/sam.c\',\n+\t\'src/htslib/synced_bcf_reader.c\',\n+\t\'src/htslib/tbx.c\',\n+\t\'src/htslib/vcf.c\',\n+\t\'src/htslib/vcfutils.c\',\n+\t\'src/htslib/cram/cram_codecs.c\',\n+\t\'src/htslib/cram/cram_decode.c\',\n+\t\'src/htslib/cram/cram_encode.c\',\n+\t\'src/htslib/cram/cram_index.c\',\n+\t\'src/htslib/cram/cram_io.c\',\n+\t\'src/htslib/cram/cram_samtools.c\',\n+\t\'src/htslib/cram/cram_stats.c\',\n+\t\'src/htslib/cram/files.c\',\n+\t\'src/htslib/cram/mFILE.c\',\n+\t\'src/htslib/cram/md5.c\',\n+\t\'src/htslib/cram/open_trace_file.c\',\n+\t\'src/htslib/cram/pooled_alloc.c\',\n+ \'src/htslib/cram/rANS_static.c\',\n+\t\'src/htslib/cram/sam_header.c\',\n+\t\'src/htslib/cram/string_alloc.c\',\n+\t\'src/htslib/cram/thread_pool.c\',\n+\t\'src/htslib/cram/vlen.c\',\n+\t\'src/htslib/cram/zfio.c\'\n+]\n+\n+BAMqc_CFLAGS = [\'-fpermissive\',\'-O3\',\'-std=c++11\',\'-Wno-error=declaration-after-statement\'] \n+BAMqc_DFLAGS = [(\'_FILE_OFFSET_BITS\',\'64\'),(\'_LARGEFILE64_SOURCE\',\'\'),(\'_CURSES_LIB\',\'1\')]\n+BAMqc_INCLUDES = [\'./src/htslib\']\n+BAMqc_HEADERS = [\'./src/bamqc\']\n+BAMqc_EXTRA = [\'build/lib.linux-x86_64-2.7/htslib.so\']\n+\n+htslib_CFLAGS = [\'-Wno-error=declaration-after-statement\']\n+htslib_HEADERS = [\'./src/htslib\',\'./src/htslib/htslib\',\'./src/htslib/cram\']\n+htslib_DFLAGS = [(\'_FILE_OFFSET_BITS\',\'64\'),(\'_USE_KNETFILE\',\'\')]\n+\n+setup(name = "ezBAMQC",\n+ version = "0.6.5",\n+ description = \'Quality control tools for NGS alignment file\',\n+ keywords = \'Quality control BAM file\',\n+\t# make sure to add all the nessacary requires\n+ dependency_links=[\'https://gcc.gnu.org/gcc-4.8/\',\'https://www.r-project.org/\',\'https://cran.r-project.org/web/packages/corrplot/\'],\n+ cmdclass = { \'install_data\': wx_smart_install_data },\n+ scripts = ["ezBAMQC"],\n+ author = "Ying Jin",\n+ author_email 
="yjin@cshl.edu",\n+ license=\'GPLv3\',\n+ platforms = [\'Linux\'],\n+ url=\'http://hammelllab.labsites.cshl.edu/software#BAMqc\',\n+ long_description=readme(),\n+ classifiers=[\n+ \'Development Status :: 4 - Beta\',\n+ \'Natural Language :: English\',\n+ \'License :: OSI Approved :: GNU General Public License v3 (GPLv3)\',\n+ \'Topic :: Scientific/Engineering :: Bio-Informatics\',\n+ \'Intended Audience :: Science/Research\',\n+ \'Programming Language :: Python :: 2.7\',\n+ \'Programming Language :: C++\',\n+ \'Operating System :: Unix\',\n+ ],\n+ zip_safe = False,\n+ include_package_data=True,\n+ ext_modules = [ \n+ Extension(\'htslib\',\n+ sources = HTSLIB,\n+ include_dirs = htslib_HEADERS,\n+ extra_compile_args = htslib_CFLAGS,\n+ define_macros = htslib_DFLAGS,\n+\t\t\t\t\tlibraries=[\'z\']\n+ ),\n+\t\t Extension(\'libBAMqc\',\n+ sources = BAMQC_SOURCE, \n+ extra_compile_args = BAMqc_CFLAGS,\n+ include_dirs = BAMqc_HEADERS + htslib_HEADERS,\n+ extra_objects = BAMqc_EXTRA,\n+\t\t\t\t\tdefine_macros = BAMqc_DFLAGS\n+ )\n+ ] \n+ )\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/Constants.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/Constants.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,37 @@ +// +// Constants.h +// BAMQC_c++ +// +// Created by Ying Jin on 10/28/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#ifndef BAMQC_c___Constants_h +#define BAMQC_c___Constants_h + +#include <limits> + +#define BIN_SIZE 100000 + +#define MAX_BUCKET 128 +#define MIN_BUCKET 16 +#define DEPTH 16 + +//#define SAMPLESIZE 500000 +#define SAMPLESIZE std::numeric_limits<int>::max() +#define LOW_BOUND -200 +#define UPPER_BOUND 1000 +#define STEP 10 + +#define CDS 1 +#define UTR5 2 +#define UTR3 3 +#define INTRON 4 +#define ITGUP1K 5 +#define ITGDN1K 6 +#define INTERGENIC 7 +#define RRNA 8 + +#define MAX_READ_LEN 1000 + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/Coverage_prof.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/Coverage_prof.cpp Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,217 @@ +// +// Coverage_prof.cpp +// BAMQC_c++ +// +// Created by Ying Jin on 10/28/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#include "Coverage_prof.h" + +#include <string> +//#include <stdio> +#include <iostream> +#include <fstream> +#include <stdlib.h> + + + +Coverage_prof::Coverage_prof(std::string outfile_data,std::string outfile_fig, GeneFeatures * geneIdx) +{ + frag_num = 0; + cov_script_file = outfile_data + ".geneBodyCoverage_plot.r"; + cov_data_file = outfile_data + ".geneBodyCoverage.txt"; + cov_fig_file = outfile_fig + ".geneBodyCoverage.png"; + + transcov_fig_file = outfile_fig + ".TransCoverage.png"; + transcov_script_file = outfile_data + ".TransCoverage.r"; + transcov_data_file = outfile_data + ".geneAbundance.txt"; + + int num_of_genes = geneIdx->get_numofgenes(); + int num_of_exons = geneIdx->total_exon; + //geneCounts_list[num_of_genes]; + for (int i=0; i< num_of_genes; i++) { + geneCounts_list.push_back(0); + std::vector<int> idx_cnt ; + for(int j=0;j<=100; j++) + { + idx_cnt.push_back(0); + } + gene_percentile_base.push_back(idx_cnt); + } + //std::cout << num_of_genes << std::endl; + for (int i=0; i< num_of_exons; i++) { + mapped_exon.push_back(0); + } + gene_Idx = geneIdx; + total_exons = num_of_exons; +} + +Coverage_prof::Coverage_prof(GeneFeatures * geneIdx){ + gene_Idx = geneIdx; + frag_num = 0; + total_exons = geneIdx->total_exon; + + int num_of_genes = geneIdx->get_numofgenes(); + int num_of_exons = geneIdx->total_exon; + //geneCounts_list[num_of_genes]; + for (int i=0; i< num_of_genes; i++) { + geneCounts_list.push_back(0); + std::vector<int> idx_cnt ; + for(int j=0;j<=100; j++) + { + idx_cnt.push_back(0); + } + gene_percentile_base.push_back(idx_cnt); + } + //std::cout << num_of_genes << std::endl; + for (int i=0; i< num_of_exons; i++) { + mapped_exon.push_back(0); + } + +} + +int Coverage_prof::write(int totalReads){ + int coverage[101] = {0}; + //total_exons = 0; + int zero_exons = 0; + //std::cout 
<< "start to write" << std::endl; + + for(size_t i=0;i< gene_percentile_base.size();i++) + { + std::vector<int> percentile_list = gene_percentile_base[i]; + for(size_t j=0;j< percentile_list.size();j++){ + coverage[j] += percentile_list[j]; + + } + + } + std::string geneCnt_str = ""; + for (size_t i=0;i<geneCounts_list.size(); i++) { + geneCnt_str += ',' + std::to_string(geneCounts_list[i]); + } + std::string x_coord=""; + std::string y_coord=""; + try { + std::ofstream OUT1, OUT2, OUT3, OUT4; + OUT2.open(cov_data_file,std::ofstream::out); + OUT3.open(transcov_data_file,std::ofstream::out); + OUT3 << "gene" << "\t" << "Counts\n"; + if (geneCnt_str !=""){ + OUT4.open(transcov_script_file,std::ofstream::out); + + OUT4 << "png(\'" << transcov_fig_file << "\',width=500,height=500,units='px')\n"; + OUT4 << "a=c("<< geneCnt_str.substr(1) << ")\n"; + OUT4 << "Fn = ecdf(a)\n"; + OUT4 << "max_x = round(log(max(knots(Fn)),2),0)\n"; + OUT4 << "xx = c(0,2^seq(0,max_x,by=2))\n"; + OUT4 << "y=Fn(xx)\n"; + OUT4 << "xlog = log(xx[2:length(xx)],base=2)\n"; + OUT4 << "plot(x=c(-1,xlog),y=y,xaxt = 'n',type=\"b\",col=\"blue\",pch=20,xlab=\"Number of Reads\",ylab=\"Cumulative proportion of Genes\")\n"; + OUT4 << "axis(1,at = c(-1,seq(0,max_x,by=2)),labels=c(0,2^seq(0,max_x,by=2)))\n"; + OUT4 << "dev.state = dev.off()"; + OUT4.close(); + } + OUT2 << "Total reads: " << std::to_string(totalReads) << "\n"; + OUT2 << "Fragment number: " << frag_num << "\n"; + OUT2 << "percentile\tcount\n"; + + for(int i=0; i< 101;i++){ + x_coord += ',' + std::to_string(i); + y_coord += ',' + std::to_string(coverage[i]); + + OUT2 << std::to_string(i) << '\t' << std::to_string(coverage[i]) << "\n"; + } + if(x_coord != ""){ + OUT1.open(cov_script_file,std::ofstream::out); + OUT1 << "png(\'" << cov_fig_file << "\',width=500,height=500,units='px')\n"; + OUT1 << "x=c(" << x_coord.substr(1) << ")\n"; + OUT1 << "y=c(" << y_coord.substr(1) << ")\n"; + + OUT1 << "smoothsp = smooth.spline(x,y,spar=0.35)\n"; + OUT1 << 
"plot(smoothsp,type=\"l\",col=\"blue\",xlab=\"Percentile of Gene Body (5\'->3\')\",ylab=\"Number of read\",xlim=c(0,100))\n"; + OUT1 << "dev.state = dev.off()"; + + OUT1.close(); + } + OUT2.close(); + + for(size_t i=0;i<geneCounts_list.size();i++) + { + + OUT3 << gene_Idx->get_name(i) << "\t" << std::to_string(geneCounts_list[i]) << "\n"; + } + OUT3.close(); + + }catch(std::ofstream::failure e ){ + std::cout << "Error in writing clipping profile." << std::endl; + return -1; + } + zero_exons = total_exons; + for (size_t i=0; i< mapped_exon.size(); i++) { + if (mapped_exon[i] > 0) { + zero_exons --; + } + } + return zero_exons; +} + +void Coverage_prof::add(Coverage_prof * cov_prof) +{ + frag_num += cov_prof->frag_num; + + for(size_t i=0;i< geneCounts_list.size();i++){ + geneCounts_list[i] += cov_prof->geneCounts_list[i]; + } + + for(size_t i=0;i<gene_percentile_base.size();i++){ + std::vector<int> * g0_perc_cnt = &(gene_percentile_base[i]); + std::vector<int> g1_perc_cnt = cov_prof->gene_percentile_base[i]; + for (size_t j=0; j<= 100; j++ ) { + g0_perc_cnt->at(j) = g1_perc_cnt[j] + g0_perc_cnt->at(j); + } + } + for (int i=0; i< total_exons; i++) { + mapped_exon[i] += cov_prof->mapped_exon[i]; + } +} + +void Coverage_prof::count(int gene,std::vector<std::pair<int,int> > exon_blocks1,std::vector<std::pair<int,int> > exon_blocks2,std::vector<int> exons) +{ + for (size_t i=0; i < exons.size(); i++) { + mapped_exon[exons[i]] =1; + } + if (gene != -1){ + frag_num += exon_blocks1.size() + exon_blocks2.size(); + geneCounts_list[gene] += 1; + + //__per_base_count(gene,exon_blocks,exon_blocks2); + int gene_start = gene_Idx->get_start(gene); + int gene_stop = gene_Idx->get_stop(gene); + for (auto& kv : exon_blocks1) { + if (kv.second >= gene_start && kv.first <= gene_stop) + { + for (int j=kv.first; j<=kv.second; j++) { + int idx = gene_Idx->exist_in_percentile_list(gene,j); + if (idx != -1) { + gene_percentile_base[gene][idx] +=1; + } + } + } + } + + for (auto& kv : 
exon_blocks2) { + if (kv.second >= gene_start && kv.first <= gene_stop) + { + for (int j=kv.first; j<=kv.second; j++) { + int idx = gene_Idx->exist_in_percentile_list(gene,j); + if (idx != -1) { + gene_percentile_base[gene][idx] +=1; + } + } + } + } + + } +} + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/Coverage_prof.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/Coverage_prof.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,43 @@ +// +// Coverage_prof.h +// BAMQC_c++ +// +// Created by Ying Jin on 11/18/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#ifndef __BAMQC_c____Coverage_prof__ +#define __BAMQC_c____Coverage_prof__ + +#include <stdio.h> +#include <vector> +#include <utility> + +#include "GeneFeatures.h" + +class Coverage_prof{ +public: + int frag_num ; + int total_exons; + GeneFeatures * gene_Idx; + + std::string cov_script_file ; + std::string cov_data_file ; + std::string cov_fig_file ; + + std::vector<int> geneCounts_list; + std::vector<int> mapped_exon; + std::vector<std::vector<int> > gene_percentile_base; + + std::string transcov_fig_file ; + std::string transcov_script_file ; + std::string transcov_data_file ; + + Coverage_prof(GeneFeatures * geneIdx); + Coverage_prof(std::string outfile_data,std::string outfile_fig, GeneFeatures * geneIdx); + int write(int totalReads); + void add(Coverage_prof * cov_prof); + void count(int gene,std::vector<std::pair<int,int> > exon_blocks1,std::vector<std::pair<int,int> > exon_blocks2,std::vector<int> exons); +}; + +#endif /* defined(__BAMQC_c____Coverage_prof__) */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/GeneFeatures.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/GeneFeatures.cpp Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,1065 @@\n+//\n+// GeneFeatures.cpp\n+// BAMQC-0.5\n+//\n+// Created by Ying Jin on 9/15/15.\n+// Copyright (c) 2015 Ying Jin. All rights reserved.\n+//\n+\n+#include "GeneFeatures.h"\n+\n+#include <cmath>\n+#include <fstream>\n+#include <sstream>\n+//#include <regex>\n+#include "stdlib.h"\n+#include <algorithm>\n+#include <iostream>\n+#include <math.h>\n+#include <iterator>\n+\n+\n+//#include <boost/tokenizer.hpp>\n+\n+//template <class T1, class T2, class Pred = std::less<T2> >\n+//struct sort_pair_second {\n+//(const std::pair<T1,T2>&left, const std::pair<T1,T2>&right) {\n+// Pred p;\n+// return p(left.second, right.second);\n+// }\n+//};\n+//bool sort_pair_second(std::pair<int,int> first, std::pair<int,int> second)\n+//{\n+// return first.second > second.second ;\n+//}\n+\n+bool itv_comp(Interval first, Interval second){\n+ return first.start < second.start ;\n+}\n+\n+int pivot(std::vector<Interval> &intervals, int first, int last)\n+{\n+ int p = first;\n+ int pivotElement = intervals[first].start;\n+ \n+ \n+ for(int i = first+1 ; i <= last ; i++)\n+ {\n+ /* If you want to sort the list in the other order, change "<=" to ">" */\n+ if(intervals[i].start <= pivotElement)\n+ {\n+ std::swap(intervals[i],intervals[p]);\n+ p++;\n+ \n+ }\n+ }\n+ \n+ return p;\n+}\n+\n+void quick_sort(std::vector<Interval> &intervals, int first, int last){\n+ \n+ int pivotElement;\n+ \n+ if(first < last)\n+ {\n+ pivotElement = pivot(intervals, first, last);\n+ quick_sort(intervals, first, pivotElement-1);\n+ quick_sort(intervals, pivotElement+1, last);\n+ }\n+ \n+}\n+\n+\n+bool reverse_ord_func (int i,int j) { return (j<i); }\n+\n+\n+Gene::Gene(std::string gid, std::string ss){\n+ id = gid;\n+ strand = ss ;\n+ min_start = std::numeric_limits<int>::max();\n+ max_stop = std::numeric_limits<int>::min();\n+ gene_actual_len = 0;\n+ stop_codon_st = -1;\n+ stop_codon_end = -1;\n+}\n+\n+Gene::~Gene(){}\n+\n+void Gene::add_cds(int st, int end){\n+ std::pair<int,int> cds_interval 
(st,end);\n+ cds.push_back(cds_interval);\n+}\n+\n+void Gene::add_exons(int st, int end){\n+ if (this->min_start > st) {\n+ this->min_start = st;\n+ }\n+ if (this->max_stop<end) {\n+ this->max_stop = end;\n+ }\n+ std::pair<int,int> exon_interval (st,end);\n+ gene_actual_len += (end - st+1);\n+ exons.push_back(exon_interval);\n+}\n+void Gene::set_stop_codon(int st,int end)\n+{\n+ stop_codon_st = st;\n+ stop_codon_end = end;\n+}\n+\n+\n+void Gene::get_others(){\n+ std::vector<std::pair<int,int> > left_cds;\n+ std::vector<std::pair<int,int> > left_exons;\n+ //int idx[exons.size()];\n+ size_t i; //,j;\n+ int itgUp1k_st, itgUp1k_end,itgDn1k_st,itgDn1k_end ;\n+\n+ sort(exons.begin(),exons.end());\n+ sort(cds.begin(),cds.end());\n+ \n+\n+ for (i = 1; i < exons.size(); i++) {\n+ intron.push_back(std::make_pair(exons[i-1].second +1, exons[i].first -1));\n+ }\n+\n+ if(strand == "+") {\n+ itgUp1k_st = std::max(int(0),exons[0].first-1000);\n+ itgUp1k_end = std::max(int(0),exons[0].first-1 );\n+ itgDn1k_st = exons[exons.size()-1].second + 1;\n+ itgDn1k_end = exons[exons.size()-1].second + 1000 ;\n+ itg1k.push_back(std::make_pair(itgUp1k_st,itgUp1k_end)) ;\n+ itg1k.push_back(std::make_pair(itgDn1k_st,itgDn1k_end)) ;\n+ \n+ if (stop_codon_st == -1) {\n+ utr5 = exons;\n+ }\n+ else {\n+ cds[cds.size()-1].second = stop_codon_end; \n+ for( i=0;i < exons.size(); i++) {\n+ \n+ if (exons[i].second < cds[0].first) { utr5.push_back(exons[i]); }\n+ \n+ if (exons[i].first < cds[0].first && exons[i].second > cds[0].first ) { utr5.push_back(std::make_pair(exons[i].first,cds[0].first - 1)); }\n+ \n+ if (exons[i].first <= stop_codon_st && exons[i].second > stop_codon_end )\n+ { utr3.push_back'..b' ++) {\n+ if (itv_list[j].first < fs[i].stop && itv_list[j].second > fs[i].start) {\n+ int ovp_st = itv_list[j].first > fs[i].start ? itv_list[j].first : fs[i].start;\n+ int ovp_end = itv_list[j].second > fs[i].stop ? 
fs[i].stop : itv_list[j].second;\n+ ovp_len += ovp_end - ovp_st+1;\n+ }\n+ }\n+ if (gene_ovp_len_map.find(fs[i].gene) != gene_ovp_len_map.end()) {\n+ gene_ovp_len_map[fs[i].gene] += ovp_len;\n+ if (fs[i].type <= min_type) {\n+ gene_type_map[fs[i].gene] = fs[i].type;\n+ min_type = fs[i].type;\n+ }\n+ \n+ } else {\n+ gene_ovp_len_map.insert(std::pair<int,int> (fs[i].gene,ovp_len));\n+ gene_type_map.insert(std::pair<int,int>(fs[i].gene,fs[i].type));\n+ min_type = fs[i].type;\n+ }\n+ }\n+ else { //not CDS or UTR\n+ if (None_gene_type_map.find(fs[i].gene) != None_gene_type_map.end())\n+ {\n+ if(fs[i].type < None_gene_type_map[fs[i].gene])\n+ {\n+ None_gene_type_map[fs[i].gene] = fs[i].type;\n+ }\n+ }\n+ else {\n+ None_gene_type_map.insert(std::pair<int, int> (fs[i].gene,fs[i].type));\n+ }\n+ }\n+\n+ //genes.push_back(std::pair<int,int>(fs[i].type,fs[i].gene));\n+ if (fs[i].exon != -1) {\n+ mapped_exons->push_back(fs[i].exon);\n+ }\n+ }\n+ \n+ if (gene_type_map.size() == 0) {\n+ return None_gene_type_map;\n+ }\n+ else {\n+ std::map<int,int> res_gene_type_map;\n+ int max_ovp_len = 0;\n+ for (auto& kv : gene_ovp_len_map) {\n+ if( max_ovp_len < kv.second) { max_ovp_len = kv.second; }\n+ }\n+ for (auto& kv : gene_ovp_len_map) {\n+ if (gene_ovp_len_map[kv.first] == max_ovp_len) {\n+ res_gene_type_map.insert(std::pair<int,int> (kv.first,gene_type_map[kv.first]));\n+ }\n+ }\n+ return res_gene_type_map;\n+ }\n+ \n+ return gene_type_map;\n+\n+}\n+/*\n+\n+int main() {\n+ std::string filename = "test.gtf";\n+ std::string id_attr = "gene_id";\n+ \n+ std::vector<std::pair<int,int> > itv_list ;\n+ chr_ITV exp ;\n+ exp.chrom = "chr4";\n+ int start = 1048489;\n+ int end = 1049900;\n+ std::pair<int,int> p (start,end);\n+ \n+ itv_list.push_back(p);\n+ \n+ std::cout << "start to build tree " << std::endl;\n+ GeneFeatures gIdx (filename,id_attr);\n+ std::cout << "after build tree " << std::endl;\n+ \n+ std::cout << "total exon " << gIdx.total_exon << std::endl;\n+ std::cout << "total gene " 
<< gIdx.features.size() << std::endl;\n+\n+ // for (int i=0; i < itv_list.size(); i++) {\n+ // std::cout << itv_list[i].start << std::endl;\n+ //}\n+ std::vector<int> *exons = new std::vector<int>();\n+ std::vector<int> res = gIdx.Gene_annotation("chr4",itv_list,".",exons);\n+ std::vector<std::pair<int,int> > exon_list = gIdx.get_exons("chr4",start,end,".");\n+ for (auto& p : exon_list){\n+ std::cout << p.first << "\\t" << p.second << std::endl;\n+ }\n+ std::vector<std::vector<int> > gene_percentile_list = gIdx.gene_percentile_list;\n+ \n+ for (int i=0; i<gene_percentile_list.size(); i++) {\n+ std::cout << i << std::endl;\n+ std::vector<int> perc_list = gene_percentile_list[i];\n+ for (int j=0; j<perc_list.size(); j++) {\n+ std::cout << perc_list[j] << "\\t";\n+ }\n+ std::cout << "\\n";\n+ }\n+ \n+ std::cout << exons->size() << std::endl;\n+ \n+ for (int i=0;i<res.size(); i++) {\n+ std::cout << res[i] << std::endl;\n+ //std::cout << exons->operator[](i) << std::endl;\n+ \n+ }\n+ delete exons;\n+ \n+\n+ bool test_bool = false;\n+ std::cout << test_bool << std::endl;\n+ \n+}*/\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/GeneFeatures.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/GeneFeatures.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,113 @@ +// +// GeneFeatures.h +// BAMQC-0.5 +// +// Created by Ying Jin on 9/15/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#ifndef __BAMQC_0_5__GeneFeatures__ +#define __BAMQC_0_5__GeneFeatures__ + +#include <stdio.h> +#include <string.h> +#include <vector> +#include <map> +#include <bitset> +#include <utility> + +#include "IntervalTree.h" +#include "Constants.h" +#include <limits> + +typedef struct { + std::string chrom; + int start; + int end; +} chr_ITV; + +extern "C" bool itv_comp(Interval, Interval); + + + +typedef std::map<std::string, std::vector<std::string> > gene_exon_Dict ; +typedef gene_exon_Dict::iterator gene_exon_Dict_It; + +typedef std::map<std::string,std::map<int, IntervalTree *> > chrom_itvTree_Dict ; +typedef chrom_itvTree_Dict::iterator chrom_itvTree_Dict_itr; +//typedef Dict::const_iterator gene_exon_Dict_It; + + +class Gene { +public: + std::vector<std::pair<int,int> > exons; + std::vector<std::pair<int,int> > cds; + std::string id; + std::string strand; + int min_start ; + int max_stop ; + int gene_actual_len; + int stop_codon_st; + int stop_codon_end; + int start_codon_st; + int start_codon_end; + std::vector<std::pair<int,int> > utr5; + std::vector<std::pair<int,int> > utr3 ; + std::vector<std::pair<int,int> > intron ; + std::vector<std::pair<int,int> > itg1k ; + Gene(std::string gid, std::string ss); + //Gene& operator= (const Gene& gg); + //Gene(const std::string gid, const std::string ss); + ~Gene(); + void set_stop_codon(int st, int end); + void add_cds(int st, int end); + void add_exons(int st,int end); + void get_others(); +}; + +class GeneFeatures{ + +public: + std::vector<std::string> features; + std::vector<std::vector<int> > gene_percentile_list; + + std::vector<int> gene_starts; + std::vector<int> gene_ends; + std::vector<int> gene_lengths; + + int total_exon; + + chrom_itvTree_Dict cds_exon_idx_plus ; + chrom_itvTree_Dict cds_exon_idx_minus ; + + GeneFeatures(std::string 
GTFfilename,std::string id_attribute); + ~GeneFeatures(); + + std::vector<std::string> getFeatures() ; + std::string get_name(int g); + + int get_start(int g); + int get_stop(int g); + int get_numofgenes(); + int exist_in_percentile_list(int gene,int pos); + + std::map<int,int> Gene_annotation(std::string chrom, std::vector<std::pair<int,int> > itv_list, std::string strand,std::vector<int> * mapped_exons); + + std::vector<std::pair<int,int> > get_exons(std::string chrom,int read1_end,int read2_start,std::string strand); + +private: + + void read_features(std::string gff_filename, std::string id_attribute) ; + void build_tree(std::map<std::string, std::map<std::string,Gene> > temp_plus, std::map<std::string, std::map<std::string,Gene> > temp_minus); + +}; + +/*extern "C" { + void quick_sort(std::vector<Interval> &intervals, int first, int last); + int pivot(std::vector<Interval> &intervals, int first, int last); + int get_first(const std::pair<int, int>& p); + + int get_last(const std::pair<int, int>& p); +};*/ + +#endif /* defined(__BAMQC_0_5__GeneFeatures__) */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/InnerDist_prof.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/InnerDist_prof.cpp Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,178 @@ +// +// InnerDist_prof.cpp +// BAMQC_c++ +// +// Created by Ying Jin on 10/28/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#include "InnerDist_prof.h" +#include "Mappability.h" +#include <map> +#include <iostream> +#include <fstream> +//#include <stdio> +#include <stdlib.h> +#include <math.h> +#include <htslib/sam.h> + +InnerDist_prof::InnerDist_prof(std::string outfile_data,std::string outfile_fig,int sample_size,int l_bound,int u_bound,int s) +{ + + InnDist_data_file = outfile_data + ".inner_distance_freq.txt"; + InnDist_script_file= outfile_data + ".inner_distance_plot.r"; + InnDist_fig_file = outfile_fig+".inner_distance_plot.png"; + lower_bound = l_bound; + upper_bound = u_bound; + + //samplesize = sample_size; + step = s; + + //window_left_bound = range(lower_bound,up_bound,step); + +} +InnerDist_prof::InnerDist_prof(int sample_size,int l_bound,int u_bound,int s) +{ + lower_bound = l_bound; + upper_bound = u_bound; + + //samplesize = sample_size; + step = s; +} +InnerDist_prof::~InnerDist_prof(){ + +} +void InnerDist_prof::write() +{ + //estimate the inner distance of mRNA pair end fragment. 
fragment size = insert_size + 2 x read_length''' + if (pair_num == 0){ + return ; + } + try{ + + std::string pos_str = ""; + std::string cnt_str = ""; + int size = ceil((upper_bound-lower_bound)/step)-1; + int to_plot[size]; + + for(int i=0;i<size;i++) + { + to_plot[i] = 0; + } + for (auto& kv: counts){ + if (kv.first >= lower_bound && kv.first <= upper_bound) { + int pos = ceil((kv.first - lower_bound - step/2)/step); + if (pos < 0) { + pos = 0; + } + to_plot[pos] += kv.second; + } + } + std::ofstream FQ ; + FQ.open(InnDist_data_file,std::ofstream::out); + + for(int i=0; i < size; i++) { + int pos = (int)(i*step + lower_bound + step/2); + int pos1 = i*step + lower_bound; + + pos_str +=','+ std::to_string(pos); + cnt_str += ',' + std::to_string(to_plot[i]); + + FQ << pos1 << '\t' << std::to_string(pos1 + step-1) << '\t' << to_plot[i] << '\n' ; + } + FQ.close(); + if(pos_str != ""){ + std::ofstream RS ; + RS.open(InnDist_script_file,std::ofstream::out); + //plot_file = outfile_fig+".inner_distance_plot.png" + RS << "png(\"" << InnDist_fig_file << "\",width=500,height=500,units=\"px\")\n"; + + RS << "fragsize=rep(c(" << pos_str.substr(1) << "),times=c(" << cnt_str.substr(1) << + "))\n"; + RS << "frag_sd = round(sd(fragsize),digits=0)\n"; + RS << "frag_mean = round(mean(fragsize),digits=0)\n"; + RS << "frag_median = round(median(fragsize),digits=0)\n"; + + RS << "hist(fragsize,probability=T,breaks="<< size + 1 <<",xlim=c("<< lower_bound << ',' <<upper_bound << "),xlab=\"Inner distance (bp)\",main=paste(c(\"Median=\",frag_median,\";\",\"Mean=\",frag_mean,\";\",\"SD=\",frag_sd),collapse=\"\"),border=\"blue\")\n"; + + RS << "lines(density(fragsize,bw=" << 2 * step <<"),col='red')\n"; + RS << "abline(v=frag_median,lty=2)\n"; + RS << "dev.state = dev.off()\n"; + + RS.close(); + } + }catch(std::ofstream::failure e){ + std::cout << "Error in writing inner distance profile." 
<< std::endl; + return; + } +} + +void InnerDist_prof::add(InnerDist_prof * inDist) +{ + //samplesize += inDist->samplesize; + pair_num += inDist->pair_num; + + for(auto& kv: inDist->counts) + { + std::map<int, int>::iterator it; + it = counts.find(kv.first); + if (it != counts.end()) { + counts[kv.first] += kv.second; + } + else { + counts[kv.first] = kv.second; + } + } +} + +void InnerDist_prof::count(GeneFeatures * geneIdx,int type,bam1_t * aligned_read,std::string chrom,std::vector<std::pair<int,int> > intron_blocks,std::string strand) +{ + //if (pair_num >= samplesize) + // return ; + int splice_intron_size=0; + //int read1_len = aligned_read->core.l_qseq; //infer_query_length() + uint32_t *cigar = bam_get_cigar(aligned_read); + int read1_len = bam_cigar2mapped_read_len(aligned_read->core.n_cigar,cigar); + + int read1_start = aligned_read->core.pos; + int read2_start = aligned_read->core.mpos; + int read1_end = 0; + + for(auto& intron : intron_blocks){ + splice_intron_size += intron.second - intron.first; + } + + read1_end = read1_start + read1_len + splice_intron_size; + int inner_distance = read2_start - read1_end +1; + // if(inner_distance > -130 && inner_distance < -120) +//{ + // char * name = bam_get_qname(aligned_read); +// printf("read name %s \n", name); + +//} + if (inner_distance >= lower_bound && inner_distance <= upper_bound) + { + pair_num ++; + std::vector<std::pair<int,int> > exons ; + if (type == CDS || type== UTR5 || type== UTR3 ) { + exons = geneIdx->get_exons(chrom,read1_end,read2_start,strand); + } + + if (exons.size() > 0 ) + { + int size = 0; + for (auto& p : exons){ + size += p.second - p.first; + } + if (size <= inner_distance && size > 1){ + counts[size] += 1; + } + else{ + counts[inner_distance] += 1; + } + } + else{ + counts[inner_distance] += 1; + } + } + +} |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/InnerDist_prof.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/InnerDist_prof.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,42 @@ +// +// InnerDist_prof.h +// BAMQC_c++ +// +// Created by Ying Jin on 10/28/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#ifndef __BAMQC_c____InnerDist_prof__ +#define __BAMQC_c____InnerDist_prof__ + +#include <stdio.h> +#include <vector> +#include <utility> + +#include "GeneFeatures.h" +#include "Constants.h" +#include "htslib/sam.h" + +class InnerDist_prof{ +public: + + std::string InnDist_data_file; + std::string InnDist_fig_file; + std::string InnDist_script_file; + int samplesize = 0; + int step= 0; + int lower_bound = 0; + int upper_bound = 0; + int pair_num = 0; + + std::map<int,int> counts ; + + InnerDist_prof(int sample_size=SAMPLESIZE,int low_bound= LOW_BOUND,int up_bound= UPPER_BOUND,int step= STEP); + InnerDist_prof(std::string data_dir,std::string fig_dir,int sample_size=SAMPLESIZE,int low_bound= LOW_BOUND,int up_bound= UPPER_BOUND,int step= STEP); + ~InnerDist_prof(); + void write(); + void count(GeneFeatures * geneIdx, int type,bam1_t * aligned_read,std::string chrom,std::vector<std::pair<int,int> > intron_blocks,std::string strand); + void add(InnerDist_prof * inDist_prof); +}; + +#endif /* defined(__BAMQC_c____InnerDist_prof__) */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/IntervalTree.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/IntervalTree.cpp Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,250 @@ +// +// IntervalTree.cpp +// BAMQC-0.5 +// +// Created by Ying Jin on 9/15/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#include "IntervalTree.h" +#include <iostream> + +Interval::Interval(int g, int e, int st, int end, int t) { + gene = g; + start = st; + stop = end; + type = t; + exon = e; +} + +Interval::Interval(){} +Interval::~Interval() { } + + + +IntervalTree::IntervalTree(){} + +IntervalTree::IntervalTree(std::vector<Interval> intervals, int depth, int minbucket, int extent_st , int extent_end, int maxbucket){ + + std::vector<Interval> lefts ; + std::vector<Interval> rights ; + int left_ext = 0; + int right_ext = 0; + int max_stop = 0; + float center_pos = 0.0 ; + Interval itv ; + size_t i ; + + left = nullptr; + right = nullptr; + + depth -= 1; + if ((depth == 0 || intervals.size() < (size_t)minbucket) && intervals.size() < (size_t)maxbucket){ + itvlist = intervals; + left = right = nullptr; + center = -1; + return ; + } + + //if (extent_st == -1){ + // sorting the first time through allows it to get + // better performance in searching later. 
+ + // quick_sort(intervals,0,intervals.size()-1); + //} + + for(i=0;i<intervals.size(); i++){ + itv = intervals[i]; + if (itv.stop > max_stop) { max_stop = itv.stop; } + } + + if(extent_st != -1){ + left_ext = extent_st; + right_ext = extent_end; + } + else { + + left_ext = intervals[0].start; + right_ext = max_stop; + } + //left, right = _extent or (intervals[0].start, max_stop) + //center = intervals[len(intervals)/ 2].stop + center_pos = (left_ext + right_ext) / 2.0; + + //std::cout << "left_ext " << left_ext << std::endl; + //std::cout << "right_ext " << right_ext << std::endl; + //std::cout << "center " << center_pos << std::endl; + + for(i=0;i<intervals.size();i++){ + + itv = intervals[i]; + + if (itv.stop < center_pos) { lefts.push_back(itv); } + else { + if (itv.start > center_pos) { rights.push_back(itv);} else { itvlist.push_back(itv) ;} + } + } + if (lefts.empty()) { + left = nullptr; + } + else{ + + left = new IntervalTree(lefts,depth,minbucket,intervals[0].start,center_pos,maxbucket); + } + if (rights.empty()) { + right = nullptr; + } + else { + right = new IntervalTree(rights, depth, minbucket, center_pos,right_ext,maxbucket); + } + + center = center_pos; + +} + +void IntervalTree::clear(IntervalTree * node){ + // std::cout << "clear node" << std::endl; + //if (node != nullptr) { + // std::cout << node->center << std::endl; + //} + if (node != nullptr) { + clear(node->left); + clear(node->right); + // delete node; + } +} + +IntervalTree::~IntervalTree() { + + clear(left); + clear(right); + +/* if (left != nullptr){ + + if( left->left == nullptr && left->right == nullptr) { + delete left; + } + + else { + //if (left->left != nullptr) { + (*left).~IntervalTree(); + // } + //if (left->right != nullptr) { + // (*left->right).~IntervalTree(); + //} + } + } + if (right != nullptr ) { + + if(right->left == nullptr && right->right == nullptr) { + delete right; + } + else{ + // if (right->left != nullptr) { + // (*right->left).~IntervalTree(); + //} + //if 
(right->right != nullptr) { + // (*right->right).~IntervalTree(); + //} + (*right).~IntervalTree(); + } + } + delete this;*/ +} + + + +std::vector<Interval> IntervalTree::find(int start, int stop){ +//"""find all elements between (or overlapping) start and stop""" + std::vector<Interval> overlapping ; + std::vector<Interval> temp ; + size_t i; + + if (!itvlist.empty() && stop >= itvlist[0].start) { + for (i=0;i< itvlist.size(); i++) { + if (itvlist[i].stop >= start && itvlist[i].start <= stop) { + overlapping.push_back(itvlist[i]); + } + + } + } + + + if( left != nullptr && start <= center) { + temp = (*left).find(start, stop); + overlapping.insert(overlapping.end(), temp.begin(),temp.end()); + } + + if(right != nullptr && stop >= center){ + temp = (*right).find(start, stop); + + overlapping.insert(overlapping.end(),temp.begin(),temp.end()); + } + + return overlapping ; +} + +std::vector<int> IntervalTree::find_gene(int start, int stop){ + + std::vector<int> overlapping ; + std::vector<int> temp ; + size_t i; + + if (!itvlist.empty() && stop >= itvlist[0].start) { + for (i=0;i< itvlist.size(); i++) { + if (itvlist[i].stop >= start && itvlist[i].start <= stop) { + overlapping.push_back(itvlist[i].gene); + } + + } + } + + + if( left != nullptr && start <= center) { + temp = (*left).find_gene(start, stop); + overlapping.insert(overlapping.end(), temp.begin(),temp.end()); + } + + if(right != nullptr && stop >= center){ + temp = (*right).find_gene(start, stop); + + overlapping.insert(overlapping.end(),temp.begin(),temp.end()); + } + + return overlapping ; +} + + +/* +int main(){ + std::vector<Interval> ll; + IntervalTree *Idx; + std::vector<Interval> res; + std::vector<std::string> res_gene; + int i; + + + ll.push_back(Interval("gid1",100,300,"cds")); + ll.push_back(Interval("gid2",10,100,"cds")); + ll.push_back(Interval("gid3",1000,3000,"cds")); + ll.push_back(Interval("gid4",500,800,"cds")); + ll.push_back(Interval("gid5",200,700,"cds")); + + std::cout << "before 
create idx" << std::endl; + + Idx = new IntervalTree(ll,16, 2, -1, -1, 3); + + std::cout << "after create idx" << std::endl; + + res = (*Idx).find(20,600); + res_gene = (*Idx).find_gene(20,600); + + for (i = 0; i<res.size(); i++) { + std::cout << res[i].gene << std::endl; + } + + for (i = 0; i<res_gene.size(); i++) { + std::cout << res[i].gene << std::endl; + } +} +*/ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/IntervalTree.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/IntervalTree.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,56 @@ +// +// IntervalTree.h +// BAMQC-0.5 +// +// Created by Ying Jin on 9/15/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#ifndef __BAMQC_0_5__IntervalTree__ +#define __BAMQC_0_5__IntervalTree__ + +#include <stdio.h> +#include <string> +#include <vector> + +#include "Constants.h" + +class Interval{ +public: + int start; + int stop; + //std::string gene; + int gene; + //std::string type; + int type; + int exon; + + Interval(); + Interval(int g, int exon,int st, int end, int t); + ~Interval(); +}; + + + + + +class IntervalTree{ +public : + std::vector<Interval> itvlist; + IntervalTree* left ; + IntervalTree* right ; + float center ; + + IntervalTree(); + IntervalTree(std::vector<Interval> intervals, int depth = DEPTH, int minbucket= MIN_BUCKET, int extent_st=-1, int extent_end = -1 , int maxbucket= MAX_BUCKET); + ~IntervalTree(); + std::vector<Interval> find(int start, int stop); + std::vector<int> find_gene(int start, int stop); +private: + void clear(IntervalTree * node); + +}; + + + +#endif /* defined(__BAMQC_0_5__IntervalTree__) */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/Mappability.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/Mappability.cpp Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,326 @@\n+//\n+// Mappablity.cpp\n+// BAMQC_c++\n+//\n+// Created by Ying Jin on 10/28/15.\n+// Copyright (c) 2015 Ying Jin. All rights reserved.\n+//\n+\n+#include "Mappability.h"\n+#include "htslib/sam.h"\n+//#include <stdio>\n+#include <iostream>\n+#include <fstream>\n+\n+#include <stdlib.h>\n+#include <vector>\n+\n+\n+int bam_cigar2mapped_read_len(int n_cigar, const uint32_t *cigar)\n+{\n+ int k, l;\n+\t for (k = l = 0; k < n_cigar; ++k)\n+\t\t{\n+ if (bam_cigar_op(cigar[k])==BAM_CMATCH) \n+\t\t\t\t { l += bam_cigar_oplen(cigar[k]);}\n+\t }\n+\t\t return l;\n+}\n+\n+void bam_cigar2Clip(int n_cigar, const uint32_t *cigar,std::vector<std::pair<int, int> > * clip_pos_len)\n+{\n+ int k, pos;\n+\n+ //std::cout << "number of cigar " << n_cigar <<std::endl; \n+\n+ for (k = pos = 0; k < n_cigar; ++k)\n+ if (bam_cigar_op(cigar[k]) == BAM_CSOFT_CLIP )\n+ {\n+ clip_pos_len->push_back(std::pair<int,int> (pos,bam_cigar_oplen(cigar[k])));\n+ pos += bam_cigar_oplen(cigar[k]);\n+ }\n+ else {\n+\t\tif (bam_cigar_op(cigar[k]) == BAM_CMATCH || bam_cigar_op(cigar[k]) == BAM_CINS || bam_cigar_op(cigar[k]) == BAM_CEQUAL || bam_cigar_op(cigar[k]) == BAM_CDIFF )\n+ { pos += bam_cigar_oplen(cigar[k]);}\n+ }\n+\n+ return ;\n+}\n+\n+\n+Clipping_prof::Clipping_prof(std::string outfile_data,std::string outfile_fig,int qcut,int sample_size)\n+{\n+ \n+ \n+ clip_data_file = outfile_data + ".clipping_profile.xls";\n+ clip_script_file = outfile_data + ".clipping_profile.r";\n+ clip_fig_file = outfile_fig+ ".clipping_profile.png";\n+ \n+ mapq_fig_file = outfile_fig+ ".mapq_profile.png";\n+ mapq_data_file = outfile_data + ".mapq_profile.xls";\n+ mapq_script_file = outfile_data + ".mapq_profile.r";\n+ \n+ readlen_data_file = outfile_data + ".readlen_profile.xls";\n+ readlen_fig_file = outfile_fig+ ".readlen_profile.png";\n+ readlen_script_file = outfile_data + ".ReadLen_plot.r";\n+ \n+ samplesize = sample_size;\n+ \n+ q_cutoff = qcut;\n+ read_len 
=0;\n+}\n+Clipping_prof::~Clipping_prof(){\n+ \n+}\n+Clipping_prof::Clipping_prof(int mapq,int sample_size){\n+ q_cutoff = mapq;\n+ samplesize = sample_size;\n+ read_len =0;\n+\n+}\n+int Clipping_prof::get_max_read_len(){\n+ return this->read_len;\n+}\n+void Clipping_prof::write(int total_read)\n+{\n+ std::string read_pos ="";\n+ std::string clip_count = "";\n+ std::string mapq_val = "";\n+ std::string mapq_count = "";\n+ std::string readlen_val = "";\n+ std::string readlen_count = "";\n+ \n+ int max_mapq = 0;\n+ \n+ if (read_len > MAX_READ_LEN) {\n+ std::cout << "read length greater than 10000." << std::endl;\n+ return;\n+ }\n+ try{\n+ std::ofstream OUT ;\n+ OUT.open (readlen_data_file, std::ofstream::out);\n+ OUT << "Position\\tRead_Total\\tRead_Len_mapped\\n";\n+ \n+ //soft_clip_profile[0] = 1;\n+ for(auto& kv : readLen_list ){\n+ readlen_val += \',\' + std::to_string(kv.first);\n+ readlen_count += \',\' + std::to_string(kv.second);\n+ \n+ OUT << kv.first << \'\\t\' << total_read << \'\\t\' << kv.second << \'\\n\';\n+ }\n+ \n+ OUT.close();\n+ \n+ \n+ }catch(std::ofstream::failure e ){\n+ std::cout << "Error in writing clipping profile." << std::endl;\n+ return;\n+ }\n+ \n+ try{\n+ std::ofstream OUT ;\n+ OUT.open (clip_data_file, std::ofstream::out);\n+ OUT << "Position\\tRead_Total\\tRead_clipped\\n";\n+ \n+ //soft_clip_profile[0] = 1;\n+ for(int i=0; i< read_len; i++ ){\n+ read_pos += \',\' + std::to_string(i);\n+ clip_count += \',\' + std::to_string(soft_clip_profile[i]);\n+ \n+ OUT << i << \'\\t\' << total_read << \'\\t\' << soft_clip_profile[i] << \'\\n\';\n+ }\n+\n+ OUT.close();\n+ \n+ \n+ }catch(std::ofstream::failure e ){\n+ std::cout << "Error in writing clipping profile." 
'..b'OUT << "freq[4] = sum(mapq_count[which(mapq_val<30)])/" << std::to_string(total_read) << "*100\\n";\n+ ROUT<< "freq[5] = 100\\n";\n+ \n+ ROUT << "barplot(freq,beside=T,xlab=\\"Mapping Quality\\",border=\\"NA\\",space=1.5,main=\\"Mapping Quality\\",ylim=c(0,100),ylab=\\"Cumulative proportion (%)\\",col=\\"blue\\",names.arg=xname)\\n";\n+ \n+ ROUT << "dev.state=dev.off()\\n";\n+ ROUT.close();\n+ }\n+ }catch(std::ofstream::failure e ){\n+ std::cout << "Error in writing mapping quality script file." << std::endl;\n+ return;\n+ }\n+ }\n+\t//std::cout << readLen_list.size() << std::endl;\n+ if (readLen_list.size() > 0) {\n+ try {\n+ std::ofstream ROUT;\n+ ROUT.open(readlen_script_file,std::ofstream::out);\n+ \n+ ROUT << "png(\\"" << readlen_fig_file << "\\",width=500,height=500,units=\\"px\\")\\n";\n+ ROUT << "readlen_val=c(" << readlen_val.substr(1) << ")\\n";\n+ ROUT<< "readlen_count=c(" << readlen_count.substr(1) << ")\\n";\n+ \n+ ROUT << "plot(readlen_val,(readlen_count/" << total_read <<"),pch=20,xlab=\\"Mapped Read Length\\",ylab=\\"Proportion\\",col=\\"blue\\")\\n";\n+ \n+ ROUT << "dev.state=dev.off()\\n";\n+ ROUT.close();\n+ \n+ }catch(std::ofstream::failure e ){\n+ std::cout << "Error in writing mapping quality script file." 
<< std::endl;\n+ return;\n+ }\n+ }\n+}\n+\n+void Clipping_prof::add(Clipping_prof * clip_prof)\n+{\n+ for (auto& j : clip_prof->mapqlist){\n+ std::map<int,int>::iterator it;\n+ it = mapqlist.find(j.first);\n+ \n+ if (it != mapqlist.end()){\n+ mapqlist[j.first] += j.second;\n+ }\n+ else {\n+ mapqlist[j.first] = j.second;\n+ }\n+ }\n+ \n+ for (int i=0;i< MAX_READ_LEN; i++){\n+ soft_clip_profile[i] += clip_prof->soft_clip_profile[i];\n+ }\n+ if (clip_prof->get_max_read_len() > read_len) {\n+ read_len = clip_prof->get_max_read_len();\n+ }\n+ for (auto& kv : clip_prof->readLen_list){\n+ std::map<int,int>::iterator it;\n+ it = readLen_list.find(kv.first);\n+ \n+ if (it != readLen_list.end()){\n+ readLen_list[kv.first] += kv.second;\n+ }\n+ else {\n+ readLen_list[kv.first] = kv.second;\n+ }\n+ }\n+}\n+\n+void Clipping_prof::set_qual(int mapq)\n+{\n+ std::map<int,int>::iterator it;\n+ it = mapqlist.find(mapq);\n+ if (it != mapqlist.end()) {\n+ mapqlist[mapq] += 1;\n+ }\n+ else{\n+ mapqlist[mapq] = 1;\n+ }\n+}\n+void Clipping_prof::set(int len,uint32_t n_cigar, uint32_t * cigar,int mapq)\n+{\n+ int r = bam_cigar2mapped_read_len(n_cigar,cigar);\n+\t//std::cout << "read len " << r << std::endl;\n+ //std::cout << "number of cigar " << n_cigar <<std::endl; \n+ if(read_len < len)\n+ {\n+ read_len = len;\n+ }\n+ std::map<int,int>::iterator it;\n+ it = mapqlist.find(mapq);\n+ if (it != mapqlist.end()) {\n+ mapqlist[mapq] += 1;\n+ }\n+ else{\n+ mapqlist[mapq] = 1;\n+ }\n+\t//std::cout << "after mapq " << mapq << std::endl;\n+ it = readLen_list.find(r);\n+ if(it!=readLen_list.end()){\n+ readLen_list[r] += 1;\n+ }\n+ else{\n+ readLen_list[r] = 1;\n+ }\n+ \n+\t//std::cout << "after read len " << r << std::endl;\n+ std::vector<std::pair<int, int> > soft_clip_pos_len ;\n+ \n+ bam_cigar2Clip(n_cigar,cigar,&soft_clip_pos_len);\n+ \n+ \n+\t//std::cout << "soft clip len " << soft_clip_pos_len.size() << std::endl;\n+ if(soft_clip_pos_len.size() > 0) {\n+ for (auto p : soft_clip_pos_len){\n+ 
int pos = p.first;\n+ int len = p.second;\n+ \n+\t//std::cout << pos << "\\t" << len << std::endl;\n+ for (int j = 0; j< len;j++){\n+ soft_clip_profile[pos+j] +=1;\n+ }\n+ \n+ }\n+ }\n+\n+}\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/Mappability.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/Mappability.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,54 @@ +// +// Mappablity.h +// BAMQC_c++ +// +// Created by Ying Jin on 10/28/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#ifndef __BAMQC_c____Mappablity__ +#define __BAMQC_c____Mappablity__ + +#include <stdio.h> +#include <string> +#include "Constants.h" +#include <utility> +#include <map> + +int bam_cigar2mapped_read_len(int n_cigar, const uint32_t *cigar); +class Clipping_prof{ +public: + int q_cutoff; + int samplesize; + + int soft_clip_profile[MAX_READ_LEN]={0}; + std::string clip_data_file; + std::string clip_fig_file; + std::string clip_script_file; + + std::string readlen_data_file; + std::string readlen_script_file; + std::string readlen_fig_file; + + std::string mapq_fig_file; + std::string mapq_data_file; + std::string mapq_script_file; + + std::map<int, int> mapqlist; + std::map<int, int> readLen_list; + + Clipping_prof(int qcut, int sample_size = SAMPLESIZE); + Clipping_prof(std::string data_dir,std::string fig_dir,int qcut, int sample_size = SAMPLESIZE); + + ~Clipping_prof(); + void set(int len,uint32_t n_cigar, uint32_t * cigar,int mapq); + void set_qual(int mapq); + void write(int total_read); + void add(Clipping_prof * clip_prof); + + int get_max_read_len(); +private: + int read_len; +}; + +#endif /* defined(__BAMQC_c____Mappablity__) */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/ReadDup_prof.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/ReadDup_prof.cpp Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,9 @@ +// +// ReadDup_prof.cpp +// BAMQC_c++ +// +// Created by Ying Jin on 10/28/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#include "ReadDup_prof.h" |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/ReadDup_prof.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/ReadDup_prof.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,14 @@ +// +// ReadDup_prof.h +// BAMQC_c++ +// +// Created by Ying Jin on 10/28/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#ifndef __BAMQC_c____ReadDup_prof__ +#define __BAMQC_c____ReadDup_prof__ + +#include <stdio.h> + +#endif /* defined(__BAMQC_c____ReadDup_prof__) */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/Results.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/Results.cpp Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,133 @@ +// +// Results.cpp +// BAMQC_c++ +// +// Created by Ying Jin on 10/28/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#include <stdio.h> +#include <iostream> +#include <fstream> +#include <string> + +#include "Results.h" + +Results::Results(){ + +} + +Results::~Results(){ + +} + +void Results::write(std::string fname) +{ + try{ + std::ofstream f; + f.open(fname,std::ofstream::out); + + f << "filename" << "\t" << fname << "\n"; + f << "is_pairEnd" << "\t" << this->is_pairEnd << "\n"; + f << "clipping_plot_file" << "\t" << this->clipping_plot_file << "\n"; + f << "mapq_plot_file"<< "\t" <<this->mapq_plot_file << "\n"; + f << "mapq_file"<< "\t" <<this->mapq_file << "\n"; + f << "read_cov_plot_file"<< "\t" <<this->read_cov_plot_file << "\n"; + f << "trans_cov_plot_file"<< "\t" <<this->trans_cov_plot_file<< "\n"; + f << "insert_plot_file" << "\t" << this->insert_plot_file<< "\n"; + f << "insert_file" << "\t" << this->insert_file<< "\n"; + f << "read_dist_plot_file1" << "\t" << this->read_dist_plot_file1<< "\n"; + f << "read_dist_plot_file2" << "\t" <<this->read_dist_plot_file2<< "\n"; + f << "readLen_plot_file" << "\t" << this->readLen_plot_file<< "\n"; + f << "geneCount_file" << "\t" << this->geneCount_file<< "\n"; + f << "seqDeDup_percent" << "\t" <<std::to_string(this->seqDeDup_percent)<< "\n"; + f << "posDeDup_percent" << "\t" << std::to_string(this->posDeDup_percent)<< "\n"; + f << "no_clipping" << "\t" << this->no_clipping << "\n"; + f << "no_rRNA" << "\t" << this->no_rRNA << "\n"; + f << "total_reads" << "\t" <<std::to_string(this->total_reads) << "\n"; + f << "uniq_mapped_reads" << "\t" <<std::to_string(this->uniq_mapped_reads)<< "\n"; + f << "multi_mapped_reads" << "\t" <<std::to_string(this->multi_mapped_reads)<< "\n"; + f << "unmapped_reads" << "\t" <<std::to_string(this->unmapped_reads)<< "\n"; + f << "low_qual" << "\t" <<std::to_string(this->low_qual)<< "\n"; + f << "low_qual_read1" << "\t" 
<<std::to_string(this->low_qual_read1)<< "\n"; + f << "low_qual_read2" << "\t" <<std::to_string(this->low_qual_read2)<< "\n"; + f << "pcr_dup" << "\t" <<std::to_string(this->pcr_dup)<< "\n"; + f << "rRNA_read" << "\t" <<std::to_string(this->rRNA_read)<< "\n"; + + f << "cds_read" << "\t" <<std::to_string(this->cds_exon_read)<< "\n"; + f << "utr5_read" << "\t" <<std::to_string(this->utr_5_read)<< "\n"; + f << "utr3_read" << "\t" <<std::to_string(this->utr_3_read)<< "\n"; + f << "intron_read" << "\t" <<std::to_string(this->intron_read)<< "\n"; + f << "itgup1k_read" << "\t" <<std::to_string(this->intergenic_up1kb_read)<< "\n"; + f << "itgdn1k_read" << "\t" <<std::to_string(this->intergenic_down1kb_read)<< "\n"; + f << "itg_read" << "\t" <<std::to_string(this->intergenic_read)<< "\n"; + + + f << "unmapped_read1" << "\t" <<std::to_string(this->unmapped_read1)<< "\n"; + f << "unmapped_read2" << "\t" <<std::to_string(this->unmapped_read2)<< "\n"; + f << "mapped_read1"<< "\t" <<std::to_string(this->mapped_read1)<< "\n"; + f << "mapped_read2"<< "\t" <<std::to_string(this->mapped_read2) << "\n"; + f << "forward_read"<< "\t" <<std::to_string(this->forward_read)<< "\n"; + f << "reverse_read" << "\t" << std::to_string(this->reverse_read)<< "\n"; + f << "paired_reads"<< "\t" << std::to_string(this->paired_reads) + "\n"; + f << "mapped_plus_minus"<< "\t" <<std::to_string(this->mapped_plus_minus) << "\n"; + f << "mapped_plus_plus"<< "\t" <<std::to_string(this->mapped_plus_plus)<< "\n"; + f << "mapped_minus_plus" << "\t" <<std::to_string(this->mapped_minus_plus)<< "\n"; + f << "mapped_minus_minus" << "\t" <<std::to_string(this->mapped_minus_minus)<< "\n"; + f << "ins_read" << "\t" <<std::to_string(this->ins_read)<< "\n"; + f << "del_read"<< "\t" <<std::to_string(this->del_read)<< "\n"; + f << "noSplice"<< "\t" <<std::to_string(this->noSplice)<< "\n"; + f << "splice"<< "\t" <<std::to_string(this->splice)<< "\n"; + f << "paired_diff_chrom" << "\t" 
<<std::to_string(this->paired_diff_chrom)<< "\n"; + f.close(); + + }catch(std::ofstream::failure e ){ + std::cout << "Error in writing result ." << std::endl; + return; + } + +} + +void Results::add(Results *res){ + + no_clipping = (no_clipping || res->no_clipping); + no_rRNA = (no_rRNA && res->no_rRNA); + + rRNA_read += res->rRNA_read; + total_reads += res->total_reads; + uniq_mapped_reads +=res->uniq_mapped_reads; + multi_mapped_reads +=res->multi_mapped_reads; + unmapped_reads += res->unmapped_reads; + low_qual += res->low_qual; + low_qual_read1 += res->low_qual_read1; + low_qual_read2 += res->low_qual_read2; + pcr_dup += res->pcr_dup; + + unmapped_read1 += res->unmapped_read1; + unmapped_read2 += res->unmapped_read2; + mapped_read1 += res->mapped_read1; + mapped_read2 += res->mapped_read2; + forward_read += res->forward_read; + reverse_read += res->reverse_read; + paired_reads += res->paired_reads; + + mapped_plus_minus += res->mapped_plus_minus; + mapped_plus_plus += res->mapped_plus_plus; + mapped_minus_plus += res->mapped_minus_plus; + mapped_minus_minus += res->mapped_minus_minus; + + ins_read += res->ins_read; + del_read += res->del_read; + + noSplice += res->noSplice; + splice += res->splice; + paired_diff_chrom += res->paired_diff_chrom; + + cds_exon_read += res->cds_exon_read; + utr_5_read += res->utr_5_read; + utr_3_read += res->utr_3_read; + intron_read += res->intron_read; + intergenic_up1kb_read += res->intergenic_up1kb_read; + intergenic_down1kb_read += res->intergenic_down1kb_read; + intergenic_read += res->intergenic_read; +} |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/Results.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/Results.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
//
//  Results.h
//  BAMQC_c++
//
//  Created by Ying Jin on 10/28/15.
//  Copyright (c) 2015 Ying Jin. All rights reserved.
//

#ifndef BAMQC_c___Results_h
#define BAMQC_c___Results_h

#include <string>


// Summary of one QC run: output file locations, a few flags and the
// per-category read counters.
//
// Every member is public and default-initialised in-class, so a freshly
// constructed object describes "nothing counted yet".
class Results {

public:
    Results();
    ~Results();

    // Merges another partial result into this one: every int counter below is
    // summed, no_clipping is OR-ed and no_rRNA is AND-ed. File names, the
    // de-duplication percentages and is_pairEnd are left untouched.
    void add(Results *res);

    // Writes the counters to the file `fname`, one "<key>\t<value>\n" record
    // per line.
    void write(std::string fname);

    std::string filename = "";

    // Locations of the plots / data tables produced for this sample.
    std::string clipping_plot_file = "";
    std::string mapq_plot_file = "";
    std::string mapq_file = "";
    std::string read_cov_plot_file = "";
    std::string trans_cov_plot_file = "";
    std::string insert_plot_file = "";
    std::string insert_file = "";
    std::string read_dist_plot_file1 = "";
    std::string read_dist_plot_file2 = "";
    std::string read_dup_plot_file = "";
    std::string readLen_plot_file = "";
    std::string geneCount_file = "";

    double seqDeDup_percent = 0;
    double posDeDup_percent = 0;
    bool is_pairEnd = false;

    bool no_clipping = false;
    bool no_rRNA = false;


    // Overall read tallies.
    int total_reads = 0;
    int uniq_mapped_reads = 0;
    int multi_mapped_reads = 0;
    int unmapped_reads = 0;
    int low_qual = 0;
    int low_qual_read1 = 0;
    int low_qual_read2 = 0;
    int pcr_dup = 0;
    int rRNA_read = 0;

    // Read tallies per annotated region (written out by write() under the keys
    // cds_read, utr5_read, utr3_read, intron_read, itgup1k_read,
    // itgdn1k_read and itg_read).
    int cds_exon_read = 0;
    int utr_5_read = 0;
    int utr_3_read = 0;
    int intron_read = 0;
    int intergenic_up1kb_read = 0;
    int intergenic_down1kb_read = 0;
    int intergenic_read = 0;

    // Per-mate and per-orientation tallies.
    int unmapped_read1 = 0;
    int unmapped_read2 = 0;
    int mapped_read1 = 0;
    int mapped_read2 = 0;
    int forward_read = 0;
    int reverse_read = 0;
    int paired_reads = 0;

    // Strand combinations of mapped pairs.
    int mapped_plus_minus = 0;
    int mapped_plus_plus = 0;
    int mapped_minus_plus = 0;
    int mapped_minus_minus = 0;

    // Reads containing insertions / deletions.
    int ins_read = 0;
    int del_read = 0;

    // Spliced vs. unspliced reads, and pairs whose mates are on different
    // chromosomes.
    int noSplice = 0;
    int splice = 0;
    int paired_diff_chrom = 0;

};


#endif
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/parseBAM.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/parseBAM.cpp Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,1055 @@\n+//\n+// parseBAM.cpp\n+// BAMQC_c++\n+//\n+// Created by Ying Jin on 11/11/15.\n+// Copyright (c) 2015 Ying Jin. All rights reserved.\n+//\n+\n+#include "parseBAM.h"\n+#include "Coverage_prof.h"\n+#include "GeneFeatures.h"\n+#include "InnerDist_prof.h"\n+#include "Mappability.h"\n+#include "Results.h"\n+#include "rRNA.h"\n+#include "Constants.h"\n+\n+//#include <malloc.h>\n+#include <string>\n+#include <stdlib.h>\n+#include <pthread.h>\n+//#include <stdio.h>\n+#include <iostream>\n+#include <fstream>\n+#include <unistd.h>\n+\n+#include "htslib/sam.h"\n+\n+\n+\n+unsigned int tick_time =3000;\n+\n+#define IS_PAIRED(bam) ((bam)->core.flag&BAM_FPAIRED)\n+#define IS_QCFAIL(bam) ((bam)->core.flag & BAM_FQCFAIL)\n+\n+#define IS_PAIRED_AND_MAPPED(bam) (((bam)->core.flag&BAM_FPAIRED) && !((bam)->core.flag&BAM_FUNMAP) && !((bam)->core.flag&BAM_FMUNMAP))\n+\n+#define IS_PROPERLYPAIRED(bam) (((bam)->core.flag&(BAM_FPAIRED|BAM_FPROPER_PAIR)) == (BAM_FPAIRED|BAM_FPROPER_PAIR) && !((bam)->core.flag&BAM_FUNMAP))\n+\n+#define IS_UNMAPPED(bam) ((bam)->core.flag&BAM_FUNMAP)\n+#define IS_REVERSE(bam) ((bam)->core.flag&BAM_FREVERSE)\n+#define IS_MATE_REVERSE(bam) ((bam)->core.flag&BAM_FMREVERSE)\n+#define IS_READ1(bam) ((bam)->core.flag&BAM_FREAD1)\n+#define IS_READ2(bam) ((bam)->core.flag&BAM_FREAD2)\n+#define IS_DUP(bam) ((bam)->core.flag&BAM_FDUP)\n+//#defind READ_NAME(bam) (bam_get_qname(bam))\n+\n+/*DEF BAM_CMATCH = 0\n+DEF BAM_CINS = 1\n+DEF BAM_CDEL = 2\n+DEF BAM_CREF_SKIP = 3\n+DEF BAM_CSOFT_CLIP = 4\n+DEF BAM_CHARD_CLIP = 5\n+DEF BAM_CPAD = 6\n+DEF BAM_CEQUAL = 7\n+DEF BAM_CDIFF = 8*/\n+\n+typedef struct\n+{\n+ bam1_t * first;\n+ bam1_t * second;\n+ int type;\n+} read_pair_t ;\n+\n+typedef struct\n+{\n+ unsigned short thread_id;\n+ pthread_t thread_object;\n+ //pthread_spinlock_t cur_reads_lock;\n+ pthread_spinlock_t cur_reads_lock;\n+ \n+ //std::vector<std::pair<bam1_t *,bam1_t *> > *cur_reads;\n+ std::vector<read_pair_t> * cur_reads;\n+ \n+ Results * 
res;\n+ InnerDist_prof * inDist_prof;\n+ Clipping_prof * clip_prof;\n+ Coverage_prof * cov_prof;\n+ \n+} thread_context_t;\n+\n+typedef struct\n+{\n+ unsigned short thread_number;\n+ int all_finished;\n+ std::string format;\n+ std::string stranded;\n+ //bam_hdr_t * header;\n+\tstd::vector<std::string> refnames;\n+ std::vector<thread_context_t *> thread_contexts;\n+ \n+ GeneFeatures * geneIdx;\n+ rRNA * rRNAIdx;\n+ \n+} global_context_t;\n+\n+struct arg_struct {\n+ global_context_t * arg1;\n+\tthread_context_t * arg2;\n+};\n+std::vector<std::pair<int, int> > fetch_intron(int st, uint32_t * cigar, uint32_t n_cigar,std::string format)\n+{\n+ //\'\'\' fetch intron regions defined by cigar. st must be zero based return list of tuple of (st, end)\'\'\'\n+ //match = re.compile(r\'(\\d+)(\\D)\')\n+ int chrom_st = st;\n+ if (format == "BAM") { chrom_st += 1 ;}\n+ \n+ std::vector<std::pair<int,int> > intron_bound ;\n+ \n+ for (unsigned int i=0; i < n_cigar; i++){ //code and size\n+ if (bam_cigar_op(cigar[i])==BAM_CMATCH) { chrom_st += bam_cigar_oplen(cigar[i]);} //match\n+ if (bam_cigar_op(cigar[i])==BAM_CINS) {continue;} //insertion\n+ if (bam_cigar_op(cigar[i])==BAM_CDEL) { chrom_st += bam_cigar_oplen(cigar[i]);} //deletion\n+ if (bam_cigar_op(cigar[i])==BAM_CREF_SKIP) { intron_bound.push_back(std::pair<int,int> (chrom_st,chrom_st+ bam_cigar_oplen(cigar[i])-1)); } //gap\n+ if (bam_cigar_op(cigar[i])==BAM_CSOFT_CLIP) { chrom_st += bam_cigar_oplen(cigar[i]);}// soft clipping\n+ }\n+ \n+ return intron_bound ;\n+}\n+\n+std::vector<std::pair<int, int> > fetch_exon(int st,uint32_t * cigar,uint32_t n_cigar,std::string format)\n+{\n+ //\'\'\' fetch exon regions defined by cigar. 
st must be zero based return list of tuple of (st, end)\'\'\'\n+ //match = re.compile(r\'(\\d+)(\\D)\')\n+ int chrom_st = st;\n+ if (format == "BAM") { chrom_st += 1;}\n+ \n+ std::vector<std::pair<int,int> >exon_bound;\n+\n+ for (unsigned int i=0; i < n_cig'..b'of.write();\n+ int zero_exons = main_cov_prof.write(main_res.total_reads);\n+\n+ main_res.read_dist_plot_file1 = outdir_fig +smp_name+ ".read_distr.png";\n+ main_res.read_dist_plot_file2 = outdir_fig + smp_name + ".read_distr_pie.png";\n+ \n+ //std::cout << main_res.rRNA_read << std::endl; \n+ try {\n+ std::ofstream ROUT;\n+ \n+ ROUT.open (outdir +smp_name+ ".read_distr.r", std::ofstream::out);\n+ //ROUT = fopen(outfile + \'.read_distr.r\', \'w\')\n+ ROUT << "png(\\"" << main_res.read_dist_plot_file1 << "\\",width=500,height=500,units=\\"px\\")\\n";\n+ ROUT << "M=c(" << std::to_string(main_res.cds_exon_read) << "," << std::to_string(main_res.utr_5_read) << "," << std::to_string(main_res.utr_3_read) << "," << std::to_string(main_res.intron_read) << "," << std::to_string(main_res.intergenic_up1kb_read) << "," << std::to_string(main_res.intergenic_down1kb_read) << "," << std::to_string(main_res.rRNA_read) << "," << std::to_string(main_res.intergenic_read) << ")\\n";\n+ \n+ ROUT << "Mname=c(\\"CDS\\",\\"5UTR\\",\\"3UTR\\",\\"Intron\\",\\"TSS_Up_1Kb\\",\\"TES_Down_1Kb\\",\\"rRNA\\",\\"Others\\")\\n";\n+ ROUT << "val = barplot(M,xlab=\\"\\",space=1,ylab=\\"Read Counts\\",col=\\"blue\\",border=\\"NA\\")\\n";\n+ ROUT << "text(x=seq(val[1],val[8],by=2),y=rep(0,8),srt=60,adj=0,offset=2,pos=1,xpd=T,labels=Mname)\\n";\n+ ROUT << "dev.state = dev.off()\\n";\n+ ROUT.close();\n+\n+ ROUT.open(outdir + smp_name+".read_distr_pie.r", std::ofstream::out);\n+ if (geneIdx->total_exon != 0 ){\n+ ROUT << "png(\\"" << main_res.read_dist_plot_file2 << "\\",width=500,height=500,units=\\"px\\")\\n";\n+ ROUT << "pie(c(" << std::to_string(geneIdx->total_exon-zero_exons) << \',\' << std::to_string(zero_exons) << "),labels=c(\\"Covered " 
<< (geneIdx->total_exon - zero_exons) << " exons\\",\\"Uncovered\\"),main=\\"Exons\\",radius=0.6,clockwise=T,col=c(\\"blue\\",\\"white\\"))\\n";\n+ ROUT << "dev.state = dev.off()\\n";\n+ }\n+ ROUT.close();\n+ }catch(std::ofstream::failure e ){\n+ std::cout << "Error in writing plotting scripts.\\n" << std::endl;\n+ }\n+\n+ main_res.insert_plot_file = main_inDist_prof.InnDist_fig_file;\n+ main_res.insert_file = main_inDist_prof.InnDist_data_file;\n+ \n+ main_res.clipping_plot_file = main_clip_prof.clip_fig_file;\n+ main_res.mapq_plot_file = main_clip_prof.mapq_fig_file;\n+ main_res.mapq_file = main_clip_prof.mapq_data_file;\n+ \n+ //res.read_dup_plot_file = rDup_prof.plot_file\n+ main_res.readLen_plot_file = main_clip_prof.readlen_fig_file;\n+ \n+ main_res.read_cov_plot_file = main_cov_prof.cov_fig_file;\n+ main_res.geneCount_file = main_cov_prof.transcov_data_file;\n+ main_res.trans_cov_plot_file = main_cov_prof.transcov_fig_file;\n+ //res.seqDeDup_percent = rDup_prof.seqDeDup_percent\n+ //res.posDeDup_percent = rDup_prof.posDeDup_percent\n+ return main_res;\n+}\n+\n+int run_qc(char* out_dir, char* outfig_dir,char * ann_file, char* attrID, char* input_file,char* rRNA_file,char* label,int mapq,char* stranded,int thread_num)\n+{\n+ std::string gtf_fname (ann_file);\n+ std::string id_attrID (attrID);\n+ //std::string ifile (input_file);\n+ std::string smp_name (label);\n+ std::string strand_info (stranded);\n+ std::string data_outdir (out_dir);\n+ std::string fig_outdir (outfig_dir);\n+ std::string smp_res_fname = data_outdir+smp_name+".res.txt";\n+ \n+ //int thread_num = 1;\n+ \n+ std::string rRNA_fname (rRNA_file);\n+ rRNA * rRNAIdx = NULL;\n+ \n+ GeneFeatures * geneIdx = new GeneFeatures(gtf_fname,id_attrID);\n+ if (rRNA_fname != "") {\n+ rRNAIdx = new rRNA(rRNA_fname);\n+ }\n+ Results res = QC(smp_name,geneIdx,rRNAIdx,input_file,data_outdir,fig_outdir,strand_info,thread_num,mapq);\n+ \n+ res.write(smp_res_fname);\n+ \n+ delete geneIdx;\n+ delete rRNAIdx;\n+ \n+ 
return 1;\n+}\n+\n+\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/parseBAM.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/parseBAM.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
//
//  parseBAM.h
//  BAMQC_c++
//
//  Created by Ying Jin on 11/18/15.
//  Copyright (c) 2015 Ying Jin. All rights reserved.
//

#ifndef __BAMQC_c____parseBAM__
#define __BAMQC_c____parseBAM__

#include <stdio.h>

/* The linkage specification is only meaningful to a C++ compiler; guarding it
 * lets plain C callers include this header as well. */
#ifdef __cplusplus
extern "C" {
#endif

/*
 * Runs the complete QC pipeline for one alignment file and writes the summary
 * to "<out_dir><label>.res.txt".
 *
 * All string arguments are converted to std::string by the implementation and
 * therefore must not be NULL.
 *
 *   out_dir     directory prefix for data output; it is concatenated with the
 *               file name as-is, so it has to end with a path separator
 *   outfig_dir  directory prefix for figures (same concatenation rule)
 *   ann_file    gene annotation file handed to GeneFeatures
 *   attrID      attribute name used as the gene identifier in ann_file
 *   input_file  alignment file to analyse
 *   rRNA_file   rRNA annotation file; pass "" to skip rRNA detection
 *   label       sample name used in output file names
 *   mapq        mapping-quality threshold forwarded to the QC routine
 *   stranded    strandedness setting forwarded to the QC routine
 *   threadNum   number of worker threads
 *
 * Returns 1.
 */
int run_qc(char* out_dir, char* outfig_dir, char* ann_file, char* attrID, char* input_file, char* rRNA_file, char* label, int mapq, char* stranded, int threadNum);

#ifdef __cplusplus
}
#endif
#endif /* defined(__BAMQC_c____parseBAM__) */
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/rRNA.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/rRNA.cpp Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,230 @@ +// +// GeneFeatures.cpp +// BAMQC-0.5 +// +// Created by Ying Jin on 9/15/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#include "rRNA.h" +#include "Constants.h" + +#include <cmath> +#include <fstream> +#include <sstream> +//#include <regex> +#include "stdlib.h" +#include <algorithm> +#include <iostream> + + +rRNA::rRNA(std::string rRNAfilename){ + + read_features(rRNAfilename); + +} + +rRNA::~rRNA(){ + std::map<std::string,IntervalTree *>::iterator it; + + for (it=featureIdxs_plus.begin(); it != featureIdxs_plus.end(); it++) { + IntervalTree *tmp = it->second; + delete tmp; + + } + for (it=featureIdxs_minus.begin(); it != featureIdxs_minus.end(); it++) { + IntervalTree *tmp = it->second; + delete tmp; + } + +} + +//Reading & processing annotation files +void rRNA::read_features(std::string rRNA_filename) +{ + + //dict of dicts since the builtin type doesn't support it for some reason + std::map<std::string, std::vector<Interval> > temp_plus ; + std::map<std::string, std::vector<Interval> > temp_minus ; + std::map<std::string, std::vector<Interval> >::iterator tmp_itr; + + int i = 0; + //int counts = 0 ; + int line_no = 0; + int start; + int end; + int idx = 0; + + std::ifstream input; //(gff_filename); + + try{ + input.open (rRNA_filename, std::ifstream::in); + + while(! input.eof()){ + + std::string line,chrom,feature,start_ss,end_ss,score,strand; + std::stringstream ss; + + if (! 
std::getline(input,line)){ break; } + line_no ++; + + if (line == "\n" || !line.compare(0,1,"#")) { continue; } + + ss << line; + std::getline(ss,chrom,'\t'); + std::getline(ss,start_ss,'\t'); + std::getline(ss,end_ss,'\t'); + std::getline(ss,feature,'\t'); + std::getline(ss,score,'\t'); + std::getline(ss,strand,'\t'); + + try{ + start = std::stol(start_ss); + end = std::stol(end_ss); + } + catch (const std::invalid_argument& ia) { + std::cerr << "Invalid argument: " << ia.what() << '\n'; + std::exit(1); + + } + + if (strand == "+" ){ + tmp_itr = temp_plus.find(chrom); + if (tmp_itr != temp_plus.end()) { + std::vector<Interval> *tmp_ptr = &(tmp_itr->second); + //(*tmp_ptr).push_back(Interval(feature,-1,start,end,"rRNA")); + (*tmp_ptr).push_back(Interval(idx,-1,start,end,RRNA)); + idx++; + } + else{ + std::vector<Interval> tmp ; + tmp.push_back(Interval(idx,-1,start,end,RRNA)); + + temp_plus.insert(std::pair<std::string,std::vector<Interval> >(chrom,tmp)); + idx++; + } + } + + if (strand == "-" ) { + + tmp_itr = temp_minus.find(chrom); + if (tmp_itr != temp_minus.end()) { + std::vector<Interval> *tmp_ptr = &(tmp_itr->second); + (*tmp_ptr).push_back(Interval(idx,-1,start,end,RRNA)); + idx++; + } + else{ + std::vector<Interval> tmp ; + tmp.push_back(Interval(idx,-1,start,end,RRNA)); + + temp_minus.insert(std::pair<std::string,std::vector<Interval> >(chrom,tmp)); + idx++; + } + } + + i += 1 ; + if (i % 100000 == 0 ) + { + //sys.stderr.write("%d GTF lines processed.\n" % i); + std::cout << i << " rRNA lines processed." 
<< std::endl; + } + + } + + input.close(); + } + catch(std::ifstream::failure e){ + std::cout << "error in read file " << rRNA_filename << std::endl; + } + //build interval trees + + + for (tmp_itr = temp_plus.begin(); tmp_itr != temp_plus.end(); tmp_itr++) { + std::string chr = tmp_itr->first; + std::vector<Interval> itemlist = tmp_itr->second; + + std::sort(itemlist.begin(),itemlist.end(),itv_comp) ; + + featureIdxs_plus[chr] = new IntervalTree(itemlist); + } + + for (tmp_itr = temp_minus.begin(); tmp_itr != temp_minus.end(); tmp_itr++) { + std::string chr = tmp_itr->first; + + std::vector<Interval> itemlist = tmp_itr->second; + + std::sort(itemlist.begin(),itemlist.end(),itv_comp) ; + + featureIdxs_minus[chr] = new IntervalTree(itemlist); + } + + +} + +//find exons of given gene that overlap with the given intervals +//return list of tuples +bool rRNA::is_rRNA(std::string chrom, std::vector<std::pair<int,int> > itv_list,std::string strand) +{ + std::vector<std::string> rRNAs; + std::vector<Interval> fs ; + std::map<std::string, IntervalTree*>::iterator chrom_it; + size_t i; + if (strand == "+" || strand == ".") { + chrom_it = featureIdxs_plus.find(chrom); + if (chrom_it != featureIdxs_plus.end()) { + + for (i=0; i < itv_list.size(); i ++) { + //std::cout << "start to search tree" << std::endl; + fs = (*featureIdxs_plus[chrom]).find(itv_list[i].first,itv_list[i].second); + //} + } + } + } + + if (strand == "-" or strand == ".") { + chrom_it = featureIdxs_minus.find(chrom); + if (chrom_it != featureIdxs_minus.end()) { + + for (i=0; i < itv_list.size(); i ++) { + std::vector<Interval> tmp = (*featureIdxs_minus[chrom]).find(itv_list[i].first,itv_list[i].second); + + fs.insert(fs.end(),tmp.begin(),tmp.end()); + } + + } + } + + //std::cout << fs.size() << std::endl; + if (fs.size() > 0 ) { + return true; + } + else{ + return false; + } + +} +/* +int main() { + std::string filename = "./test.bed"; + std::string id_attr = "gene_id"; + + std::vector<chr_ITV> itv_list ; + 
chr_ITV exp ; + exp.chrom = "chr1"; + exp.start = 11870; + exp.end = 73220; + itv_list.push_back(exp); + + std::cout << "start to build tree " << std::endl; + rRNA gIdx (filename); + std::cout << "after build tree " << std::endl; + + // for (int i=0; i < itv_list.size(); i++) { + // std::cout << itv_list[i].start << std::endl; + //} + bool res = gIdx.is_rRNA(itv_list,"."); + //for (int i=0;i<res.size(); i++) { + std::cout << res << std::endl; + //} + +}*/ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/rRNA.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/rRNA.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,41 @@ +// +// rRNA.h +// BAMQC-0.5 +// +// Created by Ying Jin on 9/15/15. +// Copyright (c) 2015 Ying Jin. All rights reserved. +// + +#ifndef __BAMQC_0_5__rRNA__ +#define __BAMQC_0_5__rRNA__ + +#include <stdio.h> +#include <string.h> +#include <vector> +#include <map> +#include <utility> + +#include "IntervalTree.h" +#include "GeneFeatures.h" + + +class rRNA{ + +public: + + std::map<std::string, IntervalTree *> featureIdxs_plus ; + std::map<std::string, IntervalTree *> featureIdxs_minus ; + + rRNA(std::string rRNAfilename); + ~rRNA(); + + bool is_rRNA(std::string chrom,std::vector<std::pair<int,int> > itv_list,std::string strand); + +private: + void read_features(std::string rRNA_filename) ; + //void build_tree(std::map<std::string, std::map<std::string,Gene> > temp_plus, std::map<std::string, std::map<std::string,Gene> > temp_minus); + +}; + + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/ezBAMQC/sam.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/ezBAMQC/sam.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,426 @@\n+/* sam.h -- SAM and BAM file I/O and manipulation.\n+\n+ Copyright (C) 2008, 2009, 2013-2014 Genome Research Ltd.\n+ Copyright (C) 2010, 2012, 2013 Broad Institute.\n+\n+ Author: Heng Li <lh3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. */\n+\n+#ifndef HTSLIB_SAM_H\n+#define HTSLIB_SAM_H\n+\n+#include <stdint.h>\n+#include "hts.h"\n+\n+/**********************\n+ *** SAM/BAM header ***\n+ **********************/\n+\n+/*! 
@typedef\n+ @abstract Structure for the alignment header.\n+ @field n_targets number of reference sequences\n+ @field l_text length of the plain text in the header\n+ @field target_len lengths of the reference sequences\n+ @field target_name names of the reference sequences\n+ @field text plain text\n+ @field sdict header dictionary\n+ */\n+\n+typedef struct {\n+ int32_t n_targets, ignore_sam_err;\n+ uint32_t l_text;\n+ uint32_t *target_len;\n+ int8_t *cigar_tab;\n+ char **target_name;\n+ char *text;\n+ void *sdict;\n+} bam_hdr_t;\n+\n+/****************************\n+ *** CIGAR related macros ***\n+ ****************************/\n+\n+#define BAM_CMATCH 0\n+#define BAM_CINS 1\n+#define BAM_CDEL 2\n+#define BAM_CREF_SKIP 3\n+#define BAM_CSOFT_CLIP 4\n+#define BAM_CHARD_CLIP 5\n+#define BAM_CPAD 6\n+#define BAM_CEQUAL 7\n+#define BAM_CDIFF 8\n+#define BAM_CBACK 9\n+\n+#define BAM_CIGAR_STR "MIDNSHP=XB"\n+#define BAM_CIGAR_SHIFT 4\n+#define BAM_CIGAR_MASK 0xf\n+#define BAM_CIGAR_TYPE 0x3C1A7\n+\n+#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)\n+#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)\n+#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])\n+#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))\n+\n+/* bam_cigar_type returns a bit flag with:\n+ * bit 1 set if the cigar operation consumes the query\n+ * bit 2 set if the cigar operation consumes the reference\n+ *\n+ * For reference, the unobfuscated truth table for this function is:\n+ * BAM_CIGAR_TYPE QUERY REFERENCE\n+ * --------------------------------\n+ * BAM_CMATCH 1 1\n+ * BAM_CINS 1 0\n+ * BAM_CDEL 0 1\n+ * BAM_CREF_SKIP 0 1\n+ * BAM_CSOFT_CLIP 1 0\n+ * BAM_CHARD_CLIP 0 0\n+ * BAM_CPAD 0 0\n+ * BAM_CEQUAL 1 1\n+ * BAM_CDIFF 1 1\n+ * BAM_CBACK 0 0\n+ * --------------------------------\n+ */\n+#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3) // bit 1: consume query; bit 2: consume reference\n+\n+/*! 
@abstract the read is paired in sequencing, no matter whether it is mapped in a pair */\n+#define BAM_FPAIRED 1\n+/*! @abstract the read is mapped in a proper pair */\n+#define BAM_FPROPER_PAIR 2\n+/*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */\n+#define BAM_FUNMAP 4\n+/*! @abstract the mate is unmapped */\n+#define BAM_FMUNMAP 8\n+/*! @abstract the read is mapped to the reverse strand */\n+#define BAM_FREVERSE 16\n+/*! @abstract the mate is mapped to the revers'..b'close(fp)\n+\n+ int sam_open_mode(char *mode, const char *fn, const char *format);\n+\n+ typedef htsFile samFile;\n+ bam_hdr_t *sam_hdr_parse(int l_text, const char *text);\n+ bam_hdr_t *sam_hdr_read(samFile *fp);\n+ int sam_hdr_write(samFile *fp, const bam_hdr_t *h);\n+\n+ int sam_parse1(kstring_t *s, bam_hdr_t *h, bam1_t *b);\n+ int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str);\n+ int sam_read1(samFile *fp, bam_hdr_t *h, bam1_t *b);\n+ int sam_write1(samFile *fp, const bam_hdr_t *h, const bam1_t *b);\n+\n+ /*************************************\n+ *** Manipulating auxiliary fields ***\n+ *************************************/\n+\n+ uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);\n+ int32_t bam_aux2i(const uint8_t *s);\n+ double bam_aux2f(const uint8_t *s);\n+ char bam_aux2A(const uint8_t *s);\n+ char *bam_aux2Z(const uint8_t *s);\n+\n+ void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);\n+ int bam_aux_del(bam1_t *b, uint8_t *s);\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+/**************************\n+ *** Pileup and Mpileup ***\n+ **************************/\n+\n+#if !defined(BAM_NO_PILEUP)\n+\n+/*! 
@typedef\n+ @abstract Structure for one alignment covering the pileup position.\n+ @field b pointer to the alignment\n+ @field qpos position of the read base at the pileup site, 0-based\n+ @field indel indel length; 0 for no indel, positive for ins and negative for del\n+ @field level the level of the read in the "viewer" mode\n+ @field is_del 1 iff the base on the padded read is a deletion\n+ @field is_head ???\n+ @field is_tail ???\n+ @field is_refskip ???\n+ @field aux ???\n+\n+ @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The\n+ difference between the two functions is that the former does not\n+ set bam_pileup1_t::level, while the later does. Level helps the\n+ implementation of alignment viewers, but calculating this has some\n+ overhead.\n+ */\n+typedef struct {\n+ bam1_t *b;\n+ int32_t qpos;\n+ int indel, level;\n+ uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28;\n+} bam_pileup1_t;\n+\n+typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);\n+\n+struct __bam_plp_t;\n+typedef struct __bam_plp_t *bam_plp_t;\n+\n+struct __bam_mplp_t;\n+typedef struct __bam_mplp_t *bam_mplp_t;\n+\n+#ifdef __cplusplus\n+extern "C" {\n+#endif\n+\n+ /**\n+ * bam_plp_init() - sets an iterator over multiple\n+ * @func: see mplp_func in bam_plcmd.c in samtools for an example. 
Expected return\n+ * status: 0 on success, -1 on end, < -1 on non-recoverable errors\n+ * @data: user data to pass to @func\n+ */\n+ bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data);\n+ void bam_plp_destroy(bam_plp_t iter);\n+ int bam_plp_push(bam_plp_t iter, const bam1_t *b);\n+ const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);\n+ const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);\n+ void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);\n+ void bam_plp_reset(bam_plp_t iter);\n+\n+ bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);\n+ /**\n+ * bam_mplp_init_overlaps() - if called, mpileup will detect overlapping\n+ * read pairs and for each base pair set the base quality of the\n+ * lower-quality base to zero, thus effectively discarding it from\n+ * calling. If the two bases are identical, the quality of the other base\n+ * is increased to the sum of their qualities (capped at 200), otherwise\n+ * it is multiplied by 0.8.\n+ */\n+ void bam_mplp_init_overlaps(bam_mplp_t iter);\n+ void bam_mplp_destroy(bam_mplp_t iter);\n+ void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);\n+ int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+#endif // ~!defined(BAM_NO_PILEUP)\n+\n+#endif\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/INSTALL --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/INSTALL Thu Mar 24 17:12:52 2016 -0400 |
| [ |
@@ -0,0 +1,79 @@ +Basic Installation +================== + +To build and install HTSlib, 'cd' to the htslib-1.x directory containing +the package's source and type the following commands: + + ./configure + make + make install + +The './configure' command checks your build environment and allows various +optional functionality to be enabled (see Configuration below). If you +don't want to select any optional functionality, you may wish to omit +configure and just type 'make; make install' as for previous versions +of HTSlib. However if the build fails you should run './configure' as +it can diagnose the common reasons for build failures. + +The 'make' command builds the HTSlib library and various useful +utilities: bgzip, htsfile, and tabix. If compilation fails you should +run './configure' as it can diagnose problems with your build environment +that cause build failures. + +The 'make install' command installs the libraries, library header files, +utilities, several manual pages, and a pkgconfig file to /usr/local. +The installation location can be changed by configuring with --prefix=DIR +or via 'make prefix=DIR install' (see Installation Locations below). + + +Configuration +============= + +By default, './configure' examines your build environment, checking for +requirements such as the zlib development files, and arranges for a plain +HTSlib build. The following configure options can be used to enable +various features and specify further optional external requirements: + +--with-irods[=DIR] + Specifies the location of the iRODS client library to use to enable + access to data objects stored in iRODS (<http://irods.org/>) via file + paths like 'irods:DATAOBJ'. DIR is the base of an iRODS source tree + such that the library is present as DIR/lib/core/obj/libRodsAPI.* and + headers are present under DIR/lib/api/include and so on. If '=DIR' is + omitted, $IRODS_HOME will be used as a base directory. 
+ +The configure script also accepts the usual options and environment variables +for tuning installation locations and compilers: type './configure --help' +for details. For example, + + ./configure CC=icc --prefix=/opt/icc-compiled + +would specify that HTSlib is to be built with icc and installed into bin, +lib, etc subdirectories under /opt/icc-compiled. + + +Installation Locations +====================== + +By default, 'make install' installs HTSlib libraries under /usr/local/lib, +HTSlib header files under /usr/local/include, utility programs under +/usr/local/bin, etc. (To be precise, the header files are installed within +a fixed 'htslib' subdirectory under the specified .../include location.) + +You can specify a different location to install HTSlib by configuring +with --prefix=DIR or specify locations for particular parts of HTSlib by +configuring with --libdir=DIR and so on. Type './configure --help' for +the full list of such install directory options. + +Alternatively you can specify different locations at install time by +typing 'make prefix=DIR install' or 'make libdir=DIR install' and so on. +Consult the list of prefix/exec_prefix/etc variables near the top of the +Makefile for the full list of such variables that can be overridden. + +You can also specify a staging area by typing 'make DESTDIR=DIR install', +possibly in conjunction with other --prefix or prefix=DIR settings. +For example, + + make DESTDIR=/tmp/staging prefix=/opt + +would install into bin, lib, etc subdirectories under /tmp/staging/opt. |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/LICENSE Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,69 @@ +[Files in this distribution outwith the cram/ subdirectory are distributed +according to the terms of the following MIT/Expat license.] + +The MIT/Expat License + +Copyright (C) 2012-2014 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + + +[Files within the cram/ subdirectory in this distribution are distributed +according to the terms of the following Modified 3-Clause BSD license.] + +The Modified-BSD License + +Copyright (C) 2012-2014 Genome Research Ltd. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. 
Neither the names Genome Research Ltd and Wellcome Trust Sanger Institute + nor the names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR ITS CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +[The use of a range of years within a copyright notice in this distribution +should be interpreted as being equivalent to a list of years including the +first and last year specified and all consecutive years between them. + +For example, a copyright notice that reads "Copyright (C) 2005, 2007-2009, +2011-2012" should be interpreted as being identical to a notice that reads +"Copyright (C) 2005, 2007, 2008, 2009, 2011, 2012" and a copyright notice +that reads "Copyright (C) 2005-2012" should be interpreted as being identical +to a notice that reads "Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, +2011, 2012".] |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/Makefile Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,386 @@\n+# Makefile for htslib, a C library for high-throughput sequencing data formats.\n+#\n+# Copyright (C) 2013-2015 Genome Research Ltd.\n+#\n+# Author: John Marshall <jm18@sanger.ac.uk>\n+#\n+# Permission is hereby granted, free of charge, to any person obtaining a copy\n+# of this software and associated documentation files (the "Software"), to deal\n+# in the Software without restriction, including without limitation the rights\n+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+# copies of the Software, and to permit persons to whom the Software is\n+# furnished to do so, subject to the following conditions:\n+#\n+# The above copyright notice and this permission notice shall be included in\n+# all copies or substantial portions of the Software.\n+#\n+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL\n+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+# DEALINGS IN THE SOFTWARE.\n+\n+CC = gcc\n+AR = ar\n+RANLIB = ranlib\n+\n+CPPFLAGS = -I.\n+# TODO: probably update cram code to make it compile cleanly with -Wc++-compat\n+CFLAGS = -g -Wall -O2 \n+EXTRA_CFLAGS_PIC = -fPIC\n+LDFLAGS =\n+LDLIBS =\n+\n+# For now these don\'t work too well as samtools also needs to know to\n+# add -lbz2 and -llzma if linking against the static libhts.a library.\n+# TODO This needs configury and adding to htslib.pc.in.\n+#\n+# # Bzip2 support; optionally used by CRAM.\n+# HAVE_LIBBZ2 := $(shell echo -e "\\#include <bzlib.h>\\012int main(void){return 0;}" > .test.c && $(CC) $(CFLAGS) $(CPPFLAGS) -o .test .test.c -lbz2 2>/dev/null && echo yes)\n+# ifeq "$(HAVE_LIBBZ2)" "yes"\n+# CPPFLAGS += -DHAVE_LIBBZ2\n+# LDLIBS += -lbz2\n+# endif\n+#\n+# # Lzma support; optionally used by CRAM.\n+# HAVE_LIBLZMA := $(shell echo -e "\\#include <lzma.h>\\012int main(void){return 0;}" > .test.c && $(CC) $(CFLAGS) $(CPPFLAGS) -o .test .test.c -llzma 2>/dev/null && echo yes)\n+# ifeq "$(HAVE_LIBLZMA)" "yes"\n+# CPPFLAGS += -DHAVE_LIBLZMA\n+# LDLIBS += -llzma\n+# endif\n+\n+prefix = /usr/local\n+exec_prefix = $(prefix)\n+bindir = $(exec_prefix)/bin\n+includedir = $(prefix)/include\n+libdir = $(exec_prefix)/lib\n+datarootdir = $(prefix)/share\n+mandir = $(datarootdir)/man\n+man1dir = $(mandir)/man1\n+man5dir = $(mandir)/man5\n+pkgconfigdir= $(libdir)/pkgconfig\n+\n+MKDIR_P = mkdir -p\n+INSTALL = install -p\n+INSTALL_PROGRAM = $(INSTALL)\n+INSTALL_DATA = $(INSTALL) -m 644\n+INSTALL_DIR = $(MKDIR_P) -m 755\n+\n+BUILT_PROGRAMS = \\\n+\tbgzip \\\n+\thtsfile \\\n+\ttabix\n+\n+BUILT_TEST_PROGRAMS = \\\n+\ttest/fieldarith \\\n+\ttest/hfile \\\n+\ttest/sam \\\n+\ttest/test-regidx \\\n+\ttest/test_view 
\\\n+\ttest/test-vcf-api \\\n+\ttest/test-vcf-sweep\n+\n+all: lib-static lib-shared $(BUILT_PROGRAMS) $(BUILT_TEST_PROGRAMS)\n+\n+HTSPREFIX =\n+include htslib_vars.mk\n+\n+lib-static: libhts.a\n+\n+# $(shell), :=, and ifeq/.../endif are GNU Make-specific. If you don\'t have\n+# GNU Make, comment out the parts of this conditional that don\'t apply.\n+PLATFORM := $(shell uname -s)\n+ifeq "$(PLATFORM)" "Darwin"\n+SHLIB_FLAVOUR = dylib\n+lib-shared: libhts.dylib\n+else\n+SHLIB_FLAVOUR = so\n+lib-shared: libhts.so\n+endif\n+\n+\n+PACKAGE_VERSION = 1.2.1\n+LIBHTS_SOVERSION = 1\n+\n+\n+# $(NUMERIC_VERSION) is for items that must have a numeric X.Y.Z string\n+# even if this is a dirty or untagged Git working tree.\n+NUMERIC_VERSION = $(PACKAGE_VERSION)\n+\n+# If building from a Git repository, replace $(PACKAGE_VERSION) with the Git\n+# description of the working tree: either a release tag with the same value\n+# as $(PACKAGE_VERSION) above, or an exact description likely based on a tag.\n+# Much of this is also GNU Make-specific. 
If you don\'t have GNU Make and/or\n+# are n'..b'LDFLAGS) -o $@ test/fieldarith.o libhts.a $(LDLIBS) -lz\n+\n+test/hfile: test/hfile.o libhts.a\n+\t$(CC) $(LDFLAGS) -o $@ test/hfile.o libhts.a $(LDLIBS) -lz\n+\n+test/sam: test/sam.o libhts.a\n+\t$(CC) -pthread $(LDFLAGS) -o $@ test/sam.o libhts.a $(LDLIBS) -lz\n+\n+test/test-regidx: test/test-regidx.o libhts.a\n+\t$(CC) -pthread $(LDFLAGS) -o $@ test/test-regidx.o libhts.a $(LDLIBS) -lz\n+\n+test/test_view: test/test_view.o libhts.a\n+\t$(CC) -pthread $(LDFLAGS) -o $@ test/test_view.o libhts.a $(LDLIBS) -lz\n+\n+test/test-vcf-api: test/test-vcf-api.o libhts.a\n+\t$(CC) -pthread $(LDFLAGS) -o $@ test/test-vcf-api.o libhts.a $(LDLIBS) -lz\n+\n+test/test-vcf-sweep: test/test-vcf-sweep.o libhts.a\n+\t$(CC) -pthread $(LDFLAGS) -o $@ test/test-vcf-sweep.o libhts.a $(LDLIBS) -lz\n+\n+test/fieldarith.o: test/fieldarith.c $(htslib_sam_h)\n+test/hfile.o: test/hfile.c $(htslib_hfile_h) $(htslib_hts_defs_h)\n+test/test-regidx.o: test/test-regidx.c $(htslib_regidx_h)\n+test/sam.o: test/sam.c $(htslib_sam_h) $(htslib_faidx_h) htslib/kstring.h\n+test/test_view.o: test/test_view.c $(cram_h) $(htslib_sam_h)\n+test/test-vcf-api.o: test/test-vcf-api.c $(htslib_hts_h) $(htslib_vcf_h) htslib/kstring.h\n+test/test-vcf-sweep.o: test/test-vcf-sweep.c $(htslib_vcf_sweep_h)\n+\n+\n+install: libhts.a $(BUILT_PROGRAMS) installdirs install-$(SHLIB_FLAVOUR) install-pkgconfig\n+\t$(INSTALL_PROGRAM) $(BUILT_PROGRAMS) $(DESTDIR)$(bindir)\n+\t$(INSTALL_DATA) htslib/*.h $(DESTDIR)$(includedir)/htslib\n+\t$(INSTALL_DATA) libhts.a $(DESTDIR)$(libdir)/libhts.a\n+\t$(INSTALL_DATA) htsfile.1 tabix.1 $(DESTDIR)$(man1dir)\n+\t$(INSTALL_DATA) faidx.5 sam.5 vcf.5 $(DESTDIR)$(man5dir)\n+\n+installdirs:\n+\t$(INSTALL_DIR) $(DESTDIR)$(bindir) $(DESTDIR)$(includedir) $(DESTDIR)$(includedir)/htslib $(DESTDIR)$(libdir) $(DESTDIR)$(man1dir) $(DESTDIR)$(man5dir) $(DESTDIR)$(pkgconfigdir)\n+\n+# After installation, the real file in $(libdir) will be 
libhts.so.X.Y.Z,\n+# with symlinks libhts.so (used via -lhts during linking of client programs)\n+# and libhts.so.NN (used by client executables at runtime).\n+\n+install-so: libhts.so installdirs\n+\t$(INSTALL_DATA) libhts.so $(DESTDIR)$(libdir)/libhts.so.$(PACKAGE_VERSION)\n+\tln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so\n+\tln -sf libhts.so.$(PACKAGE_VERSION) $(DESTDIR)$(libdir)/libhts.so.$(LIBHTS_SOVERSION)\n+\n+install-dylib: libhts.dylib installdirs\n+\t$(INSTALL_PROGRAM) libhts.dylib $(DESTDIR)$(libdir)/libhts.$(PACKAGE_VERSION).dylib\n+\tln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.dylib\n+\tln -sf libhts.$(PACKAGE_VERSION).dylib $(DESTDIR)$(libdir)/libhts.$(LIBHTS_SOVERSION).dylib\n+\n+# Substitute these pseudo-autoconf variables only at install time\n+# so that "make install prefix=/prefix/path" etc continue to work.\n+install-pkgconfig: installdirs\n+\tsed -e \'s#@includedir@#$(includedir)#g;s#@libdir@#$(libdir)#g;s#@PACKAGE_VERSION@#$(PACKAGE_VERSION)#g\' htslib.pc.in > $(DESTDIR)$(pkgconfigdir)/htslib.pc\n+\tchmod 644 $(DESTDIR)$(pkgconfigdir)/htslib.pc\n+\n+# A pkg-config file (suitable for copying to $PKG_CONFIG_PATH) that provides\n+# flags for building against the uninstalled library in this build directory.\n+htslib-uninstalled.pc: htslib.pc.in\n+\tsed -e \'s#@includedir@#\'`pwd`\'#g;s#@libdir@#\'`pwd`\'#g\' htslib.pc.in > $@\n+\n+\n+testclean:\n+\t-rm -f test/*.tmp test/*.tmp.*\n+\n+mostlyclean: testclean\n+\t-rm -f *.o *.pico cram/*.o cram/*.pico test/*.o test/*.dSYM version.h\n+\n+clean: mostlyclean clean-$(SHLIB_FLAVOUR)\n+\t-rm -f libhts.a $(BUILT_PROGRAMS) $(BUILT_TEST_PROGRAMS)\n+\n+distclean: clean\n+\t-rm -f config.cache config.log config.mk config.status\n+\t-rm -f TAGS *-uninstalled.pc\n+\n+clean-so:\n+\t-rm -f libhts.so libhts.so.*\n+\n+clean-dylib:\n+\t-rm -f libhts.dylib libhts.*.dylib\n+\n+\n+tags:\n+\tctags -f TAGS *.[ch] cram/*.[ch] htslib/*.h\n+\n+\n+force:\n+\n+\n+.PHONY: all check clean 
distclean force install install-pkgconfig installdirs\n+.PHONY: lib-shared lib-static mostlyclean print-version tags test testclean\n+.PHONY: clean-so install-so\n+.PHONY: clean-dylib install-dylib\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/NEWS --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/NEWS Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,50 @@ +Noteworthy changes in release 1.2.1 (3 February 2015) + +* Reinstated hts_file_type() and FT_* macros, which were available until 1.1 + but briefly removed in 1.2. This function is deprecated and will be removed + in a future release -- you should use hts_detect_format() etc instead + + +Noteworthy changes in release 1.2 (2 February 2015) + +* HTSlib now has a configure script which checks your build environment + and allows for selection of optional extras. See INSTALL for details + +* By default, reference sequences are fetched from the EBI CRAM Reference + Registry and cached in your $HOME cache directory. This behaviour can + be controlled by setting REF_PATH and REF_CACHE enviroment variables + (see the samtools(1) man page for details) + +* Numerous CRAM improvements: + - Support for CRAM v3.0, an upcoming revision to CRAM supporting + better compression and per-container checksums + - EOF checking for v2.1 and v3.0 (similar to checking BAM EOF blocks) + - Non-standard values for PNEXT and TLEN fields are now preserved + - hts_set_fai_filename() now provides a reference file when encoding + - Generated read names are now numbered from 1, rather than being + labelled 'slice:record-in-slice' + - Multi-threading and speed improvements + +* New htsfile command for identifying file formats, and corresponding + file format detection APIs + +* New tabix --regions FILE, --targets FILE options for filtering via BED files + +* Optional iRODS file access, disabled by default. 
Configure with --with-irods + to enable accessing iRODS data objects directly via 'irods:DATAOBJ' + +* All occurences of 2^29 in the source have been eliminated, so indexing + and querying against reference sequences larger than 512Mbp works (when + using CSI indices) + +* Support for plain GZIP compression in various places + +* VCF header editing speed improvements + +* Added seq_nt16_int[] (equivalent to the samtools API's bam_nt16_nt4_table) + +* Reinstated faidx_fetch_nseq(), which was accidentally removed from 1.1. + Now faidx_fetch_nseq() and faidx_nseq() are equivalent; eventually + faidx_fetch_nseq() will be deprecated and removed [#156] + +* Fixed bugs #141, #152, #155, #158, #159, and various memory leaks |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/README --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/README Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +HTSlib is an implementation of a unified C library for accessing common file +formats, such as SAM, CRAM, VCF, and BCF, used for high-throughput sequencing +data. It is the core library used by samtools and bcftools. + +See INSTALL for building and installation instructions. |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/bgzf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/bgzf.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,1124 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology\n+ 2011, 2012 Attractive Chaos <attractor@live.co.uk>\n+ Copyright (C) 2009, 2013, 2014 Genome Research Ltd\n+\n+ Permission is hereby granted, free of charge, to any person obtaining a copy\n+ of this software and associated documentation files (the "Software"), to deal\n+ in the Software without restriction, including without limitation the rights\n+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+ copies of the Software, and to permit persons to whom the Software is\n+ furnished to do so, subject to the following conditions:\n+\n+ The above copyright notice and this permission notice shall be included in\n+ all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n+ THE SOFTWARE.\n+*/\n+\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <errno.h>\n+#include <unistd.h>\n+#include <assert.h>\n+#include <pthread.h>\n+#include <sys/types.h>\n+#include <inttypes.h>\n+\n+#include "htslib/hts.h"\n+#include "htslib/bgzf.h"\n+#include "htslib/hfile.h"\n+\n+#define BGZF_CACHE\n+#define BGZF_MT\n+\n+#define BLOCK_HEADER_LENGTH 18\n+#define BLOCK_FOOTER_LENGTH 8\n+\n+\n+/* BGZF/GZIP header (speciallized from RFC 1952; little endian):\n+ +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n+ | 31|139| 8| 4| 0| 0|255| 6| 66| 67| 2|BLK_LEN|\n+ +---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+---+\n+ BGZF extension:\n+ ^ ^ ^ ^\n+ | | | |\n+ FLG.EXTRA XLEN B C\n+\n+ BGZF format is compatible with GZIP. It limits the size of each compressed\n+ block to 2^16 bytes and adds and an extra "BC" field in the gzip header which\n+ records the size.\n+\n+*/\n+static const uint8_t g_magic[19] = "\\037\\213\\010\\4\\0\\0\\0\\0\\0\\377\\6\\0\\102\\103\\2\\0\\0\\0";\n+\n+#ifdef BGZF_CACHE\n+typedef struct {\n+ int size;\n+ uint8_t *block;\n+ int64_t end_offset;\n+} cache_t;\n+#include "htslib/khash.h"\n+KHASH_MAP_INIT_INT64(cache, cache_t)\n+#endif\n+\n+typedef struct\n+{\n+ uint64_t uaddr; // offset w.r.t. uncompressed data\n+ uint64_t caddr; // offset w.r.t. 
compressed data\n+}\n+bgzidx1_t;\n+\n+struct __bgzidx_t\n+{\n+ int noffs, moffs; // the size of the index, n:used, m:allocated\n+ bgzidx1_t *offs; // offsets\n+ uint64_t ublock_addr; // offset of the current block (uncompressed data)\n+};\n+\n+void bgzf_index_destroy(BGZF *fp);\n+int bgzf_index_add_block(BGZF *fp);\n+\n+static inline void packInt16(uint8_t *buffer, uint16_t value)\n+{\n+ buffer[0] = value;\n+ buffer[1] = value >> 8;\n+}\n+\n+static inline int unpackInt16(const uint8_t *buffer)\n+{\n+ return buffer[0] | buffer[1] << 8;\n+}\n+\n+static inline void packInt32(uint8_t *buffer, uint32_t value)\n+{\n+ buffer[0] = value;\n+ buffer[1] = value >> 8;\n+ buffer[2] = value >> 16;\n+ buffer[3] = value >> 24;\n+}\n+\n+static BGZF *bgzf_read_init(hFILE *hfpr)\n+{\n+ BGZF *fp;\n+ uint8_t magic[18];\n+ ssize_t n = hpeek(hfpr, magic, 18);\n+ if (n < 0) return NULL;\n+\n+ fp = (BGZF*)calloc(1, sizeof(BGZF));\n+ if (fp == NULL) return NULL;\n+\n+ fp->is_write = 0;\n+ fp->is_compressed = (n==2 && magic[0]==0x1f && magic[1]==0x8b);\n+ fp->uncompressed_block = malloc(BGZF_MAX_BLOCK_SIZE);\n+ fp->compressed_block = malloc(BGZF_MAX_BLOCK_SIZE);\n+ fp->is_compressed = (n==18 &'..b'ord is not present when opened for writing.\n+ // This is not a bug.\n+\n+ int i;\n+ if ( fp->is_be )\n+ {\n+ uint64_t x = fp->idx->noffs - 1;\n+ fwrite(ed_swap_8p(&x), 1, sizeof(x), idx);\n+ for (i=1; i<fp->idx->noffs; i++)\n+ {\n+ x = fp->idx->offs[i].caddr; fwrite(ed_swap_8p(&x), 1, sizeof(x), idx);\n+ x = fp->idx->offs[i].uaddr; fwrite(ed_swap_8p(&x), 1, sizeof(x), idx);\n+ }\n+ }\n+ else\n+ {\n+ uint64_t x = fp->idx->noffs - 1;\n+ fwrite(&x, 1, sizeof(x), idx);\n+ for (i=1; i<fp->idx->noffs; i++)\n+ {\n+ fwrite(&fp->idx->offs[i].caddr, 1, sizeof(fp->idx->offs[i].caddr), idx);\n+ fwrite(&fp->idx->offs[i].uaddr, 1, sizeof(fp->idx->offs[i].uaddr), idx);\n+ }\n+ }\n+ fclose(idx);\n+ return 0;\n+}\n+\n+\n+int bgzf_index_load(BGZF *fp, const char *bname, const char *suffix)\n+{\n+ char *tmp = 
NULL;\n+ if ( suffix )\n+ {\n+ int blen = strlen(bname);\n+ int slen = strlen(suffix);\n+ tmp = (char*) malloc(blen + slen + 1);\n+ if ( !tmp ) return -1;\n+ memcpy(tmp,bname,blen);\n+ memcpy(tmp+blen,suffix,slen+1);\n+ }\n+\n+ FILE *idx = fopen(tmp?tmp:bname,"rb");\n+ if ( tmp ) free(tmp);\n+ if ( !idx ) return -1;\n+\n+ fp->idx = (bgzidx_t*) calloc(1,sizeof(bgzidx_t));\n+ uint64_t x;\n+ if ( fread(&x, 1, sizeof(x), idx) != sizeof(x) ) return -1;\n+\n+ fp->idx->noffs = fp->idx->moffs = 1 + (fp->is_be ? ed_swap_8(x) : x);\n+ fp->idx->offs = (bgzidx1_t*) malloc(fp->idx->moffs*sizeof(bgzidx1_t));\n+ fp->idx->offs[0].caddr = fp->idx->offs[0].uaddr = 0;\n+\n+ int i;\n+ if ( fp->is_be )\n+ {\n+ int ret = 0;\n+ for (i=1; i<fp->idx->noffs; i++)\n+ {\n+ ret += fread(&x, 1, sizeof(x), idx); fp->idx->offs[i].caddr = ed_swap_8(x);\n+ ret += fread(&x, 1, sizeof(x), idx); fp->idx->offs[i].uaddr = ed_swap_8(x);\n+ }\n+ if ( ret != sizeof(x)*2*(fp->idx->noffs-1) ) return -1;\n+ }\n+ else\n+ {\n+ int ret = 0;\n+ for (i=1; i<fp->idx->noffs; i++)\n+ {\n+ ret += fread(&x, 1, sizeof(x), idx); fp->idx->offs[i].caddr = x;\n+ ret += fread(&x, 1, sizeof(x), idx); fp->idx->offs[i].uaddr = x;\n+ }\n+ if ( ret != sizeof(x)*2*(fp->idx->noffs-1) ) return -1;\n+ }\n+ fclose(idx);\n+ return 0;\n+\n+}\n+\n+int bgzf_useek(BGZF *fp, long uoffset, int where)\n+{\n+ if ( !fp->is_compressed )\n+ {\n+ if (hseek(fp->fp, uoffset, SEEK_SET) < 0)\n+ {\n+ fp->errcode |= BGZF_ERR_IO;\n+ return -1;\n+ }\n+ fp->block_length = 0; // indicates current block has not been loaded\n+ fp->block_address = uoffset;\n+ fp->block_offset = 0;\n+ bgzf_read_block(fp);\n+ fp->uncompressed_address = uoffset;\n+ return 0;\n+ }\n+\n+ if ( !fp->idx )\n+ {\n+ fp->errcode |= BGZF_ERR_IO;\n+ return -1;\n+ }\n+\n+ // binary search\n+ int ilo = 0, ihi = fp->idx->noffs - 1;\n+ while ( ilo<=ihi )\n+ {\n+ int i = (ilo+ihi)*0.5;\n+ if ( uoffset < fp->idx->offs[i].uaddr ) ihi = i - 1;\n+ else if ( uoffset >= fp->idx->offs[i].uaddr ) ilo = 
i + 1;\n+ else break;\n+ }\n+ int i = ilo-1;\n+ if (hseek(fp->fp, fp->idx->offs[i].caddr, SEEK_SET) < 0)\n+ {\n+ fp->errcode |= BGZF_ERR_IO;\n+ return -1;\n+ }\n+ fp->block_length = 0; // indicates current block has not been loaded\n+ fp->block_address = fp->idx->offs[i].caddr;\n+ fp->block_offset = 0;\n+ if ( bgzf_read_block(fp) < 0 ) return -1;\n+ if ( uoffset - fp->idx->offs[i].uaddr > 0 )\n+ {\n+ fp->block_offset = uoffset - fp->idx->offs[i].uaddr;\n+ assert( fp->block_offset <= fp->block_length ); // todo: skipped, unindexed, blocks\n+ }\n+ fp->uncompressed_address = uoffset;\n+ return 0;\n+}\n+\n+long bgzf_utell(BGZF *fp)\n+{\n+ return fp->uncompressed_address; // currently maintained only when reading\n+}\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/bgzip.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/bgzip.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,284 @@\n+/* bgzip.c -- Block compression/decompression utility.\n+\n+ Copyright (C) 2008, 2009 Broad Institute / Massachusetts Institute of Technology\n+ Copyright (C) 2010, 2013, 2014 Genome Research Ltd.\n+\n+ Permission is hereby granted, free of charge, to any person obtaining a copy\n+ of this software and associated documentation files (the "Software"), to deal\n+ in the Software without restriction, including without limitation the rights\n+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+ copies of the Software, and to permit persons to whom the Software is\n+ furnished to do so, subject to the following conditions:\n+\n+ The above copyright notices and this permission notice shall be included in\n+ all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE\n+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n+ THE SOFTWARE.\n+*/\n+\n+#include <stdlib.h>\n+#include <string.h>\n+#include <stdio.h>\n+#include <fcntl.h>\n+#include <unistd.h>\n+#include <errno.h>\n+#include <stdarg.h>\n+#include <getopt.h>\n+#include <sys/select.h>\n+#include <sys/stat.h>\n+#include "htslib/bgzf.h"\n+#include "htslib/hts.h"\n+\n+static const int WINDOW_SIZE = 64 * 1024;\n+\n+static void error(const char *format, ...)\n+{\n+ va_list ap;\n+ va_start(ap, format);\n+ vfprintf(stderr, format, ap);\n+ va_end(ap);\n+ exit(EXIT_FAILURE);\n+}\n+\n+static int write_open(const char *fn, int is_forced)\n+{\n+ int fd = -1;\n+ char c;\n+ if (!is_forced) {\n+ if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666)) < 0 && errno == EEXIST) {\n+ fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? 
", fn);\n+ if ( scanf("%c", &c) != 1 ) c = \'n\';\n+ if (c != \'Y\' && c != \'y\') {\n+ fprintf(stderr, "[bgzip] not overwritten\\n");\n+ exit(EXIT_FAILURE);\n+ }\n+ }\n+ }\n+ if (fd < 0) {\n+ if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0) {\n+ fprintf(stderr, "[bgzip] %s: Fail to write\\n", fn);\n+ exit(EXIT_FAILURE);\n+ }\n+ }\n+ return fd;\n+}\n+\n+static int bgzip_main_usage(void)\n+{\n+ fprintf(stderr, "\\n");\n+ fprintf(stderr, "Version: %s\\n", hts_version());\n+ fprintf(stderr, "Usage: bgzip [OPTIONS] [FILE] ...\\n");\n+ fprintf(stderr, "Options:\\n");\n+ fprintf(stderr, " -b, --offset INT decompress at virtual file pointer (0-based uncompressed offset)\\n");\n+ fprintf(stderr, " -c, --stdout write on standard output, keep original files unchanged\\n");\n+ fprintf(stderr, " -d, --decompress decompress\\n");\n+ fprintf(stderr, " -f, --force overwrite files without asking\\n");\n+ fprintf(stderr, " -h, --help give this help\\n");\n+ fprintf(stderr, " -i, --index compress and create BGZF index\\n");\n+ fprintf(stderr, " -I, --index-name FILE name of BGZF index file [file.gz.gzi]\\n");\n+ fprintf(stderr, " -r, --reindex (re)index compressed file\\n");\n+ fprintf(stderr, " -s, --size INT decompress INT bytes (uncompressed size)\\n");\n+ fprintf(stderr, "\\n");\n+ return 1;\n+}\n+\n+int main(int argc, char **argv)\n+{\n+ int c, compress, pstdout, is_forced, index = 0, reindex = 0;\n+ BGZF *fp;\n+ void *buffer;\n+ long start, end, size;\n+ char *index_fname = NULL;\n+\n+ static struct option loptions[] =\n+ {\n+ {"help",0,0,\'h\'},\n+ {"offset",1,0,\'b\'},\n+ {"stdout",0,0,\'c\'},\n+ {"decompress",0,0,\'d\'},\n+ {"fo'..b' // f_dst will be closed here\n+ if ( index )\n+ {\n+ if ( index_fname ) bgzf_index_dump(fp, index_fname, NULL);\n+ else bgzf_index_dump(fp, argv[optind], ".gz.gzi");\n+ }\n+ if (bgzf_close(fp) < 0) error("Close failed: Error %d", fp->errcode);\n+ if (argc > optind && !pstdout) unlink(argv[optind]);\n+ free(buffer);\n+ 
close(f_src);\n+ return 0;\n+ }\n+ else if ( reindex )\n+ {\n+ if ( argc>optind )\n+ {\n+ fp = bgzf_open(argv[optind], "r");\n+ if ( !fp ) error("[bgzip] Could not open file: %s\\n", argv[optind]);\n+ }\n+ else\n+ {\n+ if ( !index_fname ) error("[bgzip] Index file name expected when reading from stdin\\n");\n+ fp = bgzf_fdopen(fileno(stdin), "r");\n+ if ( !fp ) error("[bgzip] Could not read from stdin: %s\\n", strerror(errno));\n+ }\n+\n+ buffer = malloc(BGZF_BLOCK_SIZE);\n+ bgzf_index_build_init(fp);\n+ int ret;\n+ while ( (ret=bgzf_read(fp, buffer, BGZF_BLOCK_SIZE))>0 ) ;\n+ free(buffer);\n+ if ( ret<0 ) error("Is the file gzipped or bgzipped? The latter is required for indexing.\\n");\n+\n+ if ( index_fname )\n+ bgzf_index_dump(fp, index_fname, NULL);\n+ else\n+ bgzf_index_dump(fp, argv[optind], ".gzi");\n+\n+ if ( bgzf_close(fp)<0 ) error("Close failed: Error %d\\n",fp->errcode);\n+ return 0;\n+ }\n+ else\n+ {\n+ struct stat sbuf;\n+ int f_dst;\n+\n+ if ( argc>optind )\n+ {\n+ if ( stat(argv[optind],&sbuf)<0 )\n+ {\n+ fprintf(stderr, "[bgzip] %s: %s\\n", strerror(errno), argv[optind]);\n+ return 1;\n+ }\n+ char *name;\n+ int len = strlen(argv[optind]);\n+ if ( strcmp(argv[optind]+len-3,".gz") )\n+ {\n+ fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\\n", argv[optind]);\n+ return 1;\n+ }\n+ fp = bgzf_open(argv[optind], "r");\n+ if (fp == NULL) {\n+ fprintf(stderr, "[bgzip] Could not open file: %s\\n", argv[optind]);\n+ return 1;\n+ }\n+\n+ if (pstdout) {\n+ f_dst = fileno(stdout);\n+ }\n+ else {\n+ name = strdup(argv[optind]);\n+ name[strlen(name) - 3] = \'\\0\';\n+ f_dst = write_open(name, is_forced);\n+ free(name);\n+ }\n+ }\n+ else if (!pstdout && isatty(fileno((FILE *)stdin)) )\n+ return bgzip_main_usage();\n+ else\n+ {\n+ f_dst = fileno(stdout);\n+ fp = bgzf_fdopen(fileno(stdin), "r");\n+ if (fp == NULL) {\n+ fprintf(stderr, "[bgzip] Could not read from stdin: %s\\n", strerror(errno));\n+ return 1;\n+ }\n+ }\n+ buffer = malloc(WINDOW_SIZE);\n+ if ( 
start>0 )\n+ {\n+ if ( bgzf_index_load(fp, argv[optind], ".gzi") < 0 ) error("Could not load index: %s.gzi\\n", argv[optind]);\n+ if ( bgzf_useek(fp, start, SEEK_SET) < 0 ) error("Could not seek to %d-th (uncompressd) byte\\n", start);\n+ }\n+ while (1) {\n+ if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE);\n+ else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start));\n+ if (c == 0) break;\n+ if (c < 0) error("Could not read %d bytes: Error %d\\n", (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start), fp->errcode);\n+ start += c;\n+ if ( write(f_dst, buffer, c) != c ) error("Could not write %d bytes\\n", c);\n+ if (end >= 0 && start >= end) break;\n+ }\n+ free(buffer);\n+ if (bgzf_close(fp) < 0) error("Close failed: Error %d\\n",fp->errcode);\n+ if (!pstdout) unlink(argv[optind]);\n+ return 0;\n+ }\n+ return 0;\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/config.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/config.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,3 @@ +#define _USE_KNETFILE +#define BGZF_CACHE +#define BGZF_MT |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/config.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/config.mk Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,72 @@ +# Optional configure Makefile overrides for htslib. +# +# Copyright (C) 2015 Genome Research Ltd. +# +# Author: John Marshall <jm18@sanger.ac.uk> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# This is config.mk. Generated from config.mk.in by configure. +# +# If you use configure, this file overrides variables and augments rules +# in the Makefile to reflect your configuration choices. If you don't run +# configure, the main Makefile contains suitable conservative defaults. + +prefix = /usr/local +exec_prefix = ${prefix} +bindir = ${exec_prefix}/bin +includedir = ${prefix}/include +libdir = ${exec_prefix}/lib +datarootdir = ${prefix}/share +mandir = ${datarootdir}/man + +CC = gcc +RANLIB = ranlib + +CFLAGS = -g -O2 +LDFLAGS = +LDLIBS = + + +# ifeq/.../endif, +=, and target-specific variables are GNU Make-specific. 
+# If you don't have GNU Make, comment out this conditional and note that +# to enable iRODS you will need to implement the following elsewhere. +ifeq "iRODS-disabled" "iRODS-enabled" + +IRODS_HOME ?= /disabled + +EXTRA_CPPFLAGS_IRODS = \ + -I$(IRODS_HOME)/lib/api/include \ + -I$(IRODS_HOME)/lib/core/include \ + -I$(IRODS_HOME)/lib/md5/include \ + -I$(IRODS_HOME)/lib/sha1/include \ + -I$(IRODS_HOME)/server/core/include \ + -I$(IRODS_HOME)/server/drivers/include \ + -I$(IRODS_HOME)/server/icat/include + +LDFLAGS += -L$(IRODS_HOME)/lib/core/obj +LDLIBS += -lRodsAPIs -lgssapi_krb5 + +LIBHTS_OBJS += hfile_irods.o + +hfile.o hfile.pico: CPPFLAGS += -DHAVE_IRODS + +hfile_irods.o hfile_irods.pico: CPPFLAGS += $(EXTRA_CPPFLAGS_IRODS) + +endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/config.mk.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/config.mk.in Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,72 @@ +# Optional configure Makefile overrides for htslib. +# +# Copyright (C) 2015 Genome Research Ltd. +# +# Author: John Marshall <jm18@sanger.ac.uk> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# This is @configure_input@ +# +# If you use configure, this file overrides variables and augments rules +# in the Makefile to reflect your configuration choices. If you don't run +# configure, the main Makefile contains suitable conservative defaults. + +prefix = @prefix@ +exec_prefix = @exec_prefix@ +bindir = @bindir@ +includedir = @includedir@ +libdir = @libdir@ +datarootdir = @datarootdir@ +mandir = @mandir@ + +CC = @CC@ +RANLIB = @RANLIB@ + +CFLAGS = @CFLAGS@ +LDFLAGS = @LDFLAGS@ +LDLIBS = @LIBS@ + + +# ifeq/.../endif, +=, and target-specific variables are GNU Make-specific. +# If you don't have GNU Make, comment out this conditional and note that +# to enable iRODS you will need to implement the following elsewhere. 
+ifeq "iRODS-@irods@" "iRODS-enabled" + +@define_IRODS_HOME@ + +EXTRA_CPPFLAGS_IRODS = \ + -I$(IRODS_HOME)/lib/api/include \ + -I$(IRODS_HOME)/lib/core/include \ + -I$(IRODS_HOME)/lib/md5/include \ + -I$(IRODS_HOME)/lib/sha1/include \ + -I$(IRODS_HOME)/server/core/include \ + -I$(IRODS_HOME)/server/drivers/include \ + -I$(IRODS_HOME)/server/icat/include + +LDFLAGS += -L$(IRODS_HOME)/lib/core/obj +LDLIBS += -lRodsAPIs -lgssapi_krb5 + +LIBHTS_OBJS += hfile_irods.o + +hfile.o hfile.pico: CPPFLAGS += -DHAVE_IRODS + +hfile_irods.o hfile_irods.pico: CPPFLAGS += $(EXTRA_CPPFLAGS_IRODS) + +endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/configure --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/configure Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,4012 @@\n+#! /bin/sh\n+# Guess values for system-dependent variables and create Makefiles.\n+# Generated by GNU Autoconf 2.68 for HTSlib 1.2.1.\n+#\n+# Report bugs to <samtools-help@lists.sourceforge.net>.\n+#\n+#\n+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,\n+# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software\n+# Foundation, Inc.\n+#\n+#\n+# This configure script is free software; the Free Software Foundation\n+# gives unlimited permission to copy, distribute and modify it.\n+#\n+# Portions copyright (C) 2015 Genome Research Ltd.\n+#\n+# This configure script is free software: you are free to change and\n+# redistribute it. There is NO WARRANTY, to the extent permitted by law.\n+## -------------------- ##\n+## M4sh Initialization. ##\n+## -------------------- ##\n+\n+# Be more Bourne compatible\n+DUALCASE=1; export DUALCASE # for MKS sh\n+if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then :\n+ emulate sh\n+ NULLCMD=:\n+ # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which\n+ # is contrary to our usage. 
Disable this feature.\n+ alias -g \'${1+"$@"}\'=\'"$@"\'\n+ setopt NO_GLOB_SUBST\n+else\n+ case `(set -o) 2>/dev/null` in #(\n+ *posix*) :\n+ set -o posix ;; #(\n+ *) :\n+ ;;\n+esac\n+fi\n+\n+\n+as_nl=\'\n+\'\n+export as_nl\n+# Printing a long string crashes Solaris 7 /usr/bin/printf.\n+as_echo=\'\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\'\n+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo\n+as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo\n+# Prefer a ksh shell builtin over an external printf program on Solaris,\n+# but without wasting forks for bash or zsh.\n+if test -z "$BASH_VERSION$ZSH_VERSION" \\\n+ && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then\n+ as_echo=\'print -r --\'\n+ as_echo_n=\'print -rn --\'\n+elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then\n+ as_echo=\'printf %s\\n\'\n+ as_echo_n=\'printf %s\'\n+else\n+ if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then\n+ as_echo_body=\'eval /usr/ucb/echo -n "$1$as_nl"\'\n+ as_echo_n=\'/usr/ucb/echo -n\'\n+ else\n+ as_echo_body=\'eval expr "X$1" : "X\\\\(.*\\\\)"\'\n+ as_echo_n_body=\'eval\n+ arg=$1;\n+ case $arg in #(\n+ *"$as_nl"*)\n+\texpr "X$arg" : "X\\\\(.*\\\\)$as_nl";\n+\targ=`expr "X$arg" : ".*$as_nl\\\\(.*\\\\)"`;;\n+ esac;\n+ expr "X$arg" : "X\\\\(.*\\\\)" | tr -d "$as_nl"\n+ \'\n+ export as_echo_n_body\n+ as_echo_n=\'sh -c $as_echo_n_body as_echo\'\n+ fi\n+ export as_echo_body\n+ as_echo=\'sh -c $as_echo_body as_echo\'\n+fi\n+\n+# The user is always right.\n+if test "${PATH_SEPARATOR+set}" != set; then\n+ PATH_SEPARATOR=:\n+ (PATH=\'/bin;/bin\'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && {\n+ (PATH=\'/bin:/bin\'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 ||\n+ PATH_SEPARATOR=\';\'\n+ }\n+fi\n+\n+\n+# IFS\n+# We need space, tab and new line, in precisely that order. 
Quoting is\n+# there to prevent editors from complaining about space-tab.\n+# (If _AS_PATH_WALK were called with IFS unset, it would disable word\n+# splitting by setting IFS to empty value.)\n+IFS=" ""\t$as_nl"\n+\n+# Find who we are. Look in the path if we contain no directory separator.\n+as_myself=\n+case $0 in #((\n+ *[\\\\/]* ) as_myself=$0 ;;\n+ *) as_save_IFS=$IFS; IFS=$PATH_SEPARATOR\n+for as_dir in $PATH\n+do\n+ IFS=$as_save_IFS\n+ test -z "$as_dir" && as_dir=.\n+ test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break\n+ done\n+IFS=$as_save_IFS\n+\n+ ;;\n+esac\n+# We did not find ourselves, most probably we were run as `sh COMMAND\'\n+# in which case we are not to be found in the path.\n+if test "x$as_myself" = x; then\n+ as_myself=$0\n+fi\n+if test ! -f "$as_myself"; then\n+ $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2\n+ exit 1\n+fi\n+\n+# Unset variables that we do not need and which cause bugs (e.g. in\n+# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1"\n+# suppresses any "Segmentation fau'..b'uld be removed a few years after 2.60.\n+ac_datarootdir_hack=; ac_datarootdir_seen=\n+ac_sed_dataroot=\'\n+/datarootdir/ {\n+ p\n+ q\n+}\n+/@datadir@/p\n+/@docdir@/p\n+/@infodir@/p\n+/@localedir@/p\n+/@mandir@/p\'\n+case `eval "sed -n \\"\\$ac_sed_dataroot\\" $ac_file_inputs"` in\n+*datarootdir*) ac_datarootdir_seen=yes;;\n+*@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*)\n+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5\n+$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;}\n+_ACEOF\n+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1\n+ ac_datarootdir_hack=\'\n+ s&@datadir@&$datadir&g\n+ s&@docdir@&$docdir&g\n+ s&@infodir@&$infodir&g\n+ s&@localedir@&$localedir&g\n+ s&@mandir@&$mandir&g\n+ s&\\\\\\${datarootdir}&$datarootdir&g\' ;;\n+esac\n+_ACEOF\n+\n+# Neutralize VPATH when 
`$srcdir\' = `.\'.\n+# Shell code in configure.ac might set extrasub.\n+# FIXME: do we really want to maintain this feature?\n+cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1\n+ac_sed_extra="$ac_vpsub\n+$extrasub\n+_ACEOF\n+cat >>$CONFIG_STATUS <<\\_ACEOF || ac_write_fail=1\n+:t\n+/@[a-zA-Z_][a-zA-Z_0-9]*@/!b\n+s|@configure_input@|$ac_sed_conf_input|;t t\n+s&@top_builddir@&$ac_top_builddir_sub&;t t\n+s&@top_build_prefix@&$ac_top_build_prefix&;t t\n+s&@srcdir@&$ac_srcdir&;t t\n+s&@abs_srcdir@&$ac_abs_srcdir&;t t\n+s&@top_srcdir@&$ac_top_srcdir&;t t\n+s&@abs_top_srcdir@&$ac_abs_top_srcdir&;t t\n+s&@builddir@&$ac_builddir&;t t\n+s&@abs_builddir@&$ac_abs_builddir&;t t\n+s&@abs_top_builddir@&$ac_abs_top_builddir&;t t\n+$ac_datarootdir_hack\n+"\n+eval sed \\"\\$ac_sed_extra\\" "$ac_file_inputs" | $AWK -f "$ac_tmp/subs.awk" \\\n+ >$ac_tmp/out || as_fn_error $? "could not create $ac_file" "$LINENO" 5\n+\n+test -z "$ac_datarootdir_hack$ac_datarootdir_seen" &&\n+ { ac_out=`sed -n \'/\\${datarootdir}/p\' "$ac_tmp/out"`; test -n "$ac_out"; } &&\n+ { ac_out=`sed -n \'/^[\t ]*datarootdir[\t ]*:*=/p\' \\\n+ "$ac_tmp/out"`; test -z "$ac_out"; } &&\n+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \\`datarootdir\'\n+which seems to be undefined. Please make sure it is defined" >&5\n+$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \\`datarootdir\'\n+which seems to be undefined. Please make sure it is defined" >&2;}\n+\n+ rm -f "$ac_tmp/stdin"\n+ case $ac_file in\n+ -) cat "$ac_tmp/out" && rm -f "$ac_tmp/out";;\n+ *) rm -f "$ac_file" && mv "$ac_tmp/out" "$ac_file";;\n+ esac \\\n+ || as_fn_error $? "could not create $ac_file" "$LINENO" 5\n+ ;;\n+\n+\n+\n+ esac\n+\n+done # for ac_tag\n+\n+\n+as_fn_exit 0\n+_ACEOF\n+ac_clean_files=$ac_clean_files_save\n+\n+test $ac_write_fail = 0 ||\n+ as_fn_error $? 
"write failure creating $CONFIG_STATUS" "$LINENO" 5\n+\n+\n+# configure is writing to config.log, and then calls config.status.\n+# config.status does its own redirection, appending to config.log.\n+# Unfortunately, on DOS this fails, as config.log is still kept open\n+# by configure, so config.status won\'t be able to write to it; its\n+# output is simply discarded. So we exec the FD to /dev/null,\n+# effectively closing config.log, so it can be properly (re)opened and\n+# appended to by config.status. When coming back to configure, we\n+# need to make the FD available again.\n+if test "$no_create" != yes; then\n+ ac_cs_success=:\n+ ac_config_status_args=\n+ test "$silent" = yes &&\n+ ac_config_status_args="$ac_config_status_args --quiet"\n+ exec 5>/dev/null\n+ $SHELL $CONFIG_STATUS $ac_config_status_args || ac_cs_success=false\n+ exec 5>>config.log\n+ # Use ||, not &&, to avoid exiting from the if with $? = 1, which\n+ # would make configure fail if this is the last instruction.\n+ $ac_cs_success || as_fn_exit 1\n+fi\n+if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then\n+ { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5\n+$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;}\n+fi\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/configure.ac --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/configure.ac Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,93 @@ +# Configure script for htslib, a C library for high-throughput sequencing data. +# +# Copyright (C) 2015 Genome Research Ltd. +# +# Author: John Marshall <jm18@sanger.ac.uk> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +dnl Process this file with autoconf to produce a configure script +AC_INIT([HTSlib], m4_esyscmd_s([make print-version]), + [samtools-help@lists.sourceforge.net], [], [http://www.htslib.org/]) +AC_PREREQ(2.63) dnl This version introduced 4-argument AC_CHECK_HEADER +AC_CONFIG_SRCDIR(hts.c) + +dnl Copyright notice to be copied into the generated configure script +AC_COPYRIGHT([Portions copyright (C) 2015 Genome Research Ltd. + +This configure script is free software: you are free to change and +redistribute it. 
There is NO WARRANTY, to the extent permitted by law.]) + +AC_PROG_CC +AC_PROG_RANLIB + +AC_ARG_WITH([irods], + [AS_HELP_STRING([[--with-irods[=DIR]]], + [use RodsAPIs library (in DIR) to support iRODS URLs])], + [case $withval in + no) irods=disabled ;; + yes) irods=enabled ;; + *) irods=enabled; IRODS_HOME=$withval ;; + esac], + [irods=disabled]) + +save_LIBS=$LIBS +zlib_devel=ok +dnl Set a trivial non-empty INCLUDES to avoid excess default includes tests +AC_CHECK_HEADER([zlib.h], [], [zlib_devel=missing], [;]) +AC_CHECK_LIB(z, inflate, [], [zlib_devel=missing]) +LIBS=$save_LIBS + +if test $zlib_devel != ok; then + AC_MSG_ERROR([zlib development files not found + +HTSlib uses compression routines from the zlib library <http://zlib.net>. +Building HTSlib requires zlib development files to be installed on the build +machine; you may need to ensure a package such as zlib1g-dev (on Debian or +Ubuntu Linux) or zlib-devel (on RPM-based Linux distributions) is installed. + +FAILED. This error must be resolved in order to build HTSlib successfully.]) +fi + +if test $irods = enabled; then + # TODO Also test whether we require libgssapi_krb5 and AC_CHECK_LIB it + save_LDFLAGS=$LDFLAGS + LDFLAGS="$LDFLAGS -L$IRODS_HOME/lib/core/obj" + AC_CHECK_LIB([RodsAPIs], [getRodsEnvFileName], + [case $with_irods in + yes) define_IRODS_HOME='# Uses $(IRODS_HOME) from the environment' ;; + *) define_IRODS_HOME="IRODS_HOME = $with_irods" ;; + esac], + [AC_MSG_ERROR([iRODS development files not found + +Support for iRODS URLs requires the libRodsAPI client library and headers. 
+Configure with --with-irods=DIR (or just --with-irods if \$IRODS_HOME has +been exported with a suitable value), where DIR is the base of an iRODS tree +such that the library is present as DIR/lib/core/obj/libRodsAPI.* and headers +are present under DIR/lib/api/include and so on.])], + [-lgssapi_krb5 -lpthread]) + LDFLAGS=$save_LDFLAGS +else + define_IRODS_HOME='IRODS_HOME ?= /disabled' +fi +AC_SUBST([irods]) +AC_SUBST([define_IRODS_HOME]) + +AC_CONFIG_FILES(config.mk) +AC_OUTPUT |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,63 @@ +/* +Copyright (c) 2012-2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/*! \file + * CRAM interface. + * + * Consider using the higher level hts_*() API for programs that wish to + * be file format agnostic (see htslib/hts.h). + * + * This API should be used for CRAM specific code. The specifics of the + * public API are implemented in cram_io.h, cram_encode.h and cram_decode.h + * although these should not be included directly (use this file instead). 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef _CRAM_H_ +#define _CRAM_H_ + +#include "cram/cram_samtools.h" +#include "cram/sam_header.h" +#include "cram_structs.h" +#include "cram_io.h" +#include "cram_encode.h" +#include "cram_decode.h" +#include "cram_stats.h" +#include "cram_codecs.h" +#include "cram_index.h" + +#endif + +#ifdef __cplusplus +} +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_codecs.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_codecs.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,1846 @@\n+/*\n+Copyright (c) 2012-2013 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+/*\n+ * FIXME: add checking of cram_external_type to return NULL on unsupported\n+ * {codec,type} tuples.\n+ */\n+\n+#ifdef HAVE_CONFIG_H\n+#include "io_lib_config.h"\n+#endif\n+\n+#include <stdlib.h>\n+#include <string.h>\n+#include <assert.h>\n+#include <limits.h>\n+\n+#include "cram/cram.h"\n+\n+static char *codec2str(enum cram_encoding codec) {\n+ switch (codec) {\n+ case E_NULL: return "NULL";\n+ case E_EXTERNAL: return "EXTERNAL";\n+ case E_GOLOMB: return "GOLOMB";\n+ case E_HUFFMAN: return "HUFFMAN";\n+ case E_BYTE_ARRAY_LEN: return "BYTE_ARRAY_LEN";\n+ case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP";\n+ case E_BETA: return "BETA";\n+ case E_SUBEXP: return "SUBEXP";\n+ case E_GOLOMB_RICE: return "GOLOMB_RICE";\n+ case E_GAMMA: return "GAMMA";\n+ }\n+\n+ return "(unknown)";\n+}\n+\n+/*\n+ * ---------------------------------------------------------------------------\n+ * Block bit-level I/O functions.\n+ * All defined static here to promote easy inlining by the compiler.\n+ */\n+\n+#if 0\n+/* Get a single bit, MSB first */\n+static signed int get_bit_MSB(cram_block *block) {\n+ unsigned int val;\n+\n+ if (block->byte > block->alloc)\n+\treturn -1;\n+\n+ val = block->data[block->byte] >> block->bit;\n+ if (--block->bit == -1) {\n+\tblock->bit = 7;\n+\tblock->byte++;\n+\t//printf("(%02X)", block->data[block->byte]);\n+ }\n+\n+ //printf("-B%d-", val&1);\n+\n+ return val & 1;\n+}\n+#endif\n+\n+/*\n+ * Count number of successive 0 and 1 
bits\n+ */\n+static int get_one_bits_MSB(cram_block *block) {\n+ int n = 0, b;\n+ do {\n+\tb = block->data[block->byte] >> block->bit;\n+\tif (--block->bit == -1) {\n+\t block->bit = 7;\n+\t block->byte++;\n+\t}\n+\tn++;\n+ } while (b&1);\n+\n+ return n-1;\n+}\n+\n+static int get_zero_bits_MSB(cram_block *block) {\n+ int n = 0, b;\n+ do {\n+\tb = block->data[block->byte] >> block->bit;\n+\tif (--block->bit == -1) {\n+\t block->bit = 7;\n+\t block->byte++;\n+\t}\n+\tn++;\n+ } while (!(b&1));\n+\n+ return n-1;\n+}\n+\n+#if 0\n+/* Stores a single bit */\n+static void store_bit_MSB(cram_block *block, unsigned int bit) {\n+ if (block->byte >= block->alloc) {\n+\tblock->alloc = block->alloc ? block->alloc*2 : 1024;\n+\tblock->data = realloc(block->data, block->alloc);\n+ }\n+\n+ if (bit)\n+\tblock->data[block->byte] |= (1 << block->bit);\n+\n+ if (--block'..b'itf8_put(cp, c->e_byte_array_stop.content_id);\n+ }\n+\n+ BLOCK_APPEND(b, buf, cp-buf);\n+ len += cp-buf;\n+\n+ return len;\n+}\n+\n+cram_codec *cram_byte_array_stop_encode_init(cram_stats *st,\n+\t\t\t\t\t enum cram_external_type option,\n+\t\t\t\t\t void *dat,\n+\t\t\t\t\t int version) {\n+ cram_codec *c;\n+\n+ c = malloc(sizeof(*c));\n+ if (!c)\n+\treturn NULL;\n+ c->codec = E_BYTE_ARRAY_STOP;\n+ c->free = cram_byte_array_stop_encode_free;\n+ c->encode = cram_byte_array_stop_encode;\n+ c->store = cram_byte_array_stop_encode_store;\n+\n+ c->e_byte_array_stop.stop = ((int *)dat)[0];\n+ c->e_byte_array_stop.content_id = ((int *)dat)[1];\n+\n+ return c;\n+}\n+\n+/*\n+ * ---------------------------------------------------------------------------\n+ */\n+\n+char *cram_encoding2str(enum cram_encoding t) {\n+ switch (t) {\n+ case E_NULL: return "NULL";\n+ case E_EXTERNAL: return "EXTERNAL";\n+ case E_GOLOMB: return "GOLOMB";\n+ case E_HUFFMAN: return "HUFFMAN";\n+ case E_BYTE_ARRAY_LEN: return "BYTE_ARRAY_LEN";\n+ case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP";\n+ case E_BETA: return "BETA";\n+ case E_SUBEXP: return 
"SUBEXP";\n+ case E_GOLOMB_RICE: return "GOLOMB_RICE";\n+ case E_GAMMA: return "GAMMA";\n+ }\n+ return "?";\n+}\n+\n+static cram_codec *(*decode_init[])(char *data,\n+\t\t\t\t int size,\n+\t\t\t\t enum cram_external_type option,\n+\t\t\t\t int version) = {\n+ NULL,\n+ cram_external_decode_init,\n+ NULL,\n+ cram_huffman_decode_init,\n+ cram_byte_array_len_decode_init,\n+ cram_byte_array_stop_decode_init,\n+ cram_beta_decode_init,\n+ cram_subexp_decode_init,\n+ NULL,\n+ cram_gamma_decode_init,\n+};\n+\n+cram_codec *cram_decoder_init(enum cram_encoding codec,\n+\t\t\t char *data, int size,\n+\t\t\t enum cram_external_type option,\n+\t\t\t int version) {\n+ if (decode_init[codec]) {\n+\treturn decode_init[codec](data, size, option, version);\n+ } else {\n+\tfprintf(stderr, "Unimplemented codec of type %s\\n", codec2str(codec));\n+\treturn NULL;\n+ }\n+}\n+\n+static cram_codec *(*encode_init[])(cram_stats *stx,\n+\t\t\t\t enum cram_external_type option,\n+\t\t\t\t void *opt,\n+\t\t\t\t int version) = {\n+ NULL,\n+ cram_external_encode_init,\n+ NULL,\n+ cram_huffman_encode_init,\n+ cram_byte_array_len_encode_init,\n+ cram_byte_array_stop_encode_init,\n+ cram_beta_encode_init,\n+ NULL, //cram_subexp_encode_init,\n+ NULL,\n+ NULL, //cram_gamma_encode_init,\n+};\n+\n+cram_codec *cram_encoder_init(enum cram_encoding codec,\n+\t\t\t cram_stats *st,\n+\t\t\t enum cram_external_type option,\n+\t\t\t void *dat,\n+\t\t\t int version) {\n+ if (st && !st->nvals)\n+\treturn NULL;\n+\n+ if (encode_init[codec]) {\n+\tcram_codec *r;\n+\tif ((r = encode_init[codec](st, option, dat, version)))\n+\t r->out = NULL;\n+\treturn r;\n+ } else {\n+\tfprintf(stderr, "Unimplemented codec of type %s\\n", codec2str(codec));\n+\tabort();\n+ }\n+}\n+\n+/*\n+ * Returns the content_id used by this codec, also in id2 if byte_array_len.\n+ * Returns -1 for the CORE block and -2 for unneeded.\n+ * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs.\n+ */\n+int cram_codec_to_id(cram_codec *c, 
int *id2) {\n+ int bnum1, bnum2 = -2;\n+\n+ switch (c->codec) {\n+ case E_HUFFMAN:\n+\tbnum1 = c->huffman.ncodes == 1 ? -2 : -1;\n+\tbreak;\n+ case E_GOLOMB:\n+ case E_BETA:\n+ case E_SUBEXP:\n+ case E_GOLOMB_RICE:\n+ case E_GAMMA:\n+\tbnum1 = -1;\n+\tbreak;\n+ case E_EXTERNAL:\n+\tbnum1 = c->external.content_id;\n+\tbreak;\n+ case E_BYTE_ARRAY_LEN:\n+\tbnum1 = cram_codec_to_id(c->byte_array_len.len_codec, NULL);\n+\tbnum2 = cram_codec_to_id(c->byte_array_len.value_codec, NULL);\n+\tbreak;\n+ case E_BYTE_ARRAY_STOP:\n+\tbnum1 = c->byte_array_stop.content_id;\n+\tbreak;\n+ case E_NULL:\n+\tbnum1 = -2;\n+\tbreak;\n+ default:\n+\tfprintf(stderr, "Unknown codec type %d\\n", c->codec);\n+\tbnum1 = -1;\n+ }\n+\n+ if (id2)\n+\t*id2 = bnum2;\n+ return bnum1;\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_codecs.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_codecs.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,165 @@ +/* +Copyright (c) 2012-2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _CRAM_ENCODINGS_H_ +#define _CRAM_ENCODINGS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <inttypes.h> + +struct cram_codec; + +/* + * Slow but simple huffman decoder to start with. + * Read a bit at a time, keeping track of {length, value} + * eg. 1 1 0 1 => {1,1}, {2,3}, {3,6}, {4,13} + * + * Keep track of this through the huffman code table. 
+ * For fast scanning we have an index of where the first code of length X + * appears. + */ +typedef struct { + int32_t symbol; + int32_t p; // next code start value, minus index to codes[] + int32_t code; + int32_t len; +} cram_huffman_code; + +typedef struct { + int ncodes; + cram_huffman_code *codes; +} cram_huffman_decoder; + +#define MAX_HUFF 128 +typedef struct { + cram_huffman_code *codes; + int nvals; + int val2code[MAX_HUFF+1]; // value to code lookup for small values +} cram_huffman_encoder; + +typedef struct { + int32_t offset; + int32_t nbits; +} cram_beta_decoder; + +typedef struct { + int32_t offset; +} cram_gamma_decoder; + +typedef struct { + int32_t offset; + int32_t k; +} cram_subexp_decoder; + +typedef struct { + int32_t content_id; + enum cram_external_type type; +} cram_external_decoder; + +typedef struct { + struct cram_codec *len_codec; + struct cram_codec *value_codec; +} cram_byte_array_len_decoder; + +typedef struct { + unsigned char stop; + int32_t content_id; +} cram_byte_array_stop_decoder; + +typedef struct { + enum cram_encoding len_encoding; + enum cram_encoding val_encoding; + void *len_dat; + void *val_dat; + struct cram_codec *len_codec; + struct cram_codec *val_codec; +} cram_byte_array_len_encoder; + +/* + * A generic codec structure. 
+ */ +typedef struct cram_codec { + enum cram_encoding codec; + cram_block *out; + void (*free)(struct cram_codec *codec); + int (*decode)(cram_slice *slice, struct cram_codec *codec, + cram_block *in, char *out, int *out_size); + int (*encode)(cram_slice *slice, struct cram_codec *codec, + char *in, int in_size); + int (*store)(struct cram_codec *codec, cram_block *b, char *prefix, + int version); + union { + cram_huffman_decoder huffman; + cram_external_decoder external; + cram_beta_decoder beta; + cram_gamma_decoder gamma; + cram_subexp_decoder subexp; + cram_byte_array_len_decoder byte_array_len; + cram_byte_array_stop_decoder byte_array_stop; + + cram_huffman_encoder e_huffman; + cram_external_decoder e_external; + cram_byte_array_stop_decoder e_byte_array_stop; + cram_byte_array_len_encoder e_byte_array_len; + cram_beta_decoder e_beta; + }; +} cram_codec; + +char *cram_encoding2str(enum cram_encoding t); + +cram_codec *cram_decoder_init(enum cram_encoding codec, char *data, int size, + enum cram_external_type option, + int version); +cram_codec *cram_encoder_init(enum cram_encoding codec, cram_stats *st, + enum cram_external_type option, void *dat, + int version); + +//int cram_decode(void *codes, char *in, int in_size, char *out, int *out_size); +//void cram_decoder_free(void *codes); + +//#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, (--b->bit == -1) && (b->bit = 7, b->byte++)) + +#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, b->byte += (--b->bit<0), b->bit&=7) + +/* + * Returns the content_id used by this codec, also in id2 if byte_array_len. + * Returns -1 for the CORE block and -2 for unneeded. + * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs. + */ +int cram_codec_to_id(cram_codec *c, int *id2); + +#ifdef __cplusplus +} +#endif + +#endif /* _CRAM_ENCODINGS_H_ */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_decode.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_decode.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,2775 @@\n+/*\n+Copyright (c) 2012-2014 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+/*\n+ * - In-memory decoding of CRAM data structures.\n+ * - Iterator for reading CRAM record by record.\n+ */\n+\n+#ifdef HAVE_CONFIG_H\n+#include "io_lib_config.h"\n+#endif\n+\n+#include <stdio.h>\n+#include <errno.h>\n+#include <assert.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <zlib.h>\n+#include <sys/types.h>\n+#include <sys/stat.h>\n+#include <math.h>\n+#include <ctype.h>\n+\n+#include "cram/cram.h"\n+#include "cram/os.h"\n+#include "cram/md5.h"\n+\n+//Whether CIGAR has just M or uses = and X to indicate match and mismatch\n+//#define USE_X\n+\n+/* ----------------------------------------------------------------------\n+ * CRAM compression headers\n+ */\n+\n+/*\n+ * Decodes the Tag Dictionary record in the preservation map\n+ * Updates the cram compression header.\n+ * \n+ * Returns number of bytes decoded on success\n+ * -1 on failure\n+ */\n+int cram_decode_TD(char *cp, cram_block_compression_hdr *h) {\n+ char *op = cp;\n+ unsigned char *dat;\n+ cram_block *b;\n+ int32_t blk_size;\n+ int nTL, i, sz;\n+\n+ if (!(b = cram_new_block(0, 0)))\n+\treturn -1;\n+ h->TD_blk = b;\n+\n+ /* Decode */\n+ cp += itf8_get(cp, &blk_size);\n+ if (!blk_size) {\n+\th->nTL = 0;\n+\th->TL = NULL;\n+\tcram_free_block(b);\n+ return cp - op;\n+ }\n+\n+ BLOCK_APPEND(b, cp, blk_size);\n+ cp += blk_size;\n+ sz = cp - op;\n+\n+ // Force nul termination if missing\n+ if (BLOCK_DATA(b)[BLOCK_SIZE(b)-1])\n+\tBLOCK_APPEND_CHAR(b, \'\\0\');\n+\n+ /* 
Set up TL lookup table */\n+ dat = BLOCK_DATA(b);\n+\n+ // Count\n+ for (nTL = i = 0; i < BLOCK_SIZE(b); i++) {\n+\tnTL++;\n+\twhile (dat[i])\n+\t i++;\n+ }\n+\n+ // Copy\n+ h->nTL = nTL;\n+ if (!(h->TL = calloc(h->nTL, sizeof(unsigned char *))))\n+\treturn -1;\n+ for (nTL = i = 0; i < BLOCK_SIZE(b); i++) {\n+\th->TL[nTL++] = &dat[i];\n+\twhile (dat[i])\n+\t i++;\n+ }\n+ \n+ return sz;\n+}\n+\n+/*\n+ * Decodes a CRAM block compression header.\n+ * Returns header ptr on success\n+ * NULL on failure\n+ */\n+cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd,\n+\t\t\t\t\t\t\t cram_block *b) {\n+ char *cp, *cp_copy;\n+ cram_block_compression_hdr *hdr = calloc(1, sizeof(*hdr));\n+ int i;\n+ int32_t map_size, map_count;\n+\n+ if (!hdr)\n+\treturn NULL;\n+\n+ if (b->method != RAW) {\n+\tif (cram_uncompress_block(b)) {\n+\t free(hdr);\n+\t return NULL;\n+\t}\n+ }\n+\n+ cp = (char *)b->d'..b'r(fd, c->comp_hdr_block);\n+\t\tif (!c->comp_hdr)\n+\t\t return NULL;\n+\n+\t\tif (!c->comp_hdr->AP_delta) {\n+\t\t pthread_mutex_lock(&fd->ref_lock);\n+\t\t fd->unsorted = 1;\n+\t\t pthread_mutex_unlock(&fd->ref_lock);\n+\t\t}\n+\t }\n+\n+\t if (c->num_records == 0) {\n+\t\tcram_free_container(c); c = NULL;\n+\t\tgoto empty_container;\n+\t }\n+\n+\n+\t if (!(s = c->slice = cram_read_slice(fd)))\n+\t\treturn NULL;\n+\t c->curr_slice++;\n+\t c->curr_rec = 0;\n+\t c->max_rec = s->hdr->num_records;\n+\n+\t s->last_apos = s->hdr->ref_seq_start;\n+\t \n+\t /* Skip slices not yet spanning our range */\n+\t if (fd->range.refid != -2 && s->hdr->ref_seq_id != -2) {\n+\t\tif (s->hdr->ref_seq_id != fd->range.refid) {\n+\t\t fd->eof = 1;\n+\t\t cram_free_slice(s);\n+\t\t c->slice = NULL;\n+\t\t return NULL;\n+\t\t}\n+\n+\t\tif (s->hdr->ref_seq_start > fd->range.end) {\n+\t\t fd->eof = 1;\n+\t\t cram_free_slice(s);\n+\t\t c->slice = NULL;\n+\t\t return NULL;\n+\t\t}\n+\n+\t\tif (s->hdr->ref_seq_start + s->hdr->ref_seq_span-1 <\n+\t\t fd->range.start) {\n+\t\t 
cram_free_slice(s);\n+\t\t c->slice = NULL;\n+\t\t cram_free_container(c);\n+\t\t c = NULL;\n+\t\t continue;\n+\t\t}\n+\t }\n+\t}\n+\n+\t/* Test decoding of 1st seq */\n+\tif (!c || !s)\n+\t break;\n+\n+\tif (cram_decode_slice_mt(fd, c, s, fd->header) != 0) {\n+\t //\tif (cram_decode_slice(fd, c, s, fd->header) != 0) {\n+\t fprintf(stderr, "Failure to decode slice\\n");\n+\t cram_free_slice(s);\n+\t c->slice = NULL;\n+\t return NULL;\n+\t}\n+\n+\tif (!fd->pool || fd->job_pending)\n+\t break;\n+\n+\t// Push it a bit far, to qsize in queue rather than pending arrival,\n+\t// as cram tends to be a bit bursty in decode timings.\n+\tif (t_pool_results_queue_len(fd->rqueue) > fd->pool->qsize)\n+\t break;\n+ }\n+\n+ if (fd->pool) {\n+\tt_pool_result *res;\n+\tcram_decode_job *j;\n+\t\n+//\tfprintf(stderr, "Thread pool len = %d, %d\\n",\n+//\t\tt_pool_results_queue_len(fd->rqueue),\n+//\t\tt_pool_results_queue_sz(fd->rqueue));\n+\n+\tif (fd->ooc && t_pool_results_queue_empty(fd->rqueue))\n+\t return NULL;\n+\n+\tres = t_pool_next_result_wait(fd->rqueue);\n+\n+\tif (!res || !res->data) {\n+\t fprintf(stderr, "t_pool_next_result failure\\n");\n+\t return NULL;\n+\t}\n+\n+\tj = (cram_decode_job *)res->data;\n+\tc = j->c;\n+\ts = j->s;\n+\n+\tfd->ctr = c;\n+\n+\tt_pool_delete_result(res, 1);\n+ }\n+\n+ *cp = c;\n+ return s;\n+}\n+\n+/*\n+ * Read the next cram record and return it.\n+ * Note that to decode cram_record the caller will need to look up some data\n+ * in the current slice, pointed to by fd->ctr->slice. 
This is valid until\n+ * the next call to cram_get_seq (which may invalidate it).\n+ *\n+ * Returns record pointer on success (do not free)\n+ * NULL on failure\n+ */\n+cram_record *cram_get_seq(cram_fd *fd) {\n+ cram_container *c;\n+ cram_slice *s;\n+\n+ for (;;) {\n+\tc = fd->ctr;\n+\tif (c && c->slice && c->curr_rec < c->max_rec) {\n+\t s = c->slice;\n+\t} else {\n+\t if (!(s = cram_next_slice(fd, &c)))\n+\t\treturn NULL;\n+\t}\n+\n+\tif (fd->range.refid != -2) {\n+\t if (s->crecs[c->curr_rec].ref_id < fd->range.refid) {\n+\t\tc->curr_rec++;\n+\t\tcontinue;\n+\t }\n+\n+\t if (s->crecs[c->curr_rec].ref_id != fd->range.refid) {\n+\t\tfd->eof = 1;\n+\t\tcram_free_slice(s);\n+\t\tc->slice = NULL;\n+\t\treturn NULL;\n+\t }\n+\n+\t if (s->crecs[c->curr_rec].apos > fd->range.end) {\n+\t\tfd->eof = 1;\n+\t\tcram_free_slice(s);\n+\t\tc->slice = NULL;\n+\t\treturn NULL;\n+\t }\n+\n+\t if (s->crecs[c->curr_rec].aend < fd->range.start) {\n+\t\tc->curr_rec++;\n+\t\tcontinue;\n+\t }\n+\t}\n+\n+\tbreak;\n+ }\n+\n+ fd->ctr = c;\n+ c->slice = s;\n+ return &s->crecs[c->curr_rec++];\n+}\n+\n+/*\n+ * Read the next cram record and convert it to a bam_seq_t struct.\n+ *\n+ * Returns 0 on success\n+ * -1 on EOF or failure (check fd->err)\n+ */\n+int cram_get_bam_seq(cram_fd *fd, bam_seq_t **bam) {\n+ cram_record *cr;\n+ cram_container *c;\n+ cram_slice *s;\n+\n+ if (!(cr = cram_get_seq(fd)))\n+\treturn -1;\n+\n+ c = fd->ctr;\n+ s = c->slice;\n+\n+ return cram_to_bam(fd->header, fd, s, cr, c->curr_rec-1, bam);\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_decode.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_decode.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,112 @@ +/* +Copyright (c) 2012-2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/*! \file + * Include cram.h instead. + * + * This is an internal part of the CRAM system and is automatically included + * when you #include cram.h. + * + * Implements the decoding portion of CRAM I/O. Also see + * cram_codecs.[ch] for the actual encoding functions themselves. 
+ */ + +#ifndef _CRAM_READ_H_ +#define _CRAM_READ_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* ---------------------------------------------------------------------- + * CRAM sequence iterators. + */ + +/*! Read the next cram record and return it as a cram_record. + * + * Note that to decode cram_record the caller will need to look up some data + * in the current slice, pointed to by fd->ctr->slice. This is valid until + * the next call to cram_get_seq (which may invalidate it). + * + * @return + * Returns record pointer on success (do not free); + * NULL on failure + */ +cram_record *cram_get_seq(cram_fd *fd); + +/*! Read the next cram record and convert it to a bam_seq_t struct. + * + * @return + * Returns 0 on success; + * -1 on EOF or failure (check fd->err) + */ +int cram_get_bam_seq(cram_fd *fd, bam_seq_t **bam); + + +/* ---------------------------------------------------------------------- + * Internal functions + */ + +/*! INTERNAL: + * Decodes a CRAM block compression header. + * + * @return + * Returns header ptr on success; + * NULL on failure + */ +cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd, + cram_block *b); + +/*! INTERNAL: + * Decodes a CRAM (un)mapped slice header block. + * + * @return + * Returns slice header ptr on success; + * NULL on failure + */ +cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b); + + +/*! INTERNAL: + * Decode an entire slice from container blocks. Fills out s->crecs[] array. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_decode_slice(cram_fd *fd, cram_container *c, cram_slice *s, + SAM_hdr *hdr); + + +#ifdef __cplusplus +} +#endif + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_encode.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_encode.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,3068 @@\n+/*\n+Copyright (c) 2012-2013 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+#ifdef HAVE_CONFIG_H\n+#include "io_lib_config.h"\n+#endif\n+\n+#include <stdio.h>\n+#include <errno.h>\n+#include <assert.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <zlib.h>\n+#include <sys/types.h>\n+#include <sys/stat.h>\n+#include <math.h>\n+#include <ctype.h>\n+\n+#include "cram/cram.h"\n+#include "cram/os.h"\n+#include "cram/md5.h"\n+\n+#define Z_CRAM_STRAT Z_FILTERED\n+//#define Z_CRAM_STRAT Z_RLE\n+//#define Z_CRAM_STRAT Z_HUFFMAN_ONLY\n+//#define Z_CRAM_STRAT Z_DEFAULT_STRATEGY\n+\n+static int process_one_read(cram_fd *fd, cram_container *c,\n+\t\t\t cram_slice *s, cram_record *cr,\n+\t\t\t bam_seq_t *b, int rnum);\n+\n+/*\n+ * Returns index of val into key.\n+ * Basically strchr(key, val)-key;\n+ */\n+static int sub_idx(char *key, char val) {\n+ int i;\n+\n+ for (i = 0; *key && *key++ != val; i++);\n+ return i;\n+}\n+\n+/*\n+ * Encodes a compression header block into a generic cram_block structure.\n+ *\n+ * Returns cram_block ptr on success\n+ * NULL on failure\n+ */\n+cram_block *cram_encode_compression_header(cram_fd *fd, cram_container *c,\n+\t\t\t\t\t cram_block_compression_hdr *h) {\n+ cram_block *cb = cram_new_block(COMPRESSION_HEADER, 0);\n+ cram_block *map = cram_new_block(COMPRESSION_HEADER, 0);\n+ int i, mc;\n+\n+ if (!cb || !map)\n+\treturn NULL;\n+\n+ /*\n+ * This is a concatenation of several blocks of data:\n+ * header + landmarks, preservation map, read encoding map, and the tag\n+ * encoding 
map.\n+ * All 4 are variable sized and we need to know how large these are\n+ * before creating the compression header itself as this starts with\n+ * the total size (stored as a variable length string).\n+ */\n+\n+ // Duplicated from container itself, and removed in 1.1\n+ if (CRAM_MAJOR_VERS(fd->version) == 1) {\n+\titf8_put_blk(cb, h->ref_seq_id);\n+\titf8_put_blk(cb, h->ref_seq_start);\n+\titf8_put_blk(cb, h->ref_seq_span);\n+\titf8_put_blk(cb, h->num_records);\n+\titf8_put_blk(cb, h->num_landmarks);\n+\tfor (i = 0; i < h->num_landmarks; i++) {\n+\t itf8_put_blk(cb, h->landmark[i]);\n+\t}\n+ }\n+\n+ /* Create in-memory preservation map */\n+ /* FIXME: should create this when we create the container */\n+ {\n+\tkhint_t k;\n+\tint r;\n+\n+\tif (!(h->preservation_map = kh_init(map)))\n+\t return NULL;\n+\n+\tk = kh_put(map, h->preservation_map, "RN", &r);\n+\tif (-1 == r) return NULL;\n+\tkh_val(h->preservation_map, k).i = 1;\n+\n'..b'bam_mate_ref(b));\n+\t}\n+ }\n+\n+ cr->mqual = bam_map_qual(b);\n+ cram_stats_add(c->stats[DS_MQ], cr->mqual);\n+\n+ cr->mate_ref_id = bam_mate_ref(b);\n+\n+ if (!(bam_flag(b) & BAM_FUNMAP)) {\n+\tif (c->first_base > cr->apos)\n+\t c->first_base = cr->apos;\n+\n+\tif (c->last_base < cr->aend)\n+\t c->last_base = cr->aend;\n+ }\n+\n+ return 0;\n+}\n+\n+/*\n+ * Write iterator: put BAM format sequences into a CRAM file.\n+ * We buffer up a containers worth of data at a time.\n+ *\n+ * Returns 0 on success\n+ * -1 on failure\n+ */\n+int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b) {\n+ cram_container *c;\n+\n+ if (!fd->ctr) {\n+\tfd->ctr = cram_new_container(fd->seqs_per_slice,\n+\t\t\t\t fd->slices_per_container);\n+\tif (!fd->ctr)\n+\t return -1;\n+\tfd->ctr->record_counter = fd->record_counter;\n+ }\n+ c = fd->ctr;\n+\n+ if (!c->slice || c->curr_rec == c->max_rec ||\n+\t(bam_ref(b) != c->curr_ref && c->curr_ref >= -1)) {\n+\tint slice_rec, curr_rec, multi_seq = fd->multi_seq == 1;\n+\tint curr_ref = c->slice ? 
c->curr_ref : bam_ref(b);\n+\n+\n+\t/*\n+\t * Start packing slices when we routinely have under 1/4tr full.\n+\t *\n+\t * This option isn\'t available if we choose to embed references\n+\t * since we can only have one per slice.\n+\t */\n+\tif (fd->multi_seq == -1 && c->curr_rec < c->max_rec/4+10 &&\n+\t fd->last_slice && fd->last_slice < c->max_rec/4+10 &&\n+\t !fd->embed_ref) {\n+\t if (fd->verbose && !c->multi_seq)\n+\t\tfprintf(stderr, "Multi-ref enabled for this container\\n");\n+\t multi_seq = 1;\n+\t}\n+\n+\tslice_rec = c->slice_rec;\n+\tcurr_rec = c->curr_rec;\n+\n+\tif (CRAM_MAJOR_VERS(fd->version) == 1 ||\n+\t c->curr_rec == c->max_rec || fd->multi_seq != 1 || !c->slice) {\n+\t if (NULL == (c = cram_next_container(fd, b))) {\n+\t\tif (fd->ctr) {\n+\t\t // prevent cram_close attempting to flush\n+\t\t cram_free_container(fd->ctr);\n+\t\t fd->ctr = NULL;\n+\t\t}\n+\t\treturn -1;\n+\t }\n+\t}\n+\n+\t/*\n+\t * Due to our processing order, some things we\'ve already done we\n+\t * cannot easily undo. 
So when we first notice we should be packing\n+\t * multiple sequences per container we emit the small partial\n+\t * container as-is and then start a fresh one in a different mode.\n+\t */\n+\tif (multi_seq) {\n+\t fd->multi_seq = 1;\n+\t c->multi_seq = 1;\n+\t c->pos_sorted = 0; // required atm for multi_seq slices\n+\n+\t if (!c->refs_used) {\n+\t\tpthread_mutex_lock(&fd->ref_lock);\n+\t\tc->refs_used = calloc(fd->refs->nref, sizeof(int));\n+\t\tpthread_mutex_unlock(&fd->ref_lock);\n+\t\tif (!c->refs_used)\n+\t\t return -1;\n+\t }\n+\t}\n+\n+\tfd->last_slice = curr_rec - slice_rec;\n+\tc->slice_rec = c->curr_rec;\n+\n+\t// Have we seen this reference before?\n+\tif (bam_ref(b) >= 0 && bam_ref(b) != curr_ref && !fd->embed_ref &&\n+\t !fd->unsorted && multi_seq) {\n+\t \n+\t if (!c->refs_used) {\n+\t\tpthread_mutex_lock(&fd->ref_lock);\n+\t\tc->refs_used = calloc(fd->refs->nref, sizeof(int));\n+\t\tpthread_mutex_unlock(&fd->ref_lock);\n+\t\tif (!c->refs_used)\n+\t\t return -1;\n+\t } else if (c->refs_used && c->refs_used[bam_ref(b)]) {\n+\t\tfprintf(stderr, "Unsorted mode enabled\\n");\n+\t\tpthread_mutex_lock(&fd->ref_lock);\n+\t\tfd->unsorted = 1;\n+\t\tpthread_mutex_unlock(&fd->ref_lock);\n+\t\tfd->multi_seq = 1;\n+\t }\n+\t}\n+\n+\tc->curr_ref = bam_ref(b);\n+\tif (c->refs_used && c->curr_ref >= 0) c->refs_used[c->curr_ref]++;\n+ }\n+\n+ if (!c->bams) {\n+\t/* First time through, allocate a set of bam pointers */\n+\tpthread_mutex_lock(&fd->bam_list_lock);\n+\tif (fd->bl) {\n+\t spare_bams *spare = fd->bl;\n+\t c->bams = spare->bams;\n+\t fd->bl = spare->next;\n+\t free(spare);\n+\t} else {\n+\t c->bams = calloc(c->max_c_rec, sizeof(bam_seq_t *));\n+\t if (!c->bams)\n+\t\treturn -1;\n+\t}\n+\tpthread_mutex_unlock(&fd->bam_list_lock);\n+ }\n+\n+ /* Copy or alloc+copy the bam record, for later encoding */\n+ if (c->bams[c->curr_c_rec])\n+\tbam_copy1(c->bams[c->curr_c_rec], b);\n+ else\n+\tc->bams[c->curr_c_rec] = bam_dup(b);\n+\n+ c->curr_rec++;\n+ 
c->curr_c_rec++;\n+ fd->record_counter++;\n+\n+ return 0;\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_encode.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_encode.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,105 @@ +/* +Copyright (c) 2012-2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/*! \file + * Include cram.h instead. + * + * This is an internal part of the CRAM system and is automatically included + * when you #include cram.h. + * + * Implements the encoding portion of CRAM I/O. Also see + * cram_codecs.[ch] for the actual encoding functions themselves. 
+ */ + +#ifndef _CRAM_WRITE_H_ +#define _CRAM_WRITE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* ---------------------------------------------------------------------- + * CRAM sequence iterators. + */ + +/*! Write iterator: put BAM format sequences into a CRAM file. + * + * We buffer up a containers worth of data at a time. + * + * FIXME: break this into smaller pieces. + * + * @return + * Returns 0 on success; + * -1 on failure + */ +int cram_put_bam_seq(cram_fd *fd, bam_seq_t *b); + + +/* ---------------------------------------------------------------------- + * Internal functions + */ + +/*! INTERNAL: + * Encodes a compression header block into a generic cram_block structure. + * + * @return + * Returns cram_block ptr on success; + * NULL on failure + */ +cram_block *cram_encode_compression_header(cram_fd *fd, cram_container *c, + cram_block_compression_hdr *h); + +/*! INTERNAL: + * Encodes a slice compression header. + * + * @return + * Returns cram_block on success; + * NULL on failure + */ +cram_block *cram_encode_slice_header(cram_fd *fd, cram_slice *s); + +/*! INTERNAL: + * Encodes all slices in a container into blocks. + * + * @return + * Returns 0 on success; + * -1 on failure + * + * FIXME: separate into encode_container and write_container. Ideally + * we should be able to do read_container / write_container or + * decode_container / encode_container. + */ +int cram_encode_container(cram_fd *fd, cram_container *c); + +#ifdef __cplusplus +} +#endif + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_index.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_index.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,557 @@\n+/*\n+Copyright (c) 2013-2014 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+/*\n+ * The index is a gzipped tab-delimited text file with one line per slice.\n+ * The columns are:\n+ * 1: reference number (0 to N-1, as per BAM ref_id)\n+ * 2: reference position of 1st read in slice (1..?)\n+ * 3: number of reads in slice\n+ * 4: offset of container start (relative to end of SAM header, so 1st\n+ * container is offset 0).\n+ * 5: slice number within container (ie which landmark).\n+ *\n+ * In memory, we hold this in a nested containment list. Each list element is\n+ * a cram_index struct. Each element in turn can contain its own list of\n+ * cram_index structs.\n+ *\n+ * Any start..end range which is entirely contained within another (and\n+ * earlier as it is sorted) range will be held within it. This ensures that\n+ * the outer list will never have containments and we can safely do a\n+ * binary search to find the first range which overlaps any given coordinate.\n+ */\n+\n+#ifdef HAVE_CONFIG_H\n+#include "io_lib_config.h"\n+#endif\n+\n+#include <stdio.h>\n+#include <errno.h>\n+#include <assert.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <zlib.h>\n+#include <sys/types.h>\n+#include <sys/stat.h>\n+#include <math.h>\n+#include <ctype.h>\n+\n+#include "htslib/hfile.h"\n+#include "cram/cram.h"\n+#include "cram/os.h"\n+#include "cram/zfio.h"\n+\n+#if 0\n+static void dump_index_(cram_index *e, int level) {\n+ int i, n;\n+ n = printf("%*s%d / %d .. 
%d, ", level*4, "", e->refid, e->start, e->end);\n+ printf("%*soffset %"PRId64"\\n", MAX(0,50-n), "", e->offset);\n+ for (i = 0; i < e->nslice; i++) {\n+\tdump_index_(&e->e[i], level+1);\n+ }\n+}\n+\n+static void dump_index(cram_fd *fd) {\n+ int i;\n+ for (i = 0; i < fd->index_sz; i++) {\n+\tdump_index_(&fd->index[i], 0);\n+ }\n+}\n+#endif\n+\n+static int kget_int32(kstring_t *k, size_t *pos, int32_t *val_p) {\n+ int sign = 1;\n+ int32_t val = 0;\n+ size_t p = *pos;\n+\n+ while (p < k->l && (k->s[p] == \' \' || k->s[p] == \'\\t\'))\n+\t p++;\n+\n+ if (p < k->l && k->s[p] == \'-\')\n+\tsign = -1, p++;\n+\n+ if (p >= k->l || !(k->s[p] >= \'0\' && k->s[p] <= \'9\'))\n+\treturn -1;\n+\n+ while (p < k->l && k->s[p] >= \'0\' && k->s[p] <= \'9\')\n+\tval = val*10 + k->s[p++]-\'0\';\n+ \n+ *pos = p;\n+ *val_p = sign*val;\n+\n+ return 0;\n+}\n+\n+static int kget_int64(kstring_t *k, size_t *pos, int64_t *val_p) {\n+ int sign = 1;\n+ int64_t val = 0;\n+ size_t p = *pos;\n+\n+ while (p < k->l && (k->s[p] == \' \' '..b'specified cram_range.\n+ *\n+ * Returns 0 on success\n+ * -1 on failure\n+ */\n+int cram_seek_to_refpos(cram_fd *fd, cram_range *r) {\n+ cram_index *e;\n+\n+ // Ideally use an index, so see if we have one.\n+ if ((e = cram_index_query(fd, r->refid, r->start, NULL))) {\n+\tif (0 != cram_seek(fd, e->offset, SEEK_SET))\n+\t if (0 != cram_seek(fd, e->offset - fd->first_container, SEEK_CUR))\n+\t\treturn -1;\n+ } else {\n+\tfprintf(stderr, "Unknown reference ID. Missing from index?\\n");\n+\treturn -1;\n+ }\n+\n+ if (fd->ctr) {\n+\tcram_free_container(fd->ctr);\n+\tfd->ctr = NULL;\n+\tfd->ooc = 0;\n+ }\n+\n+ return 0;\n+}\n+\n+\n+/*\n+ * A specialised form of cram_index_build (below) that deals with slices\n+ * having multiple references in this (ref_id -2). 
In this scenario we\n+ * decode the slice to look at the RI data series instead.\n+ *\n+ * Returns 0 on success\n+ * -1 on failure\n+ */\n+static int cram_index_build_multiref(cram_fd *fd,\n+\t\t\t\t cram_container *c,\n+\t\t\t\t cram_slice *s,\n+\t\t\t\t zfp *fp,\n+\t\t\t\t off_t cpos,\n+\t\t\t\t int32_t landmark,\n+\t\t\t\t int sz) {\n+ int i, ref = -2, ref_start = 0, ref_end;\n+ char buf[1024];\n+\n+ if (0 != cram_decode_slice(fd, c, s, fd->header))\n+\treturn -1;\n+\n+ ref_end = INT_MIN;\n+ for (i = 0; i < s->hdr->num_records; i++) {\n+\tif (s->crecs[i].ref_id == ref) {\n+\t if (ref_end < s->crecs[i].aend)\n+\t\tref_end = s->crecs[i].aend;\n+\t continue;\n+\t}\n+\n+\tif (ref != -2) {\n+\t sprintf(buf, "%d\\t%d\\t%d\\t%"PRId64"\\t%d\\t%d\\n",\n+\t\t ref, ref_start, ref_end - ref_start + 1,\n+\t\t (int64_t)cpos, landmark, sz);\n+\t zfputs(buf, fp);\n+\t}\n+\n+\tref = s->crecs[i].ref_id;\n+\tref_start = s->crecs[i].apos;\n+\tref_end = INT_MIN;\n+ }\n+\n+ if (ref != -2) {\n+\tsprintf(buf, "%d\\t%d\\t%d\\t%"PRId64"\\t%d\\t%d\\n",\n+\t\tref, ref_start, ref_end - ref_start + 1,\n+\t\t(int64_t)cpos, landmark, sz);\n+\tzfputs(buf, fp);\n+ }\n+\n+ return 0;\n+}\n+\n+/*\n+ * Builds an index file.\n+ *\n+ * fd is a newly opened cram file that we wish to index.\n+ * fn_base is the filename of the associated CRAM file. 
Internally we\n+ * add ".crai" to this to get the index filename.\n+ *\n+ * Returns 0 on success\n+ * -1 on failure\n+ */\n+int cram_index_build(cram_fd *fd, const char *fn_base) {\n+ cram_container *c;\n+ off_t cpos, spos, hpos;\n+ zfp *fp;\n+ char fn_idx[PATH_MAX];\n+\n+ if (strlen(fn_base) > PATH_MAX-6)\n+\treturn -1;\n+\n+ sprintf(fn_idx, "%s.crai", fn_base);\n+ if (!(fp = zfopen(fn_idx, "wz"))) {\n+ perror(fn_idx);\n+ return -1;\n+ }\n+\n+ cpos = htell(fd->fp);\n+ while ((c = cram_read_container(fd))) {\n+ int j;\n+\n+ if (fd->err) {\n+ perror("Cram container read");\n+ return 1;\n+ }\n+\n+ hpos = htell(fd->fp);\n+\n+ if (!(c->comp_hdr_block = cram_read_block(fd)))\n+ return 1;\n+ assert(c->comp_hdr_block->content_type == COMPRESSION_HEADER);\n+\n+ c->comp_hdr = cram_decode_compression_header(fd, c->comp_hdr_block);\n+ if (!c->comp_hdr)\n+ return -1;\n+\n+ // 2.0 format\n+ for (j = 0; j < c->num_landmarks; j++) {\n+ char buf[1024];\n+ cram_slice *s;\n+ int sz;\n+\n+ spos = htell(fd->fp);\n+ assert(spos - cpos - c->offset == c->landmark[j]);\n+\n+ if (!(s = cram_read_slice(fd))) {\n+\t\tzfclose(fp);\n+\t\treturn -1;\n+\t }\n+\n+ sz = (int)(htell(fd->fp) - spos);\n+\n+\t if (s->hdr->ref_seq_id == -2) {\n+\t\tcram_index_build_multiref(fd, c, s, fp,\n+\t\t\t\t\t cpos, c->landmark[j], sz);\n+\t } else {\n+\t\tsprintf(buf, "%d\\t%d\\t%d\\t%"PRId64"\\t%d\\t%d\\n",\n+\t\t\ts->hdr->ref_seq_id, s->hdr->ref_seq_start,\n+\t\t\ts->hdr->ref_seq_span, (int64_t)cpos,\n+\t\t\tc->landmark[j], sz);\n+\t\tzfputs(buf, fp);\n+\t }\n+\n+ cram_free_slice(s);\n+ }\n+\n+ cpos = htell(fd->fp);\n+ assert(cpos == hpos + c->length);\n+\n+ cram_free_container(c);\n+ }\n+ if (fd->err) {\n+\tzfclose(fp);\n+\treturn -1;\n+ }\n+\t\n+\n+ return zfclose(fp);\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_index.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_index.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,98 @@ +/* +Copyright (c) 2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _CRAM_INDEX_H_ +#define _CRAM_INDEX_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Loads a CRAM .crai index into memory. + * Returns 0 for success + * -1 for failure + */ +int cram_index_load(cram_fd *fd, const char *fn); + +void cram_index_free(cram_fd *fd); + +/* + * Searches the index for the first slice overlapping a reference ID + * and position. 
+ * + * Returns the cram_index pointer on sucess + * NULL on failure + */ +cram_index *cram_index_query(cram_fd *fd, int refid, int pos, cram_index *frm); + +/* + * Skips to a container overlapping the start coordinate listed in + * cram_range. + * + * Returns 0 on success + * -1 on failure + */ +int cram_seek_to_refpos(cram_fd *fd, cram_range *r); + +void cram_index_free(cram_fd *fd); + +/* + * Skips to a container overlapping the start coordinate listed in + * cram_range. + * + * In theory we call cram_index_query multiple times, once per slice + * overlapping the range. However slices may be absent from the index + * which makes this problematic. Instead we find the left-most slice + * and then read from then on, skipping decoding of slices and/or + * whole containers when they don't overlap the specified cram_range. + * + * Returns 0 on success + * -1 on failure + */ +int cram_seek_to_refpos(cram_fd *fd, cram_range *r); + +/* + * Builds an index file. + * + * fd is a newly opened cram file that we wish to index. + * fn_base is the filename of the associated CRAM file. Internally we + * add ".crai" to this to get the index filename. + * + * Returns 0 on success + * -1 on failure + */ +int cram_index_build(cram_fd *fd, const char *fn_base); + +#ifdef __cplusplus +} +#endif + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_io.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_io.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,4202 @@\n+/*\n+Copyright (c) 2012-2014 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+/*\n+ * CRAM I/O primitives.\n+ *\n+ * - ITF8 encoding and decoding.\n+ * - Block based I/O\n+ * - Zlib inflating and deflating (memory)\n+ * - CRAM basic data structure reading and writing\n+ * - File opening / closing\n+ * - Reference sequence handling\n+ */\n+\n+/*\n+ * TODO: BLOCK_GROW, BLOCK_RESIZE, BLOCK_APPEND and itf8_put_blk all need\n+ * a way to return errors for when malloc fails.\n+ */\n+\n+#ifdef HAVE_CONFIG_H\n+#include "io_lib_config.h"\n+#endif\n+\n+#include <stdio.h>\n+#include <errno.h>\n+#include <assert.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <zlib.h>\n+#ifdef HAVE_LIBBZ2\n+#include <bzlib.h>\n+#endif\n+#ifdef HAVE_LIBLZMA\n+#include <lzma.h>\n+#endif\n+#include <sys/types.h>\n+#include <sys/stat.h>\n+#include <math.h>\n+#include <ctype.h>\n+\n+#include "cram/cram.h"\n+#include "cram/os.h"\n+#include "cram/md5.h"\n+#include "cram/open_trace_file.h"\n+#include "cram/rANS_static.h"\n+\n+//#define REF_DEBUG\n+\n+#ifdef REF_DEBUG\n+#include <sys/syscall.h>\n+#define gettid() (int)syscall(SYS_gettid)\n+\n+#define RP(...) fprintf (stderr, __VA_ARGS__)\n+#else\n+#define RP(...) 
\n+#endif\n+\n+#include "htslib/hfile.h"\n+#include "htslib/bgzf.h"\n+#include "htslib/faidx.h"\n+\n+#define TRIAL_SPAN 50\n+#define NTRIALS 3\n+\n+\n+/* ----------------------------------------------------------------------\n+ * ITF8 encoding and decoding.\n+ *\n+* Also see the itf8_get and itf8_put macros in cram_io.h\n+ */\n+\n+/*\n+ * Reads an integer in ITF-8 encoding from \'cp\' and stores it in\n+ * *val.\n+ *\n+ * Returns the number of bytes read on success\n+ * -1 on failure\n+ */\n+int itf8_decode(cram_fd *fd, int32_t *val_p) {\n+ static int nbytes[16] = {\n+\t0,0,0,0, 0,0,0,0, // 0000xxxx - 0111xxxx\n+\t1,1,1,1, // 1000xxxx - 1011xxxx\n+\t2,2, // 1100xxxx - 1101xxxx\n+\t3, // 1110xxxx\n+\t4, // 1111xxxx\n+ };\n+\n+ static int nbits[16] = {\n+\t0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, // 0000xxxx - 0111xxxx\n+\t0x3f, 0x3f, 0x3f, 0x3f, // 1000xxxx - 1011xxxx\n+\t0x1f, 0x1f, // 1100xxxx - 1101xxxx\n+\t0x0f, // 1110xxxx\n+\t0x0f, // 1111xxxx\n+ };\n+\n+ int32_t'..b'\n+int cram_eof(cram_fd *fd) {\n+ return fd->eof;\n+}\n+\n+\n+/* \n+ * Sets options on the cram_fd. See CRAM_OPT_* definitions in cram_structs.h.\n+ * Use this immediately after opening.\n+ *\n+ * Returns 0 on success\n+ * -1 on failure\n+ */\n+int cram_set_option(cram_fd *fd, enum cram_option opt, ...) {\n+ int r;\n+ va_list args;\n+\n+ va_start(args, opt);\n+ r = cram_set_voption(fd, opt, args);\n+ va_end(args);\n+\n+ return r;\n+}\n+\n+/*\n+ * Sets options on the cram_fd. 
See CRAM_OPT_* definitions in cram_structs.h.\n+ * Use this immediately after opening.\n+ *\n+ * Returns 0 on success\n+ * -1 on failure\n+ */\n+int cram_set_voption(cram_fd *fd, enum cram_option opt, va_list args) {\n+ refs_t *refs;\n+\n+ if (!fd)\n+\treturn -1;\n+\n+ switch (opt) {\n+ case CRAM_OPT_DECODE_MD:\n+\tfd->decode_md = va_arg(args, int);\n+\tbreak;\n+\n+ case CRAM_OPT_PREFIX:\n+\tif (fd->prefix)\n+\t free(fd->prefix);\n+\tif (!(fd->prefix = strdup(va_arg(args, char *))))\n+\t return -1;\n+\tbreak;\n+\n+ case CRAM_OPT_VERBOSITY:\n+\tfd->verbose = va_arg(args, int);\n+\tbreak;\n+\n+ case CRAM_OPT_SEQS_PER_SLICE:\n+\tfd->seqs_per_slice = va_arg(args, int);\n+\tbreak;\n+\n+ case CRAM_OPT_SLICES_PER_CONTAINER:\n+\tfd->slices_per_container = va_arg(args, int);\n+\tbreak;\n+\n+ case CRAM_OPT_EMBED_REF:\n+\tfd->embed_ref = va_arg(args, int);\n+\tbreak;\n+\n+ case CRAM_OPT_NO_REF:\n+\tfd->no_ref = va_arg(args, int);\n+\tbreak;\n+\n+ case CRAM_OPT_IGNORE_MD5:\n+\tfd->ignore_md5 = va_arg(args, int);\n+\tbreak;\n+\n+ case CRAM_OPT_USE_BZIP2:\n+\tfd->use_bz2 = va_arg(args, int);\n+\tbreak;\n+\n+ case CRAM_OPT_USE_RANS:\n+\tfd->use_rans = va_arg(args, int);\n+\tbreak;\n+\n+ case CRAM_OPT_USE_LZMA:\n+\tfd->use_lzma = va_arg(args, int);\n+\tbreak;\n+\n+ case CRAM_OPT_SHARED_REF:\n+\tfd->shared_ref = 1;\n+\trefs = va_arg(args, refs_t *);\n+\tif (refs != fd->refs) {\n+\t if (fd->refs)\n+\t\trefs_free(fd->refs);\n+\t fd->refs = refs;\n+\t fd->refs->count++;\n+\t}\n+\tbreak;\n+\n+ case CRAM_OPT_RANGE:\n+\tfd->range = *va_arg(args, cram_range *);\n+\treturn cram_seek_to_refpos(fd, &fd->range);\n+\n+ case CRAM_OPT_REFERENCE:\n+\treturn cram_load_reference(fd, va_arg(args, char *));\n+\n+ case CRAM_OPT_VERSION: {\n+\tint major, minor;\n+\tchar *s = va_arg(args, char *);\n+\tif (2 != sscanf(s, "%d.%d", &major, &minor)) {\n+\t fprintf(stderr, "Malformed version string %s\\n", s);\n+\t return -1;\n+\t}\n+\tif (!((major == 1 && minor == 0) ||\n+\t (major == 2 && (minor == 0 || 
minor == 1)) ||\n+\t (major == 3 && minor == 0))) {\n+\t fprintf(stderr, "Unknown version string; "\n+\t\t "use 1.0, 2.0, 2.1 or 3.0\\n");\n+\t return -1;\n+\t}\n+\tfd->version = major*256 + minor;\n+\n+\tif (CRAM_MAJOR_VERS(fd->version) >= 3)\n+\t fd->use_rans = 1;\n+\tbreak;\n+ }\n+\n+ case CRAM_OPT_MULTI_SEQ_PER_SLICE:\n+\tfd->multi_seq = va_arg(args, int);\n+\tbreak;\n+\n+ case CRAM_OPT_NTHREADS: {\n+\tint nthreads = va_arg(args, int);\n+ if (nthreads > 1) {\n+ if (!(fd->pool = t_pool_init(nthreads*2, nthreads)))\n+ return -1;\n+\n+\t fd->rqueue = t_results_queue_init();\n+\t pthread_mutex_init(&fd->metrics_lock, NULL);\n+\t pthread_mutex_init(&fd->ref_lock, NULL);\n+\t pthread_mutex_init(&fd->bam_list_lock, NULL);\n+\t fd->shared_ref = 1;\n+\t fd->own_pool = 1;\n+ }\n+\tbreak;\n+ }\n+\n+ case CRAM_OPT_THREAD_POOL:\n+\tfd->pool = va_arg(args, t_pool *);\n+\tif (fd->pool) {\n+\t fd->rqueue = t_results_queue_init();\n+\t pthread_mutex_init(&fd->metrics_lock, NULL);\n+\t pthread_mutex_init(&fd->ref_lock, NULL);\n+\t pthread_mutex_init(&fd->bam_list_lock, NULL);\n+\t}\n+\tfd->shared_ref = 1; // Needed to avoid clobbering ref between threads\n+\tfd->own_pool = 0;\n+\n+\t//fd->qsize = 1;\n+\t//fd->decoded = calloc(fd->qsize, sizeof(cram_container *));\n+\t//t_pool_dispatch(fd->pool, cram_decoder_thread, fd);\n+\tbreak;\n+\n+ case CRAM_OPT_REQUIRED_FIELDS:\n+\tfd->required_fields = va_arg(args, int);\n+\tbreak;\n+\n+ default:\n+\tfprintf(stderr, "Unknown CRAM option code %d\\n", opt);\n+\treturn -1;\n+ }\n+\n+ return 0;\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_io.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_io.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,596 @@\n+/*\n+Copyright (c) 2012-2014 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+/*! 
\\file\n+ * Include cram.h instead.\n+ *\n+ * This is an internal part of the CRAM system and is automatically included\n+ * when you #include cram.h.\n+ *\n+ * Implements the low level CRAM I/O primitives.\n+ * This includes basic data types such as byte, int, ITF-8,\n+ * maps, bitwise I/O, etc.\n+ */\n+\n+#ifndef _CRAM_IO_H_\n+#define _CRAM_IO_H_\n+\n+#ifdef __cplusplus\n+extern "C" {\n+#endif\n+\n+#define ITF8_MACROS\n+\n+#include <stdint.h>\n+#include <cram/misc.h>\n+\n+/**@{ ----------------------------------------------------------------------\n+ * ITF8 encoding and decoding.\n+ *\n+ * Also see the itf8_get and itf8_put macros.\n+ */\n+\n+/*! INTERNAL: Converts two characters into an integer for use in switch{} */\n+#define CRAM_KEY(a,b) (((a)<<8)|((b)))\n+\n+/*! Reads an integer in ITF-8 encoding from \'fd\' and stores it in\n+ * *val.\n+ *\n+ * @return\n+ * Returns the number of bytes read on success;\n+ * -1 on failure\n+ */\n+int itf8_decode(cram_fd *fd, int32_t *val);\n+\n+#ifndef ITF8_MACROS\n+/*! Reads an integer in ITF-8 encoding from \'cp\' and stores it in\n+ * *val.\n+ *\n+ * @return\n+ * Returns the number of bytes read on success;\n+ * -1 on failure\n+ */\n+int itf8_get(char *cp, int32_t *val_p);\n+\n+/*! 
Stores a value to memory in ITF-8 format.\n+ *\n+ * @return\n+ * Returns the number of bytes required to store the number.\n+ * This is a maximum of 5 bytes.\n+ */\n+int itf8_put(char *cp, int32_t val);\n+\n+#else\n+\n+/*\n+ * Macro implementations of the above\n+ */\n+#define itf8_get(c,v) (((uc)(c)[0]<0x80)?(*(v)=(uc)(c)[0],1):(((uc)(c)[0]<0xc0)?(*(v)=(((uc)(c)[0]<<8)|(uc)(c)[1])&0x3fff,2):(((uc)(c)[0]<0xe0)?(*(v)=(((uc)(c)[0]<<16)|((uc)(c)[1]<<8)|(uc)(c)[2])&0x1fffff,3):(((uc)(c)[0]<0xf0)?(*(v)=(((uc)(c)[0]<<24)|((uc)(c)[1]<<16)|((uc)(c)[2]<<8)|(uc)(c)[3])&0x0fffffff,4):(*(v)=(((uc)(c)[0]&0x0f)<<28)|((uc)(c)[1]<<20)|((uc)(c)[2]<<12)|((uc)(c)[3]<<4)|((uc)(c)[4]&0x0f),5)))))\n+\n+#define itf8_put(c,v) ((!((v)&~0x7f))?((c)[0]=(v),1):(!((v)&~0x3fff))?((c)[0]=((v)>>8)|0x80,(c)[1]=(v)&0xff,2):(!((v)&~0x1fffff))?((c)[0]=((v)>>16)|0xc0,(c)[1]=((v)>>8)&0xff,(c)[2]=(v)&0xff,3):(!((v)&~0xfffffff))?((c)[0]=((v)>>24)|0xe0,(c)[1]=((v)>>16)&0xff,(c)[2]=((v)>>8)&0xff,(c)[3]=(v)&0xff,4):((c)[0]=0xf0|(((v)>>28)&0xff),(c)[1]=((v)>>20)&0xff,(c)[2]=((v)>>12)&0xff,(c)[3]=((v)>>4)&0xff,(c)[4]=(v)&0xf,5))\n+\n+#define itf8_size(v) ((!((v)&~0x7f))?1:(!((v)&~0x3fff))?2:(!((v)&~0x1fffff))?3:(!((v)'..b' on success;\n+ * NULL on failure\n+ */\n+cram_slice *cram_new_slice(enum cram_content_type type, int nrecs);\n+\n+/*! Loads an entire slice.\n+ *\n+ * FIXME: In 1.0 the native unit of slices within CRAM is broken\n+ * as slices contain references to objects in other slices.\n+ * To work around this while keeping the slice oriented outer loop\n+ * we read all slices and stitch them together into a fake large\n+ * slice instead.\n+ *\n+ * @return\n+ * Returns cram_slice ptr on success;\n+ * NULL on failure\n+ */\n+cram_slice *cram_read_slice(cram_fd *fd);\n+\n+\n+\n+/**@}*/\n+/**@{ ----------------------------------------------------------------------\n+ * CRAM file definition (header)\n+ */\n+\n+/*! 
Reads a CRAM file definition structure.\n+ *\n+ * @return\n+ * Returns file_def ptr on success;\n+ * NULL on failure\n+ */\n+cram_file_def *cram_read_file_def(cram_fd *fd);\n+\n+/*! Writes a cram_file_def structure to cram_fd.\n+ *\n+ * @return\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+int cram_write_file_def(cram_fd *fd, cram_file_def *def);\n+\n+/*! Frees a cram_file_def structure. */\n+void cram_free_file_def(cram_file_def *def);\n+\n+\n+/**@}*/\n+/**@{ ----------------------------------------------------------------------\n+ * SAM header I/O\n+ */\n+\n+/*! Reads the SAM header from the first CRAM data block.\n+ *\n+ * Also performs minimal parsing to extract read-group\n+ * and sample information.\n+ *\n+ * @return\n+ * Returns SAM hdr ptr on success;\n+ * NULL on failure\n+ */\n+SAM_hdr *cram_read_SAM_hdr(cram_fd *fd);\n+\n+/*! Writes a CRAM SAM header.\n+ *\n+ * @return\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+int cram_write_SAM_hdr(cram_fd *fd, SAM_hdr *hdr);\n+\n+\n+/**@}*/\n+/**@{ ----------------------------------------------------------------------\n+ * The top-level cram opening, closing and option handling\n+ */\n+\n+/*! Opens a CRAM file for read (mode "rb") or write ("wb").\n+ *\n+ * The filename may be "-" to indicate stdin or stdout.\n+ *\n+ * @return\n+ * Returns file handle on success;\n+ * NULL on failure.\n+ */\n+cram_fd *cram_open(const char *filename, const char *mode);\n+\n+/*! Opens an existing stream for reading or writing.\n+ *\n+ * @return\n+ * Returns file handle on success;\n+ * NULL on failure.\n+ */\n+cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode);\n+\n+/*! 
Closes a CRAM file.\n+ *\n+ * @return\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+int cram_close(cram_fd *fd);\n+\n+/*\n+ * Seek within a CRAM file.\n+ *\n+ * Returns 0 on success\n+ * -1 on failure\n+ */\n+int cram_seek(cram_fd *fd, off_t offset, int whence);\n+\n+/*\n+ * Flushes a CRAM file.\n+ * Useful for when writing to stdout without wishing to close the stream.\n+ *\n+ * Returns 0 on success\n+ * -1 on failure\n+ */\n+int cram_flush(cram_fd *fd);\n+\n+/*! Checks for end of file on a cram_fd stream.\n+ *\n+ * @return\n+ * Returns 0 if not at end of file\n+ * 1 if we hit an expected EOF (end of range or EOF block)\n+ * 2 for other EOF (end of stream without EOF block)\n+ */\n+int cram_eof(cram_fd *fd);\n+\n+/*! Sets options on the cram_fd.\n+ *\n+ * See CRAM_OPT_* definitions in cram_structs.h.\n+ * Use this immediately after opening.\n+ *\n+ * @return\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+int cram_set_option(cram_fd *fd, enum cram_option opt, ...);\n+\n+/*! Sets options on the cram_fd.\n+ *\n+ * See CRAM_OPT_* definitions in cram_structs.h.\n+ * Use this immediately after opening.\n+ *\n+ * @return\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+int cram_set_voption(cram_fd *fd, enum cram_option opt, va_list args);\n+\n+/*!\n+ * Attaches a header to a cram_fd.\n+ *\n+ * This should be used when creating a new cram_fd for writing where\n+ * we have an SAM_hdr already constructed (eg from a file we\'ve read\n+ * in).\n+ *\n+ * @return\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+int cram_set_header(cram_fd *fd, SAM_hdr *hdr);\n+\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+#endif /* _CRAM_IO_H_ */\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_samtools.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_samtools.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,147 @@ +/* +Copyright (c) 2010-2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#include <assert.h> +#include <string.h> +#include <stdlib.h> + +#include "cram/cram.h" +#include "htslib/sam.h" + +/*--------------------------------------------------------------------------- + * Samtools compatibility portion + */ +int bam_construct_seq(bam_seq_t **bp, size_t extra_len, + const char *qname, size_t qname_len, + int flag, + int rname, // Ref ID + int pos, + int end, // aligned start/end coords + int mapq, + uint32_t ncigar, const uint32_t *cigar, + int mrnm, // Mate Ref ID + int mpos, + int isize, + int len, + const char *seq, + const char *qual) { + static const char L[256] = { + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15, 0,15,15, + 15, 1,14, 2,13,15,15, 4,11,15,15,12,15, 3,15,15, + 15,15, 5, 6, 8,15, 7, 9,15,10,15,15,15,15,15,15, + 15, 1,14, 2,13,15,15, 4,11,15,15,12,15, 3,15,15, + 15,15, 5, 6, 8,15, 7, 9,15,10,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15, + 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15 + }; + bam1_t *b = (bam1_t *)*bp; + uint8_t *cp; + int i, bam_len; + + //b->l_aux = extra_len; // we fill this out later + + bam_len = qname_len + 1 + ncigar*4 + (len+1)/2 + len + extra_len; + if (b->m_data < bam_len) { + b->m_data = bam_len; + kroundup32(b->m_data); + b->data = (uint8_t*)realloc(b->data, b->m_data); + if (!b->data) + return -1; + } + b->l_data = bam_len; + + b->core.tid = rname; + b->core.pos = pos-1; + b->core.bin = bam_reg2bin(pos, end); + b->core.qual = mapq; + b->core.l_qname = qname_len+1; + b->core.flag = flag; + b->core.n_cigar = ncigar; + 
b->core.l_qseq = len; + b->core.mtid = mrnm; + b->core.mpos = mpos-1; + b->core.isize = isize; + + cp = b->data; + + strncpy((char *)cp, qname, qname_len); + cp[qname_len] = 0; + cp += qname_len+1; + memcpy(cp, cigar, ncigar*4); + cp += ncigar*4; + + for (i = 0; i+1 < len; i+=2) { + *cp++ = (L[(uc)seq[i]]<<4) + L[(uc)seq[i+1]]; + } + if (i < len) + *cp++ = L[(uc)seq[i]]<<4; + + if (qual) + memcpy(cp, qual, len); + else + memset(cp, '\xff', len); + + return 0; +} + +bam_hdr_t *cram_header_to_bam(SAM_hdr *h) { + int i; + bam_hdr_t *header = bam_hdr_init(); + + header->l_text = ks_len(&h->text); + header->text = malloc(header->l_text+1); + memcpy(header->text, ks_str(&h->text), header->l_text); + header->text[header->l_text] = 0; + + header->n_targets = h->nref; + header->target_name = (char **)calloc(header->n_targets, + sizeof(char *)); + header->target_len = (uint32_t *)calloc(header->n_targets, 4); + + for (i = 0; i < h->nref; i++) { + header->target_name[i] = strdup(h->ref[i].name); + header->target_len[i] = h->ref[i].len; + } + + return header; +} + +SAM_hdr *bam_header_to_cram(bam_hdr_t *h) { + return sam_hdr_parse_(h->text, h->l_text); +} |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_samtools.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_samtools.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,97 @@ +/* +Copyright (c) 2010-2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef _CRAM_SAMTOOLS_H_ +#define _CRAM_SAMTOOLS_H_ + +/* Samtools compatible API */ +#define bam_blk_size(b) ((b)->l_data) +#define bam_set_blk_size(b,v) ((b)->data_len = (v)) + +#define bam_ref(b) (b)->core.tid +#define bam_pos(b) (b)->core.pos +#define bam_mate_pos(b) (b)->core.mpos +#define bam_mate_ref(b) (b)->core.mtid +#define bam_ins_size(b) (b)->core.isize +#define bam_seq_len(b) (b)->core.l_qseq +#define bam_cigar_len(b) (b)->core.n_cigar +#define bam_flag(b) (b)->core.flag +#define bam_bin(b) (b)->core.bin +#define bam_map_qual(b) (b)->core.qual +#define bam_name_len(b) (b)->core.l_qname +#define bam_name(b) bam_get_qname((b)) +#define bam_qual(b) bam_get_qual((b)) +#define bam_seq(b) bam_get_seq((b)) +#define bam_cigar(b) bam_get_cigar((b)) +#define bam_aux(b) bam_get_aux((b)) + +#define bam_dup(b) bam_copy1(bam_init1(), (b)) + +#define bam_free(b) bam_destroy1((b)) + +#define bam_reg2bin(beg,end) hts_reg2bin((beg),(end),14,5) + +#include "htslib/sam.h" + +enum cigar_op { + BAM_CMATCH_=BAM_CMATCH, + BAM_CINS_=BAM_CINS, + BAM_CDEL_=BAM_CDEL, + BAM_CREF_SKIP_=BAM_CREF_SKIP, + BAM_CSOFT_CLIP_=BAM_CSOFT_CLIP, + BAM_CHARD_CLIP_=BAM_CHARD_CLIP, + BAM_CPAD_=BAM_CPAD, + BAM_CBASE_MATCH=BAM_CEQUAL, + BAM_CBASE_MISMATCH=BAM_CDIFF +}; + +typedef bam1_t bam_seq_t; + +#include "cram/sam_header.h" + +bam_hdr_t *cram_header_to_bam(SAM_hdr *h); +SAM_hdr *bam_header_to_cram(bam_hdr_t *h); + +int bam_construct_seq(bam_seq_t **bp, size_t extra_len, + const char *qname, size_t qname_len, + int flag, + int rname, // Ref ID + int pos, + int end, // aligned start/end coords + int mapq, + uint32_t ncigar, const uint32_t *cigar, + int mrnm, // Mate Ref ID + int mpos, + int isize, + int len, + const char *seq, + const char *qual); + +#endif /* _CRAM_SAMTOOLS_H_ */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_stats.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_stats.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,450 @@\n+/*\n+Copyright (c) 2012-2013 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+#ifdef HAVE_CONFIG_H\n+#include "io_lib_config.h"\n+#endif\n+\n+#include <stdio.h>\n+#include <errno.h>\n+#include <assert.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <zlib.h>\n+#include <sys/types.h>\n+#include <sys/stat.h>\n+#include <math.h>\n+#include <ctype.h>\n+\n+#include "cram/cram.h"\n+#include "cram/os.h"\n+\n+cram_stats *cram_stats_create(void) {\n+ return calloc(1, sizeof(cram_stats));\n+}\n+\n+void cram_stats_add(cram_stats *st, int32_t val) {\n+ st->nsamp++;\n+\n+ //assert(val >= 0);\n+\n+ if (val < MAX_STAT_VAL && val >= 0) {\n+\tst->freqs[val]++;\n+ } else {\n+\tkhint_t k;\n+\tint r;\n+\n+\tif (!st->h) {\n+\t st->h = kh_init(m_i2i);\n+\t}\n+\n+\tk = kh_put(m_i2i, st->h, val, &r);\n+\tif (r == 0)\n+\t kh_val(st->h, k)++;\n+\telse if (r != -1)\n+\t kh_val(st->h, k) = 1;\n+\telse\n+\t ; // FIXME: handle error\n+ }\n+}\n+\n+void cram_stats_del(cram_stats *st, int32_t val) {\n+ st->nsamp--;\n+\n+ //assert(val >= 0);\n+\n+ if (val < MAX_STAT_VAL && val >= 0) {\n+\tst->freqs[val]--;\n+\tassert(st->freqs[val] >= 0);\n+ } else if (st->h) {\n+\tkhint_t k = kh_get(m_i2i, st->h, val);\n+\n+\tif (k != kh_end(st->h)) {\n+\t if (--kh_val(st->h, k) == 0)\n+\t\tkh_del(m_i2i, st->h, k);\n+\t} else {\n+\t fprintf(stderr, "Failed to remove val %d from cram_stats\\n", val);\n+\t st->nsamp++;\n+\t}\n+ } else {\n+\tfprintf(stderr, "Failed to remove val %d from cram_stats\\n", val);\n+\tst->nsamp++;\n+ }\n+}\n+\n+void 
cram_stats_dump(cram_stats *st) {\n+ int i;\n+ fprintf(stderr, "cram_stats:\\n");\n+ for (i = 0; i < MAX_STAT_VAL; i++) {\n+\tif (!st->freqs[i])\n+\t continue;\n+\tfprintf(stderr, "\\t%d\\t%d\\n", i, st->freqs[i]);\n+ }\n+ if (st->h) {\n+\tkhint_t k;\n+\tfor (k = kh_begin(st->h); k != kh_end(st->h); k++) {\n+\t if (!kh_exist(st->h, k))\n+\t\tcontinue;\n+\n+\t fprintf(stderr, "\\t%d\\t%d\\n", kh_key(st->h, k), kh_val(st->h, k));\n+\t}\n+ }\n+}\n+\n+#if 1\n+/* Returns the number of bits set in val; it the highest bit used */\n+static int nbits(int v) {\n+ static const int MultiplyDeBruijnBitPosition[32] = {\n+\t1, 10, 2, 11, 14, 22, 3, 30, 12, 15, 17, 19, 23, 26, 4, 31,\n+\t9, 13, 21, 29, 16, 18, 25, 8, 20, 28, 24, 7, 27, 6, 5, 32\n+ };\n+\n+ v |= v >> 1; // first up to set all bits 1 after the first 1 */\n+ v |= v >> 2;\n+ v |= v >> 4;\n+ v |= v >> 8;\n+ v |= v >> 16;\n+\n+ // DeBruijn magic to find top bit\n+ return MultiplyDeBruijnBitPosition[(uint32_t)(v * 0'..b'\t//fprintf(stderr, "Val %d = %d x %d (e %f, %d)\\n", i, i, F[i], x, X);\n+\t }\n+\t}\n+\n+\t//fprintf(stderr, "CORE Entropy = %f, %f\\n", dbits/8, dbitsH/8);\n+\t//fprintf(stderr, "Ext. Entropy = %f, %f\\n", dbitsE/8, dbitsEH/8);\n+\n+\tif (dbitsE < 1000 || dbitsE / dbits > 1.1) {\n+\t //fprintf(stderr, "=> %d < 200 ? E_HUFFMAN : E_BETA\\n", nvals);\n+\t free(vals); free(freqs);\n+\t return nvals < 200 ? E_HUFFMAN : E_BETA;\n+\t}\n+#endif\n+\tfree(vals); free(freqs);\n+\treturn E_EXTERNAL;\n+ }\n+\n+ /*\n+ * Avoid complex stats for now, just do heuristic of HUFFMAN for small\n+ * alphabets and BETA for anything large.\n+ */\n+ free(vals); free(freqs);\n+ return nvals < 200 ? E_HUFFMAN : E_BETA;\n+ //return E_HUFFMAN;\n+ //return E_EXTERNAL;\n+\n+\n+ /* We only support huffman now anyway... 
*/\n+ //free(vals); free(freqs); return E_HUFFMAN;\n+\n+ /* Beta */\n+ bits = nbits(max_val - min_val) * ntot;\n+ if (fd->verbose > 1)\n+\tfprintf(stderr, "BETA = %d\\n", bits);\n+ if (best_size > bits)\n+\tbest_size = bits, best_encoding = E_BETA;\n+\n+#if 0\n+ /* Unary */\n+ if (min_val >= 0) {\n+\tfor (bits = i = 0; i < nvals; i++)\n+\t bits += freqs[i]*(vals[i]+1);\n+\tif (fd->verbose > 1)\n+\t fprintf(stderr, "UNARY = %d\\n", bits);\n+\tif (best_size > bits)\n+\t best_size = bits, best_encoding = E_NULL; //E_UNARY;\n+ }\n+\n+ /* Gamma */\n+ for (bits = i = 0; i < nvals; i++)\n+\tbits += ((nbits(vals[i]-min_val+1)-1) + nbits(vals[i]-min_val+1)) * freqs[i];\n+ if (fd->verbose > 1)\n+\tfprintf(stderr, "GAMMA = %d\\n", bits);\n+ if (best_size > bits)\n+\tbest_size = bits, best_encoding = E_GAMMA;\n+\n+ /* Subexponential */\n+ for (k = 0; k < 10; k++) {\n+\tfor (bits = i = 0; i < nvals; i++) {\n+\t if (vals[i]-min_val < (1<<k))\n+\t\tbits += (1 + k)*freqs[i];\n+\t else\n+\t\tbits += (nbits(vals[i]-min_val)*2-k)*freqs[i];\n+\t}\n+\n+\tif (fd->verbose > 1)\n+\t fprintf(stderr, "SUBEXP%d = %d\\n", k, bits);\n+\tif (best_size > bits)\n+\t best_size = bits, best_encoding = E_SUBEXP;\n+ }\n+#endif\n+\n+ /* byte array len */\n+\n+ /* byte array stop */\n+\n+ /* External? Guesswork! */\n+\n+ /* Huffman */\n+// qsort(freqs, nvals, sizeof(freqs[0]), sort_freqs);\n+// for (i = 0; i < nvals; i++) {\n+//\tfprintf(stderr, "%d = %d\\n", i, freqs[i]);\n+//\tvals[i] = 0;\n+// }\n+\n+ /* Grow freqs to 2*freqs, to store sums */\n+ /* Vals holds link data */\n+ freqs = realloc(freqs, 2*nvals*sizeof(*freqs));\n+ codes = calloc(2*nvals, sizeof(*codes));\n+ if (!freqs || !codes)\n+\treturn E_HUFFMAN; // Cannot do much else atm\n+\n+ /* Inefficient, use pointers to form chain so we can insert and maintain\n+ * a sorted list? 
This is currently O(nvals^2) complexity.\n+ */\n+ for (;;) {\n+\tint low1 = INT_MAX, low2 = INT_MAX;\n+\tint ind1 = 0, ind2 = 0;\n+\tfor (i = 0; i < nvals; i++) {\n+\t if (freqs[i] < 0)\n+\t\tcontinue;\n+\t if (low1 > freqs[i]) \n+\t\tlow2 = low1, ind2 = ind1, low1 = freqs[i], ind1 = i;\n+\t else if (low2 > freqs[i])\n+\t\tlow2 = freqs[i], ind2 = i;\n+\t}\n+\tif (low2 == INT_MAX)\n+\t break;\n+\n+\t//fprintf(stderr, "Merge ind %d (%d), %d (%d) = %d+%d, => %d=%d\\n",\n+\t//\tind1, vals[ind1], ind2, vals[ind2], low1, low2,\n+\t//\tnvals, low1+low2);\n+\n+\tfreqs[nvals] = low1 + low2;\n+\tcodes[ind1] = nvals;\n+\tcodes[ind2] = nvals;\n+\tfreqs[ind1] *= -1;\n+\tfreqs[ind2] *= -1;\n+\tnvals++;\n+ }\n+ nvals = nvals/2+1;\n+\n+ for (i = 0; i < nvals; i++) {\n+\tint code_len = 0;\n+\tfor (k = codes[i]; k; k = codes[k])\n+\t code_len++;\n+\tcodes[i] = code_len;\n+\tfreqs[i] *= -1;\n+\t//fprintf(stderr, "%d / %d => %d\\n", vals[i], freqs[i], codes[i]);\n+ }\n+\n+ for (bits = i = 0; i < nvals; i++) {\n+\tbits += freqs[i] * codes[i];\n+ }\n+ if (fd->verbose > 1)\n+\tfprintf(stderr, "HUFFMAN = %d\\n", bits);\n+ if (best_size >= bits)\n+\tbest_size = bits, best_encoding = E_HUFFMAN;\n+ free(codes);\n+\n+ free(vals);\n+ free(freqs);\n+\n+ return best_encoding;\n+}\n+\n+void cram_stats_free(cram_stats *st) {\n+ if (st->h)\n+\tkh_destroy(m_i2i, st->h);\n+ free(st);\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_stats.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_stats.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,59 @@ +/* +Copyright (c) 2012-2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef _CRAM_STATS_H_ +#define _CRAM_STATS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +cram_stats *cram_stats_create(void); +void cram_stats_add(cram_stats *st, int32_t val); +void cram_stats_del(cram_stats *st, int32_t val); +void cram_stats_dump(cram_stats *st); +void cram_stats_free(cram_stats *st); + +/* + * Computes entropy from integer frequencies for various encoding methods and + * picks the best encoding. + * + * FIXME: we could reuse some of the code here for the actual encoding + * parameters too. Eg the best 'k' for SUBEXP or the code lengths for huffman. + * + * Returns the best codec to use. + */ +enum cram_encoding cram_stats_encoding(cram_fd *fd, cram_stats *st); + +#ifdef __cplusplus +} +#endif + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/cram_structs.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/cram_structs.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,809 @@\n+/*\n+Copyright (c) 2012-2013 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+#ifndef _CRAM_STRUCTS_H_\n+#define _CRAM_STRUCTS_H_\n+\n+#ifdef __cplusplus\n+extern "C" {\n+#endif\n+\n+/*\n+ * Defines in-memory structs for the basic file-format objects in the\n+ * CRAM format.\n+ *\n+ * The basic file format is:\n+ * File-def SAM-hdr Container Container ...\n+ *\n+ * Container:\n+ * Service-block data-block data-block ...\n+ *\n+ * Multiple blocks in a container are grouped together as slices,\n+ * also sometimes referred to as landmarks in the spec.\n+ */\n+\n+\n+#include <stdint.h>\n+\n+#include "cram/thread_pool.h"\n+#include "cram/string_alloc.h"\n+#include "htslib/khash.h"\n+\n+// Generic hash-map integer -> integer\n+KHASH_MAP_INIT_INT(m_i2i, int)\n+\n+// Generic hash-set integer -> (existance)\n+KHASH_SET_INIT_INT(s_i2i)\n+\n+// For brevity\n+typedef unsigned char uc;\n+\n+/*\n+ * A union for the preservation map. 
Required for khash.\n+ */\n+typedef union {\n+ int i;\n+ char *p;\n+} pmap_t;\n+\n+// Generates static functions here which isn\'t ideal, but we have no way\n+// currently to declare the kh_map_t structure here without also declaring a\n+// duplicate in the .c files due to the nature of the KHASH macros.\n+KHASH_MAP_INIT_STR(map, pmap_t)\n+\n+struct hFILE;\n+\n+#define SEQS_PER_SLICE 10000\n+#define SLICE_PER_CNT 1\n+\n+#define CRAM_SUBST_MATRIX "CGTNAGTNACTNACGNACGT"\n+\n+#define MAX_STAT_VAL 1024\n+//#define MAX_STAT_VAL 16\n+typedef struct {\n+ int freqs[MAX_STAT_VAL];\n+ khash_t(m_i2i) *h;\n+ int nsamp; // total number of values added\n+ int nvals; // total number of unique values added\n+} cram_stats;\n+\n+/* NB: matches java impl, not the spec */\n+enum cram_encoding {\n+ E_NULL = 0,\n+ E_EXTERNAL = 1,\n+ E_GOLOMB = 2,\n+ E_HUFFMAN = 3,\n+ E_BYTE_ARRAY_LEN = 4,\n+ E_BYTE_ARRAY_STOP = 5,\n+ E_BETA = 6,\n+ E_SUBEXP = 7,\n+ E_GOLOMB_RICE = 8,\n+ E_GAMMA = 9\n+};\n+\n+enum cram_external_type {\n+ E_INT = 1,\n+ E_LONG = 2,\n+ E_BYTE = 3,\n+ E_BYTE_ARRAY = 4,\n+ E_BYTE_ARRAY_BLOCK = 5,\n+};\n+\n+/* External IDs used by this implementation (only assumed during writing) */\n+enum cram_DS_ID {\n+ DS_CORE = 0,\n+ DS_aux = 1, // aux_blk\n+ DS_aux_OQ = 2,\n+ DS_aux_BQ = 3,\n+ DS_aux_BD = 4,\n+ DS_aux_BI = 5,\n+ DS_aux_FZ = 6, // also ZM:B\n+ DS_aux_oq = '..b'ture\n+ char *ref, *ref_free; // current portion held in memory\n+ int ref_id;\n+ int ref_start;\n+ int ref_end;\n+ char *ref_fn; // reference fasta filename\n+\n+ // compression level and metrics\n+ int level;\n+ cram_metrics *m[DS_END];\n+\n+ // options\n+ int decode_md; // Whether to export MD and NM tags\n+ int verbose;\n+ int seqs_per_slice;\n+ int slices_per_container;\n+ int embed_ref;\n+ int no_ref;\n+ int ignore_md5;\n+ int use_bz2;\n+ int use_rans;\n+ int use_lzma;\n+ int shared_ref;\n+ unsigned int required_fields;\n+ cram_range range;\n+\n+ // lookup tables, stored here so we can be trivially 
multi-threaded\n+ unsigned int bam_flag_swap[0x1000]; // cram -> bam flags\n+ unsigned int cram_flag_swap[0x1000];// bam -> cram flags\n+ unsigned char L1[256]; // ACGT{*} ->0123{4}\n+ unsigned char L2[256]; // ACGTN{*}->01234{5}\n+ char cram_sub_matrix[32][32];\t// base substituion codes\n+\n+ int index_sz;\n+ cram_index *index; // array, sizeof index_sz\n+ off_t first_container;\n+ int eof;\n+ int last_slice; // number of recs encoded in last slice\n+ int multi_seq;\n+ int unsorted;\n+ int empty_container; \t\t// Marker for EOF block\n+ \n+ // thread pool\n+ int own_pool;\n+ t_pool *pool;\n+ t_results_queue *rqueue;\n+ pthread_mutex_t metrics_lock;\n+ pthread_mutex_t ref_lock;\n+ spare_bams *bl;\n+ pthread_mutex_t bam_list_lock;\n+ void *job_pending;\n+ int ooc; // out of containers.\n+} cram_fd;\n+\n+// Translation of required fields to cram data series\n+enum cram_fields {\n+ CRAM_BF = 0x00000001,\n+ CRAM_AP = 0x00000002,\n+ CRAM_FP = 0x00000004,\n+ CRAM_RL = 0x00000008,\n+ CRAM_DL = 0x00000010,\n+ CRAM_NF = 0x00000020,\n+ CRAM_BA = 0x00000040,\n+ CRAM_QS = 0x00000080,\n+ CRAM_FC = 0x00000100,\n+ CRAM_FN = 0x00000200,\n+ CRAM_BS = 0x00000400,\n+ CRAM_IN = 0x00000800,\n+ CRAM_RG = 0x00001000,\n+ CRAM_MQ = 0x00002000,\n+ CRAM_TL = 0x00004000,\n+ CRAM_RN = 0x00008000,\n+ CRAM_NS = 0x00010000,\n+ CRAM_NP = 0x00020000,\n+ CRAM_TS = 0x00040000,\n+ CRAM_MF = 0x00080000,\n+ CRAM_CF = 0x00100000,\n+ CRAM_RI = 0x00200000,\n+ CRAM_RS = 0x00400000,\n+ CRAM_PD = 0x00800000,\n+ CRAM_HC = 0x01000000,\n+ CRAM_SC = 0x02000000,\n+ CRAM_BB = 0x04000000,\n+ CRAM_BB_len = 0x08000000,\n+ CRAM_QQ = 0x10000000,\n+ CRAM_QQ_len = 0x20000000,\n+ CRAM_aux= 0x40000000,\n+ CRAM_ALL= 0x7fffffff,\n+};\n+\n+// A CIGAR opcode, but not necessarily the implications of it. 
Eg FC/FP may\n+// encode a base difference, but we don\'t need to know what it is for CIGAR.\n+// If we have a soft-clip or insertion, we do need SC/IN though to know how\n+// long that array is.\n+#define CRAM_CIGAR (CRAM_FN | CRAM_FP | CRAM_FC | CRAM_DL | CRAM_IN | \\\n+\t\t CRAM_SC | CRAM_HC | CRAM_PD | CRAM_RS | CRAM_RL | CRAM_BF)\n+\n+#define CRAM_SEQ (CRAM_CIGAR | CRAM_BA | CRAM_QS | CRAM_BS | \\\n+\t\t CRAM_RL | CRAM_AP | CRAM_BB | CRAM_QQ)\n+\n+/* BF bitfields */\n+/* Corrected in 1.1. Use bam_flag_swap[bf] and BAM_* macros for 1.0 & 1.1 */\n+#define CRAM_FPAIRED 256\n+#define CRAM_FPROPER_PAIR 128\n+#define CRAM_FUNMAP 64\n+#define CRAM_FREVERSE 32\n+#define CRAM_FREAD1 16\n+#define CRAM_FREAD2 8\n+#define CRAM_FSECONDARY 4\n+#define CRAM_FQCFAIL 2\n+#define CRAM_FDUP 1\n+\n+#define DS_aux_S "\\001"\n+#define DS_aux_OQ_S "\\002"\n+#define DS_aux_BQ_S "\\003"\n+#define DS_aux_BD_S "\\004"\n+#define DS_aux_BI_S "\\005"\n+#define DS_aux_FZ_S "\\006"\n+#define DS_aux_oq_S "\\007"\n+#define DS_aux_os_S "\\010"\n+#define DS_aux_oz_S "\\011"\n+\n+#define CRAM_M_REVERSE 1\n+#define CRAM_M_UNMAP 2\n+\n+\n+/* CF bitfields */\n+#define CRAM_FLAG_PRESERVE_QUAL_SCORES (1<<0)\n+#define CRAM_FLAG_DETACHED (1<<1)\n+#define CRAM_FLAG_MATE_DOWNSTREAM (1<<2)\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+#endif /* _CRAM_STRUCTS_H_ */\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/files.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/files.c Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,76 @@ +/* +Copyright (c) 1994, 1996-1997, 2000, 2003 MEDICAL RESEARCH COUNCIL +All rights reserved + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1 Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2 Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF +MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or +promote products derived from this software without specific prior written +permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "io_lib_config.h" +#endif + +#include "cram/misc.h" + +#include <sys/types.h> +#include <sys/stat.h> +/* Alliant's Concentrix <sys/stat.h> is hugely deficient */ +/* Define things we require in this program */ +/* Methinks S_IFMT and S_IFDIR aren't defined in POSIX */ +#ifndef S_ISDIR +#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) +#endif /*!S_ISDIR*/ +#ifndef S_ISREG +#define S_ISREG(m) (((m)&S_IFMT) == S_IFREG) +#endif /*!S_ISREG*/ + +int is_directory(char * fn) +{ + struct stat buf; + if ( stat(fn,&buf) ) return 0; + return S_ISDIR(buf.st_mode); +} + +int is_file(char * fn) +{ + struct stat buf; + if ( stat(fn,&buf) ) return 0; + return S_ISREG(buf.st_mode); +} + +int file_exists(char * fn) +{ + struct stat buf; + return ( stat(fn,&buf) == 0); +} + +int file_size(char * fn) +{ + struct stat buf; + if ( stat(fn,&buf) != 0) return 0; + return buf.st_size; +} + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/mFILE.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/mFILE.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,634 @@\n+/*\n+Copyright (c) 2005-2006, 2008-2009, 2013 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+\n+#ifdef HAVE_CONFIG_H\n+#include "io_lib_config.h"\n+#endif\n+\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <errno.h>\n+#include <string.h>\n+#include <sys/types.h>\n+#include <sys/stat.h>\n+#include <fcntl.h>\n+#include <unistd.h>\n+#include <stdarg.h>\n+\n+#include "cram/os.h"\n+#include "cram/mFILE.h"\n+#include "cram/vlen.h"\n+\n+/*\n+ * This file contains memory-based versions of the most commonly used\n+ * (by io_lib) stdio functions.\n+ *\n+ * Actual file IO takes place either on opening or closing an mFILE.\n+ *\n+ * Coupled to this are a bunch of rather scary macros which can be obtained\n+ * by including stdio_hack.h. It is recommended though that you use mFILE.h\n+ * instead and replace fopen with mfopen (etc). This is more or less\n+ * mandatory if you wish to use both FILE and mFILE structs in a single file.\n+ */\n+\n+static mFILE *m_channel[3]; /* stdin, stdout and stderr fakes */\n+\n+/*\n+ * Reads the entirety of fp into memory. If \'fn\' exists it is the filename\n+ * associated with fp. This will be used for more optimal reading (via a\n+ * stat to identify the size and a single read). 
Otherwise we use successive\n+ * reads until EOF.\n+ *\n+ * Returns a malloced buffer on success of length *size\n+ * NULL on failure\n+ */\n+static char *mfload(FILE *fp, const char *fn, size_t *size, int binary) {\n+ struct stat sb;\n+ char *data = NULL;\n+ size_t allocated = 0, used = 0;\n+ int bufsize = 8192;\n+\n+#ifdef _WIN32\n+ if (binary)\n+\t_setmode(_fileno(fp), _O_BINARY);\n+ else \n+\t_setmode(_fileno(fp), _O_TEXT);\n+#endif\n+\n+ if (fn && -1 != stat(fn, &sb)) {\n+\tdata = malloc(allocated = sb.st_size);\n+\tbufsize = sb.st_size;\n+ } else {\n+\tfn = NULL;\n+ }\n+\n+ do {\n+\tsize_t len;\n+\tif (used + bufsize > allocated) {\n+\t allocated += bufsize;\n+\t data = realloc(data, allocated);\n+\t}\n+\tlen = fread(data + used, 1, allocated - used, fp);\n+\tif (len > 0)\n+\t used += len;\n+ } while (!feof(fp) && (fn == NULL || used < sb.st_size));\n+\n+ *size = used;\n+\n+ return data;\n+}\n+\n+/*\n+ * Creates and returns m_channel[0].\n+ * We initialise this on the first attempted read, which then slurps in\n+ * all of stdin until EOF is met.\n+ */\n+mFILE *mstdin(void) {\n+ if (m_channel[0])\n+\treturn m_channel[0];\n+\n+ m_channel[0] = mfcreate(NULL, 0);\n+ if (NULL == m_channel[0]) return NULL;\n+ m_channel[0]->fp = stdin;\n+ return '..b"size * nmemb);\n+ mf->offset += size * nmemb;\n+ if (mf->size < mf->offset)\n+\tmf->size = mf->offset;\n+\n+ return nmemb;\n+}\n+\n+int mfgetc(mFILE *mf) {\n+ if (mf == m_channel[0]) init_mstdin();\n+ if (mf->offset < mf->size) {\n+\treturn (unsigned char)mf->data[mf->offset++];\n+ }\n+\n+ mf->eof = 1;\n+ return -1;\n+}\n+\n+int mungetc(int c, mFILE *mf) {\n+ if (mf->offset > 0) {\n+\tmf->data[--mf->offset] = c;\n+\treturn c;\n+ }\n+ \n+ mf->eof = 1;\n+ return -1;\n+}\n+\n+char *mfgets(char *s, int size, mFILE *mf) {\n+ int i;\n+\n+ if (mf == m_channel[0]) init_mstdin();\n+ *s = 0;\n+ for (i = 0; i < size-1;) {\n+\tif (mf->offset < mf->size) {\n+\t s[i] = mf->data[mf->offset++];\n+\t if (s[i++] == 
'\\n')\n+\t\tbreak;\n+\t} else {\n+\t mf->eof = 1;\n+\t break;\n+\t}\n+ }\n+\n+ s[i] = 0;\n+ return i ? s : NULL;\n+}\n+\n+/*\n+ * Flushes an mFILE. If this is a real open of a file in write mode then\n+ * mFILE->fp will be set. We then write out any new data in mFILE since the\n+ * last flush. We cannot tell what may have been modified as we don't keep\n+ * track of that, so we typically rewrite out the entire file contents between\n+ * the last flush_pos and the end of file.\n+ *\n+ * For stderr/stdout we also reset the offsets so we cannot modify things\n+ * we've already output.\n+ */\n+int mfflush(mFILE *mf) {\n+ if (!mf->fp)\n+\treturn 0;\n+\n+ /* FIXME: only do this when opened in write mode */\n+ if (mf == m_channel[1] || mf == m_channel[2]) {\n+\tif (mf->flush_pos < mf->size) {\n+\t size_t bytes = mf->size - mf->flush_pos;\n+\t if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)\n+\t\treturn -1;\n+\t if (0 != fflush(mf->fp))\n+\t\treturn -1;\n+\t}\n+\n+\t/* Stdout & stderr are non-seekable streams so throw away the data */\n+\tmf->offset = mf->size = mf->flush_pos = 0;\n+ }\n+\n+ /* only flush when opened in write mode */\n+ if (mf->mode & MF_WRITE) {\n+\tif (mf->flush_pos < mf->size) {\n+\t size_t bytes = mf->size - mf->flush_pos;\n+\t if (!(mf->mode & MF_MODEX)) {\n+\t\tfseek(mf->fp, mf->flush_pos, SEEK_SET);\n+\t }\n+\t if (fwrite(mf->data + mf->flush_pos, 1, bytes, mf->fp) < bytes)\n+\t\treturn -1;\n+\t if (0 != fflush(mf->fp))\n+\t\treturn -1;\n+\t}\n+\tif (ftell(mf->fp) != -1 &&\n+\t ftruncate(fileno(mf->fp), ftell(mf->fp)) == -1)\n+\t\treturn -1;\n+\tmf->flush_pos = mf->size;\n+ }\n+\n+ return 0;\n+}\n+\n+/*\n+ * A wrapper around vsprintf() to write to an mFILE. This also uses vflen() to\n+ * estimate how many additional bytes of storage will be required for the\n+ * vsprintf to work.\n+ */\n+int mfprintf(mFILE *mf, char *fmt, ...) 
{\n+ int ret;\n+ size_t est_length;\n+ va_list args;\n+\n+ va_start(args, fmt);\n+ est_length = vflen(fmt, args);\n+ va_end(args);\n+ while (est_length + mf->offset > mf->alloced) {\n+\tsize_t new_alloced = mf->alloced ? mf->alloced * 2 : 1024;\n+\tvoid * new_data = realloc(mf->data, new_alloced);\n+\tif (NULL == new_data) return -1;\n+\tmf->alloced = new_alloced;\n+\tmf->data = new_data;\n+ }\n+\n+ va_start(args, fmt);\n+ ret = vsprintf(&mf->data[mf->offset], fmt, args);\n+ va_end(args);\n+\n+ if (ret > 0) {\n+\tmf->offset += ret;\n+\tif (mf->size < mf->offset)\n+\t mf->size = mf->offset;\n+ }\n+\n+ if (mf->fp == stderr) {\n+\t/* Auto-flush for stderr */\n+\tif (0 != mfflush(mf)) return -1;\n+ }\n+\n+ return ret;\n+}\n+\n+/*\n+ * Converts an mFILE from binary to ascii mode by replacing all\n+ * cr-nl with nl.\n+ *\n+ * Primarily used on windows when we've uncompressed a binary file which\n+ * happens to be a text file (eg Experiment File). Previously we would have\n+ * seeked back to the start and used _setmode(fileno(fp), _O_TEXT).\n+ *\n+ * Side effect: resets offset and flush_pos back to the start.\n+ */\n+void mfascii(mFILE *mf) {\n+ size_t p1, p2;\n+\n+ for (p1 = p2 = 1; p1 < mf->size; p1++, p2++) {\n+\tif (mf->data[p1] == '\\n' && mf->data[p1-1] == '\\r') {\n+\t p2--; /* delete the \\r */\n+\t}\n+\tmf->data[p2] = mf->data[p1];\n+ }\n+ mf->size = p2;\n+\n+ mf->offset = mf->flush_pos = 0;\n+}\n" |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/mFILE.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/mFILE.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,88 @@ +/* +Copyright (c) 2005-2006, 2008-2009 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _MFILE_H_ +#define _MFILE_H_ + +#include <stdio.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + FILE *fp; + char *data; + size_t alloced; + int eof; + int mode; /* open mode in MF_?? 
define bit pattern */ + size_t size; + size_t offset; + size_t flush_pos; +} mFILE; + +#define MF_READ 1 +#define MF_WRITE 2 +#define MF_APPEND 4 +#define MF_BINARY 8 +#define MF_TRUNC 16 +#define MF_MODEX 32 + +mFILE *mfreopen(const char *path, const char *mode, FILE *fp); +mFILE *mfopen(const char *path, const char *mode); +int mfdetach(mFILE *mf); +int mfclose(mFILE *mf); +int mfdestroy(mFILE *mf); +int mfseek(mFILE *mf, long offset, int whence); +long mftell(mFILE *mf); +void mrewind(mFILE *mf); +void mftruncate(mFILE *mf, long offset); +int mfeof(mFILE *mf); +size_t mfread(void *ptr, size_t size, size_t nmemb, mFILE *mf); +size_t mfwrite(void *ptr, size_t size, size_t nmemb, mFILE *mf); +int mfgetc(mFILE *mf); +int mungetc(int c, mFILE *mf); +mFILE *mfcreate(char *data, int size); +mFILE *mfcreate_from(const char *path, const char *mode_str, FILE *fp); +void mfrecreate(mFILE *mf, char *data, int size); +void *mfsteal(mFILE *mf, size_t *size_out); +char *mfgets(char *s, int size, mFILE *mf); +int mfflush(mFILE *mf); +int mfprintf(mFILE *mf, char *fmt, ...); +mFILE *mstdin(void); +mFILE *mstdout(void); +mFILE *mstderr(void); +void mfascii(mFILE *mf); + +#ifdef __cplusplus +} +#endif + +#endif /* _MFILE_H_ */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/md5.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/md5.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,295 @@\n+/*\n+ * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.\n+ * MD5 Message-Digest Algorithm (RFC 1321).\n+ *\n+ * Homepage:\n+ * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5\n+ *\n+ * Author:\n+ * Alexander Peslyak, better known as Solar Designer <solar at openwall.com>\n+ *\n+ * This software was written by Alexander Peslyak in 2001. No copyright is\n+ * claimed, and the software is hereby placed in the public domain.\n+ * In case this attempt to disclaim copyright and place the software in the\n+ * public domain is deemed null and void, then the software is\n+ * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the\n+ * general public under the following terms:\n+ *\n+ * Redistribution and use in source and binary forms, with or without\n+ * modification, are permitted.\n+ *\n+ * There\'s ABSOLUTELY NO WARRANTY, express or implied.\n+ *\n+ * (This is a heavily cut-down "BSD license".)\n+ *\n+ * This differs from Colin Plumb\'s older public domain implementation in that\n+ * no exactly 32-bit integer data type is required (any 32-bit or wider\n+ * unsigned integer data type will do), there\'s no compile-time endianness\n+ * configuration, and the function prototypes match OpenSSL\'s. No code from\n+ * Colin Plumb\'s implementation has been reused; this comment merely compares\n+ * the properties of the two independent implementations.\n+ *\n+ * The primary goals of this implementation are portability and ease of use.\n+ * It is meant to be fast, but not as fast as possible. 
Some known\n+ * optimizations are not included to reduce source code size and avoid\n+ * compile-time configuration.\n+ */\n+ \n+#ifndef HAVE_OPENSSL\n+ \n+#include <string.h>\n+ \n+#include "md5.h"\n+ \n+/*\n+ * The basic MD5 functions.\n+ *\n+ * F and G are optimized compared to their RFC 1321 definitions for\n+ * architectures that lack an AND-NOT instruction, just like in Colin Plumb\'s\n+ * implementation.\n+ */\n+#define F(x, y, z)\t\t\t((z) ^ ((x) & ((y) ^ (z))))\n+#define G(x, y, z)\t\t\t((y) ^ ((z) & ((x) ^ (y))))\n+#define H(x, y, z)\t\t\t((x) ^ (y) ^ (z))\n+#define I(x, y, z)\t\t\t((y) ^ ((x) | ~(z)))\n+ \n+/*\n+ * The MD5 transformation for all four rounds.\n+ */\n+#define STEP(f, a, b, c, d, x, t, s) \\\n+\t(a) += f((b), (c), (d)) + (x) + (t); \\\n+\t(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \\\n+\t(a) += (b);\n+ \n+/*\n+ * SET reads 4 input bytes in little-endian byte order and stores them\n+ * in a properly aligned word in host byte order.\n+ *\n+ * The check for little-endian architectures that tolerate unaligned\n+ * memory accesses is just an optimization. Nothing will break if it\n+ * doesn\'t work.\n+ */\n+#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)\n+#define SET(n) \\\n+\t(*(MD5_u32plus *)&ptr[(n) * 4])\n+#define GET(n) \\\n+\tSET(n)\n+#else\n+#define SET(n) \\\n+\t(ctx->block[(n)] = \\\n+\t(MD5_u32plus)ptr[(n) * 4] | \\\n+\t((MD5_u32plus)ptr[(n) * 4 + 1] << 8) | \\\n+\t((MD5_u32plus)ptr[(n) * 4 + 2] << 16) | \\\n+\t((MD5_u32plus)ptr[(n) * 4 + 3] << 24))\n+#define GET(n) \\\n+\t(ctx->block[(n)])\n+#endif\n+ \n+/*\n+ * This processes one or more 64-byte data blocks, but does NOT update\n+ * the bit counters. 
There are no alignment requirements.\n+ */\n+static void *body(MD5_CTX *ctx, void *data, unsigned long size)\n+{\n+\tunsigned char *ptr;\n+\tMD5_u32plus a, b, c, d;\n+\tMD5_u32plus saved_a, saved_b, saved_c, saved_d;\n+ \n+\tptr = data;\n+ \n+\ta = ctx->a;\n+\tb = ctx->b;\n+\tc = ctx->c;\n+\td = ctx->d;\n+ \n+\tdo {\n+\t\tsaved_a = a;\n+\t\tsaved_b = b;\n+\t\tsaved_c = c;\n+\t\tsaved_d = d;\n+ \n+/* Round 1 */\n+\t\tSTEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)\n+\t\tSTEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)\n+\t\tSTEP(F, c, d, a, b, SET(2), 0x242070db, 17)\n+\t\tSTEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)\n+\t\tSTEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)\n+\t\tSTEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)\n+\t\tSTEP(F, c, d, a, b, SET(6), 0xa8304613, 17)\n+\t\tSTEP(F, b, c, d, a, SET(7), 0xfd469501, 22)\n+\t\tSTEP(F, a, b, c, d, SET(8), 0x698098d8, 7)\n+\t\tSTEP(F, d, a, b, c, SET(9), 0x8b44f7a'..b' 9)\n+\t\tSTEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)\n+\t\tSTEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)\n+\t\tSTEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)\n+\t\tSTEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)\n+\t\tSTEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)\n+\t\tSTEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)\n+ \n+/* Round 3 */\n+\t\tSTEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)\n+\t\tSTEP(H, d, a, b, c, GET(8), 0x8771f681, 11)\n+\t\tSTEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)\n+\t\tSTEP(H, b, c, d, a, GET(14), 0xfde5380c, 23)\n+\t\tSTEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)\n+\t\tSTEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11)\n+\t\tSTEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)\n+\t\tSTEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23)\n+\t\tSTEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)\n+\t\tSTEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11)\n+\t\tSTEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)\n+\t\tSTEP(H, b, c, d, a, GET(6), 0x04881d05, 23)\n+\t\tSTEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)\n+\t\tSTEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11)\n+\t\tSTEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)\n+\t\tSTEP(H, 
b, c, d, a, GET(2), 0xc4ac5665, 23)\n+ \n+/* Round 4 */\n+\t\tSTEP(I, a, b, c, d, GET(0), 0xf4292244, 6)\n+\t\tSTEP(I, d, a, b, c, GET(7), 0x432aff97, 10)\n+\t\tSTEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)\n+\t\tSTEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)\n+\t\tSTEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)\n+\t\tSTEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)\n+\t\tSTEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)\n+\t\tSTEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)\n+\t\tSTEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)\n+\t\tSTEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)\n+\t\tSTEP(I, c, d, a, b, GET(6), 0xa3014314, 15)\n+\t\tSTEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)\n+\t\tSTEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)\n+\t\tSTEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)\n+\t\tSTEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)\n+\t\tSTEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)\n+ \n+\t\ta += saved_a;\n+\t\tb += saved_b;\n+\t\tc += saved_c;\n+\t\td += saved_d;\n+ \n+\t\tptr += 64;\n+\t} while (size -= 64);\n+ \n+\tctx->a = a;\n+\tctx->b = b;\n+\tctx->c = c;\n+\tctx->d = d;\n+ \n+\treturn ptr;\n+}\n+ \n+void MD5_Init(MD5_CTX *ctx)\n+{\n+\tctx->a = 0x67452301;\n+\tctx->b = 0xefcdab89;\n+\tctx->c = 0x98badcfe;\n+\tctx->d = 0x10325476;\n+ \n+\tctx->lo = 0;\n+\tctx->hi = 0;\n+}\n+ \n+void MD5_Update(MD5_CTX *ctx, void *data, unsigned long size)\n+{\n+\tMD5_u32plus saved_lo;\n+\tunsigned long used, free;\n+ \n+\tsaved_lo = ctx->lo;\n+\tif ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)\n+\t\tctx->hi++;\n+\tctx->hi += size >> 29;\n+ \n+\tused = saved_lo & 0x3f;\n+ \n+\tif (used) {\n+\t\tfree = 64 - used;\n+ \n+\t\tif (size < free) {\n+\t\t\tmemcpy(&ctx->buffer[used], data, size);\n+\t\t\treturn;\n+\t\t}\n+ \n+\t\tmemcpy(&ctx->buffer[used], data, free);\n+\t\tdata = (unsigned char *)data + free;\n+\t\tsize -= free;\n+\t\tbody(ctx, ctx->buffer, 64);\n+\t}\n+ \n+\tif (size >= 64) {\n+\t\tdata = body(ctx, data, size & ~(unsigned long)0x3f);\n+\t\tsize &= 0x3f;\n+\t}\n+ \n+\tmemcpy(ctx->buffer, data, 
size);\n+}\n+ \n+void MD5_Final(unsigned char *result, MD5_CTX *ctx)\n+{\n+\tunsigned long used, free;\n+ \n+\tused = ctx->lo & 0x3f;\n+ \n+\tctx->buffer[used++] = 0x80;\n+ \n+\tfree = 64 - used;\n+ \n+\tif (free < 8) {\n+\t\tmemset(&ctx->buffer[used], 0, free);\n+\t\tbody(ctx, ctx->buffer, 64);\n+\t\tused = 0;\n+\t\tfree = 64;\n+\t}\n+ \n+\tmemset(&ctx->buffer[used], 0, free - 8);\n+ \n+\tctx->lo <<= 3;\n+\tctx->buffer[56] = ctx->lo;\n+\tctx->buffer[57] = ctx->lo >> 8;\n+\tctx->buffer[58] = ctx->lo >> 16;\n+\tctx->buffer[59] = ctx->lo >> 24;\n+\tctx->buffer[60] = ctx->hi;\n+\tctx->buffer[61] = ctx->hi >> 8;\n+\tctx->buffer[62] = ctx->hi >> 16;\n+\tctx->buffer[63] = ctx->hi >> 24;\n+ \n+\tbody(ctx, ctx->buffer, 64);\n+ \n+\tresult[0] = ctx->a;\n+\tresult[1] = ctx->a >> 8;\n+\tresult[2] = ctx->a >> 16;\n+\tresult[3] = ctx->a >> 24;\n+\tresult[4] = ctx->b;\n+\tresult[5] = ctx->b >> 8;\n+\tresult[6] = ctx->b >> 16;\n+\tresult[7] = ctx->b >> 24;\n+\tresult[8] = ctx->c;\n+\tresult[9] = ctx->c >> 8;\n+\tresult[10] = ctx->c >> 16;\n+\tresult[11] = ctx->c >> 24;\n+\tresult[12] = ctx->d;\n+\tresult[13] = ctx->d >> 8;\n+\tresult[14] = ctx->d >> 16;\n+\tresult[15] = ctx->d >> 24;\n+ \n+\tmemset(ctx, 0, sizeof(*ctx));\n+}\n+ \n+#endif\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/md5.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/md5.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,54 @@ +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD5 Message-Digest Algorithm (RFC 1321). + * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + * + * Author: + * Alexander Peslyak, better known as Solar Designer <solar at openwall.com> + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. + * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * See md5.c for more information. + */ + +#ifdef HAVE_OPENSSL +#include <openssl/md5.h> +#elif !defined(_MD5_H) +#define _MD5_H + +/* Any 32-bit or wider unsigned integer data type will do */ +typedef unsigned int MD5_u32plus; + +typedef struct { + MD5_u32plus lo, hi; + MD5_u32plus a, b, c, d; + unsigned char buffer[64]; + MD5_u32plus block[16]; +} MD5_CTX; + +extern void MD5_Init(MD5_CTX *ctx); +extern void MD5_Update(MD5_CTX *ctx, void *data, unsigned long size); +extern void MD5_Final(unsigned char *result, MD5_CTX *ctx); + +#endif + +#ifdef __cplusplus +} +#endif + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/misc.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/misc.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,110 @@ +/* +Copyright (c) 1994-1997, 2001-2002 MEDICAL RESEARCH COUNCIL +All rights reserved + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1 Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2 Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF +MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or +promote products derived from this software without specific prior written +permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* +Copyright (c) 2003-2013 Genome Research Ltd. + +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _misc_h +#define _misc_h + +#include "cram/os.h" + +#include <stdio.h> +#include <stdarg.h> /* varargs needed for v*printf() prototypes */ +#include <sys/types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * This informs gcc that crash() doesn't return, so it doesn't need to + * concern itself that code paths going via crash could mean some variables + * being undefined and then issuing uninitialised variable warnings. + * This particularly affected convert. + */ +#ifdef __GNUC__ +# define __NORETURN__ __attribute__ ((__noreturn__)) +#else +# define __NORETURN__ +#endif + +/* + * Used for printf style argument checking. We can request a function such + * as vTcl_SetResult does argument checking, avoiding bugs with using + * %d and passing in a 64-bit record. 
+ */ +#ifdef __GNUC__ +# define __PRINTF_FORMAT__(a,b) __attribute__ ((format (printf, a, b))) +#else +# define __PRINTF_FORMAT__(a,b) +#endif + +extern int is_directory(char * fn); +extern int is_file(char * fn); +extern int file_size(char * fn); + +#define MIN(A,B) ( ( (A) < (B) ) ? (A) : (B) ) +#define MAX(A,B) ( ( (A) > (B) ) ? (A) : (B) ) + +#ifdef __cplusplus +} +#endif + +#endif /*_misc_h*/ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/open_trace_file.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/open_trace_file.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,351 @@\n+/*\n+Author: James Bonfield\n+\n+Copyright (c) 2000-2001 MEDICAL RESEARCH COUNCIL\n+All rights reserved\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF \n+MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or \n+promote products derived from this software without specific prior written \n+permission.\n+\n+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR \n+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \n+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; \n+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON \n+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT \n+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS \n+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+/*\n+Copyright (c) 2008, 2009, 2013, 2014 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. 
Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+#include <stdlib.h>\n+#include <stdio.h>\n+#include <string.h>\n+#include <unistd.h>\n+#include <ctype.h>\n+#include <limits.h>\n+#include <sys/types.h>\n+#include <sys/stat.h>\n+#include "cram/os.h"\n+#ifndef PATH_MAX\n+# define PATH_MAX 1024\n+#endif\n+\n+#include "cram/open_trace_file.h"\n+#include "cram/misc.h"\n+#include "htslib/hfile.h"\n+\n+/*\n+ * Tokenises the search path splitting on colons (unix) or semicolons\n+ * (windows).\n+ * We also explicitly add a "./" to the end of the search path\n+ *\n+ * Returns: A new search path with items separated by nul chars. 
Two nul\n+ * chars in a row represent the end of the tokenised path.\n+ * Returns NULL for a failure.\n+ *\n+ * The returned data has been malloced. It is up to the caller to'..b'y \'dirname\'. If it finds it, it opens\n+ * it. This also searches for compressed versions of the file in dirname\n+ * too.\n+ *\n+ * Returns mFILE pointer if found\n+ * NULL if not\n+ */\n+static mFILE *find_file_dir(char *file, char *dirname) {\n+ char path[PATH_MAX+1];\n+ size_t len = strlen(dirname);\n+ char *cp;\n+\n+ if (dirname[len-1] == \'/\')\n+\tlen--;\n+\n+ /* Special case for "./" or absolute filenames */\n+ if (*file == \'/\' || (len==1 && *dirname == \'.\')) {\n+\tsprintf(path, "%s", file);\n+ } else {\n+\t/* Handle %[0-9]*s expansions, if required */\n+\tchar *path_end = path;\n+\t*path = 0;\n+\twhile ((cp = strchr(dirname, \'%\'))) {\n+\t char *endp;\n+\t long l = strtol(cp+1, &endp, 10);\n+\t if (*endp != \'s\') {\n+\t\tstrncpy(path_end, dirname, (endp+1)-dirname);\n+\t\tpath_end += (endp+1)-dirname;\n+\t\tdirname = endp+1;\n+\t\tcontinue;\n+\t }\n+\t \n+\t strncpy(path_end, dirname, cp-dirname);\n+\t path_end += cp-dirname;\n+\t if (l) {\n+\t\tstrncpy(path_end, file, l);\n+\t\tpath_end += MIN(strlen(file), l);\n+\t\tfile += MIN(strlen(file), l);\n+\t } else {\n+\t\tstrcpy(path_end, file);\n+\t\tpath_end += strlen(file);\n+\t\tfile += strlen(file);\n+\t }\n+\t len -= (endp+1) - dirname;\n+\t dirname = endp+1;\n+\t}\n+\tstrncpy(path_end, dirname, len);\n+\tpath_end += MIN(strlen(dirname), len);\n+\t*path_end = 0;\n+\tif (*file) {\n+\t *path_end++ = \'/\';\n+\t strcpy(path_end, file);\n+\t}\n+\n+\t//fprintf(stderr, "*PATH=\\"%s\\"\\n", path);\n+ }\n+\n+ if (is_file(path)) {\n+\treturn mfopen(path, "rb");\n+ }\n+\n+ return NULL;\n+}\n+\n+/*\n+ * ------------------------------------------------------------------------\n+ * Public functions below.\n+ */\n+\n+/*\n+ * Opens a trace file named \'file\'. 
This is initially looked for as a\n+ * pathname relative to a file named "relative_to". This may (for\n+ * example) be the name of an experiment file referencing the trace\n+ * file. In this case by passing relative_to as the experiment file\n+ * filename the trace file will be picked up in the same directory as\n+ * the experiment file. Relative_to may be supplied as NULL.\n+ *\n+ * \'file\' is looked for at relative_to, then the current directory, and then\n+ * all of the locations listed in \'path\' (which is a colon separated list).\n+ * If \'path\' is NULL it uses the RAWDATA environment variable instead.\n+ *\n+ * Returns a mFILE pointer when found.\n+ * NULL otherwise.\n+ */\n+mFILE *open_path_mfile(char *file, char *path, char *relative_to) {\n+ char *newsearch;\n+ char *ele;\n+ mFILE *fp;\n+\n+ /* Use path first */\n+ if (!path)\n+\tpath = getenv("RAWDATA");\n+ if (NULL == (newsearch = tokenise_search_path(path)))\n+\treturn NULL;\n+ \n+ /*\n+ * Step through the search path testing out each component.\n+ * We now look through each path element treating some prefixes as\n+ * special, otherwise we treat the element as a directory.\n+ */\n+ for (ele = newsearch; *ele; ele += strlen(ele)+1) {\n+\tchar *ele2;\n+\n+\t/*\n+\t * \'|\' prefixing a path component indicates that we do not\n+\t * wish to perform the compression extension searching in that\n+\t * location.\n+\t *\n+\t * NB: this has been removed from the htslib implementation.\n+\t */\n+\tif (*ele == \'|\') {\n+\t ele2 = ele+1;\n+\t} else {\n+\t ele2 = ele;\n+\t}\n+\n+\tif (0 == strncmp(ele2, "URL=", 4)) {\n+\t if ((fp = find_file_url(file, ele2+4))) {\n+\t\tfree(newsearch);\n+\t\treturn fp;\n+\t }\n+\t} else if (!strncmp(ele2, "http:", 5) ||\n+\t\t !strncmp(ele2, "ftp:", 4)) {\n+\t if ((fp = find_file_url(file, ele2))) {\n+\t\tfree(newsearch);\n+\t\treturn fp;\n+\t }\n+\t} else if ((fp = find_file_dir(file, ele2))) {\n+\t free(newsearch);\n+\t return fp;\n+\t} \n+ }\n+\n+ free(newsearch);\n+\n+ /* 
Look in the same location as the incoming \'relative_to\' filename */\n+ if (relative_to) {\n+\tchar *cp;\n+\tchar relative_path[PATH_MAX+1];\n+\tstrcpy(relative_path, relative_to);\n+\tif ((cp = strrchr(relative_path, \'/\')))\n+\t *cp = 0;\n+\tif ((fp = find_file_dir(file, relative_path)))\n+\t return fp;\n+ }\n+\n+ return NULL;\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/open_trace_file.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/open_trace_file.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,115 @@ +/* +Author: James Bonfield + +Copyright (c) 2000-2001 MEDICAL RESEARCH COUNCIL +All rights reserved + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + . Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + . Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + . Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF +MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or +promote products derived from this software without specific prior written +permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* +Copyright (c) 2008, 2009, 2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _OPEN_TRACE_FILE_H_ +#define _OPEN_TRACE_FILE_H_ + +#include "cram/mFILE.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Tokenises the search path splitting on colons (unix) or semicolons + * (windows). + * We also explicitly add a "./" to the end of the search path + * + * Returns: A new search path with items separated by nul chars. Two nul + * chars in a row represent the end of the tokenised path. + * Returns NULL for a failure. + * + * The returned data has been malloced. It is up to the caller to free this + * memory. + */ +char *tokenise_search_path(char *searchpath); + +/* + * Opens a trace file named 'file'. 
This is initially looked for as a + * pathname relative to a file named "relative_to". This may (for + * example) be the name of an experiment file referencing the trace + * file. In this case by passing relative_to as the experiment file + * filename the trace file will be picked up in the same directory as + * the experiment file. Relative_to may be supplied as NULL. + * + * 'file' is looked for at relative_to, then the current directory, and then + * all of the locations listed in 'path' (which is a colon separated list). + * If 'path' is NULL it uses the RAWDATA environment variable instead. + * + * Returns a mFILE pointer when found. + * NULL otherwise. + */ +mFILE *open_path_mfile(char *file, char *path, char *relative_to); + +/* + * Returns a mFILE containing the entire contents of the url; + * NULL on failure. + */ +mFILE *find_file_url(char *file, char *url); + + +#ifdef __cplusplus +} +#endif + +#endif /* _OPEN_TRACE_FILE_H_ */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/os.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/os.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| b'@@ -0,0 +1,308 @@\n+/*\n+Copyright (c) 1993, 1995-2002 MEDICAL RESEARCH COUNCIL\n+All rights reserved\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1 Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2 Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF \n+MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or \n+promote products derived from this software without specific prior written \n+permission.\n+\n+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR \n+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \n+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; \n+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON \n+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT \n+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS \n+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+/*\n+Copyright (c) 2004, 2006, 2009-2011, 2013 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. 
Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+/*\n+ * File: os.h\n+ *\n+ * Author: \n+ * MRC Laboratory of Molecular Biology\n+ *\t Hills Road\n+ *\t Cambridge CB2 2QH\n+ *\t United Kingdom\n+ *\n+ * Description: operating system specific type definitions\n+ *\n+ */\n+\n+#ifndef _OS_H_\n+#define _OS_H_\n+\n+#include <limits.h>\n+#include <inttypes.h>\n+\n+#ifdef __cplusplus\n+extern "C" {\n+#endif\n+\n+/*-----------------------------------------------------------------------------\n+ * Detection of endianness. 
The main part of this is done in autoconf, but\n+ * for the case of MacOS FAT binaries we fall back on auto-sensing based on\n+ * processor type too.\n+ */\n+\n+/* Set by autoconf */\n+#define SP_LITTLE_ENDIAN\n+\n+/* Mac FAT binaries or unknown. Auto detect based on CPU type */\n+#if !defined(SP_BIG_ENDIA'..b'(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(__i686__) || defined(__i686)\n+# define ALLOW_UAC\n+#endif\n+\n+/*-----------------------------------------------------------------------------\n+ * Byte swapping macros\n+ */\n+\n+/*\n+ * Our new swap runs at the same speed on Ultrix, but substantially faster\n+ * (300% for swap_int4, ~50% for swap_int2) on an Alpha (due to the lack of\n+ * decent \'char\' support).\n+ *\n+ * They also have the ability to swap in situ (src == dst). Newer code now\n+ * relies on this so don\'t change back!\n+ */\n+#define iswap_int8(x) \\\n+ (((x & 0x00000000000000ffLL) << 56) + \\\n+ ((x & 0x000000000000ff00LL) << 40) + \\\n+ ((x & 0x0000000000ff0000LL) << 24) + \\\n+ ((x & 0x00000000ff000000LL) << 8) + \\\n+ ((x & 0x000000ff00000000LL) >> 8) + \\\n+ ((x & 0x0000ff0000000000LL) >> 24) + \\\n+ ((x & 0x00ff000000000000LL) >> 40) + \\\n+ ((x & 0xff00000000000000LL) >> 56))\n+\n+#define iswap_int4(x) \\\n+ (((x & 0x000000ff) << 24) + \\\n+ ((x & 0x0000ff00) << 8) + \\\n+ ((x & 0x00ff0000) >> 8) + \\\n+ ((x & 0xff000000) >> 24))\n+\n+#define iswap_int2(x) \\\n+ (((x & 0x00ff) << 8) + \\\n+ ((x & 0xff00) >> 8))\n+\n+/*\n+ * Linux systems may use byteswap.h to get assembly versions of byte-swap\n+ * on intel systems. 
This can be as trivial as the bswap opcode, which works\n+ * out at over 2-times faster than iswap_int4 above.\n+ */\n+#if 0\n+#if defined(__linux__)\n+# include <byteswap.h>\n+# undef iswap_int8\n+# undef iswap_int4\n+# undef iswap_int2\n+# define iswap_int8 bswap_64\n+# define iswap_int4 bswap_32\n+# define iswap_int2 bswap_16\n+#endif\n+#endif\n+\n+\n+/*\n+ * Macros to specify that data read in is of a particular endianness.\n+ * The macros here swap to the appropriate order for the particular machine\n+ * running the macro and return the new answer. These may also be used when\n+ * writing to a file to specify that we wish to write in (eg) big endian\n+ * format.\n+ *\n+ * This leads to efficient code as most of the time these macros are\n+ * trivial.\n+ */\n+#ifdef SP_BIG_ENDIAN\n+#define le_int4(x) iswap_int4((x))\n+#define le_int2(x) iswap_int2((x))\n+#endif\n+\n+#ifdef SP_LITTLE_ENDIAN\n+#define le_int4(x) (x)\n+#define le_int2(x) (x)\n+#endif\n+\n+/*-----------------------------------------------------------------------------\n+ * <inttypes.h> definitions, incase they\'re not present\n+ */\n+\n+#ifndef PRId64\n+#define __PRI64__ "l"\n+#define PRId64 __PRI64__ "d"\n+#define PRId32 "d"\n+#define PRId16 "d"\n+#define PRId8 "d"\n+#define PRIu64 __PRI64__ "u"\n+#define PRIu32 "u"\n+#define PRIu16 "u"\n+#define PRIu8 "u"\n+#endif\n+\n+/*-----------------------------------------------------------------------------\n+ * Operating system specifics.\n+ * These ought to be done by autoconf, but are legacy code.\n+ */\n+/*\n+ * SunOS 4.x\n+ * Even though we use the ANSI gcc, we make use the the standard SunOS 4.x\n+ * libraries and include files, which are non-ansi\n+ */\n+#if defined(__sun__) && !defined(__svr4__)\n+#define SEEK_SET 0\n+#define SEEK_CUR 1\n+#define SEEK_END 2\n+#endif\n+\n+/*\n+ * Microsoft Visual C++\n+ * Windows\n+ */\n+#if defined(_MSC_VER)\n+#define popen _popen\n+#define pclose _pclose\n+#define ftruncate(fd,len) 
_chsize(fd,len)\n+#endif\n+\n+\n+/*\n+ * Microsoft Windows running MinGW\n+ */\n+#if defined(__MINGW32__)\n+/* #define mkdir(filename,mode) mkdir((filename)) */\n+#define sysconf(x) 512\n+#define ftruncate(fd,len) _chsize(fd,len)\n+#endif\n+\n+/* Generic WIN32 API issues */\n+#ifdef _WIN32\n+# ifndef HAVE_FSEEKO\n+# if __MSVCRT_VERSION__ >= 0x800\n+ /* if you have MSVCR80 installed then you can use these definitions: */\n+# define off_t __int64\n+# define fseeko _fseeki64\n+# define ftello _ftelli64\n+# else\n+ /* otherwise we\'re stuck with 32-bit file support */\n+# define off_t long\n+# define fseeko fseek\n+# define ftello ftell\n+# endif\n+# endif /* !HAVE_FSEEKO */\n+#endif /* _WIN32 */\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+#endif /*_OS_H_*/\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/pooled_alloc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/pooled_alloc.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,170 @@ +/* +Copyright (c) 2009 Genome Research Ltd. +Author: Rob Davies <rmd@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef HAVE_CONFIG_H +#include "io_lib_config.h" +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> + +#include "cram/pooled_alloc.h" + +//#define TEST_MAIN + +#define PSIZE 1024*1024 + +pool_alloc_t *pool_create(size_t dsize) { + pool_alloc_t *p; + + if (NULL == (p = (pool_alloc_t *)malloc(sizeof(*p)))) + return NULL; + + /* Minimum size is a pointer, for free list */ + dsize = (dsize + sizeof(void *) - 1) & ~(sizeof(void *)-1); + if (dsize < sizeof(void *)) + dsize = sizeof(void *); + p->dsize = dsize; + + p->npools = 0; + p->pools = NULL; + p->free = NULL; + + return p; +} + +static pool_t *new_pool(pool_alloc_t *p) { + size_t n = PSIZE / p->dsize; + pool_t *pool; + + pool = realloc(p->pools, (p->npools + 1) * sizeof(*p->pools)); + if (NULL == pool) return NULL; + p->pools = pool; + pool = &p->pools[p->npools]; + + pool->pool = malloc(n * p->dsize); + if (NULL == pool->pool) return NULL; + + pool->used = 0; + + p->npools++; + + return pool; +} + +void pool_destroy(pool_alloc_t *p) { + size_t i; + + for (i = 0; i < p->npools; i++) { + free(p->pools[i].pool); + } + free(p->pools); + free(p); +} + +void *pool_alloc(pool_alloc_t *p) { + pool_t *pool; + void *ret; + + /* Look on free list */ + if (NULL != p->free) { + ret = p->free; + p->free = *((void **)p->free); + return ret; + } + + /* Look for space in the last pool */ + if (p->npools) { + pool = &p->pools[p->npools - 1]; + if (pool->used + p->dsize < PSIZE) { + ret = ((char *) pool->pool) + pool->used; + pool->used += p->dsize; + return ret; + } + } + + /* Need a new pool */ + pool = new_pool(p); + if (NULL == pool) return NULL; + + pool->used = p->dsize; + return pool->pool; +} + +void pool_free(pool_alloc_t *p, void *ptr) { + *(void **)ptr = p->free; + p->free = ptr; +} + +#ifdef TEST_MAIN +typedef struct { + int x, y, z; +} xyz; + +#define NP 10000 +int main(void) { + int i; + xyz *item; + xyz **items; + pool_alloc_t *p = pool_create(sizeof(xyz)); + + items = (xyz **)malloc(NP * 
sizeof(*items)); + + for (i = 0; i < NP; i++) { + item = pool_alloc(p); + item->x = i; + item->y = i+1; + item->z = i+2; + items[i] = item; + } + + for (i = 0; i < NP; i++) { + item = items[i]; + if (i % 3) + pool_free(p, item); + } + + for (i = 0; i < NP; i++) { + item = pool_alloc(p); + item->x = 1000000+i; + item->y = 1000000+i+1; + item->z = 1000000+i+2; + } + + for (i = 0; i < NP; i++) { + item = items[i]; + printf("%d\t%d\t%d\t%d\n", i, item->x, item->y, item->z); + pool_free(p, item); + } + + return 0; +} +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/pooled_alloc.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/pooled_alloc.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,56 @@ +/* +Copyright (c) 2009 Genome Research Ltd. +Author: Rob Davies <rmd@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _POOLED_ALLOC_H_ +#define _POOLED_ALLOC_H_ + +/* + * Implements a pooled block allocator where all items are the same size, + * but we need many of them. 
+ */ +typedef struct { + void *pool; + size_t used; +} pool_t; + +typedef struct { + size_t dsize; + size_t npools; + pool_t *pools; + void *free; +} pool_alloc_t; + +pool_alloc_t *pool_create(size_t dsize); +void pool_destroy(pool_alloc_t *p); +void *pool_alloc(pool_alloc_t *p); +void pool_free(pool_alloc_t *p, void *ptr); + + +#endif /*_POOLED_ALLOC_H_*/ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/rANS_byte.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/rANS_byte.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| b'@@ -0,0 +1,336 @@\n+/* rans_byte.h originally from https://github.com/rygorous/ryg_rans\n+ *\n+ * This is a public-domain implementation of several rANS variants. rANS is an\n+ * entropy coder from the ANS family, as described in Jarek Duda\'s paper\n+ * "Asymmetric numeral systems" (http://arxiv.org/abs/1311.2540).\n+ */\n+\n+/*-------------------------------------------------------------------------- */\n+\n+// Simple byte-aligned rANS encoder/decoder - public domain - Fabian \'ryg\' Giesen 2014\n+//\n+// Not intended to be "industrial strength"; just meant to illustrate the general\n+// idea.\n+\n+#ifndef RANS_BYTE_HEADER\n+#define RANS_BYTE_HEADER\n+\n+#include <stdint.h>\n+\n+#ifdef assert\n+#define RansAssert assert\n+#else\n+#define RansAssert(x)\n+#endif\n+\n+// READ ME FIRST:\n+//\n+// This is designed like a typical arithmetic coder API, but there\'s three\n+// twists you absolutely should be aware of before you start hacking:\n+//\n+// 1. You need to encode data in *reverse* - last symbol first. rANS works\n+// like a stack: last in, first out.\n+// 2. Likewise, the encoder outputs bytes *in reverse* - that is, you give\n+// it a pointer to the *end* of your buffer (exclusive), and it will\n+// slowly move towards the beginning as more bytes are emitted.\n+// 3. Unlike basically any other entropy coder implementation you might\n+// have used, you can interleave data from multiple independent rANS\n+// encoders into the same bytestream without any extra signaling;\n+// you can also just write some bytes by yourself in the middle if\n+// you want to. This is in addition to the usual arithmetic encoder\n+// property of being able to switch models on the fly. Writing raw\n+// bytes can be useful when you have some data that you know is\n+// incompressible, and is cheaper than going through the rANS encode\n+// function. 
Using multiple rANS coders on the same byte stream wastes\n+// a few bytes compared to using just one, but execution of two\n+// independent encoders can happen in parallel on superscalar and\n+// Out-of-Order CPUs, so this can be *much* faster in tight decoding\n+// loops.\n+//\n+// This is why all the rANS functions take the write pointer as an\n+// argument instead of just storing it in some context struct.\n+\n+// --------------------------------------------------------------------------\n+\n+// L (\'l\' in the paper) is the lower bound of our normalization interval.\n+// Between this and our byte-aligned emission, we use 31 (not 32!) bits.\n+// This is done intentionally because exact reciprocals for 31-bit uints\n+// fit in 32-bit uints: this permits some optimizations during encoding.\n+#define RANS_BYTE_L (1u << 23) // lower bound of our normalization interval\n+\n+// State for a rANS encoder. Yep, that\'s all there is to it.\n+typedef uint32_t RansState;\n+\n+// Initialize a rANS encoder.\n+static inline void RansEncInit(RansState* r)\n+{\n+ *r = RANS_BYTE_L;\n+}\n+\n+// Renormalize the encoder. Internal function.\n+static inline RansState RansEncRenorm(RansState x, uint8_t** pptr, uint32_t freq, uint32_t scale_bits)\n+{\n+ uint32_t x_max = ((RANS_BYTE_L >> scale_bits) << 8) * freq; // this turns into a shift.\n+ if (x >= x_max) {\n+ uint8_t* ptr = *pptr;\n+ do {\n+ *--ptr = (uint8_t) (x & 0xff);\n+ x >>= 8;\n+ } while (x >= x_max);\n+ *pptr = ptr;\n+ }\n+ return x;\n+}\n+\n+// Encodes a single symbol with range start "start" and frequency "freq".\n+// All frequencies are assumed to sum to "1 << scale_bits", and the\n+// resulting bytes get written to ptr (which is updated).\n+//\n+// NOTE: With rANS, you need to encode symbols in *reverse order*, i.e. from\n+// beginning to end! 
Likewise, the output bytestream is written *backwards*:\n+// ptr starts pointing at the end of the output buffer and keeps decrementing.\n+static inline void RansEncPut(RansState* r, uint8_t** pptr, uint32_t start, uint32_t freq, uint32_t scale_bits)\n+{\n+ // renormalize\n+ RansState x = RansEncRe'..b' mul_hi(x, (1<<32) - 1)) >> 0\n+ // = floor(x - x/(2^32))\n+ // = x - 1 if 1 <= x < 2^32\n+ // and we know that x>0 (x=0 is never in a valid normalization interval).\n+ //\n+ // So we now need to choose the other parameters such that\n+ // x_new = x*M + start\n+ // plug it in:\n+ // x*M + start (desired result)\n+ // = bias + x + q*cmpl_freq (*)\n+ // = bias + x + (x - 1)*(M - 1) (plug in q=x-1, cmpl_freq)\n+ // = bias + 1 + (x - 1)*M\n+ // = x*M + (bias + 1 - M)\n+ //\n+ // so we have start = bias + 1 - M, or equivalently\n+ // bias = start + M - 1.\n+ s->rcp_freq = ~0u;\n+ s->rcp_shift = 0;\n+ s->bias = start + (1 << scale_bits) - 1;\n+ } else {\n+ // Alverson, "Integer Division using reciprocals"\n+ // shift=ceil(log2(freq))\n+ uint32_t shift = 0;\n+ while (freq > (1u << shift))\n+ shift++;\n+\n+ s->rcp_freq = (uint32_t) (((1ull << (shift + 31)) + freq-1) / freq);\n+ s->rcp_shift = shift - 1;\n+\n+ // With these values, \'q\' is the correct quotient, so we\n+ // have bias=start.\n+ s->bias = start;\n+ }\n+\n+ s->rcp_shift += 32; // Avoid the extra >>32 in RansEncPutSymbol\n+}\n+\n+// Initialize a decoder symbol to start "start" and frequency "freq"\n+static inline void RansDecSymbolInit(RansDecSymbol* s, uint32_t start, uint32_t freq)\n+{\n+ RansAssert(start <= (1 << 16));\n+ RansAssert(freq <= (1 << 16) - start);\n+ s->start = (uint16_t) start;\n+ s->freq = (uint16_t) freq;\n+}\n+\n+// Encodes a given symbol. 
This is faster than straight RansEnc since we can do\n+// multiplications instead of a divide.\n+//\n+// See RansEncSymbolInit for a description of how this works.\n+static inline void RansEncPutSymbol(RansState* r, uint8_t** pptr, RansEncSymbol const* sym)\n+{\n+ RansAssert(sym->x_max != 0); // can\'t encode symbol with freq=0\n+\n+ // renormalize\n+ uint32_t x = *r;\n+ uint32_t x_max = sym->x_max;\n+\n+ if (x >= x_max) {\n+\tuint8_t* ptr = *pptr;\n+\tdo {\n+\t *--ptr = (uint8_t) (x & 0xff);\n+\t x >>= 8;\n+\t} while (x >= x_max);\n+\t*pptr = ptr;\n+ }\n+\n+ // x = C(s,x)\n+ // NOTE: written this way so we get a 32-bit "multiply high" when\n+ // available. If you\'re on a 64-bit platform with cheap multiplies\n+ // (e.g. x64), just bake the +32 into rcp_shift.\n+ //uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> 32) >> sym->rcp_shift;\n+\n+ // The extra >>32 has already been added to RansEncSymbolInit\n+ uint32_t q = (uint32_t) (((uint64_t)x * sym->rcp_freq) >> sym->rcp_shift);\n+ *r = x + sym->bias + q * sym->cmpl_freq;\n+}\n+\n+// Equivalent to RansDecAdvance that takes a symbol.\n+static inline void RansDecAdvanceSymbol(RansState* r, uint8_t** pptr, RansDecSymbol const* sym, uint32_t scale_bits)\n+{\n+ RansDecAdvance(r, pptr, sym->start, sym->freq, scale_bits);\n+}\n+\n+// Advances in the bit stream by "popping" a single symbol with range start\n+// "start" and frequency "freq". 
All frequencies are assumed to sum to "1 << scale_bits".\n+// No renormalization or output happens.\n+static inline void RansDecAdvanceStep(RansState* r, uint32_t start, uint32_t freq, uint32_t scale_bits)\n+{\n+ uint32_t mask = (1u << scale_bits) - 1;\n+\n+ // s, x = D(x)\n+ uint32_t x = *r;\n+ *r = freq * (x >> scale_bits) + (x & mask) - start;\n+}\n+\n+// Equivalent to RansDecAdvanceStep that takes a symbol.\n+static inline void RansDecAdvanceSymbolStep(RansState* r, RansDecSymbol const* sym, uint32_t scale_bits)\n+{\n+ RansDecAdvanceStep(r, sym->start, sym->freq, scale_bits);\n+}\n+\n+// Renormalize.\n+static inline void RansDecRenorm(RansState* r, uint8_t** pptr)\n+{\n+ // renormalize\n+ uint32_t x = *r;\n+\n+ if (x < RANS_BYTE_L) {\n+ uint8_t* ptr = *pptr;\n+ do x = (x << 8) | *ptr++; while (x < RANS_BYTE_L);\n+ *pptr = ptr;\n+ }\n+\n+ *r = x;\n+}\n+\n+#endif // RANS_BYTE_HEADER\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/rANS_static.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/rANS_static.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,841 @@\n+/*\n+ * Copyright (c) 2014 Genome Research Ltd.\n+ * Author(s): James Bonfield\n+ *\n+ * Redistribution and use in source and binary forms, with or without\n+ * modification, are permitted provided that the following conditions are met:\n+ *\n+ * 1. Redistributions of source code must retain the above copyright notice,\n+ * this list of conditions and the following disclaimer.\n+ *\n+ * 2. Redistributions in binary form must reproduce the above\n+ * copyright notice, this list of conditions and the following\n+ * disclaimer in the documentation and/or other materials provided\n+ * with the distribution.\n+ *\n+ * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+ * Institute nor the names of its contributors may be used to endorse\n+ * or promote products derived from this software without specific\n+ * prior written permission.\n+ *\n+ * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS\n+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED\n+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A\n+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH\n+ * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+/*\n+ * Author: James Bonfield, Wellcome Trust Sanger Institute. 
2014\n+ */\n+\n+#include <stdint.h>\n+#include <stdlib.h>\n+#include <stdio.h>\n+#include <unistd.h>\n+#include <assert.h>\n+#include <string.h>\n+#include <sys/time.h>\n+\n+#include "cram/rANS_static.h"\n+#include "cram/rANS_byte.h"\n+\n+#define TF_SHIFT 12\n+#define TOTFREQ (1<<TF_SHIFT)\n+\n+#define ABS(a) ((a)>0?(a):-(a))\n+#ifndef BLK_SIZE\n+# define BLK_SIZE 1024*1024\n+#endif\n+\n+// Room to allow for expanded BLK_SIZE on worst case compression.\n+#define BLK_SIZE2 ((int)(1.05*BLK_SIZE))\n+\n+/*-----------------------------------------------------------------------------\n+ * Memory to memory compression functions.\n+ *\n+ * These are original versions without any manual loop unrolling. They\n+ * are easier to understand, but can be up to 2x slower.\n+ */\n+\n+unsigned char *rans_compress_O0(unsigned char *in, unsigned int in_size,\n+\t\t\t\tunsigned int *out_size) {\n+ unsigned char *out_buf = malloc(1.05*in_size + 257*257*3 + 9);\n+ unsigned char *cp, *out_end;\n+ RansEncSymbol syms[256];\n+ RansState rans0, rans1, rans2, rans3;\n+ uint8_t* ptr;\n+ int F[256] = {0}, i, j, tab_size, rle, x, fsum = 0;\n+ int m = 0, M = 0;\n+ uint64_t tr;\n+\n+ if (!out_buf)\n+\treturn NULL;\n+\n+ ptr = out_end = out_buf + (int)(1.05*in_size) + 257*257*3 + 9;\n+\n+ // Compute statistics\n+ for (i = 0; i < in_size; i++) {\n+\tF[in[i]]++;\n+ }\n+ tr = ((uint64_t)TOTFREQ<<31)/in_size + (1<<30)/in_size;\n+\n+ // Normalise so T[i] == TOTFREQ\n+ for (m = M = j = 0; j < 256; j++) {\n+\tif (!F[j])\n+\t continue;\n+\n+\tif (m < F[j])\n+\t m = F[j], M = j;\n+\n+\tif ((F[j] = (F[j]*tr)>>31) == 0)\n+\t F[j] = 1;\n+\tfsum += F[j];\n+ }\n+\n+ fsum++;\n+ if (fsum < TOTFREQ)\n+\tF[M] += TOTFREQ-fsum;\n+ else\n+\tF[M] -= fsum-TOTFREQ;\n+\n+ //printf("F[%d]=%d\\n", M, F[M]);\n+ assert(F[M]>0);\n+\n+ // Encode statistics.\n+ cp = out_buf+9;\n+\n+ for (x = rle = j = 0; j < 256; j++) {\n+\tif (F[j]) {\n+\t // j\n+\t if (rle) {\n+\t\trle--;\n+\t } else {\n+\t\t*cp++ = j;\n+\t\tif (!rle && j && 
F[j-1]) {\n+\t\t for(rle=j+1; rle<256 && F[rle]; rle++)\n+\t\t\t;\n+\t\t rle -= j+1;\n+\t\t *cp++ = rle;\n+\t\t}\n+\t\t//fprintf(stderr, "%d: %d %d\\n", j, rle, N[j]);\n+\t }\n+\n+\t // F[j]\n+\t if (F[j]<128) {\n+\t\t*cp++ = F[j];\n+\t } else {\n'..b'yms[l3][c[3]].freq * (R[3]>>TF_SHIFT);\n+\n+\tR[0] += m[0] - syms[l0][c[0]].start;\n+\tR[1] += m[1] - syms[l1][c[1]].start;\n+\tR[2] += m[2] - syms[l2][c[2]].start;\n+\tR[3] += m[3] - syms[l3][c[3]].start;\n+\n+\tRansDecRenorm(&R[0], &ptr);\n+\tRansDecRenorm(&R[1], &ptr);\n+\tRansDecRenorm(&R[2], &ptr);\n+\tRansDecRenorm(&R[3], &ptr);\n+\n+\tl0 = c[0];\n+\tl1 = c[1];\n+\tl2 = c[2];\n+\tl3 = c[3];\n+ }\n+\n+ rans0 = R[0];\n+ rans1 = R[1];\n+ rans2 = R[2];\n+ rans3 = R[3];\n+\n+ // Remainder\n+ for (; i4[3] < out_sz; i4[3]++) {\n+\tunsigned char c3 = D[l3].R[RansDecGet(&rans3, TF_SHIFT)];\n+\tout_buf[i4[3]] = c3;\n+\tRansDecAdvanceSymbol(&rans3, &ptr, &syms[l3][c3], TF_SHIFT);\n+\tl3 = c3;\n+ }\n+\n+ *out_size = out_sz;\n+\n+ for (i = 0; i < 256; i++)\n+\tif (D[i].R) free(D[i].R);\n+\n+ return (unsigned char *)out_buf;\n+}\n+\n+/*-----------------------------------------------------------------------------\n+ * Simple interface to the order-0 vs order-1 encoders and decoders.\n+ */\n+unsigned char *rans_compress(unsigned char *in, unsigned int in_size,\n+\t\t\t unsigned int *out_size, int order) {\n+ return order\n+\t? rans_compress_O1(in, in_size, out_size)\n+\t: rans_compress_O0(in, in_size, out_size);\n+}\n+\n+unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size,\n+\t\t\t unsigned int *out_size) {\n+ return in[0]\n+\t? rans_uncompress_O1(in, in_size, out_size)\n+\t: rans_uncompress_O0(in, in_size, out_size);\n+}\n+\n+\n+#ifdef TEST_MAIN\n+/*-----------------------------------------------------------------------------\n+ * Main.\n+ *\n+ * This is a simple command line tool for testing order-0 and order-1\n+ * compression using the rANS codec. Simply compile with\n+ *\n+ * gcc -DTEST_MAIN -O3 -I. 
cram/rANS_static.c -o cram/rANS_static\n+ *\n+ * Usage: cram/rANS_static -o0 < file > file.o0\n+ * cram/rANS_static -d < file.o0 > file2\n+ *\n+ * cram/rANS_static -o1 < file > file.o1\n+ * cram/rANS_static -d < file.o1 > file2\n+ */\n+int main(int argc, char **argv) {\n+ int opt, order = 0;\n+ unsigned char in_buf[BLK_SIZE2+257*257*3];\n+ int decode = 0;\n+ FILE *infp = stdin, *outfp = stdout;\n+ struct timeval tv1, tv2;\n+ size_t bytes = 0;\n+\n+ extern char *optarg;\n+ extern int optind;\n+\n+ while ((opt = getopt(argc, argv, "o:d")) != -1) {\n+\tswitch (opt) {\n+\tcase \'o\':\n+\t order = atoi(optarg);\n+\t break;\n+\n+\tcase \'d\':\n+\t decode = 1;\n+\t break;\n+\t}\n+ }\n+\n+ order = order ? 1 : 0; // Only support O(0) and O(1)\n+\n+ if (optind < argc) {\n+\tif (!(infp = fopen(argv[optind], "rb"))) {\n+\t perror(argv[optind]);\n+\t return 1;\n+\t}\n+\toptind++;\n+ }\n+\n+ if (optind < argc) {\n+\tif (!(outfp = fopen(argv[optind], "wb"))) {\n+\t perror(argv[optind]);\n+\t return 1;\n+\t}\n+\toptind++;\n+ }\n+\n+ gettimeofday(&tv1, NULL);\n+\n+ if (decode) {\n+\t// Only used in some test implementations of RC_GetFreq()\n+\t//RC_init();\n+\t//RC_init2();\n+\n+\tfor (;;) {\n+\t uint32_t in_size, out_size;\n+\t unsigned char *out;\n+\n+\t if (4 != fread(&in_size, 1, 4, infp))\n+\t\tbreak;\n+\t if (in_size != fread(in_buf, 1, in_size, infp)) {\n+\t\tfprintf(stderr, "Truncated input\\n");\n+\t\texit(1);\n+\t }\n+\t out = rans_uncompress(in_buf, in_size, &out_size);\n+\t if (!out)\n+\t\tabort();\n+\n+\t fwrite(out, 1, out_size, outfp);\n+\t free(out);\n+\n+\t bytes += out_size;\n+\t}\n+ } else {\n+\tfor (;;) {\n+\t uint32_t in_size, out_size;\n+\t unsigned char *out;\n+\n+\t in_size = fread(in_buf, 1, BLK_SIZE, infp);\n+\t if (in_size <= 0)\n+\t\tbreak;\n+\n+\t out = rans_compress(in_buf, in_size, &out_size, order);\n+\n+\t fwrite(&out_size, 1, 4, outfp);\n+\t fwrite(out, 1, out_size, outfp);\n+\t free(out);\n+\n+\t bytes += in_size;\n+\t}\n+ }\n+\n+ 
gettimeofday(&tv2, NULL);\n+\n+ fprintf(stderr, "Took %ld microseconds, %5.1f MB/s\\n",\n+\t (long)(tv2.tv_sec - tv1.tv_sec)*1000000 +\n+\t tv2.tv_usec - tv1.tv_usec,\n+\t (double)bytes / ((long)(tv2.tv_sec - tv1.tv_sec)*1000000 +\n+\t\t\t tv2.tv_usec - tv1.tv_usec));\n+ return 0;\n+}\n+#endif\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/rANS_static.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/rANS_static.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2014 Genome Research Ltd. + * Author(s): James Bonfield + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger + * Institute nor the names of its contributors may be used to endorse + * or promote products derived from this software without specific + * prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH + * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#ifndef RANS_STATIC_H +#define RANS_STATIC_H + +unsigned char *rans_compress(unsigned char *in, unsigned int in_size, + unsigned int *out_size, int order); +unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size, + unsigned int *out_size); + + +#endif /* RANS_STATIC_H */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/sam_header.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/sam_header.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,1218 @@\n+/*\n+Copyright (c) 2013 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+#ifdef HAVE_CONFIG_H\n+#include "io_lib_config.h"\n+#endif\n+\n+#include <string.h>\n+#include <assert.h>\n+\n+#include "cram/sam_header.h"\n+#include "cram/string_alloc.h"\n+\n+static void sam_hdr_error(char *msg, char *line, int len, int lno) {\n+ int j;\n+ \n+ for (j = 0; j < len && line[j] != \'\\n\'; j++)\n+\t;\n+ fprintf(stderr, "%s at line %d: \\"%.*s\\"\\n", msg, lno, j, line);\n+}\n+\n+void sam_hdr_dump(SAM_hdr *hdr) {\n+ khint_t k;\n+ int i;\n+\n+ printf("===DUMP===\\n");\n+ for (k = kh_begin(hdr->h); k != kh_end(hdr->h); k++) {\n+\tSAM_hdr_type *t1, *t2;\n+\tchar c[2];\n+\n+\tif (!kh_exist(hdr->h, k))\n+\t continue;\n+\n+\tt1 = t2 = kh_val(hdr->h, k);\n+\tc[0] = kh_key(hdr->h, k)>>8;\n+\tc[1] = kh_key(hdr->h, k)&0xff;\n+\tprintf("Type %.2s, count %d\\n", c, t1->prev->order+1);\n+\n+\tdo {\n+\t SAM_hdr_tag *tag;\n+\t printf(">>>%d ", t1->order);\n+\t for (tag = t1->tag; tag; tag=tag->next) {\n+\t\tprintf("\\"%.2s\\":\\"%.*s\\"\\t",\n+\t\t tag->str, tag->len-3, tag->str+3);\n+\t }\n+\t putchar(\'\\n\');\n+\t t1 = t1->next;\n+\t} while (t1 != t2);\n+ }\n+\n+ /* Dump out PG chains */\n+ printf("\\n@PG chains:\\n");\n+ for (i = 0; i < hdr->npg_end; i++) {\n+\tint j;\n+\tprintf(" %d:", i);\n+\tfor (j = hdr->pg_end[i]; j != -1; j = hdr->pg[j].prev_id) {\n+\t printf("%s%d(%.*s)", \n+\t\t j == hdr->pg_end[i] ? 
" " : "->",\n+\t\t j, hdr->pg[j].name_len, hdr->pg[j].name);\n+\t}\n+\tprintf("\\n");\n+ }\n+\n+ puts("===END DUMP===");\n+}\n+\n+/* Updates the hash tables in the SAM_hdr structure.\n+ *\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+static int sam_hdr_update_hashes(SAM_hdr *sh,\n+\t\t\t\t int type,\n+\t\t\t\t SAM_hdr_type *h_type) {\n+ /* Add to reference hash? */\n+ if ((type>>8) == \'S\' && (type&0xff) == \'Q\') {\n+\tSAM_hdr_tag *tag;\n+\tint nref = sh->nref;\n+\n+\tsh->ref = realloc(sh->ref, (sh->nref+1)*sizeof(*sh->ref));\n+\tif (!sh->ref)\n+\t return -1;\n+\n+\ttag = h_type->tag;\n+\tsh->ref[nref].name = NULL;\n+\tsh->ref[nref].len = 0;\n+\tsh->ref[nref].ty = h_type;\n+\tsh->ref[nref].tag = tag;\n+\n+\twhile (tag) {\n+\t if (tag->str[0] == \'S\' && tag->str[1] == \'N\') {\n+\t\tif (!(sh->ref[nref].name = malloc(tag->len)))\n+\t\t return -1;\n+\t\tstrncpy(sh->ref[nref].name, tag->str+3, tag->len-3);\n+\t\tsh->ref[nref].name[tag->len-3] = 0;\n+\t } else if (tag->str[0] == \'L\' && tag->str[1] == \'N\') {\n+\t\tsh->ref[nref].len = atoi(tag->str+3);\n+\t }\n+\t tag = tag->next;\n+\t}\n+\n+\tif (sh->ref[nref]'..b'n this doesn\'t need doing, but incase\n+ * our header is out of order this goes through the sh->pg[] array\n+ * setting the prev_id field.\n+ *\n+ * Note we can have multiple complete chains. 
This code should identify the\n+ * tails of these chains as these are the entries we have to link to in\n+ * subsequent PP records.\n+ *\n+ * Returns 0 on sucess\n+ * -1 on failure (indicating broken PG/PP records)\n+ */\n+int sam_hdr_link_pg(SAM_hdr *hdr) {\n+ int i, j, ret = 0;\n+\n+ hdr->npg_end_alloc = hdr->npg;\n+ hdr->pg_end = realloc(hdr->pg_end, hdr->npg * sizeof(*hdr->pg_end));\n+ if (!hdr->pg_end)\n+\treturn -1;\n+\n+ for (i = 0; i < hdr->npg; i++)\n+\thdr->pg_end[i] = i;\n+\n+ for (i = 0; i < hdr->npg; i++) {\n+\tkhint_t k;\n+\tSAM_hdr_tag *tag;\n+\tchar tmp;\n+\n+\tfor (tag = hdr->pg[i].tag; tag; tag = tag->next) {\n+\t if (tag->str[0] == \'P\' && tag->str[1] == \'P\')\n+\t\tbreak;\n+\t}\n+\tif (!tag) {\n+\t /* Chain start points */\n+\t continue;\n+\t}\n+\n+\ttmp = tag->str[tag->len]; tag->str[tag->len] = 0;\n+\tk = kh_get(m_s2i, hdr->pg_hash, tag->str+3);\n+\ttag->str[tag->len] = tmp;\n+\n+\tif (k == kh_end(hdr->pg_hash)) {\n+\t ret = -1;\n+\t continue;\n+\t}\n+\n+\thdr->pg[i].prev_id = hdr->pg[kh_val(hdr->pg_hash, k)].id;\n+\thdr->pg_end[kh_val(hdr->pg_hash, k)] = -1;\n+ }\n+\n+ for (i = j = 0; i < hdr->npg; i++) {\n+\tif (hdr->pg_end[i] != -1)\n+\t hdr->pg_end[j++] = hdr->pg_end[i];\n+ }\n+ hdr->npg_end = j;\n+\n+ return ret;\n+}\n+\n+/*\n+ * Returns a unique ID from a base name.\n+ *\n+ * The value returned is valid until the next call to\n+ * this function.\n+ */\n+const char *sam_hdr_PG_ID(SAM_hdr *sh, const char *name) {\n+ khint_t k = kh_get(m_s2i, sh->pg_hash, name);\n+ if (k == kh_end(sh->pg_hash))\n+\treturn name;\n+\n+ do {\n+\tsprintf(sh->ID_buf, "%.1000s.%d", name, sh->ID_cnt++);\n+\tk = kh_get(m_s2i, sh->pg_hash, sh->ID_buf);\n+ } while (k == kh_end(sh->pg_hash));\n+\n+ return sh->ID_buf;\n+}\n+\n+/*\n+ * Add an @PG line.\n+ *\n+ * If we wish complete control over this use sam_hdr_add() directly. 
This\n+ * function uses that, but attempts to do a lot of tedious house work for\n+ * you too.\n+ *\n+ * - It will generate a suitable ID if the supplied one clashes.\n+ * - It will generate multiple @PG records if we have multiple PG chains.\n+ *\n+ * Call it as per sam_hdr_add() with a series of key,value pairs ending\n+ * in NULL.\n+ *\n+ * Returns 0 on success\n+ * -1 on failure\n+ */\n+int sam_hdr_add_PG(SAM_hdr *sh, const char *name, ...) {\n+ va_list args;\n+ va_start(args, name);\n+\n+ if (sh->npg_end) {\n+\t/* Copy ends array to avoid us looping while modifying it */\n+\tint *end = malloc(sh->npg_end * sizeof(int));\n+\tint i, nends = sh->npg_end;\n+\n+\tif (!end)\n+\t return -1;\n+\n+\tmemcpy(end, sh->pg_end, nends * sizeof(*end));\n+\n+\tfor (i = 0; i < nends; i++) {\n+\t if (-1 == sam_hdr_vadd(sh, "PG", args,\n+\t\t\t\t "ID", sam_hdr_PG_ID(sh, name),\n+\t\t\t\t "PN", name,\n+\t\t\t\t "PP", sh->pg[end[i]].name,\n+\t\t\t\t NULL)) {\n+\t\tfree(end);\n+\t\treturn -1;\n+\t }\n+\t}\n+\n+\tfree(end);\n+ } else {\n+\tif (-1 == sam_hdr_vadd(sh, "PG", args,\n+\t\t\t "ID", sam_hdr_PG_ID(sh, name),\n+\t\t\t "PN", name,\n+\t\t\t NULL))\n+\t return -1;\n+ }\n+\n+ //sam_hdr_dump(sh);\n+\n+ return 0;\n+}\n+\n+/*\n+ * A function to help with construction of CL tags in @PG records.\n+ * Takes an argc, argv pair and returns a single space-separated string.\n+ * This string should be deallocated by the calling function.\n+ * \n+ * Returns malloced char * on success\n+ * NULL on failure\n+ */\n+char *stringify_argv(int argc, char *argv[]) {\n+ char *str, *cp;\n+ size_t nbytes = 1;\n+ int i, j;\n+\n+ /* Allocate */\n+ for (i = 0; i < argc; i++) {\n+\tnbytes += strlen(argv[i]) + 1;\n+ }\n+ if (!(str = malloc(nbytes)))\n+\treturn NULL;\n+\n+ /* Copy */\n+ cp = str;\n+ for (i = 0; i < argc; i++) {\n+\tj = 0;\n+\twhile (argv[i][j]) {\n+\t if (argv[i][j] == \'\\t\')\n+\t\t*cp++ = \' \';\n+\t else\n+\t\t*cp++ = argv[i][j];\n+\t j++;\n+\t}\n+\t*cp++ = \' \';\n+ }\n+ *cp++ = 
0;\n+\n+ return str;\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/sam_header.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/sam_header.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,444 @@\n+/*\n+Copyright (c) 2013-2014 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+/*! 
\\file\n+ * SAM header parsing.\n+ *\n+ * These functions can be shared between SAM, BAM and CRAM file\n+ * formats as all three internally use the same string encoding for\n+ * header fields.\n+ */ \n+\n+/*\n+ * TODO.\n+ *\n+ * - Sort order (parse to struct, enum type, updating funcs)\n+ * - Removal of lines.\n+ * - Updating of lines\n+ */\n+\n+#ifndef _SAM_HDR_H_\n+#define _SAM_HDR_H_\n+\n+#ifdef __cplusplus\n+extern "C" {\n+#endif\n+\n+#ifdef HAVE_CONFIG_H\n+#include "io_lib_config.h"\n+#endif\n+\n+#include <stdarg.h>\n+\n+#include "cram/string_alloc.h"\n+#include "cram/pooled_alloc.h"\n+\n+#include "htslib/khash.h"\n+#include "htslib/kstring.h"\n+\n+// For structure assignment. Eg kstring_t s = KS_INITIALIZER;\n+#define KS_INITIALIZER {0,0,0}\n+\n+// For initialisation elsewhere. Eg KS_INIT(x->str);\n+#define KS_INIT(ks) ((ks)->l = 0, (ks)->m = 0, (ks)->s = NULL)\n+\n+// Frees the string subfield only. Assumes \'s\' itself is static.\n+#define KS_FREE(ks) do { if ((ks)->s) free((ks)->s); } while(0)\n+\n+/*\n+ * Proposed new SAM header parsing\n+\n+1 @SQ ID:foo LN:100\n+2 @SQ ID:bar LN:200\n+3 @SQ ID:ram LN:300 UR:xyz\n+4 @RG ID:r ...\n+5 @RG ID:s ...\n+\n+Hash table for 2-char @keys without dup entries.\n+If dup lines, we form a circular linked list. Ie hash keys = {RG, SQ}.\n+\n+HASH("SQ")--\\\n+ |\n+ (3) <-> 1 <-> 2 <-> 3 <-> (1)\n+\n+HASH("RG")--\\\n+ |\n+ (5) <-> 4 <-> 5 <-> (4)\n+\n+Items stored in the hash values also form their own linked lists:\n+Ie SQ->ID(foo)->LN(100)\n+ SQ->ID(bar)->LN(200)\n+ SQ->ID(ram)->LN(300)->UR(xyz)\n+ RG->ID(r)\n+ */\n+\n+/*! A single key:value pair on a header line\n+ *\n+ * These form a linked list and hold strings. The strings are\n+ * allocated from a string_alloc_t pool referenced in the master\n+ * SAM_hdr structure. Do not attempt to free, malloc or manipulate\n+ * these strings directly.\n+ */\n+typedef struct SAM_hdr_tag_s {\n+ struct SAM_hdr_tag_s *next;\n+ char *str;\n+ int len;\n+} SAM_hdr_tag;\n+\n+/*! 
The parsed version of the SAM header string.\n+ * \n+ * Each header type (SQ, RG, HD, etc) points to its own SAM_hdr_type\n+ * struct via the main hash table h in the SAM_hdr struct.\n+ *\n+ * These in turn consist of circular bi-directional linked lists (ie\n+ * rings) to hold the multiple instances of the same header type\n+ * code. For example if we have 5 \\@SQ lines the primary hash table\n+ * will key on \\@SQ pointing to the first '..b'args, "ID", "foo", "LN", "100", NULL).\n+ *\n+ * The purpose of the additional va_list parameter is to permit other\n+ * varargs functions to call this while including their own additional\n+ * parameters; an example is in sam_hdr_add_PG().\n+ *\n+ * @return\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+int sam_hdr_vadd(SAM_hdr *sh, const char *type, va_list ap, ...);\n+\n+/*!\n+ * @return\n+ * Returns the first header item matching \'type\'. If ID is non-NULL it checks\n+ * for the tag ID: and compares against the specified ID.\n+ *\n+ * Returns NULL if no type/ID is found\n+ */\n+SAM_hdr_type *sam_hdr_find(SAM_hdr *hdr, char *type,\n+\t\t\t char *ID_key, char *ID_value);\n+\n+/*!\n+ *\n+ * As per SAM_hdr_type, but returns a complete line of formatted text\n+ * for a specific head type/ID combination. If ID is NULL then it returns\n+ * the first line of the specified type.\n+ *\n+ * The returned string is malloced and should be freed by the calling\n+ * function with free().\n+ *\n+ * @return\n+ * Returns NULL if no type/ID is found.\n+ */\n+char *sam_hdr_find_line(SAM_hdr *hdr, char *type,\n+\t\t\tchar *ID_key, char *ID_value);\n+\n+/*! Looks for a specific key in a single sam header line.\n+ *\n+ * If prev is non-NULL it also fills this out with the previous tag, to\n+ * permit use in key removal. *prev is set to NULL when the tag is the first\n+ * key in the list. 
When a tag isn\'t found, prev (if non NULL) will be the last\n+ * tag in the existing list.\n+ *\n+ * @return\n+ * Returns the tag pointer on success;\n+ * NULL on failure\n+ */\n+SAM_hdr_tag *sam_hdr_find_key(SAM_hdr *sh,\n+\t\t\t SAM_hdr_type *type,\n+\t\t\t char *key,\n+\t\t\t SAM_hdr_tag **prev);\n+\n+/*! Adds or updates tag key,value pairs in a header line.\n+ *\n+ * Eg for adding M5 tags to @SQ lines or updating sort order for the\n+ * @HD line (although use the sam_hdr_sort_order() function for\n+ * HD manipulation, which is a wrapper around this funuction).\n+ *\n+ * Specify multiple key,value pairs ending in NULL.\n+ *\n+ * @return\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+int sam_hdr_update(SAM_hdr *hdr, SAM_hdr_type *type, ...);\n+\n+/*! Reconstructs the kstring from the header hash table.\n+ * @return\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+int sam_hdr_rebuild(SAM_hdr *hdr);\n+\n+/*! Looks up a reference sequence by name and returns the numerical ID.\n+ * @return\n+ * Returns -1 if unknown reference.\n+ */\n+int sam_hdr_name2ref(SAM_hdr *hdr, const char *ref);\n+\n+/*! Looks up a read-group by name and returns a pointer to the start of the\n+ * associated tag list.\n+ *\n+ * @return\n+ * Returns NULL on failure\n+ */\n+SAM_RG *sam_hdr_find_rg(SAM_hdr *hdr, const char *rg);\n+\n+/*! Fixes any PP links in @PG headers.\n+ *\n+ * If the entries are in order then this doesn\'t need doing, but incase\n+ * our header is out of order this goes through the sh->pg[] array\n+ * setting the prev_id field.\n+ *\n+ * @return\n+ * Returns 0 on sucess;\n+ * -1 on failure (indicating broken PG/PP records)\n+ */\n+int sam_hdr_link_pg(SAM_hdr *hdr);\n+\n+\n+/*! Add an @PG line.\n+ *\n+ * If we wish complete control over this use sam_hdr_add() directly. 
This\n+ * function uses that, but attempts to do a lot of tedious house work for\n+ * you too.\n+ *\n+ * - It will generate a suitable ID if the supplied one clashes.\n+ * - It will generate multiple @PG records if we have multiple PG chains.\n+ *\n+ * Call it as per sam_hdr_add() with a series of key,value pairs ending\n+ * in NULL.\n+ *\n+ * @return\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+int sam_hdr_add_PG(SAM_hdr *sh, const char *name, ...);\n+\n+/*!\n+ * A function to help with construction of CL tags in @PG records.\n+ * Takes an argc, argv pair and returns a single space-separated string.\n+ * This string should be deallocated by the calling function.\n+ * \n+ * @return\n+ * Returns malloced char * on success;\n+ * NULL on failure\n+ */\n+char *stringify_argv(int argc, char *argv[]);\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+#endif /* _SAM_HDR_H_ */\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/string_alloc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/string_alloc.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,153 @@ +/* +Copyright (c) 2010 Genome Research Ltd. +Author: Andrew Whitwham <aw7@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +/* + A pooled string allocator intended to cut down on the + memory overhead of many small string allocations. + + Andrew Whitwham, September 2010. +*/ + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> + +#include "string_alloc.h" + +#define MIN_STR_SIZE 1024 + + +/* creates the string pool. max_length is the initial size + a single string can be. 
Tha max_length can grow as + needed */ + +string_alloc_t *string_pool_create(size_t max_length) { + string_alloc_t *a_str; + + if (NULL == (a_str = (string_alloc_t *)malloc(sizeof(*a_str)))) { + return NULL; + } + + if (max_length < MIN_STR_SIZE) max_length = MIN_STR_SIZE; + + a_str->nstrings = 0; + a_str->max_length = max_length; + a_str->strings = NULL; + + return a_str; +} + + +/* internal function to do the actual memory allocation */ + +static string_t *new_string_pool(string_alloc_t *a_str) { + string_t *str; + + str = realloc(a_str->strings, (a_str->nstrings + 1) * sizeof(*a_str->strings)); + + if (NULL == str) return NULL; + + a_str->strings = str; + str = &a_str->strings[a_str->nstrings]; + + str->str = malloc(a_str->max_length);; + + if (NULL == str->str) return NULL; + + str->used = 0; + a_str->nstrings++; + + return str; +} + + +/* free allocated memory */ + +void string_pool_destroy(string_alloc_t *a_str) { + size_t i; + + for (i = 0; i < a_str->nstrings; i++) { + free(a_str->strings[i].str); + } + + free(a_str->strings); + free(a_str); +} + + +/* allocate space for a string */ + +char *string_alloc(string_alloc_t *a_str, size_t length) { + string_t *str; + char *ret; + + if (length <= 0) return NULL; + + // add to last string pool if we have space + if (a_str->nstrings) { + str = &a_str->strings[a_str->nstrings - 1]; + + if (str->used + length < a_str->max_length) { + ret = str->str + str->used; + str->used += length; + return ret; + } + } + + // increase the max length if needs be + if (length > a_str->max_length) a_str->max_length = length; + + // need a new string pool + str = new_string_pool(a_str); + + if (NULL == str) return NULL; + + str->used = length; + return str->str; +} + + +/* equivalent to strdup */ + +char *string_dup(string_alloc_t *a_str, char *instr) { + return string_ndup(a_str, instr, strlen(instr)); +} + +char *string_ndup(string_alloc_t *a_str, char *instr, size_t len) { + char *str = string_alloc(a_str, len + 1); + + if (NULL == 
str) return NULL; + + strncpy(str, instr, len); + str[len] = 0; + + return str; +} |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/string_alloc.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/string_alloc.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,69 @@ +/* +Copyright (c) 2010 Genome Research Ltd. +Author: Andrew Whitwham <aw7@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _STRING_ALLOC_H_ +#define _STRING_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdlib.h> + +/* + * A pooled string allocator intended to cut down on the + * memory overhead of many small string allocations. + * + * Andrew Whitwham, September 2010. 
+ */ + +typedef struct { + char *str; + size_t used; +} string_t; + +typedef struct { + size_t max_length; + size_t nstrings; + string_t *strings; +} string_alloc_t; + +string_alloc_t *string_pool_create(size_t max_length); +void string_pool_destroy(string_alloc_t *a_str); +char *string_alloc(string_alloc_t *a_str, size_t length); +char *string_dup(string_alloc_t *a_str, char *instr); +char *string_ndup(string_alloc_t *a_str, char *instr, size_t len); + +#endif + +#ifdef __cplusplus +} +#endif + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/thread_pool.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/thread_pool.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,811 @@\n+/*\n+Copyright (c) 2013 Genome Research Ltd.\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. 
IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+#include <stdlib.h>\n+\n+#include <signal.h>\n+#include <errno.h>\n+#include <stdio.h>\n+#include <string.h>\n+#include <sys/time.h>\n+#include <assert.h>\n+\n+#include "cram/thread_pool.h"\n+\n+//#define DEBUG\n+//#define DEBUG_TIME\n+\n+#define IN_ORDER\n+\n+#ifdef DEBUG\n+static int worker_id(t_pool *p) {\n+ int i;\n+ pthread_t s = pthread_self();\n+ for (i = 0; i < p->tsize; i++) {\n+\tif (pthread_equal(s, p->t[i].tid))\n+\t return i;\n+ }\n+ return -1;\n+}\n+#endif\n+\n+/* ----------------------------------------------------------------------------\n+ * A queue to hold results from the thread pool.\n+ *\n+ * Each thread pool may have jobs of multiple types being queued up and\n+ * interleaved, so we allow several results queue per pool.\n+ *\n+ * The jobs themselves are expected to push their results onto their\n+ * appropriate results queue.\n+ */\n+\n+/*\n+ * Adds a result to the end of the result queue.\n+ *\n+ * Returns 0 on success;\n+ * -1 on failure\n+ */\n+static int t_pool_add_result(t_pool_job *j, void *data) {\n+ t_results_queue *q = j->q;\n+ t_pool_result *r;\n+\n+#ifdef DEBUG\n+ fprintf(stderr, "%d: Adding resulting to queue %p, serial %d\\n",\n+\t worker_id(j->p), q, j->serial);\n+#endif\n+\n+ /* No results queue is fine if we don\'t want any results back */\n+ if (!q)\n+\treturn 0;\n+\n+ if (!(r = malloc(sizeof(*r))))\n+\treturn -1;\n+\n+ r->next = NULL;\n+ r->data = data;\n+ r->serial = j->serial;\n+\n+ 
pthread_mutex_lock(&q->result_m);\n+ if (q->result_tail) {\n+\tq->result_tail->next = r;\n+\tq->result_tail = r;\n+ } else {\n+\tq->result_head = q->result_tail = r;\n+ }\n+ q->queue_len++;\n+ q->pending--;\n+\n+#ifdef DEBUG\n+ fprintf(stderr, "%d: Broadcasting result_avail (id %d)\\n",\n+\t worker_id(j->p), r->serial);\n+#endif\n+ pthread_cond_signal(&q->result_avail_c);\n+#ifdef DEBUG\n+ fprintf(stderr, "%d: Broadcast complete\\n", worker_id(j->p));\n+#endif\n+\n+ pthread_mutex_unlock(&q->result_m);\n+\n+ return 0;\n+}\n+\n+/* Core of t_pool_next_result() */\n+static t_pool_result *t_pool_next_result_locked(t_results_queue *q) {\n+ t_pool_result *r, *last;\n+\n+ for (last = NULL, r = q->result_head; r; last = r, r = r->next) {\n+\tif (r->serial == q->next_serial)\n+\t break;\n+ }\n+\n+ if (r) {\n+\tif (q->result_head == r)\n+\t q->result_head = r->next;\n+\telse\n+\t last->next = r->next;\n+\n+\tif (q->result_tail == r)\n+\t q->resu'..b' int i;\n+\n+#ifdef DEBUG\n+ fprintf(stderr, "Flushing pool %p\\n", p);\n+#endif\n+\n+ // Drains the queue\n+ pthread_mutex_lock(&p->pool_m);\n+\n+ // Wake up everything for the final sprint!\n+ for (i = 0; i < p->tsize; i++)\n+\tif (p->t_stack[i])\n+\t pthread_cond_signal(&p->t[i].pending_c);\n+\n+ while (p->njobs || p->nwaiting != p->tsize)\n+\tpthread_cond_wait(&p->empty_c, &p->pool_m);\n+\n+ pthread_mutex_unlock(&p->pool_m);\n+\n+#ifdef DEBUG\n+ fprintf(stderr, "Flushed complete for pool %p, njobs=%d, nwaiting=%d\\n",\n+\t p, p->njobs, p->nwaiting);\n+#endif\n+\n+ return 0;\n+}\n+\n+/*\n+ * Destroys a thread pool. 
If \'kill\' is true the threads are terminated now,\n+ * otherwise they are joined into the main thread so they will finish their\n+ * current work load.\n+ *\n+ * Use t_pool_destroy(p,0) after a t_pool_flush(p) on a normal shutdown or\n+ * t_pool_destroy(p,1) to quickly exit after a fatal error.\n+ */\n+void t_pool_destroy(t_pool *p, int kill) {\n+ int i;\n+ \n+#ifdef DEBUG\n+ fprintf(stderr, "Destroying pool %p, kill=%d\\n", p, kill);\n+#endif\n+\n+ /* Send shutdown message to worker threads */\n+ if (!kill) {\n+\tpthread_mutex_lock(&p->pool_m);\n+\tp->shutdown = 1;\n+\n+#ifdef DEBUG\n+\tfprintf(stderr, "Sending shutdown request\\n");\n+#endif\n+\n+#ifdef IN_ORDER\n+\tfor (i = 0; i < p->tsize; i++)\n+\t pthread_cond_signal(&p->t[i].pending_c);\n+#else\n+\tpthread_cond_broadcast(&p->pending_c);\n+#endif\n+\tpthread_mutex_unlock(&p->pool_m);\n+\n+#ifdef DEBUG\n+\tfprintf(stderr, "Shutdown complete\\n");\n+#endif\n+\tfor (i = 0; i < p->tsize; i++)\n+\t pthread_join(p->t[i].tid, NULL);\n+ } else {\n+\tfor (i = 0; i < p->tsize; i++)\n+\t pthread_kill(p->t[i].tid, SIGINT);\n+ }\n+\n+ pthread_mutex_destroy(&p->pool_m);\n+ pthread_cond_destroy(&p->empty_c);\n+ pthread_cond_destroy(&p->full_c);\n+#ifdef IN_ORDER\n+ for (i = 0; i < p->tsize; i++)\n+\tpthread_cond_destroy(&p->t[i].pending_c);\n+#else\n+ pthread_cond_destroy(&p->pending_c);\n+#endif\n+\n+#ifdef DEBUG_TIME\n+ fprintf(stderr, "Total time=%f\\n", p->total_time / 1000000.0);\n+ fprintf(stderr, "Wait time=%f\\n", p->wait_time / 1000000.0);\n+ fprintf(stderr, "%d%% utilisation\\n",\n+\t (int)(100 - ((100.0 * p->wait_time) / p->total_time + 0.5)));\n+ for (i = 0; i < p->tsize; i++)\n+\tfprintf(stderr, "%d: Wait time=%f\\n", i,\n+\t\tp->t[i].wait_time / 1000000.0);\n+#endif\n+\n+ if (p->t_stack)\n+\tfree(p->t_stack);\n+\n+ free(p->t);\n+ free(p);\n+\n+#ifdef DEBUG\n+ fprintf(stderr, "Destroyed pool %p\\n", p);\n+#endif\n+}\n+\n+\n+/*-----------------------------------------------------------------------------\n+ * 
Test app.\n+ */\n+\n+#ifdef TEST_MAIN\n+\n+#include <stdio.h>\n+#include <math.h>\n+\n+void *doit(void *arg) {\n+ int i, k, x = 0;\n+ int job = *(int *)arg;\n+ int *res;\n+\n+ printf("Worker: execute job %d\\n", job);\n+\n+ usleep(random() % 1000000); // to coerce job completion out of order\n+ if (0) {\n+\tfor (k = 0; k < 100; k++) {\n+\t for (i = 0; i < 100000; i++) {\n+\t\tx++;\n+\t\tx += x * sin(i);\n+\t\tx += x * cos(x);\n+\t }\n+\t}\n+\tx *= 100;\n+\tx += job;\n+ } else {\n+\tx = job*job;\n+ }\n+\n+ printf("Worker: job %d terminating, x=%d\\n", job, x);\n+\n+ free(arg);\n+\n+ res = malloc(sizeof(*res));\n+ *res = x;\n+\n+ return res;\n+}\n+\n+#define NTHREADS 8\n+\n+int main(int argc, char **argv) {\n+ t_pool *p = t_pool_init(NTHREADS*2, NTHREADS);\n+ t_results_queue *q = t_results_queue_init();\n+ int i;\n+ t_pool_result *r;\n+\n+ // Dispatch jobs\n+ for (i = 0; i < 20; i++) {\n+\tint *ip = malloc(sizeof(*ip));\n+\t*ip = i;\n+\tprintf("Submitting %d\\n", i);\n+\tt_pool_dispatch(p, q, doit, ip);\n+\t\n+\t// Check for results\n+\tif ((r = t_pool_next_result(q))) {\n+\t printf("RESULT: %d\\n", *(int *)r->data);\n+\t t_pool_delete_result(r, 1);\n+\t}\n+ }\n+\n+ t_pool_flush(p);\n+\n+ while ((r = t_pool_next_result(q))) {\n+\tprintf("RESULT: %d\\n", *(int *)r->data);\n+\tt_pool_delete_result(r, 1);\n+ }\n+\n+ t_pool_destroy(p, 0);\n+ t_results_queue_destroy(q);\n+\n+ return 0;\n+}\n+#endif\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/thread_pool.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/thread_pool.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,210 @@ +/* +Copyright (c) 2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + * This file implements a thread pool for multi-threading applications. + * It consists of two distinct interfaces: thread pools an results queues. + * + * The pool of threads is given a function pointer and void* data to pass in. + * This means the pool can run jobs of multiple types, albeit first come + * first served with no job scheduling. 
+ * + * Upon completion, the return value from the function pointer is added to + * a results queue. We may have multiple queues in use for the one pool. + * + * An example: reading from BAM and writing to CRAM with 10 threads. We'll + * have a pool of 10 threads and two results queues holding decoded BAM blocks + * and encoded CRAM blocks respectively. + */ + +#ifndef _THREAD_POOL_H_ +#define _THREAD_POOL_H_ + +#include <pthread.h> + +struct t_pool; +struct t_results_queue; + +typedef struct t_pool_job { + void *(*func)(void *arg); + void *arg; + struct t_pool_job *next; + + struct t_pool *p; + struct t_results_queue *q; + int serial; +} t_pool_job; + +typedef struct t_res { + struct t_res *next; + int serial; // sequential number for ordering + void *data; // result itself +} t_pool_result; + +struct t_pool; + +typedef struct { + struct t_pool *p; + int idx; + pthread_t tid; + pthread_cond_t pending_c; + long long wait_time; +} t_pool_worker_t; + +typedef struct t_pool { + int qsize; // size of queue + int njobs; // pending job count + int nwaiting; // how many workers waiting for new jobs + int shutdown; // true if pool is being destroyed + + // queue of pending jobs + t_pool_job *head, *tail; + + // threads + int tsize; // maximum number of jobs + t_pool_worker_t *t; + + // Mutexes + pthread_mutex_t pool_m; // used when updating head/tail + + pthread_cond_t empty_c; + pthread_cond_t pending_c; // not empty + pthread_cond_t full_c; + + // array of worker IDs free + int *t_stack, t_stack_top; + + // Debugging to check wait time + long long total_time, wait_time; +} t_pool; + +typedef struct t_results_queue { + t_pool_result *result_head; + t_pool_result *result_tail; + int next_serial; + int curr_serial; + int queue_len; // number of items in queue + int pending; // number of pending items (in progress or in pool list) + pthread_mutex_t result_m; + pthread_cond_t result_avail_c; +} t_results_queue; + + +/* + * Creates a worker pool of length qsize with tsize 
worker threads. + * + * Returns pool pointer on success; + * NULL on failure + */ +t_pool *t_pool_init(int qsize, int tsize); + +/* + * Adds an item to the work pool. + * + * FIXME: Maybe return 1,0,-1 and distinguish between job dispathed vs + * result returned. Ie rather than blocking on full queue we're permitted + * to return early on "result available" event too. + * Caller would then have a while loop around t_pool_dispatch. + * Or, return -1 and set errno to E_AGAIN to indicate job not yet submitted. + * + * Returns 0 on success + * -1 on failure + */ +int t_pool_dispatch(t_pool *p, t_results_queue *q, + void *(*func)(void *arg), void *arg); +int t_pool_dispatch2(t_pool *p, t_results_queue *q, + void *(*func)(void *arg), void *arg, int nonblock); + +/* + * Flushes the pool, but doesn't exit. This simply drains the queue and + * ensures all worker threads have finished their current task. + * + * Returns 0 on success; + * -1 on failure + */ +int t_pool_flush(t_pool *p); + +/* + * Destroys a thread pool. If 'kill' is true the threads are terminated now, + * otherwise they are joined into the main thread so they will finish their + * current work load. + * + * Use t_pool_destroy(p,0) after a t_pool_flush(p) on a normal shutdown or + * t_pool_destroy(p,1) to quickly exit after a fatal error. + */ +void t_pool_destroy(t_pool *p, int kill); + +/* + * Pulls a result off the head of the result queue. Caller should + * free it (and any internals as appropriate) after use. This doesn't + * wait for a result to be present. + * + * Results will be returned in strict order. + * + * Returns t_pool_result pointer if a result is ready. + * NULL if not. + */ +t_pool_result *t_pool_next_result(t_results_queue *q); +t_pool_result *t_pool_next_result_wait(t_results_queue *q); + +/* + * Frees a result 'r' and if free_data is true also frees + * the internal r->data result too. 
+ */ +void t_pool_delete_result(t_pool_result *r, int free_data); + +/* + * Initialises a results queue. + * + * Results queue pointer on success; + * NULL on failure + */ +t_results_queue *t_results_queue_init(void); + +/* Deallocates memory for a results queue */ +void t_results_queue_destroy(t_results_queue *q); + +/* + * Returns true if there are no items on the finished results queue and + * also none still pending. + */ +int t_pool_results_queue_empty(t_results_queue *q); + +/* + * Returns the number of completed jobs on the results queue. + */ +int t_pool_results_queue_len(t_results_queue *q); + +/* + * Returns the number of completed jobs plus the number queued up to run. + */ +int t_pool_results_queue_sz(t_results_queue *q); + +#endif /* _THREAD_POOL_H_ */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/vlen.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/vlen.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,432 @@\n+/*\n+Author: James Bonfield (jkb@sanger.ac.uk)\n+\n+Copyright (c) 1995-1996 MEDICAL RESEARCH COUNCIL\n+All rights reserved\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1 Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2 Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF \n+MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or \n+promote products derived from this software without specific prior written \n+permission.\n+\n+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR \n+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \n+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; \n+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON \n+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT \n+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS \n+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+ */\n+\n+/*\n+Copyright (c) 2004, 2009, 2011-2012 Genome Research Ltd.\n+\n+Author: James Bonfield <jkb@sanger.ac.uk>\n+\n+Redistribution and use in source and binary forms, with or without \n+modification, are permitted provided that the following conditions are met:\n+\n+ 1. 
Redistributions of source code must retain the above copyright notice, \n+this list of conditions and the following disclaimer.\n+\n+ 2. Redistributions in binary form must reproduce the above copyright notice, \n+this list of conditions and the following disclaimer in the documentation \n+and/or other materials provided with the distribution.\n+\n+ 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger\n+Institute nor the names of its contributors may be used to endorse or promote\n+products derived from this software without specific prior written permission.\n+\n+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND \n+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED \n+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE \n+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE\n+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\n+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n+*/\n+\n+#ifdef HAVE_CONFIG_H\n+#include "io_lib_config.h"\n+#endif\n+\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <stdarg.h>\n+#include <sys/types.h>\n+#include <string.h>\n+\n+#include "cram/vlen.h"\n+#include "cram/os.h"\n+\n+#ifndef MAX\n+#define MAX(a,b) ((a)>(b)?(a):(b))\n+#endif\n+\n+#ifndef ABS\n+#define ABS(a) ((a)>0?(a):-(a))\n+#endif\n+\n+/* #define DEBUG_printf(a,n) printf(a,n) */\n+#define DEBUG_printf(a,n)\n+\n+/*\n+ * vlen: 27/10/95 written by James Bonfield, jkb@mrc-lmb.cam.ac.uk\n+ *\n+ * Given sprintf style of arguments this routine returns the maximum\n+ * size of buffer needed to allocate to use with 
sprintf. It errs on\n+ * the side of caution by being simplistic in its approach: we assume\n+ * all numbers are of maxi'..b'-\' + 6 + \'.\' + \'E[+-]xxx\' == 13.\n+\t\t */\n+\t\tlen += MAX(conv_len, 13);\n+\t\tbreak;\n+\n+\t case \'p\':\n+\t\tl = (long)va_arg(ap, void *);\n+\t\t/*\n+\t\t * Max pointer is 64bits == 16 chars (on alpha),\n+\t\t * == 20 with + "0x".\n+\t\t */\n+\t\tDEBUG_printf("%p", (void *)l);\n+\t\tlen += MAX(conv_len, 20);\n+\t\tbreak;\n+\n+\t case \'n\':\n+\t\t/* produces no output */\n+\t\tbreak;\n+\n+\t case \'s\': {\n+\t\tchar *s = (char *)va_arg(ap, char *);\n+\t\tDEBUG_printf("%s", s);\n+\n+\t\tif (!conv_len2) {\n+\t\t len += MAX(conv_len, (int)strlen(s));\n+\t\t} else {\n+\t\t len += conv_len;\n+\t\t}\n+\t\tbreak;\n+\t }\n+\n+\t default:\n+\t\t/* wchar_t types of \'C\' and \'S\' aren\'t supported */\n+\t\tDEBUG_printf("Arg is %c\\n", *cp);\n+\t }\n+\t \n+\t}\n+\n+\tcase \'\\0\':\n+\t break;\n+\n+\tdefault:\n+\t DEBUG_printf("%c", *cp);\n+\t len++;\n+\t}\n+ }\n+\n+ va_end(ap);\n+\n+ return len+1; /* one for the null character */\n+}\n+\n+#if 0\n+int main() {\n+ int l;\n+ char buf[10000];\n+\n+ sprintf(buf, "d: %d\\n", 500);\n+ l = flen("d: %d\\n", 500);\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "");\n+ l = flen("");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%s\\n","test");\n+ l = flen("%s\\n", "test");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%c\\n", \'a\');\n+ l = flen("%c\\n", \'a\');\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%31.30f\\n", -9999.99);\n+ l = flen("%31.30f\\n", -9999.99);\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%f\\n", -1e308);\n+ l = flen("%f\\n", -1e308);\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%.9f\\n", -1e308);\n+ l = flen("%.9f\\n", -1e308);\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%10.20f\\n", -1.999222333);\n+ l = flen("%10.20f\\n", -1.999222333);\n+ printf("%d %d\\n\\n", 
strlen(buf), l);\n+\n+ sprintf(buf, "%#g\\n", -3.14159265358e-222);\n+ l = flen("%#g\\n", -3.1415927e-222);\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%e\\n", -123456789123456789.1);\n+ l = flen("%e\\n", -123456789123456789.1);\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%c %f %d %s %c %g %ld %s\\n", \'a\', 3.1, 9, "one", \'b\', 4.2, 9, "two");\n+ l = flen("%c %f %d %s %c %g %ld %s\\n", \'a\', 3.1, 9, "one", \'b\', 4.2, 9, "two");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%*.*e %*c\\n", 10, 5, 9.0, 20, \'x\');\n+ l = flen("%*.*e %*c\\n", 10, 5, 9.0, 20, \'x\');\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%10c\\n", \'z\');\n+ l = flen("%10c\\n", \'z\');\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%.10c\\n", \'z\');\n+ l = flen("%.10c\\n", \'z\');\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%10d\\n", \'z\');\n+ l = flen("%10d\\n", \'z\');\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%.10d\\n", \'z\');\n+ l = flen("%.10d\\n", \'z\');\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%10%\\n");\n+ l = flen("%10%\\n");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%.10%\\n");\n+ l = flen("%.10%\\n");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%s\\n", "0123456789");\n+ l = flen("%s\\n", "0123456789");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%5s\\n", "0123456789");\n+ l = flen("%5s\\n", "0123456789");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%50s\\n", "0123456789");\n+ l = flen("%50s\\n", "0123456789");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%.5s\\n", "0123456789");\n+ l = flen("%.5s\\n", "0123456789");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%.50s\\n", "0123456789");\n+ l = flen("%.50s\\n", "0123456789");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%5.50s\\n", "0123456789");\n+ l = 
flen("%5.50s\\n", "0123456789");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ sprintf(buf, "%50.5s\\n", "0123456789");\n+ l = flen("%50.5s\\n", "0123456789");\n+ printf("%d %d\\n\\n", strlen(buf), l);\n+\n+ return 0;\n+}\n+#endif\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/vlen.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/vlen.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,48 @@ +/* +Author: James Bonfield (jkb@sanger.ac.uk) + +Copyright (c) 1995-1996 MEDICAL RESEARCH COUNCIL +All rights reserved + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1 Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2 Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3 Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF +MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or +promote products derived from this software without specific prior written +permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _VLEN_H_ +#define _VLEN_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +extern int vflen(char *fmt, va_list ap); +extern int flen(char *fmt, ...); + +#ifdef __cplusplus +} +#endif + +#endif /* _VLEN_H_ */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/zfio.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/zfio.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,185 @@ +/* +Copyright (c) 2009-2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef HAVE_CONFIG_H +#include "io_lib_config.h" +#endif + +#include <stdlib.h> +#include <unistd.h> + +#include "cram/os.h" +#include "cram/zfio.h" + +/* ------------------------------------------------------------------------ */ +/* Some wrappers around FILE * vs gzFile *, allowing for either */ + +/* + * gzopen() works on both compressed and uncompressed data, but it has + * a significant performance hit even for uncompressed data (tested as + * 25s using FILE* to 46s via gzOpen and 66s via gzOpen when gzipped). + * + * Hence we use our own wrapper 'zfp' which is a FILE* when uncompressed + * and gzFile* when compressed. This also means we could hide bzopen in + * there too if desired. + */ + +off_t zftello(zfp *zf) { + return zf->fp ? ftello(zf->fp) : -1; +} + +int zfseeko(zfp *zf, off_t offset, int whence) { + return zf->fp ? fseeko(zf->fp, offset, whence) : -1; +} + + +/* + * A wrapper for either fgets or gzgets depending on what has been + * opened. + */ +char *zfgets(char *line, int size, zfp *zf) { + if (zf->fp) + return fgets(line, size, zf->fp); + else + return gzgets(zf->gz, line, size); +} + +/* + * A wrapper for either fputs or gzputs depending on what has been + * opened. + */ +int zfputs(char *line, zfp *zf) { + if (zf->fp) + return fputs(line, zf->fp); + else + return gzputs(zf->gz, line) ? 0 : EOF; +} + +/* + * Peeks at and returns the next character without consuming it from the + * input. (Ie a combination of getc and ungetc). + */ +int zfpeek(zfp *zf) { + int c; + + if (zf->fp) { + c = getc(zf->fp); + if (c != EOF) + ungetc(c, zf->fp); + } else { + c = gzgetc(zf->gz); + if (c != EOF) + gzungetc(c, zf->gz); + } + + return c; +} + +/* A replacement for either feof of gzeof */ +int zfeof(zfp *zf) { + return zf->fp ? 
feof(zf->fp) : gzeof(zf->gz); +} + +/* A replacement for either fopen or gzopen */ +zfp *zfopen(const char *path, const char *mode) { + char path2[1024]; + zfp *zf; + + if (!(zf = (zfp *)malloc(sizeof(*zf)))) + return NULL; + zf->fp = NULL; + zf->gz = NULL; + + /* Try normal fopen */ + if (mode[0] != 'z' && mode[1] != 'z' && + NULL != (zf->fp = fopen(path, mode))) { + unsigned char magic[2]; + if (2 != fread(magic, 1, 2, zf->fp)) { + free(zf); + return NULL; + } + if (!(magic[0] == 0x1f && + magic[1] == 0x8b)) { + fseeko(zf->fp, 0, SEEK_SET); + return zf; + } + + fclose(zf->fp); + zf->fp = NULL; + } + +#ifdef HAVE_POPEN + /* + * I've no idea why, by gzgets is VERY slow, maybe because it handles + * arbitrary seeks. + * popen to gzip -cd is 3 times faster though. + */ + if (*mode == 'w') { + } else { + if (access(path, R_OK) == 0) { + sprintf(path2, "gzip -cd < %.*s", 1000, path); + if (NULL != (zf->fp = popen(path2, "r"))) + return zf; + } + + sprintf(path2, "gzip -cd < %.*s.gz", 1000, path); + if (NULL != (zf->fp = popen(path2, "r"))) + return zf; + + printf("Failed on %s\n", path); + } else { + sprintf(path2, "gzip > %.*s", 1000, path); + if (NULL != (zf->fp = popen(path2, "w"))) + return zf; + } + + printf("Failed on %s\n", path); + } +#else + /* Gzopen instead */ + if ((zf->gz = gzopen(path, mode))) + return zf; + + sprintf(path2, "%.*s.gz", 1020, path); + if ((zf->gz = gzopen(path2, mode))) + return zf; +#endif + + perror(path); + + free(zf); + return NULL; +} + +int zfclose(zfp *zf) { + int r = (zf->fp) ? fclose(zf->fp) : gzclose(zf->gz); + free(zf); + return r; +} |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/cram/zfio.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/cram/zfio.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,54 @@ +/* +Copyright (c) 2009-2013 Genome Research Ltd. +Author: James Bonfield <jkb@sanger.ac.uk> + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + + 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger +Institute nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _ZFIO_H_ +#define _ZFIO_H_ + +#include <stdio.h> +#include <zlib.h> + +/* + * Either a gzFile or a FILE. 
+ */ +typedef struct { + FILE *fp; + gzFile gz; +} zfp; + +off_t zftello(zfp *zf); +int zfseeko(zfp *zf, off_t offset, int whence); +char *zfgets(char *line, int size, zfp *zf); +int zfputs(char *line, zfp *zf); +zfp *zfopen(const char *path, const char *mode); +int zfclose(zfp *zf); +int zfpeek(zfp *zf); +int zfeof(zfp *zf); + +#endif /* _ZFIO_H_ */ |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/faidx.5 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/faidx.5 Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,147 @@ +'\" t +.TH faidx 5 "August 2013" "htslib" "Bioinformatics formats" +.SH NAME +faidx \- an index enabling random access to FASTA files +.\" +.\" Copyright (C) 2013 Genome Research Ltd. +.\" +.\" Author: John Marshall <jm18@sanger.ac.uk> +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining a +.\" copy of this software and associated documentation files (the "Software"), +.\" to deal in the Software without restriction, including without limitation +.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, +.\" and/or sell copies of the Software, and to permit persons to whom the +.\" Software is furnished to do so, subject to the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be included in +.\" all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +.\" DEALINGS IN THE SOFTWARE. +.\" +.SH SYNOPSIS +.IR file.fa .fai, +.IR file.fasta .fai +.SH DESCRIPTION +Using an \fBfai index\fP file in conjunction with a FASTA file containing +reference sequences enables efficient access to arbitrary regions within +those reference sequences. +The index file typically has the same filename as the corresponding FASTA +file, with \fB.fai\fP appended. +.P +An \fBfai index\fP file is a text file consisting of lines each with +five TAB-delimited columns: +.TS +lbl. 
+NAME Name of this reference sequence +LENGTH Total length of this reference sequence, in bases +OFFSET Offset within the FASTA file of this sequence's first base +LINEBASES The number of bases on each line +LINEWIDTH The number of bytes in each line, including the newline +.TE +.P +The \fBNAME\fP and \fBLENGTH\fP columns contain the same +data as would appear in the \fBSN\fP and \fBLN\fP fields of a +SAM \fB@SQ\fP header for the same reference sequence. +.P +The \fBOFFSET\fP column contains the offset within the FASTA file, in bytes +starting from zero, of the first base of this reference sequence, i.e., of +the character following the newline at the end of the "\fB>\fP" header line. +Typically the lines of a \fBfai index\fP file appear in the order in which the +reference sequences appear in the FASTA file, so \fB.fai\fP files are typically +sorted according to this column. +.P +The \fBLINEBASES\fP column contains the number of bases in each of the sequence +lines that form the body of this reference sequence, apart from the final line +which may be shorter. +The \fBLINEWIDTH\fP column contains the number of \fIbytes\fP in each of +the sequence lines (except perhaps the final line), thus differing from +\fBLINEBASES\fP in that it also counts the bytes forming the line terminator. +.SS FASTA Files +In order to be indexed with \fBsamtools faidx\fP, a FASTA file must be a text +file of the form +.LP +.RS +.RI > name +.RI [ description ...] +.br +ATGCATGCATGCATGCATGCATGCATGCAT +.br +GCATGCATGCATGCATGCATGCATGCATGC +.br +ATGCAT +.br +.RI > name +.RI [ description ...] +.br +ATGCATGCATGCAT +.br +GCATGCATGCATGC +.br +[...] +.RE +.LP +In particular, each reference sequence must be "well-formatted", i.e., all +of its sequence lines must be the same length, apart from the final sequence +line which may be shorter. +(While this sequence line length must be the same within each sequence, +it may vary between different reference sequences in the same FASTA file.) 
+.P +This also means that although the FASTA file may have Unix- or Windows-style +or other line termination, the newline characters present must be consistent, +at least within each reference sequence. +.P +The \fBsamtools\fP implementation uses the first word of the "\fB>\fP" header +line text (i.e., up to the first whitespace character) as the \fBNAME\fP column. +At present, there may be no whitespace between the +">" character and the \fIname\fP. +.SH EXAMPLE +For example, given this FASTA file +.LP +.RS +>one +.br +ATGCATGCATGCATGCATGCATGCATGCAT +.br +GCATGCATGCATGCATGCATGCATGCATGC +.br +ATGCAT +.br +>two another chromosome +.br +ATGCATGCATGCAT +.br +GCATGCATGCATGC +.br +.RE +.LP +formatted with Unix-style (LF) line termination, the corresponding fai index +would be +.RS +.TS +lnnnn. +one 66 5 30 31 +two 28 98 14 15 +.TE +.RE +.LP +If the FASTA file were formatted with Windows-style (CR-LF) line termination, +the fai index would be +.RS +.TS +lnnnn. +one 66 6 30 32 +two 28 103 14 16 +.TE +.RE +.SH SEE ALSO +.IR samtools (1) +.TP +http://en.wikipedia.org/wiki/FASTA_format +Further description of the FASTA format |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/faidx.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/faidx.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,475 @@\n+/* faidx.c -- FASTA random access.\n+\n+ Copyright (C) 2008, 2009, 2013-2015 Genome Research Ltd.\n+ Portions copyright (C) 2011 Broad Institute.\n+\n+ Author: Heng Li <lh3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#include <ctype.h>\n+#include <string.h>\n+#include <stdlib.h>\n+#include <stdio.h>\n+#include <stdint.h>\n+\n+#include "htslib/bgzf.h"\n+#include "htslib/faidx.h"\n+#include "htslib/hfile.h"\n+#include "htslib/khash.h"\n+\n+typedef struct {\n+ int32_t line_len, line_blen;\n+ int64_t len;\n+ uint64_t offset;\n+} faidx1_t;\n+KHASH_MAP_INIT_STR(s, faidx1_t)\n+\n+struct __faidx_t {\n+ BGZF *bgzf;\n+ int n, m;\n+ char **name;\n+ khash_t(s) *hash;\n+};\n+\n+#ifndef kroundup32\n+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))\n+#endif\n+\n+static inline void fai_insert_index(faidx_t *idx, const char *name, int len, int line_len, int line_blen, uint64_t offset)\n+{\n+ khint_t k;\n+ int ret;\n+ faidx1_t t;\n+ if (idx->n == idx->m) {\n+ idx->m = idx->m? idx->m<<1 : 16;\n+ idx->name = (char**)realloc(idx->name, sizeof(char*) * idx->m);\n+ }\n+ idx->name[idx->n] = strdup(name);\n+ k = kh_put(s, idx->hash, idx->name[idx->n], &ret);\n+ t.len = len; t.line_len = line_len; t.line_blen = line_blen; t.offset = offset;\n+ kh_value(idx->hash, k) = t;\n+ ++idx->n;\n+}\n+\n+faidx_t *fai_build_core(BGZF *bgzf)\n+{\n+ char *name;\n+ int c;\n+ int l_name, m_name;\n+ int line_len, line_blen, state;\n+ int l1, l2;\n+ faidx_t *idx;\n+ uint64_t offset;\n+ int64_t len;\n+\n+ idx = (faidx_t*)calloc(1, sizeof(faidx_t));\n+ idx->hash = kh_init(s);\n+ name = 0; l_name = m_name = 0;\n+ len = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0;\n+ while ( (c=bgzf_getc(bgzf))>=0 ) {\n+ if (c == \'\\n\') { // an empty line\n+ if (state == 1) {\n+ offset = bgzf_utell(bgzf);\n+ continue;\n+ } else if ((state == 0 && len < 0) || state == 2) continue;\n+ }\n+ if (c == \'>\') { // fasta header\n+ if (len >= 0)\n+ fai_insert_index(idx, name, len, line_len, line_blen, offset);\n+ l_name = 0;\n+ while ( (c=bgzf_getc(bgzf))>=0 && !isspace(c)) {\n+ if (m_name < l_name + 2) {\n+ m_name = l_name + 2;\n+ kroundup32(m_name);\n+ name = 
(char*)realloc(name, m_name);\n+ }\n+ name[l_name++] = c;\n+ }\n+ name[l_name] = \'\\0\';\n+ if ( c<0 ) {\n+ fprintf(stderr, "[fai_build_core] the last entry has no sequence\\n");\n+ free(name); fai_destroy(idx);\n+ return 0;\n+ }\n+ if (c != \'\\n\') while ( (c=bgzf_getc(bgzf))>=0 && c != \'\\n\');\n+ state = 1; len = 0;\n+ offset = bgzf_utell(bgzf);\n+ } else {\n+ if (state == 3) {\n+ '..b'; i >= 0; --i) if (s[i] == \':\') break; // look for colon from the end\n+ if (i >= 0) name_end = i;\n+ if (name_end < l) { // check if this is really the end\n+ int n_hyphen = 0;\n+ for (i = name_end + 1; i < l; ++i) {\n+ if (s[i] == \'-\') ++n_hyphen;\n+ else if (!isdigit(s[i]) && s[i] != \',\') break;\n+ }\n+ if (i < l || n_hyphen > 1) name_end = l; // malformated region string; then take str as the name\n+ s[name_end] = 0;\n+ iter = kh_get(s, h, s);\n+ if (iter == kh_end(h)) { // cannot find the sequence name\n+ iter = kh_get(s, h, str); // try str as the name\n+ if (iter == kh_end(h)) {\n+ *len = 0;\n+ free(s); return 0;\n+ } else s[name_end] = \':\', name_end = l;\n+ }\n+ } else iter = kh_get(s, h, str);\n+ if(iter == kh_end(h)) {\n+ fprintf(stderr, "[fai_fetch] Warning - Reference %s not found in FASTA file, returning empty sequence\\n", str);\n+ free(s);\n+ *len = -2;\n+ return 0;\n+ };\n+ val = kh_value(h, iter);\n+ // parse the interval\n+ if (name_end < l) {\n+ for (i = k = name_end + 1; i < l; ++i)\n+ if (s[i] != \',\') s[k++] = s[i];\n+ s[k] = 0;\n+ beg = atoi(s + name_end + 1);\n+ for (i = name_end + 1; i != k; ++i) if (s[i] == \'-\') break;\n+ end = i < k? 
atoi(s + i + 1) : val.len;\n+ if (beg > 0) --beg;\n+ } else beg = 0, end = val.len;\n+ if (beg >= val.len) beg = val.len;\n+ if (end >= val.len) end = val.len;\n+ if (beg > end) beg = end;\n+ free(s);\n+\n+ // now retrieve the sequence\n+ int ret = bgzf_useek(fai->bgzf, val.offset + beg / val.line_blen * val.line_len + beg % val.line_blen, SEEK_SET);\n+ if ( ret<0 )\n+ {\n+ *len = -1;\n+ fprintf(stderr, "[fai_fetch] Error: fai_fetch failed. (Seeking in a compressed, .gzi unindexed, file?)\\n");\n+ return NULL;\n+ }\n+ l = 0;\n+ s = (char*)malloc(end - beg + 2);\n+ while ( (c=bgzf_getc(fai->bgzf))>=0 && l < end - beg )\n+ if (isgraph(c)) s[l++] = c;\n+ s[l] = \'\\0\';\n+ *len = l;\n+ return s;\n+}\n+\n+int faidx_fetch_nseq(const faidx_t *fai)\n+{\n+ return fai->n;\n+}\n+\n+int faidx_nseq(const faidx_t *fai)\n+{\n+ return fai->n;\n+}\n+\n+const char *faidx_iseq(const faidx_t *fai, int i)\n+{\n+ return fai->name[i];\n+}\n+\n+int faidx_seq_len(const faidx_t *fai, const char *seq)\n+{\n+ khint_t k = kh_get(s, fai->hash, seq);\n+ if ( k == kh_end(fai->hash) ) return -1;\n+ return kh_val(fai->hash, k).len;\n+}\n+\n+char *faidx_fetch_seq(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len)\n+{\n+ int l, c;\n+ khiter_t iter;\n+ faidx1_t val;\n+ char *seq=NULL;\n+\n+ // Adjust position\n+ iter = kh_get(s, fai->hash, c_name);\n+ if (iter == kh_end(fai->hash))\n+ {\n+ *len = -2;\n+ fprintf(stderr, "[fai_fetch_seq] The sequence \\"%s\\" not found\\n", c_name);\n+ return NULL;\n+ }\n+ val = kh_value(fai->hash, iter);\n+ if(p_end_i < p_beg_i) p_beg_i = p_end_i;\n+ if(p_beg_i < 0) p_beg_i = 0;\n+ else if(val.len <= p_beg_i) p_beg_i = val.len - 1;\n+ if(p_end_i < 0) p_end_i = 0;\n+ else if(val.len <= p_end_i) p_end_i = val.len - 1;\n+\n+ // Now retrieve the sequence\n+ int ret = bgzf_useek(fai->bgzf, val.offset + p_beg_i / val.line_blen * val.line_len + p_beg_i % val.line_blen, SEEK_SET);\n+ if ( ret<0 )\n+ {\n+ *len = -1;\n+ fprintf(stderr, "[fai_fetch_seq] 
Error: fai_fetch failed. (Seeking in a compressed, .gzi unindexed, file?)\\n");\n+ return NULL;\n+ }\n+ l = 0;\n+ seq = (char*)malloc(p_end_i - p_beg_i + 2);\n+ while ( (c=bgzf_getc(fai->bgzf))>=0 && l < p_end_i - p_beg_i + 1)\n+ if (isgraph(c)) seq[l++] = c;\n+ seq[l] = \'\\0\';\n+ *len = l;\n+ return seq;\n+}\n+\n+int faidx_has_seq(const faidx_t *fai, const char *seq)\n+{\n+ khiter_t iter = kh_get(s, fai->hash, seq);\n+ if (iter == kh_end(fai->hash)) return 0;\n+ return 1;\n+}\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/hfile.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/hfile.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,547 @@\n+/* hfile.c -- buffered low-level input/output streams.\n+\n+ Copyright (C) 2013-2015 Genome Research Ltd.\n+\n+ Author: John Marshall <jm18@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <errno.h>\n+\n+#include "htslib/hfile.h"\n+#include "hfile_internal.h"\n+\n+/* hFILE fields are used as follows:\n+\n+ char *buffer; // Pointer to the start of the I/O buffer\n+ char *begin; // First not-yet-read character / unused position\n+ char *end; // First unfilled/unfillable position\n+ char *limit; // Pointer to the first position past the buffer\n+\n+ const hFILE_backend *backend; // Methods to refill/flush I/O buffer\n+\n+ off_t offset; // Offset within the stream of buffer position 0\n+ int at_eof:1; // For reading, whether EOF has been seen\n+ int has_errno; // Error number from the last failure on this stream\n+\n+For reading, begin is the first unread character in the buffer and end is the\n+first unfilled position:\n+\n+ -----------ABCDEFGHIJKLMNO---------------\n+ ^buffer ^begin ^end ^limit\n+\n+For writing, begin is the first unused position and end is unused so remains\n+equal to buffer:\n+\n+ ABCDEFGHIJKLMNOPQRSTUVWXYZ---------------\n+ ^buffer ^begin ^limit\n+ ^end\n+\n+Thus if begin > end then there is a non-empty write buffer, if begin < end\n+then there is a non-empty read buffer, and if begin == end then both buffers\n+are empty. In all cases, the stream\'s file position indicator corresponds\n+to the position pointed to by begin. 
*/\n+\n+hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)\n+{\n+ hFILE *fp = (hFILE *) malloc(struct_size);\n+ if (fp == NULL) goto error;\n+\n+ if (capacity == 0) capacity = 32768;\n+ // FIXME For now, clamp input buffer sizes so mpileup doesn\'t eat memory\n+ if (strchr(mode, \'r\') && capacity > 32768) capacity = 32768;\n+\n+ fp->buffer = (char *) malloc(capacity);\n+ if (fp->buffer == NULL) goto error;\n+\n+ fp->begin = fp->end = fp->buffer;\n+ fp->limit = &fp->buffer[capacity];\n+\n+ fp->offset = 0;\n+ fp->at_eof = 0;\n+ fp->has_errno = 0;\n+ return fp;\n+\n+error:\n+ hfile_destroy(fp);\n+ return NULL;\n+}\n+\n+void hfile_destroy(hFILE *fp)\n+{\n+ int save = errno;\n+ if (fp) free(fp->buffer);\n+ free(fp);\n+ errno = save;\n+}\n+\n+static inline int writebuffer_is_nonempty(hFILE *fp)\n+{\n+ return fp->begin > fp->end;\n+}\n+\n+/* Refills the read buffer from the backend (once, so may only partially\n+ fill the buffer), returning the number of additional characters read\n+ (which might be 0), or negative when an error occurred. 
*/\n+static ssize_t refill_buffer(hFILE *fp)\n+{\n+ ssize_t n;\n+\n+ // Move any unread characters to the start of the buffer\n+ if (fp->begin > fp->buffer) {\n+ fp->offset += fp->begin - fp->buffer;\n+ memmove(fp->buffer, fp->begin, fp->end - fp->'..b' if (fd < 0) goto error;\n+\n+ fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));\n+ if (fp == NULL) goto error;\n+\n+ fp->fd = fd;\n+ fp->is_socket = 0;\n+ fp->base.backend = &fd_backend;\n+ return &fp->base;\n+\n+error:\n+ if (fd >= 0) { int save = errno; (void) close(fd); errno = save; }\n+ hfile_destroy((hFILE *) fp);\n+ return NULL;\n+}\n+\n+hFILE *hdopen(int fd, const char *mode)\n+{\n+ hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));\n+ if (fp == NULL) return NULL;\n+\n+ fp->fd = fd;\n+ fp->is_socket = (strchr(mode, \'s\') != NULL);\n+ fp->base.backend = &fd_backend;\n+ return &fp->base;\n+}\n+\n+static hFILE *hopen_fd_stdinout(const char *mode)\n+{\n+ int fd = (strchr(mode, \'r\') != NULL)? 
STDIN_FILENO : STDOUT_FILENO;\n+ // TODO Set binary mode (for Windows)\n+ return hdopen(fd, mode);\n+}\n+\n+int hfile_oflags(const char *mode)\n+{\n+ int rdwr = 0, flags = 0;\n+ const char *s;\n+ for (s = mode; *s; s++)\n+ switch (*s) {\n+ case \'r\': rdwr = O_RDONLY; break;\n+ case \'w\': rdwr = O_WRONLY; flags |= O_CREAT | O_TRUNC; break;\n+ case \'a\': rdwr = O_WRONLY; flags |= O_CREAT | O_APPEND; break;\n+ case \'+\': rdwr = O_RDWR; break;\n+ default: break;\n+ }\n+\n+#ifdef O_BINARY\n+ flags |= O_BINARY;\n+#endif\n+\n+ return rdwr | flags;\n+}\n+\n+\n+/*********************\n+ * In-memory backend *\n+ *********************/\n+\n+typedef struct {\n+ hFILE base;\n+ const char *buffer;\n+ size_t length, pos;\n+} hFILE_mem;\n+\n+static ssize_t mem_read(hFILE *fpv, void *buffer, size_t nbytes)\n+{\n+ hFILE_mem *fp = (hFILE_mem *) fpv;\n+ size_t avail = fp->length - fp->pos;\n+ if (nbytes > avail) nbytes = avail;\n+ memcpy(buffer, fp->buffer + fp->pos, nbytes);\n+ fp->pos += nbytes;\n+ return nbytes;\n+}\n+\n+static off_t mem_seek(hFILE *fpv, off_t offset, int whence)\n+{\n+ hFILE_mem *fp = (hFILE_mem *) fpv;\n+ size_t absoffset = (offset >= 0)? 
offset : -offset;\n+ size_t origin;\n+\n+ switch (whence) {\n+ case SEEK_SET: origin = 0; break;\n+ case SEEK_CUR: origin = fp->pos; break;\n+ case SEEK_END: origin = fp->length; break;\n+ default: errno = EINVAL; return -1;\n+ }\n+\n+ if ((offset < 0 && absoffset > origin) ||\n+ (offset >= 0 && absoffset > fp->length - origin)) {\n+ errno = EINVAL;\n+ return -1;\n+ }\n+\n+ fp->pos = origin + offset;\n+ return fp->pos;\n+}\n+\n+static int mem_close(hFILE *fpv)\n+{\n+ return 0;\n+}\n+\n+static const struct hFILE_backend mem_backend =\n+{\n+ mem_read, NULL, mem_seek, NULL, mem_close\n+};\n+\n+static hFILE *hopen_mem(const char *data, const char *mode)\n+{\n+ // TODO Implement write modes, which will require memory allocation\n+ if (strchr(mode, \'r\') == NULL) { errno = EINVAL; return NULL; }\n+\n+ hFILE_mem *fp = (hFILE_mem *) hfile_init(sizeof (hFILE_mem), mode, 0);\n+ if (fp == NULL) return NULL;\n+\n+ fp->buffer = data;\n+ fp->length = strlen(data);\n+ fp->pos = 0;\n+ fp->base.backend = &mem_backend;\n+ return &fp->base;\n+}\n+\n+\n+/******************************\n+ * hopen() backend dispatcher *\n+ ******************************/\n+\n+hFILE *hopen(const char *fname, const char *mode)\n+{\n+ if (strncmp(fname, "http://", 7) == 0 ||\n+ strncmp(fname, "ftp://", 6) == 0) return hopen_net(fname, mode);\n+#ifdef HAVE_IRODS\n+ else if (strncmp(fname, "irods:", 6) == 0) return hopen_irods(fname, mode);\n+#endif\n+ else if (strncmp(fname, "data:", 5) == 0) return hopen_mem(fname + 5, mode);\n+ else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);\n+ else return hopen_fd(fname, mode);\n+}\n+\n+int hisremote(const char *fname)\n+{\n+ // FIXME Make a new backend entry to return this\n+ if (strncmp(fname, "http://", 7) == 0 ||\n+ strncmp(fname, "https://", 8) == 0 ||\n+ strncmp(fname, "ftp://", 6) == 0) return 1;\n+#ifdef HAVE_IRODS\n+ else if (strncmp(fname, "irods:", 6) == 0) return 1;\n+#endif\n+ else return 0;\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/hfile_internal.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/hfile_internal.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,76 @@ +/* hfile_internal.h -- internal parts of low-level input/output streams. + + Copyright (C) 2013-2015 Genome Research Ltd. + + Author: John Marshall <jm18@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#ifndef HFILE_INTERNAL_H +#define HFILE_INTERNAL_H + +#include "htslib/hfile.h" + +struct hFILE_backend { + /* As per read(2), returning the number of bytes read (possibly 0) or + negative (and setting errno) on errors. Front-end code will call this + repeatedly if necessary to attempt to get the desired byte count. */ + ssize_t (*read)(hFILE *fp, void *buffer, size_t nbytes) HTS_RESULT_USED; + + /* As per write(2), returning the number of bytes written or negative (and + setting errno) on errors. Front-end code will call this repeatedly if + necessary until the desired block is written or an error occurs. 
*/ + ssize_t (*write)(hFILE *fp, const void *buffer, size_t nbytes) + HTS_RESULT_USED; + + /* As per lseek(2), returning the resulting offset within the stream or + negative (and setting errno) on errors. */ + off_t (*seek)(hFILE *fp, off_t offset, int whence) HTS_RESULT_USED; + + /* Performs low-level flushing, if any, e.g., fsync(2); for writing streams + only. Returns 0 for success or negative (and sets errno) on errors. */ + int (*flush)(hFILE *fp) HTS_RESULT_USED; + + /* Closes the underlying stream (for output streams, the buffer will + already have been flushed), returning 0 for success or negative (and + setting errno) on errors, as per close(2). */ + int (*close)(hFILE *fp) HTS_RESULT_USED; +}; + +/* These are called from the hopen() dispatcher, and should call hfile_init() + to malloc a struct "derived" from hFILE and initialise it appropriately, + including setting base.backend to their own backend vector. */ +hFILE *hopen_irods(const char *filename, const char *mode); +hFILE *hopen_net(const char *filename, const char *mode); + +/* May be called by hopen_*() functions to decode a fopen()-style mode into + open(2)-style flags. */ +int hfile_oflags(const char *mode); + +/* Must be called by hopen_*() functions to allocate the hFILE struct and set + up its base. Capacity is a suggested buffer size (e.g., via fstat(2)) + or 0 for a default-sized buffer. */ +hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity); + +/* May be called by hopen_*() functions to undo the effects of hfile_init() + in the event opening the stream subsequently fails. (This is safe to use + even if fp is NULL. This takes care to preserve errno.) */ +void hfile_destroy(hFILE *fp); + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/hfile_irods.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/hfile_irods.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,243 @@ +/* hfile_irods.c -- iRODS backend for low-level file streams. + + Copyright (C) 2013, 2015 Genome Research Ltd. + + Author: John Marshall <jm18@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "hfile_internal.h" + +#include <rcConnect.h> +#include <dataObjOpen.h> +#include <dataObjRead.h> +#include <dataObjWrite.h> +#include <dataObjFsync.h> +#include <dataObjLseek.h> +#include <dataObjClose.h> + +typedef struct { + hFILE base; + int descriptor; +} hFILE_irods; + +static int status_errno(int status) +{ + switch (status) { + case SYS_NO_API_PRIV: return EACCES; + case SYS_MALLOC_ERR: return ENOMEM; + case SYS_OUT_OF_FILE_DESC: return ENFILE; + case SYS_BAD_FILE_DESCRIPTOR: return EBADF; + case CAT_NO_ROWS_FOUND: return ENOENT; + case CATALOG_ALREADY_HAS_ITEM_BY_THAT_NAME: return EEXIST; + default: return EIO; + } +} + +static void set_errno(int status) +{ + int err = abs(status) % 1000; + errno = err? err : status_errno(status); +} + +static struct { + rcComm_t *conn; + rodsEnv env; +} irods = { NULL }; + +static void irods_exit() +{ + (void) rcDisconnect(irods.conn); + irods.conn = NULL; +} + +static int irods_init() +{ + rErrMsg_t err; + int ret; + + ret = getRodsEnv(&irods.env); + if (ret < 0) goto error; + + irods.conn = rcConnect(irods.env.rodsHost, irods.env.rodsPort, + irods.env.rodsUserName, irods.env.rodsZone, + NO_RECONN, &err); + if (irods.conn == NULL) { ret = err.status; goto error; } + + if (strcmp(irods.env.rodsUserName, PUBLIC_USER_NAME) != 0) { + ret = clientLogin(irods.conn); + if (ret != 0) goto error; + } + + // In the unlikely event atexit() fails, it's better to succeed here and + // carry on and do the I/O; then eventually when the program exits, we'll + // merely disconnect from the server uncleanly, as if we had aborted. 
+ (void) atexit(irods_exit); + + return 0; + +error: + if (irods.conn) { (void) rcDisconnect(irods.conn); } + irods.conn = NULL; + set_errno(ret); + return -1; +} + +static ssize_t irods_read(hFILE *fpv, void *buffer, size_t nbytes) +{ + hFILE_irods *fp = (hFILE_irods *) fpv; + openedDataObjInp_t args; + bytesBuf_t buf; + int ret; + + memset(&args, 0, sizeof args); + args.l1descInx = fp->descriptor; + args.len = nbytes; + + buf.buf = buffer; + buf.len = nbytes; + + ret = rcDataObjRead(irods.conn, &args, &buf); + if (ret < 0) set_errno(ret); + return ret; +} + +static ssize_t irods_write(hFILE *fpv, const void *buffer, size_t nbytes) +{ + hFILE_irods *fp = (hFILE_irods *) fpv; + openedDataObjInp_t args; + bytesBuf_t buf; + int ret; + + memset(&args, 0, sizeof args); + args.l1descInx = fp->descriptor; + args.len = nbytes; + + buf.buf = (void *) buffer; // ...the iRODS API is not const-correct here + buf.len = nbytes; + + ret = rcDataObjWrite(irods.conn, &args, &buf); + if (ret < 0) set_errno(ret); + return ret; +} + +static off_t irods_seek(hFILE *fpv, off_t offset, int whence) +{ + hFILE_irods *fp = (hFILE_irods *) fpv; + openedDataObjInp_t args; + fileLseekOut_t *out = NULL; + int ret; + + memset(&args, 0, sizeof args); + args.l1descInx = fp->descriptor; + args.offset = offset; + args.whence = whence; + + ret = rcDataObjLseek(irods.conn, &args, &out); + + if (out) { offset = out->offset; free(out); } + else offset = -1; + if (ret < 0) { set_errno(ret); return -1; } + return offset; +} + +static int irods_flush(hFILE *fpv) +{ +// FIXME rcDataObjFsync() doesn't seem to function as expected. 
+// For now, flush is a no-op: see https://github.com/samtools/htslib/issues/168 +#if 0 + hFILE_irods *fp = (hFILE_irods *) fpv; + openedDataObjInp_t args; + int ret; + + memset(&args, 0, sizeof args); + args.l1descInx = fp->descriptor; + + ret = rcDataObjFsync(irods.conn, &args); + if (ret < 0) set_errno(ret); + return ret; +#endif + return 0; +} + +static int irods_close(hFILE *fpv) +{ + hFILE_irods *fp = (hFILE_irods *) fpv; + openedDataObjInp_t args; + int ret; + + memset(&args, 0, sizeof args); + args.l1descInx = fp->descriptor; + + ret = rcDataObjClose(irods.conn, &args); + if (ret < 0) set_errno(ret); + return ret; +} + +static const struct hFILE_backend irods_backend = +{ + irods_read, irods_write, irods_seek, irods_flush, irods_close +}; + +hFILE *hopen_irods(const char *filename, const char *mode) +{ + hFILE_irods *fp; + rodsPath_t path; + dataObjInp_t args; + int ret; + + // Initialise the iRODS connection if this is the first use. + if (irods.conn == NULL) { if (irods_init() < 0) return NULL; } + + if (strncmp(filename, "irods:", 6) == 0) filename += 6; + else { errno = EINVAL; return NULL; } + + fp = (hFILE_irods *) hfile_init(sizeof (hFILE_irods), mode, 0); + if (fp == NULL) return NULL; + + strncpy(path.inPath, filename, MAX_NAME_LEN-1); + path.inPath[MAX_NAME_LEN-1] = '\0'; + + ret = parseRodsPath(&path, &irods.env); + if (ret < 0) goto error; + + memset(&args, 0, sizeof args); + strcpy(args.objPath, path.outPath); + args.openFlags = hfile_oflags(mode); + if (args.openFlags & O_CREAT) { + args.createMode = 0666; + addKeyVal(&args.condInput, DEST_RESC_NAME_KW,irods.env.rodsDefResource); + } + + ret = rcDataObjOpen(irods.conn, &args); + if (ret < 0) goto error; + fp->descriptor = ret; + + fp->base.backend = &irods_backend; + return &fp->base; + +error: + hfile_destroy((hFILE *) fp); + set_errno(ret); + return NULL; +} |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/hfile_net.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/hfile_net.c Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,99 @@ +/* hfile_net.c -- network backend for low-level input/output streams. + + Copyright (C) 2013-2014 Genome Research Ltd. + + Author: John Marshall <jm18@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include <stdlib.h> +#include <errno.h> + +#include "hfile_internal.h" + +#include "htslib/knetfile.h" + +typedef struct { + hFILE base; + knetFile *netfp; +} hFILE_net; + +static int net_inited = 0; + +#ifdef _WIN32 +static void net_exit(void) +{ + knet_win32_destroy(); +} +#endif + +static int net_init(void) +{ +#ifdef _WIN32 + if (knet_win32_init() != 0) return -1; + + // In the unlikely event atexit() fails, it's better to succeed here and + // carry on and do the I/O; then eventually when the program exits, we'll + // merely have failed to clean up properly, as if we had aborted. 
+ (void) atexit(net_exit); +#endif + + net_inited = 1; + return 0; +} + +static ssize_t net_read(hFILE *fpv, void *buffer, size_t nbytes) +{ + hFILE_net *fp = (hFILE_net *) fpv; + return knet_read(fp->netfp, buffer, nbytes); +} + +static off_t net_seek(hFILE *fpv, off_t offset, int whence) +{ + hFILE_net *fp = (hFILE_net *) fpv; + return knet_seek(fp->netfp, offset, whence); +} + +static int net_close(hFILE *fpv) +{ + hFILE_net *fp = (hFILE_net *) fpv; + return knet_close(fp->netfp); +} + +static const struct hFILE_backend net_backend = +{ + net_read, NULL, net_seek, NULL, net_close +}; + +hFILE *hopen_net(const char *filename, const char *mode) +{ + hFILE_net *fp; + + // Do any networking initialisation if this is the first use. + if (! net_inited) { if (net_init() < 0) return NULL; } + + fp = (hFILE_net *) hfile_init(sizeof (hFILE_net), mode, 0); + if (fp == NULL) return NULL; + + fp->netfp = knet_open(filename, mode); + if (fp->netfp == NULL) { hfile_destroy((hFILE *) fp); return NULL; } + + fp->base.backend = &net_backend; + return &fp->base; +} |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/hts.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/hts.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,1665 @@\n+/* hts.c -- format-neutral I/O, indexing, and iterator API functions.\n+\n+ Copyright (C) 2008, 2009, 2012-2015 Genome Research Ltd.\n+ Copyright (C) 2012, 2013 Broad Institute.\n+\n+ Author: Heng Li <lh3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#include <zlib.h>\n+#include <ctype.h>\n+#include <stdio.h>\n+#include <string.h>\n+#include <stdlib.h>\n+#include <limits.h>\n+#include <fcntl.h>\n+#include <errno.h>\n+#include <sys/stat.h>\n+#include "htslib/bgzf.h"\n+#include "htslib/hts.h"\n+#include "cram/cram.h"\n+#include "htslib/hfile.h"\n+#include "version.h"\n+\n+#include "htslib/kseq.h"\n+#define KS_BGZF 1\n+#if KS_BGZF\n+ // bgzf now supports gzip-compressed files, the gzFile branch can be removed\n+ KSTREAM_INIT2(, BGZF*, bgzf_read, 65536)\n+#else\n+ KSTREAM_INIT2(, gzFile, gzread, 16384)\n+#endif\n+\n+#include "htslib/khash.h"\n+KHASH_INIT2(s2i,, kh_cstr_t, int64_t, 1, kh_str_hash_func, kh_str_hash_equal)\n+\n+int hts_verbose = 3;\n+\n+const char *hts_version()\n+{\n+ return HTS_VERSION;\n+}\n+\n+const unsigned char seq_nt16_table[256] = {\n+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+ 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15,\n+ 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,\n+ 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,\n+ 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,\n+ 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,\n+\n+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+ 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15\n+};\n+\n+const char seq_nt16_str[] = "=ACMGRSVTWYHKDBN";\n+\n+const int seq_nt16_int[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };\n+\n+/**********************\n+ *** Basic file I/O ***\n+ **********************/\n+\n+static enum htsFormatCategory format_category(enum 
htsExactFormat fmt)\n+{\n+ switch (fmt) {\n+ case bam:\n+ case sam:\n+ case cram:\n+ return sequence_data;\n+\n+ case vcf:\n+ case bcf:\n+ return variant_data;\n+\n+ case bai:\n+ case crai:\n+ case csi:\n+ case gzi:\n+ case tbi:\n+ return index_file;\n+\n+ case bed:\n+ return region_list;\n+\n+ case unknown_format:\n+ case binary_format:\n+ case text_format:\n+ case format_maximum:\n+ break;\n+ }\n+\n+ return unknown_category;\n+}\n+\n+// Decompress up to ten or so bytes by peeking at the file, which must be\n+// positioned at the start of a GZIP block.\n+static size_t decompress_peek(hFILE *fp, unsigned char *dest, size_t destsize)\n+{\n+ // Typically at most'..b'urn ret;\n+ }\n+ if (iter->off == 0) return -1;\n+ for (;;) {\n+ if (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk\n+ if (iter->i == iter->n_off - 1) { ret = -1; break; } // no more chunks\n+ if (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek\n+ bgzf_seek(fp, iter->off[iter->i+1].u, SEEK_SET);\n+ iter->curr_off = bgzf_tell(fp);\n+ }\n+ ++iter->i;\n+ }\n+ if ((ret = iter->readrec(fp, data, r, &tid, &beg, &end)) >= 0) {\n+ iter->curr_off = bgzf_tell(fp);\n+ if (tid != iter->tid || beg >= iter->end) { // no need to proceed\n+ ret = -1; break;\n+ } else if (end > iter->beg && iter->end > beg) {\n+ iter->curr_tid = tid;\n+ iter->curr_beg = beg;\n+ iter->curr_end = end;\n+ return ret;\n+ }\n+ } else break; // end of file or error\n+ }\n+ iter->finished = 1;\n+ return ret;\n+}\n+\n+/**********************\n+ *** Retrieve index ***\n+ **********************/\n+\n+static char *test_and_fetch(const char *fn)\n+{\n+ FILE *fp;\n+ if (hisremote(fn)) {\n+ const int buf_size = 1 * 1024 * 1024;\n+ hFILE *fp_remote;\n+ uint8_t *buf;\n+ int l;\n+ const char *p;\n+ for (p = fn + strlen(fn) - 1; p >= fn; --p)\n+ if (*p == \'/\') break;\n+ ++p; // p now points to the local file name\n+ // Attempt to open local file first\n+ 
if ((fp = fopen((char*)p, "rb")) != 0)\n+ {\n+ fclose(fp);\n+ return (char*)p;\n+ }\n+ // Attempt to open remote file. Stay quiet on failure, it is OK to fail when trying first .csi then .tbi index.\n+ if ((fp_remote = hopen(fn, "r")) == 0) return 0;\n+ if ((fp = fopen(p, "w")) == 0) {\n+ if (hts_verbose >= 1) fprintf(stderr, "[E::%s] fail to create file \'%s\' in the working directory\\n", __func__, p);\n+ hclose_abruptly(fp_remote);\n+ return 0;\n+ }\n+ if (hts_verbose >= 3) fprintf(stderr, "[M::%s] downloading file \'%s\' to local directory\\n", __func__, fn);\n+ buf = (uint8_t*)calloc(buf_size, 1);\n+ while ((l = hread(fp_remote, buf, buf_size)) > 0) fwrite(buf, 1, l, fp);\n+ free(buf);\n+ fclose(fp);\n+ if (hclose(fp_remote) != 0) fprintf(stderr, "[E::%s] fail to close remote file \'%s\'\\n", __func__, fn);\n+ return (char*)p;\n+ } else {\n+ if ((fp = fopen(fn, "rb")) == 0) return 0;\n+ fclose(fp);\n+ return (char*)fn;\n+ }\n+}\n+\n+char *hts_idx_getfn(const char *fn, const char *ext)\n+{\n+ int i, l_fn, l_ext;\n+ char *fnidx, *ret;\n+ l_fn = strlen(fn); l_ext = strlen(ext);\n+ fnidx = (char*)calloc(l_fn + l_ext + 1, 1);\n+ strcpy(fnidx, fn); strcpy(fnidx + l_fn, ext);\n+ if ((ret = test_and_fetch(fnidx)) == 0) {\n+ for (i = l_fn - 1; i > 0; --i)\n+ if (fnidx[i] == \'.\') break;\n+ strcpy(fnidx + i, ext);\n+ ret = test_and_fetch(fnidx);\n+ }\n+ if (ret == 0) {\n+ free(fnidx);\n+ return 0;\n+ }\n+ l_fn = strlen(ret);\n+ memmove(fnidx, ret, l_fn + 1);\n+ return fnidx;\n+}\n+\n+hts_idx_t *hts_idx_load(const char *fn, int fmt)\n+{\n+ char *fnidx;\n+ hts_idx_t *idx;\n+ fnidx = hts_idx_getfn(fn, ".csi");\n+ if (fnidx) fmt = HTS_FMT_CSI;\n+ else fnidx = hts_idx_getfn(fn, fmt == HTS_FMT_BAI? 
".bai" : ".tbi");\n+ if (fnidx == 0) return 0;\n+\n+ // Check that the index file is up to date, the main file might have changed\n+ struct stat stat_idx,stat_main;\n+ if ( !stat(fn, &stat_main) && !stat(fnidx, &stat_idx) )\n+ {\n+ if ( stat_idx.st_mtime < stat_main.st_mtime )\n+ fprintf(stderr, "Warning: The index file is older than the data file: %s\\n", fnidx);\n+ }\n+ idx = hts_idx_load_local(fnidx, fmt);\n+ free(fnidx);\n+ return idx;\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htsfile.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htsfile.1 Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,71 @@ +.TH htsfile 1 "3 February 2015" "htslib-1.2.1" "Bioinformatics tools" +.SH NAME +htsfile \- identify high-throughput sequencing data files +.\" +.\" Copyright (C) 2015 Genome Research Ltd. +.\" +.\" Author: John Marshall <jm18@sanger.ac.uk> +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining a +.\" copy of this software and associated documentation files (the "Software"), +.\" to deal in the Software without restriction, including without limitation +.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, +.\" and/or sell copies of the Software, and to permit persons to whom the +.\" Software is furnished to do so, subject to the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be included in +.\" all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +.\" DEALINGS IN THE SOFTWARE. +.\" +.SH SYNOPSIS +.B htsfile +.RB [ -chH ] +.IR FILE ... +.SH DESCRIPTION +The \fBhtsfile\fR utility attempts to identify what kind of high-throughput +sequencing data files the specified files are, and provides minimal viewing +capabilities for some kinds of data file. +.P +It can identify sequencing data files such as SAM, BAM, and CRAM; +variant calling data files such as VCF and BCF; +index files used to index these data files; +and compressed versions of many of them. 
+.P +For each \fIFILE\fR given, \fBhtsfile\fP prints a description of the file +format determined, using similar keyword conventions to \fBfile\fP(1): +"text" indicates a textual file that can probably be viewed on a terminal; +"data" indicates binary data; +"sequence", "variant calling", and "index" indicate different categories of +data file. +When it can be identified, the name of the particular file format (such as +"BAM" or "VCF") is printed at the start of the description. +.P +When used to view file contents as text, \fBhtsfile\fP can optionally show +only headers or only data records, but has no other filtering capabilities. +Use \fBsamtools\fR or \fBbcftools\fR if you need more extensive viewing or +filtering capabilities. +.P +The following options are accepted: +.TP 4n +.BR -c ", " --view +Instead of identifying the specified files, display a textual representation +of their contents on standard output. +.TP +.BR -h ", " --header-only +Display data file headers only. +Implies \fB--view\fR. +.TP +.BR -H ", " --no-header +When viewing files, display data records only. +.PP +.SH SEE ALSO +.IR bcftools (1), +.IR file (1), +.IR samtools (1) |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htsfile.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htsfile.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,168 @@ +/* htsfile.c -- file identifier and minimal viewer. + + Copyright (C) 2014-2015 Genome Research Ltd. + + Author: John Marshall <jm18@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <getopt.h> +#include <unistd.h> + +#include "htslib/hfile.h" +#include "htslib/hts.h" +#include "htslib/sam.h" +#include "htslib/vcf.h" + +enum { identify, view_headers, view_all } mode = identify; +int show_headers = 1; + +static htsFile *dup_stdout(const char *mode) +{ + int fd = dup(STDOUT_FILENO); + hFILE *hfp = (fd >= 0)? hdopen(fd, mode) : NULL; + return hfp? 
hts_hopen(hfp, "-", mode) : NULL; +} + +static int view_sam(hFILE *hfp, const char *filename) +{ + samFile *in = hts_hopen(hfp, filename, "r"); + if (in == NULL) return 0; + samFile *out = dup_stdout("w"); + bam_hdr_t *hdr = sam_hdr_read(in); + + if (show_headers) sam_hdr_write(out, hdr); + if (mode == view_all) { + bam1_t *b = bam_init1(); + while (sam_read1(in, hdr, b) >= 0) + sam_write1(out, hdr, b); + bam_destroy1(b); + } + + bam_hdr_destroy(hdr); + hts_close(out); + hts_close(in); + return 1; +} + +static int view_vcf(hFILE *hfp, const char *filename) +{ + vcfFile *in = hts_hopen(hfp, filename, "r"); + if (in == NULL) return 0; + vcfFile *out = dup_stdout("w"); + bcf_hdr_t *hdr = bcf_hdr_read(in); + + if (show_headers) bcf_hdr_write(out, hdr); + if (mode == view_all) { + bcf1_t *rec = bcf_init(); + while (bcf_read(in, hdr, rec) >= 0) + bcf_write(out, hdr, rec); + bcf_destroy(rec); + } + + bcf_hdr_destroy(hdr); + hts_close(out); + hts_close(in); + return 1; +} + +static void usage(FILE *fp, int status) +{ + fprintf(fp, +"Usage: htsfile [-chH] FILE...\n" +"Options:\n" +" -c, --view Write textual form of FILEs to standard output\n" +" -h, --header-only Display only headers in view mode, not records\n" +" -H, --no-header Suppress header display in view mode\n"); + exit(status); +} + +int main(int argc, char **argv) +{ + static const struct option options[] = { + { "header-only", no_argument, NULL, 'h' }, + { "no-header", no_argument, NULL, 'H' }, + { "view", no_argument, NULL, 'c' }, + { "help", no_argument, NULL, '?' 
}, + { "version", no_argument, NULL, 1 }, + { NULL, 0, NULL, 0 } + }; + + int status = EXIT_SUCCESS; + int c, i; + while ((c = getopt_long(argc, argv, "chH?", options, NULL)) >= 0) + switch (c) { + case 'c': mode = view_all; break; + case 'h': mode = view_headers; show_headers = 1; break; + case 'H': show_headers = 0; break; + case 1: + printf( +"htsfile (htslib) %s\n" +"Copyright (C) 2015 Genome Research Ltd.\n", + hts_version()); + exit(EXIT_SUCCESS); + break; + case '?': usage(stdout, EXIT_SUCCESS); break; + default: usage(stderr, EXIT_FAILURE); break; + } + + if (optind == argc) usage(stderr, EXIT_FAILURE); + + for (i = optind; i < argc; i++) { + htsFormat fmt; + hFILE *fp = hopen(argv[i], "r"); + if (fp == NULL) { + fprintf(stderr, "htsfile: can't open \"%s\": %s\n", argv[i], strerror(errno)); + status = EXIT_FAILURE; + continue; + } + + if (hts_detect_format(fp, &fmt) < 0) { + fprintf(stderr, "htsfile: detecting \"%s\" format failed: %s\n", argv[i], strerror(errno)); + hclose_abruptly(fp); + status = EXIT_FAILURE; + continue; + } + + if (mode == identify) { + char *description = hts_format_description(&fmt); + printf("%s:\t%s\n", argv[i], description); + free(description); + } + else + switch (fmt.category) { + case sequence_data: if (view_sam(fp, argv[i])) fp = NULL; break; + case variant_data: if (view_vcf(fp, argv[i])) fp = NULL; break; + default: + fprintf(stderr, "htsfile: can't view %s: unknown format\n", argv[i]); + status = EXIT_FAILURE; + break; + } + + if (fp && hclose(fp) < 0) { + fprintf(stderr, "htsfile: closing %s failed\n", argv[i]); + status = EXIT_FAILURE; + } + } + + return status; +} |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib.mk Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,150 @@ +# Makefile rules useful for third-party code using htslib's public API. +# +# Copyright (C) 2013-2015 Genome Research Ltd. +# +# Author: John Marshall <jm18@sanger.ac.uk> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# The makefile fragment included below provides variables that can be used +# to express dependencies on headers supplied by an in-development htslib. +# If your source file foo.c #includes <htslib/hts.h> and <htslib/kstring.h>, +# you can write the correct prerequisites for foo.o as: +# +# HTSDIR = <path to htslib top-level directory> +# include $(HTSDIR)/htslib.mk +# +# foo.o: foo.c $(htslib_hts_h) $(HTSDIR)/htslib/kstring.h +# +# Variables are not provided for k*.h, as those never include other headers. + +HTSPREFIX = $(HTSDIR)/ +include $(HTSDIR)/htslib_vars.mk + +# Rules for rebuilding an in-development htslib's static and shared libraries. 
+# If your program foo links with libhts, adding the appropriate prerequisite +# will cause the library to be rebuilt as necessary: +# +# foo: foo.o $(HTSDIR)/libhts.a +# +# or similarly if your target requires any of the tools supplied: +# +# bar.bed.bgz.tbi: bar.bed.bgz $(HTSDIR)/tabix +# $(HTSDIR)/tabix -p bed bar.bed.bgz + +HTSLIB_PUBLIC_HEADERS = \ + $(HTSDIR)/htslib/bgzf.h \ + $(HTSDIR)/htslib/faidx.h \ + $(HTSDIR)/htslib/hfile.h \ + $(HTSDIR)/htslib/hts.h \ + $(HTSDIR)/htslib/hts_defs.h \ + $(HTSDIR)/htslib/khash.h \ + $(HTSDIR)/htslib/klist.h \ + $(HTSDIR)/htslib/knetfile.h \ + $(HTSDIR)/htslib/kseq.h \ + $(HTSDIR)/htslib/ksort.h \ + $(HTSDIR)/htslib/kstring.h \ + $(HTSDIR)/htslib/regidx.h \ + $(HTSDIR)/htslib/sam.h \ + $(HTSDIR)/htslib/synced_bcf_reader.h \ + $(HTSDIR)/htslib/tbx.h \ + $(HTSDIR)/htslib/vcf.h \ + $(HTSDIR)/htslib/vcf_sweep.h \ + $(HTSDIR)/htslib/vcfutils.h + +HTSLIB_ALL = \ + $(HTSLIB_PUBLIC_HEADERS) \ + $(HTSDIR)/bgzf.c \ + $(HTSDIR)/faidx.c \ + $(HTSDIR)/hfile_internal.h \ + $(HTSDIR)/hfile.c \ + $(HTSDIR)/hfile_irods.c \ + $(HTSDIR)/hfile_net.c \ + $(HTSDIR)/hts.c \ + $(HTSDIR)/knetfile.c \ + $(HTSDIR)/kstring.c \ + $(HTSDIR)/regidx.c \ + $(HTSDIR)/sam.c \ + $(HTSDIR)/synced_bcf_reader.c \ + $(HTSDIR)/tbx.c \ + $(HTSDIR)/vcf.c \ + $(HTSDIR)/vcf_sweep.c \ + $(HTSDIR)/vcfutils.c \ + $(HTSDIR)/cram/cram.h \ + $(HTSDIR)/cram/cram_codecs.c \ + $(HTSDIR)/cram/cram_codecs.h \ + $(HTSDIR)/cram/cram_decode.c \ + $(HTSDIR)/cram/cram_decode.h \ + $(HTSDIR)/cram/cram_encode.c \ + $(HTSDIR)/cram/cram_encode.h \ + $(HTSDIR)/cram/cram_index.c \ + $(HTSDIR)/cram/cram_index.h \ + $(HTSDIR)/cram/cram_io.c \ + $(HTSDIR)/cram/cram_io.h \ + $(HTSDIR)/cram/cram_samtools.c \ + $(HTSDIR)/cram/cram_samtools.h \ + $(HTSDIR)/cram/cram_stats.c \ + $(HTSDIR)/cram/cram_stats.h \ + $(HTSDIR)/cram/cram_structs.h \ + $(HTSDIR)/cram/files.c \ + $(HTSDIR)/cram/mFILE.c \ + $(HTSDIR)/cram/mFILE.h \ + $(HTSDIR)/cram/md5.c \ + $(HTSDIR)/cram/md5.h \ + $(HTSDIR)/cram/misc.h \ 
+ $(HTSDIR)/cram/open_trace_file.c \ + $(HTSDIR)/cram/open_trace_file.h \ + $(HTSDIR)/cram/os.h \ + $(HTSDIR)/cram/pooled_alloc.c \ + $(HTSDIR)/cram/pooled_alloc.h \ + $(HTSDIR)/cram/sam_header.c \ + $(HTSDIR)/cram/sam_header.h \ + $(HTSDIR)/cram/string_alloc.c \ + $(HTSDIR)/cram/string_alloc.h \ + $(HTSDIR)/cram/thread_pool.c \ + $(HTSDIR)/cram/thread_pool.h \ + $(HTSDIR)/cram/vlen.c \ + $(HTSDIR)/cram/vlen.h \ + $(HTSDIR)/cram/zfio.c \ + $(HTSDIR)/cram/zfio.h + +$(HTSDIR)/libhts.a: $(HTSLIB_ALL) + +cd $(HTSDIR) && $(MAKE) lib-static + +$(HTSDIR)/libhts.so $(HTSDIR)/libhts.dylib: $(HTSLIB_ALL) + +cd $(HTSDIR) && $(MAKE) lib-shared + +$(HTSDIR)/bgzip: $(HTSDIR)/bgzip.c $(HTSLIB_PUBLIC_HEADERS) + +cd $(HTSDIR) && $(MAKE) bgzip + +$(HTSDIR)/htsfile: $(HTSDIR)/htsfile.c $(HTSLIB_PUBLIC_HEADERS) + +cd $(HTSDIR) && $(MAKE) htsfile + +$(HTSDIR)/tabix: $(HTSDIR)/tabix.c $(HTSLIB_PUBLIC_HEADERS) + +cd $(HTSDIR) && $(MAKE) tabix + +# Rules for phony targets. You may wish to have your corresponding phony +# targets invoke these in addition to their own recipes: +# +# clean: clean-htslib + +clean-htslib install-htslib: + +cd $(HTSDIR) && $(MAKE) $(@:-htslib=) + +.PHONY: clean-htslib install-htslib |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib.pc.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib.pc.in Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,10 @@ +includedir=@includedir@ +libdir=@libdir@ + +Name: htslib +Description: C library for high-throughput sequencing data formats +Version: @PACKAGE_VERSION@ +Cflags: -I${includedir} +Libs: -L${libdir} -lhts +Libs.private: -L${libdir} -lhts -lm -lpthread +Requires.private: zlib |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/bgzf.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/bgzf.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,315 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology\n+ 2011, 2012 Attractive Chaos <attractor@live.co.uk>\n+ Copyright (C) 2009, 2013, 2014 Genome Research Ltd\n+\n+ Permission is hereby granted, free of charge, to any person obtaining a copy\n+ of this software and associated documentation files (the "Software"), to deal\n+ in the Software without restriction, including without limitation the rights\n+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+ copies of the Software, and to permit persons to whom the Software is\n+ furnished to do so, subject to the following conditions:\n+\n+ The above copyright notice and this permission notice shall be included in\n+ all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n+ THE SOFTWARE.\n+*/\n+\n+/* The BGZF library was originally written by Bob Handsaker from the Broad\n+ * Institute. It was later improved by the SAMtools developers. 
*/\n+\n+#ifndef HTSLIB_BGZF_H\n+#define HTSLIB_BGZF_H\n+\n+#include <stdint.h>\n+#include <stdio.h>\n+#include <zlib.h>\n+#include <sys/types.h>\n+\n+#define BGZF_BLOCK_SIZE 0xff00 // make sure compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE\n+#define BGZF_MAX_BLOCK_SIZE 0x10000\n+\n+#define BGZF_ERR_ZLIB 1\n+#define BGZF_ERR_HEADER 2\n+#define BGZF_ERR_IO 4\n+#define BGZF_ERR_MISUSE 8\n+\n+struct hFILE;\n+struct bgzf_mtaux_t;\n+typedef struct __bgzidx_t bgzidx_t;\n+\n+struct BGZF {\n+ int errcode:16, is_write:2, is_be:2, compress_level:9, is_compressed:2, is_gzip:1;\n+ int cache_size;\n+ int block_length, block_offset;\n+ int64_t block_address, uncompressed_address;\n+ void *uncompressed_block, *compressed_block;\n+ void *cache; // a pointer to a hash table\n+ struct hFILE *fp; // actual file handle\n+ struct bgzf_mtaux_t *mt; // only used for multi-threading\n+ bgzidx_t *idx; // BGZF index\n+ int idx_build_otf; // build index on the fly, set by bgzf_index_build_init()\n+ z_stream *gz_stream;// for gzip-compressed files\n+};\n+#ifndef HTS_BGZF_TYPEDEF\n+typedef struct BGZF BGZF;\n+#define HTS_BGZF_TYPEDEF\n+#endif\n+\n+#ifndef KSTRING_T\n+#define KSTRING_T kstring_t\n+typedef struct __kstring_t {\n+ size_t l, m;\n+ char *s;\n+} kstring_t;\n+#endif\n+\n+#ifdef __cplusplus\n+extern "C" {\n+#endif\n+\n+ /******************\n+ * Basic routines *\n+ ******************/\n+\n+ /**\n+ * Open an existing file descriptor for reading or writing.\n+ *\n+ * @param fd file descriptor\n+ * @param mode mode matching /[rwag][u0-9]+/: \'r\' for reading, \'w\' for\n+ * writing, \'a\' for appending, \'g\' for gzip rather than BGZF\n+ * compression (with \'w\' only), and digit specifies the zlib\n+ * compression level. 
\n+ * Note that there is a distinction between \'u\' and \'0\': the\n+ * first yields plain uncompressed output whereas the latter\n+ * outputs uncompressed data wrapped in the zlib format.\n+ * @return BGZF file handler; 0 on error\n+ */\n+ BGZF* bgzf_dopen(int fd, const char *mode);\n+\n+ #define bgzf_fdopen(fd, mode) bgzf_dopen((fd), (mode)) // for backward compatibility\n+\n+ /**\n+ * Open the specified file for reading or writing.\n+ */\n+ BGZF* bgzf_open(const char* path, const char *mode);\n+\n+ /**\n+ * Open an existing hFILE stream for reading or writing.\n+ */\n+ BGZF* bgzf_hopen(struct hFILE *fp, const char *mode);\n+\n+ /**\n'..b"nce);\n+\n+ /**\n+ * Check if the BGZF end-of-file (EOF) marker is present\n+ *\n+ * @param fp BGZF file handler opened for reading\n+ * @return 1 if the EOF marker is present and correct;\n+ * 2 if it can't be checked, e.g., because fp isn't seekable;\n+ * 0 if the EOF marker is absent;\n+ * -1 (with errno set) on error\n+ */\n+ int bgzf_check_EOF(BGZF *fp);\n+\n+ /**\n+ * Check if a file is in the BGZF format\n+ *\n+ * @param fn file name\n+ * @return 1 if _fn_ is BGZF; 0 if not or on I/O error\n+ */\n+ int bgzf_is_bgzf(const char *fn);\n+\n+ /*********************\n+ * Advanced routines *\n+ *********************/\n+\n+ /**\n+ * Set the cache size. Only effective when compiled with -DBGZF_CACHE.\n+ *\n+ * @param fp BGZF file handler\n+ * @param size size of cache in bytes; 0 to disable caching (default)\n+ */\n+ void bgzf_set_cache_size(BGZF *fp, int size);\n+\n+ /**\n+ * Flush the file if the remaining buffer size is smaller than _size_\n+ * @return 0 if flushing succeeded or was not needed; negative on error\n+ */\n+ int bgzf_flush_try(BGZF *fp, ssize_t size);\n+\n+ /**\n+ * Read one byte from a BGZF file. It is faster than bgzf_read()\n+ * @param fp BGZF file handler\n+ * @return byte read; -1 on end-of-file or error\n+ */\n+ int bgzf_getc(BGZF *fp);\n+\n+ /**\n+ * Read one line from a BGZF file. 
It is faster than bgzf_getc()\n+ *\n+ * @param fp BGZF file handler\n+ * @param delim delimitor\n+ * @param str string to write to; must be initialized\n+ * @return length of the string; 0 on end-of-file; negative on error\n+ */\n+ int bgzf_getline(BGZF *fp, int delim, kstring_t *str);\n+\n+ /**\n+ * Read the next BGZF block.\n+ */\n+ int bgzf_read_block(BGZF *fp);\n+\n+ /**\n+ * Enable multi-threading (only effective on writing and when the\n+ * library was compiled with -DBGZF_MT)\n+ *\n+ * @param fp BGZF file handler; must be opened for writing\n+ * @param n_threads #threads used for writing\n+ * @param n_sub_blks #blocks processed by each thread; a value 64-256 is recommended\n+ */\n+ int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks);\n+\n+\n+ /*******************\n+ * bgzidx routines *\n+ *******************/\n+\n+ /**\n+ * Position BGZF at the uncompressed offset\n+ *\n+ * @param fp BGZF file handler; must be opened for reading\n+ * @param uoffset file offset in the uncompressed data\n+ * @param where SEEK_SET supported atm\n+ *\n+ * Returns 0 on success and -1 on error.\n+ */\n+ int bgzf_useek(BGZF *fp, long uoffset, int where);\n+\n+ /**\n+ * Position in uncompressed BGZF\n+ *\n+ * @param fp BGZF file handler; must be opened for reading\n+ *\n+ * Returns the current offset on success and -1 on error.\n+ */\n+ long bgzf_utell(BGZF *fp);\n+\n+ /**\n+ * Tell BGZF to build index while compressing.\n+ *\n+ * @param fp BGZF file handler; can be opened for reading or writing.\n+ *\n+ * Returns 0 on success and -1 on error.\n+ */\n+ int bgzf_index_build_init(BGZF *fp);\n+\n+ /**\n+ * Load BGZF index\n+ *\n+ * @param fp BGZF file handler\n+ * @param bname base name\n+ * @param suffix suffix to add to bname (can be NULL)\n+ *\n+ * Returns 0 on success and -1 on error.\n+ */\n+ int bgzf_index_load(BGZF *fp, const char *bname, const char *suffix);\n+\n+ /**\n+ * Save BGZF index\n+ *\n+ * @param fp BGZF file handler\n+ * @param bname base name\n+ * @param suffix 
suffix to add to bname (can be NULL)\n+ *\n+ * Returns 0 on success and -1 on error.\n+ */\n+ int bgzf_index_dump(BGZF *fp, const char *bname, const char *suffix);\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+#endif\n" |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/faidx.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/faidx.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,135 @@ +/* faidx.h -- FASTA random access. + + Copyright (C) 2008, 2009, 2013, 2014 Genome Research Ltd. + + Author: Heng Li <lh3@sanger.ac.uk> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef HTSLIB_FAIDX_H +#define HTSLIB_FAIDX_H + +/*! + @header + + Index FASTA files and extract subsequence. + + The fai file index columns are: + - chromosome name + - chromosome length: number of bases + - offset: number of bytes to skip to get to the first base + from the beginning of the file, including the length + of the sequence description string (">chr ..\n") + - line length: number of bases per line (excluding \n) + - binary line length: number of bytes, including \n + + @copyright The Wellcome Trust Sanger Institute. + */ + +struct __faidx_t; +typedef struct __faidx_t faidx_t; + +#ifdef __cplusplus +extern "C" { +#endif + + /*! + @abstract Build index for a FASTA or bgzip-compressed FASTA file. 
+ @param fn FASTA file name + @return 0 on success; or -1 on failure + @discussion File "fn.fai" will be generated. + */ + int fai_build(const char *fn); + + /*! + @abstract Destroy a faidx_t struct. + @param fai Pointer to the struct to be destroyed + */ + void fai_destroy(faidx_t *fai); + + /*! + @abstract Load index from "fn.fai". + @param fn File name of the FASTA file + */ + faidx_t *fai_load(const char *fn); + + /*! + @abstract Fetch the sequence in a region. + @param fai Pointer to the faidx_t struct + @param reg Region in the format "chr2:20,000-30,000" + @param len Length of the region; -2 if seq not present, -1 general error + @return Pointer to the sequence; null on failure + + @discussion The returned sequence is allocated by malloc family + and should be destroyed by end users by calling free() on it. + */ + char *fai_fetch(const faidx_t *fai, const char *reg, int *len); + + /*! + @abstract Fetch the number of sequences. + @param fai Pointer to the faidx_t struct + @return The number of sequences + */ + int faidx_fetch_nseq(const faidx_t *fai); + + /*! + @abstract Fetch the sequence in a region. + @param fai Pointer to the faidx_t struct + @param c_name Region name + @param p_beg_i Beginning position number (zero-based) + @param p_end_i End position number (zero-based) + @param len Length of the region; -2 if c_name not present, -1 general error + @return Pointer to the sequence; null on failure + + @discussion The returned sequence is allocated by malloc family + and should be destroyed by end users by calling free() on it. + */ + char *faidx_fetch_seq(const faidx_t *fai, const char *c_name, int p_beg_i, int p_end_i, int *len); + + /*! + @abstract Query if sequence is present + @param fai Pointer to the faidx_t struct + @param seq Sequence name + @return 1 if present or 0 if absent + */ + int faidx_has_seq(const faidx_t *fai, const char *seq); + + /*! 
+ @abstract Return number of sequences in fai index + */ + int faidx_nseq(const faidx_t *fai); + + /*! + @abstract Return name of i-th sequence + */ + const char *faidx_iseq(const faidx_t *fai, int i); + + /*! + @abstract Return sequence length, -1 if not present + */ + int faidx_seq_len(const faidx_t *fai, const char *seq); + +#ifdef __cplusplus +} +#endif + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/hfile.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/hfile.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,212 @@ +/* hfile.h -- buffered low-level input/output streams. + + Copyright (C) 2013-2015 Genome Research Ltd. + + Author: John Marshall <jm18@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#ifndef HTSLIB_HFILE_H +#define HTSLIB_HFILE_H + +#include <string.h> + +#include <sys/types.h> + +#include "hts_defs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* These fields are declared here solely for the benefit of the inline functions + below. They may change in future releases. User code should not use them + directly; you should imagine that hFILE is an opaque incomplete type. */ +struct hFILE_backend; +typedef struct hFILE { + char *buffer, *begin, *end, *limit; + const struct hFILE_backend *backend; + off_t offset; + int at_eof:1; + int has_errno; +} hFILE; + +/*! + @abstract Open the named file or URL as a stream + @return An hFILE pointer, or NULL (with errno set) if an error occurred. 
+*/ +hFILE *hopen(const char *filename, const char *mode) HTS_RESULT_USED; + +/*! + @abstract Associate a stream with an existing open file descriptor + @return An hFILE pointer, or NULL (with errno set) if an error occurred. + @notes For socket descriptors (on Windows), mode should contain 's'. +*/ +hFILE *hdopen(int fd, const char *mode) HTS_RESULT_USED; + +/*! + @abstract Report whether the file name or URL denotes remote storage + @return 0 if local, 1 if remote. + @notes "Remote" means involving e.g. explicit network access, with the + implication that callers may wish to cache such files' contents locally. +*/ +int hisremote(const char *filename) HTS_RESULT_USED; + +/*! + @abstract Flush (for output streams) and close the stream + @return 0 if successful, or EOF (with errno set) if an error occurred. +*/ +int hclose(hFILE *fp) HTS_RESULT_USED; + +/*! + @abstract Close the stream, without flushing or propagating errors + @notes For use while cleaning up after an error only. Preserves errno. +*/ +void hclose_abruptly(hFILE *fp); + +/*! + @abstract Return the stream's error indicator + @return Non-zero (in fact, an errno value) if an error has occurred. + @notes This would be called herror() and return true/false to parallel + ferror(3), but a networking-related herror(3) function already exists. */ +static inline int herrno(hFILE *fp) +{ + return fp->has_errno; +} + +/*! + @abstract Clear the stream's error indicator +*/ +static inline void hclearerr(hFILE *fp) +{ + fp->has_errno = 0; +} + +/*! + @abstract Reposition the read/write stream offset + @return The resulting offset within the stream (as per lseek(2)), + or negative if an error occurred. +*/ +off_t hseek(hFILE *fp, off_t offset, int whence) HTS_RESULT_USED; + +/*! + @abstract Report the current stream offset + @return The offset within the stream, starting from zero. +*/ +static inline off_t htell(hFILE *fp) +{ + return fp->offset + (fp->begin - fp->buffer); +} + +/*! 
+ @abstract Read one character from the stream + @return The character read, or EOF on end-of-file or error +*/ +static inline int hgetc(hFILE *fp) +{ + extern int hgetc2(hFILE *); + return (fp->end > fp->begin)? (unsigned char) *(fp->begin++) : hgetc2(fp); +} + +/*! + @abstract Peek at characters to be read without removing them from buffers + @param fp The file stream + @param buffer The buffer to which the peeked bytes will be written + @param nbytes The number of bytes to peek at; limited by the size of the + internal buffer, which could be as small as 4K. + @return The number of bytes peeked, which may be less than nbytes if EOF + is encountered; or negative, if there was an I/O error. + @notes The characters peeked at remain in the stream's internal buffer, + and will be returned by later hread() etc calls. +*/ +ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes) HTS_RESULT_USED; + +/*! + @abstract Read a block of characters from the file + @return The number of bytes read, or negative if an error occurred. + @notes The full nbytes requested will be returned, except as limited + by EOF or I/O errors. +*/ +static inline ssize_t HTS_RESULT_USED +hread(hFILE *fp, void *buffer, size_t nbytes) +{ + extern ssize_t hread2(hFILE *, void *, size_t, size_t); + + size_t n = fp->end - fp->begin; + if (n > nbytes) n = nbytes; + memcpy(buffer, fp->begin, n); + fp->begin += n; + return (n == nbytes)? (ssize_t) n : hread2(fp, buffer, nbytes, n); +} + +/*! + @abstract Write a character to the stream + @return The character written, or EOF if an error occurred. +*/ +static inline int hputc(int c, hFILE *fp) +{ + extern int hputc2(int, hFILE *); + if (fp->begin < fp->limit) *(fp->begin++) = c; + else c = hputc2(c, fp); + return c; +} + +/*! + @abstract Write a string to the stream + @return 0 if successful, or EOF if an error occurred. 
+*/ +static inline int hputs(const char *text, hFILE *fp) +{ + extern int hputs2(const char *, size_t, size_t, hFILE *); + + size_t nbytes = strlen(text), n = fp->limit - fp->begin; + if (n > nbytes) n = nbytes; + memcpy(fp->begin, text, n); + fp->begin += n; + return (n == nbytes)? 0 : hputs2(text, nbytes, n, fp); +} + +/*! + @abstract Write a block of characters to the file + @return Either nbytes, or negative if an error occurred. + @notes In the absence of I/O errors, the full nbytes will be written. +*/ +static inline ssize_t HTS_RESULT_USED +hwrite(hFILE *fp, const void *buffer, size_t nbytes) +{ + extern ssize_t hwrite2(hFILE *, const void *, size_t, size_t); + + size_t n = fp->limit - fp->begin; + if (n > nbytes) n = nbytes; + memcpy(fp->begin, buffer, n); + fp->begin += n; + return (n==nbytes)? (ssize_t) n : hwrite2(fp, buffer, nbytes, n); +} + +/*! + @abstract For writing streams, flush buffered output to the underlying stream + @return 0 if successful, or EOF if an error occurred. +*/ +int hflush(hFILE *fp) HTS_RESULT_USED; + +#ifdef __cplusplus +} +#endif + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/hts.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/hts.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| b'@@ -0,0 +1,456 @@\n+/* hts.h -- format-neutral I/O, indexing, and iterator API functions.\n+\n+ Copyright (C) 2012-2014 Genome Research Ltd.\n+ Copyright (C) 2012 Broad Institute.\n+\n+ Author: Heng Li <lh3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#ifndef HTSLIB_HTS_H\n+#define HTSLIB_HTS_H\n+\n+#include <stddef.h>\n+#include <stdint.h>\n+\n+#ifndef HTS_BGZF_TYPEDEF\n+typedef struct BGZF BGZF;\n+#define HTS_BGZF_TYPEDEF\n+#endif\n+struct cram_fd;\n+struct hFILE;\n+\n+#ifndef KSTRING_T\n+#define KSTRING_T kstring_t\n+typedef struct __kstring_t {\n+ size_t l, m;\n+ char *s;\n+} kstring_t;\n+#endif\n+\n+#ifndef kroundup32\n+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))\n+#endif\n+\n+/**\n+ * hts_expand() - expands memory block pointed to by $ptr;\n+ * hts_expand0() the latter sets the newly allocated part to 0.\n+ *\n+ * @param n requested number of elements of type type_t\n+ * @param m size of memory allocated\n+ */\n+#define hts_expand(type_t, n, m, ptr) if ((n) > (m)) { \\\n+ (m) = (n); kroundup32(m); \\\n+ (ptr) = (type_t*)realloc((ptr), (m) * sizeof(type_t)); \\\n+ }\n+#define hts_expand0(type_t, n, m, ptr) if ((n) > (m)) { \\\n+ int t = (m); (m) = (n); kroundup32(m); \\\n+ (ptr) = (type_t*)realloc((ptr), (m) * sizeof(type_t)); \\\n+ memset(((type_t*)ptr)+t,0,sizeof(type_t)*((m)-t)); \\\n+ }\n+\n+/************\n+ * File I/O *\n+ ************/\n+\n+// Add new entries only at the end (but before the *_maximum entry)\n+// of these enums, as their numbering is part of the htslib ABI.\n+\n+enum htsFormatCategory {\n+ unknown_category,\n+ sequence_data, // Sequence data -- SAM, BAM, CRAM, etc\n+ variant_data, // Variant calling data -- VCF, BCF, etc\n+ index_file, // Index file associated with some data file\n+ region_list, // Coordinate intervals or regions -- BED, etc\n+ category_maximum = 32767\n+};\n+\n+enum htsExactFormat {\n+ unknown_format,\n+ binary_format, text_format,\n+ sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed,\n+ format_maximum = 32767\n+};\n+\n+enum htsCompression {\n+ no_compression, gzip, bgzf, custom,\n+ compression_maximum = 32767\n+};\n+\n+typedef struct htsFormat {\n+ enum htsFormatCategory category;\n+ enum 
htsExactFormat format;\n+ struct { short major, minor; } version;\n+ enum htsCompression compression;\n+ short compression_level; // currently unused\n+ void *specific; // currently unused\n+} htsFormat;\n+\n+// Maintainers note htsFile cannot be an opaque structure because some of its\n+// fields are part of libhts.so\'s ABI (hence these fields must not be moved):\n+// - fp is used in the public sam_itr_next()/etc macros\n+// - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1\n+// - is_write and is_cram are used directly in samtools <= 1.1\n+// - fp is used directly in samtools (up to and including current develop)\n+// - line is used directly in bcftools (up to and includ'..b' curr_beg, curr_end;\n+ uint64_t curr_off;\n+ hts_pair64_t *off;\n+ hts_readrec_func *readrec;\n+ struct {\n+ int n, m;\n+ int *a;\n+ } bins;\n+} hts_itr_t;\n+\n+#ifdef __cplusplus\n+extern "C" {\n+#endif\n+\n+ #define hts_bin_first(l) (((1<<(((l)<<1) + (l))) - 1) / 7)\n+ #define hts_bin_parent(l) (((l) - 1) >> 3)\n+\n+ hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls);\n+ void hts_idx_destroy(hts_idx_t *idx);\n+ int hts_idx_push(hts_idx_t *idx, int tid, int beg, int end, uint64_t offset, int is_mapped);\n+ void hts_idx_finish(hts_idx_t *idx, uint64_t final_offset);\n+\n+ void hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt);\n+ hts_idx_t *hts_idx_load(const char *fn, int fmt);\n+\n+ uint8_t *hts_idx_get_meta(hts_idx_t *idx, int *l_meta);\n+ void hts_idx_set_meta(hts_idx_t *idx, int l_meta, uint8_t *meta, int is_copy);\n+\n+ int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped);\n+ uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx);\n+\n+ const char *hts_parse_reg(const char *s, int *beg, int *end);\n+ hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, int beg, int end, hts_readrec_func *readrec);\n+ void hts_itr_destroy(hts_itr_t *iter);\n+\n+ typedef int (*hts_name2id_f)(void*, const 
char*);\n+ typedef const char *(*hts_id2name_f)(void*, int);\n+ typedef hts_itr_t *hts_itr_query_func(const hts_idx_t *idx, int tid, int beg, int end, hts_readrec_func *readrec);\n+\n+ hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec);\n+ int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data);\n+ const char **hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr); // free only the array, not the values\n+\n+ /**\n+ * hts_file_type() - Convenience function to determine file type\n+ * DEPRECATED: This function has been replaced by hts_detect_format().\n+ * It and these FT_* macros will be removed in a future HTSlib release.\n+ */\n+ #define FT_UNKN 0\n+ #define FT_GZ 1\n+ #define FT_VCF 2\n+ #define FT_VCF_GZ (FT_GZ|FT_VCF)\n+ #define FT_BCF (1<<2)\n+ #define FT_BCF_GZ (FT_GZ|FT_BCF)\n+ #define FT_STDIN (1<<3)\n+ int hts_file_type(const char *fname);\n+\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+static inline int hts_reg2bin(int64_t beg, int64_t end, int min_shift, int n_lvls)\n+{\n+ int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7;\n+ for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l))\n+ if (beg>>s == end>>s) return t + (beg>>s);\n+ return 0;\n+}\n+\n+static inline int hts_bin_bot(int bin, int n_lvls)\n+{\n+ int l, b;\n+ for (l = 0, b = bin; b; ++l, b = hts_bin_parent(b)); // compute the level of bin\n+ return (bin - hts_bin_first(l)) << (n_lvls - l) * 3;\n+}\n+\n+/**************\n+ * Endianness *\n+ **************/\n+\n+static inline int ed_is_big(void)\n+{\n+ long one= 1;\n+ return !(*((char *)(&one)));\n+}\n+static inline uint16_t ed_swap_2(uint16_t v)\n+{\n+ return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8));\n+}\n+static inline void *ed_swap_2p(void *x)\n+{\n+ *(uint16_t*)x = ed_swap_2(*(uint16_t*)x);\n+ return x;\n+}\n+static inline uint32_t ed_swap_4(uint32_t v)\n+{\n+ v = ((v 
& 0x0000FFFFU) << 16) | (v >> 16);\n+ return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8);\n+}\n+static inline void *ed_swap_4p(void *x)\n+{\n+ *(uint32_t*)x = ed_swap_4(*(uint32_t*)x);\n+ return x;\n+}\n+static inline uint64_t ed_swap_8(uint64_t v)\n+{\n+ v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32);\n+ v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16);\n+ return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8);\n+}\n+static inline void *ed_swap_8p(void *x)\n+{\n+ *(uint64_t*)x = ed_swap_8(*(uint64_t*)x);\n+ return x;\n+}\n+\n+#endif\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/hts_defs.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/hts_defs.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
/*  hts_defs.h -- Miscellaneous definitions.

    Copyright (C) 2013-2014 Genome Research Ltd.

    Author: John Marshall <jm18@sanger.ac.uk>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.  */

#ifndef HTSLIB_HTS_DEFS_H
#define HTSLIB_HTS_DEFS_H

/* HTS_NORETURN: expands to the noreturn function attribute on clang >= 2
   or GCC >= 3, and to nothing on any other compiler.

   The predefined clang macro is spelled __clang_major__; each version test
   is guarded with `defined` so that no undefined identifier is evaluated
   inside #if (which would silently read as 0 and warn under -Wundef).  */
#if (defined __clang__ && __clang_major__ >= 2) || \
    (defined __GNUC__ && __GNUC__ >= 3)
#define HTS_NORETURN __attribute__ ((__noreturn__))
#else
#define HTS_NORETURN
#endif

/* HTS_RESULT_USED: expands to the warn_unused_result attribute on
   clang >= 3 or GCC >= 4.5, and to nothing otherwise.  */
#if (defined __clang__ && __clang_major__ >= 3) || \
    (defined __GNUC__ && (__GNUC__ > 4 || (__GNUC__==4 && __GNUC_MINOR__ >= 5)))
#define HTS_RESULT_USED __attribute__ ((__warn_unused_result__))
#else
#define HTS_RESULT_USED
#endif

/* HTS_UNUSED: expands to the unused attribute on any clang or GCC, and to
   nothing otherwise.  */
#if defined __clang__ || defined __GNUC__
#define HTS_UNUSED __attribute__ ((__unused__))
#else
#define HTS_UNUSED
#endif

#endif
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/kfunc.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/kfunc.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,75 @@ +/* The MIT License + + Copyright (C) 2010, 2013 Genome Research Ltd. + Copyright (C) 2011 Attractive Chaos <attractor@live.co.uk> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef HTSLIB_KFUNC_H +#define HTSLIB_KFUNC_H + +/* Log gamma function + * \log{\Gamma(z)} + * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245 + */ +double kf_lgamma(double z); + +/* complementary error function + * \frac{2}{\sqrt{\pi}} \int_x^{\infty} e^{-t^2} dt + * AS66, 2nd algorithm, http://lib.stat.cmu.edu/apstat/66 + */ +double kf_erfc(double x); + +/* The following computes regularized incomplete gamma functions. + * Formulas are taken from Wiki, with additional input from Numerical + * Recipes in C (for modified Lentz's algorithm) and AS245 + * (http://lib.stat.cmu.edu/apstat/245). 
+ * + * A good online calculator is available at: + * + * http://www.danielsoper.com/statcalc/calc23.aspx + * + * It calculates upper incomplete gamma function, which equals + * kf_gammaq(s,z)*tgamma(s). + */ + +double kf_gammap(double s, double z); +double kf_gammaq(double s, double z); + +/* Regularized incomplete beta function. The method is taken from + * Numerical Recipe in C, 2nd edition, section 6.4. The following web + * page calculates the incomplete beta function, which equals + * kf_betai(a,b,x) * gamma(a) * gamma(b) / gamma(a+b): + * + * http://www.danielsoper.com/statcalc/calc36.aspx + */ +double kf_betai(double a, double b, double x); + +/* + * n11 n12 | n1_ + * n21 n22 | n2_ + * -----------+---- + * n_1 n_2 | n + */ +double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two); + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/khash.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/khash.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,619 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008, 2009, 2011 by Attractive Chaos <attractor@live.co.uk>\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/*\n+ An example:\n+\n+#include "khash.h"\n+KHASH_MAP_INIT_INT(32, char)\n+int main() {\n+\tint ret, is_missing;\n+\tkhiter_t k;\n+\tkhash_t(32) *h = kh_init(32);\n+\tk = kh_put(32, h, 5, &ret);\n+\tkh_value(h, k) = 10;\n+\tk = kh_get(32, h, 10);\n+\tis_missing = (k == kh_end(h));\n+\tk = kh_get(32, h, 5);\n+\tkh_del(32, h, k);\n+\tfor (k = kh_begin(h); k != kh_end(h); ++k)\n+\t\tif (kh_exist(h, k)) kh_value(h, k) = 1;\n+\tkh_destroy(32, h);\n+\treturn 0;\n+}\n+*/\n+\n+/*\n+ 2013-05-02 (0.2.8):\n+\n+\t* Use quadratic probing. When the capacity is power of 2, stepping function\n+\t i*(i+1)/2 guarantees to traverse each bucket. 
It is better than double\n+\t hashing on cache performance and is more robust than linear probing.\n+\n+\t In theory, double hashing should be more robust than quadratic probing.\n+\t However, my implementation is probably not for large hash tables, because\n+\t the second hash function is closely tied to the first hash function,\n+\t which reduce the effectiveness of double hashing.\n+\n+\tReference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php\n+\n+ 2011-12-29 (0.2.7):\n+\n+ * Minor code clean up; no actual effect.\n+\n+ 2011-09-16 (0.2.6):\n+\n+\t* The capacity is a power of 2. This seems to dramatically improve the\n+\t speed for simple keys. Thank Zilong Tan for the suggestion. Reference:\n+\n+\t - http://code.google.com/p/ulib/\n+\t - http://nothings.org/computer/judy/\n+\n+\t* Allow to optionally use linear probing which usually has better\n+\t performance for random input. Double hashing is still the default as it\n+\t is more robust to certain non-random input.\n+\n+\t* Added Wang\'s integer hash function (not used by default). 
This hash\n+\t function is more robust to certain non-random input.\n+\n+ 2011-02-14 (0.2.5):\n+\n+ * Allow to declare global functions.\n+\n+ 2009-09-26 (0.2.4):\n+\n+ * Improve portability\n+\n+ 2008-09-19 (0.2.3):\n+\n+\t* Corrected the example\n+\t* Improved interfaces\n+\n+ 2008-09-11 (0.2.2):\n+\n+\t* Improved speed a little in kh_put()\n+\n+ 2008-09-10 (0.2.1):\n+\n+\t* Added kh_clear()\n+\t* Fixed a compiling error\n+\n+ 2008-09-02 (0.2.0):\n+\n+\t* Changed to token concatenation which increases flexibility.\n+\n+ 2008-08-31 (0.1.2):\n+\n+\t* Fixed a bug in kh_get(), which has not been tested previously.\n+\n+ 2008-08-31 (0.1.1):\n+\n+\t* Added destructor\n+*/\n+\n+\n+#ifndef __AC_KHASH_H\n+#define __AC_KHASH_H\n+\n+/*!\n+ @header\n+\n+ Generic hash table library.\n+ */\n+\n+#define AC_VERSION_KHASH_H "0.2.8"\n+\n+#include <stdlib.h>\n+#include <string.h>\n+#include <limits.h>\n+\n+/* compiler specific configuration */\n+\n+#if UINT_MAX == 0xffffffffu\n+typedef unsigned int khint32_t;\n+#elif ULONG_MAX == 0xffffffffu\n+typedef unsigned long khint32_t;\n+#endif\n+\n+#if ULONG_MAX == ULLONG_MAX\n+typedef unsigned long khint64_t;\n+#else\n+typedef unsigned long long khint64_'..b'r to the bucket [khint_t]\n+ @return Value [type of values]\n+ @discussion For hash sets, calling this results in segfault.\n+ */\n+#define kh_val(h, x) ((h)->vals[x])\n+\n+/*! @function\n+ @abstract Alias of kh_val()\n+ */\n+#define kh_value(h, x) ((h)->vals[x])\n+\n+/*! @function\n+ @abstract Get the start iterator\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return The start iterator [khint_t]\n+ */\n+#define kh_begin(h) (khint_t)(0)\n+\n+/*! @function\n+ @abstract Get the end iterator\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return The end iterator [khint_t]\n+ */\n+#define kh_end(h) ((h)->n_buckets)\n+\n+/*! 
@function\n+ @abstract Get the number of elements in the hash table\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return Number of elements in the hash table [khint_t]\n+ */\n+#define kh_size(h) ((h)->size)\n+\n+/*! @function\n+ @abstract Get the number of buckets in the hash table\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return Number of buckets in the hash table [khint_t]\n+ */\n+#define kh_n_buckets(h) ((h)->n_buckets)\n+\n+/*! @function\n+ @abstract Iterate over the entries in the hash table\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @param kvar Variable to which key will be assigned\n+ @param vvar Variable to which value will be assigned\n+ @param code Block of code to execute\n+ */\n+#define kh_foreach(h, kvar, vvar, code) { khint_t __i;\t\t\\\n+\tfor (__i = kh_begin(h); __i != kh_end(h); ++__i) {\t\t\\\n+\t\tif (!kh_exist(h,__i)) continue;\t\t\t\t\t\t\\\n+\t\t(kvar) = kh_key(h,__i);\t\t\t\t\t\t\t\t\\\n+\t\t(vvar) = kh_val(h,__i);\t\t\t\t\t\t\t\t\\\n+\t\tcode;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t} }\n+\n+/*! @function\n+ @abstract Iterate over the values in the hash table\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @param vvar Variable to which value will be assigned\n+ @param code Block of code to execute\n+ */\n+#define kh_foreach_value(h, vvar, code) { khint_t __i;\t\t\\\n+\tfor (__i = kh_begin(h); __i != kh_end(h); ++__i) {\t\t\\\n+\t\tif (!kh_exist(h,__i)) continue;\t\t\t\t\t\t\\\n+\t\t(vvar) = kh_val(h,__i);\t\t\t\t\t\t\t\t\\\n+\t\tcode;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t} }\n+\n+/* More conenient interfaces */\n+\n+/*! @function\n+ @abstract Instantiate a hash set containing integer keys\n+ @param name Name of the hash table [symbol]\n+ */\n+#define KHASH_SET_INIT_INT(name)\t\t\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, khint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)\n+\n+/*! 
@function\n+ @abstract Instantiate a hash map containing integer keys\n+ @param name Name of the hash table [symbol]\n+ @param khval_t Type of values [type]\n+ */\n+#define KHASH_MAP_INIT_INT(name, khval_t)\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, khint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)\n+\n+/*! @function\n+ @abstract Instantiate a hash map containing 64-bit integer keys\n+ @param name Name of the hash table [symbol]\n+ */\n+#define KHASH_SET_INIT_INT64(name)\t\t\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, khint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)\n+\n+/*! @function\n+ @abstract Instantiate a hash map containing 64-bit integer keys\n+ @param name Name of the hash table [symbol]\n+ @param khval_t Type of values [type]\n+ */\n+#define KHASH_MAP_INIT_INT64(name, khval_t)\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, khint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)\n+\n+typedef const char *kh_cstr_t;\n+/*! @function\n+ @abstract Instantiate a hash map containing const char* keys\n+ @param name Name of the hash table [symbol]\n+ */\n+#define KHASH_SET_INIT_STR(name)\t\t\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)\n+\n+/*! @function\n+ @abstract Instantiate a hash map containing const char* keys\n+ @param name Name of the hash table [symbol]\n+ @param khval_t Type of values [type]\n+ */\n+#define KHASH_MAP_INIT_STR(name, khval_t)\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)\n+\n+#endif /* __AC_KHASH_H */\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/khash_str2int.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/khash_str2int.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,133 @@ +/* khash_str2int.h -- C-string to integer hash table. + + Copyright (C) 2013 Genome Research Ltd. + + Author: Petr Danecek <pd3@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#ifndef HTSLIB_KHASH_STR2INT_H +#define HTSLIB_KHASH_STR2INT_H + +#include <htslib/khash.h> + +KHASH_MAP_INIT_STR(str2int, int) + +/* + * Wrappers for khash dictionaries used by mpileup. + */ + +static inline void *khash_str2int_init(void) +{ + return kh_init(str2int); +} + +/* + * Destroy the hash structure, but not the keys + */ +static inline void khash_str2int_destroy(void *_hash) +{ + khash_t(str2int) *hash = (khash_t(str2int)*)_hash; + if (hash) kh_destroy(str2int, hash); // Note that strings are not freed. 
+} + +/* + * Destroys both the hash structure and the keys + */ +static inline void khash_str2int_destroy_free(void *_hash) +{ + khash_t(str2int) *hash = (khash_t(str2int)*)_hash; + khint_t k; + if (hash == 0) return; + for (k = 0; k < kh_end(hash); ++k) + if (kh_exist(hash, k)) free((char*)kh_key(hash, k)); + kh_destroy(str2int, hash); +} + +/* + * Returns 1 if key exists or 0 if not + */ +static inline int khash_str2int_has_key(void *_hash, const char *str) +{ + khash_t(str2int) *hash = (khash_t(str2int)*)_hash; + khint_t k = kh_get(str2int, hash, str); + if ( k == kh_end(hash) ) return 0; + return 1; +} + +/* + * Returns 0 on success and -1 when the key is not present. On success, + * *value is set, unless NULL is passed. + */ +static inline int khash_str2int_get(void *_hash, const char *str, int *value) +{ + khash_t(str2int) *hash = (khash_t(str2int)*)_hash; + khint_t k; + if ( !hash ) return -1; + k = kh_get(str2int, hash, str); + if ( k == kh_end(hash) ) return -1; + if ( !value ) return 0; + *value = kh_val(hash, k); + return 0; +} + +/* + * Add a new string to the dictionary, auto-incrementing the value. + * On success returns the newly inserted integer id, on error -1 + * is returned. Note that the key must continue to exist throughout + * the whole life of _hash. + */ +static inline int khash_str2int_inc(void *_hash, const char *str) +{ + khint_t k; + int ret; + khash_t(str2int) *hash = (khash_t(str2int)*)_hash; + if ( !hash ) return -1; + k = kh_put(str2int, hash, str, &ret); + if (ret == 0) return kh_val(hash, k); + kh_val(hash, k) = kh_size(hash) - 1; + return kh_val(hash, k); +} + +/* + * Set a new key,value pair. On success returns the bin index, on + * error -1 is returned. Note that the key must contnue to exist + * throughout the whole life of _hash. 
+ */ +static inline int khash_str2int_set(void *_hash, const char *str, int value) +{ + khint_t k; + int ret; + khash_t(str2int) *hash = (khash_t(str2int)*)_hash; + if ( !hash ) return -1; + k = kh_put(str2int, hash, str, &ret); + kh_val(hash,k) = value; + return k; +} + +/* + * Return the number of keys in the hash table. + */ +static inline int khash_str2int_size(void *_hash) +{ + khash_t(str2int) *hash = (khash_t(str2int)*)_hash; + return kh_size(hash); +} + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/klist.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/klist.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,121 @@ +/* The MIT License + + Copyright (c) 2008-2009, by Attractive Chaos <attractor@live.co.uk> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +#ifndef _AC_KLIST_H +#define _AC_KLIST_H + +#include <stdlib.h> + +#define KMEMPOOL_INIT(name, kmptype_t, kmpfree_f) \ + typedef struct { \ + size_t cnt, n, max; \ + kmptype_t **buf; \ + } kmp_##name##_t; \ + static inline kmp_##name##_t *kmp_init_##name(void) { \ + return calloc(1, sizeof(kmp_##name##_t)); \ + } \ + static inline void kmp_destroy_##name(kmp_##name##_t *mp) { \ + size_t k; \ + for (k = 0; k < mp->n; ++k) { \ + kmpfree_f(mp->buf[k]); free(mp->buf[k]); \ + } \ + free(mp->buf); free(mp); \ + } \ + static inline kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \ + ++mp->cnt; \ + if (mp->n == 0) return calloc(1, sizeof(kmptype_t)); \ + return mp->buf[--mp->n]; \ + } \ + static inline void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \ + --mp->cnt; \ + if (mp->n == mp->max) { \ + mp->max = mp->max? mp->max<<1 : 16; \ + mp->buf = realloc(mp->buf, sizeof(kmptype_t *) * mp->max); \ + } \ + mp->buf[mp->n++] = p; \ + } + +#define kmempool_t(name) kmp_##name##_t +#define kmp_init(name) kmp_init_##name() +#define kmp_destroy(name, mp) kmp_destroy_##name(mp) +#define kmp_alloc(name, mp) kmp_alloc_##name(mp) +#define kmp_free(name, mp, p) kmp_free_##name(mp, p) + +#define KLIST_INIT(name, kltype_t, kmpfree_t) \ + struct __kl1_##name { \ + kltype_t data; \ + struct __kl1_##name *next; \ + }; \ + typedef struct __kl1_##name kl1_##name; \ + KMEMPOOL_INIT(name, kl1_##name, kmpfree_t) \ + typedef struct { \ + kl1_##name *head, *tail; \ + kmp_##name##_t *mp; \ + size_t size; \ + } kl_##name##_t; \ + static inline kl_##name##_t *kl_init_##name(void) { \ + kl_##name##_t *kl = calloc(1, sizeof(kl_##name##_t)); \ + kl->mp = kmp_init(name); \ + kl->head = kl->tail = kmp_alloc(name, kl->mp); \ + kl->head->next = 0; \ + return kl; \ + } \ + static inline void kl_destroy_##name(kl_##name##_t *kl) { \ + kl1_##name *p; \ + for (p = kl->head; p != kl->tail; p = p->next) \ + kmp_free(name, kl->mp, p); \ + kmp_free(name, kl->mp, p); \ + kmp_destroy(name, kl->mp); \ + 
free(kl); \ + } \ + static inline kltype_t *kl_pushp_##name(kl_##name##_t *kl) { \ + kl1_##name *q, *p = kmp_alloc(name, kl->mp); \ + q = kl->tail; p->next = 0; kl->tail->next = p; kl->tail = p; \ + ++kl->size; \ + return &q->data; \ + } \ + static inline int kl_shift_##name(kl_##name##_t *kl, kltype_t *d) { \ + kl1_##name *p; \ + if (kl->head->next == 0) return -1; \ + --kl->size; \ + p = kl->head; kl->head = kl->head->next; \ + if (d) *d = p->data; \ + kmp_free(name, kl->mp, p); \ + return 0; \ + } + +#define kliter_t(name) kl1_##name +#define klist_t(name) kl_##name##_t +#define kl_val(iter) ((iter)->data) +#define kl_next(iter) ((iter)->next) +#define kl_begin(kl) ((kl)->head) +#define kl_end(kl) ((kl)->tail) + +#define kl_init(name) kl_init_##name() +#define kl_destroy(name, kl) kl_destroy_##name(kl) +#define kl_pushp(name, kl) kl_pushp_##name(kl) +#define kl_shift(name, kl, d) kl_shift_##name(kl, d) + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/knetfile.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/knetfile.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,101 @@ +/* The MIT License + + Copyright (c) 2008 by Genome Research Ltd (GRL). + 2010 by Attractive Chaos <attractor@live.co.uk> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. 
+*/ + +#ifndef KNETFILE_H +#define KNETFILE_H + +#include <stdint.h> +#include <fcntl.h> + +#ifndef _WIN32 +#define netread(fd, ptr, len) read(fd, ptr, len) +#define netwrite(fd, ptr, len) write(fd, ptr, len) +#define netclose(fd) close(fd) +#else +#include <winsock2.h> +#define netread(fd, ptr, len) recv(fd, ptr, len, 0) +#define netwrite(fd, ptr, len) send(fd, ptr, len, 0) +#define netclose(fd) closesocket(fd) +#endif + +// FIXME: currently I/O is unbuffered + +#define KNF_TYPE_LOCAL 1 +#define KNF_TYPE_FTP 2 +#define KNF_TYPE_HTTP 3 + +typedef struct knetFile_s { + int type, fd; + int64_t offset; + char *host, *port; + + // the following are for FTP only + int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; + char *response, *retr, *size_cmd; + int64_t seek_offset; // for lazy seek + int64_t file_size; + + // the following are for HTTP only + char *path, *http_host; +} knetFile; + +#define knet_tell(fp) ((fp)->offset) +#define knet_fileno(fp) ((fp)->fd) + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _WIN32 + int knet_win32_init(); + void knet_win32_destroy(); +#endif + + knetFile *knet_open(const char *fn, const char *mode); + + /* + This only works with local files. + */ + knetFile *knet_dopen(int fd, const char *mode); + + /* + If ->is_ready==0, this routine updates ->fd; otherwise, it simply + reads from ->fd. + */ + ssize_t knet_read(knetFile *fp, void *buf, size_t len); + + /* + This routine only sets ->offset and ->is_ready=0. It does not + communicate with the FTP server. + */ + off_t knet_seek(knetFile *fp, off_t off, int whence); + int knet_close(knetFile *fp); + +#ifdef __cplusplus +} +#endif + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/kseq.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/kseq.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,253 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008, 2009, 2011 Attractive Chaos <attractor@live.co.uk>\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/* Last Modified: 05MAR2012 */\n+\n+#ifndef AC_KSEQ_H\n+#define AC_KSEQ_H\n+\n+#include <ctype.h>\n+#include <string.h>\n+#include <stdlib.h>\n+\n+#define KS_SEP_SPACE 0 // isspace(): \\t, \\n, \\v, \\f, \\r\n+#define KS_SEP_TAB 1 // isspace() && !\' \'\n+#define KS_SEP_LINE 2 // line separator: "\\n" (Unix) or "\\r\\n" (Windows)\n+#define KS_SEP_MAX 2\n+\n+#define __KS_TYPE(type_t) \\\n+\ttypedef struct __kstream_t { \\\n+\t\tint begin, end; \\\n+\t\tint is_eof:2, bufsize:30; \\\n+ uint64_t seek_pos; \\\n+\t\ttype_t f; \\\n+\t\tunsigned char *buf; \\\n+\t} kstream_t;\n+\n+#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)\n+#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)\n+\n+#define __KS_BASIC(SCOPE, type_t, __bufsize) \\\n+\tSCOPE kstream_t 
*ks_init(type_t f) \\\n+\t{ \\\n+\t\tkstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t)); \\\n+\t\tks->f = f; ks->bufsize = __bufsize; \\\n+\t\tks->buf = (unsigned char*)malloc(__bufsize); \\\n+\t\treturn ks; \\\n+\t} \\\n+\tSCOPE void ks_destroy(kstream_t *ks) \\\n+\t{ \\\n+\t\tif (!ks) return; \\\n+\t\tfree(ks->buf); \\\n+\t\tfree(ks); \\\n+\t}\n+\n+#define __KS_INLINED(__read) \\\n+\tstatic inline int ks_getc(kstream_t *ks) \\\n+\t{ \\\n+\t\tif (ks->is_eof && ks->begin >= ks->end) return -1; \\\n+\t\tif (ks->begin >= ks->end) { \\\n+\t\t\tks->begin = 0; \\\n+\t\t\tks->end = __read(ks->f, ks->buf, ks->bufsize); \\\n+\t\t\tif (ks->end == 0) { ks->is_eof = 1; return -1; } \\\n+\t\t} \\\n+ ks->seek_pos++; \\\n+\t\treturn (int)ks->buf[ks->begin++]; \\\n+\t} \\\n+\tstatic inline int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \\\n+\t{ return ks_getuntil2(ks, delimiter, str, dret, 0); }\n+\n+#ifndef KSTRING_T\n+#define KSTRING_T kstring_t\n+typedef struct __kstring_t {\n+\tsize_t l, m;\n+\tchar *s;\n+} kstring_t;\n+#endif\n+\n+#ifndef kroundup32\n+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))\n+#endif\n+\n+#define __KS_GETUNTIL(SCOPE, __read) \\\n+\tSCOPE int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append) \\\n+\t{ \\\n+\t\tint gotany = 0; \\\n+\t\tif (dret) *dret = 0; \\\n+\t\tstr->l = append? 
str->l : 0; \\\n+ uint64_t seek_pos = str->l; \\\n+\t\tfor (;;) { \\\n+\t\t\tint i; \\\n+\t\t\tif (ks->begin >= ks->end) { \\\n+\t\t\t\tif (!ks->is_eof) { \\\n+\t\t\t\t\tks->begin = 0; \\\n+\t\t\t\t\tks->end = __read(ks->f, ks->buf, ks->bufsize); \\\n+\t\t\t\t\tif (ks->end == 0) { ks->is_eof = 1; break; } \\\n+\t\t\t\t} else break; \\\n+\t\t\t} \\\n+\t\t\tif (delimiter == KS_SEP_LINE) { \\\n+\t\t\t\tfor (i = ks->begin; i < ks->end; ++i) \\\n+\t\t\t\t\tif (ks->buf[i] == \'\\n\') break; \\\n+\t\t\t} else if (delimiter > KS_SEP_MAX) { \\\n+\t\t\t\tfor (i = ks->begin; i < ks->end; ++i) \\\n+\t\t\t\t\tif (ks->buf[i] == delimiter) break; \\\n+\t\t\t} else if (delimiter == KS_SEP_SPACE) { \\\n+\t\t\t\tfor (i = ks->begin; i < ks->end; ++i) \\\n+\t\t\t\t\tif (isspace(ks->buf[i])) break; \\\n+\t\t\t} else if (delimiter == KS_SEP_TAB)'..b"M_INIT(type_t, __read, __bufsize) KSTREAM_INIT2(static, type_t, __read, __bufsize)\n+\n+#define KSTREAM_DECLARE(type_t, __read) \\\n+\t__KS_TYPE(type_t) \\\n+\textern int ks_getuntil2(kstream_t *ks, int delimiter, kstring_t *str, int *dret, int append); \\\n+\textern kstream_t *ks_init(type_t f); \\\n+\textern void ks_destroy(kstream_t *ks); \\\n+\t__KS_INLINED(__read)\n+\n+/******************\n+ * FASTA/Q parser *\n+ ******************/\n+\n+#define kseq_rewind(ks) ((ks)->last_char = (ks)->f->is_eof = (ks)->f->begin = (ks)->f->end = 0)\n+\n+#define __KSEQ_BASIC(SCOPE, type_t)\t\t\t\t\t\t\t\t\t\t\\\n+\tSCOPE kseq_t *kseq_init(type_t fd)\t\t\t\t\t\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tkseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));\t\t\t\t\t\\\n+\t\ts->f = ks_init(fd);\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\treturn s;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tSCOPE void kseq_destroy(kseq_t *ks)\t\t\t\t\t\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (!ks) return;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tfree(ks->name.s); free(ks->comment.s); free(ks->seq.s);\tfree(ks->qual.s); 
\\\n+\t\tks_destroy(ks->f);\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tfree(ks);\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\n+\n+/* Return value:\n+ >=0 length of the sequence (normal)\n+ -1 end-of-file\n+ -2 truncated quality string\n+ */\n+#define __KSEQ_READ(SCOPE) \\\n+\tSCOPE int kseq_read(kseq_t *seq) \\\n+\t{ \\\n+\t\tint c; \\\n+\t\tkstream_t *ks = seq->f; \\\n+\t\tif (seq->last_char == 0) { /* then jump to the next header line */ \\\n+\t\t\twhile ((c = ks_getc(ks)) != -1 && c != '>' && c != '@'); \\\n+\t\t\tif (c == -1) return -1; /* end of file */ \\\n+\t\t\tseq->last_char = c; \\\n+\t\t} /* else: the first header char has been read in the previous call */ \\\n+\t\tseq->comment.l = seq->seq.l = seq->qual.l = 0; /* reset all members */ \\\n+\t\tif (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1; /* normal exit: EOF */ \\\n+\t\tif (c != '\\n') ks_getuntil(ks, KS_SEP_LINE, &seq->comment, 0); /* read FASTA/Q comment */ \\\n+\t\tif (seq->seq.s == 0) { /* we can do this in the loop below, but that is slower */ \\\n+\t\t\tseq->seq.m = 256; \\\n+\t\t\tseq->seq.s = (char*)malloc(seq->seq.m); \\\n+\t\t} \\\n+\t\twhile ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \\\n+\t\t\tif (c == '\\n') continue; /* skip empty lines */ \\\n+\t\t\tseq->seq.s[seq->seq.l++] = c; /* this is safe: we always have enough space for 1 char */ \\\n+\t\t\tks_getuntil2(ks, KS_SEP_LINE, &seq->seq, 0, 1); /* read the rest of the line */ \\\n+\t\t} \\\n+\t\tif (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */\t\\\n+\t\tif (seq->seq.l + 1 >= seq->seq.m) { /* seq->seq.s[seq->seq.l] below may be out of boundary */ \\\n+\t\t\tseq->seq.m = seq->seq.l + 2; \\\n+\t\t\tkroundup32(seq->seq.m); /* rounded to the next closest 2^k */ \\\n+\t\t\tseq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \\\n+\t\t} \\\n+\t\tseq->seq.s[seq->seq.l] = 0;\t/* null terminated string */ \\\n+\t\tif (c != '+') return seq->seq.l; /* FASTA */ \\\n+\t\tif (seq->qual.m < seq->seq.m) 
{\t/* allocate memory for qual in case insufficient */ \\\n+\t\t\tseq->qual.m = seq->seq.m; \\\n+\t\t\tseq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m); \\\n+\t\t} \\\n+\t\twhile ((c = ks_getc(ks)) != -1 && c != '\\n'); /* skip the rest of '+' line */ \\\n+\t\tif (c == -1) return -2; /* error: no quality string */ \\\n+\t\twhile (ks_getuntil2(ks, KS_SEP_LINE, &seq->qual, 0, 1) >= 0 && seq->qual.l < seq->seq.l); \\\n+\t\tseq->last_char = 0;\t/* we have not come to the next header line */ \\\n+\t\tif (seq->seq.l != seq->qual.l) return -2; /* error: qual string is of a different length */ \\\n+\t\treturn seq->seq.l; \\\n+\t}\n+\n+#define __KSEQ_TYPE(type_t)\t\t\t\t\t\t\\\n+\ttypedef struct {\t\t\t\t\t\t\t\\\n+\t\tkstring_t name, comment, seq, qual;\t\t\\\n+\t\tint last_char;\t\t\t\t\t\t\t\\\n+\t\tkstream_t *f;\t\t\t\t\t\t\t\\\n+\t} kseq_t;\n+\n+#define KSEQ_INIT2(SCOPE, type_t, __read)\t\t\\\n+\tKSTREAM_INIT(type_t, __read, 16384)\t\t\t\\\n+\t__KSEQ_TYPE(type_t)\t\t\t\t\t\t\t\\\n+\t__KSEQ_BASIC(SCOPE, type_t)\t\t\t\t\t\\\n+\t__KSEQ_READ(SCOPE)\n+\n+#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)\n+\n+#define KSEQ_DECLARE(type_t) \\\n+\t__KS_TYPE(type_t) \\\n+\t__KSEQ_TYPE(type_t) \\\n+\textern kseq_t *kseq_init(type_t fd); \\\n+\tvoid kseq_destroy(kseq_t *ks); \\\n+\tint kseq_read(kseq_t *seq);\n+\n+#endif\n" |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/ksort.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/ksort.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,285 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Genome Research Ltd (GRL).\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/* Contact: Heng Li <lh3@sanger.ac.uk> */\n+\n+/*\n+ 2012-12-11 (0.1.4):\n+\n+ * Defined __ks_insertsort_##name as static to compile with C99.\n+\n+ 2008-11-16 (0.1.4):\n+\n+ * Fixed a bug in introsort() that happens in rare cases.\n+\n+ 2008-11-05 (0.1.3):\n+\n+ * Fixed a bug in introsort() for complex comparisons.\n+\n+\t* Fixed a bug in mergesort(). The previous version is not stable.\n+\n+ 2008-09-15 (0.1.2):\n+\n+\t* Accelerated introsort. 
On my Mac (not on another Linux machine),\n+\t my implementation is as fast as std::sort on random input.\n+\n+\t* Added combsort and in introsort, switch to combsort if the\n+\t recursion is too deep.\n+\n+ 2008-09-13 (0.1.1):\n+\n+\t* Added k-small algorithm\n+\n+ 2008-09-05 (0.1.0):\n+\n+\t* Initial version\n+\n+*/\n+\n+#ifndef AC_KSORT_H\n+#define AC_KSORT_H\n+\n+#include <stdlib.h>\n+#include <string.h>\n+\n+typedef struct {\n+\tvoid *left, *right;\n+\tint depth;\n+} ks_isort_stack_t;\n+\n+#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }\n+\n+#define KSORT_INIT(name, type_t, __sort_lt)\t\t\t\t\t\t\t\t\\\n+\tvoid ks_mergesort_##name(size_t n, type_t array[], type_t temp[])\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ttype_t *a2[2], *a, *b;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tint curr, shift;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ta2[0] = array;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ta2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n);\t\t\\\n+\t\tfor (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) {\t\t\t\\\n+\t\t\ta = a2[curr]; b = a2[1-curr];\t\t\t\t\t\t\t\t\\\n+\t\t\tif (shift == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\ttype_t *p = b, *i, *eb = a + n;\t\t\t\t\t\t\t\\\n+\t\t\t\tfor (i = a; i < eb; i += 2) {\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (i == eb - 1) *p++ = *i;\t\t\t\t\t\t\t\\\n+\t\t\t\t\telse {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tif (__sort_lt(*(i+1), *i)) {\t\t\t\t\t\\\n+\t\t\t\t\t\t\t*p++ = *(i+1); *p++ = *i;\t\t\t\t\t\\\n+\t\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\t\t*p++ = *i; *p++ = *(i+1);\t\t\t\t\t\\\n+\t\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tsize_t i, step = 1ul<<shift;\t\t\t\t\t\t\t\\\n+\t\t\t\tfor (i = 0; i < n; i += step<<1) {\t\t\t\t\t\t\\\n+\t\t\t\t\ttype_t *p, *j, *k, *ea, *eb;\t\t\t\t\t\t\\\n+\t\t\t\t\tif (n < i + step) 
{\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tea = a + n; eb = a;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tea = a + i + step;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\teb = a + (n < i + (step<<1)? n : i + (step<<1)); \\\n+\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tj = a + i; k = a + i + step; p = b + i;\t\t\t\t\\\n+\t\t\t\t\twhile (j < ea && k < eb) {\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tif (__sort_lt(*k, *j)) *p++ = *k++;\t\t\t\t\\\n+\t\t\t\t\t\telse *p++ = *j++;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\twhile (j < ea) *p++ = *j++;\t\t\t\t\t\t\t\\\n+\t\t\t\t\twhile (k < eb) *p++ = *k++;\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tcurr = 1 - curr;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (curr == 1) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\ttype_t *p = a2[0], *i = a2[1], *eb = array + n;\t\t\t\t\\\n+\t\t\tfor (; p < eb; ++i) *p++ = *i;\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (temp == 0) free(a2[1]);\t\t\t\t\t\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tv'..b'mp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \\\n+\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tfor (d = 2; 1ul<<d < n; ++d);\t\t\t\t\t\t\t\t\t\\\n+\t\tstack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \\\n+\t\ttop = stack; s = a; t = a + (n-1); d <<= 1;\t\t\t\t\t\t\\\n+\t\twhile (1) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (s < t) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (--d == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tks_combsort_##name(t - s + 1, s);\t\t\t\t\t\\\n+\t\t\t\t\tt = s;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tcontinue;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\ti = s; j = t; k = i + ((j-i)>>1) + 1;\t\t\t\t\t\\\n+\t\t\t\tif (__sort_lt(*k, *i)) {\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (__sort_lt(*k, *j)) k = 
j;\t\t\t\t\t\t\\\n+\t\t\t\t} else k = __sort_lt(*j, *i)? i : j;\t\t\t\t\t\\\n+\t\t\t\trp = *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; }\t\\\n+\t\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tdo ++i; while (__sort_lt(*i, rp));\t\t\t\t\t\\\n+\t\t\t\t\tdo --j; while (i <= j && __sort_lt(rp, *j));\t\t\\\n+\t\t\t\t\tif (j <= i) break;\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tswap_tmp = *i; *i = *j; *j = swap_tmp;\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tswap_tmp = *i; *i = *t; *t = swap_tmp;\t\t\t\t\t\\\n+\t\t\t\tif (i-s > t-i) {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \\\n+\t\t\t\t\ts = t-i > 16? i+1 : t;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \\\n+\t\t\t\t\tt = i-s > 16? i-1 : s;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (top == stack) {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tfree(stack);\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t__ks_insertsort_##name(a, a+n);\t\t\t\t\t\t\\\n+\t\t\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t/* This function is adapted from: http://ndevilla.free.fr/median/ */ \\\n+\t/* 0 <= kk < n */\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\ttype_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk)\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ttype_t *low, *high, *k, *ll, *hh, *mid;\t\t\t\t\t\t\t\\\n+\t\tlow = arr; high = arr + n - 1; k = arr + kk;\t\t\t\t\t\\\n+\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (high <= low) return *k;\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (high == low + 1) {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif 
(__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n+\t\t\t\treturn *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tmid = low + (high - low) / 2;\t\t\t\t\t\t\t\t\\\n+\t\t\tif (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \\\n+\t\t\tif (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n+\t\t\tif (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low);\t\\\n+\t\t\tKSORT_SWAP(type_t, *mid, *(low+1));\t\t\t\t\t\t\t\\\n+\t\t\tll = low + 1; hh = high;\t\t\t\t\t\t\t\t\t\\\n+\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tdo ++ll; while (__sort_lt(*ll, *low));\t\t\t\t\t\\\n+\t\t\t\tdo --hh; while (__sort_lt(*low, *hh));\t\t\t\t\t\\\n+\t\t\t\tif (hh < ll) break;\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tKSORT_SWAP(type_t, *ll, *hh);\t\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tKSORT_SWAP(type_t, *low, *hh);\t\t\t\t\t\t\t\t\\\n+\t\t\tif (hh <= k) low = ll;\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (hh >= k) high = hh - 1;\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tvoid ks_shuffle_##name(size_t n, type_t a[])\t\t\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tint i, j;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tfor (i = n; i > 1; --i) {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\ttype_t tmp;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tj = (int)(drand48() * i);\t\t\t\t\t\t\t\t\t\\\n+\t\t\ttmp = a[j]; a[j] = a[i-1]; a[i-1] = tmp;\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\n+\n+#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)\n+#define ks_introsort(name, n, a) ks_introsort_##name(n, a)\n+#define ks_combsort(name, n, a) ks_combsort_##name(n, a)\n+#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)\n+#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)\n+#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)\n+#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)\n+#define ks_shuffle(name, n, a) 
ks_shuffle_##name(n, a)\n+\n+#define ks_lt_generic(a, b) ((a) < (b))\n+#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)\n+\n+typedef const char *ksstr_t;\n+\n+#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)\n+#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)\n+\n+#endif\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/kstring.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/kstring.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,270 @@ +/* The MIT License + + Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef KSTRING_H +#define KSTRING_H + +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <stdint.h> +#include <stdio.h> + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4) +#define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__printf__, fmt, arg))) +#else +#define KS_ATTR_PRINTF(fmt, arg) +#endif + + +/* kstring_t is a simple non-opaque type whose fields are likely to be + * used directly by user code (but see also ks_str() and ks_len() below). 
+ * A kstring_t object is initialised by either of + * kstring_t str = { 0, 0, NULL }; + * kstring_t str; ...; str.l = str.m = 0; str.s = NULL; + * and either ownership of the underlying buffer should be given away before + * the object disappears (see ks_release() below) or the kstring_t should be + * destroyed with free(str.s); */ +#ifndef KSTRING_T +#define KSTRING_T kstring_t +typedef struct __kstring_t { + size_t l, m; + char *s; +} kstring_t; +#endif + +typedef struct { + uint64_t tab[4]; + int sep, finished; + const char *p; // end of the current token +} ks_tokaux_t; + +#ifdef __cplusplus +extern "C" { +#endif + + int kvsprintf(kstring_t *s, const char *fmt, va_list ap) KS_ATTR_PRINTF(2,0); + int ksprintf(kstring_t *s, const char *fmt, ...) KS_ATTR_PRINTF(2,3); + int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); + char *kstrstr(const char *str, const char *pat, int **_prep); + char *kstrnstr(const char *str, const char *pat, int n, int **_prep); + void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep); + + /* kstrtok() is similar to strtok_r() except that str is not + * modified and both str and sep can be NULL. For efficiency, it is + * actually recommended to set both to NULL in the subsequent calls + * if sep is not changed. 
*/ + char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux); + +#ifdef __cplusplus +} +#endif + +static inline int ks_resize(kstring_t *s, size_t size) +{ + if (s->m < size) { + char *tmp; + s->m = size; + kroundup32(s->m); + if ((tmp = (char*)realloc(s->s, s->m))) + s->s = tmp; + else + return -1; + } + return 0; +} + +static inline char *ks_str(kstring_t *s) +{ + return s->s; +} + +static inline size_t ks_len(kstring_t *s) +{ + return s->l; +} + +// Give ownership of the underlying buffer away to something else (making +// that something else responsible for freeing it), leaving the kstring_t +// empty and ready to be used again, or ready to go out of scope without +// needing free(str.s) to prevent a memory leak. +static inline char *ks_release(kstring_t *s) +{ + char *ss = s->s; + s->l = s->m = 0; + s->s = NULL; + return ss; +} + +static inline int kputsn(const char *p, int l, kstring_t *s) +{ + if (s->l + l + 1 >= s->m) { + char *tmp; + s->m = s->l + l + 2; + kroundup32(s->m); + if ((tmp = (char*)realloc(s->s, s->m))) + s->s = tmp; + else + return EOF; + } + memcpy(s->s + s->l, p, l); + s->l += l; + s->s[s->l] = 0; + return l; +} + +static inline int kputs(const char *p, kstring_t *s) +{ + return kputsn(p, strlen(p), s); +} + +static inline int kputc(int c, kstring_t *s) +{ + if (s->l + 1 >= s->m) { + char *tmp; + s->m = s->l + 2; + kroundup32(s->m); + if ((tmp = (char*)realloc(s->s, s->m))) + s->s = tmp; + else + return EOF; + } + s->s[s->l++] = c; + s->s[s->l] = 0; + return c; +} + +static inline int kputc_(int c, kstring_t *s) +{ + if (s->l + 1 > s->m) { + char *tmp; + s->m = s->l + 1; + kroundup32(s->m); + if ((tmp = (char*)realloc(s->s, s->m))) + s->s = tmp; + else + return EOF; + } + s->s[s->l++] = c; + return 1; +} + +static inline int kputsn_(const void *p, int l, kstring_t *s) +{ + if (s->l + l > s->m) { + char *tmp; + s->m = s->l + l; + kroundup32(s->m); + if ((tmp = (char*)realloc(s->s, s->m))) + s->s = tmp; + else + return EOF; + } + 
memcpy(s->s + s->l, p, l); + s->l += l; + return l; +} + +static inline int kputw(int c, kstring_t *s) +{ + char buf[16]; + int i, l = 0; + unsigned int x = c; + if (c < 0) x = -x; + do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0); + if (c < 0) buf[l++] = '-'; + if (s->l + l + 1 >= s->m) { + char *tmp; + s->m = s->l + l + 2; + kroundup32(s->m); + if ((tmp = (char*)realloc(s->s, s->m))) + s->s = tmp; + else + return EOF; + } + for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; + s->s[s->l] = 0; + return 0; +} + +static inline int kputuw(unsigned c, kstring_t *s) +{ + char buf[16]; + int l, i; + unsigned x; + if (c == 0) return kputc('0', s); + for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0'; + if (s->l + l + 1 >= s->m) { + char *tmp; + s->m = s->l + l + 2; + kroundup32(s->m); + if ((tmp = (char*)realloc(s->s, s->m))) + s->s = tmp; + else + return EOF; + } + for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; + s->s[s->l] = 0; + return 0; +} + +static inline int kputl(long c, kstring_t *s) +{ + char buf[32]; + int i, l = 0; + unsigned long x = c; + if (c < 0) x = -x; + do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0); + if (c < 0) buf[l++] = '-'; + if (s->l + l + 1 >= s->m) { + char *tmp; + s->m = s->l + l + 2; + kroundup32(s->m); + if ((tmp = (char*)realloc(s->s, s->m))) + s->s = tmp; + else + return EOF; + } + for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; + s->s[s->l] = 0; + return 0; +} + +/* + * Returns 's' split by delimiter, with *n being the number of components; + * NULL on failue. + */ +static inline int *ksplit(kstring_t *s, int delimiter, int *n) +{ + int max = 0, *offsets = 0; + *n = ksplit_core(s->s, delimiter, &max, &offsets); + return offsets; +} + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/regidx.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/regidx.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,147 @@ +/* + Copyright (C) 2014 Genome Research Ltd. + + Author: Petr Danecek <pd3@sanger.ac.uk> + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +/* + Regions indexing with an optional payload. Inspired by samtools/bedidx.c. + This code is intended as future replacement of bcf_sr_regions_t. + + Example of usage: + + // Init the parser and print regions. In this example the payload is a + // pointer to a string. For the description of parse_custom and + // free_custom functions, see regidx_parse_f and regidx_free_f below, + // and for working example see test/test-regidx.c. 
+ regidx_t *idx = regidx_init(in_fname,parse_custom,free_custom,sizeof(char*),NULL); + + // Query overlap with chr:from-to + regitr_t itr; + if ( regidx_overlap(idx, chr,from,to, &itr) ) printf("There is an overlap!\n"); + + while ( REGITR_OVERLAP(itr,from,to) ) + { + printf("[%d,%d] overlaps with [%d,%d], payload=%s\n", from,to, + REGITR_START(itr), REGITR_END(itr), REGITR_PAYLOAD(itr,char*)); + itr.i++; + } + + regidx_destroy(regs); +*/ + +#ifndef HTSLIB_REGIDX_H +#define HTSLIB_REGIDX_H + +#include <stdio.h> +#include <inttypes.h> + +typedef struct _regidx_t regidx_t; +typedef struct +{ + uint32_t start, end; +} +reg_t; +typedef struct +{ + int i, n; + reg_t *reg; + void *payload; +} +regitr_t; + +#define REGITR_START(itr) (itr).reg[(itr).i].start +#define REGITR_END(itr) (itr).reg[(itr).i].end +#define REGITR_PAYLOAD(itr,type_t) ((type_t*)(itr).payload)[(itr).i] +#define REGITR_OVERLAP(itr,from,to) (itr.i < itr.n && REGITR_START(itr)<=to && REGITR_END(itr)>=from ) + +/* + * regidx_parse_f - Function to parse one input line, such as regidx_parse_bed + * or regidx_parse_tab below. The function is expected to set `chr_from` and + * `chr_to` to point to first and last character of chromosome name and set + * coordinates `reg->start` and `reg->end` (0-based, inclusive). If + * regidx_init() was called with non-zero payload_size, the `payload` points + * to a memory location of the payload_size and `usr` is data passed to + * regidx_init(). Any memory allocated by the function will be freed by + * regidx_free_f on regidx_destroy(). + * + * Return value: 0 on success, -1 to skip a record, -2 on fatal error. 
+ */ +typedef int (*regidx_parse_f)(const char *line, char **chr_beg, char **chr_end, reg_t *reg, void *payload, void *usr); +typedef void (*regidx_free_f)(void *payload); + +int regidx_parse_bed(const char*,char**,char**,reg_t*,void*,void*); // CHROM,FROM,TO (0-based,right-open) +int regidx_parse_tab(const char*,char**,char**,reg_t*,void*,void*); // CHROM,POS (1-based, inclusive) + +/* + * regidx_init() - creates new index + * @param fname: input file name or NULL if regions will be added one-by-one via regidx_insert() + * @param parsef: regidx_parse_bed, regidx_parse_tab or see description of regidx_parse_f. If NULL, + * the format will be autodected, currently either regidx_parse_tab (the default) or + * regidx_parse_bed (file must be named 'bed' or 'bed.gz') will be used. Note that + * the exact autodetection algorithm will change. + * @param freef: NULL or see description of regidx_parse_f + * @param payload_size: 0 with regidx_parse_bed, regidx_parse_tab or see regidx_parse_f + * @param usr: optional user data passed to regidx_parse_f + * + * Returns index on success or NULL on error. + */ +regidx_t *regidx_init(const char *fname, regidx_parse_f parsef, regidx_free_f freef, size_t payload_size, void *usr); + +/* + * regidx_destroy() - free memory allocated by regidx_init + */ +void regidx_destroy(regidx_t *idx); + +/* + * regidx_overlap() - check overlap of the location chr:from-to with regions + * @param start,end: 0-based start, end coordinate (inclusive) + * @param itr: pointer to iterator, can be NULL if not needed + * + * Returns 0 if there is no overlap or 1 if overlap is found. The overlapping + * regions can be iterated as shown in the example above. + */ +int regidx_overlap(regidx_t *idx, const char *chr, uint32_t start, uint32_t end, regitr_t *itr); + +/* + * regidx_insert() - add a new region. + * + * After last region has been added, call regidx_insert(idx,NULL) to + * build the index. + * + * Returns 0 on success or -1 on error. 
+ */ +int regidx_insert(regidx_t *idx, char *line); + +/* + * regidx_seq_names() - return list of all sequence names + */ +char **regidx_seq_names(regidx_t *idx, int *n); + +/* + * regidx_seq_nregs() - number of regions + * regidx_nregs() - total number of regions + */ +int regidx_seq_nregs(regidx_t *idx, const char *seq); +int regidx_nregs(regidx_t *idx); + +#endif + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/sam.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/sam.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,426 @@\n+/* sam.h -- SAM and BAM file I/O and manipulation.\n+\n+ Copyright (C) 2008, 2009, 2013-2014 Genome Research Ltd.\n+ Copyright (C) 2010, 2012, 2013 Broad Institute.\n+\n+ Author: Heng Li <lh3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. */\n+\n+#ifndef HTSLIB_SAM_H\n+#define HTSLIB_SAM_H\n+\n+#include <stdint.h>\n+#include "hts.h"\n+\n+/**********************\n+ *** SAM/BAM header ***\n+ **********************/\n+\n+/*! 
@typedef\n+ @abstract Structure for the alignment header.\n+ @field n_targets number of reference sequences\n+ @field l_text length of the plain text in the header\n+ @field target_len lengths of the reference sequences\n+ @field target_name names of the reference sequences\n+ @field text plain text\n+ @field sdict header dictionary\n+ */\n+\n+typedef struct {\n+ int32_t n_targets, ignore_sam_err;\n+ uint32_t l_text;\n+ uint32_t *target_len;\n+ int8_t *cigar_tab;\n+ char **target_name;\n+ char *text;\n+ void *sdict;\n+} bam_hdr_t;\n+\n+/****************************\n+ *** CIGAR related macros ***\n+ ****************************/\n+\n+#define BAM_CMATCH 0\n+#define BAM_CINS 1\n+#define BAM_CDEL 2\n+#define BAM_CREF_SKIP 3\n+#define BAM_CSOFT_CLIP 4\n+#define BAM_CHARD_CLIP 5\n+#define BAM_CPAD 6\n+#define BAM_CEQUAL 7\n+#define BAM_CDIFF 8\n+#define BAM_CBACK 9\n+\n+#define BAM_CIGAR_STR "MIDNSHP=XB"\n+#define BAM_CIGAR_SHIFT 4\n+#define BAM_CIGAR_MASK 0xf\n+#define BAM_CIGAR_TYPE 0x3C1A7\n+\n+#define bam_cigar_op(c) ((c)&BAM_CIGAR_MASK)\n+#define bam_cigar_oplen(c) ((c)>>BAM_CIGAR_SHIFT)\n+#define bam_cigar_opchr(c) (BAM_CIGAR_STR[bam_cigar_op(c)])\n+#define bam_cigar_gen(l, o) ((l)<<BAM_CIGAR_SHIFT|(o))\n+\n+/* bam_cigar_type returns a bit flag with:\n+ * bit 1 set if the cigar operation consumes the query\n+ * bit 2 set if the cigar operation consumes the reference\n+ *\n+ * For reference, the unobfuscated truth table for this function is:\n+ * BAM_CIGAR_TYPE QUERY REFERENCE\n+ * --------------------------------\n+ * BAM_CMATCH 1 1\n+ * BAM_CINS 1 0\n+ * BAM_CDEL 0 1\n+ * BAM_CREF_SKIP 0 1\n+ * BAM_CSOFT_CLIP 1 0\n+ * BAM_CHARD_CLIP 0 0\n+ * BAM_CPAD 0 0\n+ * BAM_CEQUAL 1 1\n+ * BAM_CDIFF 1 1\n+ * BAM_CBACK 0 0\n+ * --------------------------------\n+ */\n+#define bam_cigar_type(o) (BAM_CIGAR_TYPE>>((o)<<1)&3) // bit 1: consume query; bit 2: consume reference\n+\n+/*! 
@abstract the read is paired in sequencing, no matter whether it is mapped in a pair */\n+#define BAM_FPAIRED 1\n+/*! @abstract the read is mapped in a proper pair */\n+#define BAM_FPROPER_PAIR 2\n+/*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */\n+#define BAM_FUNMAP 4\n+/*! @abstract the mate is unmapped */\n+#define BAM_FMUNMAP 8\n+/*! @abstract the read is mapped to the reverse strand */\n+#define BAM_FREVERSE 16\n+/*! @abstract the mate is mapped to the revers'..b'close(fp)\n+\n+ int sam_open_mode(char *mode, const char *fn, const char *format);\n+\n+ typedef htsFile samFile;\n+ bam_hdr_t *sam_hdr_parse(int l_text, const char *text);\n+ bam_hdr_t *sam_hdr_read(samFile *fp);\n+ int sam_hdr_write(samFile *fp, const bam_hdr_t *h);\n+\n+ int sam_parse1(kstring_t *s, bam_hdr_t *h, bam1_t *b);\n+ int sam_format1(const bam_hdr_t *h, const bam1_t *b, kstring_t *str);\n+ int sam_read1(samFile *fp, bam_hdr_t *h, bam1_t *b);\n+ int sam_write1(samFile *fp, const bam_hdr_t *h, const bam1_t *b);\n+\n+ /*************************************\n+ *** Manipulating auxiliary fields ***\n+ *************************************/\n+\n+ uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);\n+ int32_t bam_aux2i(const uint8_t *s);\n+ double bam_aux2f(const uint8_t *s);\n+ char bam_aux2A(const uint8_t *s);\n+ char *bam_aux2Z(const uint8_t *s);\n+\n+ void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);\n+ int bam_aux_del(bam1_t *b, uint8_t *s);\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+/**************************\n+ *** Pileup and Mpileup ***\n+ **************************/\n+\n+#if !defined(BAM_NO_PILEUP)\n+\n+/*! 
@typedef\n+ @abstract Structure for one alignment covering the pileup position.\n+ @field b pointer to the alignment\n+ @field qpos position of the read base at the pileup site, 0-based\n+ @field indel indel length; 0 for no indel, positive for ins and negative for del\n+ @field level the level of the read in the "viewer" mode\n+ @field is_del 1 iff the base on the padded read is a deletion\n+ @field is_head ???\n+ @field is_tail ???\n+ @field is_refskip ???\n+ @field aux ???\n+\n+ @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The\n+ difference between the two functions is that the former does not\n+ set bam_pileup1_t::level, while the later does. Level helps the\n+ implementation of alignment viewers, but calculating this has some\n+ overhead.\n+ */\n+typedef struct {\n+ bam1_t *b;\n+ int32_t qpos;\n+ int indel, level;\n+ uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28;\n+} bam_pileup1_t;\n+\n+typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);\n+\n+struct __bam_plp_t;\n+typedef struct __bam_plp_t *bam_plp_t;\n+\n+struct __bam_mplp_t;\n+typedef struct __bam_mplp_t *bam_mplp_t;\n+\n+#ifdef __cplusplus\n+extern "C" {\n+#endif\n+\n+ /**\n+ * bam_plp_init() - sets an iterator over multiple\n+ * @func: see mplp_func in bam_plcmd.c in samtools for an example. 
Expected return\n+ * status: 0 on success, -1 on end, < -1 on non-recoverable errors\n+ * @data: user data to pass to @func\n+ */\n+ bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data);\n+ void bam_plp_destroy(bam_plp_t iter);\n+ int bam_plp_push(bam_plp_t iter, const bam1_t *b);\n+ const bam_pileup1_t *bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);\n+ const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);\n+ void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);\n+ void bam_plp_reset(bam_plp_t iter);\n+\n+ bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);\n+ /**\n+ * bam_mplp_init_overlaps() - if called, mpileup will detect overlapping\n+ * read pairs and for each base pair set the base quality of the\n+ * lower-quality base to zero, thus effectively discarding it from\n+ * calling. If the two bases are identical, the quality of the other base\n+ * is increased to the sum of their qualities (capped at 200), otherwise\n+ * it is multiplied by 0.8.\n+ */\n+ void bam_mplp_init_overlaps(bam_mplp_t iter);\n+ void bam_mplp_destroy(bam_mplp_t iter);\n+ void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);\n+ int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+#endif // ~!defined(BAM_NO_PILEUP)\n+\n+#endif\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/synced_bcf_reader.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/synced_bcf_reader.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,301 @@\n+/* synced_bcf_reader.h -- stream through multiple VCF files.\n+\n+ Copyright (C) 2012-2014 Genome Research Ltd.\n+\n+ Author: Petr Danecek <pd3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. */\n+\n+/*\n+ The synced_bcf_reader allows to keep multiple VCFs open and stream them\n+ using the next_line iterator in a seamless matter without worrying about\n+ chromosomes and synchronizing the sites. 
This is used by vcfcheck to\n+ compare multiple VCFs simultaneously and is used also for merging,\n+ creating intersections, etc.\n+\n+ The synced_bcf_reader also provides API for reading indexed BCF/VCF,\n+ hiding differences in BCF/VCF opening, indexing and reading.\n+\n+\n+ Example of usage:\n+\n+ bcf_srs_t *sr = bcf_sr_init();\n+ for (i=0; i<nfiles; i++)\n+ bcf_sr_add_reader(sr,files[i]);\n+ while ( bcf_sr_next_line(sr) )\n+ {\n+ for (i=0; i<nfiles; i++)\n+ {\n+ bcf1_t *line = bcf_sr_get_line(sr,i);\n+ ...\n+ }\n+ }\n+ bcf_sr_destroy(sr);\n+*/\n+\n+#ifndef HTSLIB_SYNCED_BCF_READER_H\n+#define HTSLIB_SYNCED_BCF_READER_H\n+\n+#include "hts.h"\n+#include "vcf.h"\n+#include "tbx.h"\n+\n+// How should be treated sites with the same position but different alleles\n+#define COLLAPSE_NONE 0 // require the exact same set of alleles in all files\n+#define COLLAPSE_SNPS 1 // allow different alleles, as long as they all are SNPs\n+#define COLLAPSE_INDELS 2 // the same as above, but with indels\n+#define COLLAPSE_ANY 4 // any combination of alleles can be returned by bcf_sr_next_line()\n+#define COLLAPSE_SOME 8 // at least some of the ALTs must match\n+#define COLLAPSE_BOTH (COLLAPSE_SNPS|COLLAPSE_INDELS)\n+\n+typedef struct _bcf_sr_regions_t\n+{\n+ // for reading from tabix-indexed file (big data)\n+ tbx_t *tbx; // tabix index\n+ hts_itr_t *itr; // tabix iterator\n+ kstring_t line; // holder of the current line, set only when reading from tabix-indexed files\n+ htsFile *file;\n+ char *fname;\n+ int is_bin; // is open in binary mode (tabix access)\n+ char **als; // parsed alleles if targets_als set and _regions_match_alleles called\n+ kstring_t als_str; // block of parsed alleles\n+ int nals, mals; // number of set alleles and the size of allocated array\n+ int als_type; // alleles type, currently VCF_SNP or VCF_INDEL\n+\n+ // user handler to deal with skipped regions without a counterpart in VCFs\n+ void (*missed_reg_handler)(struct _bcf_sr_regions_t *, void *);\n+ void 
*missed_reg_data;\n+\n+ // for in-memory regions (small data)\n+ struct _region_t *regs; // the regions\n+\n+ // shared by both tabix-index and in-memory regions\n+ void *seq_hash; // keys: sequence names, values: index to seqs\n+ char **seq_names; // sequence names\n+ int nseqs; // number of sequences (chromosomes) i'..b'-to)\n+ * 1: targets is a tabix indexed file with a list of regions\n+ * (<chr,pos> or <chr,from,to>)\n+ *\n+ * Returns 0 if the call succeeded, or -1 on error.\n+ *\n+ * Both functions behave the same way, unlisted positions will be skipped by\n+ * bcf_sr_next_line(). However, there is an important difference: regions use\n+ * index to jump to desired positions while targets streams the whole files\n+ * and merely skip unlisted positions.\n+ *\n+ * Moreover, bcf_sr_set_targets() accepts an optional parameter $alleles which\n+ * is intepreted as a 1-based column index in the tab-delimited file where\n+ * alleles are listed. This in principle enables to perform the COLLAPSE_*\n+ * logic also with tab-delimited files. However, the current implementation\n+ * considers the alleles merely as a suggestion for prioritizing one of possibly\n+ * duplicate VCF lines. It is up to the caller to examine targets->als if\n+ * perfect match is sought after. Note that the duplicate positions in targets\n+ * file are currently not supported.\n+ * Targets (but not regions) can be prefixed with "^" to request logical complement,\n+ * for example "^X,Y,MT" indicates that sequences X, Y and MT should be skipped.\n+ */\n+int bcf_sr_set_targets(bcf_srs_t *readers, const char *targets, int is_file, int alleles);\n+int bcf_sr_set_regions(bcf_srs_t *readers, const char *regions, int is_file);\n+\n+\n+\n+/*\n+ * bcf_sr_regions_init()\n+ * @regions: regions can be either a comma-separated list of regions\n+ * (chr|chr:pos|chr:from-to|chr:from-) or VCF, BED, or\n+ * tab-delimited file (the default). 
Uncompressed files\n+ * are stored in memory while bgzip-compressed and tabix-indexed\n+ * region files are streamed.\n+ * @is_file: 0: regions is a comma-separated list of regions\n+ * (chr|chr:pos|chr:from-to|chr:from-)\n+ * 1: VCF, BED or tab-delimited file\n+ * @chr, from, to:\n+ * Column indexes of chromosome, start position and end position\n+ * in the tab-delimited file. The positions are 1-based and\n+ * inclusive.\n+ * These parameters are ignored when reading from VCF, BED or\n+ * tabix-indexed files. When end position column is not present,\n+ * supply \'from\' in place of \'to\'. When \'to\' is negative, first\n+ * abs(to) will be attempted and if that fails, \'from\' will be used\n+ * instead.\n+ */\n+bcf_sr_regions_t *bcf_sr_regions_init(const char *regions, int is_file, int chr, int from, int to);\n+void bcf_sr_regions_destroy(bcf_sr_regions_t *regions);\n+\n+/*\n+ * bcf_sr_regions_seek() - seek to the chromosome block\n+ *\n+ * Returns 0 on success or -1 on failure. Sets reg->seq appropriately and\n+ * reg->start,reg->end to -1.\n+ */\n+int bcf_sr_regions_seek(bcf_sr_regions_t *regions, const char *chr);\n+\n+/*\n+ * bcf_sr_regions_next() - retrieves next region. Returns 0 on success and -1\n+ * when all regions have been read. The fields reg->seq, reg->start and\n+ * reg->end are filled with the genomic coordinates on succes or with\n+ * NULL,-1,-1 when no region is available. The coordinates are 0-based,\n+ * inclusive.\n+ */\n+int bcf_sr_regions_next(bcf_sr_regions_t *reg);\n+\n+/*\n+ * bcf_sr_regions_overlap() - checks if the interval <start,end> overlaps any of\n+ * the regions, the coordinates are 0-based, inclusive. 
The coordinate queries\n+ * must come in ascending order.\n+ *\n+ * Returns 0 if the position is in regions; -1 if the position is not in the\n+ * regions and more regions exist; -2 if not in the regions and there are no more\n+ * regions left.\n+ */\n+int bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, int start, int end);\n+\n+/*\n+ * bcf_sr_regions_flush() - calls repeatedly regs->missed_reg_handler() until\n+ * all remaining records are processed.\n+ */\n+void bcf_sr_regions_flush(bcf_sr_regions_t *regs);\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+#endif\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/tbx.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/tbx.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,77 @@ +/* tbx.h -- tabix API functions. + + Copyright (C) 2009, 2012-2014 Genome Research Ltd. + Copyright (C) 2010, 2012 Broad Institute. + + Author: Heng Li <lh3@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#ifndef HTSLIB_TBX_H +#define HTSLIB_TBX_H + +#include "hts.h" + +#define TBX_MAX_SHIFT 31 + +#define TBX_GENERIC 0 +#define TBX_SAM 1 +#define TBX_VCF 2 +#define TBX_UCSC 0x10000 + +typedef struct { + int32_t preset; + int32_t sc, bc, ec; // seq col., beg col. and end col. 
+ int32_t meta_char, line_skip; +} tbx_conf_t; + +typedef struct { + tbx_conf_t conf; + hts_idx_t *idx; + void *dict; +} tbx_t; + +extern tbx_conf_t tbx_conf_gff, tbx_conf_bed, tbx_conf_psltbl, tbx_conf_sam, tbx_conf_vcf; + +#ifdef __cplusplus +extern "C" { +#endif + + #define tbx_itr_destroy(iter) hts_itr_destroy(iter) + #define tbx_itr_queryi(tbx, tid, beg, end) hts_itr_query((tbx)->idx, (tid), (beg), (end), tbx_readrec) + #define tbx_itr_querys(tbx, s) hts_itr_querys((tbx)->idx, (s), (hts_name2id_f)(tbx_name2id), (tbx), hts_itr_query, tbx_readrec) + #define tbx_itr_next(htsfp, tbx, itr, r) hts_itr_next(hts_get_bgzfp(htsfp), (itr), (r), (tbx)) + #define tbx_bgzf_itr_next(bgzfp, tbx, itr, r) hts_itr_next((bgzfp), (itr), (r), (tbx)) + + int tbx_name2id(tbx_t *tbx, const char *ss); + + /* Internal helper function used by tbx_itr_next() */ + BGZF *hts_get_bgzfp(htsFile *fp); + int tbx_readrec(BGZF *fp, void *tbxv, void *sv, int *tid, int *beg, int *end); + + int tbx_index_build(const char *fn, int min_shift, const tbx_conf_t *conf); + tbx_t *tbx_index_load(const char *fn); + const char **tbx_seqnames(tbx_t *tbx, int *n); // free the array but not the values + void tbx_destroy(tbx_t *tbx); + +#ifdef __cplusplus +} +#endif + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/vcf.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/vcf.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,883 @@\n+/* vcf.h -- VCF/BCF API functions.\n+\n+ Copyright (C) 2012, 2013 Broad Institute.\n+ Copyright (C) 2012-2014 Genome Research Ltd.\n+\n+ Author: Heng Li <lh3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+/*\n+ todo:\n+ - make the function names consistent\n+ - provide calls to abstract away structs as much as possible\n+ */\n+\n+#ifndef HTSLIB_VCF_H\n+#define HTSLIB_VCF_H\n+\n+#include <stdint.h>\n+#include <limits.h>\n+#include <assert.h>\n+#include "hts.h"\n+#include "kstring.h"\n+\n+\n+/*****************\n+ * Header struct *\n+ *****************/\n+\n+#define BCF_HL_FLT 0 // header line\n+#define BCF_HL_INFO 1\n+#define BCF_HL_FMT 2\n+#define BCF_HL_CTG 3\n+#define BCF_HL_STR 4 // structured header line TAG=<A=..,B=..>\n+#define BCF_HL_GEN 5 // generic header line\n+\n+#define BCF_HT_FLAG 0 // header type\n+#define BCF_HT_INT 1\n+#define BCF_HT_REAL 2\n+#define BCF_HT_STR 3\n+\n+#define BCF_VL_FIXED 0 // variable length\n+#define BCF_VL_VAR 1\n+#define BCF_VL_A 2\n+#define BCF_VL_G 3\n+#define BCF_VL_R 4\n+\n+/* === Dictionary ===\n+\n+ The header keeps three dictonaries. The first keeps IDs in the\n+ "FILTER/INFO/FORMAT" lines, the second keeps the sequence names and lengths\n+ in the "contig" lines and the last keeps the sample names. bcf_hdr_t::dict[]\n+ is the actual hash table, which is opaque to the end users. In the hash\n+ table, the key is the ID or sample name as a C string and the value is a\n+ bcf_idinfo_t struct. bcf_hdr_t::id[] points to key-value pairs in the hash\n+ table in the order that they appear in the VCF header. bcf_hdr_t::n[] is the\n+ size of the hash table or, equivalently, the length of the id[] arrays.\n+*/\n+\n+#define BCF_DT_ID 0 // dictionary type\n+#define BCF_DT_CTG 1\n+#define BCF_DT_SAMPLE 2\n+\n+// Complete textual representation of a header line\n+typedef struct {\n+ int type; // One of the BCF_HL_* type\n+ char *key; // The part before \'=\', i.e. 
FILTER/INFO/FORMAT/contig/fileformat etc.\n+ char *value; // Set only for generic lines, NULL for FILTER/INFO, etc.\n+ int nkeys; // Number of structured fields\n+ char **keys, **vals; // The key=value pairs\n+} bcf_hrec_t;\n+\n+typedef struct {\n+ uint32_t info[3]; // stores Number:20, var:4, Type:4, ColType:4 in info[0..2]\n+ // for BCF_HL_FLT,INFO,FMT and contig length in info[0] for BCF_HL_CTG\n+ bcf_hrec_t *hrec[3];\n+ int id;\n+} bcf_idinfo_t;\n+\n+typedef struct {\n+ const char *key;\n+ const bcf_idinfo_t *val;\n+} bcf_idpair_t;\n+\n+typedef struct {\n+ int32_t n[3];\n+ bcf_idpair_t *id[3];\n+ void *dict[3]; // ID dictionary, contig dict and sample dict\n+ char **samples;\n+ bcf_hrec_t **hrec;\n+ int nhrec, dirty;\n+ int ntransl, *transl[2]; // for bcf_translate()\n+ int nsamples_ori; // for bcf_hdr_set_samples()\n+ uint8_t *keep_samples;\n+ kstring_t mem;\n+} bcf_hdr_t;\n+\n+extern uint8_t bcf_type_shift[];\n+\n+/'..b'IN\n+#define bcf_int16_missing INT16_MIN\n+#define bcf_int32_missing INT32_MIN\n+#define bcf_str_missing 0x07\n+extern uint32_t bcf_float_vector_end;\n+extern uint32_t bcf_float_missing;\n+static inline void bcf_float_set(float *ptr, uint32_t value)\n+{\n+ union { uint32_t i; float f; } u;\n+ u.i = value;\n+ *ptr = u.f;\n+}\n+#define bcf_float_set_vector_end(x) bcf_float_set(&(x),bcf_float_vector_end)\n+#define bcf_float_set_missing(x) bcf_float_set(&(x),bcf_float_missing)\n+static inline int bcf_float_is_missing(float f)\n+{\n+ union { uint32_t i; float f; } u;\n+ u.f = f;\n+ return u.i==bcf_float_missing ? 1 : 0;\n+}\n+static inline int bcf_float_is_vector_end(float f)\n+{\n+ union { uint32_t i; float f; } u;\n+ u.f = f;\n+ return u.i==bcf_float_vector_end ? 
1 : 0;\n+}\n+\n+static inline void bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)\n+{\n+ #define BRANCH(type_t, missing, vector_end) { \\\n+ type_t *ptr = (type_t*) (fmt->p + isample*fmt->size); \\\n+ int i; \\\n+ for (i=0; i<fmt->n && ptr[i]!=vector_end; i++) \\\n+ { \\\n+ if ( i ) kputc("/|"[ptr[i]&1], str); \\\n+ if ( !(ptr[i]>>1) ) kputc(\'.\', str); \\\n+ else kputw((ptr[i]>>1) - 1, str); \\\n+ } \\\n+ if (i == 0) kputc(\'.\', str); \\\n+ }\n+ switch (fmt->type) {\n+ case BCF_BT_INT8: BRANCH(int8_t, bcf_int8_missing, bcf_int8_vector_end); break;\n+ case BCF_BT_INT16: BRANCH(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;\n+ case BCF_BT_INT32: BRANCH(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;\n+ default: fprintf(stderr,"FIXME: type %d in bcf_format_gt?\\n", fmt->type); abort(); break;\n+ }\n+ #undef BRANCH\n+}\n+\n+static inline void bcf_enc_size(kstring_t *s, int size, int type)\n+{\n+ if (size >= 15) {\n+ kputc(15<<4|type, s);\n+ if (size >= 128) {\n+ if (size >= 32768) {\n+ int32_t x = size;\n+ kputc(1<<4|BCF_BT_INT32, s);\n+ kputsn((char*)&x, 4, s);\n+ } else {\n+ int16_t x = size;\n+ kputc(1<<4|BCF_BT_INT16, s);\n+ kputsn((char*)&x, 2, s);\n+ }\n+ } else {\n+ kputc(1<<4|BCF_BT_INT8, s);\n+ kputc(size, s);\n+ }\n+ } else kputc(size<<4|type, s);\n+}\n+\n+static inline int bcf_enc_inttype(long x)\n+{\n+ if (x <= INT8_MAX && x > bcf_int8_missing) return BCF_BT_INT8;\n+ if (x <= INT16_MAX && x > bcf_int16_missing) return BCF_BT_INT16;\n+ return BCF_BT_INT32;\n+}\n+\n+static inline void bcf_enc_int1(kstring_t *s, int32_t x)\n+{\n+ if (x == bcf_int32_vector_end) {\n+ bcf_enc_size(s, 1, BCF_BT_INT8);\n+ kputc(bcf_int8_vector_end, s);\n+ } else if (x == bcf_int32_missing) {\n+ bcf_enc_size(s, 1, BCF_BT_INT8);\n+ kputc(bcf_int8_missing, s);\n+ } else if (x <= INT8_MAX && x > bcf_int8_missing) {\n+ bcf_enc_size(s, 1, BCF_BT_INT8);\n+ kputc(x, s);\n+ } else if (x <= INT16_MAX && x > bcf_int16_missing) {\n+ int16_t z = x;\n+ 
bcf_enc_size(s, 1, BCF_BT_INT16);\n+ kputsn((char*)&z, 2, s);\n+ } else {\n+ int32_t z = x;\n+ bcf_enc_size(s, 1, BCF_BT_INT32);\n+ kputsn((char*)&z, 4, s);\n+ }\n+}\n+\n+static inline int32_t bcf_dec_int1(const uint8_t *p, int type, uint8_t **q)\n+{\n+ if (type == BCF_BT_INT8) {\n+ *q = (uint8_t*)p + 1;\n+ return *(int8_t*)p;\n+ } else if (type == BCF_BT_INT16) {\n+ *q = (uint8_t*)p + 2;\n+ return *(int16_t*)p;\n+ } else {\n+ *q = (uint8_t*)p + 4;\n+ return *(int32_t*)p;\n+ }\n+}\n+\n+static inline int32_t bcf_dec_typed_int1(const uint8_t *p, uint8_t **q)\n+{\n+ return bcf_dec_int1(p + 1, *p&0xf, q);\n+}\n+\n+static inline int32_t bcf_dec_size(const uint8_t *p, uint8_t **q, int *type)\n+{\n+ *type = *p & 0xf;\n+ if (*p>>4 != 15) {\n+ *q = (uint8_t*)p + 1;\n+ return *p>>4;\n+ } else return bcf_dec_typed_int1(p + 1, q);\n+}\n+\n+#endif\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/vcf_sweep.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/vcf_sweep.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,39 @@ +/* vcf_sweep.h -- forward/reverse sweep API. + + Copyright (C) 2013 Genome Research Ltd. + + Author: Petr Danecek <pd3@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#ifndef HTSLIB_VCF_SWEEP_H +#define HTSLIB_VCF_SWEEP_H + +#include "hts.h" +#include "vcf.h" + +typedef struct _bcf_sweep_t bcf_sweep_t; + +bcf_sweep_t *bcf_sweep_init(const char *fname); +void bcf_sweep_destroy(bcf_sweep_t *sw); +bcf_hdr_t *bcf_sweep_hdr(bcf_sweep_t *sw); +bcf1_t *bcf_sweep_fwd(bcf_sweep_t *sw); +bcf1_t *bcf_sweep_bwd(bcf_sweep_t *sw); + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib/vcfutils.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib/vcfutils.h Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,112 @@ +/* vcfutils.h -- allele-related utility functions. + + Copyright (C) 2012, 2013 Genome Research Ltd. + + Author: Petr Danecek <pd3@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#ifndef HTSLIB_VCFUTILS_H +#define HTSLIB_VCFUTILS_H + +#include "vcf.h" + + +/** + * bcf_trim_alleles() - remove ALT alleles unused in genotype fields + * @header: for access to BCF_DT_ID dictionary + * @line: VCF line obtain from vcf_parse1 + * + * Returns the number of removed alleles on success or negative + * on error: + * -1 .. 
some allele index is out of bounds + */ +int bcf_trim_alleles(const bcf_hdr_t *header, bcf1_t *line); + + +/** + * bcf_remove_alleles() - remove ALT alleles according to bitmask @mask + * @header: for access to BCF_DT_ID dictionary + * @line: VCF line obtained from vcf_parse1 + * @mask: alleles to remove + */ +void bcf_remove_alleles(const bcf_hdr_t *header, bcf1_t *line, int mask); + + +/** + * bcf_calc_ac() - calculate the number of REF and ALT alleles + * @header: for access to BCF_DT_ID dictionary + * @line: VCF line obtained from vcf_parse1 + * @ac: array of length line->n_allele + * @which: determine if INFO/AN,AC and indv fields be used + * + * Returns 1 if the call succeeded, or 0 if the value could not + * be determined. + * + * The value of @which determines if existing INFO/AC,AN can be + * used (BCF_UN_INFO) and and if indv fields can be splitted + * (BCF_UN_FMT). + */ +int bcf_calc_ac(const bcf_hdr_t *header, bcf1_t *line, int *ac, int which); + + +/** + * bcf_gt_type() - determines type of the genotype + * @fmt_ptr: the GT format field as set for example by set_fmt_ptr + * @isample: sample index (starting from 0) + * @ial: index of the 1st non-reference allele (starting from 1) + * @jal: index of the 2nd non-reference allele (starting from 1) + * + * Returns the type of the genotype (one of GT_HOM_RR, GT_HET_RA, + * GT_HOM_AA, GT_HET_AA, GT_HAPL_R, GT_HAPL_A or GT_UNKN). If $ial + * is not NULL and the genotype has one or more non-reference + * alleles, $ial will be set. In case of GT_HET_AA, $ial is the + * position of the allele which appeared first in ALT. If $jal is + * not null and the genotype is GT_HET_AA, $jal will be set and is + * the position of the second allele in ALT. 
+ */ +#define GT_HOM_RR 0 // note: the actual value of GT_* matters, used in dosage r2 calculation +#define GT_HOM_AA 1 +#define GT_HET_RA 2 +#define GT_HET_AA 3 +#define GT_HAPL_R 4 +#define GT_HAPL_A 5 +#define GT_UNKN 6 +int bcf_gt_type(bcf_fmt_t *fmt_ptr, int isample, int *ial, int *jal); + +static inline int bcf_acgt2int(char c) +{ + if ( (int)c>96 ) c -= 32; + if ( c=='A' ) return 0; + if ( c=='C' ) return 1; + if ( c=='G' ) return 2; + if ( c=='T' ) return 3; + return -1; +} +#define bcf_int2acgt(i) "ACGT"[i] + +/** + * bcf_ij2G() - common task: allele indexes to Number=G index (diploid) + * @i,j: allele indexes, 0-based, i<=j + * + * Returns index to the Number=G diploid array + */ +#define bcf_ij2G(i, j) ((j)*((j)+1)/2+(i)) + +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/htslib_vars.mk --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/htslib_vars.mk Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,39 @@ +# Makefile variables useful for third-party code using htslib's public API. +# +# Copyright (C) 2013-2014 Genome Research Ltd. +# +# Author: John Marshall <jm18@sanger.ac.uk> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# These variables can be used to express dependencies on htslib headers. +# See htslib.mk for details. 
+ +htslib_bgzf_h = $(HTSPREFIX)htslib/bgzf.h +htslib_faidx_h = $(HTSPREFIX)htslib/faidx.h +htslib_hfile_h = $(HTSPREFIX)htslib/hfile.h $(htslib_hts_defs_h) +htslib_hts_h = $(HTSPREFIX)htslib/hts.h +htslib_hts_defs_h = $(HTSPREFIX)htslib/hts_defs.h +htslib_regidx_h = $(HTSPREFIX)htslib/regidx.h +htslib_sam_h = $(HTSPREFIX)htslib/sam.h $(htslib_hts_h) +htslib_synced_bcf_reader_h = $(HTSPREFIX)htslib/synced_bcf_reader.h $(htslib_hts_h) $(htslib_vcf_h) $(htslib_tbx_h) +htslib_tbx_h = $(HTSPREFIX)htslib/tbx.h $(htslib_hts_h) +htslib_vcf_h = $(HTSPREFIX)htslib/vcf.h $(htslib_hts_h) $(HTSPREFIX)htslib/kstring.h +htslib_vcf_sweep_h = $(HTSPREFIX)htslib/vcf_sweep.h $(htslib_hts_h) $(htslib_vcf_h) +htslib_vcfutils_h = $(HTSPREFIX)htslib/vcfutils.h $(htslib_vcf_h) |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/kfunc.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/kfunc.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,280 @@\n+/* The MIT License\n+\n+ Copyright (C) 2010, 2013 Genome Research Ltd.\n+ Copyright (C) 2011 Attractive Chaos <attractor@live.co.uk>\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+#include <math.h>\n+#include <stdlib.h>\n+#include "htslib/kfunc.h"\n+\n+/* Log gamma function\n+ * \\log{\\Gamma(z)}\n+ * AS245, 2nd algorithm, http://lib.stat.cmu.edu/apstat/245\n+ */\n+double kf_lgamma(double z)\n+{\n+\tdouble x = 0;\n+\tx += 0.1659470187408462e-06 / (z+7);\n+\tx += 0.9934937113930748e-05 / (z+6);\n+\tx -= 0.1385710331296526 / (z+5);\n+\tx += 12.50734324009056 / (z+4);\n+\tx -= 176.6150291498386 / (z+3);\n+\tx += 771.3234287757674 / (z+2);\n+\tx -= 1259.139216722289 / (z+1);\n+\tx += 676.5203681218835 / z;\n+\tx += 0.9999999999995183;\n+\treturn log(x) - 5.58106146679532777 - z + (z-0.5) * log(z+6.5);\n+}\n+\n+/* complementary error function\n+ * \\frac{2}{\\sqrt{\\pi}} \\int_x^{\\infty} e^{-t^2} dt\n+ * AS66, 2nd 
algorithm, http://lib.stat.cmu.edu/apstat/66\n+ */\n+double kf_erfc(double x)\n+{\n+\tconst double p0 = 220.2068679123761;\n+\tconst double p1 = 221.2135961699311;\n+\tconst double p2 = 112.0792914978709;\n+\tconst double p3 = 33.912866078383;\n+\tconst double p4 = 6.37396220353165;\n+\tconst double p5 = .7003830644436881;\n+\tconst double p6 = .03526249659989109;\n+\tconst double q0 = 440.4137358247522;\n+\tconst double q1 = 793.8265125199484;\n+\tconst double q2 = 637.3336333788311;\n+\tconst double q3 = 296.5642487796737;\n+\tconst double q4 = 86.78073220294608;\n+\tconst double q5 = 16.06417757920695;\n+\tconst double q6 = 1.755667163182642;\n+\tconst double q7 = .08838834764831844;\n+\tdouble expntl, z, p;\n+\tz = fabs(x) * M_SQRT2;\n+\tif (z > 37.) return x > 0.? 0. : 2.;\n+\texpntl = exp(z * z * - .5);\n+\tif (z < 10. / M_SQRT2) // for small z\n+\t p = expntl * ((((((p6 * z + p5) * z + p4) * z + p3) * z + p2) * z + p1) * z + p0)\n+\t\t\t/ (((((((q7 * z + q6) * z + q5) * z + q4) * z + q3) * z + q2) * z + q1) * z + q0);\n+\telse p = expntl / 2.506628274631001 / (z + 1. / (z + 2. / (z + 3. / (z + 4. / (z + .65)))));\n+\treturn x > 0.? 2. * p : 2. * (1. 
- p);\n+}\n+\n+/* The following computes regularized incomplete gamma functions.\n+ * Formulas are taken from Wiki, with additional input from Numerical\n+ * Recipes in C (for modified Lentz\'s algorithm) and AS245\n+ * (http://lib.stat.cmu.edu/apstat/245).\n+ *\n+ * A good online calculator is available at:\n+ *\n+ * http://www.danielsoper.com/statcalc/calc23.aspx\n+ *\n+ * It calculates upper incomplete gamma function, which equals\n+ * kf_gammaq(s,z)*tgamma(s).\n+ */\n+\n+#define KF_GAMMA_EPS 1e-14\n+#define KF_TINY 1e-290\n+\n+// regularized lower incomplete gamma function, by series expansion\n+static double _kf_gammap(double s, double z)\n+{\n+\tdouble sum, x;\n+\tint k;\n+\tfor (k = 1, sum = x = 1.; k < 100; ++k) {\n+\t\tsum += (x *= z / (s + k));\n+\t\tif (x / sum < KF_GAMMA_EPS) break;\n+\t}\n+\treturn exp(s * log(z) - z - kf_lgamma(s + 1.) + log(sum));\n+}\n+// regularized upper incomplete gamma function'..b'? -(a + m) * (a + b + m) * x / ((a + 2*m) * (a + 2*m + 1))\n+\t\t\t: m * (b - m) * x / ((a + 2*m - 1) * (a + 2*m));\n+\t\tD = 1. + aa * D;\n+\t\tif (D < KF_TINY) D = KF_TINY;\n+\t\tC = 1. + aa / C;\n+\t\tif (C < KF_TINY) C = KF_TINY;\n+\t\tD = 1. / D;\n+\t\td = C * D;\n+\t\tf *= d;\n+\t\tif (fabs(d - 1.) < KF_GAMMA_EPS) break;\n+\t}\n+\treturn exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b) + a * log(x) + b * log(1.-x)) / a / f;\n+}\n+double kf_betai(double a, double b, double x)\n+{\n+\treturn x < (a + 1.) / (a + b + 2.)? kf_betai_aux(a, b, x) : 1. - kf_betai_aux(b, a, 1. 
- x);\n+}\n+\n+#ifdef KF_MAIN\n+#include <stdio.h>\n+int main(int argc, char *argv[])\n+{\n+\tdouble x = 5.5, y = 3;\n+\tdouble a, b;\n+\tprintf("erfc(%lg): %lg, %lg\\n", x, erfc(x), kf_erfc(x));\n+\tprintf("upper-gamma(%lg,%lg): %lg\\n", x, y, kf_gammaq(y, x)*tgamma(y));\n+\ta = 2; b = 2; x = 0.5;\n+\tprintf("incomplete-beta(%lg,%lg,%lg): %lg\\n", a, b, x, kf_betai(a, b, x) / exp(kf_lgamma(a+b) - kf_lgamma(a) - kf_lgamma(b)));\n+\treturn 0;\n+}\n+#endif\n+\n+\n+// log\\binom{n}{k}\n+static double lbinom(int n, int k)\n+{\n+ if (k == 0 || n == k) return 0;\n+ return lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1);\n+}\n+\n+// n11 n12 | n1_\n+// n21 n22 | n2_\n+//-----------+----\n+// n_1 n_2 | n\n+\n+// hypergeometric distribution\n+static double hypergeo(int n11, int n1_, int n_1, int n)\n+{\n+ return exp(lbinom(n1_, n11) + lbinom(n-n1_, n_1-n11) - lbinom(n, n_1));\n+}\n+\n+typedef struct {\n+ int n11, n1_, n_1, n;\n+ double p;\n+} hgacc_t;\n+\n+// incremental version of hypergenometric distribution\n+static double hypergeo_acc(int n11, int n1_, int n_1, int n, hgacc_t *aux)\n+{\n+ if (n1_ || n_1 || n) {\n+ aux->n11 = n11; aux->n1_ = n1_; aux->n_1 = n_1; aux->n = n;\n+ } else { // then only n11 changed; the rest fixed\n+ if (n11%11 && n11 + aux->n - aux->n1_ - aux->n_1) {\n+ if (n11 == aux->n11 + 1) { // incremental\n+ aux->p *= (double)(aux->n1_ - aux->n11) / n11\n+ * (aux->n_1 - aux->n11) / (n11 + aux->n - aux->n1_ - aux->n_1);\n+ aux->n11 = n11;\n+ return aux->p;\n+ }\n+ if (n11 == aux->n11 - 1) { // incremental\n+ aux->p *= (double)aux->n11 / (aux->n1_ - n11)\n+ * (aux->n11 + aux->n - aux->n1_ - aux->n_1) / (aux->n_1 - n11);\n+ aux->n11 = n11;\n+ return aux->p;\n+ }\n+ }\n+ aux->n11 = n11;\n+ }\n+ aux->p = hypergeo(aux->n11, aux->n1_, aux->n_1, aux->n);\n+ return aux->p;\n+}\n+\n+double kt_fisher_exact(int n11, int n12, int n21, int n22, double *_left, double *_right, double *two)\n+{\n+ int i, j, max, min;\n+ double p, q, left, right;\n+ hgacc_t aux;\n+ int n1_, 
n_1, n;\n+\n+ n1_ = n11 + n12; n_1 = n11 + n21; n = n11 + n12 + n21 + n22; // calculate n1_, n_1 and n\n+ max = (n_1 < n1_) ? n_1 : n1_; // max n11, for right tail\n+ min = n1_ + n_1 - n; // not sure why n11-n22 is used instead of min(n_1,n1_)\n+ if (min < 0) min = 0; // min n11, for left tail\n+ *two = *_left = *_right = 1.;\n+ if (min == max) return 1.; // no need to do test\n+ q = hypergeo_acc(n11, n1_, n_1, n, &aux); // the probability of the current table\n+ // left tail\n+ p = hypergeo_acc(min, 0, 0, 0, &aux);\n+ for (left = 0., i = min + 1; p < 0.99999999 * q && i<=max; ++i) // loop until underflow\n+ left += p, p = hypergeo_acc(i, 0, 0, 0, &aux);\n+ --i;\n+ if (p < 1.00000001 * q) left += p;\n+ else --i;\n+ // right tail\n+ p = hypergeo_acc(max, 0, 0, 0, &aux);\n+ for (right = 0., j = max - 1; p < 0.99999999 * q && j>=0; --j) // loop until underflow\n+ right += p, p = hypergeo_acc(j, 0, 0, 0, &aux);\n+ ++j;\n+ if (p < 1.00000001 * q) right += p;\n+ else ++j;\n+ // two-tail\n+ *two = left + right;\n+ if (*two > 1.) *two = 1.;\n+ // adjust left and right\n+ if (abs(i - n11) < abs(j - n11)) right = 1. - left + q;\n+ else left = 1.0 - right + q;\n+ *_left = left; *_right = right;\n+ return q;\n+}\n+\n+\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/knetfile.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/knetfile.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,632 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 by Genome Research Ltd (GRL).\n+ 2010 by Attractive Chaos <attractor@live.co.uk>\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/* Probably I will not do socket programming in the next few years and\n+ therefore I decide to heavily annotate this file, for Linux and\n+ Windows as well. -ac */\n+\n+#include <time.h>\n+#include <stdio.h>\n+#include <ctype.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <errno.h>\n+#include <unistd.h>\n+#include <sys/types.h>\n+\n+#ifndef _WIN32\n+#include <netdb.h>\n+#include <arpa/inet.h>\n+#include <sys/socket.h>\n+#endif\n+\n+#include "htslib/knetfile.h"\n+\n+/* In winsock.h, the type of a socket is SOCKET, which is: "typedef\n+ * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed\n+ * integer -1. In knetfile.c, I use "int" for socket type\n+ * throughout. 
This should be improved to avoid confusion.\n+ *\n+ * In Linux/Mac, recv() and read() do almost the same thing. You can see\n+ * in the header file that netread() is simply an alias of read(). In\n+ * Windows, however, they are different and using recv() is mandatory.\n+ */\n+\n+/* This function tests if the file handler is ready for reading (or\n+ * writing if is_read==0). */\n+static int socket_wait(int fd, int is_read)\n+{\n+\tfd_set fds, *fdr = 0, *fdw = 0;\n+\tstruct timeval tv;\n+\tint ret;\n+\ttv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out\n+\tFD_ZERO(&fds);\n+\tFD_SET(fd, &fds);\n+\tif (is_read) fdr = &fds;\n+\telse fdw = &fds;\n+\tret = select(fd+1, fdr, fdw, 0, &tv);\n+#ifndef _WIN32\n+\tif (ret == -1) perror("select");\n+#else\n+\tif (ret == 0)\n+\t\tfprintf(stderr, "select time-out\\n");\n+\telse if (ret == SOCKET_ERROR)\n+\t\tfprintf(stderr, "select: %d\\n", WSAGetLastError());\n+#endif\n+\treturn ret;\n+}\n+\n+#ifndef _WIN32\n+/* This function does not work with Windows due to the lack of\n+ * getaddrinfo() in winsock. It is addapted from an example in "Beej\'s\n+ * Guide to Network Programming" (http://beej.us/guide/bgnet/). */\n+static int socket_connect(const char *host, const char *port)\n+{\n+#define __err_connect(func) do { perror(func); freeaddrinfo(res); return -1; } while (0)\n+\n+\tint ai_err, on = 1, fd;\n+\tstruct linger lng = { 0, 0 };\n+\tstruct addrinfo hints, *res = 0;\n+\tmemset(&hints, 0, sizeof(struct addrinfo));\n+\thints.ai_family = AF_UNSPEC;\n+\thints.ai_socktype = SOCK_STREAM;\n+\t/* In Unix/Mac, getaddrinfo() is the most convenient way to get\n+\t * server information. 
*/\n+\tif ((ai_err = getaddrinfo(host, port, &hints, &res)) != 0) { fprintf(stderr, "can\'t resolve %s:%s: %s\\n", host, port, gai_strerror(ai_err)); return -1; }\n+\tif ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1) __err_connect("socket");\n+\t/* The following two setsockopt() are used by ftplib\n+\t * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they\n+\t * necessary. */\n+\tif (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) __err_connect("setsockop'..b'1, sizeof(knetFile));\n+\t\tfp->type = KNF_TYPE_LOCAL;\n+\t\tfp->fd = fd;\n+\t\tfp->ctrl_fd = -1;\n+\t}\n+\tif (fp && fp->fd == -1) {\n+\t\tknet_close(fp);\n+\t\treturn 0;\n+\t}\n+\treturn fp;\n+}\n+\n+knetFile *knet_dopen(int fd, const char *mode)\n+{\n+\tknetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));\n+\tfp->type = KNF_TYPE_LOCAL;\n+\tfp->fd = fd;\n+\treturn fp;\n+}\n+\n+ssize_t knet_read(knetFile *fp, void *buf, size_t len)\n+{\n+\toff_t l = 0;\n+\tif (fp->fd == -1) return 0;\n+\tif (fp->type == KNF_TYPE_FTP) {\n+\t\tif (fp->is_ready == 0) {\n+\t\t\tif (!fp->no_reconnect) kftp_reconnect(fp);\n+\t\t\tkftp_connect_file(fp);\n+\t\t}\n+\t} else if (fp->type == KNF_TYPE_HTTP) {\n+\t\tif (fp->is_ready == 0)\n+\t\t\tkhttp_connect_file(fp);\n+\t}\n+\tif (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX\n+\t\tsize_t rest = len;\n+\t\tssize_t curr;\n+\t\twhile (rest) {\n+\t\t\tdo {\n+\t\t\t\tcurr = read(fp->fd, (void*)((char*)buf + l), rest);\n+\t\t\t} while (curr < 0 && EINTR == errno);\n+\t\t\tif (curr < 0) return -1;\n+\t\t\tif (curr == 0) break;\n+\t\t\tl += curr; rest -= curr;\n+\t\t}\n+\t} else l = my_netread(fp->fd, buf, len);\n+\tfp->offset += l;\n+\treturn l;\n+}\n+\n+off_t knet_seek(knetFile *fp, off_t off, int whence)\n+{\n+\tif (whence == SEEK_SET && off == fp->offset) return 0;\n+\tif (fp->type == KNF_TYPE_LOCAL) {\n+\t\t/* Be aware that lseek() returns the offset after seeking, while fseek() returns zero 
on success. */\n+\t\toff_t offset = lseek(fp->fd, off, whence);\n+\t\tif (offset == -1) return -1;\n+\t\tfp->offset = offset;\n+\t\treturn fp->offset;\n+\t} else if (fp->type == KNF_TYPE_FTP) {\n+\t\tif (whence == SEEK_CUR) fp->offset += off;\n+\t\telse if (whence == SEEK_SET) fp->offset = off;\n+\t\telse if (whence == SEEK_END) fp->offset = fp->file_size + off;\n+\t\telse return -1;\n+\t\tfp->is_ready = 0;\n+\t\treturn fp->offset;\n+\t} else if (fp->type == KNF_TYPE_HTTP) {\n+\t\tif (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?\n+\t\t\tfprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\\n");\n+\t\t\terrno = ESPIPE;\n+\t\t\treturn -1;\n+\t\t}\n+\t\tif (whence == SEEK_CUR) fp->offset += off;\n+\t\telse if (whence == SEEK_SET) fp->offset = off;\n+\t\telse return -1;\n+\t\tfp->is_ready = 0;\n+\t\treturn fp->offset;\n+\t}\n+\terrno = EINVAL;\n+\tfprintf(stderr,"[knet_seek] %s\\n", strerror(errno));\n+\treturn -1;\n+}\n+\n+int knet_close(knetFile *fp)\n+{\n+\tif (fp == 0) return 0;\n+\tif (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific\n+\tif (fp->fd != -1) {\n+\t\t/* On Linux/Mac, netclose() is an alias of close(), but on\n+\t\t * Windows, it is an alias of closesocket(). 
*/\n+\t\tif (fp->type == KNF_TYPE_LOCAL) close(fp->fd);\n+\t\telse netclose(fp->fd);\n+\t}\n+\tfree(fp->host); free(fp->port);\n+\tfree(fp->response); free(fp->retr); // FTP specific\n+\tfree(fp->path); free(fp->http_host); // HTTP specific\n+\tfree(fp);\n+\treturn 0;\n+}\n+\n+#ifdef KNETFILE_MAIN\n+int main(void)\n+{\n+\tchar *buf;\n+\tknetFile *fp;\n+\tint type = 4, l;\n+#ifdef _WIN32\n+\tknet_win32_init();\n+#endif\n+\tbuf = calloc(0x100000, 1);\n+\tif (type == 0) {\n+\t\tfp = knet_open("knetfile.c", "r");\n+\t\tknet_seek(fp, 1000, SEEK_SET);\n+\t} else if (type == 1) { // NCBI FTP, large file\n+\t\tfp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");\n+\t\tknet_seek(fp, 2500000000ll, SEEK_SET);\n+\t\tl = knet_read(fp, buf, 255);\n+\t} else if (type == 2) {\n+\t\tfp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");\n+\t\tknet_seek(fp, 1000, SEEK_SET);\n+\t} else if (type == 3) {\n+\t\tfp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");\n+\t\tknet_seek(fp, 1000, SEEK_SET);\n+\t} else if (type == 4) {\n+\t\tfp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");\n+\t\tknet_read(fp, buf, 10000);\n+\t\tknet_seek(fp, 20000, SEEK_SET);\n+\t\tknet_seek(fp, 10000, SEEK_SET);\n+\t\tl = knet_read(fp, buf+10000, 10000000) + 10000;\n+\t}\n+\tif (type != 4 && type != 1) {\n+\t\tknet_read(fp, buf, 255);\n+\t\tbuf[255] = 0;\n+\t\tprintf("%s\\n", buf);\n+\t} else write(fileno(stdout), buf, l);\n+\tknet_close(fp);\n+\tfree(buf);\n+\treturn 0;\n+}\n+#endif\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/kstring.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/kstring.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,254 @@ +/* The MIT License + + Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#include <stdarg.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <stdint.h> +#include "htslib/kstring.h" + +int kvsprintf(kstring_t *s, const char *fmt, va_list ap) +{ + va_list args; + int l; + va_copy(args, ap); + l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); // This line does not work with glibc 2.0. See `man snprintf'. + va_end(args); + if (l + 1 > s->m - s->l) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + va_copy(args, ap); + l = vsnprintf(s->s + s->l, s->m - s->l, fmt, args); + va_end(args); + } + s->l += l; + return l; +} + +int ksprintf(kstring_t *s, const char *fmt, ...) 
+{ + va_list ap; + int l; + va_start(ap, fmt); + l = kvsprintf(s, fmt, ap); + va_end(ap); + return l; +} + +char *kstrtok(const char *str, const char *sep, ks_tokaux_t *aux) +{ + const char *p, *start; + if (sep) { // set up the table + if (str == 0 && (aux->tab[0]&1)) return 0; // no need to set up if we have finished + aux->finished = 0; + if (sep[1]) { + aux->sep = -1; + aux->tab[0] = aux->tab[1] = aux->tab[2] = aux->tab[3] = 0; + for (p = sep; *p; ++p) aux->tab[*p>>6] |= 1ull<<(*p&0x3f); + } else aux->sep = sep[0]; + } + if (aux->finished) return 0; + else if (str) aux->p = str - 1, aux->finished = 0; + if (aux->sep < 0) { + for (p = start = aux->p + 1; *p; ++p) + if (aux->tab[*p>>6]>>(*p&0x3f)&1) break; + } else { + for (p = start = aux->p + 1; *p; ++p) + if (*p == aux->sep) break; + } + aux->p = p; // end of token + if (*p == 0) aux->finished = 1; // no more tokens + return (char*)start; +} + +// s MUST BE a null terminated string; l = strlen(s) +int ksplit_core(char *s, int delimiter, int *_max, int **_offsets) +{ + int i, n, max, last_char, last_start, *offsets, l; + n = 0; max = *_max; offsets = *_offsets; + l = strlen(s); + +#define __ksplit_aux do { \ + if (_offsets) { \ + s[i] = 0; \ + if (n == max) { \ + int *tmp; \ + max = max? 
max<<1 : 2; \ + if ((tmp = (int*)realloc(offsets, sizeof(int) * max))) { \ + offsets = tmp; \ + } else { \ + free(offsets); \ + *_offsets = NULL; \ + return 0; \ + } \ + } \ + offsets[n++] = last_start; \ + } else ++n; \ + } while (0) + + for (i = 0, last_char = last_start = 0; i <= l; ++i) { + if (delimiter == 0) { + if (isspace(s[i]) || s[i] == 0) { + if (isgraph(last_char)) __ksplit_aux; // the end of a field + } else { + if (isspace(last_char) || last_char == 0) last_start = i; + } + } else { + if (s[i] == delimiter || s[i] == 0) { + if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field + } else { + if (last_char == delimiter || last_char == 0) last_start = i; + } + } + last_char = s[i]; + } + *_max = max; *_offsets = offsets; + return n; +} + +/********************** + * Boyer-Moore search * + **********************/ + +typedef unsigned char ubyte_t; + +// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html +static int *ksBM_prep(const ubyte_t *pat, int m) +{ + int i, *suff, *prep, *bmGs, *bmBc; + prep = (int*)calloc(m + 256, sizeof(int)); + bmGs = prep; bmBc = prep + m; + { // preBmBc() + for (i = 0; i < 256; ++i) bmBc[i] = m; + for (i = 0; i < m - 1; ++i) bmBc[pat[i]] = m - i - 1; + } + suff = (int*)calloc(m, sizeof(int)); + { // suffixes() + int f = 0, g; + suff[m - 1] = m; + g = m - 1; + for (i = m - 2; i >= 0; --i) { + if (i > g && suff[i + m - 1 - f] < i - g) + suff[i] = suff[i + m - 1 - f]; + else { + if (i < g) g = i; + f = i; + while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g; + suff[i] = f - g; + } + } + } + { // preBmGs() + int j = 0; + for (i = 0; i < m; ++i) bmGs[i] = m; + for (i = m - 1; i >= 0; --i) + if (suff[i] == i + 1) + for (; j < m - 1 - i; ++j) + if (bmGs[j] == m) + bmGs[j] = m - 1 - i; + for (i = 0; i <= m - 2; ++i) + bmGs[m - 1 - suff[i]] = m - 1 - i; + } + free(suff); + return prep; +} + +void *kmemmem(const void *_str, int n, const void *_pat, int m, int **_prep) +{ + int i, j, *prep = 0, 
*bmGs, *bmBc; + const ubyte_t *str, *pat; + str = (const ubyte_t*)_str; pat = (const ubyte_t*)_pat; + prep = (_prep == 0 || *_prep == 0)? ksBM_prep(pat, m) : *_prep; + if (_prep && *_prep == 0) *_prep = prep; + bmGs = prep; bmBc = prep + m; + j = 0; + while (j <= n - m) { + for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i); + if (i >= 0) { + int max = bmBc[str[i+j]] - m + 1 + i; + if (max < bmGs[i]) max = bmGs[i]; + j += max; + } else return (void*)(str + j); + } + if (_prep == 0) free(prep); + return 0; +} + +char *kstrstr(const char *str, const char *pat, int **_prep) +{ + return (char*)kmemmem(str, strlen(str), pat, strlen(pat), _prep); +} + +char *kstrnstr(const char *str, const char *pat, int n, int **_prep) +{ + return (char*)kmemmem(str, n, pat, strlen(pat), _prep); +} + +/*********************** + * The main() function * + ***********************/ + +#ifdef KSTRING_MAIN +#include <stdio.h> +int main() +{ + kstring_t *s; + int *fields, n, i; + ks_tokaux_t aux; + char *p; + s = (kstring_t*)calloc(1, sizeof(kstring_t)); + // test ksprintf() + ksprintf(s, " abcdefg: %d ", 100); + printf("'%s'\n", s->s); + // test ksplit() + fields = ksplit(s, 0, &n); + for (i = 0; i < n; ++i) + printf("field[%d] = '%s'\n", i, s->s + fields[i]); + // test kstrtok() + s->l = 0; + for (p = kstrtok("ab:cde:fg/hij::k", ":/", &aux); p; p = kstrtok(0, 0, &aux)) { + kputsn(p, aux.p - p, s); + kputc('\n', s); + } + printf("%s", s->s); + // free + free(s->s); free(s); free(fields); + + { + static char *str = "abcdefgcdgcagtcakcdcd"; + static char *pat = "cd"; + char *ret, *s = str; + int *prep = 0; + while ((ret = kstrstr(s, pat, &prep)) != 0) { + printf("match: %s\n", ret); + s = ret + prep[0]; + } + free(prep); + } + return 0; +} +#endif |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/regidx.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/regidx.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,338 @@\n+/* \n+ Copyright (C) 2014 Genome Research Ltd.\n+\n+ Author: Petr Danecek <pd3@sanger.ac.uk>\n+\n+ Permission is hereby granted, free of charge, to any person obtaining a copy\n+ of this software and associated documentation files (the "Software"), to deal\n+ in the Software without restriction, including without limitation the rights\n+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+ copies of the Software, and to permit persons to whom the Software is\n+ furnished to do so, subject to the following conditions:\n+ \n+ The above copyright notice and this permission notice shall be included in\n+ all copies or substantial portions of the Software.\n+ \n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n+ THE SOFTWARE.\n+*/\n+\n+#include "htslib/hts.h"\n+#include "htslib/kstring.h"\n+#include "htslib/kseq.h"\n+#include "htslib/khash_str2int.h"\n+#include "htslib/regidx.h"\n+\n+#define LIDX_SHIFT 13 // number of insignificant index bits\n+\n+// List of regions for one chromosome\n+typedef struct\n+{\n+ int *idx, nidx;\n+ int nregs, mregs; // n:used, m:alloced\n+ reg_t *regs;\n+ void *payload;\n+}\n+reglist_t;\n+\n+// Container of all sequences\n+struct _regidx_t\n+{\n+ int nseq, mseq; // n:used, m:alloced\n+ reglist_t *seq; // regions for each sequence\n+ void *seq2regs; // hash for fast lookup from chr name to regions\n+ char **seq_names;\n+ regidx_free_f free; // function to free any data allocated by regidx_parse_f\n+ regidx_parse_f parse; // parse one input line\n+ void *usr; // user data to pass to 
regidx_parse_f\n+\n+ // temporary data for index initialization\n+ kstring_t str;\n+ int rid_prev, start_prev, end_prev;\n+ int payload_size;\n+ void *payload;\n+};\n+\n+int regidx_seq_nregs(regidx_t *idx, const char *seq)\n+{\n+ int iseq;\n+ if ( khash_str2int_get(idx->seq2regs, seq, &iseq)!=0 ) return 0; // no such sequence\n+ return idx->seq[iseq].nregs;\n+}\n+\n+int regidx_nregs(regidx_t *idx)\n+{\n+ int i, nregs = 0;\n+ for (i=0; i<idx->nseq; i++) nregs += idx->seq[i].nregs;\n+ return nregs;\n+}\n+\n+char **regidx_seq_names(regidx_t *idx, int *n)\n+{\n+ *n = idx->nseq;\n+ return idx->seq_names;\n+}\n+\n+int _regidx_build_index(regidx_t *idx)\n+{\n+ int iseq;\n+ for (iseq=0; iseq<idx->nseq; iseq++)\n+ {\n+ reglist_t *list = &idx->seq[iseq];\n+ int j,k, imax = 0; // max index bin\n+ for (j=0; j<list->nregs; j++)\n+ {\n+ int ibeg = list->regs[j].start >> LIDX_SHIFT;\n+ int iend = list->regs[j].end >> LIDX_SHIFT;\n+ if ( imax < iend + 1 )\n+ {\n+ int old_imax = imax; \n+ imax = iend + 1;\n+ kroundup32(imax);\n+ list->idx = (int*) realloc(list->idx, imax*sizeof(int));\n+ for (k=old_imax; k<imax; k++) list->idx[k] = -1;\n+ }\n+ if ( ibeg==iend )\n+ {\n+ if ( list->idx[ibeg]<0 ) list->idx[ibeg] = j;\n+ }\n+ else\n+ {\n+ for (k=ibeg; k<=iend; k++)\n+ if ( list->idx[k]<0 ) list->idx[k] = j;\n+ }\n+ list->nidx = iend + 1;\n+ }\n+ }\n+ return 0;\n+}\n+\n+int regidx_insert(regidx_t *idx, char *line)\n+{\n+ if ( !line )\n+ return _regidx_build_index(idx);\n+\n+ char *chr_from, *chr_to;\n+ reg_t reg;\n+ int ret = idx->parse(line,&chr_from,&chr_to,®,idx->payload,idx->usr);\n+ if ( ret==-2 ) return -1; // error\n+ i'..b'size ) idx->payload = malloc(payload_size);\n+\n+ if ( !fname ) return idx;\n+ \n+ kstring_t str = {0,0,0};\n+\n+ htsFile *fp = hts_open(fname,"r");\n+ if ( !fp ) goto error;\n+\n+ while ( hts_getline(fp, KS_SEP_LINE, &str) > 0 )\n+ {\n+ if ( regidx_insert(idx, str.s) ) goto error;\n+ }\n+ regidx_insert(idx, NULL);\n+ \n+ free(str.s);\n+ hts_close(fp);\n+ 
return idx;\n+\n+error:\n+ free(str.s);\n+ if ( fp ) hts_close(fp);\n+ regidx_destroy(idx);\n+ return NULL;\n+}\n+\n+void regidx_destroy(regidx_t *idx)\n+{\n+ int i, j;\n+ for (i=0; i<idx->nseq; i++)\n+ {\n+ reglist_t *list = &idx->seq[i];\n+ if ( idx->free )\n+ {\n+ for (j=0; j<list->nregs; j++)\n+ idx->free(list->payload + idx->payload_size*j);\n+ }\n+ free(list->payload);\n+ free(list->regs);\n+ free(list->idx);\n+ }\n+ free(idx->seq_names);\n+ free(idx->seq);\n+ free(idx->str.s);\n+ free(idx->payload);\n+ khash_str2int_destroy_free(idx->seq2regs);\n+ free(idx);\n+}\n+\n+int regidx_overlap(regidx_t *idx, const char *chr, uint32_t from, uint32_t to, regitr_t *itr)\n+{\n+ if ( itr ) itr->i = itr->n = 0;\n+\n+ int iseq;\n+ if ( khash_str2int_get(idx->seq2regs, chr, &iseq)!=0 ) return 0; // no such sequence\n+\n+ reglist_t *list = &idx->seq[iseq];\n+ if ( !list->nregs ) return 0;\n+\n+ int i, ibeg = from>>LIDX_SHIFT; \n+ int ireg = ibeg < list->nidx ? list->idx[ibeg] : list->idx[ list->nidx - 1 ];\n+ if ( ireg < 0 )\n+ {\n+ // linear search; if slow, replace with binary search\n+ if ( ibeg > list->nidx ) ibeg = list->nidx;\n+ for (i=ibeg - 1; i>=0; i--)\n+ if ( list->idx[i] >=0 ) break;\n+ ireg = i>=0 ? 
list->idx[i] : 0;\n+ }\n+ for (i=ireg; i<list->nregs; i++)\n+ {\n+ if ( list->regs[i].start > to ) return 0; // no match\n+ if ( list->regs[i].end >= from && list->regs[i].start <= to ) break; // found\n+ }\n+\n+ if ( i>=list->nregs ) return 0; // no match\n+\n+ if ( !itr ) return 1;\n+\n+ itr->i = 0;\n+ itr->n = list->nregs - i;\n+ itr->reg = &idx->seq[iseq].regs[i];\n+ if ( idx->payload_size )\n+ itr->payload = idx->seq[iseq].payload + i*idx->payload_size;\n+ else\n+ itr->payload = NULL;\n+\n+ return 1;\n+}\n+\n+int regidx_parse_bed(const char *line, char **chr_beg, char **chr_end, reg_t *reg, void *payload, void *usr)\n+{\n+ char *ss = (char*) line;\n+ while ( *ss && isspace(*ss) ) ss++;\n+ if ( !*ss ) return -1; // skip blank lines\n+ if ( *ss==\'#\' ) return -1; // skip comments\n+ \n+ char *se = ss;\n+ while ( *se && !isspace(*se) ) se++;\n+ if ( !*se ) { fprintf(stderr,"Could not parse bed line: %s\\n", line); return -2; }\n+\n+ *chr_beg = ss;\n+ *chr_end = se-1;\n+\n+ ss = se+1;\n+ reg->start = strtol(ss, &se, 10);\n+ if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\\n", line); return -2; }\n+\n+ ss = se+1;\n+ reg->end = strtol(ss, &se, 10) - 1;\n+ if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\\n", line); return -2; }\n+ \n+ return 0;\n+}\n+\n+int regidx_parse_tab(const char *line, char **chr_beg, char **chr_end, reg_t *reg, void *payload, void *usr)\n+{\n+ char *ss = (char*) line;\n+ while ( *ss && isspace(*ss) ) ss++;\n+ if ( !*ss ) return -1; // skip blank lines\n+ if ( *ss==\'#\' ) return -1; // skip comments\n+ \n+ char *se = ss;\n+ while ( *se && !isspace(*se) ) se++;\n+ if ( !*se ) { fprintf(stderr,"Could not parse bed line: %s\\n", line); return -2; }\n+\n+ *chr_beg = ss;\n+ *chr_end = se-1;\n+\n+ ss = se+1;\n+ reg->start = strtol(ss, &se, 10) - 1;\n+ if ( ss==se ) { fprintf(stderr,"Could not parse bed line: %s\\n", line); return -2; }\n+\n+ if ( !se[0] || !se[1] )\n+ reg->end = reg->start;\n+ else\n+ {\n+ ss = se+1;\n+ 
reg->end = strtol(ss, &se, 10);\n+ if ( ss==se ) reg->end = reg->start;\n+ else reg->end--;\n+ }\n+ \n+ return 0;\n+}\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/sam.5 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/sam.5 Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,68 @@ +'\" t +.TH sam 5 "August 2013" "htslib" "Bioinformatics formats" +.SH NAME +sam \- Sequence Alignment/Map file format +.\" +.\" Copyright (C) 2009, 2013 Genome Research Ltd. +.\" +.\" Author: Heng Li <lh3@sanger.ac.uk> +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining a +.\" copy of this software and associated documentation files (the "Software"), +.\" to deal in the Software without restriction, including without limitation +.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, +.\" and/or sell copies of the Software, and to permit persons to whom the +.\" Software is furnished to do so, subject to the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be included in +.\" all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +.\" DEALINGS IN THE SOFTWARE. +.\" +.SH DESCRIPTION +Sequence Alignment/Map (SAM) format is TAB-delimited. Apart from the header lines, which are started +with the `@' symbol, each alignment line consists of: +.TS +nlbl. 
+1 QNAME Query template/pair NAME +2 FLAG bitwise FLAG +3 RNAME Reference sequence NAME +4 POS 1-based leftmost POSition/coordinate of clipped sequence +5 MAPQ MAPping Quality (Phred-scaled) +6 CIGAR extended CIGAR string +7 MRNM Mate Reference sequence NaMe (`=' if same as RNAME) +8 MPOS 1-based Mate POSistion +9 TLEN inferred Template LENgth (insert size) +10 SEQ query SEQuence on the same strand as the reference +11 QUAL query QUALity (ASCII-33 gives the Phred base quality) +12+ OPT variable OPTional fields in the format TAG:VTYPE:VALUE +.TE +.PP +Each bit in the FLAG field is defined as: +.TS +lcbl. +0x0001 p the read is paired in sequencing +0x0002 P the read is mapped in a proper pair +0x0004 u the query sequence itself is unmapped +0x0008 U the mate is unmapped +0x0010 r strand of the query (1 for reverse) +0x0020 R strand of the mate +0x0040 1 the read is the first read in a pair +0x0080 2 the read is the second read in a pair +0x0100 s the alignment is not primary +0x0200 f the read fails platform/vendor quality checks +0x0400 d the read is either a PCR or an optical duplicate +0x0800 S the alignment is supplementary +.TE +.P +where the second column gives the string representation of the FLAG field. +.SH SEE ALSO +.TP +https://github.com/samtools/hts-specs +The full SAM/BAM file format specification |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/sam.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/sam.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,1882 @@\n+/* sam.c -- SAM and BAM file I/O and manipulation.\n+\n+ Copyright (C) 2008-2010, 2012-2014 Genome Research Ltd.\n+ Copyright (C) 2010, 2012, 2013 Broad Institute.\n+\n+ Author: Heng Li <lh3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <errno.h>\n+#include <ctype.h>\n+#include <zlib.h>\n+#include "htslib/sam.h"\n+#include "htslib/bgzf.h"\n+#include "cram/cram.h"\n+#include "htslib/hfile.h"\n+\n+#include "htslib/khash.h"\n+KHASH_DECLARE(s2i, kh_cstr_t, int64_t)\n+\n+typedef khash_t(s2i) sdict_t;\n+\n+/**********************\n+ *** BAM header I/O ***\n+ **********************/\n+\n+bam_hdr_t *bam_hdr_init()\n+{\n+ return (bam_hdr_t*)calloc(1, sizeof(bam_hdr_t));\n+}\n+\n+void bam_hdr_destroy(bam_hdr_t *h)\n+{\n+ int32_t i;\n+ if (h == NULL) return;\n+ if (h->target_name) {\n+ for (i = 0; i < h->n_targets; ++i)\n+ free(h->target_name[i]);\n+ free(h->target_name);\n+ free(h->target_len);\n+ }\n+ free(h->text); free(h->cigar_tab);\n+ if (h->sdict) kh_destroy(s2i, (sdict_t*)h->sdict);\n+ free(h);\n+}\n+\n+bam_hdr_t *bam_hdr_dup(const bam_hdr_t *h0)\n+{\n+ if (h0 == NULL) return NULL;\n+ bam_hdr_t *h;\n+ if ((h = bam_hdr_init()) == NULL) return NULL;\n+ // copy the simple data\n+ h->n_targets = h0->n_targets;\n+ h->ignore_sam_err = h0->ignore_sam_err;\n+ h->l_text = h0->l_text;\n+ // Then the pointery stuff\n+ h->cigar_tab = NULL;\n+ h->sdict = NULL;\n+ h->text = (char*)calloc(h->l_text + 1, 1);\n+ memcpy(h->text, h0->text, h->l_text);\n+ h->target_len = (uint32_t*)calloc(h->n_targets, sizeof(uint32_t));\n+ h->target_name = (char**)calloc(h->n_targets, sizeof(char*));\n+ int i;\n+ for (i = 0; i < h->n_targets; ++i) {\n+ h->target_len[i] = h0->target_len[i];\n+ h->target_name[i] = strdup(h0->target_name[i]);\n+ }\n+ return h;\n+}\n+\n+\n+static bam_hdr_t *hdr_from_dict(sdict_t *d)\n+{\n+ bam_hdr_t *h;\n+ khint_t k;\n+ h = bam_hdr_init();\n+ h->sdict = d;\n+ h->n_targets = kh_size(d);\n+ h->target_len = (uint32_t*)malloc(sizeof(uint32_t) * h->n_targets);\n+ h->target_name = (char**)malloc(sizeof(char*) * h->n_targets);\n+ for (k = kh_begin(d); k != kh_end(d); ++k) {\n+ if (!kh_exist(d, k)) continue;\n+ 
h->target_name[kh_val(d, k)>>32] = (char*)kh_key(d, k);\n+ h->target_len[kh_val(d, k)>>32] = kh_val(d, k)<<32>>32;\n+ kh_val(d, k) >>= 32;\n+ }\n+ return h;\n+}\n+\n+bam_hdr_t *bam_hdr_read(BGZF *fp)\n+{\n+ bam_hdr_t *h;\n+ char buf[4];\n+ int magic_len, has_EOF;\n+ int32_t i = 1, name_len;\n+ // check EOF\n+ has_EOF = bgzf_check_EOF(fp);\n+ if (has_EOF < 0) {\n+ perror("[W::sam_hdr_read] bgzf_check_EOF");\n+ } else if (has_EOF == 0 && hts_verbose >= 2)\n+ fprintf(stderr, "[W::%s] EOF marker is absent. The input is probably truncated.\\n", __func__);\n+ // read "BAM1"\n+ magic_len = bgzf_read(fp, buf, 4);\n+ if '..b'd > iter->pos || iter->tail->b.core.tid > iter->tid) {\n+ iter->tail->next = mp_alloc(iter->mp);\n+ iter->tail = iter->tail->next;\n+ }\n+ } else iter->is_eof = 1;\n+ return 0;\n+}\n+\n+const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)\n+{\n+ const bam_pileup1_t *plp;\n+ if (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; }\n+ if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;\n+ else { // no pileup line can be obtained; read alignments\n+ *_n_plp = 0;\n+ if (iter->is_eof) return 0;\n+ int ret;\n+ while ( (ret=iter->func(iter->data, iter->b)) >= 0) {\n+ if (bam_plp_push(iter, iter->b) < 0) {\n+ *_n_plp = -1;\n+ return 0;\n+ }\n+ if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;\n+ // otherwise no pileup line can be returned; read the next alignment.\n+ }\n+ if ( ret < -1 ) { iter->error = ret; *_n_plp = -1; return 0; }\n+ bam_plp_push(iter, 0);\n+ if ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;\n+ return 0;\n+ }\n+}\n+\n+void bam_plp_reset(bam_plp_t iter)\n+{\n+ lbnode_t *p, *q;\n+ iter->max_tid = iter->max_pos = -1;\n+ iter->tid = iter->pos = 0;\n+ iter->is_eof = 0;\n+ for (p = iter->head; p->next;) {\n+ overlap_remove(iter, NULL);\n+ q = p->next;\n+ mp_free(iter->mp, p);\n+ p = q;\n+ }\n+ iter->head = iter->tail;\n+}\n+\n+void 
bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt)\n+{\n+ iter->maxcnt = maxcnt;\n+}\n+\n+/************************\n+ *** Mpileup iterator ***\n+ ************************/\n+\n+struct __bam_mplp_t {\n+ int n;\n+ uint64_t min, *pos;\n+ bam_plp_t *iter;\n+ int *n_plp;\n+ const bam_pileup1_t **plp;\n+};\n+\n+bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data)\n+{\n+ int i;\n+ bam_mplp_t iter;\n+ iter = (bam_mplp_t)calloc(1, sizeof(struct __bam_mplp_t));\n+ iter->pos = (uint64_t*)calloc(n, sizeof(uint64_t));\n+ iter->n_plp = (int*)calloc(n, sizeof(int));\n+ iter->plp = (const bam_pileup1_t**)calloc(n, sizeof(bam_pileup1_t*));\n+ iter->iter = (bam_plp_t*)calloc(n, sizeof(bam_plp_t));\n+ iter->n = n;\n+ iter->min = (uint64_t)-1;\n+ for (i = 0; i < n; ++i) {\n+ iter->iter[i] = bam_plp_init(func, data[i]);\n+ iter->pos[i] = iter->min;\n+ }\n+ return iter;\n+}\n+\n+void bam_mplp_init_overlaps(bam_mplp_t iter)\n+{\n+ int i;\n+ for (i = 0; i < iter->n; ++i)\n+ bam_plp_init_overlaps(iter->iter[i]);\n+}\n+\n+void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt)\n+{\n+ int i;\n+ for (i = 0; i < iter->n; ++i)\n+ iter->iter[i]->maxcnt = maxcnt;\n+}\n+\n+void bam_mplp_destroy(bam_mplp_t iter)\n+{\n+ int i;\n+ for (i = 0; i < iter->n; ++i) bam_plp_destroy(iter->iter[i]);\n+ free(iter->iter); free(iter->pos); free(iter->n_plp); free(iter->plp);\n+ free(iter);\n+}\n+\n+int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp)\n+{\n+ int i, ret = 0;\n+ uint64_t new_min = (uint64_t)-1;\n+ for (i = 0; i < iter->n; ++i) {\n+ if (iter->pos[i] == iter->min) {\n+ int tid, pos;\n+ iter->plp[i] = bam_plp_auto(iter->iter[i], &tid, &pos, &iter->n_plp[i]);\n+ if ( iter->iter[i]->error ) return -1;\n+ iter->pos[i] = iter->plp[i] ? 
(uint64_t)tid<<32 | pos : 0;\n+ }\n+ if (iter->plp[i] && iter->pos[i] < new_min) new_min = iter->pos[i];\n+ }\n+ iter->min = new_min;\n+ if (new_min == (uint64_t)-1) return 0;\n+ *_tid = new_min>>32; *_pos = (uint32_t)new_min;\n+ for (i = 0; i < iter->n; ++i) {\n+ if (iter->pos[i] == iter->min) { // FIXME: valgrind reports "uninitialised value(s) at this line"\n+ n_plp[i] = iter->n_plp[i], plp[i] = iter->plp[i];\n+ ++ret;\n+ } else n_plp[i] = 0, plp[i] = 0;\n+ }\n+ return ret;\n+}\n+\n+#endif // ~!defined(BAM_NO_PILEUP)\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/synced_bcf_reader.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/synced_bcf_reader.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,1257 @@\n+/* synced_bcf_reader.c -- stream through multiple VCF files.\n+\n+ Copyright (C) 2012-2014 Genome Research Ltd.\n+\n+ Author: Petr Danecek <pd3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#include <stdio.h>\n+#include <unistd.h>\n+#include <string.h>\n+#include <limits.h>\n+#include <errno.h>\n+#include <ctype.h>\n+#include <sys/stat.h>\n+#include "htslib/synced_bcf_reader.h"\n+#include "htslib/kseq.h"\n+#include "htslib/khash_str2int.h"\n+\n+#define MAX_CSI_COOR 0x7fffffff // maximum indexable coordinate of .csi\n+\n+typedef struct\n+{\n+ uint32_t start, end;\n+}\n+region1_t;\n+\n+typedef struct _region_t\n+{\n+ region1_t *regs;\n+ int nregs, mregs, creg;\n+}\n+region_t;\n+\n+static void _regions_add(bcf_sr_regions_t *reg, const char *chr, int start, int end);\n+static bcf_sr_regions_t *_regions_init_string(const char *str);\n+static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec);\n+\n+char *bcf_sr_strerror(int errnum)\n+{\n+ switch (errnum)\n+ {\n+ case open_failed: \n+ return strerror(errno); break;\n+ case not_bgzf:\n+ return "not compressed with bgzip"; break;\n+ case idx_load_failed:\n+ return "could not load index"; break;\n+ case file_type_error:\n+ return "unknown file type"; break;\n+ case api_usage_error:\n+ return "API usage error"; break;\n+ case header_error:\n+ return "could not parse header"; break;\n+ default: return ""; \n+ }\n+}\n+\n+static int *init_filters(bcf_hdr_t *hdr, const char *filters, int *nfilters)\n+{\n+ kstring_t str = {0,0,0};\n+ const char *tmp = filters, *prev = filters;\n+ int nout = 0, *out = NULL;\n+ while ( 1 )\n+ {\n+ if ( *tmp==\',\' || !*tmp )\n+ {\n+ out = (int*) realloc(out, (nout+1)*sizeof(int));\n+ if ( tmp-prev==1 && *prev==\'.\' )\n+ out[nout] = -1;\n+ else\n+ {\n+ str.l = 0;\n+ kputsn(prev, tmp-prev, &str);\n+ out[nout] = bcf_hdr_id2int(hdr, BCF_DT_ID, str.s);\n+ }\n+ nout++;\n+ if ( !*tmp ) break;\n+ prev = tmp+1;\n+ }\n+ tmp++;\n+ }\n+ if ( str.m ) free(str.s);\n+ *nfilters = nout;\n+ return out;\n+}\n+\n+int bcf_sr_set_regions(bcf_srs_t *readers, const char *regions, int is_file)\n+{\n+ assert( !readers->regions );\n+ if ( readers->nreaders )\n+ {\n+ 
fprintf(stderr,"[%s:%d %s] Error: bcf_sr_set_regions() must be called before bcf_sr_add_reader()\\n", __FILE__,__LINE__,__FUNCTION__);\n+ return -1;\n+ }\n+ readers->regions = bcf_sr_regions_init(regions,is_file,0,1,-2);\n+ if ( !readers->regions ) return -1;\n+ readers->explicit_regs = 1;\n+ readers->require_index = 1;\n+ return 0;\n+}\n+int bcf_sr_set_targets(bcf_srs_t *readers, const char *targets, int is_file, int alleles)\n+{\n+ assert( !readers->targets );\n+ if ( targets[0]==\'^\' )\n+ {\n+ '..b' reg->is_bin = 0;\n+ }\n+\n+ // tabix index absent, reading the whole file\n+ ret = hts_getline(reg->file, KS_SEP_LINE, ®->line);\n+ if ( ret<0 ) { reg->iseq = -1; return -1; }\n+ }\n+ ret = _regions_parse_line(reg->line.s, ichr,ifrom,ito, &chr,&chr_end,&from,&to);\n+ if ( ret<0 )\n+ {\n+ fprintf(stderr,"[%s:%d] Could not parse the file %s, using the columns %d,%d,%d\\n", __FILE__,__LINE__,reg->fname,ichr+1,ifrom+1,ito+1);\n+ return -1;\n+ }\n+ }\n+ if ( is_bed ) from++;\n+\n+ *chr_end = 0;\n+ if ( khash_str2int_get(reg->seq_hash, chr, ®->iseq)<0 )\n+ {\n+ fprintf(stderr,"Broken tabix index? 
The sequence \\"%s\\" not in dictionary [%s]\\n", chr,reg->line.s);\n+ exit(1);\n+ }\n+ *chr_end = \'\\t\';\n+\n+ reg->start = from - 1;\n+ reg->end = to - 1;\n+ return 0;\n+}\n+\n+static int _regions_match_alleles(bcf_sr_regions_t *reg, int als_idx, bcf1_t *rec)\n+{\n+ int i = 0, max_len = 0;\n+ if ( !reg->nals )\n+ {\n+ char *ss = reg->line.s;\n+ while ( i<als_idx && *ss )\n+ {\n+ if ( *ss==\'\\t\' ) i++;\n+ ss++;\n+ }\n+ char *se = ss;\n+ reg->nals = 1;\n+ while ( *se && *se!=\'\\t\' )\n+ {\n+ if ( *se==\',\' ) reg->nals++;\n+ se++;\n+ }\n+ ks_resize(®->als_str, se-ss+1+reg->nals);\n+ reg->als_str.l = 0;\n+ hts_expand(char*,reg->nals,reg->mals,reg->als);\n+ reg->nals = 0;\n+\n+ se = ss;\n+ while ( *(++se) )\n+ {\n+ if ( *se==\'\\t\' ) break;\n+ if ( *se!=\',\' ) continue;\n+ reg->als[reg->nals] = ®->als_str.s[reg->als_str.l];\n+ kputsn(ss,se-ss,®->als_str);\n+ if ( ®->als_str.s[reg->als_str.l] - reg->als[reg->nals] > max_len ) max_len = ®->als_str.s[reg->als_str.l] - reg->als[reg->nals];\n+ reg->als_str.l++;\n+ reg->nals++;\n+ ss = ++se;\n+ }\n+ reg->als[reg->nals] = ®->als_str.s[reg->als_str.l];\n+ kputsn(ss,se-ss,®->als_str);\n+ if ( ®->als_str.s[reg->als_str.l] - reg->als[reg->nals] > max_len ) max_len = ®->als_str.s[reg->als_str.l] - reg->als[reg->nals];\n+ reg->nals++;\n+ reg->als_type = max_len > 1 ? VCF_INDEL : VCF_SNP; // this is a simplified check, see vcf.c:bcf_set_variant_types\n+ }\n+ int type = bcf_get_variant_types(rec);\n+ if ( reg->als_type & VCF_INDEL )\n+ return type & VCF_INDEL ? 1 : 0;\n+ return !(type & VCF_INDEL) ? 
1 : 0;\n+}\n+\n+int bcf_sr_regions_overlap(bcf_sr_regions_t *reg, const char *seq, int start, int end)\n+{\n+ int iseq;\n+ if ( khash_str2int_get(reg->seq_hash, seq, &iseq)<0 ) return -1; // no such sequence\n+\n+ if ( reg->prev_seq==-1 || iseq!=reg->prev_seq || reg->prev_start > start ) // new chromosome or after a seek\n+ {\n+ // flush regions left on previous chromosome\n+ if ( reg->missed_reg_handler && reg->prev_seq!=-1 && reg->iseq!=-1 )\n+ bcf_sr_regions_flush(reg);\n+\n+ bcf_sr_regions_seek(reg, seq);\n+ reg->start = reg->end = -1;\n+ }\n+ if ( reg->prev_seq==iseq && reg->iseq!=iseq ) return -2; // no more regions on this chromosome\n+ reg->prev_seq = reg->iseq;\n+ reg->prev_start = start;\n+\n+ while ( iseq==reg->iseq && reg->end < start )\n+ {\n+ if ( bcf_sr_regions_next(reg) < 0 ) return -2; // no more regions left\n+ if ( reg->iseq != iseq ) return -1; // does not overlap any regions\n+ if ( reg->missed_reg_handler && reg->end < start ) reg->missed_reg_handler(reg, reg->missed_reg_data);\n+ }\n+ if ( reg->start <= end ) return 0; // region overlap\n+ return -1; // no overlap\n+}\n+\n+void bcf_sr_regions_flush(bcf_sr_regions_t *reg)\n+{\n+ if ( !reg->missed_reg_handler || reg->prev_seq==-1 ) return;\n+ while ( !bcf_sr_regions_next(reg) ) reg->missed_reg_handler(reg, reg->missed_reg_data);\n+ return;\n+}\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/tabix.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/tabix.1 Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,180 @@ +.TH tabix 1 "3 February 2015" "htslib-1.2.1" "Bioinformatics tools" +.SH NAME +.PP +bgzip \- Block compression/decompression utility +.PP +tabix \- Generic indexer for TAB-delimited genome position files +.\" +.\" Copyright (C) 2009-2011 Broad Institute. +.\" +.\" Author: Heng Li <lh3@sanger.ac.uk> +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining a +.\" copy of this software and associated documentation files (the "Software"), +.\" to deal in the Software without restriction, including without limitation +.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, +.\" and/or sell copies of the Software, and to permit persons to whom the +.\" Software is furnished to do so, subject to the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be included in +.\" all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +.\" DEALINGS IN THE SOFTWARE. +.\" +.SH SYNOPSIS +.PP +.B bgzip +.RB [ -cdhB ] +.RB [ -b +.IR virtualOffset ] +.RB [ -s +.IR size ] +.RI [ file ] +.PP +.B tabix +.RB [ -0lf ] +.RB [ -p +gff|bed|sam|vcf] +.RB [ -s +.IR seqCol ] +.RB [ -b +.IR begCol ] +.RB [ -e +.IR endCol ] +.RB [ -S +.IR lineSkip ] +.RB [ -c +.IR metaChar ] +.I in.tab.bgz +.RI [ "region1 " [ "region2 " [ ... 
"]]]" + +.SH DESCRIPTION +.PP +Tabix indexes a TAB-delimited genome position file +.I in.tab.bgz +and creates an index file ( +.I in.tab.bgz.tbi +or +.I in.tab.bgz.csi +) when +.I region +is absent from the command-line. The input data file must be position +sorted and compressed by +.B bgzip +which has a +.BR gzip (1) +like interface. After indexing, tabix is able to quickly retrieve data +lines overlapping +.I regions +specified in the format "chr:beginPos-endPos". Fast data retrieval also +works over network if URI is given as a file name and in this case the +index file will be downloaded if it is not present locally. + +.SH INDEXING OPTIONS +.TP 10 +.B -0, --zero-based +Specify that the position in the data file is 0-based (e.g. UCSC files) +rather than 1-based. +.TP +.BI "-b, --begin " INT +Column of start chromosomal position. [4] +.TP +.BI "-c, --comment " CHAR +Skip lines started with character CHAR. [#] +.TP +.BI "-C, --csi" +Generate a CSI index (.csi) instead of the default TBI index (.tbi). +.TP +.BI "-e, --end " INT +Column of end chromosomal position. The end column can be the same as the +start column. [5] +.TP +.B "-f, --force " +Force to overwrite the index file if it is present. +.TP +.BI "-m, --min-shift " INT +Set minimal interval size for CSI indices to 2^INT [14] +.TP +.BI "-p, --preset " STR +Input format for indexing. Valid values are: gff, bed, sam, vcf. +This option should not be applied together with any of +.BR -s ", " -b ", " -e ", " -c " and " -0 ; +it is not used for data retrieval because this setting is stored in +the index file. [gff] +.TP +.BI "-s, --sequence " INT +Column of sequence name. Options +.BR -s ", " -b ", " -e ", " -S ", " -c " and " -0 +are all stored in the index file and thus not used in data retrieval. [1] +.TP +.BI "-S, --skip-lines " INT +Skip first INT lines in the data file. [0] + +.SH QUERYING AND OTHER OPTIONS +.TP +.B "-h, --print-header " +Print also the header/meta lines. 
+.TP +.B "-H, --only-header " +Print only the header/meta lines. +.TP +.B "-i, --file-info " +Print file format info. +.TP +.B "-l, --list-chroms " +List the sequence names stored in the index file. +.TP +.B "-r, --reheader " FILE +Replace the header with the content of FILE +.TP +.B "-R, --regions " FILE +Restrict to regions listed in the FILE. The FILE can be a BED file (requires .bed, .bed.gz, .bed.bgz +file name extension) or a TAB-delimited file with CHROM, POS, and, optionally, +POS_TO columns, where positions are 1-based and inclusive. When this option is in use, the input +file may not be sorted. +.TP +.B "-T, --targets" FILE +Similar to +.B -R +but the entire input will be read sequentially and regions not listed in FILE will be skipped. +.PP +.SH EXAMPLE +(grep ^"#" in.gff; grep -v ^"#" in.gff | sort -k1,1 -k4,4n) | bgzip > sorted.gff.gz; + +tabix -p gff sorted.gff.gz; + +tabix sorted.gff.gz chr1:10,000,000-20,000,000; + +.SH NOTES +It is straightforward to achieve overlap queries using the standard +B-tree index (with or without binning) implemented in all SQL databases, +or the R-tree index in PostgreSQL and Oracle. But there are still many +reasons to use tabix. Firstly, tabix directly works with a lot of widely +used TAB-delimited formats such as GFF/GTF and BED. We do not need to +design database schema or specialized binary formats. Data do not need +to be duplicated in different formats, either. Secondly, tabix works on +compressed data files while most SQL databases do not. The GenCode +annotation GTF can be compressed down to 4%. Thirdly, tabix is +fast. The same indexing algorithm is known to work efficiently for an +alignment with a few billion short reads. SQL databases probably cannot +easily handle data at this scale. Last but not least, tabix supports +remote data retrieval. 
One can put the data file and the index at an FTP +or HTTP server, and other users or even web services will be able to get +a slice without downloading the entire file. + +.SH AUTHOR +.PP +Tabix was written by Heng Li. The BGZF library was originally +implemented by Bob Handsaker and modified by Heng Li for remote file +access and in-memory caching. + +.SH SEE ALSO +.PP +.BR samtools (1) |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/tabix.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/tabix.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,519 @@\n+/* tabix.c -- Generic indexer for TAB-delimited genome position files.\n+\n+ Copyright (C) 2009-2011 Broad Institute.\n+ Copyright (C) 2010-2012, 2014 Genome Research Ltd.\n+\n+ Author: Heng Li <lh3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <unistd.h>\n+#include <string.h>\n+#include <getopt.h>\n+#include <sys/types.h>\n+#include <sys/stat.h>\n+#include <errno.h>\n+#include "htslib/tbx.h"\n+#include "htslib/sam.h"\n+#include "htslib/vcf.h"\n+#include "htslib/kseq.h"\n+#include "htslib/bgzf.h"\n+#include "htslib/hts.h"\n+#include "htslib/regidx.h"\n+\n+typedef struct\n+{\n+ char *regions_fname, *targets_fname;\n+ int print_header, header_only;\n+}\n+args_t;\n+\n+static void error(const char *format, ...)\n+{\n+ va_list ap;\n+ va_start(ap, format);\n+ vfprintf(stderr, format, ap);\n+ va_end(ap);\n+ exit(EXIT_FAILURE);\n+}\n+\n+#define IS_GFF (1<<0)\n+#define IS_BED (1<<1)\n+#define IS_SAM (1<<2)\n+#define IS_VCF (1<<3)\n+#define IS_BCF (1<<4)\n+#define IS_BAM (1<<5)\n+#define IS_CRAM (1<<6)\n+#define IS_TXT (IS_GFF|IS_BED|IS_SAM|IS_VCF)\n+\n+int file_type(const char *fname)\n+{\n+ int l = strlen(fname);\n+ int strcasecmp(const char *s1, const char *s2);\n+ if (l>=7 && strcasecmp(fname+l-7, ".gff.gz") == 0) return IS_GFF;\n+ else if (l>=7 && strcasecmp(fname+l-7, ".bed.gz") == 0) return IS_BED;\n+ else if (l>=7 && strcasecmp(fname+l-7, ".sam.gz") == 0) return IS_SAM;\n+ else if (l>=7 && strcasecmp(fname+l-7, ".vcf.gz") == 0) return IS_VCF;\n+ else if (l>=4 && strcasecmp(fname+l-4, ".bcf") == 0) return IS_BCF;\n+ else if (l>=4 && strcasecmp(fname+l-4, ".bam") == 0) return IS_BAM;\n+ else if (l>=4 && strcasecmp(fname+l-5, ".cram") == 0) return IS_CRAM;\n+\n+ htsFile *fp = hts_open(fname,"r");\n+ enum htsExactFormat format = fp->format.format;\n+ hts_close(fp);\n+ if ( format == bcf ) return IS_BCF;\n+ if ( format == bam ) return IS_BAM;\n+ if ( format == cram ) return IS_CRAM;\n+ if ( format == vcf ) return IS_VCF;\n+\n+ return 0;\n+}\n+\n+static char **parse_regions(char *regions_fname, char **argv, int argc, int *nregs)\n+{\n+ kstring_t str = {0,0,0};\n+ int iseq = 0, ireg = 0;\n+ char **regs = NULL;\n+ *nregs = argc;\n+\n+ if ( regions_fname 
)\n+ {\n+ // improve me: this is a too heavy machinery for parsing regions...\n+\n+ regidx_t *idx = regidx_init(regions_fname, NULL, NULL, 0, NULL);\n+ if ( !idx ) error("Could not read %s\\n", regions_fname);\n+\n+ (*nregs) += regidx_nregs(idx);\n+ regs = (char**) malloc(sizeof(char*)*(*nregs));\n+\n+ int nseq;\n+ char **seqs = regidx_seq_names(idx, &nseq);\n+ for (iseq=0; iseq<nseq; iseq++)\n+ {\n+ regitr_t itr;\n+ regidx_overlap(idx, seqs[iseq], 0, UINT32_MAX, &itr);\n+ while ( itr.i < itr.n )\n+ {\n+ str.l = 0;\n+ ksprintf(&str, "%s:%d-%d", seqs[iseq], REGITR_START(itr)+'..b' 1; break;\n+ case \'m\': min_shift = atoi(optarg); break;\n+ case \'p\':\n+ if (strcmp(optarg, "gff") == 0) conf_ptr = &tbx_conf_gff;\n+ else if (strcmp(optarg, "bed") == 0) conf_ptr = &tbx_conf_bed;\n+ else if (strcmp(optarg, "sam") == 0) conf_ptr = &tbx_conf_sam;\n+ else if (strcmp(optarg, "vcf") == 0) conf_ptr = &tbx_conf_vcf;\n+ else if (strcmp(optarg, "bcf") == 0) ; // bcf is autodetected, preset is not needed\n+ else if (strcmp(optarg, "bam") == 0) ; // same as bcf\n+ else error("The preset string not recognised: \'%s\'\\n", optarg);\n+ break;\n+ case \'s\': conf.sc = atoi(optarg); break;\n+ case \'S\': conf.line_skip = atoi(optarg); break;\n+ default: return usage();\n+ }\n+ }\n+\n+ if ( optind==argc ) return usage();\n+\n+ if ( list_chroms )\n+ return query_chroms(argv[optind]);\n+\n+ if ( argc > optind+1 || args.header_only || args.regions_fname || args.targets_fname )\n+ {\n+ int nregs = 0;\n+ char **regs = NULL;\n+ if ( !args.header_only )\n+ regs = parse_regions(args.regions_fname, argv+optind+1, argc-optind-1, &nregs);\n+ return query_regions(&args, argv[optind], regs, nregs);\n+ }\n+\n+ char *fname = argv[optind];\n+ int ftype = file_type(fname);\n+ if ( !conf_ptr ) // no preset given\n+ {\n+ if ( ftype==IS_GFF ) conf_ptr = &tbx_conf_gff;\n+ else if ( ftype==IS_BED ) conf_ptr = &tbx_conf_bed;\n+ else if ( ftype==IS_SAM ) conf_ptr = &tbx_conf_sam;\n+ else if ( ftype==IS_VCF 
)\n+ {\n+ conf_ptr = &tbx_conf_vcf;\n+ if ( !min_shift && do_csi ) min_shift = 14;\n+ }\n+ else if ( ftype==IS_BCF )\n+ {\n+ if ( !min_shift ) min_shift = 14;\n+ }\n+ else if ( ftype==IS_BAM )\n+ {\n+ if ( !min_shift ) min_shift = 14;\n+ }\n+ }\n+ if ( do_csi )\n+ {\n+ if ( !min_shift ) min_shift = 14;\n+ min_shift *= do_csi; // positive for CSIv2, negative for CSIv1\n+ }\n+ if ( min_shift!=0 && !do_csi ) do_csi = 1;\n+\n+ if ( reheader )\n+ return reheader_file(fname, reheader, ftype, conf_ptr);\n+\n+ if ( conf_ptr )\n+ conf = *conf_ptr;\n+\n+ char *suffix = ".tbi";\n+ if ( do_csi ) suffix = ".csi";\n+ else if ( ftype==IS_BAM ) suffix = ".bai";\n+ else if ( ftype==IS_CRAM ) suffix = ".crai";\n+\n+ char *idx_fname = calloc(strlen(fname) + 5, 1);\n+ strcat(strcpy(idx_fname, fname), suffix);\n+\n+ struct stat stat_tbi, stat_file;\n+ if ( !is_force && stat(idx_fname, &stat_tbi)==0 )\n+ {\n+ // Before complaining about existing index, check if the VCF file isn\'t\n+ // newer. This is a common source of errors, people tend not to notice\n+ // that tabix failed\n+ stat(fname, &stat_file);\n+ if ( stat_file.st_mtime <= stat_tbi.st_mtime )\n+ error("[tabix] the index file exists. Please use \'-f\' to overwrite.\\n");\n+ }\n+ free(idx_fname);\n+\n+ if ( ftype==IS_CRAM )\n+ {\n+ if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\\n", fname);\n+ return 0;\n+ }\n+ else if ( do_csi )\n+ {\n+ if ( ftype==IS_BCF )\n+ {\n+ if ( bcf_index_build(fname, min_shift)!=0 ) error("bcf_index_build failed: %s\\n", fname);\n+ return 0;\n+ }\n+ if ( ftype==IS_BAM )\n+ {\n+ if ( bam_index_build(fname, min_shift)!=0 ) error("bam_index_build failed: %s\\n", fname);\n+ return 0;\n+ }\n+ if ( tbx_index_build(fname, min_shift, &conf)!=0 ) error("tbx_index_build failed: %s\\n", fname);\n+ return 0;\n+ }\n+ else // TBI index\n+ {\n+ if ( tbx_index_build(fname, min_shift, &conf) ) error("tbx_index_build failed: %s\\n", fname);\n+ return 0;\n+ }\n+ return 0;\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/tbx.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/tbx.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,320 @@\n+/* tbx.c -- tabix API functions.\n+\n+ Copyright (C) 2009, 2010, 2012-2014 Genome Research Ltd.\n+ Copyright (C) 2010-2012 Broad Institute.\n+\n+ Author: Heng Li <lh3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#include <stdlib.h>\n+#include <string.h>\n+#include <ctype.h>\n+#include <stdio.h>\n+#include <assert.h>\n+#include "htslib/tbx.h"\n+#include "htslib/bgzf.h"\n+\n+#include "htslib/khash.h"\n+KHASH_DECLARE(s2i, kh_cstr_t, int64_t)\n+\n+tbx_conf_t tbx_conf_gff = { 0, 1, 4, 5, \'#\', 0 };\n+tbx_conf_t tbx_conf_bed = { TBX_UCSC, 1, 2, 3, \'#\', 0 };\n+tbx_conf_t tbx_conf_psltbl = { TBX_UCSC, 15, 17, 18, \'#\', 0 };\n+tbx_conf_t tbx_conf_sam = { TBX_SAM, 3, 4, 0, \'@\', 0 };\n+tbx_conf_t tbx_conf_vcf = { TBX_VCF, 1, 2, 0, \'#\', 0 };\n+\n+typedef struct {\n+ int64_t beg, end;\n+ char *ss, *se;\n+ int tid;\n+} tbx_intv_t;\n+\n+static inline int get_tid(tbx_t *tbx, const char *ss, int is_add)\n+{\n+ khint_t k;\n+ khash_t(s2i) *d;\n+ if (tbx->dict == 0) tbx->dict = kh_init(s2i);\n+ d = (khash_t(s2i)*)tbx->dict;\n+ if (is_add) {\n+ int absent;\n+ k = kh_put(s2i, d, ss, &absent);\n+ if (absent) {\n+ kh_key(d, k) = strdup(ss);\n+ kh_val(d, k) = kh_size(d) - 1;\n+ }\n+ } else k = kh_get(s2i, d, ss);\n+ return k == kh_end(d)? 
-1 : kh_val(d, k);\n+}\n+\n+int tbx_name2id(tbx_t *tbx, const char *ss)\n+{\n+ return get_tid(tbx, ss, 0);\n+}\n+\n+int tbx_parse1(const tbx_conf_t *conf, int len, char *line, tbx_intv_t *intv)\n+{\n+ int i, b = 0, id = 1, ncols = 0;\n+ char *s;\n+ intv->ss = intv->se = 0; intv->beg = intv->end = -1;\n+ for (i = 0; i <= len; ++i) {\n+ if (line[i] == \'\\t\' || line[i] == 0) {\n+ ++ncols;\n+ if (id == conf->sc) {\n+ intv->ss = line + b; intv->se = line + i;\n+ } else if (id == conf->bc) {\n+ // here ->beg is 0-based.\n+ intv->beg = intv->end = strtol(line + b, &s, 0);\n+ if ( s==line+b ) return -1; // expected int\n+ if (!(conf->preset&TBX_UCSC)) --intv->beg;\n+ else ++intv->end;\n+ if (intv->beg < 0) intv->beg = 0;\n+ if (intv->end < 1) intv->end = 1;\n+ } else {\n+ if ((conf->preset&0xffff) == TBX_GENERIC) {\n+ if (id == conf->ec)\n+ {\n+ intv->end = strtol(line + b, &s, 0);\n+ if ( s==line+b ) return -1; // expected int\n+ }\n+ } else if ((conf->preset&0xffff) == TBX_SAM) {\n+ if (id == 6) { // CIGAR\n+ int l = 0, op;\n+ char *t;\n+ for (s = line + b; s < line + i;) {\n+ long x = strtol(s, &t, 10);\n+ op = toupper(*t);\n+ if (op == \'M\' || op == \'D\' || op == \'N\') l += x;\n+ s = t + 1;\n+ '..b'k)) continue;\n+ name[kh_val(d, k)] = (char*)kh_key(d, k);\n+ l += strlen(kh_key(d, k)) + 1; // +1 to include \'\\0\'\n+ }\n+ l_nm = x[6] = l;\n+ meta = (uint8_t*)malloc(l_nm + 28);\n+ if (ed_is_big())\n+ for (i = 0; i < 7; ++i)\n+ x[i] = ed_swap_4(x[i]);\n+ memcpy(meta, x, 28);\n+ for (l = 28, i = 0; i < (int)kh_size(d); ++i) {\n+ int x = strlen(name[i]) + 1;\n+ memcpy(meta + l, name[i], x);\n+ l += x;\n+ }\n+ free(name);\n+ hts_idx_set_meta(tbx->idx, l, meta, 0);\n+}\n+\n+tbx_t *tbx_index(BGZF *fp, int min_shift, const tbx_conf_t *conf)\n+{\n+ tbx_t *tbx;\n+ kstring_t str;\n+ int ret, first = 0, n_lvls, fmt;\n+ int64_t lineno = 0;\n+ uint64_t last_off = 0;\n+ tbx_intv_t intv;\n+\n+ str.s = 0; str.l = str.m = 0;\n+ tbx = (tbx_t*)calloc(1, sizeof(tbx_t));\n+ tbx->conf = 
*conf;\n+ if (min_shift > 0) n_lvls = (TBX_MAX_SHIFT - min_shift + 2) / 3, fmt = HTS_FMT_CSI;\n+ else min_shift = 14, n_lvls = 5, fmt = HTS_FMT_TBI;\n+ while ((ret = bgzf_getline(fp, \'\\n\', &str)) >= 0) {\n+ ++lineno;\n+ if (lineno <= tbx->conf.line_skip || str.s[0] == tbx->conf.meta_char) {\n+ last_off = bgzf_tell(fp);\n+ continue;\n+ }\n+ if (first == 0) {\n+ tbx->idx = hts_idx_init(0, fmt, last_off, min_shift, n_lvls);\n+ first = 1;\n+ }\n+ get_intv(tbx, &str, &intv, 1);\n+ ret = hts_idx_push(tbx->idx, intv.tid, intv.beg, intv.end, bgzf_tell(fp), 1);\n+ if (ret < 0)\n+ {\n+ free(str.s);\n+ tbx_destroy(tbx);\n+ return NULL;\n+ }\n+ }\n+ if ( !tbx->idx ) tbx->idx = hts_idx_init(0, fmt, last_off, min_shift, n_lvls); // empty file\n+ if ( !tbx->dict ) tbx->dict = kh_init(s2i);\n+ hts_idx_finish(tbx->idx, bgzf_tell(fp));\n+ tbx_set_meta(tbx);\n+ free(str.s);\n+ return tbx;\n+}\n+\n+void tbx_destroy(tbx_t *tbx)\n+{\n+ khash_t(s2i) *d = (khash_t(s2i)*)tbx->dict;\n+ if (d != NULL)\n+ {\n+ khint_t k;\n+ for (k = kh_begin(d); k != kh_end(d); ++k)\n+ if (kh_exist(d, k)) free((char*)kh_key(d, k));\n+ }\n+ hts_idx_destroy(tbx->idx);\n+ kh_destroy(s2i, d);\n+ free(tbx);\n+}\n+\n+int tbx_index_build(const char *fn, int min_shift, const tbx_conf_t *conf)\n+{\n+ tbx_t *tbx;\n+ BGZF *fp;\n+ if ( bgzf_is_bgzf(fn)!=1 ) { fprintf(stderr,"Not a BGZF file: %s\\n", fn); return -1; }\n+ if ((fp = bgzf_open(fn, "r")) == 0) return -1;\n+ if ( !fp->is_compressed ) { bgzf_close(fp); return -1; }\n+ tbx = tbx_index(fp, min_shift, conf);\n+ bgzf_close(fp);\n+ if ( !tbx ) return -1;\n+ hts_idx_save(tbx->idx, fn, min_shift > 0? 
HTS_FMT_CSI : HTS_FMT_TBI);\n+ tbx_destroy(tbx);\n+ return 0;\n+}\n+\n+tbx_t *tbx_index_load(const char *fn)\n+{\n+ tbx_t *tbx;\n+ uint8_t *meta;\n+ char *nm, *p;\n+ uint32_t x[7];\n+ int l_meta, l_nm;\n+ tbx = (tbx_t*)calloc(1, sizeof(tbx_t));\n+ tbx->idx = hts_idx_load(fn, HTS_FMT_TBI);\n+ if ( !tbx->idx )\n+ {\n+ free(tbx);\n+ return NULL;\n+ }\n+ meta = hts_idx_get_meta(tbx->idx, &l_meta);\n+ if ( !meta )\n+ {\n+ free(tbx);\n+ return NULL;\n+ }\n+ memcpy(x, meta, 28);\n+ memcpy(&tbx->conf, x, 24);\n+ p = nm = (char*)meta + 28;\n+ l_nm = x[6];\n+ for (; p - nm < l_nm; p += strlen(p) + 1) get_tid(tbx, p, 1);\n+ return tbx;\n+}\n+\n+const char **tbx_seqnames(tbx_t *tbx, int *n)\n+{\n+ khash_t(s2i) *d = (khash_t(s2i)*)tbx->dict;\n+ if (d == NULL)\n+ {\n+ *n = 0;\n+ return NULL;\n+ }\n+ int tid, m = kh_size(d);\n+ const char **names = (const char**) calloc(m,sizeof(const char*));\n+ khint_t k;\n+ for (k=kh_begin(d); k<kh_end(d); k++)\n+ {\n+ if ( !kh_exist(d,k) ) continue;\n+ tid = kh_val(d,k);\n+ assert( tid<m );\n+ names[tid] = kh_key(d,k);\n+ }\n+ // sanity check: there should be no gaps\n+ for (tid=0; tid<m; tid++)\n+ assert(names[tid]);\n+ *n = m;\n+ return names;\n+}\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/auxf#values.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/auxf#values.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +@HD VN:1.4 SO:unsorted +@SQ SN:Sheila LN:20 +@RG ID:ID SM:foo +Fred 16 Sheila 1 86 10M * 0 0 GCTAGCTCAG ********** RG:Z:ID A!:A:! Ac:A:c AC:A:C I0:i:0 I1:i:1 I2:i:127 I3:i:128 I4:i:255 I5:i:256 I6:i:32767 I7:i:32768 I8:i:65535 I9:i:65536 IA:i:2147483647 i1:i:-1 i2:i:-127 i3:i:-128 i4:i:-255 i5:i:-256 i6:i:-32767 i7:i:-32768 i8:i:-65535 i9:i:-65536 iA:i:-2147483647 iB:i:-2147483648 F0:f:-1 F1:f:0 F2:f:1 F3:f:9.9e-19 F4:f:-9.9e-19 F5:f:9.9e+19 F6:f:-9.9e+19 H0:H:AA H1:H:dead00beef Z0:Z:space space +Jim 16 Sheila 11 11 10M * 0 0 AAAAAAAAAA * BC:B:C,0,127,128,255 Bc:B:c,-128,-127,0,127 BS:B:S,0,32767,32768,65535 Bs:B:s,-32768,-32767,0,32767 BI:B:I,0,2147483647,2147483648,4294967295 Bi:B:i,-2147483648,-2147483647,0,2147483647 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/auxf.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/auxf.fa Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,2 @@ +>Sheila +GCTAGCTCAGAAAAAAAAAA |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/auxf.fa.fai --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/auxf.fa.fai Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,1 @@ +Sheila 20 8 20 21 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/c1#bounds.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/c1#bounds.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,4 @@ +@SQ SN:c1 LN:10 +s0 0 c1 1 0 10M * 0 0 AACCGCGGTT ********** +s1 0 c1 2 0 10M * 0 0 ACCGCGGTTC ********** +s2 0 c1 3 0 10M * 0 0 CCGCGGTTCG ********** |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/c1#clip.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/c1#clip.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,8 @@ +@SQ SN:c1 LN:10 +s0a 0 c1 1 0 10M * 0 0 AACCGCGGTT ********** +s0A 0 c1 1 0 3M4N3M * 0 0 AACGTT ****** +s0b 0 c1 2 0 1S8M1S * 0 0 AACCGCGGTT ********** +s0B 0 c1 2 0 1H8M1H * 0 0 ACCGCGGT ******** +s0c 0 c1 3 0 2S6M2S * 0 0 AACCGCGGTT ********** +s0c 0 c1 3 0 2S3M2I3M2S * 0 0 AACCGNNCGGTT ************ +s0C 0 c1 3 0 2H6M2H * 0 0 CCGCGG ****** |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/c1#pad1.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/c1#pad1.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,10 @@ +@SQ SN:c1 LN:10 +s0a 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0b 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0c 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s1 0 c1 1 0 5M6I5M * 0 0 AACCGGTTAACCGGTT * +s2 0 c1 1 0 5M1P4I1P5M * 0 0 AACCGTTAACGGTT * +s3 0 c1 1 0 5M3I3P5M * 0 0 AACCGGTTCGGTT * +s4 0 c1 1 0 5M3P3I5M * 0 0 AACCGAACCGGTT * +s5 0 c1 1 0 4M1D2P2I2P1D4M * 0 0 AACCTAGGTT * +s6 0 c1 1 0 2M3D6I3D2M * 0 0 AAGTTAACTT * |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/c1#pad2.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/c1#pad2.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,14 @@ +@SQ SN:c1 LN:50 +s0a 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0b 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0c 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s0d 0 c1 1 0 10M * 0 0 AACCGCGGTT * +s1 0 c1 1 0 5M6I5M * 0 0 AACCGGTTAACCGGTT * +s2 0 c1 1 0 5M1P4I1P5M * 0 0 AACCGTTAACGGTT * +s3 0 c1 1 0 5M3I3P5M * 0 0 AACCGGTTCGGTT * +s4 0 c1 1 0 5M3P3I5M * 0 0 AACCGAACCGGTT * +s5 0 c1 1 0 4M1D2P2I2P1D4M * 0 0 AACCTAGGTT * +s6 0 c1 1 0 2M3D6I3D2M * 0 0 AAGTTAACTT * +s7 0 c1 1 0 4M2D4M * 0 0 AACCGGTT * +s8 0 c1 1 0 5D2P2I2P5D * 0 0 TA * +s9 0 c1 5 0 1M2P2I2P * 0 0 GTA * |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/c1#pad3.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/c1#pad3.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,14 @@ +@SQ SN:c1 LN:16 +@RG ID:p.sam SM:unknown LB:p.sam +s0a 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s0b 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s0c 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s0d 0 c1 6 0 5I6P5M * 0 0 AACCGCGGTT * RG:Z:p.sam +s1 0 c1 6 0 11I5M * 0 0 AACCGGTTAACCGGTT * RG:Z:p.sam +s2 0 c1 6 0 5I1P4I1P5M * 0 0 AACCGTTAACGGTT * RG:Z:p.sam +s3 0 c1 6 0 8I3P5M * 0 0 AACCGGTTCGGTT * RG:Z:p.sam +s4 0 c1 6 0 5I3P3I5M * 0 0 AACCGAACCGGTT * RG:Z:p.sam +s5 0 c1 6 0 4I3P2I2P1D4M * 0 0 AACCTAGGTT * RG:Z:p.sam +s6 0 c1 6 0 2I3P6I3D2M * 0 0 AAGTTAACTT * RG:Z:p.sam +s7 0 c1 6 0 4I7P1D4M * 0 0 AACCGGTT * RG:Z:p.sam +s8 0 c1 6 0 7P2I2P * 0 0 TA !! RG:Z:p.sam |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/c1.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/c1.fa Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,2 @@ +>c1 +AACCGCGGTT |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/c1.fa.fai --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/c1.fa.fai Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,1 @@ +c1 10 4 10 11 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce#1.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce#1.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,2 @@ +@SQ SN:CHROMOSOME_I LN:15072423 +SRR065390.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 AS:i:-18 XS:i:-18 YT:Z:UU |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce#2.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce#2.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,3 @@ +@SQ SN:CHROMOSOME_I LN:15072423 +SRR065390.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +SRR065390.921023 16 CHROMOSOME_I 3 12 100M * 0 0 CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000 XG:i:0 XM:i:3 XN:i:0 XO:i:0 AS:i:-6 XS:i:-13 YT:Z:UU |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce#5.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce#5.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,11 @@ +@SQ SN:CHROMOSOME_I LN:15072423 +@SQ SN:CHROMOSOME_II LN:15279345 +@SQ SN:CHROMOSOME_III LN:13783700 +@SQ SN:CHROMOSOME_IV LN:17493793 +@SQ SN:CHROMOSOME_V LN:20924149 +I 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +II.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +III 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +IV 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +V 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +VI 2048 CHROMOSOME_I 2 1 27M100000D73M * 0 0 ACTAAGCCTAAGCCTAAGCCTAAGCCAATTATCGATTTCTGAAAAAATTATCGAATTTTCTAGAAATTTTGCAAATTTTTTCATAAAATTATCGATTTTA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce#5b.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce#5b.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,12 @@ +@SQ SN:CHROMOSOME_I LN:15072423 +@SQ SN:CHROMOSOME_II LN:15279345 +@SQ SN:CHROMOSOME_III LN:13783700 +@SQ SN:CHROMOSOME_IV LN:17493793 +@SQ SN:CHROMOSOME_V LN:20924149 +I 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +II.14978392 16 CHROMOSOME_II 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +III 16 CHROMOSOME_III 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +IV 16 CHROMOSOME_IV 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +V 16 CHROMOSOME_V 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 XS:i:-18 AS:i:-18 YT:Z:UU +VI 0 CHROMOSOME_V 10 1 7S20M1D23M10I30M10S * 0 0 AGCCTAAGCCTAAGCCTAAGCCTAAGCTAAGCCTAAGCCTAAGCCTAAGCTTTTTTTTTTCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA * +VI 256 CHROMOSOME_V 10 1 7S20M1D23M10I30M10S * 0 0 * * |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce#large_seq.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce#large_seq.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| b'@@ -0,0 +1,3 @@\n+@SQ\tSN:CHROMOSOME_I\tLN:15072423\n+S1\t0\tCHROMOSOME_I\t1\t255\tbn+V\t16\tCHROMOSOME_I\t2\t1\t27M1D73M\t*\t0\t0\tCCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA\t#############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce#tag_depadded.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce#tag_depadded.sam Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,11 @@ +@HD VN:1.4 SO:coordinate +@RG ID:UNKNOWN SM:unknown LB:UNKNOWN +@SQ SN:CHROMOSOME_I LN:102 +I 16 CHROMOSOME_I 2 1 100M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC PT:Z:27;28;+;STOP;Note=tag1 +II.14978392 16 CHROMOSOME_I 2 1 100M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC PT:Z:27;27;+;PRIM;Note=tag2|28;28;+;OLIG;Note=tag3 +III 0 CHROMOSOME_I 2 1 27M1I73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########################"##@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC PT:Z:1;1;+;COMM|ote=consensus%0Amulti%09line%0Atag%0A!"#$%25^&*()_+<>?:@~{}%7C%0A1234567890-=[]'%3B#,./\ +* 768 CHROMOSOME_I 101 255 1M * 0 0 * * CT:Z:+;STOP |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce#tag_padded.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce#tag_padded.sam Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,11 @@ +@HD VN:1.4 SO:coordinate +@RG ID:UNKNOWN SM:unknown LB:UNKNOWN +@SQ SN:CHROMOSOME_I LN:102 +I 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC PT:Z:27;29;+;STOP;Note=tag1 +II.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC PT:Z:27;27;+;PRIM;Note=tag2|29;29;+;OLIG;Note=tag3 +III 0 CHROMOSOME_I 2 1 101M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTCAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########################"##@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC PT:Z:1;1;+;COMM|ote=consensus%0Amulti%09line%0Atag%0A!"#$%25^&*()_+<>?:@~{}%7C%0A1234567890-=[]'%3B#,./\ +* 768 CHROMOSOME_I 102 255 1M * 0 0 * * CT:Z:+;STOP |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce#unmap.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce#unmap.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
||
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce#unmap1.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce#unmap1.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,20 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:CHROMOSOME_I LN:15072423 +@SQ SN:CHROMOSOME_II LN:15279345 +@SQ SN:CHROMOSOME_III LN:13783700 +@SQ SN:CHROMOSOME_IV LN:17493793 +@SQ SN:CHROMOSOME_V LN:20924149 +@SQ SN:CHROMOSOME_X LN:17718866 +@SQ SN:CHROMOSOME_MtDNA LN:13794 +@RG ID:UNKNOWN SM:UNKNOWN +@PG ID:bowtie2 PN:bowtie2 VN:2.0.0-beta|
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce#unmap2.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce#unmap2.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,29 @@ +@HD VN:1.0 SO:unsorted +@SQ SN:CHROMOSOME_I LN:15072423 +@SQ SN:CHROMOSOME_II LN:15279345 +@SQ SN:CHROMOSOME_III LN:13783700 +@SQ SN:CHROMOSOME_IV LN:17493793 +@SQ SN:CHROMOSOME_V LN:20924149 +@SQ SN:CHROMOSOME_X LN:17718866 +@SQ SN:CHROMOSOME_MtDNA LN:13794 +@RG ID:UNKNOWN SM:UNKNOWN +@PG ID:bowtie2 PN:bowtie2 VN:2.0.0-beta5 +SRR065390.14978392 16 CHROMOSOME_I 2 1 27M1D73M * 0 0 CCTAGCCCTAACCCTAACCCTAACCCTAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #############################@B?8B?BA@@DDBCDDCBC@CDCDCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:1 XM:i:5 XN:i:0 XO:i:1 AS:i:-18 XS:i:-18 YT:Z:UU +SRR065390.921023 16 CHROMOSOME_I 3 12 100M * 0 0 CTAAGCCTAAATCTAAGCCTAACCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################################???88:;98768700000<>:BBA?BBAB?BBBBBBBB>B>BB::;?:00000 XG:i:0 XM:i:3 XN:i:0 XO:i:0 AS:i:-6 XS:i:-13 YT:Z:UU +SRR065390.1871511 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA <?@<@A8>0:BB@>B<=B@???@=8@B>BB@CA@DACDCBBCCCA@CCCCACCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.3743423 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ##################?6@:7<=@3=@ABAAB>BDBBABADABDDDBDDBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.4251890 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###########@BB=BCBBC?B>B;>B@@ADBBB@DBBBBDCCBBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.5238868 16 CHROMOSOME_I 3 1 100M * 0 0 
CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA @,=@@D8D;?BBB>;?BBB==BB@D;>D>BBB>BBDDB<DABADCACDCCBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.6023338 0 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAAGCTAC CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC@CCDDDBCCABB=DABBA?################ XG:i:0 XM:i:3 XN:i:0 XO:i:0 AS:i:-6 XS:i:-6 YT:Z:UU +SRR065390.6815812 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############?@.@?B;B0B=;<DADB@@BDDBBDDBCBCBD@CCDCCCCCCCDCCCCCCCCACCCCCCCCCCBCCCCCCDCCCCCCCCCCCBCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.6905811 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA #######################BB@>A<BC>@@BCCB@=BACBCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i:0 YT:Z:UU +SRR065390.8289592 16 CHROMOSOME_I 3 1 100M * 0 0 CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA ###############################A?@C9@@BC=AABDD@A@DC@CB=@BA?6@CCAAC@+CCCCCCCCCCCCCCC@CCCCCCCCCCCCCCCC XG:i:0 XM:i:0 XN:i:0 XO:i:0 AS:i:0 XS:i|
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce.fa Thu Mar 24 17:12:52 2016 -0400 |
| b |
| b'@@ -0,0 +1,20803 @@\n+>CHROMOSOME_I\n+GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC\n+CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT\n+AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA\n+GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC\n+CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT\n+AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAA\n+GCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGC\n+CTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAGCCT\n+AAGCCTAAGCCTAAGCCTAAGCCTAAGCCTAAAAAATTGAGATAAGAAAA\n+CATTTTACTTTTTCAAAATTGTTTTCATGCTAAATTCAAAACGTTTTTTT\n+TTTAGTGAAGCTTCTAGATATTTGGCGGGTACCTCTAATTTTGCCTGCCT\n+GCCAACCTATATGCTCCTGTGTTTAGGCCTAATACTAAGCCTAAGCCTAA\n+GCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAATACTAAGCCTAAGC\n+CTAAGACTAAGCCTAAGACTAAGCCTAAGACTAAGCCTAATACTAAGCCT\n+AAGCCTAAGACTAAGCCTAAGCCTAATACTAAGCCTAAGCCTAAGACTAA\n+GCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAAGACTAAGCCTAAGA\n+CTAAGCCTAATACTAAGCCTAAGCCTAAGACTAAGCCTAAGCCTAAAAGA\n+ATATGGTAGCTACAGAAACGGTAGTACACTCTTCTGAAAATACAAAAAAT\n+TTGCAATTTTTATAGCTAGGGCACTTTTTGTCTGCCCAAATATAGGCAAC\n+CAAAAATAATTGCCAAGTTTTTAATGATTTGTTGCATATTGAAAAAAACA\n+TTTTTCGGGTTTTTTGAAATGAATATCGTAGCTACAGAAACGGTTGTGCA\n+CTCATCTGAAAGTTTGTTTTTCTTGTTTTCTTGCACTTTGTGCAGAATTC\n+TTGATTCTTGATTCTTGCAGAAATTTGCAAGAAAATTCGCAAGAAATTTG\n+TATTAAAAACTGTTCAAAATTTTTGGAAATTAGTTTAAAAATCTCACATT\n+TTTTTTAGAAAAATTATTTTTAAGAATTTTTCATTTTAGGAATATTGTTA\n+TTTCAGAAAATAGCTAAATGTGATTTCTGTAATTTTGCCTGCCAAATTCG\n+TGAAATGCAATAAAAATCTAATATCCCTCATCAGTGCGATTTCCGAATCA\n+GTATATTTTTACGTAATAGCTTCTTTGACATCAATAAGTATTTGCCTATA\n+TGACTTTAGACTTGAAATTGGCTATTAATGCCAATTTCATGATATCTAGC\n+CACTTTAGTATAATTGTTTTTAGTTTTTGGCAAAACTATTGTCTAAACAG\n+ATATTCGTGTTTTCAAGAAATTTTTCATGGTTTTTCTTGGTCTTTTCTTG\n+GTATTTTTTTGACAAAAATTTTTGTTTCTTGATTCTTGCAAAAATTTTTC\n+CGTTTGACGGCCTTGATGTGCACTACCTTCGCTTAAATACTACATTTTCT\n+GAAAATGTTATAATAGTGTTCATTGTTTCATACAAATACTTATTTAATAG\n+TATTTCTGGTTATATAATTTGTATAAAAAGTGGTTGACATAACAAGGCTG\n+ACGAAACTTTGTGATGGCTGAAAATATTTTCCTAGCTTTATTGATTTTTA\n+TTTATACGTGTTTGAATAACTTGGCCAAATCGCCGAGAAGGAATAGAATA
\n+CTGGACGACATTGTACATATTTTCCAAAAAATCAGAAAGTAGATGACGGG\n+ACCAATTCTTTCTGTCAGGTTTTACAACCGCCCAGTGCGTCTACGTCACA\n+TGTTGTATAAATGGTTGTAAACAATATGCGGAAACAATCAAATGCATTCC\n+CATAAGGCATAATATAGAGGCTACAGGCAATGAGTATCGCTCTTTGCTTT\n+GTTTAAAGGGGGAGTAGAGTTTGTGGGGAAATATATGTTTCTGACTCTAA\n+TTTTGCCCCTGATACCGAATATCGATGTGAAAAAATTTAAAAAAATTTCC\n+CTGATTTTATATTAATTTTTAAAATCCGAAAATCCATTGGATGCCTATAT\n+GTGAGTTTTTAAACGCAAAATTTTCCCGGCAGAGACGCCCCGCCCACGAA\n+ACCGTGCCGCACGTGTGGGTTTACGAGCTGAATATTTTCCTTCTATTTTT\n+ATTTGATTTTATACCGATTTTCGTCGATTTTTCTCATTTTTTCTCTTTTT\n+TTTGGTGTTTTTTATTGAAAATTTTGTGATTTTCGTAAATTTATTCCTAT\n+TTATTAATAAAAACAAAAACAATTCCATTAAATATCCCATTTTCAGCGCA\n+AAATCGACTGGAGACTAGGAAAATCGTCTGGAGATAGAACGGATCAACAA\n+GATTATTATTATATCATTAATAATATTTATCAATTTTCTTCTGAGAGTCT\n+CATTGAGACTCTTATTTACGCCAAGAAATAAATTTAACATTAAAATTGTT\n+CATTTTTGAAAAAAAAATAATTAAAAAAACACATTTTTTGGAAAAAAAAA\n+TAAATAAAAAAAATTGTCCTCGAGGATCCTCCGGAGCGCGTCGAATCAAT\n+GTTTCCGGAACTCTGAAAATTAAATGTTTGTATGATTGTAGAACCCTTTC\n+GCTATTGAGATTTGATAACTTTTAAGTAATAAAATTTTCGCAGTAAGACA\n+TTAAAACATTTCACAATTAAGCTGGTTCTGAACTGTGTGAAGTATATTGA\n+AAAAAACTAACTGATACAAAAATATAATTTTATGATAGTTTTCTGGATGT\n+CCCAATATAAACGATGTCAATTCTGCGACATGCTACAGTCATCCACGAAA\n+GTAACCCGAATACCGACAAAAGAAGAGGAACGCCAACTTTGGATAGACGC\n+TCTAGGGGCTGATTTTGGTCGGAAAATAGTCGGGAAAAAATAGAGGACAT\n+TACAGATGAGGATGAGGATGAAGATAGAAATTTGCCGACAACTTCGTCAT\n+GCCGCTGATTTTTTTGATGTTCTACGCTTAAATTTTCAGCGAACGAACTA\n+TTTTTTATATTTTGATTGTTTTTAAATAATATTTGCCATAAGAAATTCTC\n+ACTTTTCCAGGAAACGTCGTTTCGCCGCGATTTTCCTCGTCTCCAGTCGA\n+TTTTGCGCTGAAAATGGGATATTTAATGGAATTGTTTTTGTTTTTATTAA\n+TAAATAGGAATAAATTTACGAAAATCACAAAATTTTCAATAAAAAACACC\n+AAAAAAAAAGAGAAAAAATGAGAAAAATCGACGAAAATCGGTATAAAATC\n+AAATAAAAATAGAAGGAAAATATTCAGCTCGTAAACCCGCAAGTGCGGCA\n+CGGTTTCGTGGGCGGGGCGTCTCTGGCGGGAAAATTTTGCGTTTGAAAAC\n+TCACATATAGGCATCCAATGGATTTTCGGATTTTCAAAATTAATATAAAA\n+TCAGGGAAATTTTTTTAAATTTTGTCACATCGATATTCGGTATCAGGGGC\n+AAAATTAGAGTCAGAAACATATATTTCCCCACAAACTCTACTCCCCCTTT\n+AACAACCACCCGAGGATATATTCGACAAACGATCTATCTACTAGGAATAA\n+CTCGATTATTGACATATTATAGACTTCTTTTAGTAT
TTGTAAAATAGAGG\n+ATCAGACCCAAAATTCAGCCCGCGAAGGCATGACGTCAGCGCGAGGCAGT\n+AGTTTCCAGAA'..b'GATTTGTTGAACAAAGCAGATTAGTACCTGGTTAGACAAAAATTAA\n+AAGAGCAGGAGTAAAGTTGTATTTAAACTGAAAAGATATTGGCAGACATT\n+CTAAATTATCTTTGGAGGCTGAGTAGTAACTGAGAACCCTCATTAACTAC\n+TTAATTTTTTGACTCGTGTATGATCGTTTATTTTATTCTTAAGGATTATA\n+ATAAAAAATTTTTAATTTATTAAAATAGATATATACCCGGTTTATGATTT\n+AAGAAACATTTGGCCTACAATATTTTATATTATGGATTTTAGTTTTAGTT\n+AACTAAATGAAATTGTAAAAGACAGTAAAAAATTCTTAATGTATTTTTGA\n+AGATTATCTAGAAGTGGTACAAATCATCCATCAATTGCCCAAAGGGGAGT\n+AAGTTGTAGTAAAGTAGATTTAGGGGAACCTGAATCTAGTAATAAAACTA\n+TTTTTAAATATGTTTTGAAAACATGTTTTGAGGTAACTCGTAGTTTTTAA\n+GAGTTAGTTTAATATAGAATTGTTGACTGTTAATCAAAAGGTGTACCTCT\n+TAATATAAGAGTTTAGTTTAAGTTAAAACGTTAGATTGTAAATCTAAAGA\n+TTATTGCTCTTGATAATTTTAGTTTTACTTATAGTTATTTTAATGATGAT\n+TTTTATTGTTCAAAGAATCGCTTTTATTACTCTATATGAGCGTCATTTAT\n+TGGGAAGAAGACAAAATCGTCTAGGGCCCACCAAGGTTACATTTATGGGA\n+TTAGCACAAGCTTTATTGGATGGGGTTAAACTTTTAAAAAAAGAACAAAT\n+AACACCCTTAAATTCCTCTGAAGTATCATTTTTACTTGTACCAGGAATTT\n+CTTTTGTTGTAATATATTTAGAATGATTTACGTTACCATATTTTTTTGAT\n+TTTATTAGTTTTGAGTATTCAGTTTTATTTTTTTTATGTTTAATTGGATT\n+TTCTGTTTATACAACTTTAATTAGCGGTATCGTAAGAAAATCAAAATATG\n+GTATAATTGGGGCCATCCGTGCTAGAAGACAAAGAATTTCTTATGAAATT\n+GCTTTTTCTTTATATGTTTTGTGTATTATTATTCATAATAATGTTTTTAA\n+TTTTGTTTCAAAATTTAATTTGAGACTTTTAATTATTTACATCCCATTTT\n+TAATTATAGTAATTGCTGAACTTAACCGGGCGCCATTTGATTTTTCTGAA\n+GGTGAAAGGGAGTTAGTTAGAGGATTTAATGTGGAGTTTGCCAGAGTAGC\n+TTTTGTTTTATTATTTTTAAGGGAATATGGAAGATTAATTTTTTTTAGGG\n+TACTTTCTTCTGCTATATTTTTTAAATTTTCAATTTTTATAGCATTTAGT\n+ATTTTTTCATTATTAATTTTTATTCGTAGTTCATACCCTCGTTATCGTTA\n+TGATTTAATAATAAGTTTATTTTGATTTAAACTTTTACCAATCTCTTTAA\n+TTATATTGTGTTTTTACGCAGTTATTTTTTATTATTAATCAAGTTTATTT\n+TTTAGACATTTTTATATTTGTTTTTGTTTTACAATTTTTGTTTTATTTTA\n+AAGAAAGTATATTAAATACTTTAGTGAAAAAATTTCTTAATAGGTTAGTA\n+GGAGTATTTAGATATACAAATACTTTACCATTAAGGTCAGTAATTTCTAT\n+TTTTACTTTTATTGTTCTTTTAACTTGTTGTTTTGGAGGTTATTTTACTT\n+ACTCTTTTTGTCCTTGTGGAATGGTTGAATTTACTTTTGTTTATGCTGCT\n+GTAGCGTGATTAAGTACTTTGTTAACTTTTATTTCAAGAGAAAAATTTTC\n+AGTTTATATA
AGAAAACCAGGAGACACATATTTGAAAACTCTTAGAATGC\n+TATTAATTGAAATCGTTAGAGAATTTTCTCGTCCACTTGCTTTAACAGTG\n+CGTTTAACAGTTAATATTACTGTTGGTCATTTAGTTAGAATAATGCTTTA\n+TCAAGGATTAGAATTAAGAATAGGTGATCAGTATATTTGATTATCAATTT\n+TAGCCATTATAATAGAATGTTTTGTTTTCTTCATTCAAAGTTATATTTTC\n+TCTCGTTTAATTTTTTTATATCTTAATGAGTAATAAAAAAAAAAAGATGT\n+TAACTTAAGTTTTAAAGTGCCAAACTTTTAATTTGGAAATGGTGGACCAC\n+ATCTTAGTTGATATAGCATAAGAAGTGCATTTGTTTTAAGCGCAAAAGAT\n+ATCCGTCAACTAACGAGTTCATAAAGCAAGTCTTCTAAATTTGTTCTAGG\n+TTAAATCCTGCTCGTTTTTGATTGTTTTTATTTCTTTATTTACCTTGTTT\n+TTAACATTATTAAGAATTTTGACTAATAACGTTATTGTTTGATGAAGAAT\n+TTTTTTATTGATAACTGTAGTTTTTATTCTATTAAATAAAAGCAGCAAGA\n+GATATACCAGAATTTTTAATTATTTTGTTATTCAAGAGTCTTTAGGTTTA\n+TTATTTCTTCTTTGTAGAGGAGGTCTATTACAATTTTTTATTATTTTATT\n+GAAAATTGGTGTAGCACCGCTCCACTTTTGAATTTTTAATGTAACAAATA\n+ACATTTTTAATTATGGGCTAATGTGGTTTTTAACATTTCAAAAATTACCA\n+TTTTTAACTATTTTATTACAAATTTTTTGGTTAAGATCCGTGTATATTTT\n+GTTATTTGGTTTATTGATTTGTTATGTTCAAATTTTTGTCATAAAAAGTT\n+ATAAAAATTTGTTAATTATTTCATCCACAGAGTCTTTTAATTGGATTGTT\n+TTGGGAGTATTTTTTTCAATGTTTAATACATTTTATTTATTTATTTATTA\n+CTTTGTATTAATAGTTTTATTAATTTCTAAGTTTTCTAAAACTAGGGGTT\n+ATAATTTTATTAATTGAGAAACAACATTAGTATTTTTAAATATTCCATTT\n+AGAGTTTCATTTTTTGTAAAAATTTTCTCATTGAGGGAAATTTTTAAATA\n+TGATAGATTCTTTACTCTATTTTTGCTTTTTACAATATTTTTATCTGTAT\n+TGGCATTTAGATTTTGATTAATTAACTTGAGAATGAAAAATAATGAAGAA\n+ACTTCAAATAATAATAAAATAAATTATTTTATTATTTTTCCGTTAATAGT\n+TATTTCTATTATTTAATTACTTTTCTAGTAAAATATATTATATTATCTTG\n+ATAAGGTAAAGTTCCAGTTGGGAGAAGTAAGATGTAAAATAGATATTACT\n+ATGTTTGGTTACGGTCCAAAAAGATGCACATCTTTGCGATCTAGTTTAGA\n+AAAAATATTTGTTTTTGGTGCAAAAGAGTTTGATTGCATTTAGTTTACTC\n+TTTTAGTTTATAATTAAAATATGGCCCTGAAGAGGCTAAGAATATTAGGA\n+GTATTGAAAATTAATAATAGATTATTAAATTTTGTTAATGGGATGTTGGT\n+GACATTGCCATCTAGAAAAACTTTAACATTAAGATGAAATTTTGGTAGTA\n+TATTGGGTATAGTTTTAATCTTTCAGATTTTAACAGGTACATTTTTAGCA\n+TTTTATTATACGCCCGATAGGTTAATAGCATTTTCAACAGTGCAGTATAT\n+TATGTATGAGGTAAATTTTGGATGAGTATTTCGAATTTTTCATTTTAATG\n+GGGCCAGGTTATTTTTTATTTTTTTGTATTTACATATTTTTAAAGGGTTA\n+TTTTTTATAAGATATCGTTTAAAAAAAGTATGAATGTCTGGTTTAACAA
T\n+TTATTTATTAGTAATAATAGAAGCTTTTATAGGTTATGTTTTAGTTTGAG\n+CTCAAATAAGATTTTGAGCAGCAGTAGTTATTACTAGACTTTTAAGAGTT\n+ATTCCAATTTGAGGGCCAACTATTGTTACTTGAATTTGAAGAGGTTTTGG\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/ce.fa.fai --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/ce.fa.fai Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,7 @@ +CHROMOSOME_I 1009800 14 50 51 +CHROMOSOME_II 5000 1030025 50 51 +CHROMOSOME_III 5000 1035141 50 51 +CHROMOSOME_IV 5000 1040256 50 51 +CHROMOSOME_V 5000 1045370 50 51 +CHROMOSOME_X 5000 1050484 50 51 +CHROMOSOME_MtDNA 5000 1055602 50 51 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/compare_sam.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/compare_sam.pl Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,172 @@ +#!/usr/bin/perl -w +# +# Copyright (C) 2013 Genome Research Ltd. +# +# Author: James Bonfield <jkb@sanger.ac.uk> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Compares two SAM files to report differences. +# Optionally can skip header or ignore specific types of diff. 
+ +use strict; +use Getopt::Long; + +my %opts; +GetOptions(\%opts, 'noqual', 'noaux', 'notemplate', 'unknownrg', 'nomd', 'template-1', 'noflag'); + +my ($fn1, $fn2) = @ARGV; +open(my $fd1, "<", $fn1) || die $!; +open(my $fd2, "<", $fn2) || die $!; + +# Headers +my ($c1,$c2)=(1,1); +my (@hd1, @hd2, $ln1, $ln2); +while (<$fd1>) { + if (/^@/) { + push(@hd1, $_); + } else { + $ln1 = $_; + last; + } + $c1++; +} + +while (<$fd2>) { + if (/^@/) { + push(@hd2, $_); + } else { + $ln2 = $_; + last; + } + $c2++; +} + +# FIXME: to do +#print "@hd1\n"; +#print "@hd2\n"; + +# Compare lines +while ($ln1 && $ln2) { + chomp($ln1); + chomp($ln2); + + # Java CRAM adds RG:Z:UNKNOWN when the read-group is absent + if (exists $opts{unknownrg}) { + $ln1 =~ s/\tRG:Z:UNKNOWN//; + $ln2 =~ s/\tRG:Z:UNKNOWN//; + } + + if (exists $opts{nomd}) { + $ln1 =~ s/\tMD:Z:[A-Z0-9^]*//; + $ln2 =~ s/\tMD:Z:[A-Z0-9^]*//; + $ln1 =~ s/\tNM:i:\d+//; + $ln2 =~ s/\tNM:i:\d+//; + } + + my @ln1 = split("\t", $ln1); + my @ln2 = split("\t", $ln2); + + # Fix BWA bug: unmapped data should have no alignments + if ($ln1[1] & 4) { $ln1[4] = 0; $ln1[5] = "*"; } + if ($ln2[1] & 4) { $ln2[4] = 0; $ln2[5] = "*"; } + + # Rationalise order of auxiliary fields + if (exists $opts{noaux}) { + @ln1 = @ln1[0..10]; + @ln2 = @ln2[0..10]; + } else { + #my @a=@ln1[11..$#ln1];print "<<<@a>>>\n"; + @ln1[11..$#ln1] = sort @ln1[11..$#ln1]; + @ln2[11..$#ln2] = sort @ln2[11..$#ln2]; + } + + if (exists $opts{noqual}) { + $ln1[10] = "*"; + $ln2[10] = "*"; + } + + if (exists $opts{notemplate}) { + @ln1[6..8] = qw/* 0 0/; + @ln2[6..8] = qw/* 0 0/; + } + + if (exists $opts{noflag}) { + $ln1[1] = 0; $ln2[1] = 0; + } + + if (exists $opts{'template-1'}) { + if (abs($ln1[8] - $ln2[8]) == 1) { + $ln1[8] = $ln2[8]; + } + } + + # Cram doesn't uppercase the reference + $ln1[9] = uc($ln1[9]); + $ln2[9] = uc($ln2[9]); + + # Cram will populate a sequence string that starts as "*" + $ln2[9] = "*" if ($ln1[9] eq "*"); + + # Fix 0<op> cigar fields + $ln1[5] 
=~ s/(\D|^)0\D/$1/g; + $ln1[5] =~ s/^$/*/g; + $ln2[5] =~ s/(\D|^)0\D/$1/g; + $ln2[5] =~ s/^$/*/g; + + # Fix 10M10M cigar to 20M + $ln1[5] =~ s/(\d+)(\D)(\d+)(\2)/$1+$3.$2/e; + $ln2[5] =~ s/(\d+)(\D)(\d+)(\2)/$1+$3.$2/e; + + if ("@ln1" ne "@ln2") { + print "Diff at lines $fn1:$c1, $fn2:$c2\n"; + my @s1 = split("","@ln1"); + my @s2 = split("","@ln2"); + my $ptr = ""; + for (my $i=0; $i < $#s1; $i++) { + if ($s1[$i] eq $s2[$i]) { + $ptr .= "-"; + } else { + last; + } + } + print "1\t@ln1\n2\t@ln2\n\t$ptr^\n\n"; + exit(1); + } + + $ln1 = <$fd1>; + $ln2 = <$fd2>; + + $c1++; $c2++; +} + +if (defined($ln1)) { + print "EOF on $fn1\n"; + exit(1); +} + +if (defined($ln2)) { + print "EOF on $fn2\n"; + exit(1); +} + +close($fd1); +close($fd2); + +exit(0); |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/fieldarith.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/fieldarith.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,72 @@ +/* test/fieldarith.c -- CIGAR field arithmetic test suite. + + Copyright (C) 2013 Genome Research Ltd. + + Author: John Marshall <jm18@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include <stdio.h> + +#include "htslib/sam.h" + +int ntests = 0; +int nfailures = 0; + +void check(const bam1_t *aln, const char *testname, const char *tag, int value) +{ + int32_t refvalue; + uint8_t *aux = bam_aux_get(aln, tag); + if (!aux) return; + ntests++; + refvalue = bam_aux2i(aux); + if (value != refvalue) { + fprintf(stderr, "%s FAIL for %s: computed %d != %d expected\n", + testname, bam_get_qname(aln), value, refvalue); + nfailures++; + } +} + +int main(int argc, char **argv) +{ + bam_hdr_t *header; + bam1_t *aln = bam_init1(); + int i; + + for (i = 1; i < argc; i++) { + samFile *in = sam_open(argv[i], "r"); + if (in == NULL) { perror(argv[1]); return 1; } + + header = sam_hdr_read(in); + while (sam_read1(in, header, aln) >= 0) { + check(aln, "cigar2qlen", "XQ", + bam_cigar2qlen(aln->core.n_cigar, bam_get_cigar(aln))); + check(aln, "cigar2rlen", "XR", + bam_cigar2rlen(aln->core.n_cigar, bam_get_cigar(aln))); + check(aln, "endpos", "XE", bam_endpos(aln)); + } + + bam_hdr_destroy(header); + sam_close(in); + } + + bam_destroy1(aln); + + return (nfailures > 0); +} |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/fieldarith.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/fieldarith.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,15 @@ +@SQ SN:one LN:1000 +@SQ SN:two LN:500 +@CO For each SAM record that has each listed aux field, performs these tests: +@CO XQ is the expected result for bam_cigar2qlen() +@CO XR is the expected result for bam_cigar2rlen() +@CO XE is the expected result for bam_endpos() +@CO (Note that these are all zero-based, while POS is one-based in SAM) +r1 0 one 50 20 8M * 0 0 ATGCATGC qqqqqqqq XQ:i:8 XR:i:8 XE:i:57 +r2 0 one 100 20 50M * 0 0 ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCAT qqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqq XQ:i:50 XR:i:50 XE:i:149 +unmapped 5 two 200 0 * two 200 0 ATGCATGC qqqqqqqq XQ:i:0 XR:i:0 XE:i:200 +hascigar 5 two 200 0 6M2S two 200 0 ATGCATGC qqqqqqqq XQ:i:8 XR:i:6 XE:i:200 +s1 0 one 300 20 2M * 0 0 AT qq XQ:i:2 XR:i:2 XE:i:301 +su1 4 * 0 0 * * 0 0 AT qq XQ:i:0 XR:i:0 XE:i:0 +su2 5 two 400 0 * two 400 0 AT qq XQ:i:0 XR:i:0 XE:i:400 +su3 4 one 500 0 2M * 0 0 AT qq XQ:i:2 XR:i:2 XE:i:500 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/hfile.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/hfile.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,204 @@ +/* test/hfile.c -- Test cases for low-level input/output streams. + + Copyright (C) 2013-2014 Genome Research Ltd. + + Author: John Marshall <jm18@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> + +#include <sys/stat.h> + +#include "htslib/hfile.h" +#include "htslib/hts_defs.h" + +void HTS_NORETURN fail(const char *format, ...) 
+{ + int err = errno; + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + if (err != 0) fprintf(stderr, ": %s", strerror(err)); + fprintf(stderr, "\n"); + exit(EXIT_FAILURE); +} + +void check_offset(hFILE *f, off_t off, const char *message) +{ + off_t ret = htell(f); + if (ret < 0) fail("htell(%s)", message); + if (ret == off) return; + + fprintf(stderr, "%s offset incorrect: expected %ld but got %ld\n", + message, (long)off, (long)ret); + exit(EXIT_FAILURE); +} + +char *slurp(const char *filename) +{ + char *text; + struct stat sbuf; + size_t filesize; + FILE *f = fopen(filename, "r"); + if (f == NULL) fail("fopen(\"%s\", \"r\")", filename); + if (fstat(fileno(f), &sbuf) != 0) fail("fstat(\"%s\")", filename); + filesize = sbuf.st_size; + + text = (char *) malloc(filesize + 1); + if (text == NULL) fail("malloc(text)"); + + if (fread(text, 1, filesize, f) != filesize) fail("fread"); + fclose(f); + + text[filesize] = '\0'; + return text; +} + +hFILE *fin = NULL; +hFILE *fout = NULL; + +void reopen(const char *infname, const char *outfname) +{ + if (fin) { if (hclose(fin) != 0) fail("hclose(input)"); } + if (fout) { if (hclose(fout) != 0) fail("hclose(output)"); } + + fin = hopen(infname, "r"); + if (fin == NULL) fail("hopen(\"%s\")", infname); + + fout = hopen(outfname, "w"); + if (fout == NULL) fail("hopen(\"%s\")", outfname); +} + +int main(void) +{ + static const int size[] = { 1, 13, 403, 999, 30000 }; + + char buffer[40000]; + char *original; + int c, i; + ssize_t n; + off_t off; + + reopen("vcf.c", "test/hfile1.tmp"); + while ((c = hgetc(fin)) != EOF) { + if (hputc(c, fout) == EOF) fail("hputc"); + } + if (herrno(fin)) { errno = herrno(fin); fail("hgetc"); } + + reopen("test/hfile1.tmp", "test/hfile2.tmp"); + if (hpeek(fin, buffer, 50) < 0) fail("hpeek"); + while ((n = hread(fin, buffer, 17)) > 0) { + if (hwrite(fout, buffer, n) != n) fail("hwrite"); + } + if (n < 0) fail("hread"); + + reopen("test/hfile2.tmp", 
"test/hfile3.tmp"); + while ((n = hread(fin, buffer, sizeof buffer)) > 0) { + if (hwrite(fout, buffer, n) != n) fail("hwrite"); + if (hpeek(fin, buffer, 700) < 0) fail("hpeek"); + } + if (n < 0) fail("hread"); + + reopen("test/hfile3.tmp", "test/hfile4.tmp"); + i = 0; + off = 0; + while ((n = hread(fin, buffer, size[i++ % 5])) > 0) { + off += n; + buffer[n] = '\0'; + check_offset(fin, off, "pre-peek"); + if (hputs(buffer, fout) == EOF) fail("hputs"); + if ((n = hpeek(fin, buffer, size[(i+3) % 5])) < 0) fail("hpeek"); + check_offset(fin, off, "post-peek"); + } + if (n < 0) fail("hread"); + + reopen("test/hfile4.tmp", "test/hfile5.tmp"); + n = hread(fin, buffer, 200); + if (n < 0) fail("hread"); + else if (n != 200) fail("hread only got %d", (int)n); + if (hwrite(fout, buffer, 1000) != 1000) fail("hwrite"); + check_offset(fin, 200, "input/first200"); + check_offset(fout, 1000, "output/first200"); + + if (hseek(fin, 800, SEEK_CUR) < 0) fail("hseek/cur"); + check_offset(fin, 1000, "input/seek"); + for (off = 1000; (n = hread(fin, buffer, sizeof buffer)) > 0; off += n) + if (hwrite(fout, buffer, n) != n) fail("hwrite"); + if (n < 0) fail("hread"); + check_offset(fin, off, "input/eof"); + check_offset(fout, off, "output/eof"); + + if (hseek(fin, 200, SEEK_SET) < 0) fail("hseek/set"); + if (hseek(fout, 200, SEEK_SET) < 0) fail("hseek(output)"); + check_offset(fin, 200, "input/backto200"); + check_offset(fout, 200, "output/backto200"); + n = hread(fin, buffer, 800); + if (n < 0) fail("hread"); + else if (n != 800) fail("hread only got %d", (int)n); + if (hwrite(fout, buffer, 800) != 800) fail("hwrite"); + check_offset(fin, 1000, "input/wrote800"); + check_offset(fout, 1000, "output/wrote800"); + + if (hflush(fout) == EOF) fail("hflush"); + + original = slurp("vcf.c"); + for (i = 1; i <= 5; i++) { + char *text; + sprintf(buffer, "test/hfile%d.tmp", i); + text = slurp(buffer); + if (strcmp(original, text) != 0) { + fprintf(stderr, "%s differs from vcf.c\n", buffer); + return 
EXIT_FAILURE; + } + free(text); + } + free(original); + + if (hclose(fin) != 0) fail("hclose(input)"); + if (hclose(fout) != 0) fail("hclose(output)"); + + fout = hopen("test/hfile_chars.tmp", "w"); + if (fout == NULL) fail("hopen(\"test/hfile_chars.tmp\")"); + for (i = 0; i < 256; i++) + if (hputc(i, fout) != i) fail("chars: hputc (%d)", i); + if (hclose(fout) != 0) fail("hclose(test/hfile_chars.tmp)"); + + fin = hopen("test/hfile_chars.tmp", "r"); + if (fin == NULL) fail("hopen(\"test/hfile_chars.tmp\") for reading"); + for (i = 0; i < 256; i++) + if ((c = hgetc(fin)) != i) + fail("chars: hgetc (%d = 0x%x) returned %d = 0x%x", i, i, c, c); + if ((c = hgetc(fin)) != EOF) fail("chars: hgetc (EOF) returned %d", c); + if (hclose(fin) != 0) fail("hclose(test/hfile_chars.tmp) for reading"); + + fin = hopen("data:hello, world!\n", "r"); + if (fin == NULL) fail("hopen(\"data:...\")"); + n = hread(fin, buffer, 300); + if (n < 0) fail("hread"); + buffer[n] = '\0'; + if (strcmp(buffer, "hello, world!\n") != 0) fail("hread result"); + if (hclose(fin) != 0) fail("hclose(\"data:...\")"); + + return EXIT_SUCCESS; +} |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/sam.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/sam.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,187 @@ +/* test/sam.c -- SAM/BAM/CRAM API test cases. + + Copyright (C) 2014-2015 Genome Research Ltd. + + Author: John Marshall <jm18@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> + +#include "htslib/sam.h" +#include "htslib/faidx.h" +#include "htslib/kstring.h" + +int status; + +static void fail(const char *fmt, ...) 
+{ + va_list args; + + fprintf(stderr, "Failed: "); + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + fprintf(stderr, "\n"); + + status = EXIT_FAILURE; +} + +uint8_t *check_bam_aux_get(const bam1_t *aln, const char *tag, char type) +{ + uint8_t *p = bam_aux_get(aln, tag); + if (p) { + if (*p == type) return p; + else fail("%s field of type '%c', expected '%c'\n", tag, *p, type); + } + else fail("can't find %s field\n", tag); + + return NULL; +} + +#define PI 3.141592653589793 +#define E 2.718281828459045 +#define HELLO "Hello, world!" +#define BEEF "DEADBEEF" + +#define str(x) #x +#define xstr(x) str(x) + +static int aux_fields1(void) +{ + static const char sam[] = "data:" +"@SQ\tSN:one\tLN:1000\n" +"@SQ\tSN:two\tLN:500\n" +"r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:" xstr(PI) "\tXd:d:" xstr(E) "\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,+2\tZZ:i:1000000\tY1:i:-2147483648\tY2:i:-2147483647\tY3:i:-1\tY4:i:0\tY5:i:1\tY6:i:2147483647\tY7:i:2147483648\tY8:i:4294967295\n"; + + // Canonical form of the alignment record above, as output by sam_format1() + static const char r1[] = "r1\t0\tone\t500\t20\t8M\t*\t0\t0\tATGCATGC\tqqqqqqqq\tXA:A:k\tXi:i:37\tXf:f:3.14159\tXd:d:2.71828\tXZ:Z:" HELLO "\tXH:H:" BEEF "\tXB:B:c,-2,0,2\tZZ:i:1000000\tY1:i:-2147483648\tY2:i:-2147483647\tY3:i:-1\tY4:i:0\tY5:i:1\tY6:i:2147483647\tY7:i:2147483648\tY8:i:4294967295"; + + samFile *in = sam_open(sam, "r"); + bam_hdr_t *header = sam_hdr_read(in); + bam1_t *aln = bam_init1(); + uint8_t *p; + uint32_t n; + kstring_t ks = { 0, 0, NULL }; + + if (sam_read1(in, header, aln) >= 0) { + if ((p = check_bam_aux_get(aln, "XA", 'A')) && bam_aux2A(p) != 'k') + fail("XA field is '%c', expected 'k'", bam_aux2A(p)); + + if ((p = check_bam_aux_get(aln, "Xi", 'C')) && bam_aux2i(p) != 37) + fail("Xi field is %d, expected 37", bam_aux2i(p)); + + if ((p = check_bam_aux_get(aln, "Xf", 'f')) && fabs(bam_aux2f(p) - PI) > 1E-6) + fail("Xf field is %.12f, 
expected pi", bam_aux2f(p)); + + if ((p = check_bam_aux_get(aln, "Xd", 'd')) && fabs(bam_aux2f(p) - E) > 1E-6) + fail("Xf field is %.12f, expected e", bam_aux2f(p)); + + if ((p = check_bam_aux_get(aln, "XZ", 'Z')) && strcmp(bam_aux2Z(p), HELLO) != 0) + fail("XZ field is \"%s\", expected \"%s\"", bam_aux2Z(p), HELLO); + + if ((p = check_bam_aux_get(aln, "XH", 'H')) && strcmp(bam_aux2Z(p), BEEF) != 0) + fail("XH field is \"%s\", expected \"%s\"", bam_aux2Z(p), BEEF); + + // TODO Invent and use bam_aux2B() + if ((p = check_bam_aux_get(aln, "XB", 'B')) && ! (memcmp(p, "Bc", 2) == 0 && (memcpy(&n, p+2, 4), n) == 3 && memcmp(p+6, "\xfe\x00\x02", 3) == 0)) + fail("XB field is %c,..., expected c,-2,0,+2", p[1]); + + if ((p = check_bam_aux_get(aln, "ZZ", 'I')) && bam_aux2i(p) != 1000000) + fail("ZZ field is %d, expected 1000000", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y1")) && bam_aux2i(p) != -2147483647-1) + fail("Y1 field is %d, expected -2^31", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y2")) && bam_aux2i(p) != -2147483647) + fail("Y2 field is %d, expected -2^31+1", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y3")) && bam_aux2i(p) != -1) + fail("Y3 field is %d, expected -1", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y4")) && bam_aux2i(p) != 0) + fail("Y4 field is %d, expected 0", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y5")) && bam_aux2i(p) != 1) + fail("Y5 field is %d, expected 1", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y6")) && bam_aux2i(p) != 2147483647) + fail("Y6 field is %d, expected 2^31-1", bam_aux2i(p)); + + // TODO Checking these perhaps requires inventing bam_aux2u() or so +#if 0 + if ((p = bam_aux_get(aln, "Y7")) && bam_aux2i(p) != 2147483648) + fail("Y7 field is %d, expected 2^31", bam_aux2i(p)); + + if ((p = bam_aux_get(aln, "Y8")) && bam_aux2i(p) != 4294967295) + fail("Y8 field is %d, expected 2^32-1", bam_aux2i(p)); +#endif + + if (sam_format1(header, aln, &ks) < 0) + fail("can't format record"); + + if (strcmp(ks.s, r1) != 0) 
+ fail("record formatted incorrectly: \"%s\"", ks.s); + + free(ks.s); + } + else fail("can't read record"); + + bam_destroy1(aln); + bam_hdr_destroy(header); + sam_close(in); + + return 1; +} + +static void iterators1(void) +{ + hts_itr_destroy(sam_itr_queryi(NULL, HTS_IDX_REST, 0, 0)); + hts_itr_destroy(sam_itr_queryi(NULL, HTS_IDX_NONE, 0, 0)); +} + +static void faidx1(const char *filename) +{ + int n; + faidx_t *fai = fai_load(filename); + if (fai == NULL) fail("can't load faidx file"); + + n = faidx_fetch_nseq(fai); + if (n != 7) fail("faidx_fetch_nseq returned %d, expected 7", n); + + n = faidx_nseq(fai); + if (n != 7) fail("faidx_nseq returned %d, expected 7", n); + + fai_destroy(fai); +} + +int main(int argc, char **argv) +{ + status = EXIT_SUCCESS; + + aux_fields1(); + iterators1(); + if (argc >= 2) faidx1(argv[1]); + + return status; +} |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/test-regidx.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/test-regidx.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,116 @@ +/* test/test-regidx.c -- Regions index test harness. + + Copyright (C) 2014 Genome Research Ltd. + + Author: Petr Danecek <pd3@sanger.ac.uk> + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <htslib/regidx.h> + +void error(const char *format, ...) 
+{ + va_list ap; + va_start(ap, format); + vfprintf(stderr, format, ap); + va_end(ap); + exit(-1); +} + +int custom_parse(const char *line, char **chr_beg, char **chr_end, reg_t *reg, void *payload, void *usr) +{ + // Use the standard parser for CHROM,FROM,TO + int i, ret = regidx_parse_tab(line,chr_beg,chr_end,reg,NULL,NULL); + if ( ret!=0 ) return ret; + + // Skip the fields that were parsed above + char *ss = (char*) line; + while ( *ss && isspace(*ss) ) ss++; + for (i=0; i<3; i++) + { + while ( *ss && !isspace(*ss) ) ss++; + if ( !*ss ) return -2; // wrong number of fields + while ( *ss && isspace(*ss) ) ss++; + } + if ( !*ss ) return -2; + + // Parse the payload + char *se = ss; + while ( *se && !isspace(*se) ) se++; + char **dat = (char**) payload; + *dat = (char*) malloc(se-ss+1); + memcpy(*dat,ss,se-ss+1); + (*dat)[se-ss] = 0; + return 0; +} +void custom_free(void *payload) +{ + char **dat = (char**)payload; + free(*dat); +} + +int main(int argc, char **argv) +{ + // Init index with no file name, we will insert the regions manually + regidx_t *idx = regidx_init(NULL,custom_parse,custom_free,sizeof(char*),NULL); + if ( !idx ) error("init failed\n"); + + // Insert regions + char *line; + line = "1 10000000 10000000 1:10000000-10000000"; if ( regidx_insert(idx,line)!=0 ) error("insert failed: %s\n", line); + line = "1 20000000 20000001 1:20000000-20000001"; if ( regidx_insert(idx,line)!=0 ) error("insert failed: %s\n", line); + line = "1 20000002 20000002 1:20000002-20000002"; if ( regidx_insert(idx,line)!=0 ) error("insert failed: %s\n", line); + line = "1 30000000 30000000 1:30000000-30000000"; if ( regidx_insert(idx,line)!=0 ) error("insert failed: %s\n", line); + + // Finish initialization + regidx_insert(idx,NULL); + + // Test + regitr_t itr; + int from, to; + + from = to = 10000000; + if ( !regidx_overlap(idx,"1",from-1,to-1,&itr) ) error("query failed: 1:%d-%d\n",from,to); + if ( strcmp("1:10000000-10000000",REGITR_PAYLOAD(itr,char*)) ) error("query 
failed: 1:%d-%d vs %s\n", from,to,REGITR_PAYLOAD(itr,char*)); + if ( !regidx_overlap(idx,"1",from-2,to-1,&itr) ) error("query failed: 1:%d-%d\n",from-1,to); + if ( !regidx_overlap(idx,"1",from-2,to+3,&itr) ) error("query failed: 1:%d-%d\n",from-1,to+2); + if ( regidx_overlap(idx,"1",from-2,to-2,&itr) ) error("query failed: 1:%d-%d\n",from-1,to-1); + + from = to = 20000000; + if ( !regidx_overlap(idx,"1",from-1,to-1,&itr) ) error("query failed: 1:%d-%d\n",from,to); + + from = to = 20000002; + if ( !regidx_overlap(idx,"1",from-1,to-1,&itr) ) error("query failed: 1:%d-%d\n",from,to); + + from = to = 30000000; + if ( !regidx_overlap(idx,"1",from-1,to-1,&itr) ) error("query failed: 1:%d-%d\n",from,to); + + // Clean up + regidx_destroy(idx); + + return 0; +} + + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/test-vcf-api.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/test-vcf-api.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,282 @@\n+/* test/test-vcf-api.c -- VCF test harness.\n+\n+ Copyright (C) 2013, 2014 Genome Research Ltd.\n+\n+ Author: Petr Danecek <pd3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#include <stdio.h>\n+#include <htslib/hts.h>\n+#include <htslib/vcf.h>\n+#include <htslib/kstring.h>\n+#include <htslib/kseq.h>\n+\n+void write_bcf(char *fname)\n+{\n+ // Init\n+ htsFile *fp = hts_open(fname,"wb");\n+ bcf_hdr_t *hdr = bcf_hdr_init("w");\n+ bcf1_t *rec = bcf_init1();\n+\n+ // Create VCF header\n+ kstring_t str = {0,0,0};\n+ bcf_hdr_append(hdr, "##fileDate=20090805");\n+ bcf_hdr_append(hdr, "##FORMAT=<ID=UF,Number=1,Type=Integer,Description=\\"Unused FORMAT\\">");\n+ bcf_hdr_append(hdr, "##INFO=<ID=UI,Number=1,Type=Integer,Description=\\"Unused INFO\\">");\n+ bcf_hdr_append(hdr, "##FILTER=<ID=Flt,Description=\\"Unused FILTER\\">");\n+ bcf_hdr_append(hdr, "##unused=<XX=AA,Description=\\"Unused generic\\">");\n+ bcf_hdr_append(hdr, "##unused=unformatted text 1");\n+ bcf_hdr_append(hdr, "##unused=unformatted text 2");\n+ bcf_hdr_append(hdr, "##contig=<ID=Unused,length=62435964>");\n+ bcf_hdr_append(hdr, "##source=myImputationProgramV3.1");\n+ bcf_hdr_append(hdr, "##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta");\n+ bcf_hdr_append(hdr, "##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\\"Homo sapiens\\",taxonomy=x>");\n+ bcf_hdr_append(hdr, "##phasing=partial");\n+ bcf_hdr_append(hdr, "##INFO=<ID=NS,Number=1,Type=Integer,Description=\\"Number of Samples With Data\\">");\n+ bcf_hdr_append(hdr, "##INFO=<ID=DP,Number=1,Type=Integer,Description=\\"Total Depth\\">");\n+ bcf_hdr_append(hdr, "##INFO=<ID=AF,Number=A,Type=Float,Description=\\"Allele Frequency\\">");\n+ bcf_hdr_append(hdr, "##INFO=<ID=AA,Number=1,Type=String,Description=\\"Ancestral Allele\\">");\n+ bcf_hdr_append(hdr, "##INFO=<ID=DB,Number=0,Type=Flag,Description=\\"dbSNP membership, build 129\\">");\n+ bcf_hdr_append(hdr, "##INFO=<ID=H2,Number=0,Type=Flag,Description=\\"HapMap2 membership\\">");\n+ bcf_hdr_append(hdr, "##FILTER=<ID=q10,Description=\\"Quality below 10\\">");\n+ bcf_hdr_append(hdr, 
"##FILTER=<ID=s50,Description=\\"Less than 50% of samples have data\\">");\n+ bcf_hdr_append(hdr, "##FORMAT=<ID=GT,Number=1,Type=String,Description=\\"Genotype\\">");\n+ bcf_hdr_append(hdr, "##FORMAT=<ID=GQ,Number=1,Type=Integer,Description=\\"Genotype Quality\\">");\n+ bcf_hdr_append(hdr, "##FORMAT=<ID=DP,Number=1,Type=Integer,Description=\\"Read Depth\\">");\n+ bcf_hdr_append(hdr, "##FORMAT=<ID=HQ,Number=2,Type=Integer,Description=\\"Haplotype Quality\\">");\n+ bcf_hdr_append(hdr, "##FORMAT=<ID=TS,Number=1,Type=String,Description=\\"Test String\\">");\n+\n+ bcf_hdr_add_sample(hdr, "NA00001");\n+ bcf_hdr_add_sample(hdr, "NA00002");\n+ bcf_hdr_add_sample(hdr, "NA00003");\n+ bcf_hdr_add_sample(hdr, NULL); // to update internal structures\n+ bcf_hdr_write(fp, hdr);\n+\n+\n+ // Add '..b'[1]);\n+ bcf_update_info_float(hdr, rec, "AF", tmpfa, 2);\n+ bcf_update_info_string(hdr, rec, "AA", "T");\n+ bcf_update_info_flag(hdr, rec, "DB", NULL, 1);\n+ tmpia[0] = bcf_gt_phased(2);\n+ tmpia[1] = bcf_int32_vector_end;\n+ tmpia[2] = bcf_gt_phased(1);\n+ tmpia[3] = bcf_int32_vector_end;\n+ tmpia[4] = bcf_gt_missing;\n+ tmpia[5] = bcf_gt_missing;\n+ bcf_update_genotypes(hdr, rec, tmpia, bcf_hdr_nsamples(hdr)*2);\n+ bcf_write1(fp, hdr, rec);\n+\n+ free(tmpia);\n+ free(tmpfa);\n+\n+ // Clean\n+ free(str.s);\n+ bcf_destroy1(rec);\n+ bcf_hdr_destroy(hdr);\n+ int ret;\n+ if ( (ret=hts_close(fp)) )\n+ {\n+ fprintf(stderr,"hts_close(%s): non-zero status %d\\n",fname,ret);\n+ exit(ret);\n+ }\n+}\n+\n+void bcf_to_vcf(char *fname)\n+{\n+ htsFile *fp = hts_open(fname,"rb");\n+ bcf_hdr_t *hdr = bcf_hdr_read(fp);\n+ bcf1_t *rec = bcf_init1();\n+\n+ char *gz_fname = (char*) malloc(strlen(fname)+4);\n+ snprintf(gz_fname,strlen(fname)+4,"%s.gz",fname);\n+ htsFile *out = hts_open(gz_fname,"wg");\n+\n+ bcf_hdr_t *hdr_out = bcf_hdr_dup(hdr);\n+ bcf_hdr_remove(hdr_out,BCF_HL_STR,"unused");\n+ bcf_hdr_remove(hdr_out,BCF_HL_GEN,"unused");\n+ bcf_hdr_remove(hdr_out,BCF_HL_FLT,"Flt");\n+ 
bcf_hdr_remove(hdr_out,BCF_HL_INFO,"UI");\n+ bcf_hdr_remove(hdr_out,BCF_HL_FMT,"UF");\n+ bcf_hdr_remove(hdr_out,BCF_HL_CTG,"Unused");\n+ bcf_hdr_write(out, hdr_out);\n+\n+ while ( bcf_read1(fp, hdr, rec)>=0 )\n+ {\n+ bcf_write1(out, hdr_out, rec);\n+\n+ // Test problems caused by bcf1_sync: the data block\n+ // may be realloced, also the unpacked structures must\n+ // get updated.\n+ bcf_unpack(rec, BCF_UN_STR);\n+ bcf_update_id(hdr, rec, 0);\n+ bcf_update_format_int32(hdr, rec, "GQ", NULL, 0);\n+\n+ bcf1_t *dup = bcf_dup(rec); // force bcf1_sync call\n+ bcf_write1(out, hdr_out, dup);\n+ bcf_destroy1(dup);\n+\n+ bcf_update_alleles_str(hdr_out, rec, "G,A");\n+ int32_t tmpi = 99;\n+ bcf_update_info_int32(hdr_out, rec, "DP", &tmpi, 1);\n+ int32_t tmpia[] = {9,9,9};\n+ bcf_update_format_int32(hdr_out, rec, "DP", tmpia, 3);\n+\n+ bcf_write1(out, hdr_out, rec);\n+ }\n+\n+ bcf_destroy1(rec);\n+ bcf_hdr_destroy(hdr);\n+ bcf_hdr_destroy(hdr_out);\n+ int ret;\n+ if ( (ret=hts_close(fp)) )\n+ {\n+ fprintf(stderr,"hts_close(%s): non-zero status %d\\n",fname,ret);\n+ exit(ret);\n+ }\n+ if ( (ret=hts_close(out)) )\n+ {\n+ fprintf(stderr,"hts_close(%s): non-zero status %d\\n",gz_fname,ret);\n+ exit(ret);\n+ }\n+\n+\n+ // read gzip, write stdout\n+ htsFile *gz_in = hts_open(gz_fname, "r");\n+ if ( !gz_in )\n+ {\n+ fprintf(stderr,"Could not read: %s\\n", gz_fname);\n+ exit(1);\n+ }\n+\n+ kstring_t line = {0,0,0};\n+ while ( hts_getline(gz_in, KS_SEP_LINE, &line)>0 )\n+ {\n+ kputc(\'\\n\',&line);\n+ fwrite(line.s,1,line.l,stdout);\n+ }\n+\n+ if ( (ret=hts_close(gz_in)) )\n+ {\n+ fprintf(stderr,"hts_close(%s): non-zero status %d\\n",gz_fname,ret);\n+ exit(ret);\n+ }\n+ free(line.s);\n+ free(gz_fname);\n+}\n+\n+void iterator(const char *fname)\n+{\n+ htsFile *fp = hts_open(fname, "r");\n+ bcf_hdr_t *hdr = bcf_hdr_read(fp);\n+ hts_idx_t *idx;\n+ hts_itr_t *iter;\n+\n+ bcf_index_build(fname, 0);\n+ idx = bcf_index_load(fname);\n+\n+ iter = bcf_itr_queryi(idx, bcf_hdr_name2id(hdr, "20"), 
1110600, 1110800);\n+ bcf_itr_destroy(iter);\n+\n+ iter = bcf_itr_querys(idx, hdr, "20:1110600-1110800");\n+ bcf_itr_destroy(iter);\n+\n+ hts_idx_destroy(idx);\n+ bcf_hdr_destroy(hdr);\n+ int ret;\n+ if ( (ret=hts_close(fp)) )\n+ {\n+ fprintf(stderr,"hts_close(%s): non-zero status %d\\n",fname,ret);\n+ exit(ret);\n+ }\n+}\n+\n+int main(int argc, char **argv)\n+{\n+ char *fname = argc>1 ? argv[1] : "rmme.bcf";\n+ write_bcf(fname);\n+ bcf_to_vcf(fname);\n+ iterator(fname);\n+ return 0;\n+}\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/test-vcf-api.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/test-vcf-api.out Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,28 @@ +##fileformat=VCFv4.2 +##FILTER=<ID=PASS,Description="All filters passed"> +##fileDate=20090805 +##unused=<XX=AA,Description="Unused generic"> +##source=myImputationProgramV3.1 +##reference=file:///seq/references/1000GenomesPilot-NCBI36.fasta +##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species="Homo sapiens",taxonomy=x> +##phasing=partial +##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data"> +##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth"> +##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency"> +##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele"> +##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129"> +##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership"> +##FILTER=<ID=q10,Description="Quality below 10"> +##FILTER=<ID=s50,Description="Less than 50% of samples have data"> +##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype"> +##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality"> +##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth"> +##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality"> +##FORMAT=<ID=TS,Number=1,Type=String,Description="Test String"> +#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 +20 14370 rs6054257 G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:GQ:DP:HQ:TS 0|0:48:1:51,51:String1 1|0:48:8:51,51:SomeOtherString2 1/1:43:5:.,.:YetAnotherString3 +20 14370 . G A 29 PASS NS=3;DP=14;AF=0.5;DB;H2 GT:DP:HQ:TS 0|0:1:51,51:String1 1|0:8:51,51:SomeOtherString2 1/1:5:.,.:YetAnotherString3 +20 14370 . G A 29 PASS NS=3;DP=99;AF=0.5;DB;H2 GT:DP:HQ:TS 0|0:9:51,51:String1 1|0:9:51,51:SomeOtherString2 1/1:9:.,.:YetAnotherString3 +20 1110696 . A G,T 67 . NS=2;DP=10;AF=0.333,.;AA=T;DB GT 2 1 ./. +20 1110696 . A G,T 67 . NS=2;DP=10;AF=0.333,.;AA=T;DB GT 2 1 ./. +20 1110696 . G A 67 . NS=2;DP=99;AF=0.333,.;AA=T;DB GT:DP 2:9 1:9 ./.:9 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/test-vcf-sweep.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/test-vcf-sweep.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,112 @@ +/* test/test-vcf-sweep.c -- VCF test harness. + + Copyright (C) 2013 Genome Research Ltd. + + Author: Petr Danecek <pd3@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. */ + +#include <stdio.h> +#include <htslib/vcf_sweep.h> + +int main(int argc, char **argv) +{ + if ( argc!=2 ) + { + fprintf(stderr,"Usage: test-vcf-sweep <file.bcf|file.vcf>\n"); + return 1; + } + + // Init variables. The checksum is just for this test program to output + // something and verify that all sites are read in both passes - fwd and + // bwd. + bcf_sweep_t *sw = bcf_sweep_init(argv[1]); + bcf_hdr_t *hdr = bcf_sweep_hdr(sw); + int chksum = 0; + + // First we must sweep forward and read the whole file to build an index. 
+ // If this is undesirable, we can require the presence of a .gzi index + // which can be created with `bgzip -r` from the samtools/htslib package + bcf1_t *rec; + while ( (rec = bcf_sweep_fwd(sw)) ) chksum += rec->pos+1; + printf("fwd position chksum: %d\n", chksum); + + // Now sweep backward. + chksum = 0; + while ( (rec = bcf_sweep_bwd(sw)) ) chksum += rec->pos+1; + printf("bwd position chksum: %d\n", chksum); + + // And forward and backward again, this time summing the PL vectors + int i,j, mPLs = 0, nPLs; + int32_t *PLs = NULL; + chksum = 0; + while ( (rec = bcf_sweep_fwd(sw)) ) + { + // get copy of the PL vectors + nPLs = bcf_get_format_int32(hdr, rec, "PL", &PLs, &mPLs); + if ( !nPLs ) continue; // PL not present + + // how many values are there per sample + int nvals = nPLs / bcf_hdr_nsamples(hdr); + + int32_t *ptr = PLs; + for (i=0; i<bcf_hdr_nsamples(hdr); i++) + { + for (j=0; j<nvals; j++) + { + // check for shorter vectors (haploid genotypes amongst diploids) + if ( ptr[j]==bcf_int32_vector_end ) break; + + // skip missing values + if ( ptr[j]==bcf_int32_missing ) continue; + + chksum += ptr[j]; + } + ptr += nvals; + } + } + printf("fwd PL chksum: %d\n", chksum); + + // And the same backwards.. + chksum = 0; + while ( (rec = bcf_sweep_bwd(sw)) ) + { + nPLs = bcf_get_format_int32(hdr, rec, "PL", &PLs, &mPLs); + if ( !nPLs ) continue; + int nvals = nPLs / bcf_hdr_nsamples(hdr); + int32_t *ptr = PLs; + for (i=0; i<bcf_hdr_nsamples(hdr); i++) + { + for (j=0; j<nvals; j++) + { + if ( ptr[j]==bcf_int32_vector_end ) break; + if ( ptr[j]==bcf_int32_missing ) continue; + chksum += ptr[j]; + } + ptr += nvals; + } + } + printf("bwd PL chksum: %d\n", chksum); + + // Clean up + bcf_sweep_destroy(sw); + return 0; +} + + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/test-vcf-sweep.out --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/test-vcf-sweep.out Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,4 @@ +fwd position chksum: 1125066 +bwd position chksum: 1125066 +fwd PL chksum: 0 +bwd PL chksum: 0 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/test.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/test.pl Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,202 @@ +#!/usr/bin/env perl +# +# Copyright (C) 2012-2013 Genome Research Ltd. +# +# Author: Petr Danecek <pd3@sanger.ac.uk> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +use strict; +use warnings; +use Carp; +use FindBin; +use lib "$FindBin::Bin"; +use Getopt::Long; +use File::Temp qw/ tempfile tempdir /; + +my $opts = parse_params(); + +test_vcf_api($opts,out=>'test-vcf-api.out'); +test_vcf_sweep($opts,out=>'test-vcf-sweep.out'); + +print "\nNumber of tests:\n"; +printf " total .. %d\n", $$opts{nok}+$$opts{nfailed}; +printf " passed .. %d\n", $$opts{nok}; +printf " failed .. 
%d\n", $$opts{nfailed}; +print "\n"; + +exit ($$opts{nfailed} > 0); + +#-------------------- + +sub error +{ + my (@msg) = @_; + if ( scalar @msg ) { confess @msg; } + "About: samtools/htslib consistency test script\n", + "Usage: test.pl [OPTIONS]\n", + "Options:\n", + " -r, --redo-outputs Recreate expected output files.\n", + " -t, --temp-dir <path> When given, temporary files will not be removed.\n", + " -h, -?, --help This help message.\n", + "\n"; + exit 1; +} +sub parse_params +{ + my $opts = { keep_files=>0, nok=>0, nfailed=>0 }; + my $help; + Getopt::Long::Configure('bundling'); + my $ret = GetOptions ( + 't|temp-dir:s' => \$$opts{keep_files}, + 'r|redo-outputs' => \$$opts{redo_outputs}, + 'h|?|help' => \$help + ); + if ( !$ret or $help ) { error(); } + $$opts{tmp} = $$opts{keep_files} ? $$opts{keep_files} : tempdir(CLEANUP=>1); + if ( $$opts{keep_files} ) { cmd("mkdir -p $$opts{keep_files}"); } + $$opts{path} = $FindBin::RealBin; + $$opts{bin} = $FindBin::RealBin; + $$opts{bin} =~ s{/test/?$}{}; + return $opts; +} +sub _cmd +{ + my ($cmd) = @_; + my $kid_io; + my @out; + my $pid = open($kid_io, "-|"); + if ( !defined $pid ) { error("Cannot fork: $!"); } + if ($pid) + { + # parent + @out = <$kid_io>; + close($kid_io); + } + else + { + # child + exec('/bin/bash', '-o','pipefail','-c', $cmd) or error("Cannot execute the command [/bin/sh -o pipefail -c $cmd]: $!"); + } + return ($? 
>> 8, join('',@out)); +} +sub cmd +{ + my ($cmd) = @_; + my ($ret,$out) = _cmd($cmd); + if ( $ret ) { error("The command failed [$ret]: $cmd\n", $out); } + return $out; +} +sub test_cmd +{ + my ($opts,%args) = @_; + if ( !exists($args{out}) ) + { + if ( !exists($args{in}) ) { error("FIXME: expected out or in key\n"); } + $args{out} = "$args{in}.out"; + } + my ($package, $filename, $line, $test)=caller(1); + $test =~ s/^.+:://; + + print "$test:\n"; + print "\t$args{cmd}\n"; + + my ($ret,$out) = _cmd("$args{cmd} 2>&1"); + if ( $ret ) { failed($opts,$test); return; } + if ( $$opts{redo_outputs} && -e "$$opts{path}/$args{out}" ) + { + rename("$$opts{path}/$args{out}","$$opts{path}/$args{out}.old"); + open(my $fh,'>',"$$opts{path}/$args{out}") or error("$$opts{path}/$args{out}: $!"); + print $fh $out; + close($fh); + my ($ret,$out) = _cmd("diff -q $$opts{path}/$args{out} $$opts{path}/$args{out}.old"); + if ( !$ret && $out eq '' ) { unlink("$$opts{path}/$args{out}.old"); } + else + { + print "\tthe expected output changed, saving:\n"; + print "\t old .. $$opts{path}/$args{out}.old\n"; + print "\t new .. 
$$opts{path}/$args{out}\n"; + } + } + my $exp = ''; + if ( open(my $fh,'<',"$$opts{path}/$args{out}") ) + { + my @exp = <$fh>; + $exp = join('',@exp); + close($fh); + } + elsif ( !$$opts{redo_outputs} ) { failed($opts,$test,"$$opts{path}/$args{out}: $!"); return; } + + if ( $exp ne $out ) + { + open(my $fh,'>',"$$opts{path}/$args{out}.new") or error("$$opts{path}/$args{out}.new"); + print $fh $out; + close($fh); + if ( !-e "$$opts{path}/$args{out}" ) + { + rename("$$opts{path}/$args{out}.new","$$opts{path}/$args{out}") or error("rename $$opts{path}/$args{out}.new $$opts{path}/$args{out}: $!"); + print "\tthe file with expected output does not exist, creating new one:\n"; + print "\t\t$$opts{path}/$args{out}\n"; + } + else + { + failed($opts,$test,"The outputs differ:\n\t\t$$opts{path}/$args{out}\n\t\t$$opts{path}/$args{out}.new"); + } + return; + } + passed($opts,$test); +} +sub failed +{ + my ($opts,$test,$reason) = @_; + $$opts{nfailed}++; + if ( defined $reason ) { print "\n\t$reason"; } + print "\n.. failed ...\n\n"; +} +sub passed +{ + my ($opts,$test) = @_; + $$opts{nok}++; + print ".. ok\n\n"; +} +sub is_file_newer +{ + my ($afile,$bfile) = @_; + my (@astat) = stat($afile) or return 0; + my (@bstat) = stat($bfile) or return 0; + if ( $astat[9]>$bstat[9] ) { return 1 } + return 0; +} + + +# The tests -------------------------- + +sub test_vcf_api +{ + my ($opts,%args) = @_; + test_cmd($opts,%args,cmd=>"$$opts{path}/test-vcf-api $$opts{tmp}/test-vcf-api.bcf"); +} + +sub test_vcf_sweep +{ + my ($opts,%args) = @_; + test_cmd($opts,%args,cmd=>"$$opts{path}/test-vcf-sweep $$opts{tmp}/test-vcf-api.bcf"); +} + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/test_view.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/test_view.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,239 @@\n+/* test/test_view.c -- simple view tool, purely for use in a test harness.\n+\n+ Copyright (C) 2012 Broad Institute.\n+ Copyright (C) 2013-2014 Genome Research Ltd.\n+\n+ Author: Heng Li <lh3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#include <stdio.h>\n+#include <unistd.h>\n+#include <stdlib.h>\n+#include <string.h>\n+\n+#include "cram/cram.h"\n+\n+#include "htslib/sam.h"\n+\n+typedef struct hts_opt {\n+ enum cram_option opt;\n+ union {\n+ int i;\n+ char *s;\n+ } val;\n+ struct hts_opt *next;\n+} hts_opt;\n+\n+/*\n+ * Parses arg and appends it to the option list.\n+ * Returns 0 on success;\n+ * -1 on failure.\n+ */\n+int add_option(hts_opt **opts, char *arg) {\n+ hts_opt *o, *t;\n+ char *cp;\n+\n+ if (!(cp = strchr(arg, \'=\')))\n+ cp = "1"; // assume boolean\n+ else\n+ *cp++ = 0;\n+\n+ if (!(o = malloc(sizeof(*o))))\n+ return -1;\n+\n+ if (strcmp(arg, "DECODE_MD") == 0)\n+ o->opt = CRAM_OPT_DECODE_MD, o->val.i = atoi(cp);\n+ else if (strcmp(arg, "VERBOSITY") == 0)\n+ o->opt = CRAM_OPT_VERBOSITY, o->val.i = atoi(cp);\n+ else if (strcmp(arg, "SEQS_PER_SLICE") == 0)\n+ o->opt = CRAM_OPT_SEQS_PER_SLICE, o->val.i = atoi(cp);\n+ else if (strcmp(arg, "SLICES_PER_CONTAINER") == 0)\n+ o->opt = CRAM_OPT_SLICES_PER_CONTAINER, o->val.i = atoi(cp);\n+ else if (strcmp(arg, "EMBED_REF") == 0)\n+ o->opt = CRAM_OPT_EMBED_REF, o->val.i = atoi(cp);\n+ else if (strcmp(arg, "NO_REF") == 0)\n+ o->opt = CRAM_OPT_NO_REF, o->val.i = atoi(cp);\n+ else if (strcmp(arg, "IGNORE_MD5") == 0)\n+ o->opt = CRAM_OPT_IGNORE_MD5, o->val.i = atoi(cp);\n+ else if (strcmp(arg, "USE_BZIP2") == 0)\n+ o->opt = CRAM_OPT_USE_BZIP2, o->val.i = atoi(cp);\n+ else if (strcmp(arg, "USE_RANS") == 0)\n+ o->opt = CRAM_OPT_USE_RANS, o->val.i = atoi(cp);\n+ else if (strcmp(arg, "USE_LZMA") == 0)\n+ o->opt = CRAM_OPT_USE_LZMA, o->val.i = atoi(cp);\n+ else if (strcmp(arg, "REFERENCE") == 0)\n+ o->opt = CRAM_OPT_REFERENCE, o->val.s = cp;\n+ else if (strcmp(arg, "VERSION") == 0)\n+ o->opt = CRAM_OPT_VERSION, o->val.s =cp;\n+ else if (strcmp(arg, "MULTI_SEQ_PER_SLICE") == 0)\n+ o->opt = CRAM_OPT_MULTI_SEQ_PER_SLICE, o->val.i = atoi(cp);\n+ else if (strcmp(arg, "NTHREADS") == 0)\n+ o->opt = CRAM_OPT_NTHREADS, o->val.i = atoi(cp);\n+ else if 
(strcmp(arg, "REQUIRED_FIELDS") == 0)\n+ o->opt = CRAM_OPT_REQUIRED_FIELDS, o->val.i = strtol(cp, NULL, 0);\n+ else {\n+ fprintf(stderr, "Unknown option \'%s\'\\n", arg);\n+ free(o);\n+ return -1;\n+ }\n+\n+ o->next = NULL;\n+\n+ if (*opts) {\n+ t = *opts;\n+ while (t->next)\n+ t = t->next;\n+ t->next = o;\n+ } else {\n+ *opts = o;\n+ }\n+\n+ return 0;\n+}\n+\n+int main(int argc, char *argv[])\n+{\n+ samFile *in;\n+ char *fn_ref = 0;\n+ int flag = 0, c, clevel = -1, ignore_sam_err = 0;\n+ char moder[8];\n+ '..b' int r = 0, exit_code = 0;\n+ hts_opt *in_opts = NULL, *out_opts = NULL, *last = NULL;\n+\n+ while ((c = getopt(argc, argv, "IbDCSl:t:i:o:")) >= 0) {\n+ switch (c) {\n+ case \'S\': flag |= 1; break;\n+ case \'b\': flag |= 2; break;\n+ case \'D\': flag |= 4; break;\n+ case \'C\': flag |= 8; break;\n+ case \'l\': clevel = atoi(optarg); flag |= 2; break;\n+ case \'t\': fn_ref = optarg; break;\n+ case \'I\': ignore_sam_err = 1; break;\n+ case \'i\': if (add_option(&in_opts, optarg)) return 1; break;\n+ case \'o\': if (add_option(&out_opts, optarg)) return 1; break;\n+ }\n+ }\n+ if (argc == optind) {\n+ fprintf(stderr, "Usage: samview [-bSCSI] [-l level] [-o option=value] <in.bam>|<in.sam>|<in.cram> [region]\\n");\n+ return 1;\n+ }\n+ strcpy(moder, "r");\n+ if (flag&4) strcat(moder, "c");\n+ else if ((flag&1) == 0) strcat(moder, "b");\n+\n+ in = sam_open(argv[optind], moder);\n+ if (in == NULL) {\n+ fprintf(stderr, "Error opening \\"%s\\"\\n", argv[optind]);\n+ return EXIT_FAILURE;\n+ }\n+ h = sam_hdr_read(in);\n+ h->ignore_sam_err = ignore_sam_err;\n+ b = bam_init1();\n+\n+ strcpy(modew, "w");\n+ if (clevel >= 0 && clevel <= 9) sprintf(modew + 1, "%d", clevel);\n+ if (flag&8) strcat(modew, "c");\n+ else if (flag&2) strcat(modew, "b");\n+ out = hts_open("-", modew);\n+ if (out == NULL) {\n+ fprintf(stderr, "Error opening standard output\\n");\n+ return EXIT_FAILURE;\n+ }\n+\n+ /* CRAM output */\n+ if (flag & 8) {\n+ // Parse input header and use for CRAM 
output\n+ out->fp.cram->header = sam_hdr_parse_(h->text, h->l_text);\n+\n+ // Create CRAM references arrays\n+ if (fn_ref)\n+ cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, fn_ref);\n+ else\n+ // Attempt to fill out a cram->refs[] array from @SQ headers\n+ cram_set_option(out->fp.cram, CRAM_OPT_REFERENCE, NULL);\n+ }\n+\n+ // Process any options; currently cram only.\n+ for (; in_opts; in_opts = (last=in_opts)->next, free(last)) {\n+ hts_set_opt(in, in_opts->opt, in_opts->val);\n+ if (in_opts->opt == CRAM_OPT_REFERENCE)\n+ hts_set_opt(out, in_opts->opt, in_opts->val);\n+ }\n+ for (; out_opts; out_opts = (last=out_opts)->next, free(last))\n+ hts_set_opt(out, out_opts->opt, out_opts->val);\n+\n+ sam_hdr_write(out, h);\n+ if (optind + 1 < argc && !(flag&1)) { // BAM input and has a region\n+ int i;\n+ hts_idx_t *idx;\n+ if ((idx = bam_index_load(argv[optind])) == 0) {\n+ fprintf(stderr, "[E::%s] fail to load the BAM index\\n", __func__);\n+ return 1;\n+ }\n+ for (i = optind + 1; i < argc; ++i) {\n+ hts_itr_t *iter;\n+ if ((iter = bam_itr_querys(idx, h, argv[i])) == 0) {\n+ fprintf(stderr, "[E::%s] fail to parse region \'%s\'\\n", __func__, argv[i]);\n+ continue;\n+ }\n+ while ((r = bam_itr_next(in, iter, b)) >= 0) {\n+ if (sam_write1(out, h, b) < 0) {\n+ fprintf(stderr, "Error writing output.\\n");\n+ exit_code = 1;\n+ break;\n+ }\n+ }\n+ hts_itr_destroy(iter);\n+ }\n+ hts_idx_destroy(idx);\n+ } else while ((r = sam_read1(in, h, b)) >= 0) {\n+ if (sam_write1(out, h, b) < 0) {\n+ fprintf(stderr, "Error writing output.\\n");\n+ exit_code = 1;\n+ break;\n+ }\n+ }\n+\n+ if (r < -1) {\n+ fprintf(stderr, "Error parsing input.\\n");\n+ exit_code = 1;\n+ }\n+\n+ r = sam_close(out);\n+ if (r < 0) {\n+ fprintf(stderr, "Error closing output.\\n");\n+ exit_code = 1;\n+ }\n+\n+ bam_destroy1(b);\n+ bam_hdr_destroy(h);\n+\n+ r = sam_close(in);\n+ if (r < 0) {\n+ fprintf(stderr, "Error closing input.\\n");\n+ exit_code = 1;\n+ }\n+\n+ return exit_code;\n+}\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/test_view.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/test_view.pl Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,71 @@ +#! /usr/bin/env perl +# +# Copyright (C) 2013 Genome Research Ltd. +# +# Author: James Bonfield <jkb@sanger.ac.uk> +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+use strict; +use warnings; + +my $err_count = 0; +my $suc_count = 0; + +sub test { + my ($cmd) = @_; + print " $cmd\n"; + if (system("$cmd || exit 1") != 0) { + print "FAIL $!\n"; + $err_count++; + } else { + $suc_count++; + } +} + +foreach my $sam (glob("*#*.sam")) { + my ($base, $ref) = ($sam =~ /((.*)#.*)\.sam/); + $ref .= ".fa"; + + my $bam = "$base.tmp.bam"; + my $cram = "$base.tmp.cram"; + + print "\n=== Testing $sam, ref $ref ===\n"; + + # SAM -> BAM -> SAM + test "./test_view -S -b $sam > $bam"; + test "./test_view $bam > $bam.sam_"; + test "./compare_sam.pl $sam $bam.sam_"; + + # SAM -> CRAM -> SAM + test "./test_view -t $ref -S -C $sam > $cram"; + test "./test_view -D $cram > $cram.sam_"; + test "./compare_sam.pl -nomd $sam $cram.sam_"; + + # BAM -> CRAM -> BAM -> SAM + $cram = "$bam.cram"; + test "./test_view -t $ref -C $bam > $cram"; + test "./test_view -b -D $cram > $cram.bam"; + test "./test_view $cram.bam > $cram.bam.sam_"; + test "./compare_sam.pl -nomd $sam $cram.bam.sam_"; +} + +print "\nSuccesses $suc_count\n"; +print "\nFailures $err_count\n"; + +exit ($err_count > 0); |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/xx#large_aux.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/xx#large_aux.sam Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,4 @@\n+@SQ\tSN:xx\tLN:20\n+a1\t16\txx\t1\t1\t10M\t*\t0\t0\tAAAAAAAAAA\t*\taa:i:1\tab:i:1\tac:i:1\tad:i:1\tae:i:1\taf:i:1\tag:i:1\tah:i:1\tai:i:1\taj:i:1\tak:i:1\tal:i:1\tam:i:1\tan:i:1\tao:i:1\tap:i:1\taq:i:1\tar:i:1\tas:i:1\tat:i:1\tau:i:1\tav:i:1\taw:i:1\tax:i:1\tay:i:1\taz:i:1\tba:i:1\tbb:i:1\tbc:i:1\tbd:i:1\tbe:i:1\tbf:i:1\tbg:i:1\tbh:i:1\tbi:i:1\tbj:i:1\tbk:i:1\tbl:i:1\tbm:i:1\tbn:i:1\tbo:i:1\tbp:i:1\tbq:i:1\tbr:i:1\tbs:i:1\tbt:i:1\tbu:i:1\tbv:i:1\tbw:i:1\tbx:i:1\tby:i:1\tbz:i:1\tca:i:1\tcb:i:1\tcc:i:1\tcd:i:1\tce:i:1\tcf:i:1\tcg:i:1\tch:i:1\tci:i:1\tcj:i:1\tck:i:1\tcl:i:1\tcm:i:1\tcn:i:1\tco:i:1\tcp:i:1\tcq:i:1\tcr:i:1\tcs:i:1\tct:i:1\tcu:i:1\tcv:i:1\tcw:i:1\tcx:i:1\tcy:i:1\tcz:i:1\tda:i:1\tdb:i:1\tdc:i:1\tdd:i:1\tde:i:1\tdf:i:1\tdg:i:1\tdh:i:1\tdi:i:1\tdj:i:1\tdk:i:1\tdl:i:1\tdm:i:1\tdn:i:1\tdo:i:1\tdp:i:1\tdq:i:1\tdr:i:1\tds:i:1\tdt:i:1\tdu:i:1\tdv:i:1\tdw:i:1\tdx:i:1\tdy:i:1\tdz:i:1\tea:i:1\teb:i:1\tec:i:1\ted:i:1\tee:i:1\tef:i:1\teg:i:1\teh:i:1\tei:i:1\tej:i:1\tek:i:1\tel:i:1\tem:i:1\ten:i:1\teo:i:1\tep:i:1\teq:i:1\ter:i:1\tes:i:1\tet:i:1\teu:i:1\tev:i:1\tew:i:1\tex:i:1\tey:i:1\tez:i:1\tfa:i:1\tfb:i:1\tfc:i:1\tfd:i:1\tfe:i:1\tff:i:1\tfg:i:1\tfh:i:1\tfi:i:1\tfj:i:1\tfk:i:1\tfl:i:1\tfm:i:1\tfn:i:1\tfo:i:1\tfp:i:1\tfq:i:1\tfr:i:1\tfs:i:1\tft:i:1\tfu:i:1\tfv:i:1\tfw:i:1\tfx:i:1\tfy:i:1\tfz:i:1\tga:i:1\tgb:i:1\tgc:i:1\tgd:i:1\tge:i:1\tgf:i:1\tgg:i:1\tgh:i:1\tgi:i:1\tgj:i:1\tgk:i:1\tgl:i:1\tgm:i:1\tgn:i:1\tgo:i:1\tgp:i:1\tgq:i:1\tgr:i:1\tgs:i:1\tgt:i:1\tgu:i:1\tgv:i:1\tgw:i:1\tgx:i:1\tgy:i:1\tgz:i:1\tha:i:1\thb:i:1\thc:i:1\thd:i:1\the:i:1\thf:i:1\thg:i:1\thh:i:1\thi:i:1\thj:i:1\thk:i:1\thl:i:1\thm:i:1\thn:i:1\tho:i:1\thp:i:1\thq:i:1\thr:i:1\ths:i:1\tht:i:1\thu:i:1\thv:i:1\thw:i:1\thx:i:1\thy:i:1\thz:i:1\tia:i:1\tib:i:1\tic:i:1\tid:i:1\tie:i:1\tif:i:1\tig:i:1\tih:i:1\tii:i:1\tij:i:1\tik:i:1\til:i:1\tim:i:1\tin:i:1\tio:i:1\tip:i:1\tiq:i:1\tir:i:1\tis:i:1\tit:i:1\tiu:i:1\tiv:i:1\tiw:i:1\tix:i:1\tiy:i:1\tiz:i:1\tja:i:1\tjb:i:1\tjc:i:1\tjd:i:1\tje:i:1\
tjf:i:1\tjg:i:1\tjh:i:1\tji:i:1\tjj:i:1\tjk:i:1\tjl:i:1\tjm:i:1\tjn:i:1\tjo:i:1\tjp:i:1\tjq:i:1\tjr:i:1\tjs:i:1\tjt:i:1\tju:i:1\n+a2\t16\txx\t1\t1\t10M\t*\t0\t0\tAAAAAAAAAA\t*\taa:i:1\tab:i:1\tac:i:1\tad:i:1\tae:i:1\taf:i:1\tag:i:1\tah:i:1\tai:i:1\taj:i:1\tak:i:1\tal:i:1\tam:i:1\tan:i:1\tao:i:1\tap:i:1\taq:i:1\tar:i:1\tas:i:1\tat:i:1\tau:i:1\tav:i:1\taw:i:1\tax:i:1\tay:i:1\taz:i:1\tba:i:1\tbb:i:1\tbc:i:1\tbd:i:1\tbe:i:1\tbf:i:1\tbg:i:1\tbh:i:1\tbi:i:1\tbj:i:1\tbk:i:1\tbl:i:1\tbm:i:1\tbn:i:1\tbo:i:1\tbp:i:1\tbq:i:1\tbr:i:1\tbs:i:1\tbt:i:1\tbu:i:1\tbv:i:1\tbw:i:1\tbx:i:1\tby:i:1\tbz:i:1\tca:i:1\tcb:i:1\tcc:i:1\tcd:i:1\tce:i:1\tcf:i:1\tcg:i:1\tch:i:1\tci:i:1\tcj:i:1\tck:i:1\tcl:i:1\tcm:i:1\tcn:i:1\tco:i:1\tcp:i:1\tcq:i:1\tcr:i:1\tcs:i:1\tct:i:1\tcu:i:1\tcv:i:1\tcw:i:1\tcx:i:1\tcy:i:1\tcz:i:1\tda:i:1\tdb:i:1\tdc:i:1\tdd:i:1\tde:i:1\tdf:i:1\tdg:i:1\tdh:i:1\tdi:i:1\tdj:i:1\tdk:i:1\tdl:i:1\tdm:i:1\tdn:i:1\tdo:i:1\tdp:i:1\tdq:i:1\tdr:i:1\tds:i:1\tdt:i:1\tdu:i:1\tdv:i:1\tdw:i:1\tdx:i:1\tdy:i:1\tdz:i:1\tea:i:1\teb:i:1\tec:i:1\ted:i:1\tee:i:1\tef:i:1\teg:i:1\teh:i:1\tei:i:1\tej:i:1\tek:i:1\tel:i:1\tem:i:1\ten:i:1\teo:i:1\tep:i:1\teq:i:1\ter:i:1\tes:i:1\tet:i:1\teu:i:1\tev:i:1\tew:i:1\tex:i:1\tey:i:1\tez:i:1\tfa:i:1\tfb:i:1\tfc:i:1\tfd:i:1\tfe:i:1\tff:i:1\tfg:i:1\tfh:i:1\tfi:i:1\tfj:i:1\tfk:i:1\tfl:i:1\tfm:i:1\tfn:i:1\tfo:i:1\tfp:i:1\tfq:i:1\tfr:i:1\tfs:i:1\tft:i:1\tfu:i:1\tfv:i:1\tfw:i:1\tfx:i:1\tfy:i:1\tfz:i:1\tga:i:1\tgb:i:1\tgc:i:1\tgd:i:1\tge:i:1\tgf:i:1\tgg:i:1\tgh:i:1\tgi:i:1\tgj:i:1\tgk:i:1\tgl:i:1\tgm:i:1\tgn:i:1\tgo:i:1\tgp:i:1\tgq:i:1\tgr:i:1\tgs:i:1\tgt:i:1\tgu:i:1\tgv:i:1\tgw:i:1\tgx:i:1\tgy:i:1\tgz:i:1\tha:i:1\thb:i:1\thc:i:1\thd:i:1\the:i:1\thf:i:1\thg:i:1\thh:i:1\thi:i:1\thj:i:1\thk:i:1\thl:i:1\thm:i:1\thn:i:1\tho:i:1\thp:i:1\thq:i:1\thr:i:1\ths:i:1\tht:i:1\thu:i:1\thv:i:1\thw:i:1\thx:i:1\thy:i:1\thz:i:1\tia:i:1\tib:i:1\tic:i:1\tid:i:1\tie:i:1\tif:i:1\tig:i:1\tih:i:1\tii:i:1\tij:i:1\tik:i:1\til:i:1\tim:i:1\tin:i:1\tio:i:1\tip:i:1\tiq:i:1\tir:i:1\tis:i:1\tit:i:1\
tiu:i:1\tiv:i:1\tiw:i:1\tix:i:1\tiy:i:1\tiz:i:1\tja:i:1\tjb:i:1\tjc:i:1\tjd:i:1\tje:i:1\tjf:i:1\tjg:i:1\tjh:i:1\tji:i:1\tjj:i:1\tjk:i:1\tjl:i:1\tjm:i:1\tjn:i:1\tjo:i:1\tjp:i:1\tjq:i:1\tjr:i:1\tjs:i:1\tjt:i:1\tju:i:1\tAa:i:1\tAb:i:1\tAc:i:1\tAd:i:1\tAe:i:1\tAf:i:1\tAg:i:1\tAh:i:1\tAi:i:1\tAj:i:1\tAk:i:1\tAl:i:1\tAm:i:1\tAn:i:1\tAo:i:1\tAp:i:1\tAq:i:1\tAr:i:1\tAs:i:1\tAt:i:1\tAu:i:1\tAv:i:1\tAw:i:1\tAx:i:1\tAy:i:1\tAz:i:1\tBa:i:1\tBb:i:1\tBc:i:1\tBd:i:1\tBe:i:1\tBf:i:1\tBg:i:1\tBh:i:1\tBi:i:1\tBj:i:1\tBk:i:1\tBl:i:1\tBm:i:1\tBn:i:1\tBo:i:1\tBp:i:1\tBq:i:1\tBr:i:1\tBs:i:1\tBt:i:1\t'..b'SSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSSTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUUVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVVWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZZ[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]]^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^____________________________________________________________________________________________________````````````
````````````````````````````````````````````````````````````````````````````````````````aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffgggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiijjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjjkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkkllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllllmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnnooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppppqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqqrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrsssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssssstttttttttttt
ttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttttuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/xx#large_aux2.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/xx#large_aux2.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,11 @@ +@SQ SN:xx LN:20 +a1 0 xx 1 1 1M * 0 0 A # aa:i:1 +a2 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 +a3 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 +a4 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 +a5 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 +a6 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 +a7 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 ag:i:1 +a8 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 ag:i:1 ah:i:1 +a9 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 ag:i:1 ah:i:1 ai:i:1 +aA 0 xx 1 1 1M * 0 0 A # aa:i:1 ab:i:1 ac:i:1 ad:i:1 ae:i:1 af:i:1 ag:i:1 ah:i:1 ai:i:1 aj:i:1 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/xx#minimal.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/xx#minimal.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,10 @@ +@SQ SN:xx LN:20 +@SQ SN:yy LN:20 +a0 16 xx 4 1 10H * 0 0 * * +a1 16 xx 4 1 5H0M5H * 0 0 * * +a2 16 xx 4 1 5H0I10M0D5H * 0 0 * * +A0 16 yy 4 1 0H * 0 0 * * +A1 16 yy 4 1 0I * 0 0 * * +A2 16 yy 4 1 0D * 0 0 * * +A3 16 yy 4 1 0M * 0 0 * * +A4 16 yy 4 1 0P * 0 0 * * |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/xx#pair.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/xx#pair.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,7 @@ +@SQ SN:xx LN:20 +a1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +b1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +c1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +a1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +b1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +c1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/xx#rg.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/xx#rg.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,13 @@ +@HD VN:1.4 SO:coordinate +@SQ SN:xx LN:20 AS:? SP:? UR:? M5:bbf4de6d8497a119dda6e074521643dc +@RG ID:x1 SM:x1 +@RG ID:x2 SM:x2 LB:x PG:foo:bar PI:1111 +@PG ID:emacs PN:emacs VN:23.1.1 +@CO also test +@CO other headers +a1 16 xx 1 1 10M * 0 0 AAAAAAAAAA ********** RG:Z:x1 +b1 16 xx 1 1 10M * 0 0 AAAAAAAAAA ********** RG:Z:x2 +c1 16 xx 1 1 10M * 0 0 AAAAAAAAAA ********** +a2 16 xx 11 1 10M * 0 0 TTTTTTTTTT ********** RG:Z:x1 +b2 16 xx 11 1 10M * 0 0 TTTTTTTTTT ********** RG:Z:x2 +c2 16 xx 11 1 10M * 0 0 TTTTTTTTTT ********** |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/xx#triplet.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/xx#triplet.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,7 @@ +@SQ SN:xx LN:20 +@SQ SN:yy LN:20 +a1 67 xx 1 1 10M = 6 20 AAAAAAAAAA ********** +a1 35 xx 6 1 10M = 11 -20 AAAAATTTTT ********** +a1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +a1 67 yy 1 1 10M = 6 15 AAAAAAAAAA ********** +a1 3 yy 6 1 10M = 1 -15 AAAAATTTTT ********** |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/xx#unsorted.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/xx#unsorted.sam Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,8 @@ +@SQ SN:xx LN:20 +@SQ SN:yy LN:20 +b1 147 yy 11 1 10M = 1 -20 TTTTTTTTTT ********** +a1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** +a1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +b1 99 yy 1 1 10M = 11 20 AAAAAAAAAA ********** +c1 99 xx 1 1 10M = 11 20 AAAAAAAAAA ********** +c1 147 xx 11 1 10M = 1 -20 TTTTTTTTTT ********** |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/xx.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/xx.fa Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +>xx +AAAAAAAAAATTTTTTTTTT +>yy +AAAAAAAAAATTTTTTTTTT + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/test/xx.fa.fai --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/test/xx.fa.fai Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,2 @@ +xx 20 4 20 21 +yy 20 29 20 21 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/vcf.5 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/vcf.5 Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,120 @@ +'\" t +.TH vcf 5 "August 2013" "htslib" "Bioinformatics formats" +.SH NAME +vcf \- Variant Call Format +.\" +.\" Copyright (C) 2011 Broad Institute. +.\" Copyright (C) 2013 Genome Research Ltd. +.\" +.\" Author: Heng Li <lh3@sanger.ac.uk> +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining a +.\" copy of this software and associated documentation files (the "Software"), +.\" to deal in the Software without restriction, including without limitation +.\" the rights to use, copy, modify, merge, publish, distribute, sublicense, +.\" and/or sell copies of the Software, and to permit persons to whom the +.\" Software is furnished to do so, subject to the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be included in +.\" all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +.\" IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +.\" FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +.\" THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +.\" LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +.\" DEALINGS IN THE SOFTWARE. +.\" +.SH DESCRIPTION +The Variant Call Format (VCF) is a TAB-delimited format with each data line +consisting of the following fields: +.TS +nlbl. 
+1 CHROM CHROMosome name +2 POS the left-most POSition of the variant +3 ID unique variant IDentifier +4 REF the REFerence allele +5 ALT the ALTernate allele(s) (comma-separated) +6 QUAL variant/reference QUALity +7 FILTER FILTERs applied +8 INFO INFOrmation related to the variant (semicolon-separated) +9 FORMAT FORMAT of the genotype fields (optional; colon-separated) +10+ SAMPLE SAMPLE genotypes and per-sample information (optional) +.TE +.P +The following table gives the \fBINFO\fP tags used by samtools and bcftools. +.TP +.B AF1 +Max-likelihood estimate of the site allele frequency (AF) of the first ALT allele +(double) +.TP +.B DP +Raw read depth (without quality filtering) +(int) +.TP +.B DP4 +# high-quality reference forward bases, ref reverse, alternate for and alt rev bases +(int[4]) +.TP +.B FQ +Consensus quality. Positive: sample genotypes different; negative: otherwise +(int) +.TP +.B MQ +Root-Mean-Square mapping quality of covering reads +(int) +.TP +.B PC2 +Phred probability of AF in group1 samples being larger (,smaller) than in group2 +(int[2]) +.TP +.B PCHI2 +Posterior weighted chi^2 P-value between group1 and group2 samples +(double) +.TP +.B PV4 +P-value for strand bias, baseQ bias, mapQ bias and tail distance bias +(double[4]) +.TP +.B QCHI2 +Phred-scaled PCHI2 +(int) +.TP +.B RP +# permutations yielding a smaller PCHI2 +(int) +.TP +.B CLR +Phred log ratio of genotype likelihoods with and without the trio/pair constraint +(int) +.TP +.B UGT +Most probable genotype configuration without the trio constraint +(string) +.TP +.B CGT +Most probable configuration with the trio constraint +(string) +.TP +.B VDB +Tests variant positions within reads. 
Intended for filtering RNA-seq artifacts around splice sites +(float) +.TP +.B RPB +Mann-Whitney rank-sum test for tail distance bias +(float) +.TP +.B HWE +Hardy-Weinberg equilibrium test (Wigginton et al) +(float) +.P +.SH SEE ALSO +.TP +https://github.com/samtools/hts-specs +The full VCF/BCF file format specification +.TP +.I A note on exact tests of Hardy-Weinberg equilibrium +Wigginton JE et al +PMID:15789306 +.\" (http://www.ncbi.nlm.nih.gov/pubmed/15789306) |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/vcf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/vcf.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,3212 @@\n+/* vcf.c -- VCF/BCF API functions.\n+\n+ Copyright (C) 2012, 2013 Broad Institute.\n+ Copyright (C) 2012-2014 Genome Research Ltd.\n+ Portions copyright (C) 2014 Intel Corporation.\n+\n+ Author: Heng Li <lh3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#include <zlib.h>\n+#include <stdio.h>\n+#include <ctype.h>\n+#include <assert.h>\n+#include <string.h>\n+#include <stdlib.h>\n+#include <limits.h>\n+#include "htslib/kstring.h"\n+#include "htslib/bgzf.h"\n+#include "htslib/vcf.h"\n+#include "htslib/tbx.h"\n+#include "htslib/hfile.h"\n+#include "htslib/khash_str2int.h"\n+\n+#include "htslib/khash.h"\n+KHASH_MAP_INIT_STR(vdict, bcf_idinfo_t)\n+typedef khash_t(vdict) vdict_t;\n+\n+#include "htslib/kseq.h"\n+KSTREAM_DECLARE(gzFile, gzread)\n+\n+uint32_t bcf_float_missing = 0x7F800001;\n+uint32_t bcf_float_vector_end = 0x7F800002;\n+uint8_t bcf_type_shift[] = { 0, 0, 1, 2, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };\n+static bcf_idinfo_t bcf_idinfo_def = { .info = { 15, 15, 15 }, .hrec = { NULL, NULL, NULL}, .id = -1 };\n+\n+/*************************\n+ *** VCF header parser ***\n+ *************************/\n+\n+int bcf_hdr_sync(bcf_hdr_t *h);\n+\n+int bcf_hdr_add_sample(bcf_hdr_t *h, const char *s)\n+{\n+ if ( !s ) return 0;\n+\n+ const char *ss = s;\n+ while ( !*ss && isspace(*ss) ) ss++;\n+ if ( !*ss )\n+ {\n+ fprintf(stderr,"[E::%s] Empty sample name: trailing spaces/tabs in the header line?\\n", __func__);\n+ abort();\n+ }\n+\n+ vdict_t *d = (vdict_t*)h->dict[BCF_DT_SAMPLE];\n+ int ret;\n+ char *sdup = strdup(s);\n+ int k = kh_put(vdict, d, sdup, &ret);\n+ if (ret) { // absent\n+ kh_val(d, k) = bcf_idinfo_def;\n+ kh_val(d, k).id = kh_size(d) - 1;\n+ } else {\n+ if (hts_verbose >= 2)\n+ {\n+ fprintf(stderr, "[E::%s] Duplicated sample name \'%s\'\\n", __func__, s);\n+ abort();\n+ }\n+ free(sdup);\n+ return -1;\n+ }\n+ int n = kh_size(d);\n+ h->samples = (char**) realloc(h->samples,sizeof(char*)*n);\n+ h->samples[n-1] = sdup;\n+ h->dirty = 1;\n+ return 0;\n+}\n+\n+int bcf_hdr_parse_sample_line(bcf_hdr_t *h, const char *str)\n+{\n+ int ret = 0;\n+ int i = 0;\n+ const char *p, *q;\n+ // add samples\n+ for (p = q = str;; ++q) {\n+ if (*q != \'\\t\' && *q != 0 && *q != \'\\n\') continue;\n+ if (++i > 9) {\n+ char *s 
= (char*)malloc(q - p + 1);\n+ strncpy(s, p, q - p);\n+ s[q - p] = 0;\n+ if ( bcf_hdr_add_sample(h,s) < 0 ) ret = -1;\n+ free(s);\n+ }\n+ if (*q == 0 || *q == \'\\n\') break;\n+ p = q + 1;\n+ }\n+ bcf_hdr_add_sample(h,NULL);\n+ return ret;\n+}\n+\n+int bcf_hdr_sync(bcf_hdr_t *h)\n+{\n+ int i;\n+ for (i = 0; i < 3; i++)\n+ {\n+ vdict_t *d = (vdict_t*)h->dict[i];\n+ khint_t k;\n+\n+ // find out the largest id, there may be holes because of IDX\n+ int max_id = -1;\n+ for (k=kh_begin(d); k<kh_end(d); k++)\n+ {\n+ '..b'_UN_FMT);\n+\n+ for (i=0; i<line->n_fmt; i++)\n+ if ( line->d.fmt[i].id==tag_id ) break;\n+ if ( i==line->n_fmt ) return -3; // the tag is not present in this record\n+ bcf_fmt_t *fmt = &line->d.fmt[i];\n+\n+ int nsmpl = bcf_hdr_nsamples(hdr);\n+ if ( !*dst )\n+ {\n+ *dst = (char**) malloc(sizeof(char*)*nsmpl);\n+ if ( !*dst ) return -4; // could not alloc\n+ (*dst)[0] = NULL;\n+ }\n+ int n = (fmt->n+1)*nsmpl;\n+ if ( *ndst < n )\n+ {\n+ (*dst)[0] = realloc((*dst)[0], n);\n+ if ( !(*dst)[0] ) return -4; // could not alloc\n+ *ndst = n;\n+ }\n+ for (i=0; i<nsmpl; i++)\n+ {\n+ uint8_t *src = fmt->p + i*fmt->n;\n+ uint8_t *tmp = (uint8_t*)(*dst)[0] + i*(fmt->n+1);\n+ memcpy(tmp,src,fmt->n);\n+ tmp[fmt->n] = 0;\n+ (*dst)[i] = (char*) tmp;\n+ }\n+ return n;\n+}\n+\n+int bcf_get_format_values(const bcf_hdr_t *hdr, bcf1_t *line, const char *tag, void **dst, int *ndst, int type)\n+{\n+ int i,j, tag_id = bcf_hdr_id2int(hdr, BCF_DT_ID, tag);\n+ if ( !bcf_hdr_idinfo_exists(hdr,BCF_HL_FMT,tag_id) ) return -1; // no such FORMAT field in the header\n+ if ( tag[0]==\'G\' && tag[1]==\'T\' && tag[2]==0 )\n+ {\n+ // Ugly: GT field is considered to be a string by the VCF header but BCF represents it as INT.\n+ if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=BCF_HT_STR ) return -2;\n+ }\n+ else if ( bcf_hdr_id2type(hdr,BCF_HL_FMT,tag_id)!=type ) return -2; // expected different type\n+\n+ if ( !(line->unpacked & BCF_UN_FMT) ) bcf_unpack(line, BCF_UN_FMT);\n+\n+ for (i=0; 
i<line->n_fmt; i++)\n+ if ( line->d.fmt[i].id==tag_id ) break;\n+ if ( i==line->n_fmt ) return -3; // the tag is not present in this record\n+ bcf_fmt_t *fmt = &line->d.fmt[i];\n+\n+ if ( type==BCF_HT_STR )\n+ {\n+ int n = fmt->n*bcf_hdr_nsamples(hdr);\n+ if ( *ndst < n )\n+ {\n+ *dst = realloc(*dst, n);\n+ if ( !*dst ) return -4; // could not alloc\n+ *ndst = n;\n+ }\n+ memcpy(*dst,fmt->p,n);\n+ return n;\n+ }\n+\n+ // Make sure the buffer is big enough\n+ int nsmpl = bcf_hdr_nsamples(hdr);\n+ int size1 = type==BCF_HT_INT ? sizeof(int32_t) : sizeof(float);\n+ if ( *ndst < fmt->n*nsmpl )\n+ {\n+ *ndst = fmt->n*nsmpl;\n+ *dst = realloc(*dst, *ndst*size1);\n+ if ( !dst ) return -4; // could not alloc\n+ }\n+\n+ #define BRANCH(type_t, is_missing, is_vector_end, set_missing, set_vector_end, out_type_t) { \\\n+ out_type_t *tmp = (out_type_t *) *dst; \\\n+ type_t *p = (type_t*) fmt->p; \\\n+ for (i=0; i<nsmpl; i++) \\\n+ { \\\n+ for (j=0; j<fmt->n; j++) \\\n+ { \\\n+ if ( is_missing ) set_missing; \\\n+ else if ( is_vector_end ) { set_vector_end; break; } \\\n+ else *tmp = p[j]; \\\n+ tmp++; \\\n+ } \\\n+ for (; j<fmt->n; j++) { set_vector_end; tmp++; } \\\n+ p = (type_t *)((char *)p + fmt->size); \\\n+ } \\\n+ }\n+ switch (fmt->type) {\n+ case BCF_BT_INT8: BRANCH(int8_t, p[j]==bcf_int8_missing, p[j]==bcf_int8_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, int32_t); break;\n+ case BCF_BT_INT16: BRANCH(int16_t, p[j]==bcf_int16_missing, p[j]==bcf_int16_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, int32_t); break;\n+ case BCF_BT_INT32: BRANCH(int32_t, p[j]==bcf_int32_missing, p[j]==bcf_int32_vector_end, *tmp=bcf_int32_missing, *tmp=bcf_int32_vector_end, int32_t); break;\n+ case BCF_BT_FLOAT: BRANCH(float, bcf_float_is_missing(p[j]), bcf_float_is_vector_end(p[j]), bcf_float_set_missing(*tmp), bcf_float_set_vector_end(*tmp), float); break;\n+ default: fprintf(stderr,"TODO: %s:%d .. 
fmt->type=%d\\n", __FILE__,__LINE__, fmt->type); exit(1);\n+ }\n+ #undef BRANCH\n+ return nsmpl*fmt->n;\n+}\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/vcf_sweep.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/vcf_sweep.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,182 @@ +/* vcf_sweep.c -- forward/reverse sweep API. + + Copyright (C) 2013 Genome Research Ltd. + + Author: Petr Danecek <pd3@sanger.ac.uk> + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. 
*/ + +#include "htslib/vcf_sweep.h" +#include "htslib/bgzf.h" + +#define SW_FWD 0 +#define SW_BWD 1 + +struct _bcf_sweep_t +{ + htsFile *file; + bcf_hdr_t *hdr; + BGZF *fp; + + int direction; // to tell if the direction has changed + int block_size; // the size of uncompressed data to hold in memory + bcf1_t *rec; // bcf buffer + int nrec, mrec; // number of used records; total size of the buffer + int lrid, lpos, lnals, lals_len, mlals; // to check uniqueness of a record + char *lals; + + uint64_t *idx; // uncompressed offsets of VCF/BCF records + int iidx, nidx, midx; // i: current offset; n: used; m: allocated + int idx_done; // the index is built during the first pass +}; + +BGZF *hts_get_bgzfp(htsFile *fp); +int hts_useek(htsFile *file, long uoffset, int where); +long hts_utell(htsFile *file); + +static inline int sw_rec_equal(bcf_sweep_t *sw, bcf1_t *rec) +{ + if ( sw->lrid!=rec->rid ) return 0; + if ( sw->lpos!=rec->pos ) return 0; + if ( sw->lnals!=rec->n_allele ) return 0; + + char *t = rec->d.allele[sw->lnals-1]; + int len = t - rec->d.allele[0] + 1; + while ( *t ) { t++; len++; } + if ( sw->lals_len!=len ) return 0; + if ( memcmp(sw->lals,rec->d.allele[0],len) ) return 0; + return 1; +} + +static void sw_rec_save(bcf_sweep_t *sw, bcf1_t *rec) +{ + sw->lrid = rec->rid; + sw->lpos = rec->pos; + sw->lnals = rec->n_allele; + + char *t = rec->d.allele[sw->lnals-1]; + int len = t - rec->d.allele[0] + 1; + while ( *t ) { t++; len++; } + sw->lals_len = len; + hts_expand(char, len, sw->mlals, sw->lals); + memcpy(sw->lals, rec->d.allele[0], len); +} + +static void sw_fill_buffer(bcf_sweep_t *sw) +{ + if ( !sw->iidx ) return; + sw->iidx--; + + int ret = hts_useek(sw->file, sw->idx[sw->iidx], 0); + assert( ret==0 ); + + sw->nrec = 0; + bcf1_t *rec = &sw->rec[sw->nrec]; + while ( (ret=bcf_read1(sw->file, sw->hdr, rec))==0 ) + { + bcf_unpack(rec, BCF_UN_STR); + + // if not in the last block, stop at the saved record + if ( sw->iidx+1 < sw->nidx && sw_rec_equal(sw,rec) 
) break; + + sw->nrec++; + hts_expand0(bcf1_t, sw->nrec+1, sw->mrec, sw->rec); + rec = &sw->rec[sw->nrec]; + } + sw_rec_save(sw, &sw->rec[0]); +} + +bcf_sweep_t *bcf_sweep_init(const char *fname) +{ + bcf_sweep_t *sw = (bcf_sweep_t*) calloc(1,sizeof(bcf_sweep_t)); + sw->file = hts_open(fname, "r"); + sw->fp = hts_get_bgzfp(sw->file); + bgzf_index_build_init(sw->fp); + sw->hdr = bcf_hdr_read(sw->file); + sw->mrec = 1; + sw->rec = (bcf1_t*) calloc(sw->mrec,(sizeof(bcf1_t))); + sw->block_size = 1024*1024*3; + sw->direction = SW_FWD; + return sw; +} + +void bcf_empty1(bcf1_t *v); +void bcf_sweep_destroy(bcf_sweep_t *sw) +{ + int i; + for (i=0; i<sw->mrec; i++) bcf_empty1(&sw->rec[i]); + free(sw->idx); + free(sw->rec); + free(sw->lals); + bcf_hdr_destroy(sw->hdr); + hts_close(sw->file); + free(sw); +} + +static void sw_seek(bcf_sweep_t *sw, int direction) +{ + sw->direction = direction; + if ( direction==SW_FWD ) + hts_useek(sw->file, sw->idx[0], 0); + else + { + sw->iidx = sw->nidx; + sw->nrec = 0; + } +} + +bcf1_t *bcf_sweep_fwd(bcf_sweep_t *sw) +{ + if ( sw->direction==SW_BWD ) sw_seek(sw, SW_FWD); + + long pos = hts_utell(sw->file); + + bcf1_t *rec = &sw->rec[0]; + int ret = bcf_read1(sw->file, sw->hdr, rec); + + if ( ret!=0 ) // last record, get ready for sweeping backwards + { + sw->idx_done = 1; + sw->fp->idx_build_otf = 0; + sw_seek(sw, SW_BWD); + return NULL; + } + + if ( !sw->idx_done ) + { + if ( !sw->nidx || pos - sw->idx[sw->nidx-1] > sw->block_size ) + { + sw->nidx++; + hts_expand(uint64_t, sw->nidx, sw->midx, sw->idx); + sw->idx[sw->nidx-1] = pos; + } + } + return rec; +} + +bcf1_t *bcf_sweep_bwd(bcf_sweep_t *sw) +{ + if ( sw->direction==SW_FWD ) sw_seek(sw, SW_BWD); + if ( !sw->nrec ) sw_fill_buffer(sw); + if ( !sw->nrec ) return NULL; + return &sw->rec[ --sw->nrec ]; +} + +bcf_hdr_t *bcf_sweep_hdr(bcf_sweep_t *sw) { return sw->hdr; } + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/vcfutils.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/vcfutils.c Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b'@@ -0,0 +1,675 @@\n+/* vcfutils.c -- allele-related utility functions.\n+\n+ Copyright (C) 2012-2014 Genome Research Ltd.\n+\n+ Author: Petr Danecek <pd3@sanger.ac.uk>\n+\n+Permission is hereby granted, free of charge, to any person obtaining a copy\n+of this software and associated documentation files (the "Software"), to deal\n+in the Software without restriction, including without limitation the rights\n+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+copies of the Software, and to permit persons to whom the Software is\n+furnished to do so, subject to the following conditions:\n+\n+The above copyright notice and this permission notice shall be included in\n+all copies or substantial portions of the Software.\n+\n+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL\n+THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER\n+DEALINGS IN THE SOFTWARE. 
*/\n+\n+#include "htslib/vcfutils.h"\n+\n+int bcf_calc_ac(const bcf_hdr_t *header, bcf1_t *line, int *ac, int which)\n+{\n+ int i;\n+ for (i=0; i<line->n_allele; i++) ac[i]=0;\n+\n+ // Use INFO/AC,AN field only when asked\n+ if ( which&BCF_UN_INFO )\n+ {\n+ bcf_unpack(line, BCF_UN_INFO);\n+ int an_id = bcf_hdr_id2int(header, BCF_DT_ID, "AN");\n+ int ac_id = bcf_hdr_id2int(header, BCF_DT_ID, "AC");\n+ int i, an=-1, ac_len=0, ac_type=0;\n+ uint8_t *ac_ptr=NULL;\n+ if ( an_id>=0 && ac_id>=0 )\n+ {\n+ for (i=0; i<line->n_info; i++)\n+ {\n+ bcf_info_t *z = &line->d.info[i];\n+ if ( z->key == an_id ) an = z->v1.i;\n+ else if ( z->key == ac_id ) { ac_ptr = z->vptr; ac_len = z->len; ac_type = z->type; }\n+ }\n+ }\n+ if ( an>=0 && ac_ptr )\n+ {\n+ int nac = 0;\n+ #define BRANCH_INT(type_t) { \\\n+ type_t *p = (type_t *) ac_ptr; \\\n+ for (i=0; i<ac_len; i++) \\\n+ { \\\n+ ac[i+1] = p[i]; \\\n+ nac += p[i]; \\\n+ } \\\n+ }\n+ switch (ac_type) {\n+ case BCF_BT_INT8: BRANCH_INT(int8_t); break;\n+ case BCF_BT_INT16: BRANCH_INT(int16_t); break;\n+ case BCF_BT_INT32: BRANCH_INT(int32_t); break;\n+ default: fprintf(stderr, "[E::%s] todo: %d at %s:%d\\n", __func__, ac_type, header->id[BCF_DT_CTG][line->rid].key, line->pos+1); exit(1); break;\n+ }\n+ #undef BRANCH_INT\n+ if ( an<nac )\n+ {\n+ fprintf(stderr,"[E::%s] Incorrect AN/AC counts at %s:%d\\n", __func__,header->id[BCF_DT_CTG][line->rid].key, line->pos+1); \n+ exit(1); \n+ }\n+ ac[0] = an - nac;\n+ return 1;\n+ }\n+ }\n+\n+ // Split genotype fields only when asked\n+ if ( which&BCF_UN_FMT )\n+ {\n+ int i, gt_id = bcf_hdr_id2int(header,BCF_DT_ID,"GT");\n+ if ( gt_id<0 ) return 0;\n+ bcf_unpack(line, BCF_UN_FMT);\n+ bcf_fmt_t *fmt_gt = NULL;\n+ for (i=0; i<(int)line->n_fmt; i++)\n+ if ( line->d.fmt[i].id==gt_id ) { fmt_gt = &line->d.fmt[i]; break; }\n+ if ( !fmt_gt ) return 0;\n+ #define BRANCH_INT(type_t,vector_end) { \\\n+ for (i=0; i<line->n_sample; i++) \\\n+ { \\\n+ type_t *p = (type_t*) (fmt_gt->p + i*fmt_gt->size); \\\n+ 
int ial; \\\n+ for (ial=0; ial<fmt_gt->n; ial++) \\\n+ { \\\n+ if ( p[ial]==vector_end ) break; /* smaller ploidy */ \\'..b' {\n+ assert( nori==nR_ori ); // todo: will fail if all values are missing\n+ ndat = nR_new*line->n_sample;\n+ nnew = nR_new;\n+ }\n+\n+ #define BRANCH(type_t,is_vector_end) \\\n+ { \\\n+ for (j=0; j<line->n_sample; j++) \\\n+ { \\\n+ type_t *ptr_src = ((type_t*)dat) + j*nori; \\\n+ type_t *ptr_dst = ((type_t*)dat) + j*nnew; \\\n+ int size = sizeof(type_t); \\\n+ int k_src, k_dst = 0; \\\n+ for (k_src=0; k_src<nori; k_src++) \\\n+ { \\\n+ if ( is_vector_end ) { memcpy(ptr_dst+k_dst, ptr_src+k_src, size); break; } \\\n+ if ( rm_mask & 1<<(k_src+inc) ) continue; \\\n+ memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \\\n+ k_dst++; \\\n+ } \\\n+ } \\\n+ }\n+ switch (type)\n+ {\n+ case BCF_HT_INT: BRANCH(int32_t,ptr_src[k_src]==bcf_int32_vector_end); break;\n+ case BCF_HT_REAL: BRANCH(float,bcf_float_is_vector_end(ptr_src[k_src])); break;\n+ }\n+ #undef BRANCH\n+ }\n+ else // Number=G, diploid or mixture of haploid+diploid\n+ {\n+ assert( nori==nG_ori );\n+ ndat = nG_new*line->n_sample;\n+\n+ #define BRANCH(type_t,is_vector_end) \\\n+ { \\\n+ for (j=0; j<line->n_sample; j++) \\\n+ { \\\n+ type_t *ptr_src = ((type_t*)dat) + j*nori; \\\n+ type_t *ptr_dst = ((type_t*)dat) + j*nG_new; \\\n+ int size = sizeof(type_t); \\\n+ int ia, ib, k_dst = 0, k_src; \\\n+ int nset = 0; /* haploid or diploid? 
*/ \\\n+ for (k_src=0; k_src<nG_ori; k_src++) { if ( is_vector_end ) break; nset++; } \\\n+ if ( nset==nR_ori ) /* haploid */ \\\n+ { \\\n+ for (k_src=0; k_src<nR_ori; k_src++) \\\n+ { \\\n+ if ( rm_mask & 1<<k_src ) continue; \\\n+ memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \\\n+ k_dst++; \\\n+ } \\\n+ memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \\\n+ } \\\n+ else /* diploid */ \\\n+ { \\\n+ k_src = -1; \\\n+ for (ia=0; ia<nR_ori; ia++) \\\n+ { \\\n+ for (ib=0; ib<=ia; ib++) \\\n+ { \\\n+ k_src++; \\\n+ if ( is_vector_end ) { memcpy(ptr_dst+k_dst, ptr_src+k_src, size); ia = nR_ori; break; } \\\n+ if ( rm_mask & 1<<ia || rm_mask & 1<<ib ) continue; \\\n+ memcpy(ptr_dst+k_dst, ptr_src+k_src, size); \\\n+ k_dst++; \\\n+ } \\\n+ } \\\n+ } \\\n+ } \\\n+ }\n+ switch (type)\n+ {\n+ case BCF_HT_INT: BRANCH(int32_t,ptr_src[k_src]==bcf_int32_vector_end); break;\n+ case BCF_HT_REAL: BRANCH(float,bcf_float_is_vector_end(ptr_src[k_src])); break;\n+ }\n+ #undef BRANCH\n+ }\n+ nret = bcf_update_format(header, line, bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), (void*)dat, ndat, type);\n+ if ( nret<0 )\n+ {\n+ fprintf(stderr,"[%s:%d %s] Could not update FORMAT/%s at %s:%d [%d]\\n", __FILE__,__LINE__,__FUNCTION__,\n+ bcf_hdr_int2id(header,BCF_DT_ID,fmt->id), bcf_seqname(header,line), line->pos+1, nret);\n+ exit(1);\n+ }\n+ }\n+ free(dat);\n+ free(str.s);\n+ free(map);\n+}\n+\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/src/htslib/version.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/src/htslib/version.h Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,1 @@ +#define HTS_VERSION "1.2.1" |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/exp_data/hg19_rRNA.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/exp_data/hg19_rRNA.bed Thu Mar 24 17:12:52 2016 -0400 |
| b |
| b'@@ -0,0 +1,1769 @@\n+chr1\t1815107\t1815204\tLSU-rRNA_Hsa\t495\t+\t1815107\t1815204\t0\t1\t97\t0\n+chr1\t4417098\t4417211\tLSU-rRNA_Hsa\t234\t-\t4417098\t4417211\t0\t1\t113\t0\n+chr1\t7876433\t7876473\t5S\t282\t+\t7876433\t7876473\t0\t1\t40\t0\n+chr1\t9497766\t9497837\t5S\t467\t-\t9497766\t9497837\t0\t1\t71\t0\n+chr1\t13923133\t13923172\t5S\t256\t-\t13923133\t13923172\t0\t1\t39\t0\n+chr1\t13949705\t13949779\t5S\t432\t-\t13949705\t13949779\t0\t1\t74\t0\n+chr1\t15976864\t15976906\t5S\t266\t-\t15976864\t15976906\t0\t1\t42\t0\n+chr1\t25483508\t25483621\tLSU-rRNA_Hsa\t896\t-\t25483508\t25483621\t0\t1\t113\t0\n+chr1\t28242609\t28242680\tLSU-rRNA_Hsa\t410\t-\t28242609\t28242680\t0\t1\t71\t0\n+chr1\t30802098\t30802177\tLSU-rRNA_Hsa\t270\t-\t30802098\t30802177\t0\t1\t79\t0\n+chr1\t31583541\t31583592\t5S\t354\t+\t31583541\t31583592\t0\t1\t51\t0\n+chr1\t31851406\t31851445\t5S\t270\t-\t31851406\t31851445\t0\t1\t39\t0\n+chr1\t32833269\t32833349\tSSU-rRNA_Hsa\t257\t+\t32833269\t32833349\t0\t1\t80\t0\n+chr1\t34578550\t34578639\t5S\t715\t+\t34578550\t34578639\t0\t1\t89\t0\n+chr1\t37168239\t37168281\t5S\t241\t-\t37168239\t37168281\t0\t1\t42\t0\n+chr1\t37315505\t37315568\tLSU-rRNA_Hsa\t274\t-\t37315505\t37315568\t0\t1\t63\t0\n+chr1\t37730289\t37730387\t5S\t512\t-\t37730289\t37730387\t0\t1\t98\t0\n+chr1\t39354488\t39354536\tLSU-rRNA_Hsa\t293\t-\t39354488\t39354536\t0\t1\t48\t0\n+chr1\t39619878\t39619968\t5S\t684\t-\t39619878\t39619968\t0\t1\t90\t0\n+chr1\t39673433\t39673475\t5S\t239\t+\t39673433\t39673475\t0\t1\t42\t0\n+chr1\t41932607\t41932698\t5S\t604\t-\t41932607\t41932698\t0\t1\t91\t0\n+chr1\t42779298\t42779471\tLSU-rRNA_Hsa\t1102\t+\t42779298\t42779471\t0\t1\t173\t0\n+chr1\t42956316\t42956359\t5S\t230\t+\t42956316\t42956359\t0\t1\t43\t0\n+chr1\t43045772\t43046173\tLSU-rRNA_Hsa\t661\t+\t43045772\t43046173\t0\t1\t401\t0\n+chr1\t43215463\t43215643\tLSU-rRNA_Hsa\t1020\t-\t43215463\t43215643\t0\t1\t180\t0\n+chr1\t43662087\t43662177\t5S\t695\t+\t43662087\t43662177\t0\t1\t90\t0\n+ch
r1\t44480550\t44480593\t5S\t309\t-\t44480550\t44480593\t0\t1\t43\t0\n+chr1\t45136058\t45136137\t5S\t430\t-\t45136058\t45136137\t0\t1\t79\t0\n+chr1\t45397994\t45398096\t5S\t601\t+\t45397994\t45398096\t0\t1\t102\t0\n+chr1\t45955546\t45955598\t5S\t237\t-\t45955546\t45955598\t0\t1\t52\t0\n+chr1\t46902468\t46902566\tLSU-rRNA_Hsa\t458\t-\t46902468\t46902566\t0\t1\t98\t0\n+chr1\t51524305\t51524347\t5S\t262\t-\t51524305\t51524347\t0\t1\t42\t0\n+chr1\t52439082\t52439177\t5S\t742\t+\t52439082\t52439177\t0\t1\t95\t0\n+chr1\t59213662\t59213771\t5S\t279\t+\t59213662\t59213771\t0\t1\t109\t0\n+chr1\t63652004\t63652098\t5S\t498\t-\t63652004\t63652098\t0\t1\t94\t0\n+chr1\t64356532\t64356571\t5S\t257\t+\t64356532\t64356571\t0\t1\t39\t0\n+chr1\t65154802\t65154841\t5S\t303\t-\t65154802\t65154841\t0\t1\t39\t0\n+chr1\t67016502\t67016545\t5S\t296\t-\t67016502\t67016545\t0\t1\t43\t0\n+chr1\t68458490\t68458634\tSSU-rRNA_Hsa\t613\t-\t68458490\t68458634\t0\t1\t144\t0\n+chr1\t68461907\t68462474\tSSU-rRNA_Hsa\t1930\t-\t68461907\t68462474\t0\t1\t567\t0\n+chr1\t69262049\t69262103\t5S\t261\t-\t69262049\t69262103\t0\t1\t54\t0\n+chr1\t72642633\t72642675\t5S\t316\t-\t72642633\t72642675\t0\t1\t42\t0\n+chr1\t74215218\t74215309\t5S\t659\t-\t74215218\t74215309\t0\t1\t91\t0\n+chr1\t76240336\t76240470\tLSU-rRNA_Hsa\t568\t-\t76240336\t76240470\t0\t1\t134\t0\n+chr1\t78080553\t78080625\t5S\t451\t+\t78080553\t78080625\t0\t1\t72\t0\n+chr1\t78245582\t78245672\t5S\t646\t+\t78245582\t78245672\t0\t1\t90\t0\n+chr1\t78560489\t78560589\t5S\t659\t-\t78560489\t78560589\t0\t1\t100\t0\n+chr1\t78840873\t78840952\t5S\t542\t-\t78840873\t78840952\t0\t1\t79\t0\n+chr1\t80851092\t80851133\t5S\t277\t+\t80851092\t80851133\t0\t1\t41\t0\n+chr1\t86349360\t86349480\t5S\t674\t-\t86349360\t86349480\t0\t1\t120\t0\n+chr1\t87918966\t87919056\t5S\t627\t-\t87918966\t87919056\t0\t1\t90\t0\n+chr1\t89025825\t89025866\t5S\t256\t-\t89025825\t89025866\t0\t1\t41\t0\n+chr1\t91852785\t91853147\tLSU-rRNA_Hsa\t3277\t-\t91852785\t91853147\t0\t1\t362\t0\
n+chr1\t92655309\t92655379\t5S\t362\t+\t92655309\t92655379\t0\t1\t70\t0\n+chr1\t92972727\t92972802\tLSU-rRNA_Hsa\t586\t-\t92972727\t92972802\t0\t1\t75\t0\n+chr1\t93953887\t93954009\t5S\t750\t-\t93953887\t93954009\t0\t1\t122\t0\n+chr1\t95554734\t95554784\t5S\t346\t+\t95554734\t95554784\t0\t1\t50\t0\n+chr1\t100271938\t100272091\tSSU-rRNA_Hsa\t284\t+\t100271938\t100272091\t0\t1\t153\t0\n+chr1\t105973697\t105973741\t5S\t268\t+\t105973697\t105973741\t0\t1\t44\t0\n+chr1\t108113071\t108113633\tLSU-rRNA_Hsa\t3656\t-\t108113071\t108113633\t0\t1\t562\t0\n+chr1\t108113652\t108113729\tLSU-rRNA_Hsa\t474\t-\t108113652\t108113729\t0\t1\t77\t0\n+chr1\t109786180\t109786293\tLSU-rRNA_Hsa\t295\t+\t109786180\t109786293\t0\t1\t113\t0\n+chr1\t110120223\t11012029'..b'1\t0\n+chr22\t25515263\t25515312\t5S\t291\t+\t25515263\t25515312\t0\t1\t49\t0\n+chr22\t26111045\t26111136\t5S\t693\t-\t26111045\t26111136\t0\t1\t91\t0\n+chr22\t26786620\t26786748\t5S\t646\t-\t26786620\t26786748\t0\t1\t128\t0\n+chr22\t30335677\t30335719\t5S\t263\t-\t30335677\t30335719\t0\t1\t42\t0\n+chr22\t31701389\t31701479\t5S\t627\t+\t31701389\t31701479\t0\t1\t90\t0\n+chr22\t33031095\t33031184\t5S\t592\t-\t33031095\t33031184\t0\t1\t89\t0\n+chr22\t33816270\t33816312\t5S\t316\t-\t33816270\t33816312\t0\t1\t42\t0\n+chr22\t36713014\t36713053\t5S\t249\t+\t36713014\t36713053\t0\t1\t39\t0\n+chr22\t36713067\t36713106\t5S\t265\t+\t36713067\t36713106\t0\t1\t39\t0\n+chr22\t36713120\t36713159\t5S\t281\t+\t36713120\t36713159\t0\t1\t39\t0\n+chr22\t39644733\t39644831\t5S\t398\t+\t39644733\t39644831\t0\t1\t98\t0\n+chr22\t41043647\t41043689\t5S\t290\t+\t41043647\t41043689\t0\t1\t42\t0\n+chr22\t42530233\t42530285\t5S\t355\t-\t42530233\t42530285\t0\t1\t52\t0\n+chr6_apd_hap1\t1033560\t1034108\tLSU-rRNA_Hsa\t301\t-\t1033560\t1034108\t0\t1\t548\t0\n+chr6_apd_hap1\t3361052\t3361144\t5S\t563\t-\t3361052\t3361144\t0\t1\t92\t0\n+chr6_cox_hap2\t1246022\t1246134\tLSU-rRNA_Hsa\t257\t-\t1246022\t1246134\t0\t1\t112\t0\n+chr6_cox_hap2\t2707816\t2707855\tSS
U-rRNA_Hsa\t233\t+\t2707816\t2707855\t0\t1\t39\t0\n+chr6_cox_hap2\t3516979\t3517071\t5S\t563\t-\t3516979\t3517071\t0\t1\t92\t0\n+chr6_dbb_hap3\t1030728\t1031276\tLSU-rRNA_Hsa\t364\t-\t1030728\t1031276\t0\t1\t548\t0\n+chr6_dbb_hap3\t2490183\t2490222\tSSU-rRNA_Hsa\t233\t+\t2490183\t2490222\t0\t1\t39\t0\n+chr6_dbb_hap3\t3325527\t3325619\t5S\t563\t-\t3325527\t3325619\t0\t1\t92\t0\n+chr6_mcf_hap5\t1030394\t1030506\tLSU-rRNA_Hsa\t307\t-\t1030394\t1030506\t0\t1\t112\t0\n+chr6_mcf_hap5\t2575098\t2575137\tSSU-rRNA_Hsa\t229\t+\t2575098\t2575137\t0\t1\t39\t0\n+chr6_mcf_hap5\t3426152\t3426244\t5S\t563\t-\t3426152\t3426244\t0\t1\t92\t0\n+chr6_qbl_hap6\t1030713\t1031261\tLSU-rRNA_Hsa\t364\t-\t1030713\t1031261\t0\t1\t548\t0\n+chr6_qbl_hap6\t2488966\t2489005\tSSU-rRNA_Hsa\t233\t+\t2488966\t2489005\t0\t1\t39\t0\n+chr6_qbl_hap6\t3307336\t3307428\t5S\t563\t-\t3307336\t3307428\t0\t1\t92\t0\n+chr6_mann_hap4\t1030358\t1030906\tLSU-rRNA_Hsa\t364\t-\t1030358\t1030906\t0\t1\t548\t0\n+chr6_mann_hap4\t2541448\t2541487\tSSU-rRNA_Hsa\t233\t+\t2541448\t2541487\t0\t1\t39\t0\n+chr6_mann_hap4\t3389161\t3389253\t5S\t563\t-\t3389161\t3389253\t0\t1\t92\t0\n+chr6_ssto_hap7\t1067592\t1068140\tLSU-rRNA_Hsa\t363\t-\t1067592\t1068140\t0\t1\t548\t0\n+chr6_ssto_hap7\t2528009\t2528048\tSSU-rRNA_Hsa\t229\t+\t2528009\t2528048\t0\t1\t39\t0\n+chr6_ssto_hap7\t2773921\t2774104\tLSU-rRNA_Hsa\t233\t+\t2773921\t2774104\t0\t1\t183\t0\n+chrUn_gl000220\t95180\t95223\tLSU-rRNA_Hsa\t519\t+\t95180\t95223\t0\t1\t43\t0\n+chrUn_gl000220\t95530\t95616\tLSU-rRNA_Hsa\t519\t+\t95530\t95616\t0\t1\t86\t0\n+chrUn_gl000220\t95614\t96145\tLSU-rRNA_Hsa\t2585\t+\t95614\t96145\t0\t1\t531\t0\n+chrUn_gl000220\t99158\t99197\tLSU-rRNA_Hsa\t239\t+\t99158\t99197\t0\t1\t39\t0\n+chrUn_gl000220\t109077\t110946\tSSU-rRNA_Hsa\t15708\t+\t109077\t110946\t0\t1\t1869\t0\n+chrUn_gl000220\t113347\t114110\tLSU-rRNA_Hsa\t37124\t+\t113347\t114110\t0\t1\t763\t0\n+chrUn_gl000220\t114132\t114194\tLSU-rRNA_Hsa\t37124\t+\t114132\t114194\t0\t1\t62\t0\n+chrUn_gl000
220\t114238\t115498\tLSU-rRNA_Hsa\t37124\t+\t114238\t115498\t0\t1\t1260\t0\n+chrUn_gl000220\t115521\t116829\tLSU-rRNA_Hsa\t37124\t+\t115521\t116829\t0\t1\t1308\t0\n+chrUn_gl000220\t116861\t118417\tLSU-rRNA_Hsa\t37124\t+\t116861\t118417\t0\t1\t1556\t0\n+chrUn_gl000220\t153049\t154918\tSSU-rRNA_Hsa\t15708\t+\t153049\t154918\t0\t1\t1869\t0\n+chrUn_gl000220\t157319\t158082\tLSU-rRNA_Hsa\t32456\t+\t157319\t158082\t0\t1\t763\t0\n+chrUn_gl000220\t158104\t158166\tLSU-rRNA_Hsa\t32456\t+\t158104\t158166\t0\t1\t62\t0\n+chrUn_gl000220\t158210\t159470\tLSU-rRNA_Hsa\t32456\t+\t158210\t159470\t0\t1\t1260\t0\n+chrUn_gl000220\t159493\t160801\tLSU-rRNA_Hsa\t32456\t+\t159493\t160801\t0\t1\t1308\t0\n+chrUn_gl000220\t160833\t161802\tLSU-rRNA_Hsa\t32456\t+\t160833\t161802\t0\t1\t969\t0\n+chrUn_gl000223\t58453\t58538\t5S\t650\t+\t58453\t58538\t0\t1\t85\t0\n+chrUn_gl000228\t20112\t20232\t5S\t693\t+\t20112\t20232\t0\t1\t120\t0\n+chrUn_gl000228\t22673\t22794\t5S\t802\t+\t22673\t22794\t0\t1\t121\t0\n+chrUn_gl000229\t18170\t19913\tSSU-rRNA_Hsa\t13464\t+\t18170\t19913\t0\t1\t1743\t0\n+chr17_ctg5_hap1\t19909\t19999\t5S\t692\t-\t19909\t19999\t0\t1\t90\t0\n+chr17_ctg5_hap1\t247970\t248012\t5S\t325\t-\t247970\t248012\t0\t1\t42\t0\n+chr17_ctg5_hap1\t465905\t465947\t5S\t329\t-\t465905\t465947\t0\t1\t42\t0\n+chr17_ctg5_hap1\t1439558\t1439600\t5S\t332\t+\t1439558\t1439600\t0\t1\t42\t0\n+chr1_gl000192_random\t415328\t415417\t5S\t637\t+\t415328\t415417\t0\t1\t89\t0\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/exp_data/hg19_refGene.gtf.tar.gz |
| b |
| Binary file ezBAMQC/test-data/exp_data/hg19_refGene.gtf.tar.gz has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/exp_data/treat1.bam |
| b |
| Binary file ezBAMQC/test-data/exp_data/treat1.bam has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/exp_data/treat2.bam |
| b |
| Binary file ezBAMQC/test-data/exp_data/treat2.bam has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/exp_data/treat3.bam |
| b |
| Binary file ezBAMQC/test-data/exp_data/treat3.bam has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.ReadLen_plot.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.ReadLen_plot.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.readlen_profile.png",width=500,height=500,units="px") +readlen_val=c(67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) +readlen_count=c(220,212,287,315,327,323,420,437,447,422,435,429,427,487,438,487,492,492,518,536,517,565,503,550,723,808,803,859,881,927,945,1535,1897,44685) +plot(readlen_val,(readlen_count/77686),pch=20,xlab="Mapped Read Length",ylab="Proportion",col="blue") +dev.state=dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.TransCoverage.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.TransCoverage.r Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b"@@ -0,0 +1,10 @@\n+png('/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.TransCoverage.png',width=500,height=500,units='px')\n+a=c(0,0,1,0,0,0,0,172,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,2,0,5,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,5,8,0,0,17,0,1,20,0,0,0,0,0,2,0,4,1,22,0,0,9,12,1,0,0,0,1,0,0,1,0,0,0,2,0,0,0,0,0,1,6,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,6,0,0,0,1,2,5,0,0,4,0,0,2,0,0,5,0,0,0,0,0,0,0,0,0,0,0,6,1,0,6,4,6,0,17,0,0,0,0,0,0,0,0,4,9,0,0,0,0,0,0,0,2,0,0,0,0,0,0,5,2,28,0,0,0,0,0,0,0,2,0,0,0,0,2,0,0,1,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,13,0,0,0,0,0,0,0,0,0,3,1,4,1,3,0,0,0,0,4,0,0,1,0,1,0,1,0,0,3,0,0,0,3,0,0,1,1,0,0,0,0,0,0,0,3,0,0,24,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,1,0,0,0,0,0,0,0,3,7,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,166,0,0,0,0,1,9,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,9,1,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,1,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,3,0,13,0,0,0,0,0,0,0,0,0,0,0,6,0,3,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,10,0,0,1,1,0,3,0,0,1,0,0,0,4,0,0,0,0,0,2,9,7,0,1,0,0,0,0,2,7,11,0,0,0,0,0,8,0,19,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,6,14,1,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,15,0,1,0,0,0,0,0,0,14,0,4,0,0,0,4,5,0,0,0,0,0,9,0,0,0,0,1,1,1,0,0,0,0,8,0,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,36,0,7,0,0,0,0,0,0,2,0,0,2,0,3,5,0,0,0,0,0,26,2,0,4,0,9,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,3,0,7,1,0,0,1,0,4,0,1,1,0,0,1,1,3,0,0,1,0,0,0,0,1,18,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,6,0,0,0,0,6,0,2,6,0,2,1,173,0,0,0,0,4,0,0,0,3,2,1,0,0,2,0,0,8,0,0,0,1,0,0,0,0,4,4,0,0,0,0,0,3,0,0,2,0,0,0,1,0,0,8,0,1,0,0,0,0,0,0,2,0,0
,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,6,0,0,2,0,0,0,0,0,2,1,3,1,0,7,0,0,2,8,2,4,0,0,0,0,0,1,1,0,0,0,0,0,0,0,1,0,2,0,7,0,2,0,17,4,1,0,1,1,0,0,2,0,0,0,0,750,0,0,0,0,0,0,5,15,12,1,1,3,1,0,0,1,1,0,0,0,0,0,0,0,5,16,19,8,0,1,10,0,3,0,16,0,0,0,2,0,0,0,6,0,0,0,2,0,0,0,3,0,1,0,0,0,5,8,1,0,0,1,0,0,3,2,0,0,0,0,0,8,0,3,0,0,0,0,1,0,5,0,0,4,0,0,0,0,0,0,0,0,0,0,5,0,0,0,3,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,3,4,0,1,8,0,0,1,0,0,0,0,0,2,0,1,0,0,0,0,8,28,0,0,0,0,4,5,1,2,4,1,0,0,0,0,0,3,0,1,3,0,0,0,0,1,0,0,4,0,0,0,0,0,0,0,0,0,0,0,3,0,2,0,0,2,0,0,0,0,0,3,0,1,2,4,1,0,1,0,10,2,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,2,0,0,0,0,4,0,0,0,5,0,0,0,0,0,0,3,0,1,0,0,1,1,0,0,1,4,0,0,1,4,3,0,0,0,13,0,3,3,4,0,0,0,0,0,2,3,0,0,0,0,0,0,0,0,0,0,1,2,1,4,0,1,0,0,0,0,0,4,2,0,18,0,0,1,1,4,0,1,0,1,1,0,0,0,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,103,0,2,3,0,0,0,0,0,0,4,0,0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,4,0,0,0,5,0,2,0,2,0,0,0,0,8,3,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,2,0,1,0,2,0,0,7,1,10,4,0,0,2,1,7,44,0,0,0,0,5,0,57,0,0,0,0,2,0,0,1,0,0,0,0,0,0,1,3,10,4,0,0,0,0,0,0,0,1,1,0,6,0,0,3,0,0,0,0,0,1,0,0,0,0,0,0,10,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,3,2,0,1,0,0,0,0,1,0,0,0,2,0,0,0,3,19,2,1,9,0,0,0,0,0,5,0,0,0,1,0,0,0,0,0,0,0,2,0,5,0,0,0,1,0,0,1,0,16,5,10,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,2,7,0,0,1,0,5,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,1,0,0,21,4,0,1,1,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,2,0,1,4,0,0,0,0,0,0,15,1,0,0,0,0,0,0,2,1,0,21,0,1,1,1,2,0,0,0,2,0,1,0,0,1,0,17,0,0,0,0,0,0,1,0,0,1,100,1,0,0,2,4,1,0,1,0,0,0,1,2,0,0,0,1,0,0,2,0,0,0,21,0,0,0,1,0,0,0,0,2,1,5,3,0,0,38,6,0,0,0,6,0,0,0,2,1,0,0,0,0,0,0,3,2,0,1,0,0,0,0,0,0,3,1,0,0,0,0,0,42,1,0,3,3,0,0,3,0,0,1,13,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,3,0,0,3,0,39,2,0,1,0,0,1,0,0,1,0,3,0,0,2,2,0,1,0,1,0,6,0,0,4,0,5,0,4,0,0,0,0,0,1,0,0,4,4,0,0,0,1,6,0,0,0,0,0,8,0,0,0,4,3,0,0,0,0,0,0,3,10,0,0,0,1,0,6
,0,0,"..b'0,0,0,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,1,5,8,3,1,0,2,0,0,0,2,0,0,7,0,39,0,1,12,0,3,0,0,1,0,0,1,0,0,0,1,1,19,2,27,0,0,1,0,0,0,0,1,0,2,0,0,0,0,26,0,0,2,0,0,0,2,3,75,0,0,1,1,1,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,3,0,0,3,2,51,2,4,0,0,2,3,0,4,0,0,2,0,0,0,0,1,0,1,0,0,0,0,0,2,0,0,2,0,0,0,78,0,0,0,0,0,0,0,24,1,1,0,0,1,0,0,0,1,1,7,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,2,0,0,6,11,1,0,0,2,4,1,0,0,0,0,0,3,6,0,0,0,6,0,0,0,0,0,0,0,0,1,2,0,0,0,1,0,0,0,0,2,0,4,7,1,0,0,0,0,1,0,2,0,0,0,0,0,0,9,0,6,0,0,2,29,0,0,0,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,6,0,3,26,0,0,5,3,1,7,1,0,0,5,0,0,0,7,0,0,0,0,0,0,0,49,8,0,1,0,14,3,2,0,4,0,4,4,0,0,0,0,0,0,0,0,0,1,0,0,5,0,1,3,1,0,0,0,0,11,2,5,3,0,1,1,0,0,0,0,11,0,0,3,0,0,0,22,0,31,61,4,0,7,9,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,1,0,0,1,0,0,3,0,5,0,6,1,0,0,1,1,0,0,10,0,0,0,0,1,0,0,2,1,24,4,0,1,0,3,0,0,4,0,0,0,3,9,1,0,0,0,0,3,0,1,0,0,0,2,0,0,1,0,5,0,0,3,0,1,0,0,27,0,0,1,0,2,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,1,0,0,4,0,3,0,4,0,0,0,0,1,0,2,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,13,0,1,2,7,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,2,0,0,4,0,4,1,0,2,0,3,0,0,0,0,0,0,1,0,0,0,3,7,0,0,0,2,0,2,5,0,11,0,0,1,0,4,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,3,0,0,0,1,0,8,0,0,0,0,0,0,9,0,0,5,0,0,0,0,0,5,0,0,0,0,15,1,1,0,5,8,0,0,0,0,1,0,0,0,4,0,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,2,3,0,0,1,1,0,0,5,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,2,1,0,0,1,0,3,0,0,0,0,0,0,10,0,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,4,0,0,0,0,0,5,0,0,0,3
,0,0,15,0,1,11,0,1,0,0,0,0,0,0,0,0,5,3,1,0,1,1,1,0,0,0,6,0,0,0,0,0,0,1,0,0,49,0,0,4,6,0,58,0,0,4,0,2,2,0,1,0,3,1,0,0,0,0,0,1,3,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,2,3,20,0,0,2,4,0,0,3,0,0,0,0,0,15,0,0,9,0,0,0,0,0,0,0,5,19,4,0,0,0,0,1,0,0,0,4,2,0,1,0,0,0,0,6,4,9,0,0,1,0,0,0,0,0,0,0,0,0,3,0,1,0,12,0,1,0,0,3,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,1,2,2,0,0,7,0,0,0,0,9,0,0,5,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,11,0,0,1,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3,0,0,6,0,0,0,3,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,2,0,0,0,7,0,0,0,0,0,2,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,2,4,0,0,1,1,1,0,0,0,0,0,0,0,1,7,0,0,0,0,0,0,0,0,1,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,1,0,0,2,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,2,1,0,0,3,2,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,2,4,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,13,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,4,0,0,0,0,0,1,55,1,6,0,0,0,0,0,0,7,2,0,0,0,2,4,16,0,0,0,0,0,0,0,0,13,0,1,0,0,0,0,0,0,2,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,4,67,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,5,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)\n+Fn = ecdf(a)\n+max_x = round(log(max(knots(Fn)),2),0)\n+xx = c(0,2^seq(0,max_x,by=2))\n+y=Fn(xx)\n+xlog = log(xx[2:length(xx)],base=2)\n+plot(x=c(-1,xlog),y=y,xaxt = \'n\',type="b",col="blue",pch=20,xlab="Number of Reads",ylab="Cumulative proportion of Genes")\n+axis(1,at = c(-1,seq(0,max_x,by=2)),labels=c(0,2^seq(0,max_x,by=2)))\n+dev.state = dev.off()\n\\ No 
newline at end of file\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.clipping_profile.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.clipping_profile.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.clipping_profile.png",width=500,height=500,units="px") +read_pos=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99) +count=c(10290,9294,8453,7955,7448,6951,6512,6050,5602,5219,4931,4682,4396,4114,3827,3540,3283,3032,2781,2555,2301,2073,1849,1638,1411,1212,1012,799,634,485,341,201,104,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,108,203,338,496,660,807,995,1203,1420,1604,1824,2020,2220,2460,2668,2901,3139,3379,3631,3895,4137,4426,4691,4976,5369,5810,6205,6686,7174,7678,8224,9087,10073) +plot(read_pos,1-(count/77686),pch=20,xlab="Position of reads",ylab="Mappability",col="blue") +dev.state=dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.clipping_profile.xls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.clipping_profile.xls Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,101 @@ +Position Read_Total Read_clipped +0 77686 10290 +1 77686 9294 +2 77686 8453 +3 77686 7955 +4 77686 7448 +5 77686 6951 +6 77686 6512 +7 77686 6050 +8 77686 5602 +9 77686 5219 +10 77686 4931 +11 77686 4682 +12 77686 4396 +13 77686 4114 +14 77686 3827 +15 77686 3540 +16 77686 3283 +17 77686 3032 +18 77686 2781 +19 77686 2555 +20 77686 2301 +21 77686 2073 +22 77686 1849 +23 77686 1638 +24 77686 1411 +25 77686 1212 +26 77686 1012 +27 77686 799 +28 77686 634 +29 77686 485 +30 77686 341 +31 77686 201 +32 77686 104 +33 77686 0 +34 77686 0 +35 77686 0 +36 77686 0 +37 77686 0 +38 77686 0 +39 77686 0 +40 77686 0 +41 77686 0 +42 77686 0 +43 77686 0 +44 77686 0 +45 77686 0 +46 77686 0 +47 77686 0 +48 77686 0 +49 77686 0 +50 77686 0 +51 77686 0 +52 77686 0 +53 77686 0 +54 77686 0 +55 77686 0 +56 77686 0 +57 77686 0 +58 77686 0 +59 77686 0 +60 77686 0 +61 77686 0 +62 77686 0 +63 77686 0 +64 77686 0 +65 77686 0 +66 77686 0 +67 77686 108 +68 77686 203 +69 77686 338 +70 77686 496 +71 77686 660 +72 77686 807 +73 77686 995 +74 77686 1203 +75 77686 1420 +76 77686 1604 +77 77686 1824 +78 77686 2020 +79 77686 2220 +80 77686 2460 +81 77686 2668 +82 77686 2901 +83 77686 3139 +84 77686 3379 +85 77686 3631 +86 77686 3895 +87 77686 4137 +88 77686 4426 +89 77686 4691 +90 77686 4976 +91 77686 5369 +92 77686 5810 +93 77686 6205 +94 77686 6686 +95 77686 7174 +96 77686 7678 +97 77686 8224 +98 77686 9087 +99 77686 10073 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.geneAbundance.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.geneAbundance.txt Thu Mar 24 17:12:52 2016 -0400 |
| b |
| b'@@ -0,0 +1,27321 @@\n+gene\tCounts\n+AADACL3\t0\n+AADACL4\t0\n+ABCD3\t1\n+ACADM\t0\n+ACKR1\t0\n+ACOT11\t0\n+ACTG1P4\t0\n+ACTL8\t172\n+ACTN2\t0\n+ACTRT2\t0\n+ADAM15\t0\n+ADAMTSL4\t0\n+ADCK3\t0\n+ADORA1\t0\n+ADPRHL2\t0\n+AGL\t0\n+AGO1\t0\n+AGO3\t0\n+AGO4\t0\n+AGRN\t0\n+AGTRAP\t0\n+AHCYL1\t2\n+AJAP1\t0\n+AK4\t2\n+AK5\t0\n+AKIRIN1\t5\n+AKR1A1\t0\n+AKR7A2P1\t0\n+ALG6\t0\n+ALPL\t0\n+AMPD2\t1\n+AMY1A\t0\n+AMY1B\t0\n+AMY1C\t0\n+AMY2A\t0\n+AMY2B\t0\n+ANGPTL3\t0\n+ANGPTL7\t0\n+ANKRD34A\t0\n+ANKRD35\t0\n+ANXA9\t0\n+AP4B1-AS1\t0\n+APCS\t0\n+APITD1\t5\n+APITD1-CORT\t8\n+APOA1BP\t0\n+AQP10\t0\n+ARF1\t17\n+ARHGEF10L\t0\n+ARHGEF16\t1\n+ARID1A\t20\n+ARTN\t0\n+ARV1\t0\n+ASH1L-AS1\t0\n+ATAD3A\t0\n+ATAD3B\t0\n+ATAD3C\t2\n+ATF3\t0\n+ATF6\t4\n+ATG4C\t1\n+ATP1A1\t22\n+ATP1A2\t0\n+ATP1A4\t0\n+ATP1B1\t9\n+ATP2B4\t12\n+ATP5F1\t1\n+ATP6V0B\t0\n+ATP8B2\t0\n+ATPIF1\t0\n+ATXN7L2\t1\n+AVPR1B\t0\n+AXDND1\t0\n+AZIN2\t1\n+B3GALT6\t0\n+B4GALT2\t0\n+BCAN\t0\n+BCL9\t2\n+BECN1P1\t0\n+BGLAP\t0\n+BLZF1\t0\n+BMP8A\t0\n+BNIPL\t0\n+BOLA1\t1\n+BRDT\t6\n+BRINP2\t0\n+BROX\t0\n+BSND\t0\n+BTBD19\t0\n+BTBD8\t0\n+BTF3L4\t1\n+BTG2\t0\n+C1QA\t0\n+C1QB\t0\n+C1QC\t0\n+C1orf100\t0\n+C1orf101\t0\n+C1orf105\t0\n+C1orf106\t0\n+C1orf112\t1\n+C1orf115\t0\n+C1orf122\t0\n+C1orf137\t0\n+C1orf146\t0\n+C1orf158\t0\n+C1orf162\t0\n+C1orf167\t0\n+C1orf177\t0\n+C1orf185\t0\n+C1orf21\t1\n+C1orf213\t0\n+C1orf220\t0\n+C1orf226\t4\n+C1orf228\t0\n+C1orf27\t0\n+C1orf50\t0\n+C1orf53\t0\n+C1orf54\t0\n+C1orf56\t0\n+C1orf64\t0\n+C1orf68\t0\n+C1orf94\t0\n+C1orf95\t0\n+C4BPA\t0\n+C4BPB\t0\n+C8A\t0\n+CA14\t1\n+CA6\t0\n+CACHD1\t0\n+CACNA1E\t0\n+CACYBP\t6\n+CADM3\t0\n+CALML6\t0\n+CAMK1G\t0\n+CAMSAP2\t1\n+CAMTA1\t2\n+CAP1\t5\n+CAPN2\t0\n+CAPN9\t0\n+CAPZA1\t4\n+CASQ1\t0\n+CATSPER4\t0\n+CCDC18\t2\n+CCDC185\t0\n+CCDC24\t0\n+CCDC27\t5\n+CCDC28B\t0\n+CCDC30\t0\n+CD101\t0\n+CD1A\t0\n+CD1C\t0\n+CD1D\t0\n+CD1E\t0\n+CD2\t0\n+CD46\t0\n+CD52\t0\n+CD53\t0\n+CD55\t6\n+CDA\t1\n+CDC14A\t0\n+CDC20\t6\n+CDC42\t4\n+CDC7\t6\n+CDC73\t0\n+CDCA8\t17\n+CDK18\t0\n+CDKN
2C\t0\n+CELA2A\t0\n+CELA2B\t0\n+CELA3A\t0\n+CELA3B\t0\n+CELSR2\t0\n+CENPF\t0\n+CEP350\t4\n+CEP85\t9\n+CEPT1\t0\n+CFH\t0\n+CFHR1\t0\n+CFHR2\t0\n+CFHR3\t0\n+CFHR4\t0\n+CFHR5\t0\n+CGN\t2\n+CHD1L\t0\n+CHI3L2\t0\n+CHIA\t0\n+CHIAP2\t0\n+CHRM3\t0\n+CHRNB2\t0\n+CHTOP\t5\n+CIART\t2\n+CKS1B\t28\n+CLCA1\t0\n+CLCA2\t0\n+CLCA3P\t0\n+CLCA4\t0\n+CLCN6\t0\n+CLCNKA\t0\n+CLCNKB\t0\n+CLIC4\t2\n+CMPK1\t0\n+CNIH3\t0\n+CNIH4\t0\n+CNKSR1\t0\n+CNST\t2\n+CNTN2\t0\n+COA6\t0\n+COG2\t1\n+CORT\t0\n+COX20\t0\n+CPT2\t0\n+CPTP\t0\n+CR1\t0\n+CR1L\t0\n+CR2\t3\n+CRB1\t0\n+CRCT1\t0\n+CREB3L4\t0\n+CROCC\t0\n+CSF1\t0\n+CSMD2-AS1\t0\n+CTH\t0\n+CTPS1\t0\n+CTRC\t0\n+CTSE\t0\n+CTTNBP2NL\t13\n+CYB561D1\t0\n+CYCSP52\t0\n+CYMP\t0\n+CYP4A22\t0\n+CYP4B1\t0\n+CYP4X1\t0\n+CYP4Z1\t0\n+CYR61\t0\n+DAB1-AS1\t0\n+DAP3\t3\n+DARS2\t1\n+DCAF6\t4\n+DCDC2B\t1\n+DCLRE1B\t3\n+DCST1\t0\n+DDI2\t0\n+DDR2\t0\n+DDX11L1\t0\n+DDX20\t4\n+DEGS1\t0\n+DEPDC1-AS1\t0\n+DESI2\t1\n+DFFB\t0\n+DHDDS\t1\n+DHX9\t0\n+DIEXF\t1\n+DIO1\t0\n+DISC1\t0\n+DISP1\t3\n+DLEU2L\t0\n+DMAP1\t0\n+DMBX1\t0\n+DMRTB1\t3\n+DNAH14\t0\n+DNAJA1P5\t0\n+DNAJB4\t1\n+DNAJC16\t1\n+DNAJC6\t0\n+DNALI1\t0\n+DNASE2B\t0\n+DNM3\t0\n+DPH2\t0\n+DPYD-AS1\t0\n+DPYD-AS2\t0\n+DR1\t3\n+DRAXIN\t0\n+DRD5P2\t0\n+DTL\t24\n+DUSP12\t0\n+DUSP23\t0\n+DUSP27\t0\n+DUSP5P1\t0\n+DYRK3\t0\n+ECM1\t0\n+EDARADD\t0\n+EFCAB14-AS1\t0\n+EFCAB2\t0\n+EFCAB7\t0\n+EFHD2\t0\n+EFNA1\t0\n+EFNA3\t1\n+EFNA4\t1\n+EIF3I\t1\n+ELAVL4\t0\n+ELF3\t0\n+EMBP1\t0\n+ENO1-AS1\t0\n+EPB41\t1\n+EPHA8\t0\n+EPHB2\t0\n+EPHX1\t0\n+EPHX4\t0\n+ERICH3-AS1\t0\n+ERMAP\t0\n+ESPN\t0\n+EXO1\t3\n+EXO5\t7\n+EXTL1\t0\n+FAAH\t0\n+FAAHP1\t0\n+FAM102B\t0\n+FAM110D\t0\n+FAM159A\t0\n+FAM163A\t0\n+FAM167B\t0\n+FAM177B\t0\n+FAM183A\t0\n+FAM19A3\t0\n+FAM20B\t1\n+FAM212B-AS1\t0\n+FAM213B\t0\n+FAM231A\t0\n+FAM231D\t0\n+FAM43B\t0\n+FAM46C\t166\n+FAM71A\t0\n+FAM72A\t0\n+FAM72B\t0\n+FAM72C\t0\n+FAM73A\t1\n+FAM76A\t9\n+FAM87B\t0\n+FASLG\t0\n+FBLIM1\t0\n+FBXO28\t4\n+FBXO44\t0\n+FBXO6\t0\n+FCER1A\t0\n+FCER1G\t0\n+FCGR1A\t0\n+FCGR1C\t0\n+FCGR2A\t0\n+FCGR2B\t0
\n+FCGR2C\t0\n+FCRL6\t0\n+FCRLA\t0\n+FCRLB\t0\n+FDPS\t0\n+FGGY\t0\n+FHAD1\t0\n+FLAD1\t0\n+FLG-AS1\t0\n+FLJ23867\t0\n+FLJ31662\t0\n+FLVCR1\t0\n+FMN2\t1\n+FMO1\t0\n+FMO2\t9\n+FMO3\t1\n+FMO4\t0\n+FMO6P\t0\n+FMO9P\t0\n+FNBP1L\t3\n+FNDC7\t0\n+FOXD2\t0\n+FOXD3\t0\n+FOXE3\t0\n+FOXO6\t0\n+FPGT\t0\n+FPGT-TNNI3K\t0\n+G0S2\t0\n+GABPB2\t0\n+GABRD\t0\n+GADD45A\t0\n+GALNT2\t0\n+GAS5-AS1\t0\n+GBP1P1\t0\n+GBP6\t0\n+GCSAML\t0\n+GGPS1\t2\n+GIPC2\t0\n+GJA4\t0\n+GJA8\t0\n+GJB3\t0\n+GJB4\t0\n+GJB5\t0\n+GJC2\t0\n+GM140\t0\n+GMEB1\t1\n+GNAI3\t1\n+GNG12-AS1\t0\n+GNPAT\t0\n+GORAB\t0\n+GPR137B\t3\n+GPR25\t0\n+GPR3\t0\n+GPR37L1\t0\n+GPR52\t0\n+GPR61\t0\n+GPR88\t0\n+GPR89B\t0\n+GPSM2\t1\n+GPX7\t0\n+GRHL3\t0\n+GSTM1\t0\n+GSTM2\t0\n+GSTM4\t0\n+GSTM5\t0\n+GUCA2B\t0\n+GUK1\t0\n+H3F3A\t0\n+H3F3AP4\t0\n+H6PD\t1\n+HAO2\t0\n+HAO2-IT1\t0\n+HAPLN2\t0\n+H'..b'\n+LOC401585\t0\n+LOC643486\t0\n+LOC729609\t0\n+LRCH2\t0\n+MAGEA1\t0\n+MAGEA10\t0\n+MAGEA10-MAGEA5\t0\n+MAGEA12\t0\n+MAGEA2\t0\n+MAGEA2B\t0\n+MAGEA3\t0\n+MAGEA5\t0\n+MAGEA8-AS1\t0\n+MAGEA9\t0\n+MAGEA9B\t0\n+MAGEC2\t0\n+MAGED4\t0\n+MAGED4B\t0\n+MAGEE2\t0\n+MAGT1\t1\n+MAOB\t0\n+MAP2K4P1\t0\n+MAP3K15\t0\n+MAP7D2\t2\n+MAP7D3\t1\n+MBNL3\t0\n+MCF2\t0\n+MECP2\t3\n+MED14\t2\n+MID1\t5\n+MID1IP1-AS1\t0\n+MIR105-1\t0\n+MIR105-2\t0\n+MIR106A\t0\n+MIR1184-1\t0\n+MIR1184-2\t0\n+MIR1184-3\t0\n+MIR1256\t0\n+MIR1468\t0\n+MIR18B\t0\n+MIR19B2\t0\n+MIR20B\t0\n+MIR221\t0\n+MIR222\t0\n+MIR23C\t0\n+MIR3202-2\t0\n+MIR320D2\t0\n+MIR325HG\t0\n+MIR363\t0\n+MIR374A\t0\n+MIR374B\t0\n+MIR384\t0\n+MIR421\t0\n+MIR424\t0\n+MIR4328\t0\n+MIR4329\t0\n+MIR450A1\t0\n+MIR450A2\t0\n+MIR450B\t0\n+MIR452\t0\n+MIR4770\t0\n+MIR503\t0\n+MIR503HG\t0\n+MIR504\t0\n+MIR505\t0\n+MIR506\t0\n+MIR507\t0\n+MIR508\t0\n+MIR509-1\t0\n+MIR509-2\t0\n+MIR509-3\t0\n+MIR510\t0\n+MIR514A1\t0\n+MIR514A2\t0\n+MIR514A3\t0\n+MIR514B\t0\n+MIR542\t0\n+MIR545\t0\n+MIR6134\t0\n+MIR6857\t0\n+MIR6894\t0\n+MIR6895\t0\n+MIR718\t0\n+MIR766\t0\n+MIR767\t0\n+MIR8088\t0\n+MIR888\t0\n+MIR890\t0\n+MIR891A\t0\n+MIR891B\t0\n+MIR89
2A\t0\n+MIR892B\t0\n+MIR892C\t0\n+MIR92A2\t0\n+MIR98\t0\n+MIRLET7F2\t0\n+MMGT1\t3\n+MORC4\t2\n+MORF4L2\t4\n+MOSPD1\t1\n+MPP1\t1\n+MTCP1\t0\n+MTMR8\t0\n+MTRNR2L10\t0\n+MXRA5\t0\n+NAA10\t0\n+NAP1L2\t0\n+NAP1L3\t0\n+NAP1L6\t0\n+NDP\t0\n+NDUFB11\t0\n+NHS-AS1\t0\n+NKAP\t1\n+NKAPP1\t0\n+NKRF\t0\n+NLGN4X\t2\n+NOX1\t0\n+NR0B1\t0\n+NUDT11\t3\n+NUP62CL\t0\n+NXF2\t0\n+NXF2B\t0\n+NXF3\t0\n+NXF5\t0\n+OPHN1\t0\n+OTUD5\t0\n+P2RY4\t0\n+P2RY8\t0\n+PABPC1L2A\t0\n+PABPC1L2B-AS1\t0\n+PABPC5-AS1\t0\n+PAGE1\t0\n+PAGE3\t0\n+PCDH19\t0\n+PCSK1N\t0\n+PCYT1B\t1\n+PDZD11\t1\n+PDZD4\t0\n+PFKFB1\t0\n+PGAM4\t0\n+PHEX-AS1\t0\n+PHF8\t13\n+PHKA1\t0\n+PHKA2\t0\n+PIGA\t1\n+PIM2\t0\n+PIR\t0\n+PIR-FIGF\t0\n+PJA1\t0\n+PLAC1\t0\n+PLS3-AS1\t0\n+PNCK\t0\n+PNMA5\t0\n+PNPLA4\t0\n+POF1B\t0\n+PPEF1-AS1\t0\n+PPP1R2P9\t0\n+PPP2R3B\t0\n+PRAF2\t0\n+PRICKLE3\t0\n+PRKX\t1\n+PSMD10\t4\n+PTCHD1-AS\t0\n+RAB39B\t0\n+RAB40A\t0\n+RAB9B\t0\n+RAI2\t0\n+RAP2C\t1\n+RBBP7\t55\n+RBM41\t1\n+RBMX\t6\n+RENBP\t0\n+RGAG4\t0\n+RHOXF1\t0\n+RHOXF2\t0\n+RHOXF2B\t0\n+RIPPLY1\t0\n+RLIM\t7\n+RNF113A\t2\n+RNU6-2\t0\n+RNU6-28P\t0\n+RP11-87M18.2\t0\n+RPGR\t2\n+RPL39\t4\n+RPS4X\t16\n+RPS6KA3\t0\n+RPS6KA6\t0\n+RS1\t0\n+SATL1\t0\n+SCARNA9L\t0\n+SCML2\t0\n+SEPT6\t0\n+SERPINA7\t0\n+SH3KBP1\t13\n+SHROOM4\t0\n+SLC10A3\t1\n+SLC25A5-AS1\t0\n+SLC25A53\t0\n+SLC25A6\t0\n+SLC35A2\t0\n+SLC38A5\t0\n+SLC7A3\t0\n+SLC9A7\t2\n+SLITRK4\t0\n+SMARCA1\t0\n+SMC1A\t2\n+SMEK3P\t0\n+SMIM9\t0\n+SMPX\t0\n+SNORA11D\t0\n+SNORA11E\t0\n+SNORA69\t0\n+SNORD61\t0\n+SNORD96B\t0\n+SNX12\t2\n+SOX3\t0\n+SPANXA1\t0\n+SPANXA2\t0\n+SPANXC\t0\n+SPANXD\t0\n+SPANXN2\t0\n+SPANXN3\t0\n+SPANXN5\t0\n+SPIN2A\t0\n+SPIN2B\t0\n+SPIN3\t4\n+SPIN4\t0\n+SRPX\t0\n+SSX2\t0\n+SSX2B\t0\n+SSX3\t0\n+SSX4\t0\n+SSX4B\t0\n+SSX5\t0\n+SSX7\t0\n+SSX9\t0\n+SUPT20HL2\t0\n+SYN1\t0\n+SYP\t0\n+SYTL4\t2\n+TAB3\t1\n+TAF7L\t0\n+TAF9B\t0\n+TCEAL5\t0\n+TCEAL6\t0\n+TCEAL8\t0\n+TCP11X2\t0\n+TENM1\t0\n+TEX11\t0\n+TEX13A\t0\n+TEX13B\t0\n+TEX28\t0\n+TFDP3\t0\n+TFE3\t4\n+THOC2\t67\n+TIMM17B\t1\n+TIMM8A\t0\n+TLR8-AS1\t0\n+TMEM18
5A\t0\n+TMEM255A\t0\n+TMEM27\t0\n+TMEM47\t0\n+TMLHE\t0\n+TMLHE-AS1\t0\n+TMSB15A\t0\n+TRAPPC2\t1\n+TREX2\t0\n+TRMT2B\t0\n+TRPC5\t0\n+TSC22D3\t0\n+TSPAN6\t0\n+TTC3P1\t0\n+UBL4A\t0\n+UPF3B\t3\n+UQCRBP1\t0\n+USP26\t0\n+USP27X-AS1\t0\n+USP51\t0\n+UXT\t0\n+VCX2\t0\n+VCX3A\t0\n+VSIG4\t0\n+WDR45\t0\n+WNK3\t1\n+XAGE1B\t0\n+XAGE1E\t0\n+XAGE2\t0\n+XAGE3\t0\n+XIST\t0\n+XKRX\t0\n+XRCC6P5\t0\n+ZBED1\t0\n+ZC4H2\t0\n+ZCCHC5\t0\n+ZDHHC15\t0\n+ZDHHC9\t0\n+ZFX-AS1\t0\n+ZMAT1\t0\n+ZMYM3\t1\n+ZNF182\t5\n+ZNF280C\t12\n+ZNF41\t0\n+ZNF630\t0\n+ZNF674\t0\n+ZNF75D\t0\n+ZXDA\t0\n+AMELY\t0\n+ASMTL\t0\n+BCORP1\t0\n+BPY2\t0\n+BPY2B\t0\n+BPY2C\t0\n+CD24\t0\n+CDY1\t0\n+CDY1B\t0\n+CDY2A\t0\n+CDY2B\t0\n+CRLF2\t0\n+CSPG4P1Y\t0\n+DAZ1\t0\n+DAZ3\t0\n+DAZ4\t0\n+DDX11L16\t0\n+DHRSX\t0\n+FAM197Y2\t0\n+FAM197Y5\t0\n+FAM224A\t0\n+FAM224B\t0\n+FAM41AY1\t0\n+FAM41AY2\t0\n+GOLGA2P2Y\t0\n+GOLGA2P3Y\t0\n+GTPBP6\t0\n+GYG2P1\t0\n+HSFY1\t0\n+HSFY2\t0\n+KDM5D\t0\n+LINC00280\t0\n+LOC101929148\t0\n+NCRNA00185\t0\n+NLGN4Y-AS1\t0\n+P2RY8\t0\n+PPP2R3B\t0\n+PRORY\t0\n+PRY\t0\n+PRY2\t0\n+RBMY1A1\t0\n+RBMY1A3P\t0\n+RBMY1B\t0\n+RBMY1D\t0\n+RBMY1E\t0\n+RBMY1F\t0\n+RBMY1J\t0\n+RBMY2EP\t0\n+RBMY3AP\t0\n+SLC25A6\t0\n+SRY\t0\n+TTTY1\t0\n+TTTY10\t0\n+TTTY11\t0\n+TTTY13\t0\n+TTTY14\t0\n+TTTY16\t0\n+TTTY17A\t0\n+TTTY17B\t0\n+TTTY17C\t0\n+TTTY18\t0\n+TTTY1B\t0\n+TTTY2\t0\n+TTTY20\t0\n+TTTY21\t0\n+TTTY21B\t0\n+TTTY23\t0\n+TTTY23B\t0\n+TTTY2B\t0\n+TTTY3\t0\n+TTTY3B\t0\n+TTTY4\t0\n+TTTY4B\t0\n+TTTY4C\t0\n+TTTY5\t0\n+TTTY6\t0\n+TTTY6B\t0\n+TTTY7\t0\n+TTTY7B\t0\n+TTTY8\t0\n+TTTY8B\t0\n+TTTY9A\t0\n+TTTY9B\t0\n+UTY\t0\n+VCY\t0\n+VCY1B\t0\n+XKRY\t0\n+XKRY2\t0\n+ZBED1\t0\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.geneBodyCoverage.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.geneBodyCoverage.txt Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,104 @@ +Total reads: 77686 +Fragment number: 49058 +percentile count +0 194 +1 396 +2 581 +3 734 +4 832 +5 892 +6 1002 +7 1069 +8 1105 +9 1181 +10 1242 +11 1183 +12 1154 +13 1288 +14 1353 +15 1327 +16 1235 +17 1250 +18 1297 +19 1373 +20 1405 +21 1401 +22 1412 +23 1430 +24 1406 +25 1438 +26 1485 +27 1524 +28 1512 +29 1499 +30 1528 +31 1496 +32 1466 +33 1408 +34 1392 +35 1430 +36 1394 +37 1369 +38 1373 +39 1408 +40 1487 +41 1466 +42 1455 +43 1493 +44 1461 +45 1429 +46 1461 +47 1496 +48 1449 +49 1476 +50 1466 +51 1447 +52 1440 +53 1473 +54 1503 +55 1581 +56 1606 +57 1587 +58 1605 +59 1584 +60 1609 +61 1688 +62 1685 +63 1694 +64 1665 +65 1732 +66 1750 +67 1802 +68 1891 +69 1926 +70 1939 +71 1951 +72 1927 +73 1902 +74 1933 +75 1938 +76 1990 +77 1996 +78 1988 +79 2008 +80 2022 +81 2049 +82 2036 +83 2017 +84 2016 +85 2032 +86 2004 +87 2015 +88 2020 +89 2070 +90 2118 +91 2144 +92 2161 +93 2117 +94 2084 +95 2056 +96 1916 +97 1797 +98 1488 +99 963 +100 285 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.geneBodyCoverage_plot.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.geneBodyCoverage_plot.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,6 @@ +png('/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.geneBodyCoverage.png',width=500,height=500,units='px') +x=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) +y=c(194,396,581,734,832,892,1002,1069,1105,1181,1242,1183,1154,1288,1353,1327,1235,1250,1297,1373,1405,1401,1412,1430,1406,1438,1485,1524,1512,1499,1528,1496,1466,1408,1392,1430,1394,1369,1373,1408,1487,1466,1455,1493,1461,1429,1461,1496,1449,1476,1466,1447,1440,1473,1503,1581,1606,1587,1605,1584,1609,1688,1685,1694,1665,1732,1750,1802,1891,1926,1939,1951,1927,1902,1933,1938,1990,1996,1988,2008,2022,2049,2036,2017,2016,2032,2004,2015,2020,2070,2118,2144,2161,2117,2084,2056,1916,1797,1488,963,285) +smoothsp = smooth.spline(x,y,spar=0.35) +plot(smoothsp,type="l",col="blue",xlab="Percentile of Gene Body (5'->3')",ylab="Number of read",xlim=c(0,100)) +dev.state = dev.off() \ No newline at end of file |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.mapq_profile.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.mapq_profile.r Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,12 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.mapq_profile.png",width=500,height=500,units="px") +mapq_val=c(0,1,3,255) +mapq_count=c(573,3442,9322,64349) +xname=c("<3","<10","<20","<30","30-255") +freq = rep(0,5) +freq[1] = sum(mapq_count[which(mapq_val<3)])/77686*100 +freq[2] = sum(mapq_count[which(mapq_val<10)])/77686*100 +freq[3] = sum(mapq_count[which(mapq_val<20)])/77686*100 +freq[4] = sum(mapq_count[which(mapq_val<30)])/77686*100 +freq[5] = 100 +barplot(freq,beside=T,xlab="Mapping Quality",border="NA",space=1.5,main="Mapping Quality",ylim=c(0,100),ylab="Cumulative proportion (%)",col="blue",names.arg=xname) +dev.state=dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.mapq_profile.xls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.mapq_profile.xls Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +MAPQ Read_Total Read_with_mapq +0 77686 573 +1 77686 3442 +3 77686 9322 +255 77686 64349 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.read_distr.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.read_distr.r Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,6 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.read_distr.png",width=500,height=500,units="px") +M=c(24065,1500,15884,8486,169,452,8521,13287) +Mname=c("CDS","5UTR","3UTR","Intron","TSS_Up_1Kb","TES_Down_1Kb","rRNA","Others") +val = barplot(M,xlab="",space=1,ylab="Read Counts",col="blue",border="NA") +text(x=seq(val[1],val[8],by=2),y=rep(0,8),srt=60,adj=0,offset=2,pos=1,xpd=T,labels=Mname) +dev.state = dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.read_distr_pie.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.read_distr_pie.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,3 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.read_distr_pie.png",width=500,height=500,units="px") +pie(c(35107,508528),labels=c("Covered 35107 exons","Uncovered"),main="Exons",radius=0.6,clockwise=T,col=c("blue","white")) +dev.state = dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.readlen_profile.xls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.readlen_profile.xls Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,35 @@ +Position Read_Total Read_Len_mapped +67 77686 220 +68 77686 212 +69 77686 287 +70 77686 315 +71 77686 327 +72 77686 323 +73 77686 420 +74 77686 437 +75 77686 447 +76 77686 422 +77 77686 435 +78 77686 429 +79 77686 427 +80 77686 487 +81 77686 438 +82 77686 487 +83 77686 492 +84 77686 492 +85 77686 518 +86 77686 536 +87 77686 517 +88 77686 565 +89 77686 503 +90 77686 550 +91 77686 723 +92 77686 808 +93 77686 803 +94 77686 859 +95 77686 881 +96 77686 927 +97 77686 945 +98 77686 1535 +99 77686 1897 +100 77686 44685 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp0.res.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp0.res.txt Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,49 @@ +filename /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp0.res.txt +is_pairEnd 0 +clipping_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.clipping_profile.png +mapq_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.mapq_profile.png +mapq_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp0.mapq_profile.xls +read_cov_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.geneBodyCoverage.png +trans_cov_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.TransCoverage.png +insert_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.inner_distance_plot.png +insert_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp0.inner_distance_freq.txt +read_dist_plot_file1 /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.read_distr.png +read_dist_plot_file2 /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.read_distr_pie.png +readLen_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp0.readlen_profile.png +geneCount_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp0.geneAbundance.txt +seqDeDup_percent 0.000000 +posDeDup_percent 0.000000 +no_clipping 0 +no_rRNA 0 +total_reads 77686 +uniq_mapped_reads 64349 +multi_mapped_reads 13337 +unmapped_reads 0 +low_qual 0 +low_qual_read1 0 +low_qual_read2 0 +pcr_dup 0 +rRNA_read 8521 +cds_read 24065 +utr5_read 1500 +utr3_read 15884 +intron_read 8486 +itgup1k_read 169 +itgdn1k_read 452 +itg_read 13287 +unmapped_read1 0 +unmapped_read2 0 +mapped_read1 0 +mapped_read2 0 +forward_read 31875 +reverse_read 32474 +paired_reads 0 +mapped_plus_minus 0 +mapped_plus_plus 0 +mapped_minus_plus 0 +mapped_minus_minus 0 +ins_read 0 +del_read 0 +noSplice 50449 +splice 13900 +paired_diff_chrom 0 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.ReadLen_plot.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.ReadLen_plot.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.readlen_profile.png",width=500,height=500,units="px") +readlen_val=c(67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) +readlen_count=c(182,173,267,238,308,340,386,354,392,415,421,399,419,456,470,469,496,483,489,488,529,499,514,561,748,813,751,827,863,912,958,1648,1875,47529) +plot(readlen_val,(readlen_count/79258),pch=20,xlab="Mapped Read Length",ylab="Proportion",col="blue") +dev.state=dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.TransCoverage.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.TransCoverage.r Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b"@@ -0,0 +1,10 @@\n+png('/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.TransCoverage.png',width=500,height=500,units='px')\n+a=c(0,0,1,0,0,0,0,243,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,1,0,2,1,0,1,0,2,0,0,0,0,0,0,0,0,0,0,0,0,5,6,0,0,27,0,4,20,0,0,0,1,0,2,0,3,1,21,0,0,16,18,2,2,0,1,1,0,0,0,1,0,0,1,0,0,1,0,0,0,9,0,2,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,6,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3,2,0,0,10,0,0,1,0,0,5,0,0,0,0,0,0,0,0,0,0,0,10,0,0,8,2,4,0,10,0,0,0,0,0,0,0,3,1,12,1,0,0,0,0,0,0,1,1,0,0,0,0,0,3,6,16,0,0,0,0,0,0,0,3,0,0,0,0,3,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,1,0,0,11,0,0,0,0,1,0,0,0,0,1,0,4,0,3,0,0,0,0,0,0,0,1,0,1,0,1,0,0,0,0,0,0,6,1,0,1,2,0,0,0,5,1,0,0,2,0,0,35,1,0,0,0,0,0,1,1,1,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,0,1,0,1,11,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,142,0,0,0,0,1,10,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,13,1,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,6,0,3,0,6,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,7,2,0,0,0,0,0,0,0,0,0,0,4,0,3,1,0,0,1,0,0,0,0,0,0,0,0,1,0,2,0,0,0,0,0,0,0,0,0,4,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,2,1,4,4,0,3,0,0,0,0,4,2,10,0,0,0,0,1,8,0,17,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,2,13,0,0,4,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,21,1,0,0,0,0,1,0,0,13,1,4,0,0,0,2,3,0,0,0,0,0,19,0,0,0,3,5,2,3,0,0,0,0,7,2,2,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,43,0,3,0,0,0,0,2,0,0,0,0,1,0,1,3,4,0,0,0,0,23,1,0,6,0,28,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,0,7,1,0,0,0,0,4,0,0,0,0,0,2,1,1,0,0,0,1,0,0,0,2,12,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,4,12,0,0,0,0,6,0,0,2,0,0,0,149,0,0,0,0,6,1,0,0,1,4,1,2,0,9,1,1,9,0,0,0,0,0,0,0,0,8,2,0,0,0,0,0,5,0,2,1,0,0,0,1,0,0,8,0,0,0,0,0,0,0
,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,9,2,0,1,0,0,0,0,0,0,1,4,3,0,27,0,0,1,5,3,1,0,0,0,1,0,1,0,0,0,0,0,0,0,0,1,0,2,0,2,1,3,0,10,1,1,0,0,1,1,0,1,0,0,0,0,553,0,0,0,0,0,0,6,13,12,0,0,5,2,0,1,2,0,0,0,0,0,0,0,2,5,16,17,5,0,0,2,0,1,0,15,0,3,0,1,0,0,0,2,0,0,0,1,0,0,0,2,0,0,1,2,0,1,7,2,0,0,1,0,0,5,6,0,0,0,0,0,6,0,4,0,0,0,0,0,0,2,0,0,3,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,2,0,0,2,0,0,2,0,0,0,0,0,4,3,0,0,0,0,0,6,47,0,0,0,1,1,4,1,2,1,1,0,1,0,1,0,2,0,1,10,0,0,0,0,0,0,1,7,0,0,1,0,0,0,0,0,0,0,0,4,0,2,0,1,5,0,0,0,0,0,0,0,0,3,2,1,0,6,1,8,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,1,0,0,0,4,2,0,0,0,0,2,5,0,0,0,0,2,0,1,0,0,4,0,0,1,2,6,0,0,0,20,1,2,5,3,0,0,0,0,0,1,1,0,0,0,0,0,0,2,0,0,0,1,3,2,4,1,0,0,2,0,0,0,2,0,1,23,0,0,0,3,4,0,3,0,1,2,2,0,0,0,0,0,0,0,1,0,0,0,2,0,0,0,0,0,85,0,0,8,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,1,0,0,5,0,2,0,1,3,0,0,0,9,2,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,3,0,0,0,7,0,0,8,0,9,2,0,0,0,1,5,42,0,0,2,0,6,0,55,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,1,4,3,0,0,0,0,0,0,0,0,0,0,5,0,1,2,0,0,0,0,0,0,1,0,1,1,0,0,9,0,18,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,2,0,0,0,0,1,2,0,0,0,0,0,0,5,0,0,0,2,0,0,1,2,12,0,3,9,1,0,0,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,4,0,0,0,2,0,0,1,0,9,2,28,0,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,2,0,4,0,0,0,1,0,6,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,3,0,0,24,8,0,3,0,0,2,0,4,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,2,1,0,1,0,1,0,0,7,0,0,0,0,0,0,0,0,5,1,18,0,2,2,0,3,0,0,2,0,0,0,0,0,6,0,16,0,0,0,0,0,0,1,0,0,1,76,0,0,0,2,1,0,0,0,0,0,0,4,4,0,2,0,0,0,0,5,0,0,2,12,0,0,0,0,0,1,0,0,3,0,3,4,0,2,33,6,1,0,0,5,0,0,0,1,4,0,0,0,0,0,0,4,2,0,3,1,0,0,0,0,0,3,1,0,0,1,0,0,30,1,0,1,3,0,1,0,0,0,1,11,0,0,0,0,0,0,0,0,0,1,0,0,2,5,1,0,1,1,0,3,0,36,0,0,0,0,1,0,2,0,1,3,2,0,0,1,0,1,1,0,1,4,5,0,0,6,0,7,0,3,0,0,0,0,0,0,0,0,1,1,0,0,0,7,5,0,0,0,0,0,10,0,0,0,3,0,0,0,0,0,0,0,10,11,0,0,0,2
,0,5,"..b'0,0,0,6,1,3,0,0,0,0,0,0,0,0,0,1,0,20,2,0,0,3,0,0,0,1,0,0,3,0,30,0,0,17,0,3,0,0,0,0,0,1,0,0,0,1,0,14,7,43,0,0,1,0,0,0,1,1,0,3,1,0,0,0,36,0,0,6,0,0,0,1,6,59,0,0,0,0,0,0,0,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,0,2,0,0,3,3,52,4,3,0,0,1,3,1,6,0,0,3,0,0,2,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,70,0,1,1,0,0,0,0,27,0,0,2,5,0,1,0,0,1,0,4,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,2,0,0,2,0,0,0,9,9,0,0,1,1,4,2,0,0,0,1,0,3,3,0,0,0,6,0,0,0,0,0,0,0,0,0,5,0,0,0,3,0,0,0,0,2,0,4,0,2,0,0,0,0,0,1,1,0,2,0,0,0,0,7,0,7,0,1,1,25,1,0,2,3,2,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,3,0,3,20,1,0,1,0,3,4,1,0,0,14,0,0,0,10,0,0,0,0,0,0,1,76,15,0,1,0,5,4,0,0,7,0,7,3,0,0,0,0,0,0,0,0,0,1,0,0,12,0,4,3,2,0,0,0,0,8,2,10,0,0,1,3,0,2,1,0,12,0,1,1,0,0,1,25,0,34,55,3,0,16,13,2,2,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,5,0,1,0,0,0,0,0,0,0,4,2,5,1,0,0,0,1,0,0,3,0,0,0,0,1,0,1,2,1,18,2,0,2,0,2,0,0,1,0,1,0,2,8,1,0,0,0,0,7,1,3,0,0,0,3,0,0,1,0,13,0,0,2,0,5,0,0,19,0,0,0,0,0,0,1,0,0,1,1,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0,2,0,0,0,0,3,2,9,0,0,0,0,3,0,4,2,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,2,8,0,0,5,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,9,0,1,4,0,4,2,0,0,2,3,0,0,0,0,1,0,0,0,0,0,1,2,0,0,0,2,0,3,3,0,10,4,1,0,0,6,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,2,0,10,0,0,0,0,0,0,2,0,0,4,0,0,0,0,1,7,0,0,0,0,14,2,0,2,4,5,0,0,0,1,1,0,1,1,2,0,8,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0,1,0,0,0,0,0,6,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,8,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,9,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,0,1,7,0,0,0,0,0,0,0,0,13,1,0,0,9,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,7,1,0,0,1,
0,0,8,0,0,10,1,0,0,0,0,0,0,0,1,0,7,1,1,0,2,0,0,0,0,0,6,1,0,0,1,0,0,1,0,0,36,0,0,9,7,0,48,0,1,3,0,1,1,0,2,0,0,0,0,1,0,2,0,1,2,0,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,7,0,0,1,2,22,0,0,0,6,2,0,2,1,1,1,0,1,14,0,0,5,0,0,0,0,0,0,0,5,17,1,0,0,0,0,0,0,0,0,2,3,0,2,0,0,0,0,4,6,10,0,0,1,4,0,0,0,0,1,0,3,0,0,0,0,0,13,0,3,0,0,1,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,2,4,0,0,9,0,0,0,0,11,0,0,3,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,12,0,0,1,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,2,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,2,0,0,0,0,3,7,0,0,3,0,2,0,0,0,0,0,0,0,0,11,0,1,0,0,0,1,0,0,1,0,3,0,0,0,0,0,0,0,0,0,0,1,0,0,2,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,3,2,0,0,3,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,1,7,3,1,0,2,0,0,0,0,0,0,0,0,0,1,0,0,3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,21,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,3,0,0,0,0,0,0,59,1,6,0,0,0,0,0,0,3,2,0,0,0,2,1,10,1,0,1,0,0,3,0,0,17,0,2,0,0,0,1,0,0,0,0,0,3,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,2,0,0,4,2,0,0,0,0,0,0,0,0,0,0,0,0,4,79,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,9,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)\n+Fn = ecdf(a)\n+max_x = round(log(max(knots(Fn)),2),0)\n+xx = c(0,2^seq(0,max_x,by=2))\n+y=Fn(xx)\n+xlog = log(xx[2:length(xx)],base=2)\n+plot(x=c(-1,xlog),y=y,xaxt = \'n\',type="b",col="blue",pch=20,xlab="Number of Reads",ylab="Cumulative proportion of Genes")\n+axis(1,at = c(-1,seq(0,max_x,by=2)),labels=c(0,2^seq(0,max_x,by=2)))\n+dev.state = dev.off()\n\\ No 
newline at end of file\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.clipping_profile.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.clipping_profile.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.clipping_profile.png",width=500,height=500,units="px") +read_pos=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99) +count=c(10080,9066,8130,7621,7132,6639,6173,5746,5321,4915,4597,4335,4076,3809,3556,3301,3060,2829,2599,2347,2127,1921,1710,1505,1283,1093,918,736,568,415,305,164,84,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,85,162,278,386,526,692,871,1040,1222,1412,1610,1804,2008,2226,2447,2677,2949,3192,3438,3680,3952,4204,4467,4757,5154,5574,5983,6410,6874,7383,7905,8793,9768) +plot(read_pos,1-(count/79258),pch=20,xlab="Position of reads",ylab="Mappability",col="blue") +dev.state=dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.clipping_profile.xls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.clipping_profile.xls Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,101 @@ +Position Read_Total Read_clipped +0 79258 10080 +1 79258 9066 +2 79258 8130 +3 79258 7621 +4 79258 7132 +5 79258 6639 +6 79258 6173 +7 79258 5746 +8 79258 5321 +9 79258 4915 +10 79258 4597 +11 79258 4335 +12 79258 4076 +13 79258 3809 +14 79258 3556 +15 79258 3301 +16 79258 3060 +17 79258 2829 +18 79258 2599 +19 79258 2347 +20 79258 2127 +21 79258 1921 +22 79258 1710 +23 79258 1505 +24 79258 1283 +25 79258 1093 +26 79258 918 +27 79258 736 +28 79258 568 +29 79258 415 +30 79258 305 +31 79258 164 +32 79258 84 +33 79258 0 +34 79258 0 +35 79258 0 +36 79258 0 +37 79258 0 +38 79258 0 +39 79258 0 +40 79258 0 +41 79258 0 +42 79258 0 +43 79258 0 +44 79258 0 +45 79258 0 +46 79258 0 +47 79258 0 +48 79258 0 +49 79258 0 +50 79258 0 +51 79258 0 +52 79258 0 +53 79258 0 +54 79258 0 +55 79258 0 +56 79258 0 +57 79258 0 +58 79258 0 +59 79258 0 +60 79258 0 +61 79258 0 +62 79258 0 +63 79258 0 +64 79258 0 +65 79258 0 +66 79258 0 +67 79258 85 +68 79258 162 +69 79258 278 +70 79258 386 +71 79258 526 +72 79258 692 +73 79258 871 +74 79258 1040 +75 79258 1222 +76 79258 1412 +77 79258 1610 +78 79258 1804 +79 79258 2008 +80 79258 2226 +81 79258 2447 +82 79258 2677 +83 79258 2949 +84 79258 3192 +85 79258 3438 +86 79258 3680 +87 79258 3952 +88 79258 4204 +89 79258 4467 +90 79258 4757 +91 79258 5154 +92 79258 5574 +93 79258 5983 +94 79258 6410 +95 79258 6874 +96 79258 7383 +97 79258 7905 +98 79258 8793 +99 79258 9768 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.geneAbundance.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.geneAbundance.txt Thu Mar 24 17:12:52 2016 -0400 |
| b |
| b'@@ -0,0 +1,27321 @@\n+gene\tCounts\n+AADACL3\t0\n+AADACL4\t0\n+ABCD3\t1\n+ACADM\t0\n+ACKR1\t0\n+ACOT11\t0\n+ACTG1P4\t0\n+ACTL8\t243\n+ACTN2\t0\n+ACTRT2\t0\n+ADAM15\t0\n+ADAMTSL4\t0\n+ADCK3\t0\n+ADORA1\t0\n+ADPRHL2\t0\n+AGL\t0\n+AGO1\t2\n+AGO3\t0\n+AGO4\t0\n+AGRN\t0\n+AGTRAP\t0\n+AHCYL1\t1\n+AJAP1\t0\n+AK4\t1\n+AK5\t0\n+AKIRIN1\t2\n+AKR1A1\t1\n+AKR7A2P1\t0\n+ALG6\t1\n+ALPL\t0\n+AMPD2\t2\n+AMY1A\t0\n+AMY1B\t0\n+AMY1C\t0\n+AMY2A\t0\n+AMY2B\t0\n+ANGPTL3\t0\n+ANGPTL7\t0\n+ANKRD34A\t0\n+ANKRD35\t0\n+ANXA9\t0\n+AP4B1-AS1\t0\n+APCS\t0\n+APITD1\t5\n+APITD1-CORT\t6\n+APOA1BP\t0\n+AQP10\t0\n+ARF1\t27\n+ARHGEF10L\t0\n+ARHGEF16\t4\n+ARID1A\t20\n+ARTN\t0\n+ARV1\t0\n+ASH1L-AS1\t0\n+ATAD3A\t1\n+ATAD3B\t0\n+ATAD3C\t2\n+ATF3\t0\n+ATF6\t3\n+ATG4C\t1\n+ATP1A1\t21\n+ATP1A2\t0\n+ATP1A4\t0\n+ATP1B1\t16\n+ATP2B4\t18\n+ATP5F1\t2\n+ATP6V0B\t2\n+ATP8B2\t0\n+ATPIF1\t1\n+ATXN7L2\t1\n+AVPR1B\t0\n+AXDND1\t0\n+AZIN2\t0\n+B3GALT6\t1\n+B4GALT2\t0\n+BCAN\t0\n+BCL9\t1\n+BECN1P1\t0\n+BGLAP\t0\n+BLZF1\t1\n+BMP8A\t0\n+BNIPL\t0\n+BOLA1\t0\n+BRDT\t9\n+BRINP2\t0\n+BROX\t2\n+BSND\t0\n+BTBD19\t0\n+BTBD8\t0\n+BTF3L4\t1\n+BTG2\t0\n+C1QA\t0\n+C1QB\t0\n+C1QC\t0\n+C1orf100\t0\n+C1orf101\t0\n+C1orf105\t0\n+C1orf106\t0\n+C1orf112\t0\n+C1orf115\t0\n+C1orf122\t0\n+C1orf137\t0\n+C1orf146\t2\n+C1orf158\t0\n+C1orf162\t0\n+C1orf167\t0\n+C1orf177\t0\n+C1orf185\t0\n+C1orf21\t0\n+C1orf213\t0\n+C1orf220\t0\n+C1orf226\t6\n+C1orf228\t0\n+C1orf27\t0\n+C1orf50\t0\n+C1orf53\t0\n+C1orf54\t0\n+C1orf56\t3\n+C1orf64\t0\n+C1orf68\t0\n+C1orf94\t0\n+C1orf95\t0\n+C4BPA\t0\n+C4BPB\t0\n+C8A\t0\n+CA14\t0\n+CA6\t0\n+CACHD1\t0\n+CACNA1E\t0\n+CACYBP\t1\n+CADM3\t0\n+CALML6\t0\n+CAMK1G\t0\n+CAMSAP2\t0\n+CAMTA1\t3\n+CAP1\t2\n+CAPN2\t0\n+CAPN9\t0\n+CAPZA1\t10\n+CASQ1\t0\n+CATSPER4\t0\n+CCDC18\t1\n+CCDC185\t0\n+CCDC24\t0\n+CCDC27\t5\n+CCDC28B\t0\n+CCDC30\t0\n+CD101\t0\n+CD1A\t0\n+CD1C\t0\n+CD1D\t0\n+CD1E\t0\n+CD2\t0\n+CD46\t0\n+CD52\t0\n+CD53\t0\n+CD55\t10\n+CDA\t0\n+CDC14A\t0\n+CDC20\t8\n+CDC42\t2\n+CDC7\t4\n+CDC73\t0\n+CDCA8\t10\n+CDK18\t0\n+C
DKN2C\t0\n+CELA2A\t0\n+CELA2B\t0\n+CELA3A\t0\n+CELA3B\t0\n+CELSR2\t0\n+CENPF\t3\n+CEP350\t1\n+CEP85\t12\n+CEPT1\t1\n+CFH\t0\n+CFHR1\t0\n+CFHR2\t0\n+CFHR3\t0\n+CFHR4\t0\n+CFHR5\t0\n+CGN\t1\n+CHD1L\t1\n+CHI3L2\t0\n+CHIA\t0\n+CHIAP2\t0\n+CHRM3\t0\n+CHRNB2\t0\n+CHTOP\t3\n+CIART\t6\n+CKS1B\t16\n+CLCA1\t0\n+CLCA2\t0\n+CLCA3P\t0\n+CLCA4\t0\n+CLCN6\t0\n+CLCNKA\t0\n+CLCNKB\t0\n+CLIC4\t3\n+CMPK1\t0\n+CNIH3\t0\n+CNIH4\t0\n+CNKSR1\t0\n+CNST\t3\n+CNTN2\t0\n+COA6\t0\n+COG2\t0\n+CORT\t0\n+COX20\t0\n+CPT2\t0\n+CPTP\t0\n+CR1\t0\n+CR1L\t0\n+CR2\t2\n+CRB1\t0\n+CRCT1\t0\n+CREB3L4\t0\n+CROCC\t0\n+CSF1\t0\n+CSMD2-AS1\t0\n+CTH\t0\n+CTPS1\t1\n+CTRC\t0\n+CTSE\t0\n+CTTNBP2NL\t11\n+CYB561D1\t0\n+CYCSP52\t0\n+CYMP\t0\n+CYP4A22\t0\n+CYP4B1\t1\n+CYP4X1\t0\n+CYP4Z1\t0\n+CYR61\t0\n+DAB1-AS1\t0\n+DAP3\t1\n+DARS2\t0\n+DCAF6\t4\n+DCDC2B\t0\n+DCLRE1B\t3\n+DCST1\t0\n+DDI2\t0\n+DDR2\t0\n+DDX11L1\t0\n+DDX20\t0\n+DEGS1\t0\n+DEPDC1-AS1\t0\n+DESI2\t1\n+DFFB\t0\n+DHDDS\t1\n+DHX9\t0\n+DIEXF\t1\n+DIO1\t0\n+DISC1\t0\n+DISP1\t0\n+DLEU2L\t0\n+DMAP1\t0\n+DMBX1\t0\n+DMRTB1\t6\n+DNAH14\t1\n+DNAJA1P5\t0\n+DNAJB4\t1\n+DNAJC16\t2\n+DNAJC6\t0\n+DNALI1\t0\n+DNASE2B\t0\n+DNM3\t5\n+DPH2\t1\n+DPYD-AS1\t0\n+DPYD-AS2\t0\n+DR1\t2\n+DRAXIN\t0\n+DRD5P2\t0\n+DTL\t35\n+DUSP12\t1\n+DUSP23\t0\n+DUSP27\t0\n+DUSP5P1\t0\n+DYRK3\t0\n+ECM1\t0\n+EDARADD\t1\n+EFCAB14-AS1\t1\n+EFCAB2\t1\n+EFCAB7\t0\n+EFHD2\t0\n+EFNA1\t0\n+EFNA3\t0\n+EFNA4\t0\n+EIF3I\t1\n+ELAVL4\t0\n+ELF3\t0\n+EMBP1\t0\n+ENO1-AS1\t0\n+EPB41\t2\n+EPHA8\t0\n+EPHB2\t0\n+EPHX1\t0\n+EPHX4\t0\n+ERICH3-AS1\t0\n+ERMAP\t1\n+ESPN\t0\n+EXO1\t1\n+EXO5\t11\n+EXTL1\t0\n+FAAH\t0\n+FAAHP1\t0\n+FAM102B\t0\n+FAM110D\t0\n+FAM159A\t0\n+FAM163A\t0\n+FAM167B\t0\n+FAM177B\t0\n+FAM183A\t0\n+FAM19A3\t0\n+FAM20B\t0\n+FAM212B-AS1\t0\n+FAM213B\t1\n+FAM231A\t0\n+FAM231D\t0\n+FAM43B\t0\n+FAM46C\t142\n+FAM71A\t0\n+FAM72A\t0\n+FAM72B\t0\n+FAM72C\t0\n+FAM73A\t1\n+FAM76A\t10\n+FAM87B\t0\n+FASLG\t0\n+FBLIM1\t0\n+FBXO28\t2\n+FBXO44\t0\n+FBXO6\t0\n+FCER1A\t0\n+FCER1G\t0\n+FCGR1A\t0\n+FCGR1C\t0\n+FCGR2A\t0\n+FCG
R2B\t0\n+FCGR2C\t0\n+FCRL6\t0\n+FCRLA\t0\n+FCRLB\t0\n+FDPS\t0\n+FGGY\t0\n+FHAD1\t0\n+FLAD1\t1\n+FLG-AS1\t0\n+FLJ23867\t0\n+FLJ31662\t0\n+FLVCR1\t0\n+FMN2\t2\n+FMO1\t0\n+FMO2\t13\n+FMO3\t1\n+FMO4\t0\n+FMO6P\t0\n+FMO9P\t0\n+FNBP1L\t1\n+FNDC7\t0\n+FOXD2\t0\n+FOXD3\t0\n+FOXE3\t0\n+FOXO6\t0\n+FPGT\t1\n+FPGT-TNNI3K\t0\n+G0S2\t0\n+GABPB2\t0\n+GABRD\t0\n+GADD45A\t0\n+GALNT2\t0\n+GAS5-AS1\t0\n+GBP1P1\t0\n+GBP6\t0\n+GCSAML\t0\n+GGPS1\t0\n+GIPC2\t0\n+GJA4\t0\n+GJA8\t0\n+GJB3\t0\n+GJB4\t0\n+GJB5\t0\n+GJC2\t0\n+GM140\t0\n+GMEB1\t2\n+GNAI3\t6\n+GNG12-AS1\t0\n+GNPAT\t3\n+GORAB\t0\n+GPR137B\t6\n+GPR25\t0\n+GPR3\t2\n+GPR37L1\t0\n+GPR52\t0\n+GPR61\t0\n+GPR88\t0\n+GPR89B\t0\n+GPSM2\t1\n+GPX7\t0\n+GRHL3\t0\n+GSTM1\t0\n+GSTM2\t0\n+GSTM4\t0\n+GSTM5\t0\n+GUCA2B\t0\n+GUK1\t0\n+H3F3A\t0\n+H3F3AP4\t0\n+H6PD\t1\n+HAO2\t0\n+HAO2-IT1\t0\n+HAPL'..b'0\n+LOC401585\t0\n+LOC643486\t0\n+LOC729609\t0\n+LRCH2\t0\n+MAGEA1\t0\n+MAGEA10\t0\n+MAGEA10-MAGEA5\t0\n+MAGEA12\t0\n+MAGEA2\t0\n+MAGEA2B\t0\n+MAGEA3\t0\n+MAGEA5\t0\n+MAGEA8-AS1\t0\n+MAGEA9\t0\n+MAGEA9B\t0\n+MAGEC2\t0\n+MAGED4\t0\n+MAGED4B\t0\n+MAGEE2\t0\n+MAGT1\t1\n+MAOB\t0\n+MAP2K4P1\t0\n+MAP3K15\t0\n+MAP7D2\t3\n+MAP7D3\t2\n+MBNL3\t0\n+MCF2\t0\n+MECP2\t3\n+MED14\t0\n+MID1\t2\n+MID1IP1-AS1\t0\n+MIR105-1\t0\n+MIR105-2\t0\n+MIR106A\t0\n+MIR1184-1\t0\n+MIR1184-2\t0\n+MIR1184-3\t0\n+MIR1256\t0\n+MIR1468\t0\n+MIR18B\t0\n+MIR19B2\t0\n+MIR20B\t0\n+MIR221\t0\n+MIR222\t0\n+MIR23C\t0\n+MIR3202-2\t0\n+MIR320D2\t0\n+MIR325HG\t0\n+MIR363\t0\n+MIR374A\t0\n+MIR374B\t0\n+MIR384\t0\n+MIR421\t0\n+MIR424\t0\n+MIR4328\t0\n+MIR4329\t0\n+MIR450A1\t0\n+MIR450A2\t0\n+MIR450B\t0\n+MIR452\t0\n+MIR4770\t0\n+MIR503\t0\n+MIR503HG\t0\n+MIR504\t0\n+MIR505\t0\n+MIR506\t0\n+MIR507\t0\n+MIR508\t0\n+MIR509-1\t0\n+MIR509-2\t0\n+MIR509-3\t0\n+MIR510\t0\n+MIR514A1\t0\n+MIR514A2\t0\n+MIR514A3\t0\n+MIR514B\t0\n+MIR542\t0\n+MIR545\t0\n+MIR6134\t0\n+MIR6857\t0\n+MIR6894\t0\n+MIR6895\t0\n+MIR718\t0\n+MIR766\t0\n+MIR767\t0\n+MIR8088\t0\n+MIR888\t0\n+MIR890\t0\n+MIR891A\t0\n+MIR891B\t0\n+MIR892
A\t0\n+MIR892B\t0\n+MIR892C\t0\n+MIR92A2\t0\n+MIR98\t0\n+MIRLET7F2\t0\n+MMGT1\t5\n+MORC4\t1\n+MORF4L2\t7\n+MOSPD1\t3\n+MPP1\t1\n+MTCP1\t0\n+MTMR8\t2\n+MTRNR2L10\t0\n+MXRA5\t0\n+NAA10\t0\n+NAP1L2\t0\n+NAP1L3\t0\n+NAP1L6\t0\n+NDP\t0\n+NDUFB11\t0\n+NHS-AS1\t0\n+NKAP\t1\n+NKAPP1\t0\n+NKRF\t0\n+NLGN4X\t3\n+NOX1\t0\n+NR0B1\t0\n+NUDT11\t1\n+NUP62CL\t0\n+NXF2\t0\n+NXF2B\t0\n+NXF3\t0\n+NXF5\t0\n+OPHN1\t0\n+OTUD5\t0\n+P2RY4\t0\n+P2RY8\t0\n+PABPC1L2A\t0\n+PABPC1L2B-AS1\t0\n+PABPC5-AS1\t0\n+PAGE1\t0\n+PAGE3\t0\n+PCDH19\t0\n+PCSK1N\t0\n+PCYT1B\t0\n+PDZD11\t0\n+PDZD4\t0\n+PFKFB1\t0\n+PGAM4\t0\n+PHEX-AS1\t0\n+PHF8\t21\n+PHKA1\t0\n+PHKA2\t0\n+PIGA\t0\n+PIM2\t0\n+PIR\t0\n+PIR-FIGF\t0\n+PJA1\t0\n+PLAC1\t0\n+PLS3-AS1\t0\n+PNCK\t0\n+PNMA5\t0\n+PNPLA4\t2\n+POF1B\t0\n+PPEF1-AS1\t0\n+PPP1R2P9\t0\n+PPP2R3B\t0\n+PRAF2\t1\n+PRICKLE3\t0\n+PRKX\t0\n+PSMD10\t3\n+PTCHD1-AS\t0\n+RAB39B\t0\n+RAB40A\t0\n+RAB9B\t0\n+RAI2\t0\n+RAP2C\t0\n+RBBP7\t59\n+RBM41\t1\n+RBMX\t6\n+RENBP\t0\n+RGAG4\t0\n+RHOXF1\t0\n+RHOXF2\t0\n+RHOXF2B\t0\n+RIPPLY1\t0\n+RLIM\t3\n+RNF113A\t2\n+RNU6-2\t0\n+RNU6-28P\t0\n+RP11-87M18.2\t0\n+RPGR\t2\n+RPL39\t1\n+RPS4X\t10\n+RPS6KA3\t1\n+RPS6KA6\t0\n+RS1\t1\n+SATL1\t0\n+SCARNA9L\t0\n+SCML2\t3\n+SEPT6\t0\n+SERPINA7\t0\n+SH3KBP1\t17\n+SHROOM4\t0\n+SLC10A3\t2\n+SLC25A5-AS1\t0\n+SLC25A53\t0\n+SLC25A6\t0\n+SLC35A2\t1\n+SLC38A5\t0\n+SLC7A3\t0\n+SLC9A7\t0\n+SLITRK4\t0\n+SMARCA1\t0\n+SMC1A\t3\n+SMEK3P\t0\n+SMIM9\t0\n+SMPX\t0\n+SNORA11D\t0\n+SNORA11E\t0\n+SNORA69\t0\n+SNORD61\t0\n+SNORD96B\t0\n+SNX12\t2\n+SOX3\t0\n+SPANXA1\t0\n+SPANXA2\t0\n+SPANXC\t0\n+SPANXD\t0\n+SPANXN2\t0\n+SPANXN3\t0\n+SPANXN5\t0\n+SPIN2A\t0\n+SPIN2B\t0\n+SPIN3\t3\n+SPIN4\t0\n+SRPX\t0\n+SSX2\t0\n+SSX2B\t0\n+SSX3\t0\n+SSX4\t0\n+SSX4B\t0\n+SSX5\t0\n+SSX7\t0\n+SSX9\t0\n+SUPT20HL2\t2\n+SYN1\t0\n+SYP\t0\n+SYTL4\t4\n+TAB3\t2\n+TAF7L\t0\n+TAF9B\t0\n+TCEAL5\t0\n+TCEAL6\t0\n+TCEAL8\t0\n+TCP11X2\t0\n+TENM1\t0\n+TEX11\t0\n+TEX13A\t0\n+TEX13B\t0\n+TEX28\t0\n+TFDP3\t0\n+TFE3\t4\n+THOC2\t79\n+TIMM17B\t0\n+TIMM8A\t1\n+TLR8-AS1\t1\n+TMEM185
A\t1\n+TMEM255A\t0\n+TMEM27\t0\n+TMEM47\t0\n+TMLHE\t0\n+TMLHE-AS1\t0\n+TMSB15A\t0\n+TRAPPC2\t0\n+TREX2\t0\n+TRMT2B\t0\n+TRPC5\t0\n+TSC22D3\t0\n+TSPAN6\t0\n+TTC3P1\t0\n+UBL4A\t0\n+UPF3B\t2\n+UQCRBP1\t1\n+USP26\t0\n+USP27X-AS1\t0\n+USP51\t0\n+UXT\t2\n+VCX2\t0\n+VCX3A\t0\n+VSIG4\t0\n+WDR45\t0\n+WNK3\t0\n+XAGE1B\t0\n+XAGE1E\t0\n+XAGE2\t0\n+XAGE3\t0\n+XIST\t0\n+XKRX\t0\n+XRCC6P5\t0\n+ZBED1\t0\n+ZC4H2\t0\n+ZCCHC5\t0\n+ZDHHC15\t0\n+ZDHHC9\t0\n+ZFX-AS1\t0\n+ZMAT1\t0\n+ZMYM3\t0\n+ZNF182\t4\n+ZNF280C\t9\n+ZNF41\t0\n+ZNF630\t0\n+ZNF674\t0\n+ZNF75D\t0\n+ZXDA\t0\n+AMELY\t0\n+ASMTL\t0\n+BCORP1\t0\n+BPY2\t0\n+BPY2B\t0\n+BPY2C\t0\n+CD24\t0\n+CDY1\t0\n+CDY1B\t0\n+CDY2A\t0\n+CDY2B\t0\n+CRLF2\t0\n+CSPG4P1Y\t0\n+DAZ1\t0\n+DAZ3\t0\n+DAZ4\t0\n+DDX11L16\t0\n+DHRSX\t0\n+FAM197Y2\t0\n+FAM197Y5\t0\n+FAM224A\t0\n+FAM224B\t0\n+FAM41AY1\t0\n+FAM41AY2\t0\n+GOLGA2P2Y\t0\n+GOLGA2P3Y\t0\n+GTPBP6\t0\n+GYG2P1\t0\n+HSFY1\t0\n+HSFY2\t0\n+KDM5D\t0\n+LINC00280\t0\n+LOC101929148\t0\n+NCRNA00185\t0\n+NLGN4Y-AS1\t0\n+P2RY8\t0\n+PPP2R3B\t0\n+PRORY\t0\n+PRY\t0\n+PRY2\t0\n+RBMY1A1\t0\n+RBMY1A3P\t0\n+RBMY1B\t0\n+RBMY1D\t0\n+RBMY1E\t0\n+RBMY1F\t0\n+RBMY1J\t0\n+RBMY2EP\t0\n+RBMY3AP\t0\n+SLC25A6\t0\n+SRY\t0\n+TTTY1\t0\n+TTTY10\t0\n+TTTY11\t0\n+TTTY13\t0\n+TTTY14\t0\n+TTTY16\t0\n+TTTY17A\t0\n+TTTY17B\t0\n+TTTY17C\t0\n+TTTY18\t0\n+TTTY1B\t0\n+TTTY2\t0\n+TTTY20\t0\n+TTTY21\t0\n+TTTY21B\t0\n+TTTY23\t0\n+TTTY23B\t0\n+TTTY2B\t0\n+TTTY3\t0\n+TTTY3B\t0\n+TTTY4\t0\n+TTTY4B\t0\n+TTTY4C\t0\n+TTTY5\t0\n+TTTY6\t0\n+TTTY6B\t0\n+TTTY7\t0\n+TTTY7B\t0\n+TTTY8\t0\n+TTTY8B\t0\n+TTTY9A\t0\n+TTTY9B\t0\n+UTY\t0\n+VCY\t0\n+VCY1B\t0\n+XKRY\t0\n+XKRY2\t0\n+ZBED1\t0\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.geneBodyCoverage.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.geneBodyCoverage.txt Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,104 @@ +Total reads: 79258 +Fragment number: 50503 +percentile count +0 200 +1 400 +2 561 +3 726 +4 832 +5 920 +6 1032 +7 1085 +8 1102 +9 1152 +10 1242 +11 1245 +12 1193 +13 1258 +14 1260 +15 1306 +16 1238 +17 1246 +18 1259 +19 1290 +20 1318 +21 1304 +22 1374 +23 1410 +24 1391 +25 1477 +26 1438 +27 1445 +28 1468 +29 1418 +30 1457 +31 1443 +32 1382 +33 1404 +34 1422 +35 1489 +36 1478 +37 1445 +38 1467 +39 1484 +40 1486 +41 1446 +42 1485 +43 1474 +44 1468 +45 1433 +46 1448 +47 1466 +48 1466 +49 1472 +50 1493 +51 1446 +52 1465 +53 1507 +54 1489 +55 1536 +56 1628 +57 1639 +58 1606 +59 1638 +60 1612 +61 1612 +62 1593 +63 1631 +64 1672 +65 1715 +66 1767 +67 1782 +68 1832 +69 1881 +70 1866 +71 1877 +72 1938 +73 1992 +74 1969 +75 1940 +76 1936 +77 1936 +78 1960 +79 1988 +80 1949 +81 1964 +82 1955 +83 1988 +84 1984 +85 2020 +86 2055 +87 2110 +88 2140 +89 2100 +90 2106 +91 2128 +92 2135 +93 2150 +94 2146 +95 2062 +96 1995 +97 1875 +98 1549 +99 982 +100 284 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.geneBodyCoverage_plot.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.geneBodyCoverage_plot.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,6 @@ +png('/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.geneBodyCoverage.png',width=500,height=500,units='px') +x=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) +y=c(200,400,561,726,832,920,1032,1085,1102,1152,1242,1245,1193,1258,1260,1306,1238,1246,1259,1290,1318,1304,1374,1410,1391,1477,1438,1445,1468,1418,1457,1443,1382,1404,1422,1489,1478,1445,1467,1484,1486,1446,1485,1474,1468,1433,1448,1466,1466,1472,1493,1446,1465,1507,1489,1536,1628,1639,1606,1638,1612,1612,1593,1631,1672,1715,1767,1782,1832,1881,1866,1877,1938,1992,1969,1940,1936,1936,1960,1988,1949,1964,1955,1988,1984,2020,2055,2110,2140,2100,2106,2128,2135,2150,2146,2062,1995,1875,1549,982,284) +smoothsp = smooth.spline(x,y,spar=0.35) +plot(smoothsp,type="l",col="blue",xlab="Percentile of Gene Body (5'->3')",ylab="Number of read",xlim=c(0,100)) +dev.state = dev.off() \ No newline at end of file |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.mapq_profile.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.mapq_profile.r Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,12 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.mapq_profile.png",width=500,height=500,units="px") +mapq_val=c(0,1,3,255) +mapq_count=c(589,3042,8955,66672) +xname=c("<3","<10","<20","<30","30-255") +freq = rep(0,5) +freq[1] = sum(mapq_count[which(mapq_val<3)])/79258*100 +freq[2] = sum(mapq_count[which(mapq_val<10)])/79258*100 +freq[3] = sum(mapq_count[which(mapq_val<20)])/79258*100 +freq[4] = sum(mapq_count[which(mapq_val<30)])/79258*100 +freq[5] = 100 +barplot(freq,beside=T,xlab="Mapping Quality",border="NA",space=1.5,main="Mapping Quality",ylim=c(0,100),ylab="Cumulative proportion (%)",col="blue",names.arg=xname) +dev.state=dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.mapq_profile.xls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.mapq_profile.xls Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +MAPQ Read_Total Read_with_mapq +0 79258 589 +1 79258 3042 +3 79258 8955 +255 79258 66672 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.read_distr.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.read_distr.r Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,6 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.read_distr.png",width=500,height=500,units="px") +M=c(24849,1533,16115,8729,205,453,7563,14335) +Mname=c("CDS","5UTR","3UTR","Intron","TSS_Up_1Kb","TES_Down_1Kb","rRNA","Others") +val = barplot(M,xlab="",space=1,ylab="Read Counts",col="blue",border="NA") +text(x=seq(val[1],val[8],by=2),y=rep(0,8),srt=60,adj=0,offset=2,pos=1,xpd=T,labels=Mname) +dev.state = dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.read_distr_pie.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.read_distr_pie.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,3 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.read_distr_pie.png",width=500,height=500,units="px") +pie(c(35430,508205),labels=c("Covered 35430 exons","Uncovered"),main="Exons",radius=0.6,clockwise=T,col=c("blue","white")) +dev.state = dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.readlen_profile.xls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.readlen_profile.xls Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,35 @@ +Position Read_Total Read_Len_mapped +67 79258 182 +68 79258 173 +69 79258 267 +70 79258 238 +71 79258 308 +72 79258 340 +73 79258 386 +74 79258 354 +75 79258 392 +76 79258 415 +77 79258 421 +78 79258 399 +79 79258 419 +80 79258 456 +81 79258 470 +82 79258 469 +83 79258 496 +84 79258 483 +85 79258 489 +86 79258 488 +87 79258 529 +88 79258 499 +89 79258 514 +90 79258 561 +91 79258 748 +92 79258 813 +93 79258 751 +94 79258 827 +95 79258 863 +96 79258 912 +97 79258 958 +98 79258 1648 +99 79258 1875 +100 79258 47529 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp1.res.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp1.res.txt Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,49 @@ +filename /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp1.res.txt +is_pairEnd 0 +clipping_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.clipping_profile.png +mapq_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.mapq_profile.png +mapq_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp1.mapq_profile.xls +read_cov_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.geneBodyCoverage.png +trans_cov_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.TransCoverage.png +insert_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.inner_distance_plot.png +insert_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp1.inner_distance_freq.txt +read_dist_plot_file1 /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.read_distr.png +read_dist_plot_file2 /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.read_distr_pie.png +readLen_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp1.readlen_profile.png +geneCount_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp1.geneAbundance.txt +seqDeDup_percent 0.000000 +posDeDup_percent 0.000000 +no_clipping 0 +no_rRNA 0 +total_reads 79258 +uniq_mapped_reads 66672 +multi_mapped_reads 12586 +unmapped_reads 0 +low_qual 0 +low_qual_read1 0 +low_qual_read2 0 +pcr_dup 0 +rRNA_read 7563 +cds_read 24849 +utr5_read 1533 +utr3_read 16115 +intron_read 8729 +itgup1k_read 205 +itgdn1k_read 453 +itg_read 14335 +unmapped_read1 0 +unmapped_read2 0 +mapped_read1 0 +mapped_read2 0 +forward_read 33207 +reverse_read 33465 +paired_reads 0 +mapped_plus_minus 0 +mapped_plus_plus 0 +mapped_minus_plus 0 +mapped_minus_minus 0 +ins_read 0 +del_read 0 +noSplice 52213 +splice 14459 +paired_diff_chrom 0 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.ReadLen_plot.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.ReadLen_plot.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.readlen_profile.png",width=500,height=500,units="px") +readlen_val=c(67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) +readlen_count=c(182,177,265,292,322,337,407,384,377,408,421,433,465,412,463,458,469,448,457,476,520,507,519,576,723,741,800,824,805,847,931,1476,1885,50582) +plot(readlen_val,(readlen_count/80603),pch=20,xlab="Mapped Read Length",ylab="Proportion",col="blue") +dev.state=dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.TransCoverage.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.TransCoverage.r Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| b"@@ -0,0 +1,10 @@\n+png('/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.TransCoverage.png',width=500,height=500,units='px')\n+a=c(0,0,4,0,0,0,0,166,0,0,0,0,0,0,0,0,3,0,0,0,1,1,0,2,0,2,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,6,2,0,0,31,0,1,9,0,0,0,0,0,1,0,3,1,14,0,0,10,10,2,2,0,0,0,0,0,1,0,0,0,4,0,0,0,0,0,0,6,0,2,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,5,0,0,1,0,0,2,0,0,0,0,0,0,0,1,0,0,0,4,0,0,0,0,1,2,0,0,6,0,0,2,0,0,10,0,0,0,0,0,0,0,0,0,0,0,2,2,0,4,5,5,0,16,0,0,0,0,0,0,0,4,5,13,2,0,0,0,0,0,0,4,3,0,0,0,0,0,2,6,6,0,0,0,0,0,0,0,3,0,0,1,0,7,0,1,0,0,0,1,2,0,0,6,0,0,0,0,0,0,0,1,0,0,8,0,0,0,0,0,0,0,0,0,2,0,6,0,1,0,0,0,0,2,0,0,2,0,0,0,0,0,1,2,0,0,0,4,1,0,1,1,0,0,0,0,1,0,0,3,0,0,21,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,1,0,3,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,147,0,0,0,0,0,1,0,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,1,0,4,1,0,0,0,3,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,2,5,0,4,0,5,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,8,0,6,1,0,0,0,0,0,0,0,0,0,0,4,0,2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,1,4,0,0,0,0,0,0,1,0,0,0,0,0,2,5,4,0,0,0,1,0,0,2,4,6,0,0,0,0,0,5,0,15,0,0,4,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,5,14,0,0,2,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,16,0,2,0,0,0,0,0,0,14,1,4,0,0,0,3,3,0,0,0,1,0,11,0,2,0,1,6,2,2,0,0,0,0,15,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,31,1,6,0,0,0,0,2,0,2,0,0,0,0,3,3,0,0,0,0,0,25,3,0,4,0,32,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,12,3,0,0,0,0,7,0,1,0,0,0,2,0,3,0,0,1,0,0,0,0,1,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,8,0,0,0,0,12,0,1,2,0,3,0,111,0,0,1,0,10,1,1,0,1,0,1,0,0,6,1,1,6,0,0,0,1,0,0,0,0,13,6,0,0,0,0,1,0,0,2,2,0,0,0,0,0,0,4,0,1,0,0,0,0,0,0,0
,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,3,0,0,0,0,0,0,2,0,1,4,7,3,0,20,0,0,2,3,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,1,1,2,0,2,3,2,0,0,8,2,0,0,0,1,2,0,0,0,0,0,0,514,0,0,0,0,0,0,3,6,17,4,1,3,0,0,1,0,0,0,0,0,0,0,0,0,2,5,19,7,0,1,2,1,4,0,16,0,0,1,3,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,2,4,1,1,0,1,0,0,4,3,0,1,0,0,0,5,0,0,0,0,0,0,0,0,4,0,0,1,0,0,0,0,0,1,0,0,1,0,2,0,0,1,1,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,6,0,2,5,0,0,0,0,0,0,0,0,0,1,0,0,0,1,2,9,38,0,0,0,0,1,4,3,2,2,3,0,0,2,0,0,5,0,0,8,0,0,0,0,1,0,1,6,0,0,1,0,0,0,0,0,0,0,0,4,0,0,0,1,2,0,0,0,0,0,5,0,0,1,0,1,0,6,0,7,0,0,0,0,0,0,1,0,0,0,1,0,0,1,1,0,0,0,0,0,0,0,1,0,1,1,0,0,0,2,0,1,0,0,0,0,3,0,0,0,0,1,2,2,1,1,2,0,1,2,2,5,1,0,0,18,1,5,11,1,0,0,0,0,0,2,1,1,0,0,0,0,0,1,0,0,0,2,5,2,2,0,0,1,2,0,0,0,0,0,0,25,1,1,1,3,2,1,5,0,0,3,2,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,63,0,0,6,0,0,0,0,0,0,4,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,3,0,0,0,6,0,3,0,0,0,0,0,0,8,3,0,0,2,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,3,0,3,5,0,9,1,0,0,0,1,5,45,0,0,0,0,3,0,79,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,6,14,2,0,0,0,0,0,0,0,0,1,0,3,0,0,4,0,0,3,0,0,0,1,0,0,1,0,0,10,1,17,0,2,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,3,0,0,0,0,0,0,0,5,5,0,1,0,0,0,0,4,0,0,0,1,0,0,0,0,12,0,2,5,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,5,0,1,0,1,0,0,0,0,10,2,19,0,0,9,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,5,2,0,0,3,0,7,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,18,10,0,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,3,0,0,0,2,0,0,9,2,0,0,0,0,0,0,0,1,1,23,0,0,1,1,2,0,0,2,0,0,0,0,1,1,0,11,0,0,0,0,0,0,0,0,0,1,56,1,0,0,4,0,0,1,4,0,0,0,0,4,0,1,0,1,0,1,5,1,0,1,26,0,0,0,1,0,0,0,0,2,0,1,5,0,2,27,3,0,0,0,6,0,0,0,0,2,0,0,0,0,0,0,6,0,0,3,1,0,0,0,0,0,3,0,0,0,2,0,0,30,0,0,0,4,0,1,0,0,0,0,15,0,0,0,0,0,1,0,0,0,0,2,0,0,3,2,0,2,0,1,2,0,39,3,1,0,0,1,2,1,0,0,3,4,0,0,1,4,1,1,1,1,2,6,0,0,8,0,5,0,3,0,0,0,0,0,1,0,0,0,4,0,0,0,3,1,0,0,0,0,0,4,0,0,0,3,6,0,0,0,0,0,0,2,7,0,0,0,1,0,5,0,
0,0,0"..b',0,0,0,5,2,1,0,0,0,1,0,0,0,0,0,4,1,9,3,0,0,2,0,0,0,0,0,0,4,0,22,0,0,14,0,3,0,0,0,0,0,2,0,0,0,2,0,14,2,38,0,0,0,0,0,0,0,0,0,4,0,0,0,0,32,0,0,2,0,0,0,1,5,23,0,0,2,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,16,0,0,1,3,0,0,3,1,29,3,8,0,0,0,5,4,4,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,57,1,0,0,0,0,0,0,21,1,0,0,1,0,2,0,3,0,0,9,0,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,0,0,1,0,0,0,0,2,0,0,0,6,6,5,0,0,5,5,2,0,0,0,0,0,1,4,0,0,0,9,0,0,0,0,0,0,0,0,0,2,0,0,0,2,0,0,0,0,1,0,5,2,0,0,0,0,0,1,0,0,0,0,0,0,0,0,7,0,2,0,1,1,20,0,0,3,0,6,10,0,0,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,1,1,0,0,0,0,0,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,8,21,0,0,1,2,2,0,0,0,0,7,0,0,0,6,0,0,0,0,0,0,1,54,11,0,0,0,6,9,0,0,2,0,3,3,0,0,0,0,0,0,0,1,0,0,0,0,12,0,3,4,0,0,0,0,0,11,0,6,0,0,0,1,0,2,1,1,15,0,0,2,0,0,0,15,1,20,28,2,0,16,16,0,3,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,0,0,2,0,6,2,1,2,0,0,0,0,0,0,0,0,0,0,0,3,0,1,2,2,21,1,0,0,0,2,0,0,2,0,0,0,2,5,2,0,0,0,0,8,0,4,0,0,0,4,0,0,3,0,6,0,0,4,0,2,0,0,13,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,1,0,0,2,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,4,0,0,0,1,0,0,0,0,0,0,6,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,1,7,1,0,8,10,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,1,0,0,0,2,0,0,1,0,2,1,1,0,0,4,0,0,0,0,1,0,1,0,0,0,3,8,0,0,0,5,0,3,2,0,15,5,3,1,0,7,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,3,1,0,0,1,1,0,8,0,0,0,0,0,0,6,0,0,5,0,0,0,0,0,2,0,0,0,0,5,3,0,2,4,7,0,0,0,0,0,0,0,0,1,0,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,2,0,2,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,1,1,0,0,0,3,0,2,0,0,0,0,0,3,16,0,0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,10,0,0,0,1,0
,0,5,0,0,11,0,0,0,0,0,0,0,0,0,0,9,1,0,0,5,0,0,1,0,0,5,0,1,0,0,0,0,1,0,0,47,0,0,2,9,0,28,0,0,1,0,1,1,0,6,0,2,0,0,0,0,0,0,0,1,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,2,0,0,0,0,12,0,0,0,5,0,0,1,0,0,3,0,0,9,0,0,10,0,0,0,0,0,0,0,4,14,2,0,0,0,0,2,0,0,1,1,6,0,4,1,0,0,0,6,4,18,0,0,2,0,0,0,0,1,0,0,4,1,1,1,0,0,19,0,1,0,0,0,0,0,0,0,0,0,0,1,2,0,0,0,0,0,0,0,0,1,3,0,0,0,4,0,0,0,0,23,1,0,3,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,14,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,5,0,0,7,0,0,0,4,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,6,0,0,0,0,0,2,0,0,1,0,8,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,2,2,0,0,1,0,1,0,0,0,0,0,0,0,1,3,0,0,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,3,0,0,1,1,1,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,0,0,0,2,4,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,1,7,3,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,4,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,12,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,50,1,5,0,0,0,0,0,0,2,2,0,0,1,2,1,13,2,0,0,0,0,0,0,0,18,0,1,0,0,0,0,0,0,1,0,0,4,0,0,0,0,0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,1,0,0,4,0,0,3,0,0,0,0,0,0,0,0,0,0,7,40,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,3,1,11,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)\n+Fn = ecdf(a)\n+max_x = round(log(max(knots(Fn)),2),0)\n+xx = c(0,2^seq(0,max_x,by=2))\n+y=Fn(xx)\n+xlog = log(xx[2:length(xx)],base=2)\n+plot(x=c(-1,xlog),y=y,xaxt = \'n\',type="b",col="blue",pch=20,xlab="Number of Reads",ylab="Cumulative proportion of Genes")\n+axis(1,at = c(-1,seq(0,max_x,by=2)),labels=c(0,2^seq(0,max_x,by=2)))\n+dev.state = dev.off()\n\\ No 
newline at end of file\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.clipping_profile.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.clipping_profile.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.clipping_profile.png",width=500,height=500,units="px") +read_pos=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99) +count=c(9897,8881,8055,7532,7071,6644,6233,5790,5380,4983,4673,4419,4164,3892,3642,3403,3180,2938,2701,2456,2234,2000,1788,1577,1358,1179,998,799,620,447,306,184,92,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,77,152,275,404,546,700,889,1068,1241,1434,1632,1854,2085,2269,2480,2702,2931,3160,3397,3636,3894,4147,4415,4708,5084,5476,5908,6362,6820,7294,7783,8595,9576) +plot(read_pos,1-(count/80603),pch=20,xlab="Position of reads",ylab="Mappability",col="blue") +dev.state=dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.clipping_profile.xls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.clipping_profile.xls Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,101 @@ +Position Read_Total Read_clipped +0 80603 9897 +1 80603 8881 +2 80603 8055 +3 80603 7532 +4 80603 7071 +5 80603 6644 +6 80603 6233 +7 80603 5790 +8 80603 5380 +9 80603 4983 +10 80603 4673 +11 80603 4419 +12 80603 4164 +13 80603 3892 +14 80603 3642 +15 80603 3403 +16 80603 3180 +17 80603 2938 +18 80603 2701 +19 80603 2456 +20 80603 2234 +21 80603 2000 +22 80603 1788 +23 80603 1577 +24 80603 1358 +25 80603 1179 +26 80603 998 +27 80603 799 +28 80603 620 +29 80603 447 +30 80603 306 +31 80603 184 +32 80603 92 +33 80603 0 +34 80603 0 +35 80603 0 +36 80603 0 +37 80603 0 +38 80603 0 +39 80603 0 +40 80603 0 +41 80603 0 +42 80603 0 +43 80603 0 +44 80603 0 +45 80603 0 +46 80603 0 +47 80603 0 +48 80603 0 +49 80603 0 +50 80603 0 +51 80603 0 +52 80603 0 +53 80603 0 +54 80603 0 +55 80603 0 +56 80603 0 +57 80603 0 +58 80603 0 +59 80603 0 +60 80603 0 +61 80603 0 +62 80603 0 +63 80603 0 +64 80603 0 +65 80603 0 +66 80603 0 +67 80603 77 +68 80603 152 +69 80603 275 +70 80603 404 +71 80603 546 +72 80603 700 +73 80603 889 +74 80603 1068 +75 80603 1241 +76 80603 1434 +77 80603 1632 +78 80603 1854 +79 80603 2085 +80 80603 2269 +81 80603 2480 +82 80603 2702 +83 80603 2931 +84 80603 3160 +85 80603 3397 +86 80603 3636 +87 80603 3894 +88 80603 4147 +89 80603 4415 +90 80603 4708 +91 80603 5084 +92 80603 5476 +93 80603 5908 +94 80603 6362 +95 80603 6820 +96 80603 7294 +97 80603 7783 +98 80603 8595 +99 80603 9576 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.geneAbundance.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.geneAbundance.txt Thu Mar 24 17:12:52 2016 -0400 |
| b |
| b'@@ -0,0 +1,27321 @@\n+gene\tCounts\n+AADACL3\t0\n+AADACL4\t0\n+ABCD3\t4\n+ACADM\t0\n+ACKR1\t0\n+ACOT11\t0\n+ACTG1P4\t0\n+ACTL8\t166\n+ACTN2\t0\n+ACTRT2\t0\n+ADAM15\t0\n+ADAMTSL4\t0\n+ADCK3\t0\n+ADORA1\t0\n+ADPRHL2\t0\n+AGL\t0\n+AGO1\t3\n+AGO3\t0\n+AGO4\t0\n+AGRN\t0\n+AGTRAP\t1\n+AHCYL1\t1\n+AJAP1\t0\n+AK4\t2\n+AK5\t0\n+AKIRIN1\t2\n+AKR1A1\t0\n+AKR7A2P1\t0\n+ALG6\t0\n+ALPL\t0\n+AMPD2\t1\n+AMY1A\t0\n+AMY1B\t0\n+AMY1C\t0\n+AMY2A\t0\n+AMY2B\t0\n+ANGPTL3\t0\n+ANGPTL7\t0\n+ANKRD34A\t0\n+ANKRD35\t0\n+ANXA9\t0\n+AP4B1-AS1\t0\n+APCS\t0\n+APITD1\t6\n+APITD1-CORT\t2\n+APOA1BP\t0\n+AQP10\t0\n+ARF1\t31\n+ARHGEF10L\t0\n+ARHGEF16\t1\n+ARID1A\t9\n+ARTN\t0\n+ARV1\t0\n+ASH1L-AS1\t0\n+ATAD3A\t0\n+ATAD3B\t0\n+ATAD3C\t1\n+ATF3\t0\n+ATF6\t3\n+ATG4C\t1\n+ATP1A1\t14\n+ATP1A2\t0\n+ATP1A4\t0\n+ATP1B1\t10\n+ATP2B4\t10\n+ATP5F1\t2\n+ATP6V0B\t2\n+ATP8B2\t0\n+ATPIF1\t0\n+ATXN7L2\t0\n+AVPR1B\t0\n+AXDND1\t0\n+AZIN2\t1\n+B3GALT6\t0\n+B4GALT2\t0\n+BCAN\t0\n+BCL9\t4\n+BECN1P1\t0\n+BGLAP\t0\n+BLZF1\t0\n+BMP8A\t0\n+BNIPL\t0\n+BOLA1\t0\n+BRDT\t6\n+BRINP2\t0\n+BROX\t2\n+BSND\t0\n+BTBD19\t0\n+BTBD8\t1\n+BTF3L4\t0\n+BTG2\t0\n+C1QA\t0\n+C1QB\t0\n+C1QC\t0\n+C1orf100\t0\n+C1orf101\t0\n+C1orf105\t0\n+C1orf106\t0\n+C1orf112\t0\n+C1orf115\t0\n+C1orf122\t0\n+C1orf137\t0\n+C1orf146\t0\n+C1orf158\t0\n+C1orf162\t0\n+C1orf167\t0\n+C1orf177\t0\n+C1orf185\t0\n+C1orf21\t2\n+C1orf213\t0\n+C1orf220\t0\n+C1orf226\t5\n+C1orf228\t0\n+C1orf27\t0\n+C1orf50\t1\n+C1orf53\t0\n+C1orf54\t0\n+C1orf56\t2\n+C1orf64\t0\n+C1orf68\t0\n+C1orf94\t0\n+C1orf95\t0\n+C4BPA\t0\n+C4BPB\t0\n+C8A\t0\n+CA14\t1\n+CA6\t0\n+CACHD1\t0\n+CACNA1E\t0\n+CACYBP\t4\n+CADM3\t0\n+CALML6\t0\n+CAMK1G\t0\n+CAMSAP2\t0\n+CAMTA1\t1\n+CAP1\t2\n+CAPN2\t0\n+CAPN9\t0\n+CAPZA1\t6\n+CASQ1\t0\n+CATSPER4\t0\n+CCDC18\t2\n+CCDC185\t0\n+CCDC24\t0\n+CCDC27\t10\n+CCDC28B\t0\n+CCDC30\t0\n+CD101\t0\n+CD1A\t0\n+CD1C\t0\n+CD1D\t0\n+CD1E\t0\n+CD2\t0\n+CD46\t0\n+CD52\t0\n+CD53\t0\n+CD55\t2\n+CDA\t2\n+CDC14A\t0\n+CDC20\t4\n+CDC42\t5\n+CDC7\t5\n+CDC73\t0\n+CDCA8\t16\n+CDK18\t0\n+CDK
N2C\t0\n+CELA2A\t0\n+CELA2B\t0\n+CELA3A\t0\n+CELA3B\t0\n+CELSR2\t0\n+CENPF\t4\n+CEP350\t5\n+CEP85\t13\n+CEPT1\t2\n+CFH\t0\n+CFHR1\t0\n+CFHR2\t0\n+CFHR3\t0\n+CFHR4\t0\n+CFHR5\t0\n+CGN\t4\n+CHD1L\t3\n+CHI3L2\t0\n+CHIA\t0\n+CHIAP2\t0\n+CHRM3\t0\n+CHRNB2\t0\n+CHTOP\t2\n+CIART\t6\n+CKS1B\t6\n+CLCA1\t0\n+CLCA2\t0\n+CLCA3P\t0\n+CLCA4\t0\n+CLCN6\t0\n+CLCNKA\t0\n+CLCNKB\t0\n+CLIC4\t3\n+CMPK1\t0\n+CNIH3\t0\n+CNIH4\t1\n+CNKSR1\t0\n+CNST\t7\n+CNTN2\t0\n+COA6\t1\n+COG2\t0\n+CORT\t0\n+COX20\t0\n+CPT2\t1\n+CPTP\t2\n+CR1\t0\n+CR1L\t0\n+CR2\t6\n+CRB1\t0\n+CRCT1\t0\n+CREB3L4\t0\n+CROCC\t0\n+CSF1\t0\n+CSMD2-AS1\t0\n+CTH\t0\n+CTPS1\t1\n+CTRC\t0\n+CTSE\t0\n+CTTNBP2NL\t8\n+CYB561D1\t0\n+CYCSP52\t0\n+CYMP\t0\n+CYP4A22\t0\n+CYP4B1\t0\n+CYP4X1\t0\n+CYP4Z1\t0\n+CYR61\t0\n+DAB1-AS1\t0\n+DAP3\t2\n+DARS2\t0\n+DCAF6\t6\n+DCDC2B\t0\n+DCLRE1B\t1\n+DCST1\t0\n+DDI2\t0\n+DDR2\t0\n+DDX11L1\t0\n+DDX20\t2\n+DEGS1\t0\n+DEPDC1-AS1\t0\n+DESI2\t2\n+DFFB\t0\n+DHDDS\t0\n+DHX9\t0\n+DIEXF\t0\n+DIO1\t0\n+DISC1\t1\n+DISP1\t2\n+DLEU2L\t0\n+DMAP1\t0\n+DMBX1\t0\n+DMRTB1\t4\n+DNAH14\t1\n+DNAJA1P5\t0\n+DNAJB4\t1\n+DNAJC16\t1\n+DNAJC6\t0\n+DNALI1\t0\n+DNASE2B\t0\n+DNM3\t0\n+DPH2\t1\n+DPYD-AS1\t0\n+DPYD-AS2\t0\n+DR1\t3\n+DRAXIN\t0\n+DRD5P2\t0\n+DTL\t21\n+DUSP12\t0\n+DUSP23\t0\n+DUSP27\t0\n+DUSP5P1\t0\n+DYRK3\t0\n+ECM1\t0\n+EDARADD\t0\n+EFCAB14-AS1\t0\n+EFCAB2\t1\n+EFCAB7\t1\n+EFHD2\t0\n+EFNA1\t0\n+EFNA3\t0\n+EFNA4\t0\n+EIF3I\t0\n+ELAVL4\t0\n+ELF3\t0\n+EMBP1\t0\n+ENO1-AS1\t0\n+EPB41\t1\n+EPHA8\t1\n+EPHB2\t0\n+EPHX1\t0\n+EPHX4\t0\n+ERICH3-AS1\t0\n+ERMAP\t1\n+ESPN\t0\n+EXO1\t3\n+EXO5\t2\n+EXTL1\t0\n+FAAH\t0\n+FAAHP1\t0\n+FAM102B\t0\n+FAM110D\t0\n+FAM159A\t0\n+FAM163A\t0\n+FAM167B\t0\n+FAM177B\t0\n+FAM183A\t0\n+FAM19A3\t0\n+FAM20B\t0\n+FAM212B-AS1\t0\n+FAM213B\t0\n+FAM231A\t0\n+FAM231D\t0\n+FAM43B\t0\n+FAM46C\t147\n+FAM71A\t0\n+FAM72A\t0\n+FAM72B\t0\n+FAM72C\t0\n+FAM73A\t0\n+FAM76A\t1\n+FAM87B\t0\n+FASLG\t0\n+FBLIM1\t2\n+FBXO28\t2\n+FBXO44\t0\n+FBXO6\t0\n+FCER1A\t0\n+FCER1G\t0\n+FCGR1A\t0\n+FCGR1C\t0\n+FCGR2A\t0\n+FCGR2B\t0
\n+FCGR2C\t0\n+FCRL6\t0\n+FCRLA\t0\n+FCRLB\t0\n+FDPS\t0\n+FGGY\t1\n+FHAD1\t0\n+FLAD1\t1\n+FLG-AS1\t0\n+FLJ23867\t0\n+FLJ31662\t0\n+FLVCR1\t0\n+FMN2\t1\n+FMO1\t0\n+FMO2\t4\n+FMO3\t1\n+FMO4\t0\n+FMO6P\t0\n+FMO9P\t0\n+FNBP1L\t3\n+FNDC7\t0\n+FOXD2\t0\n+FOXD3\t0\n+FOXE3\t0\n+FOXO6\t1\n+FPGT\t0\n+FPGT-TNNI3K\t0\n+G0S2\t0\n+GABPB2\t0\n+GABRD\t0\n+GADD45A\t1\n+GALNT2\t0\n+GAS5-AS1\t0\n+GBP1P1\t0\n+GBP6\t0\n+GCSAML\t0\n+GGPS1\t0\n+GIPC2\t1\n+GJA4\t0\n+GJA8\t0\n+GJB3\t0\n+GJB4\t0\n+GJB5\t0\n+GJC2\t0\n+GM140\t0\n+GMEB1\t2\n+GNAI3\t5\n+GNG12-AS1\t0\n+GNPAT\t4\n+GORAB\t0\n+GPR137B\t5\n+GPR25\t0\n+GPR3\t0\n+GPR37L1\t0\n+GPR52\t0\n+GPR61\t0\n+GPR88\t0\n+GPR89B\t0\n+GPSM2\t0\n+GPX7\t0\n+GRHL3\t2\n+GSTM1\t0\n+GSTM2\t0\n+GSTM4\t0\n+GSTM5\t0\n+GUCA2B\t0\n+GUK1\t1\n+H3F3A\t0\n+H3F3AP4\t0\n+H6PD\t0\n+HAO2\t0\n+HAO2-IT1\t0\n+HAPLN2\t0\n+H'..b'+LOC401585\t0\n+LOC643486\t0\n+LOC729609\t0\n+LRCH2\t0\n+MAGEA1\t0\n+MAGEA10\t0\n+MAGEA10-MAGEA5\t0\n+MAGEA12\t0\n+MAGEA2\t0\n+MAGEA2B\t0\n+MAGEA3\t0\n+MAGEA5\t0\n+MAGEA8-AS1\t0\n+MAGEA9\t0\n+MAGEA9B\t0\n+MAGEC2\t0\n+MAGED4\t0\n+MAGED4B\t0\n+MAGEE2\t0\n+MAGT1\t0\n+MAOB\t0\n+MAP2K4P1\t0\n+MAP3K15\t0\n+MAP7D2\t4\n+MAP7D3\t0\n+MBNL3\t0\n+MCF2\t0\n+MECP2\t2\n+MED14\t4\n+MID1\t2\n+MID1IP1-AS1\t0\n+MIR105-1\t0\n+MIR105-2\t0\n+MIR106A\t0\n+MIR1184-1\t0\n+MIR1184-2\t0\n+MIR1184-3\t0\n+MIR1256\t0\n+MIR1468\t0\n+MIR18B\t0\n+MIR19B2\t0\n+MIR20B\t0\n+MIR221\t0\n+MIR222\t0\n+MIR23C\t0\n+MIR3202-2\t0\n+MIR320D2\t0\n+MIR325HG\t0\n+MIR363\t0\n+MIR374A\t0\n+MIR374B\t0\n+MIR384\t0\n+MIR421\t0\n+MIR424\t0\n+MIR4328\t0\n+MIR4329\t0\n+MIR450A1\t0\n+MIR450A2\t0\n+MIR450B\t0\n+MIR452\t0\n+MIR4770\t0\n+MIR503\t0\n+MIR503HG\t0\n+MIR504\t0\n+MIR505\t0\n+MIR506\t0\n+MIR507\t0\n+MIR508\t0\n+MIR509-1\t0\n+MIR509-2\t0\n+MIR509-3\t0\n+MIR510\t0\n+MIR514A1\t0\n+MIR514A2\t0\n+MIR514A3\t0\n+MIR514B\t0\n+MIR542\t0\n+MIR545\t0\n+MIR6134\t0\n+MIR6857\t0\n+MIR6894\t0\n+MIR6895\t0\n+MIR718\t0\n+MIR766\t0\n+MIR767\t0\n+MIR8088\t0\n+MIR888\t0\n+MIR890\t0\n+MIR891A\t0\n+MIR891B\t0\n+MIR892A
\t0\n+MIR892B\t0\n+MIR892C\t0\n+MIR92A2\t0\n+MIR98\t0\n+MIRLET7F2\t0\n+MMGT1\t10\n+MORC4\t1\n+MORF4L2\t7\n+MOSPD1\t3\n+MPP1\t0\n+MTCP1\t0\n+MTMR8\t0\n+MTRNR2L10\t0\n+MXRA5\t0\n+NAA10\t0\n+NAP1L2\t0\n+NAP1L3\t0\n+NAP1L6\t0\n+NDP\t0\n+NDUFB11\t0\n+NHS-AS1\t0\n+NKAP\t1\n+NKAPP1\t0\n+NKRF\t0\n+NLGN4X\t1\n+NOX1\t0\n+NR0B1\t0\n+NUDT11\t4\n+NUP62CL\t0\n+NXF2\t0\n+NXF2B\t0\n+NXF3\t0\n+NXF5\t0\n+OPHN1\t0\n+OTUD5\t1\n+P2RY4\t0\n+P2RY8\t0\n+PABPC1L2A\t0\n+PABPC1L2B-AS1\t0\n+PABPC5-AS1\t0\n+PAGE1\t0\n+PAGE3\t0\n+PCDH19\t0\n+PCSK1N\t0\n+PCYT1B\t0\n+PDZD11\t0\n+PDZD4\t0\n+PFKFB1\t0\n+PGAM4\t0\n+PHEX-AS1\t0\n+PHF8\t12\n+PHKA1\t0\n+PHKA2\t0\n+PIGA\t1\n+PIM2\t0\n+PIR\t0\n+PIR-FIGF\t0\n+PJA1\t0\n+PLAC1\t0\n+PLS3-AS1\t0\n+PNCK\t0\n+PNMA5\t0\n+PNPLA4\t0\n+POF1B\t0\n+PPEF1-AS1\t0\n+PPP1R2P9\t0\n+PPP2R3B\t0\n+PRAF2\t0\n+PRICKLE3\t0\n+PRKX\t0\n+PSMD10\t1\n+PTCHD1-AS\t0\n+RAB39B\t0\n+RAB40A\t0\n+RAB9B\t0\n+RAI2\t0\n+RAP2C\t1\n+RBBP7\t50\n+RBM41\t1\n+RBMX\t5\n+RENBP\t0\n+RGAG4\t0\n+RHOXF1\t0\n+RHOXF2\t0\n+RHOXF2B\t0\n+RIPPLY1\t0\n+RLIM\t2\n+RNF113A\t2\n+RNU6-2\t0\n+RNU6-28P\t0\n+RP11-87M18.2\t1\n+RPGR\t2\n+RPL39\t1\n+RPS4X\t13\n+RPS6KA3\t2\n+RPS6KA6\t0\n+RS1\t0\n+SATL1\t0\n+SCARNA9L\t0\n+SCML2\t0\n+SEPT6\t0\n+SERPINA7\t0\n+SH3KBP1\t18\n+SHROOM4\t0\n+SLC10A3\t1\n+SLC25A5-AS1\t0\n+SLC25A53\t0\n+SLC25A6\t0\n+SLC35A2\t0\n+SLC38A5\t0\n+SLC7A3\t0\n+SLC9A7\t1\n+SLITRK4\t0\n+SMARCA1\t0\n+SMC1A\t4\n+SMEK3P\t0\n+SMIM9\t0\n+SMPX\t0\n+SNORA11D\t0\n+SNORA11E\t0\n+SNORA69\t0\n+SNORD61\t0\n+SNORD96B\t0\n+SNX12\t5\n+SOX3\t0\n+SPANXA1\t0\n+SPANXA2\t0\n+SPANXC\t0\n+SPANXD\t0\n+SPANXN2\t0\n+SPANXN3\t0\n+SPANXN5\t0\n+SPIN2A\t0\n+SPIN2B\t0\n+SPIN3\t2\n+SPIN4\t0\n+SRPX\t0\n+SSX2\t0\n+SSX2B\t0\n+SSX3\t0\n+SSX4\t0\n+SSX4B\t0\n+SSX5\t0\n+SSX7\t0\n+SSX9\t0\n+SUPT20HL2\t1\n+SYN1\t0\n+SYP\t0\n+SYTL4\t4\n+TAB3\t0\n+TAF7L\t0\n+TAF9B\t3\n+TCEAL5\t0\n+TCEAL6\t0\n+TCEAL8\t0\n+TCP11X2\t0\n+TENM1\t0\n+TEX11\t0\n+TEX13A\t0\n+TEX13B\t0\n+TEX28\t0\n+TFDP3\t0\n+TFE3\t7\n+THOC2\t40\n+TIMM17B\t0\n+TIMM8A\t0\n+TLR8-AS1\t1\n+TMEM185
A\t0\n+TMEM255A\t0\n+TMEM27\t0\n+TMEM47\t0\n+TMLHE\t0\n+TMLHE-AS1\t0\n+TMSB15A\t0\n+TRAPPC2\t0\n+TREX2\t0\n+TRMT2B\t0\n+TRPC5\t0\n+TSC22D3\t0\n+TSPAN6\t0\n+TTC3P1\t0\n+UBL4A\t0\n+UPF3B\t2\n+UQCRBP1\t0\n+USP26\t0\n+USP27X-AS1\t0\n+USP51\t0\n+UXT\t0\n+VCX2\t0\n+VCX3A\t0\n+VSIG4\t0\n+WDR45\t0\n+WNK3\t0\n+XAGE1B\t0\n+XAGE1E\t0\n+XAGE2\t0\n+XAGE3\t0\n+XIST\t0\n+XKRX\t0\n+XRCC6P5\t0\n+ZBED1\t0\n+ZC4H2\t0\n+ZCCHC5\t0\n+ZDHHC15\t0\n+ZDHHC9\t3\n+ZFX-AS1\t0\n+ZMAT1\t0\n+ZMYM3\t3\n+ZNF182\t1\n+ZNF280C\t11\n+ZNF41\t0\n+ZNF630\t0\n+ZNF674\t0\n+ZNF75D\t0\n+ZXDA\t0\n+AMELY\t0\n+ASMTL\t0\n+BCORP1\t0\n+BPY2\t0\n+BPY2B\t0\n+BPY2C\t0\n+CD24\t0\n+CDY1\t0\n+CDY1B\t0\n+CDY2A\t0\n+CDY2B\t0\n+CRLF2\t0\n+CSPG4P1Y\t0\n+DAZ1\t0\n+DAZ3\t0\n+DAZ4\t0\n+DDX11L16\t0\n+DHRSX\t0\n+FAM197Y2\t0\n+FAM197Y5\t0\n+FAM224A\t0\n+FAM224B\t0\n+FAM41AY1\t0\n+FAM41AY2\t0\n+GOLGA2P2Y\t0\n+GOLGA2P3Y\t0\n+GTPBP6\t0\n+GYG2P1\t0\n+HSFY1\t0\n+HSFY2\t0\n+KDM5D\t0\n+LINC00280\t0\n+LOC101929148\t0\n+NCRNA00185\t0\n+NLGN4Y-AS1\t0\n+P2RY8\t0\n+PPP2R3B\t0\n+PRORY\t0\n+PRY\t0\n+PRY2\t0\n+RBMY1A1\t0\n+RBMY1A3P\t0\n+RBMY1B\t0\n+RBMY1D\t0\n+RBMY1E\t0\n+RBMY1F\t0\n+RBMY1J\t0\n+RBMY2EP\t0\n+RBMY3AP\t0\n+SLC25A6\t0\n+SRY\t0\n+TTTY1\t0\n+TTTY10\t0\n+TTTY11\t0\n+TTTY13\t0\n+TTTY14\t0\n+TTTY16\t0\n+TTTY17A\t0\n+TTTY17B\t0\n+TTTY17C\t0\n+TTTY18\t0\n+TTTY1B\t0\n+TTTY2\t0\n+TTTY20\t0\n+TTTY21\t0\n+TTTY21B\t0\n+TTTY23\t0\n+TTTY23B\t0\n+TTTY2B\t0\n+TTTY3\t0\n+TTTY3B\t0\n+TTTY4\t0\n+TTTY4B\t0\n+TTTY4C\t0\n+TTTY5\t0\n+TTTY6\t0\n+TTTY6B\t0\n+TTTY7\t0\n+TTTY7B\t0\n+TTTY8\t0\n+TTTY8B\t0\n+TTTY9A\t0\n+TTTY9B\t0\n+UTY\t0\n+VCY\t0\n+VCY1B\t0\n+XKRY\t0\n+XKRY2\t0\n+ZBED1\t0\n' |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.geneBodyCoverage.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.geneBodyCoverage.txt Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,104 @@ +Total reads: 80603 +Fragment number: 45981 +percentile count +0 172 +1 322 +2 489 +3 620 +4 693 +5 723 +6 766 +7 838 +8 861 +9 939 +10 1020 +11 1028 +12 988 +13 1055 +14 1122 +15 1087 +16 1031 +17 1082 +18 1111 +19 1119 +20 1184 +21 1144 +22 1167 +23 1176 +24 1145 +25 1143 +26 1129 +27 1185 +28 1192 +29 1207 +30 1225 +31 1185 +32 1213 +33 1241 +34 1268 +35 1313 +36 1240 +37 1204 +38 1190 +39 1235 +40 1269 +41 1295 +42 1328 +43 1308 +44 1229 +45 1220 +46 1252 +47 1273 +48 1272 +49 1295 +50 1283 +51 1335 +52 1335 +53 1334 +54 1377 +55 1385 +56 1434 +57 1411 +58 1407 +59 1424 +60 1420 +61 1450 +62 1458 +63 1483 +64 1511 +65 1560 +66 1583 +67 1627 +68 1669 +69 1710 +70 1777 +71 1792 +72 1812 +73 1825 +74 1802 +75 1862 +76 1857 +77 1900 +78 1880 +79 1874 +80 1867 +81 1859 +82 1869 +83 1859 +84 1851 +85 1897 +86 1883 +87 1971 +88 2030 +89 1992 +90 1971 +91 2017 +92 2030 +93 1983 +94 1995 +95 1959 +96 1896 +97 1787 +98 1461 +99 980 +100 288 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.geneBodyCoverage_plot.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.geneBodyCoverage_plot.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,6 @@ +png('/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.geneBodyCoverage.png',width=500,height=500,units='px') +x=c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100) +y=c(172,322,489,620,693,723,766,838,861,939,1020,1028,988,1055,1122,1087,1031,1082,1111,1119,1184,1144,1167,1176,1145,1143,1129,1185,1192,1207,1225,1185,1213,1241,1268,1313,1240,1204,1190,1235,1269,1295,1328,1308,1229,1220,1252,1273,1272,1295,1283,1335,1335,1334,1377,1385,1434,1411,1407,1424,1420,1450,1458,1483,1511,1560,1583,1627,1669,1710,1777,1792,1812,1825,1802,1862,1857,1900,1880,1874,1867,1859,1869,1859,1851,1897,1883,1971,2030,1992,1971,2017,2030,1983,1995,1959,1896,1787,1461,980,288) +smoothsp = smooth.spline(x,y,spar=0.35) +plot(smoothsp,type="l",col="blue",xlab="Percentile of Gene Body (5'->3')",ylab="Number of read",xlim=c(0,100)) +dev.state = dev.off() \ No newline at end of file |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.mapq_profile.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.mapq_profile.r Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,12 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.mapq_profile.png",width=500,height=500,units="px") +mapq_val=c(0,1,3,255) +mapq_count=c(525,2546,8143,69389) +xname=c("<3","<10","<20","<30","30-255") +freq = rep(0,5) +freq[1] = sum(mapq_count[which(mapq_val<3)])/80603*100 +freq[2] = sum(mapq_count[which(mapq_val<10)])/80603*100 +freq[3] = sum(mapq_count[which(mapq_val<20)])/80603*100 +freq[4] = sum(mapq_count[which(mapq_val<30)])/80603*100 +freq[5] = 100 +barplot(freq,beside=T,xlab="Mapping Quality",border="NA",space=1.5,main="Mapping Quality",ylim=c(0,100),ylab="Cumulative proportion (%)",col="blue",names.arg=xname) +dev.state=dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.mapq_profile.xls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.mapq_profile.xls Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,5 @@ +MAPQ Read_Total Read_with_mapq +0 80603 525 +1 80603 2546 +3 80603 8143 +255 80603 69389 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.read_distr.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.read_distr.r Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,6 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.read_distr.png",width=500,height=500,units="px") +M=c(22143,1293,15499,7798,156,511,5939,21398) +Mname=c("CDS","5UTR","3UTR","Intron","TSS_Up_1Kb","TES_Down_1Kb","rRNA","Others") +val = barplot(M,xlab="",space=1,ylab="Read Counts",col="blue",border="NA") +text(x=seq(val[1],val[8],by=2),y=rep(0,8),srt=60,adj=0,offset=2,pos=1,xpd=T,labels=Mname) +dev.state = dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.read_distr_pie.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.read_distr_pie.r Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,3 @@ +png("/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.read_distr_pie.png",width=500,height=500,units="px") +pie(c(32831,510804),labels=c("Covered 32831 exons","Uncovered"),main="Exons",radius=0.6,clockwise=T,col=c("blue","white")) +dev.state = dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.readlen_profile.xls --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.readlen_profile.xls Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,35 @@ +Position Read_Total Read_Len_mapped +67 80603 182 +68 80603 177 +69 80603 265 +70 80603 292 +71 80603 322 +72 80603 337 +73 80603 407 +74 80603 384 +75 80603 377 +76 80603 408 +77 80603 421 +78 80603 433 +79 80603 465 +80 80603 412 +81 80603 463 +82 80603 458 +83 80603 469 +84 80603 448 +85 80603 457 +86 80603 476 +87 80603 520 +88 80603 507 +89 80603 519 +90 80603 576 +91 80603 723 +92 80603 741 +93 80603 800 +94 80603 824 +95 80603 805 +96 80603 847 +97 80603 931 +98 80603 1476 +99 80603 1885 +100 80603 50582 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp2.res.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp2.res.txt Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,49 @@ +filename /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp2.res.txt +is_pairEnd 0 +clipping_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.clipping_profile.png +mapq_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.mapq_profile.png +mapq_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp2.mapq_profile.xls +read_cov_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.geneBodyCoverage.png +trans_cov_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.TransCoverage.png +insert_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.inner_distance_plot.png +insert_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp2.inner_distance_freq.txt +read_dist_plot_file1 /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.read_distr.png +read_dist_plot_file2 /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.read_distr_pie.png +readLen_plot_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp2.readlen_profile.png +geneCount_file /sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/data/smp2.geneAbundance.txt +seqDeDup_percent 0.000000 +posDeDup_percent 0.000000 +no_clipping 0 +no_rRNA 0 +total_reads 80603 +uniq_mapped_reads 69389 +multi_mapped_reads 11214 +unmapped_reads 0 +low_qual 0 +low_qual_read1 0 +low_qual_read2 0 +pcr_dup 0 +rRNA_read 5939 +cds_read 22143 +utr5_read 1293 +utr3_read 15499 +intron_read 7798 +itgup1k_read 156 +itgdn1k_read 511 +itg_read 21398 +unmapped_read1 0 +unmapped_read2 0 +mapped_read1 0 +mapped_read2 0 +forward_read 34065 +reverse_read 35324 +paired_reads 0 +mapped_plus_minus 0 +mapped_plus_plus 0 +mapped_minus_plus 0 +mapped_minus_minus 0 +ins_read 0 +del_read 0 +noSplice 56765 +splice 12624 +paired_diff_chrom 0 |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/data/smp_correlation.r --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/data/smp_correlation.r Thu Mar 24 17:12:52 2016 -0400 |
| [ |
| @@ -0,0 +1,78 @@ +library(corrplot) +srcfiles = c("test1/data/smp0.geneAbundance.txt","test1/data/smp1.geneAbundance.txt","test1/data/smp2.geneAbundance.txt") +destfile = "/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp_corr.png" +f1 = read.delim(srcfiles[1],header=T) +MM=matrix(nrow=length(f1[,1]),ncol=length(srcfiles)) +rownames(MM)=f1[,1] +MM[,1]=f1[,2] +for (i in 2:length(srcfiles)){ + f = read.delim(srcfiles[i],header=T) + MM[,i] = f[,2] } +colnames(MM)=c("smp0","smp1","smp2") +libSize<-colSums(MM) +MM<-t(t(MM)*1000000/libSize) +ss<-rowSums(MM) +M1<-MM[ss>0,] +MM_s<-t(scale(t(M1))) +M.cor<-cor(MM_s,method='sp') +M.cor[is.na(M.cor)]<- 0 +png(destfile,width=500,height=500,units='px') +corrplot(M.cor,is.corr=T,order='FPC',method='color',type='full',add=F,diag=T) +dev.state = dev.off() +nz_genes = length(M1[,1]) +destfile = "/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp_reproducibility.png" +if(nz_genes >0) { +png(destfile,width=500,height=500,units='px') +nz_gene_mm = rep(0,length(M1[1,])) +for(i in 1:length(M1[1,])) { +nz_gene_mm[i] = length(which(M1[,i]>0))/nz_genes * 100 } +bplt <- barplot(nz_gene_mm,beside=T,border='NA',space=1.5,ylim=c(0,100),ylab='Genes reproducibly detected (%)',col='blue',names.arg=colnames(MM)) +text(y= nz_gene_mm+2, x= bplt, labels=paste(as.character(round(nz_gene_mm,digits=1)),'%',sep=''), xpd=TRUE) +dev.state = dev.off()} +destfile = "/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp_var.png" +png(destfile,width=500,height=500,units='px') +mad = rep(0,length(M1[,1])) +nz_gene_median = rep(0,length(M1[,1])) +for(i in 1:length(M1[,1])) { +nz_gene_median[i] = median(M1[i,]) +mad[i] = median(abs(M1[i,]-nz_gene_median[i])) } +mad2 = mad[nz_gene_median >0] +nz_gene_median2 = nz_gene_median[nz_gene_median>0] +mad_vs_median = mad2/nz_gene_median2 +nz_gene_median3 = log(nz_gene_median2, base=2) +dd<-data.frame(nz_gene_median3,mad_vs_median) +x = densCols(nz_gene_median3,mad_vs_median, 
colramp=colorRampPalette(c('black', 'white'))) +dd$dens <- col2rgb(x)[1,] + 1L +cols <- colorRampPalette(c("#000099", "#00FEFF", "#45FE4F", "#FCFF00", "#FF9400", "#FF3100"))(256) +dd$col <- cols[dd$dens] +plot(mad_vs_median ~ nz_gene_median3,data=dd[order(dd$dens),], col=col, pch=20,xlab="Gene expression (median RPM log2)",ylab="Median absolute deviation/median") +dev.state = dev.off() +destfile = "/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp_cov.png" +png(destfile,width=500,height=500,units='px') +xname=c("<0.5","0.5-10","10-100",">=100") +Fn_mm = matrix(0,nrow=length(xname),ncol=length(M1[1,])) +rownames(Fn_mm) = xname +colnames(Fn_mm) = c("smp0","smp1","smp2") +for(i in 1:length(M1[1,])) { +Fn_mm[1,i] = length(which(M1[,i]<0.5)) +Fn_mm[2,i] = length(which(M1[,i]>=0.5 & M1[,i]<10)) +Fn_mm[3,i] = length(which(M1[,i]>=10 & M1[,i]<100)) +Fn_mm[4,i] = length(which(M1[,i]>=100)) } +barplot(Fn_mm,main="Gene abundance (RPM)",xlab="Sample",ylab="Frequency",col=c("green","blue","red","yellow"),legend=xname) +dev.state = dev.off() +destfile3 = "/sonas-hs/bsr/hpc/data/yjin/test_BAMqc/exp/test1/figs/smp_qual.png" +srcfiles3 = c("test1/data/smp0.mapq_profile.xls","test1/data/smp1.mapq_profile.xls","test1/data/smp2.mapq_profile.xls") +png(destfile3,width=500,height=500,units='px') +xname=c("<3","3-10","10-20","20-30",">=30") +Fn_mm = matrix(0,nrow=length(xname),ncol=length(srcfiles3)) +rownames(Fn_mm) = xname +colnames(Fn_mm) = c("smp0","smp1","smp2") +for(i in 1:length(srcfiles3)) { + f = read.delim(srcfiles3[i],header=T) + if(length(which(f[,1]<3)) >0){ Fn_mm[1,i] = sum(f[which(f[,1]<3),3])/f[1,2]} +if(length(which(f[,1]>=3 & f[,1]<10)) >0) {Fn_mm[2,i] = sum(f[which(f[,1]<10 & f[,1]>=3),3])/f[1,2]} +if(length(which(f[,1]>=10 & f[,1]<20)) >0) {Fn_mm[3,i] = sum(f[which(f[,1]<20 & f[,1]>=10),3])/f[1,2] } +if(length(which(f[,1]>=20 & f[,1]<30)) >0) {Fn_mm[4,i] = sum(f[which(f[,1]<30 & f[,1]>=20),3])/f[1,2]} +if(length(which(f[,1]>=30)) >0) {Fn_mm[5,i] = 
sum(f[which(f[,1]>=30),3])/f[1,2] }} +barplot(Fn_mm,xlab="Sample",main="Mapping Quality",ylim=c(0,1),ylab="Frequency",col=c("blue","green","yellow","orange","red"),legend=xname) +dev.state = dev.off() |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/ezBAMQC_output.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ezBAMQC/test-data/output/ezBAMQC_output.html Thu Mar 24 17:12:52 2016 -0400 |
| b |
| @@ -0,0 +1,144 @@ +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Strict//EN"> +<html> +<head><title>ezBAMQC Report</title> +<style type="text/css"> +html,body{margin:0;padding:0;height:100%;width:1500px} +div#header{background-color:#F3F2ED;} +div#header h1{text-align:center; height:80px;line-height:80px;margin:0;padding-left:10px;} +div#container{text-align:left;height:100%;width:1500px} +div#navigation{background:#F6F0E0;} +div#navigation{float:left;width:200px;height:100%} +.menu-item ul { +background: #F6F0E0; +font-size: 13px; +line-height: 30px; +height: 0px; +list-style-type: none;overflow: hidden; +padding: 0px; } +.menu-item:hover ul { height: 220px; } + table{ margin:0;padding:0;width:1300px;table-layout:fixed;text-align:left; } +table > thead > tr.tableizer-firstrow > th { padding: 10px; background: lavenderblush;} +table > tbody > tr > td{ padding: 10px; background: #f8f8f8; word-wrap: break word; } +div#footer{background:#BFBD93;} +div#footer p{margin:0;padding:5px 10px} +div#footer{clear:both;width:100%;text-align:center} +div#main{float:right;width:1300px} +a{text-decoration:none; color:#000000;} +a:hover {text-decoration: underline; } +</style> </head> +<body> +<div id ="container"> +<div id="header"><h1>ezBAMQC Report</h1><p text-align="left">Created On: 03-24-2016</p></div> +<div id="wrapper"> +<div class="summary" id="navigation"> +<h2>Summary</h2> +<ul> +<li><div class="menu-item"> +<h4>smp0</h4> +<ul> +<li><a href="#M00">Basic Statistics</a></li> +<li><a href="#M01">Read Distribution</a></li> +<li><a href="#M02">Mappability</a></li> +<li><a href="#M03">Coverage</a></li> +<li><a href="#M04">Read Length and Insertion Size</a></li> +</ul></div></li> +<li><div class="menu-item"> +<h4>smp1</h4> +<ul> +<li><a href="#M10">Basic Statistics</a></li> +<li><a href="#M11">Read Distribution</a></li> +<li><a href="#M12">Mappability</a></li> +<li><a href="#M13">Coverage</a></li> +<li><a href="#M14">Read Length and Insertion Size</a></li> +</ul></div></li> 
+<li><div class="menu-item"> +<h4>smp2</h4> +<ul> +<li><a href="#M20">Basic Statistics</a></li> +<li><a href="#M21">Read Distribution</a></li> +<li><a href="#M22">Mappability</a></li> +<li><a href="#M23">Coverage</a></li> +<li><a href="#M24">Read Length and Insertion Size</a></li> +</ul></div></li> +<li><div class="menu-item"><a href="#M115"><h4>Sample Correlation</h4></a></div></li> +</ul> +</div> +<div id="main" > +<h2>smp0</h2> +<div class="module"><h2 id="M00">Basic Statistics</h2> +<table> +<thead><tr class="tableizer-firstrow"> +<th>Measure</th><th>Value</th></tr></thead> +<tbody><tr><td>Total Reads</td><td>77686</td></tr> +<tr><td>Unique Reads</td><td>64349</td></tr> +<tr><td>Multi-reads</td><td>13337</td></tr> +<tr><td>Unmapped Reads</td><td>0</td></tr> +<tr><td>Low Quality Reads</td><td>0</td></tr> +<tr><td>Forward Reads</td><td>31875</td></tr> +<tr><td>Reverse Reads</td><td>32474</td></tr> +<tr><td>Splice Reads</td><td>13900</td></tr> +<tr><td>Non-Splice Reads</td><td>50449</td></tr> +<tr><td>rRNA Reads</td><td>8521</td></tr></tbody></table></div> +<div class="module"><h2 id="M01">Read Distribution</h2> +<p><img class="indented" src="./figs/smp0.read_distr.png" alt="Read Distribution"><img class="indented" src="./figs/smp0.read_distr_pie.png" alt="Read Distribution"></p></div> +<div class="module"><h2 id="M02">Mappability</h2> +<p><img class="indented" src="./figs/smp0.clipping_profile.png" alt="Mappablity Profile"> <img class="indented" src="./figs/smp0.mapq_profile.png" alt="MapQ Profile"></p></div> +<div class="module"><h2 id="M03">Coverage</h2> +<p><img class="indented" src="./figs/smp0.geneBodyCoverage.png" alt="Read Coverage"> <img class="indented" src="./figs/smp0.TransCoverage.png" alt="Read Coverage"></p></div> +<div class="module"><h2 id="M04">Read Length</h2> +<p><img class="indented" src="./figs/smp0.readlen_profile.png" alt="Read Length"></p></div> +<h2>smp1</h2> +<div class="module"><h2 id="M10">Basic Statistics</h2> +<table> +<thead><tr 
class="tableizer-firstrow"> +<th>Measure</th><th>Value</th></tr></thead> +<tbody><tr><td>Total Reads</td><td>79258</td></tr> +<tr><td>Unique Reads</td><td>66672</td></tr> +<tr><td>Multi-reads</td><td>12586</td></tr> +<tr><td>Unmapped Reads</td><td>0</td></tr> +<tr><td>Low Quality Reads</td><td>0</td></tr> +<tr><td>Forward Reads</td><td>33207</td></tr> +<tr><td>Reverse Reads</td><td>33465</td></tr> +<tr><td>Splice Reads</td><td>14459</td></tr> +<tr><td>Non-Splice Reads</td><td>52213</td></tr> +<tr><td>rRNA Reads</td><td>7563</td></tr></tbody></table></div> +<div class="module"><h2 id="M11">Read Distribution</h2> +<p><img class="indented" src="./figs/smp1.read_distr.png" alt="Read Distribution"><img class="indented" src="./figs/smp1.read_distr_pie.png" alt="Read Distribution"></p></div> +<div class="module"><h2 id="M12">Mappability</h2> +<p><img class="indented" src="./figs/smp1.clipping_profile.png" alt="Mappablity Profile"> <img class="indented" src="./figs/smp1.mapq_profile.png" alt="MapQ Profile"></p></div> +<div class="module"><h2 id="M13">Coverage</h2> +<p><img class="indented" src="./figs/smp1.geneBodyCoverage.png" alt="Read Coverage"> <img class="indented" src="./figs/smp1.TransCoverage.png" alt="Read Coverage"></p></div> +<div class="module"><h2 id="M14">Read Length</h2> +<p><img class="indented" src="./figs/smp1.readlen_profile.png" alt="Read Length"></p></div> +<h2>smp2</h2> +<div class="module"><h2 id="M20">Basic Statistics</h2> +<table> +<thead><tr class="tableizer-firstrow"> +<th>Measure</th><th>Value</th></tr></thead> +<tbody><tr><td>Total Reads</td><td>80603</td></tr> +<tr><td>Unique Reads</td><td>69389</td></tr> +<tr><td>Multi-reads</td><td>11214</td></tr> +<tr><td>Unmapped Reads</td><td>0</td></tr> +<tr><td>Low Quality Reads</td><td>0</td></tr> +<tr><td>Forward Reads</td><td>34065</td></tr> +<tr><td>Reverse Reads</td><td>35324</td></tr> +<tr><td>Splice Reads</td><td>12624</td></tr> +<tr><td>Non-Splice Reads</td><td>56765</td></tr> +<tr><td>rRNA 
Reads</td><td>5939</td></tr></tbody></table></div> +<div class="module"><h2 id="M21">Read Distribution</h2> +<p><img class="indented" src="./figs/smp2.read_distr.png" alt="Read Distribution"><img class="indented" src="./figs/smp2.read_distr_pie.png" alt="Read Distribution"></p></div> +<div class="module"><h2 id="M22">Mappability</h2> +<p><img class="indented" src="./figs/smp2.clipping_profile.png" alt="Mappablity Profile"> <img class="indented" src="./figs/smp2.mapq_profile.png" alt="MapQ Profile"></p></div> +<div class="module"><h2 id="M23">Coverage</h2> +<p><img class="indented" src="./figs/smp2.geneBodyCoverage.png" alt="Read Coverage"> <img class="indented" src="./figs/smp2.TransCoverage.png" alt="Read Coverage"></p></div> +<div class="module"><h2 id="M24">Read Length</h2> +<p><img class="indented" src="./figs/smp2.readlen_profile.png" alt="Read Length"></p></div> +<div class="Smp_corr"><h2 id="M115">Sample Correlation and Quality</h2> +<p><img class="indented" src="./figs/smp_corr.png" alt="Sample Correlation"><img class="indented" src="./figs/smp_qual.png" alt="Sample Correlation"></p> +<h2 id="M115">Sample Coverage</h2> +<p><img class="indented" src="./figs/smp_cov.png" alt="Sample Coverage"></p></div> +<h2 id="M115">Sample Variation</h2> +<p><img class="indented" src="./figs/smp_reproducibility.png" alt="Sample Variation"><img class="indented" src="./figs/smp_var.png" alt="Sample Variation"></p></div> +</div> +<div id="footer"><p>Produced by Bioinformatics Shared Resource at CSHL </p></div></div></div></body></html> + |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp0.TransCoverage.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp0.TransCoverage.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp0.clipping_profile.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp0.clipping_profile.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp0.geneBodyCoverage.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp0.geneBodyCoverage.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp0.mapq_profile.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp0.mapq_profile.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp0.read_distr.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp0.read_distr.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp0.read_distr_pie.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp0.read_distr_pie.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp0.readlen_profile.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp0.readlen_profile.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp1.TransCoverage.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp1.TransCoverage.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp1.clipping_profile.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp1.clipping_profile.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp1.geneBodyCoverage.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp1.geneBodyCoverage.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp1.mapq_profile.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp1.mapq_profile.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp1.read_distr.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp1.read_distr.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp1.read_distr_pie.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp1.read_distr_pie.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp1.readlen_profile.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp1.readlen_profile.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp2.TransCoverage.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp2.TransCoverage.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp2.clipping_profile.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp2.clipping_profile.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp2.geneBodyCoverage.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp2.geneBodyCoverage.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp2.mapq_profile.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp2.mapq_profile.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp2.read_distr.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp2.read_distr.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp2.read_distr_pie.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp2.read_distr_pie.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp2.readlen_profile.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp2.readlen_profile.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp_corr.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp_corr.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp_cov.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp_cov.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp_qual.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp_qual.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp_reproducibility.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp_reproducibility.png has changed |
| b |
| diff -r 000000000000 -r dfa3745e5fd8 ezBAMQC/test-data/output/figs/smp_var.png |
| b |
| Binary file ezBAMQC/test-data/output/figs/smp_var.png has changed |