Previous changeset 6:6d54abd510d7 (2017-09-13) |
Commit message:
planemo upload |
added:
chimerascan/COPYING chimerascan/MANIFEST.in chimerascan/PKG-INFO chimerascan/THANKS chimerascan/chimerascan.xml chimerascan/chimerascan/__init__.py chimerascan/chimerascan/bx/__init__.py chimerascan/chimerascan/bx/cluster.c chimerascan/chimerascan/bx/cluster.pyx chimerascan/chimerascan/bx/intersection.c chimerascan/chimerascan/bx/intersection.pyx chimerascan/chimerascan/bx/intervalcluster.c chimerascan/chimerascan/bx/intervalcluster.h chimerascan/chimerascan/chimerascan_index.py chimerascan/chimerascan/chimerascan_run.py chimerascan/chimerascan/lib/__init__.py chimerascan/chimerascan/lib/base.py chimerascan/chimerascan/lib/batch_sort.py chimerascan/chimerascan/lib/chimera.py chimerascan/chimerascan/lib/config.py chimerascan/chimerascan/lib/fastq_to_bam.py chimerascan/chimerascan/lib/feature.py chimerascan/chimerascan/lib/fix_alignment_ordering.py chimerascan/chimerascan/lib/fragment_size_distribution.py chimerascan/chimerascan/lib/gene_to_genome.py chimerascan/chimerascan/lib/gtf.py chimerascan/chimerascan/lib/sam.py chimerascan/chimerascan/lib/seq.py chimerascan/chimerascan/lib/stats.py chimerascan/chimerascan/pipeline/__init__.py chimerascan/chimerascan/pipeline/align_bowtie.py chimerascan/chimerascan/pipeline/chimeras_to_breakpoints.py chimerascan/chimerascan/pipeline/discordant_reads_to_bedpe.py chimerascan/chimerascan/pipeline/fastq_inspect_reads.py chimerascan/chimerascan/pipeline/fastq_merge_trim.py chimerascan/chimerascan/pipeline/filter_chimeras.py chimerascan/chimerascan/pipeline/filter_homologous_genes.py chimerascan/chimerascan/pipeline/find_discordant_reads.py chimerascan/chimerascan/pipeline/merge_spanning_alignments.py chimerascan/chimerascan/pipeline/nominate_chimeras.py chimerascan/chimerascan/pipeline/nominate_spanning_reads.py chimerascan/chimerascan/pipeline/profile_insert_size.py chimerascan/chimerascan/pipeline/resolve_discordant_reads.py chimerascan/chimerascan/pipeline/sam2bam.py chimerascan/chimerascan/pipeline/write_output.py 
chimerascan/chimerascan/pysam/COPYING chimerascan/chimerascan/pysam/Pileup.py chimerascan/chimerascan/pysam/__init__.py chimerascan/chimerascan/pysam/csamtools.c chimerascan/chimerascan/pysam/csamtools.pxd chimerascan/chimerascan/pysam/csamtools.pyx chimerascan/chimerascan/pysam/ctabix.c chimerascan/chimerascan/pysam/ctabix.pxd chimerascan/chimerascan/pysam/ctabix.pyx chimerascan/chimerascan/pysam/namedtuple.py chimerascan/chimerascan/pysam/pysam_util.c chimerascan/chimerascan/pysam/pysam_util.h chimerascan/chimerascan/pysam/samtools/bam.c chimerascan/chimerascan/pysam/samtools/bam.h chimerascan/chimerascan/pysam/samtools/bam_aux.c chimerascan/chimerascan/pysam/samtools/bam_color.c chimerascan/chimerascan/pysam/samtools/bam_endian.h chimerascan/chimerascan/pysam/samtools/bam_import.c chimerascan/chimerascan/pysam/samtools/bam_index.c chimerascan/chimerascan/pysam/samtools/bam_lpileup.c chimerascan/chimerascan/pysam/samtools/bam_maqcns.c chimerascan/chimerascan/pysam/samtools/bam_maqcns.h chimerascan/chimerascan/pysam/samtools/bam_mate.c chimerascan/chimerascan/pysam/samtools/bam_md.c chimerascan/chimerascan/pysam/samtools/bam_pileup.c chimerascan/chimerascan/pysam/samtools/bam_plcmd.c chimerascan/chimerascan/pysam/samtools/bam_reheader.c chimerascan/chimerascan/pysam/samtools/bam_rmdup.c chimerascan/chimerascan/pysam/samtools/bam_rmdupse.c chimerascan/chimerascan/pysam/samtools/bam_sort.c chimerascan/chimerascan/pysam/samtools/bam_stat.c chimerascan/chimerascan/pysam/samtools/bam_tview.c chimerascan/chimerascan/pysam/samtools/bgzf.c chimerascan/chimerascan/pysam/samtools/bgzf.h chimerascan/chimerascan/pysam/samtools/faidx.c chimerascan/chimerascan/pysam/samtools/faidx.h chimerascan/chimerascan/pysam/samtools/glf.c chimerascan/chimerascan/pysam/samtools/glf.h chimerascan/chimerascan/pysam/samtools/kaln.c chimerascan/chimerascan/pysam/samtools/kaln.h chimerascan/chimerascan/pysam/samtools/khash.h chimerascan/chimerascan/pysam/samtools/klist.h 
chimerascan/chimerascan/pysam/samtools/knetfile.c chimerascan/chimerascan/pysam/samtools/knetfile.h chimerascan/chimerascan/pysam/samtools/kseq.h chimerascan/chimerascan/pysam/samtools/ksort.h chimerascan/chimerascan/pysam/samtools/kstring.c chimerascan/chimerascan/pysam/samtools/kstring.h chimerascan/chimerascan/pysam/samtools/razf.c chimerascan/chimerascan/pysam/samtools/razf.h chimerascan/chimerascan/pysam/samtools/sam.c chimerascan/chimerascan/pysam/samtools/sam.h chimerascan/chimerascan/pysam/samtools/sam_header.c chimerascan/chimerascan/pysam/samtools/sam_header.h chimerascan/chimerascan/pysam/samtools/sam_view.c chimerascan/chimerascan/pysam/setup.cfg chimerascan/chimerascan/pysam/setup.py chimerascan/chimerascan/pysam/tabix/bam_endian.h chimerascan/chimerascan/pysam/tabix/bgzf.c chimerascan/chimerascan/pysam/tabix/bgzf.h chimerascan/chimerascan/pysam/tabix/bgzip.c chimerascan/chimerascan/pysam/tabix/index.c chimerascan/chimerascan/pysam/tabix/khash.h chimerascan/chimerascan/pysam/tabix/knetfile.c chimerascan/chimerascan/pysam/tabix/knetfile.h chimerascan/chimerascan/pysam/tabix/ksort.h chimerascan/chimerascan/pysam/tabix/kstring.c chimerascan/chimerascan/pysam/tabix/kstring.h chimerascan/chimerascan/pysam/tabix/tabix.h chimerascan/chimerascan/pysam/tests/00README.txt chimerascan/chimerascan/pysam/tests/Makefile chimerascan/chimerascan/pysam/tests/ex1.fa chimerascan/chimerascan/pysam/tests/ex1.sam.gz chimerascan/chimerascan/pysam/tests/ex3.sam chimerascan/chimerascan/pysam/tests/ex4.sam chimerascan/chimerascan/pysam/tests/ex5.sam chimerascan/chimerascan/pysam/tests/ex6.sam chimerascan/chimerascan/pysam/tests/ex7.sam chimerascan/chimerascan/pysam/tests/ex8.sam chimerascan/chimerascan/pysam/tests/example.gtf.gz chimerascan/chimerascan/pysam/tests/example.gtf.gz.tbi chimerascan/chimerascan/pysam/tests/example.py chimerascan/chimerascan/pysam/tests/pysam_test.py chimerascan/chimerascan/pysam/tests/segfault_tests.py 
chimerascan/chimerascan/pysam/tests/tabix_test.py chimerascan/chimerascan/pysam/version.py chimerascan/chimerascan/test/__init__.py chimerascan/chimerascan/test/test_homology.py chimerascan/chimerascan/tools/__init__.py chimerascan/chimerascan/tools/chimerascan_html_table.py chimerascan/chimerascan/tools/gtf_to_genepred.py chimerascan/chimerascan/tools/make_false_positive_file.py chimerascan/chimerascan/tools/sortable.js chimerascan/chimerascan/tools/sortable_us.js chimerascan/chimerascan/tools/table_style.css chimerascan/chimerascan/tools/table_template.html chimerascan/run.sh chimerascan/setup-cython.py chimerascan/setup.py chimerascan/tests/mcf7_pe_35bp/AHCYL1-RAD51C_1.fq chimerascan/tests/mcf7_pe_35bp/AHCYL1-RAD51C_2.fq chimerascan/tests/mcf7_pe_35bp/ARFGEF2-SULF2_1.fq chimerascan/tests/mcf7_pe_35bp/ARFGEF2-SULF2_2.fq chimerascan/tests/mcf7_pe_35bp/BC035340-MCF2L_1.fq chimerascan/tests/mcf7_pe_35bp/BC035340-MCF2L_2.fq chimerascan/tests/mcf7_pe_35bp/BCAS4-BCAS3_1.fq chimerascan/tests/mcf7_pe_35bp/BCAS4-BCAS3_2.fq chimerascan/tests/mcf7_pe_35bp/CXorf15-SYAP1_1.fq chimerascan/tests/mcf7_pe_35bp/CXorf15-SYAP1_2.fq chimerascan/tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_1.fq chimerascan/tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_2.fq chimerascan/tests/mcf7_pe_35bp/EP300-MRFAP1_1.fq chimerascan/tests/mcf7_pe_35bp/EP300-MRFAP1_2.fq chimerascan/tests/mcf7_pe_35bp/EWSR1-TFF1_1.fq chimerascan/tests/mcf7_pe_35bp/EWSR1-TFF1_2.fq chimerascan/tests/mcf7_pe_35bp/MRFAP1-EP300_1.fq chimerascan/tests/mcf7_pe_35bp/MRFAP1-EP300_2.fq chimerascan/tests/mcf7_pe_35bp/MYO9B-FCHO1_1.fq chimerascan/tests/mcf7_pe_35bp/MYO9B-FCHO1_2.fq chimerascan/tests/mcf7_pe_35bp/NAV1-GPR37L1_1.fq chimerascan/tests/mcf7_pe_35bp/NAV1-GPR37L1_2.fq chimerascan/tests/mcf7_pe_35bp/PAPOLA-AK7_1.fq chimerascan/tests/mcf7_pe_35bp/PAPOLA-AK7_2.fq chimerascan/tests/mcf7_pe_35bp/RPS6KB1-TMEM49_1.fq chimerascan/tests/mcf7_pe_35bp/RPS6KB1-TMEM49_2.fq chimerascan/tests/mcf7_pe_35bp/STK11-MIDN_1.fq 
chimerascan/tests/mcf7_pe_35bp/STK11-MIDN_2.fq chimerascan/tests/mcf7_pe_35bp/SULF2-PRICKLE2_1.fq chimerascan/tests/mcf7_pe_35bp/SULF2-PRICKLE2_2.fq chimerascan/tests/vcap_pe_53bp/APP-AR_1.fq chimerascan/tests/vcap_pe_53bp/APP-AR_2.fq chimerascan/tests/vcap_pe_53bp/BC018860-NDRG1_1.fq chimerascan/tests/vcap_pe_53bp/BC018860-NDRG1_2.fq chimerascan/tests/vcap_pe_53bp/BC021729-FRY_1.fq chimerascan/tests/vcap_pe_53bp/BC021729-FRY_2.fq chimerascan/tests/vcap_pe_53bp/HJURP-EIF4E2_1.fq chimerascan/tests/vcap_pe_53bp/HJURP-EIF4E2_2.fq chimerascan/tests/vcap_pe_53bp/INPP4A-HJURP_1.fq chimerascan/tests/vcap_pe_53bp/INPP4A-HJURP_2.fq chimerascan/tests/vcap_pe_53bp/PIK3C2A-TEAD1_1.fq chimerascan/tests/vcap_pe_53bp/PIK3C2A-TEAD1_2.fq chimerascan/tests/vcap_pe_53bp/RANBP17-DOCK2_1.fq chimerascan/tests/vcap_pe_53bp/RANBP17-DOCK2_2.fq chimerascan/tests/vcap_pe_53bp/RC3H2-RGS3_1.fq chimerascan/tests/vcap_pe_53bp/RC3H2-RGS3_2.fq chimerascan/tests/vcap_pe_53bp/TMPRSS2-ERG_1.fq chimerascan/tests/vcap_pe_53bp/TMPRSS2-ERG_2.fq chimerascan/tests/vcap_pe_53bp/TYMP-SCO2_1.fq chimerascan/tests/vcap_pe_53bp/TYMP-SCO2_2.fq chimerascan/tests/vcap_pe_53bp/USP10-ZDHHC7_1.fq chimerascan/tests/vcap_pe_53bp/USP10-ZDHHC7_2.fq chimerascan/tests/vcap_pe_53bp/VWA2-PRKCH_1.fq chimerascan/tests/vcap_pe_53bp/VWA2-PRKCH_2.fq chimerascan/tests/vcap_pe_53bp/ZDHHC7-ABCB9_1.fq chimerascan/tests/vcap_pe_53bp/ZDHHC7-ABCB9_2.fq chimerascan/tests/vcap_pe_53bp/ZNF649-ZNF577_1.fq chimerascan/tests/vcap_pe_53bp/ZNF649-ZNF577_2.fq splicetrap/PostAnalysis splicetrap/SpliceChange splicetrap/SpliceTrap.pl splicetrap/TXdbgen splicetrap/bin/ApplyCutoff.jie.pl splicetrap/bin/Pair_estimate_c splicetrap/bin/PostAnalysis splicetrap/bin/PostAnalysis.pl splicetrap/bin/SpliceChange splicetrap/bin/SpliceChange.pl splicetrap/bin/SpliceTrap splicetrap/bin/SpliceTrap.pl splicetrap/bin/SpliceTrap_measure.pl splicetrap/bin/TXdbgen splicetrap/bin/TXdbgen.pl splicetrap/bin/apply_cutoff.sh splicetrap/bin/batch_para_cov10p_fit.sh 
splicetrap/bin/batchqsub.pl splicetrap/bin/batchqsub.pl_orig splicetrap/bin/beta_fit.R splicetrap/bin/bowtie2eland.pl splicetrap/bin/calc_pval.R splicetrap/bin/downloaddb.pl splicetrap/bin/get.frag.size.pl splicetrap/bin/get.hist.pl splicetrap/bin/get_bed_fa_j.pl splicetrap/bin/get_event_dist_fit.pl splicetrap/bin/gtf2bed.pl splicetrap/bin/mapping_bowtie.sh splicetrap/bin/mapping_rmap.sh splicetrap/bin/mark.mt.4eland.pl splicetrap/bin/rmap2eland.pl splicetrap/bin/scan_nomt.pl splicetrap/bin/scanbed2txdb.pl splicetrap/bin/splitdb.sh splicetrap/bin/vslz.pl splicetrap/cutoffs/cutoff.pair.06.txt splicetrap/cutoffs/cutoff.pair.07.txt splicetrap/cutoffs/cutoff.pair.08.txt splicetrap/refGenes.bed splicetrap/splice_trap.xml splicetrap/src/Makefile splicetrap/src/splicetrap.estimate.cpp splicetrap/test-data/input1.fastq splicetrap/test-data/input2.fastq splicetrap/test-data/output1.txt splicetrap/test-data/output2.txt |
removed:
COPYING MANIFEST.in PKG-INFO THANKS chimerascan.xml chimerascan/__init__.py chimerascan/bx/__init__.py chimerascan/bx/cluster.c chimerascan/bx/cluster.pyx chimerascan/bx/intersection.c chimerascan/bx/intersection.pyx chimerascan/bx/intervalcluster.c chimerascan/bx/intervalcluster.h chimerascan/chimerascan_index.py chimerascan/lib/__init__.py chimerascan/lib/base.py chimerascan/lib/batch_sort.py chimerascan/lib/chimera.py chimerascan/lib/config.py chimerascan/lib/fastq_to_bam.py chimerascan/lib/feature.py chimerascan/lib/fix_alignment_ordering.py chimerascan/lib/fragment_size_distribution.py chimerascan/lib/gene_to_genome.py chimerascan/lib/gtf.py chimerascan/lib/sam.py chimerascan/lib/seq.py chimerascan/lib/stats.py chimerascan/pipeline/__init__.py chimerascan/pipeline/align_bowtie.py chimerascan/pipeline/chimeras_to_breakpoints.py chimerascan/pipeline/discordant_reads_to_bedpe.py chimerascan/pipeline/fastq_inspect_reads.py chimerascan/pipeline/fastq_merge_trim.py chimerascan/pipeline/filter_chimeras.py chimerascan/pipeline/filter_homologous_genes.py chimerascan/pipeline/find_discordant_reads.py chimerascan/pipeline/merge_spanning_alignments.py chimerascan/pipeline/nominate_chimeras.py chimerascan/pipeline/nominate_spanning_reads.py chimerascan/pipeline/profile_insert_size.py chimerascan/pipeline/resolve_discordant_reads.py chimerascan/pipeline/sam2bam.py chimerascan/pipeline/write_output.py chimerascan/pysam/COPYING chimerascan/pysam/Pileup.py chimerascan/pysam/__init__.py chimerascan/pysam/csamtools.c chimerascan/pysam/csamtools.pxd chimerascan/pysam/csamtools.pyx chimerascan/pysam/ctabix.c chimerascan/pysam/ctabix.pxd chimerascan/pysam/ctabix.pyx chimerascan/pysam/namedtuple.py chimerascan/pysam/pysam_util.c chimerascan/pysam/pysam_util.h chimerascan/pysam/samtools/bam.c chimerascan/pysam/samtools/bam.h chimerascan/pysam/samtools/bam_aux.c chimerascan/pysam/samtools/bam_color.c chimerascan/pysam/samtools/bam_endian.h chimerascan/pysam/samtools/bam_import.c 
chimerascan/pysam/samtools/bam_index.c chimerascan/pysam/samtools/bam_lpileup.c chimerascan/pysam/samtools/bam_maqcns.c chimerascan/pysam/samtools/bam_maqcns.h chimerascan/pysam/samtools/bam_mate.c chimerascan/pysam/samtools/bam_md.c chimerascan/pysam/samtools/bam_pileup.c chimerascan/pysam/samtools/bam_plcmd.c chimerascan/pysam/samtools/bam_reheader.c chimerascan/pysam/samtools/bam_rmdup.c chimerascan/pysam/samtools/bam_rmdupse.c chimerascan/pysam/samtools/bam_sort.c chimerascan/pysam/samtools/bam_stat.c chimerascan/pysam/samtools/bam_tview.c chimerascan/pysam/samtools/bgzf.c chimerascan/pysam/samtools/bgzf.h chimerascan/pysam/samtools/faidx.c chimerascan/pysam/samtools/faidx.h chimerascan/pysam/samtools/glf.c chimerascan/pysam/samtools/glf.h chimerascan/pysam/samtools/kaln.c chimerascan/pysam/samtools/kaln.h chimerascan/pysam/samtools/khash.h chimerascan/pysam/samtools/klist.h chimerascan/pysam/samtools/knetfile.c chimerascan/pysam/samtools/knetfile.h chimerascan/pysam/samtools/kseq.h chimerascan/pysam/samtools/ksort.h chimerascan/pysam/samtools/kstring.c chimerascan/pysam/samtools/kstring.h chimerascan/pysam/samtools/razf.c chimerascan/pysam/samtools/razf.h chimerascan/pysam/samtools/sam.c chimerascan/pysam/samtools/sam.h chimerascan/pysam/samtools/sam_header.c chimerascan/pysam/samtools/sam_header.h chimerascan/pysam/samtools/sam_view.c chimerascan/pysam/setup.cfg chimerascan/pysam/setup.py chimerascan/pysam/tabix/bam_endian.h chimerascan/pysam/tabix/bgzf.c chimerascan/pysam/tabix/bgzf.h chimerascan/pysam/tabix/bgzip.c chimerascan/pysam/tabix/index.c chimerascan/pysam/tabix/khash.h chimerascan/pysam/tabix/knetfile.c chimerascan/pysam/tabix/knetfile.h chimerascan/pysam/tabix/ksort.h chimerascan/pysam/tabix/kstring.c chimerascan/pysam/tabix/kstring.h chimerascan/pysam/tabix/tabix.h chimerascan/pysam/tests/00README.txt chimerascan/pysam/tests/Makefile chimerascan/pysam/tests/ex1.fa chimerascan/pysam/tests/ex1.sam.gz chimerascan/pysam/tests/ex3.sam 
chimerascan/pysam/tests/ex4.sam chimerascan/pysam/tests/ex5.sam chimerascan/pysam/tests/ex6.sam chimerascan/pysam/tests/ex7.sam chimerascan/pysam/tests/ex8.sam chimerascan/pysam/tests/example.gtf.gz chimerascan/pysam/tests/example.gtf.gz.tbi chimerascan/pysam/tests/example.py chimerascan/pysam/tests/pysam_test.py chimerascan/pysam/tests/segfault_tests.py chimerascan/pysam/tests/tabix_test.py chimerascan/pysam/version.py chimerascan/test/__init__.py chimerascan/test/test_homology.py chimerascan/tools/__init__.py chimerascan/tools/chimerascan_html_table.py chimerascan/tools/gtf_to_genepred.py chimerascan/tools/make_false_positive_file.py chimerascan/tools/sortable.js chimerascan/tools/sortable_us.js chimerascan/tools/table_style.css chimerascan/tools/table_template.html chimerascan_run.py run.sh setup-cython.py setup.py tests/mcf7_pe_35bp/AHCYL1-RAD51C_1.fq tests/mcf7_pe_35bp/AHCYL1-RAD51C_2.fq tests/mcf7_pe_35bp/ARFGEF2-SULF2_1.fq tests/mcf7_pe_35bp/ARFGEF2-SULF2_2.fq tests/mcf7_pe_35bp/BC035340-MCF2L_1.fq tests/mcf7_pe_35bp/BC035340-MCF2L_2.fq tests/mcf7_pe_35bp/BCAS4-BCAS3_1.fq tests/mcf7_pe_35bp/BCAS4-BCAS3_2.fq tests/mcf7_pe_35bp/CXorf15-SYAP1_1.fq tests/mcf7_pe_35bp/CXorf15-SYAP1_2.fq tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_1.fq tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_2.fq tests/mcf7_pe_35bp/EP300-MRFAP1_1.fq tests/mcf7_pe_35bp/EP300-MRFAP1_2.fq tests/mcf7_pe_35bp/EWSR1-TFF1_1.fq tests/mcf7_pe_35bp/EWSR1-TFF1_2.fq tests/mcf7_pe_35bp/MRFAP1-EP300_1.fq tests/mcf7_pe_35bp/MRFAP1-EP300_2.fq tests/mcf7_pe_35bp/MYO9B-FCHO1_1.fq tests/mcf7_pe_35bp/MYO9B-FCHO1_2.fq tests/mcf7_pe_35bp/NAV1-GPR37L1_1.fq tests/mcf7_pe_35bp/NAV1-GPR37L1_2.fq tests/mcf7_pe_35bp/PAPOLA-AK7_1.fq tests/mcf7_pe_35bp/PAPOLA-AK7_2.fq tests/mcf7_pe_35bp/RPS6KB1-TMEM49_1.fq tests/mcf7_pe_35bp/RPS6KB1-TMEM49_2.fq tests/mcf7_pe_35bp/STK11-MIDN_1.fq tests/mcf7_pe_35bp/STK11-MIDN_2.fq tests/mcf7_pe_35bp/SULF2-PRICKLE2_1.fq tests/mcf7_pe_35bp/SULF2-PRICKLE2_2.fq tests/vcap_pe_53bp/APP-AR_1.fq 
tests/vcap_pe_53bp/APP-AR_2.fq tests/vcap_pe_53bp/BC018860-NDRG1_1.fq tests/vcap_pe_53bp/BC018860-NDRG1_2.fq tests/vcap_pe_53bp/BC021729-FRY_1.fq tests/vcap_pe_53bp/BC021729-FRY_2.fq tests/vcap_pe_53bp/HJURP-EIF4E2_1.fq tests/vcap_pe_53bp/HJURP-EIF4E2_2.fq tests/vcap_pe_53bp/INPP4A-HJURP_1.fq tests/vcap_pe_53bp/INPP4A-HJURP_2.fq tests/vcap_pe_53bp/PIK3C2A-TEAD1_1.fq tests/vcap_pe_53bp/PIK3C2A-TEAD1_2.fq tests/vcap_pe_53bp/RANBP17-DOCK2_1.fq tests/vcap_pe_53bp/RANBP17-DOCK2_2.fq tests/vcap_pe_53bp/RC3H2-RGS3_1.fq tests/vcap_pe_53bp/RC3H2-RGS3_2.fq tests/vcap_pe_53bp/TMPRSS2-ERG_1.fq tests/vcap_pe_53bp/TMPRSS2-ERG_2.fq tests/vcap_pe_53bp/TYMP-SCO2_1.fq tests/vcap_pe_53bp/TYMP-SCO2_2.fq tests/vcap_pe_53bp/USP10-ZDHHC7_1.fq tests/vcap_pe_53bp/USP10-ZDHHC7_2.fq tests/vcap_pe_53bp/VWA2-PRKCH_1.fq tests/vcap_pe_53bp/VWA2-PRKCH_2.fq tests/vcap_pe_53bp/ZDHHC7-ABCB9_1.fq tests/vcap_pe_53bp/ZDHHC7-ABCB9_2.fq tests/vcap_pe_53bp/ZNF649-ZNF577_1.fq tests/vcap_pe_53bp/ZNF649-ZNF577_2.fq |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 COPYING --- a/COPYING Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,674 +0,0 @@\n- GNU GENERAL PUBLIC LICENSE\n- Version 3, 29 June 2007\n-\n- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>\n- Everyone is permitted to copy and distribute verbatim copies\n- of this license document, but changing it is not allowed.\n-\n- Preamble\n-\n- The GNU General Public License is a free, copyleft license for\n-software and other kinds of works.\n-\n- The licenses for most software and other practical works are designed\n-to take away your freedom to share and change the works. By contrast,\n-the GNU General Public License is intended to guarantee your freedom to\n-share and change all versions of a program--to make sure it remains free\n-software for all its users. We, the Free Software Foundation, use the\n-GNU General Public License for most of our software; it applies also to\n-any other work released this way by its authors. You can apply it to\n-your programs, too.\n-\n- When we speak of free software, we are referring to freedom, not\n-price. Our General Public Licenses are designed to make sure that you\n-have the freedom to distribute copies of free software (and charge for\n-them if you wish), that you receive source code or can get it if you\n-want it, that you can change the software or use pieces of it in new\n-free programs, and that you know you can do these things.\n-\n- To protect your rights, we need to prevent others from denying you\n-these rights or asking you to surrender the rights. Therefore, you have\n-certain responsibilities if you distribute copies of the software, or if\n-you modify it: responsibilities to respect the freedom of others.\n-\n- For example, if you distribute copies of such a program, whether\n-gratis or for a fee, you must pass on to the recipients the same\n-freedoms that you received. You must make sure that they, too, receive\n-or can get the source code. 
And you must show them these terms so they\n-know their rights.\n-\n- Developers that use the GNU GPL protect your rights with two steps:\n-(1) assert copyright on the software, and (2) offer you this License\n-giving you legal permission to copy, distribute and/or modify it.\n-\n- For the developers\' and authors\' protection, the GPL clearly explains\n-that there is no warranty for this free software. For both users\' and\n-authors\' sake, the GPL requires that modified versions be marked as\n-changed, so that their problems will not be attributed erroneously to\n-authors of previous versions.\n-\n- Some devices are designed to deny users access to install or run\n-modified versions of the software inside them, although the manufacturer\n-can do so. This is fundamentally incompatible with the aim of\n-protecting users\' freedom to change the software. The systematic\n-pattern of such abuse occurs in the area of products for individuals to\n-use, which is precisely where it is most unacceptable. Therefore, we\n-have designed this version of the GPL to prohibit the practice for those\n-products. If such problems arise substantially in other domains, we\n-stand ready to extend this provision to those domains in future versions\n-of the GPL, as needed to protect the freedom of users.\n-\n- Finally, every program is threatened constantly by software patents.\n-States should not allow patents to restrict development and use of\n-software on general-purpose computers, but in those that do, we wish to\n-avoid the special danger that patents applied to a free program could\n-make it effectively proprietary. To prevent this, the GPL assures that\n-patents cannot be used to render the program non-free.\n-\n- The precise terms and conditions for copying, distribution and\n-modification follow.\n-\n- TERMS AND CONDITIONS\n-\n- 0. 
Definitions.\n-\n- "This License" refers to version 3 of the GNU General Public License.\n-\n- "Copyright" also means copyright-like laws that apply to other kinds of\n-works, such as semiconductor masks.\n-\n- "The Program" refers to a'..b'THE PROGRAM\n-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\n-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n-\n- 16. Limitation of Liability.\n-\n- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\n-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\n-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\n-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\n-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\n-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\n-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\n-SUCH DAMAGES.\n-\n- 17. Interpretation of Sections 15 and 16.\n-\n- If the disclaimer of warranty and limitation of liability provided\n-above cannot be given local legal effect according to their terms,\n-reviewing courts shall apply local law that most closely approximates\n-an absolute waiver of all civil liability in connection with the\n-Program, unless a warranty or assumption of liability accompanies a\n-copy of the Program in return for a fee.\n-\n- END OF TERMS AND CONDITIONS\n-\n- How to Apply These Terms to Your New Programs\n-\n- If you develop a new program, and you want it to be of the greatest\n-possible use to the public, the best way to achieve this is to make it\n-free software which everyone can redistribute and change under these terms.\n-\n- To do so, attach the following notices to the program. 
It is safest\n-to attach them to the start of each source file to most effectively\n-state the exclusion of warranty; and each file should have at least\n-the "copyright" line and a pointer to where the full notice is found.\n-\n- <one line to give the program\'s name and a brief idea of what it does.>\n- Copyright (C) <year> <name of author>\n-\n- This program is free software: you can redistribute it and/or modify\n- it under the terms of the GNU General Public License as published by\n- the Free Software Foundation, either version 3 of the License, or\n- (at your option) any later version.\n-\n- This program is distributed in the hope that it will be useful,\n- but WITHOUT ANY WARRANTY; without even the implied warranty of\n- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n- GNU General Public License for more details.\n-\n- You should have received a copy of the GNU General Public License\n- along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\n-Also add information on how to contact you by electronic and paper mail.\n-\n- If the program does terminal interaction, make it output a short\n-notice like this when it starts in an interactive mode:\n-\n- <program> Copyright (C) <year> <name of author>\n- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w\'.\n- This is free software, and you are welcome to redistribute it\n- under certain conditions; type `show c\' for details.\n-\n-The hypothetical commands `show w\' and `show c\' should show the appropriate\n-parts of the General Public License. 
Of course, your program\'s commands\n-might be different; for a GUI interface, you would use an "about box".\n-\n- You should also get your employer (if you work as a programmer) or school,\n-if any, to sign a "copyright disclaimer" for the program, if necessary.\n-For more information on this, and how to apply and follow the GNU GPL, see\n-<http://www.gnu.org/licenses/>.\n-\n- The GNU General Public License does not permit incorporating your program\n-into proprietary programs. If your program is a subroutine library, you\n-may consider it more useful to permit linking proprietary applications with\n-the library. If this is what you want to do, use the GNU Lesser General\n-Public License instead of this License. But first, please read\n-<http://www.gnu.org/philosophy/why-not-lgpl.html>.\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 MANIFEST.in --- a/MANIFEST.in Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,42 +0,0 @@ -# -# Use .add_data_files and .add_data_dir methods in a appropriate -# setup.py files to include non-python files such as documentation, -# data, etc files to distribution. Avoid using MANIFEST.in for that. -# - -include MANIFEST.in -include COPYING -include INSTALL -include KNOWN_BUGS -include THANKS - -# bx-python -include chimerascan/bx/intervalcluster.h - -# pysam -include chimerascan/pysam/csamtools.pxd -include chimerascan/pysam/ctabix.pxd -include chimerascan/pysam/pysam_util.h -include chimerascan/pysam/samtools/*.h -include chimerascan/pysam/tabix/*.h - -# pysam tests -include chimerascan/pysam/tests/00README.txt -include chimerascan/pysam/tests/Makefile -include chimerascan/pysam/tests/ex1.fa -include chimerascan/pysam/tests/ex1.sam.gz -include chimerascan/pysam/tests/ex3.sam -include chimerascan/pysam/tests/ex4.sam -include chimerascan/pysam/tests/ex5.sam -include chimerascan/pysam/tests/ex6.sam -include chimerascan/pysam/tests/ex7.sam -include chimerascan/pysam/tests/ex8.sam -include chimerascan/pysam/tests/example.py -include chimerascan/pysam/tests/pysam_test.py -include chimerascan/pysam/tests/segfault_tests.py - -# tabix tests -include chimerascan/pysam/tests/tabix_test.py -include chimerascan/pysam/tests/example.gtf.gz -include chimerascan/pysam/tests/example.gtf.gz.tbi - |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 PKG-INFO --- a/PKG-INFO Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,16 +0,0 @@ -Metadata-Version: 1.0 -Name: chimerascan -Version: 0.4.3-1 -Summary: chimerascan -Home-page: http://code.google.com/p/chimerascan/ -Author: Matthew Iyer -Author: Christopher Maher -Author-email: matthew.iyer@gmail.com -License: GPL3 -Description: - - chimerascan - *********** - - -Platform: ALL |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 THANKS --- a/THANKS Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,18 +0,0 @@ -We would like to thank James Taylor and the bx-python team for the -efficient interval tree implementation. The interval tree and -cluster tree implementations are well written and fast and help to -build the foundation of chimerascan. - -We would also like to thank Andreas Heger (andreas.heger) and the -'pysam' team for there excellent package. Pysam is used in almost -every stage of the chimerascan pipeline and facilitated rapid -prototyping and development of the tool. We appreciate their -efforts to make samtools accessible to the python community. - -Christopher Maher wrote the original 'chimerascan' in Perl and was -among the first to use a bioinformatics approach to discover gene -fusions in cancer. The Perl implementation was used as a basis for -this version. - -Finally, we would like to thank Arul Chinnaiyan and members of the -Chinnaiyan Lab for their support. \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan.xml --- a/chimerascan.xml Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,56 +0,0 @@ -<tool id="chimerascan" name="ChimeraScan"> - <description>A tool for identifying chimeric transcription in sequencing data.</description> - <command detect_errors="exit_code"><![CDATA[ - bash $__tool_directory__/run.sh $__tool_directory__ && - python $__tool_directory__/chimerascan_run.py -p 8 $__tool_directory__/myindex - #if $input_type_conditional.chimerascan_input_type == "paired" - $input_type_conditional.input_1 $input_type_conditional.input_2 - #else - $input_type_conditional.input.forward $input_type_conditional.input.reverse - #end if - $galaxy_output - ]]> - </command> - <inputs> - <conditional name="input_type_conditional"> - <param name="chimerascan_input_type" type="select" label="Input Type" help="Select between paired and paired collection"> - <option value="paired" selected="true">Paired</option> - <option value="paired_collection">Paired Collection</option> - </param> - <when value="paired"> - <param format='fastq' name='input_1' type='data' label='FASTQ file, forward reads' /> - <param format='fastq' name='input_2' type='data' label='FASTQ file, reverse reads' /> - </when> - <when value="paired_collection"> - <param format="fastq" name='input' type="data_collection" collection_type="paired" label="Select a paired collection" help="Specify paired dataset collection containing paired reads"/> - </when> - </conditional> - </inputs> - <outputs> - <data name="galaxy_output" format="bed" /> - </outputs> - - <tests> - <test> - <param name="input1" value="input1.fastq"/> - <param name="input2" value="input2.fastq"/> - <output name="galaxy_output" file="outputfile.bed" ftype="bed"/> - </test> - <test> - <param name="fastq_input"> - <collection type="paired"> - <element name="forward" value="input1.fastq" /> - <element name="reverse" value="input2.fastq" /> - </collection> - </param> - <param name="input_type" value="paired_collection" /> - <output name="galaxy_output" file="outputfile.bed" ftype="bed"/> - </test> - </tests> - - <help> - 
Bowtie index files must be placed inside 'myindex folder' - A tool for identifying chimeric transcription in sequencing data. - </help> - -</tool> |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/COPYING --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/COPYING Thu Oct 12 16:26:36 2017 -0400 |
b |
b'@@ -0,0 +1,674 @@\n+ GNU GENERAL PUBLIC LICENSE\n+ Version 3, 29 June 2007\n+\n+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>\n+ Everyone is permitted to copy and distribute verbatim copies\n+ of this license document, but changing it is not allowed.\n+\n+ Preamble\n+\n+ The GNU General Public License is a free, copyleft license for\n+software and other kinds of works.\n+\n+ The licenses for most software and other practical works are designed\n+to take away your freedom to share and change the works. By contrast,\n+the GNU General Public License is intended to guarantee your freedom to\n+share and change all versions of a program--to make sure it remains free\n+software for all its users. We, the Free Software Foundation, use the\n+GNU General Public License for most of our software; it applies also to\n+any other work released this way by its authors. You can apply it to\n+your programs, too.\n+\n+ When we speak of free software, we are referring to freedom, not\n+price. Our General Public Licenses are designed to make sure that you\n+have the freedom to distribute copies of free software (and charge for\n+them if you wish), that you receive source code or can get it if you\n+want it, that you can change the software or use pieces of it in new\n+free programs, and that you know you can do these things.\n+\n+ To protect your rights, we need to prevent others from denying you\n+these rights or asking you to surrender the rights. Therefore, you have\n+certain responsibilities if you distribute copies of the software, or if\n+you modify it: responsibilities to respect the freedom of others.\n+\n+ For example, if you distribute copies of such a program, whether\n+gratis or for a fee, you must pass on to the recipients the same\n+freedoms that you received. You must make sure that they, too, receive\n+or can get the source code. 
And you must show them these terms so they\n+know their rights.\n+\n+ Developers that use the GNU GPL protect your rights with two steps:\n+(1) assert copyright on the software, and (2) offer you this License\n+giving you legal permission to copy, distribute and/or modify it.\n+\n+ For the developers\' and authors\' protection, the GPL clearly explains\n+that there is no warranty for this free software. For both users\' and\n+authors\' sake, the GPL requires that modified versions be marked as\n+changed, so that their problems will not be attributed erroneously to\n+authors of previous versions.\n+\n+ Some devices are designed to deny users access to install or run\n+modified versions of the software inside them, although the manufacturer\n+can do so. This is fundamentally incompatible with the aim of\n+protecting users\' freedom to change the software. The systematic\n+pattern of such abuse occurs in the area of products for individuals to\n+use, which is precisely where it is most unacceptable. Therefore, we\n+have designed this version of the GPL to prohibit the practice for those\n+products. If such problems arise substantially in other domains, we\n+stand ready to extend this provision to those domains in future versions\n+of the GPL, as needed to protect the freedom of users.\n+\n+ Finally, every program is threatened constantly by software patents.\n+States should not allow patents to restrict development and use of\n+software on general-purpose computers, but in those that do, we wish to\n+avoid the special danger that patents applied to a free program could\n+make it effectively proprietary. To prevent this, the GPL assures that\n+patents cannot be used to render the program non-free.\n+\n+ The precise terms and conditions for copying, distribution and\n+modification follow.\n+\n+ TERMS AND CONDITIONS\n+\n+ 0. 
Definitions.\n+\n+ "This License" refers to version 3 of the GNU General Public License.\n+\n+ "Copyright" also means copyright-like laws that apply to other kinds of\n+works, such as semiconductor masks.\n+\n+ "The Program" refers to a'..b'THE PROGRAM\n+IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\n+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n+\n+ 16. Limitation of Liability.\n+\n+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\n+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\n+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\n+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\n+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\n+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\n+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\n+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\n+SUCH DAMAGES.\n+\n+ 17. Interpretation of Sections 15 and 16.\n+\n+ If the disclaimer of warranty and limitation of liability provided\n+above cannot be given local legal effect according to their terms,\n+reviewing courts shall apply local law that most closely approximates\n+an absolute waiver of all civil liability in connection with the\n+Program, unless a warranty or assumption of liability accompanies a\n+copy of the Program in return for a fee.\n+\n+ END OF TERMS AND CONDITIONS\n+\n+ How to Apply These Terms to Your New Programs\n+\n+ If you develop a new program, and you want it to be of the greatest\n+possible use to the public, the best way to achieve this is to make it\n+free software which everyone can redistribute and change under these terms.\n+\n+ To do so, attach the following notices to the program. 
It is safest\n+to attach them to the start of each source file to most effectively\n+state the exclusion of warranty; and each file should have at least\n+the "copyright" line and a pointer to where the full notice is found.\n+\n+ <one line to give the program\'s name and a brief idea of what it does.>\n+ Copyright (C) <year> <name of author>\n+\n+ This program is free software: you can redistribute it and/or modify\n+ it under the terms of the GNU General Public License as published by\n+ the Free Software Foundation, either version 3 of the License, or\n+ (at your option) any later version.\n+\n+ This program is distributed in the hope that it will be useful,\n+ but WITHOUT ANY WARRANTY; without even the implied warranty of\n+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+ GNU General Public License for more details.\n+\n+ You should have received a copy of the GNU General Public License\n+ along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\n+Also add information on how to contact you by electronic and paper mail.\n+\n+ If the program does terminal interaction, make it output a short\n+notice like this when it starts in an interactive mode:\n+\n+ <program> Copyright (C) <year> <name of author>\n+ This program comes with ABSOLUTELY NO WARRANTY; for details type `show w\'.\n+ This is free software, and you are welcome to redistribute it\n+ under certain conditions; type `show c\' for details.\n+\n+The hypothetical commands `show w\' and `show c\' should show the appropriate\n+parts of the General Public License. 
Of course, your program\'s commands\n+might be different; for a GUI interface, you would use an "about box".\n+\n+ You should also get your employer (if you work as a programmer) or school,\n+if any, to sign a "copyright disclaimer" for the program, if necessary.\n+For more information on this, and how to apply and follow the GNU GPL, see\n+<http://www.gnu.org/licenses/>.\n+\n+ The GNU General Public License does not permit incorporating your program\n+into proprietary programs. If your program is a subroutine library, you\n+may consider it more useful to permit linking proprietary applications with\n+the library. If this is what you want to do, use the GNU Lesser General\n+Public License instead of this License. But first, please read\n+<http://www.gnu.org/philosophy/why-not-lgpl.html>.\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/MANIFEST.in --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/MANIFEST.in Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,42 @@ +# +# Use .add_data_files and .add_data_dir methods in a appropriate +# setup.py files to include non-python files such as documentation, +# data, etc files to distribution. Avoid using MANIFEST.in for that. +# + +include MANIFEST.in +include COPYING +include INSTALL +include KNOWN_BUGS +include THANKS + +# bx-python +include chimerascan/bx/intervalcluster.h + +# pysam +include chimerascan/pysam/csamtools.pxd +include chimerascan/pysam/ctabix.pxd +include chimerascan/pysam/pysam_util.h +include chimerascan/pysam/samtools/*.h +include chimerascan/pysam/tabix/*.h + +# pysam tests +include chimerascan/pysam/tests/00README.txt +include chimerascan/pysam/tests/Makefile +include chimerascan/pysam/tests/ex1.fa +include chimerascan/pysam/tests/ex1.sam.gz +include chimerascan/pysam/tests/ex3.sam +include chimerascan/pysam/tests/ex4.sam +include chimerascan/pysam/tests/ex5.sam +include chimerascan/pysam/tests/ex6.sam +include chimerascan/pysam/tests/ex7.sam +include chimerascan/pysam/tests/ex8.sam +include chimerascan/pysam/tests/example.py +include chimerascan/pysam/tests/pysam_test.py +include chimerascan/pysam/tests/segfault_tests.py + +# tabix tests +include chimerascan/pysam/tests/tabix_test.py +include chimerascan/pysam/tests/example.gtf.gz +include chimerascan/pysam/tests/example.gtf.gz.tbi + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/PKG-INFO --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/PKG-INFO Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,16 @@ +Metadata-Version: 1.0 +Name: chimerascan +Version: 0.4.3-1 +Summary: chimerascan +Home-page: http://code.google.com/p/chimerascan/ +Author: Matthew Iyer +Author: Christopher Maher +Author-email: matthew.iyer@gmail.com +License: GPL3 +Description: + + chimerascan + *********** + + +Platform: ALL |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/THANKS --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/THANKS Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,18 @@ +We would like to thank James Taylor and the bx-python team for the +efficient interval tree implementation. The interval tree and +cluster tree implementations are well written and fast and help to +build the foundation of chimerascan. + +We would also like to thank Andreas Heger (andreas.heger) and the +'pysam' team for there excellent package. Pysam is used in almost +every stage of the chimerascan pipeline and facilitated rapid +prototyping and development of the tool. We appreciate their +efforts to make samtools accessible to the python community. + +Christopher Maher wrote the original 'chimerascan' in Perl and was +among the first to use a bioinformatics approach to discover gene +fusions in cancer. The Perl implementation was used as a basis for +this version. + +Finally, we would like to thank Arul Chinnaiyan and members of the +Chinnaiyan Lab for their support. \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/__init__.py --- a/chimerascan/__init__.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -# chimerascan versioning information -__version__ = "0.4.6" |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/bx/cluster.c --- a/chimerascan/bx/cluster.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,2316 +0,0 @@\n-/* Generated by Cython 0.13 on Thu Feb 3 22:15:38 2011 */\n-\n-#define PY_SSIZE_T_CLEAN\n-#include "Python.h"\n-#ifndef Py_PYTHON_H\n- #error Python headers needed to compile C extensions, please install development version of Python.\n-#else\n-\n-#include <stddef.h> /* For offsetof */\n-#ifndef offsetof\n-#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )\n-#endif\n-\n-#if !defined(WIN32) && !defined(MS_WINDOWS)\n- #ifndef __stdcall\n- #define __stdcall\n- #endif\n- #ifndef __cdecl\n- #define __cdecl\n- #endif\n- #ifndef __fastcall\n- #define __fastcall\n- #endif\n-#endif\n-\n-#ifndef DL_IMPORT\n- #define DL_IMPORT(t) t\n-#endif\n-#ifndef DL_EXPORT\n- #define DL_EXPORT(t) t\n-#endif\n-\n-#ifndef PY_LONG_LONG\n- #define PY_LONG_LONG LONG_LONG\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02040000\n- #define METH_COEXIST 0\n- #define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type)\n- #define PyDict_Contains(d,o) PySequence_Contains(d,o)\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02050000\n- typedef int Py_ssize_t;\n- #define PY_SSIZE_T_MAX INT_MAX\n- #define PY_SSIZE_T_MIN INT_MIN\n- #define PY_FORMAT_SIZE_T ""\n- #define PyInt_FromSsize_t(z) PyInt_FromLong(z)\n- #define PyInt_AsSsize_t(o) PyInt_AsLong(o)\n- #define PyNumber_Index(o) PyNumber_Int(o)\n- #define PyIndex_Check(o) PyNumber_Check(o)\n- #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02060000\n- #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)\n- #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)\n- #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)\n- #define PyVarObject_HEAD_INIT(type, size) \\\n- PyObject_HEAD_INIT(type) size,\n- #define PyType_Modified(t)\n-\n- typedef struct {\n- void *buf;\n- PyObject *obj;\n- Py_ssize_t len;\n- Py_ssize_t itemsize;\n- int readonly;\n- int ndim;\n- char *format;\n- Py_ssize_t *shape;\n- Py_ssize_t *strides;\n- Py_ssize_t *suboffsets;\n- void *internal;\n- } 
Py_buffer;\n-\n- #define PyBUF_SIMPLE 0\n- #define PyBUF_WRITABLE 0x0001\n- #define PyBUF_FORMAT 0x0004\n- #define PyBUF_ND 0x0008\n- #define PyBUF_STRIDES (0x0010 | PyBUF_ND)\n- #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)\n- #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)\n- #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)\n- #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)\n-\n-#endif\n-\n-#if PY_MAJOR_VERSION < 3\n- #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"\n-#else\n- #define __Pyx_BUILTIN_MODULE_NAME "builtins"\n-#endif\n-\n-#if PY_MAJOR_VERSION >= 3\n- #define Py_TPFLAGS_CHECKTYPES 0\n- #define Py_TPFLAGS_HAVE_INDEX 0\n-#endif\n-\n-#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)\n- #define Py_TPFLAGS_HAVE_NEWBUFFER 0\n-#endif\n-\n-#if PY_MAJOR_VERSION >= 3\n- #define PyBaseString_Type PyUnicode_Type\n- #define PyStringObject PyUnicodeObject\n- #define PyString_Type PyUnicode_Type\n- #define PyString_Check PyUnicode_Check\n- #define PyString_CheckExact PyUnicode_CheckExact\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02060000\n- #define PyBytesObject PyStringObject\n- #define PyBytes_Type PyString_Type\n- #define PyBytes_Check PyString_Check\n- #define PyBytes_CheckExact PyString_CheckExact\n- #define PyBytes_FromString PyString_FromString\n- #define PyBytes_FromStringAndSize PyString_FromStringAndSize\n- #define PyBytes_FromFormat PyString_FromFormat\n- #define PyBytes_DecodeEscape PyString_DecodeEscape\n- #define PyBytes_AsString PyString_AsString\n- #define PyBytes_AsStringAndSize PyString_AsStringAndSize\n- #define PyBytes_Size PyString_Size\n- #define PyBytes_AS_STRING PyString_AS_STRING\n- #define PyBytes_GET_SIZE PyString_GET_SIZE\n- #define PyBytes_Repr PyString_Repr\n- #define PyBytes_Concat '..b'\n- if (!py_code) goto bad;\n- py_frame = PyFrame_New(\n- PyThreadState_GET(), /*PyThreadState *tstate,*/\n- py_code, /*PyCodeObject *code,*/\n- py_globals, /*PyObject *globals,*/\n- 0 /*PyObject *locals*/\n- );\n- if (!py_frame) 
goto bad;\n- py_frame->f_lineno = __pyx_lineno;\n- PyTraceBack_Here(py_frame);\n-bad:\n- Py_XDECREF(py_srcfile);\n- Py_XDECREF(py_funcname);\n- Py_XDECREF(py_code);\n- Py_XDECREF(py_frame);\n-}\n-\n-static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {\n- while (t->p) {\n- #if PY_MAJOR_VERSION < 3\n- if (t->is_unicode) {\n- *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);\n- } else if (t->intern) {\n- *t->p = PyString_InternFromString(t->s);\n- } else {\n- *t->p = PyString_FromStringAndSize(t->s, t->n - 1);\n- }\n- #else /* Python 3+ has unicode identifiers */\n- if (t->is_unicode | t->is_str) {\n- if (t->intern) {\n- *t->p = PyUnicode_InternFromString(t->s);\n- } else if (t->encoding) {\n- *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);\n- } else {\n- *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);\n- }\n- } else {\n- *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);\n- }\n- #endif\n- if (!*t->p)\n- return -1;\n- ++t;\n- }\n- return 0;\n-}\n-\n-/* Type Conversion Functions */\n-\n-static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {\n- int is_true = x == Py_True;\n- if (is_true | (x == Py_False) | (x == Py_None)) return is_true;\n- else return PyObject_IsTrue(x);\n-}\n-\n-static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {\n- PyNumberMethods *m;\n- const char *name = NULL;\n- PyObject *res = NULL;\n-#if PY_VERSION_HEX < 0x03000000\n- if (PyInt_Check(x) || PyLong_Check(x))\n-#else\n- if (PyLong_Check(x))\n-#endif\n- return Py_INCREF(x), x;\n- m = Py_TYPE(x)->tp_as_number;\n-#if PY_VERSION_HEX < 0x03000000\n- if (m && m->nb_int) {\n- name = "int";\n- res = PyNumber_Int(x);\n- }\n- else if (m && m->nb_long) {\n- name = "long";\n- res = PyNumber_Long(x);\n- }\n-#else\n- if (m && m->nb_int) {\n- name = "int";\n- res = PyNumber_Long(x);\n- }\n-#endif\n- if (res) {\n-#if PY_VERSION_HEX < 0x03000000\n- if (!PyInt_Check(res) && !PyLong_Check(res)) {\n-#else\n- if (!PyLong_Check(res)) {\n-#endif\n- 
PyErr_Format(PyExc_TypeError,\n- "__%s__ returned non-%s (type %.200s)",\n- name, name, Py_TYPE(res)->tp_name);\n- Py_DECREF(res);\n- return NULL;\n- }\n- }\n- else if (!PyErr_Occurred()) {\n- PyErr_SetString(PyExc_TypeError,\n- "an integer is required");\n- }\n- return res;\n-}\n-\n-static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {\n- Py_ssize_t ival;\n- PyObject* x = PyNumber_Index(b);\n- if (!x) return -1;\n- ival = PyInt_AsSsize_t(x);\n- Py_DECREF(x);\n- return ival;\n-}\n-\n-static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {\n-#if PY_VERSION_HEX < 0x02050000\n- if (ival <= LONG_MAX)\n- return PyInt_FromLong((long)ival);\n- else {\n- unsigned char *bytes = (unsigned char *) &ival;\n- int one = 1; int little = (int)*(unsigned char*)&one;\n- return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0);\n- }\n-#else\n- return PyInt_FromSize_t(ival);\n-#endif\n-}\n-\n-static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) {\n- unsigned PY_LONG_LONG val = __Pyx_PyInt_AsUnsignedLongLong(x);\n- if (unlikely(val == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) {\n- return (size_t)-1;\n- } else if (unlikely(val != (unsigned PY_LONG_LONG)(size_t)val)) {\n- PyErr_SetString(PyExc_OverflowError,\n- "value too large to convert to size_t");\n- return (size_t)-1;\n- }\n- return (size_t)val;\n-}\n-\n-\n-#endif /* Py_PYTHON_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/bx/cluster.pyx --- a/chimerascan/bx/cluster.pyx Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,125 +0,0 @@ -""" -Downloaded from: -https://bitbucket.org/james_taylor/bx-python/wiki/Home - -Kanwei Li, 2009 -Inspired by previous ClusterTree - -Provides a ClusterTree data structure that supports efficient finding of -clusters of intervals that are within a certain distance apart. - -This clustering algorithm uses a binary tree structure. Nodes correspond to -non-overlapping intervals, where overlapping means that the distance between -two intervals is less or equal to the max separation. - -The tree self-balances using rotations based on the binomial sequence. Merges -among nodes are performed whenever a node is changed/added that will cause other -nodes to form a new cluster. - -C source code is in intervalcluster.c -""" - -cdef extern from "intervalcluster.h": - - cdef struct struct_interval: - int start - int end - int id - struct_interval * next - - ctypedef struct_interval interval - - cdef struct struct_clusternode: - int start - int end - struct_interval *interval_head - struct_interval *interval_tail - - ctypedef struct_clusternode clusternode - - cdef struct struct_clustertree: - int max_dist - int min_intervals - - struct_clusternode *root - - ctypedef struct_clustertree clustertree - - cdef struct struct_treeitr: - struct_treeitr *next - struct_clusternode *node - - ctypedef struct_treeitr treeitr - - clusternode* clusternode_insert(clustertree *tree, clusternode *node, int start, int end, int id) - clustertree* create_clustertree(int max_dist, int min_intervals) - treeitr* clusteritr(clustertree *tree) - void freeclusteritr(treeitr *itr) - void free_tree(clustertree *tree) - -cdef class ClusterTree: - cdef clustertree *tree - cdef int mincols - cdef int minregions - - def __cinit__(self, mincols, minregions): - self.tree = create_clustertree(mincols, minregions) - self.mincols = mincols - self.minregions = minregions - - def __dealloc__(self): - free_tree(self.tree) - - def insert(self, s, e, id): - ''' Insert an interval with start, end, id 
as parameters''' - if s > e: raise ValueError("Interval start must be before end") - self.tree.root = clusternode_insert(self.tree, self.tree.root, s, e, id) - - def getregions(self): - ''' Returns a list clusters in ascending order of starting position. - Each cluster is a tuple of (start, end, [sorted ids of intervals in cluster]) - - tree = ClusterTree(0, 0) - Insert (6, 7, 1), (1, 2, 3), (9, 10, 2), (3, 4, 0), (3, 8, 4) - tree.getregions() returns [(1, 2, [3]), (3, 8, [0, 1, 4]), (9, 10, [2])] - ''' - cdef treeitr *itr - cdef treeitr *head - cdef interval *ival - - regions = [] - head = clusteritr(self.tree) - itr = head - while (itr): - ids = [] - ival = itr.node.interval_head - while (ival): - ids.append(ival.id) - ival = ival.next - - regions.append( (itr.node.start, itr.node.end, sorted(ids)) ) - itr = itr.next - freeclusteritr(head) - return regions - - def getlines(self): - ''' Similar to getregions except it just returns a list of ids of intervals - The above example would return [3, 0, 1, 4, 2] - ''' - cdef treeitr *itr - cdef interval *ival - - lines = [] - itr = clusteritr(self.tree) - - while (itr): - ids = [] - ival = itr.node.interval_head - while (ival): - ids.append(ival.id) - ival = ival.next - - lines.extend(sorted(ids)) - itr = itr.next - return lines - |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/bx/intersection.c --- a/chimerascan/bx/intersection.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,7517 +0,0 @@\n-/* Generated by Cython 0.13 on Thu Feb 3 22:15:44 2011 */\n-\n-#define PY_SSIZE_T_CLEAN\n-#include "Python.h"\n-#ifndef Py_PYTHON_H\n- #error Python headers needed to compile C extensions, please install development version of Python.\n-#else\n-\n-#include <stddef.h> /* For offsetof */\n-#ifndef offsetof\n-#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )\n-#endif\n-\n-#if !defined(WIN32) && !defined(MS_WINDOWS)\n- #ifndef __stdcall\n- #define __stdcall\n- #endif\n- #ifndef __cdecl\n- #define __cdecl\n- #endif\n- #ifndef __fastcall\n- #define __fastcall\n- #endif\n-#endif\n-\n-#ifndef DL_IMPORT\n- #define DL_IMPORT(t) t\n-#endif\n-#ifndef DL_EXPORT\n- #define DL_EXPORT(t) t\n-#endif\n-\n-#ifndef PY_LONG_LONG\n- #define PY_LONG_LONG LONG_LONG\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02040000\n- #define METH_COEXIST 0\n- #define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type)\n- #define PyDict_Contains(d,o) PySequence_Contains(d,o)\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02050000\n- typedef int Py_ssize_t;\n- #define PY_SSIZE_T_MAX INT_MAX\n- #define PY_SSIZE_T_MIN INT_MIN\n- #define PY_FORMAT_SIZE_T ""\n- #define PyInt_FromSsize_t(z) PyInt_FromLong(z)\n- #define PyInt_AsSsize_t(o) PyInt_AsLong(o)\n- #define PyNumber_Index(o) PyNumber_Int(o)\n- #define PyIndex_Check(o) PyNumber_Check(o)\n- #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02060000\n- #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)\n- #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)\n- #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)\n- #define PyVarObject_HEAD_INIT(type, size) \\\n- PyObject_HEAD_INIT(type) size,\n- #define PyType_Modified(t)\n-\n- typedef struct {\n- void *buf;\n- PyObject *obj;\n- Py_ssize_t len;\n- Py_ssize_t itemsize;\n- int readonly;\n- int ndim;\n- char *format;\n- Py_ssize_t *shape;\n- Py_ssize_t *strides;\n- Py_ssize_t *suboffsets;\n- void *internal;\n- } 
Py_buffer;\n-\n- #define PyBUF_SIMPLE 0\n- #define PyBUF_WRITABLE 0x0001\n- #define PyBUF_FORMAT 0x0004\n- #define PyBUF_ND 0x0008\n- #define PyBUF_STRIDES (0x0010 | PyBUF_ND)\n- #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)\n- #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)\n- #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)\n- #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)\n-\n-#endif\n-\n-#if PY_MAJOR_VERSION < 3\n- #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"\n-#else\n- #define __Pyx_BUILTIN_MODULE_NAME "builtins"\n-#endif\n-\n-#if PY_MAJOR_VERSION >= 3\n- #define Py_TPFLAGS_CHECKTYPES 0\n- #define Py_TPFLAGS_HAVE_INDEX 0\n-#endif\n-\n-#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)\n- #define Py_TPFLAGS_HAVE_NEWBUFFER 0\n-#endif\n-\n-#if PY_MAJOR_VERSION >= 3\n- #define PyBaseString_Type PyUnicode_Type\n- #define PyStringObject PyUnicodeObject\n- #define PyString_Type PyUnicode_Type\n- #define PyString_Check PyUnicode_Check\n- #define PyString_CheckExact PyUnicode_CheckExact\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02060000\n- #define PyBytesObject PyStringObject\n- #define PyBytes_Type PyString_Type\n- #define PyBytes_Check PyString_Check\n- #define PyBytes_CheckExact PyString_CheckExact\n- #define PyBytes_FromString PyString_FromString\n- #define PyBytes_FromStringAndSize PyString_FromStringAndSize\n- #define PyBytes_FromFormat PyString_FromFormat\n- #define PyBytes_DecodeEscape PyString_DecodeEscape\n- #define PyBytes_AsString PyString_AsString\n- #define PyBytes_AsStringAndSize PyString_AsStringAndSize\n- #define PyBytes_Size PyString_Size\n- #define PyBytes_AS_STRING PyString_AS_STRING\n- #define PyBytes_GET_SIZE PyString_GET_SIZE\n- #define PyBytes_Repr PyString_Repr\n- #define PyBytes_Concat '..b'\n- if (!py_code) goto bad;\n- py_frame = PyFrame_New(\n- PyThreadState_GET(), /*PyThreadState *tstate,*/\n- py_code, /*PyCodeObject *code,*/\n- py_globals, /*PyObject *globals,*/\n- 0 /*PyObject *locals*/\n- );\n- if (!py_frame) 
goto bad;\n- py_frame->f_lineno = __pyx_lineno;\n- PyTraceBack_Here(py_frame);\n-bad:\n- Py_XDECREF(py_srcfile);\n- Py_XDECREF(py_funcname);\n- Py_XDECREF(py_code);\n- Py_XDECREF(py_frame);\n-}\n-\n-static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {\n- while (t->p) {\n- #if PY_MAJOR_VERSION < 3\n- if (t->is_unicode) {\n- *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);\n- } else if (t->intern) {\n- *t->p = PyString_InternFromString(t->s);\n- } else {\n- *t->p = PyString_FromStringAndSize(t->s, t->n - 1);\n- }\n- #else /* Python 3+ has unicode identifiers */\n- if (t->is_unicode | t->is_str) {\n- if (t->intern) {\n- *t->p = PyUnicode_InternFromString(t->s);\n- } else if (t->encoding) {\n- *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);\n- } else {\n- *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);\n- }\n- } else {\n- *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);\n- }\n- #endif\n- if (!*t->p)\n- return -1;\n- ++t;\n- }\n- return 0;\n-}\n-\n-/* Type Conversion Functions */\n-\n-static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {\n- int is_true = x == Py_True;\n- if (is_true | (x == Py_False) | (x == Py_None)) return is_true;\n- else return PyObject_IsTrue(x);\n-}\n-\n-static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {\n- PyNumberMethods *m;\n- const char *name = NULL;\n- PyObject *res = NULL;\n-#if PY_VERSION_HEX < 0x03000000\n- if (PyInt_Check(x) || PyLong_Check(x))\n-#else\n- if (PyLong_Check(x))\n-#endif\n- return Py_INCREF(x), x;\n- m = Py_TYPE(x)->tp_as_number;\n-#if PY_VERSION_HEX < 0x03000000\n- if (m && m->nb_int) {\n- name = "int";\n- res = PyNumber_Int(x);\n- }\n- else if (m && m->nb_long) {\n- name = "long";\n- res = PyNumber_Long(x);\n- }\n-#else\n- if (m && m->nb_int) {\n- name = "int";\n- res = PyNumber_Long(x);\n- }\n-#endif\n- if (res) {\n-#if PY_VERSION_HEX < 0x03000000\n- if (!PyInt_Check(res) && !PyLong_Check(res)) {\n-#else\n- if (!PyLong_Check(res)) {\n-#endif\n- 
PyErr_Format(PyExc_TypeError,\n- "__%s__ returned non-%s (type %.200s)",\n- name, name, Py_TYPE(res)->tp_name);\n- Py_DECREF(res);\n- return NULL;\n- }\n- }\n- else if (!PyErr_Occurred()) {\n- PyErr_SetString(PyExc_TypeError,\n- "an integer is required");\n- }\n- return res;\n-}\n-\n-static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {\n- Py_ssize_t ival;\n- PyObject* x = PyNumber_Index(b);\n- if (!x) return -1;\n- ival = PyInt_AsSsize_t(x);\n- Py_DECREF(x);\n- return ival;\n-}\n-\n-static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {\n-#if PY_VERSION_HEX < 0x02050000\n- if (ival <= LONG_MAX)\n- return PyInt_FromLong((long)ival);\n- else {\n- unsigned char *bytes = (unsigned char *) &ival;\n- int one = 1; int little = (int)*(unsigned char*)&one;\n- return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0);\n- }\n-#else\n- return PyInt_FromSize_t(ival);\n-#endif\n-}\n-\n-static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) {\n- unsigned PY_LONG_LONG val = __Pyx_PyInt_AsUnsignedLongLong(x);\n- if (unlikely(val == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) {\n- return (size_t)-1;\n- } else if (unlikely(val != (unsigned PY_LONG_LONG)(size_t)val)) {\n- PyErr_SetString(PyExc_OverflowError,\n- "value too large to convert to size_t");\n- return (size_t)-1;\n- }\n- return (size_t)val;\n-}\n-\n-\n-#endif /* Py_PYTHON_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/bx/intersection.pyx --- a/chimerascan/bx/intersection.pyx Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,472 +0,0 @@\n-"""\n-Downloaded from:\n-https://bitbucket.org/james_taylor/bx-python/wiki/Home\n-\n-Data structure for performing intersect queries on a set of intervals which\n-preserves all information about the intervals (unlike bitset projection methods).\n-\n-:Authors: James Taylor (james@jamestaylor.org),\n- Ian Schenk (ian.schenck@gmail.com),\n- Brent Pedersen (bpederse@gmail.com)\n-"""\n-\n-# Historical note:\n-# This module original contained an implementation based on sorted endpoints\n-# and a binary search, using an idea from Scott Schwartz and Piotr Berman.\n-# Later an interval tree implementation was implemented by Ian for Galaxy\'s\n-# join tool (see `bx.intervals.operations.quicksect.py`). This was then\n-# converted to Cython by Brent, who also added support for\n-# upstream/downstream/neighbor queries. This was modified by James to\n-# handle half-open intervals strictly, to maintain sort order, and to\n-# implement the same interface as the original Intersecter.\n-\n-import operator\n-\n-cdef extern from "stdlib.h":\n- int ceil(float f)\n- float log(float f)\n- int RAND_MAX\n- int rand()\n- int strlen(char *)\n- int iabs(int)\n-\n-cdef inline int imax2(int a, int b):\n- if b > a: return b\n- return a\n-\n-cdef inline int imax3(int a, int b, int c):\n- if b > a:\n- if c > b:\n- return c\n- return b\n- if a > c:\n- return a\n- return c\n-\n-cdef inline int imin3(int a, int b, int c):\n- if b < a:\n- if c < b:\n- return c\n- return b\n- if a < c:\n- return a\n- return c\n-\n-cdef inline int imin2(int a, int b):\n- if b < a: return b\n- return a\n-\n-cdef float nlog = -1.0 / log(0.5)\n-\n-cdef class IntervalNode:\n- """\n- A single node of an `IntervalTree`.\n- \n- NOTE: Unless you really know what you are doing, you probably should us\n- `IntervalTree` rather than using this directly. 
\n- """\n- cdef float priority\n- cdef public object interval\n- cdef public int start, end\n- cdef int minend, maxend, minstart\n- cdef IntervalNode cleft, cright, croot\n-\n- property left_node:\n- def __get__(self):\n- return self.cleft if self.cleft is not EmptyNode else None\n- property right_node:\n- def __get__(self):\n- return self.cright if self.cright is not EmptyNode else None\n- property root_node:\n- def __get__(self):\n- return self.croot if self.croot is not EmptyNode else None\n- \n- def __repr__(self):\n- return "IntervalNode(%i, %i)" % (self.start, self.end)\n-\n- def __cinit__(IntervalNode self, int start, int end, object interval):\n- # Python lacks the binomial distribution, so we convert a\n- # uniform into a binomial because it naturally scales with\n- # tree size. Also, python\'s uniform is perfect since the\n- # upper limit is not inclusive, which gives us undefined here.\n- self.priority = ceil(nlog * log(-1.0/(1.0 * rand()/RAND_MAX - 1)))\n- self.start = start\n- self.end = end\n- self.interval = interval\n- self.maxend = end\n- self.minstart = start\n- self.minend = end\n- self.cleft = EmptyNode\n- self.cright = EmptyNode\n- self.croot = EmptyNode\n- \n- cpdef IntervalNode insert(IntervalNode self, int start, int end, object interval):\n- """\n- Insert a new IntervalNode into the tree of which this node is\n- currently the root. 
The return value is the new root of the tree (which\n- may or may not be this node!)\n- """\n- cdef IntervalNode croot = self\n- # If starts are the same, decide which to add interval to based on\n- # end, thus maintaining sortedness relative to start/end\n- cdef int decision_endpoint = start\n- if start == self.start:\n- decision_endpoint = end\n- \n- if decision_endpoint > self.start:\n- #'..b'(1, 2, strand="-"), num_intervals=3)\n- [Interval(3, 7), Interval(3, 40), Interval(13, 50)]\n-\n- \n- """\n- \n- cdef IntervalNode root\n- \n- def __cinit__( self ):\n- root = None\n- \n- # ---- Position based interfaces -----------------------------------------\n- \n- def insert( self, int start, int end, object value=None ):\n- """\n- Insert the interval [start,end) associated with value `value`.\n- """\n- if self.root is None:\n- self.root = IntervalNode( start, end, value )\n- else:\n- self.root = self.root.insert( start, end, value )\n- \n- add = insert\n-\n-\n- def find( self, start, end ):\n- """\n- Return a sorted list of all intervals overlapping [start,end).\n- """\n- if self.root is None:\n- return []\n- return self.root.find( start, end )\n- \n- def before( self, position, num_intervals=1, max_dist=2500 ):\n- """\n- Find `num_intervals` intervals that lie before `position` and are no\n- further than `max_dist` positions away\n- """\n- if self.root is None:\n- return []\n- return self.root.left( position, num_intervals, max_dist )\n-\n- def after( self, position, num_intervals=1, max_dist=2500 ):\n- """\n- Find `num_intervals` intervals that lie after `position` and are no\n- further than `max_dist` positions away\n- """\n- if self.root is None:\n- return []\n- return self.root.right( position, num_intervals, max_dist )\n-\n- # ---- Interval-like object based interfaces -----------------------------\n-\n- def insert_interval( self, interval ):\n- """\n- Insert an "interval" like object (one with at least start and end\n- attributes)\n- """\n- self.insert( 
interval.start, interval.end, interval )\n-\n- add_interval = insert_interval\n-\n- def before_interval( self, interval, num_intervals=1, max_dist=2500 ):\n- """\n- Find `num_intervals` intervals that lie completely before `interval`\n- and are no further than `max_dist` positions away\n- """\n- if self.root is None:\n- return []\n- return self.root.left( interval.start, num_intervals, max_dist )\n-\n- def after_interval( self, interval, num_intervals=1, max_dist=2500 ):\n- """\n- Find `num_intervals` intervals that lie completely after `interval` and\n- are no further than `max_dist` positions away\n- """\n- if self.root is None:\n- return []\n- return self.root.right( interval.end, num_intervals, max_dist )\n-\n- def upstream_of_interval( self, interval, num_intervals=1, max_dist=2500 ):\n- """\n- Find `num_intervals` intervals that lie completely upstream of\n- `interval` and are no further than `max_dist` positions away\n- """\n- if self.root is None:\n- return []\n- if interval.strand == -1 or interval.strand == "-":\n- return self.root.right( interval.end, num_intervals, max_dist )\n- else:\n- return self.root.left( interval.start, num_intervals, max_dist )\n-\n- def downstream_of_interval( self, interval, num_intervals=1, max_dist=2500 ):\n- """\n- Find `num_intervals` intervals that lie completely downstream of\n- `interval` and are no further than `max_dist` positions away\n- """\n- if self.root is None:\n- return []\n- if interval.strand == -1 or interval.strand == "-":\n- return self.root.left( interval.start, num_intervals, max_dist )\n- else:\n- return self.root.right( interval.end, num_intervals, max_dist )\n- \n- def traverse(self, fn):\n- """\n- call fn for each element in the tree\n- """\n- if self.root is None:\n- return None\n- return self.root.traverse(fn)\n-\n-# For backward compatibility\n-Intersecter = IntervalTree\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/bx/intervalcluster.c --- a/chimerascan/bx/intervalcluster.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,268 +0,0 @@\n-/*\n-\tDownloaded from:\n-\thttps://bitbucket.org/james_taylor/bx-python/wiki/Home\n-\n- Kanwei Li, 2009\n- Inspired by previous ClusterTree\n- \n- This clustering algorithm uses a binary tree structure. Nodes correspond to \n- non-overlapping intervals, where overlapping means that the distance between\n- two intervals is less or equal to max_dist, which is the max separation.\n- \n- The tree self-balances using rotations based on the binomial sequence. Merges\n- among nodes are performed whenever a node is changed/added that will cause other\n- nodes to form a new cluster.\n-*/\n-#include <stdlib.h>\n-#include <stdio.h>\n-#include <math.h>\n-#include "intervalcluster.h"\n-\n-#define ALLOC(pt) (malloc(sizeof(pt)))\n-\n-static int min(int a, int b) {\n- if( a < b )\n- return a;\n- else\n- return b;\n-}\n-\n-static int max(int a, int b) {\n- if( a > b )\n- return a;\n- else\n- return b;\n-}\n-\n-/* Create new tree with given max_dist (max distance between intervals to be\n- considered a cluster), and min_intervals, the minimum number of intervals\n- needed for a cluster to be considered significant */\n-clustertree* create_clustertree(int max_dist, int min_intervals) {\n- clustertree *tree = ALLOC(clustertree);\n- tree->max_dist = max_dist;\n- tree->min_intervals = min_intervals;\n- tree->root = NULL;\n- return tree;\n-}\n-\n-static interval* create_interval(int start, int end, int id) {\n- interval *ival = ALLOC(interval);\n- \n- ival->start = start;\n- ival->end = end;\n- ival->id = id;\n- ival->next = NULL;\n- return ival;\n-}\n-\n-static clusternode* create_node(int start, int end, int id) {\n- clusternode *new_node = ALLOC(clusternode);\n- \n- new_node->start = start;\n- new_node->end = end;\n- new_node->interval_head = create_interval(start, end, id);\n- new_node->interval_tail = new_node->interval_head;\n- new_node->num_ivals = 1;\n- new_node->left = NULL;\n- new_node->right = NULL;\n- \n- double uniform = ((double)rand()) / 
(RAND_MAX);\n- if (uniform == 1.0)\n- uniform = 0;\n- new_node->priority = (int)ceil( (-1.0 / log(.5)) * log( -1.0 / (uniform - 1)));\n- \n- return new_node;\n-}\n-\n-static void recursively_free_intervals(interval *ival) {\n- interval *next;\n- if(ival) {\n- next = ival->next;\n- free(ival);\n- recursively_free_intervals(next);\n- }\n-}\n-\n-static void recursively_free_nodes(clusternode *node) {\n- if(node) {\n- recursively_free_nodes(node->left);\n- recursively_free_nodes(node->right);\n- recursively_free_intervals(node->interval_head);\n- free(node);\n- }\n-}\n-\n-void free_tree(clustertree *tree) {\n- recursively_free_nodes(tree->root);\n- free(tree);\n-}\n-\n-void cluster_rotateright(clusternode **node) {\n- clusternode* root = (*node)->left;\n- (*node)->left = (*node)->left->right;\n- root->right = (*node);\n- *node = root;\n-}\n-\n-void cluster_rotateleft(clusternode **node) {\n- clusternode* root = (*node)->right;\n- (*node)->right = (*node)->right->left;\n- root->left = (*node);\n- *node = root;\n-}\n-\n-/* Go down the tree and merge nodes if necessary */\n-void cluster_fixup(clustertree *tree, clusternode **ln, clusternode **rn) {\n- clusternode* local = *ln;\n- clusternode* root = *rn;\n- int maxstart = max(root->start, local->start);\n- int maxend = max(local->end, root->end);\n- int minstart = min(root->start, local->start);\n- int minend = min(root->end, local->end);\n-\n- if( maxstart - minend <= tree->max_dist ) {\n- /* Have to merge this node and children */\n- root->start = minstart;\n- root->end = maxend;\n- root->interval_tail->next = local->interval_head;\n- root->interval_tail = local->interval_tail;\n- root->num_ivals += local->num_ivals;\n- if( local->right) cluster_fixup(tree, &(local->right), rn);\n- if( local->left) cluster_fixup(tree,'..b'ixup(tree, &(local->left), rn);\n- }\n- if(local->right) {\n- cluster_fixup(tree, &(local->right), rn);\n- }\n-}\n-\n-/* Pyrex "getregions" implements this. 
Only used for C debugging */\n-void clustereach(clustertree *tree, clusternode *node) {\n- interval* ival;\n- if (node == NULL) {\n- exit(1); /* Shouldn\'t happen */\n- }\n- if (node->left != NULL) {\n- clustereach(tree, node->left);\n- }\n- printf("Node: %d\\t%d\\n", node->start, node->end);\n- ival = node->interval_head;\n- while(ival) {\n- printf("\\tInterval %d: %d\\t%d\\n", ival->id, ival->start, ival->end);\n- ival = ival->next;\n- }\n- \n- if (node->right != NULL) {\n- clustereach(tree, node->right);\n- }\n-}\n-\n-void clusteritr_recursive(clustertree *tree, clusternode *node, treeitr* *itr) {\n- treeitr *newitr;\n-\n- if (node == NULL) {\n- return;\n- }\n- if (node->right != NULL) {\n- clusteritr_recursive(tree, node->right, itr);\n- }\n- if (node->num_ivals >= tree->min_intervals) {\n- newitr = ALLOC(treeitr);\n- newitr->next = *itr;\n- newitr->node = node;\n- *itr = newitr;\n- }\n- if (node->left != NULL) {\n- clusteritr_recursive(tree, node->left, itr);\n- }\n-}\n-\n-/* Create an infix iterator */\n-treeitr* clusteritr(clustertree *tree) {\n- treeitr *itr = NULL;\n- \n- clusteritr_recursive(tree, tree->root, &itr);\n- if (itr != NULL) {\n- return itr;\n- }\n- return NULL;\n-}\n-\n-/* Free an infix iterator */\n-void freeclusteritr(treeitr *itr) {\n- if (itr != NULL) {\n- \tif (itr->next != NULL) {\n- \t\tfreeclusteritr(itr->next);\n- \t}\n- \tfree(itr);\n- }\n-}\n-\n-/* Insert based on the start position of intervals */\n-clusternode* clusternode_insert(clustertree *tree, clusternode *node, int start, int end, int id) {\n- int oldstart;\n- int oldend;\n- interval* ival;\n- \n- // printf("Inserting %d %d %d\\n", start, end, id);\n- if (node == NULL) {\n- node = create_node(start, end, id);\n- \n- } else if ( (start - tree->max_dist) > node->end ) { /* We\'re to the right of this cluster */\n- node->right = clusternode_insert(tree, node->right, start, end, id);\n- if (node->priority < node->right->priority) cluster_rotateleft(&node);\n- \n- } else if ( 
(end + tree->max_dist) < node->start) { /* We\'re to the left of this cluster */\n- node->left = clusternode_insert(tree, node->left, start, end, id);\n- if (node->priority < node->left->priority) cluster_rotateright(&node);\n- \n- } else { /* We\'re in the range of this cluster */\n- /* Update the start and end to match to new values */\n- oldstart = node->start;\n- oldend = node->end;\n- node->start = min(start, node->start);\n- node->end = max(end, node->end);\n- ival = create_interval(start, end, id);\n- ival->next = node->interval_head; /* Add this interval as the head of the interval list */\n- node->interval_head = ival;\n- node->num_ivals += 1;\n- \n- if ( oldstart > node->start && node->left != NULL ) { /* New interval added to the start, and there\'s a left child */\n- cluster_fixup(tree, &(node->left), &node);\n- }\n- if ( oldend < node->end && node->right != NULL ) { /* New interval added to the end, and there\'s a right child */\n- cluster_fixup(tree, &(node->right), &node);\n- }\n- }\n- return node;\n-}\n-\n-int main() {\n- \n- // Simple test\n- clustertree* tree = create_clustertree(0, 1);\n- \n- tree->root = clusternode_insert(tree, tree->root, 3, 4, 0);\n- tree->root = clusternode_insert(tree, tree->root, 6, 7, 1);\n- tree->root = clusternode_insert(tree, tree->root, 9, 10, 2);\n- tree->root = clusternode_insert(tree, tree->root, 1, 2, 3);\n- tree->root = clusternode_insert(tree, tree->root, 3, 8, 4);\n- \n- clustereach(tree, tree->root);\n- return 0;\n- \n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/bx/intervalcluster.h --- a/chimerascan/bx/intervalcluster.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,44 +0,0 @@ -/* - Downloaded from: - https://bitbucket.org/james_taylor/bx-python/wiki/Home -*/ - -typedef struct struct_interval { - int start; - int end; - int id; - - struct struct_interval *next; -} interval; - -typedef struct struct_clusternode { - int start; - int end; - int priority; - - struct struct_interval *interval_head; - struct struct_interval *interval_tail; - int num_ivals; - - struct struct_clusternode *left; - struct struct_clusternode *right; -} clusternode; - -typedef struct { - int max_dist; - int min_intervals; - - clusternode *root; -} clustertree; - -typedef struct struct_treeitr { - struct struct_treeitr *next; - struct struct_clusternode *node; -} treeitr; - - -clusternode* clusternode_insert(clustertree *tree, clusternode *node, int start, int end, int id); -clustertree* create_clustertree(int max_dist, int min_intervals); -treeitr* clusteritr(clustertree *tree); -void freeclusteritr(treeitr *itr); -void free_tree(clustertree *tree); |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan.xml Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,56 @@ +<tool id="chimerascan" name="ChimeraScan"> + <description>A tool for identifying chimeric transcription in sequencing data.</description> + <command detect_errors="exit_code"><![CDATA[ + bash $__tool_directory__/run.sh $__tool_directory__ && + python $__tool_directory__/chimerascan_run.py -p 8 $__tool_directory__/myindex + #if $input_type_conditional.chimerascan_input_type == "paired" + $input_type_conditional.input_1 $input_type_conditional.input_2 + #else + $input_type_conditional.input.forward $input_type_conditional.input.reverse + #end if + $galaxy_output + ]]> + </command> + <inputs> + <conditional name="input_type_conditional"> + <param name="chimerascan_input_type" type="select" label="Input Type" help="Select between paired and paired collection"> + <option value="paired" selected="true">Paired</option> + <option value="paired_collection">Paired Collection</option> + </param> + <when value="paired"> + <param format='fastq' name='input_1' type='data' label='FASTQ file, forward reads' /> + <param format='fastq' name='input_2' type='data' label='FASTQ file, reverse reads' /> + </when> + <when value="paired_collection"> + <param format="fastq" name='input' type="data_collection" collection_type="paired" label="Select a paired collection" help="Specify paired dataset collection containing paired reads"/> + </when> + </conditional> + </inputs> + <outputs> + <data name="galaxy_output" format="bed" /> + </outputs> + + <tests> + <test> + <param name="input1" value="input1.fastq"/> + <param name="input2" value="input2.fastq"/> + <output name="galaxy_output" file="outputfile.bed" ftype="bed"/> + </test> + <test> + <param name="fastq_input"> + <collection type="paired"> + <element name="forward" value="input1.fastq" /> + <element name="reverse" value="input2.fastq" /> + </collection> + </param> + <param name="input_type" value="paired_collection" /> + <output name="galaxy_output" file="outputfile.bed" ftype="bed"/> + </test> + </tests> + + <help> + 
Bowtie index files must be placed inside 'myindex folder' + A tool for identifying chimeric transcription in sequencing data. + </help> + +</tool> |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/__init__.py Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,2 @@ +# chimerascan versioning information +__version__ = "0.4.6" |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/bx/cluster.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/bx/cluster.c Thu Oct 12 16:26:36 2017 -0400 |
b |
b'@@ -0,0 +1,2316 @@\n+/* Generated by Cython 0.13 on Thu Feb 3 22:15:38 2011 */\n+\n+#define PY_SSIZE_T_CLEAN\n+#include "Python.h"\n+#ifndef Py_PYTHON_H\n+ #error Python headers needed to compile C extensions, please install development version of Python.\n+#else\n+\n+#include <stddef.h> /* For offsetof */\n+#ifndef offsetof\n+#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )\n+#endif\n+\n+#if !defined(WIN32) && !defined(MS_WINDOWS)\n+ #ifndef __stdcall\n+ #define __stdcall\n+ #endif\n+ #ifndef __cdecl\n+ #define __cdecl\n+ #endif\n+ #ifndef __fastcall\n+ #define __fastcall\n+ #endif\n+#endif\n+\n+#ifndef DL_IMPORT\n+ #define DL_IMPORT(t) t\n+#endif\n+#ifndef DL_EXPORT\n+ #define DL_EXPORT(t) t\n+#endif\n+\n+#ifndef PY_LONG_LONG\n+ #define PY_LONG_LONG LONG_LONG\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02040000\n+ #define METH_COEXIST 0\n+ #define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type)\n+ #define PyDict_Contains(d,o) PySequence_Contains(d,o)\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02050000\n+ typedef int Py_ssize_t;\n+ #define PY_SSIZE_T_MAX INT_MAX\n+ #define PY_SSIZE_T_MIN INT_MIN\n+ #define PY_FORMAT_SIZE_T ""\n+ #define PyInt_FromSsize_t(z) PyInt_FromLong(z)\n+ #define PyInt_AsSsize_t(o) PyInt_AsLong(o)\n+ #define PyNumber_Index(o) PyNumber_Int(o)\n+ #define PyIndex_Check(o) PyNumber_Check(o)\n+ #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02060000\n+ #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)\n+ #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)\n+ #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)\n+ #define PyVarObject_HEAD_INIT(type, size) \\\n+ PyObject_HEAD_INIT(type) size,\n+ #define PyType_Modified(t)\n+\n+ typedef struct {\n+ void *buf;\n+ PyObject *obj;\n+ Py_ssize_t len;\n+ Py_ssize_t itemsize;\n+ int readonly;\n+ int ndim;\n+ char *format;\n+ Py_ssize_t *shape;\n+ Py_ssize_t *strides;\n+ Py_ssize_t *suboffsets;\n+ void *internal;\n+ } 
Py_buffer;\n+\n+ #define PyBUF_SIMPLE 0\n+ #define PyBUF_WRITABLE 0x0001\n+ #define PyBUF_FORMAT 0x0004\n+ #define PyBUF_ND 0x0008\n+ #define PyBUF_STRIDES (0x0010 | PyBUF_ND)\n+ #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)\n+ #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)\n+ #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)\n+ #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)\n+\n+#endif\n+\n+#if PY_MAJOR_VERSION < 3\n+ #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"\n+#else\n+ #define __Pyx_BUILTIN_MODULE_NAME "builtins"\n+#endif\n+\n+#if PY_MAJOR_VERSION >= 3\n+ #define Py_TPFLAGS_CHECKTYPES 0\n+ #define Py_TPFLAGS_HAVE_INDEX 0\n+#endif\n+\n+#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)\n+ #define Py_TPFLAGS_HAVE_NEWBUFFER 0\n+#endif\n+\n+#if PY_MAJOR_VERSION >= 3\n+ #define PyBaseString_Type PyUnicode_Type\n+ #define PyStringObject PyUnicodeObject\n+ #define PyString_Type PyUnicode_Type\n+ #define PyString_Check PyUnicode_Check\n+ #define PyString_CheckExact PyUnicode_CheckExact\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02060000\n+ #define PyBytesObject PyStringObject\n+ #define PyBytes_Type PyString_Type\n+ #define PyBytes_Check PyString_Check\n+ #define PyBytes_CheckExact PyString_CheckExact\n+ #define PyBytes_FromString PyString_FromString\n+ #define PyBytes_FromStringAndSize PyString_FromStringAndSize\n+ #define PyBytes_FromFormat PyString_FromFormat\n+ #define PyBytes_DecodeEscape PyString_DecodeEscape\n+ #define PyBytes_AsString PyString_AsString\n+ #define PyBytes_AsStringAndSize PyString_AsStringAndSize\n+ #define PyBytes_Size PyString_Size\n+ #define PyBytes_AS_STRING PyString_AS_STRING\n+ #define PyBytes_GET_SIZE PyString_GET_SIZE\n+ #define PyBytes_Repr PyString_Repr\n+ #define PyBytes_Concat '..b'\n+ if (!py_code) goto bad;\n+ py_frame = PyFrame_New(\n+ PyThreadState_GET(), /*PyThreadState *tstate,*/\n+ py_code, /*PyCodeObject *code,*/\n+ py_globals, /*PyObject *globals,*/\n+ 0 /*PyObject *locals*/\n+ );\n+ if (!py_frame) 
goto bad;\n+ py_frame->f_lineno = __pyx_lineno;\n+ PyTraceBack_Here(py_frame);\n+bad:\n+ Py_XDECREF(py_srcfile);\n+ Py_XDECREF(py_funcname);\n+ Py_XDECREF(py_code);\n+ Py_XDECREF(py_frame);\n+}\n+\n+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {\n+ while (t->p) {\n+ #if PY_MAJOR_VERSION < 3\n+ if (t->is_unicode) {\n+ *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);\n+ } else if (t->intern) {\n+ *t->p = PyString_InternFromString(t->s);\n+ } else {\n+ *t->p = PyString_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ #else /* Python 3+ has unicode identifiers */\n+ if (t->is_unicode | t->is_str) {\n+ if (t->intern) {\n+ *t->p = PyUnicode_InternFromString(t->s);\n+ } else if (t->encoding) {\n+ *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);\n+ } else {\n+ *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ } else {\n+ *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ #endif\n+ if (!*t->p)\n+ return -1;\n+ ++t;\n+ }\n+ return 0;\n+}\n+\n+/* Type Conversion Functions */\n+\n+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {\n+ int is_true = x == Py_True;\n+ if (is_true | (x == Py_False) | (x == Py_None)) return is_true;\n+ else return PyObject_IsTrue(x);\n+}\n+\n+static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {\n+ PyNumberMethods *m;\n+ const char *name = NULL;\n+ PyObject *res = NULL;\n+#if PY_VERSION_HEX < 0x03000000\n+ if (PyInt_Check(x) || PyLong_Check(x))\n+#else\n+ if (PyLong_Check(x))\n+#endif\n+ return Py_INCREF(x), x;\n+ m = Py_TYPE(x)->tp_as_number;\n+#if PY_VERSION_HEX < 0x03000000\n+ if (m && m->nb_int) {\n+ name = "int";\n+ res = PyNumber_Int(x);\n+ }\n+ else if (m && m->nb_long) {\n+ name = "long";\n+ res = PyNumber_Long(x);\n+ }\n+#else\n+ if (m && m->nb_int) {\n+ name = "int";\n+ res = PyNumber_Long(x);\n+ }\n+#endif\n+ if (res) {\n+#if PY_VERSION_HEX < 0x03000000\n+ if (!PyInt_Check(res) && !PyLong_Check(res)) {\n+#else\n+ if (!PyLong_Check(res)) {\n+#endif\n+ 
PyErr_Format(PyExc_TypeError,\n+ "__%s__ returned non-%s (type %.200s)",\n+ name, name, Py_TYPE(res)->tp_name);\n+ Py_DECREF(res);\n+ return NULL;\n+ }\n+ }\n+ else if (!PyErr_Occurred()) {\n+ PyErr_SetString(PyExc_TypeError,\n+ "an integer is required");\n+ }\n+ return res;\n+}\n+\n+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {\n+ Py_ssize_t ival;\n+ PyObject* x = PyNumber_Index(b);\n+ if (!x) return -1;\n+ ival = PyInt_AsSsize_t(x);\n+ Py_DECREF(x);\n+ return ival;\n+}\n+\n+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {\n+#if PY_VERSION_HEX < 0x02050000\n+ if (ival <= LONG_MAX)\n+ return PyInt_FromLong((long)ival);\n+ else {\n+ unsigned char *bytes = (unsigned char *) &ival;\n+ int one = 1; int little = (int)*(unsigned char*)&one;\n+ return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0);\n+ }\n+#else\n+ return PyInt_FromSize_t(ival);\n+#endif\n+}\n+\n+static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) {\n+ unsigned PY_LONG_LONG val = __Pyx_PyInt_AsUnsignedLongLong(x);\n+ if (unlikely(val == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) {\n+ return (size_t)-1;\n+ } else if (unlikely(val != (unsigned PY_LONG_LONG)(size_t)val)) {\n+ PyErr_SetString(PyExc_OverflowError,\n+ "value too large to convert to size_t");\n+ return (size_t)-1;\n+ }\n+ return (size_t)val;\n+}\n+\n+\n+#endif /* Py_PYTHON_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/bx/cluster.pyx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/bx/cluster.pyx Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,125 @@ +""" +Downloaded from: +https://bitbucket.org/james_taylor/bx-python/wiki/Home + +Kanwei Li, 2009 +Inspired by previous ClusterTree + +Provides a ClusterTree data structure that supports efficient finding of +clusters of intervals that are within a certain distance apart. + +This clustering algorithm uses a binary tree structure. Nodes correspond to +non-overlapping intervals, where overlapping means that the distance between +two intervals is less or equal to the max separation. + +The tree self-balances using rotations based on the binomial sequence. Merges +among nodes are performed whenever a node is changed/added that will cause other +nodes to form a new cluster. + +C source code is in intervalcluster.c +""" + +cdef extern from "intervalcluster.h": + + cdef struct struct_interval: + int start + int end + int id + struct_interval * next + + ctypedef struct_interval interval + + cdef struct struct_clusternode: + int start + int end + struct_interval *interval_head + struct_interval *interval_tail + + ctypedef struct_clusternode clusternode + + cdef struct struct_clustertree: + int max_dist + int min_intervals + + struct_clusternode *root + + ctypedef struct_clustertree clustertree + + cdef struct struct_treeitr: + struct_treeitr *next + struct_clusternode *node + + ctypedef struct_treeitr treeitr + + clusternode* clusternode_insert(clustertree *tree, clusternode *node, int start, int end, int id) + clustertree* create_clustertree(int max_dist, int min_intervals) + treeitr* clusteritr(clustertree *tree) + void freeclusteritr(treeitr *itr) + void free_tree(clustertree *tree) + +cdef class ClusterTree: + cdef clustertree *tree + cdef int mincols + cdef int minregions + + def __cinit__(self, mincols, minregions): + self.tree = create_clustertree(mincols, minregions) + self.mincols = mincols + self.minregions = minregions + + def __dealloc__(self): + free_tree(self.tree) + + def insert(self, s, e, id): + ''' Insert an interval with start, end, id 
as parameters''' + if s > e: raise ValueError("Interval start must be before end") + self.tree.root = clusternode_insert(self.tree, self.tree.root, s, e, id) + + def getregions(self): + ''' Returns a list clusters in ascending order of starting position. + Each cluster is a tuple of (start, end, [sorted ids of intervals in cluster]) + + tree = ClusterTree(0, 0) + Insert (6, 7, 1), (1, 2, 3), (9, 10, 2), (3, 4, 0), (3, 8, 4) + tree.getregions() returns [(1, 2, [3]), (3, 8, [0, 1, 4]), (9, 10, [2])] + ''' + cdef treeitr *itr + cdef treeitr *head + cdef interval *ival + + regions = [] + head = clusteritr(self.tree) + itr = head + while (itr): + ids = [] + ival = itr.node.interval_head + while (ival): + ids.append(ival.id) + ival = ival.next + + regions.append( (itr.node.start, itr.node.end, sorted(ids)) ) + itr = itr.next + freeclusteritr(head) + return regions + + def getlines(self): + ''' Similar to getregions except it just returns a list of ids of intervals + The above example would return [3, 0, 1, 4, 2] + ''' + cdef treeitr *itr + cdef interval *ival + + lines = [] + itr = clusteritr(self.tree) + + while (itr): + ids = [] + ival = itr.node.interval_head + while (ival): + ids.append(ival.id) + ival = ival.next + + lines.extend(sorted(ids)) + itr = itr.next + return lines + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/bx/intersection.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/bx/intersection.c Thu Oct 12 16:26:36 2017 -0400 |
b |
b'@@ -0,0 +1,7517 @@\n+/* Generated by Cython 0.13 on Thu Feb 3 22:15:44 2011 */\n+\n+#define PY_SSIZE_T_CLEAN\n+#include "Python.h"\n+#ifndef Py_PYTHON_H\n+ #error Python headers needed to compile C extensions, please install development version of Python.\n+#else\n+\n+#include <stddef.h> /* For offsetof */\n+#ifndef offsetof\n+#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )\n+#endif\n+\n+#if !defined(WIN32) && !defined(MS_WINDOWS)\n+ #ifndef __stdcall\n+ #define __stdcall\n+ #endif\n+ #ifndef __cdecl\n+ #define __cdecl\n+ #endif\n+ #ifndef __fastcall\n+ #define __fastcall\n+ #endif\n+#endif\n+\n+#ifndef DL_IMPORT\n+ #define DL_IMPORT(t) t\n+#endif\n+#ifndef DL_EXPORT\n+ #define DL_EXPORT(t) t\n+#endif\n+\n+#ifndef PY_LONG_LONG\n+ #define PY_LONG_LONG LONG_LONG\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02040000\n+ #define METH_COEXIST 0\n+ #define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type)\n+ #define PyDict_Contains(d,o) PySequence_Contains(d,o)\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02050000\n+ typedef int Py_ssize_t;\n+ #define PY_SSIZE_T_MAX INT_MAX\n+ #define PY_SSIZE_T_MIN INT_MIN\n+ #define PY_FORMAT_SIZE_T ""\n+ #define PyInt_FromSsize_t(z) PyInt_FromLong(z)\n+ #define PyInt_AsSsize_t(o) PyInt_AsLong(o)\n+ #define PyNumber_Index(o) PyNumber_Int(o)\n+ #define PyIndex_Check(o) PyNumber_Check(o)\n+ #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02060000\n+ #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)\n+ #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)\n+ #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)\n+ #define PyVarObject_HEAD_INIT(type, size) \\\n+ PyObject_HEAD_INIT(type) size,\n+ #define PyType_Modified(t)\n+\n+ typedef struct {\n+ void *buf;\n+ PyObject *obj;\n+ Py_ssize_t len;\n+ Py_ssize_t itemsize;\n+ int readonly;\n+ int ndim;\n+ char *format;\n+ Py_ssize_t *shape;\n+ Py_ssize_t *strides;\n+ Py_ssize_t *suboffsets;\n+ void *internal;\n+ } 
Py_buffer;\n+\n+ #define PyBUF_SIMPLE 0\n+ #define PyBUF_WRITABLE 0x0001\n+ #define PyBUF_FORMAT 0x0004\n+ #define PyBUF_ND 0x0008\n+ #define PyBUF_STRIDES (0x0010 | PyBUF_ND)\n+ #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)\n+ #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)\n+ #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)\n+ #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)\n+\n+#endif\n+\n+#if PY_MAJOR_VERSION < 3\n+ #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"\n+#else\n+ #define __Pyx_BUILTIN_MODULE_NAME "builtins"\n+#endif\n+\n+#if PY_MAJOR_VERSION >= 3\n+ #define Py_TPFLAGS_CHECKTYPES 0\n+ #define Py_TPFLAGS_HAVE_INDEX 0\n+#endif\n+\n+#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)\n+ #define Py_TPFLAGS_HAVE_NEWBUFFER 0\n+#endif\n+\n+#if PY_MAJOR_VERSION >= 3\n+ #define PyBaseString_Type PyUnicode_Type\n+ #define PyStringObject PyUnicodeObject\n+ #define PyString_Type PyUnicode_Type\n+ #define PyString_Check PyUnicode_Check\n+ #define PyString_CheckExact PyUnicode_CheckExact\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02060000\n+ #define PyBytesObject PyStringObject\n+ #define PyBytes_Type PyString_Type\n+ #define PyBytes_Check PyString_Check\n+ #define PyBytes_CheckExact PyString_CheckExact\n+ #define PyBytes_FromString PyString_FromString\n+ #define PyBytes_FromStringAndSize PyString_FromStringAndSize\n+ #define PyBytes_FromFormat PyString_FromFormat\n+ #define PyBytes_DecodeEscape PyString_DecodeEscape\n+ #define PyBytes_AsString PyString_AsString\n+ #define PyBytes_AsStringAndSize PyString_AsStringAndSize\n+ #define PyBytes_Size PyString_Size\n+ #define PyBytes_AS_STRING PyString_AS_STRING\n+ #define PyBytes_GET_SIZE PyString_GET_SIZE\n+ #define PyBytes_Repr PyString_Repr\n+ #define PyBytes_Concat '..b'\n+ if (!py_code) goto bad;\n+ py_frame = PyFrame_New(\n+ PyThreadState_GET(), /*PyThreadState *tstate,*/\n+ py_code, /*PyCodeObject *code,*/\n+ py_globals, /*PyObject *globals,*/\n+ 0 /*PyObject *locals*/\n+ );\n+ if (!py_frame) 
goto bad;\n+ py_frame->f_lineno = __pyx_lineno;\n+ PyTraceBack_Here(py_frame);\n+bad:\n+ Py_XDECREF(py_srcfile);\n+ Py_XDECREF(py_funcname);\n+ Py_XDECREF(py_code);\n+ Py_XDECREF(py_frame);\n+}\n+\n+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {\n+ while (t->p) {\n+ #if PY_MAJOR_VERSION < 3\n+ if (t->is_unicode) {\n+ *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);\n+ } else if (t->intern) {\n+ *t->p = PyString_InternFromString(t->s);\n+ } else {\n+ *t->p = PyString_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ #else /* Python 3+ has unicode identifiers */\n+ if (t->is_unicode | t->is_str) {\n+ if (t->intern) {\n+ *t->p = PyUnicode_InternFromString(t->s);\n+ } else if (t->encoding) {\n+ *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);\n+ } else {\n+ *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ } else {\n+ *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ #endif\n+ if (!*t->p)\n+ return -1;\n+ ++t;\n+ }\n+ return 0;\n+}\n+\n+/* Type Conversion Functions */\n+\n+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {\n+ int is_true = x == Py_True;\n+ if (is_true | (x == Py_False) | (x == Py_None)) return is_true;\n+ else return PyObject_IsTrue(x);\n+}\n+\n+static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {\n+ PyNumberMethods *m;\n+ const char *name = NULL;\n+ PyObject *res = NULL;\n+#if PY_VERSION_HEX < 0x03000000\n+ if (PyInt_Check(x) || PyLong_Check(x))\n+#else\n+ if (PyLong_Check(x))\n+#endif\n+ return Py_INCREF(x), x;\n+ m = Py_TYPE(x)->tp_as_number;\n+#if PY_VERSION_HEX < 0x03000000\n+ if (m && m->nb_int) {\n+ name = "int";\n+ res = PyNumber_Int(x);\n+ }\n+ else if (m && m->nb_long) {\n+ name = "long";\n+ res = PyNumber_Long(x);\n+ }\n+#else\n+ if (m && m->nb_int) {\n+ name = "int";\n+ res = PyNumber_Long(x);\n+ }\n+#endif\n+ if (res) {\n+#if PY_VERSION_HEX < 0x03000000\n+ if (!PyInt_Check(res) && !PyLong_Check(res)) {\n+#else\n+ if (!PyLong_Check(res)) {\n+#endif\n+ 
PyErr_Format(PyExc_TypeError,\n+ "__%s__ returned non-%s (type %.200s)",\n+ name, name, Py_TYPE(res)->tp_name);\n+ Py_DECREF(res);\n+ return NULL;\n+ }\n+ }\n+ else if (!PyErr_Occurred()) {\n+ PyErr_SetString(PyExc_TypeError,\n+ "an integer is required");\n+ }\n+ return res;\n+}\n+\n+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {\n+ Py_ssize_t ival;\n+ PyObject* x = PyNumber_Index(b);\n+ if (!x) return -1;\n+ ival = PyInt_AsSsize_t(x);\n+ Py_DECREF(x);\n+ return ival;\n+}\n+\n+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {\n+#if PY_VERSION_HEX < 0x02050000\n+ if (ival <= LONG_MAX)\n+ return PyInt_FromLong((long)ival);\n+ else {\n+ unsigned char *bytes = (unsigned char *) &ival;\n+ int one = 1; int little = (int)*(unsigned char*)&one;\n+ return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0);\n+ }\n+#else\n+ return PyInt_FromSize_t(ival);\n+#endif\n+}\n+\n+static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) {\n+ unsigned PY_LONG_LONG val = __Pyx_PyInt_AsUnsignedLongLong(x);\n+ if (unlikely(val == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) {\n+ return (size_t)-1;\n+ } else if (unlikely(val != (unsigned PY_LONG_LONG)(size_t)val)) {\n+ PyErr_SetString(PyExc_OverflowError,\n+ "value too large to convert to size_t");\n+ return (size_t)-1;\n+ }\n+ return (size_t)val;\n+}\n+\n+\n+#endif /* Py_PYTHON_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/bx/intersection.pyx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/bx/intersection.pyx Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,472 @@\n+"""\n+Downloaded from:\n+https://bitbucket.org/james_taylor/bx-python/wiki/Home\n+\n+Data structure for performing intersect queries on a set of intervals which\n+preserves all information about the intervals (unlike bitset projection methods).\n+\n+:Authors: James Taylor (james@jamestaylor.org),\n+ Ian Schenk (ian.schenck@gmail.com),\n+ Brent Pedersen (bpederse@gmail.com)\n+"""\n+\n+# Historical note:\n+# This module original contained an implementation based on sorted endpoints\n+# and a binary search, using an idea from Scott Schwartz and Piotr Berman.\n+# Later an interval tree implementation was implemented by Ian for Galaxy\'s\n+# join tool (see `bx.intervals.operations.quicksect.py`). This was then\n+# converted to Cython by Brent, who also added support for\n+# upstream/downstream/neighbor queries. This was modified by James to\n+# handle half-open intervals strictly, to maintain sort order, and to\n+# implement the same interface as the original Intersecter.\n+\n+import operator\n+\n+cdef extern from "stdlib.h":\n+ int ceil(float f)\n+ float log(float f)\n+ int RAND_MAX\n+ int rand()\n+ int strlen(char *)\n+ int iabs(int)\n+\n+cdef inline int imax2(int a, int b):\n+ if b > a: return b\n+ return a\n+\n+cdef inline int imax3(int a, int b, int c):\n+ if b > a:\n+ if c > b:\n+ return c\n+ return b\n+ if a > c:\n+ return a\n+ return c\n+\n+cdef inline int imin3(int a, int b, int c):\n+ if b < a:\n+ if c < b:\n+ return c\n+ return b\n+ if a < c:\n+ return a\n+ return c\n+\n+cdef inline int imin2(int a, int b):\n+ if b < a: return b\n+ return a\n+\n+cdef float nlog = -1.0 / log(0.5)\n+\n+cdef class IntervalNode:\n+ """\n+ A single node of an `IntervalTree`.\n+ \n+ NOTE: Unless you really know what you are doing, you probably should us\n+ `IntervalTree` rather than using this directly. 
\n+ """\n+ cdef float priority\n+ cdef public object interval\n+ cdef public int start, end\n+ cdef int minend, maxend, minstart\n+ cdef IntervalNode cleft, cright, croot\n+\n+ property left_node:\n+ def __get__(self):\n+ return self.cleft if self.cleft is not EmptyNode else None\n+ property right_node:\n+ def __get__(self):\n+ return self.cright if self.cright is not EmptyNode else None\n+ property root_node:\n+ def __get__(self):\n+ return self.croot if self.croot is not EmptyNode else None\n+ \n+ def __repr__(self):\n+ return "IntervalNode(%i, %i)" % (self.start, self.end)\n+\n+ def __cinit__(IntervalNode self, int start, int end, object interval):\n+ # Python lacks the binomial distribution, so we convert a\n+ # uniform into a binomial because it naturally scales with\n+ # tree size. Also, python\'s uniform is perfect since the\n+ # upper limit is not inclusive, which gives us undefined here.\n+ self.priority = ceil(nlog * log(-1.0/(1.0 * rand()/RAND_MAX - 1)))\n+ self.start = start\n+ self.end = end\n+ self.interval = interval\n+ self.maxend = end\n+ self.minstart = start\n+ self.minend = end\n+ self.cleft = EmptyNode\n+ self.cright = EmptyNode\n+ self.croot = EmptyNode\n+ \n+ cpdef IntervalNode insert(IntervalNode self, int start, int end, object interval):\n+ """\n+ Insert a new IntervalNode into the tree of which this node is\n+ currently the root. 
The return value is the new root of the tree (which\n+ may or may not be this node!)\n+ """\n+ cdef IntervalNode croot = self\n+ # If starts are the same, decide which to add interval to based on\n+ # end, thus maintaining sortedness relative to start/end\n+ cdef int decision_endpoint = start\n+ if start == self.start:\n+ decision_endpoint = end\n+ \n+ if decision_endpoint > self.start:\n+ #'..b'(1, 2, strand="-"), num_intervals=3)\n+ [Interval(3, 7), Interval(3, 40), Interval(13, 50)]\n+\n+ \n+ """\n+ \n+ cdef IntervalNode root\n+ \n+ def __cinit__( self ):\n+ root = None\n+ \n+ # ---- Position based interfaces -----------------------------------------\n+ \n+ def insert( self, int start, int end, object value=None ):\n+ """\n+ Insert the interval [start,end) associated with value `value`.\n+ """\n+ if self.root is None:\n+ self.root = IntervalNode( start, end, value )\n+ else:\n+ self.root = self.root.insert( start, end, value )\n+ \n+ add = insert\n+\n+\n+ def find( self, start, end ):\n+ """\n+ Return a sorted list of all intervals overlapping [start,end).\n+ """\n+ if self.root is None:\n+ return []\n+ return self.root.find( start, end )\n+ \n+ def before( self, position, num_intervals=1, max_dist=2500 ):\n+ """\n+ Find `num_intervals` intervals that lie before `position` and are no\n+ further than `max_dist` positions away\n+ """\n+ if self.root is None:\n+ return []\n+ return self.root.left( position, num_intervals, max_dist )\n+\n+ def after( self, position, num_intervals=1, max_dist=2500 ):\n+ """\n+ Find `num_intervals` intervals that lie after `position` and are no\n+ further than `max_dist` positions away\n+ """\n+ if self.root is None:\n+ return []\n+ return self.root.right( position, num_intervals, max_dist )\n+\n+ # ---- Interval-like object based interfaces -----------------------------\n+\n+ def insert_interval( self, interval ):\n+ """\n+ Insert an "interval" like object (one with at least start and end\n+ attributes)\n+ """\n+ self.insert( 
interval.start, interval.end, interval )\n+\n+ add_interval = insert_interval\n+\n+ def before_interval( self, interval, num_intervals=1, max_dist=2500 ):\n+ """\n+ Find `num_intervals` intervals that lie completely before `interval`\n+ and are no further than `max_dist` positions away\n+ """\n+ if self.root is None:\n+ return []\n+ return self.root.left( interval.start, num_intervals, max_dist )\n+\n+ def after_interval( self, interval, num_intervals=1, max_dist=2500 ):\n+ """\n+ Find `num_intervals` intervals that lie completely after `interval` and\n+ are no further than `max_dist` positions away\n+ """\n+ if self.root is None:\n+ return []\n+ return self.root.right( interval.end, num_intervals, max_dist )\n+\n+ def upstream_of_interval( self, interval, num_intervals=1, max_dist=2500 ):\n+ """\n+ Find `num_intervals` intervals that lie completely upstream of\n+ `interval` and are no further than `max_dist` positions away\n+ """\n+ if self.root is None:\n+ return []\n+ if interval.strand == -1 or interval.strand == "-":\n+ return self.root.right( interval.end, num_intervals, max_dist )\n+ else:\n+ return self.root.left( interval.start, num_intervals, max_dist )\n+\n+ def downstream_of_interval( self, interval, num_intervals=1, max_dist=2500 ):\n+ """\n+ Find `num_intervals` intervals that lie completely downstream of\n+ `interval` and are no further than `max_dist` positions away\n+ """\n+ if self.root is None:\n+ return []\n+ if interval.strand == -1 or interval.strand == "-":\n+ return self.root.left( interval.start, num_intervals, max_dist )\n+ else:\n+ return self.root.right( interval.end, num_intervals, max_dist )\n+ \n+ def traverse(self, fn):\n+ """\n+ call fn for each element in the tree\n+ """\n+ if self.root is None:\n+ return None\n+ return self.root.traverse(fn)\n+\n+# For backward compatibility\n+Intersecter = IntervalTree\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/bx/intervalcluster.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/bx/intervalcluster.c Thu Oct 12 16:26:36 2017 -0400 |
b |
b'@@ -0,0 +1,268 @@\n+/*\n+\tDownloaded from:\n+\thttps://bitbucket.org/james_taylor/bx-python/wiki/Home\n+\n+ Kanwei Li, 2009\n+ Inspired by previous ClusterTree\n+ \n+ This clustering algorithm uses a binary tree structure. Nodes correspond to \n+ non-overlapping intervals, where overlapping means that the distance between\n+ two intervals is less or equal to max_dist, which is the max separation.\n+ \n+ The tree self-balances using rotations based on the binomial sequence. Merges\n+ among nodes are performed whenever a node is changed/added that will cause other\n+ nodes to form a new cluster.\n+*/\n+#include <stdlib.h>\n+#include <stdio.h>\n+#include <math.h>\n+#include "intervalcluster.h"\n+\n+#define ALLOC(pt) (malloc(sizeof(pt)))\n+\n+static int min(int a, int b) {\n+ if( a < b )\n+ return a;\n+ else\n+ return b;\n+}\n+\n+static int max(int a, int b) {\n+ if( a > b )\n+ return a;\n+ else\n+ return b;\n+}\n+\n+/* Create new tree with given max_dist (max distance between intervals to be\n+ considered a cluster), and min_intervals, the minimum number of intervals\n+ needed for a cluster to be considered significant */\n+clustertree* create_clustertree(int max_dist, int min_intervals) {\n+ clustertree *tree = ALLOC(clustertree);\n+ tree->max_dist = max_dist;\n+ tree->min_intervals = min_intervals;\n+ tree->root = NULL;\n+ return tree;\n+}\n+\n+static interval* create_interval(int start, int end, int id) {\n+ interval *ival = ALLOC(interval);\n+ \n+ ival->start = start;\n+ ival->end = end;\n+ ival->id = id;\n+ ival->next = NULL;\n+ return ival;\n+}\n+\n+static clusternode* create_node(int start, int end, int id) {\n+ clusternode *new_node = ALLOC(clusternode);\n+ \n+ new_node->start = start;\n+ new_node->end = end;\n+ new_node->interval_head = create_interval(start, end, id);\n+ new_node->interval_tail = new_node->interval_head;\n+ new_node->num_ivals = 1;\n+ new_node->left = NULL;\n+ new_node->right = NULL;\n+ \n+ double uniform = ((double)rand()) / 
(RAND_MAX);\n+ if (uniform == 1.0)\n+ uniform = 0;\n+ new_node->priority = (int)ceil( (-1.0 / log(.5)) * log( -1.0 / (uniform - 1)));\n+ \n+ return new_node;\n+}\n+\n+static void recursively_free_intervals(interval *ival) {\n+ interval *next;\n+ if(ival) {\n+ next = ival->next;\n+ free(ival);\n+ recursively_free_intervals(next);\n+ }\n+}\n+\n+static void recursively_free_nodes(clusternode *node) {\n+ if(node) {\n+ recursively_free_nodes(node->left);\n+ recursively_free_nodes(node->right);\n+ recursively_free_intervals(node->interval_head);\n+ free(node);\n+ }\n+}\n+\n+void free_tree(clustertree *tree) {\n+ recursively_free_nodes(tree->root);\n+ free(tree);\n+}\n+\n+void cluster_rotateright(clusternode **node) {\n+ clusternode* root = (*node)->left;\n+ (*node)->left = (*node)->left->right;\n+ root->right = (*node);\n+ *node = root;\n+}\n+\n+void cluster_rotateleft(clusternode **node) {\n+ clusternode* root = (*node)->right;\n+ (*node)->right = (*node)->right->left;\n+ root->left = (*node);\n+ *node = root;\n+}\n+\n+/* Go down the tree and merge nodes if necessary */\n+void cluster_fixup(clustertree *tree, clusternode **ln, clusternode **rn) {\n+ clusternode* local = *ln;\n+ clusternode* root = *rn;\n+ int maxstart = max(root->start, local->start);\n+ int maxend = max(local->end, root->end);\n+ int minstart = min(root->start, local->start);\n+ int minend = min(root->end, local->end);\n+\n+ if( maxstart - minend <= tree->max_dist ) {\n+ /* Have to merge this node and children */\n+ root->start = minstart;\n+ root->end = maxend;\n+ root->interval_tail->next = local->interval_head;\n+ root->interval_tail = local->interval_tail;\n+ root->num_ivals += local->num_ivals;\n+ if( local->right) cluster_fixup(tree, &(local->right), rn);\n+ if( local->left) cluster_fixup(tree,'..b'ixup(tree, &(local->left), rn);\n+ }\n+ if(local->right) {\n+ cluster_fixup(tree, &(local->right), rn);\n+ }\n+}\n+\n+/* Pyrex "getregions" implements this. 
Only used for C debugging */\n+void clustereach(clustertree *tree, clusternode *node) {\n+ interval* ival;\n+ if (node == NULL) {\n+ exit(1); /* Shouldn\'t happen */\n+ }\n+ if (node->left != NULL) {\n+ clustereach(tree, node->left);\n+ }\n+ printf("Node: %d\\t%d\\n", node->start, node->end);\n+ ival = node->interval_head;\n+ while(ival) {\n+ printf("\\tInterval %d: %d\\t%d\\n", ival->id, ival->start, ival->end);\n+ ival = ival->next;\n+ }\n+ \n+ if (node->right != NULL) {\n+ clustereach(tree, node->right);\n+ }\n+}\n+\n+void clusteritr_recursive(clustertree *tree, clusternode *node, treeitr* *itr) {\n+ treeitr *newitr;\n+\n+ if (node == NULL) {\n+ return;\n+ }\n+ if (node->right != NULL) {\n+ clusteritr_recursive(tree, node->right, itr);\n+ }\n+ if (node->num_ivals >= tree->min_intervals) {\n+ newitr = ALLOC(treeitr);\n+ newitr->next = *itr;\n+ newitr->node = node;\n+ *itr = newitr;\n+ }\n+ if (node->left != NULL) {\n+ clusteritr_recursive(tree, node->left, itr);\n+ }\n+}\n+\n+/* Create an infix iterator */\n+treeitr* clusteritr(clustertree *tree) {\n+ treeitr *itr = NULL;\n+ \n+ clusteritr_recursive(tree, tree->root, &itr);\n+ if (itr != NULL) {\n+ return itr;\n+ }\n+ return NULL;\n+}\n+\n+/* Free an infix iterator */\n+void freeclusteritr(treeitr *itr) {\n+ if (itr != NULL) {\n+ \tif (itr->next != NULL) {\n+ \t\tfreeclusteritr(itr->next);\n+ \t}\n+ \tfree(itr);\n+ }\n+}\n+\n+/* Insert based on the start position of intervals */\n+clusternode* clusternode_insert(clustertree *tree, clusternode *node, int start, int end, int id) {\n+ int oldstart;\n+ int oldend;\n+ interval* ival;\n+ \n+ // printf("Inserting %d %d %d\\n", start, end, id);\n+ if (node == NULL) {\n+ node = create_node(start, end, id);\n+ \n+ } else if ( (start - tree->max_dist) > node->end ) { /* We\'re to the right of this cluster */\n+ node->right = clusternode_insert(tree, node->right, start, end, id);\n+ if (node->priority < node->right->priority) cluster_rotateleft(&node);\n+ \n+ } else if ( 
(end + tree->max_dist) < node->start) { /* We\'re to the left of this cluster */\n+ node->left = clusternode_insert(tree, node->left, start, end, id);\n+ if (node->priority < node->left->priority) cluster_rotateright(&node);\n+ \n+ } else { /* We\'re in the range of this cluster */\n+ /* Update the start and end to match to new values */\n+ oldstart = node->start;\n+ oldend = node->end;\n+ node->start = min(start, node->start);\n+ node->end = max(end, node->end);\n+ ival = create_interval(start, end, id);\n+ ival->next = node->interval_head; /* Add this interval as the head of the interval list */\n+ node->interval_head = ival;\n+ node->num_ivals += 1;\n+ \n+ if ( oldstart > node->start && node->left != NULL ) { /* New interval added to the start, and there\'s a left child */\n+ cluster_fixup(tree, &(node->left), &node);\n+ }\n+ if ( oldend < node->end && node->right != NULL ) { /* New interval added to the end, and there\'s a right child */\n+ cluster_fixup(tree, &(node->right), &node);\n+ }\n+ }\n+ return node;\n+}\n+\n+int main() {\n+ \n+ // Simple test\n+ clustertree* tree = create_clustertree(0, 1);\n+ \n+ tree->root = clusternode_insert(tree, tree->root, 3, 4, 0);\n+ tree->root = clusternode_insert(tree, tree->root, 6, 7, 1);\n+ tree->root = clusternode_insert(tree, tree->root, 9, 10, 2);\n+ tree->root = clusternode_insert(tree, tree->root, 1, 2, 3);\n+ tree->root = clusternode_insert(tree, tree->root, 3, 8, 4);\n+ \n+ clustereach(tree, tree->root);\n+ return 0;\n+ \n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/bx/intervalcluster.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/bx/intervalcluster.h Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,44 @@ +/* + Downloaded from: + https://bitbucket.org/james_taylor/bx-python/wiki/Home +*/ + +typedef struct struct_interval { + int start; + int end; + int id; + + struct struct_interval *next; +} interval; + +typedef struct struct_clusternode { + int start; + int end; + int priority; + + struct struct_interval *interval_head; + struct struct_interval *interval_tail; + int num_ivals; + + struct struct_clusternode *left; + struct struct_clusternode *right; +} clusternode; + +typedef struct { + int max_dist; + int min_intervals; + + clusternode *root; +} clustertree; + +typedef struct struct_treeitr { + struct struct_treeitr *next; + struct struct_clusternode *node; +} treeitr; + + +clusternode* clusternode_insert(clustertree *tree, clusternode *node, int start, int end, int id); +clustertree* create_clustertree(int max_dist, int min_intervals); +treeitr* clusteritr(clustertree *tree); +void freeclusteritr(treeitr *itr); +void free_tree(clustertree *tree); |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/chimerascan_index.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/chimerascan_index.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
#!/usr/bin/env python
'''
Created on Jan 5, 2011

@author: mkiyer

chimerascan: chimeric transcript discovery using RNA-seq

Copyright (C) 2011 Matthew Iyer

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
'''
import logging
import os
import shutil
import subprocess
import sys
from optparse import OptionParser

# local imports
import chimerascan.pysam as pysam
from chimerascan.lib.feature import GeneFeature
from chimerascan.lib.seq import DNA_reverse_complement
from chimerascan.lib.base import up_to_date, check_executable
from chimerascan.lib.config import JOB_ERROR, JOB_SUCCESS, ALIGN_INDEX, \
    BOWTIE_INDEX_FILE, GENE_FEATURE_FILE, GENE_REF_PREFIX

BASES_PER_LINE = 50

def split_seq(seq, chars_per_line):
    """
    Break 'seq' into lines of at most 'chars_per_line' characters and
    return them joined with newlines (an empty sequence gives '').
    """
    return '\n'.join(seq[pos:pos + chars_per_line]
                     for pos in range(0, len(seq), chars_per_line))

def genepred_to_fasta(gene_feature_file, reference_seq_file):
    """
    Generator yielding (gene feature, FASTA record string) for every gene
    in 'gene_feature_file' whose exons can all be fetched from
    'reference_seq_file'.

    Genes with a missing or truncated exon sequence are skipped with a
    warning.  Exon sequences are concatenated and reverse-complemented for
    '-' strand genes.  Both input files are closed when the generator is
    exhausted, closed, or raises.
    """
    ref_fa = pysam.Fastafile(reference_seq_file)
    total = 0
    used = 0
    try:
        with open(gene_feature_file) as gene_fh:
            for g in GeneFeature.parse(gene_fh):
                total += 1
                exon_seqs = []
                error_occurred = False
                for start, end in g.exons:
                    seq = ref_fa.fetch(g.chrom, start, end)
                    if (not seq) or (len(seq) < (end - start)):
                        logging.warning("gene %s exon %s:%d-%d not found in reference" %
                                        (g.tx_name, g.chrom, start, end))
                        error_occurred = True
                        break
                    exon_seqs.append(seq)
                if error_occurred:
                    continue
                used += 1
                # make fasta record
                seq = ''.join(exon_seqs)
                if g.strand == '-':
                    seq = DNA_reverse_complement(seq)
                # break seq onto multiple lines
                seqlines = split_seq(seq, BASES_PER_LINE)
                # NOTE(review): 'start' and 'end' here are the coordinates of
                # the last exon iterated above, not of the whole transcript --
                # confirm that this is the intended "range=" value.
                fa_record = (">%s range=%s:%d-%d gene=%s strand=%s\n%s" %
                             (GENE_REF_PREFIX + g.tx_name, g.chrom, start, end,
                              g.gene_name, g.strand, seqlines))
                yield g, fa_record
        logging.info("Used %d/%d gene features" % (used,total))
    finally:
        # runs on normal exhaustion, on generator close, and on error
        ref_fa.close()

def create_chimerascan_index(output_dir,
                             genome_fasta_file,
                             gene_feature_file,
                             bowtie_build_bin):
    """
    Build the index in 'output_dir': copy and index the genome FASTA,
    append transcript sequences and write the gene feature file, then run
    bowtie-build.  Steps whose outputs are up to date are skipped.

    Returns JOB_SUCCESS, or JOB_ERROR when bowtie-build exits with a
    status other than os.EX_OK.
    """
    # create output dir if it does not exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
        logging.info("Created index directory: %s" % (output_dir))
    # copy reference fasta file to output dir and index it
    index_fasta_file = os.path.join(output_dir, ALIGN_INDEX + ".fa")
    msg = "Adding reference genome to index"
    if (up_to_date(index_fasta_file, genome_fasta_file)):
        logging.info("[SKIPPED] %s" % (msg))
    else:
        logging.info(msg)
        shutil.copyfile(genome_fasta_file, index_fasta_file)
        # index the genome fasta file
        logging.info("Indexing FASTA file")
        fh = pysam.Fastafile(index_fasta_file)
        fh.close()
    # add gene sequences to index
    dst_gene_feature_file = os.path.join(output_dir, GENE_FEATURE_FILE)
    msg = "Building transcriptome sequences and gene features"
    if (up_to_date(index_fasta_file, gene_feature_file) and
        up_to_date(dst_gene_feature_file, gene_feature_file)):
        logging.info("[SKIPPED] %s" % (msg))
    else:
        logging.info(msg)
        # write sequences from gene feature file
        logging.info("Adding transcript sequences and gene features to index")
        fasta_fh = open(index_fasta_file, "a")
        try:
            gene_fh = open(dst_gene_feature_file, "w")
            try:
                for g, fa_record in genepred_to_fasta(gene_feature_file, index_fasta_file):
                    gene_fh.write(str(g) + "\n")
                    fasta_fh.write(fa_record + "\n")
            finally:
                gene_fh.close()
        finally:
            # close (and flush) both outputs even if a record fails
            fasta_fh.close()
        # remove old fasta index
        if os.path.exists(index_fasta_file + ".fai"):
            os.remove(index_fasta_file + ".fai")
        # index the combined fasta file
        logging.info("Reindexing the FASTA file")
        fh = pysam.Fastafile(index_fasta_file)
        fh.close()
    # build bowtie index on the reference sequence file
    bowtie_index_file = os.path.join(output_dir, BOWTIE_INDEX_FILE)
    msg = "Building bowtie index"
    if up_to_date(bowtie_index_file, index_fasta_file):
        logging.info("[SKIPPED] %s" % (msg))
    else:
        logging.info(msg)
        bowtie_index_name = os.path.join(output_dir, ALIGN_INDEX)
        args = [bowtie_build_bin, index_fasta_file, bowtie_index_name]
        if subprocess.call(args) != os.EX_OK:
            logging.error("bowtie-build failed to create alignment index")
            # remove a partial index so the next run does not skip this step
            if os.path.exists(bowtie_index_file):
                os.remove(bowtie_index_file)
            return JOB_ERROR
    logging.info("Chimerascan index created successfully")
    return JOB_SUCCESS


def main():
    """
    Command line entry point: validate the arguments, locate bowtie-build
    and exit with the return code of create_chimerascan_index().
    """
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    parser = OptionParser("usage: %prog [options] <reference_genome.fa> "
                          "<genepred_genes.txt> <index_output_dir>")
    parser.add_option("--bowtie-dir", dest="bowtie_dir", default="",
                      help="Path to the 'bowtie' software (by default, "
                      "expects the 'bowtie' and 'bowtie-build' "
                      "binaries to be in current PATH)")
    options, args = parser.parse_args()
    # check command line arguments
    if len(args) < 3:
        parser.error("Incorrect number of command line arguments")
    ref_fasta_file = args[0]
    gene_feature_file = args[1]
    output_dir = args[2]
    # check that input files exist
    if not os.path.isfile(ref_fasta_file):
        parser.error("Reference fasta file '%s' not found" % (ref_fasta_file))
    if not os.path.isfile(gene_feature_file):
        parser.error("Gene feature file '%s' not found" % (gene_feature_file))
    # check that output dir is not a regular file
    if os.path.exists(output_dir) and (not os.path.isdir(output_dir)):
        parser.error("Output directory name '%s' exists and is not a valid "
                     "directory" % (output_dir))
    # check that bowtie-build program exists
    bowtie_build_bin = os.path.join(options.bowtie_dir, "bowtie-build")
    if check_executable(bowtie_build_bin):
        logging.debug("Checking for 'bowtie-build' binary... found")
    else:
        parser.error("bowtie-build binary not found or not executable")
    # run main index creation function
    retcode = create_chimerascan_index(output_dir, ref_fasta_file,
                                       gene_feature_file, bowtie_build_bin)
    sys.exit(retcode)

if __name__ == '__main__':
    main()
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/chimerascan_run.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/chimerascan_run.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,1032 @@\n+#!/usr/bin/env python\n+\'\'\'\n+Created on Jan 5, 2011\n+\n+@author: mkiyer\n+\n+chimerascan: chimeric transcript discovery using RNA-seq\n+\n+Copyright (C) 2011 Matthew Iyer\n+\n+This program is free software: you can redistribute it and/or modify\n+it under the terms of the GNU General Public License as published by\n+the Free Software Foundation, either version 3 of the License, or\n+(at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful,\n+but WITHOUT ANY WARRANTY; without even the implied warranty of\n+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License\n+along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\'\'\'\n+from chimerascan import __version__\n+\n+__author__ = "Matthew Iyer"\n+__copyright__ = "Copyright 2011, chimerascan project"\n+__credits__ = ["Matthew Iyer", "Christopher Maher"]\n+__license__ = "GPL"\n+__maintainer__ = "Matthew Iyer"\n+__email__ = "mkiyer@med.umich.edu"\n+__status__ = "beta"\n+\n+###\n+#\n+# Modified by \n+#\tBaekdoo Kim(baegi7942@gmail.com)\n+#\n+###\n+\n+import logging\n+import os\n+import subprocess\n+import sys\n+import shutil\n+from optparse import OptionParser, OptionGroup\n+import xml.etree.ElementTree as etree\n+\n+# check for python version 2.6.0 or greater\n+if sys.version_info < (2,6,0):\n+ sys.stderr.write("You need python 2.6 or later to run chimerascan\\n")\n+ sys.exit(1)\n+\n+# local imports\n+from chimerascan import pysam\n+import chimerascan.lib.config as config\n+from chimerascan.lib.config import JOB_SUCCESS, JOB_ERROR, MIN_SEGMENT_LENGTH\n+from chimerascan.lib.base import LibraryTypes, check_executable, \\\n+ parse_bool, indent_xml, up_to_date\n+from chimerascan.lib.seq import FASTQ_QUAL_FORMATS, SANGER_FORMAT\n+from chimerascan.lib.fragment_size_distribution import 
InsertSizeDistribution\n+\n+from chimerascan.pipeline.fastq_inspect_reads import inspect_reads, detect_read_length, get_min_max_read_lengths\n+from chimerascan.pipeline.align_bowtie import align_pe, align_sr, trim_align_pe_sr\n+from chimerascan.pipeline.find_discordant_reads import find_discordant_fragments\n+from chimerascan.pipeline.discordant_reads_to_bedpe import discordant_reads_to_bedpe, sort_bedpe\n+from chimerascan.pipeline.nominate_chimeras import nominate_chimeras\n+from chimerascan.pipeline.chimeras_to_breakpoints import chimeras_to_breakpoints\n+from chimerascan.pipeline.nominate_spanning_reads import nominate_encomp_spanning_reads, extract_single_mapped_reads, nominate_single_mapped_spanning_reads\n+from chimerascan.pipeline.merge_spanning_alignments import merge_spanning_alignments\n+from chimerascan.pipeline.resolve_discordant_reads import resolve_discordant_reads\n+from chimerascan.pipeline.filter_chimeras import filter_chimeras, filter_highest_coverage_isoforms, filter_encompassing_chimeras\n+from chimerascan.pipeline.filter_homologous_genes import filter_homologous_genes\n+from chimerascan.pipeline.write_output import write_output\n+\n+# defaults for bowtie\n+DEFAULT_NUM_PROCESSORS = config.BASE_PROCESSORS\n+DEFAULT_BOWTIE_PATH = ""\n+DEFAULT_BOWTIE_ARGS = "--best --strata"\n+DEFAULT_DISCORD_BOWTIE_ARGS = "--best"\n+DEFAULT_MULTIHITS = 100\n+DEFAULT_MISMATCHES = 2\n+DEFAULT_DISCORD_MISMATCHES = 3\n+DEFAULT_SEGMENT_LENGTH = 25\n+DEFAULT_TRIM5 = 0\n+DEFAULT_TRIM3 = 0\n+DEFAULT_MIN_FRAG_LENGTH = 0\n+DEFAULT_MAX_FRAG_LENGTH = 1000\n+DEFAULT_NUM_SAMPLES_TO_DETERMINE_READ_LENGTHS = 10000\n+DEFAULT_FASTQ_QUAL_FORMAT = SANGER_FORMAT\n+DEFAULT_LIBRARY_TYPE = LibraryTypes.FR_UNSTRANDED\n+\n+DEFAULT_ISIZE_MEAN = 200\n+DEFAULT_ISIZE_STDEV = 40\n+DEFAULT_HOMOLOGY_MISMATCHES = config.BREAKPOINT_HOMOLOGY_MISMATCHES\n+DEFAULT_ANCHOR_MIN = 4\n+DEFAULT_ANCHOR_LENGTH = 8\n+DEFAULT_ANCHOR_MISMATCHES = 0\n+DEFAULT_FILTER_ISIZE_PROB = 0.01\n+DEFAULT_FILTER_UNIQUE_FRAGS 
= 2.0\n+DEFAULT_FILTER_ISOFORM_FRACTION = 0.01\n+NUM_POSITIONAL_ARGS = 4\n+DEFAULT_KEEP_TMP = True\n+\n+class RunConfig(object):\n+\n+ '..b'E)\n+ msg = "Filtering chimeras"\n+ if up_to_date(filtered_chimera_file, resolved_spanning_chimera_file):\n+ logging.info("[SKIPPED] %s" % (msg))\n+ else:\n+ logging.info(msg)\n+ # get insert size at prob\n+ filter_chimeras(input_file=resolved_spanning_chimera_file,\n+ output_file=filtered_chimera_file,\n+ index_dir=runconfig.index_dir,\n+ bam_file=sorted_aligned_bam_file,\n+ unique_frags=runconfig.filter_unique_frags,\n+ isoform_fraction=runconfig.filter_isoform_fraction,\n+ false_pos_file=runconfig.filter_false_pos_file)\n+ #\n+ # Filter homologous genes\n+ #\n+ homolog_filtered_chimera_file = os.path.join(tmp_dir, config.HOMOLOG_FILTERED_CHIMERA_FILE)\n+ msg = "Filtering homologous chimeras"\n+ if up_to_date(homolog_filtered_chimera_file, filtered_chimera_file):\n+ logging.info("[SKIPPED] %s" % (msg))\n+ else:\n+ logging.info(msg)\n+ min_isize = isize_dist.isize_at_percentile(1.0)\n+ max_isize = isize_dist.isize_at_percentile(99.0)\n+ filter_homologous_genes(input_file=filtered_chimera_file,\n+ output_file=homolog_filtered_chimera_file,\n+ index_dir=runconfig.index_dir,\n+ homolog_segment_length=runconfig.segment_length-1,\n+ min_isize=min_isize,\n+ max_isize=max_isize,\n+ bowtie_bin=bowtie_bin,\n+ num_processors=runconfig.num_processors,\n+ tmp_dir=tmp_dir)\n+ #\n+ # Choose best isoform for chimeras that share the same breakpoint\n+ #\n+ best_isoform_chimera_file = os.path.join(tmp_dir, config.BEST_FILTERED_CHIMERA_FILE)\n+ msg = "Choosing best isoform for each chimera"\n+ if up_to_date(best_isoform_chimera_file, homolog_filtered_chimera_file):\n+ logging.info("[SKIPPED] %s" % (msg))\n+ else:\n+ logging.info(msg)\n+ retcode = filter_highest_coverage_isoforms(index_dir=runconfig.index_dir,\n+ input_file=homolog_filtered_chimera_file,\n+ output_file=best_isoform_chimera_file)\n+ #\n+ # Write user-friendly output file\n+ #\n+ 
chimera_output_file = os.path.join(runconfig.output_dir, config.CHIMERA_OUTPUT_FILE)\n+ #msg = "Writing chimeras to file %s" % (chimera_output_file)\n+ if up_to_date(chimera_output_file, best_isoform_chimera_file):\n+ logging.info("[SKIPPED] %s" % (msg))\n+ else:\n+ logging.info(msg)\n+ write_output(best_isoform_chimera_file,\n+ bam_file=sorted_aligned_bam_file,\n+ output_file=chimera_output_file,\n+ index_dir=runconfig.index_dir)\n+ \n+ #\n+ # Move output to Galaxy data file\n+ #\n+ cmd = "mv %s/chimerascan_tmp/chimeras.bedpe %s/%s" % (os.path.dirname(runconfig.output_file_path), os.path.dirname(runconfig.output_file_path), runconfig.output_file_name)\n+ p = subprocess.check_output(cmd.split())\n+\n+ #\n+ # Cleanup\n+ #\n+ if not runconfig.keep_tmp:\n+ logging.info("Cleaning up temporary files")\n+ shutil.rmtree(tmp_dir)\n+ cmd_rm = "rm -r %s/chimerascan_tmp" % os.path.dirname(runconfig.output_file_path)\n+ p = subprocess.check_output(cmd_rm.split())\n+\n+ #\n+ # Done\n+ #\n+ logging.info("Finished run.")\n+ return JOB_SUCCESS\n+\n+\n+def main():\n+ logging.basicConfig(level=logging.INFO,\n+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n+ # parse run parameters in config file and command line\n+ runconfig = RunConfig()\n+ runconfig.from_args(sys.argv[1:])\n+ # run chimerascan\n+ sys.exit(run_chimerascan(runconfig))\n+\n+if __name__ == \'__main__\':\n+ main()\n+\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/base.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/base.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Oct 26, 2010

@author: mkiyer

chimerascan: chimeric transcript discovery using RNA-seq

Copyright (C) 2011 Matthew Iyer

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
'''
import os
import subprocess
import tempfile
import operator

#
# constants used for library type
#
class LibraryTypes:
    FR_UNSTRANDED = "fr-unstranded"
    FR_FIRSTSTRAND = "fr-firststrand"
    FR_SECONDSTRAND = "fr-secondstrand"

    @staticmethod
    def choices():
        """return the tuple of valid library type strings"""
        return (LibraryTypes.FR_UNSTRANDED,
                LibraryTypes.FR_FIRSTSTRAND,
                LibraryTypes.FR_SECONDSTRAND)

    @staticmethod
    def same_strand(library_type):
        """return True when the first two items of 'library_type' are equal"""
        return (library_type[0] == library_type[1])

def parse_lines(line_iter, numlines=1):
    """
    generator that returns list of 'numlines' lines at a time

    'line_iter' must be an iterator.  Lines are right-stripped.  A trailing
    group with fewer than 'numlines' lines is discarded.
    """
    try:
        while True:
            # next() instead of .next() so this runs on Python 2.6+ and 3
            yield [next(line_iter).rstrip() for _ in range(numlines)]
    except StopIteration:
        pass

def parse_bool(s):
    """return True when 's' starts with 't' or 'T', otherwise False"""
    return s[0].lower() == "t"

def parse_string_none(s):
    """map the literal string "None" to None; return anything else unchanged"""
    return None if s == "None" else s

def make_temp(base_dir, suffix=''):
    """create an empty temporary file in 'base_dir' and return its path"""
    fd,name = tempfile.mkstemp(suffix=suffix, prefix='tmp', dir=base_dir)
    os.close(fd)
    return name

def check_executable(filename):
    """
    return True if 'filename' can be launched as a program, False if the
    attempt raises OSError.  The exit status of the program is ignored.
    """
    # 'with' closes the devnull handle on the OSError path as well
    with open(os.devnull, 'w') as devnullfh:
        try:
            subprocess.call([filename], stdout=devnullfh, stderr=devnullfh)
        except OSError:
            return False
    return True

def up_to_date(outfile, infile, nzsize=True):
    """
    return True when both files exist and 'outfile' was modified no earlier
    than 'infile'.  With 'nzsize' set, an empty 'outfile' counts as stale.
    """
    if not os.path.exists(infile):
        return False
    if not os.path.exists(outfile):
        return False
    if nzsize and (os.path.getsize(outfile) == 0):
        return False
    return os.path.getmtime(outfile) >= os.path.getmtime(infile)

# in-place XML prettyprint formatter
def indent_xml(elem, level=0):
    """
    recursively set the whitespace-only 'text' and 'tail' of 'elem' and its
    descendants so that the serialized tree is indented by nesting level
    """
    # NOTE(review): the listing this was recovered from collapses runs of
    # whitespace, so the indent unit may originally have been wider than
    # one space -- confirm against the upstream file.
    i = "\n" + level*" "
    if len(elem):
        if not elem.text or not elem.text.strip():
            elem.text = i + " "
        if not elem.tail or not elem.tail.strip():
            elem.tail = i
        last_child = None
        for child in elem:
            indent_xml(child, level+1)
            last_child = child
        # the last child's tail sits before the parent's closing tag, so it
        # takes the parent's indentation rather than the child's
        if not last_child.tail or not last_child.tail.strip():
            last_child.tail = i
    else:
        if level and (not elem.tail or not elem.tail.strip()):
            elem.tail = i
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/batch_sort.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/batch_sort.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Jul 21, 2011

@author: mkiyer
'''

# based on Recipe 466302: Sorting big files the Python 2.4 way
# by Nicolas Lehuen
#http://code.activestate.com/recipes/576755-sorting-big-files-the-python-26-way/

import os
from tempfile import gettempdir
from itertools import islice, cycle
from collections import namedtuple
import heapq

Keyed = namedtuple("Keyed", ["key", "obj"])

def merge(key=None, *iterables):
    """
    Lazily merge already-sorted 'iterables' into one sorted stream.

    With key=None the items are compared directly.  Otherwise each item is
    wrapped as Keyed(key(item), item) so the merge orders by the key, and
    the original item is yielded.
    """
    # based on code posted by Scott David Daniels in c.l.p.
    # http://groups.google.com/group/comp.lang.python/msg/484f01f1ea3c832d

    if key is None:
        # items are not wrapped in Keyed here, so yield them as they are
        for element in heapq.merge(*iterables):
            yield element
    else:
        keyed_iterables = [(Keyed(key(obj), obj) for obj in iterable)
                           for iterable in iterables]
        for element in heapq.merge(*keyed_iterables):
            yield element.obj

def batch_sort(input, output, key=None, buffer_size=32000, tempdirs=None):
    """
    Sort the lines of file 'input' into file 'output' using sorted chunk
    files of at most 'buffer_size' lines each, which are then merged.

    'key' is an optional sort key applied to each line.  'tempdirs' lists
    directories for the chunk files, used round-robin; it defaults to the
    system temporary directory.  Chunk files are removed on exit.
    """
    if tempdirs is None:
        tempdirs = []
    if not tempdirs:
        tempdirs.append(gettempdir())

    chunks = []
    try:
        with open(input,'rb',64*1024) as input_file:
            input_iterator = iter(input_file)
            for tempdir in cycle(tempdirs):
                current_chunk = list(islice(input_iterator,buffer_size))
                if not current_chunk:
                    break
                current_chunk.sort(key=key)
                output_chunk = open(os.path.join(tempdir,'%06i'%len(chunks)),'w+b',64*1024)
                chunks.append(output_chunk)
                output_chunk.writelines(current_chunk)
                output_chunk.flush()
                # rewind so the merge below reads the chunk from its start
                output_chunk.seek(0)
        with open(output,'wb',64*1024) as output_file:
            output_file.writelines(merge(key, *chunks))
    finally:
        # best-effort cleanup: a chunk that cannot be closed or removed
        # must not mask an error raised above
        for chunk in chunks:
            try:
                chunk.close()
                os.remove(chunk.name)
            except Exception:
                pass
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/chimera.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/chimera.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,382 @@\n+\'\'\'\n+Created on Jun 3, 2011\n+\n+@author: mkiyer\n+\'\'\'\n+from base import parse_string_none\n+from sam import get_clipped_interval\n+\n+DISCORDANT_TAG_NAME = "XC"\n+class DiscordantTags(object):\n+ CONCORDANT_TX = 0\n+ DISCORDANT_STRAND_TX = 1\n+ CONCORDANT_GENE = 2\n+ DISCORDANT_STRAND_GENE = 3\n+ CONCORDANT_GENOME = 4\n+ DISCORDANT_STRAND_GENOME = 5\n+ DISCORDANT_GENE = 9\n+ DISCORDANT_GENOME = 17\n+\n+ORIENTATION_TAG_NAME = "XD"\n+class OrientationTags(object):\n+ NONE = 0\n+ FIVEPRIME = 1\n+ THREEPRIME = 2\n+\n+def cmp_orientation(a,b):\n+ if (a == OrientationTags.NONE) or (b == OrientationTags.NONE):\n+ return True\n+ return (a != b)\n+\n+# constants\n+MULTIMAP_BINS = (1,2,4,8,16,32,64,128)\n+CHIMERA_SEP = "|"\n+# amount of trimming to use to stop reads from overlapping \n+# exon boundaries and going into intronic space\n+EXON_JUNCTION_TRIM_BP = 10\n+\n+# chimera types\n+class ChimeraTypes(object):\n+ INTERCHROMOSOMAL = "Interchromosomal"\n+ OVERLAP_SAME = "Overlapping_Same"\n+ OVERLAP_CONVERGE = "Overlapping_Converging"\n+ OVERLAP_DIVERGE = "Overlapping_Diverging"\n+ OVERLAP_COMPLEX = "Overlapping_Complex"\n+ READTHROUGH = "Read_Through"\n+ ADJ_CONVERGE = "Adjacent_Converging"\n+ ADJ_DIVERGE = "Adjacent_Diverging"\n+ ADJ_COMPLEX = "Adjacent_Complex"\n+ INTRACHROMOSOMAL = "Intrachromosomal"\n+ INTRA_CONVERGE = "Intrachromosomal_Converging"\n+ INTRA_DIVERGE = "Intrachromsomal_Diverging"\n+ INTRA_COMPLEX = "Intrachromosomal_Complex"\n+ UNKNOWN = "Undetermined"\n+\n+class DiscordantRead(object):\n+ """\n+ stores read alignment information needed to nominate \n+ chimeric transcripts\n+\n+ (this is a subset of what is kept in SAM file)\n+ """\n+ def __init__(self):\n+ self.qname = ""\n+ self.hit_index = -1\n+ self.readnum = -1\n+ self.seq = ""\n+ self.tid = -1\n+ self.pos = -1\n+ self.aend = -1\n+ self.clipstart = -1\n+ self.clipend = -1\n+ self.is_reverse = False\n+ self.numhits = 0\n+ self.mismatches = 0\n+ self.discordant_type = 0\n+ 
self.orientation = 0\n+ self.is_spanning = False\n+\n+ @staticmethod\n+ def from_read(r):\n+ a = DiscordantRead()\n+ a.qname = r.qname\n+ a.hit_index = r.opt(\'HI\')\n+ a.readnum = 1 if r.is_read2 else 0\n+ a.seq = r.seq\n+ a.tid = r.rname\n+ a.pos = r.pos\n+ a.aend = r.aend\n+ a.clipstart, a.clipend = get_clipped_interval(r)\n+ a.is_reverse = r.is_reverse\n+ a.numhits = r.opt(\'NH\')\n+ a.mismatches = r.opt(\'NM\')\n+ a.discordant_type = r.opt(DISCORDANT_TAG_NAME)\n+ a.orientation = r.opt(ORIENTATION_TAG_NAME)\n+ a.is_spanning = False\n+ return a\n+\n+ @staticmethod\n+ def from_list(fields):\n+ a = DiscordantRead()\n+ a.qname = fields[0]\n+ a.hit_index = int(fields[1])\n+ a.readnum = int(fields[2])\n+ a.seq = fields[3]\n+ a.tid = int(fields[4])\n+ a.pos = int(fields[5])\n+ a.aend = int(fields[6])\n+ a.clipstart = int(fields[7])\n+ a.clipend = int(fields[8])\n+ a.is_reverse = True if int(fields[9]) == 1 else False\n+ a.numhits = int(fields[10])\n+ a.mismatches = int(fields[11])\n+ a.discordant_type = int(fields[12])\n+ a.orientation = int(fields[13])\n+ a.is_spanning = True if int(fields[14]) == 1 else False\n+ return a\n+\n+ def to_list(self):\n+ return [self.qname, self.hit_index, self.readnum, self.seq, \n+ self.tid, self.pos, self.aend, self.clipstart, \n+ self.clipend, int(self.is_reverse), self.numhits, \n+ self.mismatches, self.discordant_type, \n+ self.orientation, int(self.is_spanning)]\n+\n+\n+def frags_to_encomp_string(frags):\n+ if len(frags) == 0:\n+ return "None"\n+ # encompassing read pairs\n+ encomp_frags = []\n+ for frag in frags:\n+ r5p = Chimera.FIEL'..b' dreads.append(DiscordantRead.from_list(read_fields.split(c.FIELD_DELIM)))\n+ c.encomp_frags.append(dreads)\n+ # raw spanning read information\n+ spanning_reads_field = parse_string_none(fields[20])\n+ if spanning_reads_field is not None:\n+ for read_fields in spanning_reads_field.split(c.READ_DELIM):\n+ c.spanning_reads.append(DiscordantRead.from_list(read_fields.split(c.FIELD_DELIM))) \n+ 
return c\n+\n+ @staticmethod\n+ def parse(line_iter):\n+ for line in line_iter:\n+ if line.startswith("#"):\n+ continue \n+ fields = line.strip().split(\'\\t\')\n+ yield Chimera.from_list(fields)\n+\n+ def to_list(self):\n+ # reads\n+ if len(self.spanning_reads) == 0:\n+ span_string = None\n+ else:\n+ span_string = Chimera.READ_DELIM.join(Chimera.FIELD_DELIM.join(map(str,r.to_list())) \n+ for r in self.spanning_reads)\n+ return [self.tx_name_5p, self.tx_start_5p, self.tx_end_5p,\n+ self.tx_name_3p, self.tx_start_3p, self.tx_end_3p,\n+ self.name, self.score, \n+ self.tx_strand_5p, self.tx_strand_3p,\n+ self.gene_name_5p, self.gene_name_3p,\n+ "%d-%d" % (self.exons_5p[0], self.exons_5p[1]),\n+ "%d-%d" % (self.exons_3p[0], self.exons_3p[1]),\n+ self.breakpoint_name,\n+ self.breakpoint_seq_5p,\n+ self.breakpoint_seq_3p,\n+ self.homology_left,\n+ self.homology_right,\n+ frags_to_encomp_string(self.encomp_frags),\n+ span_string]\n+\n+ def get_num_unique_positions(self):\n+ """\n+ calculates total number of unique read alignment\n+ positions supporting chimera\n+ """\n+ # find all unique alignment positions and read names\n+ encomp_pos = set()\n+ qnames = set()\n+ for pair in self.encomp_frags:\n+ if pair[0].qname not in qnames:\n+ qnames.add(pair[0].qname)\n+ encomp_pos.add((pair[0].pos, pair[1].pos))\n+ # add spanning reads\n+ spanning_pos = set()\n+ for dr in self.spanning_reads:\n+ if dr.qname not in qnames:\n+ qnames.add(dr.qname)\n+ spanning_pos.add(dr.pos)\n+ return len(encomp_pos) + len(spanning_pos)\n+\n+ def get_num_frags(self, maxnumhits=0):\n+ """\n+ number of unique fragments supporting the \n+ chimera (by read name)\n+ """\n+ qnames = set()\n+ for pair in self.encomp_frags:\n+ if (maxnumhits > 0) and (min(pair[0].numhits, pair[1].numhits) > maxnumhits):\n+ continue\n+ qnames.add(pair[0].qname)\n+ for dr in self.spanning_reads:\n+ if (maxnumhits > 0) and (dr.numhits > maxnumhits):\n+ continue\n+ qnames.add(dr.qname)\n+ return len(qnames)\n+\n+ def 
get_num_spanning_frags(self, maxnumhits=0):\n+ """\n+ number of unique spanning fragments supporting the \n+ chimera (by read name)\n+ """\n+ qnames = set()\n+ for dpair in self.encomp_frags:\n+ if (maxnumhits > 0) and (min(dpair[0].numhits, dpair[1].numhits) > maxnumhits):\n+ continue\n+ if any(dr.is_spanning for dr in dpair):\n+ qnames.add(dpair[0].qname) \n+ for dr in self.spanning_reads:\n+ if (maxnumhits > 0) and (dr.numhits > maxnumhits):\n+ continue\n+ qnames.add(dr.qname)\n+ return len(qnames) \n+\n+ def get_spanning_reads(self):\n+ for dpair in self.encomp_frags:\n+ if dpair[0].is_spanning:\n+ yield dpair[0]\n+ if dpair[1].is_spanning:\n+ yield dpair[1]\n+ for dr in self.spanning_reads:\n+ yield dr\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/config.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/config.py Thu Oct 12 16:26:36 2017 -0400 |
b |
'''
Created on Jan 5, 2011

@author: mkiyer

chimerascan: chimeric transcript discovery using RNA-seq

Copyright (C) 2011 Matthew Iyer

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
'''
JOB_SUCCESS = 0
JOB_ERROR = 1

# constants for index
ALIGN_INDEX = 'align_index'
ALIGN_INDEX_FASTA_FILE = 'align_index.fa'
BOWTIE_INDEX_FILE = 'align_index.1.ebwt'
GENE_REF_PREFIX = 'gene_'
GENE_FEATURE_FILE = "gene_features.txt"

# chimerascan subdirectories
LOG_DIR = "log"
TMP_DIR = "tmp"

# constraints for run configuration
BASE_PROCESSORS = 2
MIN_SEGMENT_LENGTH = 20
RUNCONFIG_XML_FILE = "runconfig.xml"

# output after read inspection, name conversion, and
# quality score conversion
CONVERTED_FASTQ_PREFIX = "reads"
# one file per mate: ("reads_1.fq", "reads_2.fq"); written without xrange
# so that the module imports on both Python 2 and Python 3
CONVERTED_FASTQ_FILES = tuple(CONVERTED_FASTQ_PREFIX + "_%d.fq" % (readnum)
                              for readnum in (1, 2))

# output from initial bowtie alignment
ALIGNED_READS_BAM_FILE = "aligned_reads.bam"
UNALIGNED_FASTQ_PARAM = "unaligned.fq"
UNALIGNED_FASTQ_FILES = ("unaligned_1.fq", "unaligned_2.fq")
MAXMULTIMAP_FASTQ_PARAM = "maxmulti.fq"
MAXMULTIMAP_FASTQ_FILES = ("maxmulti_1.fq", "maxmulti_2.fq")

# sorted aligned reads bam file
SORTED_ALIGNED_READS_BAM_FILE = "sorted_aligned_reads.bam"

# insert size estimation parameters
ISIZE_MIN_SAMPLES = 100
ISIZE_MAX_SAMPLES = 1e6
ISIZE_DIST_FILE = "isize_dist.txt"

# output from realignment of trimmed reads
REALIGNED_BAM_FILE = "realigned_reads.bam"

# output for different classes of discordant reads
GENE_PAIRED_BAM_FILE = "gene_paired_reads.bam"
GENOME_PAIRED_BAM_FILE = "genome_paired_reads.bam"
REALIGNED_UNMAPPED_BAM_FILE = "unmapped_reads.bam"
REALIGNED_COMPLEX_BAM_FILE = "complex_reads.bam"

# discordant reads BEDPE file
DISCORDANT_BEDPE_FILE = "discordant_reads.bedpe"
SORTED_DISCORDANT_BEDPE_FILE = "discordant_reads.srt.bedpe"

# chimera candidates with encompassing read support
ENCOMPASSING_CHIMERA_FILE = "encompassing_chimeras.txt"
FILTERED_ENCOMPASSING_CHIMERA_FILE = "encompassing_chimeras.filtered.txt"

# amount of trimming to use to stop reads from overlapping
# exon boundaries and going into intronic space
EXON_JUNCTION_TRIM_BP = 10

# number of homology mismatches in breakpoint sequences
# to tolerate when computing homology distance
BREAKPOINT_HOMOLOGY_MISMATCHES = 2
BREAKPOINT_CHIMERA_FILE = "encompassing_chimeras.breakpoint_sorted.txt"
BREAKPOINT_MAP_FILE = "breakpoints.txt"
BREAKPOINT_FASTA_FILE = "breakpoints.fa"
BREAKPOINT_BOWTIE_INDEX = "breakpoints"
BREAKPOINT_BOWTIE_INDEX_FILE = "breakpoints.1.ebwt"

# reads to remap to breakpoint junction index
ENCOMP_SPANNING_FASTQ_FILE = "encomp_spanning_reads.fq"
SINGLE_MAPPED_BAM_FILE = "singlemap_reads.srt.bam"
SINGLEMAP_SPANNING_FASTQ_FILE = "singlemap_spanning_reads.fq"
UNALIGNED_SPANNING_FASTQ_FILE = "unaligned_spanning_reads.fq"

# results of aligning reads to breakpoint index
ENCOMP_SPANNING_BAM_FILE = "encomp_spanning_reads.bam"
SORTED_ENCOMP_SPANNING_BAM_FILE = "encomp_spanning_reads.srt.bam"
SINGLEMAP_SPANNING_BAM_FILE = "singlemap_spanning_reads.bam"
SORTED_SINGLEMAP_SPANNING_BAM_FILE = "singlemap_spanning_reads.srt.bam"
UNALIGNED_SPANNING_BAM_FILE = "unaligned_spanning_reads.bam"
SORTED_UNALIGNED_SPANNING_BAM_FILE = "unaligned_spanning_reads.srt.bam"

# results of merging spanning reads into chimera nominations
SPANNING_CHIMERA_FILE = "spanning_chimeras.txt"
# results of resolving ambiguous reads
RESOLVED_SPANNING_CHIMERA_FILE = "spanning_chimeras.resolved.txt"
# results of filtering chimeras
FILTERED_CHIMERA_FILE = "spanning_chimeras.resolved.filtered.txt"
HOMOLOG_FILTERED_CHIMERA_FILE = "spanning_chimeras.resolved.filtered.homolog.txt"
BEST_FILTERED_CHIMERA_FILE = "spanning_chimeras.resolved.filtered.homolog.best_isoform.txt"
# output file
CHIMERA_OUTPUT_FILE = "chimeras.bedpe"
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/fastq_to_bam.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/fastq_to_bam.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Apr 28, 2011

@author: mkiyer
'''
from chimerascan import pysam
from math import log10
from string import maketrans

# source alphabet (every byte value in order) shared by all translation tables
_ALL_CHARS = ''.join(map(chr, range(256)))


def get_solexa_qual_conversion_table():
    """
    return a translation table that can be used by str.translate() for
    converting solexa to sanger quality scores
    """
    offset = 64
    # everything below the solexa range maps to the lowest sanger score ('!'),
    # everything from the offset upwards defaults to the capped score ('I')
    conv_table = ['!'] * 256
    conv_table[offset:] = "I" * (256-offset)
    # solexa scores -5..39 are converted to phred scores explicitly
    for solq in xrange(-5, 40):
        phredq = 10*log10(1 + 10**(solq/10.0))
        phredchr = chr(int(round(33 + phredq)))
        conv_table[offset + solq] = phredchr
    conv_string = ''.join(conv_table)
    return maketrans(_ALL_CHARS, conv_string)


def get_illumina_qual_conversion_table():
    """Illumina 1.3+ format can encode a Phred quality score from 0 to 62
    using ASCII 64 to 126 (although in raw read data Phred scores from 0
    to 40 only are expected).

    The returned str.translate() table maps ASCII 64..103 onto sanger
    characters 33..72; anything lower becomes '!' and anything from
    ASCII 104 upwards is capped at 'I'.
    """
    offset = 64
    conv_table = ['!'] * 256
    # only scores below 40 need an explicit entry: the slice assignment
    # below overwrites every position from offset+40 onwards
    for x in xrange(0, 40):
        conv_table[offset+x] = chr(33 + x)
    conv_table[offset+40:] = "I" * (256-(offset+40))
    conv_string = ''.join(conv_table)
    return maketrans(_ALL_CHARS, conv_string)


def get_sanger_qual_conversion_table():
    """
    return a str.translate() table that leaves sanger qualities 33..72
    unchanged, raises anything below ASCII 33 to '!' and caps anything
    from ASCII 73 upwards at 'I'
    """
    offset = 33
    tbl = list(map(chr, range(256)))
    tbl[:offset] = "!" * offset
    tbl[offset+40:] = "I" * (256-(offset+40))
    return maketrans(_ALL_CHARS, ''.join(tbl))


# translation tables keyed by the quality format name accepted by fastq_to_bam()
conv_tables = {"sanger": get_sanger_qual_conversion_table(),
               "illumina": get_illumina_qual_conversion_table(),
               "solexa": get_solexa_qual_conversion_table()}


def parse_fastq(line_iter):
    """
    generator yielding (read id, sequence, quality) tuples from a
    4-line-per-record FASTQ stream

    'line_iter' must be an iterator usable as a context manager (e.g. an
    open file); it is closed when the generator finishes.  The leading
    character of the id line is dropped and the '+' separator line is
    ignored.  A truncated trailing record is silently discarded.
    """
    with line_iter:
        while True:
            try:
                rid = next(line_iter).rstrip()[1:]
                seq = next(line_iter).rstrip()
                next(line_iter)
                qual = next(line_iter).rstrip()
            except StopIteration:
                # end of input: finish explicitly instead of letting
                # StopIteration escape from the generator body
                return
            yield rid, seq, qual


def fastq_to_bam(fastq_files, qual_format, bam_file):
    """
    write the reads of one or two FASTQ files into an unaligned BAM file

    fastq_files: list of FASTQ paths; records are read in lock step and
        the read from file 0 is flagged read1, the read from file 1 read2
    qual_format: key of 'conv_tables' ("sanger", "illumina" or "solexa");
        raises KeyError for anything else
    bam_file: path of the BAM file to create

    Conversion stops as soon as any input file runs out of records.
    """
    qual_trans_table = conv_tables[qual_format]
    fastq_fhs = [open(f) for f in fastq_files]
    fqiters = [parse_fastq(fh) for fh in fastq_fhs]
    header = {'HD': {'VN': '1.0', 'SO': 'unknown'}}
#              'SQ': [{'LN': 1, 'SN': 'dummy'}]}
    bamfh = pysam.Samfile(bam_file, "wb", header=header)
    try:
        # 'while fqiters' (rather than 'while True') so that an empty file
        # list produces an empty BAM instead of looping forever
        while fqiters:
            for i, fqiter in enumerate(fqiters):
                rid, seq, qual = next(fqiter)
                a = pysam.AlignedRead()
                a.rname = -1
                a.mrnm = -1
                #a.pos = 0
                #a.mpos = 0
                a.qname = rid
                a.seq = seq
                a.qual = qual.translate(qual_trans_table)
                a.is_read1 = (i == 0)
                a.is_read2 = (i == 1)
                bamfh.write(a)
    except StopIteration:
        pass
    finally:
        # always release the output and input handles, even on error
        bamfh.close()
        for fh in fastq_fhs:
            fh.close()


def bam_to_fastq(bam_file, fastq_files):
    """
    write the reads of a BAM file back out as FASTQ records

    Reads flagged as read2 (and not read1) are written to fastq_files[1];
    every other read goes to fastq_files[0].  Qualities are written as
    stored in the BAM file.
    """
    fqfhs = [open(f, "w") for f in fastq_files]
    try:
        bamfh = pysam.Samfile(bam_file, "rb")
        try:
            for r in bamfh:
                # choose the output file from the mate flags; reads that
                # carry neither flag default to the first file instead of
                # reusing whatever index the previous read had
                if r.is_read2 and not r.is_read1:
                    i = 1
                else:
                    i = 0
                record = "@%s\n%s\n+\n%s" % (r.qname, r.seq, r.qual)
                fqfhs[i].write(record + "\n")
        finally:
            bamfh.close()
    finally:
        for fh in fqfhs:
            fh.close()


if __name__ == '__main__':
    import sys
    fastq_to_bam(["read1.fq", "read2.fq"], "solexa", "hi.bam")
    bam_to_fastq("hi.bam", ["a1.fq", "a2.fq"])
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/feature.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/feature.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Dec 18, 2010

@author: mkiyer

chimerascan: chimeric transcript discovery using RNA-seq

Copyright (C) 2011 Matthew Iyer

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
import logging
import itertools


def _parse_int_list(text):
    """
    convert a comma separated list of integers ("1,2,3," or "1,2,3")
    into a list of ints, ignoring empty items such as the one produced
    by a trailing comma
    """
    return [int(tok) for tok in text.split(',') if tok.strip()]


def _is_skippable(line):
    """
    return True for an already stripped line that carries no feature:
    blank lines, '#' comments and 'track' header lines
    """
    if not line:
        return True
    if line.startswith('#'):
        logging.debug("skipping comment line: %s" % (line))
        return True
    if line.startswith('track'):
        logging.debug("skipping track header line: %s" % (line))
        return True
    return False


class GeneFeature(object):
    """
    one transcript parsed from a tab-delimited line with the columns
    tx_name, chrom, strand, tx_start, tx_end, cds_start, cds_end,
    exon_count, exon starts, exon ends, gene_name

    'exons' is a list of (start, end) tuples in file order.
    """
    __slots__ = ('chrom', 'tx_start', 'tx_end', 'tx_name', 'gene_name',
                 'strand', 'cds_start', 'cds_end', 'exon_count', 'exons')

    def __str__(self):
        fields = [self.tx_name,
                  self.chrom,
                  self.strand,
                  str(self.tx_start),
                  str(self.tx_end),
                  str(self.cds_start),
                  str(self.cds_end),
                  str(self.exon_count),
                  ','.join(map(str, [e[0] for e in self.exons])) + ',',
                  ','.join(map(str, [e[1] for e in self.exons])) + ',',
                  self.gene_name]
        return '\t'.join(fields)

    @staticmethod
    def from_string(line):
        """
        parse one line into a GeneFeature

        returns None for None, blank lines, '#' comments and 'track'
        headers.  All eleven columns are required; a shorter record
        raises IndexError.
        """
        if line is None:
            return None
        line = line.strip()
        if _is_skippable(line):
            return None
        fields = line.split('\t')
        g = GeneFeature()
        g.tx_name = fields[0]
        g.chrom = fields[1]
        g.strand = fields[2]
        g.tx_start = int(fields[3])
        g.tx_end = int(fields[4])
        g.cds_start = int(fields[5])
        g.cds_end = int(fields[6])
        g.exon_count = int(fields[7])
        # tolerate exon lists both with and without the trailing comma
        exon_starts = _parse_int_list(fields[8])
        exon_ends = _parse_int_list(fields[9])
        g.exons = list(zip(exon_starts, exon_ends))
        g.gene_name = fields[10]
        return g

    @staticmethod
    def parse(line_iter):
        """
        generator yielding a GeneFeature for every feature line of
        'line_iter'; blank, comment and track lines are skipped
        """
        for line in line_iter:
            feature = GeneFeature.from_string(line)
            # from_string() returns None for every kind of non-feature
            # line, so None never leaks to the consumer
            if feature is not None:
                yield feature

    def get_exon_interval(self, pos):
        """
        returns a tuple containing the exon number and start/end
        coordinates relative to the transcript

        Exons are walked in transcript order (reversed for '-' strand
        features).  A position on an exon boundary is assigned to the
        exon that ends there.  If 'pos' lies beyond the last exon a
        warning is logged and the last exon is returned.
        """
        exon_iter = reversed(self.exons) if self.strand == '-' else iter(self.exons)
        exon_pos = 0
        exon_num = 0
        exon_size = 0
        for exon_start, exon_end in exon_iter:
            exon_size = exon_end - exon_start
            if exon_pos + exon_size >= pos:
                break
            exon_pos += exon_size
            exon_num += 1
        else:
            # ran off the end of the transcript: exon_pos/exon_num now
            # point one past the final exon, so step back onto it
            logging.warning("exon_pos %d + exon_size %d < pos %d - clipping to "
                            "end of gene" % (exon_pos - exon_size, exon_size, pos))
            if exon_num > 0:
                exon_num -= 1
                exon_pos -= exon_size
        return exon_num, exon_pos, exon_pos + exon_size


class BEDFeature(object):
    """
    one feature parsed from a BED line

    Columns beyond the first four are optional; missing ones are filled
    with defaults (score 0, strand '.', cds equal to the feature span,
    a single block covering the whole feature).  Columns past the 12th
    are kept verbatim in 'attr_fields'.
    """
    __slots__ = ('chrom', 'tx_start', 'tx_end', 'name', 'score', 'strand',
                 'cds_start', 'cds_end', 'exon_count', 'block_starts',
                 'block_sizes', 'exons', 'attr_fields')

    def __str__(self):
        fields = [self.chrom,
                  str(self.tx_start),
                  str(self.tx_end),
                  self.name,
                  str(self.score),
                  self.strand,
                  str(self.cds_start),
                  str(self.cds_end),
                  '0',
                  str(self.exon_count),
                  ','.join(map(str, self.block_sizes)) + ',',
                  ','.join(map(str, self.block_starts)) + ',']
        return '\t'.join(fields)

    @staticmethod
    def from_string(line):
        """
        parse one BED line into a BEDFeature

        returns None for None, blank lines, '#' comments and 'track'
        headers.  The first four columns (chrom, start, end, name) are
        required; a shorter record raises IndexError.
        """
        if line is None:
            return None
        line = line.strip()
        if _is_skippable(line):
            return None
        fields = line.split('\t')
        nfields = len(fields)
        g = BEDFeature()
        g.chrom = fields[0]
        g.tx_start = int(fields[1])
        g.tx_end = int(fields[2])
        g.name = fields[3]
        # score and strand are checked independently so that a 5-column
        # record does not index past the end of the field list
        g.score = fields[4] if nfields >= 5 else 0
        g.strand = fields[5] if nfields >= 6 else '.'
        if nfields >= 8:
            g.cds_start = int(fields[6])
            g.cds_end = int(fields[7])
        else:
            g.cds_start = g.tx_start
            g.cds_end = g.tx_end
        if nfields >= 12:
            g.exon_count = int(fields[9])
            g.block_sizes = _parse_int_list(fields[10])
            g.block_starts = _parse_int_list(fields[11])
            g.exons = [(g.tx_start + start, g.tx_start + start + size)
                       for start, size in zip(g.block_starts, g.block_sizes)]
        else:
            # no block columns: describe the feature as one block so that
            # every slot is populated and __str__ keeps working
            g.exon_count = 1
            g.block_sizes = [g.tx_end - g.tx_start]
            g.block_starts = [0]
            g.exons = [(g.tx_start, g.tx_end)]
        g.attr_fields = fields[12:]
        return g

    @staticmethod
    def parse(line_iter):
        """
        generator yielding a BEDFeature for every feature line of
        'line_iter'; blank, comment and track lines are skipped
        """
        for line in line_iter:
            feature = BEDFeature.from_string(line)
            if feature is not None:
                yield feature
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/fix_alignment_ordering.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/fix_alignment_ordering.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Jan 23, 2011

@author: mkiyer

chimerascan: chimeric transcript discovery using RNA-seq

Copyright (C) 2011 Matthew Iyer

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
import re
import collections

ReorderBufferItem = collections.namedtuple('ReorderBufferItem', ("fqrec", "reads"))

# trailing "/<digit>" mate suffix of a read name
_QNAME_MATE_RE = re.compile(r'/(\d)$')


def _split_mate_suffix(qname):
    """
    split 'name/1' or 'name/2' into (name, zero-based mate index)

    raises ValueError when the read name carries no '/<digit>' suffix
    """
    parts = _QNAME_MATE_RE.split(qname)
    if len(parts) < 2:
        raise ValueError("read name '%s' has no /1 or /2 mate suffix" % (qname,))
    return parts[0], int(parts[1]) - 1


def _apply_mate_flags(read, qname, readnum):
    """
    store the suffix-free qname on 'read' and flag it as a paired read1
    (readnum 0) or read2 (readnum 1)
    """
    assert readnum in (0, 1), "unexpected mate number %d for read '%s'" % (readnum + 1, qname)
    read.is_paired = True
    read.qname = qname
    if readnum == 0:
        read.is_read1 = True
    else:
        read.is_read2 = True


def _next_fastq_record(fqiter, wanted):
    """
    return the next record of 'fqiter'

    raises ValueError when the FASTQ stream ends before the alignment
    identified by 'wanted' was matched; letting StopIteration escape
    would end the calling generator early and silently drop every
    buffered read
    """
    try:
        return next(fqiter)
    except StopIteration:
        raise ValueError("FASTQ input exhausted before alignment %r was "
                         "found (alignments missing from the FASTQ input "
                         "or out of order by more than the buffer size)" %
                         (wanted,))


def fix_alignment_ordering(samfh, fqiters,
                           pe_sr_mode=False,
                           maxlen=100000):
    """
    generator that re-emits alignments in the order of the FASTQ input

    samfh: iterable of alignment records
    fqiters: list of iterators over FASTQ records (one per mate) that are
        advanced in lock step; records must expose a 'qname' attribute
    pe_sr_mode: reads were sequenced as pairs but aligned separately, so
        the mate number is taken from the '/1' or '/2' qname suffix
        (which is stripped) instead of the SAM flags
    maxlen: number of buffered read names above which the oldest entries
        are flushed

    Yields, per FASTQ read name, a tuple with one ReorderBufferItem per
    mate; each item holds the FASTQ record and the list of alignments
    collected for it.  Raises ValueError if an alignment's read name
    cannot be found in the remaining FASTQ input.
    """
    # FIFO of read names in FASTQ order plus the entries they index
    buf = collections.deque()
    qname_read_dict = {}
    for read in samfh:
        if pe_sr_mode:
            # get read num (1 or 2) from the qname field of SAM read
            read_qname, readnum = _split_mate_suffix(read.qname)
            _apply_mate_flags(read, read_qname, readnum)
        else:
            # not PE-SR mode: trust the 'is_read1'/'is_read2' flags
            readnum = 1 if read.is_read2 else 0
        # check if this read is already in the buffer
        if read.qname not in qname_read_dict:
            # if buffer full empty the first entries
            while len(buf) >= maxlen:
                yield qname_read_dict.pop(buf.popleft())
            # pull names from the fastq files until the one belonging to
            # this alignment has been queued
            while True:
                fqrecs = [_next_fastq_record(it, read.qname) for it in fqiters]
                next_qname = fqrecs[0].qname
                buf.append(next_qname)
                qname_read_dict[next_qname] = tuple(ReorderBufferItem(fq, [])
                                                    for fq in fqrecs)
                if next_qname == read.qname:
                    break
        # add read to buffer
        qname_read_dict[read.qname][readnum].reads.append(read)
    # empty remaining entries in buffer
    while buf:
        yield qname_read_dict[buf.popleft()]


def fix_sr_alignment_ordering(samfh, fqiter,
                              maxlen=100000):
    """
    single-iterator variant of fix_alignment_ordering()

    Every alignment must carry a '/1' or '/2' qname suffix (stripped
    here).  'fqiter' yields records exposing 'qname' and a one-based
    'readnum'; entries are keyed by (qname, zero-based mate index).

    Yields, per FASTQ record, a one-element list holding the
    ReorderBufferItem for that record.  Raises ValueError if an
    alignment cannot be found in the remaining FASTQ input.
    """
    buf = collections.deque()
    qname_read_dict = {}
    for read in samfh:
        # get read num (1 or 2) from the qname field of SAM read
        read_qname, readnum = _split_mate_suffix(read.qname)
        _apply_mate_flags(read, read_qname, readnum)
        # set key for indexing reads
        key = (read_qname, readnum)
        # check if this read is already in the buffer
        if key not in qname_read_dict:
            # if buffer full empty the first entries
            while len(buf) >= maxlen:
                yield qname_read_dict.pop(buf.popleft())
            # pull records from the fastq file until the one belonging to
            # this alignment has been queued
            while True:
                fqrec = _next_fastq_record(fqiter, key)
                next_key = (fqrec.qname, fqrec.readnum-1)
                buf.append(next_key)
                qname_read_dict[next_key] = [ReorderBufferItem(fqrec, [])]
                if next_key == key:
                    break
        # add read to buffer
        qname_read_dict[key][0].reads.append(read)
    # empty remaining entries in buffer
    while buf:
        yield qname_read_dict[buf.popleft()]
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/fragment_size_distribution.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/fragment_size_distribution.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,286 @@\n+\'\'\'\n+Created on Apr 29, 2011\n+\n+@author: mkiyer\n+\n+chimerascan: chimeric transcript discovery using RNA-seq\n+\n+Copyright (C) 2011 Matthew Iyer\n+\n+This program is free software: you can redistribute it and/or modify\n+it under the terms of the GNU General Public License as published by\n+the Free Software Foundation, either version 3 of the License, or\n+(at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful,\n+but WITHOUT ANY WARRANTY; without even the implied warranty of\n+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License\n+along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\'\'\'\n+import collections\n+import array\n+import logging\n+import random\n+\n+from chimerascan.bx.intersection import Interval, IntervalTree\n+\n+# local imports\n+from sam import parse_pe_reads, CIGAR_N, CIGAR_S, CIGAR_H, CIGAR_P\n+from feature import GeneFeature\n+\n+# SAM CIGAR flags that indicate skipping, padding, or clipping\n+SKIP_CIGAR_FLAGS = set((CIGAR_N, CIGAR_S, CIGAR_H, CIGAR_P)) \n+\n+def build_exon_trees(genes):\n+ trees = collections.defaultdict(lambda: IntervalTree())\n+ for g in genes: \n+ for e in g.exons:\n+ start, end = e\n+ trees[g.chrom].insert_interval(Interval(start, end, strand=g.strand))\n+ return trees\n+\n+def find_unambiguous_exon_intervals(genes):\n+ """\n+ returns (chrom, start, end, strand) tuples for exon\n+ intervals that are unique and have no overlapping\n+ transcripts or exons. 
\n+ """\n+ trees = build_exon_trees(genes) \n+ for g in genes:\n+ for start,end in g.exons:\n+ hits = set((hit.start, hit.end, hit.strand) \n+ for hit in trees[g.chrom].find(start, end))\n+ hits.add((start, end, g.strand))\n+ if len(hits) == 1:\n+ yield g.chrom, start, end, g.strand\n+\n+def sample_fragment_sizes(bamfh, genes, min_isize, max_isize):\n+ """\n+ sample fragment size distribution at genes with exons\n+ larger than the maximum insert size\n+ """\n+ # find all exons that are larger than the maximum estimated fragment size\n+ exons = set(coord for coord in find_unambiguous_exon_intervals(genes)\n+ if (coord[2] - coord[1]) >= max_isize)\n+ logging.info("Found %d exons larger than %d" % (len(exons), max_isize))\n+ refs = set(bamfh.references)\n+ # stats\n+ num_reads = 0\n+ unmapped = 0\n+ ambiguous = 0\n+ spliced = 0\n+ outside_range = 0\n+ count = 0\n+ # fetch reads from BAM file at large exons\n+ for chrom,start,end,strand in exons:\n+ if chrom not in refs:\n+ logging.warning("Skipping exon from reference %s not in BAM" % (chrom))\n+ continue \n+ qname_dict = collections.defaultdict(lambda: [])\n+ for r in bamfh.fetch(chrom, start, end):\n+ num_reads += 1\n+ # ignore unmapped reads, qc fail reads, or unpaired reads\n+ if r.is_unmapped or r.is_qcfail or (not r.is_proper_pair):\n+ unmapped += 1\n+ continue\n+ # ignore multi-mapping reads\n+ if r.opt(\'NH\') > 1:\n+ ambiguous += 1\n+ continue\n+ # ignore spliced reads\n+ has_skip = any(x[0] in SKIP_CIGAR_FLAGS for x in r.cigar)\n+ if has_skip:\n+ spliced += 1\n+ continue \n+ # group paired-end reads by read name\n+ qname_dict[r.qname].append(abs(r.isize))\n+ # keep paired reads with both mates in region\n+ for isizes in qname_dict.itervalues():\n+ isizes = set(abs(x) for x in isizes)\n+ assert len(isizes) == 1\n+ isize = isizes.pop()\n+ if (min_isize <= isize <= max_isize):\n+ count += 1\n+ yield isize\n+ else:\n+ '..b'h, \'\\t\'.join([str(i + self.min_isize), str(x)]) \n+\n+ @staticmethod\n+ def 
from_file(fileh):\n+ isizes = []\n+ counts = []\n+ for line in fileh:\n+ if line.startswith("#"):\n+ continue\n+ fields = line.strip().split(\'\\t\')\n+ i,x = map(int, fields[0:2])\n+ isizes.append(i)\n+ counts.append(x)\n+ d = InsertSizeDistribution()\n+ d.min_isize = isizes[0]\n+ d.max_isize = isizes[-1]\n+ d.arr = array.array(\'L\', counts) \n+ return d\n+\n+ @staticmethod\n+ def from_random(mean, stdev, min_isize, max_isize, samples=100000):\n+ """\n+ initialize from a random sample using normal distribution with \n+ mean \'mean\' and stdev \'stdev\'\n+ """\n+ d = InsertSizeDistribution()\n+ # implement simple checks\n+ assert min_isize < mean < max_isize\n+ assert stdev < (max_isize - min_isize)\n+ # initialize\n+ d.min_isize = min_isize\n+ d.max_isize = max_isize\n+ d.arr = array.array(\'L\', (0 for x in xrange(min_isize, max_isize+1)))\n+ count = 0\n+ outside_range = 0\n+ while True:\n+ if count > samples:\n+ break\n+ isize = int(round(random.normalvariate(mean, stdev),0))\n+ if (min_isize <= isize <= max_isize):\n+ # store in array\n+ d.arr[isize - min_isize] += 1\n+ count += 1\n+ else:\n+ outside_range += 1\n+ return d\n+\n+ @staticmethod\n+ def from_bam(bamfh, min_isize, max_isize, max_samples=None):\n+ # initialize\n+ d = InsertSizeDistribution()\n+ d.min_isize = min_isize\n+ d.max_isize = max_isize\n+ d.arr = array.array(\'L\', (0 for x in xrange(min_isize, max_isize+1))) \n+ frags = 0 \n+ count = 0\n+ outside_range = 0\n+ unmapped = 0\n+ isoforms = 0\n+ for pe_reads in parse_pe_reads(bamfh):\n+ frags += 1\n+ if (max_samples is not None) and (count > max_samples):\n+ break\n+ # only allow mappings where there is a single\n+ # insert size (multiple isoforms are ambiguous)\n+ isizes = set() \n+ for r in pe_reads[0]:\n+ if r.is_unmapped:\n+ continue\n+ # get insert size\n+ isize = r.isize\n+ if isize < 0: isize = -isize\n+ isizes.add(isize)\n+ # insert size must be within range\n+ if len(isizes) == 0:\n+ unmapped += 1\n+ elif len(isizes) > 1:\n+ isoforms 
+= 1\n+ else:\n+ isize = isizes.pop()\n+ if (min_isize <= isize <= max_isize):\n+ # store in array\n+ d.arr[isize - min_isize] += 1\n+ count += 1\n+ else:\n+ outside_range += 1\n+ logging.debug("Processed fragments: %d" % (frags))\n+ logging.debug("Unique paired frags: %d" % (count))\n+ logging.debug("Unmapped: %d" % (unmapped))\n+ logging.debug("Ambiguous (isoforms): %d" % (isoforms))\n+ logging.debug("Outside range: %d" % (outside_range))\n+ return d\n+ \n+ @staticmethod\n+ def from_genome_bam(bamfh, genes, min_isize, max_isize, max_samples=None):\n+ # initialize\n+ d = InsertSizeDistribution()\n+ d.min_isize = min_isize\n+ d.max_isize = max_isize\n+ d.arr = array.array(\'L\', (0 for x in xrange(min_isize, max_isize+1)))\n+ count = 0\n+ for isize in sample_fragment_sizes(bamfh, genes, min_isize, max_isize):\n+ if (min_isize <= isize <= max_isize):\n+ # store in array\n+ d.arr[isize - min_isize] += 1\n+ count += 1\n+ if (max_samples is not None) and (count > max_samples):\n+ break\n+ return d\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/gene_to_genome.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/gene_to_genome.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Jan 31, 2011

@author: mkiyer

chimerascan: chimeric transcript discovery using RNA-seq

Copyright (C) 2011 Matthew Iyer

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
import logging
import collections

from chimerascan.bx.cluster import ClusterTree
from chimerascan.bx.intersection import Interval, IntervalTree
# local imports
from feature import GeneFeature


def build_tid_gene_map(bamfh, genefile, rname_prefix=None):
    """
    map BAM reference ids (tid) to the GeneFeature whose prefixed
    transcript name equals the reference name

    transcripts in 'genefile' that are not references of 'bamfh' are
    ignored
    """
    rname_tid_map = dict((rname,tid) for tid,rname in enumerate(bamfh.references))
    rname_prefix = '' if rname_prefix is None else rname_prefix
    tid_tx_map = {}
    # 'with' closes the gene file once all features have been read
    with open(genefile) as genefh:
        for g in GeneFeature.parse(genefh):
            # only use genes that are references in the sam file
            rname = rname_prefix + g.tx_name
            if rname not in rname_tid_map:
                continue
            tid_tx_map[rname_tid_map[rname]] = g
    return tid_tx_map


def build_tx_name_gene_map(genefile, rname_prefix=None):
    """
    map prefixed transcript names to the GeneFeature parsed from
    'genefile'
    """
    rname_prefix = '' if rname_prefix is None else rname_prefix
    tx_map = {}
    with open(genefile) as genefh:
        for g in GeneFeature.parse(genefh):
            tx_map[rname_prefix + g.tx_name] = g
    return tx_map


def build_genome_tx_trees(genefile):
    """
    build one IntervalTree per chromosome holding the transcript span of
    every feature in 'genefile' (the GeneFeature is the interval value)
    """
    genome_tx_trees = collections.defaultdict(lambda: IntervalTree())
    with open(genefile) as genefh:
        for g in GeneFeature.parse(genefh):
            # add gene to interval tree
            interval = Interval(g.tx_start, g.tx_end, strand=g.strand, value=g)
            genome_tx_trees[g.chrom].insert_interval(interval)
    return genome_tx_trees


def build_transcript_cluster_map(line_iter, rname_prefix=None):
    """
    group transcripts into clusters via per-chromosome, per-strand
    cluster trees built from their exons

    returns a dict mapping prefixed transcript name to an integer
    cluster id.  Ids are arbitrary labels: only equality between two
    transcripts' ids is meaningful.
    """
    # setup cluster trees
    chrom_strand_cluster_trees = \
        collections.defaultdict(lambda: {"+": ClusterTree(0,1),
                                         "-": ClusterTree(0,1)})
    transcripts = []
    index_cluster_map = {}
    for transcript in GeneFeature.parse(line_iter):
        # insert exons into cluster tree
        cluster_tree = chrom_strand_cluster_trees[transcript.chrom][transcript.strand]
        i = len(transcripts)
        for start,end in transcript.exons:
            cluster_tree.insert(start, end, i)
        # each transcript is initially in a cluster by itself
        index_cluster_map[i] = set([i])
        transcripts.append(transcript)
    # extract gene clusters
    for strand_cluster_trees in chrom_strand_cluster_trees.values():
        for cluster_tree in strand_cluster_trees.values():
            for start, end, indexes in cluster_tree.getregions():
                # make new cluster by aggregating all existing
                # clusters with new indexes
                newclust = set(indexes)
                for i in indexes:
                    newclust.update(index_cluster_map[i])
                # map every transcript to the new cluster
                for i in newclust:
                    index_cluster_map[i] = newclust
    # enumerate all clusters
    rname_prefix = '' if rname_prefix is None else rname_prefix
    transcript_cluster_map = {}
    # every member of a cluster references the same set object, so track
    # object identity to number each cluster exactly once
    seen_clusters = set()
    cluster_id = 0
    for clust in index_cluster_map.values():
        if id(clust) in seen_clusters:
            continue
        seen_clusters.add(id(clust))
        for i in clust:
            transcript_cluster_map[rname_prefix + transcripts[i].tx_name] = cluster_id
        cluster_id += 1
    return transcript_cluster_map


def _build_rname_tid_map(bamfh, rname_prefix):
    """map every BAM reference name starting with 'rname_prefix' to its tid"""
    return dict((rname, tid) for tid, rname in enumerate(bamfh.references)
                if rname.startswith(rname_prefix))


def build_transcript_tid_cluster_map(bamfh, line_iter, rname_prefix=None):
    """
    same as build_transcript_cluster_map() but keyed by BAM reference id;
    transcripts that are not references of 'bamfh' are dropped
    """
    # make the standard cluster map
    transcript_cluster_map = build_transcript_cluster_map(line_iter, rname_prefix)
    # map reference name to tid
    rname_prefix = '' if rname_prefix is None else rname_prefix
    transcript_tid_map = _build_rname_tid_map(bamfh, rname_prefix)
    # remake the cluster map
    tid_cluster_map = {}
    for rname, cluster_id in transcript_cluster_map.items():
        if rname not in transcript_tid_map:
            continue
        tid_cluster_map[transcript_tid_map[rname]] = cluster_id
    return tid_cluster_map


def build_transcript_genome_map(line_iter, rname_prefix=None):
    """
    map prefixed transcript names to (chrom, strand, exons) where strand
    is 1 for '-' and 0 otherwise and exons is the list of (start, end)
    tuples in transcript order (reversed for '-' strand)

    a duplicated transcript name is logged as an error and the later
    record wins
    """
    rname_prefix = '' if rname_prefix is None else rname_prefix
    transcript_genome_map = {}
    for g in GeneFeature.parse(line_iter):
        rname = rname_prefix + g.tx_name
        strand = 1 if g.strand == '-' else 0
        exon_vectors = [(start, end) for start, end in g.exons]
        if strand:
            exon_vectors.reverse()
        if rname in transcript_genome_map:
            logging.error("Duplicate references %s found in bed file" % (rname))
        transcript_genome_map[rname] = (g.chrom, strand, exon_vectors)
    return transcript_genome_map


def build_transcript_tid_genome_map(bamfh, line_iter, rname_prefix=None):
    """
    same as build_transcript_genome_map() but keyed by BAM reference id;
    transcripts that are not references of 'bamfh' are dropped
    """
    # make the standard map
    transcript_genome_map = build_transcript_genome_map(line_iter, rname_prefix)
    # map reference name to tid
    rname_prefix = '' if rname_prefix is None else rname_prefix
    transcript_tid_map = _build_rname_tid_map(bamfh, rname_prefix)
    # remap using tid as key
    tid_genome_map = {}
    for rname, coords in transcript_genome_map.items():
        if rname not in transcript_tid_map:
            continue
        tid_genome_map[transcript_tid_map[rname]] = coords
    return tid_genome_map


def transcript_to_genome_pos(rname, pos, transcript_genome_map):
    '''
    translate transcript 'rname' position 'pos' to genomic
    coordinates.  returns a 3-tuple with (chrom, strand, pos), or
    None when 'pos' lies beyond the end of the transcript

    'transcript_genome_map' is keyed by whatever 'rname' is (name or
    tid) and holds (chrom, strand, exons) tuples as produced by
    build_transcript_genome_map()
    '''
    chrom, strand, intervals = transcript_genome_map[rname]
    offset = 0
    for start, end in intervals:
        exon_size = end - start
        if pos < offset + exon_size:
            if strand:
                # '-' strand: count backwards from the exon end
                return chrom, strand, start + exon_size - (pos - offset) - 1
            else:
                return chrom, strand, start + (pos - offset)
        offset += exon_size
    return None
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/gtf.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/gtf.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Nov 2, 2010

@author: mkiyer

chimerascan: chimeric transcript discovery using RNA-seq

Copyright (C) 2011 Matthew Iyer

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
import subprocess
import os

GTF_EMPTY_FIELD = '.'
GTF_ATTR_SEP = ';'
GTF_ATTR_TAGVALUE_SEP = ' '


def sort_gtf(filename, output_file):
    """
    sort GTF file 'filename' into 'output_file' with the external 'sort'
    command (by column 1, then column 4 numerically, then column 3 in
    reverse) under the C locale

    returns the exit status of 'sort' (0 on success)
    """
    args = ["sort", "-k1,1", "-k4,4n", "-k3,3r", filename]
    myenv = os.environ.copy()
    myenv["LC_ALL"] = "C"
    # 'with' makes sure the output handle is flushed and closed
    with open(output_file, "w") as outfh:
        return subprocess.call(args, stdout=outfh, env=myenv)


def window_overlap(a, b):
    """
    return True when the (seqid, start, end) tuples 'a' and 'b' are on
    the same sequence and their ranges overlap or touch
    """
    if a[0] != b[0]:
        return False
    return (a[1] <= b[2]) and (b[1] <= a[2])


def separate_loci(feature_iter):
    """
    generator grouping consecutive features into loci

    'feature_iter' is an iterator of objects with 'seqid', 'start' and
    'end' attributes.  A feature joins the current locus when it passes
    window_overlap() against the locus' running (seqid, min start,
    max end) window; otherwise the locus is yielded and a new one is
    started.  Yields lists of features; yields nothing for empty input.
    """
    # start empty so that an exhausted iterator falls straight through
    window = []
    for feature in feature_iter:
        interval = (feature.seqid, feature.start, feature.end)
        if not window:
            # initialize window
            window = [feature]
            window_range = interval
        elif not window_overlap(interval, window_range):
            # next feature is outside current window: emit and reset
            yield window
            window = [feature]
            window_range = interval
        else:
            # add feature to window and widen it
            window.append(feature)
            window_range = (feature.seqid,
                            min(window_range[1], feature.start),
                            max(window_range[2], feature.end))
    # yield last window
    if len(window) > 0:
        yield window


class GTFFeature(object):
    '''
    1. seqname - The name of the sequence. Must be a chromosome or scaffold.
    2. source - The program that generated this feature.
    3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon".
    4. start - The starting position of the feature in the sequence. The first base is numbered 1.
    5. end - The ending position of the feature (inclusive).
    6. score - A score between 0 and 1000. If the track line useScore attribute is set to 1 for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray). If there is no score value, enter ".".
    7. strand - Valid entries include '+', '-', or '.' (for don't know/don't care).
    8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'.

    chr1    Cufflinks       transcript      136546  137059  1000    .       .       gene_id "VCAP_SHEZH2.657699"; transcript_id "VCAP_SHEZH2.657699.1"; FPKM "100.7219943204"; frac "1.000000"; conf_lo "80.649925"; conf_hi "120.794064"; cov "2.198209";

    Coordinates are stored 0-based with an exclusive end; 'attrs' is a
    dict of the column 9 tag/value pairs.
    '''
    __slots__ = ('seqid', 'source', 'feature_type', 'start', 'end', 'score', 'strand', 'phase', 'attrs')

    def __str__(self):
        line = [self.seqid,
                self.source,
                self.feature_type,
                # convert to 1-based intervals
                str(self.start + 1),
                str(self.end),
                str(self.score),
                str(self.strand),
                self.phase]
        attr_str = ' '.join('%s "%s";' % (k, v) for (k, v) in self.attrs.items())
        line.append(attr_str)
        return '\t'.join(line)

    @staticmethod
    def from_string(line, attr_defs=None):
        """
        parse one GTF line into a GTFFeature

        attr_defs: optional dict mapping attribute tags to a callable
            applied to the (unquoted) attribute value, e.g. float

        A '.' score is stored as 0; any strand other than '+' or '-' is
        stored as '.'.  Attribute values may be quoted or bare.
        """
        f = GTFFeature()
        # read the GTF line
        fields = line.strip().split('\t')
        f.seqid = fields[0]
        f.source = fields[1]
        f.feature_type = fields[2]
        # convert from 1-based (inclusive) to 0-based (exclusive) intervals
        f.start = int(fields[3])-1
        f.end = int(fields[4])
        f.score = 0 if (fields[5] == '.') else float(fields[5])
        strand = fields[6]
        if not (strand == '+' or strand == '-'):
            strand = GTF_EMPTY_FIELD
        f.strand = strand
        f.phase = fields[7]
        attrs = {}
        if len(fields) > 8 and fields[8] != GTF_EMPTY_FIELD:
            for a in fields[8].split(GTF_ATTR_SEP):
                a = a.strip()
                if len(a) == 0:
                    continue
                # partition() also copes with a tag that has no value
                tag, _, value = a.partition(GTF_ATTR_TAGVALUE_SEP)
                if '"' in value:
                    # quoted value: keep the text between the quotes
                    value = value.split('"')[1]
                else:
                    # bare value such as: exon_number 1
                    value = value.strip()
                # apply parsing function
                if (attr_defs is not None) and (attr_defs.get(tag) is not None):
                    value = attr_defs[tag](value)
                attrs[tag] = value
        f.attrs = attrs
        return f

    @staticmethod
    def parse(line_iter, attr_defs=None):
        """
        generator yielding a GTFFeature for every line of 'line_iter'
        that is neither blank nor starting with '#'
        """
        for line in line_iter:
            # read the GTF line
            if not line:
                continue
            if not line.strip():
                continue
            if line.startswith("#"):
                continue
            yield GTFFeature.from_string(line, attr_defs)
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/sam.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/sam.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Jun 2, 2011

@author: mkiyer

chimerascan: chimeric transcript discovery using RNA-seq

Copyright (C) 2011 Matthew Iyer

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
import operator

from chimerascan import pysam
from seq import DNA_reverse_complement

#
# constants used for CIGAR alignments
#
CIGAR_M = 0 #match  Alignment match (can be a sequence match or mismatch)
CIGAR_I = 1 #insertion  Insertion to the reference
CIGAR_D = 2 #deletion  Deletion from the reference
CIGAR_N = 3 #skip  Skipped region from the reference
CIGAR_S = 4 #softclip  Soft clip on the read (clipped sequence present in <seq>)
CIGAR_H = 5 #hardclip  Hard clip on the read (clipped sequence NOT present in <seq>)
CIGAR_P = 6 #padding  Padding (silent deletion from the padded reference sequence)

def parse_reads_by_qname(samfh):
    """
    generator function to parse and return lists of
    reads that share the same qname

    'samfh' is any iterable of reads exposing a 'qname' attribute.
    only adjacent reads are grouped, so the input must already be
    ordered by qname for each name to be yielded exactly once
    """
    reads = []
    for read in samfh:
        # a change of qname closes the current group
        if len(reads) > 0 and read.qname != reads[-1].qname:
            yield reads
            reads = []
        reads.append(read)
    if len(reads) > 0:
        yield reads

def parse_pe_reads(bamfh):
    """
    generator function that yields a tuple ([read1 hits], [read2 hits])
    for every run of adjacent reads sharing the same qname

    a read is placed in the second list when its 'is_read2' flag is
    set and in the first list otherwise
    """
    pe_reads = ([], [])
    # reads must be sorted by qname
    num_reads = 0
    prev_qname = None
    for read in bamfh:
        # get read attributes
        qname = read.qname
        readnum = 1 if read.is_read2 else 0
        # if query name changes we have completely finished
        # the fragment and can reset the read data
        if num_reads > 0 and qname != prev_qname:
            yield pe_reads
            # reset state variables
            pe_reads = ([], [])
            num_reads = 0
        pe_reads[readnum].append(read)
        prev_qname = qname
        num_reads += 1
    if num_reads > 0:
        yield pe_reads

def parse_unpaired_pe_reads(bamfh):
    """
    parses alignments that were aligned in single read mode
    and hence all hits are labeled as 'read1' and lack mate
    information.  instead the read1 read2 information is
    attached to the 'qname' field

    the last character of the qname must be '1' or '2'; the final two
    characters (separator plus read number) are stripped from the
    qname of every read, which is modified in place.  yields tuples of
    ([read1 hits], [read2 hits]) per fragment

    raises ValueError when the qname does not end in 1 or 2
    """
    pe_reads = ([], [])
    num_reads = 0
    prev_qname = None
    for read in bamfh:
        # extract read1/2 from qname
        readnum = int(read.qname[-1])
        if readnum == 1:
            read.is_read1 = True
            mate = 0
        elif readnum == 2:
            read.is_read2 = True
            mate = 1
        else:
            # previously 'mate' was left unbound (or stale from the
            # prior read) here, silently filing the read under the
            # wrong mate or failing with an unrelated error
            raise ValueError("qname '%s' does not end with read "
                             "number 1 or 2" % (read.qname))
        # reconstitute correct qname
        qname = read.qname[:-2]
        read.qname = qname
        # if query name changes we have completely finished
        # the fragment and can reset the read data
        if num_reads > 0 and qname != prev_qname:
            yield pe_reads
            # reset state variables
            pe_reads = ([], [])
            num_reads = 0
        pe_reads[mate].append(read)
        prev_qname = qname
        num_reads += 1
    if num_reads > 0:
        yield pe_reads

def select_best_mismatch_strata(reads, mismatch_tolerance=0):
    """
    returns the reads whose 'NM' tag value is within
    'mismatch_tolerance' of the lowest 'NM' value among the mapped
    reads, ordered by increasing 'NM'

    unmapped reads are ranked one mismatch worse than the worst mapped
    read, so they are only returned when the tolerance reaches that
    far.  when no read is mapped the unmapped reads are returned as is
    """
    if len(reads) == 0:
        return []
    # sort reads by number of mismatches
    mapped_reads = []
    unmapped_reads = []
    for r in reads:
        if r.is_unmapped:
            unmapped_reads.append(r)
        else:
            mapped_reads.append((r.opt('NM'), r))
    if len(mapped_reads) == 0:
        return unmapped_reads
    sorted_reads = sorted(mapped_reads, key=operator.itemgetter(0))
    best_nm = sorted_reads[0][0]
    worst_nm = sorted_reads[-1][0]
    sorted_reads.extend((worst_nm+1, r) for r in unmapped_reads)
    # choose reads within a certain mismatch tolerance
    best_reads = []
    for mismatches, r in sorted_reads:
        if mismatches > (best_nm + mismatch_tolerance):
            break
        best_reads.append(r)
    return best_reads

def copy_read(r):
    """
    returns a new pysam.AlignedRead populated field by field from 'r'
    """
    a = pysam.AlignedRead()
    a.qname = r.qname
    a.seq = r.seq
    a.flag = r.flag
    a.rname = r.rname
    a.pos = r.pos
    a.mapq = r.mapq
    a.cigar = r.cigar
    a.mrnm = r.mrnm
    a.mpos = r.mpos
    a.isize = r.isize
    a.qual = r.qual
    a.tags = r.tags
    return a

def soft_pad_read(fq, r):
    """
    'fq' is the fastq record
    'r' in the AlignedRead SAM read

    replaces the sequence and qualities of 'r' with the full length
    ones from 'fq' and, when 'fq' is longer than 'r', adds a soft clip
    covering the extra bases to the CIGAR (prepended for reverse
    strand reads, appended otherwise).  'r' is modified in place
    """
    # make sequence soft clipped
    ext_length = len(fq.seq) - len(r.seq)
    cigar_softclip = [(CIGAR_S, ext_length)]
    cigar = r.cigar
    # reconstitute full length sequence in read
    if r.is_reverse:
        seq = DNA_reverse_complement(fq.seq)
        qual = fq.qual[::-1]
        if (cigar is not None) and (ext_length > 0):
            cigar = cigar_softclip + cigar
    else:
        seq = fq.seq
        qual = fq.qual
        if (cigar is not None) and (ext_length > 0):
            cigar = cigar + cigar_softclip
    # replace read field
    r.seq = seq
    r.qual = qual
    r.cigar = cigar

def pair_reads(r1, r2, tags=None):
    '''
    fill in paired-end fields in SAM record

    both reads are modified in place: pairing flags and mate fields
    are set from the other read, 'tags' (if given) is appended to the
    tags of both reads, and the insert size is computed
    '''
    if tags is None:
        tags = []
    # convert read1 to paired-end
    r1.is_paired = True
    r1.is_proper_pair = True
    r1.is_read1 = True
    r1.mate_is_reverse = r2.is_reverse
    r1.mate_is_unmapped = r2.is_unmapped
    r1.mpos = r2.pos
    r1.mrnm = r2.rname
    r1.tags = r1.tags + tags
    # convert read2 to paired-end
    r2.is_paired = True
    r2.is_proper_pair = True
    r2.is_read2 = True
    r2.mate_is_reverse = r1.is_reverse
    r2.mate_is_unmapped = r1.is_unmapped
    r2.mpos = r1.pos
    r2.mrnm = r1.rname
    r2.tags = r2.tags + tags
    # compute insert size
    if r1.rname != r2.rname:
        # mates on different references have no meaningful insert size
        r1.isize = 0
        r2.isize = 0
    elif r1.pos > r2.pos:
        isize = r1.aend - r2.pos
        r1.isize = -isize
        r2.isize = isize
    else:
        isize = r2.aend - r1.pos
        r1.isize = isize
        r2.isize = -isize

def get_clipped_interval(r):
    """
    returns (start, end) of the alignment of 'r' extended outwards by
    any soft or hard clipping at either end of its CIGAR

    the start and end clips are checked independently so that a read
    clipped on both sides has both coordinates extended.  reads with
    no CIGAR, or a CIGAR with a single operation, are returned as
    (r.pos, r.aend) unchanged
    """
    cigar = r.cigar
    padstart, padend = r.pos, r.aend
    if (cigar is not None) and (len(cigar) > 1):
        if cigar[0][0] in (CIGAR_S, CIGAR_H):
            padstart -= cigar[0][1]
        if cigar[-1][0] in (CIGAR_S, CIGAR_H):
            padend += cigar[-1][1]
    return padstart, padend
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/seq.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/seq.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Jan 5, 2011

@author: Dan Blankenberg

Code from the Galaxy project (http://galaxy.psu.edu)
Contains methods to transform sequence strings
'''
import string
from math import log10
try:
    from string import maketrans
except ImportError:
    # string.maketrans is not available on newer interpreters;
    # str.maketrans builds an equivalent table for str.translate()
    maketrans = str.maketrans

try:
    xrange
except NameError:
    xrange = range

# Quality score formats
SANGER_FORMAT = "sanger"
SOLEXA_FORMAT = "solexa"
ILLUMINA_FORMAT = "illumina"
FASTQ_QUAL_FORMATS = [SANGER_FORMAT, SOLEXA_FORMAT, ILLUMINA_FORMAT]

#Translation table for reverse Complement, with ambiguity codes
DNA_COMPLEMENT = maketrans( "ACGTRYKMBDHVacgtrykmbdhv", "TGCAYRMKVHDBtgcayrmkvhdb" )
RNA_COMPLEMENT = maketrans( "ACGURYKMBDHVacgurykmbdhv", "UGCAYRMKVHDBugcayrmkvhdb" )
#Translation table for DNA <--> RNA
DNA_TO_RNA = maketrans( "Tt", "Uu" )
RNA_TO_DNA = maketrans( "Uu", "Tt" )

def DNA_complement( sequence ):
    '''complement DNA sequence string'''
    return sequence.translate( DNA_COMPLEMENT )

def DNA_reverse_complement( sequence ):
    '''returns the reverse complement of the sequence'''
    return DNA_complement(sequence[::-1])

def to_DNA( sequence ):
    '''converts an RNA sequence string to DNA (U -> T)'''
    # the DNA_TO_RNA table was used here before, which produced RNA
    return sequence.translate( RNA_TO_DNA )

def RNA_complement( sequence ):
    '''complement RNA sequence string'''
    return sequence.translate( RNA_COMPLEMENT )

def RNA_reverse_complement( self, sequence=None ):
    '''
    returns the reverse complement of the RNA sequence

    this is a module level function, so the leading 'self' parameter
    is a leftover.  it is kept for callers that pass two arguments;
    when called with a single argument that argument is the sequence
    '''
    if sequence is None:
        sequence = self
    return RNA_complement( sequence[::-1] )

def to_RNA( sequence ):
    '''converts a DNA sequence string to RNA (T -> U)'''
    # the RNA_TO_DNA table was used here before, which produced DNA
    return sequence.translate( DNA_TO_RNA )

def _all_chars():
    '''returns the 256 character source alphabet of the quality tables'''
    return ''.join(map(chr, range(256)))

def get_solexa_qual_conversion_table():
    """
    return a translation table that can be used by str.translate() for
    converting solexa to sanger quality scores
    """
    offset = 64
    # everything below the offset maps to the lowest sanger quality
    # and everything from the offset upwards defaults to 'I'
    conv_table = ['!'] * 256
    conv_table[offset:] = "I" * (256-offset)
    for solq in xrange(-5, 40):
        phredq = 10*log10(1 + 10**(solq/10.0))
        phredchr = chr(int(round(33 + phredq)))
        conv_table[offset + solq] = phredchr
    conv_string = ''.join(conv_table)
    return maketrans(_all_chars(), conv_string)

def get_illumina_qual_conversion_table():
    """Illumina 1.3+ format can encode a Phred quality score from 0 to 62
    using ASCII 64 to 126 (although in raw read data Phred scores from 0
    to 40 only are expected).

    The returned table shifts the offset from 64 to 33 and caps
    every score of 40 or above at 'I'.
    """
    offset = 64
    conv_table = ['!'] * 256
    for x in xrange(0, 62):
        conv_table[offset+x] = chr(33 + x)
    conv_table[offset+40:] = "I" * (256-(offset+40))
    conv_string = ''.join(conv_table)
    return maketrans(_all_chars(), conv_string)

def get_sanger_qual_conversion_table():
    """
    return a translation table that clamps sanger quality characters:
    anything below '!' becomes '!' and anything from 'I' upwards
    becomes 'I'
    """
    offset = 33
    tbl = [chr(x) for x in range(256)]
    tbl[:offset] = "!" * offset
    tbl[offset+40:] = "I" * (256-(offset+40))
    return maketrans(_all_chars(), ''.join(tbl))

def get_qual_conversion_func(qual_format):
    """
    returns a function that converts a quality string in 'qual_format'
    (one of FASTQ_QUAL_FORMATS) to sanger format

    raises KeyError for an unknown format
    """
    # look up the builder first so that only the requested table is built
    table_builders = {SANGER_FORMAT: get_sanger_qual_conversion_table,
                      ILLUMINA_FORMAT: get_illumina_qual_conversion_table,
                      SOLEXA_FORMAT: get_solexa_qual_conversion_table}
    tbl = table_builders[qual_format]()
    return lambda q: q.translate(tbl)

class FASTQRecord:
    # one FASTQ entry; 'readnum' is the mate number parsed from the name
    __slots__ = ("qname", "seq", "qual", "readnum")
    def __init__(self, qname, seq, qual, readnum):
        self.qname = qname
        self.seq = seq
        self.qual = qual
        self.readnum = readnum

    def to_string(self):
        '''returns the record as four FASTQ lines without a trailing newline'''
        return ("@%s/%d\n%s\n+\n%s" %
                (self.qname, self.readnum, self.seq, self.qual))

def parse_fastq_record(line_iter,
                       convert_quals=False,
                       qual_format=SANGER_FORMAT):
    """
    generator that yields a FASTQRecord for every four lines read
    from 'line_iter'

    the header line must end with a separator character followed by
    the read number (for example '@name/1'); the leading '@' and those
    last two characters are removed to form the qname.  when
    'convert_quals' is set the quality string is converted from
    'qual_format' to sanger.  a trailing incomplete record is dropped
    """
    qual_func = get_qual_conversion_func(qual_format)
    line_iter = iter(line_iter)
    try:
        while True:
            # qname
            qname = next(line_iter).rstrip()[1:]
            readnum = int(qname[-1])
            qname = qname[:-2]
            # seq
            seq = next(line_iter).rstrip()
            # qname again (skip)
            next(line_iter)
            # qual
            qual = next(line_iter).rstrip()
            if convert_quals:
                qual = qual_func(qual)
            yield FASTQRecord(qname, seq, qual, readnum)
    except StopIteration:
        # input exhausted
        return

def calc_homology(seq1, seq2, num_mismatches):
    """
    returns the length of the longest common prefix of 'seq1' and
    'seq2' that contains at most 'num_mismatches' mismatching
    positions (0 when either sequence is empty)
    """
    smallest_len = min(len(seq1), len(seq2))
    mm = 0
    for i in xrange(smallest_len):
        if seq1[i] != seq2[i]:
            mm += 1
            if mm > num_mismatches:
                return i
    return smallest_len

BASES_PER_LINE = 50
def split_seq(seq, chars_per_line=BASES_PER_LINE):
    """
    returns 'seq' broken into newline separated chunks of at most
    'chars_per_line' characters (no trailing newline)
    """
    return '\n'.join(seq[pos:pos + chars_per_line]
                     for pos in xrange(0, len(seq), chars_per_line))
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/lib/stats.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/lib/stats.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,317 @@\n+\'\'\'\n+Created on Jan 30, 2011\n+\n+@author: mkiyer\n+\'\'\'\n+import math\n+from math import log\n+from collections import defaultdict\n+\n+def comb(N,k):\n+ """\n+ This function was taken from scipy 0.9.0rc1\n+ \n+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \n+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT \n+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS \n+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE \n+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, \n+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \n+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR \n+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) \n+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, \n+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING \n+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE \n+ POSSIBILITY OF SUCH DAMAGE.\n+ \n+ The number of combinations of N things taken k at a time.\n+ This is often expressed as "N choose k".\n+\n+ Parameters\n+ ----------\n+ N : int, array\n+ Number of things.\n+ k : int, array\n+ Number of elements taken.\n+\n+ Returns\n+ -------\n+ val : int, array\n+ The total number of combinations.\n+\n+ Notes\n+ -----\n+ - Array arguments accepted only for exact=0 case.\n+ - If k > N, N < 0, or k < 0, then a 0 is returned.\n+\n+ Examples\n+ --------\n+ >>> k = np.array([3, 4])\n+ >>> n = np.array([10, 10])\n+ >>> comb(n, k, exact=False)\n+ array([ 120., 210.])\n+ >>> comb(10, 3, exact=True)\n+ 120L\n+ """\n+ if (k > N) or (N < 0) or (k < 0):\n+ return 0L\n+ val = 1L\n+ for j in xrange(min(k, N-k)):\n+ val = (val*(N-j))//(j+1)\n+ return val\n+\n+def normal_pdf(x, m, v):\n+ return 1.0/math.sqrt(2*math.pi*v) * math.exp(-(x-m)**2/(2*v))\n+\n+def binomial_pdf(p, n, k):\n+ if n < 100:\n+ return comb(n, k) * p**k * p**(n-k) # Fall back to your 
current method\n+ return normal_pdf(k, n*p, n*p*(1.0-p))\n+\n+def binomial_cdf(p, n, k):\n+ return sum(binomial_pdf(p,n,x) for x in xrange(k+1))\n+\n+def _interpolate(a, b, fraction):\n+ """\n+ This function was taken from scipy 0.9.0rc1\n+ \n+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \n+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT \n+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS \n+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE \n+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, \n+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \n+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR \n+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) \n+ HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, \n+ STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING \n+ IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE \n+ POSSIBILITY OF SUCH DAMAGE.\n+\n+ Returns the point at the given fraction between a and b, where\n+ \'fraction\' must be between 0 and 1.\n+ """\n+ return a + (b - a)*fraction;\n+\n+def scoreatpercentile(values, p):\n+ """\n+ This function was taken from scipy 0.9.0rc1\n+ \n+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \n+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT \n+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS \n+ FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE \n+ COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, \n+ INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \n+ (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR \n+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) \n'..b'for x in arr\n+ if x > 0)\n+ return kldiv\n+\n+def poisson(m):\n+ \'\'\'\n+ courtesy (http://telliott99.blogspot.com/2010/02/replot-poisson-example-with-python.html)\n+ \'\'\'\n+ def f(k):\n+ e = math.e**(-m)\n+ f = math.factorial(k)\n+ g = m**k\n+ return g*e/f\n+ return f\n+\n+def std(a):\n+ # find the mean\n+ n = len(a)\n+ mean = mean(a)\n+ # find the standard deviation\n+ std = sum((x - mean)**2 for x in a)\n+ std = (std / float(n-1))**0.5\n+ return std\n+\n+def normmeanCI(p, xbar, sd, n):\n+ """\n+ Computes a p x 100 CI for the given arguments\n+ p - confidence coefficient, common values are 0.99, 0.95, 0.90\n+ xbar - sample point estimate of unknown pop. mean.\n+ sd - standard deviation\n+ n - sample size\n+ """\n+ se = sd / (n ** 0.5)\n+ alphadiv2 = (1.0- p)/2.0\n+ z2 = stat.norm. 
ppf(1-alphadiv2)\n+ a = xbar - z2 * se\n+ b = xbar + z2 * se\n+ return (a, b)\n+\n+def median(a):\n+ b = sorted(a)\n+ ind,odd = divmod(len(b),2)\n+ median = (b[ind] + b[ind+odd]) / 2.0\n+\n+def mean(a):\n+ return sum(a)/float(len(a))\n+\n+class EmpiricalCdf3D(object):\n+ \n+ def prob(self, x, y, z):\n+ if self.n == 0:\n+ return 0.0\n+ # find prob(X = x) by summing all y\'s and z\'a\n+ nx = 0\n+ ydict = self.D[x]\n+ for zdict in ydict.itervalues(): \n+ nz_given_y = sum(zdict.itervalues())\n+ nx += nz_given_y\n+ if nx == 0:\n+ return 0.0\n+ px = nx / float(self.n) \n+ # find prob(Y = y | X = x)\n+ ny_given_x = sum(self.D[x][y].itervalues())\n+ if ny_given_x == 0:\n+ return 0.0\n+ py_given_x = ny_given_x / float(nx)\n+ # find prob(Z = z | Y=y, X=x)\n+ nz_given_xy = self.D[x][y][z]\n+ if nz_given_xy == 0:\n+ return 0.0\n+ pz_given_xy = nz_given_xy / float(ny_given_x) \n+ # multiply together\n+ return pz_given_xy * py_given_x * px\n+\n+ def _count(self, x, y, z):\n+ total = 0\n+ xkeys = sorted(self.D.iterkeys())\n+ for xval in xkeys:\n+ if xval > x:\n+ break\n+ ykeys = sorted(self.D[xval].iterkeys())\n+ for yval in ykeys:\n+ if yval > y:\n+ break\n+ zkeys = sorted(self.D[xval][yval].iterkeys())\n+ for zval in zkeys:\n+ if zval > z:\n+ break\n+ total += self.D[xval][yval][zval]\n+ return total\n+\n+ def __init__(self, data_iter):\n+ # use dict as sparse matrix for now\n+ self.D = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0)))\n+ self.n = 0\n+ for x,y,z in data_iter:\n+ self.n += 1\n+ self.D[x][y][z] += 1\n+ # turn into dicts\n+ for xval, ydict in self.D.iteritems():\n+ self.D[xval] = dict(ydict)\n+ for yval, zdict in ydict.iteritems():\n+ self.D[xval][yval] = dict(zdict)\n+ self.CDF = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0))) \n+ # turn into cumulative counts\n+ xkeys = sorted(self.D.iterkeys())\n+ for xval in xkeys: \n+ ykeys = sorted(self.D[xval].iterkeys())\n+ for yval in ykeys:\n+ zkeys = 
sorted(self.D[xval][yval].iterkeys())\n+ for zval in zkeys:\n+ c = self._count(xval, yval, zval)\n+ self.CDF[xval][yval][zval] = c \n+\n+ def __call__(self, x, y, z):\n+ return self.CDF[x][y][z] / float(self.n)\n+\n+if __name__ == \'__main__\':\n+ import random\n+ X = [random.randrange(0, 5) for x in xrange(100)]\n+ Y = [random.randrange(0, 5) for y in xrange(100)]\n+ Z = [random.randrange(0, 5) for z in xrange(100)]\n+ import itertools\n+ x = EmpiricalCdf3D(itertools.izip(X,Y,Z))\n+ print x.n \n+ print x(4, 4, 4)\n+\n+ \n+\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/align_bowtie.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/align_bowtie.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,282 @@\n+\'\'\'\n+Created on Jun 1, 2011\n+\n+@author: mkiyer\n+\n+chimerascan: chimeric transcript discovery using RNA-seq\n+\n+Copyright (C) 2011 Matthew Iyer\n+\n+This program is free software: you can redistribute it and/or modify\n+it under the terms of the GNU General Public License as published by\n+the Free Software Foundation, either version 3 of the License, or\n+(at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful,\n+but WITHOUT ANY WARRANTY; without even the implied warranty of\n+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License\n+along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\'\'\'\n+import sys\n+import os\n+import logging\n+import subprocess\n+\n+from chimerascan.lib.base import LibraryTypes\n+from chimerascan.lib.seq import SANGER_FORMAT, SOLEXA_FORMAT, ILLUMINA_FORMAT\n+from chimerascan.lib import config\n+\n+translate_quals = {SOLEXA_FORMAT: \'solexa-quals\',\n+ ILLUMINA_FORMAT: \'solexa1.3-quals\',\n+ SANGER_FORMAT: \'phred33-quals\'}\n+\n+def translate_library_type(library_type):\n+ """\n+ returns the bowtie library type option \'--fr\' or \'--ff\' corresponding\n+ to the first two characters of the library type string\n+ """\n+ return library_type[0:2]\n+\n+_sam2bam_script = os.path.join(os.path.dirname(__file__), "sam2bam.py")\n+_fastq_trim_script = os.path.join(os.path.dirname(__file__), "fastq_merge_trim.py")\n+\n+def align_pe(fastq_files, \n+ bowtie_index,\n+ output_bam_file, \n+ unaligned_fastq_param=None,\n+ maxmultimap_fastq_param=None,\n+ min_fragment_length=0,\n+ max_fragment_length=1000,\n+ trim5=0,\n+ trim3=0,\n+ library_type=LibraryTypes.FR_UNSTRANDED,\n+ num_processors=1, \n+ quals=SANGER_FORMAT,\n+ multihits=100, \n+ mismatches=2, \n+ bowtie_bin="bowtie", \n+ bowtie_args=None,\n+ log_file=None,\n+ 
keep_unmapped=False):\n+ args = [bowtie_bin, "-q", "-S", \n+ "-p", str(num_processors),\n+ "--%s" % translate_quals[quals],\n+ "-k", str(multihits),\n+ "-m", str(multihits),\n+ "-v", str(mismatches),\n+ "--minins", min_fragment_length,\n+ "--maxins", max_fragment_length,\n+ "--trim5", trim5,\n+ "--trim3", trim3,\n+ "--%s" % translate_library_type(library_type)]\n+ if unaligned_fastq_param is not None:\n+ args.extend(["--un", unaligned_fastq_param])\n+ if maxmultimap_fastq_param is not None:\n+ args.extend(["--max", maxmultimap_fastq_param]) \n+ if bowtie_args is not None: \n+ args.extend(bowtie_args.split())\n+ args += [bowtie_index, \n+ "-1", fastq_files[0],\n+ "-2", fastq_files[1]]\n+ args = map(str, args)\n+ logging.debug("Bowtie alignment args: %s" % (\' \'.join(args)))\n+ # setup logging\n+ if log_file is not None:\n+ logfh = open(log_file, "w")\n+ else:\n+ logfh = None\n+ aln_p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=logfh)\n+ # pipe the bowtie SAM output to a filter that writes BAM format\n+ args = [sys.executable, _sam2bam_script, \n+ "--multihits", str(multihits),\n+ "--quals", quals]\n+ if keep_unmapped:\n+ args.append("--un")\n+ args.extend([output_bam_file, "-"])\n+ args.extend(fastq_files)\n+ logging.debug("SAM to BAM converter args: %s" % (\' \'.join(args)))\n+ retcode = subprocess.call(args, stdin=aln_p.stdout, stderr=logfh) \n+ if logfh is not None:\n+ logfh.close()\n+ if retcode != 0:\n+ logging.error("SAM to BAM conversion script failed")\n+ aln_p.terminate()\n+ # cleanup output file\n+ if os.path.exists(output_bam_file):\n+ '..b', _sam2bam_script, \n+ "--multihits", str(multihits),\n+ "--quals", quals]\n+ if keep_unmapped:\n+ args.append("--un")\n+ args.extend([output_bam_file, "-"])\n+ args.append(fastq_file) \n+ logging.debug("SAM to BAM converter args: %s" % (\' \'.join(args)))\n+ fix_p = subprocess.Popen(args, stdin=aln_p.stdout, stderr=logfh)\n+ # wait for processes to complete\n+ retcode1 = fix_p.wait()\n+ if retcode1 != 
0:\n+ logging.error("SAM to BAM conversion script failed")\n+ # kill alignment process\n+ aln_p.kill()\n+ # cleanup output file\n+ if os.path.exists(output_bam_file):\n+ os.remove(output_bam_file)\n+ # end logging\n+ if logfh is not None:\n+ logfh.close()\n+ return config.JOB_ERROR\n+ retcode2 = aln_p.wait()\n+ # end logging\n+ if logfh is not None:\n+ logfh.close()\n+ if retcode2 != 0:\n+ logging.error("Alignment process failed")\n+ # cleanup output file\n+ if os.path.exists(output_bam_file):\n+ os.remove(output_bam_file)\n+ return config.JOB_ERROR\n+ return config.JOB_SUCCESS\n+\n+\n+def trim_align_pe_sr(fastq_files,\n+ bowtie_index,\n+ output_bam_file,\n+ unaligned_fastq_param=None,\n+ maxmultimap_fastq_param=None,\n+ trim5=0,\n+ library_type=LibraryTypes.FR_UNSTRANDED,\n+ num_processors=1, \n+ quals=SANGER_FORMAT,\n+ multihits=100, \n+ mismatches=2, \n+ bowtie_bin="bowtie", \n+ bowtie_args=None,\n+ log_file=None,\n+ segment_length=25,\n+ keep_unmapped=False):\n+ # setup logging\n+ if log_file is not None:\n+ logfh = open(log_file, "w")\n+ else:\n+ logfh = None\n+ #\n+ # Merge paired-end reads into single fastq file\n+ #\n+ args = [sys.executable, _fastq_trim_script, \n+ "--trim5", str(trim5), \n+ "--segment-length", str(segment_length)]\n+ args.extend(fastq_files)\n+ args.append("-")\n+ logging.debug("FASTQ trimming args: %s" % (\' \'.join(args)))\n+ trim_p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=logfh)\n+ #\n+ # Align the trimmed reads\n+ #\n+ args = [bowtie_bin, "-q", "-S", \n+ "-p", str(num_processors),\n+ "--tryhard",\n+ "--%s" % translate_quals[quals],\n+ "-k", str(multihits),\n+ "-m", str(multihits),\n+ "-v", str(mismatches),\n+ "--%s" % translate_library_type(library_type)]\n+ if unaligned_fastq_param is not None:\n+ args.extend(["--un", unaligned_fastq_param])\n+ if maxmultimap_fastq_param is not None:\n+ args.extend(["--max", maxmultimap_fastq_param]) \n+ if bowtie_args is not None: \n+ args.extend(bowtie_args.split())\n+ args += 
[bowtie_index, "-"]\n+ logging.debug("Alignment args: %s" % (\' \'.join(args)))\n+ aln_p = subprocess.Popen(args, stdin=trim_p.stdout, \n+ stdout=subprocess.PIPE,\n+ stderr=logfh)\n+ #\n+ # Fix alignment ordering and convert to BAM, also extend sequences\n+ # back to full length by adding padding to CIGAR string\n+ #\n+ args = [sys.executable, _sam2bam_script, \n+ "--multihits", str(multihits),\n+ "--quals", quals,\n+ "--pesr", \n+ "--softclip"] \n+ if keep_unmapped:\n+ args.append("--un")\n+ args.extend([output_bam_file, "-"])\n+ args.extend(fastq_files)\n+ logging.debug("SAM to BAM converter args: %s" % (\' \'.join(args)))\n+ fix_p = subprocess.Popen(args, stdin=aln_p.stdout, stderr=logfh)\n+ # wait for processes to complete\n+ fix_p.wait()\n+ aln_p.wait()\n+ trim_p.wait()\n+ # end logging\n+ if logfh is not None:\n+ logfh.close()\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/chimeras_to_breakpoints.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/chimeras_to_breakpoints.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Jun 11, 2011

@author: mkiyer
'''
import logging
import os
import collections

from chimerascan import pysam
from chimerascan.lib import config
from chimerascan.lib.chimera import Chimera
from chimerascan.lib.batch_sort import batch_sort
from chimerascan.lib.seq import split_seq

def _write_breakpoint(fastafh, mapfh, breakpoint_name, seq, chimera_names):
    '''
    writes one breakpoint as a FASTA record to 'fastafh' and as a
    "name<TAB>sequence<TAB>comma separated chimera names" line to 'mapfh'
    '''
    fastafh.write(">%s\n%s\n" % (breakpoint_name, split_seq(seq)))
    # names are sorted so that the output is deterministic
    mapfh.write("%s\t%s\t%s\n" % (breakpoint_name,
                                  seq,
                                  ",".join(sorted(chimera_names))))

def chimeras_to_breakpoints(input_file, breakpoint_sorted_chimera_file,
                            breakpoint_map_file, breakpoint_fasta_file,
                            tmp_dir):
    '''
    sorts the chimeras in 'input_file' by breakpoint name into
    'breakpoint_sorted_chimera_file', then writes one entry per
    distinct breakpoint name to 'breakpoint_fasta_file' (the 5' and 3'
    breakpoint sequences joined) and to 'breakpoint_map_file' (the
    breakpoint together with the names of the chimeras sharing it)

    'tmp_dir' is handed to batch_sort as its temporary directory
    '''
    # sort chimera file by breakpoint name
    def sortfunc(line):
        fields = line.strip().split('\t')
        return fields[Chimera.BREAKPOINT_NAME_FIELD]
    tempdirs = [tmp_dir]
    batch_sort(input=input_file,
               output=breakpoint_sorted_chimera_file,
               key=sortfunc,
               buffer_size=32000,
               tempdirs=tempdirs)
    # parse and build breakpoint -> chimera map
    with open(breakpoint_fasta_file, "w") as fastafh:
        with open(breakpoint_map_file, "w") as mapfh:
            with open(breakpoint_sorted_chimera_file) as chimerafh:
                prev_breakpoint_name = None
                prev_seq = None
                chimera_names = set()
                for c in Chimera.parse(chimerafh):
                    seq = c.breakpoint_seq_5p + c.breakpoint_seq_3p
                    if c.breakpoint_name != prev_breakpoint_name:
                        # the input is sorted, so a new name means the
                        # previous breakpoint is complete
                        if len(chimera_names) > 0:
                            _write_breakpoint(fastafh, mapfh,
                                              prev_breakpoint_name,
                                              prev_seq, chimera_names)
                            chimera_names = set()
                        prev_seq = seq
                        prev_breakpoint_name = c.breakpoint_name
                    chimera_names.add(c.name)
                # flush the last breakpoint
                if len(chimera_names) > 0:
                    _write_breakpoint(fastafh, mapfh, prev_breakpoint_name,
                                      prev_seq, chimera_names)


def main():
    from optparse import OptionParser
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    parser = OptionParser("usage: %prog [options] <chimeras.bedpe> "
                          "<sorted_chimeras.bedpe> "
                          "<breakpoints.txt> <breakpoints.fa> <tmp_dir>")
    options, args = parser.parse_args()
    if len(args) < 5:
        parser.error("expected 5 arguments but got %d" % (len(args)))
    input_file = args[0]
    breakpoint_sorted_chimera_file = args[1]
    breakpoint_map_file = args[2]
    breakpoint_fasta_file = args[3]
    # the temporary directory is the fifth argument; args[3] (the
    # fasta output path) was used here by mistake
    tmp_dir = args[4]
    chimeras_to_breakpoints(input_file, breakpoint_sorted_chimera_file,
                            breakpoint_map_file, breakpoint_fasta_file, tmp_dir)


if __name__ == '__main__':
    main()
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/discordant_reads_to_bedpe.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/discordant_reads_to_bedpe.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Jul 21, 2011

@author: mkiyer
'''
import logging
import os
import sys

from chimerascan import pysam
from chimerascan.lib import config
from chimerascan.lib.chimera import DiscordantTags, DISCORDANT_TAG_NAME, \
    OrientationTags, ORIENTATION_TAG_NAME, DiscordantRead
from chimerascan.lib.gene_to_genome import build_tid_gene_map
from chimerascan.lib.batch_sort import batch_sort

def parse_pairs(bamfh):
    """
    yields consecutive reads from 'bamfh' two at a time as (r1, r2)

    a final unpaired read is dropped
    """
    bam_iter = iter(bamfh)
    try:
        while True:
            r1 = next(bam_iter)
            r2 = next(bam_iter)
            yield r1,r2
    except StopIteration:
        return

def parse_gene_discordant_reads(bamfh):
    """
    return tuples of (5',3') reads that both align to transcripts
    """
    for r1,r2 in parse_pairs(bamfh):
        # TODO:
        # for now we are only going to deal with gene-gene
        # chimeras and leave other chimeras for study at a
        # later time
        dr1 = r1.opt(DISCORDANT_TAG_NAME)
        dr2 = r2.opt(DISCORDANT_TAG_NAME)
        if (dr1 != DiscordantTags.DISCORDANT_GENE or
            dr2 != DiscordantTags.DISCORDANT_GENE):
            continue
        # organize key in 5' to 3' order
        or1 = r1.opt(ORIENTATION_TAG_NAME)
        or2 = r2.opt(ORIENTATION_TAG_NAME)
        assert or1 != or2
        if or1 == OrientationTags.FIVEPRIME:
            pair = (r1,r2)
        else:
            pair = (r2,r1)
        yield pair

def discordant_reads_to_bedpe(index_dir, input_bam_file, output_file):
    """
    writes one BEDPE line to 'output_file' for every gene-gene
    discordant read pair found in 'input_bam_file'

    'index_dir' must contain the gene feature file named by
    config.GENE_FEATURE_FILE.  both the BAM file and the output file
    are closed when the function returns or raises
    """
    # open BAM alignment file
    bamfh = pysam.Samfile(input_bam_file, "rb")
    try:
        # build a lookup table to get genomic intervals from transcripts
        logging.debug("Reading gene information")
        gene_file = os.path.join(index_dir, config.GENE_FEATURE_FILE)
        tid_gene_map = build_tid_gene_map(bamfh, gene_file,
                                          rname_prefix=config.GENE_REF_PREFIX)
        outfh = open(output_file, "w")
        try:
            logging.debug("Converting BAM to BEDPE format")
            for r5p,r3p in parse_gene_discordant_reads(bamfh):
                # store pertinent read information in lightweight structure called
                # DiscordantRead object. this departs from SAM format into a
                # custom read format
                dr5p = DiscordantRead.from_read(r5p)
                dr3p = DiscordantRead.from_read(r3p)
                # get gene information
                tx5p = tid_gene_map[r5p.rname]
                tx3p = tid_gene_map[r3p.rname]
                # write bedpe format
                fields = [tx5p.tx_name, r5p.pos, r5p.aend,
                          tx3p.tx_name, r3p.pos, r3p.aend,
                          r5p.qname,  # read name
                          0, # score
                          tx5p.strand, tx3p.strand, # strand 1, strand 2
                          ]
                fields.append('|'.join(map(str, dr5p.to_list())))
                fields.append('|'.join(map(str, dr3p.to_list())))
                outfh.write('\t'.join(map(str, fields)) + '\n')
        finally:
            outfh.close()
    finally:
        bamfh.close()

def sort_bedpe(input_file, output_file, tmp_dir):
    """
    sorts the BEDPE file 'input_file' into 'output_file' using
    'tmp_dir' as the temporary directory of batch_sort
    """
    # sort BEDPE file by paired chromosome/position
    def sortfunc(line):
        fields = line.strip().split('\t')
        # NOTE(review): the position fields are compared as strings
        # here, not as numbers -- confirm that this is intended
        return tuple([fields[0], fields[3], fields[1], fields[4]])
    tempdirs = [tmp_dir]
    batch_sort(input=input_file,
               output=output_file,
               key=sortfunc,
               buffer_size=32000,
               tempdirs=tempdirs)


def main():
    from optparse import OptionParser
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    parser = OptionParser("usage: %prog [options] <index> <pairs.bam> <out.bedpe>")
    options, args = parser.parse_args()
    if len(args) < 3:
        parser.error("expected 3 arguments but got %d" % (len(args)))
    index_dir = args[0]
    input_bam_file = args[1]
    output_file = args[2]
    return discordant_reads_to_bedpe(index_dir,
                                     input_bam_file,
                                     output_file)

if __name__ == '__main__':
    sys.exit(main())
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/fastq_inspect_reads.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/fastq_inspect_reads.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Jul 14, 2011

@author: mkiyer

chimerascan: chimeric transcript discovery using RNA-seq

Copyright (C) 2011 Matthew Iyer

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
import logging
import gzip
import bz2
import zipfile
import os

from chimerascan.lib.seq import get_qual_conversion_func
from chimerascan.lib.base import parse_lines
import chimerascan.lib.config as config

def detect_format(f):
    """
    guesses the compression format of file name 'f' from its extension

    returns "gz" (.gz or .z), "bz2", "zip", or "txt" for anything else
    """
    if f.endswith(".gz") or f.endswith(".z"):
        return "gz"
    elif f.endswith(".bz2"):
        return "bz2"
    elif f.endswith(".zip"):
        return "zip"
    else:
        return "txt"

def open_compressed(f):
    """
    opens file 'f' for reading, decompressing it according to the
    format reported by detect_format(), and returns a line-iterable
    file object

    for zip archives the first member of the archive is opened; an
    IOError is raised if the archive has no members
    """
    compression_format = detect_format(f)
    if compression_format == "gz":
        fh = gzip.open(f, "r")
    elif compression_format == "bz2":
        fh = bz2.BZ2File(f, "r")
    elif compression_format == "zip":
        # a ZipFile object itself cannot be iterated line by line, so
        # hand back a stream over the first archive member instead
        archive = zipfile.ZipFile(f, "r")
        members = archive.namelist()
        if not members:
            archive.close()
            raise IOError("zip archive '%s' contains no files" % (f))
        # NOTE(review): the ZipFile object is released when it is garbage
        # collected; only the member stream is closed by callers
        fh = archive.open(members[0], "r")
    else:
        fh = open(f, "r")
    return fh

def detect_read_length(filename):
    """
    returns the length of the first read in FASTQ file 'filename'

    the line terminator is not counted
    """
    fh = open_compressed(filename)
    try:
        # skip the header line of the first record
        next(fh)
        seq = next(fh)
    finally:
        fh.close()
    return len(seq.rstrip())

def get_min_max_read_lengths(fastq_files, num_samples=10000):
    """
    samples up to 'num_samples' reads from the start of each file in
    'fastq_files' and returns (min_length, max_length) over all of them

    line terminators are not counted.  raises ValueError when no reads
    could be sampled at all
    """
    read_lengths = []
    for filename in fastq_files:
        f = open_compressed(filename)
        try:
            samples = 0
            for count, line in enumerate(f):
                # the sequence is the second line of each 4-line record
                if count % 4 == 1:
                    read_lengths.append(len(line.rstrip()))
                    samples += 1
                    if samples >= num_samples:
                        break
        finally:
            f.close()
    if not read_lengths:
        raise ValueError("no reads found in FASTQ files: %s" %
                         (', '.join(map(str, fastq_files))))
    return min(read_lengths), max(read_lengths)

def inspect_reads(fastq_files, output_prefix, quals):
    """
    uncompresses reads, renames reads, and converts quality scores
    to 'sanger' format

    fastq_files   - input FASTQ files, one per mate
    output_prefix - outputs are written to <output_prefix>_1.fq,
                    <output_prefix>_2.fq, ...
    quals         - name of the input quality score encoding, passed to
                    get_qual_conversion_func()

    processing stops as soon as any one of the inputs runs out of
    records.  returns config.JOB_SUCCESS, or config.JOB_ERROR if an
    error occurs while converting records, in which case partially
    written output files are removed.  all file handles are closed
    before returning
    """
    if len(fastq_files) == 0:
        # with no inputs the record loop below would never terminate
        raise ValueError("at least one FASTQ file is required")
    output_files = [(output_prefix + "_%d.fq" % (x+1))
                    for x in range(len(fastq_files))]
    qual_func = get_qual_conversion_func(quals)
    filehandles = []
    outfhs = []
    linenum = 0
    status = config.JOB_ERROR
    try:
        # setup file iterators
        for f in fastq_files:
            filehandles.append(open_compressed(f))
        fqiters = [parse_lines(f, numlines=4) for f in filehandles]
        for f in output_files:
            outfhs.append(open(f, "w"))
        try:
            while True:
                pelines = [next(it) for it in fqiters]
                for i, lines in enumerate(pelines):
                    # rename read using line number
                    lines[0] = "@%d/%d" % (linenum, i+1)
                    # ignore redundant header
                    lines[2] = "+"
                    # convert quality score to sanger
                    lines[3] = qual_func(lines[3])
                    outfhs[i].write('\n'.join(lines) + '\n')
                linenum += 1
        except StopIteration:
            # normal termination: an input ran out of records
            status = config.JOB_SUCCESS
        except Exception:
            # log the traceback too so the failure can be diagnosed
            logging.exception("Unexpected error during FASTQ file processing")
    finally:
        for fh in filehandles + outfhs:
            fh.close()
        if status != config.JOB_SUCCESS:
            # do not leave truncated outputs behind
            for f in output_files:
                if os.path.exists(f):
                    os.remove(f)
    if status == config.JOB_SUCCESS:
        logging.debug("Inspected %d fragments" % (linenum))
    return status

def main():
    """
    command line entry point: <outprefix> <in1.fq> [<in2.fq>]
    """
    logging.basicConfig(level=logging.INFO,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] <outprefix> <in1.fq> <in2.fq>")
    parser.add_option("--quals", dest="quals", choices=["sanger", "solexa", "illumina"],
                      default="sanger")
    options, args = parser.parse_args()
    if len(args) < 2:
        parser.error("must specify output prefix and at least one fastq file")
    output_prefix = args[0]
    fastq_files = args[1:]
    return inspect_reads(fastq_files, output_prefix, options.quals)

if __name__ == '__main__':
    main()
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/fastq_merge_trim.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/fastq_merge_trim.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on May 23, 2011

@author: mkiyer

chimerascan: chimeric transcript discovery using RNA-seq

Copyright (C) 2011 Matthew Iyer

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
'''
import sys

def parse_fastq(line_iter):
    """
    yields FASTQ records from 'line_iter' as lists of four lines with
    trailing whitespace removed

    an incomplete record at the end of the input is discarded.  if
    'line_iter' has a close() method it is called once the generator
    finishes or is closed
    """
    lines_in = iter(line_iter)
    try:
        while True:
            record = []
            for _ in range(4):
                record.append(next(lines_in).rstrip())
            yield record
    except StopIteration:
        # end of input, possibly in the middle of a record
        return
    finally:
        close = getattr(line_iter, "close", None)
        if close is not None:
            close()

def trim_and_merge_fastq(infiles, outfile, trim5, segment_length):
    """
    interleaves the records of the FASTQ files in 'infiles' into
    'outfile' ("-" means standard output), trimming long reads

    a read longer than trim5 + segment_length is cut down to the
    'segment_length' bases that follow its first 'trim5' bases, and its
    quality string is cut the same way; shorter reads are written
    unchanged.  output stops as soon as any input runs out of records

    every file opened here is closed before returning, including when
    an error is raised
    """
    total_length = trim5 + segment_length
    infhs = []
    outfh = None
    try:
        for f in infiles:
            infhs.append(open(f))
        fqiters = [parse_fastq(fh) for fh in infhs]
        if outfile == "-":
            outfh = sys.stdout
        else:
            outfh = open(outfile, "w")
        # with no inputs there is nothing to merge (and the loop below
        # would otherwise never see the end of an input)
        while fqiters:
            try:
                pe_lines = [next(fqiter) for fqiter in fqiters]
            except StopIteration:
                break
            for lines in pe_lines:
                seqlen = len(lines[1])
                if seqlen > total_length:
                    lines[1] = lines[1][trim5:total_length]
                    lines[3] = lines[3][trim5:total_length]
                outfh.write('\n'.join(lines) + '\n')
    finally:
        for fh in infhs:
            fh.close()
        # never close standard output
        if outfh is not None and outfile != "-":
            outfh.close()

def main():
    """
    command line entry point: <in1.fq> <in2.fq> <out.fq>
    """
    from optparse import OptionParser
    parser = OptionParser("usage: %prog [options] <in1.fq> <in2.fq> <out.fq>")
    parser.add_option("--trim5", type="int", dest="trim5", default=0)
    parser.add_option("--segment-length", type="int", dest="segment_length", default=25)
    options, args = parser.parse_args()
    # fail with a usage message rather than an IndexError
    if len(args) != 3:
        parser.error("incorrect number of arguments")
    trim_and_merge_fastq(args[:2], args[2], options.trim5, options.segment_length)

if __name__ == '__main__':
    main()
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/filter_chimeras.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/filter_chimeras.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,217 @@\n+\'\'\'\n+Created on Jan 31, 2011\n+\n+@author: mkiyer\n+\n+chimerascan: chimeric transcript discovery using RNA-seq\n+\n+Copyright (C) 2011 Matthew Iyer\n+\n+This program is free software: you can redistribute it and/or modify\n+it under the terms of the GNU General Public License as published by\n+the Free Software Foundation, either version 3 of the License, or\n+(at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful,\n+but WITHOUT ANY WARRANTY; without even the implied warranty of\n+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License\n+along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\'\'\'\n+import logging\n+import collections\n+import os\n+\n+from chimerascan import pysam\n+from chimerascan.lib.gene_to_genome import build_transcript_genome_map, \\\n+ transcript_to_genome_pos, build_transcript_cluster_map\n+from chimerascan.lib.chimera import Chimera\n+from chimerascan.lib import config\n+\n+def filter_unique_frags(c, threshold):\n+ """\n+ filters chimeras with less than \'threshold\' unique\n+ alignment positions supporting the chimera \n+ """\n+ return c.get_num_unique_positions() >= threshold\n+\n+def get_wildtype_frags_5p(rname, start, end, bamfh):\n+ num_wildtype_frags = len(set(r.qname for r in bamfh.fetch(rname, start, end)\n+ if (not r.mate_is_unmapped) and (r.mpos >= end)))\n+ return num_wildtype_frags\n+\n+def get_wildtype_frags_3p(rname, start, end, bamfh):\n+ num_wildtype_frags = len(set(r.qname for r in bamfh.fetch(rname, start, end)\n+ if (not r.mate_is_unmapped) and (r.mpos < start)))\n+ return num_wildtype_frags\n+\n+def get_wildtype_frags(c, bamfh):\n+ rname5p = config.GENE_REF_PREFIX + c.tx_name_5p\n+ rname3p = config.GENE_REF_PREFIX + c.tx_name_3p\n+ num_wt_frags_5p = get_wildtype_frags_5p(rname5p, c.tx_start_5p, c.tx_end_5p, 
bamfh)\n+ num_wt_frags_3p = get_wildtype_frags_3p(rname3p, c.tx_start_3p, c.tx_end_3p, bamfh)\n+ return num_wt_frags_5p, num_wt_frags_3p\n+\n+def filter_chimeric_isoform_fraction(c, frac, bamfh):\n+ """\n+ filters chimeras with fewer than \'threshold\' total\n+ unique read alignments\n+ """\n+ num_wt_frags_5p, num_wt_frags_3p = get_wildtype_frags(c, bamfh)\n+ num_chimeric_frags = c.get_num_frags()\n+ ratio5p = float(num_chimeric_frags) / (num_chimeric_frags + num_wt_frags_5p)\n+ ratio3p = float(num_chimeric_frags) / (num_chimeric_frags + num_wt_frags_3p)\n+ #print c.gene_name_5p, c.gene_name_3p, "chimeras", num_chimeric_frags, "wt5p", num_wt_frags_5p, "wt3p", num_wt_frags_3p, "r5p", ratio5p, "r3p", ratio3p\n+ return min(ratio5p, ratio3p) >= frac\n+\n+def read_false_pos_file(filename):\n+ false_pos_chimeras = set()\n+ for line in open(filename):\n+ fields = line.strip().split("\\t")\n+ tx_name_5p, end5p, tx_name_3p, start3p = fields\n+ end5p = int(end5p)\n+ start3p = int(start3p)\n+ false_pos_chimeras.add((tx_name_5p, end5p, tx_name_3p, start3p))\n+ return false_pos_chimeras\n+\n+def filter_encompassing_chimeras(input_file, output_file, min_frags):\n+ num_chimeras = 0\n+ num_filtered_chimeras = 0\n+ f = open(output_file, "w") \n+ for c in Chimera.parse(open(input_file)):\n+ num_chimeras += 1\n+ if c.get_num_frags() < min_frags:\n+ continue\n+ num_filtered_chimeras += 1\n+ print >>f, \'\\t\'.join(map(str, c.to_list()))\n+ f.close()\n+ logging.debug("\\tchimeras: %d" % (num_chimeras))\n+ logging.debug("\\tfiltered chimeras: %d" % (num_filtered_chimeras))\n+ return config.JOB_SUCCESS\n+\n+def filter_chimeras(input_file, output_file,\n+ index_dir, bam_file,\n+ unique_frags,\n+ isoform_fraction,\n+ false_pos_file):\n+ logging.debug("Parameters")\n+ logging.debug("\\'..b'coverage_isoforms(input_file, gene_file):\n+ # place overlapping chimeras into clusters\n+ logging.debug("Building isoform cluster lookup table")\n+ transcript_cluster_map = 
build_transcript_cluster_map(open(gene_file))\n+ # build a lookup table to get genome coordinates from transcript \n+ # coordinates\n+ transcript_genome_map = build_transcript_genome_map(open(gene_file))\n+ cluster_chimera_dict = collections.defaultdict(lambda: [])\n+ for c in Chimera.parse(open(input_file)):\n+ # TODO: adjust this to score chimeras differently!\n+ key = (c.name, c.get_num_frags())\n+ # get cluster of overlapping genes\n+ cluster5p = transcript_cluster_map[c.tx_name_5p]\n+ cluster3p = transcript_cluster_map[c.tx_name_3p]\n+ # get genomic positions of breakpoints\n+ coord5p = transcript_to_genome_pos(c.tx_name_5p, c.tx_end_5p-1, transcript_genome_map)\n+ coord3p = transcript_to_genome_pos(c.tx_name_3p, c.tx_start_3p, transcript_genome_map)\n+ # add to dictionary\n+ cluster_chimera_dict[(cluster5p,cluster3p,coord5p,coord3p)].append(key) \n+ # choose highest coverage chimeras within each pair of clusters\n+ logging.debug("Finding highest coverage isoforms")\n+ kept_chimeras = set()\n+ for stats_list in cluster_chimera_dict.itervalues():\n+ stats_dict = collections.defaultdict(lambda: set())\n+ for stats_info in stats_list:\n+ # index chimera names\n+ stats_dict[stats_info[1:]].add(stats_info[0])\n+ # find highest scoring key\n+ sorted_keys = sorted(stats_dict.keys(), reverse=True)\n+ kept_chimeras.update(stats_dict[sorted_keys[0]])\n+ return kept_chimeras\n+\n+def filter_highest_coverage_isoforms(index_dir, input_file, output_file):\n+ # find highest coverage chimeras among isoforms\n+ gene_file = os.path.join(index_dir, config.GENE_FEATURE_FILE)\n+ kept_chimeras = get_highest_coverage_isoforms(input_file, gene_file)\n+ num_filtered_chimeras = 0\n+ f = open(output_file, "w")\n+ for c in Chimera.parse(open(input_file)):\n+ if c.name in kept_chimeras:\n+ num_filtered_chimeras += 1\n+ print >>f, \'\\t\'.join(map(str, c.to_list()))\n+ f.close()\n+ logging.debug("\\tAfter choosing best isoform: %d" % \n+ num_filtered_chimeras)\n+ return 
config.JOB_SUCCESS\n+\n+\n+\n+def main():\n+ from optparse import OptionParser\n+ logging.basicConfig(level=logging.DEBUG,\n+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n+ parser = OptionParser("usage: %prog [options] <index_dir> "\n+ "<sorted_aligned_reads.bam> <in.txt> <out.txt>")\n+ parser.add_option("--unique-frags", type="float", default=2.0,\n+ dest="unique_frags", metavar="N",\n+ help="Filter chimeras with less than N unique "\n+ "aligned fragments [default=%default]")\n+ parser.add_option("--isoform-fraction", type="float", \n+ default=0.10, metavar="X",\n+ help="Filter chimeras with expression ratio "\n+ " less than X (0.0-1.0) relative to the wild-type "\n+ "5\' transcript level [default=%default]")\n+ parser.add_option("--false-pos", dest="false_pos_file",\n+ default=None, \n+ help="File containing known false positive "\n+ "transcript pairs to subtract from output")\n+ options, args = parser.parse_args()\n+ index_dir = args[0]\n+ bam_file = args[1]\n+ input_file = args[2]\n+ output_file = args[3]\n+ return filter_chimeras(input_file, output_file, index_dir, bam_file,\n+ unique_frags=options.unique_frags,\n+ isoform_fraction=options.isoform_fraction,\n+ false_pos_file=options.false_pos_file)\n+\n+if __name__ == "__main__":\n+ main()\n\\ No newline at end of file\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/filter_homologous_genes.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/filter_homologous_genes.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''
Created on Aug 1, 2011

@author: mkiyer
'''
import logging
import os
import collections
import subprocess

from chimerascan import pysam
from chimerascan.lib import config
from chimerascan.lib.chimera import Chimera
from chimerascan.bx.intersection import IntervalTree, Interval

def get_mapped_read_intervals(c, min_isize, max_isize, homolog_segment_length):
    """
    computes the transcript intervals around the breakpoint of chimera
    'c' that are compared against each other for homology

    returns (start5p, end5p, start3p, end3p).  coordinates are clamped
    at zero, and an interval that would come out inverted is replaced by
    one exactly 'homolog_segment_length' long beginning at its start
    """
    start5p = max(0, c.tx_end_5p - min_isize + homolog_segment_length)
    end5p = max(0, c.tx_end_5p + max_isize - homolog_segment_length)
    if start5p > end5p:
        end5p = start5p + homolog_segment_length
    start3p = max(0, c.tx_start_3p - max_isize + homolog_segment_length)
    end3p = max(0, c.tx_start_3p + min_isize - homolog_segment_length)
    if start3p > end3p:
        end3p = start3p + homolog_segment_length
    return start5p, end5p, start3p, end3p

def filter_homologous_genes(input_file, output_file, index_dir,
                            homolog_segment_length,
                            min_isize,
                            max_isize,
                            bowtie_bin,
                            num_processors,
                            tmp_dir):
    """
    removes chimeras whose 5' partner sequence near the breakpoint also
    aligns to the 3' partner near the breakpoint

    every 'homolog_segment_length'-long window of the 5' interval is
    written to a FASTA file and aligned with bowtie against the index in
    'index_dir'; a chimera is dropped when one of its windows hits the
    3' interval of the same chimera.  the surviving chimeras of
    'input_file' are written to 'output_file'

    returns config.JOB_SUCCESS, or config.JOB_ERROR when bowtie exits
    with a non-zero status
    """
    logging.debug("Parameters")
    logging.debug("\thomolog segment length: %d" % (homolog_segment_length))
    logging.debug("\tmin fragment size: %d" % (min_isize))
    logging.debug("\tmax fragment size: %d" % (max_isize))

    ref_fasta_file = os.path.join(index_dir, config.ALIGN_INDEX + ".fa")
    bowtie_index = os.path.join(index_dir, config.ALIGN_INDEX)
    interval_trees_3p = collections.defaultdict(lambda: IntervalTree())

    # generate FASTA file of sequences to use in mapping
    logging.debug("Generating homologous sequences to test")
    fasta5p = os.path.join(tmp_dir, "homologous_5p.fa")
    # open the reference sequence fasta file
    ref_fa = pysam.Fastafile(ref_fasta_file)
    try:
        with open(fasta5p, "w") as f:
            with open(input_file) as infh:
                for c in Chimera.parse(infh):
                    tx_name_5p = config.GENE_REF_PREFIX + c.tx_name_5p
                    tx_name_3p = config.GENE_REF_PREFIX + c.tx_name_3p
                    start5p, end5p, start3p, end3p = \
                        get_mapped_read_intervals(c, min_isize, max_isize,
                                                  homolog_segment_length)
                    # add 3' gene to interval trees
                    interval_trees_3p[tx_name_3p].insert_interval(
                        Interval(start3p, end3p, value=c.name))
                    # extract sequence of 5' gene
                    seq5p = ref_fa.fetch(tx_name_5p, start5p, end5p)
                    # a sequence of length n holds n - k + 1 windows of
                    # length k; the "+ 1" keeps the final window and makes
                    # an interval of exactly k bases yield one window
                    num_windows = len(seq5p) - homolog_segment_length + 1
                    for i in range(num_windows):
                        f.write(">%s,%s:%d-%d\n%s\n" %
                                (c.name, c.tx_name_5p,
                                 start5p+i,
                                 start5p+i+homolog_segment_length,
                                 seq5p[i:i+homolog_segment_length]))
    finally:
        ref_fa.close()

    # map 5' sequences to reference using bowtie
    logging.debug("Mapping homologous sequences")
    sam5p = os.path.join(tmp_dir, "homologous_5p.sam")
    args = [bowtie_bin, "-p", num_processors, "-f", "-a", "-m", 100,
            "-y", "-v", 3, "-S",
            bowtie_index, fasta5p, sam5p]
    retcode = subprocess.call([str(a) for a in args])
    if retcode != 0:
        return config.JOB_ERROR

    # analyze results for homologous genes
    logging.debug("Analyzing mapping results")
    homologous_chimeras = set()
    # a single handle serves both the header lookup and the iteration,
    # and is closed before the SAM file is deleted below
    samfh = pysam.Samfile(sam5p, "r")
    try:
        tid_rname_map = dict((i, refname) for i, refname
                             in enumerate(samfh.references))
        for r in samfh:
            if r.is_unmapped:
                continue
            # reference name must be in list of 3' chimeras
            rname = tid_rname_map[r.rname]
            if rname not in interval_trees_3p:
                continue
            # get chimera name from 'qname'
            chimera_name = r.qname.split(",")[0]
            for hit in interval_trees_3p[rname].find(r.pos, r.aend):
                if hit.value == chimera_name:
                    homologous_chimeras.add(chimera_name)
    finally:
        samfh.close()

    # write output
    logging.debug("Writing output")
    with open(output_file, "w") as f:
        with open(input_file) as infh:
            for c in Chimera.parse(infh):
                if c.name in homologous_chimeras:
                    logging.debug("Removing homologous chimera %s between %s and %s" %
                                  (c.name, c.gene_name_5p, c.gene_name_3p))
                    continue
                f.write('\t'.join(map(str, c.to_list())) + '\n')

    # cleanup
    if os.path.exists(fasta5p):
        os.remove(fasta5p)
    if os.path.exists(sam5p):
        os.remove(sam5p)
    return config.JOB_SUCCESS


def main():
    """
    command line entry point: <index_dir> <in.txt> <out.txt>
    """
    from optparse import OptionParser
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    parser = OptionParser("usage: %prog [options] <index_dir> "
                          "<in.txt> <out.txt>")
    parser.add_option("--homolog-segment-length", dest="homolog_segment_length",
                      type="int", default=25,
                      help="Segment length to consider when searching for "
                      "homologous regions [default=%default]")
    parser.add_option('--min-fragment-length', dest="min_fragment_length",
                      type="int", default=100)
    parser.add_option('--max-fragment-length', dest="max_fragment_length",
                      type="int", default=300)
    parser.add_option("--bowtie-bin", dest="bowtie_bin",
                      default="bowtie",
                      help="Path to bowtie binary [default: %default]")
    parser.add_option("-p", type="int", dest="num_processors", default=1,
                      help="Number of processors to use [default: %default]")
    parser.add_option("--tmp-dir", dest="tmp_dir",
                      default=".",
                      help="Temporary directory [default=%default]")
    options, args = parser.parse_args()
    # fail with a usage message rather than an IndexError
    if len(args) != 3:
        parser.error("incorrect number of arguments")
    index_dir = args[0]
    input_file = args[1]
    output_file = args[2]
    return filter_homologous_genes(input_file, output_file, index_dir,
                                   homolog_segment_length=options.homolog_segment_length,
                                   min_isize=options.min_fragment_length,
                                   max_isize=options.max_fragment_length,
                                   bowtie_bin=options.bowtie_bin,
                                   num_processors=options.num_processors,
                                   tmp_dir=options.tmp_dir)

if __name__ == "__main__":
    main()
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/find_discordant_reads.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/find_discordant_reads.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,423 @@\n+\'\'\'\n+Created on Jun 2, 2011\n+\n+@author: mkiyer\n+\'\'\'\n+import logging\n+import collections\n+import os\n+\n+from chimerascan import pysam\n+from chimerascan.bx.cluster import ClusterTree\n+\n+from chimerascan.lib import config\n+from chimerascan.lib.base import LibraryTypes\n+from chimerascan.lib.sam import parse_pe_reads, pair_reads, copy_read, select_best_mismatch_strata\n+from chimerascan.lib.gene_to_genome import build_transcript_tid_genome_map, \\\n+ build_transcript_tid_cluster_map, transcript_to_genome_pos\n+from chimerascan.lib.chimera import DiscordantTags, DISCORDANT_TAG_NAME, \\\n+ OrientationTags, ORIENTATION_TAG_NAME, cmp_orientation\n+\n+# globals\n+imin2 = lambda a,b: a if a <= b else b\n+\n+def annotate_multihits(bamfh, reads, transcript_tid_genome_map):\n+ hits = set()\n+ any_unmapped = False\n+ for r in reads:\n+ if r.is_unmapped:\n+ any_unmapped = True\n+ continue\n+ if r.rname not in transcript_tid_genome_map:\n+ tid = r.rname\n+ pos = r.pos\n+ else:\n+ # use the position that is most 5\' relative to genome\n+ left_tid, left_strand, left_pos = transcript_to_genome_pos(r.rname, r.pos, transcript_tid_genome_map)\n+ right_tid, right_strand, right_pos = transcript_to_genome_pos(r.rname, r.aend-1, transcript_tid_genome_map)\n+ tid = left_tid\n+ pos = imin2(left_pos, right_pos)\n+ hits.add((tid, pos))\n+ #print r.qname, bamfh.getrname(r.rname), r.pos, bamfh.getrname(tid), pos \n+ for i,r in enumerate(reads):\n+ # annotate reads with \'HI\', and \'IH\' tags\n+ r.tags = r.tags + [("HI",i), ("IH",len(reads)), ("NH", len(hits))]\n+ return any_unmapped\n+\n+def map_reads_to_references(pe_reads, transcript_tid_cluster_map):\n+ """\n+ bin reads by transcript cluster and reference (tid)\n+ """\n+ refdict = collections.defaultdict(lambda: ([], []))\n+ genedict = collections.defaultdict(lambda: ([], []))\n+ for readnum, reads in enumerate(pe_reads):\n+ for r in reads:\n+ if r.is_unmapped:\n+ continue \n+ # get cluster id\n+ if 
r.rname in transcript_tid_cluster_map:\n+ # add to cluster dict\n+ cluster_id = transcript_tid_cluster_map[r.rname]\n+ pairs = genedict[cluster_id]\n+ pairs[readnum].append(r)\n+ # add to reference dict\n+ pairs = refdict[r.rname]\n+ pairs[readnum].append(r)\n+ return refdict, genedict\n+\n+def get_genome_orientation(r, library_type):\n+ if library_type == LibraryTypes.FR_FIRSTSTRAND:\n+ if r.is_read2:\n+ return OrientationTags.FIVEPRIME\n+ else:\n+ return OrientationTags.THREEPRIME\n+ elif library_type == LibraryTypes.FR_SECONDSTRAND:\n+ if r.is_read1:\n+ return OrientationTags.FIVEPRIME\n+ else:\n+ return OrientationTags.THREEPRIME\n+ return OrientationTags.NONE\n+\n+def get_gene_orientation(r, library_type):\n+ if library_type == LibraryTypes.FR_UNSTRANDED:\n+ if r.is_reverse:\n+ return OrientationTags.THREEPRIME\n+ else:\n+ return OrientationTags.FIVEPRIME\n+ elif library_type == LibraryTypes.FR_FIRSTSTRAND:\n+ if r.is_read2:\n+ return OrientationTags.FIVEPRIME\n+ else:\n+ return OrientationTags.THREEPRIME\n+ elif library_type == LibraryTypes.FR_SECONDSTRAND:\n+ if r.is_read1:\n+ return OrientationTags.FIVEPRIME\n+ else:\n+ return OrientationTags.THREEPRIME\n+ logging.error("Unknown library type %s, aborting" % (library_type))\n+ assert False\n+\n+def classify_unpaired_reads(reads, transcript_tid_genome_map, library_type):\n+ gene_hits_5p = []\n+ gene_hits_3p = []\n+ genome_hits = []\n+ for r in reads:\n+ # check to see if this alignment is to a gene, or genomic\n+ if (r.rname not in transcript_tid_genome_map):\n+ #'..b'nput_bam_file))\n+ logging.debug("\\tMax insert size: \'%d\'" % (max_isize))\n+ logging.debug("\\tLibrary type: \'%s\'" % (library_type))\n+ logging.debug("\\tGene paired file: %s" % (gene_paired_bam_file))\n+ logging.debug("\\tGenome paired file: %s" % (genome_paired_bam_file))\n+ logging.debug("\\tUnmapped file: %s" % (unmapped_bam_file))\n+ logging.debug("\\tComplex file: %s" % (complex_bam_file))\n+ # setup input and output files\n+ bamfh = 
pysam.Samfile(input_bam_file, "rb")\n+ genefh = pysam.Samfile(gene_paired_bam_file, "wb", template=bamfh)\n+ genomefh = pysam.Samfile(genome_paired_bam_file, "wb", template=bamfh)\n+ unmappedfh = pysam.Samfile(unmapped_bam_file, "wb", template=bamfh)\n+ complexfh = pysam.Samfile(complex_bam_file, "wb", template=bamfh)\n+ gene_file = os.path.join(index_dir, config.GENE_FEATURE_FILE)\n+ # build a lookup table to get all the overlapping transcripts given a\n+ # transcript \'tid\'\n+ transcript_tid_cluster_map = \\\n+ build_transcript_tid_cluster_map(bamfh, open(gene_file), \n+ rname_prefix=config.GENE_REF_PREFIX)\n+ # build a lookup table to get genome coordinates from transcript \n+ # coordinates\n+ transcript_tid_genome_map = \\\n+ build_transcript_tid_genome_map(bamfh, open(gene_file), \n+ rname_prefix=config.GENE_REF_PREFIX)\n+ for pe_reads in parse_pe_reads(bamfh):\n+ # add hit index and number of multimaps information to read tags\n+ # this function also checks for unmapped reads\n+ any_unmapped = False\n+ for reads in pe_reads:\n+ any_unmapped = (any_unmapped or \n+ annotate_multihits(bamfh, reads, transcript_tid_genome_map))\n+ if any_unmapped:\n+ # write to output as discordant reads and continue to \n+ # next fragment\n+ write_pe_reads(unmappedfh, pe_reads)\n+ continue\n+ # examine all read pairing combinations and rule out invalid \n+ # pairings. 
this returns gene pairs and genome pairs\n+ gene_pairs, genome_pairs, unpaired_reads = \\\n+ classify_read_pairs(pe_reads, max_isize,\n+ library_type, transcript_tid_genome_map,\n+ transcript_tid_cluster_map)\n+ if len(gene_pairs) > 0 or len(genome_pairs) > 0:\n+ write_pairs(genefh, gene_pairs)\n+ write_pairs(genomefh, genome_pairs)\n+ else:\n+ write_pe_reads(complexfh, unpaired_reads)\n+ genefh.close()\n+ genomefh.close()\n+ unmappedfh.close()\n+ complexfh.close()\n+ bamfh.close() \n+ logging.info("Finished pairing reads")\n+\n+\n+def main():\n+ from optparse import OptionParser\n+ logging.basicConfig(level=logging.DEBUG,\n+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n+ parser = OptionParser("usage: %prog [options] <index> <in.bam> "\n+ "<gene_paired.bam> <genome_paired.bam> "\n+ "<unmapped.bam> <complex.bam>")\n+ parser.add_option(\'--max-fragment-length\', dest="max_fragment_length", \n+ type="int", default=1000)\n+ parser.add_option(\'--library\', dest="library_type", \n+ default=LibraryTypes.FR_UNSTRANDED)\n+ options, args = parser.parse_args() \n+ index_dir = args[0]\n+ input_bam_file = args[1]\n+ gene_paired_bam_file = args[2]\n+ genome_paired_bam_file = args[3]\n+ unmapped_bam_file = args[4]\n+ complex_bam_file = args[5]\n+ find_discordant_fragments(input_bam_file, gene_paired_bam_file,\n+ genome_paired_bam_file, unmapped_bam_file, \n+ complex_bam_file, index_dir,\n+ max_isize=options.max_fragment_length,\n+ library_type=options.library_type)\n+\n+if __name__ == \'__main__\':\n+ main()\n\\ No newline at end of file\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/merge_spanning_alignments.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/merge_spanning_alignments.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,266 @@\n+\'\'\'\n+Created on Nov 7, 2010\n+\n+@author: mkiyer\n+\n+chimerascan: chimeric transcript discovery using RNA-seq\n+\n+Copyright (C) 2011 Matthew Iyer\n+\n+This program is free software: you can redistribute it and/or modify\n+it under the terms of the GNU General Public License as published by\n+the Free Software Foundation, either version 3 of the License, or\n+(at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful,\n+but WITHOUT ANY WARRANTY; without even the implied warranty of\n+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License\n+along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\'\'\'\n+import logging\n+import collections\n+import shutil\n+import os\n+\n+# local imports\n+from chimerascan import pysam\n+from chimerascan.lib.chimera import Chimera, DiscordantRead, \\\n+ DiscordantTags, DISCORDANT_TAG_NAME, \\\n+ OrientationTags, ORIENTATION_TAG_NAME\n+from chimerascan.lib.base import LibraryTypes\n+\n+from chimerascan.pipeline.find_discordant_reads import get_gene_orientation\n+\n+def parse_group_by_attr(myiter, attr):\n+ mylist = []\n+ prev = None\n+ for itm in myiter:\n+ cur = getattr(itm, attr)\n+ if prev != cur:\n+ if len(mylist) > 0:\n+ yield prev, mylist\n+ mylist = []\n+ prev = cur\n+ mylist.append(itm)\n+ if len(mylist) > 0:\n+ yield prev, mylist\n+\n+def parse_sync_by_breakpoint(chimera_file, bam_file):\n+ # group reads by reference name (matches breakpoint name)\n+ bamfh = pysam.Samfile(bam_file, "rb")\n+ tid_rname_map = list(bamfh.references)\n+ # initialize iterator through reads\n+ read_iter = parse_group_by_attr(bamfh, "rname")\n+ read_iter_valid = True\n+ try:\n+ rname, reads = read_iter.next()\n+ read_breakpoint_name = tid_rname_map[rname]\n+ except StopIteration:\n+ bamfh.close()\n+ read_iter_valid = False\n+ reads = []\n+ 
read_breakpoint_name = "ZZZZZZZZZZZZZZ"\n+ # group chimeras by breakpoint name\n+ for chimera_breakpoint_name, chimeras in \\\n+ parse_group_by_attr(Chimera.parse(open(chimera_file)), \n+ "breakpoint_name"):\n+ while (read_iter_valid) and (chimera_breakpoint_name > read_breakpoint_name):\n+ try:\n+ rname, reads = read_iter.next()\n+ read_breakpoint_name = tid_rname_map[rname]\n+ except StopIteration:\n+ read_iter_valid = False\n+ reads = []\n+ if chimera_breakpoint_name < read_breakpoint_name:\n+ yield chimeras, []\n+ else:\n+ yield chimeras, reads \n+ bamfh.close()\n+\n+def get_mismatch_positions(md):\n+ x = 0\n+ pos = []\n+ for y in xrange(len(md)):\n+ if md[y].isalpha():\n+ offset = int(md[x:y])\n+ pos.append(offset)\n+ x = y + 1\n+ return pos\n+\n+def check_breakpoint_alignment(c, r,\n+ anchor_min,\n+ anchor_length,\n+ anchor_mismatches):\n+ """\n+ returns True if read \'r\' meets criteria for a valid\n+ breakpoint spanning read, False otherwise\n+ \n+ c - Chimera object\n+ r - pysam AlignedRead object\n+ """\n+ # get position of breakpoint along seq\n+ breakpoint_pos = len(c.breakpoint_seq_5p)\n+ # check if read spans breakpoint \n+ if not (r.pos < breakpoint_pos < r.aend):\n+ return False \n+ # calculate amount in bp that read overlaps breakpoint\n+ # and ensure overlap is sufficient\n+ left_anchor_bp = breakpoint_pos - r.pos\n+ if left_anchor_bp < max(c.homology_left, anchor_min):\n+ return False\n+ right_anchor_bp = r.aend - breakpoint_pos\n+ if right_anchor_bp < max(c.homology_right, anchor_min):\n+ return False\n+ # ensure that alignment'..b' for dpair in c.encomp_frags:\n+ chimera_qname_dict[c.name][dpair[0].qname] = dpair \n+ # find valid spanning reads\n+ for c, dr in filter_spanning_reads(chimeras, reads, \n+ anchor_min, anchor_length, \n+ anchor_mismatches, library_type):\n+ # ensure encompassing read is present\n+ if dr.qname not in chimera_qname_dict[c.name]:\n+ continue\n+ # get discordant pair\n+ dpair = chimera_qname_dict[c.name][dr.qname]\n+ # 
mark correct read (read1/read2) as a spanning read\n+ if dr.readnum == dpair[0].readnum:\n+ dpair[0].is_spanning = True\n+ elif dr.readnum == dpair[1].readnum:\n+ dpair[1].is_spanning = True\n+ else:\n+ assert False\n+ filtered_hits += 1\n+ # write chimeras back to file\n+ for c in chimeras:\n+ fields = c.to_list()\n+ print >>f, \'\\t\'.join(map(str, fields)) \n+ f.close()\n+ logging.debug("\\tFound %d hits" % (filtered_hits))\n+ #\n+ # Process reads that are single-mapped and spanning\n+ #\n+ logging.debug("Processing single-mapping/spanning reads")\n+ tmp_singlemap_chimera_file = os.path.join(tmp_dir, "tmp_singlemap_chimeras.bedpe")\n+ f = open(tmp_singlemap_chimera_file, "w")\n+ filtered_hits = 0\n+ for chimeras, reads in parse_sync_by_breakpoint(tmp_encomp_chimera_file, singlemap_bam_file):\n+ # find valid spanning reads\n+ for c, dr in filter_spanning_reads(chimeras, reads, \n+ anchor_min, anchor_length, \n+ anchor_mismatches, library_type):\n+ # ensure mate maps to 5\' or 3\' gene\n+ # TODO: implement this using sorted/indexed BAM file?\n+ # add read as a spanning read\n+ c.spanning_reads.append(dr)\n+ filtered_hits += 1 \n+ # write chimeras back to file\n+ for c in chimeras:\n+ fields = c.to_list()\n+ print >>f, \'\\t\'.join(map(str, fields)) \n+ f.close()\n+ logging.debug("\\tFound %d hits" % (filtered_hits))\n+ # output_chimera_file \n+ shutil.copyfile(tmp_singlemap_chimera_file, output_chimera_file)\n+ # remove temporary files\n+ if os.path.exists(tmp_encomp_chimera_file):\n+ os.remove(tmp_encomp_chimera_file)\n+ if os.path.exists(tmp_singlemap_chimera_file):\n+ os.remove(tmp_singlemap_chimera_file)\n+ \n+\n+def main():\n+ from optparse import OptionParser\n+ logging.basicConfig(level=logging.DEBUG,\n+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") \n+ parser = OptionParser("usage: %prog [options] <chimeras.breakpoint_sorted.txt> "\n+ "<encomp.bam> <onemap.bam> <chimeras.out.txt>")\n+ parser.add_option("--anchor-min", type="int", 
dest="anchor_min", default=4)\n+ parser.add_option("--anchor-length", type="int", dest="anchor_length", default=8)\n+ parser.add_option("--anchor-mismatches", type="int", dest="anchor_mismatches", default=0)\n+ parser.add_option(\'--library\', dest="library_type", \n+ default=LibraryTypes.FR_UNSTRANDED)\n+ options, args = parser.parse_args()\n+ breakpoint_chimera_file = args[0]\n+ encomp_bam_file = args[1]\n+ singlemap_bam_file = args[2]\n+ output_chimera_file = args[4]\n+ merge_spanning_alignments(breakpoint_chimera_file,\n+ encomp_bam_file,\n+ singlemap_bam_file,\n+ output_chimera_file,\n+ options.anchor_min, \n+ options.anchor_length,\n+ options.anchor_mismatches,\n+ options.library_type)\n+\n+if __name__ == \'__main__\':\n+ main()\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/nominate_chimeras.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/nominate_chimeras.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,275 @@\n+\'\'\'\n+Created on Jul 21, 2011\n+\n+@author: mkiyer\n+\n+Copyright (C) 2011 Matthew Iyer\n+\n+This program is free software: you can redistribute it and/or modify\n+it under the terms of the GNU General Public License as published by\n+the Free Software Foundation, either version 3 of the License, or\n+(at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful,\n+but WITHOUT ANY WARRANTY; without even the implied warranty of\n+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License\n+along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\'\'\'\n+import logging\n+import os\n+import sys\n+import collections\n+import itertools\n+import operator\n+\n+from chimerascan import pysam\n+\n+from chimerascan.lib import config\n+from chimerascan.lib.chimera import DiscordantRead, Chimera, frags_to_encomp_string\n+from chimerascan.lib.gene_to_genome import build_tx_name_gene_map, build_genome_tx_trees\n+from chimerascan.lib.fragment_size_distribution import InsertSizeDistribution\n+from chimerascan.lib.seq import calc_homology\n+\n+def parse_discordant_bedpe_by_transcript_pair(fh):\n+ prev_tx5p, prev_tx3p = None,None\n+ frags = []\n+ for line in fh:\n+ fields = line.strip().split(\'\\t\') \n+ tx5p = fields[0]\n+ tx3p = fields[3]\n+ dr5p = DiscordantRead.from_list(fields[10].split("|"))\n+ dr3p = DiscordantRead.from_list(fields[11].split("|"))\n+ if (tx5p, tx3p) != (prev_tx5p, prev_tx3p):\n+ if len(frags) > 0:\n+ yield prev_tx5p, prev_tx3p, frags\n+ frags = []\n+ prev_tx5p, prev_tx3p = tx5p, tx3p\n+ frags.append((dr5p, dr3p))\n+ if len(frags) > 0:\n+ yield tx5p, tx3p, frags \n+\n+def calc_isize_prob(isize, isize_dist):\n+ # find percentile of observing this insert size in the reads\n+ isize_per = isize_dist.percentile_at_isize(isize)\n+ # convert to a probability score 
(0.0-1.0)\n+ isize_prob = 1.0 - (2.0 * abs(50.0 - isize_per))/100.0 \n+ return isize_prob\n+\n+def choose_best_breakpoints(r5p, r3p, tx5p, tx3p, trim_bp, isize_dist):\n+ best_breakpoints = set()\n+ best_isize_prob = None\n+ # iterate through 5\' transcript exons \n+ exon_iter_5p = reversed(tx5p.exons) if tx5p.strand == \'-\' else iter(tx5p.exons)\n+ tx_end_5p = 0\n+ for exon_num_5p,coords5p in enumerate(exon_iter_5p):\n+ genome_start_5p, genome_end_5p = coords5p \n+ exon_size_5p = genome_end_5p - genome_start_5p\n+ tx_end_5p += exon_size_5p\n+ # fast forward on 5\' gene to first exon beyond read \n+ if tx_end_5p < (r5p.aend - trim_bp):\n+ continue \n+ #print "tx end 5p", tx_end_5p, "exon_size_5p", exon_size_5p, "r5p.aend", r5p.aend, "trim_bp", trim_bp\n+ # now have a candidate insert size between between 5\' read and\n+ # end of 5\' exon\n+ isize5p = tx_end_5p - r5p.pos\n+ # iterate through 3\' transcript\n+ exon_iter_3p = reversed(tx3p.exons) if tx3p.strand == \'-\' else iter(tx3p.exons)\n+ tx_start_3p = 0\n+ local_best_breakpoints = set()\n+ local_best_isize_prob = None\n+ for exon_num_3p,coords3p in enumerate(exon_iter_3p):\n+ genome_start_3p, genome_end_3p = coords3p\n+ #print "\\t", coords3p \n+ # stop after going past read on 3\' transcript\n+ if tx_start_3p >= (r3p.pos + trim_bp):\n+ break\n+ # get another candidate insert size between start of 3\'\n+ # exon and 3\' read\n+ isize3p = r3p.aend - tx_start_3p\n+ #print "\\t", isize5p, isize3p, tx_end_5p, tx_start_3p\n+ # compare the insert size against the known insert size\n+ # distribution\n+ isize_prob = calc_isize_prob(isize5p + isize3p, isize_dist)\n+ if ((local_best_isize_prob i'..b'akpoint\n+ breakpoint_seq_5p, breakpoint_seq_3p, homology_left, homology_right = \\\n+ extract_breakpoint_sequence(config.GENE_REF_PREFIX + tx5p.tx_name, tx_end_5p,\n+ config.GENE_REF_PREFIX + tx3p.tx_name, tx_start_3p,\n+ ref_fa, max_read_length,\n+ homology_mismatches) \n+ tx3p_length = sum((end - start) for start,end in 
tx3p.exons)\n+ # get unique breakpoint id based on sequence\n+ breakpoint_seq = breakpoint_seq_5p + breakpoint_seq_3p\n+ if breakpoint_seq in breakpoint_seq_name_map:\n+ breakpoint_name = breakpoint_seq_name_map[breakpoint_seq]\n+ else:\n+ breakpoint_name = "B%07d" % (breakpoint_num)\n+ breakpoint_seq_name_map[breakpoint_seq] = breakpoint_name\n+ breakpoint_num += 1\n+ # write gene, breakpoint, and raw reads to a file and follow the\n+ # BEDPE format\n+ gene_name_5p = \'_\'.join(tx5p.gene_name.split())\n+ gene_name_3p = \'_\'.join(tx3p.gene_name.split())\n+ fields = [tx5p.tx_name, 0, tx_end_5p, # chrom1, start1, end1\n+ tx3p.tx_name, tx_start_3p, tx3p_length, # chrom2, start2, end2\n+ "C%07d" % (chimera_num), # name\n+ 1.0, # pvalue\n+ tx5p.strand, tx3p.strand, # strand1, strand2\n+ gene_name_5p, gene_name_3p, # gene names\n+ # exon interval information\n+ \'%d-%d\' % (0, exon_num_5p),\n+ \'%d-%d\' % (exon_num_3p, len(tx3p.exons)),\n+ # breakpoint information\n+ breakpoint_name, \n+ breakpoint_seq_5p, breakpoint_seq_3p, \n+ homology_left, homology_right, \n+ # fragments\n+ frags_to_encomp_string(frags),\n+ # spanning reads\n+ None]\n+ print >>outfh, \'\\t\'.join(map(str, fields))\n+ chimera_num += 1\n+ outfh.close()\n+ ref_fa.close()\n+ return config.JOB_SUCCESS\n+ \n+\n+def main():\n+ from optparse import OptionParser\n+ logging.basicConfig(level=logging.DEBUG,\n+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n+ parser = OptionParser("usage: %prog [options] <index> <isize_dist.txt> "\n+ "<discordant_reads.srt.bedpe> <chimeras.txt>")\n+ parser.add_option("--trim", dest="trim", type="int", \n+ default=config.EXON_JUNCTION_TRIM_BP,\n+ help="apply trimming when choosing exon boundaries to "\n+ "to consider possible breakpoints")\n+ parser.add_option("--max-read-length", dest="max_read_length", type="int",\n+ default=100, metavar="N",\n+ help="Reads in the BAM file are guaranteed to have "\n+ "length less than N [default=%default]")\n+ 
parser.add_option("--homology-mismatches", type="int", \n+ dest="homology_mismatches", \n+ default=config.BREAKPOINT_HOMOLOGY_MISMATCHES,\n+ help="Number of mismatches to tolerate when computing "\n+ "homology between gene and its chimeric partner "\n+ "[default=%default]")\n+ options, args = parser.parse_args()\n+ index_dir = args[0]\n+ isize_dist_file = args[1]\n+ input_file = args[2]\n+ output_file = args[3]\n+ return nominate_chimeras(index_dir, isize_dist_file, \n+ input_file, output_file, \n+ options.trim,\n+ options.max_read_length,\n+ options.homology_mismatches)\n+\n+\n+if __name__ == \'__main__\':\n+ sys.exit(main())\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/nominate_spanning_reads.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/nominate_spanning_reads.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,290 @@\n+\'\'\'\n+Created on Jan 30, 2011\n+\n+@author: mkiyer\n+\n+chimerascan: chimeric transcript discovery using RNA-seq\n+\n+Copyright (C) 2011 Matthew Iyer\n+\n+This program is free software: you can redistribute it and/or modify\n+it under the terms of the GNU General Public License as published by\n+the Free Software Foundation, either version 3 of the License, or\n+(at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful,\n+but WITHOUT ANY WARRANTY; without even the implied warranty of\n+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License\n+along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\'\'\'\n+import logging\n+import os\n+\n+from chimerascan import pysam\n+\n+from chimerascan.lib import config\n+from chimerascan.lib.base import LibraryTypes\n+from chimerascan.lib.sam import parse_pe_reads\n+from chimerascan.lib.chimera import Chimera, OrientationTags, ORIENTATION_TAG_NAME\n+from chimerascan.lib.batch_sort import batch_sort\n+from chimerascan.lib.seq import DNA_reverse_complement\n+from chimerascan.pipeline.find_discordant_reads import get_gene_orientation\n+\n+def to_fastq(qname, readnum, seq, qual, is_reverse=False):\n+ if is_reverse:\n+ seq = DNA_reverse_complement(seq)\n+ qual = qual[::-1]\n+ return "@%s/%d\\n%s\\n+\\n%s" % (qname, readnum+1, seq, qual)\n+\n+def nominate_encomp_spanning_reads(chimera_file, output_fastq_file):\n+ """\n+ find all encompassing reads that should to be remapped to see if they\n+ span the breakpoint junction\n+ """\n+ fqfh = open(output_fastq_file, "w")\n+ remap_qnames = set()\n+ for c in Chimera.parse(open(chimera_file)):\n+ # find breakpoint coords of chimera\n+ end5p = c.tx_end_5p\n+ start3p = c.tx_start_3p\n+ for r5p,r3p in c.encomp_frags: \n+ # if 5\' read overlaps breakpoint then it should be remapped\n+ if 
r5p.clipstart < end5p < r5p.clipend:\n+ key5p = (r5p.qname, r5p.readnum)\n+ if key5p not in remap_qnames:\n+ remap_qnames.add((r5p.qname, r5p.readnum))\n+ print >>fqfh, to_fastq(r5p.qname, r5p.readnum, \n+ r5p.seq, "I" * len(r5p.seq),\n+ is_reverse=r5p.is_reverse)\n+ # if 3\' read overlaps breakpoint then it should be remapped\n+ if r3p.clipstart < start3p < r3p.clipend:\n+ key3p = (r3p.qname, r3p.readnum)\n+ if key3p not in remap_qnames:\n+ remap_qnames.add((r3p.qname, r3p.readnum))\n+ print >>fqfh, to_fastq(r3p.qname, r3p.readnum, \n+ r3p.seq, "I" * len(r3p.seq),\n+ is_reverse=r3p.is_reverse)\n+ fqfh.close()\n+ return config.JOB_SUCCESS\n+\n+def parse_chimeras_by_gene(chimera_file, orientation):\n+ clist = []\n+ prev_tx_name = None\n+ for c in Chimera.parse(open(chimera_file)):\n+ tx_name = c.tx_name_5p if (orientation == OrientationTags.FIVEPRIME) else c.tx_name_3p\n+ if prev_tx_name != tx_name:\n+ if len(clist) > 0:\n+ yield prev_tx_name, clist\n+ clist = []\n+ prev_tx_name = tx_name\n+ clist.append(c)\n+ if len(clist) > 0:\n+ yield prev_tx_name, clist\n+\n+def parse_reads_by_rname(bamfh, orientation):\n+ """\n+ reads must be sorted and include an orientation tag\n+ """\n+ reads = []\n+ prev_rname = None\n+ for r in bamfh:\n+ o = r.opt(ORIENTATION_TAG_NAME)\n+ if o != orientation:\n+ continue\n+ if prev_rname != r.rname:\n+ if len(reads) > 0:\n+ yield reads\n+ reads = []\n+ prev_rname = r.rname\n+ reads.append(r)\n+ if len(reads) > 0:\n+ yield r'..b't >>f, \'\\t\'.join(map(str, [r.qname, readnum, r.opt("R2"), r.opt("Q2")]))\n+ # sort chimeras by 3\' partner\n+ logging.debug("Sorting chimeras by 3\' transcript")\n+ def sort_by_3p_partner(line):\n+ fields = line.strip().split(\'\\t\', Chimera.TX_NAME_3P_FIELD+1)\n+ return fields[Chimera.TX_NAME_3P_FIELD]\n+ tmp_chimera_file_sorted_3p = os.path.join(tmp_dir, "tmp_chimeras.sorted3p.bedpe")\n+ batch_sort(input=chimera_file,\n+ output=tmp_chimera_file_sorted_3p,\n+ key=sort_by_3p_partner,\n+ buffer_size=32000,\n+ 
tempdirs=[tmp_dir])\n+ # search for matches to 3\' chimeras\n+ logging.debug("Matching single-mapped frags to 3\' chimeras")\n+ for clist, reads in parse_sync_chimera_with_bam(tmp_chimera_file_sorted_3p, \n+ single_mapped_bam_file,\n+ OrientationTags.THREEPRIME):\n+ # TODO: test more specifically that read has a chance to cross breakpoint\n+ for r in reads:\n+ # reverse read number\n+ readnum = 1 if r.is_read1 else 0\n+ print >>f, \'\\t\'.join(map(str, [r.qname, readnum, r.opt("R2"), r.opt("Q2")]))\n+ f.close()\n+ #\n+ # now sort the file of sequences by read name/number to \n+ # eliminate duplicates\n+ # \n+ def sort_by_qname(line):\n+ fields = line.strip().split(\'\\t\')\n+ return (fields[0], int(fields[1]))\n+ tmp_sorted_seqs_to_remap = os.path.join(tmp_dir, "tmp_singlemap_seqs.sorted.txt")\n+ batch_sort(input=tmp_seqs_to_remap,\n+ output=tmp_sorted_seqs_to_remap,\n+ key=sort_by_qname,\n+ buffer_size=32000,\n+ tempdirs=[tmp_dir])\n+ #\n+ # read file and write fastq, ignoring duplicates\n+ # \n+ fqfh = open(single_mapped_fastq_file, "w")\n+ prev = None\n+ for line in open(tmp_sorted_seqs_to_remap):\n+ fields = line.strip().split(\'\\t\')\n+ qname, readnum, seq, qual = fields[0], int(fields[1]), fields[2], fields[3]\n+ cur = (fields[0], int(fields[1]))\n+ if prev != cur:\n+ if prev is not None: \n+ print >>fqfh, to_fastq(qname, readnum, seq, qual)\n+ prev = cur\n+ if prev is not None:\n+ print >>fqfh, to_fastq(qname, readnum, seq, qual)\n+ fqfh.close()\n+ # TODO: remove temporary files\n+ #os.remove(tmp_chimera_file_sorted_3p)\n+ #os.remove(tmp_seqs_to_remap)\n+ #os.remove(tmp_sorted_seqs_to_remap)\n+ return config.JOB_SUCCESS\n+\n+\n+def main():\n+ from optparse import OptionParser\n+ logging.basicConfig(level=logging.DEBUG,\n+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n+ parser = OptionParser("usage: %prog [options] <chimeras.txt> "\n+ "<unmapped_reads.bam> <encomp_remap.fq> "\n+ "<singlemap_remap.fq> "\n+ "<unmapped_remap.fq> ")\n+ 
parser.add_option(\'--library\', dest="library_type", \n+ default=LibraryTypes.FR_UNSTRANDED)\n+ options, args = parser.parse_args()\n+ chimera_file = args[0]\n+ bam_file = args[1]\n+ encomp_remap_fastq_file = args[2]\n+ singlemap_remap_fastq_file = args[3]\n+ unmapped_remap_fastq_file = args[4]\n+ nominate_encomp_spanning_reads(chimera_file, encomp_remap_fastq_file)\n+ extract_single_mapped_reads(chimera_file, \n+ bam_file,\n+ "single_mapped_reads.srt.bam",\n+ unmapped_remap_fastq_file,\n+ options.library_type,\n+ "/tmp") \n+ nominate_single_mapped_spanning_reads(chimera_file, \n+ "single_mapped_reads.srt.bam",\n+ singlemap_remap_fastq_file, \n+ "/tmp")\n+\n+if __name__ == \'__main__\':\n+ main()\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/profile_insert_size.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/profile_insert_size.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,61 @@ +''' +Created on Jan 24, 2011 + +@author: mkiyer + +chimerascan: chimeric transcript discovery using RNA-seq + +Copyright (C) 2011 Matthew Iyer + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +''' +import logging +import sys +# local imports +from chimerascan import pysam +from chimerascan.lib.fragment_size_distribution import InsertSizeDistribution + +def main(): + from optparse import OptionParser + logging.basicConfig(level=logging.DEBUG, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + parser = OptionParser("usage: %prog [options] <bam> <out.bedpe>") + parser.add_option('--min-fragment-length', dest="min_fragment_length", + type="int", default=0) + parser.add_option('--max-fragment-length', dest="max_fragment_length", + type="int", default=1000) + parser.add_option('--max-samples', dest="max_samples", + type="int", default=None) + parser.add_option('-o', dest="output_file", default=None) + options, args = parser.parse_args() + input_bam_file = args[0] + bamfh = pysam.Samfile(input_bam_file, "rb") + isizedist = InsertSizeDistribution.from_bam(bamfh, options.min_fragment_length, + options.max_fragment_length, + options.max_samples) + bamfh.close() + if options.output_file is not None: + f = open(options.output_file, "w") + else: + f = sys.stdout + isizedist.to_file(f) + if options.output_file is not None: + f.close() + logging.info("Insert size samples=%d mean=%f 
std=%f median=%d mode=%d" % + (isizedist.n, isizedist.mean(), isizedist.std(), + isizedist.percentile(50.0), isizedist.mode())) + + +if __name__ == '__main__': + main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/resolve_discordant_reads.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/resolve_discordant_reads.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b"@@ -0,0 +1,287 @@\n+'''\n+Created on Jul 28, 2011\n+\n+@author: mkiyer\n+\n+chimerascan: chimeric transcript discovery using RNA-seq\n+\n+Copyright (C) 2011 Matthew Iyer\n+\n+This program is free software: you can redistribute it and/or modify\n+it under the terms of the GNU General Public License as published by\n+the Free Software Foundation, either version 3 of the License, or\n+(at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful,\n+but WITHOUT ANY WARRANTY; without even the implied warranty of\n+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License\n+along with this program. If not, see <http://www.gnu.org/licenses/>.\n+'''\n+import logging\n+import collections\n+import os\n+\n+from chimerascan.lib.chimera import Chimera\n+from chimerascan.lib.fragment_size_distribution import InsertSizeDistribution\n+from chimerascan.lib.batch_sort import batch_sort\n+\n+QNAME_COL = 0\n+CHIMERA_NAME_COL = 5\n+SCORE_FIELDS = (6,7,8,9,10)\n+\n+class ChimeraStats(object):\n+ __slots__ = ('qname', 'tid5p', 'pos5p', 'tid3p', 'pos3p', \n+ 'chimera_name', 'num_spanning_frags', 'num_unambiguous_frags',\n+ 'num_uniquely_aligning_frags', 'neg_mismatches',\n+ 'isize_prob')\n+\n+ @property\n+ def score_tuple(self):\n+ return (self.num_spanning_frags,\n+ self.num_unambiguous_frags,\n+ self.num_uniquely_aligning_frags,\n+ self.neg_mismatches,\n+ self.isize_prob)\n+\n+ def to_list(self):\n+ return [self.qname,\n+ self.tid5p, self.pos5p,\n+ self.tid3p, self.pos3p, \n+ self.chimera_name,\n+ self.num_spanning_frags,\n+ self.num_unambiguous_frags,\n+ self.num_uniquely_aligning_frags,\n+ self.neg_mismatches,\n+ self.isize_prob]\n+\n+ @staticmethod\n+ def from_list(fields):\n+ s = ChimeraStats()\n+ s.qname = fields[0]\n+ s.tid5p = int(fields[1])\n+ s.pos5p = int(fields[2])\n+ s.tid3p = int(fields[3])\n+ s.pos3p = 
int(fields[4])\n+ s.chimera_name = fields[5]\n+ s.num_spanning_frags = int(fields[6])\n+ s.num_unambiguous_frags = int(fields[7])\n+ s.num_uniquely_aligning_frags = int(fields[8])\n+ s.neg_mismatches = int(fields[9])\n+ s.isize_prob = float(fields[10])\n+ return s\n+\n+ @staticmethod\n+ def parse(line_iter):\n+ for line in line_iter:\n+ fields = line.strip().split('\\t')\n+ yield ChimeraStats.from_list(fields)\n+\n+def calc_isize_prob(isize, isize_dist):\n+ # find percentile of observing this insert size in the reads\n+ isize_per = isize_dist.percentile_at_isize(isize)\n+ # convert to a probability score (0.0-1.0)\n+ isize_prob = 1.0 - (2.0 * abs(50.0 - isize_per))/100.0 \n+ return isize_prob\n+\n+def group_by_attr(item_iter, attr):\n+ mylist = []\n+ prev = None\n+ for itm in item_iter:\n+ cur = getattr(itm, attr)\n+ if prev != cur:\n+ if len(mylist) > 0:\n+ yield prev, mylist\n+ mylist = []\n+ prev = cur\n+ mylist.append(itm)\n+ if len(mylist) > 0:\n+ yield prev, mylist\n+\n+#def group_by_field(item_iter, colnum):\n+# mylist = []\n+# prev = None\n+# for fields in item_iter:\n+# # parse read stats information\n+# cur = fields[colnum]\n+# if prev != cur:\n+# if len(mylist) > 0:\n+# yield prev, mylist\n+# mylist = []\n+# prev = cur\n+# mylist.append(fields)\n+# if len(mylist) > 0:\n+# yield prev, mylist\n+\n+def parse_sync_chimeras_read_stats(chimera_file, read_stats_file):\n+ # group reads by chimera name\n+ read_stats_iter = group_by_attr(ChimeraStats.parse(open(read_stats_file)), \n+ "..b'+ resolved_read_stats_file = os.path.join(tmp_dir, "read_stats.rname_sorted.resolved.txt")\n+ f = open(resolved_read_stats_file, "w")\n+ for rname,readstats in group_by_attr(ChimeraStats.parse(open(sorted_read_stats_file)), \n+ \'qname\'):\n+ # build a dictionary of stats -> read/chimeras\n+ stats_dict = collections.defaultdict(lambda: [])\n+ for s in readstats:\n+ # add key/value pairs\n+ stats_dict[s.score_tuple].append(s)\n+ # sort based on stats\n+ sorted_stats_keys = 
sorted(stats_dict.keys(), reverse=True)\n+ # use only the best key\n+ for s in stats_dict[sorted_stats_keys[0]]:\n+ # output read -> chimera relationships\n+ print >>f, \'\\t\'.join(map(str, s.to_list()))\n+ f.close()\n+ #\n+ # re-sort by chimera name\n+ #\n+ logging.debug("Resorting reads by chimera name")\n+ def sort_reads_by_chimera_name(line):\n+ return line.strip().split(\'\\t\',CHIMERA_NAME_COL+1)[CHIMERA_NAME_COL]\n+ sorted_resolved_read_stats_file = os.path.join(tmp_dir, "read_stats.chimera_name_sorted.resolved.txt")\n+ batch_sort(input=resolved_read_stats_file,\n+ output=sorted_resolved_read_stats_file,\n+ key=sort_reads_by_chimera_name,\n+ buffer_size=32000,\n+ tempdirs=[tmp_dir])\n+ logging.debug("Resorting chimeras by name")\n+ def sort_chimeras_by_name(line):\n+ return line.strip().split(\'\\t\',Chimera.NAME_FIELD+1)[Chimera.NAME_FIELD]\n+ sorted_chimera_file = os.path.join(tmp_dir, "spanning_chimeras.name_sorted.txt")\n+ batch_sort(input=input_file,\n+ output=sorted_chimera_file,\n+ key=sort_chimeras_by_name,\n+ buffer_size=32000,\n+ tempdirs=[tmp_dir])\n+ #\n+ # parse and rebuild chimeras based on best reads\n+ # \n+ logging.debug("Rewriting chimeras with lists of \'best\' reads")\n+ f = open(output_file, "w")\n+ # need to sync chimeras with stats\n+ for c,stats in parse_sync_chimeras_read_stats(sorted_chimera_file, sorted_resolved_read_stats_file):\n+ # parse and make lookup set of the resolved alignments\n+ good_alignments = set()\n+ for s in stats:\n+ if s.isize_prob < min_isize_prob:\n+ continue\n+ good_alignments.add((s.qname, s.tid5p, s.pos5p, s.tid3p, s.pos3p))\n+ # replace encompassing frags with resolved alignments\n+ new_encomp_frags = []\n+ for dpair in c.encomp_frags:\n+ # get alignment tuple\n+ aln = (dpair[0].qname, dpair[0].tid, dpair[0].pos, dpair[1].tid, dpair[1].pos)\n+ if aln in good_alignments:\n+ new_encomp_frags.append(dpair)\n+ c.encomp_frags = new_encomp_frags\n+ c.score = c.get_num_frags()\n+ print >>f, \'\\t\'.join(map(str, 
c.to_list()))\n+ f.close()\n+ # remove temporary files\n+ #os.remove(read_stats_file)\n+ #os.remove(sorted_read_stats_file)\n+ #os.remove(resolved_read_stats_file)\n+ #os.remove(sorted_resolved_read_stats_file)\n+ #os.remove(sorted_chimera_file)\n+\n+def main():\n+ from optparse import OptionParser\n+ logging.basicConfig(level=logging.DEBUG,\n+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n+ parser = OptionParser("usage: %prog [options] <in.txt> <out.txt> <isizedist.txt>")\n+ parser.add_option("--min-isize-prob", dest="min_isize_prob", \n+ type="float", default=0.01)\n+ options, args = parser.parse_args()\n+ input_file = args[0]\n+ output_file = args[1]\n+ isize_dist_file = args[2]\n+ # read insert size distribution\n+ isize_dist = InsertSizeDistribution.from_file(open(isize_dist_file))\n+ resolve_discordant_reads(input_file, output_file, isize_dist, \n+ options.min_isize_prob,\n+ tmp_dir=".")\n+\n+if __name__ == \'__main__\':\n+ main()\n\\ No newline at end of file\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/sam2bam.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/sam2bam.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,86 @@ +''' +Created on Jun 2, 2011 + +@author: mkiyer +''' +import logging + +# local imports +import chimerascan.pysam as pysam +from chimerascan.lib.fix_alignment_ordering import fix_alignment_ordering, fix_sr_alignment_ordering +from chimerascan.lib.sam import soft_pad_read +from chimerascan.lib.seq import FASTQ_QUAL_FORMATS, SANGER_FORMAT, parse_fastq_record + +def sam_to_bam(input_fastq_files, input_sam_file, output_bam_file, + quals, multihits, pe_sr_mode=False, softclip=True, + keep_unmapped=True): + samfh = pysam.Samfile(input_sam_file, "r") + num_unmapped = 0 + num_multihits = 0 + num_frags = 0 + bamfh = pysam.Samfile(output_bam_file, "wb", template=samfh) + # setup fastq parsing + if softclip and (quals != SANGER_FORMAT): + kwargs = {"convert_quals": True, "qual_format": quals} + else: + kwargs = {"convert_quals": False} + fqiters = [parse_fastq_record(open(fq), **kwargs) for fq in input_fastq_files] + + # handle single-read and paired-end + if len(fqiters) == 1: + reorder_func = fix_sr_alignment_ordering(samfh, fqiters[0]) + else: + reorder_func = fix_alignment_ordering(samfh, fqiters, pe_sr_mode) + # iterate through buffer + for bufitems in reorder_func: + num_frags += 1 + for bufitem in bufitems: + for r in bufitem.reads: + # softclip uses the fastq record to replace the sequence + # and quality scores of the read + if softclip: + soft_pad_read(bufitem.fqrec, r) + # keep statistics of unmapped/multimapped reads and + # suppress output if 'keep_unmapped' is False + if r.is_unmapped: + xm_tag = r.opt('XM') + if xm_tag < multihits: + num_unmapped += 1 + if not keep_unmapped: + continue + else: + num_multihits += 1 + bamfh.write(r) + for fqfh in fqiters: + fqfh.close() + bamfh.close() + samfh.close() + logging.debug("Found %d fragments" % (num_frags)) + logging.debug("\t%d unmapped reads" % (num_unmapped)) + logging.debug("\t%d multimapping (>%dX) reads" % + (num_multihits, multihits)) + +if __name__ == '__main__': + from optparse import 
OptionParser + logging.basicConfig(level=logging.DEBUG, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + parser = OptionParser("usage: %prog [options] <out.bam> <in.sam> <in1.fq> [<in2.fq>]") + parser.add_option("--multihits", type="int", dest="multihits", default=100) + parser.add_option("--quals", dest="quals", + choices=FASTQ_QUAL_FORMATS, + default=SANGER_FORMAT) + parser.add_option("--pesr", action="store_true", dest="pe_sr_mode", default=False) + parser.add_option("--softclip", action="store_true", dest="softclip", default=False) + parser.add_option("--un", action="store_true", dest="keep_unmapped", default=False) + options, args = parser.parse_args() + output_bam_file = args[0] + input_sam_file = args[1] + input_fastq_files = args[2:] + sam_to_bam(input_fastq_files, + input_sam_file, + output_bam_file, + quals=options.quals, + multihits=options.multihits, + pe_sr_mode=options.pe_sr_mode, + softclip=options.softclip, + keep_unmapped=options.keep_unmapped) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pipeline/write_output.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pipeline/write_output.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,185 @@\n+\'\'\'\n+Created on Jul 1, 2011\n+\n+@author: mkiyer\n+\n+chimerascan: chimeric transcript discovery using RNA-seq\n+\n+Copyright (C) 2011 Matthew Iyer\n+\n+This program is free software: you can redistribute it and/or modify\n+it under the terms of the GNU General Public License as published by\n+the Free Software Foundation, either version 3 of the License, or\n+(at your option) any later version.\n+\n+This program is distributed in the hope that it will be useful,\n+but WITHOUT ANY WARRANTY; without even the implied warranty of\n+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n+GNU General Public License for more details.\n+\n+You should have received a copy of the GNU General Public License\n+along with this program. If not, see <http://www.gnu.org/licenses/>.\n+\'\'\'\n+import logging\n+import os\n+import sys\n+import operator\n+import collections\n+\n+from chimerascan import pysam\n+from chimerascan.lib.chimera import Chimera, get_chimera_type\n+from chimerascan.lib import config\n+from chimerascan.lib.gene_to_genome import build_transcript_genome_map, \\\n+ build_transcript_cluster_map, build_genome_tx_trees, \\\n+ build_tx_name_gene_map, transcript_to_genome_pos\n+\n+from chimerascan.pipeline.filter_chimeras import get_wildtype_frags\n+\n+\n+def get_chimera_groups(input_file, gene_file):\n+ # build a lookup table to get gene clusters from transcript name \n+ transcript_cluster_map = build_transcript_cluster_map(open(gene_file))\n+ # build a lookup table to get genome coordinates from transcript \n+ # coordinates\n+ # TODO: can either group by exact breakpoint, or just by\n+ # gene cluster\n+ # transcript_genome_map = build_transcript_genome_map(open(gene_file))\n+ # group chimeras in the same genomic cluster with the same\n+ # breakpoint\n+ cluster_chimera_dict = collections.defaultdict(lambda: [])\n+ for c in Chimera.parse(open(input_file)):\n+ # get cluster of overlapping genes\n+ cluster5p = 
transcript_cluster_map[c.tx_name_5p]\n+ cluster3p = transcript_cluster_map[c.tx_name_3p]\n+ # get genomic positions of breakpoints\n+ #coord5p = transcript_to_genome_pos(c.partner5p.tx_name, c.partner5p.end-1, transcript_genome_map)\n+ #coord3p = transcript_to_genome_pos(c.partner3p.tx_name, c.partner3p.start, transcript_genome_map)\n+ # add to dictionary\n+ cluster_chimera_dict[(cluster5p,cluster3p)].append(c)\n+ # TODO: use this grouping instead?\n+ #cluster_chimera_dict[(cluster5p,cluster3p,coord5p,coord3p)].append(c)\n+ for key,chimeras in cluster_chimera_dict.iteritems():\n+ yield key,chimeras\n+\n+def get_best_coverage_chimera(chimeras):\n+ stats = []\n+ for c in chimeras:\n+ # TODO: come up with a way to prioritize here (spanning included?)\n+ stats.append((c,\n+ c.get_num_unique_positions(),\n+ c.get_num_frags()))\n+ sorted_stats = sorted(stats, key=operator.itemgetter(1,2), reverse=True)\n+ return sorted_stats[0][0]\n+\n+def write_output(input_file, bam_file, output_file, index_dir):\n+ gene_file = os.path.join(index_dir, config.GENE_FEATURE_FILE)\n+ # build a lookup table to get genome coordinates from transcript \n+ # coordinates\n+ transcript_genome_map = build_transcript_genome_map(open(gene_file)) \n+ tx_name_gene_map = build_tx_name_gene_map(gene_file) \n+ genome_tx_trees = build_genome_tx_trees(gene_file)\n+ # open BAM file for checking wild-type isoform\n+ bamfh = pysam.Samfile(bam_file, "rb") \n+ # group chimera isoforms together\n+ lines = []\n+ chimera_clusters = 0\n+ for key,chimeras in get_chimera_groups(input_file, gene_file):\n+ txs5p = set()\n+ txs3p = set()\n+ genes5p = set()\n+ genes3p = set()\n+ names = set()\n+ for c in chimeras:\n+ txs5p.add("%s:%d-%d" % (c.tx_name_5p, c.tx_start_5p, c.tx_end_5p-1))\n+ txs3p.add("%s:%d-%d" % (c.tx_name_3p, c.tx_start_3p, c.tx_end_3p-1))\n'..b' genome_tx_trees)\n+ # get genomic positions of chimera\n+ chrom5p,strand5p,start5p = transcript_to_genome_pos(c.tx_name_5p, c.tx_start_5p, 
transcript_genome_map)\n+ chrom5p,strand5p,end5p = transcript_to_genome_pos(c.tx_name_5p, c.tx_end_5p-1, transcript_genome_map)\n+ if strand5p == 1:\n+ start5p,end5p = end5p,start5p\n+ chrom3p,strand3p,start3p = transcript_to_genome_pos(c.tx_name_3p, c.tx_start_3p, transcript_genome_map)\n+ chrom3p,strand3p,end3p = transcript_to_genome_pos(c.tx_name_3p, c.tx_end_3p-1, transcript_genome_map)\n+ if strand3p == 1:\n+ start3p,end3p = end3p,start3p\n+ # get breakpoint spanning sequences\n+ spanning_seqs = set()\n+ spanning_fasta_lines = []\n+ for dr in c.get_spanning_reads():\n+ if dr.seq in spanning_seqs:\n+ continue\n+ spanning_seqs.add(dr.seq)\n+ spanning_fasta_lines.extend([">%s/%d;pos=%d;strand=%s" % \n+ (dr.qname, dr.readnum+1, dr.pos, \n+ "-" if dr.is_reverse else "+"), \n+ dr.seq])\n+ # get isoform fraction\n+ num_wt_frags_5p, num_wt_frags_3p = get_wildtype_frags(c, bamfh)\n+ num_chimeric_frags = c.get_num_frags()\n+ frac5p = float(num_chimeric_frags) / (num_chimeric_frags + num_wt_frags_5p)\n+ frac3p = float(num_chimeric_frags) / (num_chimeric_frags + num_wt_frags_3p)\n+ # setup fields of BEDPE file\n+ fields = [chrom5p, start5p, end5p,\n+ chrom3p, start3p, end3p,\n+ "CLUSTER%d" % (chimera_clusters),\n+ c.get_num_frags(),\n+ "+" if (strand5p == 0) else "-",\n+ "+" if (strand3p == 0) else "-",\n+ \',\'.join(txs5p),\n+ \',\'.join(txs3p),\n+ \',\'.join(genes5p),\n+ \',\'.join(genes3p),\n+ chimera_type, distance,\n+ c.get_num_frags(),\n+ c.get_num_spanning_frags(),\n+ c.get_num_unique_positions(),\n+ frac5p, frac3p,\n+ \',\'.join(spanning_fasta_lines),\n+ \',\'.join(names)]\n+ lines.append(fields)\n+ chimera_clusters += 1\n+ bamfh.close()\n+ logging.debug("Clustered chimeras: %d" % (chimera_clusters))\n+ # sort\n+ lines = sorted(lines, key=operator.itemgetter(18, 17, 16), reverse=True) \n+ f = open(output_file, "w")\n+ print >>f, \'\\t\'.join([\'#chrom5p\', \'start5p\', \'end5p\', \n+ \'chrom3p\', \'start3p\', \'end3p\',\n+ \'chimera_cluster_id\', \'score\', \n+ 
\'strand5p\', \'strand3p\',\n+ \'transcript_ids_5p\', \'transcript_ids_3p\',\n+ \'genes5p\', \'genes3p\',\n+ \'type\', \'distance\',\n+ \'total_frags\', \n+ \'spanning_frags\',\n+ \'unique_alignment_positions\',\n+ \'isoform_fraction_5p\',\n+ \'isoform_fraction_3p\',\n+ \'breakpoint_spanning_reads\',\n+ \'chimera_ids\'])\n+ for fields in lines:\n+ print >>f, \'\\t\'.join(map(str, fields))\n+ f.close()\n+ return config.JOB_SUCCESS\n+\n+def main():\n+ from optparse import OptionParser\n+ logging.basicConfig(level=logging.DEBUG,\n+ format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n+ parser = OptionParser("usage: %prog [options] <index_dir> <in.txt> <bam_file> <out.txt>")\n+ options, args = parser.parse_args()\n+ index_dir = args[0]\n+ input_file = args[1]\n+ bam_file = args[2]\n+ output_file = args[3]\n+ return write_output(input_file, bam_file, output_file, index_dir)\n+\n+if __name__ == "__main__":\n+ sys.exit(main())\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/COPYING --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/COPYING Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,21 @@ +The MIT License + +Copyright (c) 2008-2009 Genome Research Ltd. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/Pileup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/Pileup.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
'''Tools for working with files in the samtools pileup -c format.'''
import collections
import sys

# One record per substitution/reference line of ``samtools pileup -c`` output.
PileupSubstitution = collections.namedtuple(
    "PileupSubstitution",
    " ".join((
        "chromosome",
        "position",
        "reference_base",
        "consensus_base",
        "consensus_quality",
        "snp_quality",
        "rms_mapping_quality",
        "coverage",
        "read_bases",
        "base_qualities")))

# One record per indel line (reference base column is ``*``).
# NOTE: the field name "first_allelle" is misspelled, but it is part of the
# public namedtuple interface and is therefore kept as is.
PileupIndel = collections.namedtuple(
    "PileupIndel",
    " ".join((
        "chromosome",
        "position",
        "reference_base",
        "genotype",
        "consensus_quality",
        "snp_quality",
        "rms_mapping_quality",
        "coverage",
        "first_allelle",
        "second_allele",
        "reads_first",
        "reads_second",
        "reads_diff")))


class _PileupParseError(Exception):
    '''Fallback error raised when no package-level ``SamtoolsError`` is found.'''


def _parse_error(line):
    '''Return the exception instance that reports an unparsable *line*.

    ``SamtoolsError`` is not defined in this module, so it is looked up
    lazily on the already-imported parent package (if any).  When this
    module is used stand-alone, or the parent package does not expose
    ``SamtoolsError``, a local :class:`_PileupParseError` is used instead.
    '''
    package = sys.modules.get(__name__.rpartition(".")[0])
    error_class = getattr(package, "SamtoolsError", _PileupParseError)
    return error_class("parsing error in line: `%s`" % line)


def iterate(infile):
    '''iterate over a ``samtools pileup -c`` formatted file.

    *infile* can be any iterator over lines.

    The function yields named tuples of the type :class:`PileupSubstitution`
    or :class:`PileupIndel`; a line is treated as an indel when its third
    column is ``*``.

    A parsing error is raised (see :func:`_parse_error`) when a line has
    fewer than three columns, too few or too many columns for its record
    type, or a non-numeric value in a numeric column.

    .. note::
       The parser converts to 0-based coordinates
    '''

    def to_zero_based(value):
        return int(value) - 1

    conv_subst = (str, to_zero_based, str, str, int, int, int, int, str, str)
    conv_indel = conv_subst + (int, int, int)

    for line in infile:
        # split() already discards the line terminator; slicing off the last
        # character would corrupt a final line that has no trailing newline.
        d = line.split()
        if len(d) < 3:
            raise _parse_error(line)

        if d[2] == "*":
            converters, record = conv_indel, PileupIndel
        else:
            converters, record = conv_subst, PileupSubstitution

        try:
            # TypeError: wrong number of columns for the record type.
            # ValueError: int() applied to a non-numeric column.
            row = record(*[conv(value) for conv, value in zip(converters, d)])
        except (TypeError, ValueError):
            raise _parse_error(line)
        # Yield outside the try block so that exceptions thrown into the
        # generator by the consumer are not mistaken for parse errors.
        yield row
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/__init__.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
from csamtools import *
from ctabix import *
import csamtools
import ctabix
import Pileup
import sys
import os


class SamtoolsError(Exception):
    '''Error reported by the embedded samtools library.

    The payload passed to the constructor is kept in ``value``; ``str()``
    of the exception is the ``repr`` of that payload.
    '''

    def __init__(self, value):
        self.value = value

    def __str__(self):
        return repr(self.value)


class SamtoolsDispatcher(object):
    '''Callable wrapper that runs one samtools sub-command in-process.

    Calling an instance forwards the positional arguments to
    ``csamtools._samtools_dispatch`` and collects the command's return
    code, stderr lines and stdout lines.

    A :class:`SamtoolsError` is raised when the return code is non-zero
    or when stderr contains anything other than a small set of known
    informational messages.

    *parsers* is an optional sequence of ``(options, parser)`` pairs.
    The pairs are tried in order; the first one whose options are all
    present in the call arguments has its parser applied to stdout and
    the parser result is returned.  Without a matching parser (or when
    the keyword argument ``raw`` is true) the stdout lines are returned
    unchanged.
    '''
    dispatch = None
    parsers = None

    def __init__(self, dispatch, parsers):
        self.dispatch = dispatch
        self.parsers = parsers
        self.stderr = []

    def __call__(self, *args, **kwargs):
        '''execute the samtools command'''
        retval, stderr, stdout = csamtools._samtools_dispatch(self.dispatch, args)
        if retval:
            raise SamtoolsError("\n".join(stderr))

        # Keep the unfiltered messages available through getMessages().
        self.stderr = stderr

        # samtools does not propagate its return code reliably, so any
        # output on stderr is treated as an error, except for these
        # purely informational messages.
        informational = ("[sam_header_read2]",
                         "[bam_index_load]",
                         "[bam_sort_core]",
                         "[samopen] SAM header is present")
        problems = [message for message in stderr
                    if not message.startswith(informational)]
        if problems:
            raise SamtoolsError("\n".join(problems))

        # Hand stdout to the first parser whose options were all supplied.
        if not kwargs.get("raw") and stdout and self.parsers:
            for options, parser in self.parsers:
                if all(option in args for option in options):
                    return parser(stdout)

        return stdout

    def getMessages(self):
        '''return the stderr lines captured by the last successful call'''
        return self.stderr

    def usage(self):
        '''return the samtools usage information for this command'''
        retval, stderr, stdout = csamtools._samtools_dispatch(self.dispatch)
        return "".join(stderr)


#
# samtools sub-commands exported as python callables:
#   python name -> (samtools command, parsers)
#
# "import" is a python reserved word, hence the name "samimport".
SAMTOOLS_DISPATCH = {
    "view": ("view", None),
    "sort": ("sort", None),
    "samimport": ("import", None),
    "pileup": ("pileup", ((("-c",), Pileup.iterate),)),
    "faidx": ("faidx", None),
    "tview": ("tview", None),
    "index": ("index", None),
    "fixmate": ("fixmate", None),
    "glfview": ("glfview", None),
    "flagstat": ("flagstat", None),
    "calmd": ("calmd", None),
    "merge": ("merge", None),
    "rmdup": ("rmdup", None),
}

# Bind one dispatcher per sub-command as a module-level name.
for key, options in SAMTOOLS_DISPATCH.iteritems():
    cmd, parser = options
    globals()[key] = SamtoolsDispatcher(cmd, parser)

# Re-export everything from the extension modules plus the names defined here.
__all__ = (csamtools.__all__
           + ctabix.__all__
           + ["SamtoolsError", "SamtoolsDispatcher"]
           + list(SAMTOOLS_DISPATCH)
           + ["Pileup"])

from version import __version__, __samtools_version__
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/csamtools.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/csamtools.c Thu Oct 12 16:26:36 2017 -0400 |
b |
b'@@ -0,0 +1,24858 @@\n+/* Generated by Cython 0.13 on Mon Jan 31 00:58:16 2011 */\n+\n+#define PY_SSIZE_T_CLEAN\n+#include "Python.h"\n+#ifndef Py_PYTHON_H\n+ #error Python headers needed to compile C extensions, please install development version of Python.\n+#else\n+\n+#include <stddef.h> /* For offsetof */\n+#ifndef offsetof\n+#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )\n+#endif\n+\n+#if !defined(WIN32) && !defined(MS_WINDOWS)\n+ #ifndef __stdcall\n+ #define __stdcall\n+ #endif\n+ #ifndef __cdecl\n+ #define __cdecl\n+ #endif\n+ #ifndef __fastcall\n+ #define __fastcall\n+ #endif\n+#endif\n+\n+#ifndef DL_IMPORT\n+ #define DL_IMPORT(t) t\n+#endif\n+#ifndef DL_EXPORT\n+ #define DL_EXPORT(t) t\n+#endif\n+\n+#ifndef PY_LONG_LONG\n+ #define PY_LONG_LONG LONG_LONG\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02040000\n+ #define METH_COEXIST 0\n+ #define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type)\n+ #define PyDict_Contains(d,o) PySequence_Contains(d,o)\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02050000\n+ typedef int Py_ssize_t;\n+ #define PY_SSIZE_T_MAX INT_MAX\n+ #define PY_SSIZE_T_MIN INT_MIN\n+ #define PY_FORMAT_SIZE_T ""\n+ #define PyInt_FromSsize_t(z) PyInt_FromLong(z)\n+ #define PyInt_AsSsize_t(o) PyInt_AsLong(o)\n+ #define PyNumber_Index(o) PyNumber_Int(o)\n+ #define PyIndex_Check(o) PyNumber_Check(o)\n+ #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02060000\n+ #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)\n+ #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)\n+ #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)\n+ #define PyVarObject_HEAD_INIT(type, size) \\\n+ PyObject_HEAD_INIT(type) size,\n+ #define PyType_Modified(t)\n+\n+ typedef struct {\n+ void *buf;\n+ PyObject *obj;\n+ Py_ssize_t len;\n+ Py_ssize_t itemsize;\n+ int readonly;\n+ int ndim;\n+ char *format;\n+ Py_ssize_t *shape;\n+ Py_ssize_t *strides;\n+ Py_ssize_t *suboffsets;\n+ void *internal;\n+ } 
Py_buffer;\n+\n+ #define PyBUF_SIMPLE 0\n+ #define PyBUF_WRITABLE 0x0001\n+ #define PyBUF_FORMAT 0x0004\n+ #define PyBUF_ND 0x0008\n+ #define PyBUF_STRIDES (0x0010 | PyBUF_ND)\n+ #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)\n+ #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)\n+ #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)\n+ #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)\n+\n+#endif\n+\n+#if PY_MAJOR_VERSION < 3\n+ #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"\n+#else\n+ #define __Pyx_BUILTIN_MODULE_NAME "builtins"\n+#endif\n+\n+#if PY_MAJOR_VERSION >= 3\n+ #define Py_TPFLAGS_CHECKTYPES 0\n+ #define Py_TPFLAGS_HAVE_INDEX 0\n+#endif\n+\n+#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)\n+ #define Py_TPFLAGS_HAVE_NEWBUFFER 0\n+#endif\n+\n+#if PY_MAJOR_VERSION >= 3\n+ #define PyBaseString_Type PyUnicode_Type\n+ #define PyStringObject PyUnicodeObject\n+ #define PyString_Type PyUnicode_Type\n+ #define PyString_Check PyUnicode_Check\n+ #define PyString_CheckExact PyUnicode_CheckExact\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02060000\n+ #define PyBytesObject PyStringObject\n+ #define PyBytes_Type PyString_Type\n+ #define PyBytes_Check PyString_Check\n+ #define PyBytes_CheckExact PyString_CheckExact\n+ #define PyBytes_FromString PyString_FromString\n+ #define PyBytes_FromStringAndSize PyString_FromStringAndSize\n+ #define PyBytes_FromFormat PyString_FromFormat\n+ #define PyBytes_DecodeEscape PyString_DecodeEscape\n+ #define PyBytes_AsString PyString_AsString\n+ #define PyBytes_AsStringAndSize PyString_AsStringAndSize\n+ #define PyBytes_Size PyString_Size\n+ #define PyBytes_AS_STRING PyString_AS_STRING\n+ #define PyBytes_GET_SIZE PyString_GET_SIZE\n+ #define PyBytes_Repr PyString_Repr\n+ #define PyBytes_Concat '..b'\n+ if (!py_code) goto bad;\n+ py_frame = PyFrame_New(\n+ PyThreadState_GET(), /*PyThreadState *tstate,*/\n+ py_code, /*PyCodeObject *code,*/\n+ py_globals, /*PyObject *globals,*/\n+ 0 /*PyObject *locals*/\n+ );\n+ if (!py_frame) 
goto bad;\n+ py_frame->f_lineno = __pyx_lineno;\n+ PyTraceBack_Here(py_frame);\n+bad:\n+ Py_XDECREF(py_srcfile);\n+ Py_XDECREF(py_funcname);\n+ Py_XDECREF(py_code);\n+ Py_XDECREF(py_frame);\n+}\n+\n+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {\n+ while (t->p) {\n+ #if PY_MAJOR_VERSION < 3\n+ if (t->is_unicode) {\n+ *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);\n+ } else if (t->intern) {\n+ *t->p = PyString_InternFromString(t->s);\n+ } else {\n+ *t->p = PyString_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ #else /* Python 3+ has unicode identifiers */\n+ if (t->is_unicode | t->is_str) {\n+ if (t->intern) {\n+ *t->p = PyUnicode_InternFromString(t->s);\n+ } else if (t->encoding) {\n+ *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);\n+ } else {\n+ *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ } else {\n+ *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ #endif\n+ if (!*t->p)\n+ return -1;\n+ ++t;\n+ }\n+ return 0;\n+}\n+\n+/* Type Conversion Functions */\n+\n+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {\n+ int is_true = x == Py_True;\n+ if (is_true | (x == Py_False) | (x == Py_None)) return is_true;\n+ else return PyObject_IsTrue(x);\n+}\n+\n+static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {\n+ PyNumberMethods *m;\n+ const char *name = NULL;\n+ PyObject *res = NULL;\n+#if PY_VERSION_HEX < 0x03000000\n+ if (PyInt_Check(x) || PyLong_Check(x))\n+#else\n+ if (PyLong_Check(x))\n+#endif\n+ return Py_INCREF(x), x;\n+ m = Py_TYPE(x)->tp_as_number;\n+#if PY_VERSION_HEX < 0x03000000\n+ if (m && m->nb_int) {\n+ name = "int";\n+ res = PyNumber_Int(x);\n+ }\n+ else if (m && m->nb_long) {\n+ name = "long";\n+ res = PyNumber_Long(x);\n+ }\n+#else\n+ if (m && m->nb_int) {\n+ name = "int";\n+ res = PyNumber_Long(x);\n+ }\n+#endif\n+ if (res) {\n+#if PY_VERSION_HEX < 0x03000000\n+ if (!PyInt_Check(res) && !PyLong_Check(res)) {\n+#else\n+ if (!PyLong_Check(res)) {\n+#endif\n+ 
PyErr_Format(PyExc_TypeError,\n+ "__%s__ returned non-%s (type %.200s)",\n+ name, name, Py_TYPE(res)->tp_name);\n+ Py_DECREF(res);\n+ return NULL;\n+ }\n+ }\n+ else if (!PyErr_Occurred()) {\n+ PyErr_SetString(PyExc_TypeError,\n+ "an integer is required");\n+ }\n+ return res;\n+}\n+\n+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {\n+ Py_ssize_t ival;\n+ PyObject* x = PyNumber_Index(b);\n+ if (!x) return -1;\n+ ival = PyInt_AsSsize_t(x);\n+ Py_DECREF(x);\n+ return ival;\n+}\n+\n+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {\n+#if PY_VERSION_HEX < 0x02050000\n+ if (ival <= LONG_MAX)\n+ return PyInt_FromLong((long)ival);\n+ else {\n+ unsigned char *bytes = (unsigned char *) &ival;\n+ int one = 1; int little = (int)*(unsigned char*)&one;\n+ return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0);\n+ }\n+#else\n+ return PyInt_FromSize_t(ival);\n+#endif\n+}\n+\n+static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) {\n+ unsigned PY_LONG_LONG val = __Pyx_PyInt_AsUnsignedLongLong(x);\n+ if (unlikely(val == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) {\n+ return (size_t)-1;\n+ } else if (unlikely(val != (unsigned PY_LONG_LONG)(size_t)val)) {\n+ PyErr_SetString(PyExc_OverflowError,\n+ "value too large to convert to size_t");\n+ return (size_t)-1;\n+ }\n+ return (size_t)val;\n+}\n+\n+\n+#endif /* Py_PYTHON_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/csamtools.pxd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/csamtools.pxd Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,283 @@ + +cdef extern from "string.h": + ctypedef int size_t + void *memcpy(void *dst,void *src,size_t len) + void *memmove(void *dst,void *src,size_t len) + void *memset(void *b,int c,size_t len) + +cdef extern from "stdlib.h": + void free(void *) + void *malloc(size_t) + void *calloc(size_t,size_t) + void *realloc(void *,size_t) + int c_abs "abs" (int) + void qsort(void *base, size_t nmemb, size_t size, + int (*compar)(void *,void *)) + +cdef extern from "stdio.h": + ctypedef struct FILE: + pass + FILE *fopen(char *,char *) + FILE *freopen(char *path, char *mode, FILE *stream) + int fileno(FILE *stream) + int dup2(int oldfd, int newfd) + int fflush(FILE *stream) + + FILE * stderr + FILE * stdout + int fclose(FILE *) + int sscanf(char *str,char *fmt,...) + int printf(char *fmt,...) + int sprintf(char *str,char *fmt,...) + int fprintf(FILE *ifile,char *fmt,...) + char *fgets(char *str,int size,FILE *ifile) + +cdef extern from "ctype.h": + int toupper(int c) + int tolower(int c) + +cdef extern from "unistd.h": + char *ttyname(int fd) + int isatty(int fd) + +cdef extern from "string.h": + int strcmp(char *s1, char *s2) + int strncmp(char *s1,char *s2,size_t len) + char *strcpy(char *dest,char *src) + char *strncpy(char *dest,char *src, size_t len) + char *strdup(char *) + char *strcat(char *,char *) + size_t strlen(char *s) + int memcmp( void * s1, void *s2, size_t len ) + +cdef extern from "Python.h": + long _Py_HashPointer(void*) + +cdef extern from "razf.h": + pass + +cdef extern from "stdint.h": + ctypedef int int64_t + ctypedef int int32_t + ctypedef int uint32_t + ctypedef int uint8_t + ctypedef int uint64_t + + +cdef extern from "bam.h": + + # IF _IOLIB=2, bamFile = BGZF, see bgzf.h + # samtools uses KNETFILE, check how this works + + ctypedef struct tamFile: + pass + + ctypedef struct bamFile: + pass + + ctypedef struct bam1_core_t: + int32_t tid + int32_t pos + uint32_t bin + uint32_t qual + uint32_t l_qname + uint32_t flag + uint32_t n_cigar + 
int32_t l_qseq + int32_t mtid + int32_t mpos + int32_t isize + + ctypedef struct bam1_t: + bam1_core_t core + int l_aux + int data_len + int m_data + uint8_t *data + + ctypedef struct bam_pileup1_t: + bam1_t *b + int32_t qpos + int indel + int level + uint32_t is_del + uint32_t is_head + uint32_t is_tail + + ctypedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, bam_pileup1_t *pl, void *data) + + ctypedef int (*bam_fetch_f)(bam1_t *b, void *data) + + ctypedef struct bam_header_t: + int32_t n_targets + char **target_name + uint32_t *target_len + void *hash + void *rg2lib + int l_text + char *text + + ctypedef struct bam_index_t: + pass + + ctypedef struct bam_plbuf_t: + pass + + ctypedef struct bam_iter_t: + pass + + bam1_t * bam_init1() + void bam_destroy1(bam1_t *) + + bamFile razf_dopen(int data_fd, char *mode) + + int64_t bam_seek( bamFile fp, uint64_t voffset, int where) + int64_t bam_tell( bamFile fp ) + + # void bam_init_header_hash(bam_header_t *header) + + ############################################### + # stand-ins for samtools macros + uint32_t * bam1_cigar( bam1_t * b) + char * bam1_qname( bam1_t * b) + uint8_t * bam1_seq( bam1_t * b) + uint8_t * bam1_qual( bam1_t * b) + uint8_t * bam1_aux( bam1_t * b) + + ############################################### + # bam iterator interface + bam_iter_t bam_iter_query( bam_index_t *idx, int tid, int beg, int end) + + int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b) + + void bam_iter_destroy(bam_iter_t iter) + + ############################################### + + bam1_t * bam_dup1( bam1_t *src ) + + bam1_t * bam_copy1(bam1_t *bdst, bam1_t *bsrc) + bam_index_t *bam_index_load(char *f ) + + void bam_index_destroy(bam_index_t *idx) + + int bam_parse_region(bam_header_t *header, char *str, int *ref_id, int *begin, int *end) + + ############################################### + bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data) + + int bam_fetch(bamFile fp, bam_index_t *idx, int tid, int beg, 
int end, void *data, bam_fetch_f func) + + int bam_plbuf_push(bam1_t *b, bam_plbuf_t *buf) + + void bam_plbuf_destroy(bam_plbuf_t *buf) + ######################################## + # pileup iterator interface + ctypedef struct bam_plp_t: + pass + + ctypedef int (*bam_plp_auto_f)(void *data, bam1_t *b) + + bam_plp_t bam_plp_init( bam_plp_auto_f func, void *data) + int bam_plp_push( bam_plp_t iter, bam1_t *b) + bam_pileup1_t *bam_plp_next( bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) + bam_pileup1_t *bam_plp_auto( bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) + void bam_plp_set_mask(bam_plp_t iter, int mask) + void bam_plp_reset(bam_plp_t iter) + void bam_plp_destroy(bam_plp_t iter) + + ################################################## + + int bam_read1(bamFile fp, bam1_t *b) + + int bam_write1( bamFile fp, bam1_t *b) + + bam_header_t *bam_header_init() + + int bam_header_write( bamFile fp, bam_header_t *header) + + bam_header_t *bam_header_read( bamFile fp ) + + void bam_header_destroy(bam_header_t *header) + + bam1_t * bam_dup1( bam1_t *src ) + + bam1_t * bam_copy1(bam1_t *bdst, bam1_t *bsrc) + + uint8_t *bam_aux_get(bam1_t *b, char tag[2]) + + int bam_aux2i(uint8_t *s) + float bam_aux2f(uint8_t *s) + double bam_aux2d(uint8_t *s) + char bam_aux2A( uint8_t *s) + char *bam_aux2Z( uint8_t *s) + + int bam_reg2bin(uint32_t beg, uint32_t end) + + uint32_t bam_calend(bam1_core_t *c, uint32_t *cigar) + +cdef extern from "sam.h": + + ctypedef struct samfile_t_un: + tamFile tamr + bamFile bam + FILE *tamw + + ctypedef struct samfile_t: + int type + samfile_t_un x + bam_header_t *header + + samfile_t *samopen( char *fn, char * mode, void *aux) + + int sampileup( samfile_t *fp, int mask, bam_pileup_f func, void *data) + + void samclose(samfile_t *fp) + + int samread(samfile_t *fp, bam1_t *b) + + int samwrite(samfile_t *fp, bam1_t *b) + +cdef extern from "faidx.h": + + ctypedef struct faidx_t: + pass + + int fai_build(char *fn) + + void fai_destroy(faidx_t *fai) + + 
faidx_t *fai_load(char *fn) + + char *fai_fetch(faidx_t *fai, char *reg, int *len) + + int faidx_fetch_nseq(faidx_t *fai) + + char *faidx_fetch_seq(faidx_t *fai, char *c_name, + int p_beg_i, int p_end_i, int *len) + +cdef extern from "pysam_util.h": + + int pysam_pileup_next(bam1_t *b, + bam_plbuf_t *buf, + bam_pileup1_t ** plp, + int * tid, + int * pos, + int * n_plp ) + + + int pysam_dispatch(int argc, char *argv[] ) + + # stand-in functions for samtools macros + void pysam_bam_destroy1( bam1_t * b) + + # add *nbytes* into the variable length data of *src* at *pos* + bam1_t * pysam_bam_update( bam1_t * b, + size_t nbytes_old, + size_t nbytes_new, + uint8_t * pos ) + + # translate char to unsigned char + unsigned char pysam_translate_sequence( char s ) + + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/csamtools.pyx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/csamtools.pyx Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,2064 @@\n+# cython: embedsignature=True\n+# cython: profile=True\n+# adds doc-strings for sphinx\n+\n+import tempfile, os, sys, types, itertools, struct, ctypes\n+\n+from python_string cimport PyString_FromStringAndSize, PyString_AS_STRING\n+from python_exc cimport PyErr_SetString\n+\n+# defines imported from samtools\n+DEF SEEK_SET = 0\n+DEF SEEK_CUR = 1\n+DEF SEEK_END = 2\n+\n+## These are bits set in the flag.\n+## have to put these definitions here, in csamtools.pxd they got ignored\n+## @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */\n+DEF BAM_FPAIRED =1\n+## @abstract the read is mapped in a proper pair */\n+DEF BAM_FPROPER_PAIR =2\n+## @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */\n+DEF BAM_FUNMAP =4\n+## @abstract the mate is unmapped */\n+DEF BAM_FMUNMAP =8\n+## @abstract the read is mapped to the reverse strand */\n+DEF BAM_FREVERSE =16\n+## @abstract the mate is mapped to the reverse strand */\n+DEF BAM_FMREVERSE =32\n+## @abstract this is read1 */\n+DEF BAM_FREAD1 =64\n+## @abstract this is read2 */\n+DEF BAM_FREAD2 =128\n+## @abstract not primary alignment */\n+DEF BAM_FSECONDARY =256\n+## @abstract QC failure */\n+DEF BAM_FQCFAIL =512\n+## @abstract optical or PCR duplicate */\n+DEF BAM_FDUP =1024\n+\n+DEF BAM_CIGAR_SHIFT=4\n+DEF BAM_CIGAR_MASK=((1 << BAM_CIGAR_SHIFT) - 1)\n+\n+DEF BAM_CMATCH = 0\n+DEF BAM_CINS = 1\n+DEF BAM_CDEL = 2\n+DEF BAM_CREF_SKIP = 3\n+DEF BAM_CSOFT_CLIP = 4\n+DEF BAM_CHARD_CLIP = 5\n+DEF BAM_CPAD = 6\n+\n+#####################################################################\n+#####################################################################\n+#####################################################################\n+## private factory methods\n+#####################################################################\n+cdef class AlignedRead\n+cdef makeAlignedRead( bam1_t * src):\n+ \'\'\'enter src into AlignedRead.\'\'\'\n+ cdef AlignedRead 
dest\n+ dest = AlignedRead()\n+ # destroy dummy delegate created in constructor\n+ # to prevent memory leak.\n+ bam_destroy1(dest._delegate)\n+ dest._delegate = bam_dup1(src)\n+ return dest\n+\n+cdef class PileupProxy\n+cdef makePileupProxy( bam_pileup1_t * plp, int tid, int pos, int n ):\n+ cdef PileupProxy dest\n+ dest = PileupProxy()\n+ dest.plp = plp\n+ dest.tid = tid\n+ dest.pos = pos\n+ dest.n = n\n+ return dest\n+\n+cdef class PileupRead\n+cdef makePileupRead( bam_pileup1_t * src ):\n+ \'\'\'fill a PileupRead object from a bam_pileup1_t * object.\'\'\'\n+ cdef PileupRead dest\n+ dest = PileupRead()\n+ dest._alignment = makeAlignedRead( src.b )\n+ dest._qpos = src.qpos\n+ dest._indel = src.indel\n+ dest._level = src.level\n+ dest._is_del = src.is_del\n+ dest._is_head = src.is_head\n+ dest._is_tail = src.is_tail\n+ return dest\n+\n+#####################################################################\n+#####################################################################\n+#####################################################################\n+## Generic callbacks for inserting python callbacks.\n+#####################################################################\n+cdef int fetch_callback( bam1_t *alignment, void *f):\n+ \'\'\'callback for bam_fetch. \n+ \n+ calls function in *f* with a new :class:`AlignedRead` object as parameter.\n+ \'\'\'\n+ a = makeAlignedRead( alignment )\n+ (<object>f)(a)\n+\n+class PileupColumn(object): \n+ \'\'\'A pileup column. 
A pileup column contains \n+ all the reads that map to a certain target base.\n+\n+ tid \n+ chromosome ID as is defined in the header \n+ pos \n+ the target base coordinate (0-based) \n+ n \n+ number of reads mapping to this column \n+ pileups \n+ list of reads (:class:`pysam.PileupRead`) aligned to this column \n+ \'\'\' \n+ def __str__(self): \n+ return "\\t".j'..b'is_tail:\n+ def __get__(self):\n+ return self._is_tail\n+ property level:\n+ def __get__(self):\n+ return self._level\n+\n+class Outs:\n+ \'\'\'http://mail.python.org/pipermail/python-list/2000-June/038406.html\'\'\'\n+ def __init__(self, id = 1):\n+ self.streams = []\n+ self.id = id\n+\n+ def setdevice(self, filename):\n+ \'\'\'open an existing file, like "/dev/null"\'\'\'\n+ fd = os.open(filename, os.O_WRONLY)\n+ self.setfd(fd)\n+\n+ def setfile(self, filename):\n+ \'\'\'open a new file.\'\'\'\n+ fd = os.open(filename, os.O_WRONLY|os.O_CREAT, 0660);\n+ self.setfd(fd)\n+\n+ def setfd(self, fd):\n+ ofd = os.dup(self.id) # Save old stream on new unit.\n+ self.streams.append(ofd)\n+ sys.stdout.flush() # Buffered data goes to old stream.\n+ os.dup2(fd, self.id) # Open unit 1 on new stream.\n+ os.close(fd) # Close other unit (look out, caller.)\n+ \n+ def restore(self):\n+ \'\'\'restore previous output stream\'\'\'\n+ if self.streams:\n+ # the following was not sufficient, hence flush both stderr and stdout\n+ # os.fsync( self.id )\n+ sys.stdout.flush()\n+ sys.stderr.flush()\n+ os.dup2(self.streams[-1], self.id)\n+ os.close(self.streams[-1])\n+ del self.streams[-1]\n+\n+def _samtools_dispatch( method, args = () ):\n+ \'\'\'call ``method`` in samtools providing arguments in args.\n+ \n+ .. note:: \n+ This method redirects stdout and stderr to capture it \n+ from samtools. If for some reason stdout/stderr disappears\n+ the reason might be in this method.\n+\n+ .. note::\n+ The current implementation might only work on linux.\n+ \n+ .. 
note:: \n+ This method captures stdout and stderr using temporary files, \n+ which are then read into memory in their entirety. This method\n+ is slow and might cause large memory overhead. \n+\n+ See http://bytes.com/topic/c/answers/487231-how-capture-stdout-temporarily\n+ on the topic of redirecting stderr/stdout.\n+ \'\'\'\n+\n+ # note that debugging this module can be a problem\n+ # as stdout/stderr will not appear\n+\n+ # redirect stderr and stdout to file\n+\n+ # open files and redirect into it\n+ stderr_h, stderr_f = tempfile.mkstemp()\n+ stdout_h, stdout_f = tempfile.mkstemp()\n+\n+ # patch for `samtools view`\n+ # samtools `view` closes stdout, from which I can not\n+ # recover. Thus redirect output to file with -o option.\n+ if method == "view":\n+ if "-o" in args: raise ValueError("option -o is forbidden in samtools view")\n+ args = ( "-o", stdout_f ) + args\n+\n+ stdout_save = Outs( sys.stdout.fileno() )\n+ stdout_save.setfd( stdout_h )\n+ stderr_save = Outs( sys.stderr.fileno() )\n+ stderr_save.setfd( stderr_h )\n+\n+ # do the function call to samtools\n+ cdef char ** cargs\n+ cdef int i, n, retval\n+\n+ n = len(args)\n+ # allocate two more for first (dummy) argument (contains command)\n+ cargs = <char**>calloc( n+2, sizeof( char *) )\n+ cargs[0] = "samtools"\n+ cargs[1] = method\n+ for i from 0 <= i < n: cargs[i+2] = args[i]\n+ retval = pysam_dispatch(n+2, cargs)\n+ free( cargs )\n+\n+ # restore stdout/stderr. 
This will also flush, so\n+ # needs to be before reading back the file contents\n+ stdout_save.restore()\n+ stderr_save.restore()\n+\n+ # capture stderr/stdout.\n+ out_stderr = open( stderr_f, "r").readlines()\n+ out_stdout = open( stdout_f, "r").readlines()\n+\n+ # clean up files\n+ os.remove( stderr_f )\n+ os.remove( stdout_f )\n+\n+ return retval, out_stderr, out_stdout\n+\n+__all__ = ["Samfile", \n+ "Fastafile",\n+ "IteratorRow", \n+ "IteratorRowAll", \n+ "IteratorColumn", \n+ "AlignedRead", \n+ "PileupColumn", \n+ "PileupProxy", \n+ "PileupRead" ]\n+\n+ \n+\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/ctabix.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/ctabix.c Thu Oct 12 16:26:36 2017 -0400 |
b |
b'@@ -0,0 +1,12808 @@\n+/* Generated by Cython 0.13 on Mon Jan 31 00:58:34 2011 */\n+\n+#define PY_SSIZE_T_CLEAN\n+#include "Python.h"\n+#ifndef Py_PYTHON_H\n+ #error Python headers needed to compile C extensions, please install development version of Python.\n+#else\n+\n+#include <stddef.h> /* For offsetof */\n+#ifndef offsetof\n+#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )\n+#endif\n+\n+#if !defined(WIN32) && !defined(MS_WINDOWS)\n+ #ifndef __stdcall\n+ #define __stdcall\n+ #endif\n+ #ifndef __cdecl\n+ #define __cdecl\n+ #endif\n+ #ifndef __fastcall\n+ #define __fastcall\n+ #endif\n+#endif\n+\n+#ifndef DL_IMPORT\n+ #define DL_IMPORT(t) t\n+#endif\n+#ifndef DL_EXPORT\n+ #define DL_EXPORT(t) t\n+#endif\n+\n+#ifndef PY_LONG_LONG\n+ #define PY_LONG_LONG LONG_LONG\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02040000\n+ #define METH_COEXIST 0\n+ #define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type)\n+ #define PyDict_Contains(d,o) PySequence_Contains(d,o)\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02050000\n+ typedef int Py_ssize_t;\n+ #define PY_SSIZE_T_MAX INT_MAX\n+ #define PY_SSIZE_T_MIN INT_MIN\n+ #define PY_FORMAT_SIZE_T ""\n+ #define PyInt_FromSsize_t(z) PyInt_FromLong(z)\n+ #define PyInt_AsSsize_t(o) PyInt_AsLong(o)\n+ #define PyNumber_Index(o) PyNumber_Int(o)\n+ #define PyIndex_Check(o) PyNumber_Check(o)\n+ #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02060000\n+ #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)\n+ #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)\n+ #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)\n+ #define PyVarObject_HEAD_INIT(type, size) \\\n+ PyObject_HEAD_INIT(type) size,\n+ #define PyType_Modified(t)\n+\n+ typedef struct {\n+ void *buf;\n+ PyObject *obj;\n+ Py_ssize_t len;\n+ Py_ssize_t itemsize;\n+ int readonly;\n+ int ndim;\n+ char *format;\n+ Py_ssize_t *shape;\n+ Py_ssize_t *strides;\n+ Py_ssize_t *suboffsets;\n+ void *internal;\n+ } 
Py_buffer;\n+\n+ #define PyBUF_SIMPLE 0\n+ #define PyBUF_WRITABLE 0x0001\n+ #define PyBUF_FORMAT 0x0004\n+ #define PyBUF_ND 0x0008\n+ #define PyBUF_STRIDES (0x0010 | PyBUF_ND)\n+ #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)\n+ #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)\n+ #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)\n+ #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)\n+\n+#endif\n+\n+#if PY_MAJOR_VERSION < 3\n+ #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"\n+#else\n+ #define __Pyx_BUILTIN_MODULE_NAME "builtins"\n+#endif\n+\n+#if PY_MAJOR_VERSION >= 3\n+ #define Py_TPFLAGS_CHECKTYPES 0\n+ #define Py_TPFLAGS_HAVE_INDEX 0\n+#endif\n+\n+#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)\n+ #define Py_TPFLAGS_HAVE_NEWBUFFER 0\n+#endif\n+\n+#if PY_MAJOR_VERSION >= 3\n+ #define PyBaseString_Type PyUnicode_Type\n+ #define PyStringObject PyUnicodeObject\n+ #define PyString_Type PyUnicode_Type\n+ #define PyString_Check PyUnicode_Check\n+ #define PyString_CheckExact PyUnicode_CheckExact\n+#endif\n+\n+#if PY_VERSION_HEX < 0x02060000\n+ #define PyBytesObject PyStringObject\n+ #define PyBytes_Type PyString_Type\n+ #define PyBytes_Check PyString_Check\n+ #define PyBytes_CheckExact PyString_CheckExact\n+ #define PyBytes_FromString PyString_FromString\n+ #define PyBytes_FromStringAndSize PyString_FromStringAndSize\n+ #define PyBytes_FromFormat PyString_FromFormat\n+ #define PyBytes_DecodeEscape PyString_DecodeEscape\n+ #define PyBytes_AsString PyString_AsString\n+ #define PyBytes_AsStringAndSize PyString_AsStringAndSize\n+ #define PyBytes_Size PyString_Size\n+ #define PyBytes_AS_STRING PyString_AS_STRING\n+ #define PyBytes_GET_SIZE PyString_GET_SIZE\n+ #define PyBytes_Repr PyString_Repr\n+ #define PyBytes_Concat '..b'\n+ if (!py_code) goto bad;\n+ py_frame = PyFrame_New(\n+ PyThreadState_GET(), /*PyThreadState *tstate,*/\n+ py_code, /*PyCodeObject *code,*/\n+ py_globals, /*PyObject *globals,*/\n+ 0 /*PyObject *locals*/\n+ );\n+ if (!py_frame) 
goto bad;\n+ py_frame->f_lineno = __pyx_lineno;\n+ PyTraceBack_Here(py_frame);\n+bad:\n+ Py_XDECREF(py_srcfile);\n+ Py_XDECREF(py_funcname);\n+ Py_XDECREF(py_code);\n+ Py_XDECREF(py_frame);\n+}\n+\n+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {\n+ while (t->p) {\n+ #if PY_MAJOR_VERSION < 3\n+ if (t->is_unicode) {\n+ *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);\n+ } else if (t->intern) {\n+ *t->p = PyString_InternFromString(t->s);\n+ } else {\n+ *t->p = PyString_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ #else /* Python 3+ has unicode identifiers */\n+ if (t->is_unicode | t->is_str) {\n+ if (t->intern) {\n+ *t->p = PyUnicode_InternFromString(t->s);\n+ } else if (t->encoding) {\n+ *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);\n+ } else {\n+ *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ } else {\n+ *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);\n+ }\n+ #endif\n+ if (!*t->p)\n+ return -1;\n+ ++t;\n+ }\n+ return 0;\n+}\n+\n+/* Type Conversion Functions */\n+\n+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {\n+ int is_true = x == Py_True;\n+ if (is_true | (x == Py_False) | (x == Py_None)) return is_true;\n+ else return PyObject_IsTrue(x);\n+}\n+\n+static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {\n+ PyNumberMethods *m;\n+ const char *name = NULL;\n+ PyObject *res = NULL;\n+#if PY_VERSION_HEX < 0x03000000\n+ if (PyInt_Check(x) || PyLong_Check(x))\n+#else\n+ if (PyLong_Check(x))\n+#endif\n+ return Py_INCREF(x), x;\n+ m = Py_TYPE(x)->tp_as_number;\n+#if PY_VERSION_HEX < 0x03000000\n+ if (m && m->nb_int) {\n+ name = "int";\n+ res = PyNumber_Int(x);\n+ }\n+ else if (m && m->nb_long) {\n+ name = "long";\n+ res = PyNumber_Long(x);\n+ }\n+#else\n+ if (m && m->nb_int) {\n+ name = "int";\n+ res = PyNumber_Long(x);\n+ }\n+#endif\n+ if (res) {\n+#if PY_VERSION_HEX < 0x03000000\n+ if (!PyInt_Check(res) && !PyLong_Check(res)) {\n+#else\n+ if (!PyLong_Check(res)) {\n+#endif\n+ 
PyErr_Format(PyExc_TypeError,\n+ "__%s__ returned non-%s (type %.200s)",\n+ name, name, Py_TYPE(res)->tp_name);\n+ Py_DECREF(res);\n+ return NULL;\n+ }\n+ }\n+ else if (!PyErr_Occurred()) {\n+ PyErr_SetString(PyExc_TypeError,\n+ "an integer is required");\n+ }\n+ return res;\n+}\n+\n+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {\n+ Py_ssize_t ival;\n+ PyObject* x = PyNumber_Index(b);\n+ if (!x) return -1;\n+ ival = PyInt_AsSsize_t(x);\n+ Py_DECREF(x);\n+ return ival;\n+}\n+\n+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {\n+#if PY_VERSION_HEX < 0x02050000\n+ if (ival <= LONG_MAX)\n+ return PyInt_FromLong((long)ival);\n+ else {\n+ unsigned char *bytes = (unsigned char *) &ival;\n+ int one = 1; int little = (int)*(unsigned char*)&one;\n+ return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0);\n+ }\n+#else\n+ return PyInt_FromSize_t(ival);\n+#endif\n+}\n+\n+static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) {\n+ unsigned PY_LONG_LONG val = __Pyx_PyInt_AsUnsignedLongLong(x);\n+ if (unlikely(val == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) {\n+ return (size_t)-1;\n+ } else if (unlikely(val != (unsigned PY_LONG_LONG)(size_t)val)) {\n+ PyErr_SetString(PyExc_OverflowError,\n+ "value too large to convert to size_t");\n+ return (size_t)-1;\n+ }\n+ return (size_t)val;\n+}\n+\n+\n+#endif /* Py_PYTHON_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/ctabix.pxd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/ctabix.pxd Thu Oct 12 16:26:36 2017 -0400 |
b |
# C-level declarations used by ctabix.pyx.
#
# The first part re-declares the handful of libc functions the module calls;
# the second part declares the bgzf and tabix entry points.  Inside a
# "cdef extern" block Cython only needs a compatible *kind* of type (integer,
# pointer, struct), which is why the integer typedefs below are all spelled
# "ctypedef int ...": the real definitions come from the C headers.

cdef extern from "string.h":
    ctypedef int size_t
    void *memcpy(void *dst,void *src,size_t len)
    void *memmove(void *dst,void *src,size_t len)
    void *memset(void *b,int c,size_t len)
    char *strtok_r(char *str, char *delim, char **saveptr)
    char *strncpy(char *dest, char *src, size_t n)
    void *memchr(void *s, int c, size_t n)

cdef extern from "stdlib.h":
    void free(void *)
    void *malloc(size_t)
    void *calloc(size_t,size_t)
    void *realloc(void *,size_t)
    void qsort(void *base, size_t nmemb, size_t size,
               int (*compar)(void *,void *))
    # C abs() exposed under the name c_abs so it does not shadow Python's abs
    int c_abs "abs" (int)
    int atoi( char *nptr)
    long atol( char *nptr)
    double atof( char *nptr)

cdef extern from "stdio.h":
    ctypedef struct FILE:
        pass
    FILE *fopen(char *,char *)
    FILE *freopen(char *path, char *mode, FILE *stream)
    int fileno(FILE *stream)
    int dup2(int oldfd, int newfd)
    int fflush(FILE *stream)

    FILE * stderr
    FILE * stdout
    int fclose(FILE *)
    int sscanf(char *str,char *fmt,...)
    # printf takes the format string as its FIRST argument; the previous
    # declaration carried a spurious leading "char *str" parameter (copied
    # from sscanf/sprintf) which made a plain printf(fmt) call uncompilable.
    int printf(char *fmt,...)
    int sprintf(char *str,char *fmt,...)
    int fprintf(FILE *ifile,char *fmt,...)
    char *fgets(char *str,int size,FILE *ifile)

cdef extern from "ctype.h":
    int toupper(int c)
    int tolower(int c)

# The next two blocks declare nothing; they only make Cython emit the
# corresponding #include lines.
cdef extern from "sys/types.h":
    pass

cdef extern from "sys/stat.h":
    pass

cdef extern from "fcntl.h":
    int open(char *pathname, int flags)

cdef extern from "unistd.h":
    ctypedef int ssize_t
    char *ttyname(int fd)
    int isatty(int fd)
    ssize_t read(int fd, void *buf, size_t count)

cdef extern from "string.h":
    int strcmp(char *s1, char *s2)
    int strncmp(char *s1,char *s2,size_t len)
    char *strcpy(char *dest,char *src)
    char *strncpy(char *dest,char *src, size_t len)
    char *strdup(char *)
    char *strcat(char *,char *)
    size_t strlen(char *s)
    int memcmp( void * s1, void *s2, size_t len )

cdef extern from "stdint.h":
    ctypedef int int64_t
    ctypedef int int32_t
    ctypedef int uint32_t
    ctypedef int uint8_t
    ctypedef int uint64_t

cdef extern from "Python.h":
    ctypedef struct FILE
    FILE* PyFile_AsFile(object)
    char *fgets(char *str, int size, FILE *ifile)
    int feof(FILE *stream)
    size_t strlen(char *s)
    # POSIX getline returns ssize_t: the number of bytes read, or -1 on
    # failure/EOF.  Declaring it as size_t hid the -1 sentinel from callers.
    ssize_t getline(char **lineptr, size_t *n, FILE *stream)
    char *strstr(char *, char *)
    char *strchr(char *string, int c)
    int fileno(FILE *stream)

# bgzf (blocked gzip) file access
cdef extern from "bgzf.h":

    ctypedef struct BGZF:
        pass

    int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)

    BGZF * bgzf_open(char * path, char * mode)

    int bgzf_write(BGZF * fp, void* data, int length)

    int bgzf_close(BGZF* fp)

# tabix support
cdef extern from "tabix.h":

    ctypedef struct ti_index_t:
        pass

    ctypedef struct tabix_t:
        BGZF *fp
        ti_index_t *idx
        char *fn
        char *fnidx

    ctypedef struct ti_iter_t:
        pass

    ctypedef struct ti_conf_t:
        int32_t preset
        int32_t sc, bc, ec
        int32_t meta_char, line_skip

    tabix_t *ti_open(char *fn, char *fnidx)

    int ti_lazy_index_load(tabix_t *t)

    void ti_close(tabix_t *t)

    ti_iter_t ti_query(tabix_t *t, char *name, int beg, int end)
    ti_iter_t ti_queryi(tabix_t *t, int tid, int beg, int end)
    ti_iter_t ti_querys(tabix_t *t, char *reg)
    char * ti_read(tabix_t *t, ti_iter_t iter, int *len)

    # Get the list of sequence names. Each "char*" pointer points to an
    # internal member of the index, so DO NOT modify the returned
    # pointer; otherwise the index will be corrupted. The returned
    # pointer should be freed by a single free() call by the routine
    # calling this function. The number of sequences is returned at *n.
    char **ti_seqname(ti_index_t *idx, int *n)

    # Destroy the iterator.
    void ti_iter_destroy(ti_iter_t iter)

    # Build the index for file <fn>. File <fn>.tbi will be generated
    # and overwrite the file of the same name. Return -1 on failure.
    int ti_index_build(char *fn, ti_conf_t *conf)

    # Load the index from file <fn>.tbi. If <fn> is a URL and the index
    # file is not in the working directory, <fn>.tbi will be
    # downloaded. Return NULL on failure.
    ti_index_t *ti_index_load( char *fn)

    ti_index_t *ti_index_load_local(char *fnidx)

    # Destroy the index.
    void ti_index_destroy(ti_index_t *idx)

    # Parse a region like: chr2, chr2:100, chr2:100-200. Return -1 on failure.
    int ti_parse_region( ti_index_t *idx, char *str, int *tid, int *begin, int *end)

    int ti_get_tid( ti_index_t *idx, char *name)

    # Get the iterator pointing to the first record at the current file
    # position. If the file is just opened, the iterator points to the
    # first record in the file.
    ti_iter_t ti_iter_first()

    # Get the iterator pointing to the first record in region tid:beg-end.
    ti_iter_t ti_iter_query( ti_index_t *idx, int tid, int beg, int end)

    # Get the data line pointed by the iterator and iterate to the next record.
    # (declaration intentionally left disabled)
    # char *ti_iter_read(BGZF *fp, ti_iter_t iter, int *len)
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/ctabix.pyx --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/ctabix.pyx Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,881 @@\n+# cython: embedsignature=True\n+# adds doc-strings for sphinx\n+\n+import tempfile, os, sys, types, itertools, struct, ctypes\n+\n+cdef class Tabixfile:\n+ \'\'\'*(filename, mode=\'r\')*\n+\n+ opens a :term:`tabix file` for reading. A missing\n+ index (*filename* + ".tbi") will raise an exception.\n+ \'\'\'\n+\n+ cdef char * filename\n+\n+ # pointer to tabixfile\n+ cdef tabix_t * tabixfile\n+\n+ def __cinit__(self, *args, **kwargs ):\n+ self.tabixfile = NULL\n+ self._open( *args, **kwargs )\n+\n+ def _isOpen( self ):\n+ \'\'\'return true if samfile has been opened.\'\'\'\n+ return self.tabixfile != NULL\n+\n+ def _open( self, \n+ char * filename, \n+ mode =\'r\',\n+ ):\n+ \'\'\'open a :term:`tabix file` for reading.\n+ \'\'\'\n+\n+ assert mode in ( "r",), "invalid file opening mode `%s`" % mode\n+\n+ # close a previously opened file\n+ if self.tabixfile != NULL: self.close()\n+ self.tabixfile = NULL\n+\n+ self.filename = filename\n+ filename_index = filename + ".tbi"\n+\n+ if mode[0] == \'w\':\n+ # open file for writing\n+ pass\n+\n+ elif mode[0] == "r":\n+ # open file for reading\n+ if not os.path.exists( self.filename ):\n+ raise IOError( "file `%s` not found" % self.filename)\n+\n+ if not os.path.exists( filename_index ):\n+ raise IOError( "index `%s` not found" % filename_index)\n+\n+ # open file and load index\n+ self.tabixfile = ti_open( self.filename, filename_index )\n+\n+ if self.tabixfile == NULL:\n+ raise IOError("could not open file `%s`" % filename )\n+\n+ def _parseRegion( self, \n+ reference = None, \n+ start = None, \n+ end = None, \n+ region = None ):\n+ \'\'\'parse region information.\n+\n+ raise ValueError for for invalid regions.\n+\n+ returns a tuple of region, tid, start and end. 
Region\n+ is a valid samtools :term:`region` or None if the region\n+ extends over the whole file.\n+\n+ Note that regions are 1-based, while start,end are python coordinates.\n+ \'\'\'\n+ ti_lazy_index_load( self.tabixfile )\n+\n+ cdef int rtid\n+ cdef int rstart\n+ cdef int rend\n+ cdef int max_pos\n+ max_pos = 2 << 29\n+\n+ rtid = rstart = rend = 0\n+\n+ # translate to a region\n+ if reference:\n+ if start != None and end != None:\n+ region = "%s:%i-%i" % (reference, start+1, end)\n+ elif start == None and end != None:\n+ region = "%s:%i-%i" % (reference, 1, end)\n+ elif end == None and start != None:\n+ region = "%s:%i-%i" % (reference, start+1, max_pos-1)\n+ else:\n+ region = reference\n+\n+ if region:\n+ ti_parse_region( self.tabixfile.idx, region, &rtid, &rstart, &rend) \n+ if rtid < 0: raise ValueError( "invalid region `%s`" % region )\n+ if rstart > rend: raise ValueError( \'invalid region: start (%i) > end (%i)\' % (rstart, rend) )\n+ if not 0 <= rstart < max_pos: raise ValueError( \'start out of range (%i)\' % rstart )\n+ if not 0 <= rend < max_pos: raise ValueError( \'end out of range (%i)\' % rend )\n+\n+ return region, rtid, rstart, rend\n+\n+ def fetch( self, \n+ reference = None,\n+ start = None, \n+ end = None, \n+ region = None,\n+ parser = None ):\n+ \'\'\'\n+ \n+ fetch one or more rows in a :term:`region` using 0-based indexing. The region is specified by\n+ :term:`reference`, *start* and *end*. Alternatively, a samtools :term:`region` string can be supplied.\n+\n+ Without *reference* or *region* all entries will be fetched. 
\n+ \n+ If only *reference* is s'..b'E = 64 * 1024\n+\n+ fp = bgzf_open( filename_out, "w")\n+ if fp == NULL:\n+ raise IOError( "could not open \'%s\' for writing" )\n+\n+ fd_src = open(filename_in, O_RDONLY)\n+ if fd_src == 0:\n+ raise IOError( "could not open \'%s\' for reading" )\n+\n+ buffer = malloc(WINDOW_SIZE)\n+\n+ while c > 0:\n+ c = read(fd_src, buffer, WINDOW_SIZE)\n+ r = bgzf_write(fp, buffer, c)\n+ if r < 0:\n+ free( buffer )\n+ raise OSError("writing failed")\n+ \n+ free( buffer )\n+ r = bgzf_close(fp)\n+ if r < 0: raise OSError("writing failed")\n+\n+def tabix_index( filename, \n+ force = False,\n+ seq_col = None, \n+ start_col = None, \n+ end_col = None,\n+ preset = None,\n+ meta_char = "#",\n+ zerobased = False,\n+ ):\n+ \'\'\'\n+ index tab-separated *filename* using tabix.\n+\n+ An existing index will not be overwritten unless\n+ *force* is set.\n+\n+ The index will be built from coordinates\n+ in columns *seq_col*, *start_col* and *end_col*.\n+\n+ The contents of *filename* have to be sorted by \n+ contig and position - the method does not check\n+ if the file is sorted.\n+\n+ Column indices are 0-based. Coordinates in the file\n+ are assumed to be 1-based.\n+\n+ If *preset* is provided, the column coordinates\n+ are taken from a preset. Valid values for preset\n+ are "gff", "bed", "sam", "vcf", psltbl", "pileup".\n+ \n+ Lines beginning with *meta_char* and the first\n+ *line_skip* lines will be skipped.\n+ \n+ If *filename* does not end in ".gz", it will be automatically\n+ compressed. The original file will be removed and only the \n+ compressed file will be retained. 
\n+\n+ If *filename* ends in *gz*, the file is assumed to be already\n+ compressed with bgzf.\n+\n+ returns the filename of the compressed data\n+ \'\'\'\n+ \n+ if not os.path.exists(filename): raise IOError("No such file \'%s\'" % filename)\n+\n+ if not filename.endswith(".gz"): \n+ \n+ tabix_compress( filename, filename + ".gz", force = force )\n+ os.unlink( filename )\n+ filename += ".gz"\n+\n+ if not force and os.path.exists(filename + ".tbi" ):\n+ raise IOError( "Filename \'%s.tbi\' already exists, use *force* to overwrite" )\n+\n+ # columns (1-based)\n+ # preset-code, contig, start, end, metachar for commends, lines to ignore at beginning\n+ # 0 is a missing column\n+ preset2conf = {\n+ \'gff\' : ( 0, 1, 4, 5, ord(\'#\'), 0 ),\n+ \'bed\' : ( 0x10000, 1, 2, 3, ord(\'#\'), 0 ),\n+ \'psltbl\' : ( 0x10000, 15, 17, 18, ord(\'#\'), 0 ),\n+ \'sam\' : ( 1, 3, 4, 0, ord(\'#\'), 0 ),\n+ \'vcf\' : ( 2, 1, 2, 0, ord(\'#\'), 0 ),\n+ \'pileup\': (3, 1, 2, 0, ord(\'#\'), 0 ),\n+ }\n+\n+ if preset:\n+ try:\n+ conf_data = preset2conf[preset]\n+ except KeyError:\n+ raise KeyError( "unknown preset \'%s\', valid presets are \'%s\'" % (preset, ",".join(preset2conf.keys() )))\n+ else:\n+ if end_col == None: end_col = -1\n+ preset = 0\n+\n+ # note that tabix internally works with 0-based coordinates and open/closed intervals.\n+ # When using a preset, conversion is automatically taken care of.\n+ # Otherwise, the coordinates are assumed to be 1-based closed intervals and \n+ # -1 is subtracted from the start coordinate. To avoid doing this, set\n+ # the TI_FLAG_UCSC=0x10000 flag:\n+ if zerobased: preset = preset | 0x10000\n+\n+ conf_data = (preset, seq_col+1, start_col+1, end_col+1, ord(meta_char), 0)\n+ \n+ cdef ti_conf_t conf\n+ conf.preset, conf.sc, conf.bc, conf.ec, conf.meta_char, conf.line_skip = conf_data\n+\n+ ti_index_build( filename, &conf)\n+ \n+ return filename\n+ \n+__all__ = ["tabix_index", \n+ "tabix_compress",\n+ "Tabixfile", \n+ "asTuple",\n+ "asGTF",\n+ ]\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/namedtuple.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/namedtuple.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
from operator import itemgetter as _itemgetter
from keyword import iskeyword as _iskeyword
import sys as _sys

try:
    _string_types = basestring      # Python 2: covers both str and unicode
except NameError:
    _string_types = str             # Python 3: basestring no longer exists


def _has_only_identifier_chars(name):
    """Return True when every character of *name* is alphanumeric or '_'.

    An empty string yields True; callers check for emptiness separately.
    """
    for c in name:
        if not (c.isalnum() or c == '_'):
            return False
    return True


def namedtuple(typename, field_names, verbose=False, rename=False):
    """Returns a new subclass of tuple with named fields.

    typename    -- name of the generated class.
    field_names -- a sequence of names, or a single string with the names
                   separated by whitespace and/or commas.
    verbose     -- when true, print the generated class source.
    rename      -- when true, invalid field names (empty, non-identifier,
                   keyword, leading digit, leading underscore, duplicate)
                   are replaced by '_<position>' instead of raising.

    Raises ValueError for an invalid typename, or for an invalid field name
    when *rename* is false.

    >>> Point = namedtuple('Point', 'x y')
    >>> Point.__doc__                   # docstring for the new class
    'Point(x, y)'
    >>> p = Point(11, y=22)             # instantiate with positional args or keywords
    >>> p[0] + p[1]                     # indexable like a plain tuple
    33
    >>> x, y = p                        # unpack like a regular tuple
    >>> x, y
    (11, 22)
    >>> p.x + p.y                       # fields also accessible by name
    33
    >>> d = p._asdict()                 # convert to a dictionary
    >>> d['x']
    11
    >>> Point(**d)                      # convert from a dictionary
    Point(x=11, y=22)
    >>> p._replace(x=100)               # _replace() is like str.replace() but targets named fields
    Point(x=100, y=22)

    """

    # Parse and validate the field names.  Validation serves two purposes,
    # generating informative error messages and preventing template injection attacks.
    if isinstance(field_names, _string_types):
        field_names = field_names.replace(',', ' ').split()  # names separated by whitespace and/or commas
    field_names = tuple(map(str, field_names))
    if rename:
        names = list(field_names)
        seen = set()
        for i, name in enumerate(names):
            # Emptiness is tested first: the remaining checks index into or
            # iterate over the name and are not meaningful for ''.
            if (not name
                    or not _has_only_identifier_chars(name)
                    or _iskeyword(name)
                    or name[0].isdigit()
                    or name.startswith('_')
                    or name in seen):
                names[i] = '_%d' % i
            seen.add(name)
        field_names = tuple(names)
    for name in (typename,) + field_names:
        if not name:
            raise ValueError('Type names and field names cannot be empty: %r' % name)
        if not _has_only_identifier_chars(name):
            raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name)
        if _iskeyword(name):
            raise ValueError('Type names and field names cannot be a keyword: %r' % name)
        if name[0].isdigit():
            raise ValueError('Type names and field names cannot start with a number: %r' % name)
    seen_names = set()
    for name in field_names:
        if name.startswith('_') and not rename:
            raise ValueError('Field names cannot start with an underscore: %r' % name)
        if name in seen_names:
            raise ValueError('Encountered duplicate field name: %r' % name)
        seen_names.add(name)

    # Create and fill-in the class template
    numfields = len(field_names)
    argtxt = repr(field_names).replace("'", "")[1:-1]   # tuple repr without parens or quotes
    reprtxt = ', '.join('%s=%%r' % name for name in field_names)
    # Note: list(kwds) instead of kwds.keys() keeps the error text a plain
    # list on interpreters where keys() returns a view object.
    template = '''class %(typename)s(tuple):
        '%(typename)s(%(argtxt)s)' \n
        __slots__ = () \n
        _fields = %(field_names)r \n
        def __new__(_cls, %(argtxt)s):
            return _tuple.__new__(_cls, (%(argtxt)s)) \n
        @classmethod
        def _make(cls, iterable, new=tuple.__new__, len=len):
            'Make a new %(typename)s object from a sequence or iterable'
            result = new(cls, iterable)
            if len(result) != %(numfields)d:
                raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result))
            return result \n
        def __repr__(self):
            return '%(typename)s(%(reprtxt)s)' %% self \n
        def _asdict(self):
            'Return a new dict which maps field names to their values'
            return dict(zip(self._fields, self)) \n
        def _replace(_self, **kwds):
            'Return a new %(typename)s object replacing specified fields with new values'
            result = _self._make(map(kwds.pop, %(field_names)r, _self))
            if kwds:
                raise ValueError('Got unexpected field names: %%r' %% list(kwds))
            return result \n
        def __getnewargs__(self):
            return tuple(self) \n\n''' % dict(typename=typename,
                                              argtxt=argtxt,
                                              field_names=field_names,
                                              numfields=numfields,
                                              reprtxt=reprtxt)
    for i, name in enumerate(field_names):
        template += '        %s = _property(_itemgetter(%d))\n' % (name, i)
    if verbose:
        # Parenthesised single argument: prints the same text whether print
        # is a statement (Python 2) or a function (Python 3).
        print(template)

    # Execute the template string in a temporary namespace
    namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
                     _property=property, _tuple=tuple)
    try:
        # Call/tuple form of exec: accepted by Python 2 as
        # "exec template in namespace" and by Python 3 as the builtin.
        exec(template, namespace)
    except SyntaxError:
        # sys.exc_info() avoids both "except X, e" and "except X as e", so
        # the module parses on old and new interpreters alike.
        e = _sys.exc_info()[1]
        raise SyntaxError((getattr(e, 'msg', None) or str(e)) + ':\n' + template)
    result = namespace[typename]

    # For pickling to work, the __module__ variable needs to be set to the frame
    # where the named tuple is created.  Bypass this step in environments where
    # sys._getframe is not defined (Jython for example) or sys._getframe is not
    # defined for arguments greater than 0 (IronPython).
    try:
        result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
    except (AttributeError, ValueError):
        pass

    return result
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/pysam_util.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/pysam_util.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,290 @@\n+#include <ctype.h>\n+#include <assert.h>\n+#include "bam.h"\n+#include "khash.h"\n+#include "ksort.h"\n+#include "bam_endian.h"\n+#include "knetfile.h"\n+#include "pysam_util.h"\n+\n+// #######################################################\n+// utility routines to avoid using callbacks in bam_fetch\n+// taken from bam_index.c\n+// The order of the following declarations is important.\n+// #######################################################\n+\n+typedef struct\n+{\n+ uint64_t u, v;\n+} pair64_t;\n+\n+#define pair64_lt(a,b) ((a).u < (b).u)\n+\n+typedef struct {\n+\tuint32_t m, n;\n+\tpair64_t *list;\n+} bam_binlist_t;\n+\n+typedef struct {\n+\tint32_t n, m;\n+\tuint64_t *offset;\n+} bam_lidx_t;\n+\n+KSORT_INIT(my_off, pair64_t, pair64_lt);\n+KHASH_MAP_INIT_INT(my_i, bam_binlist_t);\n+\n+struct __bam_index_t\n+{\n+ int32_t n;\n+ khash_t(my_i) **index;\n+ bam_lidx_t *index2;\n+};\n+\n+typedef struct __linkbuf_t {\n+\tbam1_t b;\n+\tuint32_t beg, end;\n+\tstruct __linkbuf_t *next;\n+} lbnode_t;\n+\n+typedef struct {\n+\tint cnt, n, max;\n+\tlbnode_t **buf;\n+} mempool_t;\n+\n+struct __bam_plbuf_t {\n+\tmempool_t *mp;\n+\tlbnode_t *head, *tail, *dummy;\n+\tbam_pileup_f func;\n+\tvoid *func_data;\n+\tint32_t tid, pos, max_tid, max_pos;\n+\tint max_pu, is_eof;\n+\tbam_pileup1_t *pu;\n+\tint flag_mask;\n+};\n+\n+static mempool_t *mp_init()\n+{\n+\tmempool_t *mp;\n+\tmp = (mempool_t*)calloc(1, sizeof(mempool_t));\n+\treturn mp;\n+}\n+static void mp_destroy(mempool_t *mp)\n+{\n+\tint k;\n+\tfor (k = 0; k < mp->n; ++k) {\n+\t\tfree(mp->buf[k]->b.data);\n+\t\tfree(mp->buf[k]);\n+\t}\n+\tfree(mp->buf);\n+\tfree(mp);\n+}\n+static inline lbnode_t *mp_alloc(mempool_t *mp)\n+{\n+\t++mp->cnt;\n+\tif (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t));\n+\telse return mp->buf[--mp->n];\n+}\n+static inline void mp_free(mempool_t *mp, lbnode_t *p)\n+{\n+\t--mp->cnt; p->next = 0; // clear lbnode_t::next here\n+\tif (mp->n == mp->max) {\n+\t\tmp->max = 
mp->max? mp->max<<1 : 256;\n+\t\tmp->buf = (lbnode_t**)realloc(mp->buf, sizeof(lbnode_t*) * mp->max);\n+\t}\n+\tmp->buf[mp->n++] = p;\n+}\n+\n+static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)\n+{\n+\tunsigned k;\n+\tbam1_t *b = p->b;\n+\tbam1_core_t *c = &b->core;\n+\tuint32_t x = c->pos, y = 0;\n+\tint ret = 1, is_restart = 1;\n+\n+\tif (c->flag&BAM_FUNMAP) return 0; // unmapped read\n+\tassert(x <= pos); // otherwise a bug\n+\tp->qpos = -1; p->indel = 0; p->is_del = p->is_head = p->is_tail = 0;\n+\tfor (k = 0; k < c->n_cigar; ++k) {\n+\t\tint op = bam1_cigar(b)[k] & BAM_CIGAR_MASK; // operation\n+\t\tint l = bam1_cigar(b)[k] >> BAM_CIGAR_SHIFT; // length\n+\t\tif (op == BAM_CMATCH) { // NOTE: this assumes the first and the last operation MUST BE a match or a clip\n+\t\t\tif (x + l > pos) { // overlap with pos\n+\t\t\t\tp->indel = p->is_del = 0;\n+\t\t\t\tp->qpos = y + (pos - x);\n+\t\t\t\tif (x == pos && is_restart) p->is_head = 1;\n+\t\t\t\tif (x + l - 1 == pos) { // come to the end of a match\n+\t\t\t\t\tif (k < c->n_cigar - 1) { // there are additional operation(s)\n+\t\t\t\t\t\tuint32_t cigar = bam1_cigar(b)[k+1]; // next CIGAR\n+\t\t\t\t\t\tint op_next = cigar&BAM_CIGAR_MASK; // next CIGAR operation\n+\t\t\t\t\t\tif (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del\n+\t\t\t\t\t\telse if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins\n+\t\t\t\t\t\tif (op_next == BAM_CSOFT_CLIP || op_next == BAM_CREF_SKIP || op_next == BAM_CHARD_CLIP)\n+\t\t\t\t\t\t\tp->is_tail = 1; // tail\n+\t\t\t\t\t} else p->is_tail = 1; // this is the last operation; set tail\n+\t\t\t\t}\n+\t\t\t}\n+\t\t\tx += l; y += l;\n+\t\t} else if (op == BAM_CDEL) { // then set ->is_del\n+\t\t\tif (x + l > pos) {\n+\t\t\t\tp->indel = 0; p->is_del = 1;\n+\t\t\t\tp->qpos = y + (pos - x);\n+\t\t\t}\n+\t\t\tx += l;\n+\t\t} else if (op == BAM_CREF_SKIP) x += l;\n+\t\telse if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;\n+\t\tis_restart = (op 
== BAM_CREF_SKIP || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP);\n+\t\tif (x > pos) {\n+\t\t\tif (op == BAM_CREF_SKIP) ret = 0; // then do not put it into pileup at all\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\tassert(x > pos); // otherwise a bug\n+\treturn ret;\n+\n+}\n+// the following code has been taken from bam_plbuf_'..b'plp);\n+ if (plp == NULL) return 0;\n+ return 1;\n+}\n+\n+// pysam dispatch function to emulate the samtools\n+// command line within python.\n+// taken from the main function in bamtk.c\n+// added code to reset getopt\n+extern int main_samview(int argc, char *argv[]);\n+extern int main_import(int argc, char *argv[]);\n+extern int bam_pileup(int argc, char *argv[]);\n+extern int bam_merge(int argc, char *argv[]);\n+extern int bam_sort(int argc, char *argv[]);\n+extern int bam_index(int argc, char *argv[]);\n+extern int faidx_main(int argc, char *argv[]);\n+extern int bam_mating(int argc, char *argv[]);\n+extern int bam_rmdup(int argc, char *argv[]);\n+extern int glf3_view_main(int argc, char *argv[]);\n+extern int bam_flagstat(int argc, char *argv[]);\n+extern int bam_fillmd(int argc, char *argv[]);\n+\n+int pysam_dispatch(int argc, char *argv[] )\n+{\n+\n+#ifdef _WIN32\n+ setmode(fileno(stdout), O_BINARY);\n+ setmode(fileno(stdin), O_BINARY);\n+#ifdef _USE_KNETFILE\n+ knet_win32_init();\n+#endif\n+#endif\n+\n+ extern int optind;\n+ \n+ // reset getop\n+ optind = 1;\n+\n+ if (argc < 2) return 1;\n+\n+ if (strcmp(argv[1], "view") == 0) return main_samview(argc-1, argv+1);\n+ else if (strcmp(argv[1], "import") == 0) return main_import(argc-1, argv+1);\n+ else if (strcmp(argv[1], "pileup") == 0) return bam_pileup(argc-1, argv+1);\n+ else if (strcmp(argv[1], "merge") == 0) return bam_merge(argc-1, argv+1);\n+ else if (strcmp(argv[1], "sort") == 0) return bam_sort(argc-1, argv+1);\n+ else if (strcmp(argv[1], "index") == 0) return bam_index(argc-1, argv+1);\n+ else if (strcmp(argv[1], "faidx") == 0) return faidx_main(argc-1, argv+1);\n+ else if 
(strcmp(argv[1], "fixmate") == 0) return bam_mating(argc-1, argv+1);\n+ else if (strcmp(argv[1], "rmdup") == 0) return bam_rmdup(argc-1, argv+1);\n+ else if (strcmp(argv[1], "glfview") == 0) return glf3_view_main(argc-1, argv+1);\n+ else if (strcmp(argv[1], "flagstat") == 0) return bam_flagstat(argc-1, argv+1);\n+ else if (strcmp(argv[1], "calmd") == 0) return bam_fillmd(argc-1, argv+1);\n+ else if (strcmp(argv[1], "fillmd") == 0) return bam_fillmd(argc-1, argv+1);\n+\n+#if _CURSES_LIB != 0\n+ else if (strcmp(argv[1], "tview") == 0) return bam_tview_main(argc-1, argv+1);\n+#endif\n+ else \n+ {\n+ fprintf(stderr, "[main] unrecognized command \'%s\'\\n", argv[1]);\n+ return 1;\n+ }\n+ return 0;\n+}\n+\n+// taken from samtools/bam_import.c\n+static inline uint8_t *alloc_data(bam1_t *b, size_t size)\n+{\n+ if (b->m_data < size)\n+ {\n+ b->m_data = size;\n+ kroundup32(b->m_data);\n+ b->data = (uint8_t*)realloc(b->data, b->m_data);\n+ }\n+ return b->data;\n+}\n+\n+// update the variable length data within a bam1_t entry.\n+// Adds *nbytes_new* - *nbytes_old* into the variable length data of *src* at *pos*.\n+// Data within the bam1_t entry is moved so that it is\n+// consistent with the data field lengths.\n+bam1_t * pysam_bam_update( bam1_t * b,\n+\t\t\t const size_t nbytes_old,\n+\t\t\t const size_t nbytes_new, \n+\t\t\t uint8_t * pos )\n+{\n+ int d = nbytes_new-nbytes_old;\n+\n+ // no change\n+ if (d == 0) return b;\n+\n+ int new_size = d + b->data_len;\n+ size_t offset = pos - b->data;\n+\n+ //printf("d=%i, old=%i, new=%i, old_size=%i, new_size=%i\\n",\n+ // d, nbytes_old, nbytes_new, b->data_len, new_size);\n+ \n+ // increase memory if required\n+ if (d > 0)\n+ {\n+ alloc_data( b, new_size );\n+ pos = b->data + offset;\n+ }\n+ \n+ if (b->data_len != 0)\n+ {\n+ if (offset < 0 || offset > b->data_len)\n+\tfprintf(stderr, "[pysam_bam_insert] illegal offset: \'%i\'\\n", (int)offset);\n+ }\n+ \n+ // printf("dest=%p, src=%p, n=%i\\n", pos+nbytes_new, pos + nbytes_old, 
b->data_len - (offset+nbytes_old));\n+ memmove( pos + nbytes_new,\n+\t pos + nbytes_old,\n+\t b->data_len - (offset + nbytes_old));\n+ \n+ b->data_len = new_size;\n+ \n+ return b;\n+}\n+\n+// translate a nucleotide character to binary code\n+unsigned char pysam_translate_sequence( const unsigned char s )\n+{\n+ return bam_nt16_table[s];\n+}\n+\n+\n+\n+\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/pysam_util.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/pysam_util.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,42 @@ +#ifndef PYSAM_UTIL_H +#define PYSAM_UTIL_H + +////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////// +// various helper functions +// +// fill pileup buffer for next position. + +int pysam_pileup_next(const bam1_t *b, + bam_plbuf_t *buf, + bam_pileup1_t ** plp, + int * tid, + int * pos, + int * n_plp); + +int pysam_dispatch(int argc, char *argv[] ); + +/*! + @abstract Update the variable length data within a bam1_t entry + + Old data is deleted and the data within b are re-arranged to + make place for new data. + + @discussion Returns b + + @param b bam1_t data + @param nbytes_old size of old data + @param nbytes_new size of new data + @param pos position of data +*/ +bam1_t * pysam_bam_update( bam1_t * b, + const size_t nbytes_old, + const size_t nbytes_new, + uint8_t * pos ); + +// translate a nucleotide character to binary code +unsigned char pysam_translate_sequence( const unsigned char s ); + + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,308 @@\n+#include <stdio.h>\n+#include <ctype.h>\n+#include <errno.h>\n+#include <assert.h>\n+#include "bam.h"\n+#include "bam_endian.h"\n+#include "kstring.h"\n+#include "sam_header.h"\n+\n+int bam_is_be = 0;\n+char *bam_flag2char_table = "pPuUrR12sfd\\0\\0\\0\\0\\0";\n+\n+/**************************\n+ * CIGAR related routines *\n+ **************************/\n+\n+uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar)\n+{\n+\tuint32_t k, end;\n+\tend = c->pos;\n+\tfor (k = 0; k < c->n_cigar; ++k) {\n+\t\tint op = cigar[k] & BAM_CIGAR_MASK;\n+\t\tif (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP)\n+\t\t\tend += cigar[k] >> BAM_CIGAR_SHIFT;\n+\t}\n+\treturn end;\n+}\n+\n+int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar)\n+{\n+\tuint32_t k;\n+\tint32_t l = 0;\n+\tfor (k = 0; k < c->n_cigar; ++k) {\n+\t\tint op = cigar[k] & BAM_CIGAR_MASK;\n+\t\tif (op == BAM_CMATCH || op == BAM_CINS || op == BAM_CSOFT_CLIP)\n+\t\t\tl += cigar[k] >> BAM_CIGAR_SHIFT;\n+\t}\n+\treturn l;\n+}\n+\n+/********************\n+ * BAM I/O routines *\n+ ********************/\n+\n+bam_header_t *bam_header_init()\n+{\n+\tbam_is_be = bam_is_big_endian();\n+\treturn (bam_header_t*)calloc(1, sizeof(bam_header_t));\n+}\n+\n+void bam_header_destroy(bam_header_t *header)\n+{\n+\tint32_t i;\n+\textern void bam_destroy_header_hash(bam_header_t *header);\n+\tif (header == 0) return;\n+\tif (header->target_name) {\n+\t\tfor (i = 0; i < header->n_targets; ++i)\n+\t\t\tfree(header->target_name[i]);\n+\t\tfree(header->target_name);\n+\t\tfree(header->target_len);\n+\t}\n+\tfree(header->text);\n+\tif (header->dict) sam_header_free(header->dict);\n+\tif (header->rg2lib) sam_tbl_destroy(header->rg2lib);\n+\tbam_destroy_header_hash(header);\n+\tfree(header);\n+}\n+\n+bam_header_t *bam_header_read(bamFile fp)\n+{\n+\tbam_header_t *header;\n+\tchar buf[4];\n+\tint magic_len;\n+\tint32_t i = 1, name_len;\n+\t// check EOF\n+\ti = bgzf_check_EOF(fp);\n+\tif (i < 0) 
{\n+\t\t// If the file is a pipe, checking the EOF marker will *always* fail\n+\t\t// with ESPIPE. Suppress the error message in this case.\n+\t\tif (errno != ESPIPE) perror("[bam_header_read] bgzf_check_EOF");\n+\t}\n+\telse if (i == 0) fprintf(stderr, "[bam_header_read] EOF marker is absent.\\n");\n+\t// read "BAM1"\n+\tmagic_len = bam_read(fp, buf, 4);\n+\tif (magic_len != 4 || strncmp(buf, "BAM\\001", 4) != 0) {\n+\t\tfprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\\n");\n+\t\treturn 0;\n+\t}\n+\theader = bam_header_init();\n+\t// read plain text and the number of reference sequences\n+\tbam_read(fp, &header->l_text, 4);\n+\tif (bam_is_be) bam_swap_endian_4p(&header->l_text);\n+\theader->text = (char*)calloc(header->l_text + 1, 1);\n+\tbam_read(fp, header->text, header->l_text);\n+\tbam_read(fp, &header->n_targets, 4);\n+\tif (bam_is_be) bam_swap_endian_4p(&header->n_targets);\n+\t// read reference sequence names and lengths\n+\theader->target_name = (char**)calloc(header->n_targets, sizeof(char*));\n+\theader->target_len = (uint32_t*)calloc(header->n_targets, 4);\n+\tfor (i = 0; i != header->n_targets; ++i) {\n+\t\tbam_read(fp, &name_len, 4);\n+\t\tif (bam_is_be) bam_swap_endian_4p(&name_len);\n+\t\theader->target_name[i] = (char*)calloc(name_len, 1);\n+\t\tbam_read(fp, header->target_name[i], name_len);\n+\t\tbam_read(fp, &header->target_len[i], 4);\n+\t\tif (bam_is_be) bam_swap_endian_4p(&header->target_len[i]);\n+\t}\n+\treturn header;\n+}\n+\n+int bam_header_write(bamFile fp, const bam_header_t *header)\n+{\n+\tchar buf[4];\n+\tint32_t i, name_len, x;\n+\t// write "BAM1"\n+\tstrncpy(buf, "BAM\\001", 4);\n+\tbam_write(fp, buf, 4);\n+\t// write plain text and the number of reference sequences\n+\tif (bam_is_be) {\n+\t\tx = bam_swap_endian_4(header->l_text);\n+\t\tbam_write(fp, &x, 4);\n+\t\tif (header->l_text) bam_write(fp, header->text, header->l_text);\n+\t\tx = 
bam_swap_endian_4(header->n_targets);\n+\t\tbam_write(fp, &x, 4);\n+\t} else {\n+\t\tbam_write(fp, &header->l_text, 4);\n+\t\tif (header->l_text) bam_write(fp, header->text, header->l_text);\n+\t\tbam_write(fp, &header->n_targets, 4);\n+\t}\n+\t// write sequence names and lengths\n+\tfor (i = 0; i != header->n_targets'..b'32);\n+\tx[0] = c->tid;\n+\tx[1] = c->pos;\n+\tx[2] = (uint32_t)c->bin<<16 | c->qual<<8 | c->l_qname;\n+\tx[3] = (uint32_t)c->flag<<16 | c->n_cigar;\n+\tx[4] = c->l_qseq;\n+\tx[5] = c->mtid;\n+\tx[6] = c->mpos;\n+\tx[7] = c->isize;\n+\tbgzf_flush_try(fp, 4 + block_len);\n+\tif (bam_is_be) {\n+\t\tfor (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i);\n+\t\ty = block_len;\n+\t\tbam_write(fp, bam_swap_endian_4p(&y), 4);\n+\t\tswap_endian_data(c, data_len, data);\n+\t} else bam_write(fp, &block_len, 4);\n+\tbam_write(fp, x, BAM_CORE_SIZE);\n+\tbam_write(fp, data, data_len);\n+\tif (bam_is_be) swap_endian_data(c, data_len, data);\n+\treturn 4 + block_len;\n+}\n+\n+int bam_write1(bamFile fp, const bam1_t *b)\n+{\n+\treturn bam_write1_core(fp, &b->core, b->data_len, b->data);\n+}\n+\n+char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of)\n+{\n+\tuint8_t *s = bam1_seq(b), *t = bam1_qual(b);\n+\tint i;\n+\tconst bam1_core_t *c = &b->core;\n+\tkstring_t str;\n+\tstr.l = str.m = 0; str.s = 0;\n+\n+\tkputsn(bam1_qname(b), c->l_qname-1, &str); kputc(\'\\t\', &str);\n+\tif (of == BAM_OFDEC) { kputw(c->flag, &str); kputc(\'\\t\', &str); }\n+\telse if (of == BAM_OFHEX) ksprintf(&str, "0x%x\\t", c->flag);\n+\telse { // BAM_OFSTR\n+\t\tfor (i = 0; i < 16; ++i)\n+\t\t\tif ((c->flag & 1<<i) && bam_flag2char_table[i])\n+\t\t\t\tkputc(bam_flag2char_table[i], &str);\n+\t\tkputc(\'\\t\', &str);\n+\t}\n+\tif (c->tid < 0) kputsn("*\\t", 2, &str);\n+\telse { kputs(header->target_name[c->tid], &str); kputc(\'\\t\', &str); }\n+\tkputw(c->pos + 1, &str); kputc(\'\\t\', &str); kputw(c->qual, &str); kputc(\'\\t\', &str);\n+\tif (c->n_cigar == 0) kputc(\'*\', 
&str);\n+\telse {\n+\t\tfor (i = 0; i < c->n_cigar; ++i) {\n+\t\t\tkputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str);\n+\t\t\tkputc("MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK], &str);\n+\t\t}\n+\t}\n+\tkputc(\'\\t\', &str);\n+\tif (c->mtid < 0) kputsn("*\\t", 2, &str);\n+\telse if (c->mtid == c->tid) kputsn("=\\t", 2, &str);\n+\telse { kputs(header->target_name[c->mtid], &str); kputc(\'\\t\', &str); }\n+\tkputw(c->mpos + 1, &str); kputc(\'\\t\', &str); kputw(c->isize, &str); kputc(\'\\t\', &str);\n+\tif (c->l_qseq) {\n+\t\tfor (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str);\n+\t\tkputc(\'\\t\', &str);\n+\t\tif (t[0] == 0xff) kputc(\'*\', &str);\n+\t\telse for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str);\n+\t} else kputsn("*\\t*", 3, &str);\n+\ts = bam1_aux(b);\n+\twhile (s < b->data + b->data_len) {\n+\t\tuint8_t type, key[2];\n+\t\tkey[0] = s[0]; key[1] = s[1];\n+\t\ts += 2; type = *s; ++s;\n+\t\tkputc(\'\\t\', &str); kputsn((char*)key, 2, &str); kputc(\':\', &str);\n+\t\tif (type == \'A\') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; }\n+\t\telse if (type == \'C\') { kputsn("i:", 2, &str); kputw(*s, &str); ++s; }\n+\t\telse if (type == \'c\') { kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); ++s; }\n+\t\telse if (type == \'S\') { kputsn("i:", 2, &str); kputw(*(uint16_t*)s, &str); s += 2; }\n+\t\telse if (type == \'s\') { kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2; }\n+\t\telse if (type == \'I\') { kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4; }\n+\t\telse if (type == \'i\') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; }\n+\t\telse if (type == \'f\') { ksprintf(&str, "f:%g", *(float*)s); s += 4; }\n+\t\telse if (type == \'d\') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; }\n+\t\telse if (type == \'Z\' || type == \'H\') { kputc(type, &str); kputc(\':\', &str); while (*s) kputc(*s++, &str); ++s; }\n+\t}\n+\treturn str.s;\n+}\n+\n+char *bam_format1(const bam_header_t *header, const 
bam1_t *b)\n+{\n+\treturn bam_format1_core(header, b, BAM_OFDEC);\n+}\n+\n+void bam_view1(const bam_header_t *header, const bam1_t *b)\n+{\n+\tchar *s = bam_format1(header, b);\n+\tputs(s);\n+\tfree(s);\n+}\n+\n+// FIXME: we should also check the LB tag associated with each alignment\n+const char *bam_get_library(bam_header_t *h, const bam1_t *b)\n+{\n+\tconst uint8_t *rg;\n+\tif (h->dict == 0) h->dict = sam_header_parse2(h->text);\n+\tif (h->rg2lib == 0) h->rg2lib = sam_header2tbl(h->dict, "RG", "ID", "LB");\n+\trg = bam_aux_get(b, "RG");\n+\treturn (rg == 0)? 0 : sam_tbl_get(h->rg2lib, (const char*)(rg + 1));\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,724 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Genome Research Ltd (GRL).\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/* Contact: Heng Li <lh3@sanger.ac.uk> */\n+\n+#ifndef BAM_BAM_H\n+#define BAM_BAM_H\n+\n+/*!\n+ @header\n+\n+ BAM library provides I/O and various operations on manipulating files\n+ in the BAM (Binary Alignment/Mapping) or SAM (Sequence Alignment/Map)\n+ format. It now supports importing from or exporting to TAM, sorting,\n+ merging, generating pileup, and quickly retrieval of reads overlapped\n+ with a specified region.\n+\n+ @copyright Genome Research Ltd.\n+ */\n+\n+#include <stdint.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <stdio.h>\n+\n+#ifndef BAM_LITE\n+#define BAM_VIRTUAL_OFFSET16\n+#include "bgzf.h"\n+/*! 
@abstract BAM file handler */\n+typedef BGZF *bamFile;\n+#define bam_open(fn, mode) bgzf_open(fn, mode)\n+#define bam_dopen(fd, mode) bgzf_fdopen(fd, mode)\n+#define bam_close(fp) bgzf_close(fp)\n+#define bam_read(fp, buf, size) bgzf_read(fp, buf, size)\n+#define bam_write(fp, buf, size) bgzf_write(fp, buf, size)\n+#define bam_tell(fp) bgzf_tell(fp)\n+#define bam_seek(fp, pos, dir) bgzf_seek(fp, pos, dir)\n+#else\n+#define BAM_TRUE_OFFSET\n+#include <zlib.h>\n+typedef gzFile bamFile;\n+#define bam_open(fn, mode) gzopen(fn, mode)\n+#define bam_dopen(fd, mode) gzdopen(fd, mode)\n+#define bam_close(fp) gzclose(fp)\n+#define bam_read(fp, buf, size) gzread(fp, buf, size)\n+/* no bam_write/bam_tell/bam_seek() here */\n+#endif\n+\n+/*! @typedef\n+ @abstract Structure for the alignment header.\n+ @field n_targets number of reference sequences\n+ @field target_name names of the reference sequences\n+ @field target_len lengths of the referene sequences\n+ @field dict header dictionary\n+ @field hash hash table for fast name lookup\n+ @field rg2lib hash table for @RG-ID -> LB lookup\n+ @field l_text length of the plain text in the header\n+ @field text plain text\n+\n+ @discussion Field hash points to null by default. It is a private\n+ member.\n+ */\n+typedef struct {\n+\tint32_t n_targets;\n+\tchar **target_name;\n+\tuint32_t *target_len;\n+\tvoid *dict, *hash, *rg2lib;\n+\tsize_t l_text, n_text;\n+\tchar *text;\n+} bam_header_t;\n+\n+/*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */\n+#define BAM_FPAIRED 1\n+/*! @abstract the read is mapped in a proper pair */\n+#define BAM_FPROPER_PAIR 2\n+/*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */\n+#define BAM_FUNMAP 4\n+/*! @abstract the mate is unmapped */\n+#define BAM_FMUNMAP 8\n+/*! @abstract the read is mapped to the reverse strand */\n+#define BAM_FREVERSE 16\n+/*! @abstract the mate is mapped to the reverse strand */\n+#define BAM_FMREVERSE 32\n+/*! 
@abstract this is read1 */\n+#define BAM_FREAD1 64\n+/*! @abstract this is read2 */\n+#define BAM_FREAD2 128\n+/*! @abstract not primary alignment */\n+#define BA'..b'g, int end);\n+\tint bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b);\n+\tvoid bam_iter_destroy(bam_iter_t iter);\n+\n+\t/*!\n+\t @abstract Parse a region in the format: "chr2:100,000-200,000".\n+\t @discussion bam_header_t::hash will be initialized if empty.\n+\t @param header pointer to the header structure\n+\t @param str string to be parsed\n+\t @param ref_id the returned chromosome ID\n+\t @param begin the returned start coordinate\n+\t @param end the returned end coordinate\n+\t @return 0 on success; -1 on failure\n+\t */\n+\tint bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end);\n+\n+\n+\t/**************************\n+\t * APIs for optional tags *\n+\t **************************/\n+\n+\t/*!\n+\t @abstract Retrieve data of a tag\n+\t @param b pointer to an alignment struct\n+\t @param tag two-character tag to be retrieved\n+\n+\t @return pointer to the type and data. The first character is the\n+\t type that can be \'iIsScCdfAZH\'.\n+\n+\t @discussion Use bam_aux2?() series to convert the returned data to\n+\t the corresponding type.\n+\t*/\n+\tuint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);\n+\n+\tint32_t bam_aux2i(const uint8_t *s);\n+\tfloat bam_aux2f(const uint8_t *s);\n+\tdouble bam_aux2d(const uint8_t *s);\n+\tchar bam_aux2A(const uint8_t *s);\n+\tchar *bam_aux2Z(const uint8_t *s);\n+\n+\tint bam_aux_del(bam1_t *b, uint8_t *s);\n+\tvoid bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);\n+\tuint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get()\n+\n+\n+\t/*****************\n+\t * Miscellaneous *\n+\t *****************/\n+\n+\t/*! 
\n+\t @abstract Calculate the rightmost coordinate of an alignment on the\n+\t reference genome.\n+\n+\t @param c pointer to the bam1_core_t structure\n+\t @param cigar the corresponding CIGAR array (from bam1_t::cigar)\n+\t @return the rightmost coordinate, 0-based\n+\t*/\n+\tuint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar);\n+\n+\t/*!\n+\t @abstract Calculate the length of the query sequence from CIGAR.\n+\t @param c pointer to the bam1_core_t structure\n+\t @param cigar the corresponding CIGAR array (from bam1_t::cigar)\n+\t @return length of the query sequence\n+\t*/\n+\tint32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar);\n+\n+#ifdef __cplusplus\n+}\n+#endif\n+\n+/*!\n+ @abstract Calculate the minimum bin that contains a region [beg,end).\n+ @param beg start of the region, 0-based\n+ @param end end of the region, 0-based\n+ @return bin\n+ */\n+static inline int bam_reg2bin(uint32_t beg, uint32_t end)\n+{\n+\t--end;\n+\tif (beg>>14 == end>>14) return 4681 + (beg>>14);\n+\tif (beg>>17 == end>>17) return 585 + (beg>>17);\n+\tif (beg>>20 == end>>20) return 73 + (beg>>20);\n+\tif (beg>>23 == end>>23) return 9 + (beg>>23);\n+\tif (beg>>26 == end>>26) return 1 + (beg>>26);\n+\treturn 0;\n+}\n+\n+/*!\n+ @abstract Copy an alignment\n+ @param bdst destination alignment struct\n+ @param bsrc source alignment struct\n+ @return pointer to the destination alignment struct\n+ */\n+static inline bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc)\n+{\n+\tuint8_t *data = bdst->data;\n+\tint m_data = bdst->m_data; // backup data and m_data\n+\tif (m_data < bsrc->m_data) { // double the capacity\n+\t\tm_data = bsrc->m_data; kroundup32(m_data);\n+\t\tdata = (uint8_t*)realloc(data, m_data);\n+\t}\n+\tmemcpy(data, bsrc->data, bsrc->data_len); // copy var-len data\n+\t*bdst = *bsrc; // copy the rest\n+\t// restore the backup\n+\tbdst->m_data = m_data;\n+\tbdst->data = data;\n+\treturn bdst;\n+}\n+\n+/*!\n+ @abstract Duplicate an alignment\n+ @param 
src source alignment struct\n+ @return pointer to the destination alignment struct\n+ */\n+static inline bam1_t *bam_dup1(const bam1_t *src)\n+{\n+\tbam1_t *b;\n+\tb = bam_init1();\n+\t*b = *src;\n+\tb->m_data = b->data_len;\n+\tb->data = (uint8_t*)calloc(b->data_len, 1);\n+\tmemcpy(b->data, src->data, b->data_len);\n+\treturn b;\n+}\n+\n+#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_aux.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_aux.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,182 @@ +#include <ctype.h> +#include "bam.h" +#include "khash.h" +typedef char *str_p; +KHASH_MAP_INIT_STR(s, int) +KHASH_MAP_INIT_STR(r2l, str_p) + +void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data) +{ + int ori_len = b->data_len; + b->data_len += 3 + len; + b->l_aux += 3 + len; + if (b->m_data < b->data_len) { + b->m_data = b->data_len; + kroundup32(b->m_data); + b->data = (uint8_t*)realloc(b->data, b->m_data); + } + b->data[ori_len] = tag[0]; b->data[ori_len + 1] = tag[1]; + b->data[ori_len + 2] = type; + memcpy(b->data + ori_len + 3, data, len); +} + +uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]) +{ + return bam_aux_get(b, tag); +} + +#define __skip_tag(s) do { \ + int type = toupper(*(s)); \ + ++(s); \ + if (type == 'C' || type == 'A') ++(s); \ + else if (type == 'S') (s) += 2; \ + else if (type == 'I' || type == 'F') (s) += 4; \ + else if (type == 'D') (s) += 8; \ + else if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \ + } while (0) + +uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]) +{ + uint8_t *s; + int y = tag[0]<<8 | tag[1]; + s = bam1_aux(b); + while (s < b->data + b->data_len) { + int x = (int)s[0]<<8 | s[1]; + s += 2; + if (x == y) return s; + __skip_tag(s); + } + return 0; +} +// s MUST BE returned by bam_aux_get() +int bam_aux_del(bam1_t *b, uint8_t *s) +{ + uint8_t *p, *aux; + aux = bam1_aux(b); + p = s - 2; + __skip_tag(s); + memmove(p, s, b->l_aux - (s - aux)); + b->data_len -= s - p; + b->l_aux -= s - p; + return 0; +} + +void bam_init_header_hash(bam_header_t *header) +{ + if (header->hash == 0) { + int ret, i; + khiter_t iter; + khash_t(s) *h; + header->hash = h = kh_init(s); + for (i = 0; i < header->n_targets; ++i) { + iter = kh_put(s, h, header->target_name[i], &ret); + kh_value(h, iter) = i; + } + } +} + +void bam_destroy_header_hash(bam_header_t *header) +{ + if (header->hash) + kh_destroy(s, (khash_t(s)*)header->hash); +} + +int32_t bam_get_tid(const 
bam_header_t *header, const char *seq_name) +{ + khint_t k; + khash_t(s) *h = (khash_t(s)*)header->hash; + k = kh_get(s, h, seq_name); + return k == kh_end(h)? -1 : kh_value(h, k); +} + +int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end) +{ + char *s, *p; + int i, l, k; + khiter_t iter; + khash_t(s) *h; + + bam_init_header_hash(header); + h = (khash_t(s)*)header->hash; + + l = strlen(str); + p = s = (char*)malloc(l+1); + /* squeeze out "," */ + for (i = k = 0; i != l; ++i) + if (str[i] != ',' && !isspace(str[i])) s[k++] = str[i]; + s[k] = 0; + for (i = 0; i != k; ++i) if (s[i] == ':') break; + s[i] = 0; + iter = kh_get(s, h, s); /* get the ref_id */ + if (iter == kh_end(h)) { // name not found + *ref_id = -1; free(s); + return -1; + } + *ref_id = kh_value(h, iter); + if (i == k) { /* dump the whole sequence */ + *begin = 0; *end = 1<<29; free(s); + return 0; + } + for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break; + *begin = atoi(p); + if (i < k) { + p = s + i + 1; + *end = atoi(p); + } else *end = 1<<29; + if (*begin > 0) --*begin; + free(s); + if (*begin > *end) { + fprintf(stderr, "[bam_parse_region] invalid region.\n"); + return -1; + } + return 0; +} + +int32_t bam_aux2i(const uint8_t *s) +{ + int type; + if (s == 0) return 0; + type = *s++; + if (type == 'c') return (int32_t)*(int8_t*)s; + else if (type == 'C') return (int32_t)*(uint8_t*)s; + else if (type == 's') return (int32_t)*(int16_t*)s; + else if (type == 'S') return (int32_t)*(uint16_t*)s; + else if (type == 'i' || type == 'I') return *(int32_t*)s; + else return 0; +} + +float bam_aux2f(const uint8_t *s) +{ + int type; + type = *s++; + if (s == 0) return 0.0; + if (type == 'f') return *(float*)s; + else return 0.0; +} + +double bam_aux2d(const uint8_t *s) +{ + int type; + type = *s++; + if (s == 0) return 0.0; + if (type == 'd') return *(double*)s; + else return 0.0; +} + +char bam_aux2A(const uint8_t *s) +{ + int type; + type = *s++; + if (s == 0) 
return 0; + if (type == 'A') return *(char*)s; + else return 0; +} + +char *bam_aux2Z(const uint8_t *s) +{ + int type; + type = *s++; + if (s == 0) return 0; + if (type == 'Z' || type == 'H') return (char*)s; + else return 0; +} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_color.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_color.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,127 @@ +#include <ctype.h> +#include "bam.h" + +/*! + @abstract Get the color encoding the previous and current base + @param b pointer to an alignment + @param i The i-th position, 0-based + @return color + + @discussion Returns 0 no color information is found. + */ +char bam_aux_getCSi(bam1_t *b, int i) +{ + uint8_t *c = bam_aux_get(b, "CS"); + char *cs = NULL; + + // return the base if the tag was not found + if(0 == c) return 0; + + cs = bam_aux2Z(c); + // adjust for strandedness and leading adaptor + if(bam1_strand(b)) i = strlen(cs) - 1 - i; + else i++; + return cs[i]; +} + +/*! + @abstract Get the color quality of the color encoding the previous and current base + @param b pointer to an alignment + @param i The i-th position, 0-based + @return color quality + + @discussion Returns 0 no color information is found. + */ +char bam_aux_getCQi(bam1_t *b, int i) +{ + uint8_t *c = bam_aux_get(b, "CQ"); + char *cq = NULL; + + // return the base if the tag was not found + if(0 == c) return 0; + + cq = bam_aux2Z(c); + // adjust for strandedness + if(bam1_strand(b)) i = strlen(cq) - 1 - i; + return cq[i]; +} + +char bam_aux_nt2int(char a) +{ + switch(toupper(a)) { + case 'A': + return 0; + break; + case 'C': + return 1; + break; + case 'G': + return 2; + break; + case 'T': + return 3; + break; + default: + return 4; + break; + } +} + +char bam_aux_ntnt2cs(char a, char b) +{ + a = bam_aux_nt2int(a); + b = bam_aux_nt2int(b); + if(4 == a || 4 == b) return '4'; + return "0123"[(int)(a ^ b)]; +} + +/*! + @abstract Get the color error profile at the give position + @param b pointer to an alignment + @return the original color if the color was an error, '-' (dash) otherwise + + @discussion Returns 0 no color information is found. 
+ */ +char bam_aux_getCEi(bam1_t *b, int i) +{ + int cs_i; + uint8_t *c = bam_aux_get(b, "CS"); + char *cs = NULL; + char prev_b, cur_b; + char cur_color, cor_color; + + // return the base if the tag was not found + if(0 == c) return 0; + + cs = bam_aux2Z(c); + + // adjust for strandedness and leading adaptor + if(bam1_strand(b)) { //reverse strand + cs_i = strlen(cs) - 1 - i; + // get current color + cur_color = cs[cs_i]; + // get previous base. Note: must rc adaptor + prev_b = (cs_i == 1) ? "TGCAN"[(int)bam_aux_nt2int(cs[0])] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i+1)]; + // get current base + cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; + } + else { + cs_i=i+1; + // get current color + cur_color = cs[cs_i]; + // get previous base + prev_b = (0 == i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i-1)]; + // get current base + cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; + } + + // corrected color + cor_color = bam_aux_ntnt2cs(prev_b, cur_b); + + if(cur_color == cor_color) { + return '-'; + } + else { + return cur_color; + } +} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_endian.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_endian.h Thu Oct 12 16:26:36 2017 -0400 |
b |
#ifndef BAM_ENDIAN_H
#define BAM_ENDIAN_H

#include <stdint.h>

/* Host byte-order probe: non-zero when the first byte in memory of the
 * integer 1 is zero, i.e. the most significant byte is stored first. */
static inline int bam_is_big_endian()
{
	long probe = 1;
	const char *first_byte = (const char *)&probe;
	return *first_byte == 0;
}

/* Reverse the two bytes of a 16-bit value. */
static inline uint16_t bam_swap_endian_2(uint16_t v)
{
	unsigned int wide = v;
	return (uint16_t)((wide << 8) | (wide >> 8));
}

/* In-place variant of bam_swap_endian_2(); returns its argument. */
static inline void *bam_swap_endian_2p(void *x)
{
	uint16_t *slot = (uint16_t*)x;
	*slot = bam_swap_endian_2(*slot);
	return x;
}

/* Reverse the four bytes of a 32-bit value. */
static inline uint32_t bam_swap_endian_4(uint32_t v)
{
	uint32_t byte0 = v & 0xFFU;
	uint32_t byte1 = (v >> 8) & 0xFFU;
	uint32_t byte2 = (v >> 16) & 0xFFU;
	uint32_t byte3 = v >> 24;
	return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}

/* In-place variant of bam_swap_endian_4(); returns its argument. */
static inline void *bam_swap_endian_4p(void *x)
{
	uint32_t *slot = (uint32_t*)x;
	*slot = bam_swap_endian_4(*slot);
	return x;
}

/* Reverse the eight bytes of a 64-bit value. */
static inline uint64_t bam_swap_endian_8(uint64_t v)
{
	uint64_t reversed = 0;
	int k;
	/* peel bytes off the low end of v and push them onto the low end of
	 * the result, so the first byte taken ends up most significant */
	for (k = 0; k < 8; ++k) {
		reversed = (reversed << 8) | (v & 0xFFU);
		v >>= 8;
	}
	return reversed;
}

/* In-place variant of bam_swap_endian_8(); returns its argument. */
static inline void *bam_swap_endian_8p(void *x)
{
	uint64_t *slot = (uint64_t*)x;
	*slot = bam_swap_endian_8(*slot);
	return x;
}

#endif
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_import.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_import.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,459 @@\n+#include <zlib.h>\n+#include <stdio.h>\n+#include <ctype.h>\n+#include <string.h>\n+#include <stdlib.h>\n+#include <unistd.h>\n+#include <assert.h>\n+#ifdef _WIN32\n+#include <fcntl.h>\n+#endif\n+#include "kstring.h"\n+#include "bam.h"\n+#include "sam_header.h"\n+#include "kseq.h"\n+#include "khash.h"\n+\n+KSTREAM_INIT(gzFile, gzread, 8192)\n+KHASH_MAP_INIT_STR(ref, uint64_t)\n+\n+void bam_init_header_hash(bam_header_t *header);\n+void bam_destroy_header_hash(bam_header_t *header);\n+int32_t bam_get_tid(const bam_header_t *header, const char *seq_name);\n+\n+unsigned char bam_nt16_table[256] = {\n+\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+\t 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15,\n+\t15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,\n+\t15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,\n+\t15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,\n+\t15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,\n+\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n+\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15\n+};\n+\n+unsigned short bam_char2flag_table[256] = {\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,BAM_FREAD1,BAM_FREAD2,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\tBAM_FPROPER_PAIR,0,BAM_FMREVERSE,0, 0,BAM_FMUNMAP,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, BAM_FDUP,0,BAM_FQCFAIL,0, 0,0,0,0, 0,0,0,0,\n+\tBAM_FPAIRED,0,BAM_FREVERSE,BAM_FSECONDARY, 0,BAM_FUNMAP,0,0, 
0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n+\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0\n+};\n+\n+char *bam_nt16_rev_table = "=ACMGRSVTWYHKDBN";\n+\n+struct __tamFile_t {\n+\tgzFile fp;\n+\tkstream_t *ks;\n+\tkstring_t *str;\n+\tuint64_t n_lines;\n+\tint is_first;\n+};\n+\n+char **__bam_get_lines(const char *fn, int *_n) // for bam_plcmd.c only\n+{\n+\tchar **list = 0, *s;\n+\tint n = 0, dret, m = 0;\n+\tgzFile fp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r");\n+\tkstream_t *ks;\n+\tkstring_t *str;\n+\tstr = (kstring_t*)calloc(1, sizeof(kstring_t));\n+\tks = ks_init(fp);\n+\twhile (ks_getuntil(ks, \'\\n\', str, &dret) > 0) {\n+\t\tif (n == m) {\n+\t\t\tm = m? m << 1 : 16;\n+\t\t\tlist = (char**)realloc(list, m * sizeof(char*));\n+\t\t}\n+\t\tif (str->s[str->l-1] == \'\\r\')\n+\t\t\tstr->s[--str->l] = \'\\0\';\n+\t\ts = list[n++] = (char*)calloc(str->l + 1, 1);\n+\t\tstrcpy(s, str->s);\n+\t}\n+\tks_destroy(ks);\n+\tgzclose(fp);\n+\tfree(str->s); free(str);\n+\t*_n = n;\n+\treturn list;\n+}\n+\n+static bam_header_t *hash2header(const kh_ref_t *hash)\n+{\n+\tbam_header_t *header;\n+\tkhiter_t k;\n+\theader = bam_header_init();\n+\theader->n_targets = kh_size(hash);\n+\theader->target_name = (char**)calloc(kh_size(hash), sizeof(char*));\n+\theader->target_len = (uint32_t*)calloc(kh_size(hash), 4);\n+\tfor (k = kh_begin(hash); k != kh_end(hash); ++k) {\n+\t\tif (kh_exist(hash, k)) {\n+\t\t\tint i = (int)kh_value(hash, k);\n+\t\t\theader->target_name[i] = (char*)kh_key(hash, k);\n+\t\t\theader->target_len[i] = kh_value(hash, k)>>32;\n+\t\t}\n+\t}\n+\tbam_init_header_hash(header);\n+\treturn header;\n+}\n+bam_header_t *sam_header_read2(const char *fn)\n+{\n+\tbam_header_t *header;\n+\tint c, dret, ret, 
error = 0;\n+\tgzFile fp;\n+\tkstream_t *ks;\n+\tkstring_t *str;\n+\tkh_ref_t *hash;\n+\tkhiter_t k;\n+\tif (fn == 0) return 0;\n+\tfp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r");\n+\tif (fp == 0) return 0;\n+\thash ='..b' sequence length are inconsistent");\n+\t\t\tp = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff;\n+\t\t\tmemset(p, 0, (c->l_qseq+1)/2);\n+\t\t\tfor (i = 0; i < c->l_qseq; ++i)\n+\t\t\t\tp[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2);\n+\t\t} else c->l_qseq = 0;\n+\t\tif (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -6; // qual\n+\t\tz += str->l + 1;\n+\t\tif (strcmp(str->s, "*") && c->l_qseq != strlen(str->s))\n+\t\t\tparse_error(fp->n_lines, "sequence and quality are inconsistent");\n+\t\tp += (c->l_qseq+1)/2;\n+\t\tif (strcmp(str->s, "*") == 0) for (i = 0; i < c->l_qseq; ++i) p[i] = 0xff;\n+\t\telse for (i = 0; i < c->l_qseq; ++i) p[i] = str->s[i] - 33;\n+\t\tdoff += c->l_qseq + (c->l_qseq+1)/2;\n+\t}\n+\tdoff0 = doff;\n+\tif (dret != \'\\n\' && dret != \'\\r\') { // aux\n+\t\twhile (ks_getuntil(ks, KS_SEP_TAB, str, &dret) >= 0) {\n+\t\t\tuint8_t *s, type, key[2];\n+\t\t\tz += str->l + 1;\n+\t\t\tif (str->l < 6 || str->s[2] != \':\' || str->s[4] != \':\')\n+\t\t\t\tparse_error(fp->n_lines, "missing colon in auxiliary data");\n+\t\t\tkey[0] = str->s[0]; key[1] = str->s[1];\n+\t\t\ttype = str->s[3];\n+\t\t\ts = alloc_data(b, doff + 3) + doff;\n+\t\t\ts[0] = key[0]; s[1] = key[1]; s += 2; doff += 2;\n+\t\t\tif (type == \'A\' || type == \'a\' || type == \'c\' || type == \'C\') { // c and C for backward compatibility\n+\t\t\t\ts = alloc_data(b, doff + 2) + doff;\n+\t\t\t\t*s++ = \'A\'; *s = str->s[5];\n+\t\t\t\tdoff += 2;\n+\t\t\t} else if (type == \'I\' || type == \'i\') {\n+\t\t\t\tlong long x;\n+\t\t\t\ts = alloc_data(b, doff + 5) + doff;\n+\t\t\t\tx = (long long)atoll(str->s + 5);\n+\t\t\t\tif (x < 0) {\n+\t\t\t\t\tif (x >= -127) {\n+\t\t\t\t\t\t*s++ = \'c\'; *(int8_t*)s = 
(int8_t)x;\n+\t\t\t\t\t\ts += 1; doff += 2;\n+\t\t\t\t\t} else if (x >= -32767) {\n+\t\t\t\t\t\t*s++ = \'s\'; *(int16_t*)s = (int16_t)x;\n+\t\t\t\t\t\ts += 2; doff += 3;\n+\t\t\t\t\t} else {\n+\t\t\t\t\t\t*s++ = \'i\'; *(int32_t*)s = (int32_t)x;\n+\t\t\t\t\t\ts += 4; doff += 5;\n+\t\t\t\t\t\tif (x < -2147483648ll)\n+\t\t\t\t\t\t\tfprintf(stderr, "Parse warning at line %lld: integer %lld is out of range.",\n+\t\t\t\t\t\t\t\t\t(long long)fp->n_lines, x);\n+\t\t\t\t\t}\n+\t\t\t\t} else {\n+\t\t\t\t\tif (x <= 255) {\n+\t\t\t\t\t\t*s++ = \'C\'; *s++ = (uint8_t)x;\n+\t\t\t\t\t\tdoff += 2;\n+\t\t\t\t\t} else if (x <= 65535) {\n+\t\t\t\t\t\t*s++ = \'S\'; *(uint16_t*)s = (uint16_t)x;\n+\t\t\t\t\t\ts += 2; doff += 3;\n+\t\t\t\t\t} else {\n+\t\t\t\t\t\t*s++ = \'I\'; *(uint32_t*)s = (uint32_t)x;\n+\t\t\t\t\t\ts += 4; doff += 5;\n+\t\t\t\t\t\tif (x > 4294967295ll)\n+\t\t\t\t\t\t\tfprintf(stderr, "Parse warning at line %lld: integer %lld is out of range.",\n+\t\t\t\t\t\t\t\t\t(long long)fp->n_lines, x);\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t} else if (type == \'f\') {\n+\t\t\t\ts = alloc_data(b, doff + 5) + doff;\n+\t\t\t\t*s++ = \'f\';\n+\t\t\t\t*(float*)s = (float)atof(str->s + 5);\n+\t\t\t\ts += 4; doff += 5;\n+\t\t\t} else if (type == \'d\') {\n+\t\t\t\ts = alloc_data(b, doff + 9) + doff;\n+\t\t\t\t*s++ = \'d\';\n+\t\t\t\t*(float*)s = (float)atof(str->s + 9);\n+\t\t\t\ts += 8; doff += 9;\n+\t\t\t} else if (type == \'Z\' || type == \'H\') {\n+\t\t\t\tint size = 1 + (str->l - 5) + 1;\n+\t\t\t\tif (type == \'H\') { // check whether the hex string is valid\n+\t\t\t\t\tint i;\n+\t\t\t\t\tif ((str->l - 5) % 2 == 1) parse_error(fp->n_lines, "length of the hex string not even");\n+\t\t\t\t\tfor (i = 0; i < str->l - 5; ++i) {\n+\t\t\t\t\t\tint c = toupper(str->s[5 + i]);\n+\t\t\t\t\t\tif (!((c >= \'0\' && c <= \'9\') || (c >= \'A\' && c <= \'F\')))\n+\t\t\t\t\t\t\tparse_error(fp->n_lines, "invalid hex character");\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t\ts = alloc_data(b, doff + size) + 
doff;\n+\t\t\t\t*s++ = type;\n+\t\t\t\tmemcpy(s, str->s + 5, str->l - 5);\n+\t\t\t\ts[str->l - 5] = 0;\n+\t\t\t\tdoff += size;\n+\t\t\t} else parse_error(fp->n_lines, "unrecognized type");\n+\t\t\tif (dret == \'\\n\' || dret == \'\\r\') break;\n+\t\t}\n+\t}\n+\tb->l_aux = doff - doff0;\n+\tb->data_len = doff;\n+\treturn z;\n+}\n+\n+tamFile sam_open(const char *fn)\n+{\n+\ttamFile fp;\n+\tgzFile gzfp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "rb") : gzopen(fn, "rb");\n+\tif (gzfp == 0) return 0;\n+\tfp = (tamFile)calloc(1, sizeof(struct __tamFile_t));\n+\tfp->str = (kstring_t*)calloc(1, sizeof(kstring_t));\n+\tfp->fp = gzfp;\n+\tfp->ks = ks_init(fp->fp);\n+\treturn fp;\n+}\n+\n+void sam_close(tamFile fp)\n+{\n+\tif (fp) {\n+\t\tks_destroy(fp->ks);\n+\t\tgzclose(fp->fp);\n+\t\tfree(fp->str->s); free(fp->str);\n+\t\tfree(fp);\n+\t}\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_index.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_index.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,696 @@\n+#include <ctype.h>\n+#include <assert.h>\n+#include "bam.h"\n+#include "khash.h"\n+#include "ksort.h"\n+#include "bam_endian.h"\n+#ifdef _USE_KNETFILE\n+#include "knetfile.h"\n+#endif\n+\n+/*!\n+ @header\n+\n+ Alignment indexing. Before indexing, BAM must be sorted based on the\n+ leftmost coordinate of alignments. In indexing, BAM uses two indices:\n+ a UCSC binning index and a simple linear index. The binning index is\n+ efficient for alignments spanning long distance, while the auxiliary\n+ linear index helps to reduce unnecessary seek calls especially for\n+ short alignments.\n+\n+ The UCSC binning scheme was suggested by Richard Durbin and Lincoln\n+ Stein and is explained by Kent et al. (2002). In this scheme, each bin\n+ represents a contiguous genomic region which can be fully contained in\n+ another bin; each alignment is associated with a bin which represents\n+ the smallest region containing the entire alignment. The binning\n+ scheme is essentially another representation of R-tree. A distinct bin\n+ uniquely corresponds to a distinct internal node in a R-tree. Bin A is\n+ a child of Bin B if region A is contained in B.\n+\n+ In BAM, each bin may span 2^29, 2^26, 2^23, 2^20, 2^17 or 2^14 bp. Bin\n+ 0 spans a 512Mbp region, bins 1-8 span 64Mbp, 9-72 8Mbp, 73-584 1Mbp,\n+ 585-4680 128Kbp and bins 4681-37449 span 16Kbp regions. If we want to\n+ find the alignments overlapped with a region [rbeg,rend), we need to\n+ calculate the list of bins that may be overlapped the region and test\n+ the alignments in the bins to confirm the overlaps. If the specified\n+ region is short, typically only a few alignments in six bins need to\n+ be retrieved. 
The overlapping alignments can be quickly fetched.\n+\n+ */\n+\n+#define BAM_MIN_CHUNK_GAP 32768\n+// 1<<14 is the size of minimum bin.\n+#define BAM_LIDX_SHIFT 14\n+\n+#define BAM_MAX_BIN 37450 // =(8^6-1)/7+1\n+\n+typedef struct {\n+\tuint64_t u, v;\n+} pair64_t;\n+\n+#define pair64_lt(a,b) ((a).u < (b).u)\n+KSORT_INIT(off, pair64_t, pair64_lt)\n+\n+typedef struct {\n+\tuint32_t m, n;\n+\tpair64_t *list;\n+} bam_binlist_t;\n+\n+typedef struct {\n+\tint32_t n, m;\n+\tuint64_t *offset;\n+} bam_lidx_t;\n+\n+KHASH_MAP_INIT_INT(i, bam_binlist_t)\n+\n+struct __bam_index_t {\n+\tint32_t n;\n+\tuint64_t n_no_coor; // unmapped reads without coordinate\n+\tkhash_t(i) **index;\n+\tbam_lidx_t *index2;\n+};\n+\n+// requirement: len <= LEN_MASK\n+static inline void insert_offset(khash_t(i) *h, int bin, uint64_t beg, uint64_t end)\n+{\n+\tkhint_t k;\n+\tbam_binlist_t *l;\n+\tint ret;\n+\tk = kh_put(i, h, bin, &ret);\n+\tl = &kh_value(h, k);\n+\tif (ret) { // not present\n+\t\tl->m = 1; l->n = 0;\n+\t\tl->list = (pair64_t*)calloc(l->m, 16);\n+\t}\n+\tif (l->n == l->m) {\n+\t\tl->m <<= 1;\n+\t\tl->list = (pair64_t*)realloc(l->list, l->m * 16);\n+\t}\n+\tl->list[l->n].u = beg; l->list[l->n++].v = end;\n+}\n+\n+static inline void insert_offset2(bam_lidx_t *index2, bam1_t *b, uint64_t offset)\n+{\n+\tint i, beg, end;\n+\tbeg = b->core.pos >> BAM_LIDX_SHIFT;\n+\tend = (bam_calend(&b->core, bam1_cigar(b)) - 1) >> BAM_LIDX_SHIFT;\n+\tif (index2->m < end + 1) {\n+\t\tint old_m = index2->m;\n+\t\tindex2->m = end + 1;\n+\t\tkroundup32(index2->m);\n+\t\tindex2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8);\n+\t\tmemset(index2->offset + old_m, 0, 8 * (index2->m - old_m));\n+\t}\n+\tif (beg == end) {\n+\t\tif (index2->offset[beg] == 0) index2->offset[beg] = offset;\n+\t} else {\n+\t\tfor (i = beg; i <= end; ++i)\n+\t\t\tif (index2->offset[i] == 0) index2->offset[i] = offset;\n+\t}\n+\tindex2->n = end + 1;\n+}\n+\n+static void merge_chunks(bam_index_t *idx)\n+{\n+#if 
defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)\n+\tkhash_t(i) *index;\n+\tint i, l, m;\n+\tkhint_t k;\n+\tfor (i = 0; i < idx->n; ++i) {\n+\t\tindex = idx->index[i];\n+\t\tfor (k = kh_begin(index); k != kh_end(index); ++k) {\n+\t\t\tbam_binlist_t *p;\n+\t\t\tif (!kh_exist(index, k) || kh_key(index, k) == BAM_MAX_BIN) continue;\n+\t\t\tp = &kh_value(index, k);\n+\t\t\tm = 0;\n+\t\t\tfor (l = 1; l < p->n; ++l) {\n+#ifdef BAM_TRUE_OFFSET\n+\t\t\t\tif (p->list[m].v + BAM_MIN_CHUNK_GA'..b'\tbam_iter_t iter = 0;\n+\n+\tif (beg < 0) beg = 0;\n+\tif (end < beg) return 0;\n+\t// initialize iter\n+\titer = calloc(1, sizeof(struct __bam_iter_t));\n+\titer->tid = tid, iter->beg = beg, iter->end = end; iter->i = -1;\n+\t//\n+\tbins = (uint16_t*)calloc(BAM_MAX_BIN, 2);\n+\tn_bins = reg2bins(beg, end, bins);\n+\tindex = idx->index[tid];\n+\tif (idx->index2[tid].n > 0) {\n+\t\tmin_off = (beg>>BAM_LIDX_SHIFT >= idx->index2[tid].n)? idx->index2[tid].offset[idx->index2[tid].n-1]\n+\t\t\t: idx->index2[tid].offset[beg>>BAM_LIDX_SHIFT];\n+\t\tif (min_off == 0) { // improvement for index files built by tabix prior to 0.1.4\n+\t\t\tint n = beg>>BAM_LIDX_SHIFT;\n+\t\t\tif (n > idx->index2[tid].n) n = idx->index2[tid].n;\n+\t\t\tfor (i = n - 1; i >= 0; --i)\n+\t\t\t\tif (idx->index2[tid].offset[i] != 0) break;\n+\t\t\tif (i >= 0) min_off = idx->index2[tid].offset[i];\n+\t\t}\n+\t} else min_off = 0; // tabix 0.1.2 may produce such index files\n+\tfor (i = n_off = 0; i < n_bins; ++i) {\n+\t\tif ((k = kh_get(i, index, bins[i])) != kh_end(index))\n+\t\t\tn_off += kh_value(index, k).n;\n+\t}\n+\tif (n_off == 0) {\n+\t\tfree(bins); return iter;\n+\t}\n+\toff = (pair64_t*)calloc(n_off, 16);\n+\tfor (i = n_off = 0; i < n_bins; ++i) {\n+\t\tif ((k = kh_get(i, index, bins[i])) != kh_end(index)) {\n+\t\t\tint j;\n+\t\t\tbam_binlist_t *p = &kh_value(index, k);\n+\t\t\tfor (j = 0; j < p->n; ++j)\n+\t\t\t\tif (p->list[j].v > min_off) off[n_off++] = 
p->list[j];\n+\t\t}\n+\t}\n+\tfree(bins);\n+\t{\n+\t\tbam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t));\n+\t\tint l;\n+\t\tks_introsort(off, n_off, off);\n+\t\t// resolve completely contained adjacent blocks\n+\t\tfor (i = 1, l = 0; i < n_off; ++i)\n+\t\t\tif (off[l].v < off[i].v)\n+\t\t\t\toff[++l] = off[i];\n+\t\tn_off = l + 1;\n+\t\t// resolve overlaps between adjacent blocks; this may happen due to the merge in indexing\n+\t\tfor (i = 1; i < n_off; ++i)\n+\t\t\tif (off[i-1].v >= off[i].u) off[i-1].v = off[i].u;\n+\t\t{ // merge adjacent blocks\n+#if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)\n+\t\t\tfor (i = 1, l = 0; i < n_off; ++i) {\n+#ifdef BAM_TRUE_OFFSET\n+\t\t\t\tif (off[l].v + BAM_MIN_CHUNK_GAP > off[i].u) off[l].v = off[i].v;\n+#else\n+\t\t\t\tif (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v;\n+#endif\n+\t\t\t\telse off[++l] = off[i];\n+\t\t\t}\n+\t\t\tn_off = l + 1;\n+#endif\n+\t\t}\n+\t\tbam_destroy1(b);\n+\t}\n+\titer->n_off = n_off; iter->off = off;\n+\treturn iter;\n+}\n+\n+pair64_t *get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int *cnt_off)\n+{ // for pysam compatibility\n+\tbam_iter_t iter;\n+\tpair64_t *off;\n+\titer = bam_iter_query(idx, tid, beg, end);\n+\toff = iter->off; *cnt_off = iter->n_off;\n+\tfree(iter);\n+\treturn off;\n+}\n+\n+void bam_iter_destroy(bam_iter_t iter)\n+{\n+\tif (iter) { free(iter->off); free(iter); }\n+}\n+\n+int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b)\n+{\n+\tif (iter->finished) return -1;\n+\tif (iter->from_first) {\n+\t\tint ret = bam_read1(fp, b);\n+\t\tif (ret < 0) iter->finished = 1;\n+\t\treturn ret;\n+\t}\n+\tif (iter->off == 0) return -1;\n+\tfor (;;) {\n+\t\tint ret;\n+\t\tif (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk\n+\t\t\tif (iter->i == iter->n_off - 1) break; // no more chunks\n+\t\t\tif (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug\n+\t\t\tif (iter->i < 0 
|| iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek\n+\t\t\t\tbam_seek(fp, iter->off[iter->i+1].u, SEEK_SET);\n+\t\t\t\titer->curr_off = bam_tell(fp);\n+\t\t\t}\n+\t\t\t++iter->i;\n+\t\t}\n+\t\tif ((ret = bam_read1(fp, b)) > 0) {\n+\t\t\titer->curr_off = bam_tell(fp);\n+\t\t\tif (b->core.tid != iter->tid || b->core.pos >= iter->end) break; // no need to proceed\n+\t\t\telse if (is_overlap(iter->beg, iter->end, b)) return ret;\n+\t\t} else break; // end of file\n+\t}\n+\titer->finished = 1;\n+\treturn -1;\n+}\n+\n+int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)\n+{\n+\tbam_iter_t iter;\n+\tbam1_t *b;\n+\tb = bam_init1();\n+\titer = bam_iter_query(idx, tid, beg, end);\n+\twhile (bam_iter_read(fp, iter, b) >= 0) func(b, data);\n+\tbam_destroy1(b);\n+\treturn 0;\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_lpileup.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_lpileup.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,198 @@ +#include <stdlib.h> +#include <stdio.h> +#include <assert.h> +#include "bam.h" +#include "ksort.h" + +#define TV_GAP 2 + +typedef struct __freenode_t { + uint32_t level:28, cnt:4; + struct __freenode_t *next; +} freenode_t, *freenode_p; + +#define freenode_lt(a,b) ((a)->cnt < (b)->cnt || ((a)->cnt == (b)->cnt && (a)->level < (b)->level)) +KSORT_INIT(node, freenode_p, freenode_lt) + +/* Memory pool, similar to the one in bam_pileup.c */ +typedef struct { + int cnt, n, max; + freenode_t **buf; +} mempool_t; + +static mempool_t *mp_init() +{ + return (mempool_t*)calloc(1, sizeof(mempool_t)); +} +static void mp_destroy(mempool_t *mp) +{ + int k; + for (k = 0; k < mp->n; ++k) free(mp->buf[k]); + free(mp->buf); free(mp); +} +static inline freenode_t *mp_alloc(mempool_t *mp) +{ + ++mp->cnt; + if (mp->n == 0) return (freenode_t*)calloc(1, sizeof(freenode_t)); + else return mp->buf[--mp->n]; +} +static inline void mp_free(mempool_t *mp, freenode_t *p) +{ + --mp->cnt; p->next = 0; p->cnt = TV_GAP; + if (mp->n == mp->max) { + mp->max = mp->max? 
mp->max<<1 : 256; + mp->buf = (freenode_t**)realloc(mp->buf, sizeof(freenode_t*) * mp->max); + } + mp->buf[mp->n++] = p; +} + +/* core part */ +struct __bam_lplbuf_t { + int max, n_cur, n_pre; + int max_level, *cur_level, *pre_level; + mempool_t *mp; + freenode_t **aux, *head, *tail; + int n_nodes, m_aux; + bam_pileup_f func; + void *user_data; + bam_plbuf_t *plbuf; +}; + +void bam_lplbuf_reset(bam_lplbuf_t *buf) +{ + freenode_t *p, *q; + bam_plbuf_reset(buf->plbuf); + for (p = buf->head; p->next;) { + q = p->next; + mp_free(buf->mp, p); + p = q; + } + buf->head = buf->tail; + buf->max_level = 0; + buf->n_cur = buf->n_pre = 0; + buf->n_nodes = 0; +} + +static int tview_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) +{ + bam_lplbuf_t *tv = (bam_lplbuf_t*)data; + freenode_t *p; + int i, l, max_level; + // allocate memory if necessary + if (tv->max < n) { // enlarge + tv->max = n; + kroundup32(tv->max); + tv->cur_level = (int*)realloc(tv->cur_level, sizeof(int) * tv->max); + tv->pre_level = (int*)realloc(tv->pre_level, sizeof(int) * tv->max); + } + tv->n_cur = n; + // update cnt + for (p = tv->head; p->next; p = p->next) + if (p->cnt > 0) --p->cnt; + // calculate cur_level[] + max_level = 0; + for (i = l = 0; i < n; ++i) { + const bam_pileup1_t *p = pl + i; + if (p->is_head) { + if (tv->head->next && tv->head->cnt == 0) { // then take a free slot + freenode_t *p = tv->head->next; + tv->cur_level[i] = tv->head->level; + mp_free(tv->mp, tv->head); + tv->head = p; + --tv->n_nodes; + } else tv->cur_level[i] = ++tv->max_level; + } else { + tv->cur_level[i] = tv->pre_level[l++]; + if (p->is_tail) { // then return a free slot + tv->tail->level = tv->cur_level[i]; + tv->tail->next = mp_alloc(tv->mp); + tv->tail = tv->tail->next; + ++tv->n_nodes; + } + } + if (tv->cur_level[i] > max_level) max_level = tv->cur_level[i]; + ((bam_pileup1_t*)p)->level = tv->cur_level[i]; + } + assert(l == tv->n_pre); + tv->func(tid, pos, n, pl, tv->user_data); + // 
sort the linked list + if (tv->n_nodes) { + freenode_t *q; + if (tv->n_nodes + 1 > tv->m_aux) { // enlarge + tv->m_aux = tv->n_nodes + 1; + kroundup32(tv->m_aux); + tv->aux = (freenode_t**)realloc(tv->aux, sizeof(void*) * tv->m_aux); + } + for (p = tv->head, i = l = 0; p->next;) { + if (p->level > max_level) { // then discard this entry + q = p->next; + mp_free(tv->mp, p); + p = q; + } else { + tv->aux[i++] = p; + p = p->next; + } + } + tv->aux[i] = tv->tail; // add a proper tail for the loop below + tv->n_nodes = i; + if (tv->n_nodes) { + ks_introsort(node, tv->n_nodes, tv->aux); + for (i = 0; i < tv->n_nodes; ++i) tv->aux[i]->next = tv->aux[i+1]; + tv->head = tv->aux[0]; + } else tv->head = tv->tail; + } + // clean up + tv->max_level = max_level; + memcpy(tv->pre_level, tv->cur_level, tv->n_cur * 4); + // squeeze out terminated levels + for (i = l = 0; i < n; ++i) { + const bam_pileup1_t *p = pl + i; + if (!p->is_tail) + tv->pre_level[l++] = tv->pre_level[i]; + } + tv->n_pre = l; +/* + fprintf(stderr, "%d\t", pos+1); + for (i = 0; i < n; ++i) { + const bam_pileup1_t *p = pl + i; + if (p->is_head) fprintf(stderr, "^"); + if (p->is_tail) fprintf(stderr, "$"); + fprintf(stderr, "%d,", p->level); + } + fprintf(stderr, "\n"); +*/ + return 0; +} + +bam_lplbuf_t *bam_lplbuf_init(bam_pileup_f func, void *data) +{ + bam_lplbuf_t *tv; + tv = (bam_lplbuf_t*)calloc(1, sizeof(bam_lplbuf_t)); + tv->mp = mp_init(); + tv->head = tv->tail = mp_alloc(tv->mp); + tv->func = func; + tv->user_data = data; + tv->plbuf = bam_plbuf_init(tview_func, tv); + return (bam_lplbuf_t*)tv; +} + +void bam_lplbuf_destroy(bam_lplbuf_t *tv) +{ + freenode_t *p, *q; + free(tv->cur_level); free(tv->pre_level); + bam_plbuf_destroy(tv->plbuf); + free(tv->aux); + for (p = tv->head; p->next;) { + q = p->next; + mp_free(tv->mp, p); p = q; + } + mp_free(tv->mp, p); + assert(tv->mp->cnt == 0); + mp_destroy(tv->mp); + free(tv); +} + +int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *tv) +{ + return 
bam_plbuf_push(b, tv->plbuf); +} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_maqcns.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_maqcns.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,614 @@\n+#include <math.h>\n+#include <assert.h>\n+#include "bam.h"\n+#include "bam_maqcns.h"\n+#include "ksort.h"\n+#include "kaln.h"\n+KSORT_INIT_GENERIC(uint32_t)\n+\n+#define INDEL_WINDOW_SIZE 50\n+#define INDEL_EXT_DEP 0.9\n+\n+typedef struct __bmc_aux_t {\n+\tint max;\n+\tuint32_t *info;\n+} bmc_aux_t;\n+\n+typedef struct {\n+\tfloat esum[4], fsum[4];\n+\tuint32_t c[4];\n+\tuint32_t rms_mapQ;\n+} glf_call_aux_t;\n+\n+char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };\n+\n+/*\n+ P(<b1,b2>) = \\theta \\sum_{i=1}^{N-1} 1/i\n+ P(D|<b1,b2>) = \\sum_{k=1}^{N-1} p_k 1/2 [(k/N)^n_2(1-k/N)^n_1 + (k/N)^n1(1-k/N)^n_2]\n+ p_k = 1/k / \\sum_{i=1}^{N-1} 1/i\n+ */\n+static void cal_het(bam_maqcns_t *aa)\n+{\n+\tint k, n1, n2;\n+\tdouble sum_harmo; // harmonic sum\n+\tdouble poly_rate;\n+\n+\tfree(aa->lhet);\n+\taa->lhet = (double*)calloc(256 * 256, sizeof(double));\n+\tsum_harmo = 0.0;\n+\tfor (k = 1; k <= aa->n_hap - 1; ++k)\n+\t\tsum_harmo += 1.0 / k;\n+\tfor (n1 = 0; n1 < 256; ++n1) {\n+\t\tfor (n2 = 0; n2 < 256; ++n2) {\n+\t\t\tlong double sum = 0.0;\n+\t\t\tdouble lC = aa->is_soap? 
0 : lgamma(n1+n2+1) - lgamma(n1+1) - lgamma(n2+1); // \\binom{n1+n2}{n1}\n+\t\t\tfor (k = 1; k <= aa->n_hap - 1; ++k) {\n+\t\t\t\tdouble pk = 1.0 / k / sum_harmo;\n+\t\t\t\tdouble log1 = log((double)k/aa->n_hap);\n+\t\t\t\tdouble log2 = log(1.0 - (double)k/aa->n_hap);\n+\t\t\t\tsum += pk * 0.5 * (expl(log1*n2) * expl(log2*n1) + expl(log1*n1) * expl(log2*n2));\n+\t\t\t}\n+\t\t\taa->lhet[n1<<8|n2] = lC + logl(sum);\n+\t\t}\n+\t}\n+\tpoly_rate = aa->het_rate * sum_harmo;\n+\taa->q_r = -4.343 * log(2.0 * poly_rate / (1.0 - poly_rate));\n+}\n+\n+/** initialize the helper structure */\n+static void cal_coef(bam_maqcns_t *aa)\n+{\n+\tint k, n, q;\n+\tlong double sum_a[257], b[256], q_c[256], tmp[256], fk2[256];\n+\tdouble *lC;\n+\n+\t// aa->lhet will be allocated and initialized \n+\tfree(aa->fk); free(aa->coef);\n+\taa->coef = 0;\n+\taa->fk = (double*)calloc(256, sizeof(double));\n+\taa->fk[0] = fk2[0] = 1.0;\n+\tfor (n = 1; n != 256; ++n) {\n+\t\taa->fk[n] = pow(aa->theta, n) * (1.0 - aa->eta) + aa->eta;\n+\t\tfk2[n] = aa->fk[n>>1]; // this is an approximation, assuming reads equally likely come from both strands\n+\t}\n+\tif (aa->is_soap) return;\n+\taa->coef = (double*)calloc(256*256*64, sizeof(double));\n+\tlC = (double*)calloc(256 * 256, sizeof(double));\n+\tfor (n = 1; n != 256; ++n)\n+\t\tfor (k = 1; k <= n; ++k)\n+\t\t\tlC[n<<8|k] = lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1);\n+\tfor (q = 1; q != 64; ++q) {\n+\t\tdouble e = pow(10.0, -q/10.0);\n+\t\tdouble le = log(e);\n+\t\tdouble le1 = log(1.0-e);\n+\t\tfor (n = 1; n != 256; ++n) {\n+\t\t\tdouble *coef = aa->coef + (q<<16|n<<8);\n+\t\t\tsum_a[n+1] = 0.0;\n+\t\t\tfor (k = n; k >= 0; --k) { // a_k = \\sum_{i=k}^n C^n_k \\epsilon^k (1-\\epsilon)^{n-k}\n+\t\t\t\tsum_a[k] = sum_a[k+1] + expl(lC[n<<8|k] + k*le + (n-k)*le1);\n+\t\t\t\tb[k] = sum_a[k+1] / sum_a[k];\n+\t\t\t\tif (b[k] > 0.99) b[k] = 0.99;\n+\t\t\t}\n+\t\t\tfor (k = 0; k != n; ++k) // log(\\bar\\beta_{nk}(\\bar\\epsilon)^{f_k})\n+\t\t\t\tq_c[k] = -4.343 * 
fk2[k] * logl(b[k] / e);\n+\t\t\tfor (k = 1; k != n; ++k) q_c[k] += q_c[k-1]; // \\prod_{i=0}^k c_i\n+\t\t\tfor (k = 0; k <= n; ++k) { // powl() in 64-bit mode seems broken on my Mac OS X 10.4.9\n+\t\t\t\ttmp[k] = -4.343 * logl(1.0 - expl(fk2[k] * logl(b[k])));\n+\t\t\t\tcoef[k] = (k? q_c[k-1] : 0) + tmp[k]; // this is the final c_{nk}\n+\t\t\t}\n+\t\t}\n+\t}\n+\tfree(lC);\n+}\n+\n+bam_maqcns_t *bam_maqcns_init()\n+{\n+\tbam_maqcns_t *bm;\n+\tbm = (bam_maqcns_t*)calloc(1, sizeof(bam_maqcns_t));\n+\tbm->aux = (bmc_aux_t*)calloc(1, sizeof(bmc_aux_t));\n+\tbm->het_rate = 0.001;\n+\tbm->theta = 0.85;\n+\tbm->n_hap = 2;\n+\tbm->eta = 0.03;\n+\tbm->cap_mapQ = 60;\n+\treturn bm;\n+}\n+\n+void bam_maqcns_prepare(bam_maqcns_t *bm)\n+{\n+\tcal_coef(bm); cal_het(bm);\n+}\n+\n+void bam_maqcns_destroy(bam_maqcns_t *bm)\n+{\n+\tif (bm == 0) return;\n+\tfree(bm->lhet); free(bm->fk); free(bm->coef); free(bm->aux->info);\n+\tfree(bm->aux); free(bm);\n+}\n+\n+glf1_t *bam_maqcns_glfgen(int _n, const bam_pileup1_t *pl, uint8_t ref_base, bam_maqcns_t *bm)\n+{\n+\tglf_call_aux_t *b;\n+\tint i, j, k, w[8], c, n;\n+\tglf1_t *g = (glf1_t*)calloc(1, sizeof(glf1_t));\n+'..b'or (l = 0; l < n_acigar; ++l) fprintf(stderr, "%d%c", acigar[l]>>4, "MIDS"[acigar[l]&0xf]);\n+\t\t\t\t\t\tfprintf(stderr, "\\n");\n+\t\t\t\t\t\tfor (l = 0; l < tend - tbeg + types[i]; ++l) fputc("ACGTN"[ref2[l+tbeg-left]], stderr);\n+\t\t\t\t\t\tfputc(\'\\n\', stderr);\n+\t\t\t\t\t\tfor (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[rs[l]], stderr);\n+\t\t\t\t\t\tfputc(\'\\n\', stderr);\n+\t\t\t\t\t\t}*/\n+\t\t\t\t\tfree(acigar);\n+\t\t\t\t}\n+\t\t\t}\n+\t\t}\n+\t\t{ // get final result\n+\t\t\tint *sum, max1, max2, max1_i, max2_i;\n+\t\t\t// pick up the best two score\n+\t\t\tsum = (int*)calloc(n_types, sizeof(int));\n+\t\t\tfor (i = 0; i < n_types; ++i)\n+\t\t\t\tfor (j = 0; j < n; ++j)\n+\t\t\t\t\tsum[i] += -pscore[i*n+j];\n+\t\t\tmax1 = max2 = -0x7fffffff; max1_i = max2_i = -1;\n+\t\t\tfor (i = 0; i < n_types; ++i) 
{\n+\t\t\t\tif (sum[i] > max1) {\n+\t\t\t\t\tmax2 = max1; max2_i = max1_i; max1 = sum[i]; max1_i = i;\n+\t\t\t\t} else if (sum[i] > max2) {\n+\t\t\t\t\tmax2 = sum[i]; max2_i = i;\n+\t\t\t\t}\n+\t\t\t}\n+\t\t\tfree(sum);\n+\t\t\t// write ret\n+\t\t\tret = (bam_maqindel_ret_t*)calloc(1, sizeof(bam_maqindel_ret_t));\n+\t\t\tret->indel1 = types[max1_i]; ret->indel2 = types[max2_i];\n+\t\t\tret->s[0] = (char*)calloc(abs(ret->indel1) + 2, 1);\n+\t\t\tret->s[1] = (char*)calloc(abs(ret->indel2) + 2, 1);\n+\t\t\t// write indel sequence\n+\t\t\tif (ret->indel1 > 0) {\n+\t\t\t\tret->s[0][0] = \'+\';\n+\t\t\t\tfor (k = 0; k < ret->indel1; ++k)\n+\t\t\t\t\tret->s[0][k+1] = bam_nt16_rev_table[(int)inscns[max1_i*max_ins + k]];\n+\t\t\t} else if (ret->indel1 < 0) {\n+\t\t\t\tret->s[0][0] = \'-\';\n+\t\t\t\tfor (k = 0; k < -ret->indel1 && ref[pos + k + 1]; ++k)\n+\t\t\t\t\tret->s[0][k+1] = ref[pos + k + 1];\n+\t\t\t} else ret->s[0][0] = \'*\';\n+\t\t\tif (ret->indel2 > 0) {\n+\t\t\t\tret->s[1][0] = \'+\';\n+\t\t\t\tfor (k = 0; k < ret->indel2; ++k)\n+\t\t\t\t\tret->s[1][k+1] = bam_nt16_rev_table[(int)inscns[max2_i*max_ins + k]];\n+\t\t\t} else if (ret->indel2 < 0) {\n+\t\t\t\tret->s[1][0] = \'-\';\n+\t\t\t\tfor (k = 0; k < -ret->indel2 && ref[pos + k + 1]; ++k)\n+\t\t\t\t\tret->s[1][k+1] = ref[pos + k + 1];\n+\t\t\t} else ret->s[1][0] = \'*\';\n+\t\t\t// write count\n+\t\t\tfor (i = 0; i < n; ++i) {\n+\t\t\t\tconst bam_pileup1_t *p = pl + i;\n+\t\t\t\tif (p->indel == ret->indel1) ++ret->cnt1;\n+\t\t\t\telse if (p->indel == ret->indel2) ++ret->cnt2;\n+\t\t\t\telse ++ret->cnt_anti;\n+\t\t\t}\n+\t\t\t{ // write gl[]\n+\t\t\t\tint tmp, seq_err = 0;\n+\t\t\t\tdouble x = 1.0;\n+\t\t\t\ttmp = max1_i - max2_i;\n+\t\t\t\tif (tmp < 0) tmp = -tmp;\n+\t\t\t\tfor (j = 0; j < tmp + 1; ++j) x *= INDEL_EXT_DEP;\n+\t\t\t\tseq_err = mi->q_indel * (1.0 - x) / (1.0 - INDEL_EXT_DEP);\n+\t\t\t\tret->gl[0] = ret->gl[1] = 0;\n+\t\t\t\tfor (j = 0; j < n; ++j) {\n+\t\t\t\t\tint s1 = pscore[max1_i*n + j], s2 
= pscore[max2_i*n + j];\n+\t\t\t\t\t//fprintf(stderr, "id=%d, %d, %d, %d, %d, %d\\n", j, pl[j].b->core.pos+1, types[max1_i], types[max2_i], s1, s2);\n+\t\t\t\t\tif (s1 > s2) ret->gl[0] += s1 - s2 < seq_err? s1 - s2 : seq_err;\n+\t\t\t\t\telse ret->gl[1] += s2 - s1 < seq_err? s2 - s1 : seq_err;\n+\t\t\t\t}\n+\t\t\t}\n+\t\t\t// write cnt_ref and cnt_ambi\n+\t\t\tif (max1_i != 0 && max2_i != 0) {\n+\t\t\t\tfor (j = 0; j < n; ++j) {\n+\t\t\t\t\tint diff1 = score[j] - score[max1_i * n + j];\n+\t\t\t\t\tint diff2 = score[j] - score[max2_i * n + j];\n+\t\t\t\t\tif (diff1 > 0 && diff2 > 0) ++ret->cnt_ref;\n+\t\t\t\t\telse if (diff1 == 0 || diff2 == 0) ++ret->cnt_ambi;\n+\t\t\t\t}\n+\t\t\t}\n+\t\t}\n+\t\tfree(score); free(pscore); free(ref2); free(rs); free(inscns);\n+\t}\n+\t{ // call genotype\n+\t\tint q[3], qr_indel = (int)(-4.343 * log(mi->r_indel) + 0.5);\n+\t\tint min1, min2, min1_i;\n+\t\tq[0] = ret->gl[0] + (ret->s[0][0] != \'*\'? 0 : 0) * qr_indel;\n+\t\tq[1] = ret->gl[1] + (ret->s[1][0] != \'*\'? 0 : 0) * qr_indel;\n+\t\tq[2] = n * 3 + (ret->s[0][0] == \'*\' || ret->s[1][0] == \'*\'? 1 : 1) * qr_indel;\n+\t\tmin1 = min2 = 0x7fffffff; min1_i = -1;\n+\t\tfor (i = 0; i < 3; ++i) {\n+\t\t\tif (q[i] < min1) {\n+\t\t\t\tmin2 = min1; min1 = q[i]; min1_i = i;\n+\t\t\t} else if (q[i] < min2) min2 = q[i];\n+\t\t}\n+\t\tret->gt = min1_i;\n+\t\tret->q_cns = min2 - min1;\n+\t\t// set q_ref\n+\t\tif (ret->gt < 2) ret->q_ref = (ret->s[ret->gt][0] == \'*\')? 0 : q[1-ret->gt] - q[ret->gt] - qr_indel - 3;\n+\t\telse ret->q_ref = (ret->s[0][0] == \'*\')? q[0] - q[2] : q[1] - q[2];\n+\t\tif (ret->q_ref < 0) ret->q_ref = 0;\n+\t}\n+\tfree(types);\n+\treturn ret;\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_maqcns.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_maqcns.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,57 @@ +#ifndef BAM_MAQCNS_H +#define BAM_MAQCNS_H + +#include "glf.h" + +struct __bmc_aux_t; + +typedef struct { + float het_rate, theta; + int n_hap, cap_mapQ, is_soap; + + float eta, q_r; + double *fk, *coef; + double *lhet; + struct __bmc_aux_t *aux; +} bam_maqcns_t; + +typedef struct { + int q_indel; // indel sequencing error, phred scaled + float r_indel; // indel prior + float r_snp; // snp prior + // hidden parameters, unchangeable from command line + int mm_penalty, indel_err, ambi_thres; +} bam_maqindel_opt_t; + +typedef struct { + int indel1, indel2; + int cnt1, cnt2, cnt_anti; + int cnt_ref, cnt_ambi; + char *s[2]; + // + int gt, gl[2]; + int q_cns, q_ref; +} bam_maqindel_ret_t; + +#ifdef __cplusplus +extern "C" { +#endif + + bam_maqcns_t *bam_maqcns_init(); + void bam_maqcns_prepare(bam_maqcns_t *bm); + void bam_maqcns_destroy(bam_maqcns_t *bm); + glf1_t *bam_maqcns_glfgen(int n, const bam_pileup1_t *pl, uint8_t ref_base, bam_maqcns_t *bm); + uint32_t bam_maqcns_call(int n, const bam_pileup1_t *pl, bam_maqcns_t *bm); + // return: cns<<28 | cns2<<24 | mapQ<<16 | cnsQ<<8 | cnsQ2 + uint32_t glf2cns(const glf1_t *g, int q_r); + + bam_maqindel_opt_t *bam_maqindel_opt_init(); + bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, const bam_pileup1_t *pl, const char *ref, + int _n_types, int *_types); + void bam_maqindel_ret_destroy(bam_maqindel_ret_t*); + +#ifdef __cplusplus +} +#endif + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_mate.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_mate.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,70 @@ +#include <stdlib.h> +#include <string.h> +#include "bam.h" + +// currently, this function ONLY works if each read has one hit +void bam_mating_core(bamFile in, bamFile out) +{ + bam_header_t *header; + bam1_t *b[2]; + int curr, has_prev; + + header = bam_header_read(in); + bam_header_write(out, header); + + b[0] = bam_init1(); + b[1] = bam_init1(); + curr = 0; has_prev = 0; + while (bam_read1(in, b[curr]) >= 0) { + bam1_t *cur = b[curr], *pre = b[1-curr]; + if (has_prev) { + if (strcmp(bam1_qname(cur), bam1_qname(pre)) == 0) { // identical pair name + cur->core.mtid = pre->core.tid; cur->core.mpos = pre->core.pos; + pre->core.mtid = cur->core.tid; pre->core.mpos = cur->core.pos; + if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP)) + && !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) + { + uint32_t cur5, pre5; + cur5 = (cur->core.flag&BAM_FREVERSE)? bam_calend(&cur->core, bam1_cigar(cur)) : cur->core.pos; + pre5 = (pre->core.flag&BAM_FREVERSE)? 
bam_calend(&pre->core, bam1_cigar(pre)) : pre->core.pos; + cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5; + } else cur->core.isize = pre->core.isize = 0; + if (pre->core.flag&BAM_FREVERSE) cur->core.flag |= BAM_FMREVERSE; + else cur->core.flag &= ~BAM_FMREVERSE; + if (cur->core.flag&BAM_FREVERSE) pre->core.flag |= BAM_FMREVERSE; + else pre->core.flag &= ~BAM_FMREVERSE; + if (cur->core.flag & BAM_FUNMAP) { pre->core.flag |= BAM_FMUNMAP; pre->core.flag &= ~BAM_FPROPER_PAIR; } + if (pre->core.flag & BAM_FUNMAP) { cur->core.flag |= BAM_FMUNMAP; cur->core.flag &= ~BAM_FPROPER_PAIR; } + bam_write1(out, pre); + bam_write1(out, cur); + has_prev = 0; + } else { // unpaired or singleton + pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0; + if (pre->core.flag & BAM_FPAIRED) { + pre->core.flag |= BAM_FMUNMAP; + pre->core.flag &= ~BAM_FMREVERSE & ~BAM_FPROPER_PAIR; + } + bam_write1(out, pre); + } + } else has_prev = 1; + curr = 1 - curr; + } + if (has_prev) bam_write1(out, b[1-curr]); + bam_header_destroy(header); + bam_destroy1(b[0]); + bam_destroy1(b[1]); +} + +int bam_mating(int argc, char *argv[]) +{ + bamFile in, out; + if (argc < 3) { + fprintf(stderr, "samtools fixmate <in.nameSrt.bam> <out.nameSrt.bam>\n"); + return 1; + } + in = (strcmp(argv[1], "-") == 0)? bam_dopen(fileno(stdin), "r") : bam_open(argv[1], "r"); + out = (strcmp(argv[2], "-") == 0)? bam_dopen(fileno(stdout), "w") : bam_open(argv[2], "w"); + bam_mating_core(in, out); + bam_close(in); bam_close(out); + return 0; +} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_md.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_md.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,175 @@ +#include <unistd.h> +#include <assert.h> +#include <string.h> +#include <ctype.h> +#include "faidx.h" +#include "sam.h" +#include "kstring.h" + +void bam_fillmd1_core(bam1_t *b, char *ref, int is_equal, int max_nm) +{ + uint8_t *seq = bam1_seq(b); + uint32_t *cigar = bam1_cigar(b); + bam1_core_t *c = &b->core; + int i, x, y, u = 0; + kstring_t *str; + uint8_t *old_md, *old_nm; + int32_t old_nm_i = -1, nm = 0; + + str = (kstring_t*)calloc(1, sizeof(kstring_t)); + for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) { + int j, l = cigar[i]>>4, op = cigar[i]&0xf; + if (op == BAM_CMATCH) { + for (j = 0; j < l; ++j) { + int z = y + j; + int c1 = bam1_seqi(seq, z), c2 = bam_nt16_table[(int)ref[x+j]]; + if (ref[x+j] == 0) break; // out of boundary + if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match + if (is_equal) seq[z/2] &= (z&1)? 0xf0 : 0x0f; + ++u; + } else { + ksprintf(str, "%d", u); + kputc(ref[x+j], str); + u = 0; ++nm; + } + } + if (j < l) break; + x += l; y += l; + } else if (op == BAM_CDEL) { + ksprintf(str, "%d", u); + kputc('^', str); + for (j = 0; j < l; ++j) { + if (ref[x+j] == 0) break; + kputc(ref[x+j], str); + } + u = 0; + if (j < l) break; + x += l; nm += l; + } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) { + y += l; + if (op == BAM_CINS) nm += l; + } else if (op == BAM_CREF_SKIP) { + x += l; + } + } + ksprintf(str, "%d", u); + // apply max_nm + if (max_nm > 0 && nm >= max_nm) { + for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) { + int j, l = cigar[i]>>4, op = cigar[i]&0xf; + if (op == BAM_CMATCH) { + for (j = 0; j < l; ++j) { + int z = y + j; + int c1 = bam1_seqi(seq, z), c2 = bam_nt16_table[(int)ref[x+j]]; + if (ref[x+j] == 0) break; // out of boundary + if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match + seq[z/2] |= (z&1)? 
0x0f : 0xf0; + bam1_qual(b)[z] = 0; + } + } + if (j < l) break; + x += l; y += l; + } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l; + else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l; + } + } + // update NM + old_nm = bam_aux_get(b, "NM"); + if (c->flag & BAM_FUNMAP) return; + if (old_nm) old_nm_i = bam_aux2i(old_nm); + if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm); + else if (nm != old_nm_i) { + fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam1_qname(b), old_nm_i, nm); + bam_aux_del(b, old_nm); + bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm); + } + // update MD + old_md = bam_aux_get(b, "MD"); + if (!old_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s); + else { + int is_diff = 0; + if (strlen((char*)old_md+1) == str->l) { + for (i = 0; i < str->l; ++i) + if (toupper(old_md[i+1]) != toupper(str->s[i])) + break; + if (i < str->l) is_diff = 1; + } else is_diff = 1; + if (is_diff) { + fprintf(stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam1_qname(b), old_md+1, str->s); + bam_aux_del(b, old_md); + bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s); + } + } + free(str->s); free(str); +} + +void bam_fillmd1(bam1_t *b, char *ref, int is_equal) +{ + bam_fillmd1_core(b, ref, is_equal, 0); +} + +int bam_fillmd(int argc, char *argv[]) +{ + int c, is_equal = 0, tid = -2, ret, len, is_bam_out, is_sam_in, is_uncompressed, max_nm = 0; + samfile_t *fp, *fpout = 0; + faidx_t *fai; + char *ref = 0, mode_w[8], mode_r[8]; + bam1_t *b; + + is_bam_out = is_sam_in = is_uncompressed = 0; + mode_w[0] = mode_r[0] = 0; + strcpy(mode_r, "r"); strcpy(mode_w, "w"); + while ((c = getopt(argc, argv, "eubSn:")) >= 0) { + switch (c) { + case 'e': is_equal = 1; break; + case 'b': is_bam_out = 1; break; + case 'u': is_uncompressed = is_bam_out = 1; break; + case 'S': is_sam_in = 1; break; + case 'n': max_nm = atoi(optarg); break; + default: fprintf(stderr, "[bam_fillmd] unrecognized option 
'-%c'\n", c); return 1; + } + } + if (!is_sam_in) strcat(mode_r, "b"); + if (is_bam_out) strcat(mode_w, "b"); + else strcat(mode_w, "h"); + if (is_uncompressed) strcat(mode_w, "u"); + if (optind + 1 >= argc) { + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: samtools fillmd [-eubS] <aln.bam> <ref.fasta>\n\n"); + fprintf(stderr, "Options: -e change identical bases to '='\n"); + fprintf(stderr, " -u uncompressed BAM output (for piping)\n"); + fprintf(stderr, " -b compressed BAM output\n"); + fprintf(stderr, " -S the input is SAM with header\n\n"); + return 1; + } + fp = samopen(argv[optind], mode_r, 0); + if (fp == 0) return 1; + if (is_sam_in && (fp->header == 0 || fp->header->n_targets == 0)) { + fprintf(stderr, "[bam_fillmd] input SAM does not have header. Abort!\n"); + return 1; + } + fpout = samopen("-", mode_w, fp->header); + fai = fai_load(argv[optind+1]); + + b = bam_init1(); + while ((ret = samread(fp, b)) >= 0) { + if (b->core.tid >= 0) { + if (tid != b->core.tid) { + free(ref); + ref = fai_fetch(fai, fp->header->target_name[b->core.tid], &len); + tid = b->core.tid; + if (ref == 0) + fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n", + fp->header->target_name[tid]); + } + if (ref) bam_fillmd1_core(b, ref, is_equal, max_nm); + } + samwrite(fpout, b); + } + bam_destroy1(b); + + free(ref); + fai_destroy(fai); + samclose(fp); samclose(fpout); + return 0; +} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_pileup.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_pileup.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,396 @@\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <ctype.h>\n+#include <assert.h>\n+#include "sam.h"\n+\n+typedef struct __linkbuf_t {\n+\tbam1_t b;\n+\tuint32_t beg, end;\n+\tstruct __linkbuf_t *next;\n+} lbnode_t;\n+\n+/* --- BEGIN: Memory pool */\n+\n+typedef struct {\n+\tint cnt, n, max;\n+\tlbnode_t **buf;\n+} mempool_t;\n+\n+static mempool_t *mp_init()\n+{\n+\tmempool_t *mp;\n+\tmp = (mempool_t*)calloc(1, sizeof(mempool_t));\n+\treturn mp;\n+}\n+static void mp_destroy(mempool_t *mp)\n+{\n+\tint k;\n+\tfor (k = 0; k < mp->n; ++k) {\n+\t\tfree(mp->buf[k]->b.data);\n+\t\tfree(mp->buf[k]);\n+\t}\n+\tfree(mp->buf);\n+\tfree(mp);\n+}\n+static inline lbnode_t *mp_alloc(mempool_t *mp)\n+{\n+\t++mp->cnt;\n+\tif (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t));\n+\telse return mp->buf[--mp->n];\n+}\n+static inline void mp_free(mempool_t *mp, lbnode_t *p)\n+{\n+\t--mp->cnt; p->next = 0; // clear lbnode_t::next here\n+\tif (mp->n == mp->max) {\n+\t\tmp->max = mp->max? 
mp->max<<1 : 256;\n+\t\tmp->buf = (lbnode_t**)realloc(mp->buf, sizeof(lbnode_t*) * mp->max);\n+\t}\n+\tmp->buf[mp->n++] = p;\n+}\n+\n+/* --- END: Memory pool */\n+\n+/* --- BEGIN: Auxiliary functions */\n+\n+static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)\n+{\n+\tunsigned k;\n+\tbam1_t *b = p->b;\n+\tbam1_core_t *c = &b->core;\n+\tuint32_t x = c->pos, y = 0;\n+\tint ret = 1, is_restart = 1;\n+\n+\tif (c->flag&BAM_FUNMAP) return 0; // unmapped read\n+\tassert(x <= pos); // otherwise a bug\n+\tp->qpos = -1; p->indel = 0; p->is_del = p->is_head = p->is_tail = 0;\n+\tfor (k = 0; k < c->n_cigar; ++k) {\n+\t\tint op = bam1_cigar(b)[k] & BAM_CIGAR_MASK; // operation\n+\t\tint l = bam1_cigar(b)[k] >> BAM_CIGAR_SHIFT; // length\n+\t\tif (op == BAM_CMATCH) { // NOTE: this assumes the first and the last operation MUST BE a match or a clip\n+\t\t\tif (x + l > pos) { // overlap with pos\n+\t\t\t\tp->indel = p->is_del = 0;\n+\t\t\t\tp->qpos = y + (pos - x);\n+\t\t\t\tif (x == pos && is_restart) p->is_head = 1;\n+\t\t\t\tif (x + l - 1 == pos) { // come to the end of a match\n+\t\t\t\t\tint has_next_match = 0;\n+\t\t\t\t\tunsigned i;\n+\t\t\t\t\tfor (i = k + 1; i < c->n_cigar; ++i) {\n+\t\t\t\t\t\tuint32_t cigar = bam1_cigar(b)[i];\n+\t\t\t\t\t\tint opi = cigar&BAM_CIGAR_MASK;\n+\t\t\t\t\t\tif (opi == BAM_CMATCH) {\n+\t\t\t\t\t\t\thas_next_match = 1;\n+\t\t\t\t\t\t\tbreak;\n+\t\t\t\t\t\t} else if (opi == BAM_CSOFT_CLIP || opi == BAM_CREF_SKIP || opi == BAM_CHARD_CLIP) break;\n+\t\t\t\t\t}\n+\t\t\t\t\tif (!has_next_match) p->is_tail = 1;\n+\t\t\t\t\tif (k < c->n_cigar - 1 && has_next_match) { // there are additional operation(s)\n+\t\t\t\t\t\tuint32_t cigar = bam1_cigar(b)[k+1]; // next CIGAR\n+\t\t\t\t\t\tint op_next = cigar&BAM_CIGAR_MASK; // next CIGAR operation\n+\t\t\t\t\t\tif (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del\n+\t\t\t\t\t\telse if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins\n+\t\t\t\t\t\telse if 
(op_next == BAM_CPAD && k + 2 < c->n_cigar) { // no working for adjacent padding\n+\t\t\t\t\t\t\tcigar = bam1_cigar(b)[k+2]; op_next = cigar&BAM_CIGAR_MASK;\n+\t\t\t\t\t\t\tif (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del\n+\t\t\t\t\t\t\telse if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins\n+\t\t\t\t\t\t}\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t}\n+\t\t\tx += l; y += l;\n+\t\t} else if (op == BAM_CDEL) { // then set ->is_del\n+\t\t\tif (x + l > pos) {\n+\t\t\t\tp->indel = 0; p->is_del = 1;\n+\t\t\t\tp->qpos = y + (pos - x);\n+\t\t\t}\n+\t\t\tx += l;\n+\t\t} else if (op == BAM_CREF_SKIP) x += l;\n+\t\telse if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;\n+\t\tif (is_restart) is_restart ^= (op == BAM_CMATCH);\n+\t\telse is_restart ^= (op == BAM_CREF_SKIP || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP);\n+\t\tif (x > pos) {\n+\t\t\tif (op == BAM_CREF_SKIP) ret = 0; // then do not put it into pileup at all\n+\t\t\tbreak;\n+\t\t}\n+\t}\n+\tassert(x > pos); // otherwise a bug\n+\treturn ret;\n+}\n+\n+/* --- END: Auxiliary functions */\n+\n+/*******************\n+ * pileup iterator *\n+ *******************/\n+\n+struct __bam_plp_t {\n+\tmempool_t *mp;\n+\tlbnode_t *head, *tail, *dummy;\n+\tint32_t tid, pos, max_tid, max_pos;\n+\tint is_eof, flag_mask, max_plp, error;\n+\tbam_pileup1_t *plp;\n+\t// for the "auto"'..b're.tid > iter->tid) {\n+\t\t\titer->tail->next = mp_alloc(iter->mp);\n+\t\t\titer->tail = iter->tail->next;\n+\t\t}\n+\t} else iter->is_eof = 1;\n+\treturn 0;\n+}\n+\n+const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)\n+{\n+\tconst bam_pileup1_t *plp;\n+\tif (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; }\n+\tif ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;\n+\telse {\n+\t\t*_n_plp = 0;\n+\t\tif (iter->is_eof) return 0;\n+\t\twhile (iter->func(iter->data, iter->b) >= 0) {\n+\t\t\tif (bam_plp_push(iter, iter->b) < 0) {\n+\t\t\t\t*_n_plp = 
-1;\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\t\t\tif ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;\n+\t\t}\n+\t\tbam_plp_push(iter, 0);\n+\t\tif ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;\n+\t\treturn 0;\n+\t}\n+}\n+\n+void bam_plp_reset(bam_plp_t iter)\n+{\n+\tlbnode_t *p, *q;\n+\titer->max_tid = iter->max_pos = -1;\n+\titer->tid = iter->pos = 0;\n+\titer->is_eof = 0;\n+\tfor (p = iter->head; p->next;) {\n+\t\tq = p->next;\n+\t\tmp_free(iter->mp, p);\n+\t\tp = q;\n+\t}\n+\titer->head = iter->tail;\n+}\n+\n+void bam_plp_set_mask(bam_plp_t iter, int mask)\n+{\n+\titer->flag_mask = mask < 0? BAM_DEF_MASK : (BAM_FUNMAP | mask);\n+}\n+\n+/*****************\n+ * callback APIs *\n+ *****************/\n+\n+int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data)\n+{\n+\tbam_plbuf_t *buf;\n+\tint ret;\n+\tbam1_t *b;\n+\tb = bam_init1();\n+\tbuf = bam_plbuf_init(func, func_data);\n+\tbam_plbuf_set_mask(buf, mask);\n+\twhile ((ret = bam_read1(fp, b)) >= 0)\n+\t\tbam_plbuf_push(b, buf);\n+\tbam_plbuf_push(0, buf);\n+\tbam_plbuf_destroy(buf);\n+\tbam_destroy1(b);\n+\treturn 0;\n+}\n+\n+void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask)\n+{\n+\tbam_plp_set_mask(buf->iter, mask);\n+}\n+\n+void bam_plbuf_reset(bam_plbuf_t *buf)\n+{\n+\tbam_plp_reset(buf->iter);\n+}\n+\n+bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data)\n+{\n+\tbam_plbuf_t *buf;\n+\tbuf = calloc(1, sizeof(bam_plbuf_t));\n+\tbuf->iter = bam_plp_init(0, 0);\n+\tbuf->func = func;\n+\tbuf->data = data;\n+\treturn buf;\n+}\n+\n+void bam_plbuf_destroy(bam_plbuf_t *buf)\n+{\n+\tbam_plp_destroy(buf->iter);\n+\tfree(buf);\n+}\n+\n+int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf)\n+{\n+\tint ret, n_plp, tid, pos;\n+\tconst bam_pileup1_t *plp;\n+\tret = bam_plp_push(buf->iter, b);\n+\tif (ret < 0) return ret;\n+\twhile ((plp = bam_plp_next(buf->iter, &tid, &pos, &n_plp)) != 0)\n+\t\tbuf->func(tid, pos, n_plp, plp, buf->data);\n+\treturn 
0;\n+}\n+\n+/***********\n+ * mpileup *\n+ ***********/\n+\n+struct __bam_mplp_t {\n+\tint n;\n+\tuint64_t min, *pos;\n+\tbam_plp_t *iter;\n+\tint *n_plp;\n+\tconst bam_pileup1_t **plp;\n+};\n+\n+bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data)\n+{\n+\tint i;\n+\tbam_mplp_t iter;\n+\titer = calloc(1, sizeof(struct __bam_mplp_t));\n+\titer->pos = calloc(n, 8);\n+\titer->n_plp = calloc(n, sizeof(int));\n+\titer->plp = calloc(n, sizeof(void*));\n+\titer->iter = calloc(n, sizeof(void*));\n+\titer->n = n;\n+\titer->min = (uint64_t)-1;\n+\tfor (i = 0; i < n; ++i) {\n+\t\titer->iter[i] = bam_plp_init(func, data[i]);\n+\t\titer->pos[i] = iter->min;\n+\t}\n+\treturn iter;\n+}\n+\n+void bam_mplp_destroy(bam_mplp_t iter)\n+{\n+\tint i;\n+\tfor (i = 0; i < iter->n; ++i) bam_plp_destroy(iter->iter[i]);\n+\tfree(iter->iter); free(iter->pos); free(iter->n_plp); free(iter->plp);\n+\tfree(iter);\n+}\n+\n+int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp)\n+{\n+\tint i, ret = 0;\n+\tuint64_t new_min = (uint64_t)-1;\n+\tfor (i = 0; i < iter->n; ++i) {\n+\t\tif (iter->pos[i] == iter->min) {\n+\t\t\tint tid, pos;\n+\t\t\titer->plp[i] = bam_plp_auto(iter->iter[i], &tid, &pos, &iter->n_plp[i]);\n+\t\t\titer->pos[i] = (uint64_t)tid<<32 | pos;\n+\t\t}\n+\t\tif (iter->plp[i] && iter->pos[i] < new_min) new_min = iter->pos[i];\n+\t}\n+\titer->min = new_min;\n+\tif (new_min == (uint64_t)-1) return 0;\n+\t*_tid = new_min>>32; *_pos = (uint32_t)new_min;\n+\tfor (i = 0; i < iter->n; ++i) {\n+\t\tif (iter->pos[i] == iter->min) {\n+\t\t\tn_plp[i] = iter->n_plp[i], plp[i] = iter->plp[i];\n+\t\t\t++ret;\n+\t\t} else n_plp[i] = 0, plp[i] = 0;\n+\t}\n+\treturn ret;\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_plcmd.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_plcmd.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,570 @@\n+#include <math.h>\n+#include <stdio.h>\n+#include <unistd.h>\n+#include <ctype.h>\n+#include "sam.h"\n+#include "faidx.h"\n+#include "bam_maqcns.h"\n+#include "khash.h"\n+#include "glf.h"\n+#include "kstring.h"\n+\n+typedef int *indel_list_t;\n+KHASH_MAP_INIT_INT64(64, indel_list_t)\n+\n+#define BAM_PLF_SIMPLE 0x01\n+#define BAM_PLF_CNS 0x02\n+#define BAM_PLF_INDEL_ONLY 0x04\n+#define BAM_PLF_GLF 0x08\n+#define BAM_PLF_VAR_ONLY 0x10\n+#define BAM_PLF_2ND 0x20\n+#define BAM_PLF_RANBASE 0x40\n+#define BAM_PLF_1STBASE 0x80\n+#define BAM_PLF_ALLBASE 0x100\n+#define BAM_PLF_READPOS 0x200\n+\n+typedef struct {\n+\tbam_header_t *h;\n+\tbam_maqcns_t *c;\n+\tbam_maqindel_opt_t *ido;\n+\tfaidx_t *fai;\n+\tkhash_t(64) *hash;\n+\tuint32_t format;\n+\tint tid, len, last_pos;\n+\tint mask;\n+ int max_depth; // for indel calling, ignore reads with the depth too high. 0 for unlimited\n+\tchar *ref;\n+\tglfFile fp_glf; // for glf output only\n+} pu_data_t;\n+\n+char **__bam_get_lines(const char *fn, int *_n);\n+void bam_init_header_hash(bam_header_t *header);\n+int32_t bam_get_tid(const bam_header_t *header, const char *seq_name);\n+\n+static khash_t(64) *load_pos(const char *fn, bam_header_t *h)\n+{\n+\tchar **list;\n+\tint i, j, n, *fields, max_fields;\n+\tkhash_t(64) *hash;\n+\tbam_init_header_hash(h);\n+\tlist = __bam_get_lines(fn, &n);\n+\thash = kh_init(64);\n+\tmax_fields = 0; fields = 0;\n+\tfor (i = 0; i < n; ++i) {\n+\t\tchar *str = list[i];\n+\t\tint chr, n_fields, ret;\n+\t\tkhint_t k;\n+\t\tuint64_t x;\n+\t\tn_fields = ksplit_core(str, 0, &max_fields, &fields);\n+\t\tif (n_fields < 2) continue;\n+\t\tchr = bam_get_tid(h, str + fields[0]);\n+\t\tif (chr < 0) {\n+\t\t\tfprintf(stderr, "[load_pos] unknown reference sequence name: %s\\n", str + fields[0]);\n+\t\t\tcontinue;\n+\t\t}\n+\t\tx = (uint64_t)chr << 32 | (atoi(str + fields[1]) - 1);\n+\t\tk = kh_put(64, hash, x, &ret);\n+\t\tif (ret == 0) {\n+\t\t\tfprintf(stderr, "[load_pos] position %s:%s 
has been loaded.\\n", str+fields[0], str+fields[1]);\n+\t\t\tcontinue;\n+\t\t}\n+\t\tkh_val(hash, k) = 0;\n+\t\tif (n_fields > 2) {\n+\t\t\t// count\n+\t\t\tfor (j = 2; j < n_fields; ++j) {\n+\t\t\t\tchar *s = str + fields[j];\n+\t\t\t\tif ((*s != \'+\' && *s != \'-\') || !isdigit(s[1])) break;\n+ \t\t\t}\n+\t\t\tif (j > 2) { // update kh_val()\n+\t\t\t\tint *q, y, z;\n+\t\t\t\tq = kh_val(hash, k) = (int*)calloc(j - 1, sizeof(int));\n+\t\t\t\tq[0] = j - 2; z = j; y = 1;\n+\t\t\t\tfor (j = 2; j < z; ++j)\n+\t\t\t\t\tq[y++] = atoi(str + fields[j]);\n+\t\t\t}\n+\t\t}\n+\t\tfree(str);\n+\t}\n+\tfree(list); free(fields);\n+\treturn hash;\n+}\n+\n+// an analogy to pileup_func() below\n+static int glt3_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, void *data)\n+{\n+\tpu_data_t *d = (pu_data_t*)data;\n+\tbam_maqindel_ret_t *r = 0;\n+\tint rb, *proposed_indels = 0;\n+\tglf1_t *g;\n+\tglf3_t *g3;\n+\n+\tif (d->fai == 0) {\n+\t\tfprintf(stderr, "[glt3_func] reference sequence is required for generating GLT. Abort!\\n");\n+\t\texit(1);\n+\t}\n+\tif (d->hash) { // only output a list of sites\n+\t\tkhint_t k = kh_get(64, d->hash, (uint64_t)tid<<32|pos);\n+\t\tif (k == kh_end(d->hash)) return 0;\n+\t\tproposed_indels = kh_val(d->hash, k);\n+\t}\n+\tg3 = glf3_init1();\n+\tif (d->fai && (int)tid != d->tid) {\n+\t\tif (d->ref) { // then write the end mark\n+\t\t\tg3->rtype = GLF3_RTYPE_END;\n+\t\t\tglf3_write1(d->fp_glf, g3);\n+\t\t}\n+\t\tglf3_ref_write(d->fp_glf, d->h->target_name[tid], d->h->target_len[tid]); // write reference\n+\t\tfree(d->ref);\n+\t\td->ref = fai_fetch(d->fai, d->h->target_name[tid], &d->len);\n+\t\td->tid = tid;\n+\t\td->last_pos = 0;\n+\t}\n+\trb = (d->ref && (int)pos < d->len)? 
d->ref[pos] : \'N\';\n+\tg = bam_maqcns_glfgen(n, pu, bam_nt16_table[rb], d->c);\n+\tmemcpy(g3, g, sizeof(glf1_t));\n+\tg3->rtype = GLF3_RTYPE_SUB;\n+\tg3->offset = pos - d->last_pos;\n+\td->last_pos = pos;\n+\tglf3_write1(d->fp_glf, g3);\n+ if (pos < d->len) {\n+ int m = (!d->max_depth || d->max_depth>n) ? n : d->max_depth;\n+\t\tif (proposed_indels)\n+\t\t\tr = bam_maqindel(m, pos, d->ido, pu, d->ref, proposed_indels[0], proposed_indels+1);\n+\t\telse r = bam_maqindel(m, pos, d->ido, pu, d->ref, 0, 0);\n+\t}\n+\tif (r) { '..b'"[bam_pileup] fail to read the header: non-exisiting file or wrong format.\\n");\n+\t\t\treturn 1;\n+\t\t}\n+\t\td->h = fp->header;\n+\t\tif (fn_pos) d->hash = load_pos(fn_pos, d->h);\n+\t\tsampileup(fp, d->mask, pileup_func, d);\n+\t\tsamclose(fp); // d->h will be destroyed here\n+\t}\n+\n+\t// free\n+\tif (d->format & BAM_PLF_GLF) bgzf_close(d->fp_glf);\n+\tif (fn_pos) { // free the hash table\n+\t\tkhint_t k;\n+\t\tfor (k = kh_begin(d->hash); k < kh_end(d->hash); ++k)\n+\t\t\tif (kh_exist(d->hash, k)) free(kh_val(d->hash, k));\n+\t\tkh_destroy(64, d->hash);\n+\t}\n+\tfree(fn_pos); free(fn_list); free(fn_fa);\n+\tif (d->fai) fai_destroy(d->fai);\n+\tbam_maqcns_destroy(d->c);\n+\tfree(d->ido); free(d->ref); free(d);\n+\treturn 0;\n+}\n+\n+/***********\n+ * mpileup *\n+ ***********/\n+\n+typedef struct {\n+\tchar *reg;\n+\tfaidx_t *fai;\n+} mplp_conf_t;\n+\n+typedef struct {\n+\tbamFile fp;\n+\tbam_iter_t iter;\n+} mplp_aux_t;\n+\n+static int mplp_func(void *data, bam1_t *b)\n+{\n+\tmplp_aux_t *ma = (mplp_aux_t*)data;\n+\tif (ma->iter) return bam_iter_read(ma->fp, ma->iter, b);\n+\treturn bam_read1(ma->fp, b);\n+}\n+\n+static int mpileup(mplp_conf_t *conf, int n, char **fn)\n+{\n+\tmplp_aux_t **data;\n+\tint i, tid, pos, *n_plp, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid;\n+\tconst bam_pileup1_t **plp;\n+\tbam_mplp_t iter;\n+\tbam_header_t *h = 0;\n+\tchar *ref;\n+\t// allocate\n+\tdata = calloc(n, sizeof(void*));\n+\tplp = calloc(n, 
sizeof(void*));\n+\tn_plp = calloc(n, sizeof(int*));\n+\t// read the header and initialize data\n+\tfor (i = 0; i < n; ++i) {\n+\t\tbam_header_t *h_tmp;\n+\t\tdata[i] = calloc(1, sizeof(mplp_aux_t));\n+\t\tdata[i]->fp = bam_open(fn[i], "r");\n+\t\th_tmp = bam_header_read(data[i]->fp);\n+\t\tif (conf->reg) {\n+\t\t\tint beg, end;\n+\t\t\tbam_index_t *idx;\n+\t\t\tidx = bam_index_load(fn[i]);\n+\t\t\tif (idx == 0) {\n+\t\t\t\tfprintf(stderr, "[%s] fail to load index for %d-th input.\\n", __func__, i+1);\n+\t\t\t\texit(1);\n+\t\t\t}\n+\t\t\tif (bam_parse_region(h_tmp, conf->reg, &tid, &beg, &end) < 0) {\n+\t\t\t\tfprintf(stderr, "[%s] malformatted region or wrong seqname for %d-th input.\\n", __func__, i+1);\n+\t\t\t\texit(1);\n+\t\t\t}\n+\t\t\tif (i == 0) beg0 = beg, end0 = end;\n+\t\t\tdata[i]->iter = bam_iter_query(idx, tid, beg, end);\n+\t\t\tbam_index_destroy(idx);\n+\t\t}\n+\t\tif (i == 0) h = h_tmp;\n+\t\telse {\n+\t\t\t// FIXME: to check consistency\n+\t\t\tbam_header_destroy(h_tmp);\n+\t\t}\n+\t}\n+\t// mpileup\n+\tref_tid = -1; ref = 0;\n+\titer = bam_mplp_init(n, mplp_func, (void**)data);\n+\twhile (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) {\n+\t\tif (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested\n+\t\tif (tid != ref_tid) {\n+\t\t\tfree(ref);\n+\t\t\tif (conf->fai) ref = fai_fetch(conf->fai, h->target_name[tid], &ref_len);\n+\t\t\tref_tid = tid;\n+\t\t}\n+\t\tprintf("%s\\t%d\\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? 
ref[pos] : \'N\');\n+\t\tfor (i = 0; i < n; ++i) {\n+\t\t\tint j;\n+\t\t\tprintf("\\t%d\\t", n_plp[i]);\n+\t\t\tif (n_plp[i] == 0) printf("*\\t*");\n+\t\t\telse {\n+\t\t\t\tfor (j = 0; j < n_plp[i]; ++j)\n+\t\t\t\t\tpileup_seq(plp[i] + j, pos, ref_len, ref);\n+\t\t\t\tputchar(\'\\t\');\n+\t\t\t\tfor (j = 0; j < n_plp[i]; ++j) {\n+\t\t\t\t\tconst bam_pileup1_t *p = plp[i] + j;\n+\t\t\t\t\tint c = bam1_qual(p->b)[p->qpos] + 33;\n+\t\t\t\t\tif (c > 126) c = 126;\n+\t\t\t\t\tputchar(c);\n+\t\t\t\t}\n+\t\t\t}\n+\t\t}\n+\t\tputchar(\'\\n\');\n+\t}\n+\tbam_mplp_destroy(iter);\n+\tbam_header_destroy(h);\n+\tfor (i = 0; i < n; ++i) {\n+\t\tbam_close(data[i]->fp);\n+\t\tif (data[i]->iter) bam_iter_destroy(data[i]->iter);\n+\t\tfree(data[i]);\n+\t}\n+\tfree(data); free(plp); free(ref); free(n_plp);\n+\treturn 0;\n+}\n+\n+int bam_mpileup(int argc, char *argv[])\n+{\n+\tint c;\n+\tmplp_conf_t mplp;\n+\tmemset(&mplp, 0, sizeof(mplp_conf_t));\n+\twhile ((c = getopt(argc, argv, "f:r:")) >= 0) {\n+\t\tswitch (c) {\n+\t\tcase \'f\':\n+\t\t\tmplp.fai = fai_load(optarg);\n+\t\t\tif (mplp.fai == 0) return 1;\n+\t\t\tbreak;\n+\t\tcase \'r\': mplp.reg = strdup(optarg);\n+\t\t}\n+\t}\n+\tif (argc == 1) {\n+\t\tfprintf(stderr, "Usage: samtools mpileup [-r reg] [-f in.fa] in1.bam [in2.bam [...]]\\n");\n+\t\treturn 1;\n+\t}\n+\tmpileup(&mplp, argc - optind, argv + optind);\n+\tfree(mplp.reg);\n+\tif (mplp.fai) fai_destroy(mplp.fai);\n+\treturn 0;\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_reheader.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_reheader.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,60 @@ +#include <stdio.h> +#include <stdlib.h> +#include "bgzf.h" +#include "bam.h" + +#define BUF_SIZE 0x10000 + +int bam_reheader(BGZF *in, const bam_header_t *h, int fd) +{ + BGZF *fp; + bam_header_t *old; + int len; + uint8_t *buf; + if (in->open_mode != 'r') return -1; + buf = malloc(BUF_SIZE); + old = bam_header_read(in); + fp = bgzf_fdopen(fd, "w"); + bam_header_write(fp, h); + if (in->block_offset < in->block_length) { + bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset); + bgzf_flush(fp); + } +#ifdef _USE_KNETFILE + while ((len = knet_read(in->x.fpr, buf, BUF_SIZE)) > 0) +#else + while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) +#endif + fwrite(buf, 1, len, fp->x.fpw); + free(buf); + fp->block_offset = in->block_offset = 0; + bgzf_close(fp); + return 0; +} + +int main_reheader(int argc, char *argv[]) +{ + bam_header_t *h; + BGZF *in; + if (argc != 3) { + fprintf(stderr, "Usage: samtools reheader <in.header.sam> <in.bam>\n"); + return 1; + } + { // read the header + tamFile fph = sam_open(argv[1]); + if (fph == 0) { + fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[1]); + return 1; + } + h = sam_header_read(fph); + sam_close(fph); + } + in = strcmp(argv[2], "-")? bam_open(argv[2], "r") : bam_dopen(fileno(stdin), "r"); + if (in == 0) { + fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[2]); + return 1; + } + bam_reheader(in, h, fileno(stdout)); + bgzf_close(in); + return 0; +} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_rmdup.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_rmdup.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,206 @@ +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <zlib.h> +#include <unistd.h> +#include "sam.h" + +typedef bam1_t *bam1_p; + +#include "khash.h" +KHASH_SET_INIT_STR(name) +KHASH_MAP_INIT_INT64(pos, bam1_p) + +#define BUFFER_SIZE 0x40000 + +typedef struct { + uint64_t n_checked, n_removed; + khash_t(pos) *best_hash; +} lib_aux_t; +KHASH_MAP_INIT_STR(lib, lib_aux_t) + +typedef struct { + int n, max; + bam1_t **a; +} tmp_stack_t; + +static inline void stack_insert(tmp_stack_t *stack, bam1_t *b) +{ + if (stack->n == stack->max) { + stack->max = stack->max? stack->max<<1 : 0x10000; + stack->a = (bam1_t**)realloc(stack->a, sizeof(bam1_t*) * stack->max); + } + stack->a[stack->n++] = b; +} + +static inline void dump_best(tmp_stack_t *stack, samfile_t *out) +{ + int i; + for (i = 0; i != stack->n; ++i) { + samwrite(out, stack->a[i]); + bam_destroy1(stack->a[i]); + } + stack->n = 0; +} + +static void clear_del_set(khash_t(name) *del_set) +{ + khint_t k; + for (k = kh_begin(del_set); k < kh_end(del_set); ++k) + if (kh_exist(del_set, k)) + free((char*)kh_key(del_set, k)); + kh_clear(name, del_set); +} + +static lib_aux_t *get_aux(khash_t(lib) *aux, const char *lib) +{ + khint_t k = kh_get(lib, aux, lib); + if (k == kh_end(aux)) { + int ret; + char *p = strdup(lib); + lib_aux_t *q; + k = kh_put(lib, aux, p, &ret); + q = &kh_val(aux, k); + q->n_checked = q->n_removed = 0; + q->best_hash = kh_init(pos); + return q; + } else return &kh_val(aux, k); +} + +static void clear_best(khash_t(lib) *aux, int max) +{ + khint_t k; + for (k = kh_begin(aux); k != kh_end(aux); ++k) { + if (kh_exist(aux, k)) { + lib_aux_t *q = &kh_val(aux, k); + if (kh_size(q->best_hash) >= max) + kh_clear(pos, q->best_hash); + } + } +} + +static inline int sum_qual(const bam1_t *b) +{ + int i, q; + uint8_t *qual = bam1_qual(b); + for (i = q = 0; i < b->core.l_qseq; ++i) q += qual[i]; + return q; +} + +void bam_rmdup_core(samfile_t *in, samfile_t *out) +{ + bam1_t *b; + 
int last_tid = -1, last_pos = -1; + tmp_stack_t stack; + khint_t k; + khash_t(lib) *aux; + khash_t(name) *del_set; + + aux = kh_init(lib); + del_set = kh_init(name); + b = bam_init1(); + memset(&stack, 0, sizeof(tmp_stack_t)); + + kh_resize(name, del_set, 4 * BUFFER_SIZE); + while (samread(in, b) >= 0) { + bam1_core_t *c = &b->core; + if (c->tid != last_tid || last_pos != c->pos) { + dump_best(&stack, out); // write the result + clear_best(aux, BUFFER_SIZE); + if (c->tid != last_tid) { + clear_best(aux, 0); + if (kh_size(del_set)) { // check + fprintf(stderr, "[bam_rmdup_core] %llu unmatched pairs\n", (long long)kh_size(del_set)); + clear_del_set(del_set); + } + if ((int)c->tid == -1) { // append unmapped reads + samwrite(out, b); + while (samread(in, b) >= 0) samwrite(out, b); + break; + } + last_tid = c->tid; + fprintf(stderr, "[bam_rmdup_core] processing reference %s...\n", in->header->target_name[c->tid]); + } + } + if (!(c->flag&BAM_FPAIRED) || (c->flag&(BAM_FUNMAP|BAM_FMUNMAP)) || (c->mtid >= 0 && c->tid != c->mtid)) { + samwrite(out, b); + } else if (c->isize > 0) { // paired, head + uint64_t key = (uint64_t)c->pos<<32 | c->isize; + const char *lib; + lib_aux_t *q; + int ret; + lib = bam_get_library(in->header, b); + q = lib? get_aux(aux, lib) : get_aux(aux, "\t"); + ++q->n_checked; + k = kh_put(pos, q->best_hash, key, &ret); + if (ret == 0) { // found in best_hash + bam1_t *p = kh_val(q->best_hash, k); + ++q->n_removed; + if (sum_qual(p) < sum_qual(b)) { // the current alignment is better; this can be accelerated in principle + kh_put(name, del_set, strdup(bam1_qname(p)), &ret); // p will be removed + bam_copy1(p, b); // replaced as b + } else kh_put(name, del_set, strdup(bam1_qname(b)), &ret); // b will be removed + if (ret == 0) + fprintf(stderr, "[bam_rmdup_core] inconsistent BAM file for pair '%s'. 
Continue anyway.\n", bam1_qname(b)); + } else { // not found in best_hash + kh_val(q->best_hash, k) = bam_dup1(b); + stack_insert(&stack, kh_val(q->best_hash, k)); + } + } else { // paired, tail + k = kh_get(name, del_set, bam1_qname(b)); + if (k != kh_end(del_set)) { + free((char*)kh_key(del_set, k)); + kh_del(name, del_set, k); + } else samwrite(out, b); + } + last_pos = c->pos; + } + + for (k = kh_begin(aux); k != kh_end(aux); ++k) { + if (kh_exist(aux, k)) { + lib_aux_t *q = &kh_val(aux, k); + dump_best(&stack, out); + fprintf(stderr, "[bam_rmdup_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed, + (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k)); + kh_destroy(pos, q->best_hash); + free((char*)kh_key(aux, k)); + } + } + kh_destroy(lib, aux); + + clear_del_set(del_set); + kh_destroy(name, del_set); + free(stack.a); + bam_destroy1(b); +} + +void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se); + +int bam_rmdup(int argc, char *argv[]) +{ + int c, is_se = 0, force_se = 0; + samfile_t *in, *out; + while ((c = getopt(argc, argv, "sS")) >= 0) { + switch (c) { + case 's': is_se = 1; break; + case 'S': force_se = is_se = 1; break; + } + } + if (optind + 2 > argc) { + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: samtools rmdup [-sS] <input.srt.bam> <output.bam>\n\n"); + fprintf(stderr, "Option: -s rmdup for SE reads\n"); + fprintf(stderr, " -S treat PE reads as SE in rmdup (force -s)\n\n"); + return 1; + } + in = samopen(argv[optind], "rb", 0); + out = samopen(argv[optind+1], "wb", in->header); + if (in == 0 || out == 0) { + fprintf(stderr, "[bam_rmdup] fail to read/write input files\n"); + return 1; + } + if (is_se) bam_rmdupse_core(in, out, force_se); + else bam_rmdup_core(in, out); + samclose(in); samclose(out); + return 0; +} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_rmdupse.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_rmdupse.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,159 @@ +#include <math.h> +#include "sam.h" +#include "khash.h" +#include "klist.h" + +#define QUEUE_CLEAR_SIZE 0x100000 +#define MAX_POS 0x7fffffff + +typedef struct { + int endpos; + uint32_t score:31, discarded:1; + bam1_t *b; +} elem_t, *elem_p; +#define __free_elem(p) bam_destroy1((p)->data.b) +KLIST_INIT(q, elem_t, __free_elem) +typedef klist_t(q) queue_t; + +KHASH_MAP_INIT_INT(best, elem_p) +typedef khash_t(best) besthash_t; + +typedef struct { + uint64_t n_checked, n_removed; + besthash_t *left, *rght; +} lib_aux_t; +KHASH_MAP_INIT_STR(lib, lib_aux_t) + +static lib_aux_t *get_aux(khash_t(lib) *aux, const char *lib) +{ + khint_t k = kh_get(lib, aux, lib); + if (k == kh_end(aux)) { + int ret; + char *p = strdup(lib); + lib_aux_t *q; + k = kh_put(lib, aux, p, &ret); + q = &kh_val(aux, k); + q->left = kh_init(best); + q->rght = kh_init(best); + q->n_checked = q->n_removed = 0; + return q; + } else return &kh_val(aux, k); +} + +static inline int sum_qual(const bam1_t *b) +{ + int i, q; + uint8_t *qual = bam1_qual(b); + for (i = q = 0; i < b->core.l_qseq; ++i) q += qual[i]; + return q; +} + +static inline elem_t *push_queue(queue_t *queue, const bam1_t *b, int endpos, int score) +{ + elem_t *p = kl_pushp(q, queue); + p->discarded = 0; + p->endpos = endpos; p->score = score; + if (p->b == 0) p->b = bam_init1(); + bam_copy1(p->b, b); + return p; +} + +static void clear_besthash(besthash_t *h, int32_t pos) +{ + khint_t k; + for (k = kh_begin(h); k != kh_end(h); ++k) + if (kh_exist(h, k) && kh_val(h, k)->endpos <= pos) + kh_del(best, h, k); +} + +static void dump_alignment(samfile_t *out, queue_t *queue, int32_t pos, khash_t(lib) *h) +{ + if (queue->size > QUEUE_CLEAR_SIZE || pos == MAX_POS) { + khint_t k; + while (1) { + elem_t *q; + if (queue->head == queue->tail) break; + q = &kl_val(queue->head); + if (q->discarded) { + q->b->data_len = 0; + kl_shift(q, queue, 0); + continue; + } + if ((q->b->core.flag&BAM_FREVERSE) && q->endpos > pos) break; + 
samwrite(out, q->b); + q->b->data_len = 0; + kl_shift(q, queue, 0); + } + for (k = kh_begin(h); k != kh_end(h); ++k) { + if (kh_exist(h, k)) { + clear_besthash(kh_val(h, k).left, pos); + clear_besthash(kh_val(h, k).rght, pos); + } + } + } +} + +void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se) +{ + bam1_t *b; + queue_t *queue; + khint_t k; + int last_tid = -2; + khash_t(lib) *aux; + + aux = kh_init(lib); + b = bam_init1(); + queue = kl_init(q); + while (samread(in, b) >= 0) { + bam1_core_t *c = &b->core; + int endpos = bam_calend(c, bam1_cigar(b)); + int score = sum_qual(b); + + if (last_tid != c->tid) { + if (last_tid >= 0) dump_alignment(out, queue, MAX_POS, aux); + last_tid = c->tid; + } else dump_alignment(out, queue, c->pos, aux); + if ((c->flag&BAM_FUNMAP) || ((c->flag&BAM_FPAIRED) && !force_se)) { + push_queue(queue, b, endpos, score); + } else { + const char *lib; + lib_aux_t *q; + besthash_t *h; + uint32_t key; + int ret; + lib = bam_get_library(in->header, b); + q = lib? get_aux(aux, lib) : get_aux(aux, "\t"); + ++q->n_checked; + h = (c->flag&BAM_FREVERSE)? q->rght : q->left; + key = (c->flag&BAM_FREVERSE)? 
endpos : c->pos; + k = kh_put(best, h, key, &ret); + if (ret == 0) { // in the hash table + elem_t *p = kh_val(h, k); + ++q->n_removed; + if (p->score < score) { + if (c->flag&BAM_FREVERSE) { // mark "discarded" and push the queue + p->discarded = 1; + kh_val(h, k) = push_queue(queue, b, endpos, score); + } else { // replace + p->score = score; p->endpos = endpos; + bam_copy1(p->b, b); + } + } // otherwise, discard the alignment + } else kh_val(h, k) = push_queue(queue, b, endpos, score); + } + } + dump_alignment(out, queue, MAX_POS, aux); + + for (k = kh_begin(aux); k != kh_end(aux); ++k) { + if (kh_exist(aux, k)) { + lib_aux_t *q = &kh_val(aux, k); + fprintf(stderr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed, + (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k)); + kh_destroy(best, q->left); kh_destroy(best, q->rght); + free((char*)kh_key(aux, k)); + } + } + kh_destroy(lib, aux); + bam_destroy1(b); + kl_destroy(q, queue); +} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_sort.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_sort.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,357 @@\n+#include <stdlib.h>\n+#include <ctype.h>\n+#include <assert.h>\n+#include <stdio.h>\n+#include <string.h>\n+#include <unistd.h>\n+#include "bam.h"\n+#include "ksort.h"\n+\n+static int g_is_by_qname = 0;\n+\n+static inline int strnum_cmp(const char *a, const char *b)\n+{\n+\tchar *pa, *pb;\n+\tpa = (char*)a; pb = (char*)b;\n+\twhile (*pa && *pb) {\n+\t\tif (isdigit(*pa) && isdigit(*pb)) {\n+\t\t\tlong ai, bi;\n+\t\t\tai = strtol(pa, &pa, 10);\n+\t\t\tbi = strtol(pb, &pb, 10);\n+\t\t\tif (ai != bi) return ai<bi? -1 : ai>bi? 1 : 0;\n+\t\t} else {\n+\t\t\tif (*pa != *pb) break;\n+\t\t\t++pa; ++pb;\n+\t\t}\n+\t}\n+\tif (*pa == *pb)\n+\t\treturn (pa-a) < (pb-b)? -1 : (pa-a) > (pb-b)? 1 : 0;\n+\treturn *pa<*pb? -1 : *pa>*pb? 1 : 0;\n+}\n+\n+#define HEAP_EMPTY 0xffffffffffffffffull\n+\n+typedef struct {\n+\tint i;\n+\tuint64_t pos, idx;\n+\tbam1_t *b;\n+} heap1_t;\n+\n+#define __pos_cmp(a, b) ((a).pos > (b).pos || ((a).pos == (b).pos && ((a).i > (b).i || ((a).i == (b).i && (a).idx > (b).idx))))\n+\n+static inline int heap_lt(const heap1_t a, const heap1_t b)\n+{\n+\tif (g_is_by_qname) {\n+\t\tint t;\n+\t\tif (a.b == 0 || b.b == 0) return a.b == 0? 
1 : 0;\n+\t\tt = strnum_cmp(bam1_qname(a.b), bam1_qname(b.b));\n+\t\treturn (t > 0 || (t == 0 && __pos_cmp(a, b)));\n+\t} else return __pos_cmp(a, b);\n+}\n+\n+KSORT_INIT(heap, heap1_t, heap_lt)\n+\n+static void swap_header_text(bam_header_t *h1, bam_header_t *h2)\n+{\n+\tint tempi;\n+\tchar *temps;\n+\ttempi = h1->l_text, h1->l_text = h2->l_text, h2->l_text = tempi;\n+\ttemps = h1->text, h1->text = h2->text, h2->text = temps;\n+}\n+\n+/*!\n+ @abstract Merge multiple sorted BAM.\n+ @param is_by_qname whether to sort by query name\n+ @param out output BAM file name\n+ @param headers name of SAM file from which to copy \'@\' header lines,\n+ or NULL to copy them from the first file to be merged\n+ @param n number of files to be merged\n+ @param fn names of files to be merged\n+\n+ @discussion Padding information may NOT correctly maintained. This\n+ function is NOT thread safe.\n+ */\n+void bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn, int add_RG)\n+{\n+\tbamFile fpout, *fp;\n+\theap1_t *heap;\n+\tbam_header_t *hout = 0;\n+\tbam_header_t *hheaders = NULL;\n+\tint i, j, *RG_len = 0;\n+\tuint64_t idx = 0;\n+\tchar **RG = 0;\n+\n+\tif (headers) {\n+\t\ttamFile fpheaders = sam_open(headers);\n+\t\tif (fpheaders == 0) {\n+\t\t\tfprintf(stderr, "[bam_merge_core] Cannot open file `%s\'. 
Continue anyway.\\n", headers);\n+\t\t} else {\n+\t\t\thheaders = sam_header_read(fpheaders);\n+\t\t\tsam_close(fpheaders);\n+\t\t}\n+\t}\n+\n+\tg_is_by_qname = by_qname;\n+\tfp = (bamFile*)calloc(n, sizeof(bamFile));\n+\theap = (heap1_t*)calloc(n, sizeof(heap1_t));\n+\t// prepare RG tag\n+\tif (add_RG) {\n+\t\tRG = (char**)calloc(n, sizeof(void*));\n+\t\tRG_len = (int*)calloc(n, sizeof(int));\n+\t\tfor (i = 0; i != n; ++i) {\n+\t\t\tint l = strlen(fn[i]);\n+\t\t\tconst char *s = fn[i];\n+\t\t\tif (l > 4 && strcmp(s + l - 4, ".bam") == 0) l -= 4;\n+\t\t\tfor (j = l - 1; j >= 0; --j) if (s[j] == \'/\') break;\n+\t\t\t++j; l -= j;\n+\t\t\tRG[i] = calloc(l + 1, 1);\n+\t\t\tRG_len[i] = l;\n+\t\t\tstrncpy(RG[i], s + j, l);\n+\t\t}\n+\t}\n+\t// read the first\n+\tfor (i = 0; i != n; ++i) {\n+\t\theap1_t *h;\n+\t\tbam_header_t *hin;\n+\t\tfp[i] = bam_open(fn[i], "r");\n+\t\tif (fp[i] == 0) {\n+\t\t\tint j;\n+\t\t\tfprintf(stderr, "[bam_merge_core] fail to open file %s\\n", fn[i]);\n+\t\t\tfor (j = 0; j < i; ++j) bam_close(fp[j]);\n+\t\t\tfree(fp); free(heap);\n+\t\t\t// FIXME: possible memory leak\n+\t\t\treturn;\n+\t\t}\n+\t\thin = bam_header_read(fp[i]);\n+\t\tif (i == 0) { // the first SAM\n+\t\t\thout = hin;\n+\t\t\tif (hheaders) {\n+\t\t\t\t// If the text headers to be swapped in include any @SQ headers,\n+\t\t\t\t// check that they are consistent with the existing binary list\n+\t\t\t\t// of reference information.\n+\t\t\t\tif (hheaders->n_targets > 0) {\n+\t\t\t\t\tif (hout->n_targets != hheaders->n_targets)\n+\t\t\t\t\t\tfprintf(stderr, "[bam_merge_core] number of @SQ headers in `%s\' differs from number of target sequences", headers);\n+\t\t\t\t\tfor (j = 0; j < hout->n_targets; ++j)\n+\t\t\t\t\t\tif (strcmp(hout->target_name[j], hheaders'..b' 0 && (((uint64_t)a->core.tid<<32|a->core.pos) < ((uint64_t)b->core.tid<<32|b->core.pos))));\n+\t} else return (((uint64_t)a->core.tid<<32|a->core.pos) < ((uint64_t)b->core.tid<<32|b->core.pos));\n+}\n+KSORT_INIT(sort, 
bam1_p, bam1_lt)\n+\n+static void sort_blocks(int n, int k, bam1_p *buf, const char *prefix, const bam_header_t *h, int is_stdout)\n+{\n+\tchar *name;\n+\tint i;\n+\tbamFile fp;\n+\tks_mergesort(sort, k, buf, 0);\n+\tname = (char*)calloc(strlen(prefix) + 20, 1);\n+\tif (n >= 0) sprintf(name, "%s.%.4d.bam", prefix, n);\n+\telse sprintf(name, "%s.bam", prefix);\n+\tfp = is_stdout? bam_dopen(fileno(stdout), "w") : bam_open(name, "w");\n+\tif (fp == 0) {\n+\t\tfprintf(stderr, "[sort_blocks] fail to create file %s.\\n", name);\n+\t\tfree(name);\n+\t\t// FIXME: possible memory leak\n+\t\treturn;\n+\t}\n+\tfree(name);\n+\tbam_header_write(fp, h);\n+\tfor (i = 0; i < k; ++i)\n+\t\tbam_write1_core(fp, &buf[i]->core, buf[i]->data_len, buf[i]->data);\n+\tbam_close(fp);\n+}\n+\n+/*!\n+ @abstract Sort an unsorted BAM file based on the chromosome order\n+ and the leftmost position of an alignment\n+\n+ @param is_by_qname whether to sort by query name\n+ @param fn name of the file to be sorted\n+ @param prefix prefix of the output and the temporary files; upon\n+\t sucessess, prefix.bam will be written.\n+ @param max_mem approxiate maximum memory (very inaccurate)\n+\n+ @discussion It may create multiple temporary subalignment files\n+ and then merge them by calling bam_merge_core(). This function is\n+ NOT thread safe.\n+ */\n+void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t max_mem, int is_stdout)\n+{\n+\tint n, ret, k, i;\n+\tsize_t mem;\n+\tbam_header_t *header;\n+\tbamFile fp;\n+\tbam1_t *b, **buf;\n+\n+\tg_is_by_qname = is_by_qname;\n+\tn = k = 0; mem = 0;\n+\tfp = strcmp(fn, "-")? 
bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");\n+\tif (fp == 0) {\n+\t\tfprintf(stderr, "[bam_sort_core] fail to open file %s\\n", fn);\n+\t\treturn;\n+\t}\n+\theader = bam_header_read(fp);\n+\tbuf = (bam1_t**)calloc(max_mem / BAM_CORE_SIZE, sizeof(bam1_t*));\n+\t// write sub files\n+\tfor (;;) {\n+\t\tif (buf[k] == 0) buf[k] = (bam1_t*)calloc(1, sizeof(bam1_t));\n+\t\tb = buf[k];\n+\t\tif ((ret = bam_read1(fp, b)) < 0) break;\n+\t\tmem += ret;\n+\t\t++k;\n+\t\tif (mem >= max_mem) {\n+\t\t\tsort_blocks(n++, k, buf, prefix, header, 0);\n+\t\t\tmem = 0; k = 0;\n+\t\t}\n+\t}\n+\tif (ret != -1)\n+\t\tfprintf(stderr, "[bam_sort_core] truncated file. Continue anyway.\\n");\n+\tif (n == 0) sort_blocks(-1, k, buf, prefix, header, is_stdout);\n+\telse { // then merge\n+\t\tchar **fns, *fnout;\n+\t\tfprintf(stderr, "[bam_sort_core] merging from %d files...\\n", n+1);\n+\t\tsort_blocks(n++, k, buf, prefix, header, 0);\n+\t\tfnout = (char*)calloc(strlen(prefix) + 20, 1);\n+\t\tif (is_stdout) sprintf(fnout, "-");\n+\t\telse sprintf(fnout, "%s.bam", prefix);\n+\t\tfns = (char**)calloc(n, sizeof(char*));\n+\t\tfor (i = 0; i < n; ++i) {\n+\t\t\tfns[i] = (char*)calloc(strlen(prefix) + 20, 1);\n+\t\t\tsprintf(fns[i], "%s.%.4d.bam", prefix, i);\n+\t\t}\n+\t\tbam_merge_core(is_by_qname, fnout, 0, n, fns, 0);\n+\t\tfree(fnout);\n+\t\tfor (i = 0; i < n; ++i) {\n+\t\t\tunlink(fns[i]);\n+\t\t\tfree(fns[i]);\n+\t\t}\n+\t\tfree(fns);\n+\t}\n+\tfor (k = 0; k < max_mem / BAM_CORE_SIZE; ++k) {\n+\t\tif (buf[k]) {\n+\t\t\tfree(buf[k]->data);\n+\t\t\tfree(buf[k]);\n+\t\t}\n+\t}\n+\tfree(buf);\n+\tbam_header_destroy(header);\n+\tbam_close(fp);\n+}\n+\n+void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem)\n+{\n+\tbam_sort_core_ext(is_by_qname, fn, prefix, max_mem, 0);\n+}\n+\n+int bam_sort(int argc, char *argv[])\n+{\n+\tsize_t max_mem = 500000000;\n+\tint c, is_by_qname = 0, is_stdout = 0;\n+\twhile ((c = getopt(argc, argv, "nom:")) >= 0) {\n+\t\tswitch (c) 
{\n+\t\tcase \'o\': is_stdout = 1; break;\n+\t\tcase \'n\': is_by_qname = 1; break;\n+\t\tcase \'m\': max_mem = atol(optarg); break;\n+\t\t}\n+\t}\n+\tif (optind + 2 > argc) {\n+\t\tfprintf(stderr, "Usage: samtools sort [-on] [-m <maxMem>] <in.bam> <out.prefix>\\n");\n+\t\treturn 1;\n+\t}\n+\tbam_sort_core_ext(is_by_qname, argv[optind], argv[optind+1], max_mem, is_stdout);\n+\treturn 0;\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_stat.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_stat.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,78 @@ +#include <unistd.h> +#include <assert.h> +#include "bam.h" + +typedef struct { + long long n_reads, n_mapped, n_pair_all, n_pair_map, n_pair_good; + long long n_sgltn, n_read1, n_read2; + long long n_qcfail, n_dup; + long long n_diffchr, n_diffhigh; +} bam_flagstat_t; + +#define flagstat_loop(s, c) do { \ + ++(s)->n_reads; \ + if ((c)->flag & BAM_FPAIRED) { \ + ++(s)->n_pair_all; \ + if ((c)->flag & BAM_FPROPER_PAIR) ++(s)->n_pair_good; \ + if ((c)->flag & BAM_FREAD1) ++(s)->n_read1; \ + if ((c)->flag & BAM_FREAD2) ++(s)->n_read2; \ + if (((c)->flag & BAM_FMUNMAP) && !((c)->flag & BAM_FUNMAP)) ++(s)->n_sgltn; \ + if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \ + ++(s)->n_pair_map; \ + if ((c)->mtid != (c)->tid) { \ + ++(s)->n_diffchr; \ + if ((c)->qual >= 5) ++(s)->n_diffhigh; \ + } \ + } \ + } \ + if (!((c)->flag & BAM_FUNMAP)) ++(s)->n_mapped; \ + if ((c)->flag & BAM_FQCFAIL) ++(s)->n_qcfail; \ + if ((c)->flag & BAM_FDUP) ++(s)->n_dup; \ + } while (0) + +bam_flagstat_t *bam_flagstat_core(bamFile fp) +{ + bam_flagstat_t *s; + bam1_t *b; + bam1_core_t *c; + int ret; + s = (bam_flagstat_t*)calloc(1, sizeof(bam_flagstat_t)); + b = bam_init1(); + c = &b->core; + while ((ret = bam_read1(fp, b)) >= 0) + flagstat_loop(s, c); + bam_destroy1(b); + if (ret != -1) + fprintf(stderr, "[bam_flagstat_core] Truncated file? Continue anyway.\n"); + return s; +} +int bam_flagstat(int argc, char *argv[]) +{ + bamFile fp; + bam_header_t *header; + bam_flagstat_t *s; + if (argc == optind) { + fprintf(stderr, "Usage: samtools flagstat <in.bam>\n"); + return 1; + } + fp = strcmp(argv[optind], "-")? 
bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r"); + assert(fp); + header = bam_header_read(fp); + s = bam_flagstat_core(fp); + printf("%lld in total\n", s->n_reads); + printf("%lld QC failure\n", s->n_qcfail); + printf("%lld duplicates\n", s->n_dup); + printf("%lld mapped (%.2f%%)\n", s->n_mapped, (float)s->n_mapped / s->n_reads * 100.0); + printf("%lld paired in sequencing\n", s->n_pair_all); + printf("%lld read1\n", s->n_read1); + printf("%lld read2\n", s->n_read2); + printf("%lld properly paired (%.2f%%)\n", s->n_pair_good, (float)s->n_pair_good / s->n_pair_all * 100.0); + printf("%lld with itself and mate mapped\n", s->n_pair_map); + printf("%lld singletons (%.2f%%)\n", s->n_sgltn, (float)s->n_sgltn / s->n_pair_all * 100.0); + printf("%lld with mate mapped to a different chr\n", s->n_diffchr); + printf("%lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh); + free(s); + bam_header_destroy(header); + bam_close(fp); + return 0; +} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bam_tview.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bam_tview.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,424 @@\n+#undef _HAVE_CURSES\n+\n+#if _CURSES_LIB == 0\n+#elif _CURSES_LIB == 1\n+#include <curses.h>\n+#ifndef NCURSES_VERSION\n+#warning "_CURSES_LIB=1 but NCURSES_VERSION not defined; tview is NOT compiled"\n+#else\n+#define _HAVE_CURSES\n+#endif\n+#elif _CURSES_LIB == 2\n+#include <xcurses.h>\n+#define _HAVE_CURSES\n+#else\n+#warning "_CURSES_LIB is not 0, 1 or 2; tview is NOT compiled"\n+#endif\n+\n+#ifdef _HAVE_CURSES\n+#include <ctype.h>\n+#include <assert.h>\n+#include <string.h>\n+#include "bam.h"\n+#include "faidx.h"\n+#include "bam_maqcns.h"\n+\n+char bam_aux_getCEi(bam1_t *b, int i);\n+char bam_aux_getCSi(bam1_t *b, int i);\n+char bam_aux_getCQi(bam1_t *b, int i);\n+\n+#define TV_MIN_ALNROW 2\n+#define TV_MAX_GOTO 40\n+#define TV_LOW_MAPQ 10\n+\n+#define TV_COLOR_MAPQ 0\n+#define TV_COLOR_BASEQ 1\n+#define TV_COLOR_NUCL 2\n+#define TV_COLOR_COL 3\n+#define TV_COLOR_COLQ 4\n+\n+#define TV_BASE_NUCL 0\n+#define TV_BASE_COLOR_SPACE 1\n+\n+typedef struct {\n+\tint mrow, mcol;\n+\tWINDOW *wgoto, *whelp;\n+\n+\tbam_index_t *idx;\n+\tbam_lplbuf_t *lplbuf;\n+\tbam_header_t *header;\n+\tbamFile fp;\n+\tint curr_tid, left_pos;\n+\tfaidx_t *fai;\n+\tbam_maqcns_t *bmc;\n+\n+\tint ccol, last_pos, row_shift, base_for, color_for, is_dot, l_ref, ins, no_skip, show_name;\n+\tchar *ref;\n+} tview_t;\n+\n+int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)\n+{\n+\ttview_t *tv = (tview_t*)data;\n+\tint i, j, c, rb, attr, max_ins = 0;\n+\tuint32_t call = 0;\n+\tif (pos < tv->left_pos || tv->ccol > tv->mcol) return 0; // out of screen\n+\t// print referece\n+\trb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : \'N\';\n+\tfor (i = tv->last_pos + 1; i < pos; ++i) {\n+\t\tif (i%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", i+1);\n+\t\tc = tv->ref? 
tv->ref[i - tv->left_pos] : \'N\';\n+\t\tmvaddch(1, tv->ccol++, c);\n+\t}\n+\tif (pos%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", pos+1);\n+\t// print consensus\n+\tcall = bam_maqcns_call(n, pl, tv->bmc);\n+\tattr = A_UNDERLINE;\n+\tc = ",ACMGRSVTWYHKDBN"[call>>28&0xf];\n+\ti = (call>>8&0xff)/10+1;\n+\tif (i > 4) i = 4;\n+\tattr |= COLOR_PAIR(i);\n+\tif (c == toupper(rb)) c = \'.\';\n+\tattron(attr);\n+\tmvaddch(2, tv->ccol, c);\n+\tattroff(attr);\n+\tif(tv->ins) {\n+\t\t// calculate maximum insert\n+\t\tfor (i = 0; i < n; ++i) {\n+\t\t\tconst bam_pileup1_t *p = pl + i;\n+\t\t\tif (p->indel > 0 && max_ins < p->indel) max_ins = p->indel;\n+\t\t}\n+\t}\n+\t// core loop\n+\tfor (j = 0; j <= max_ins; ++j) {\n+\t\tfor (i = 0; i < n; ++i) {\n+\t\t\tconst bam_pileup1_t *p = pl + i;\n+\t\t\tint row = TV_MIN_ALNROW + p->level - tv->row_shift;\n+\t\t\tif (j == 0) {\n+\t\t\t\tif (!p->is_del) {\n+\t\t\t\t\tif (tv->base_for == TV_BASE_COLOR_SPACE && \n+\t\t\t\t\t\t\t(c = bam_aux_getCSi(p->b, p->qpos))) {\n+\t\t\t\t\t\tc = bam_aux_getCSi(p->b, p->qpos);\n+\t\t\t\t\t\t// assume that if we found one color, we will be able to get the color error\n+\t\t\t\t\t\tif (tv->is_dot && \'-\' == bam_aux_getCEi(p->b, p->qpos)) c = bam1_strand(p->b)? \',\' : \'.\';\n+\t\t\t\t\t} else {\n+\t\t\t\t\t\tif (tv->show_name) {\n+\t\t\t\t\t\t\tchar *name = bam1_qname(p->b);\n+\t\t\t\t\t\t\tc = (p->qpos + 1 >= p->b->core.l_qname)? \' \' : name[p->qpos];\n+\t\t\t\t\t\t} else {\n+\t\t\t\t\t\t\tc = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];\n+\t\t\t\t\t\t\tif (tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? 
\',\' : \'.\';\n+\t\t\t\t\t\t}\n+\t\t\t\t\t}\n+\t\t\t\t} else c = \'*\';\n+\t\t\t} else { // padding\n+\t\t\t\tif (j > p->indel) c = \'*\';\n+\t\t\t\telse { // insertion\n+\t\t\t\t\tif (tv->base_for == TV_BASE_NUCL) {\n+\t\t\t\t\t\tif (tv->show_name) {\n+\t\t\t\t\t\t\tchar *name = bam1_qname(p->b);\n+\t\t\t\t\t\t\tc = (p->qpos + j + 1 >= p->b->core.l_qname)? \' \' : name[p->qpos + j];\n+\t\t\t\t\t\t} else {\n+\t\t\t\t\t\t\tc = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)];\n+\t\t\t\t\t\t\tif (j == 0 && tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? \',\' : \'.\';\n+\t\t\t\t\t\t}\n+\t\t\t\t\t} else {\n+\t\t\t\t\t\tc = bam_aux_getCSi(p->b, p->qpos + j);\n+\t\t\t\t\t\tif (tv->is_dot && \'-\' == bam_aux_getCEi(p->b, p->qpos + j)) c = bam1_strand(p->b)? \',\' : \'.\';\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t}\n+\t\t\tif (row > TV_MIN_ALNROW && row < tv->mrow) {\n+\t'..b' l = 0;\n+\t\telse if (c == \'\\033\') return;\n+\t\tstr[l] = \'\\0\';\n+\t\tfor (i = 0; i < TV_MAX_GOTO; ++i) mvwaddch(tv->wgoto, 1, 8 + i, \' \');\n+\t\tmvwprintw(tv->wgoto, 1, 8, "%s", str);\n+\t}\n+}\n+\n+static void tv_win_help(tview_t *tv) {\n+\tint r = 1;\n+\tWINDOW *win = tv->whelp;\n+\twborder(win, \'|\', \'|\', \'-\', \'-\', \'+\', \'+\', \'+\', \'+\');\n+\tmvwprintw(win, r++, 2, " -=- Help -=- ");\n+\tr++;\n+\tmvwprintw(win, r++, 2, "? 
This window");\n+\tmvwprintw(win, r++, 2, "Arrows Small scroll movement");\n+\tmvwprintw(win, r++, 2, "h,j,k,l Small scroll movement");\n+\tmvwprintw(win, r++, 2, "H,J,K,L Large scroll movement");\n+\tmvwprintw(win, r++, 2, "ctrl-H Scroll 1k left");\n+\tmvwprintw(win, r++, 2, "ctrl-L Scroll 1k right");\n+\tmvwprintw(win, r++, 2, "space Scroll one screen");\n+\tmvwprintw(win, r++, 2, "backspace Scroll back one screen");\n+\tmvwprintw(win, r++, 2, "g Go to specific location");\n+\tmvwprintw(win, r++, 2, "m Color for mapping qual");\n+\tmvwprintw(win, r++, 2, "n Color for nucleotide");\n+\tmvwprintw(win, r++, 2, "b Color for base quality");\n+\tmvwprintw(win, r++, 2, "c Color for cs color");\n+\tmvwprintw(win, r++, 2, "z Color for cs qual");\n+\tmvwprintw(win, r++, 2, ". Toggle on/off dot view");\n+\tmvwprintw(win, r++, 2, "s Toggle on/off ref skip");\n+\tmvwprintw(win, r++, 2, "r Toggle on/off rd name");\n+\tmvwprintw(win, r++, 2, "N Turn on nt view");\n+\tmvwprintw(win, r++, 2, "C Turn on cs view");\n+\tmvwprintw(win, r++, 2, "i Toggle on/off ins");\n+\tmvwprintw(win, r++, 2, "q Exit");\n+\tr++;\n+\tmvwprintw(win, r++, 2, "Underline: Secondary or orphan");\n+\tmvwprintw(win, r++, 2, "Blue: 0-9 Green: 10-19");\n+\tmvwprintw(win, r++, 2, "Yellow: 20-29 White: >=30");\n+\twrefresh(win);\n+\twgetch(win);\n+}\n+\n+void tv_loop(tview_t *tv)\n+{\n+\tint tid, pos;\n+\ttid = tv->curr_tid; pos = tv->left_pos;\n+\twhile (1) {\n+\t\tint c = getch();\n+\t\tswitch (c) {\n+\t\t\tcase \'?\': tv_win_help(tv); break;\n+\t\t\tcase \'\\033\':\n+\t\t\tcase \'q\': goto end_loop;\n+\t\t\tcase \'/\': \n+\t\t\tcase \'g\': tv_win_goto(tv, &tid, &pos); break;\n+\t\t\tcase \'m\': tv->color_for = TV_COLOR_MAPQ; break;\n+\t\t\tcase \'b\': tv->color_for = TV_COLOR_BASEQ; break;\n+\t\t\tcase \'n\': tv->color_for = TV_COLOR_NUCL; break;\n+\t\t\tcase \'c\': tv->color_for = TV_COLOR_COL; break;\n+\t\t\tcase \'z\': tv->color_for = TV_COLOR_COLQ; break;\n+\t\t\tcase \'s\': tv->no_skip = !tv->no_skip; 
break;\n+\t\t\tcase \'r\': tv->show_name = !tv->show_name; break;\n+\t\t\tcase KEY_LEFT:\n+\t\t\tcase \'h\': --pos; break;\n+\t\t\tcase KEY_RIGHT:\n+\t\t\tcase \'l\': ++pos; break;\n+\t\t\tcase KEY_SLEFT:\n+\t\t\tcase \'H\': pos -= 20; break;\n+\t\t\tcase KEY_SRIGHT:\n+\t\t\tcase \'L\': pos += 20; break;\n+\t\t\tcase \'.\': tv->is_dot = !tv->is_dot; break;\n+\t\t\tcase \'N\': tv->base_for = TV_BASE_NUCL; break;\n+\t\t\tcase \'C\': tv->base_for = TV_BASE_COLOR_SPACE; break;\n+\t\t\tcase \'i\': tv->ins = !tv->ins; break;\n+\t\t\tcase \'\\010\': pos -= 1000; break;\n+\t\t\tcase \'\\014\': pos += 1000; break;\n+\t\t\tcase \' \': pos += tv->mcol; break;\n+\t\t\tcase KEY_UP:\n+\t\t\tcase \'j\': --tv->row_shift; break;\n+\t\t\tcase KEY_DOWN:\n+\t\t\tcase \'k\': ++tv->row_shift; break;\n+\t\t\tcase KEY_BACKSPACE:\n+\t\t\tcase \'\\177\': pos -= tv->mcol; break;\n+\t\t\tcase KEY_RESIZE: getmaxyx(stdscr, tv->mrow, tv->mcol); break;\n+\t\t\tdefault: continue;\n+\t\t}\n+\t\tif (pos < 0) pos = 0;\n+\t\tif (tv->row_shift < 0) tv->row_shift = 0;\n+\t\ttv_draw_aln(tv, tid, pos);\n+\t}\n+end_loop:\n+\treturn;\n+}\n+\n+int bam_tview_main(int argc, char *argv[])\n+{\n+\ttview_t *tv;\n+\tif (argc == 1) {\n+\t\tfprintf(stderr, "Usage: bamtk tview <aln.bam> [ref.fasta]\\n");\n+\t\treturn 1;\n+\t}\n+\ttv = tv_init(argv[1], (argc == 2)? 0 : argv[2]);\n+\ttv_draw_aln(tv, 0, 0);\n+\ttv_loop(tv);\n+\ttv_destroy(tv);\n+\treturn 0;\n+}\n+#else // #ifdef _HAVE_CURSES\n+#include <stdio.h>\n+#warning "No curses library is available; tview is disabled."\n+int bam_tview_main(int argc, char *argv[])\n+{\n+\tfprintf(stderr, "[bam_tview_main] The ncurses library is unavailable; tview is not compiled.\\n");\n+\treturn 1;\n+}\n+#endif // #ifdef _HAVE_CURSES\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bgzf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bgzf.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,671 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology\n+\n+ Permission is hereby granted, free of charge, to any person obtaining a copy\n+ of this software and associated documentation files (the "Software"), to deal\n+ in the Software without restriction, including without limitation the rights\n+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+ copies of the Software, and to permit persons to whom the Software is\n+ furnished to do so, subject to the following conditions:\n+\n+ The above copyright notice and this permission notice shall be included in\n+ all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n+ THE SOFTWARE.\n+*/\n+\n+/*\n+ 2009-06-29 by lh3: cache recent uncompressed blocks.\n+ 2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP.\n+ 2009-06-12 by lh3: support a mode string like "wu" where \'u\' for uncompressed output */\n+\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <unistd.h>\n+#include <fcntl.h>\n+#include <sys/types.h>\n+#include <sys/stat.h>\n+#include "bgzf.h"\n+\n+#include "khash.h"\n+typedef struct {\n+\tint size;\n+\tuint8_t *block;\n+\tint64_t end_offset;\n+} cache_t;\n+KHASH_MAP_INIT_INT64(cache, cache_t)\n+\n+#if defined(_WIN32) || defined(_MSC_VER)\n+#define ftello(fp) ftell(fp)\n+#define fseeko(fp, offset, whence) fseek(fp, offset, whence)\n+#else\n+extern off_t ftello(FILE *stream);\n+extern int fseeko(FILE *stream, off_t 
offset, int whence);\n+#endif\n+\n+typedef int8_t bgzf_byte_t;\n+\n+static const int DEFAULT_BLOCK_SIZE = 64 * 1024;\n+static const int MAX_BLOCK_SIZE = 64 * 1024;\n+\n+static const int BLOCK_HEADER_LENGTH = 18;\n+static const int BLOCK_FOOTER_LENGTH = 8;\n+\n+static const int GZIP_ID1 = 31;\n+static const int GZIP_ID2 = 139;\n+static const int CM_DEFLATE = 8;\n+static const int FLG_FEXTRA = 4;\n+static const int OS_UNKNOWN = 255;\n+static const int BGZF_ID1 = 66; // \'B\'\n+static const int BGZF_ID2 = 67; // \'C\'\n+static const int BGZF_LEN = 2;\n+static const int BGZF_XLEN = 6; // BGZF_LEN+4\n+\n+static const int GZIP_WINDOW_BITS = -15; // no zlib header\n+static const int Z_DEFAULT_MEM_LEVEL = 8;\n+\n+\n+inline\n+void\n+packInt16(uint8_t* buffer, uint16_t value)\n+{\n+ buffer[0] = value;\n+ buffer[1] = value >> 8;\n+}\n+\n+inline\n+int\n+unpackInt16(const uint8_t* buffer)\n+{\n+ return (buffer[0] | (buffer[1] << 8));\n+}\n+\n+inline\n+void\n+packInt32(uint8_t* buffer, uint32_t value)\n+{\n+ buffer[0] = value;\n+ buffer[1] = value >> 8;\n+ buffer[2] = value >> 16;\n+ buffer[3] = value >> 24;\n+}\n+\n+static inline\n+int\n+bgzf_min(int x, int y)\n+{\n+ return (x < y) ? 
x : y;\n+}\n+\n+static\n+void\n+report_error(BGZF* fp, const char* message) {\n+ fp->error = message;\n+}\n+\n+static BGZF *bgzf_read_init()\n+{\n+\tBGZF *fp;\n+\tfp = calloc(1, sizeof(BGZF));\n+ fp->uncompressed_block_size = MAX_BLOCK_SIZE;\n+ fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);\n+ fp->compressed_block_size = MAX_BLOCK_SIZE;\n+ fp->compressed_block = malloc(MAX_BLOCK_SIZE);\n+\tfp->cache_size = 0;\n+\tfp->cache = kh_init(cache);\n+\treturn fp;\n+}\n+\n+static\n+BGZF*\n+open_read(int fd)\n+{\n+#ifdef _USE_KNETFILE\n+ knetFile *file = knet_dopen(fd, "r");\n+#else\n+ FILE* file = fdopen(fd, "r");\n+#endif\n+ BGZF* fp;\n+\tif (file == 0) return 0;\n+\tfp = bgzf_read_init();\n+ fp->file_descriptor = fd;\n+ fp->open_mode = \'r\';\n+#ifdef _USE_KNETFILE\n+ fp->x.fpr = file;\n+#else\n+ fp->file = file;\n+#endif\n+ return fp;\n+}\n+\n+static\n+BGZF*\n+open_write(int'..b'_length = deflate_block(fp, fp->block_offset);\n+ if (block_length < 0) return -1;\n+#ifdef _USE_KNETFILE\n+ count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);\n+#else\n+ count = fwrite(fp->compressed_block, 1, block_length, fp->file);\n+#endif\n+ if (count != block_length) {\n+ report_error(fp, "write failed");\n+ return -1;\n+ }\n+ fp->block_address += block_length;\n+ }\n+ return 0;\n+}\n+\n+int bgzf_flush_try(BGZF *fp, int size)\n+{\n+\tif (fp->block_offset + size > fp->uncompressed_block_size)\n+\t\treturn bgzf_flush(fp);\n+\treturn -1;\n+}\n+\n+int bgzf_write(BGZF* fp, const void* data, int length)\n+{\n+ if (fp->open_mode != \'w\') {\n+ report_error(fp, "file not open for writing");\n+ return -1;\n+ }\n+\n+ if (fp->uncompressed_block == NULL)\n+ fp->uncompressed_block = malloc(fp->uncompressed_block_size);\n+\n+ const bgzf_byte_t* input = data;\n+ int block_length = fp->uncompressed_block_size;\n+ int bytes_written = 0;\n+ while (bytes_written < length) {\n+ int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);\n+ bgzf_byte_t* buffer = 
fp->uncompressed_block;\n+ memcpy(buffer + fp->block_offset, input, copy_length);\n+ fp->block_offset += copy_length;\n+ input += copy_length;\n+ bytes_written += copy_length;\n+ if (fp->block_offset == block_length) {\n+ if (bgzf_flush(fp) != 0) {\n+ break;\n+ }\n+ }\n+ }\n+ return bytes_written;\n+}\n+\n+int bgzf_close(BGZF* fp)\n+{\n+ if (fp->open_mode == \'w\') {\n+ if (bgzf_flush(fp) != 0) return -1;\n+\t\t{ // add an empty block\n+\t\t\tint count, block_length = deflate_block(fp, 0);\n+#ifdef _USE_KNETFILE\n+\t\t\tcount = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);\n+#else\n+\t\t\tcount = fwrite(fp->compressed_block, 1, block_length, fp->file);\n+#endif\n+\t\t}\n+#ifdef _USE_KNETFILE\n+ if (fflush(fp->x.fpw) != 0) {\n+#else\n+ if (fflush(fp->file) != 0) {\n+#endif\n+ report_error(fp, "flush failed");\n+ return -1;\n+ }\n+ }\n+ if (fp->owned_file) {\n+#ifdef _USE_KNETFILE\n+\t\tint ret;\n+\t\tif (fp->open_mode == \'w\') ret = fclose(fp->x.fpw);\n+\t\telse ret = knet_close(fp->x.fpr);\n+ if (ret != 0) return -1;\n+#else\n+ if (fclose(fp->file) != 0) return -1;\n+#endif\n+ }\n+ free(fp->uncompressed_block);\n+ free(fp->compressed_block);\n+\tfree_cache(fp);\n+ free(fp);\n+ return 0;\n+}\n+\n+void bgzf_set_cache_size(BGZF *fp, int cache_size)\n+{\n+\tif (fp) fp->cache_size = cache_size;\n+}\n+\n+int bgzf_check_EOF(BGZF *fp)\n+{\n+\tstatic uint8_t magic[28] = "\\037\\213\\010\\4\\0\\0\\0\\0\\0\\377\\6\\0\\102\\103\\2\\0\\033\\0\\3\\0\\0\\0\\0\\0\\0\\0\\0\\0";\n+\tuint8_t buf[28];\n+\toff_t offset;\n+#ifdef _USE_KNETFILE\n+\toffset = knet_tell(fp->x.fpr);\n+\tif (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;\n+\tknet_read(fp->x.fpr, buf, 28);\n+\tknet_seek(fp->x.fpr, offset, SEEK_SET);\n+#else\n+\toffset = ftello(fp->file);\n+\tif (fseeko(fp->file, -28, SEEK_END) != 0) return -1;\n+\tfread(buf, 1, 28, fp->file);\n+\tfseeko(fp->file, offset, SEEK_SET);\n+#endif\n+\treturn (memcmp(magic, buf, 28) == 0)? 
1 : 0;\n+}\n+\n+int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)\n+{\n+\tint block_offset;\n+\tint64_t block_address;\n+\n+ if (fp->open_mode != \'r\') {\n+ report_error(fp, "file not open for read");\n+ return -1;\n+ }\n+ if (where != SEEK_SET) {\n+ report_error(fp, "unimplemented seek option");\n+ return -1;\n+ }\n+ block_offset = pos & 0xFFFF;\n+ block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;\n+#ifdef _USE_KNETFILE\n+ if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {\n+#else\n+ if (fseeko(fp->file, block_address, SEEK_SET) != 0) {\n+#endif\n+ report_error(fp, "seek failed");\n+ return -1;\n+ }\n+ fp->block_length = 0; // indicates current block is not loaded\n+ fp->block_address = block_address;\n+ fp->block_offset = block_offset;\n+ return 0;\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/bgzf.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/bgzf.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,157 @@ +/* The MIT License + + Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#ifndef __BGZF_H +#define __BGZF_H + +#include <stdint.h> +#include <stdio.h> +#include <stdbool.h> +#include <zlib.h> +#ifdef _USE_KNETFILE +#include "knetfile.h" +#endif + +//typedef int8_t bool; + +typedef struct { + int file_descriptor; + char open_mode; // 'r' or 'w' + bool owned_file, is_uncompressed; +#ifdef _USE_KNETFILE + union { + knetFile *fpr; + FILE *fpw; + } x; +#else + FILE* file; +#endif + int uncompressed_block_size; + int compressed_block_size; + void* uncompressed_block; + void* compressed_block; + int64_t block_address; + int block_length; + int block_offset; + int cache_size; + const char* error; + void *cache; // a pointer to a hash table +} BGZF; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Open an existing file descriptor for reading or writing. 
+ * Mode must be either "r" or "w". + * A subsequent bgzf_close will not close the file descriptor. + * Returns null on error. + */ +BGZF* bgzf_fdopen(int fd, const char* __restrict mode); + +/* + * Open the specified file for reading or writing. + * Mode must be either "r" or "w". + * Returns null on error. + */ +BGZF* bgzf_open(const char* path, const char* __restrict mode); + +/* + * Close the BGZ file and free all associated resources. + * Does not close the underlying file descriptor if created with bgzf_fdopen. + * Returns zero on success, -1 on error. + */ +int bgzf_close(BGZF* fp); + +/* + * Read up to length bytes from the file storing into data. + * Returns the number of bytes actually read. + * Returns zero on end of file. + * Returns -1 on error. + */ +int bgzf_read(BGZF* fp, void* data, int length); + +/* + * Write length bytes from data to the file. + * Returns the number of bytes written. + * Returns -1 on error. + */ +int bgzf_write(BGZF* fp, const void* data, int length); + +/* + * Return a virtual file pointer to the current location in the file. + * No interpetation of the value should be made, other than a subsequent + * call to bgzf_seek can be used to position the file at the same point. + * Return value is non-negative on success. + * Returns -1 on error. + */ +#define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)) + +/* + * Set the file to read from the location specified by pos, which must + * be a value previously returned by bgzf_tell for this file (but not + * necessarily one returned by this file handle). + * The where argument must be SEEK_SET. + * Seeking on a file opened for write is not supported. + * Returns zero on success, -1 on error. + */ +int64_t bgzf_seek(BGZF* fp, int64_t pos, int where); + +/* + * Set the cache size. Zero to disable. By default, caching is + * disabled. The recommended cache size for frequent random access is + * about 8M bytes. 
+ */ +void bgzf_set_cache_size(BGZF *fp, int cache_size); + +int bgzf_check_EOF(BGZF *fp); +int bgzf_read_block(BGZF* fp); +int bgzf_flush(BGZF* fp); +int bgzf_flush_try(BGZF *fp, int size); + +#ifdef __cplusplus +} +#endif + +static inline int bgzf_getc(BGZF *fp) +{ + int c; + if (fp->block_offset >= fp->block_length) { + if (bgzf_read_block(fp) != 0) return -2; /* error */ + if (fp->block_length == 0) return -1; /* end-of-file */ + } + c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; + if (fp->block_offset == fp->block_length) { +#ifdef _USE_KNETFILE + fp->block_address = knet_tell(fp->x.fpr); +#else + fp->block_address = ftello(fp->file); +#endif + fp->block_offset = 0; + fp->block_length = 0; + } + return c; +} + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/faidx.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/faidx.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,422 @@\n+#include <ctype.h>\n+#include <string.h>\n+#include <stdlib.h>\n+#include <stdio.h>\n+#include "faidx.h"\n+#include "khash.h"\n+\n+typedef struct {\n+\tuint64_t len:32, line_len:16, line_blen:16;\n+\tuint64_t offset;\n+} faidx1_t;\n+KHASH_MAP_INIT_STR(s, faidx1_t)\n+\n+#ifndef _NO_RAZF\n+#include "razf.h"\n+#else\n+#ifdef _WIN32\n+#define ftello(fp) ftell(fp)\n+#define fseeko(fp, offset, whence) fseek(fp, offset, whence)\n+#else\n+extern off_t ftello(FILE *stream);\n+extern int fseeko(FILE *stream, off_t offset, int whence);\n+#endif\n+#define RAZF FILE\n+#define razf_read(fp, buf, size) fread(buf, 1, size, fp)\n+#define razf_open(fn, mode) fopen(fn, mode)\n+#define razf_close(fp) fclose(fp)\n+#define razf_seek(fp, offset, whence) fseeko(fp, offset, whence)\n+#define razf_tell(fp) ftello(fp)\n+#endif\n+#ifdef _USE_KNETFILE\n+#include "knetfile.h"\n+#endif\n+\n+struct __faidx_t {\n+\tRAZF *rz;\n+\tint n, m;\n+\tchar **name;\n+\tkhash_t(s) *hash;\n+};\n+\n+#ifndef kroundup32\n+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))\n+#endif\n+\n+static inline void fai_insert_index(faidx_t *idx, const char *name, int len, int line_len, int line_blen, uint64_t offset)\n+{\n+\tkhint_t k;\n+\tint ret;\n+\tfaidx1_t t;\n+\tif (idx->n == idx->m) {\n+\t\tidx->m = idx->m? 
idx->m<<1 : 16;\n+\t\tidx->name = (char**)realloc(idx->name, sizeof(void*) * idx->m);\n+\t}\n+\tidx->name[idx->n] = strdup(name);\n+\tk = kh_put(s, idx->hash, idx->name[idx->n], &ret);\n+\tt.len = len; t.line_len = line_len; t.line_blen = line_blen; t.offset = offset;\n+\tkh_value(idx->hash, k) = t;\n+\t++idx->n;\n+}\n+\n+faidx_t *fai_build_core(RAZF *rz)\n+{\n+\tchar c, *name;\n+\tint l_name, m_name, ret;\n+\tint len, line_len, line_blen, state;\n+\tint l1, l2;\n+\tfaidx_t *idx;\n+\tuint64_t offset;\n+\n+\tidx = (faidx_t*)calloc(1, sizeof(faidx_t));\n+\tidx->hash = kh_init(s);\n+\tname = 0; l_name = m_name = 0;\n+\tlen = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0;\n+\twhile (razf_read(rz, &c, 1)) {\n+\t\tif (c == \'\\n\') { // an empty line\n+\t\t\tif (state == 1) {\n+\t\t\t\toffset = razf_tell(rz);\n+\t\t\t\tcontinue;\n+\t\t\t} else if ((state == 0 && len < 0) || state == 2) continue;\n+\t\t}\n+\t\tif (c == \'>\') { // fasta header\n+\t\t\tif (len >= 0)\n+\t\t\t\tfai_insert_index(idx, name, len, line_len, line_blen, offset);\n+\t\t\tl_name = 0;\n+\t\t\twhile ((ret = razf_read(rz, &c, 1)) != 0 && !isspace(c)) {\n+\t\t\t\tif (m_name < l_name + 2) {\n+\t\t\t\t\tm_name = l_name + 2;\n+\t\t\t\t\tkroundup32(m_name);\n+\t\t\t\t\tname = (char*)realloc(name, m_name);\n+\t\t\t\t}\n+\t\t\t\tname[l_name++] = c;\n+\t\t\t}\n+\t\t\tname[l_name] = \'\\0\';\n+\t\t\tif (ret == 0) {\n+\t\t\t\tfprintf(stderr, "[fai_build_core] the last entry has no sequence\\n");\n+\t\t\t\tfree(name); fai_destroy(idx);\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\t\t\tif (c != \'\\n\') while (razf_read(rz, &c, 1) && c != \'\\n\');\n+\t\t\tstate = 1; len = 0;\n+\t\t\toffset = razf_tell(rz);\n+\t\t} else {\n+\t\t\tif (state == 3) {\n+\t\t\t\tfprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence \'%s\'.\\n", name);\n+\t\t\t\tfree(name); fai_destroy(idx);\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\t\t\tif (state == 2) state = 3;\n+\t\t\tl1 = l2 = 0;\n+\t\t\tdo 
{\n+\t\t\t\t++l1;\n+\t\t\t\tif (isgraph(c)) ++l2;\n+\t\t\t} while ((ret = razf_read(rz, &c, 1)) && c != \'\\n\');\n+\t\t\tif (state == 3 && l2) {\n+\t\t\t\tfprintf(stderr, "[fai_build_core] different line length in sequence \'%s\'.\\n", name);\n+\t\t\t\tfree(name); fai_destroy(idx);\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\t\t\t++l1; len += l2;\n+\t\t\tif (l2 >= 0x10000) {\n+\t\t\t\tfprintf(stderr, "[fai_build_core] line length exceeds 65535 in sequence \'%s\'.\\n", name);\n+\t\t\t\tfree(name); fai_destroy(idx);\n+\t\t\t\treturn 0;\n+\t\t\t}\n+\t\t\tif (state == 1) line_len = l1, line_blen = l2, state = 0;\n+\t\t\telse if (state == 0) {\n+\t\t\t\tif (l1 != line_len || l2 != line_blen) state = 2;\n+\t\t\t}\n+\t\t}\n+\t}\n+\tfai_insert_index(idx, name, len, line_len, line_blen, offset);\n+\tfree(name);\n+\treturn idx;\n+}\n+\n+void fai_save(const faidx_t *fai, FILE *fp)\n+{\n+\tkhint_t k;\n+\tint i;\n+\tfor (i = 0; i < fai->n; ++i) {\n+\t\tfaidx1_t x;\n+\t\tk = kh_get(s, fai->hash, fai->name[i]);\n+\t\tx = kh_value(fai->hash, k);\n+#ifdef _WIN32\n+\t\tfprintf(fp, "%s\\t%'..b' knet_close(fp_remote);\n+\n+ return fopen(fn, "r");\n+}\n+#endif\n+\n+faidx_t *fai_load(const char *fn)\n+{\n+\tchar *str;\n+\tFILE *fp;\n+\tfaidx_t *fai;\n+\tstr = (char*)calloc(strlen(fn) + 5, 1);\n+\tsprintf(str, "%s.fai", fn);\n+\n+#ifdef _USE_KNETFILE\n+ if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)\n+ {\n+ fp = download_and_open(str);\n+ if ( !fp )\n+ {\n+ fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\\n", str);\n+ free(str);\n+ return 0;\n+ }\n+ }\n+ else\n+#endif\n+ fp = fopen(str, "rb");\n+\tif (fp == 0) {\n+\t\tfprintf(stderr, "[fai_load] build FASTA index.\\n");\n+\t\tfai_build(fn);\n+\t\tfp = fopen(str, "rb");\n+\t\tif (fp == 0) {\n+\t\t\tfprintf(stderr, "[fai_load] fail to open FASTA index.\\n");\n+\t\t\tfree(str);\n+\t\t\treturn 0;\n+\t\t}\n+\t}\n+\n+\tfai = fai_read(fp);\n+\tfclose(fp);\n+\n+\tfai->rz = razf_open(fn, "rb");\n+\tfree(str);\n+\tif 
(fai->rz == 0) {\n+\t\tfprintf(stderr, "[fai_load] fail to open FASTA file.\\n");\n+\t\treturn 0;\n+\t}\n+\treturn fai;\n+}\n+\n+char *fai_fetch(const faidx_t *fai, const char *str, int *len)\n+{\n+\tchar *s, *p, c;\n+\tint i, l, k;\n+\tkhiter_t iter;\n+\tfaidx1_t val;\n+\tkhash_t(s) *h;\n+\tint beg, end;\n+\n+\tbeg = end = -1;\n+\th = fai->hash;\n+\tl = strlen(str);\n+\tp = s = (char*)malloc(l+1);\n+\t/* squeeze out "," */\n+\tfor (i = k = 0; i != l; ++i)\n+\t\tif (str[i] != \',\' && !isspace(str[i])) s[k++] = str[i];\n+\ts[k] = 0;\n+\tfor (i = 0; i != k; ++i) if (s[i] == \':\') break;\n+\ts[i] = 0;\n+\titer = kh_get(s, h, s); /* get the ref_id */\n+\tif (iter == kh_end(h)) {\n+\t\t*len = 0;\n+\t\tfree(s); return 0;\n+\t}\n+\tval = kh_value(h, iter);\n+\tif (i == k) { /* dump the whole sequence */\n+\t\tbeg = 0; end = val.len;\n+\t} else {\n+\t\tfor (p = s + i + 1; i != k; ++i) if (s[i] == \'-\') break;\n+\t\tbeg = atoi(p);\n+\t\tif (i < k) {\n+\t\t\tp = s + i + 1;\n+\t\t\tend = atoi(p);\n+\t\t} else end = val.len;\n+\t}\n+\tif (beg > 0) --beg;\n+\tif (beg >= val.len) beg = val.len;\n+\tif (end >= val.len) end = val.len;\n+\tif (beg > end) beg = end;\n+\tfree(s);\n+\n+\t// now retrieve the sequence\n+\tl = 0;\n+\ts = (char*)malloc(end - beg + 2);\n+\trazf_seek(fai->rz, val.offset + beg / val.line_blen * val.line_len + beg % val.line_blen, SEEK_SET);\n+\twhile (razf_read(fai->rz, &c, 1) == 1 && l < end - beg && !fai->rz->z_err)\n+\t\tif (isgraph(c)) s[l++] = c;\n+\ts[l] = \'\\0\';\n+\t*len = l;\n+\treturn s;\n+}\n+\n+int faidx_main(int argc, char *argv[])\n+{\n+\tif (argc == 1) {\n+\t\tfprintf(stderr, "Usage: faidx <in.fasta> [<reg> [...]]\\n");\n+\t\treturn 1;\n+\t} else {\n+\t\tif (argc == 2) fai_build(argv[1]);\n+\t\telse {\n+\t\t\tint i, j, k, l;\n+\t\t\tchar *s;\n+\t\t\tfaidx_t *fai;\n+\t\t\tfai = fai_load(argv[1]);\n+\t\t\tif (fai == 0) return 1;\n+\t\t\tfor (i = 2; i != argc; ++i) {\n+\t\t\t\tprintf(">%s\\n", argv[i]);\n+\t\t\t\ts = fai_fetch(fai, argv[i], 
&l);\n+\t\t\t\tfor (j = 0; j < l; j += 60) {\n+\t\t\t\t\tfor (k = 0; k < 60 && k < l - j; ++k)\n+\t\t\t\t\t\tputchar(s[j + k]);\n+\t\t\t\t\tputchar(\'\\n\');\n+\t\t\t\t}\n+\t\t\t\tfree(s);\n+\t\t\t}\n+\t\t\tfai_destroy(fai);\n+\t\t}\n+\t}\n+\treturn 0;\n+}\n+\n+int faidx_fetch_nseq(const faidx_t *fai) \n+{\n+\treturn fai->n;\n+}\n+\n+char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len)\n+{\n+\tint l;\n+\tchar c;\n+ khiter_t iter;\n+ faidx1_t val;\n+\tchar *seq=NULL;\n+\n+ // Adjust position\n+ iter = kh_get(s, fai->hash, c_name);\n+ if(iter == kh_end(fai->hash)) return 0;\n+ val = kh_value(fai->hash, iter);\n+\tif(p_end_i < p_beg_i) p_beg_i = p_end_i;\n+ if(p_beg_i < 0) p_beg_i = 0;\n+ else if(val.len <= p_beg_i) p_beg_i = val.len - 1;\n+ if(p_end_i < 0) p_end_i = 0;\n+ else if(val.len <= p_end_i) p_end_i = val.len - 1;\n+\n+ // Now retrieve the sequence \n+\tl = 0;\n+\tseq = (char*)malloc(p_end_i - p_beg_i + 2);\n+\trazf_seek(fai->rz, val.offset + p_beg_i / val.line_blen * val.line_len + p_beg_i % val.line_blen, SEEK_SET);\n+\twhile (razf_read(fai->rz, &c, 1) == 1 && l < p_end_i - p_beg_i + 1)\n+\t\tif (isgraph(c)) seq[l++] = c;\n+\tseq[l] = \'\\0\';\n+\t*len = l;\n+\treturn seq;\n+}\n+\n+#ifdef FAIDX_MAIN\n+int main(int argc, char *argv[]) { return faidx_main(argc, argv); }\n+#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/faidx.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/faidx.h Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,103 @@ +/* The MIT License + + Copyright (c) 2008 Genome Research Ltd (GRL). + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* Contact: Heng Li <lh3@sanger.ac.uk> */ + +#ifndef FAIDX_H +#define FAIDX_H + +/*! + @header + + Index FASTA files and extract subsequence. + + @copyright The Wellcome Trust Sanger Institute. + */ + +struct __faidx_t; +typedef struct __faidx_t faidx_t; + +#ifdef __cplusplus +extern "C" { +#endif + + /*! + @abstract Build index for a FASTA or razip compressed FASTA file. + @param fn FASTA file name + @return 0 on success; or -1 on failure + @discussion File "fn.fai" will be generated. + */ + int fai_build(const char *fn); + + /*! + @abstract Distroy a faidx_t struct. + @param fai Pointer to the struct to be destroyed + */ + void fai_destroy(faidx_t *fai); + + /*! + @abstract Load index from "fn.fai". + @param fn File name of the FASTA file + */ + faidx_t *fai_load(const char *fn); + + /*! 
+ @abstract Fetch the sequence in a region. + @param fai Pointer to the faidx_t struct + @param reg Region in the format "chr2:20,000-30,000" + @param len Length of the region + @return Pointer to the sequence; null on failure + + @discussion The returned sequence is allocated by malloc family + and should be destroyed by end users by calling free() on it. + */ + char *fai_fetch(const faidx_t *fai, const char *reg, int *len); + + /*! + @abstract Fetch the number of sequences. + @param fai Pointer to the faidx_t struct + @return The number of sequences + */ + int faidx_fetch_nseq(const faidx_t *fai); + + /*! + @abstract Fetch the sequence in a region. + @param fai Pointer to the faidx_t struct + @param c_name Region name + @param p_beg_i Beginning position number (zero-based) + @param p_end_i End position number (zero-based) + @param len Length of the region + @return Pointer to the sequence; null on failure + + @discussion The returned sequence is allocated by malloc family + and should be destroyed by end users by calling free() on it. + */ + char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len); + +#ifdef __cplusplus +} +#endif + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/glf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/glf.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,236 @@ +#include <string.h> +#include <stdlib.h> +#include "glf.h" + +#ifdef _NO_BGZF +// then alias bgzf_*() functions +#endif + +static int glf3_is_BE = 0; + +static inline uint32_t bam_swap_endian_4(uint32_t v) +{ + v = ((v & 0x0000FFFFU) << 16) | (v >> 16); + return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); +} + +static inline uint16_t bam_swap_endian_2(uint16_t v) +{ + return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); +} + +static inline int bam_is_big_endian() +{ + long one= 1; + return !(*((char *)(&one))); +} + +glf3_header_t *glf3_header_init() +{ + glf3_is_BE = bam_is_big_endian(); + return (glf3_header_t*)calloc(1, sizeof(glf3_header_t)); +} + +glf3_header_t *glf3_header_read(glfFile fp) +{ + glf3_header_t *h; + char magic[4]; + h = glf3_header_init(); + bgzf_read(fp, magic, 4); + if (strncmp(magic, "GLF\3", 4)) { + fprintf(stderr, "[glf3_header_read] invalid magic.\n"); + glf3_header_destroy(h); + return 0; + } + bgzf_read(fp, &h->l_text, 4); + if (glf3_is_BE) h->l_text = bam_swap_endian_4(h->l_text); + if (h->l_text) { + h->text = (uint8_t*)calloc(h->l_text + 1, 1); + bgzf_read(fp, h->text, h->l_text); + } + return h; +} + +void glf3_header_write(glfFile fp, const glf3_header_t *h) +{ + int32_t x; + bgzf_write(fp, "GLF\3", 4); + x = glf3_is_BE? 
bam_swap_endian_4(h->l_text) : h->l_text; + bgzf_write(fp, &x, 4); + if (h->l_text) bgzf_write(fp, h->text, h->l_text); +} + +void glf3_header_destroy(glf3_header_t *h) +{ + free(h->text); + free(h); +} + +char *glf3_ref_read(glfFile fp, int *len) +{ + int32_t n, x; + char *str; + *len = 0; + if (bgzf_read(fp, &n, 4) != 4) return 0; + if (glf3_is_BE) n = bam_swap_endian_4(n); + if (n < 0) { + fprintf(stderr, "[glf3_ref_read] invalid reference name length: %d.\n", n); + return 0; + } + str = (char*)calloc(n + 1, 1); // not necesarily n+1 in fact + x = bgzf_read(fp, str, n); + x += bgzf_read(fp, len, 4); + if (x != n + 4) { + free(str); *len = -1; return 0; // truncated + } + if (glf3_is_BE) *len = bam_swap_endian_4(*len); + return str; +} + +void glf3_ref_write(glfFile fp, const char *str, int len) +{ + int32_t m, n = strlen(str) + 1; + m = glf3_is_BE? bam_swap_endian_4(n) : n; + bgzf_write(fp, &m, 4); + bgzf_write(fp, str, n); + if (glf3_is_BE) len = bam_swap_endian_4(len); + bgzf_write(fp, &len, 4); +} + +void glf3_view1(const char *ref_name, const glf3_t *g3, int pos) +{ + int j; + if (g3->rtype == GLF3_RTYPE_END) return; + printf("%s\t%d\t%c\t%d\t%d\t%d", ref_name, pos + 1, + g3->rtype == GLF3_RTYPE_INDEL? '*' : "XACMGRSVTWYHKDBN"[g3->ref_base], + g3->depth, g3->rms_mapQ, g3->min_lk); + if (g3->rtype == GLF3_RTYPE_SUB) + for (j = 0; j != 10; ++j) printf("\t%d", g3->lk[j]); + else { + printf("\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t", g3->lk[0], g3->lk[1], g3->lk[2], g3->indel_len[0], g3->indel_len[1], + g3->indel_len[0]? g3->indel_seq[0] : "*", g3->indel_len[1]? 
g3->indel_seq[1] : "*"); + } + printf("\n"); +} + +int glf3_write1(glfFile fp, const glf3_t *g3) +{ + int r; + uint8_t c; + uint32_t y[2]; + c = g3->rtype<<4 | g3->ref_base; + r = bgzf_write(fp, &c, 1); + if (g3->rtype == GLF3_RTYPE_END) return r; + y[0] = g3->offset; + y[1] = g3->min_lk<<24 | g3->depth; + if (glf3_is_BE) { + y[0] = bam_swap_endian_4(y[0]); + y[1] = bam_swap_endian_4(y[1]); + } + r += bgzf_write(fp, y, 8); + r += bgzf_write(fp, &g3->rms_mapQ, 1); + if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_write(fp, g3->lk, 10); + else { + int16_t x[2]; + r += bgzf_write(fp, g3->lk, 3); + x[0] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[0]) : g3->indel_len[0]; + x[1] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[1]) : g3->indel_len[1]; + r += bgzf_write(fp, x, 4); + if (g3->indel_len[0]) r += bgzf_write(fp, g3->indel_seq[0], abs(g3->indel_len[0])); + if (g3->indel_len[1]) r += bgzf_write(fp, g3->indel_seq[1], abs(g3->indel_len[1])); + } + return r; +} + +#ifndef kv_roundup32 +#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +int glf3_read1(glfFile fp, glf3_t *g3) +{ + int r; + uint8_t c; + uint32_t y[2]; + r = bgzf_read(fp, &c, 1); + if (r == 0) return 0; + g3->ref_base = c & 0xf; + g3->rtype = c>>4; + if (g3->rtype == GLF3_RTYPE_END) return r; + r += bgzf_read(fp, y, 8); + if (glf3_is_BE) { + y[0] = bam_swap_endian_4(y[0]); + y[1] = bam_swap_endian_4(y[1]); + } + g3->offset = y[0]; + g3->min_lk = y[1]>>24; + g3->depth = y[1]<<8>>8; + r += bgzf_read(fp, &g3->rms_mapQ, 1); + if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_read(fp, g3->lk, 10); + else { + int16_t x[2], max; + r += bgzf_read(fp, g3->lk, 3); + r += bgzf_read(fp, x, 4); + if (glf3_is_BE) { + x[0] = bam_swap_endian_2(x[0]); + x[1] = bam_swap_endian_2(x[1]); + } + g3->indel_len[0] = x[0]; + g3->indel_len[1] = x[1]; + x[0] = abs(x[0]); x[1] = abs(x[1]); + max = (x[0] > x[1]? 
x[0] : x[1]) + 1; + if (g3->max_len < max) { + g3->max_len = max; + kv_roundup32(g3->max_len); + g3->indel_seq[0] = (char*)realloc(g3->indel_seq[0], g3->max_len); + g3->indel_seq[1] = (char*)realloc(g3->indel_seq[1], g3->max_len); + } + r += bgzf_read(fp, g3->indel_seq[0], x[0]); + r += bgzf_read(fp, g3->indel_seq[1], x[1]); + g3->indel_seq[0][x[0]] = g3->indel_seq[1][x[1]] = 0; + } + return r; +} + +void glf3_view(glfFile fp) +{ + glf3_header_t *h; + char *name; + glf3_t *g3; + int len; + h = glf3_header_read(fp); + g3 = glf3_init1(); + while ((name = glf3_ref_read(fp, &len)) != 0) { + int pos = 0; + while (glf3_read1(fp, g3) && g3->rtype != GLF3_RTYPE_END) { + pos += g3->offset; + glf3_view1(name, g3, pos); + } + free(name); + } + glf3_header_destroy(h); + glf3_destroy1(g3); +} + +int glf3_view_main(int argc, char *argv[]) +{ + glfFile fp; + if (argc == 1) { + fprintf(stderr, "Usage: glfview <in.glf>\n"); + return 1; + } + fp = (strcmp(argv[1], "-") == 0)? bgzf_fdopen(fileno(stdin), "r") : bgzf_open(argv[1], "r"); + if (fp == 0) { + fprintf(stderr, "Fail to open file '%s'\n", argv[1]); + return 1; + } + glf3_view(fp); + bgzf_close(fp); + return 0; +} + +#ifdef GLFVIEW_MAIN +int main(int argc, char *argv[]) +{ + return glf3_view_main(argc, argv); +} +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/glf.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/glf.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,56 @@ +#ifndef GLF_H_ +#define GLF_H_ + +typedef struct { + unsigned char ref_base:4, dummy:4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */ + unsigned char max_mapQ; /** maximum mapping quality */ + unsigned char lk[10]; /** log likelihood ratio, capped at 255 */ + unsigned min_lk:8, depth:24; /** minimum lk capped at 255, and the number of mapped reads */ +} glf1_t; + +#include <stdint.h> +#include "bgzf.h" +typedef BGZF *glfFile; + +#define GLF3_RTYPE_END 0 +#define GLF3_RTYPE_SUB 1 +#define GLF3_RTYPE_INDEL 2 + +typedef struct { + uint8_t ref_base:4, rtype:4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */ + uint8_t rms_mapQ; /** RMS mapping quality */ + uint8_t lk[10]; /** log likelihood ratio, capped at 255 */ + uint32_t min_lk:8, depth:24; /** minimum lk capped at 255, and the number of mapped reads */ + int32_t offset; /** the first base in a chromosome has offset zero. */ + // for indel (lkHom1, lkHom2 and lkHet are the first three elements in lk[10]) + int16_t indel_len[2]; + int32_t max_len; // maximum indel len; will be modified by glf3_read1() + char *indel_seq[2]; +} glf3_t; + +typedef struct { + int32_t l_text; + uint8_t *text; +} glf3_header_t; + +#ifdef __cplusplus +extern "C" { +#endif + +#define glf3_init1() ((glf3_t*)calloc(1, sizeof(glf3_t))) +#define glf3_destroy1(g3) do { free((g3)->indel_seq[0]); free((g3)->indel_seq[1]); free(g3); } while (0) + + glf3_header_t *glf3_header_init(); + glf3_header_t *glf3_header_read(glfFile fp); + void glf3_header_write(glfFile fp, const glf3_header_t *h); + void glf3_header_destroy(glf3_header_t *h); + char *glf3_ref_read(glfFile fp, int *len); + void glf3_ref_write(glfFile fp, const char *name, int len); + int glf3_write1(glfFile fp, const glf3_t *g3); + int glf3_read1(glfFile fp, glf3_t *g3); + +#ifdef __cplusplus +} +#endif + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/kaln.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/kaln.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,370 @@\n+/* The MIT License\n+\n+ Copyright (c) 2003-2006, 2008, 2009, by Heng Li <lh3lh3@gmail.com>\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+#include <stdlib.h>\n+#include <stdio.h>\n+#include <string.h>\n+#include <stdint.h>\n+#include "kaln.h"\n+\n+#define FROM_M 0\n+#define FROM_I 1\n+#define FROM_D 2\n+\n+typedef struct {\n+\tint i, j;\n+\tunsigned char ctype;\n+} path_t;\n+\n+int aln_sm_blosum62[] = {\n+/*\t A R N D C Q E G H I L K M F P S T W Y V * X */\n+\t 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-4, 0,\n+\t-1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-4,-1,\n+\t-2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3,-4,-1,\n+\t-2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3,-4,-1,\n+\t 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-4,-2,\n+\t-1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2,-4,-1,\n+\t-1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 
1,-2,-3,-1, 0,-1,-3,-2,-2,-4,-1,\n+\t 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-4,-1,\n+\t-2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3,-4,-1,\n+\t-1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-4,-1,\n+\t-1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-1,\n+\t-1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2,-4,-1,\n+\t-1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-4,-1,\n+\t-2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-4,-1,\n+\t-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-4,-2,\n+\t 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2,-4, 0,\n+\t 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-4, 0,\n+\t-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-2,\n+\t-2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-4,-1,\n+\t 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-4,-1,\n+\t-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 1,-4,\n+\t 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-4,-1\n+};\n+\n+int aln_sm_blast[] = {\n+\t1, -3, -3, -3, -2,\n+\t-3, 1, -3, -3, -2,\n+\t-3, -3, 1, -3, -2,\n+\t-3, -3, -3, 1, -2,\n+\t-2, -2, -2, -2, -2\n+};\n+\n+ka_param_t ka_param_blast = { 5, 2, 2, aln_sm_blast, 5, 50 };\n+ka_param_t ka_param_aa2aa = { 10, 2, 2, aln_sm_blosum62, 22, 50 };\n+\n+static uint32_t *ka_path2cigar32(const path_t *path, int path_len, int *n_cigar)\n+{\n+\tint i, n;\n+\tuint32_t *cigar;\n+\tunsigned char last_type;\n+\n+\tif (path_len == 0 || path == 0) {\n+\t\t*n_cigar = 0;\n+\t\treturn 0;\n+\t}\n+\n+\tlast_type = path->ctype;\n+\tfor (i = n = 1; i < path_len; ++i) {\n+\t\tif (last_type != path[i].ctype) ++n;\n+\t\tlast_type = path[i].ctype;\n+\t}\n+\t*n_cigar = n;\n+\tcigar = (uint32_t*)calloc(*n_cigar, 4);\n+\n+\tcigar[0] = 1u << 4 | path[path_len-1].ctype;\n+\tlast_type = path[path_len-1].ctype;\n+\tfor (i = path_len - 2, n = 0; i >= 0; --i) {\n+\t\tif 
(path[i].ctype == last_type) cigar[n] += 1u << 4;\n+\t\telse {\n+\t\t\tcigar[++n] = 1u << 4 | path[i].ctype;\n+\t\t\tlast'..b'= last; last = s;\n+\n+\t/* core dynamic programming, part 1 */\n+\ttmp_end = (b2 < len2)? b2 : len2 - 1;\n+\tfor (j = 1; j <= tmp_end; ++j) {\n+\t\tq = dpcell[j]; s = curr; SET_INF(*s);\n+\t\tset_end_I(s->I, q, last);\n+\t\tend = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;\n+\t\tmat = score_matrix + seq2[j] * N_MATRIX_ROW;\n+\t\t++s; ++q;\n+\t\tfor (i = 1; i != end; ++i, ++s, ++q) {\n+\t\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */\n+\t\t\tset_I(s->I, q, last + i);\n+\t\t\tset_D(s->D, q, s - 1);\n+\t\t}\n+\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n+\t\tset_D(s->D, q, s - 1);\n+\t\tif (j + b1 - 1 > len1) { /* bug fixed, 040227 */\n+\t\t\tset_end_I(s->I, q, last + i);\n+\t\t} else s->I = MINOR_INF;\n+\t\ts = curr; curr = last; last = s;\n+\t}\n+\t/* last row for part 1, use set_end_D() instead of set_D() */\n+\tif (j == len2 && b2 != len2 - 1) {\n+\t\tq = dpcell[j]; s = curr; SET_INF(*s);\n+\t\tset_end_I(s->I, q, last);\n+\t\tend = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;\n+\t\tmat = score_matrix + seq2[j] * N_MATRIX_ROW;\n+\t\t++s; ++q;\n+\t\tfor (i = 1; i != end; ++i, ++s, ++q) {\n+\t\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! 
*/\n+\t\t\tset_I(s->I, q, last + i);\n+\t\t\tset_end_D(s->D, q, s - 1);\n+\t\t}\n+\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n+\t\tset_end_D(s->D, q, s - 1);\n+\t\tif (j + b1 - 1 > len1) { /* bug fixed, 040227 */\n+\t\t\tset_end_I(s->I, q, last + i);\n+\t\t} else s->I = MINOR_INF;\n+\t\ts = curr; curr = last; last = s;\n+\t\t++j;\n+\t}\n+\n+\t/* core dynamic programming, part 2 */\n+\tfor (; j <= len2 - b2 + 1; ++j) {\n+\t\tSET_INF(curr[j - b2]);\n+\t\tmat = score_matrix + seq2[j] * N_MATRIX_ROW;\n+\t\tend = j + b1 - 1;\n+\t\tfor (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i != end; ++i, ++s, ++q) {\n+\t\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n+\t\t\tset_I(s->I, q, last + i);\n+\t\t\tset_D(s->D, q, s - 1);\n+\t\t}\n+\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n+\t\tset_D(s->D, q, s - 1);\n+\t\ts->I = MINOR_INF;\n+\t\ts = curr; curr = last; last = s;\n+\t}\n+\n+\t/* core dynamic programming, part 3 */\n+\tfor (; j < len2; ++j) {\n+\t\tSET_INF(curr[j - b2]);\n+\t\tmat = score_matrix + seq2[j] * N_MATRIX_ROW;\n+\t\tfor (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {\n+\t\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n+\t\t\tset_I(s->I, q, last + i);\n+\t\t\tset_D(s->D, q, s - 1);\n+\t\t}\n+\t\tset_M(s->M, q, last + len1 - 1, mat[seq1[i]]);\n+\t\tset_end_I(s->I, q, last + i);\n+\t\tset_D(s->D, q, s - 1);\n+\t\ts = curr; curr = last; last = s;\n+\t}\n+\t/* last row */\n+\tif (j == len2) {\n+\t\tSET_INF(curr[j - b2]);\n+\t\tmat = score_matrix + seq2[j] * N_MATRIX_ROW;\n+\t\tfor (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {\n+\t\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n+\t\t\tset_I(s->I, q, last + i);\n+\t\t\tset_end_D(s->D, q, s - 1);\n+\t\t}\n+\t\tset_M(s->M, q, last + len1 - 1, mat[seq1[i]]);\n+\t\tset_end_I(s->I, q, last + i);\n+\t\tset_end_D(s->D, q, s - 1);\n+\t\ts = curr; curr = last; last = s;\n+\t}\n+\n+\t*_score = last[len1].M;\n+\tif (n_cigar) { /* backtrace 
*/\n+\t\tpath_t *p, *path = (path_t*)malloc(sizeof(path_t) * (len1 + len2 + 2));\n+\t\ti = len1; j = len2;\n+\t\tq = dpcell[j] + i;\n+\t\ts = last + len1;\n+\t\tmax = s->M; type = q->Mt; ctype = FROM_M;\n+\t\tif (s->I > max) { max = s->I; type = q->It; ctype = FROM_I; }\n+\t\tif (s->D > max) { max = s->D; type = q->Dt; ctype = FROM_D; }\n+\n+\t\tp = path;\n+\t\tp->ctype = ctype; p->i = i; p->j = j; /* bug fixed 040408 */\n+\t\t++p;\n+\t\tdo {\n+\t\t\tswitch (ctype) {\n+\t\t\tcase FROM_M: --i; --j; break;\n+\t\t\tcase FROM_I: --j; break;\n+\t\t\tcase FROM_D: --i; break;\n+\t\t\t}\n+\t\t\tq = dpcell[j] + i;\n+\t\t\tctype = type;\n+\t\t\tswitch (type) {\n+\t\t\tcase FROM_M: type = q->Mt; break;\n+\t\t\tcase FROM_I: type = q->It; break;\n+\t\t\tcase FROM_D: type = q->Dt; break;\n+\t\t\t}\n+\t\t\tp->ctype = ctype; p->i = i; p->j = j;\n+\t\t\t++p;\n+\t\t} while (i || j);\n+\t\tcigar = ka_path2cigar32(path, p - path - 1, n_cigar);\n+\t\tfree(path);\n+\t}\n+\n+\t/* free memory */\n+\tfor (j = b2 + 1; j <= len2; ++j)\n+\t\tdpcell[j] += j - b2;\n+\tfor (j = 0; j <= len2; ++j)\n+\t\tfree(dpcell[j]);\n+\tfree(dpcell);\n+\tfree(curr); free(last);\n+\n+\treturn cigar;\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/kaln.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/kaln.h Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,55 @@ +/* The MIT License + + Copyright (c) 2003-2006, 2008, 2009 by Heng Li <lh3@live.co.uk> + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +#ifndef LH3_KALN_H_ +#define LH3_KALN_H_ + +#include <stdint.h> + +#define MINOR_INF -1073741823 + +typedef struct { + int gap_open; + int gap_ext; + int gap_end; + + int *matrix; + int row; + int band_width; +} ka_param_t; + +#ifdef __cplusplus +extern "C" { +#endif + + uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const ka_param_t *ap, int *_score, int *n_cigar); + +#ifdef __cplusplus +} +#endif + +extern ka_param_t ka_param_blast; /* = { 5, 2, 2, aln_sm_blast, 5, 50 }; */ + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/khash.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/khash.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,486 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Genome Research Ltd (GRL).\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/* Contact: Heng Li <lh3@sanger.ac.uk> */\n+\n+/*\n+ An example:\n+\n+#include "khash.h"\n+KHASH_MAP_INIT_INT(32, char)\n+int main() {\n+\tint ret, is_missing;\n+\tkhiter_t k;\n+\tkhash_t(32) *h = kh_init(32);\n+\tk = kh_put(32, h, 5, &ret);\n+\tif (!ret) kh_del(32, h, k);\n+\tkh_value(h, k) = 10;\n+\tk = kh_get(32, h, 10);\n+\tis_missing = (k == kh_end(h));\n+\tk = kh_get(32, h, 5);\n+\tkh_del(32, h, k);\n+\tfor (k = kh_begin(h); k != kh_end(h); ++k)\n+\t\tif (kh_exist(h, k)) kh_value(h, k) = 1;\n+\tkh_destroy(32, h);\n+\treturn 0;\n+}\n+*/\n+\n+/*\n+ 2008-09-19 (0.2.3):\n+\n+\t* Corrected the example\n+\t* Improved interfaces\n+\n+ 2008-09-11 (0.2.2):\n+\n+\t* Improved speed a little in kh_put()\n+\n+ 2008-09-10 (0.2.1):\n+\n+\t* Added kh_clear()\n+\t* Fixed a compiling error\n+\n+ 2008-09-02 
(0.2.0):\n+\n+\t* Changed to token concatenation which increases flexibility.\n+\n+ 2008-08-31 (0.1.2):\n+\n+\t* Fixed a bug in kh_get(), which has not been tested previously.\n+\n+ 2008-08-31 (0.1.1):\n+\n+\t* Added destructor\n+*/\n+\n+\n+#ifndef __AC_KHASH_H\n+#define __AC_KHASH_H\n+\n+/*!\n+ @header\n+\n+ Generic hash table library.\n+\n+ @copyright Heng Li\n+ */\n+\n+#define AC_VERSION_KHASH_H "0.2.2"\n+\n+#include <stdint.h>\n+#include <stdlib.h>\n+#include <string.h>\n+\n+typedef uint32_t khint_t;\n+typedef khint_t khiter_t;\n+\n+#define __ac_HASH_PRIME_SIZE 32\n+static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =\n+{\n+ 0ul, 3ul, 11ul, 23ul, 53ul,\n+ 97ul, 193ul, 389ul, 769ul, 1543ul,\n+ 3079ul, 6151ul, 12289ul, 24593ul, 49157ul,\n+ 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul,\n+ 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul,\n+ 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,\n+ 3221225473ul, 4294967291ul\n+};\n+\n+#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)\n+#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)\n+#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)\n+#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))\n+#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))\n+#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))\n+#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))\n+\n+static const double __ac_HASH_UPPER = 0.77;\n+\n+#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \\\n+\ttypedef struct {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tkhint_t n_buckets, size, n_occupied, upper_bound;\t\t\t\t\\\n+\t\tuint32_t *flags;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tkhkey_t *keys;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tkhval_t *vals;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t} kh_##name##_t;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tstatic inline kh_##name##_t *kh_init_##name() 
{\t\t\t\t\t\t\\\n+\t\treturn (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t));\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tstatic inlin'..b'e, h, k) kh_get_##name(h, k)\n+\n+/*! @function\n+ @abstract Remove a key from the hash table.\n+ @param name Name of the hash table [symbol]\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @param k Iterator to the element to be deleted [khint_t]\n+ */\n+#define kh_del(name, h, k) kh_del_##name(h, k)\n+\n+\n+/*! @function\n+ @abstract Test whether a bucket contains data.\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @param x Iterator to the bucket [khint_t]\n+ @return 1 if containing data; 0 otherwise [int]\n+ */\n+#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))\n+\n+/*! @function\n+ @abstract Get key given an iterator\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @param x Iterator to the bucket [khint_t]\n+ @return Key [type of keys]\n+ */\n+#define kh_key(h, x) ((h)->keys[x])\n+\n+/*! @function\n+ @abstract Get value given an iterator\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @param x Iterator to the bucket [khint_t]\n+ @return Value [type of values]\n+ @discussion For hash sets, calling this results in segfault.\n+ */\n+#define kh_val(h, x) ((h)->vals[x])\n+\n+/*! @function\n+ @abstract Alias of kh_val()\n+ */\n+#define kh_value(h, x) ((h)->vals[x])\n+\n+/*! @function\n+ @abstract Get the start iterator\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return The start iterator [khint_t]\n+ */\n+#define kh_begin(h) (khint_t)(0)\n+\n+/*! @function\n+ @abstract Get the end iterator\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return The end iterator [khint_t]\n+ */\n+#define kh_end(h) ((h)->n_buckets)\n+\n+/*! @function\n+ @abstract Get the number of elements in the hash table\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return Number of elements in the hash table [khint_t]\n+ */\n+#define kh_size(h) ((h)->size)\n+\n+/*! 
@function\n+ @abstract Get the number of buckets in the hash table\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return Number of buckets in the hash table [khint_t]\n+ */\n+#define kh_n_buckets(h) ((h)->n_buckets)\n+\n+/* More conenient interfaces */\n+\n+/*! @function\n+ @abstract Instantiate a hash set containing integer keys\n+ @param name Name of the hash table [symbol]\n+ */\n+#define KHASH_SET_INIT_INT(name)\t\t\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, uint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)\n+\n+/*! @function\n+ @abstract Instantiate a hash map containing integer keys\n+ @param name Name of the hash table [symbol]\n+ @param khval_t Type of values [type]\n+ */\n+#define KHASH_MAP_INIT_INT(name, khval_t)\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)\n+\n+/*! @function\n+ @abstract Instantiate a hash map containing 64-bit integer keys\n+ @param name Name of the hash table [symbol]\n+ */\n+#define KHASH_SET_INIT_INT64(name)\t\t\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)\n+\n+/*! @function\n+ @abstract Instantiate a hash map containing 64-bit integer keys\n+ @param name Name of the hash table [symbol]\n+ @param khval_t Type of values [type]\n+ */\n+#define KHASH_MAP_INIT_INT64(name, khval_t)\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, uint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)\n+\n+typedef const char *kh_cstr_t;\n+/*! @function\n+ @abstract Instantiate a hash map containing const char* keys\n+ @param name Name of the hash table [symbol]\n+ */\n+#define KHASH_SET_INIT_STR(name)\t\t\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)\n+\n+/*! 
@function\n+ @abstract Instantiate a hash map containing const char* keys\n+ @param name Name of the hash table [symbol]\n+ @param khval_t Type of values [type]\n+ */\n+#define KHASH_MAP_INIT_STR(name, khval_t)\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)\n+\n+#endif /* __AC_KHASH_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/klist.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/klist.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,96 @@ +#ifndef _LH3_KLIST_H +#define _LH3_KLIST_H + +#include <stdlib.h> + +#define KMEMPOOL_INIT(name, kmptype_t, kmpfree_f) \ + typedef struct { \ + size_t cnt, n, max; \ + kmptype_t **buf; \ + } kmp_##name##_t; \ + static inline kmp_##name##_t *kmp_init_##name() { \ + return calloc(1, sizeof(kmp_##name##_t)); \ + } \ + static inline void kmp_destroy_##name(kmp_##name##_t *mp) { \ + size_t k; \ + for (k = 0; k < mp->n; ++k) { \ + kmpfree_f(mp->buf[k]); free(mp->buf[k]); \ + } \ + free(mp->buf); free(mp); \ + } \ + static inline kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \ + ++mp->cnt; \ + if (mp->n == 0) return calloc(1, sizeof(kmptype_t)); \ + return mp->buf[--mp->n]; \ + } \ + static inline void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \ + --mp->cnt; \ + if (mp->n == mp->max) { \ + mp->max = mp->max? mp->max<<1 : 16; \ + mp->buf = realloc(mp->buf, sizeof(void*) * mp->max); \ + } \ + mp->buf[mp->n++] = p; \ + } + +#define kmempool_t(name) kmp_##name##_t +#define kmp_init(name) kmp_init_##name() +#define kmp_destroy(name, mp) kmp_destroy_##name(mp) +#define kmp_alloc(name, mp) kmp_alloc_##name(mp) +#define kmp_free(name, mp, p) kmp_free_##name(mp, p) + +#define KLIST_INIT(name, kltype_t, kmpfree_t) \ + struct __kl1_##name { \ + kltype_t data; \ + struct __kl1_##name *next; \ + }; \ + typedef struct __kl1_##name kl1_##name; \ + KMEMPOOL_INIT(name, kl1_##name, kmpfree_t) \ + typedef struct { \ + kl1_##name *head, *tail; \ + kmp_##name##_t *mp; \ + size_t size; \ + } kl_##name##_t; \ + static inline kl_##name##_t *kl_init_##name() { \ + kl_##name##_t *kl = calloc(1, sizeof(kl_##name##_t)); \ + kl->mp = kmp_init(name); \ + kl->head = kl->tail = kmp_alloc(name, kl->mp); \ + kl->head->next = 0; \ + return kl; \ + } \ + static inline void kl_destroy_##name(kl_##name##_t *kl) { \ + kl1_##name *p; \ + for (p = kl->head; p != kl->tail; p = p->next) \ + kmp_free(name, kl->mp, p); \ + kmp_free(name, kl->mp, p); \ + kmp_destroy(name, kl->mp); \ + 
free(kl); \ + } \ + static inline kltype_t *kl_pushp_##name(kl_##name##_t *kl) { \ + kl1_##name *q, *p = kmp_alloc(name, kl->mp); \ + q = kl->tail; p->next = 0; kl->tail->next = p; kl->tail = p; \ + ++kl->size; \ + return &q->data; \ + } \ + static inline int kl_shift_##name(kl_##name##_t *kl, kltype_t *d) { \ + kl1_##name *p; \ + if (kl->head->next == 0) return -1; \ + --kl->size; \ + p = kl->head; kl->head = kl->head->next; \ + if (d) *d = p->data; \ + kmp_free(name, kl->mp, p); \ + return 0; \ + } + +#define kliter_t(name) kl1_##name +#define klist_t(name) kl_##name##_t +#define kl_val(iter) ((iter)->data) +#define kl_next(iter) ((iter)->next) +#define kl_begin(kl) ((kl)->head) +#define kl_end(kl) ((kl)->tail) + +#define kl_init(name) kl_init_##name() +#define kl_destroy(name, kl) kl_destroy_##name(kl) +#define kl_pushp(name, kl) kl_pushp_##name(kl) +#define kl_shift(name, kl, d) kl_shift_##name(kl, d) + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/knetfile.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/knetfile.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,630 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Genome Research Ltd (GRL).\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/* Contact: Heng Li <lh3@sanger.ac.uk> */\n+\n+/* Probably I will not do socket programming in the next few years and\n+ therefore I decide to heavily annotate this file, for Linux and\n+ Windows as well. -lh3 */\n+\n+#include <time.h>\n+#include <stdio.h>\n+#include <ctype.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <errno.h>\n+#include <unistd.h>\n+#include <sys/types.h>\n+\n+#ifndef _WIN32\n+#include <netdb.h>\n+#include <arpa/inet.h>\n+#include <sys/socket.h>\n+#endif\n+\n+#include "knetfile.h"\n+\n+/* In winsock.h, the type of a socket is SOCKET, which is: "typedef\n+ * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed\n+ * integer -1. In knetfile.c, I use "int" for socket type\n+ * throughout. 
This should be improved to avoid confusion.\n+ *\n+ * In Linux/Mac, recv() and read() do almost the same thing. You can see\n+ * in the header file that netread() is simply an alias of read(). In\n+ * Windows, however, they are different and using recv() is mandatory.\n+ */\n+\n+/* This function tests if the file handler is ready for reading (or\n+ * writing if is_read==0). */\n+static int socket_wait(int fd, int is_read)\n+{\n+\tfd_set fds, *fdr = 0, *fdw = 0;\n+\tstruct timeval tv;\n+\tint ret;\n+\ttv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out\n+\tFD_ZERO(&fds);\n+\tFD_SET(fd, &fds);\n+\tif (is_read) fdr = &fds;\n+\telse fdw = &fds;\n+\tret = select(fd+1, fdr, fdw, 0, &tv);\n+#ifndef _WIN32\n+\tif (ret == -1) perror("select");\n+#else\n+\tif (ret == 0)\n+\t\tfprintf(stderr, "select time-out\\n");\n+\telse if (ret == SOCKET_ERROR)\n+\t\tfprintf(stderr, "select: %d\\n", WSAGetLastError());\n+#endif\n+\treturn ret;\n+}\n+\n+#ifndef _WIN32\n+/* This function does not work with Windows due to the lack of\n+ * getaddrinfo() in winsock. It is addapted from an example in "Beej\'s\n+ * Guide to Network Programming" (http://beej.us/guide/bgnet/). */\n+static int socket_connect(const char *host, const char *port)\n+{\n+#define __err_connect(func) do { perror(func); freeaddrinfo(res); return -1; } while (0)\n+\n+\tint on = 1, fd;\n+\tstruct linger lng = { 0, 0 };\n+\tstruct addrinfo hints, *res;\n+\tmemset(&hints, 0, sizeof(struct addrinfo));\n+\thints.ai_family = AF_UNSPEC;\n+\thints.ai_socktype = SOCK_STREAM;\n+\t/* In Unix/Mac, getaddrinfo() is the most convenient way to get\n+\t * server information. */\n+\tif (getaddrinfo(host, port, &hints, &res) != 0) __err_connect("getaddrinfo");\n+\tif ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1) __err_connect("socket");\n+\t/* The following two setsockopt() are used by ftplib\n+\t * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they\n+\t * necessary. 
*/\n+\tif (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) __err_connect("setsockopt");\n+\tif (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1) __err_connect("setsockopt");\n+\tif (connect('..b'fp->fd == -1) {\n+\t\tknet_close(fp);\n+\t\treturn 0;\n+\t}\n+\treturn fp;\n+}\n+\n+knetFile *knet_dopen(int fd, const char *mode)\n+{\n+\tknetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));\n+\tfp->type = KNF_TYPE_LOCAL;\n+\tfp->fd = fd;\n+\treturn fp;\n+}\n+\n+off_t knet_read(knetFile *fp, void *buf, off_t len)\n+{\n+\toff_t l = 0;\n+\tif (fp->fd == -1) return 0;\n+\tif (fp->type == KNF_TYPE_FTP) {\n+\t\tif (fp->is_ready == 0) {\n+\t\t\tif (!fp->no_reconnect) kftp_reconnect(fp);\n+\t\t\tkftp_connect_file(fp);\n+\t\t}\n+\t} else if (fp->type == KNF_TYPE_HTTP) {\n+\t\tif (fp->is_ready == 0)\n+\t\t\tkhttp_connect_file(fp);\n+\t}\n+\tif (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX\n+\t\toff_t rest = len, curr;\n+\t\twhile (rest) {\n+\t\t\tcurr = read(fp->fd, buf + l, rest);\n+\t\t\tif (curr == 0) break;\n+\t\t\tl += curr; rest -= curr;\n+\t\t}\n+\t} else l = my_netread(fp->fd, buf, len);\n+\tfp->offset += l;\n+\treturn l;\n+}\n+\n+off_t knet_seek(knetFile *fp, int64_t off, int whence)\n+{\n+\tif (whence == SEEK_SET && off == fp->offset) return 0;\n+\tif (fp->type == KNF_TYPE_LOCAL) {\n+\t\t/* Be aware that lseek() returns the offset after seeking,\n+\t\t * while fseek() returns zero on success. 
*/\n+\t\toff_t offset = lseek(fp->fd, off, whence);\n+\t\tif (offset == -1) {\n+ // Be silent, it is OK for knet_seek to fail when the file is streamed\n+ // fprintf(stderr,"[knet_seek] %s\\n", strerror(errno));\n+\t\t\treturn -1;\n+\t\t}\n+\t\tfp->offset = offset;\n+\t\treturn 0;\n+\t}\n+ else if (fp->type == KNF_TYPE_FTP) \n+ {\n+ if (whence==SEEK_CUR)\n+ fp->offset += off;\n+ else if (whence==SEEK_SET)\n+ fp->offset = off;\n+ else if ( whence==SEEK_END)\n+ fp->offset = fp->file_size+off;\n+\t\tfp->is_ready = 0;\n+\t\treturn 0;\n+\t} \n+ else if (fp->type == KNF_TYPE_HTTP) \n+ {\n+\t\tif (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?\n+\t\t\tfprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\\n");\n+\t\t\terrno = ESPIPE;\n+\t\t\treturn -1;\n+\t\t}\n+ if (whence==SEEK_CUR)\n+ fp->offset += off;\n+ else if (whence==SEEK_SET)\n+ fp->offset = off;\n+\t\tfp->is_ready = 0;\n+\t\treturn 0;\n+\t}\n+\terrno = EINVAL;\n+ fprintf(stderr,"[knet_seek] %s\\n", strerror(errno));\n+\treturn -1;\n+}\n+\n+int knet_close(knetFile *fp)\n+{\n+\tif (fp == 0) return 0;\n+\tif (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific\n+\tif (fp->fd != -1) {\n+\t\t/* On Linux/Mac, netclose() is an alias of close(), but on\n+\t\t * Windows, it is an alias of closesocket(). 
*/\n+\t\tif (fp->type == KNF_TYPE_LOCAL) close(fp->fd);\n+\t\telse netclose(fp->fd);\n+\t}\n+\tfree(fp->host); free(fp->port);\n+\tfree(fp->response); free(fp->retr); // FTP specific\n+\tfree(fp->path); free(fp->http_host); // HTTP specific\n+\tfree(fp);\n+\treturn 0;\n+}\n+\n+#ifdef KNETFILE_MAIN\n+int main(void)\n+{\n+\tchar *buf;\n+\tknetFile *fp;\n+\tint type = 4, l;\n+#ifdef _WIN32\n+\tknet_win32_init();\n+#endif\n+\tbuf = calloc(0x100000, 1);\n+\tif (type == 0) {\n+\t\tfp = knet_open("knetfile.c", "r");\n+\t\tknet_seek(fp, 1000, SEEK_SET);\n+\t} else if (type == 1) { // NCBI FTP, large file\n+\t\tfp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");\n+\t\tknet_seek(fp, 2500000000ll, SEEK_SET);\n+\t\tl = knet_read(fp, buf, 255);\n+\t} else if (type == 2) {\n+\t\tfp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");\n+\t\tknet_seek(fp, 1000, SEEK_SET);\n+\t} else if (type == 3) {\n+\t\tfp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");\n+\t\tknet_seek(fp, 1000, SEEK_SET);\n+\t} else if (type == 4) {\n+\t\tfp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");\n+\t\tknet_read(fp, buf, 10000);\n+\t\tknet_seek(fp, 20000, SEEK_SET);\n+\t\tknet_seek(fp, 10000, SEEK_SET);\n+\t\tl = knet_read(fp, buf+10000, 10000000) + 10000;\n+\t}\n+\tif (type != 4 && type != 1) {\n+\t\tknet_read(fp, buf, 255);\n+\t\tbuf[255] = 0;\n+\t\tprintf("%s\\n", buf);\n+\t} else write(fileno(stdout), buf, l);\n+\tknet_close(fp);\n+\tfree(buf);\n+\treturn 0;\n+}\n+#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/knetfile.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/knetfile.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,75 @@ +#ifndef KNETFILE_H +#define KNETFILE_H + +#include <stdint.h> +#include <fcntl.h> + +#ifndef _WIN32 +#define netread(fd, ptr, len) read(fd, ptr, len) +#define netwrite(fd, ptr, len) write(fd, ptr, len) +#define netclose(fd) close(fd) +#else +#include <winsock2.h> +#define netread(fd, ptr, len) recv(fd, ptr, len, 0) +#define netwrite(fd, ptr, len) send(fd, ptr, len, 0) +#define netclose(fd) closesocket(fd) +#endif + +// FIXME: currently I/O is unbuffered + +#define KNF_TYPE_LOCAL 1 +#define KNF_TYPE_FTP 2 +#define KNF_TYPE_HTTP 3 + +typedef struct knetFile_s { + int type, fd; + int64_t offset; + char *host, *port; + + // the following are for FTP only + int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; + char *response, *retr, *size_cmd; + int64_t seek_offset; // for lazy seek + int64_t file_size; + + // the following are for HTTP only + char *path, *http_host; +} knetFile; + +#define knet_tell(fp) ((fp)->offset) +#define knet_fileno(fp) ((fp)->fd) + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _WIN32 + int knet_win32_init(); + void knet_win32_destroy(); +#endif + + knetFile *knet_open(const char *fn, const char *mode); + + /* + This only works with local files. + */ + knetFile *knet_dopen(int fd, const char *mode); + + /* + If ->is_ready==0, this routine updates ->fd; otherwise, it simply + reads from ->fd. + */ + off_t knet_read(knetFile *fp, void *buf, off_t len); + + /* + This routine only sets ->offset and ->is_ready=0. It does not + communicate with the FTP server. + */ + off_t knet_seek(knetFile *fp, int64_t off, int whence); + int knet_close(knetFile *fp); + +#ifdef __cplusplus +} +#endif + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/kseq.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/kseq.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,227 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Genome Research Ltd (GRL).\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/* Contact: Heng Li <lh3@sanger.ac.uk> */\n+\n+/*\n+ 2009-07-16 (lh3): in kstream_t, change "char*" to "unsigned char*"\n+ */\n+\n+/* Last Modified: 12APR2009 */\n+\n+#ifndef AC_KSEQ_H\n+#define AC_KSEQ_H\n+\n+#include <ctype.h>\n+#include <string.h>\n+#include <stdlib.h>\n+\n+#define KS_SEP_SPACE 0 // isspace(): \\t, \\n, \\v, \\f, \\r\n+#define KS_SEP_TAB 1 // isspace() && !\' \'\n+#define KS_SEP_MAX 1\n+\n+#define __KS_TYPE(type_t)\t\t\t\t\t\t\\\n+\ttypedef struct __kstream_t {\t\t\t\t\\\n+\t\tunsigned char *buf;\t\t\t\t\t\t\\\n+\t\tint begin, end, is_eof;\t\t\t\t\t\\\n+\t\ttype_t f;\t\t\t\t\t\t\t\t\\\n+\t} kstream_t;\n+\n+#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)\n+#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)\n+\n+#define __KS_BASIC(type_t, 
__bufsize)\t\t\t\t\t\t\t\t\\\n+\tstatic inline kstream_t *ks_init(type_t f)\t\t\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tkstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));\t\\\n+\t\tks->f = f;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tks->buf = malloc(__bufsize);\t\t\t\t\t\t\t\t\\\n+\t\treturn ks;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tstatic inline void ks_destroy(kstream_t *ks)\t\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (ks) {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tfree(ks->buf);\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tfree(ks);\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\n+\n+#define __KS_GETC(__read, __bufsize)\t\t\t\t\t\t\\\n+\tstatic inline int ks_getc(kstream_t *ks)\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (ks->is_eof && ks->begin >= ks->end) return -1;\t\\\n+\t\tif (ks->begin >= ks->end) {\t\t\t\t\t\t\t\\\n+\t\t\tks->begin = 0;\t\t\t\t\t\t\t\t\t\\\n+\t\t\tks->end = __read(ks->f, ks->buf, __bufsize);\t\\\n+\t\t\tif (ks->end < __bufsize) ks->is_eof = 1;\t\t\\\n+\t\t\tif (ks->end == 0) return -1;\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\treturn (int)ks->buf[ks->begin++];\t\t\t\t\t\\\n+\t}\n+\n+#ifndef KSTRING_T\n+#define KSTRING_T kstring_t\n+typedef struct __kstring_t {\n+\tsize_t l, m;\n+\tchar *s;\n+} kstring_t;\n+#endif\n+\n+#ifndef kroundup32\n+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))\n+#endif\n+\n+#define __KS_GETUNTIL(__read, __bufsize)\t\t\t\t\t\t\t\t\\\n+\tstatic int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (dret) *dret = 0;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tstr->l = 0;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (ks->begin >= ks->end && ks->is_eof) return -1;\t\t\t\t\\\n+\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tint i;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (ks->begin >= ks->end) 
{\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (!ks->is_eof) {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tks->begin = 0;\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tks->end = __read(ks->f, ks->buf, __bufsize);\t\t\\\n+\t\t\t\t\tif (ks->end < __bufsize) ks->is_eof = 1;\t\t\t\\\n+\t\t\t\t\tif (ks->end == 0) break;\t\t\t\t\t\t\t\\\n+\t\t\t\t} else break;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (delimiter > KS_SEP_MAX) {\t\t\t\t\t\t\t\t\\\n+\t\t\t\tfor (i = ks->begin; i < ks->end; ++i)\t\t\t\t\t\\\n+\t\t\t\t\tif (ks->buf[i] == delimiter) break;\t\t\t\t\t\\\n+\t\t\t} else if (delimiter == KS_SEP_SPACE) {\t\t\t\t\t\t\\\n+\t\t\t\tfor (i = ks->begin; i < ks->end; ++i)\t\t\t\t\t\\\n+\t\t\t\t\tif (isspace(ks->buf[i])) break'..b"\t\\\n+\t\t\t\tstr->s = (char*)realloc(str->s, str->m);\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tmemcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \\\n+\t\t\tstr->l = str->l + (i - ks->begin);\t\t\t\t\t\t\t\\\n+\t\t\tks->begin = i + 1;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (i < ks->end) {\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (dret) *dret = ks->buf[i];\t\t\t\t\t\t\t\\\n+\t\t\t\tbreak;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (str->l == 0) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tstr->m = 1;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tstr->s = (char*)calloc(1, 1);\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tstr->s[str->l] = '\\0';\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\treturn str->l;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\n+\n+#define KSTREAM_INIT(type_t, __read, __bufsize) \\\n+\t__KS_TYPE(type_t)\t\t\t\t\t\t\t\\\n+\t__KS_BASIC(type_t, __bufsize)\t\t\t\t\\\n+\t__KS_GETC(__read, __bufsize)\t\t\t\t\\\n+\t__KS_GETUNTIL(__read, __bufsize)\n+\n+#define __KSEQ_BASIC(type_t)\t\t\t\t\t\t\t\t\t\t\t\\\n+\tstatic inline kseq_t *kseq_init(type_t fd)\t\t\t\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tkseq_t *s = (kseq_t*)calloc(1, 
sizeof(kseq_t));\t\t\t\t\t\\\n+\t\ts->f = ks_init(fd);\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\treturn s;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tstatic inline void kseq_rewind(kseq_t *ks)\t\t\t\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tks->last_char = 0;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tks->f->is_eof = ks->f->begin = ks->f->end = 0;\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tstatic inline void kseq_destroy(kseq_t *ks)\t\t\t\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (!ks) return;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tfree(ks->name.s); free(ks->comment.s); free(ks->seq.s);\tfree(ks->qual.s); \\\n+\t\tks_destroy(ks->f);\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tfree(ks);\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\n+\n+/* Return value:\n+ >=0 length of the sequence (normal)\n+ -1 end-of-file\n+ -2 truncated quality string\n+ */\n+#define __KSEQ_READ\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tstatic int kseq_read(kseq_t *seq)\t\t\t\t\t\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tint c;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tkstream_t *ks = seq->f;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (seq->last_char == 0) { /* then jump to the next header line */ \\\n+\t\t\twhile ((c = ks_getc(ks)) != -1 && c != '>' && c != '@');\t\\\n+\t\t\tif (c == -1) return -1; /* end of file */\t\t\t\t\t\\\n+\t\t\tseq->last_char = c;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t} /* the first header char has been read */\t\t\t\t\t\t\\\n+\t\tseq->comment.l = seq->seq.l = seq->qual.l = 0;\t\t\t\t\t\\\n+\t\tif (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1;\t\t\t\\\n+\t\tif (c != '\\n') ks_getuntil(ks, '\\n', &seq->comment, 0);\t\t\t\\\n+\t\twhile ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \\\n+\t\t\tif (isgraph(c)) { /* printable non-space character */\t\t\\\n+\t\t\t\tif (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \\\n+\t\t\t\t\tseq->seq.m = seq->seq.l + 2;\t\t\t\t\t\t\\\n+\t\t\t\t\tkroundup32(seq->seq.m); /* rounded to next 
closest 2^k */ \\\n+\t\t\t\t\tseq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tseq->seq.s[seq->seq.l++] = (char)c;\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */\t\\\n+\t\tseq->seq.s[seq->seq.l] = 0;\t/* null terminated string */\t\t\\\n+\t\tif (c != '+') return seq->seq.l; /* FASTA */\t\t\t\t\t\\\n+\t\tif (seq->qual.m < seq->seq.m) {\t/* allocate enough memory */\t\\\n+\t\t\tseq->qual.m = seq->seq.m;\t\t\t\t\t\t\t\t\t\\\n+\t\t\tseq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\twhile ((c = ks_getc(ks)) != -1 && c != '\\n'); /* skip the rest of '+' line */ \\\n+\t\tif (c == -1) return -2; /* we should not stop here */\t\t\t\\\n+\t\twhile ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l)\t\t\\\n+\t\t\tif (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c;\t\\\n+\t\tseq->qual.s[seq->qual.l] = 0; /* null terminated string */\t\t\\\n+\t\tseq->last_char = 0;\t/* we have not come to the next header line */ \\\n+\t\tif (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \\\n+\t\treturn seq->seq.l;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\n+\n+#define __KSEQ_TYPE(type_t)\t\t\t\t\t\t\\\n+\ttypedef struct {\t\t\t\t\t\t\t\\\n+\t\tkstring_t name, comment, seq, qual;\t\t\\\n+\t\tint last_char;\t\t\t\t\t\t\t\\\n+\t\tkstream_t *f;\t\t\t\t\t\t\t\\\n+\t} kseq_t;\n+\n+#define KSEQ_INIT(type_t, __read)\t\t\t\t\\\n+\tKSTREAM_INIT(type_t, __read, 4096)\t\t\t\\\n+\t__KSEQ_TYPE(type_t)\t\t\t\t\t\t\t\\\n+\t__KSEQ_BASIC(type_t)\t\t\t\t\t\t\\\n+\t__KSEQ_READ\n+\n+#endif\n" |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/ksort.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/ksort.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,271 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Genome Research Ltd (GRL).\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/* Contact: Heng Li <lh3@sanger.ac.uk> */\n+\n+/*\n+ 2008-11-16 (0.1.4):\n+\n+ * Fixed a bug in introsort() that happens in rare cases.\n+\n+ 2008-11-05 (0.1.3):\n+\n+ * Fixed a bug in introsort() for complex comparisons.\n+\n+\t* Fixed a bug in mergesort(). The previous version is not stable.\n+\n+ 2008-09-15 (0.1.2):\n+\n+\t* Accelerated introsort. 
On my Mac (not on another Linux machine),\n+\t my implementation is as fast as std::sort on random input.\n+\n+\t* Added combsort and in introsort, switch to combsort if the\n+\t recursion is too deep.\n+\n+ 2008-09-13 (0.1.1):\n+\n+\t* Added k-small algorithm\n+\n+ 2008-09-05 (0.1.0):\n+\n+\t* Initial version\n+\n+*/\n+\n+#ifndef AC_KSORT_H\n+#define AC_KSORT_H\n+\n+#include <stdlib.h>\n+#include <string.h>\n+\n+typedef struct {\n+\tvoid *left, *right;\n+\tint depth;\n+} ks_isort_stack_t;\n+\n+#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }\n+\n+#define KSORT_INIT(name, type_t, __sort_lt)\t\t\t\t\t\t\t\t\\\n+\tvoid ks_mergesort_##name(size_t n, type_t array[], type_t temp[])\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ttype_t *a2[2], *a, *b;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tint curr, shift;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ta2[0] = array;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ta2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n);\t\t\\\n+\t\tfor (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) {\t\t\t\\\n+\t\t\ta = a2[curr]; b = a2[1-curr];\t\t\t\t\t\t\t\t\\\n+\t\t\tif (shift == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\ttype_t *p = b, *i, *eb = a + n;\t\t\t\t\t\t\t\\\n+\t\t\t\tfor (i = a; i < eb; i += 2) {\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (i == eb - 1) *p++ = *i;\t\t\t\t\t\t\t\\\n+\t\t\t\t\telse {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tif (__sort_lt(*(i+1), *i)) {\t\t\t\t\t\\\n+\t\t\t\t\t\t\t*p++ = *(i+1); *p++ = *i;\t\t\t\t\t\\\n+\t\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\t\t*p++ = *i; *p++ = *(i+1);\t\t\t\t\t\\\n+\t\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tsize_t i, step = 1ul<<shift;\t\t\t\t\t\t\t\\\n+\t\t\t\tfor (i = 0; i < n; i += step<<1) {\t\t\t\t\t\t\\\n+\t\t\t\t\ttype_t *p, *j, *k, *ea, *eb;\t\t\t\t\t\t\\\n+\t\t\t\t\tif (n < i + step) 
{\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tea = a + n; eb = a;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tea = a + i + step;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\teb = a + (n < i + (step<<1)? n : i + (step<<1)); \\\n+\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tj = a + i; k = a + i + step; p = b + i;\t\t\t\t\\\n+\t\t\t\t\twhile (j < ea && k < eb) {\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tif (__sort_lt(*k, *j)) *p++ = *k++;\t\t\t\t\\\n+\t\t\t\t\t\telse *p++ = *j++;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\twhile (j < ea) *p++ = *j++;\t\t\t\t\t\t\t\\\n+\t\t\t\t\twhile (k < eb) *p++ = *k++;\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tcurr = 1 - curr;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (curr == 1) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\ttype_t *p = a2[0], *i = a2[1], *eb = array + n;\t\t\t\t\\\n+\t\t\tfor (; p < eb; ++i) *p++ = *i;\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (temp == 0) free(a2[1]);\t\t\t\t\t\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tvoid ks_heapadjust_##name(size_t i, size_t n, type_t l[])\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tsize_t k ='..b'\t\t\t\t\t\t\t\t\\\n+\tvoid ks_introsort_##name(size_t n, type_t a[])\t\t\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tint d;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tks_isort_stack_t *top, *stack;\t\t\t\t\t\t\t\t\t\\\n+\t\ttype_t rp, swap_tmp;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ttype_t *s, *t, *i, *j, *k;\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (n < 1) return;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\telse if (n == 2) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \\\n+\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tfor (d = 2; 1ul<<d < n; 
++d);\t\t\t\t\t\t\t\t\t\\\n+\t\tstack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \\\n+\t\ttop = stack; s = a; t = a + (n-1); d <<= 1;\t\t\t\t\t\t\\\n+\t\twhile (1) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (s < t) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (--d == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tks_combsort_##name(t - s + 1, s);\t\t\t\t\t\\\n+\t\t\t\t\tt = s;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tcontinue;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\ti = s; j = t; k = i + ((j-i)>>1) + 1;\t\t\t\t\t\\\n+\t\t\t\tif (__sort_lt(*k, *i)) {\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (__sort_lt(*k, *j)) k = j;\t\t\t\t\t\t\\\n+\t\t\t\t} else k = __sort_lt(*j, *i)? i : j;\t\t\t\t\t\\\n+\t\t\t\trp = *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; }\t\\\n+\t\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tdo ++i; while (__sort_lt(*i, rp));\t\t\t\t\t\\\n+\t\t\t\t\tdo --j; while (i <= j && __sort_lt(rp, *j));\t\t\\\n+\t\t\t\t\tif (j <= i) break;\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tswap_tmp = *i; *i = *j; *j = swap_tmp;\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tswap_tmp = *i; *i = *t; *t = swap_tmp;\t\t\t\t\t\\\n+\t\t\t\tif (i-s > t-i) {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \\\n+\t\t\t\t\ts = t-i > 16? i+1 : t;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \\\n+\t\t\t\t\tt = i-s > 16? 
i-1 : s;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (top == stack) {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tfree(stack);\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t__ks_insertsort_##name(a, a+n);\t\t\t\t\t\t\\\n+\t\t\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t/* This function is adapted from: http://ndevilla.free.fr/median/ */ \\\n+\t/* 0 <= kk < n */\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\ttype_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk)\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ttype_t *low, *high, *k, *ll, *hh, *mid;\t\t\t\t\t\t\t\\\n+\t\tlow = arr; high = arr + n - 1; k = arr + kk;\t\t\t\t\t\\\n+\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (high <= low) return *k;\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (high == low + 1) {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n+\t\t\t\treturn *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tmid = low + (high - low) / 2;\t\t\t\t\t\t\t\t\\\n+\t\t\tif (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \\\n+\t\t\tif (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n+\t\t\tif (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low);\t\\\n+\t\t\tKSORT_SWAP(type_t, *mid, *(low+1));\t\t\t\t\t\t\t\\\n+\t\t\tll = low + 1; hh = high;\t\t\t\t\t\t\t\t\t\\\n+\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tdo ++ll; while (__sort_lt(*ll, *low));\t\t\t\t\t\\\n+\t\t\t\tdo --hh; while (__sort_lt(*low, *hh));\t\t\t\t\t\\\n+\t\t\t\tif (hh < ll) break;\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tKSORT_SWAP(type_t, *ll, *hh);\t\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tKSORT_SWAP(type_t, *low, *hh);\t\t\t\t\t\t\t\t\\\n+\t\t\tif (hh <= 
k) low = ll;\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (hh >= k) high = hh - 1;\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\n+\n+#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)\n+#define ks_introsort(name, n, a) ks_introsort_##name(n, a)\n+#define ks_combsort(name, n, a) ks_combsort_##name(n, a)\n+#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)\n+#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)\n+#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)\n+#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)\n+\n+#define ks_lt_generic(a, b) ((a) < (b))\n+#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)\n+\n+typedef const char *ksstr_t;\n+\n+#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)\n+#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)\n+\n+#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/kstring.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/kstring.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,165 @@ +#include <stdarg.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <stdint.h> +#include "kstring.h" + +int ksprintf(kstring_t *s, const char *fmt, ...) +{ + va_list ap; + int l; + va_start(ap, fmt); + l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); // This line does not work with glibc 2.0. See `man snprintf'. + va_end(ap); + if (l + 1 > s->m - s->l) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + va_start(ap, fmt); + l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); + } + va_end(ap); + s->l += l; + return l; +} + +// s MUST BE a null terminated string; l = strlen(s) +int ksplit_core(char *s, int delimiter, int *_max, int **_offsets) +{ + int i, n, max, last_char, last_start, *offsets, l; + n = 0; max = *_max; offsets = *_offsets; + l = strlen(s); + +#define __ksplit_aux do { \ + if (_offsets) { \ + s[i] = 0; \ + if (n == max) { \ + max = max? max<<1 : 2; \ + offsets = (int*)realloc(offsets, sizeof(int) * max); \ + } \ + offsets[n++] = last_start; \ + } else ++n; \ + } while (0) + + for (i = 0, last_char = last_start = 0; i <= l; ++i) { + if (delimiter == 0) { + if (isspace(s[i]) || s[i] == 0) { + if (isgraph(last_char)) __ksplit_aux; // the end of a field + } else { + if (isspace(last_char) || last_char == 0) last_start = i; + } + } else { + if (s[i] == delimiter || s[i] == 0) { + if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field + } else { + if (last_char == delimiter || last_char == 0) last_start = i; + } + } + last_char = s[i]; + } + *_max = max; *_offsets = offsets; + return n; +} + +/********************** + * Boyer-Moore search * + **********************/ + +// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html +int *ksBM_prep(const uint8_t *pat, int m) +{ + int i, *suff, *prep, *bmGs, *bmBc; + prep = calloc(m + 256, 1); + bmGs = prep; bmBc = prep + m; + { // preBmBc() + for (i = 0; i < 256; ++i) bmBc[i] = m; + for (i = 0; i < m - 1; 
++i) bmBc[pat[i]] = m - i - 1; + } + suff = calloc(m, sizeof(int)); + { // suffixes() + int f = 0, g; + suff[m - 1] = m; + g = m - 1; + for (i = m - 2; i >= 0; --i) { + if (i > g && suff[i + m - 1 - f] < i - g) + suff[i] = suff[i + m - 1 - f]; + else { + if (i < g) g = i; + f = i; + while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g; + suff[i] = f - g; + } + } + } + { // preBmGs() + int j = 0; + for (i = 0; i < m; ++i) bmGs[i] = m; + for (i = m - 1; i >= 0; --i) + if (suff[i] == i + 1) + for (; j < m - 1 - i; ++j) + if (bmGs[j] == m) + bmGs[j] = m - 1 - i; + for (i = 0; i <= m - 2; ++i) + bmGs[m - 1 - suff[i]] = m - 1 - i; + } + free(suff); + return prep; +} + +int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches) +{ + int i, j, *prep, *bmGs, *bmBc; + int *matches = 0, mm = 0, nm = 0; + prep = _prep? _prep : ksBM_prep(pat, m); + bmGs = prep; bmBc = prep + m; + j = 0; + while (j <= n - m) { + for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i); + if (i < 0) { + if (nm == mm) { + mm = mm? mm<<1 : 1; + matches = realloc(matches, mm * sizeof(int)); + } + matches[nm++] = j; + j += bmGs[0]; + } else { + int max = bmBc[str[i+j]] - m + 1 + i; + if (max < bmGs[i]) max = bmGs[i]; + j += max; + } + } + *n_matches = nm; + if (_prep == 0) free(prep); + return matches; +} + +#ifdef KSTRING_MAIN +#include <stdio.h> +int main() +{ + kstring_t *s; + int *fields, n, i; + s = (kstring_t*)calloc(1, sizeof(kstring_t)); + // test ksprintf() + ksprintf(s, " abcdefg: %d ", 100); + printf("'%s'\n", s->s); + // test ksplit() + fields = ksplit(s, 0, &n); + for (i = 0; i < n; ++i) + printf("field[%d] = '%s'\n", i, s->s + fields[i]); + free(s); + + { + static char *str = "abcdefgcdg"; + static char *pat = "cd"; + int n, *matches; + matches = ksBM_search(str, strlen(str), pat, strlen(pat), 0, &n); + printf("%d: \n", n); + for (i = 0; i < n; ++i) + printf("- %d\n", matches[i]); + free(matches); + } + return 0; +} +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/kstring.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/kstring.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,102 @@ +#ifndef KSTRING_H +#define KSTRING_H + +#include <stdlib.h> +#include <string.h> +#include <stdint.h> + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +#ifndef KSTRING_T +#define KSTRING_T kstring_t +typedef struct __kstring_t { + size_t l, m; + char *s; +} kstring_t; +#endif + +int ksprintf(kstring_t *s, const char *fmt, ...); +int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); + +// calculate the auxiliary array, allocated by calloc() +int *ksBM_prep(const uint8_t *pat, int m); + +/* Search pat in str and returned the list of matches. The size of the + * list is returned as n_matches. _prep is the array returned by + * ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */ +int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches); + +static inline int kputsn(const char *p, int l, kstring_t *s) +{ + if (s->l + l + 1 >= s->m) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + strncpy(s->s + s->l, p, l); + s->l += l; + s->s[s->l] = 0; + return l; +} + +static inline int kputs(const char *p, kstring_t *s) +{ + return kputsn(p, strlen(p), s); +} + +static inline int kputc(int c, kstring_t *s) +{ + if (s->l + 1 >= s->m) { + s->m = s->l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + s->s[s->l++] = c; + s->s[s->l] = 0; + return c; +} + +static inline int kputw(int c, kstring_t *s) +{ + char buf[16]; + int l, x; + if (c == 0) return kputc('0', s); + for (l = 0, x = c < 0? 
-c : c; x > 0; x /= 10) buf[l++] = x%10 + '0'; + if (c < 0) buf[l++] = '-'; + if (s->l + l + 1 >= s->m) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x]; + s->s[s->l] = 0; + return 0; +} + +static inline int kputuw(unsigned c, kstring_t *s) +{ + char buf[16]; + int l, i; + unsigned x; + if (c == 0) return kputc('0', s); + for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0'; + if (s->l + l + 1 >= s->m) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; + s->s[s->l] = 0; + return 0; +} + +static inline int *ksplit(kstring_t *s, int delimiter, int *n) +{ + int max = 0, *offsets = 0; + *n = ksplit_core(s->s, delimiter, &max, &offsets); + return offsets; +} + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/razf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/razf.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,853 @@\n+/*\n+ * RAZF : Random Access compressed(Z) File\n+ * Version: 1.0\n+ * Release Date: 2008-10-27\n+ *\n+ * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk>\n+ *\n+ * All rights reserved.\n+ *\n+ * Redistribution and use in source and binary forms, with or without\n+ * modification, are permitted provided that the following conditions\n+ * are met:\n+ * 1. Redistributions of source code must retain the above copyright\n+ * notice, this list of conditions and the following disclaimer.\n+ * 2. Redistributions in binary form must reproduce the above copyright\n+ * notice, this list of conditions and the following disclaimer in the\n+ * documentation and/or other materials provided with the distribution.\n+ *\n+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS\'\' AND\n+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n+ * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\n+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\n+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\n+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\n+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\n+ * SUCH DAMAGE.\n+ */\n+\n+#ifndef _NO_RAZF\n+\n+#include <fcntl.h>\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <unistd.h>\n+#include "razf.h"\n+\n+\n+#if ZLIB_VERNUM < 0x1221\n+struct _gz_header_s {\n+ int text;\n+ uLong time;\n+ int xflags;\n+ int os;\n+ Bytef *extra;\n+ uInt extra_len;\n+ uInt extra_max;\n+ Bytef *name;\n+ uInt name_max;\n+ Bytef *comment;\n+ uInt comm_max;\n+ int hcrc;\n+ int done;\n+};\n+#warning "zlib < 1.2.2.1; RAZF writing is disabled."\n+#endif\n+\n+#define DEF_MEM_LEVEL 8\n+\n+static inline uint32_t byte_swap_4(uint32_t v){\n+\tv = ((v & 0x0000FFFFU) << 16) | (v >> 16);\n+\treturn ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8);\n+}\n+\n+static inline uint64_t byte_swap_8(uint64_t v){\n+\tv = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32);\n+\tv = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16);\n+\treturn ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8);\n+}\n+\n+static inline int is_big_endian(){\n+\tint x = 0x01;\n+\tchar *c = (char*)&x;\n+\treturn (c[0] != 0x01);\n+}\n+\n+#ifndef _RZ_READONLY\n+static void add_zindex(RAZF *rz, int64_t in, int64_t out){\n+\tif(rz->index->size == rz->index->cap){\n+\t\trz->index->cap = rz->index->cap * 1.5 + 2;\n+\t\trz->index->cell_offsets = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap);\n+\t\trz->index->bin_offsets = realloc(rz->index->bin_offsets, sizeof(int64_t) * 
(rz->index->cap/RZ_BIN_SIZE + 1));\n+\t}\n+\tif(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out;\n+\trz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE];\n+\trz->index->size ++;\n+}\n+\n+static void save_zindex(RAZF *rz, int fd){\n+\tint32_t i, v32;\n+\tint is_be;\n+\tis_be = is_big_endian();\n+\tif(is_be) write(fd, &rz->index->size, sizeof(int));\n+\telse {\n+\t\tv32 = byte_swap_4((uint32_t)rz->index->size);\n+\t\twrite(fd, &v32, sizeof(uint32_t));\n+\t}\n+\tv32 = rz->index->size / RZ_BIN_SIZE + 1;\n+\tif(!is_be){\n+\t\tfor(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);\n+\t\tfor(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);\n+\t}\n+\twrite(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);\n+\twrite(fd, rz->index->cell_offsets, sizeof(int32_t) * rz->index->size);\n+}\n+#endif\n+\n+#ifdef _USE_KNETFILE\n+static void load_zindex(RAZF *rz, knetFile *fp'..b'E_TYPE_PLAIN){\n+\t\trz->buf_off = rz->buf_len = 0;\n+\t\tpos = block_start + block_offset;\n+#ifdef _USE_KNETFILE\n+\t\tknet_seek(rz->x.fpr, pos, SEEK_SET);\n+ pos = knet_tell(rz->x.fpr);\n+#else\n+\t\tpos = lseek(rz->filedes, pos, SEEK_SET);\n+#endif\n+\t\trz->out = rz->in = pos;\n+\t\treturn pos;\n+\t}\n+\tif(block_start == rz->block_pos && block_offset >= rz->block_off) {\n+\t\tblock_offset -= rz->block_off;\n+\t\tgoto SKIP; // Needn\'t reset inflate\n+\t}\n+\tif(block_start == 0) block_start = rz->header_size; // Automaticly revist wrong block_start\n+\t_razf_reset_read(rz, block_start, 0);\n+\tSKIP:\n+\tif(block_offset) razf_skip(rz, block_offset);\n+\treturn rz->block_off;\n+}\n+\n+int64_t razf_seek(RAZF* rz, int64_t pos, int where){\n+\tint64_t idx;\n+\tint64_t seek_pos, new_out;\n+\trz->z_eof = 0;\n+\tif (where == SEEK_CUR) pos += rz->out;\n+\telse if (where == SEEK_END) pos += 
rz->src_end;\n+\tif(rz->file_type == FILE_TYPE_PLAIN){\n+#ifdef _USE_KNETFILE\n+\t\tknet_seek(rz->x.fpr, pos, SEEK_SET);\n+ seek_pos = knet_tell(rz->x.fpr);\n+#else\n+\t\tseek_pos = lseek(rz->filedes, pos, SEEK_SET);\n+#endif\n+\t\trz->buf_off = rz->buf_len = 0;\n+\t\trz->out = rz->in = seek_pos;\n+\t\treturn seek_pos;\n+\t} else if(rz->file_type == FILE_TYPE_GZ){\n+\t\tif(pos >= rz->out) goto SKIP;\n+\t\treturn rz->out;\n+\t}\n+\tif(pos == rz->out) return pos;\n+\tif(pos > rz->src_end) return rz->out;\n+\tif(!rz->seekable || !rz->load_index){\n+\t\tif(pos >= rz->out) goto SKIP;\n+\t}\n+\tidx = pos / RZ_BLOCK_SIZE - 1;\n+\tseek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);\n+\tnew_out = (idx + 1) * RZ_BLOCK_SIZE;\n+\tif(pos > rz->out && new_out <= rz->out) goto SKIP;\n+\t_razf_reset_read(rz, seek_pos, new_out);\n+\tSKIP:\n+\trazf_skip(rz, (int)(pos - rz->out));\n+\treturn rz->out;\n+}\n+\n+uint64_t razf_tell2(RAZF *rz)\n+{\n+\t/*\n+\tif (rz->load_index) {\n+\t\tint64_t idx, seek_pos;\n+\t\tidx = rz->out / RZ_BLOCK_SIZE - 1;\n+\t\tseek_pos = (idx < 0)? 
rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);\n+\t\tif (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off)\n+\t\t\tfprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\\n",\n+\t\t\t\t\t(long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off);\n+\t}\n+\t*/\n+\treturn (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff);\n+}\n+\n+int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where)\n+{\n+\tif (where != SEEK_SET) return -1;\n+\treturn razf_jump(rz, voffset>>16, voffset&0xffff);\n+}\n+\n+void razf_close(RAZF *rz){\n+\tif(rz->mode == \'w\'){\n+#ifndef _RZ_READONLY\n+\t\trazf_end_flush(rz);\n+\t\tdeflateEnd(rz->stream);\n+#ifdef _USE_KNETFILE\n+\t\tsave_zindex(rz, rz->x.fpw);\n+\t\tif(is_big_endian()){\n+\t\t\twrite(rz->x.fpw, &rz->in, sizeof(int64_t));\n+\t\t\twrite(rz->x.fpw, &rz->out, sizeof(int64_t));\n+\t\t} else {\n+\t\t\tuint64_t v64 = byte_swap_8((uint64_t)rz->in);\n+\t\t\twrite(rz->x.fpw, &v64, sizeof(int64_t));\n+\t\t\tv64 = byte_swap_8((uint64_t)rz->out);\n+\t\t\twrite(rz->x.fpw, &v64, sizeof(int64_t));\n+\t\t}\n+#else\n+\t\tsave_zindex(rz, rz->filedes);\n+\t\tif(is_big_endian()){\n+\t\t\twrite(rz->filedes, &rz->in, sizeof(int64_t));\n+\t\t\twrite(rz->filedes, &rz->out, sizeof(int64_t));\n+\t\t} else {\n+\t\t\tuint64_t v64 = byte_swap_8((uint64_t)rz->in);\n+\t\t\twrite(rz->filedes, &v64, sizeof(int64_t));\n+\t\t\tv64 = byte_swap_8((uint64_t)rz->out);\n+\t\t\twrite(rz->filedes, &v64, sizeof(int64_t));\n+\t\t}\n+#endif\n+#endif\n+\t} else if(rz->mode == \'r\'){\n+\t\tif(rz->stream) inflateEnd(rz->stream);\n+\t}\n+\tif(rz->inbuf) free(rz->inbuf);\n+\tif(rz->outbuf) 
free(rz->outbuf);\n+\tif(rz->header){\n+\t\tfree(rz->header->extra);\n+\t\tfree(rz->header->name);\n+\t\tfree(rz->header->comment);\n+\t\tfree(rz->header);\n+\t}\n+\tif(rz->index){\n+\t\tfree(rz->index->bin_offsets);\n+\t\tfree(rz->index->cell_offsets);\n+\t\tfree(rz->index);\n+\t}\n+\tfree(rz->stream);\n+#ifdef _USE_KNETFILE\n+ if (rz->mode == \'r\')\n+ knet_close(rz->x.fpr);\n+ if (rz->mode == \'w\')\n+ close(rz->x.fpw);\n+#else\n+\tclose(rz->filedes);\n+#endif\n+\tfree(rz);\n+}\n+\n+#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/razf.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/razf.h Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,134 @@ + /*- + * RAZF : Random Access compressed(Z) File + * Version: 1.0 + * Release Date: 2008-10-27 + * + * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk> + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + + +#ifndef __RAZF_RJ_H +#define __RAZF_RJ_H + +#include <stdint.h> +#include <stdio.h> +#include "zlib.h" + +#ifdef _USE_KNETFILE +#include "knetfile.h" +#endif + +#if ZLIB_VERNUM < 0x1221 +#define _RZ_READONLY +struct _gz_header_s; +typedef struct _gz_header_s _gz_header; +#define gz_header _gz_header +#endif + +#define WINDOW_BITS 15 + +#ifndef RZ_BLOCK_SIZE +#define RZ_BLOCK_SIZE (1<<WINDOW_BITS) +#endif + +#ifndef RZ_BUFFER_SIZE +#define RZ_BUFFER_SIZE 4096 +#endif + +#ifndef RZ_COMPRESS_LEVEL +#define RZ_COMPRESS_LEVEL 6 +#endif + +#define RZ_BIN_SIZE ((1LLU << 32) / RZ_BLOCK_SIZE) + +typedef struct { + uint32_t *cell_offsets; // i: per-block offset, stored relative to bin_offsets[i / RZ_BIN_SIZE] + int64_t *bin_offsets; // i / BIN_SIZE + int size; + int cap; +} ZBlockIndex; +/* When storing the index, bytes are written in big-endian order everywhere */ + +#define FILE_TYPE_RZ 1 +#define FILE_TYPE_PLAIN 2 +#define FILE_TYPE_GZ 3 + +typedef struct RandomAccessZFile { + char mode; /* 'w' : write mode; 'r' : read mode */ + int file_type; + /* plain file or rz file; razf_read supports plain files as input too, in which case razf_read works as a buffered fread */ +#ifdef _USE_KNETFILE + union { + knetFile *fpr; + int fpw; + } x; +#else + int filedes; /* the file descriptor */ +#endif + z_stream *stream; + ZBlockIndex *index; + int64_t in, out, end, src_end; + /* in: n bytes total in; out: n bytes total out; */ + /* end: the end of all data blocks, which is also the start of the index; src_end: the true end position in the uncompressed file */ + int buf_flush; // buffer should be flushed; suspend inflate until the buffer is empty + int64_t block_pos, block_off, next_block_pos; + /* block_pos: the start position of the current block in the compressed file */ + /* block_off: how many bytes have been read from the current block */ + void *inbuf, *outbuf; + int header_size; + gz_header *header; + /* header is used to transfer inflate_state->mode from HEAD to TYPE after calling inflateReset */ + int buf_off, buf_len; + int z_err, z_eof; + int seekable; + /* Indicates whether the source 
is seekable */ + int load_index; + /* set has_index to 0 in mode 'w', then index will be discarded (NOTE(review): no has_index field exists here; presumably load_index is meant - confirm) */ +} RAZF; + +#ifdef __cplusplus +extern "C" { +#endif + + RAZF* razf_dopen(int data_fd, const char *mode); + RAZF *razf_open(const char *fn, const char *mode); + int razf_write(RAZF* rz, const void *data, int size); + int razf_read(RAZF* rz, void *data, int size); + int64_t razf_seek(RAZF* rz, int64_t pos, int where); + void razf_close(RAZF* rz); + + /* razf_tell: current value of the `out` byte counter */ +#define razf_tell(rz) ((rz)->out) + + RAZF* razf_open2(const char *filename, const char *mode); + RAZF* razf_dopen2(int fd, const char *mode); + /* razf_tell2: virtual offset packed as (block_pos << 16) | (block_off & 0xffff) */ + uint64_t razf_tell2(RAZF *rz); + /* razf_seek2: jump to a virtual offset produced by razf_tell2; only SEEK_SET is supported, any other `where` returns -1 */ + int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where); + +#ifdef __cplusplus +} +#endif + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/sam.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/sam.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,175 @@ +#include <string.h> +#include <unistd.h> +#include "faidx.h" +#include "sam.h" + +#define TYPE_BAM 1 +#define TYPE_READ 2 + +bam_header_t *bam_header_dup(const bam_header_t *h0) +{ + bam_header_t *h; + int i; + h = bam_header_init(); + *h = *h0; + h->hash = h->dict = h->rg2lib = 0; + h->text = (char*)calloc(h->l_text + 1, 1); + memcpy(h->text, h0->text, h->l_text); + h->target_len = (uint32_t*)calloc(h->n_targets, 4); + h->target_name = (char**)calloc(h->n_targets, sizeof(void*)); + for (i = 0; i < h->n_targets; ++i) { + h->target_len[i] = h0->target_len[i]; + h->target_name[i] = strdup(h0->target_name[i]); + } + return h; +} +static void append_header_text(bam_header_t *header, char* text, int len) +{ + int x = header->l_text + 1; + int y = header->l_text + len + 1; // 1 byte null + if (text == 0) return; + kroundup32(x); + kroundup32(y); + if (x < y) header->text = (char*)realloc(header->text, y); + strncpy(header->text + header->l_text, text, len); // we cannot use strcpy() here. + header->l_text += len; + header->text[header->l_text] = 0; +} + +samfile_t *samopen(const char *fn, const char *mode, const void *aux) +{ + samfile_t *fp; + fp = (samfile_t*)calloc(1, sizeof(samfile_t)); + if (mode[0] == 'r') { // read + fp->type |= TYPE_READ; + if (mode[1] == 'b') { // binary + fp->type |= TYPE_BAM; + fp->x.bam = strcmp(fn, "-")? 
bam_open(fn, "r") : bam_dopen(fileno(stdin), "r"); + if (fp->x.bam == 0) goto open_err_ret; + fp->header = bam_header_read(fp->x.bam); + } else { // text + fp->x.tamr = sam_open(fn); + if (fp->x.tamr == 0) goto open_err_ret; + fp->header = sam_header_read(fp->x.tamr); + if (fp->header->n_targets == 0) { // no @SQ fields + if (aux) { // check if aux is present + bam_header_t *textheader = fp->header; + fp->header = sam_header_read2((const char*)aux); + if (fp->header == 0) goto open_err_ret; + append_header_text(fp->header, textheader->text, textheader->l_text); + bam_header_destroy(textheader); + } + if (fp->header->n_targets == 0) + fprintf(stderr, "[samopen] no @SQ lines in the header.\n"); + } else fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets); + } + } else if (mode[0] == 'w') { // write + fp->header = bam_header_dup((const bam_header_t*)aux); + if (mode[1] == 'b') { // binary + char bmode[3]; + bmode[0] = 'w'; bmode[1] = strstr(mode, "u")? 'u' : 0; bmode[2] = 0; + fp->type |= TYPE_BAM; + fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode); + if (fp->x.bam == 0) goto open_err_ret; + bam_header_write(fp->x.bam, fp->header); + } else { // text + // open file + fp->x.tamw = strcmp(fn, "-")? 
fopen(fn, "w") : stdout; + if (fp->x.tamr == 0) goto open_err_ret; + if (strstr(mode, "X")) fp->type |= BAM_OFSTR<<2; + else if (strstr(mode, "x")) fp->type |= BAM_OFHEX<<2; + else fp->type |= BAM_OFDEC<<2; + // write header + if (strstr(mode, "h")) { + int i; + bam_header_t *alt; + // parse the header text + alt = bam_header_init(); + alt->l_text = fp->header->l_text; alt->text = fp->header->text; + sam_header_parse(alt); + alt->l_text = 0; alt->text = 0; + // check if there are @SQ lines in the header + fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw); + if (alt->n_targets) { // then write the header text without dumping ->target_{name,len} + if (alt->n_targets != fp->header->n_targets) + fprintf(stderr, "[samopen] inconsistent number of target sequences.\n"); + } else { // then dump ->target_{name,len} + for (i = 0; i < fp->header->n_targets; ++i) + fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]); + } + bam_header_destroy(alt); + } + } + } + return fp; + +open_err_ret: + free(fp); + return 0; +} + +void samclose(samfile_t *fp) +{ + if (fp == 0) return; + if (fp->header) bam_header_destroy(fp->header); + if (fp->type & TYPE_BAM) bam_close(fp->x.bam); + else if (fp->type & TYPE_READ) sam_close(fp->x.tamr); + else fclose(fp->x.tamw); + free(fp); +} + +int samread(samfile_t *fp, bam1_t *b) +{ + if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading + if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b); + else return sam_read1(fp->x.tamr, fp->header, b); +} + +int samwrite(samfile_t *fp, const bam1_t *b) +{ + if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing + if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b); + else { + char *s = bam_format1_core(fp->header, b, fp->type>>2&3); + int l = strlen(s); + fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw); + free(s); + return l + 1; + } +} + +int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void 
*func_data) +{ + bam_plbuf_t *buf; + int ret; + bam1_t *b; + b = bam_init1(); + buf = bam_plbuf_init(func, func_data); + bam_plbuf_set_mask(buf, mask); + while ((ret = samread(fp, b)) >= 0) + bam_plbuf_push(b, buf); + bam_plbuf_push(0, buf); + bam_plbuf_destroy(buf); + bam_destroy1(b); + return 0; +} + +char *samfaipath(const char *fn_ref) +{ + char *fn_list = 0; + if (fn_ref == 0) return 0; + fn_list = calloc(strlen(fn_ref) + 5, 1); + strcat(strcpy(fn_list, fn_ref), ".fai"); + if (access(fn_list, R_OK) == -1) { // fn_list is unreadable + if (access(fn_ref, R_OK) == -1) { + fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref); + } else { + fprintf(stderr, "[samfaipath] build FASTA index...\n"); + if (fai_build(fn_ref) == -1) { + fprintf(stderr, "[samfaipath] fail to build FASTA index.\n"); + free(fn_list); fn_list = 0; + } + } + } + return fn_list; +} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/sam.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/sam.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,98 @@ +#ifndef BAM_SAM_H +#define BAM_SAM_H + +#include "bam.h" + +/*! + @header + + This file provides higher level of I/O routines and unifies the APIs + for SAM and BAM formats. These APIs are more convenient and + recommended. + + @copyright Genome Research Ltd. + */ + +/*! @typedef + @abstract SAM/BAM file handler + @field type type of the handler; bit 1 for BAM, 2 for reading and bit 3-4 for flag format + @field bam BAM file handler; valid if (type&1) == 1 + @field tamr SAM file handler for reading; valid if (type&3) == 2 + @field tamw SAM file handler for writing; valid if (type&3) == 0 + @field header header struct + */ +typedef struct { + int type; + union { + tamFile tamr; + bamFile bam; + FILE *tamw; + } x; + bam_header_t *header; +} samfile_t; + +#ifdef __cplusplus +extern "C" { +#endif + + /*! + @abstract Open a SAM/BAM file + + @param fn SAM/BAM file name; "-" is recognized as stdin (for + reading) or stdout (for writing). + + @param mode open mode /[rw](b?)(u?)(h?)([xX]?)/: 'r' for reading, + 'w' for writing, 'b' for BAM I/O, 'u' for uncompressed BAM output, + 'h' for outputting header in SAM, 'x' for HEX flag and 'X' for + string flag. If 'b' present, it must immediately follow 'r' or + 'w'. Valid modes are "r", "w", "wh", "wx", "whx", "wX", "whX", + "rb", "wb" and "wbu" exclusively. + + @param aux auxiliary data; if mode[0]=='w', aux points to + bam_header_t; if strcmp(mode, "rb")!=0 and @SQ header lines in SAM + are absent, aux points to the file name of the list of the reference; + aux is not used otherwise. If @SQ header lines are present in SAM, + aux is not used, either. + + @return SAM/BAM file handler; NULL if the file cannot be opened + */ + samfile_t *samopen(const char *fn, const char *mode, const void *aux); + + /*! + @abstract Close a SAM/BAM handler + @param fp file handler to be closed; NULL is ignored + */ + void samclose(samfile_t *fp); + + /*! 
+ @abstract Read one alignment + @param fp file handler + @param b alignment + @return bytes read; -1 if fp is NULL or was not opened for reading + */ + int samread(samfile_t *fp, bam1_t *b); + + /*! + @abstract Write one alignment + @param fp file handler + @param b alignment + @return bytes written; -1 if fp is NULL or was opened for reading + */ + int samwrite(samfile_t *fp, const bam1_t *b); + + /*! + @abstract Get the pileup for a whole alignment file + @param fp file handler + @param mask mask transferred to bam_plbuf_set_mask() + @param func user defined function called in the pileup process + @param data user provided data for func() + @return always 0 + */ + int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *data); + + /*! + @abstract Get the path of the FASTA index of a reference file + @param fn_ref reference FASTA file name + @return newly allocated string fn_ref+".fai", to be freed by the caller; + the index is built with fai_build() when it is unreadable and fn_ref is + readable; NULL if fn_ref is NULL or the index cannot be built + */ + char *samfaipath(const char *fn_ref); + +#ifdef __cplusplus +} +#endif + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/sam_header.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/sam_header.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,735 @@\n+#include "sam_header.h"\n+#include <stdio.h>\n+#include <string.h>\n+#include <ctype.h>\n+#include <stdlib.h>\n+#include <stdarg.h>\n+\n+#include "khash.h"\n+KHASH_MAP_INIT_STR(str, const char *)\n+\n+struct _HeaderList\n+{\n+ struct _HeaderList *last; // Hack: Used and maintained only by list_append_to_end. Maintained in the root node only.\n+ struct _HeaderList *next;\n+ void *data;\n+};\n+typedef struct _HeaderList list_t;\n+typedef list_t HeaderDict;\n+\n+typedef struct\n+{\n+ char key[2];\n+ char *value;\n+}\n+HeaderTag;\n+\n+typedef struct\n+{\n+ char type[2];\n+ list_t *tags;\n+}\n+HeaderLine;\n+\n+const char *o_hd_tags[] = {"SO","GO",NULL};\n+const char *r_hd_tags[] = {"VN",NULL};\n+\n+const char *o_sq_tags[] = {"AS","M5","UR","SP",NULL};\n+const char *r_sq_tags[] = {"SN","LN",NULL};\n+const char *u_sq_tags[] = {"SN",NULL};\n+\n+const char *o_rg_tags[] = {"LB","DS","PU","PI","CN","DT","PL",NULL};\n+const char *r_rg_tags[] = {"ID",NULL};\n+const char *u_rg_tags[] = {"ID",NULL};\n+\n+const char *o_pg_tags[] = {"VN","CL",NULL};\n+const char *r_pg_tags[] = {"ID",NULL};\n+\n+const char *types[] = {"HD","SQ","RG","PG","CO",NULL};\n+const char **optional_tags[] = {o_hd_tags,o_sq_tags,o_rg_tags,o_pg_tags,NULL,NULL};\n+const char **required_tags[] = {r_hd_tags,r_sq_tags,r_rg_tags,r_pg_tags,NULL,NULL};\n+const char **unique_tags[] = {NULL, u_sq_tags,u_rg_tags,NULL,NULL,NULL};\n+\n+\n+static void debug(const char *format, ...)\n+{\n+ va_list ap;\n+ va_start(ap, format);\n+ vfprintf(stderr, format, ap);\n+ va_end(ap);\n+}\n+\n+#if 0\n+// Replaced by list_append_to_end\n+static list_t *list_prepend(list_t *root, void *data)\n+{\n+ list_t *l = malloc(sizeof(list_t));\n+ l->next = root;\n+ l->data = data;\n+ return l;\n+}\n+#endif\n+\n+// Relies on the root->last being correct. 
Do not use with the other list_*\n+// routines unless they are fixed to modify root->last as well.\n+static list_t *list_append_to_end(list_t *root, void *data)\n+{\n+ list_t *l = malloc(sizeof(list_t));\n+ l->last = l;\n+ l->next = NULL;\n+ l->data = data;\n+\n+ if ( !root )\n+ return l;\n+\n+ root->last->next = l;\n+ root->last = l;\n+ return root;\n+}\n+\n+static list_t *list_append(list_t *root, void *data)\n+{\n+ list_t *l = root;\n+ while (l && l->next)\n+ l = l->next;\n+ if ( l ) \n+ {\n+ l->next = malloc(sizeof(list_t));\n+ l = l->next;\n+ }\n+ else\n+ {\n+ l = malloc(sizeof(list_t));\n+ root = l;\n+ }\n+ l->data = data;\n+ l->next = NULL;\n+ return root;\n+}\n+\n+static void list_free(list_t *root)\n+{\n+ list_t *l = root;\n+ while (root)\n+ {\n+ l = root;\n+ root = root->next;\n+ free(l);\n+ }\n+}\n+\n+\n+\n+// Look for a tag "XY" in a predefined const char *[] array.\n+static int tag_exists(const char *tag, const char **tags)\n+{\n+ int itag=0;\n+ if ( !tags ) return -1;\n+ while ( tags[itag] )\n+ {\n+ if ( tags[itag][0]==tag[0] && tags[itag][1]==tag[1] ) return itag; \n+ itag++;\n+ }\n+ return -1;\n+}\n+\n+\n+\n+// Mimics the behaviour of getline, except it returns pointer to the next chunk of the text\n+// or NULL if everything has been read. The lineptr should be freed by the caller. 
The\n+// newline character is stripped.\n+static const char *nextline(char **lineptr, size_t *n, const char *text)\n+{\n+ int len;\n+ const char *to = text;\n+\n+ if ( !*to ) return NULL;\n+\n+ while ( *to && *to!=\'\\n\' && *to!=\'\\r\' ) to++;\n+ len = to - text + 1;\n+\n+ if ( *to )\n+ {\n+ // Advance the pointer for the next call\n+ if ( *to==\'\\n\' ) to++;\n+ else if ( *to==\'\\r\' && *(to+1)==\'\\n\' ) to+=2;\n+ }\n+ if ( !len )\n+ return to;\n+\n+ if ( !*lineptr ) \n+ {\n+ *lineptr = malloc(len);\n+ *n = len;\n+ }\n+ else if ( *n<len ) \n+ {\n+ *lineptr = realloc(*lineptr, len);\n+ *n = len;\n+ }\n+ if ( !*lineptr ) {\n+\t\tdebug("[nextline] Insufficient memory!\\n");\n+\t\treturn 0;\n+\t}\n+\n+ memcp'..b'e);\n+ else\n+ {\n+\t\t\tif (hline) sam_header_line_free(hline);\n+\t\t\tsam_header_free(hlines);\n+ if ( buf ) free(buf);\n+ return NULL;\n+ }\n+ }\n+ if ( buf ) free(buf);\n+\n+ return hlines;\n+}\n+\n+void *sam_header2tbl(const void *_dict, char type[2], char key_tag[2], char value_tag[2])\n+{\n+\tconst HeaderDict *dict = (const HeaderDict*)_dict;\n+ const list_t *l = dict;\n+ khash_t(str) *tbl = kh_init(str);\n+ khiter_t k;\n+ int ret;\n+\n+\tif (_dict == 0) return tbl; // return an empty (not null) hash table\n+ while (l)\n+ {\n+ HeaderLine *hline = l->data;\n+ if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) \n+ {\n+ l = l->next;\n+ continue;\n+ }\n+ \n+ HeaderTag *key, *value;\n+ key = header_line_has_tag(hline,key_tag);\n+ value = header_line_has_tag(hline,value_tag); \n+ if ( !key || !value )\n+ {\n+ l = l->next;\n+ continue;\n+ }\n+ \n+ k = kh_get(str, tbl, key->value);\n+ if ( k != kh_end(tbl) )\n+ debug("[sam_header_lookup_table] They key %s not unique.\\n", key->value);\n+ k = kh_put(str, tbl, key->value, &ret);\n+ kh_value(tbl, k) = value->value;\n+\n+ l = l->next;\n+ }\n+ return tbl;\n+}\n+\n+char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n)\n+{\n+\tconst HeaderDict *dict = (const 
HeaderDict*)_dict;\n+ const list_t *l = dict;\n+ int max, n;\n+\tchar **ret;\n+\n+\tret = 0; *_n = max = n = 0;\n+ while (l)\n+ {\n+ HeaderLine *hline = l->data;\n+ if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) \n+ {\n+ l = l->next;\n+ continue;\n+ }\n+ \n+ HeaderTag *key;\n+ key = header_line_has_tag(hline,key_tag);\n+ if ( !key )\n+ {\n+ l = l->next;\n+ continue;\n+ }\n+\n+\t\tif (n == max) {\n+\t\t\tmax = max? max<<1 : 4;\n+\t\t\tret = realloc(ret, max * sizeof(void*));\n+\t\t}\n+\t\tret[n++] = key->value;\n+\n+ l = l->next;\n+ }\n+\t*_n = n;\n+ return ret;\n+}\n+\n+const char *sam_tbl_get(void *h, const char *key)\n+{\n+\tkhash_t(str) *tbl = (khash_t(str)*)h;\n+\tkhint_t k;\n+\tk = kh_get(str, tbl, key);\n+\treturn k == kh_end(tbl)? 0 : kh_val(tbl, k);\n+}\n+\n+int sam_tbl_size(void *h)\n+{\n+\tkhash_t(str) *tbl = (khash_t(str)*)h;\n+\treturn h? kh_size(tbl) : 0;\n+}\n+\n+void sam_tbl_destroy(void *h)\n+{\n+\tkhash_t(str) *tbl = (khash_t(str)*)h;\n+\tkh_destroy(str, tbl);\n+}\n+\n+void *sam_header_merge(int n, const void **_dicts)\n+{\n+\tconst HeaderDict **dicts = (const HeaderDict**)_dicts;\n+ HeaderDict *out_dict;\n+ int idict, status;\n+\n+ if ( n<2 ) return NULL;\n+\n+ out_dict = sam_header_clone(dicts[0]);\n+\n+ for (idict=1; idict<n; idict++)\n+ {\n+ const list_t *tmpl_hlines = dicts[idict];\n+\n+ while ( tmpl_hlines )\n+ {\n+ list_t *out_hlines = out_dict;\n+ int inserted = 0;\n+ while ( out_hlines )\n+ {\n+ status = sam_header_compare_lines(tmpl_hlines->data, out_hlines->data);\n+ if ( status==0 )\n+ {\n+ out_hlines = out_hlines->next;\n+ continue;\n+ }\n+ \n+ if ( status==2 ) \n+ {\n+ print_header_line(stderr,tmpl_hlines->data);\n+ print_header_line(stderr,out_hlines->data);\n+ debug("Conflicting lines, cannot merge the headers.\\n");\n+\t\t\t\t\treturn 0;\n+ }\n+ if ( status==3 )\n+ sam_header_line_merge_with(out_hlines->data, tmpl_hlines->data);\n+\n+ inserted = 1;\n+ break;\n+ }\n+ if ( !inserted )\n+ out_dict = list_append(out_dict, 
sam_header_line_clone(tmpl_hlines->data));\n+\n+ tmpl_hlines = tmpl_hlines->next;\n+ }\n+ }\n+\n+ return out_dict;\n+}\n+\n+\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/sam_header.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/sam_header.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,24 @@ +#ifndef __SAM_HEADER_H__ +#define __SAM_HEADER_H__ + +#ifdef __cplusplus +extern "C" { +#endif + + void *sam_header_parse2(const char *headerText); + /* Merge n header dictionaries into a new one; returns NULL if n<2 or if two lines conflict */ + void *sam_header_merge(int n, const void **dicts); + void sam_header_free(void *header); + char *sam_header_write(const void *headerDict); // returns a newly allocated string + + /* Collect the key_tag values of all header lines of the given type; *_n receives the count. The returned array holds pointers into the dictionary (the strings are not copied) and is NULL when nothing matches */ + char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n); + + /* Build a hash table mapping key_tag values to value_tag values for header lines of the given type; the strings are not copied. A NULL dict yields an empty (non-NULL) table */ + void *sam_header2tbl(const void *dict, char type[2], char key_tag[2], char value_tag[2]); + /* Look up key in a table from sam_header2tbl(); returns NULL if the key is absent */ + const char *sam_tbl_get(void *h, const char *key); + /* Number of entries in the table; 0 if h is NULL */ + int sam_tbl_size(void *h); + /* Destroy a table created by sam_header2tbl() */ + void sam_tbl_destroy(void *h); + +#ifdef __cplusplus +} +#endif + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/samtools/sam_view.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/samtools/sam_view.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,255 @@\n+#include <stdlib.h>\n+#include <string.h>\n+#include <stdio.h>\n+#include <unistd.h>\n+#include <math.h>\n+#include "sam_header.h"\n+#include "sam.h"\n+#include "faidx.h"\n+#include "khash.h"\n+KHASH_SET_INIT_STR(rg)\n+\n+typedef khash_t(rg) *rghash_t;\n+\n+rghash_t g_rghash = 0;\n+static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0;\n+static char *g_library, *g_rg;\n+static int g_sol2sanger_tbl[128];\n+\n+static void sol2sanger(bam1_t *b)\n+{\n+\tint l;\n+\tuint8_t *qual = bam1_qual(b);\n+\tif (g_sol2sanger_tbl[30] == 0) {\n+\t\tfor (l = 0; l != 128; ++l) {\n+\t\t\tg_sol2sanger_tbl[l] = (int)(10.0 * log(1.0 + pow(10.0, (l - 64 + 33) / 10.0)) / log(10.0) + .499);\n+\t\t\tif (g_sol2sanger_tbl[l] >= 93) g_sol2sanger_tbl[l] = 93;\n+\t\t}\n+\t}\n+\tfor (l = 0; l < b->core.l_qseq; ++l) {\n+\t\tint q = qual[l];\n+\t\tif (q > 127) q = 127;\n+\t\tqual[l] = g_sol2sanger_tbl[q];\n+\t}\n+}\n+\n+static inline int __g_skip_aln(const bam_header_t *h, const bam1_t *b)\n+{\n+\tif (b->core.qual < g_min_mapQ || ((b->core.flag & g_flag_on) != g_flag_on) || (b->core.flag & g_flag_off))\n+\t\treturn 1;\n+\tif (g_rg || g_rghash) {\n+\t\tuint8_t *s = bam_aux_get(b, "RG");\n+\t\tif (s) {\n+\t\t\tif (g_rg) return (strcmp(g_rg, (char*)(s + 1)) == 0)? 0 : 1;\n+\t\t\tif (g_rghash) {\n+\t\t\t\tkhint_t k = kh_get(rg, g_rghash, (char*)(s + 1));\n+\t\t\t\treturn (k != kh_end(g_rghash))? 0 : 1;\n+\t\t\t}\n+\t\t}\n+\t}\n+\tif (g_library) {\n+\t\tconst char *p = bam_get_library((bam_header_t*)h, b);\n+\t\treturn (p && strcmp(p, g_library) == 0)? 
0 : 1;\n+\t}\n+\treturn 0;\n+}\n+\n+// callback function for bam_fetch()\n+static int view_func(const bam1_t *b, void *data)\n+{\n+\tif (!__g_skip_aln(((samfile_t*)data)->header, b))\n+\t\tsamwrite((samfile_t*)data, b);\n+\treturn 0;\n+}\n+\n+static int usage(int is_long_help);\n+\n+int main_samview(int argc, char *argv[])\n+{\n+\tint c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0, is_uncompressed = 0, is_bamout = 0, slx2sngr = 0;\n+\tint of_type = BAM_OFDEC, is_long_help = 0;\n+\tsamfile_t *in = 0, *out = 0;\n+\tchar in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0, *fn_ref = 0, *fn_rg = 0;\n+\n+\t/* parse command-line options */\n+\tstrcpy(in_mode, "r"); strcpy(out_mode, "w");\n+\twhile ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:ul:r:xX?T:CR:")) >= 0) {\n+\t\tswitch (c) {\n+\t\tcase \'C\': slx2sngr = 1; break;\n+\t\tcase \'S\': is_bamin = 0; break;\n+\t\tcase \'b\': is_bamout = 1; break;\n+\t\tcase \'t\': fn_list = strdup(optarg); is_bamin = 0; break;\n+\t\tcase \'h\': is_header = 1; break;\n+\t\tcase \'H\': is_header_only = 1; break;\n+\t\tcase \'o\': fn_out = strdup(optarg); break;\n+\t\tcase \'f\': g_flag_on = strtol(optarg, 0, 0); break;\n+\t\tcase \'F\': g_flag_off = strtol(optarg, 0, 0); break;\n+\t\tcase \'q\': g_min_mapQ = atoi(optarg); break;\n+\t\tcase \'u\': is_uncompressed = 1; break;\n+\t\tcase \'l\': g_library = strdup(optarg); break;\n+\t\tcase \'r\': g_rg = strdup(optarg); break;\n+\t\tcase \'R\': fn_rg = strdup(optarg); break;\n+\t\tcase \'x\': of_type = BAM_OFHEX; break;\n+\t\tcase \'X\': of_type = BAM_OFSTR; break;\n+\t\tcase \'?\': is_long_help = 1; break;\n+\t\tcase \'T\': fn_ref = strdup(optarg); is_bamin = 0; break;\n+\t\tdefault: return usage(is_long_help);\n+\t\t}\n+\t}\n+\tif (is_uncompressed) is_bamout = 1;\n+\tif (is_header_only) is_header = 1;\n+\tif (is_bamout) strcat(out_mode, "b");\n+\telse {\n+\t\tif (of_type == BAM_OFHEX) strcat(out_mode, "x");\n+\t\telse if (of_type == BAM_OFSTR) strcat(out_mode, "X");\n+\t}\n+\tif 
(is_bamin) strcat(in_mode, "b");\n+\tif (is_header) strcat(out_mode, "h");\n+\tif (is_uncompressed) strcat(out_mode, "u");\n+\tif (argc == optind) return usage(is_long_help); // potential memory leak...\n+\n+\t// read the list of read groups\n+\tif (fn_rg) {\n+\t\tFILE *fp_rg;\n+\t\tchar buf[1024];\n+\t\tint ret;\n+\t\tg_rghash = kh_init(rg);\n+\t\tfp_rg = fopen(fn_rg, "r");\n+\t\twhile (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but bear me...\n+\t\t\tkh_put(rg, g_rghash, strdup(buf), &ret); // we\'d better check duplicates...\n+\t\tfclose(fp_rg);\n+\t}\n+\n+\t// generate the fn_list if necessary\n+\tif (fn_list == 0 && fn_ref) fn_list = samfaipath(fn_ref);\n+\t// open file handlers\n+\tif ((in = samopen(arg'..b'rr, "[main_samview] fail to get the reference name. Continue anyway.\\n");\n+\t\t\t\tcontinue;\n+\t\t\t}\n+\t\t\tbam_fetch(in->x.bam, idx, tid, beg, end, out, view_func); // fetch alignments\n+\t\t}\n+\t\tbam_index_destroy(idx); // destroy the BAM index\n+\t}\n+\n+view_end:\n+\t// close files, free and return\n+\tfree(fn_list); free(fn_ref); free(fn_out); free(g_library); free(g_rg); free(fn_rg);\n+\tif (g_rghash) {\n+\t\tkhint_t k;\n+\t\tfor (k = 0; k < kh_end(g_rghash); ++k)\n+\t\t\tif (kh_exist(g_rghash, k)) free((char*)kh_key(g_rghash, k));\n+\t\tkh_destroy(rg, g_rghash);\n+\t}\n+\tsamclose(in);\n+\tsamclose(out);\n+\treturn ret;\n+}\n+\n+static int usage(int is_long_help)\n+{\n+\tfprintf(stderr, "\\n");\n+\tfprintf(stderr, "Usage: samtools view [options] <in.bam>|<in.sam> [region1 [...]]\\n\\n");\n+\tfprintf(stderr, "Options: -b output BAM\\n");\n+\tfprintf(stderr, " -h print header for the SAM output\\n");\n+\tfprintf(stderr, " -H print header only (no alignments)\\n");\n+\tfprintf(stderr, " -S input is SAM\\n");\n+\tfprintf(stderr, " -u uncompressed BAM output (force -b)\\n");\n+\tfprintf(stderr, " -x output FLAG in HEX (samtools-C specific)\\n");\n+\tfprintf(stderr, " -X output FLAG in string (samtools-C 
specific)\\n");\n+\tfprintf(stderr, " -t FILE list of reference names and lengths (force -S) [null]\\n");\n+\tfprintf(stderr, " -T FILE reference sequence file (force -S) [null]\\n");\n+\tfprintf(stderr, " -o FILE output file name [stdout]\\n");\n+\tfprintf(stderr, " -R FILE list of read groups to be outputted [null]\\n");\n+\tfprintf(stderr, " -f INT required flag, 0 for unset [0]\\n");\n+\tfprintf(stderr, " -F INT filtering flag, 0 for unset [0]\\n");\n+\tfprintf(stderr, " -q INT minimum mapping quality [0]\\n");\n+\tfprintf(stderr, " -l STR only output reads in library STR [null]\\n");\n+\tfprintf(stderr, " -r STR only output reads in read group STR [null]\\n");\n+\tfprintf(stderr, " -? longer help\\n");\n+\tfprintf(stderr, "\\n");\n+\tif (is_long_help)\n+\t\tfprintf(stderr, "Notes:\\n\\\n+\\n\\\n+ 1. By default, this command assumes the file on the command line is in\\n\\\n+ the BAM format and it prints the alignments in SAM. If `-t\' is\\n\\\n+ applied, the input file is assumed to be in the SAM format. The\\n\\\n+ file supplied with `-t\' is SPACE/TAB delimited with the first two\\n\\\n+ fields of each line consisting of the reference name and the\\n\\\n+ corresponding sequence length. The `.fai\' file generated by `faidx\'\\n\\\n+ can be used here. This file may be empty if reads are unaligned.\\n\\\n+\\n\\\n+ 2. SAM->BAM conversion: `samtools view -bT ref.fa in.sam.gz\'.\\n\\\n+\\n\\\n+ 3. BAM->SAM conversion: `samtools view in.bam\'.\\n\\\n+\\n\\\n+ 4. A region should be presented in one of the following formats:\\n\\\n+ `chr1\', `chr2:1,000\' and `chr3:1000-2,000\'. When a region is\\n\\\n+ specified, the input alignment file must be an indexed BAM file.\\n\\\n+\\n\\\n+ 5. Option `-u\' is preferred over `-b\' when the output is piped to\\n\\\n+ another samtools command.\\n\\\n+\\n\\\n+ 6. 
In a string FLAG, each character represents one bit with\\n\\\n+ p=0x1 (paired), P=0x2 (properly paired), u=0x4 (unmapped),\\n\\\n+ U=0x8 (mate unmapped), r=0x10 (reverse), R=0x20 (mate reverse)\\n\\\n+ 1=0x40 (first), 2=0x80 (second), s=0x100 (not primary), \\n\\\n+ f=0x200 (failure) and d=0x400 (duplicate). Note that `-x\' and\\n\\\n+ `-X\' are samtools-C specific. Picard and older samtools do not\\n\\\n+ support HEX or string flags.\\n\\\n+\\n");\n+\treturn 1;\n+}\n+\n+int main_import(int argc, char *argv[])\n+{\n+\tint argc2, ret;\n+\tchar **argv2;\n+\tif (argc != 4) {\n+\t\tfprintf(stderr, "Usage: bamtk import <in.ref_list> <in.sam> <out.bam>\\n");\n+\t\treturn 1;\n+\t}\n+\targc2 = 6;\n+\targv2 = calloc(6, sizeof(char*));\n+\targv2[0] = "import", argv2[1] = "-o", argv2[2] = argv[3], argv2[3] = "-bt", argv2[4] = argv[1], argv2[5] = argv[2];\n+\tret = main_samview(argc2, argv2);\n+\tfree(argv2);\n+\treturn ret;\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/setup.cfg --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/setup.cfg Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,6 @@ +[bdist_rpm] +doc_files = README doc/*.html ChangeLog +vendor = TDB +packager = TDB <email@email.com> +distribution-name = Red Hat Linux +requires = python |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/setup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/setup.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,116 @@ +#!/usr/bin/python +''' + +pysam +***** + +''' + +import os, sys, glob, shutil, hashlib + +name = "pysam" + +# collect pysam version +sys.path.insert( 0, "pysam") +import version + +version = version.__version__ + +samtools_exclude = ( "bamtk.c", "razip.c", "bgzip.c", "errmod.c", "bam_reheader.c", "bam2bcf.c" ) +samtools_dest = os.path.abspath( "samtools" ) +tabix_exclude = ( "main.c", ) +tabix_dest = os.path.abspath( "tabix" ) + +# copy samtools source +if len(sys.argv) >= 2 and sys.argv[1] == "import": + if len(sys.argv) < 3: raise ValueError("missing PATH to samtools source directory") + if len(sys.argv) < 4: raise ValueError("missing PATH to tabix source directory") + + for destdir, srcdir, exclude in zip( + (samtools_dest, tabix_dest), + sys.argv[2:4], + (samtools_exclude, tabix_exclude)): + + srcdir = os.path.abspath( srcdir ) + if not os.path.exists( srcdir ): raise IOError( "samtools src dir `%s` does not exist." % srcdir ) + + cfiles = glob.glob( os.path.join( srcdir, "*.c" ) ) + hfiles = glob.glob( os.path.join( srcdir, "*.h" ) ) + ncopied = 0 + for new_file in cfiles + hfiles: + f = os.path.basename(new_file) + if f in exclude: continue + old_file = os.path.join( destdir, f ) + if os.path.exists( old_file ): + md5_old = hashlib.md5("".join(open(old_file,"r").readlines())).digest() + md5_new = hashlib.md5("".join(open(new_file,"r").readlines())).digest() + if md5_old == md5_new: continue + raise ValueError( "incompatible files for %s and %s" % (old_file, new_file )) + + shutil.copy( new_file, destdir ) + ncopied += 1 + print "installed latest source code from %s: %i files copied" % (srcdir, ncopied) + sys.exit(0) + +from distutils.core import setup, Extension +from Cython.Distutils import build_ext + +classifiers = """ +Development Status :: 2 - Alpha +Operating System :: MacOS :: MacOS X +Operating System :: Microsoft :: Windows :: Windows NT/2000 +Operating System :: OS Independent +Operating System :: POSIX +Operating System :: POSIX 
:: Linux +Operating System :: Unix +Programming Language :: Python +Topic :: Scientific/Engineering +Topic :: Scientific/Engineering :: Bioinformatics +""" + +samtools = Extension( + "csamtools", # name of extension + [ "pysam/csamtools.pyx" ] +\ + [ "pysam/%s" % x for x in ( + "pysam_util.c", )] +\ + glob.glob( os.path.join( "samtools", "*.c" ) ), + library_dirs=[], + include_dirs=[ "samtools", "pysam" ], + libraries=[ "z", ], + language="c", + define_macros = [('FILE_OFFSET_BITS','64'), + ('_USE_KNETFILE','')], + ) + +tabix = Extension( + "ctabix", # name of extension + [ "pysam/ctabix.pyx" ] +\ + [ "pysam/%s" % x for x in ()] +\ + glob.glob( os.path.join( "tabix", "*.c" ) ), + library_dirs=[], + include_dirs=[ "tabix", "pysam" ], + libraries=[ "z", ], + language="c", + ) + +metadata = { + 'name': name, + 'version': version, + 'description': "pysam", + 'long_description': __doc__, + 'author': "Andreas Heger", + 'author_email': "andreas.heger@gmail.com", + 'license': "MIT", + 'platforms': "ALL", + 'url': "http://code.google.com/p/pysam/", + 'py_modules': [ + "pysam/__init__", + "pysam/Pileup", + "pysam/namedtuple", + "pysam/version" ], + 'ext_modules': [samtools, tabix], + 'cmdclass' : {'build_ext': build_ext}, + } + +if __name__=='__main__': + dist = setup(**metadata) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/bam_endian.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/bam_endian.h Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,42 @@ +#ifndef BAM_ENDIAN_H +#define BAM_ENDIAN_H + +#include <stdint.h> + +static inline int bam_is_big_endian() +{ + long one= 1; + return !(*((char *)(&one))); +} +static inline uint16_t bam_swap_endian_2(uint16_t v) +{ + return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); +} +static inline void *bam_swap_endian_2p(void *x) +{ + *(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x); + return x; +} +static inline uint32_t bam_swap_endian_4(uint32_t v) +{ + v = ((v & 0x0000FFFFU) << 16) | (v >> 16); + return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); +} +static inline void *bam_swap_endian_4p(void *x) +{ + *(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x); + return x; +} +static inline uint64_t bam_swap_endian_8(uint64_t v) +{ + v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); + v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); + return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); +} +static inline void *bam_swap_endian_8p(void *x) +{ + *(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x); + return x; +} + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/bgzf.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/bgzf.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,676 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology\n+\n+ Permission is hereby granted, free of charge, to any person obtaining a copy\n+ of this software and associated documentation files (the "Software"), to deal\n+ in the Software without restriction, including without limitation the rights\n+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n+ copies of the Software, and to permit persons to whom the Software is\n+ furnished to do so, subject to the following conditions:\n+\n+ The above copyright notice and this permission notice shall be included in\n+ all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n+ THE SOFTWARE.\n+*/\n+\n+/*\n+ 2009-06-29 by lh3: cache recent uncompressed blocks.\n+ 2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP.\n+ 2009-06-12 by lh3: support a mode string like "wu" where \'u\' for uncompressed output */\n+\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <unistd.h>\n+#include <fcntl.h>\n+#include <sys/types.h>\n+#include <sys/stat.h>\n+#include "bgzf.h"\n+\n+#include "khash.h"\n+typedef struct {\n+\tint size;\n+\tuint8_t *block;\n+\tint64_t end_offset;\n+} cache_t;\n+KHASH_MAP_INIT_INT64(cache, cache_t)\n+\n+#if defined(_WIN32) || defined(_MSC_VER)\n+#define ftello(fp) ftell(fp)\n+#define fseeko(fp, offset, whence) fseek(fp, offset, whence)\n+#else\n+extern off_t ftello(FILE *stream);\n+extern int fseeko(FILE *stream, off_t 
offset, int whence);\n+#endif\n+\n+typedef int8_t bgzf_byte_t;\n+\n+static const int DEFAULT_BLOCK_SIZE = 64 * 1024;\n+static const int MAX_BLOCK_SIZE = 64 * 1024;\n+\n+static const int BLOCK_HEADER_LENGTH = 18;\n+static const int BLOCK_FOOTER_LENGTH = 8;\n+\n+static const int GZIP_ID1 = 31;\n+static const int GZIP_ID2 = 139;\n+static const int CM_DEFLATE = 8;\n+static const int FLG_FEXTRA = 4;\n+static const int OS_UNKNOWN = 255;\n+static const int BGZF_ID1 = 66; // \'B\'\n+static const int BGZF_ID2 = 67; // \'C\'\n+static const int BGZF_LEN = 2;\n+static const int BGZF_XLEN = 6; // BGZF_LEN+4\n+\n+static const int GZIP_WINDOW_BITS = -15; // no zlib header\n+static const int Z_DEFAULT_MEM_LEVEL = 8;\n+\n+\n+inline\n+void\n+packInt16(uint8_t* buffer, uint16_t value)\n+{\n+ buffer[0] = value;\n+ buffer[1] = value >> 8;\n+}\n+\n+inline\n+int\n+unpackInt16(const uint8_t* buffer)\n+{\n+ return (buffer[0] | (buffer[1] << 8));\n+}\n+\n+inline\n+void\n+packInt32(uint8_t* buffer, uint32_t value)\n+{\n+ buffer[0] = value;\n+ buffer[1] = value >> 8;\n+ buffer[2] = value >> 16;\n+ buffer[3] = value >> 24;\n+}\n+\n+static inline\n+int\n+bgzf_min(int x, int y)\n+{\n+ return (x < y) ? 
x : y;\n+}\n+\n+static\n+void\n+report_error(BGZF* fp, const char* message) {\n+ fp->error = message;\n+}\n+\n+static BGZF *bgzf_read_init()\n+{\n+\tBGZF *fp;\n+\tfp = calloc(1, sizeof(BGZF));\n+ fp->uncompressed_block_size = MAX_BLOCK_SIZE;\n+ fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);\n+ fp->compressed_block_size = MAX_BLOCK_SIZE;\n+ fp->compressed_block = malloc(MAX_BLOCK_SIZE);\n+\tfp->cache_size = 0;\n+\tfp->cache = kh_init(cache);\n+\treturn fp;\n+}\n+\n+static\n+BGZF*\n+open_read(int fd)\n+{\n+#ifdef _USE_KNETFILE\n+ knetFile *file = knet_dopen(fd, "r");\n+#else\n+ FILE* file = fdopen(fd, "r");\n+#endif\n+ BGZF* fp;\n+\tif (file == 0) return 0;\n+\tfp = bgzf_read_init();\n+ fp->file_descriptor = fd;\n+ fp->open_mode = \'r\';\n+#ifdef _USE_KNETFILE\n+ fp->x.fpr = file;\n+#else\n+ fp->file = file;\n+#endif\n+ return fp;\n+}\n+\n+static\n+BGZF*\n+open_write(int'..b'nt\n+flush_block(BGZF* fp)\n+{\n+ while (fp->block_offset > 0) {\n+ int block_length = deflate_block(fp, fp->block_offset);\n+ if (block_length < 0) {\n+ return -1;\n+ }\n+#ifdef _USE_KNETFILE\n+ int count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);\n+#else\n+ int count = fwrite(fp->compressed_block, 1, block_length, fp->file);\n+#endif\n+ if (count != block_length) {\n+ report_error(fp, "write failed");\n+ return -1;\n+ }\n+ fp->block_address += block_length;\n+ }\n+ return 0;\n+}\n+\n+int\n+bgzf_write(BGZF* fp, const void* data, int length)\n+{\n+ if (fp->open_mode != \'w\') {\n+ report_error(fp, "file not open for writing");\n+ return -1;\n+ }\n+\n+ if (fp->uncompressed_block == NULL) {\n+ fp->uncompressed_block = malloc(fp->uncompressed_block_size);\n+ }\n+\n+ const bgzf_byte_t* input = data;\n+ int block_length = fp->uncompressed_block_size;\n+ int bytes_written = 0;\n+ while (bytes_written < length) {\n+ int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);\n+ bgzf_byte_t* buffer = fp->uncompressed_block;\n+ memcpy(buffer + fp->block_offset, 
input, copy_length);\n+ fp->block_offset += copy_length;\n+ input += copy_length;\n+ bytes_written += copy_length;\n+ if (fp->block_offset == block_length) {\n+ if (flush_block(fp) != 0) {\n+ break;\n+ }\n+ }\n+ }\n+ return bytes_written;\n+}\n+\n+int\n+bgzf_close(BGZF* fp)\n+{\n+ if (fp->open_mode == \'w\') {\n+ if (flush_block(fp) != 0) {\n+ return -1;\n+ }\n+\t\t{ // add an empty block\n+\t\t\tint count, block_length = deflate_block(fp, 0);\n+#ifdef _USE_KNETFILE\n+\t\t\tcount = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);\n+#else\n+\t\t\tcount = fwrite(fp->compressed_block, 1, block_length, fp->file);\n+#endif\n+\t\t}\n+#ifdef _USE_KNETFILE\n+ if (fflush(fp->x.fpw) != 0) {\n+#else\n+ if (fflush(fp->file) != 0) {\n+#endif\n+ report_error(fp, "flush failed");\n+ return -1;\n+ }\n+ }\n+ if (fp->owned_file) {\n+#ifdef _USE_KNETFILE\n+\t\tint ret;\n+\t\tif (fp->open_mode == \'w\') ret = fclose(fp->x.fpw);\n+\t\telse ret = knet_close(fp->x.fpr);\n+ if (ret != 0) return -1;\n+#else\n+ if (fclose(fp->file) != 0) {\n+ return -1;\n+ }\n+#endif\n+ }\n+ free(fp->uncompressed_block);\n+ free(fp->compressed_block);\n+\tfree_cache(fp);\n+ free(fp);\n+ return 0;\n+}\n+\n+void bgzf_set_cache_size(BGZF *fp, int cache_size)\n+{\n+\tif (fp) fp->cache_size = cache_size;\n+}\n+\n+int bgzf_check_EOF(BGZF *fp)\n+{\n+\tstatic uint8_t magic[28] = "\\037\\213\\010\\4\\0\\0\\0\\0\\0\\377\\6\\0\\102\\103\\2\\0\\033\\0\\3\\0\\0\\0\\0\\0\\0\\0\\0\\0";\n+\tuint8_t buf[28];\n+\toff_t offset;\n+#ifdef _USE_KNETFILE\n+\toffset = knet_tell(fp->x.fpr);\n+\tif (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;\n+\tknet_read(fp->x.fpr, buf, 28);\n+\tknet_seek(fp->x.fpr, offset, SEEK_SET);\n+#else\n+\toffset = ftello(fp->file);\n+\tif (fseeko(fp->file, -28, SEEK_END) != 0) return -1;\n+\tfread(buf, 1, 28, fp->file);\n+\tfseeko(fp->file, offset, SEEK_SET);\n+#endif\n+\treturn (memcmp(magic, buf, 28) == 0)? 
1 : 0;\n+}\n+\n+int64_t\n+bgzf_seek(BGZF* fp, int64_t pos, int where)\n+{\n+ if (fp->open_mode != \'r\') {\n+ report_error(fp, "file not open for read");\n+ return -1;\n+ }\n+ if (where != SEEK_SET) {\n+ report_error(fp, "unimplemented seek option");\n+ return -1;\n+ }\n+ int block_offset = pos & 0xFFFF;\n+ int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;\n+#ifdef _USE_KNETFILE\n+ if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {\n+#else\n+ if (fseeko(fp->file, block_address, SEEK_SET) != 0) {\n+#endif\n+ report_error(fp, "seek failed");\n+ return -1;\n+ }\n+ fp->block_length = 0; // indicates current block is not loaded\n+ fp->block_address = block_address;\n+ fp->block_offset = block_offset;\n+ return 0;\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/bgzf.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/bgzf.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,156 @@ +/* The MIT License + + Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. +*/ + +#ifndef __BGZF_H +#define __BGZF_H + +#include <stdint.h> +#include <stdio.h> +#include <stdbool.h> +#include <zlib.h> +#ifdef _USE_KNETFILE +#include "knetfile.h" +#endif + +//typedef int8_t bool; + +typedef struct { + int file_descriptor; + char open_mode; // 'r' or 'w' + bool owned_file, is_uncompressed; +#ifdef _USE_KNETFILE + union { + knetFile *fpr; + FILE *fpw; + } x; +#else + FILE* file; +#endif + int uncompressed_block_size; + int compressed_block_size; + void* uncompressed_block; + void* compressed_block; + int64_t block_address; + int block_length; + int block_offset; + int cache_size; + const char* error; + void *cache; // a pointer to a hash table +} BGZF; + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Open an existing file descriptor for reading or writing. 
+ * Mode must be either "r" or "w". + * A subsequent bgzf_close will not close the file descriptor. + * Returns null on error. + */ +BGZF* bgzf_fdopen(int fd, const char* __restrict mode); + +/* + * Open the specified file for reading or writing. + * Mode must be either "r" or "w". + * Returns null on error. + */ +BGZF* bgzf_open(const char* path, const char* __restrict mode); + +/* + * Close the BGZ file and free all associated resources. + * Does not close the underlying file descriptor if created with bgzf_fdopen. + * Returns zero on success, -1 on error. + */ +int bgzf_close(BGZF* fp); + +/* + * Read up to length bytes from the file storing into data. + * Returns the number of bytes actually read. + * Returns zero on end of file. + * Returns -1 on error. + */ +int bgzf_read(BGZF* fp, void* data, int length); + +/* + * Write length bytes from data to the file. + * Returns the number of bytes written. + * Returns -1 on error. + */ +int bgzf_write(BGZF* fp, const void* data, int length); + +/* + * Return a virtual file pointer to the current location in the file. + * No interpetation of the value should be made, other than a subsequent + * call to bgzf_seek can be used to position the file at the same point. + * Return value is non-negative on success. + * Returns -1 on error. + */ +#define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)) + +/* + * Set the file to read from the location specified by pos, which must + * be a value previously returned by bgzf_tell for this file (but not + * necessarily one returned by this file handle). + * The where argument must be SEEK_SET. + * Seeking on a file opened for write is not supported. + * Returns zero on success, -1 on error. + */ +int64_t bgzf_seek(BGZF* fp, int64_t pos, int where); + +/* + * Set the cache size. Zero to disable. By default, caching is + * disabled. The recommended cache size for frequent random access is + * about 8M bytes. 
+ */ +void bgzf_set_cache_size(BGZF *fp, int cache_size); + +int bgzf_check_EOF(BGZF *fp); + +int bgzf_read_block(BGZF* fp); + +#ifdef __cplusplus +} +#endif + +static inline int bgzf_getc(BGZF *fp) +{ + int c; + if (fp->block_offset >= fp->block_length) { + if (bgzf_read_block(fp) != 0) return -2; /* error */ + if (fp->block_length == 0) return -1; /* end-of-file */ + } + c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; + if (fp->block_offset == fp->block_length) { +#ifdef _USE_KNETFILE + fp->block_address = knet_tell(fp->x.fpr); +#else + fp->block_address = ftello(fp->file); +#endif + fp->block_offset = 0; + fp->block_length = 0; + } + return c; +} + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/bgzip.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/bgzip.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,201 @@ +/* The MIT License + + Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. 
+*/ + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <sys/select.h> +#include <sys/stat.h> +#include "bgzf.h" + +static const int WINDOW_SIZE = 64 * 1024; + +static int bgzip_main_usage() +{ + fprintf(stderr, "\n"); + fprintf(stderr, "Usage: bgzip [options] [file] ...\n\n"); + fprintf(stderr, "Options: -c write on standard output, keep original files unchanged\n"); + fprintf(stderr, " -d decompress\n"); + fprintf(stderr, " -f overwrite files without asking\n"); + fprintf(stderr, " -b INT decompress at virtual file pointer INT\n"); + fprintf(stderr, " -s INT decompress INT bytes in the uncompressed file\n"); + fprintf(stderr, " -h give this help\n"); + fprintf(stderr, "\n"); + return 1; +} + +static int write_open(const char *fn, int is_forced) +{ + int fd = -1; + char c; + if (!is_forced) { + if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666)) < 0 && errno == EEXIST) { + fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? 
", fn); + scanf("%c", &c); + if (c != 'Y' && c != 'y') { + fprintf(stderr, "[bgzip] not overwritten\n"); + exit(1); + } + } + } + if (fd < 0) { + if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0) { + fprintf(stderr, "[bgzip] %s: Fail to write\n", fn); + exit(1); + } + } + return fd; +} + +static void fail(BGZF* fp) +{ + fprintf(stderr, "Error: %s\n", fp->error); + exit(1); +} + +int main(int argc, char **argv) +{ + int c, compress, pstdout, is_forced; + BGZF *fp; + void *buffer; + long start, end, size; + + compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; + while((c = getopt(argc, argv, "cdhfb:s:")) >= 0){ + switch(c){ + case 'h': return bgzip_main_usage(); + case 'd': compress = 0; break; + case 'c': pstdout = 1; break; + case 'b': start = atol(optarg); break; + case 's': size = atol(optarg); break; + case 'f': is_forced = 1; break; + } + } + if (size >= 0) end = start + size; + if (end >= 0 && end < start) { + fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end); + return 1; + } + if (compress == 1) { + struct stat sbuf; + int f_src = fileno(stdin); + int f_dst = fileno(stdout); + + if ( argc>optind ) + { + if ( stat(argv[optind],&sbuf)<0 ) + { + fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); + return 1; + } + + if ((f_src = open(argv[optind], O_RDONLY)) < 0) { + fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); + return 1; + } + + if (pstdout) + f_dst = fileno(stdout); + else + { + char *name = malloc(strlen(argv[optind]) + 5); + strcpy(name, argv[optind]); + strcat(name, ".gz"); + f_dst = write_open(name, is_forced); + if (f_dst < 0) return 1; + free(name); + } + } + else if (!pstdout && isatty(fileno((FILE *)stdout)) ) + return bgzip_main_usage(); + + fp = bgzf_fdopen(f_dst, "w"); + buffer = malloc(WINDOW_SIZE); + while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0) + if (bgzf_write(fp, buffer, c) < 0) fail(fp); + // f_dst will be closed here + if (bgzf_close(fp) < 0) 
fail(fp); + if (argc > optind) unlink(argv[optind]); + free(buffer); + close(f_src); + return 0; + } else { + struct stat sbuf; + int f_dst; + + if ( argc>optind ) + { + if ( stat(argv[optind],&sbuf)<0 ) + { + fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); + return 1; + } + char *name; + int len = strlen(argv[optind]); + if ( strcmp(argv[optind]+len-3,".gz") ) + { + fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]); + return 1; + } + fp = bgzf_open(argv[optind], "r"); + if (fp == NULL) { + fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]); + return 1; + } + + name = strdup(argv[optind]); + name[strlen(name) - 3] = '\0'; + f_dst = write_open(name, is_forced); + free(name); + } + else if (!pstdout && isatty(fileno((FILE *)stdin)) ) + return bgzip_main_usage(); + else + { + f_dst = fileno(stdout); + fp = bgzf_fdopen(fileno(stdin), "r"); + if (fp == NULL) { + fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno)); + return 1; + } + } + buffer = malloc(WINDOW_SIZE); + if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp); + while (1) { + if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE); + else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start)); + if (c == 0) break; + if (c < 0) fail(fp); + start += c; + write(f_dst, buffer, c); + if (end >= 0 && start >= end) break; + } + free(buffer); + if (bgzf_close(fp) < 0) fail(fp); + if (!pstdout) unlink(argv[optind]); + return 0; + } +} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/index.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/index.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,954 @@\n+#include <ctype.h>\n+#include <assert.h>\n+#include <sys/stat.h>\n+#include "khash.h"\n+#include "ksort.h"\n+#include "kstring.h"\n+#include "bam_endian.h"\n+#ifdef _USE_KNETFILE\n+#include "knetfile.h"\n+#endif\n+#include "tabix.h"\n+\n+#define TAD_MIN_CHUNK_GAP 32768\n+// 1<<14 is the size of minimum bin.\n+#define TAD_LIDX_SHIFT 14\n+\n+typedef struct {\n+\tuint64_t u, v;\n+} pair64_t;\n+\n+#define pair64_lt(a,b) ((a).u < (b).u)\n+KSORT_INIT(off, pair64_t, pair64_lt)\n+\n+typedef struct {\n+\tuint32_t m, n;\n+\tpair64_t *list;\n+} ti_binlist_t;\n+\n+typedef struct {\n+\tint32_t n, m;\n+\tuint64_t *offset;\n+} ti_lidx_t;\n+\n+KHASH_MAP_INIT_INT(i, ti_binlist_t)\n+KHASH_MAP_INIT_STR(s, int)\n+\n+struct __ti_index_t {\n+\tti_conf_t conf;\n+\tint32_t n, max;\n+\tkhash_t(s) *tname;\n+\tkhash_t(i) **index;\n+\tti_lidx_t *index2;\n+};\n+\n+struct __ti_iter_t {\n+\tint from_first; // read from the first record; no random access\n+\tint tid, beg, end, n_off, i, finished;\n+\tuint64_t curr_off;\n+\tkstring_t str;\n+\tconst ti_index_t *idx;\n+\tpair64_t *off;\n+};\n+\n+typedef struct {\n+\tint tid, beg, end, bin;\n+} ti_intv_t;\n+\n+ti_conf_t ti_conf_gff = { 0, 1, 4, 5, \'#\', 0 };\n+ti_conf_t ti_conf_bed = { TI_FLAG_UCSC, 1, 2, 3, \'#\', 0 };\n+ti_conf_t ti_conf_psltbl = { TI_FLAG_UCSC, 15, 17, 18, \'#\', 0 };\n+ti_conf_t ti_conf_sam = { TI_PRESET_SAM, 3, 4, 0, \'@\', 0 };\n+ti_conf_t ti_conf_vcf = { TI_PRESET_VCF, 1, 2, 0, \'#\', 0 };\n+\n+/***************\n+ * read a line *\n+ ***************/\n+\n+/*\n+int ti_readline(BGZF *fp, kstring_t *str)\n+{\n+\tint c, l = 0;\n+\tstr->l = 0;\n+\twhile ((c = bgzf_getc(fp)) >= 0 && c != \'\\n\') {\n+\t\t++l;\n+\t\tif (c != \'\\r\') kputc(c, str);\n+\t}\n+\tif (c < 0 && l == 0) return -1; // end of file\n+\treturn str->l;\n+}\n+*/\n+\n+/* Below is a faster implementation largely equivalent to the one\n+ * commented out above. 
*/\n+int ti_readline(BGZF *fp, kstring_t *str)\n+{\n+\tint l, state = 0;\n+\tunsigned char *buf = (unsigned char*)fp->uncompressed_block;\n+\tstr->l = 0;\n+\tdo {\n+\t\tif (fp->block_offset >= fp->block_length) {\n+\t\t\tif (bgzf_read_block(fp) != 0) { state = -2; break; }\n+\t\t\tif (fp->block_length == 0) { state = -1; break; }\n+\t\t}\n+\t\tfor (l = fp->block_offset; l < fp->block_length && buf[l] != \'\\n\'; ++l);\n+\t\tif (l < fp->block_length) state = 1;\n+\t\tl -= fp->block_offset;\n+\t\tif (str->l + l + 1 >= str->m) {\n+\t\t\tstr->m = str->l + l + 2;\n+\t\t\tkroundup32(str->m);\n+\t\t\tstr->s = (char*)realloc(str->s, str->m);\n+\t\t}\n+\t\tmemcpy(str->s + str->l, buf + fp->block_offset, l);\n+\t\tstr->l += l;\n+\t\tfp->block_offset += l + 1;\n+\t\tif (fp->block_offset >= fp->block_length) {\n+#ifdef _USE_KNETFILE\n+\t\t\tfp->block_address = knet_tell(fp->x.fpr);\n+#else\n+\t\t\tfp->block_address = ftello(fp->file);\n+#endif\n+\t\t\tfp->block_offset = 0;\n+\t\t\tfp->block_length = 0;\n+\t\t} \n+\t} while (state == 0);\n+\tif (str->l == 0 && state < 0) return state;\n+\tstr->s[str->l] = 0;\n+\treturn str->l;\n+}\n+\n+/*************************************\n+ * get the interval from a data line *\n+ *************************************/\n+\n+static inline int ti_reg2bin(uint32_t beg, uint32_t end)\n+{\n+\t--end;\n+\tif (beg>>14 == end>>14) return 4681 + (beg>>14);\n+\tif (beg>>17 == end>>17) return 585 + (beg>>17);\n+\tif (beg>>20 == end>>20) return 73 + (beg>>20);\n+\tif (beg>>23 == end>>23) return 9 + (beg>>23);\n+\tif (beg>>26 == end>>26) return 1 + (beg>>26);\n+\treturn 0;\n+}\n+\n+static int get_tid(ti_index_t *idx, const char *ss)\n+{\n+\tkhint_t k;\n+\tint tid;\n+\tk = kh_get(s, idx->tname, ss);\n+\tif (k == kh_end(idx->tname)) { // a new target sequence\n+\t\tint ret, size;\n+\t\t// update idx->n, ->max, ->index and ->index2\n+\t\tif (idx->n == idx->max) {\n+\t\t\tidx->max = idx->max? 
idx->max<<1 : 8;\n+\t\t\tidx->index = realloc(idx->index, idx->max * sizeof(void*));\n+\t\t\tidx->index2 = realloc(idx->index2, idx->max * sizeof(ti_lidx_t));\n+\t\t}\n+\t\tmemset(&idx->index2[idx->n], 0, sizeof(ti_lidx_t));\n+\t\tidx->index[idx->n++] = kh_init(i);\n+\t\t// update ->tname\n+\t\ttid = size = kh_size(idx->tname);\n+\t\tk = kh_put(s, idx->tname, strdup(ss), &ret);\n+\t\tkh_value(idx->tname, k) = size;\n+\t\tassert(idx->n == kh_'..b'n_off, off);\n+\t\t// resolve completely contained adjacent blocks\n+\t\tfor (i = 1, l = 0; i < n_off; ++i)\n+\t\t\tif (off[l].v < off[i].v)\n+\t\t\t\toff[++l] = off[i];\n+\t\tn_off = l + 1;\n+\t\t// resolve overlaps between adjacent blocks; this may happen due to the merge in indexing\n+\t\tfor (i = 1; i < n_off; ++i)\n+\t\t\tif (off[i-1].v >= off[i].u) off[i-1].v = off[i].u;\n+\t\t{ // merge adjacent blocks\n+\t\t\tfor (i = 1, l = 0; i < n_off; ++i) {\n+\t\t\t\tif (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v;\n+\t\t\t\telse off[++l] = off[i];\n+\t\t\t}\n+\t\t\tn_off = l + 1;\n+\t\t}\n+\t}\n+\titer->n_off = n_off; iter->off = off;\n+\treturn iter;\n+}\n+\n+const char *ti_iter_read(BGZF *fp, ti_iter_t iter, int *len)\n+{\n+\tif (iter->finished) return 0;\n+\tif (iter->from_first) {\n+\t\tint ret;\n+\t\tif ((ret = ti_readline(fp, &iter->str)) < 0) {\n+\t\t\titer->finished = 1;\n+\t\t\treturn 0;\n+\t\t} else {\n+\t\t\tif (len) *len = iter->str.l;\n+\t\t\treturn iter->str.s;\n+\t\t}\n+\t}\n+\tif (iter->n_off == 0) return 0;\n+\twhile (1) {\n+\t\tint ret;\n+\t\tif (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk\n+\t\t\tif (iter->i == iter->n_off - 1) break; // no more chunks\n+\t\t\tif (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug\n+\t\t\tif (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek\n+\t\t\t\tbgzf_seek(fp, iter->off[iter->i+1].u, SEEK_SET);\n+\t\t\t\titer->curr_off = 
bgzf_tell(fp);\n+\t\t\t}\n+\t\t\t++iter->i;\n+\t\t}\n+\t\tif ((ret = ti_readline(fp, &iter->str)) >= 0) {\n+\t\t\tti_intv_t intv;\n+\t\t\titer->curr_off = bgzf_tell(fp);\n+\t\t\tif (iter->str.s[0] == iter->idx->conf.meta_char) continue;\n+\t\t\tget_intv((ti_index_t*)iter->idx, &iter->str, &intv);\n+\t\t\tif (intv.tid != iter->tid || intv.beg >= iter->end) break; // no need to proceed\n+\t\t\telse if (intv.end > iter->beg && iter->end > intv.beg) {\n+\t\t\t\tif (len) *len = iter->str.l;\n+\t\t\t\treturn iter->str.s;\n+\t\t\t}\n+\t\t} else break; // end of file\n+\t}\n+\titer->finished = 1;\n+\treturn 0;\n+}\n+\n+void ti_iter_destroy(ti_iter_t iter)\n+{\n+\tif (iter) {\n+\t\tfree(iter->str.s); free(iter->off);\n+\t\tfree(iter);\n+\t}\n+}\n+\n+int ti_fetch(BGZF *fp, const ti_index_t *idx, int tid, int beg, int end, void *data, ti_fetch_f func)\n+{\n+\tti_iter_t iter;\n+\tconst char *s;\n+\tint len;\n+\titer = ti_iter_query(idx, tid, beg, end);\n+\twhile ((s = ti_iter_read(fp, iter, &len)) != 0)\n+\t\tfunc(len, s, data);\n+\tti_iter_destroy(iter);\n+\treturn 0;\n+}\n+\n+/*******************\n+ * High-level APIs *\n+ *******************/\n+\n+tabix_t *ti_open(const char *fn, const char *fnidx)\n+{\n+\ttabix_t *t;\n+\tBGZF *fp;\n+\tif ((fp = bgzf_open(fn, "r")) == 0) return 0;\n+\tt = calloc(1, sizeof(tabix_t));\n+\tt->fn = strdup(fn);\n+\tif (fnidx) t->fnidx = strdup(fnidx);\n+\tt->fp = fp;\n+\treturn t;\n+}\n+\n+void ti_close(tabix_t *t)\n+{\n+\tif (t) {\n+\t\tbgzf_close(t->fp);\n+\t\tif (t->idx) ti_index_destroy(t->idx);\n+\t\tfree(t->fn); free(t->fnidx);\n+\t\tfree(t);\n+\t}\n+}\n+\n+int ti_lazy_index_load(tabix_t *t)\n+{\n+\tif (t->idx == 0) { // load index\n+\t\tif (t->fnidx) t->idx = ti_index_load_local(t->fnidx);\n+\t\telse t->idx = ti_index_load(t->fn);\n+\t\tif (t->idx == 0) return -1; // fail to load index\n+\t}\n+\treturn 0;\n+}\n+\n+ti_iter_t ti_queryi(tabix_t *t, int tid, int beg, int end)\n+{\n+\tif (tid < 0) return ti_iter_first();\n+\tif 
(ti_lazy_index_load(t) != 0) return 0;\n+\treturn ti_iter_query(t->idx, tid, beg, end);\t\n+}\n+\n+ti_iter_t ti_querys(tabix_t *t, const char *reg)\n+{\n+\tint tid, beg, end;\n+\tif (reg == 0) return ti_iter_first();\n+\tif (ti_lazy_index_load(t) != 0) return 0;\n+\tif (ti_parse_region(t->idx, reg, &tid, &beg, &end) < 0) return 0;\n+\treturn ti_iter_query(t->idx, tid, beg, end);\n+}\n+\n+ti_iter_t ti_query(tabix_t *t, const char *name, int beg, int end)\n+{\n+\tint tid;\n+\tif (name == 0) return ti_iter_first();\n+\t// then need to load the index\n+\tif (ti_lazy_index_load(t) != 0) return 0;\n+\tif ((tid = ti_get_tid(t->idx, name)) < 0) return 0;\n+\treturn ti_iter_query(t->idx, tid, beg, end);\n+}\n+\n+const char *ti_read(tabix_t *t, ti_iter_t iter, int *len)\n+{\n+\treturn ti_iter_read(t->fp, iter, len);\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/khash.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/khash.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,486 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Genome Research Ltd (GRL).\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/* Contact: Heng Li <lh3@sanger.ac.uk> */\n+\n+/*\n+ An example:\n+\n+#include "khash.h"\n+KHASH_MAP_INIT_INT(32, char)\n+int main() {\n+\tint ret, is_missing;\n+\tkhiter_t k;\n+\tkhash_t(32) *h = kh_init(32);\n+\tk = kh_put(32, h, 5, &ret);\n+\tif (!ret) kh_del(32, h, k);\n+\tkh_value(h, k) = 10;\n+\tk = kh_get(32, h, 10);\n+\tis_missing = (k == kh_end(h));\n+\tk = kh_get(32, h, 5);\n+\tkh_del(32, h, k);\n+\tfor (k = kh_begin(h); k != kh_end(h); ++k)\n+\t\tif (kh_exist(h, k)) kh_value(h, k) = 1;\n+\tkh_destroy(32, h);\n+\treturn 0;\n+}\n+*/\n+\n+/*\n+ 2008-09-19 (0.2.3):\n+\n+\t* Corrected the example\n+\t* Improved interfaces\n+\n+ 2008-09-11 (0.2.2):\n+\n+\t* Improved speed a little in kh_put()\n+\n+ 2008-09-10 (0.2.1):\n+\n+\t* Added kh_clear()\n+\t* Fixed a compiling error\n+\n+ 2008-09-02 
(0.2.0):\n+\n+\t* Changed to token concatenation which increases flexibility.\n+\n+ 2008-08-31 (0.1.2):\n+\n+\t* Fixed a bug in kh_get(), which has not been tested previously.\n+\n+ 2008-08-31 (0.1.1):\n+\n+\t* Added destructor\n+*/\n+\n+\n+#ifndef __AC_KHASH_H\n+#define __AC_KHASH_H\n+\n+/*!\n+ @header\n+\n+ Generic hash table library.\n+\n+ @copyright Heng Li\n+ */\n+\n+#define AC_VERSION_KHASH_H "0.2.2"\n+\n+#include <stdint.h>\n+#include <stdlib.h>\n+#include <string.h>\n+\n+typedef uint32_t khint_t;\n+typedef khint_t khiter_t;\n+\n+#define __ac_HASH_PRIME_SIZE 32\n+static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =\n+{\n+ 0ul, 3ul, 11ul, 23ul, 53ul,\n+ 97ul, 193ul, 389ul, 769ul, 1543ul,\n+ 3079ul, 6151ul, 12289ul, 24593ul, 49157ul,\n+ 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul,\n+ 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul,\n+ 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,\n+ 3221225473ul, 4294967291ul\n+};\n+\n+#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)\n+#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)\n+#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)\n+#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))\n+#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))\n+#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))\n+#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))\n+\n+static const double __ac_HASH_UPPER = 0.77;\n+\n+#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \\\n+\ttypedef struct {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tkhint_t n_buckets, size, n_occupied, upper_bound;\t\t\t\t\\\n+\t\tuint32_t *flags;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tkhkey_t *keys;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tkhval_t *vals;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t} kh_##name##_t;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tstatic inline kh_##name##_t *kh_init_##name() 
{\t\t\t\t\t\t\\\n+\t\treturn (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t));\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tstatic inlin'..b'e, h, k) kh_get_##name(h, k)\n+\n+/*! @function\n+ @abstract Remove a key from the hash table.\n+ @param name Name of the hash table [symbol]\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @param k Iterator to the element to be deleted [khint_t]\n+ */\n+#define kh_del(name, h, k) kh_del_##name(h, k)\n+\n+\n+/*! @function\n+ @abstract Test whether a bucket contains data.\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @param x Iterator to the bucket [khint_t]\n+ @return 1 if containing data; 0 otherwise [int]\n+ */\n+#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))\n+\n+/*! @function\n+ @abstract Get key given an iterator\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @param x Iterator to the bucket [khint_t]\n+ @return Key [type of keys]\n+ */\n+#define kh_key(h, x) ((h)->keys[x])\n+\n+/*! @function\n+ @abstract Get value given an iterator\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @param x Iterator to the bucket [khint_t]\n+ @return Value [type of values]\n+ @discussion For hash sets, calling this results in segfault.\n+ */\n+#define kh_val(h, x) ((h)->vals[x])\n+\n+/*! @function\n+ @abstract Alias of kh_val()\n+ */\n+#define kh_value(h, x) ((h)->vals[x])\n+\n+/*! @function\n+ @abstract Get the start iterator\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return The start iterator [khint_t]\n+ */\n+#define kh_begin(h) (khint_t)(0)\n+\n+/*! @function\n+ @abstract Get the end iterator\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return The end iterator [khint_t]\n+ */\n+#define kh_end(h) ((h)->n_buckets)\n+\n+/*! @function\n+ @abstract Get the number of elements in the hash table\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return Number of elements in the hash table [khint_t]\n+ */\n+#define kh_size(h) ((h)->size)\n+\n+/*! 
@function\n+ @abstract Get the number of buckets in the hash table\n+ @param h Pointer to the hash table [khash_t(name)*]\n+ @return Number of buckets in the hash table [khint_t]\n+ */\n+#define kh_n_buckets(h) ((h)->n_buckets)\n+\n+/* More conenient interfaces */\n+\n+/*! @function\n+ @abstract Instantiate a hash set containing integer keys\n+ @param name Name of the hash table [symbol]\n+ */\n+#define KHASH_SET_INIT_INT(name)\t\t\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, uint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)\n+\n+/*! @function\n+ @abstract Instantiate a hash map containing integer keys\n+ @param name Name of the hash table [symbol]\n+ @param khval_t Type of values [type]\n+ */\n+#define KHASH_MAP_INIT_INT(name, khval_t)\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)\n+\n+/*! @function\n+ @abstract Instantiate a hash map containing 64-bit integer keys\n+ @param name Name of the hash table [symbol]\n+ */\n+#define KHASH_SET_INIT_INT64(name)\t\t\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)\n+\n+/*! @function\n+ @abstract Instantiate a hash map containing 64-bit integer keys\n+ @param name Name of the hash table [symbol]\n+ @param khval_t Type of values [type]\n+ */\n+#define KHASH_MAP_INIT_INT64(name, khval_t)\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, uint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)\n+\n+typedef const char *kh_cstr_t;\n+/*! @function\n+ @abstract Instantiate a hash map containing const char* keys\n+ @param name Name of the hash table [symbol]\n+ */\n+#define KHASH_SET_INIT_STR(name)\t\t\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)\n+\n+/*! 
@function\n+ @abstract Instantiate a hash map containing const char* keys\n+ @param name Name of the hash table [symbol]\n+ @param khval_t Type of values [type]\n+ */\n+#define KHASH_MAP_INIT_STR(name, khval_t)\t\t\t\t\t\t\t\t\\\n+\tKHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)\n+\n+#endif /* __AC_KHASH_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/knetfile.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/knetfile.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,632 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Genome Research Ltd (GRL).\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/* Contact: Heng Li <lh3@sanger.ac.uk> */\n+\n+/* Probably I will not do socket programming in the next few years and\n+ therefore I decide to heavily annotate this file, for Linux and\n+ Windows as well. -lh3 */\n+\n+#include <time.h>\n+#include <stdio.h>\n+#include <ctype.h>\n+#include <stdlib.h>\n+#include <string.h>\n+#include <errno.h>\n+#include <unistd.h>\n+#include <sys/types.h>\n+\n+#ifdef _WIN32\n+#include <winsock.h>\n+#else\n+#include <netdb.h>\n+#include <arpa/inet.h>\n+#include <sys/socket.h>\n+#endif\n+\n+#include "knetfile.h"\n+\n+/* In winsock.h, the type of a socket is SOCKET, which is: "typedef\n+ * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed\n+ * integer -1. In knetfile.c, I use "int" for socket type\n+ * throughout. 
This should be improved to avoid confusion.\n+ *\n+ * In Linux/Mac, recv() and read() do almost the same thing. You can see\n+ * in the header file that netread() is simply an alias of read(). In\n+ * Windows, however, they are different and using recv() is mandatory.\n+ */\n+\n+/* This function tests if the file handler is ready for reading (or\n+ * writing if is_read==0). */\n+static int socket_wait(int fd, int is_read)\n+{\n+\tfd_set fds, *fdr = 0, *fdw = 0;\n+\tstruct timeval tv;\n+\tint ret;\n+\ttv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out\n+\tFD_ZERO(&fds);\n+\tFD_SET(fd, &fds);\n+\tif (is_read) fdr = &fds;\n+\telse fdw = &fds;\n+\tret = select(fd+1, fdr, fdw, 0, &tv);\n+#ifndef _WIN32\n+\tif (ret == -1) perror("select");\n+#else\n+\tif (ret == 0)\n+\t\tfprintf(stderr, "select time-out\\n");\n+\telse if (ret == SOCKET_ERROR)\n+\t\tfprintf(stderr, "select: %d\\n", WSAGetLastError());\n+#endif\n+\treturn ret;\n+}\n+\n+#ifndef _WIN32\n+/* This function does not work with Windows due to the lack of\n+ * getaddrinfo() in winsock. It is addapted from an example in "Beej\'s\n+ * Guide to Network Programming" (http://beej.us/guide/bgnet/). */\n+static int socket_connect(const char *host, const char *port)\n+{\n+#define __err_connect(func) do { perror(func); freeaddrinfo(res); return -1; } while (0)\n+\n+\tint on = 1, fd;\n+\tstruct linger lng = { 0, 0 };\n+\tstruct addrinfo hints, *res;\n+\tmemset(&hints, 0, sizeof(struct addrinfo));\n+\thints.ai_family = AF_UNSPEC;\n+\thints.ai_socktype = SOCK_STREAM;\n+\t/* In Unix/Mac, getaddrinfo() is the most convenient way to get\n+\t * server information. */\n+\tif (getaddrinfo(host, port, &hints, &res) != 0) __err_connect("getaddrinfo");\n+\tif ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1) __err_connect("socket");\n+\t/* The following two setsockopt() are used by ftplib\n+\t * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they\n+\t * necessary. 
*/\n+\tif (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) __err_connect("setsockopt");\n+\tif (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1) __err_connect("'..b'(fp);\n+\t\treturn 0;\n+\t}\n+\treturn fp;\n+}\n+\n+knetFile *knet_dopen(int fd, const char *mode)\n+{\n+\tknetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));\n+\tfp->type = KNF_TYPE_LOCAL;\n+\tfp->fd = fd;\n+\treturn fp;\n+}\n+\n+off_t knet_read(knetFile *fp, void *buf, off_t len)\n+{\n+\toff_t l = 0;\n+\tif (fp->fd == -1) return 0;\n+\tif (fp->type == KNF_TYPE_FTP) {\n+\t\tif (fp->is_ready == 0) {\n+\t\t\tif (!fp->no_reconnect) kftp_reconnect(fp);\n+\t\t\tkftp_connect_file(fp);\n+\t\t}\n+\t} else if (fp->type == KNF_TYPE_HTTP) {\n+\t\tif (fp->is_ready == 0)\n+\t\t\tkhttp_connect_file(fp);\n+\t}\n+\tif (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX\n+\t\toff_t rest = len, curr;\n+\t\twhile (rest) {\n+\t\t\tcurr = read(fp->fd, buf + l, rest);\n+\t\t\tif (curr == 0) break;\n+\t\t\tl += curr; rest -= curr;\n+\t\t}\n+\t} else l = my_netread(fp->fd, buf, len);\n+\tfp->offset += l;\n+\treturn l;\n+}\n+\n+off_t knet_seek(knetFile *fp, int64_t off, int whence)\n+{\n+\tif (whence == SEEK_SET && off == fp->offset) return 0;\n+\tif (fp->type == KNF_TYPE_LOCAL) {\n+\t\t/* Be aware that lseek() returns the offset after seeking,\n+\t\t * while fseek() returns zero on success. 
*/\n+\t\toff_t offset = lseek(fp->fd, off, whence);\n+\t\tif (offset == -1) {\n+ // Be silent, it is OK for knet_seek to fail when the file is streamed\n+ // fprintf(stderr,"[knet_seek] %s\\n", strerror(errno));\n+\t\t\treturn -1;\n+\t\t}\n+\t\tfp->offset = offset;\n+\t\treturn 0;\n+\t}\n+ else if (fp->type == KNF_TYPE_FTP) \n+ {\n+ if (whence==SEEK_CUR)\n+ fp->offset += off;\n+ else if (whence==SEEK_SET)\n+ fp->offset = off;\n+ else if ( whence==SEEK_END)\n+ fp->offset = fp->file_size+off;\n+\t\tfp->is_ready = 0;\n+\t\treturn 0;\n+\t} \n+ else if (fp->type == KNF_TYPE_HTTP) \n+ {\n+\t\tif (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?\n+\t\t\tfprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\\n");\n+\t\t\terrno = ESPIPE;\n+\t\t\treturn -1;\n+\t\t}\n+ if (whence==SEEK_CUR)\n+ fp->offset += off;\n+ else if (whence==SEEK_SET)\n+ fp->offset = off;\n+\t\tfp->is_ready = 0;\n+\t\treturn fp->offset;\n+\t}\n+\terrno = EINVAL;\n+ fprintf(stderr,"[knet_seek] %s\\n", strerror(errno));\n+\treturn -1;\n+}\n+\n+int knet_close(knetFile *fp)\n+{\n+\tif (fp == 0) return 0;\n+\tif (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific\n+\tif (fp->fd != -1) {\n+\t\t/* On Linux/Mac, netclose() is an alias of close(), but on\n+\t\t * Windows, it is an alias of closesocket(). 
*/\n+\t\tif (fp->type == KNF_TYPE_LOCAL) close(fp->fd);\n+\t\telse netclose(fp->fd);\n+\t}\n+\tfree(fp->host); free(fp->port);\n+\tfree(fp->response); free(fp->retr); free(fp->size_cmd); // FTP specific\n+\tfree(fp->path); free(fp->http_host); // HTTP specific\n+\tfree(fp);\n+\treturn 0;\n+}\n+\n+#ifdef KNETFILE_MAIN\n+int main(void)\n+{\n+\tchar *buf;\n+\tknetFile *fp;\n+\tint type = 4, l;\n+#ifdef _WIN32\n+\tknet_win32_init();\n+#endif\n+\tbuf = calloc(0x100000, 1);\n+\tif (type == 0) {\n+\t\tfp = knet_open("knetfile.c", "r");\n+\t\tknet_seek(fp, 1000, SEEK_SET);\n+\t} else if (type == 1) { // NCBI FTP, large file\n+\t\tfp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");\n+\t\tknet_seek(fp, 2500000000ll, SEEK_SET);\n+\t\tl = knet_read(fp, buf, 255);\n+\t} else if (type == 2) {\n+\t\tfp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");\n+\t\tknet_seek(fp, 1000, SEEK_SET);\n+\t} else if (type == 3) {\n+\t\tfp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");\n+\t\tknet_seek(fp, 1000, SEEK_SET);\n+\t} else if (type == 4) {\n+\t\tfp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");\n+\t\tknet_read(fp, buf, 10000);\n+\t\tknet_seek(fp, 20000, SEEK_SET);\n+\t\tknet_seek(fp, 10000, SEEK_SET);\n+\t\tl = knet_read(fp, buf+10000, 10000000) + 10000;\n+\t}\n+\tif (type != 4 && type != 1) {\n+\t\tknet_read(fp, buf, 255);\n+\t\tbuf[255] = 0;\n+\t\tprintf("%s\\n", buf);\n+\t} else write(fileno(stdout), buf, l);\n+\tknet_close(fp);\n+\tfree(buf);\n+\treturn 0;\n+}\n+#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/knetfile.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/knetfile.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,75 @@ +#ifndef KNETFILE_H +#define KNETFILE_H + +#include <stdint.h> +#include <fcntl.h> + +#ifndef _WIN32 +#define netread(fd, ptr, len) read(fd, ptr, len) +#define netwrite(fd, ptr, len) write(fd, ptr, len) +#define netclose(fd) close(fd) +#else +#include <winsock2.h> +#define netread(fd, ptr, len) recv(fd, ptr, len, 0) +#define netwrite(fd, ptr, len) send(fd, ptr, len, 0) +#define netclose(fd) closesocket(fd) +#endif + +// FIXME: currently I/O is unbuffered + +#define KNF_TYPE_LOCAL 1 +#define KNF_TYPE_FTP 2 +#define KNF_TYPE_HTTP 3 + +typedef struct knetFile_s { + int type, fd; + int64_t offset; + char *host, *port; + + // the following are for FTP only + int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; + char *response, *retr, *size_cmd; + int64_t seek_offset; // for lazy seek + int64_t file_size; + + // the following are for HTTP only + char *path, *http_host; +} knetFile; + +#define knet_tell(fp) ((fp)->offset) +#define knet_fileno(fp) ((fp)->fd) + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _WIN32 + int knet_win32_init(); + void knet_win32_destroy(); +#endif + + knetFile *knet_open(const char *fn, const char *mode); + + /* + This only works with local files. + */ + knetFile *knet_dopen(int fd, const char *mode); + + /* + If ->is_ready==0, this routine updates ->fd; otherwise, it simply + reads from ->fd. + */ + off_t knet_read(knetFile *fp, void *buf, off_t len); + + /* + This routine only sets ->offset and ->is_ready=0. It does not + communicate with the FTP server. + */ + off_t knet_seek(knetFile *fp, int64_t off, int whence); + int knet_close(knetFile *fp); + +#ifdef __cplusplus +} +#endif + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/ksort.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/ksort.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,271 @@\n+/* The MIT License\n+\n+ Copyright (c) 2008 Genome Research Ltd (GRL).\n+\n+ Permission is hereby granted, free of charge, to any person obtaining\n+ a copy of this software and associated documentation files (the\n+ "Software"), to deal in the Software without restriction, including\n+ without limitation the rights to use, copy, modify, merge, publish,\n+ distribute, sublicense, and/or sell copies of the Software, and to\n+ permit persons to whom the Software is furnished to do so, subject to\n+ the following conditions:\n+\n+ The above copyright notice and this permission notice shall be\n+ included in all copies or substantial portions of the Software.\n+\n+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n+ SOFTWARE.\n+*/\n+\n+/* Contact: Heng Li <lh3@sanger.ac.uk> */\n+\n+/*\n+ 2008-11-16 (0.1.4):\n+\n+ * Fixed a bug in introsort() that happens in rare cases.\n+\n+ 2008-11-05 (0.1.3):\n+\n+ * Fixed a bug in introsort() for complex comparisons.\n+\n+\t* Fixed a bug in mergesort(). The previous version is not stable.\n+\n+ 2008-09-15 (0.1.2):\n+\n+\t* Accelerated introsort. 
On my Mac (not on another Linux machine),\n+\t my implementation is as fast as std::sort on random input.\n+\n+\t* Added combsort and in introsort, switch to combsort if the\n+\t recursion is too deep.\n+\n+ 2008-09-13 (0.1.1):\n+\n+\t* Added k-small algorithm\n+\n+ 2008-09-05 (0.1.0):\n+\n+\t* Initial version\n+\n+*/\n+\n+#ifndef AC_KSORT_H\n+#define AC_KSORT_H\n+\n+#include <stdlib.h>\n+#include <string.h>\n+\n+typedef struct {\n+\tvoid *left, *right;\n+\tint depth;\n+} ks_isort_stack_t;\n+\n+#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }\n+\n+#define KSORT_INIT(name, type_t, __sort_lt)\t\t\t\t\t\t\t\t\\\n+\tvoid ks_mergesort_##name(size_t n, type_t array[], type_t temp[])\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ttype_t *a2[2], *a, *b;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tint curr, shift;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ta2[0] = array;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ta2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n);\t\t\\\n+\t\tfor (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) {\t\t\t\\\n+\t\t\ta = a2[curr]; b = a2[1-curr];\t\t\t\t\t\t\t\t\\\n+\t\t\tif (shift == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\ttype_t *p = b, *i, *eb = a + n;\t\t\t\t\t\t\t\\\n+\t\t\t\tfor (i = a; i < eb; i += 2) {\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (i == eb - 1) *p++ = *i;\t\t\t\t\t\t\t\\\n+\t\t\t\t\telse {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tif (__sort_lt(*(i+1), *i)) {\t\t\t\t\t\\\n+\t\t\t\t\t\t\t*p++ = *(i+1); *p++ = *i;\t\t\t\t\t\\\n+\t\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\t\t*p++ = *i; *p++ = *(i+1);\t\t\t\t\t\\\n+\t\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tsize_t i, step = 1ul<<shift;\t\t\t\t\t\t\t\\\n+\t\t\t\tfor (i = 0; i < n; i += step<<1) {\t\t\t\t\t\t\\\n+\t\t\t\t\ttype_t *p, *j, *k, *ea, *eb;\t\t\t\t\t\t\\\n+\t\t\t\t\tif (n < i + step) 
{\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tea = a + n; eb = a;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tea = a + i + step;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\teb = a + (n < i + (step<<1)? n : i + (step<<1)); \\\n+\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tj = a + i; k = a + i + step; p = b + i;\t\t\t\t\\\n+\t\t\t\t\twhile (j < ea && k < eb) {\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\tif (__sort_lt(*k, *j)) *p++ = *k++;\t\t\t\t\\\n+\t\t\t\t\t\telse *p++ = *j++;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\twhile (j < ea) *p++ = *j++;\t\t\t\t\t\t\t\\\n+\t\t\t\t\twhile (k < eb) *p++ = *k++;\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tcurr = 1 - curr;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (curr == 1) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\ttype_t *p = a2[0], *i = a2[1], *eb = array + n;\t\t\t\t\\\n+\t\t\tfor (; p < eb; ++i) *p++ = *i;\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (temp == 0) free(a2[1]);\t\t\t\t\t\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\tvoid ks_heapadjust_##name(size_t i, size_t n, type_t l[])\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tsize_t k ='..b'\t\t\t\t\t\t\t\t\\\n+\tvoid ks_introsort_##name(size_t n, type_t a[])\t\t\t\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tint d;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tks_isort_stack_t *top, *stack;\t\t\t\t\t\t\t\t\t\\\n+\t\ttype_t rp, swap_tmp;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ttype_t *s, *t, *i, *j, *k;\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tif (n < 1) return;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\telse if (n == 2) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \\\n+\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\tfor (d = 2; 1ul<<d < n; 
++d);\t\t\t\t\t\t\t\t\t\\\n+\t\tstack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \\\n+\t\ttop = stack; s = a; t = a + (n-1); d <<= 1;\t\t\t\t\t\t\\\n+\t\twhile (1) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (s < t) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (--d == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tks_combsort_##name(t - s + 1, s);\t\t\t\t\t\\\n+\t\t\t\t\tt = s;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tcontinue;\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\ti = s; j = t; k = i + ((j-i)>>1) + 1;\t\t\t\t\t\\\n+\t\t\t\tif (__sort_lt(*k, *i)) {\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (__sort_lt(*k, *j)) k = j;\t\t\t\t\t\t\\\n+\t\t\t\t} else k = __sort_lt(*j, *i)? i : j;\t\t\t\t\t\\\n+\t\t\t\trp = *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; }\t\\\n+\t\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tdo ++i; while (__sort_lt(*i, rp));\t\t\t\t\t\\\n+\t\t\t\t\tdo --j; while (i <= j && __sort_lt(rp, *j));\t\t\\\n+\t\t\t\t\tif (j <= i) break;\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tswap_tmp = *i; *i = *j; *j = swap_tmp;\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tswap_tmp = *i; *i = *t; *t = swap_tmp;\t\t\t\t\t\\\n+\t\t\t\tif (i-s > t-i) {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \\\n+\t\t\t\t\ts = t-i > 16? i+1 : t;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tif (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \\\n+\t\t\t\t\tt = i-s > 16? 
i-1 : s;\t\t\t\t\t\t\t\t\\\n+\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (top == stack) {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\tfree(stack);\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t\t__ks_insertsort_##name(a, a+n);\t\t\t\t\t\t\\\n+\t\t\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\t} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t/* This function is adapted from: http://ndevilla.free.fr/median/ */ \\\n+\t/* 0 <= kk < n */\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\ttype_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk)\t\t\t\\\n+\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\ttype_t *low, *high, *k, *ll, *hh, *mid;\t\t\t\t\t\t\t\\\n+\t\tlow = arr; high = arr + n - 1; k = arr + kk;\t\t\t\t\t\\\n+\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (high <= low) return *k;\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (high == low + 1) {\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tif (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n+\t\t\t\treturn *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tmid = low + (high - low) / 2;\t\t\t\t\t\t\t\t\\\n+\t\t\tif (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \\\n+\t\t\tif (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n+\t\t\tif (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low);\t\\\n+\t\t\tKSORT_SWAP(type_t, *mid, *(low+1));\t\t\t\t\t\t\t\\\n+\t\t\tll = low + 1; hh = high;\t\t\t\t\t\t\t\t\t\\\n+\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tdo ++ll; while (__sort_lt(*ll, *low));\t\t\t\t\t\\\n+\t\t\t\tdo --hh; while (__sort_lt(*low, *hh));\t\t\t\t\t\\\n+\t\t\t\tif (hh < ll) break;\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\t\tKSORT_SWAP(type_t, *ll, *hh);\t\t\t\t\t\t\t\\\n+\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tKSORT_SWAP(type_t, *low, *hh);\t\t\t\t\t\t\t\t\\\n+\t\t\tif (hh <= 
k) low = ll;\t\t\t\t\t\t\t\t\t\t\\\n+\t\t\tif (hh >= k) high = hh - 1;\t\t\t\t\t\t\t\t\t\\\n+\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n+\t}\n+\n+#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)\n+#define ks_introsort(name, n, a) ks_introsort_##name(n, a)\n+#define ks_combsort(name, n, a) ks_combsort_##name(n, a)\n+#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)\n+#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)\n+#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)\n+#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)\n+\n+#define ks_lt_generic(a, b) ((a) < (b))\n+#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)\n+\n+typedef const char *ksstr_t;\n+\n+#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)\n+#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)\n+\n+#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/kstring.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/kstring.c Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,165 @@ +#include <stdarg.h> +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <stdint.h> +#include "kstring.h" + +int ksprintf(kstring_t *s, const char *fmt, ...) +{ + va_list ap; + int l; + va_start(ap, fmt); + l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); // This line does not work with glibc 2.0. See `man snprintf'. + va_end(ap); + if (l + 1 > s->m - s->l) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + va_start(ap, fmt); + l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); + } + va_end(ap); + s->l += l; + return l; +} + +// s MUST BE a null terminated string; l = strlen(s) +int ksplit_core(char *s, int delimiter, int *_max, int **_offsets) +{ + int i, n, max, last_char, last_start, *offsets, l; + n = 0; max = *_max; offsets = *_offsets; + l = strlen(s); + +#define __ksplit_aux do { \ + if (_offsets) { \ + s[i] = 0; \ + if (n == max) { \ + max = max? max<<1 : 2; \ + offsets = (int*)realloc(offsets, sizeof(int) * max); \ + } \ + offsets[n++] = last_start; \ + } else ++n; \ + } while (0) + + for (i = 0, last_char = last_start = 0; i <= l; ++i) { + if (delimiter == 0) { + if (isspace(s[i]) || s[i] == 0) { + if (isgraph(last_char)) __ksplit_aux; // the end of a field + } else { + if (isspace(last_char) || last_char == 0) last_start = i; + } + } else { + if (s[i] == delimiter || s[i] == 0) { + if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field + } else { + if (last_char == delimiter || last_char == 0) last_start = i; + } + } + last_char = s[i]; + } + *_max = max; *_offsets = offsets; + return n; +} + +/********************** + * Boyer-Moore search * + **********************/ + +// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html +int *ksBM_prep(const uint8_t *pat, int m) +{ + int i, *suff, *prep, *bmGs, *bmBc; + prep = calloc(m + 256, 1); + bmGs = prep; bmBc = prep + m; + { // preBmBc() + for (i = 0; i < 256; ++i) bmBc[i] = m; + for (i = 0; i < m - 1; 
++i) bmBc[pat[i]] = m - i - 1; + } + suff = calloc(m, sizeof(int)); + { // suffixes() + int f = 0, g; + suff[m - 1] = m; + g = m - 1; + for (i = m - 2; i >= 0; --i) { + if (i > g && suff[i + m - 1 - f] < i - g) + suff[i] = suff[i + m - 1 - f]; + else { + if (i < g) g = i; + f = i; + while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g; + suff[i] = f - g; + } + } + } + { // preBmGs() + int j = 0; + for (i = 0; i < m; ++i) bmGs[i] = m; + for (i = m - 1; i >= 0; --i) + if (suff[i] == i + 1) + for (; j < m - 1 - i; ++j) + if (bmGs[j] == m) + bmGs[j] = m - 1 - i; + for (i = 0; i <= m - 2; ++i) + bmGs[m - 1 - suff[i]] = m - 1 - i; + } + free(suff); + return prep; +} + +int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches) +{ + int i, j, *prep, *bmGs, *bmBc; + int *matches = 0, mm = 0, nm = 0; + prep = _prep? _prep : ksBM_prep(pat, m); + bmGs = prep; bmBc = prep + m; + j = 0; + while (j <= n - m) { + for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i); + if (i < 0) { + if (nm == mm) { + mm = mm? mm<<1 : 1; + matches = realloc(matches, mm * sizeof(int)); + } + matches[nm++] = j; + j += bmGs[0]; + } else { + int max = bmBc[str[i+j]] - m + 1 + i; + if (max < bmGs[i]) max = bmGs[i]; + j += max; + } + } + *n_matches = nm; + if (_prep == 0) free(prep); + return matches; +} + +#ifdef KSTRING_MAIN +#include <stdio.h> +int main() +{ + kstring_t *s; + int *fields, n, i; + s = (kstring_t*)calloc(1, sizeof(kstring_t)); + // test ksprintf() + ksprintf(s, " abcdefg: %d ", 100); + printf("'%s'\n", s->s); + // test ksplit() + fields = ksplit(s, 0, &n); + for (i = 0; i < n; ++i) + printf("field[%d] = '%s'\n", i, s->s + fields[i]); + free(s); + + { + static char *str = "abcdefgcdg"; + static char *pat = "cd"; + int n, *matches; + matches = ksBM_search(str, strlen(str), pat, strlen(pat), 0, &n); + printf("%d: \n", n); + for (i = 0; i < n; ++i) + printf("- %d\n", matches[i]); + free(matches); + } + return 0; +} +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/kstring.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/kstring.h Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,68 @@ +#ifndef KSTRING_H +#define KSTRING_H + +#include <stdlib.h> +#include <string.h> +#include <stdint.h> + +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +#ifndef KSTRING_T +#define KSTRING_T kstring_t +typedef struct __kstring_t { + size_t l, m; + char *s; +} kstring_t; +#endif + +int ksprintf(kstring_t *s, const char *fmt, ...); +int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); + +// calculate the auxiliary array, allocated by calloc() +int *ksBM_prep(const uint8_t *pat, int m); + +/* Search pat in str and returned the list of matches. The size of the + * list is returned as n_matches. _prep is the array returned by + * ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */ +int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches); + +static inline int kputsn(const char *p, int l, kstring_t *s) +{ + if (s->l + l + 1 >= s->m) { + s->m = s->l + l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + strncpy(s->s + s->l, p, l); + s->l += l; + s->s[s->l] = 0; + return l; +} + +static inline int kputs(const char *p, kstring_t *s) +{ + return kputsn(p, strlen(p), s); +} + +static inline int kputc(int c, kstring_t *s) +{ + if (s->l + 1 >= s->m) { + s->m = s->l + 2; + kroundup32(s->m); + s->s = (char*)realloc(s->s, s->m); + } + s->s[s->l++] = c; + s->s[s->l] = 0; + return c; +} + +static inline int *ksplit(kstring_t *s, int delimiter, int *n) +{ + int max = 0, *offsets = 0; + *n = ksplit_core(s->s, delimiter, &max, &offsets); + return offsets; +} + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tabix/tabix.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tabix/tabix.h Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,137 @@ +/* The MIT License + + Copyright (c) 2009 Genome Research Ltd (GRL), 2010 Broad Institute + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be + included in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE. +*/ + +/* Contact: Heng Li <lh3@live.co.uk> */ + +#ifndef __TABIDX_H +#define __TABIDX_H + +#include <stdint.h> +#include "kstring.h" +#include "bgzf.h" + +#define TI_PRESET_GENERIC 0 +#define TI_PRESET_SAM 1 +#define TI_PRESET_VCF 2 + +#define TI_FLAG_UCSC 0x10000 + +typedef int (*ti_fetch_f)(int l, const char *s, void *data); + +struct __ti_index_t; +typedef struct __ti_index_t ti_index_t; + +struct __ti_iter_t; +typedef struct __ti_iter_t *ti_iter_t; + +typedef struct { + BGZF *fp; + ti_index_t *idx; + char *fn, *fnidx; +} tabix_t; + +typedef struct { + int32_t preset; + int32_t sc, bc, ec; // seq col., beg col. and end col. 
+ int32_t meta_char, line_skip; +} ti_conf_t; + +extern ti_conf_t ti_conf_gff, ti_conf_bed, ti_conf_psltbl, ti_conf_vcf, ti_conf_sam; // preset + +#ifdef __cplusplus +extern "C" { +#endif + + /******************* + * High-level APIs * + *******************/ + + tabix_t *ti_open(const char *fn, const char *fnidx); + int ti_lazy_index_load(tabix_t *t); + void ti_close(tabix_t *t); + ti_iter_t ti_query(tabix_t *t, const char *name, int beg, int end); + ti_iter_t ti_queryi(tabix_t *t, int tid, int beg, int end); + ti_iter_t ti_querys(tabix_t *t, const char *reg); + const char *ti_read(tabix_t *t, ti_iter_t iter, int *len); + + /* Destroy the iterator */ + void ti_iter_destroy(ti_iter_t iter); + + /* Get the list of sequence names. Each "char*" pointer points to a + * internal member of the index, so DO NOT modify the returned + * pointer; otherwise the index will be corrupted. The returned + * pointer should be freed by a single free() call by the routine + * calling this function. The number of sequences is returned at *n. */ + const char **ti_seqname(const ti_index_t *idx, int *n); + + /****************** + * Low-level APIs * + ******************/ + + /* Build the index for file <fn>. File <fn>.tbi will be generated + * and overwrite the file of the same name. Return -1 on failure. */ + int ti_index_build(const char *fn, const ti_conf_t *conf); + + /* Load the index from file <fn>.tbi. If <fn> is a URL and the index + * file is not in the working directory, <fn>.tbi will be + * downloaded. Return NULL on failure. */ + ti_index_t *ti_index_load(const char *fn); + + ti_index_t *ti_index_load_local(const char *fnidx); + + /* Destroy the index */ + void ti_index_destroy(ti_index_t *idx); + + /* Parse a region like: chr2, chr2:100, chr2:100-200. Return -1 on failure. 
*/ + int ti_parse_region(const ti_index_t *idx, const char *str, int *tid, int *begin, int *end); + + int ti_get_tid(const ti_index_t *idx, const char *name); + + /* Get the iterator pointing to the first record at the current file + * position. If the file is just openned, the iterator points to the + * first record in the file. */ + ti_iter_t ti_iter_first(void); + + /* Get the iterator pointing to the first record in region tid:beg-end */ + ti_iter_t ti_iter_query(const ti_index_t *idx, int tid, int beg, int end); + + /* Get the data line pointed by the iterator and iterate to the next record. */ + const char *ti_iter_read(BGZF *fp, ti_iter_t iter, int *len); + + /******************* + * Deprecated APIs * + *******************/ + + /* The callback version for random access */ + int ti_fetch(BGZF *fp, const ti_index_t *idx, int tid, int beg, int end, void *data, ti_fetch_f func); + + /* Read one line. */ + int ti_readline(BGZF *fp, kstring_t *str); + +#ifdef __cplusplus +} +#endif + +#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/00README.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/00README.txt Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,32 @@ +File ex1.fa contains two sequences cut from the human genome +build36. They were exatracted with command: + + samtools faidx human_b36.fa 2:2043966-2045540 20:67967-69550 + +Sequence names were changed manually for simplicity. File ex1.sam.gz +contains MAQ alignments exatracted with: + + (samtools view NA18507_maq.bam 2:2044001-2045500; + samtools view NA18507_maq.bam 20:68001-69500) + +and processed with `samtools fixmate' to make it self-consistent as a +standalone alignment. + +To try samtools, you may run the following commands: + + samtools faidx ex1.fa # index the reference FASTA + samtools import ex1.fa.fai ex1.sam.gz ex1.bam # SAM->BAM + samtools index ex1.bam # index BAM + samtools tview ex1.bam ex1.fa # view alignment + samtools pileup -cf ex1.fa ex1.bam # pileup and consensus + samtools pileup -cf ex1.fa -t ex1.fa.fai ex1.sam.gz + +In order for the script pysam_test.py to work, you will need pysam +in your PYTHONPATH. + +In order for the script example.py to work, you will need pysam +in your PYTHONPATH and run + + make all + +beforehand. |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/Makefile Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,33 @@ +all: ex1.glf ex1.pileup.gz ex1.bam.bai ex1.glfview.gz \ + ex2.sam.gz ex2.sam ex1.sam \ + ex2.bam \ + ex3.bam ex3.bam.bai \ + ex4.bam ex4.bam.bai \ + ex5.bam ex5.bam.bai \ + ex6.bam \ + ex8.bam + +ex2.sam.gz: ex1.bam ex1.bam.bai + samtools view -h ex1.bam | gzip > ex2.sam.gz + +%.bam: %.sam ex1.fa.fai + samtools import ex1.fa.fai $< $@ + +%.sam: %.sam.gz + gunzip < $< > $@ + +ex1.fa.fai:ex1.fa + samtools faidx ex1.fa +ex1.bam:ex1.sam.gz ex1.fa.fai + samtools import ex1.fa.fai ex1.sam.gz ex1.bam +%.bam.bai:%.bam + samtools index $< +ex1.pileup.gz:ex1.bam ex1.fa + samtools pileup -cf ex1.fa ex1.bam | gzip > ex1.pileup.gz +ex1.glf:ex1.bam ex1.fa + samtools pileup -gf ex1.fa ex1.bam > ex1.glf +ex1.glfview.gz:ex1.glf + samtools glfview ex1.glf | gzip > ex1.glfview.gz + +clean: + rm -fr *.bam *.bai *.glf* *.fai *.pileup* *~ calDepth *.dSYM pysam_*.sam ex2.sam ex2.sam.gz ex1.sam |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/ex1.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/ex1.fa Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,56 @@ +>chr1 +CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCT +GTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCAC +GGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAG +TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTC +AGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAA +CAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACC +AAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCT +CTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA +ATGGACCTGTGATATCTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCTGC +AGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCACAATGAAC +AACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTCATACACACAC +ATGGTTTAGGGGTATAATACCTCTACATGGCTGATTATGAAAACAATGTTCCCCAGATAC +CATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCT +TTTGGCATTTGCCTTCAGACCCTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTT +TCCCATCATGAAGCACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAGGTGCACTAAT +GCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAAT +ACATGAGATTATTAGGAAATGCTTTACTGTCATAACTATGAAGAGACTATTGCCAGATGA +ACCACACATTAATACTATGTTTCTTATCTGCACATTACTACCCTGCAATTAATATAATTG +TGTCCATGTACACACGCTGTCCTATGTACTTATCATGACTCTATCCCAAATTCCCAATTA +CGTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAG +TCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGC +TTGGGCTGTAATGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAATCTC +TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTG +TTTCTTTGTTTGATTTGGTGGAAGACATAATCCCACGCTTCCTATGGAAAGGTTGTTGGG +AGATTTTTAATGATTCCTCAATGTTAAAATGTCTATTTTTGTCTTGACACCCAACTAATA +TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTC +TCCCTCGTCTTCTTA +>chr2 +TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAG +CTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCT +TATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTT +CAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAA +AAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT 
+AGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATAC +ATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAG +GAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCAT +CAGAATAACAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAATT +TTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTTATGCCCTGCTAAACTA +AGCATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATA +ATTCATCATCACTAAACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAAT +TAAAGTTCAATACTCACCATCATAAATACACACAAAAGTACAAAACTCACAGGTTTTATA +AAACAATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTACAACAGGAACAAAACC +TCATATATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGATATA +GATTGGCAGAACAGATTTAAAAACATGAACTAACTATATGCTGTTTACAAGAAACTCATT +AATAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAAACCA +AATGAGAGAAGGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAATCAACAACAGT +AAAATAAAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATAT +AACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAAT +ACTACTAGACCTAAGAGGGATGAGAAATTACCTAATTGGTACAATGTACAATATTCTGAT +GATGGTTACACTAAAAGCCCATACTTTACTGCTACTCAATATATCCATGTAACAAATCTG +CGCTTGTACTTCTAAATCTATAAAAAAATTAAAATTTAACAAAAGTAAATAAAACACATA +GCTAAAACTAAAAAAGCAAAAACAAAAACTATGCTAAGTATTGGTAAAGATGTGGGGAAA +AAAGTAAACTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCACTTTGGAAAACAA +TTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTTGCATGC +CAGAAAAAAATATTTACAGTAACT |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/ex1.sam.gz |
b |
Binary file chimerascan/chimerascan/pysam/tests/ex1.sam.gz has changed |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/ex3.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/ex3.sam Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,13 @@ +@HD VN:1.0 +@SQ SN:chr1 LN:1575 +@SQ SN:chr2 LN:1584 +@RG ID:L1 PU:SC_1_10 LB:SC_1 SM:NA12891 CN:name:with:colon +@RG ID:L2 PU:SC_2_12 LB:SC_2 SM:NA12891 CN:name:with:colon +@PG ID:P1 VN:1.0 +@PG ID:P2 VN:1.1 +@CO this is a comment +@CO this is another comment +read_28833_29006_6945 99 chr1 33 20 10M1D25M = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 PG:Z:P1 XT:A:U +read_28701_28881_323b 147 chr2 88 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 RG:Z:L2 PG:Z:P2 XT:A:R +read_28701_28881_323c 147 chr2 88 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< +test_clipped1 99 chr2 997 20 4S6M1D20M5S = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 PG:Z:P1 XT:A:U |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/ex4.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/ex4.sam Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,9 @@ +@HD VN:1.0 +@SQ SN:chr1 LN:100 +@SQ SN:chr2 LN:100 +@RG ID:L1 PU:SC_1_10 LB:SC_1 SM:NA12891 +@RG ID:L2 PU:SC_2_12 LB:SC_2 SM:NA12891 +@CO this is a comment +@CO this is another comment +read_28833_29006_6945 99 chr1 21 20 10M1D25M = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 +read_28701_28881_323b 147 chr2 21 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 RG:Z:L2 |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/ex5.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/ex5.sam Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,5 @@ +@HD VN:1.0 +@SQ SN:chr1 LN:100 +@SQ SN:chr2 LN:100 +read_28833_29006_6945 0 * * * * * 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< +read_28701_28881_323b 0 * * * * * 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/ex6.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/ex6.sam Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,5 @@ +@HD VN:1.0 +@SQ SN:chr1 LN:1575 +@SQ SN:chr2 LN:1584 +read_28833_29006_6945 99 chr1 33 20 10M1D25M = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 +read_28701_28881_323b 147 chr2 88 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 RG:Z:L2 |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/ex7.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/ex7.sam Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,2 @@ +read_28833_29006_6945 99 chr1 33 20 10M1D25M = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 PG:Z:P1 XT:A:U +read_28701_28881_323b 147 chr2 88 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 RG:Z:L2 PG:Z:P2 XT:A:R |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/ex8.sam --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/ex8.sam Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,3 @@ +@HD VN:1.0 +@SQ SN:2 LN:48297693 +GJP00TM04CAQ5W 0 2 38297693 60 45H51M1D13M1D12M1D9M2D5M1D7M4D2M1I6M1D28M1D5M1D2M1D18M55H * 0 0 CATGAAGAACCGCTGGGTATGGAGCACACCTCACCTGATGGACAGTTGATTATGCTCACCTTAACGCTAATTGAGAGCAGCACAAGAGGACTGGAAACTAGAATTTACTCCTCATCTCCGAAGATGTGAATATTCTAAATTCAGCTTGCCTCTTGCTTC IID7757111/=;?///:D>777;EEGAAAEEIHHIIIIIIIIIIIIIIBBBIIIIH==<<<DDGEEE;<<<A><<<DEDDA>>>D?1112544556::03---//25.22=;DD?;;;>BDDDEEEGGGA<888<BAA888<GGGGGEB?9::DD551 NM:i:15 MD:Z:51^T13^A12^A9^AA5^A7^AAAA8^T28^T5^A2^T18 RG:Z:GJP00TM04 |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/example.gtf.gz |
b |
Binary file chimerascan/chimerascan/pysam/tests/example.gtf.gz has changed |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/example.gtf.gz.tbi |
b |
Binary file chimerascan/chimerascan/pysam/tests/example.gtf.gz.tbi has changed |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/example.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/example.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,121 @@ +import sys +import pysam + +samfile = pysam.Samfile( "ex1.bam", "rb" ) + +print "###################" +# check different ways to iterate +print len(list(samfile.fetch())) +print len(list(samfile.fetch( "chr1", 10, 200 ))) +print len(list(samfile.fetch( region="chr1:10-200" ))) +print len(list(samfile.fetch( "chr1" ))) +print len(list(samfile.fetch( region="chr1"))) +print len(list(samfile.fetch( "chr2" ))) +print len(list(samfile.fetch( region="chr2"))) +print len(list(samfile.fetch())) +print len(list(samfile.fetch( "chr1" ))) +print len(list(samfile.fetch( region="chr1"))) +print len(list(samfile.fetch())) + +print len(list(samfile.pileup( "chr1", 10, 200 ))) +print len(list(samfile.pileup( region="chr1:10-200" ))) +print len(list(samfile.pileup( "chr1" ))) +print len(list(samfile.pileup( region="chr1"))) +print len(list(samfile.pileup( "chr2" ))) +print len(list(samfile.pileup( region="chr2"))) +print len(list(samfile.pileup())) +print len(list(samfile.pileup())) + +print "########### fetch with callback ################" +def my_fetch_callback( alignment ): print str(alignment) +samfile.fetch( region="chr1:10-200", callback=my_fetch_callback ) + +print "########## pileup with callback ################" +def my_pileup_callback( column ): print str(column) +samfile.pileup( region="chr1:10-200", callback=my_pileup_callback ) + +print "##########iterator row #################" +iter = pysam.IteratorRow( samfile, 0, 10, 200) +for x in iter: print str(x) + +print "##########iterator col #################" +iter = pysam.IteratorColumn( samfile, 0, 10, 200 ) +for x in iter: print str(x) + +print "#########row all##################" +iter = pysam.IteratorRowAll( samfile ) +for x in iter: print str(x) + + +print "###################" + +class Counter: + mCounts = 0 + def __call__(self, alignment): + self.mCounts += 1 + +c = Counter() +samfile.fetch( "chr1:10-200", c ) +print "counts=", c.mCounts + +sys.exit(0) +print samfile.getTarget( 0 ) +print 
samfile.getTarget( 1 ) + +for p in pysam.pileup( "-c", "ex1.bam" ): + print str(p) + +print pysam.pileup.getMessages() + +for p in pysam.pileup( "-c", "ex1.bam", raw=True ): + print str(p), + + + +print "###########################" + +samfile = pysam.Samfile( "ex2.sam.gz", "r" ) + +print "num targets=", samfile.getNumTargets() + +iter = pysam.IteratorRowAll( samfile ) +for x in iter: print str(x) + +samfile.close() + +print "###########################" +samfile = pysam.Samfile( "ex2.sam.gz", "r" ) +def my_fetch_callback( alignment ): + print str(alignment) + +try: + samfile.fetch( "chr1:10-20", my_fetch_callback ) +except AssertionError: + print "caught fetch exception" + +samfile.close() + +print "###########################" +samfile = pysam.Samfile( "ex2.sam.gz", "r" ) +def my_pileup_callback( pileups ): + print str(pileups) +try: + samfile.pileup( "chr1:10-20", my_pileup_callback ) +except NotImplementedError: + print "caught pileup exception" + +# playing arount with headers +samfile = pysam.Samfile( "ex3.sam", "r" ) +print samfile.targets +print samfile.lengths +print samfile.text +print samdile.header +header = samfile.header +samfile.close() + +header["HD"]["SO"] = "unsorted" +outfile = pysam.Samfile( "out.sam", "wh", + header = header ) + +outfile.close() + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/pysam_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/pysam_test.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,1008 @@\n+#!/usr/bin/env python\n+\'\'\'unit testing code for pysam.\n+\n+Execute in the :file:`tests` directory as it requires the Makefile\n+and data files located there.\n+\'\'\'\n+\n+import pysam\n+import unittest\n+import os, re\n+import itertools\n+import subprocess\n+import shutil\n+\n+\n+def checkBinaryEqual( filename1, filename2 ):\n+ \'\'\'return true if the two files are binary equal.\'\'\'\n+ if os.path.getsize( filename1 ) != os.path.getsize( filename2 ):\n+ return False\n+\n+ infile1 = open(filename1, "rb")\n+ infile2 = open(filename2, "rb")\n+\n+ def chariter( infile ):\n+ while 1:\n+ c = infile.read(1)\n+ if c == "": break\n+ yield c\n+\n+ found = False\n+ for c1,c2 in itertools.izip( chariter( infile1), chariter( infile2) ):\n+ if c1 != c2: break\n+ else:\n+ found = True\n+\n+ infile1.close()\n+ infile2.close()\n+ return found\n+\n+def runSamtools( cmd ):\n+ \'\'\'run a samtools command\'\'\'\n+\n+ try:\n+ retcode = subprocess.call(cmd, shell=True)\n+ if retcode < 0:\n+ print >>sys.stderr, "Child was terminated by signal", -retcode\n+ except OSError, e:\n+ print >>sys.stderr, "Execution failed:", e\n+\n+def getSamtoolsVersion():\n+ \'\'\'return samtools version\'\'\'\n+\n+ pipe = subprocess.Popen("samtools", shell=True, stderr=subprocess.PIPE).stderr\n+ lines = "".join(pipe.readlines())\n+ return re.search( "Version:\\s+(\\S+)", lines).groups()[0]\n+\n+class BinaryTest(unittest.TestCase):\n+ \'\'\'test samtools command line commands and compare\n+ against pysam commands.\n+\n+ Tests fail, if the output is not binary identical.\n+ \'\'\'\n+\n+ first_time = True\n+\n+ # a list of commands to test\n+ mCommands = \\\n+ { "faidx" : \\\n+ ( \n+ ("ex1.fa.fai", "samtools faidx ex1.fa"), \n+ ("pysam_ex1.fa.fai", (pysam.faidx, "ex1.fa") ),\n+ ),\n+ "import" :\n+ (\n+ ("ex1.bam", "samtools import ex1.fa.fai ex1.sam.gz ex1.bam" ),\n+ ("pysam_ex1.bam", (pysam.samimport, "ex1.fa.fai ex1.sam.gz pysam_ex1.bam") ),\n+ ),\n+ "index":\n+ (\n+ 
("ex1.bam.bai", "samtools index ex1.bam" ),\n+ ("pysam_ex1.bam.bai", (pysam.index, "pysam_ex1.bam" ) ),\n+ ),\n+ "pileup1" :\n+ (\n+ ("ex1.pileup", "samtools pileup -cf ex1.fa ex1.bam > ex1.pileup" ),\n+ ("pysam_ex1.pileup", (pysam.pileup, "-c -f ex1.fa ex1.bam" ) )\n+ ),\n+ "pileup2" :\n+ (\n+ ("ex1.glf", "samtools pileup -gf ex1.fa ex1.bam > ex1.glf" ),\n+ ("pysam_ex1.glf", (pysam.pileup, "-g -f ex1.fa ex1.bam" ) )\n+ ),\n+ "glfview" :\n+ (\n+ ("ex1.glfview", "samtools glfview ex1.glf > ex1.glfview"),\n+ ("pysam_ex1.glfview", (pysam.glfview, "ex1.glf" ) ),\n+ ),\n+ "view" :\n+ (\n+ ("ex1.view", "samtools view ex1.bam > ex1.view"),\n+ ("pysam_ex1.view", (pysam.view, "ex1.bam" ) ),\n+ ),\n+ "view2" :\n+ (\n+ ("ex1.view", "samtools view -bT ex1.fa -o ex1.view2 ex1.sam"),\n+ # note that -o ex1.view2 throws exception.\n+ ("pysam_ex1.view", (pysam.view, "-bT ex1.fa -oex1.view2 ex1.sam" ) ),\n+ ),\n+ }\n+\n+ # some tests depend on others. The order specifies in which order\n+ # the samtools commands are executed.\n+ mOrder = (\'faidx\', \'import\', \'index\', \'pileup1\', \'pileup2\', \'glfview\', \'view\', \'view2\' )\n+\n+ def setUp( self ):\n+ \'\'\'setup tests. \n+\n+ For setup, all commands will be run before the first test is\n+ executed. 
Individual tests will then just compare the output\n+ files.\n+ \'\'\'\n+ if BinaryTest.first_time:\n+ # copy the source \n+ shutil.copy( "ex1.fa", "pysam_ex1.fa" )\n+\n+ '..b', self.reads):\n+ self.checkFieldEqual( other, denovo )\n+ self.assertEqual( other.compare( denovo ), 0 )\n+\n+ def testSAMPerRead( self ):\n+ \'\'\'check if individual reads are binary equal.\'\'\'\n+ infile = pysam.Samfile( self.samfile, "r")\n+\n+ others = list(infile)\n+ for denovo, other in zip( others, self.reads):\n+ self.checkFieldEqual( other, denovo )\n+ self.assertEqual( other.compare( denovo), 0 )\n+ \n+ def testBAMWholeFile( self ):\n+ \n+ tmpfilename = "tmp_%i.bam" % id(self)\n+\n+ outfile = pysam.Samfile( tmpfilename, "wb", header = self.header )\n+\n+ for x in self.reads: outfile.write( x )\n+ outfile.close()\n+ \n+ self.assertTrue( checkBinaryEqual( tmpfilename, self.bamfile ),\n+ "mismatch when construction BAM file, see %s %s" % (tmpfilename, self.bamfile))\n+ \n+ os.unlink( tmpfilename )\n+\n+\n+class TestDoubleFetch(unittest.TestCase):\n+ \'\'\'check if two iterators on the same bamfile are independent.\'\'\'\n+ \n+ def testDoubleFetch( self ):\n+\n+ samfile1 = pysam.Samfile(\'ex1.bam\', \'rb\')\n+\n+ for a,b in zip(samfile1.fetch(), samfile1.fetch()):\n+ self.assertEqual( a.compare( b ), 0 )\n+\n+ def testDoubleFetchWithRegion( self ):\n+\n+ samfile1 = pysam.Samfile(\'ex1.bam\', \'rb\')\n+ chr, start, stop = \'chr1\', 200, 3000000\n+ self.assertTrue(len(list(samfile1.fetch ( chr, start, stop))) > 0) #just making sure the test has something to catch\n+\n+ for a,b in zip(samfile1.fetch( chr, start, stop), samfile1.fetch( chr, start, stop)):\n+ self.assertEqual( a.compare( b ), 0 ) \n+\n+ def testDoubleFetchUntilEOF( self ):\n+\n+ samfile1 = pysam.Samfile(\'ex1.bam\', \'rb\')\n+\n+ for a,b in zip(samfile1.fetch( until_eof = True), \n+ samfile1.fetch( until_eof = True )):\n+ self.assertEqual( a.compare( b), 0 )\n+\n+class TestRemoteFileFTP(unittest.TestCase):\n+ \'\'\'test 
remote access.\n+\n+ \'\'\'\n+\n+ # Need to find an ftp server without password on standard\n+ # port.\n+\n+ url = "ftp://ftp.sanger.ac.uk/pub/rd/humanSequences/CV.bam"\n+ region = "1:1-1000"\n+\n+ def testFTPView( self ):\n+ result = pysam.view( self.url, self.region )\n+ self.assertEqual( len(result), 36 )\n+ \n+ def testFTPFetch( self ):\n+ samfile = pysam.Samfile(self.url, "rb") \n+ result = list(samfile.fetch( region = self.region ))\n+ self.assertEqual( len(result), 36 )\n+\n+class TestRemoteFileHTTP( unittest.TestCase):\n+\n+ url = "http://genserv.anat.ox.ac.uk/downloads/pysam/test/ex1.bam"\n+ region = "chr1:1-1000"\n+ local = "ex1.bam"\n+\n+ def testView( self ):\n+ self.assertRaises( pysam.SamtoolsError, pysam.view, self.url, self.region )\n+ \n+ def testFetch( self ):\n+ samfile = pysam.Samfile(self.url, "rb") \n+ result = list(samfile.fetch( region = self.region ))\n+ samfile_local = pysam.Samfile(self.local, "rb") \n+ ref = list(samfile_local.fetch( region = self.region ))\n+\n+ self.assertEqual( len(ref), len(result) )\n+ for x, y in zip(result, ref):\n+ self.assertEqual( x.compare( y ), 0 )\n+\n+ def testFetchAll( self ):\n+ samfile = pysam.Samfile(self.url, "rb") \n+ result = list(samfile.fetch())\n+ samfile_local = pysam.Samfile(self.local, "rb") \n+ ref = list(samfile_local.fetch() )\n+\n+ self.assertEqual( len(ref), len(result) )\n+ for x, y in zip(result, ref):\n+ self.assertEqual( x.compare( y ), 0 )\n+\n+\n+# TODOS\n+# 1. finish testing all properties within pileup objects\n+# 2. check exceptions and bad input problems (missing files, optional fields that aren\'t present, etc...)\n+\n+if __name__ == "__main__":\n+ # build data files\n+ print "building data files"\n+ subprocess.call( "make", shell=True)\n+ print "starting tests"\n+ unittest.main()\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/segfault_tests.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/segfault_tests.py Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,37 @@ +#!/usr/bin/env python +'''unit testing code for pysam.''' + +import pysam +import unittest +import os +import itertools +import subprocess +import shutil + +class TestExceptions(unittest.TestCase): + + def setUp(self): + self.samfile=pysam.Samfile( "ex1.bam","rb" ) + + def testOutOfRangeNegativeNewFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, -10 ) + self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, 0 ) + self.assertRaises( ValueError, self.samfile.fetch, "chr1", -5, -10 ) + + def testOutOfRangeNegativeOldFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5-10" ) + self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5-0" ) + self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5--10" ) + + def testOutOfRangeLargeNewFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1", 99999999999999999, 999999999999999999 ) + + def testOutOfRangeLargeOldFormat(self): + self.assertRaises( ValueError, self.samfile.fetch, "chr1:99999999999999999-999999999999999999" ) + + def tearDown(self): + self.samfile.close() + +if __name__ == "__main__": + unittest.main() + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/tests/tabix_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/tests/tabix_test.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,225 @@ +#!/usr/bin/env python +'''unit testing code for pysam. + +Execute in the :file:`tests` directory as it requires the Makefile +and data files located there. +''' + +import sys, os, shutil, gzip +import pysam +import unittest +import itertools +import subprocess + +def checkBinaryEqual( filename1, filename2 ): + '''return true if the two files are binary equal.''' + if os.path.getsize( filename1 ) != os.path.getsize( filename2 ): + return False + + infile1 = open(filename1, "rb") + infile2 = open(filename2, "rb") + + def chariter( infile ): + while 1: + c = infile.read(1) + if c == "": break + yield c + + found = False + for c1,c2 in itertools.izip( chariter( infile1), chariter( infile2) ): + if c1 != c2: break + else: + found = True + + infile1.close() + infile2.close() + return found + +class TestIndexing(unittest.TestCase): + filename = "example.gtf.gz" + filename_idx = "example.gtf.gz.tbi" + + def setUp( self ): + + self.tmpfilename = "tmp_%i.gtf.gz" % id(self) + shutil.copyfile( self.filename, self.tmpfilename ) + + def testIndexPreset( self ): + '''test indexing via preset.''' + + pysam.tabix_index( self.tmpfilename, preset = "gff" ) + checkBinaryEqual( self.tmpfilename + ".tbi", self.filename_idx ) + + def tearDown( self ): + os.unlink( self.tmpfilename ) + os.unlink( self.tmpfilename + ".tbi" ) + +class TestCompression(unittest.TestCase): + filename = "example.gtf.gz" + filename_idx = "example.gtf.gz.tbi" + + def setUp( self ): + + self.tmpfilename = "tmp_%i.gtf" % id(self) + infile = gzip.open( self.filename, "r") + outfile = open( self.tmpfilename, "w" ) + outfile.write( "".join(infile.readlines()) ) + outfile.close() + infile.close() + + def testIndexPreset( self ): + '''test indexing via preset.''' + + pysam.tabix_index( self.tmpfilename, preset = "gff" ) + checkBinaryEqual( self.tmpfilename + ".gz", self.filename ) + checkBinaryEqual( self.tmpfilename + ".gz.tbi", self.filename_idx ) + + def tearDown( self ): + os.unlink( 
self.tmpfilename + ".gz" ) + os.unlink( self.tmpfilename + ".gz.tbi" ) + +class TestIteration( unittest.TestCase ): + + filename = "example.gtf.gz" + + def setUp( self ): + + self.tabix = pysam.Tabixfile( self.filename ) + lines = gzip.open(self.filename).readlines() + # creates index of contig, start, end, adds content without newline. + self.compare = [ + (x[0][0], int(x[0][3]), int(x[0][4]), x[1]) + for x in [ (y.split("\t"), y[:-1]) for y in lines ] ] + + def getSubset( self, contig = None, start = None, end = None): + + if contig == None: + # all lines + subset = [ x[3] for x in self.compare ] + else: + if start != None and end == None: + # until end of contig + subset = [ x[3] for x in self.compare if x[0] == contig and x[2] > start ] + elif start == None and end != None: + # from start of contig + subset = [ x[3] for x in self.compare if x[0] == contig and x[1] <= end ] + elif start == None and end == None: + subset = [ x[3] for x in self.compare if x[0] == contig ] + else: + # all within interval + subset = [ x[3] for x in self.compare if x[0] == contig and \ + min( x[2], end) - max(x[1], start) > 0 ] + + return subset + + def checkPairwise( self, result, ref ): + + result.sort() + ref.sort() + + a = set(result) + b = set(ref) + + self.assertEqual( len(result), len(ref), + "unexpected number of results: %i, expected %i, differences are %s: %s" \ + % (len(result), len(ref), + a.difference(b), + b.difference(a) )) + + for x, d in enumerate( zip( result, ref )): + + self.assertEqual( d[0], d[1], + "unexpected results in pair %i: '%s', expected '%s'" % \ + (x, + d[0], + d[1]) ) + + + def testAll( self ): + result = list(self.tabix.fetch()) + ref = self.getSubset( ) + self.checkPairwise( result, ref ) + + def testPerContig( self ): + for contig in ("chr1", "chr2", "chr1", "chr2" ): + result = list(self.tabix.fetch( contig )) + ref = self.getSubset( contig ) + self.checkPairwise( result, ref ) + + def testPerContigToEnd( self ): + + end = None + for contig in 
("chr1", "chr2", "chr1", "chr2" ): + for start in range( 0, 200000, 1000): + result = list(self.tabix.fetch( contig, start, end )) + ref = self.getSubset( contig, start, end ) + self.checkPairwise( result, ref ) + + def testPerContigFromStart( self ): + + start = None + for contig in ("chr1", "chr2", "chr1", "chr2" ): + for end in range( 0, 200000, 1000): + result = list(self.tabix.fetch( contig, start, end )) + ref = self.getSubset( contig, start, end ) + self.checkPairwise( result, ref ) + + def testPerContig( self ): + + start, end = None, None + for contig in ("chr1", "chr2", "chr1", "chr2" ): + result = list(self.tabix.fetch( contig, start, end )) + ref = self.getSubset( contig, start, end ) + self.checkPairwise( result, ref ) + + def testPerInterval( self ): + + start, end = None, None + for contig in ("chr1", "chr2", "chr1", "chr2" ): + for start in range( 0, 200000, 2000): + for end in range( start, start + 2000, 500): + result = list(self.tabix.fetch( contig, start, end )) + ref = self.getSubset( contig, start, end ) + self.checkPairwise( result, ref ) + + + def testInvalidIntervals( self ): + + self.assertRaises( ValueError, self.tabix.fetch, "chr1", 0, -10) + self.assertRaises( ValueError, self.tabix.fetch, "chr1", -10, 200) + self.assertRaises( ValueError, self.tabix.fetch, "chr1", 200, 0) + self.assertRaises( ValueError, self.tabix.fetch, "chr1", -10, -20) + self.assertRaises( ValueError, self.tabix.fetch, "chrUn" ) + + def testGetContigs( self ): + self.assertEqual( sorted(self.tabix.contigs), ["chr1", "chr2"] ) + # check that contigs is read-only + self.assertRaises( AttributeError, setattr, self.tabix, "contigs", ["chr1", "chr2"] ) + +class TestParser( unittest.TestCase ): + + filename = "example.gtf.gz" + + def setUp( self ): + + self.tabix = pysam.Tabixfile( self.filename ) + self.compare = [ x[:-1].split("\t") for x in gzip.open( self.filename, "r") ] + + def testGTF( self ): + + for x, r in enumerate(self.tabix.fetch( parser = pysam.asGTF() )): 
+ self.assertEqual( "\t".join( self.compare[x]), str(r) ) + + def testTuple( self ): + + for x, r in enumerate(self.tabix.fetch( parser = pysam.asTuple() )): + self.assertEqual( self.compare[x], list(r) ) + + self.assertEqual( len(self.compare[x]), len(r) ) + for c in range(0,len(r)): + self.assertEqual( self.compare[x][c], r[c] ) + +if __name__ == "__main__": + unittest.main() + + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/pysam/version.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/pysam/version.py Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +# pysam versioning information +__version__ = "0.3.1" +__samtools_version__ = "0.1.8" +__tabix_version__ = "0.2.1" |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/test/test_homology.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/test/test_homology.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,30 @@ +''' +Created on Jul 21, 2011 + +@author: mkiyer +''' +import unittest + +from chimerascan.lib.seq import calc_homology + +class TestLibraries(unittest.TestCase): + + def testHomology(self): + a = "AAAAGGGGTTTTCCCC" + b = "AAAAGGGGTTTTCCCC" + self.assertEquals(calc_homology(a, b, 0), 16) + b = "AAAAGGGGTTTTCCCG" + self.assertEquals(calc_homology(a, b, 0), 15) + b = "AAATTTGGTTTTCCCC" + self.assertEquals(calc_homology(a, b, 0), 3) + self.assertEquals(calc_homology(a, b, 1), 4) + self.assertEquals(calc_homology(a, b, 2), 5) + self.assertEquals(calc_homology(a, b, 3), 16) + + + + + +if __name__ == "__main__": + #import sys;sys.argv = ['', 'Test.testName'] + unittest.main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/tools/chimerascan_html_table.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/tools/chimerascan_html_table.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,150 @@ +#!/usr/bin/env python +''' +Created on Feb 12, 2011 + +@author: mkiyer + +chimerascan: chimeric transcript discovery using RNA-seq + +Copyright (C) 2011 Matthew Iyer + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +''' +import logging +import sys +from jinja2 import Environment, PackageLoader + +# local imports +from chimerascan.lib.chimera import Chimera, ChimeraTypes + +# setup html template environment +env = Environment(loader=PackageLoader("chimerascan", "tools")) + +# URLs for special links +GENECARDS_URL = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=" +UCSC_POS_URL = "http://genome.ucsc.edu/cgi-bin/hgTracks?" 
+ +def get_header_row(): + return ["5' genomic region", + "5' strand", + "3' genomic region", + "3' strand", + "Cluster ID", + "5' transcripts", "3' transcripts", + "5' genes", "3' genes", + "Type", "5' -> 3' distance", + "Total frags", + "Spanning frags", + "Unique alignment positions", + "Isoform fraction 5'", + "Isoform fraction 3'", + "Breakpoint spanning reads", + "Chimera IDs"] + +def generate_row_data(line_iter, show_read_throughs, + header_fields): + type_col_num = header_fields.index("type") + txs5p_col_num = header_fields.index("transcript_ids_5p") + txs3p_col_num = header_fields.index("transcript_ids_3p") + genes5p_col_num = header_fields.index("genes5p") + genes3p_col_num = header_fields.index("genes3p") + spanning_reads_col_num = header_fields.index("breakpoint_spanning_reads") + chimera_ids_col_num = header_fields.index("chimera_ids") + for line in line_iter: + fields = line.strip().split('\t') + if ((not show_read_throughs) and + (fields[type_col_num] == ChimeraTypes.READTHROUGH)): + continue + newfields = [] + # 5' position (chr12:65432) and strand + newfields.append(("ucsc_pos", ["%s:%s-%s" % (fields[0], fields[1], fields[2])])) + newfields.append(("string", fields[8])) + # 3' position (chr12:76543) and strand + newfields.append(("ucsc_pos", ["%s:%s-%s" % (fields[3], fields[4], fields[5])])) + newfields.append(("string", fields[9])) + # cluster id + newfields.append(("string", fields[6])) + # transcripts + newfields.append(("ucsc_pos", fields[txs5p_col_num].split(","))) + newfields.append(("ucsc_pos", fields[txs3p_col_num].split(","))) + # genes + newfields.append(("genecards", fields[genes5p_col_num].split(","))) + newfields.append(("genecards", fields[genes3p_col_num].split(","))) + # chimera type + newfields.append(("string", fields[14])) + # distance + newfields.append(("string", fields[15])) + # total frags + newfields.append(("string", fields[16])) + # spanning frags + newfields.append(("string", fields[17])) + # unique alignment positions + 
newfields.append(("string", fields[18])) + # isoform fraction 5p + newfields.append(("string", fields[19])) + # isoform fraction 3p + newfields.append(("string", fields[20])) + # breakpoint spanning reads + newfields.append(("list", fields[21].split(","))) + # chimera ids + newfields.append(("list", fields[22].split(","))) + yield newfields + +def make_html_table(input_file, + ucsc_db, + show_read_throughs=False): + ucsc_pos_url = UCSC_POS_URL + "db=%s&position=" % (ucsc_db) + line_iter = open(input_file) + header_line = line_iter.next()[1:] + header_fields = header_line.strip().split('\t') + row_iter = generate_row_data(line_iter, + show_read_throughs=show_read_throughs, + header_fields=header_fields) + t = env.get_template("table_template.html") + htmlstring = t.render(colnames=get_header_row(), + ucsc_pos_url=ucsc_pos_url, + genecards_url=GENECARDS_URL, + rows=row_iter) + return htmlstring + +def main(): + from optparse import OptionParser + logging.basicConfig(level=logging.DEBUG, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + parser = OptionParser("usage: %prog [options] <chimeras.txt>") + parser.add_option("-o", dest="output_file", default=None, + help="output file [default=stdout]") + parser.add_option("--ucsc-db", dest="ucsc_db", default="hg19", + help="UCSC Genome Version (specific to organism and " + "revision e.g. 'hg19'") + parser.add_option("--read-throughs", dest="show_read_throughs", + action="store_true", default=False, + help="include read-through chimeras in output " + "[default=%default]") + options, args = parser.parse_args() + input_file = args[0] + if options.output_file is None: + fileh = sys.stdout + else: + fileh = open(options.output_file, "w") + res = make_html_table(input_file, + ucsc_db=options.ucsc_db, + show_read_throughs=options.show_read_throughs) + print >>fileh, res + if options.output_file is not None: + fileh.close() + + +if __name__ == '__main__': + main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/tools/gtf_to_genepred.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/tools/gtf_to_genepred.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,84 @@ +#!/usr/bin/env python +''' +Created on Feb 6, 2012 + +@author: mkiyer + +chimerascan: chimeric transcript discovery using RNA-seq + +Copyright (C) 2012 Matthew Iyer + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. +''' +import logging +import collections +import operator +import os +import sys +from optparse import OptionParser + +from chimerascan.lib import gtf + +def gtf_to_genepred(gtf_file, genepred_file): + # group by transcript id + logging.info("Reading GTF file") + chrom_exon_features = collections.defaultdict(lambda: collections.defaultdict(lambda: [])) + for feature in gtf.GTFFeature.parse(open(gtf_file)): + if feature.feature_type == "exon": + transcript_id = feature.attrs["transcript_id"] + chrom_exon_features[feature.seqid][transcript_id].append(feature) + # convert to genepred + logging.info("Writing GenePred file") + outfh = open(genepred_file, "w") + for chrom in sorted(chrom_exon_features): + logging.debug("Chromosome %s" % (chrom)) + exon_features = chrom_exon_features[chrom].values() + exon_features.sort(key=lambda exon_list: min(x.start for x in exon_list)) + for exons in exon_features: + # sort exons + exons.sort(key=operator.attrgetter('start')) + chrom = exons[0].seqid + tx_start = exons[0].start + tx_end = exons[-1].end + strand = exons[0].strand + transcript_id = exons[0].attrs['transcript_id'] + gene_name = exons[0].attrs['gene_name'] + # write genepred 
fields + fields = [transcript_id, chrom, strand, str(tx_start), + str(tx_end), str(tx_start), str(tx_start), + str(len(exons)), + ",".join(map(str,[x.start for x in exons])) + ",", + ",".join(map(str,[x.end for x in exons])) + ",", + gene_name] + print >>outfh, "\t".join(fields) + outfh.close() + +def main(): + logging.basicConfig(level=logging.DEBUG, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + parser = OptionParser("usage: %prog <input.gtf> <genepred_output.txt>") + options, args = parser.parse_args() + # check command line arguments + if len(args) < 2: + parser.error("Incorrect number of command line arguments") + gtf_file = args[0] + genepred_file = args[1] + # check that input files exist + if not os.path.isfile(gtf_file): + parser.error("GTF file '%s' not found" % (gtf_file)) + gtf_to_genepred(gtf_file, genepred_file) + return 0 + +if __name__ == '__main__': + sys.exit(main()) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/tools/make_false_positive_file.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/tools/make_false_positive_file.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,47 @@ +#!/usr/bin/env python +''' +Created on Jul 6, 2011 + +@author: mkiyer +''' +import logging +import sys +import collections + +from chimerascan.lib.chimera import Chimera + +def main(): + from optparse import OptionParser + logging.basicConfig(level=logging.DEBUG, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") + parser = OptionParser("usage: %prog [options] <chimeras.txt> [<chimeras2.txt> <chimeras3.txt> ...]") + parser.add_option("-o", dest="output_file", default=None, + help="output file [default=stdout]") + parser.add_option("-n", dest="num_files", type="int", default=1, + help="chimera must be recurrent in N samples " + "to make considered a false positive " + "[default=%default]") + options, args = parser.parse_args() + input_files = args + false_pos_chimeras = collections.defaultdict(lambda: 0) + for input_file in input_files: + logging.info("Processing file %s" % (input_file)) + num_chimeras = 0 + for c in Chimera.parse(open(input_file)): + key = (c.partner5p.tx_name, c.partner5p.end, c.partner3p.tx_name, c.partner3p.start) + false_pos_chimeras[key] += 1 + num_chimeras += 1 + logging.info("\tchimeras in file: %d" % (num_chimeras)) + logging.info("\tcurrent false positive candidates: %d" % (len(false_pos_chimeras))) + if options.output_file is None: + fileh = sys.stdout + else: + fileh = open(options.output_file, "w") + for key,recurrence in false_pos_chimeras.iteritems(): + if recurrence >= options.num_files: + print >>fileh, '\t'.join(map(str,key)) + if options.output_file is not None: + fileh.close() + +if __name__ == '__main__': + main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/tools/sortable.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/tools/sortable.js Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,323 @@\n+/*\n+Table sorting script by Joost de Valk, check it out at http://www.joostdevalk.nl/code/sortable-table/.\n+Based on a script from http://www.kryogenix.org/code/browser/sorttable/.\n+Distributed under the MIT license: http://www.kryogenix.org/code/browser/licence.html .\n+\n+Copyright (c) 1997-2007 Stuart Langridge, Joost de Valk.\n+\n+Version 1.5.7\n+*/\n+\n+/* You can change these values */\n+var image_path = "http://www.joostdevalk.nl/code/sortable-table/";\n+var image_up = "arrow-up.gif";\n+var image_down = "arrow-down.gif";\n+var image_none = "arrow-none.gif";\n+var europeandate = true;\n+var alternate_row_colors = true;\n+\n+/* Don\'t change anything below this unless you know what you\'re doing */\n+addEvent(window, "load", sortables_init);\n+\n+var SORT_COLUMN_INDEX;\n+var thead = false;\n+\n+function sortables_init() {\n+\t// Find all tables with class sortable and make them sortable\n+\tif (!document.getElementsByTagName) return;\n+\ttbls = document.getElementsByTagName("table");\n+\tfor (ti=0;ti<tbls.length;ti++) {\n+\t\tthisTbl = tbls[ti];\n+\t\tif (((\' \'+thisTbl.className+\' \').indexOf("sortable") != -1) && (thisTbl.id)) {\n+\t\t\tts_makeSortable(thisTbl);\n+\t\t}\n+\t}\n+}\n+\n+function ts_makeSortable(t) {\n+\tif (t.rows && t.rows.length > 0) {\n+\t\tif (t.tHead && t.tHead.rows.length > 0) {\n+\t\t\tvar firstRow = t.tHead.rows[t.tHead.rows.length-1];\n+\t\t\tthead = true;\n+\t\t} else {\n+\t\t\tvar firstRow = t.rows[0];\n+\t\t}\n+\t}\n+\tif (!firstRow) return;\n+\t\n+\t// We have a first row: assume it\'s the header, and make its contents clickable links\n+\tfor (var i=0;i<firstRow.cells.length;i++) {\n+\t\tvar cell = firstRow.cells[i];\n+\t\tvar txt = ts_getInnerText(cell);\n+\t\tif (cell.className != "unsortable" && cell.className.indexOf("unsortable") == -1) {\n+\t\t\tcell.innerHTML = \'<a href="#" class="sortheader" onclick="ts_resortTable(this, \'+i+\');return false;">\'+txt+\'<span class="sortarrow"> <img src="\'+ 
image_path + image_none + \'" alt="↓"/></span></a>\';\n+\t\t}\n+\t}\n+\tif (alternate_row_colors) {\n+\t\talternate(t);\n+\t}\n+}\n+\n+function ts_getInnerText(el) {\n+\tif (typeof el == "string") return el;\n+\tif (typeof el == "undefined") { return el };\n+\tif (el.innerText) return el.innerText;\t//Not needed but it is faster\n+\tvar str = "";\n+\t\n+\tvar cs = el.childNodes;\n+\tvar l = cs.length;\n+\tfor (var i = 0; i < l; i++) {\n+\t\tswitch (cs[i].nodeType) {\n+\t\t\tcase 1: //ELEMENT_NODE\n+\t\t\t\tstr += ts_getInnerText(cs[i]);\n+\t\t\t\tbreak;\n+\t\t\tcase 3:\t//TEXT_NODE\n+\t\t\t\tstr += cs[i].nodeValue;\n+\t\t\t\tbreak;\n+\t\t}\n+\t}\n+\treturn str;\n+}\n+\n+function ts_resortTable(lnk, clid) {\n+\tvar span;\n+\tfor (var ci=0;ci<lnk.childNodes.length;ci++) {\n+\t\tif (lnk.childNodes[ci].tagName && lnk.childNodes[ci].tagName.toLowerCase() == \'span\') span = lnk.childNodes[ci];\n+\t}\n+\tvar spantext = ts_getInnerText(span);\n+\tvar td = lnk.parentNode;\n+\tvar column = clid || td.cellIndex;\n+\tvar t = getParent(td,\'TABLE\');\n+\t// Work out a type for the column\n+\tif (t.rows.length <= 1) return;\n+\tvar itm = "";\n+\tvar i = 0;\n+\twhile (itm == "" && i < t.tBodies[0].rows.length) {\n+\t\tvar itm = ts_getInnerText(t.tBodies[0].rows[i].cells[column]);\n+\t\titm = trim(itm);\n+\t\tif (itm.substr(0,4) == "<!--" || itm.length == 0) {\n+\t\t\titm = "";\n+\t\t}\n+\t\ti++;\n+\t}\n+\tif (itm == "") return; \n+\tsortfn = ts_sort_caseinsensitive;\n+\tif (itm.match(/^\\d\\d[\\/\\.-][a-zA-z][a-zA-Z][a-zA-Z][\\/\\.-]\\d\\d\\d\\d$/)) sortfn = ts_sort_date;\n+\tif (itm.match(/^\\d\\d[\\/\\.-]\\d\\d[\\/\\.-]\\d\\d\\d{2}?$/)) sortfn = ts_sort_date;\n+\tif (itm.match(/^-?[\xa3$\x80\xdb\xa2\xb4]\\d/)) sortfn = ts_sort_numeric;\n+\tif (itm.match(/^-?(\\d+[,\\.]?)+(E[-+][\\d]+)?%?$/)) sortfn = ts_sort_numeric;\n+\tSORT_COLUMN_INDEX = column;\n+\tvar firstRow = new Array();\n+\tvar newRows = new Array();\n+\tfor (k=0;k<t.tBodies.length;k++) {\n+\t\tfor 
(i=0;i<t.tBodies[k].rows[0].length;i++) { \n+\t\t\tfirstRow[i] = t.tBodies[k].rows[0][i]; \n+\t\t}\n+\t}\n+\tfor (k=0;k<t.tBodies.length;k++) {\n+\t\tif (!thead) {\n+\t\t\t// Skip the first row\n+\t\t\tfor (j=1;j<t.tBodies[k].rows.length;j++) { \n+\t\t\t\tnewRows[j-1] = t.tBodies[k].rows[j];\n+\t\t\t}\n+\t\t} else {\n+\t\t\t// Do NOT skip the f'..b'{\n+\t\tmtstr = date.substr(3,3);\n+\t\tmtstr = mtstr.toLowerCase();\n+\t\tswitch(mtstr) {\n+\t\t\tcase "jan": var mt = "01"; break;\n+\t\t\tcase "feb": var mt = "02"; break;\n+\t\t\tcase "mar": var mt = "03"; break;\n+\t\t\tcase "apr": var mt = "04"; break;\n+\t\t\tcase "may": var mt = "05"; break;\n+\t\t\tcase "jun": var mt = "06"; break;\n+\t\t\tcase "jul": var mt = "07"; break;\n+\t\t\tcase "aug": var mt = "08"; break;\n+\t\t\tcase "sep": var mt = "09"; break;\n+\t\t\tcase "oct": var mt = "10"; break;\n+\t\t\tcase "nov": var mt = "11"; break;\n+\t\t\tcase "dec": var mt = "12"; break;\n+\t\t\t// default: var mt = "00";\n+\t\t}\n+\t\tdt = date.substr(7,4)+mt+date.substr(0,2);\n+\t\treturn dt;\n+\t} else if (date.length == 10) {\n+\t\tif (europeandate == false) {\n+\t\t\tdt = date.substr(6,4)+date.substr(0,2)+date.substr(3,2);\n+\t\t\treturn dt;\n+\t\t} else {\n+\t\t\tdt = date.substr(6,4)+date.substr(3,2)+date.substr(0,2);\n+\t\t\treturn dt;\n+\t\t}\n+\t} else if (date.length == 8) {\n+\t\tyr = date.substr(6,2);\n+\t\tif (parseInt(yr) < 50) { \n+\t\t\tyr = \'20\'+yr; \n+\t\t} else { \n+\t\t\tyr = \'19\'+yr; \n+\t\t}\n+\t\tif (europeandate == true) {\n+\t\t\tdt = yr+date.substr(3,2)+date.substr(0,2);\n+\t\t\treturn dt;\n+\t\t} else {\n+\t\t\tdt = yr+date.substr(0,2)+date.substr(3,2);\n+\t\t\treturn dt;\n+\t\t}\n+\t}\n+\treturn dt;\n+}\n+\n+function ts_sort_date(a,b) {\n+\tdt1 = sort_date(ts_getInnerText(a.cells[SORT_COLUMN_INDEX]));\n+\tdt2 = sort_date(ts_getInnerText(b.cells[SORT_COLUMN_INDEX]));\n+\t\n+\tif (dt1==dt2) {\n+\t\treturn 0;\n+\t}\n+\tif (dt1<dt2) { \n+\t\treturn -1;\n+\t}\n+\treturn 1;\n+}\n+function 
ts_sort_numeric(a,b) {\n+\tvar aa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]);\n+\taa = clean_num(aa);\n+\tvar bb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]);\n+\tbb = clean_num(bb);\n+\treturn compare_numeric(aa,bb);\n+}\n+function compare_numeric(a,b) {\n+\tvar a = parseFloat(a);\n+\ta = (isNaN(a) ? 0 : a);\n+\tvar b = parseFloat(b);\n+\tb = (isNaN(b) ? 0 : b);\n+\treturn a - b;\n+}\n+function ts_sort_caseinsensitive(a,b) {\n+\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]).toLowerCase();\n+\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]).toLowerCase();\n+\tif (aa==bb) {\n+\t\treturn 0;\n+\t}\n+\tif (aa<bb) {\n+\t\treturn -1;\n+\t}\n+\treturn 1;\n+}\n+function ts_sort_default(a,b) {\n+\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]);\n+\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]);\n+\tif (aa==bb) {\n+\t\treturn 0;\n+\t}\n+\tif (aa<bb) {\n+\t\treturn -1;\n+\t}\n+\treturn 1;\n+}\n+function addEvent(elm, evType, fn, useCapture)\n+// addEvent and removeEvent\n+// cross-browser event handling for IE5+,\tNS6 and Mozilla\n+// By Scott Andrew\n+{\n+\tif (elm.addEventListener){\n+\t\telm.addEventListener(evType, fn, useCapture);\n+\t\treturn true;\n+\t} else if (elm.attachEvent){\n+\t\tvar r = elm.attachEvent("on"+evType, fn);\n+\t\treturn r;\n+\t} else {\n+\t\talert("Handler could not be removed");\n+\t}\n+}\n+function clean_num(str) {\n+\tstr = str.replace(new RegExp(/[^-?0-9.]/g),"");\n+\treturn str;\n+}\n+function trim(s) {\n+\treturn s.replace(/^\\s+|\\s+$/g, "");\n+}\n+function alternate(table) {\n+\t// Take object table and get all it\'s tbodies.\n+\tvar tableBodies = table.getElementsByTagName("tbody");\n+\t// Loop through these tbodies\n+\tfor (var i = 0; i < tableBodies.length; i++) {\n+\t\t// Take the tbody, and get all it\'s rows\n+\t\tvar tableRows = tableBodies[i].getElementsByTagName("tr");\n+\t\t// Loop through these rows\n+\t\t// Start at 1 because we want to leave the heading row untouched\n+\t\tfor (var j = 0; j < tableRows.length; j++) 
{\n+\t\t\t// Check if j is even, and apply classes for both possible results\n+\t\t\tif ( (j % 2) == 0 ) {\n+\t\t\t\tif ( !(tableRows[j].className.indexOf(\'odd\') == -1) ) {\n+\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'odd\', \'even\');\n+\t\t\t\t} else {\n+\t\t\t\t\tif ( tableRows[j].className.indexOf(\'even\') == -1 ) {\n+\t\t\t\t\t\ttableRows[j].className += " even";\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t} else {\n+\t\t\t\tif ( !(tableRows[j].className.indexOf(\'even\') == -1) ) {\n+\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'even\', \'odd\');\n+\t\t\t\t} else {\n+\t\t\t\t\tif ( tableRows[j].className.indexOf(\'odd\') == -1 ) {\n+\t\t\t\t\t\ttableRows[j].className += " odd";\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t} \n+\t\t}\n+\t}\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/tools/sortable_us.js --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/tools/sortable_us.js Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,323 @@\n+/*\n+Table sorting script by Joost de Valk, check it out at http://www.joostdevalk.nl/code/sortable-table/.\n+Based on a script from http://www.kryogenix.org/code/browser/sorttable/.\n+Distributed under the MIT license: http://www.kryogenix.org/code/browser/licence.html .\n+\n+Copyright (c) 1997-2007 Stuart Langridge, Joost de Valk.\n+\n+Version 1.5.7\n+*/\n+\n+/* You can change these values */\n+var image_path = "http://www.joostdevalk.nl/code/sortable-table/";\n+var image_up = "arrow-up.gif";\n+var image_down = "arrow-down.gif";\n+var image_none = "arrow-none.gif";\n+var europeandate = false;\n+var alternate_row_colors = true;\n+\n+/* Don\'t change anything below this unless you know what you\'re doing */\n+addEvent(window, "load", sortables_init);\n+\n+var SORT_COLUMN_INDEX;\n+var thead = false;\n+\n+function sortables_init() {\n+\t// Find all tables with class sortable and make them sortable\n+\tif (!document.getElementsByTagName) return;\n+\ttbls = document.getElementsByTagName("table");\n+\tfor (ti=0;ti<tbls.length;ti++) {\n+\t\tthisTbl = tbls[ti];\n+\t\tif (((\' \'+thisTbl.className+\' \').indexOf("sortable") != -1) && (thisTbl.id)) {\n+\t\t\tts_makeSortable(thisTbl);\n+\t\t}\n+\t}\n+}\n+\n+function ts_makeSortable(t) {\n+\tif (t.rows && t.rows.length > 0) {\n+\t\tif (t.tHead && t.tHead.rows.length > 0) {\n+\t\t\tvar firstRow = t.tHead.rows[t.tHead.rows.length-1];\n+\t\t\tthead = true;\n+\t\t} else {\n+\t\t\tvar firstRow = t.rows[0];\n+\t\t}\n+\t}\n+\tif (!firstRow) return;\n+\t\n+\t// We have a first row: assume it\'s the header, and make its contents clickable links\n+\tfor (var i=0;i<firstRow.cells.length;i++) {\n+\t\tvar cell = firstRow.cells[i];\n+\t\tvar txt = ts_getInnerText(cell);\n+\t\tif (cell.className != "unsortable" && cell.className.indexOf("unsortable") == -1) {\n+\t\t\tcell.innerHTML = \'<a href="#" class="sortheader" onclick="ts_resortTable(this, \'+i+\');return false;">\'+txt+\'<span class="sortarrow"> <img src="\'+ 
image_path + image_none + \'" alt="↓"/></span></a>\';\n+\t\t}\n+\t}\n+\tif (alternate_row_colors) {\n+\t\talternate(t);\n+\t}\n+}\n+\n+function ts_getInnerText(el) {\n+\tif (typeof el == "string") return el;\n+\tif (typeof el == "undefined") { return el };\n+\tif (el.innerText) return el.innerText;\t//Not needed but it is faster\n+\tvar str = "";\n+\t\n+\tvar cs = el.childNodes;\n+\tvar l = cs.length;\n+\tfor (var i = 0; i < l; i++) {\n+\t\tswitch (cs[i].nodeType) {\n+\t\t\tcase 1: //ELEMENT_NODE\n+\t\t\t\tstr += ts_getInnerText(cs[i]);\n+\t\t\t\tbreak;\n+\t\t\tcase 3:\t//TEXT_NODE\n+\t\t\t\tstr += cs[i].nodeValue;\n+\t\t\t\tbreak;\n+\t\t}\n+\t}\n+\treturn str;\n+}\n+\n+function ts_resortTable(lnk, clid) {\n+\tvar span;\n+\tfor (var ci=0;ci<lnk.childNodes.length;ci++) {\n+\t\tif (lnk.childNodes[ci].tagName && lnk.childNodes[ci].tagName.toLowerCase() == \'span\') span = lnk.childNodes[ci];\n+\t}\n+\tvar spantext = ts_getInnerText(span);\n+\tvar td = lnk.parentNode;\n+\tvar column = clid || td.cellIndex;\n+\tvar t = getParent(td,\'TABLE\');\n+\t// Work out a type for the column\n+\tif (t.rows.length <= 1) return;\n+\tvar itm = "";\n+\tvar i = 0;\n+\twhile (itm == "" && i < t.tBodies[0].rows.length) {\n+\t\tvar itm = ts_getInnerText(t.tBodies[0].rows[i].cells[column]);\n+\t\titm = trim(itm);\n+\t\tif (itm.substr(0,4) == "<!--" || itm.length == 0) {\n+\t\t\titm = "";\n+\t\t}\n+\t\ti++;\n+\t}\n+\tif (itm == "") return; \n+\tsortfn = ts_sort_caseinsensitive;\n+\tif (itm.match(/^\\d\\d[\\/\\.-][a-zA-z][a-zA-Z][a-zA-Z][\\/\\.-]\\d\\d\\d\\d$/)) sortfn = ts_sort_date;\n+\tif (itm.match(/^\\d\\d[\\/\\.-]\\d\\d[\\/\\.-]\\d\\d\\d{2}?$/)) sortfn = ts_sort_date;\n+\tif (itm.match(/^-?[\xa3$\x80\xdb\xa2\xb4]\\d/)) sortfn = ts_sort_numeric;\n+\tif (itm.match(/^-?(\\d+[,\\.]?)+(E[-+][\\d]+)?%?$/)) sortfn = ts_sort_numeric;\n+\tSORT_COLUMN_INDEX = column;\n+\tvar firstRow = new Array();\n+\tvar newRows = new Array();\n+\tfor (k=0;k<t.tBodies.length;k++) {\n+\t\tfor 
(i=0;i<t.tBodies[k].rows[0].length;i++) { \n+\t\t\tfirstRow[i] = t.tBodies[k].rows[0][i]; \n+\t\t}\n+\t}\n+\tfor (k=0;k<t.tBodies.length;k++) {\n+\t\tif (!thead) {\n+\t\t\t// Skip the first row\n+\t\t\tfor (j=1;j<t.tBodies[k].rows.length;j++) { \n+\t\t\t\tnewRows[j-1] = t.tBodies[k].rows[j];\n+\t\t\t}\n+\t\t} else {\n+\t\t\t// Do NOT skip the '..b'{\n+\t\tmtstr = date.substr(3,3);\n+\t\tmtstr = mtstr.toLowerCase();\n+\t\tswitch(mtstr) {\n+\t\t\tcase "jan": var mt = "01"; break;\n+\t\t\tcase "feb": var mt = "02"; break;\n+\t\t\tcase "mar": var mt = "03"; break;\n+\t\t\tcase "apr": var mt = "04"; break;\n+\t\t\tcase "may": var mt = "05"; break;\n+\t\t\tcase "jun": var mt = "06"; break;\n+\t\t\tcase "jul": var mt = "07"; break;\n+\t\t\tcase "aug": var mt = "08"; break;\n+\t\t\tcase "sep": var mt = "09"; break;\n+\t\t\tcase "oct": var mt = "10"; break;\n+\t\t\tcase "nov": var mt = "11"; break;\n+\t\t\tcase "dec": var mt = "12"; break;\n+\t\t\t// default: var mt = "00";\n+\t\t}\n+\t\tdt = date.substr(7,4)+mt+date.substr(0,2);\n+\t\treturn dt;\n+\t} else if (date.length == 10) {\n+\t\tif (europeandate == false) {\n+\t\t\tdt = date.substr(6,4)+date.substr(0,2)+date.substr(3,2);\n+\t\t\treturn dt;\n+\t\t} else {\n+\t\t\tdt = date.substr(6,4)+date.substr(3,2)+date.substr(0,2);\n+\t\t\treturn dt;\n+\t\t}\n+\t} else if (date.length == 8) {\n+\t\tyr = date.substr(6,2);\n+\t\tif (parseInt(yr) < 50) { \n+\t\t\tyr = \'20\'+yr; \n+\t\t} else { \n+\t\t\tyr = \'19\'+yr; \n+\t\t}\n+\t\tif (europeandate == true) {\n+\t\t\tdt = yr+date.substr(3,2)+date.substr(0,2);\n+\t\t\treturn dt;\n+\t\t} else {\n+\t\t\tdt = yr+date.substr(0,2)+date.substr(3,2);\n+\t\t\treturn dt;\n+\t\t}\n+\t}\n+\treturn dt;\n+}\n+\n+function ts_sort_date(a,b) {\n+\tdt1 = sort_date(ts_getInnerText(a.cells[SORT_COLUMN_INDEX]));\n+\tdt2 = sort_date(ts_getInnerText(b.cells[SORT_COLUMN_INDEX]));\n+\t\n+\tif (dt1==dt2) {\n+\t\treturn 0;\n+\t}\n+\tif (dt1<dt2) { \n+\t\treturn -1;\n+\t}\n+\treturn 1;\n+}\n+function 
ts_sort_numeric(a,b) {\n+\tvar aa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]);\n+\taa = clean_num(aa);\n+\tvar bb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]);\n+\tbb = clean_num(bb);\n+\treturn compare_numeric(aa,bb);\n+}\n+function compare_numeric(a,b) {\n+\tvar a = parseFloat(a);\n+\ta = (isNaN(a) ? 0 : a);\n+\tvar b = parseFloat(b);\n+\tb = (isNaN(b) ? 0 : b);\n+\treturn a - b;\n+}\n+function ts_sort_caseinsensitive(a,b) {\n+\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]).toLowerCase();\n+\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]).toLowerCase();\n+\tif (aa==bb) {\n+\t\treturn 0;\n+\t}\n+\tif (aa<bb) {\n+\t\treturn -1;\n+\t}\n+\treturn 1;\n+}\n+function ts_sort_default(a,b) {\n+\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]);\n+\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]);\n+\tif (aa==bb) {\n+\t\treturn 0;\n+\t}\n+\tif (aa<bb) {\n+\t\treturn -1;\n+\t}\n+\treturn 1;\n+}\n+function addEvent(elm, evType, fn, useCapture)\n+// addEvent and removeEvent\n+// cross-browser event handling for IE5+,\tNS6 and Mozilla\n+// By Scott Andrew\n+{\n+\tif (elm.addEventListener){\n+\t\telm.addEventListener(evType, fn, useCapture);\n+\t\treturn true;\n+\t} else if (elm.attachEvent){\n+\t\tvar r = elm.attachEvent("on"+evType, fn);\n+\t\treturn r;\n+\t} else {\n+\t\talert("Handler could not be removed");\n+\t}\n+}\n+function clean_num(str) {\n+\tstr = str.replace(new RegExp(/[^-?0-9.]/g),"");\n+\treturn str;\n+}\n+function trim(s) {\n+\treturn s.replace(/^\\s+|\\s+$/g, "");\n+}\n+function alternate(table) {\n+\t// Take object table and get all it\'s tbodies.\n+\tvar tableBodies = table.getElementsByTagName("tbody");\n+\t// Loop through these tbodies\n+\tfor (var i = 0; i < tableBodies.length; i++) {\n+\t\t// Take the tbody, and get all it\'s rows\n+\t\tvar tableRows = tableBodies[i].getElementsByTagName("tr");\n+\t\t// Loop through these rows\n+\t\t// Start at 1 because we want to leave the heading row untouched\n+\t\tfor (var j = 0; j < tableRows.length; j++) 
{\n+\t\t\t// Check if j is even, and apply classes for both possible results\n+\t\t\tif ( (j % 2) == 0 ) {\n+\t\t\t\tif ( !(tableRows[j].className.indexOf(\'odd\') == -1) ) {\n+\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'odd\', \'even\');\n+\t\t\t\t} else {\n+\t\t\t\t\tif ( tableRows[j].className.indexOf(\'even\') == -1 ) {\n+\t\t\t\t\t\ttableRows[j].className += " even";\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t} else {\n+\t\t\t\tif ( !(tableRows[j].className.indexOf(\'even\') == -1) ) {\n+\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'even\', \'odd\');\n+\t\t\t\t} else {\n+\t\t\t\t\tif ( tableRows[j].className.indexOf(\'odd\') == -1 ) {\n+\t\t\t\t\t\ttableRows[j].className += " odd";\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t} \n+\t\t}\n+\t}\n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/tools/table_style.css --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/tools/table_style.css Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,35 @@ +/* Copyright 2006 Joost de Valk */ +a img { + border: 0; +} +table.sortable { + border-spacing: 0; + border: 1px solid #000; + border-collapse: collapse; +} +table.sortable th, table.sortable td { + text-align: left; + padding: 2px 4px 2px 4px; + width: 100px; + border-style: solid; + border-color: #444; +} +table.sortable th { + border-width: 0px 1px 1px 1px; + background-color: #ccc; +} +table.sortable td { + border-width: 0px 1px 0px 1px; + font: 12px "Lucida Grande", Helvetica, "Arial Unicode MS", "Arial Unicode", Arial, sans-serif; +} +table.sortable tr.odd td { + background-color: #BFEFFF; +} +table.sortable tr.even td { + background-color: #ffffff; +} +table.sortable tr.sortbottom td { + border-top: 1px solid #444; + background-color: #ccc; + font-weight: bold; +} \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan/tools/table_template.html --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/chimerascan/tools/table_template.html Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,404 @@\n+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n+<html xmlns="http://www.w3.org/1999/xhtml">\n+\n+<head>\n+<head>\n+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />\n+ <title>chimerascan results</title>\n+ <!-- <link rel="stylesheet" type="text/css" href="table_style.css"/> -->\n+ <!-- <script type="text/javascript" src="sortable.js"></script> -->\n+ <script type="text/javascript">\n+/*\n+Table sorting script by Joost de Valk, check it out at http://www.joostdevalk.nl/code/sortable-table/.\n+Based on a script from http://www.kryogenix.org/code/browser/sorttable/.\n+Distributed under the MIT license: http://www.kryogenix.org/code/browser/licence.html .\n+\n+Copyright (c) 1997-2007 Stuart Langridge, Joost de Valk.\n+\n+Version 1.5.7\n+*/\n+\n+/* You can change these values */\n+var image_path = "http://www.joostdevalk.nl/code/sortable-table/";\n+var image_up = "arrow-up.gif";\n+var image_down = "arrow-down.gif";\n+var image_none = "arrow-none.gif";\n+var europeandate = false;\n+var alternate_row_colors = true;\n+\n+/* Don\'t change anything below this unless you know what you\'re doing */\n+addEvent(window, "load", sortables_init);\n+\n+var SORT_COLUMN_INDEX;\n+var thead = false;\n+\n+function sortables_init() {\n+\t// Find all tables with class sortable and make them sortable\n+\tif (!document.getElementsByTagName) return;\n+\ttbls = document.getElementsByTagName("table");\n+\tfor (ti=0;ti<tbls.length;ti++) {\n+\t\tthisTbl = tbls[ti];\n+\t\tif (((\' \'+thisTbl.className+\' \').indexOf("sortable") != -1) && (thisTbl.id)) {\n+\t\t\tts_makeSortable(thisTbl);\n+\t\t}\n+\t}\n+}\n+\n+function ts_makeSortable(t) {\n+\tif (t.rows && t.rows.length > 0) {\n+\t\tif (t.tHead && t.tHead.rows.length > 0) {\n+\t\t\tvar firstRow = t.tHead.rows[t.tHead.rows.length-1];\n+\t\t\tthead = true;\n+\t\t} else {\n+\t\t\tvar firstRow = t.rows[0];\n+\t\t}\n+\t}\n+\tif 
(!firstRow) return;\n+\t\n+\t// We have a first row: assume it\'s the header, and make its contents clickable links\n+\tfor (var i=0;i<firstRow.cells.length;i++) {\n+\t\tvar cell = firstRow.cells[i];\n+\t\tvar txt = ts_getInnerText(cell);\n+\t\tif (cell.className != "unsortable" && cell.className.indexOf("unsortable") == -1) {\n+\t\t\tcell.innerHTML = \'<a href="#" class="sortheader" onclick="ts_resortTable(this, \'+i+\');return false;">\'+txt+\'<span class="sortarrow"> <img src="\'+ image_path + image_none + \'" alt="↓"/></span></a>\';\n+\t\t}\n+\t}\n+\tif (alternate_row_colors) {\n+\t\talternate(t);\n+\t}\n+}\n+\n+function ts_getInnerText(el) {\n+\tif (typeof el == "string") return el;\n+\tif (typeof el == "undefined") { return el };\n+\tif (el.innerText) return el.innerText;\t//Not needed but it is faster\n+\tvar str = "";\n+\t\n+\tvar cs = el.childNodes;\n+\tvar l = cs.length;\n+\tfor (var i = 0; i < l; i++) {\n+\t\tswitch (cs[i].nodeType) {\n+\t\t\tcase 1: //ELEMENT_NODE\n+\t\t\t\tstr += ts_getInnerText(cs[i]);\n+\t\t\t\tbreak;\n+\t\t\tcase 3:\t//TEXT_NODE\n+\t\t\t\tstr += cs[i].nodeValue;\n+\t\t\t\tbreak;\n+\t\t}\n+\t}\n+\treturn str;\n+}\n+\n+function ts_resortTable(lnk, clid) {\n+\tvar span;\n+\tfor (var ci=0;ci<lnk.childNodes.length;ci++) {\n+\t\tif (lnk.childNodes[ci].tagName && lnk.childNodes[ci].tagName.toLowerCase() == \'span\') span = lnk.childNodes[ci];\n+\t}\n+\tvar spantext = ts_getInnerText(span);\n+\tvar td = lnk.parentNode;\n+\tvar column = clid || td.cellIndex;\n+\tvar t = getParent(td,\'TABLE\');\n+\t// Work out a type for the column\n+\tif (t.rows.length <= 1) return;\n+\tvar itm = "";\n+\tvar i = 1;\n+\twhile (itm == "" && i < t.tBodies[0].rows.length) {\n+\t\tvar itm = ts_getInnerText(t.tBodies[0].rows[i].cells[column]);\n+\t\titm = trim(itm);\n+\t\tif (itm.substr(0,4) == "<!--" || itm.length == 0) {\n+\t\t\titm = "";\n+\t\t}\n+\t\ti++;\n+\t}\n+\tif (itm == "") return; \n+\t// alert(itm)\n+\tsortfn = ts_sort_caseinsensitive;\n+\tif 
(itm.match(/^\\d\\d[\\/\\.-][a-zA-z][a-zA-Z][a-zA-Z][\\/\\.-]\\d\\d\\d\\d$/)) sortfn = ts_sort_date;\n+\tif (itm.match(/^\\d\\d[\\/\\.-]\\d\\d[\\/\\.-]\\d\\d\\d{2}?$/)) sortfn = ts_sort_date;\n+\tif (itm.match(/^-?[$\\u017d]\\d/)) sortfn = ts_sort_numeric;\n+\tif (itm.match(/'..b'aN(a) ? 0 : a);\n+\tvar b = parseFloat(b);\n+\tb = (isNaN(b) ? 0 : b);\n+\treturn a - b;\n+}\n+function ts_sort_caseinsensitive(a,b) {\n+\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]).toLowerCase();\n+\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]).toLowerCase();\n+\tif (aa==bb) {\n+\t\treturn 0;\n+\t}\n+\tif (aa<bb) {\n+\t\treturn -1;\n+\t}\n+\treturn 1;\n+}\n+function ts_sort_default(a,b) {\n+\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]);\n+\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]);\n+\tif (aa==bb) {\n+\t\treturn 0;\n+\t}\n+\tif (aa<bb) {\n+\t\treturn -1;\n+\t}\n+\treturn 1;\n+}\n+function addEvent(elm, evType, fn, useCapture)\n+// addEvent and removeEvent\n+// cross-browser event handling for IE5+,\tNS6 and Mozilla\n+// By Scott Andrew\n+{\n+\tif (elm.addEventListener){\n+\t\telm.addEventListener(evType, fn, useCapture);\n+\t\treturn true;\n+\t} else if (elm.attachEvent){\n+\t\tvar r = elm.attachEvent("on"+evType, fn);\n+\t\treturn r;\n+\t} else {\n+\t\talert("Handler could not be removed");\n+\t}\n+}\n+function clean_num(str) {\n+\tstr = str.replace(new RegExp(/[^-?0-9.]/g),"");\n+\treturn str;\n+}\n+function trim(s) {\n+\treturn s.replace(/^\\s+|\\s+$/g, "");\n+}\n+function alternate(table) {\n+\t// Take object table and get all it\'s tbodies.\n+\tvar tableBodies = table.getElementsByTagName("tbody");\n+\t// Loop through these tbodies\n+\tfor (var i = 0; i < tableBodies.length; i++) {\n+\t\t// Take the tbody, and get all it\'s rows\n+\t\tvar tableRows = tableBodies[i].getElementsByTagName("tr");\n+\t\t// Loop through these rows\n+\t\t// Start at 1 because we want to leave the heading row untouched\n+\t\tfor (var j = 0; j < tableRows.length; j++) {\n+\t\t\t// Check 
if j is even, and apply classes for both possible results\n+\t\t\tif ( (j % 2) == 0 ) {\n+\t\t\t\tif ( !(tableRows[j].className.indexOf(\'odd\') == -1) ) {\n+\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'odd\', \'even\');\n+\t\t\t\t} else {\n+\t\t\t\t\tif ( tableRows[j].className.indexOf(\'even\') == -1 ) {\n+\t\t\t\t\t\ttableRows[j].className += " even";\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t} else {\n+\t\t\t\tif ( !(tableRows[j].className.indexOf(\'even\') == -1) ) {\n+\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'even\', \'odd\');\n+\t\t\t\t} else {\n+\t\t\t\t\tif ( tableRows[j].className.indexOf(\'odd\') == -1 ) {\n+\t\t\t\t\t\ttableRows[j].className += " odd";\n+\t\t\t\t\t}\n+\t\t\t\t}\n+\t\t\t} \n+\t\t}\n+\t}\n+}\n+ </script>\n+ <style type="text/css">\n+a img {\n+\tborder: 0;\n+}\n+table.sortable {\n+\tborder-spacing: 0;\n+\tborder: 1px solid #000;\n+\tborder-collapse: collapse;\n+}\n+table.sortable th, table.sortable td {\n+\ttext-align: left;\n+\tpadding: 2px 4px 2px 4px;\n+\twidth: 100px;\n+\tborder-style: solid;\n+\tborder-color: #444;\n+}\n+table.sortable th {\n+\tborder-width: 0px 1px 1px 1px;\n+\tbackground-color: #ccc;\n+\tfont: 14px "Lucida Grande", Helvetica, "Arial Unicode MS", "Arial Unicode", Arial, sans-serif;\t\n+\tfont-weight: bold;\n+}\n+table.sortable td {\n+\tborder-width: 0px 1px 0px 1px;\n+\tfont: 12px "Lucida Grande", Helvetica, "Arial Unicode MS", "Arial Unicode", Arial, sans-serif;\n+}\n+table.sortable tr.odd td {\n+\tbackground-color: #BFEFFF;\n+}\n+table.sortable tr.even td {\n+\tbackground-color: #ffffff;\n+}\n+table.sortable tr.sortbottom td {\n+\tborder-top: 1px solid #444;\n+\tbackground-color: #ccc;\n+\tfont-weight: bold;\n+} \n+ </style>\n+</head>\n+\n+<body>\n+\n+<table class="sortable" id="anyid" cellpadding="0" cellspacing="0">\n+<tr>{% for colname in colnames %}<th>{{ colname }}</th>{% endfor %}</tr>\n+{% for rowdata in rows %}\n+<tr>\n+ {% for datatype,col in rowdata %}\n+ <td>\n+ 
{% if datatype == "ucsc_pos" %}\n+ {% for itm in col %}\n+ <a href="{{ ucsc_pos_url }}{{ itm }}" target="_blank">{{ itm }}</a><br/>\n+ {% endfor %}\n+ {% elif datatype == "genecards" %}\n+ {% for itm in col %}\n+ <a href="{{ genecards_url }}{{ itm }}" target="_blank">{{ itm }}</a><br/>\n+ {% endfor %}\n+ {% elif datatype == "list" %}\n+ {% for itm in col %}{{ itm }}<br/>{% endfor %}\n+ {% else %}\n+ {{ col }}\n+ {% endif %}\n+ </td>\n+ {% endfor %}\n+</tr>\n+{% endfor %}\n+</table>\n+\n+</body>\n+</html>\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/chimerascan_index.py --- a/chimerascan/chimerascan_index.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,184 +0,0 @@ -#!/usr/bin/env python -''' -Created on Jan 5, 2011 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import logging -import os -import shutil -import subprocess -import sys -from optparse import OptionParser - -# local imports -import chimerascan.pysam as pysam -from chimerascan.lib.feature import GeneFeature -from chimerascan.lib.seq import DNA_reverse_complement -from chimerascan.lib.base import up_to_date, check_executable -from chimerascan.lib.config import JOB_ERROR, JOB_SUCCESS, ALIGN_INDEX, \ - BOWTIE_INDEX_FILE, GENE_FEATURE_FILE, GENE_REF_PREFIX - -BASES_PER_LINE = 50 - -def split_seq(seq, chars_per_line): - pos = 0 - newseq = [] - while pos < len(seq): - if pos + chars_per_line > len(seq): - endpos = len(seq) - else: - endpos = pos + chars_per_line - newseq.append(seq[pos:endpos]) - pos = endpos - return '\n'.join(newseq) - -def genepred_to_fasta(gene_feature_file, reference_seq_file): - ref_fa = pysam.Fastafile(reference_seq_file) - total = 0 - used = 0 - for g in GeneFeature.parse(open(gene_feature_file)): - total += 1 - exon_seqs = [] - error_occurred = False - for start, end in g.exons: - seq = ref_fa.fetch(g.chrom, start, end) - if (not seq) or (len(seq) < (end - start)): - logging.warning("gene %s exon %s:%d-%d not found in reference" % - (g.tx_name, 
g.chrom, start, end)) - error_occurred = True - break - exon_seqs.append(seq) - if error_occurred: - continue - used += 1 - # make fasta record - seq = ''.join(exon_seqs) - if g.strand == '-': - seq = DNA_reverse_complement(seq) - # break seq onto multiple lines - seqlines = split_seq(seq, BASES_PER_LINE) - fa_record = (">%s range=%s:%d-%d gene=%s strand=%s\n%s" % - (GENE_REF_PREFIX + g.tx_name, g.chrom, start, end, - g.gene_name, g.strand, seqlines)) - yield g, fa_record - logging.info("Used %d/%d gene features" % (used,total)) - ref_fa.close() - -def create_chimerascan_index(output_dir, - genome_fasta_file, - gene_feature_file, - bowtie_build_bin): - # create output dir if it does not exist - if not os.path.exists(output_dir): - os.makedirs(output_dir) - logging.info("Created index directory: %s" % (output_dir)) - # copy reference fasta file to output dir and index it - index_fasta_file = os.path.join(output_dir, ALIGN_INDEX + ".fa") - msg = "Adding reference genome to index" - if (up_to_date(index_fasta_file, genome_fasta_file)): - logging.info("[SKIPPED] %s" % (msg)) - else: - logging.info(msg) - shutil.copyfile(genome_fasta_file, index_fasta_file) - # index the genome fasta file - logging.info("Indexing FASTA file") - fh = pysam.Fastafile(index_fasta_file) - fh.close() - # add gene sequences to index - dst_gene_feature_file = os.path.join(output_dir, GENE_FEATURE_FILE) - msg = "Building transcriptome sequences and gene features" - if (up_to_date(index_fasta_file, gene_feature_file) and - up_to_date(dst_gene_feature_file, gene_feature_file)): - logging.info("[SKIPPED] %s" % (msg)) - else: - logging.info(msg) - # write sequences from gene feature file - logging.info("Adding transcript sequences and gene features to index") - fasta_fh = open(index_fasta_file, "a") - gene_fh = open(dst_gene_feature_file, "w") - for g, fa_record in genepred_to_fasta(gene_feature_file, index_fasta_file): - print >>gene_fh, str(g) - print >>fasta_fh, fa_record - gene_fh.close() - 
fasta_fh.close() - # remove old fasta index - if os.path.exists(index_fasta_file + ".fai"): - os.remove(index_fasta_file + ".fai") - # index the combined fasta file - logging.info("Reindexing the FASTA file") - fh = pysam.Fastafile(index_fasta_file) - fh.close() - # build bowtie index on the reference sequence file - bowtie_index_file = os.path.join(output_dir, BOWTIE_INDEX_FILE) - msg = "Building bowtie index" - if up_to_date(bowtie_index_file, index_fasta_file): - logging.info("[SKIPPED] %s" % (msg)) - else: - logging.info(msg) - bowtie_index_name = os.path.join(output_dir, ALIGN_INDEX) - args = [bowtie_build_bin, index_fasta_file, bowtie_index_name] - if subprocess.call(args) != os.EX_OK: - logging.error("bowtie-build failed to create alignment index") - if os.path.exists(bowtie_index_file): - os.remove(bowtie_index_file) - return JOB_ERROR - logging.info("Chimerascan index created successfully") - return JOB_SUCCESS - - -def main(): - logging.basicConfig(level=logging.DEBUG, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") - parser = OptionParser("usage: %prog [options] <reference_genome.fa> " - "<genepred_genes.txt> <index_output_dir>") - parser.add_option("--bowtie-dir", dest="bowtie_dir", default="", - help="Path to the 'bowtie' software (by default, " - "expects the 'bowtie' and 'bowtie-build' " - "binaries to be in current PATH)") - options, args = parser.parse_args() - # check command line arguments - if len(args) < 3: - parser.error("Incorrect number of command line arguments") - ref_fasta_file = args[0] - gene_feature_file = args[1] - output_dir = args[2] - # check that input files exist - if not os.path.isfile(ref_fasta_file): - parser.error("Reference fasta file '%s' not found" % (ref_fasta_file)) - if not os.path.isfile(gene_feature_file): - parser.error("Gene feature file '%s' not found" % (gene_feature_file)) - # check that output dir is not a regular file - if os.path.exists(output_dir) and (not os.path.isdir(output_dir)): - 
parser.error("Output directory name '%s' exists and is not a valid " - "directory" % (output_dir)) - # check that bowtie-build program exists - bowtie_build_bin = os.path.join(options.bowtie_dir, "bowtie-build") - if check_executable(bowtie_build_bin): - logging.debug("Checking for 'bowtie-build' binary... found") - else: - parser.error("bowtie-build binary not found or not executable") - # run main index creation function - retcode = create_chimerascan_index(output_dir, ref_fasta_file, - gene_feature_file, bowtie_build_bin) - sys.exit(retcode) - -if __name__ == '__main__': - main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/base.py --- a/chimerascan/lib/base.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,100 +0,0 @@ -''' -Created on Oct 26, 2010 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import os -import subprocess -import tempfile -import operator - -# -# constants used for library type -# -class LibraryTypes: - FR_UNSTRANDED = "fr-unstranded" - FR_FIRSTSTRAND = "fr-firststrand" - FR_SECONDSTRAND = "fr-secondstrand" - - @staticmethod - def choices(): - return (LibraryTypes.FR_UNSTRANDED, - LibraryTypes.FR_FIRSTSTRAND, - LibraryTypes.FR_SECONDSTRAND) - - @staticmethod - def same_strand(library_type): - return (library_type[0] == library_type[1]) - -def parse_lines(line_iter, numlines=1): - """ - generator that returns list of 'numlines' lines at a time - """ - try: - while True: - yield [line_iter.next().rstrip() for x in xrange(numlines)] - except StopIteration: - pass - -def parse_bool(s): - return True if s[0].lower() == "t" else False - -def parse_string_none(s): - return None if s == "None" else s - -def make_temp(base_dir, suffix=''): - fd,name = tempfile.mkstemp(suffix=suffix, prefix='tmp', dir=base_dir) - os.close(fd) - return name - -def check_executable(filename): - # check that samtools binary exists - devnullfh = open(os.devnull, 'w') - try: - subprocess.call([filename], stdout=devnullfh, stderr=devnullfh) - except OSError: - return False - devnullfh.close() 
- return True - -def up_to_date(outfile, infile, nzsize=True): - if not os.path.exists(infile): - return False - if not os.path.exists(outfile): - return False - if nzsize and (os.path.getsize(outfile) == 0): - return False - return os.path.getmtime(outfile) >= os.path.getmtime(infile) - -# in-place XML prettyprint formatter -def indent_xml(elem, level=0): - i = "\n" + level*" " - if len(elem): - if not elem.text or not elem.text.strip(): - elem.text = i + " " - if not elem.tail or not elem.tail.strip(): - elem.tail = i - for elem in elem: - indent_xml(elem, level+1) - if not elem.tail or not elem.tail.strip(): - elem.tail = i - else: - if level and (not elem.tail or not elem.tail.strip()): - elem.tail = i |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/batch_sort.py --- a/chimerascan/lib/batch_sort.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,59 +0,0 @@ -''' -Created on Jul 21, 2011 - -@author: mkiyer -''' - -# based on Recipe 466302: Sorting big files the Python 2.4 way -# by Nicolas Lehuen -#http://code.activestate.com/recipes/576755-sorting-big-files-the-python-26-way/ - -import os -from tempfile import gettempdir -from itertools import islice, cycle -from collections import namedtuple -import heapq - -Keyed = namedtuple("Keyed", ["key", "obj"]) - -def merge(key=None, *iterables): - # based on code posted by Scott David Daniels in c.l.p. - # http://groups.google.com/group/comp.lang.python/msg/484f01f1ea3c832d - - if key is None: - keyed_iterables = iterables - else: - keyed_iterables = [(Keyed(key(obj), obj) for obj in iterable) - for iterable in iterables] - for element in heapq.merge(*keyed_iterables): - yield element.obj - -def batch_sort(input, output, key=None, buffer_size=32000, tempdirs=None): - if tempdirs is None: - tempdirs = [] - if not tempdirs: - tempdirs.append(gettempdir()) - - chunks = [] - try: - with open(input,'rb',64*1024) as input_file: - input_iterator = iter(input_file) - for tempdir in cycle(tempdirs): - current_chunk = list(islice(input_iterator,buffer_size)) - if not current_chunk: - break - current_chunk.sort(key=key) - output_chunk = open(os.path.join(tempdir,'%06i'%len(chunks)),'w+b',64*1024) - chunks.append(output_chunk) - output_chunk.writelines(current_chunk) - output_chunk.flush() - output_chunk.seek(0) - with open(output,'wb',64*1024) as output_file: - output_file.writelines(merge(key, *chunks)) - finally: - for chunk in chunks: - try: - chunk.close() - os.remove(chunk.name) - except Exception: - pass |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/chimera.py --- a/chimerascan/lib/chimera.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,382 +0,0 @@\n-\'\'\'\n-Created on Jun 3, 2011\n-\n-@author: mkiyer\n-\'\'\'\n-from base import parse_string_none\n-from sam import get_clipped_interval\n-\n-DISCORDANT_TAG_NAME = "XC"\n-class DiscordantTags(object):\n- CONCORDANT_TX = 0\n- DISCORDANT_STRAND_TX = 1\n- CONCORDANT_GENE = 2\n- DISCORDANT_STRAND_GENE = 3\n- CONCORDANT_GENOME = 4\n- DISCORDANT_STRAND_GENOME = 5\n- DISCORDANT_GENE = 9\n- DISCORDANT_GENOME = 17\n-\n-ORIENTATION_TAG_NAME = "XD"\n-class OrientationTags(object):\n- NONE = 0\n- FIVEPRIME = 1\n- THREEPRIME = 2\n-\n-def cmp_orientation(a,b):\n- if (a == OrientationTags.NONE) or (b == OrientationTags.NONE):\n- return True\n- return (a != b)\n-\n-# constants\n-MULTIMAP_BINS = (1,2,4,8,16,32,64,128)\n-CHIMERA_SEP = "|"\n-# amount of trimming to use to stop reads from overlapping \n-# exon boundaries and going into intronic space\n-EXON_JUNCTION_TRIM_BP = 10\n-\n-# chimera types\n-class ChimeraTypes(object):\n- INTERCHROMOSOMAL = "Interchromosomal"\n- OVERLAP_SAME = "Overlapping_Same"\n- OVERLAP_CONVERGE = "Overlapping_Converging"\n- OVERLAP_DIVERGE = "Overlapping_Diverging"\n- OVERLAP_COMPLEX = "Overlapping_Complex"\n- READTHROUGH = "Read_Through"\n- ADJ_CONVERGE = "Adjacent_Converging"\n- ADJ_DIVERGE = "Adjacent_Diverging"\n- ADJ_COMPLEX = "Adjacent_Complex"\n- INTRACHROMOSOMAL = "Intrachromosomal"\n- INTRA_CONVERGE = "Intrachromosomal_Converging"\n- INTRA_DIVERGE = "Intrachromsomal_Diverging"\n- INTRA_COMPLEX = "Intrachromosomal_Complex"\n- UNKNOWN = "Undetermined"\n-\n-class DiscordantRead(object):\n- """\n- stores read alignment information needed to nominate \n- chimeric transcripts\n-\n- (this is a subset of what is kept in SAM file)\n- """\n- def __init__(self):\n- self.qname = ""\n- self.hit_index = -1\n- self.readnum = -1\n- self.seq = ""\n- self.tid = -1\n- self.pos = -1\n- self.aend = -1\n- self.clipstart = -1\n- self.clipend = -1\n- self.is_reverse = False\n- self.numhits = 0\n- self.mismatches = 0\n- self.discordant_type = 0\n- 
self.orientation = 0\n- self.is_spanning = False\n-\n- @staticmethod\n- def from_read(r):\n- a = DiscordantRead()\n- a.qname = r.qname\n- a.hit_index = r.opt(\'HI\')\n- a.readnum = 1 if r.is_read2 else 0\n- a.seq = r.seq\n- a.tid = r.rname\n- a.pos = r.pos\n- a.aend = r.aend\n- a.clipstart, a.clipend = get_clipped_interval(r)\n- a.is_reverse = r.is_reverse\n- a.numhits = r.opt(\'NH\')\n- a.mismatches = r.opt(\'NM\')\n- a.discordant_type = r.opt(DISCORDANT_TAG_NAME)\n- a.orientation = r.opt(ORIENTATION_TAG_NAME)\n- a.is_spanning = False\n- return a\n-\n- @staticmethod\n- def from_list(fields):\n- a = DiscordantRead()\n- a.qname = fields[0]\n- a.hit_index = int(fields[1])\n- a.readnum = int(fields[2])\n- a.seq = fields[3]\n- a.tid = int(fields[4])\n- a.pos = int(fields[5])\n- a.aend = int(fields[6])\n- a.clipstart = int(fields[7])\n- a.clipend = int(fields[8])\n- a.is_reverse = True if int(fields[9]) == 1 else False\n- a.numhits = int(fields[10])\n- a.mismatches = int(fields[11])\n- a.discordant_type = int(fields[12])\n- a.orientation = int(fields[13])\n- a.is_spanning = True if int(fields[14]) == 1 else False\n- return a\n-\n- def to_list(self):\n- return [self.qname, self.hit_index, self.readnum, self.seq, \n- self.tid, self.pos, self.aend, self.clipstart, \n- self.clipend, int(self.is_reverse), self.numhits, \n- self.mismatches, self.discordant_type, \n- self.orientation, int(self.is_spanning)]\n-\n-\n-def frags_to_encomp_string(frags):\n- if len(frags) == 0:\n- return "None"\n- # encompassing read pairs\n- encomp_frags = []\n- for frag in frags:\n- r5p = Chimera.FIEL'..b' dreads.append(DiscordantRead.from_list(read_fields.split(c.FIELD_DELIM)))\n- c.encomp_frags.append(dreads)\n- # raw spanning read information\n- spanning_reads_field = parse_string_none(fields[20])\n- if spanning_reads_field is not None:\n- for read_fields in spanning_reads_field.split(c.READ_DELIM):\n- c.spanning_reads.append(DiscordantRead.from_list(read_fields.split(c.FIELD_DELIM))) \n- 
return c\n-\n- @staticmethod\n- def parse(line_iter):\n- for line in line_iter:\n- if line.startswith("#"):\n- continue \n- fields = line.strip().split(\'\\t\')\n- yield Chimera.from_list(fields)\n-\n- def to_list(self):\n- # reads\n- if len(self.spanning_reads) == 0:\n- span_string = None\n- else:\n- span_string = Chimera.READ_DELIM.join(Chimera.FIELD_DELIM.join(map(str,r.to_list())) \n- for r in self.spanning_reads)\n- return [self.tx_name_5p, self.tx_start_5p, self.tx_end_5p,\n- self.tx_name_3p, self.tx_start_3p, self.tx_end_3p,\n- self.name, self.score, \n- self.tx_strand_5p, self.tx_strand_3p,\n- self.gene_name_5p, self.gene_name_3p,\n- "%d-%d" % (self.exons_5p[0], self.exons_5p[1]),\n- "%d-%d" % (self.exons_3p[0], self.exons_3p[1]),\n- self.breakpoint_name,\n- self.breakpoint_seq_5p,\n- self.breakpoint_seq_3p,\n- self.homology_left,\n- self.homology_right,\n- frags_to_encomp_string(self.encomp_frags),\n- span_string]\n-\n- def get_num_unique_positions(self):\n- """\n- calculates total number of unique read alignment\n- positions supporting chimera\n- """\n- # find all unique alignment positions and read names\n- encomp_pos = set()\n- qnames = set()\n- for pair in self.encomp_frags:\n- if pair[0].qname not in qnames:\n- qnames.add(pair[0].qname)\n- encomp_pos.add((pair[0].pos, pair[1].pos))\n- # add spanning reads\n- spanning_pos = set()\n- for dr in self.spanning_reads:\n- if dr.qname not in qnames:\n- qnames.add(dr.qname)\n- spanning_pos.add(dr.pos)\n- return len(encomp_pos) + len(spanning_pos)\n-\n- def get_num_frags(self, maxnumhits=0):\n- """\n- number of unique fragments supporting the \n- chimera (by read name)\n- """\n- qnames = set()\n- for pair in self.encomp_frags:\n- if (maxnumhits > 0) and (min(pair[0].numhits, pair[1].numhits) > maxnumhits):\n- continue\n- qnames.add(pair[0].qname)\n- for dr in self.spanning_reads:\n- if (maxnumhits > 0) and (dr.numhits > maxnumhits):\n- continue\n- qnames.add(dr.qname)\n- return len(qnames)\n-\n- def 
get_num_spanning_frags(self, maxnumhits=0):\n- """\n- number of unique spanning fragments supporting the \n- chimera (by read name)\n- """\n- qnames = set()\n- for dpair in self.encomp_frags:\n- if (maxnumhits > 0) and (min(dpair[0].numhits, dpair[1].numhits) > maxnumhits):\n- continue\n- if any(dr.is_spanning for dr in dpair):\n- qnames.add(dpair[0].qname) \n- for dr in self.spanning_reads:\n- if (maxnumhits > 0) and (dr.numhits > maxnumhits):\n- continue\n- qnames.add(dr.qname)\n- return len(qnames) \n-\n- def get_spanning_reads(self):\n- for dpair in self.encomp_frags:\n- if dpair[0].is_spanning:\n- yield dpair[0]\n- if dpair[1].is_spanning:\n- yield dpair[1]\n- for dr in self.spanning_reads:\n- yield dr\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/config.py --- a/chimerascan/lib/config.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,116 +0,0 @@ -''' -Created on Jan 5, 2011 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -JOB_SUCCESS = 0 -JOB_ERROR = 1 - -# constants for index -ALIGN_INDEX = 'align_index' -ALIGN_INDEX_FASTA_FILE = 'align_index.fa' -BOWTIE_INDEX_FILE = 'align_index.1.ebwt' -GENE_REF_PREFIX = 'gene_' -GENE_FEATURE_FILE = "gene_features.txt" - -# chimerascan subdirectories -LOG_DIR = "log" -TMP_DIR = "tmp" - -# constraints for run configuration -BASE_PROCESSORS = 2 -MIN_SEGMENT_LENGTH = 20 -RUNCONFIG_XML_FILE = "runconfig.xml" - -# output after read inspection, name conversion, and -# quality score conversion -CONVERTED_FASTQ_PREFIX = "reads" -CONVERTED_FASTQ_FILES = tuple(CONVERTED_FASTQ_PREFIX + "_%d.fq" % (x+1) - for x in xrange(2)) - -# output from initial bowtie alignment -ALIGNED_READS_BAM_FILE = "aligned_reads.bam" -UNALIGNED_FASTQ_PARAM = "unaligned.fq" -UNALIGNED_FASTQ_FILES = ("unaligned_1.fq", "unaligned_2.fq") -MAXMULTIMAP_FASTQ_PARAM = "maxmulti.fq" -MAXMULTIMAP_FASTQ_FILES = ("maxmulti_1.fq", "maxmulti_2.fq") - -# sorted aligned reads bam file -SORTED_ALIGNED_READS_BAM_FILE = "sorted_aligned_reads.bam" - -# insert size estimation parameters -ISIZE_MIN_SAMPLES = 100 -ISIZE_MAX_SAMPLES = 1e6 -ISIZE_DIST_FILE = "isize_dist.txt" - -# output from realignment of trimmed reads 
-REALIGNED_BAM_FILE = "realigned_reads.bam" - -# output for different classes of discordant reads -GENE_PAIRED_BAM_FILE = "gene_paired_reads.bam" -GENOME_PAIRED_BAM_FILE = "genome_paired_reads.bam" -REALIGNED_UNMAPPED_BAM_FILE = "unmapped_reads.bam" -REALIGNED_COMPLEX_BAM_FILE = "complex_reads.bam" - -# discordant reads BEDPE file -DISCORDANT_BEDPE_FILE = "discordant_reads.bedpe" -SORTED_DISCORDANT_BEDPE_FILE = "discordant_reads.srt.bedpe" - -# chimera candidates with encompassing read support -ENCOMPASSING_CHIMERA_FILE = "encompassing_chimeras.txt" -FILTERED_ENCOMPASSING_CHIMERA_FILE = "encompassing_chimeras.filtered.txt" - -# amount of trimming to use to stop reads from overlapping -# exon boundaries and going into intronic space -EXON_JUNCTION_TRIM_BP = 10 - -# number of homology mismatches in breakpoint sequences -# to tolerate when computing homology distance -BREAKPOINT_HOMOLOGY_MISMATCHES = 2 -BREAKPOINT_CHIMERA_FILE = "encompassing_chimeras.breakpoint_sorted.txt" -BREAKPOINT_MAP_FILE = "breakpoints.txt" -BREAKPOINT_FASTA_FILE = "breakpoints.fa" -BREAKPOINT_BOWTIE_INDEX = "breakpoints" -BREAKPOINT_BOWTIE_INDEX_FILE = "breakpoints.1.ebwt" - -# reads to remap to breakpoint junction index -ENCOMP_SPANNING_FASTQ_FILE = "encomp_spanning_reads.fq" -SINGLE_MAPPED_BAM_FILE = "singlemap_reads.srt.bam" -SINGLEMAP_SPANNING_FASTQ_FILE = "singlemap_spanning_reads.fq" -UNALIGNED_SPANNING_FASTQ_FILE = "unaligned_spanning_reads.fq" - -# results of aligning reads to breakpoint index -ENCOMP_SPANNING_BAM_FILE = "encomp_spanning_reads.bam" -SORTED_ENCOMP_SPANNING_BAM_FILE = "encomp_spanning_reads.srt.bam" -SINGLEMAP_SPANNING_BAM_FILE = "singlemap_spanning_reads.bam" -SORTED_SINGLEMAP_SPANNING_BAM_FILE = "singlemap_spanning_reads.srt.bam" -UNALIGNED_SPANNING_BAM_FILE = "unaligned_spanning_reads.bam" -SORTED_UNALIGNED_SPANNING_BAM_FILE = "unaligned_spanning_reads.srt.bam" - -# results of merging spanning reads into chimera nominations -SPANNING_CHIMERA_FILE = 
"spanning_chimeras.txt" -# results of resolving ambiguous reads -RESOLVED_SPANNING_CHIMERA_FILE = "spanning_chimeras.resolved.txt" -# results of filtering chimeras -FILTERED_CHIMERA_FILE = "spanning_chimeras.resolved.filtered.txt" -HOMOLOG_FILTERED_CHIMERA_FILE = "spanning_chimeras.resolved.filtered.homolog.txt" -BEST_FILTERED_CHIMERA_FILE = "spanning_chimeras.resolved.filtered.homolog.best_isoform.txt" -# output file -CHIMERA_OUTPUT_FILE = "chimeras.bedpe" \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/fastq_to_bam.py --- a/chimerascan/lib/fastq_to_bam.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,99 +0,0 @@ -''' -Created on Apr 28, 2011 - -@author: mkiyer -''' -from chimerascan import pysam -from math import log10 -from string import maketrans - -def get_solexa_qual_conversion_table(): - """ - return a translation table that can be used by str.translate() for - converting solexa to sanger quality scores - """ - offset = 64 - conv_table = ['!'] * 256 - conv_table[offset:] = "I" * (256-offset) - for solq in xrange(-5, 40): - phredq = 10*log10(1 + 10**(solq/10.0)) - phredchr = chr(int(round(33 + phredq))) - conv_table[offset + solq] = phredchr - conv_string = ''.join(conv_table) - return maketrans(''.join(map(chr, range(256))), conv_string) - -def get_illumina_qual_conversion_table(): - """Illumina 1.3+ format can encode a Phred quality score from 0 to 62 - using ASCII 64 to 126 (although in raw read data Phred scores from 0 - to 40 only are expected). - """ - offset = 64 - conv_table = ['!'] * 256 - for x in xrange(0, 62): - conv_table[offset+x] = chr(33 + x) - conv_table[offset+40:] = "I" * (256-(offset+40)) - conv_string = ''.join(conv_table) - return maketrans(''.join(map(chr, range(256))), conv_string) - -def get_sanger_qual_conversion_table(): - offset = 33 - tbl = map(chr, range(256)) - tbl[:offset] = "!" 
* offset - tbl[offset+40:] = "I" * (256-(offset+40)) - return maketrans(''.join(map(chr, range(256))), ''.join(tbl)) - -conv_tables = {"sanger": get_sanger_qual_conversion_table(), - "illumina": get_illumina_qual_conversion_table(), - "solexa": get_solexa_qual_conversion_table()} - -def parse_fastq(line_iter): - with line_iter: - while True: - rid = line_iter.next().rstrip()[1:] - seq = line_iter.next().rstrip() - line_iter.next() - qual = line_iter.next().rstrip() - yield rid, seq, qual - -def fastq_to_bam(fastq_files, qual_format, bam_file): - fqfhs = [parse_fastq(open(f)) for f in fastq_files] - qual_trans_table = conv_tables[qual_format] - header = {'HD': {'VN': '1.0', 'SO': 'unknown'}} -# 'SQ': [{'LN': 1, 'SN': 'dummy'}]} - bamfh = pysam.Samfile(bam_file, "wb", header=header) - try: - while True: - for i,fqiter in enumerate(fqfhs): - id,seq,qual = fqiter.next() - a = pysam.AlignedRead() - a.rname = -1 - a.mrnm = -1 - #a.pos = 0 - #a.mpos = 0 - a.qname = id - a.seq = seq - a.qual = qual.translate(qual_trans_table) - a.is_read1 = (i == 0) - a.is_read2 = (i == 1) - bamfh.write(a) - except StopIteration: - pass - bamfh.close() - -def bam_to_fastq(bam_file, fastq_files): - fqfhs = [open(f, "w") for f in fastq_files] - bamfh = pysam.Samfile(bam_file, "rb") - for r in bamfh: - if r.is_read1: - i = 0 - elif r.is_read2: - i = 1 - record = "@%s\n%s\n+\n%s" % (r.qname,r.seq,r.qual) - print >>fqfhs[i], record - -if __name__ == '__main__': - sol2std = get_solexa_qual_conversion_table() - illumina2std = get_illumina_qual_conversion_table() - import sys - fastq_to_bam(["read1.fq", "read2.fq"], "solexa", "hi.bam") - bam_to_fastq("hi.bam", ["a1.fq", "a2.fq"]) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/feature.py --- a/chimerascan/lib/feature.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,180 +0,0 @@ -''' -Created on Dec 18, 2010 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import logging -import itertools - -class GeneFeature(object): - __slots__ = ('chrom', 'tx_start', 'tx_end', 'tx_name', 'gene_name', - 'strand', 'cds_start', 'cds_end', 'exon_count', 'exons') - - def __str__(self): - fields = [self.tx_name, - self.chrom, - self.strand, - str(self.tx_start), - str(self.tx_end), - str(self.cds_start), - str(self.cds_end), - str(self.exon_count), - ','.join(map(str, [e[0] for e in self.exons])) + ',', - ','.join(map(str, [e[1] for e in self.exons])) + ',', - self.gene_name] - return '\t'.join(fields) - - @staticmethod - def from_string(line): - if line is None: - return None - line = line.strip() - if line.startswith('#'): - logging.debug("skipping comment line: %s" % (line)) - return None - if line.startswith('track'): - logging.debug("skipping track header line: %s" % (line)) - return None - fields = line.split('\t') - # first six fields are required - g = GeneFeature() - g.tx_name = fields[0] - g.chrom = fields[1] - g.strand = fields[2] - g.tx_start = int(fields[3]) - g.tx_end = int(fields[4]) - g.cds_start = int(fields[5]) - g.cds_end = int(fields[6]) - g.exon_count = int(fields[7]) - exon_starts = map(int, fields[8].split(',')[:-1]) - exon_ends = 
map(int, fields[9].split(',')[:-1]) - g.exons = zip(exon_starts, exon_ends) - g.gene_name = fields[10] - return g - - @staticmethod - def parse(line_iter): - for line in line_iter: - if not line: - continue - if not line.strip(): - continue - if line.startswith("#"): - continue - if line.startswith("track"): - continue - yield GeneFeature.from_string(line) - - def get_exon_interval(self, pos): - """ - returns a tuple containing the exon number and start/end - coordinates relative to the transcript - """ - exon_iter = reversed(self.exons) if self.strand == '-' else iter(self.exons) - exon_pos = 0 - exon_num = 0 - for exon_start, exon_end in exon_iter: - exon_size = exon_end - exon_start - if exon_pos + exon_size >= pos: - break - exon_pos += exon_size - exon_num += 1 - if exon_pos + exon_size < pos: - logging.warning("exon_pos %d + exon_size %d < pos %d - clipping to " - "end of gene" % (exon_pos, exon_size, pos)) - return exon_num, exon_pos, exon_pos + exon_size - - -class BEDFeature(object): - __slots__ = ('chrom', 'tx_start', 'tx_end', 'name', 'score', 'strand', - 'cds_start', 'cds_end', 'exon_count', 'block_starts', - 'block_sizes', 'exons', 'attr_fields') - - def __str__(self): - fields = [self.chrom, - str(self.tx_start), - str(self.tx_end), - self.name, - str(self.score), - self.strand, - str(self.cds_start), - str(self.cds_end), - '0', - str(self.exon_count), - ','.join(map(str, self.block_sizes)) + ',', - ','.join(map(str, self.block_starts)) + ','] - return '\t'.join(fields) - - @staticmethod - def from_string(line): - if line is None: - return None - line = line.strip() - if line.startswith('#'): - logging.debug("skipping comment line: %s" % (line)) - return None - if line.startswith('track'): - logging.debug("skipping track header line: %s" % (line)) - return None - fields = line.split('\t') - # first six fields are required - g = BEDFeature() - g.chrom = fields[0] - g.tx_start = int(fields[1]) - g.tx_end = int(fields[2]) - g.name = fields[3] - if 
len(fields) <= 4: - g.score = 0 - g.strand = '.' - else: - g.score = fields[4] - g.strand = fields[5] - if len(fields) <= 6: - g.cds_start = g.tx_start - g.cds_end = g.tx_end - g.exon_count = 1 - g.exons = [(g.tx_start, g.tx_end)] - else: - g.cds_start = int(fields[6]) - g.cds_end = int(fields[7]) - g.exon_count = int(fields[9]) - g.block_sizes = map(int, fields[10].split(',')[:-1]) - g.block_starts = map(int, fields[11].split(',')[:-1]) - g.exons = [] - for start, size in itertools.izip(g.block_starts, g.block_sizes): - g.exons.append((g.tx_start + start, g.tx_start + start + size)) - if len(fields) <= 12: - g.attr_fields = [] - else: - g.attr_fields = fields[12:] - return g - - @staticmethod - def parse(line_iter): - for line in line_iter: - if not line: - continue - if not line.strip(): - continue - if line.startswith("#"): - continue - if line.startswith("track"): - continue - yield BEDFeature.from_string(line) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/fix_alignment_ordering.py --- a/chimerascan/lib/fix_alignment_ordering.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,137 +0,0 @@ -''' -Created on Jan 23, 2011 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import re -import collections - -ReorderBufferItem = collections.namedtuple('ReorderBufferItem', ("fqrec", "reads")) - -def fix_alignment_ordering(samfh, fqiters, - pe_sr_mode=False, - maxlen=100000): - # function for initializing new buffer entry - buf_init_func = lambda fqrecs: tuple(ReorderBufferItem(fq, []) for fq in fqrecs) - # initialize the qname dictionary to match the fastq file - buf = collections.deque() - qname_read_dict = {} - qname_mate_re = re.compile(r'/(\d)$') - for read in samfh: - # PE-SR mode means that the reads were paired in sequencing - # but aligned separately. 
The function uses the /1 and /2 - # suffixes in the reads to join them during buffer reordering - if pe_sr_mode: - # get read num (1 or 2) from the qname field of SAM read - read_qname, readnum = qname_mate_re.split(read.qname)[0:2] - readnum = int(readnum) - 1 - # set flags - read.is_paired = True - read.qname = read_qname - if readnum == 0: - read.is_read1 = True - elif readnum == 1: - read.is_read2 = True - else: - assert False - # if not PE-SR mode then we can trust the 'is_read1' and 'is_read2' - # attributes of the SAM read - else: - if read.is_read2: - readnum = 1 - else: - readnum = 0 - # check if this read is already in the buffer - if read.qname not in qname_read_dict: - # if buffer full empty the first entries - while len(buf) >= maxlen: - # get first qname in buf - first_qname = buf.popleft() - # return reads at this qname, then delete them - yield qname_read_dict[first_qname] - del qname_read_dict[first_qname] - # add new qnames to buffer - while True: - # get next qname from fastq file and add it to the queue - fqrecs = [it.next() for it in fqiters] - next_qname = fqrecs[0].qname - buf.append(next_qname) - qname_read_dict[next_qname] = buf_init_func(fqrecs) - # if the next qname in the fastq file is the same as the - # read qname, then we can exit the loop - if next_qname == read.qname: - break - # add read to buffer - qname_read_dict[read.qname][readnum].reads.append(read) - # empty remaining entries in buffer - while len(buf) > 0: - yield qname_read_dict[buf.popleft()] - - -def fix_sr_alignment_ordering(samfh, fqiter, - maxlen=100000): - # function for initializing new buffer entry - buf_init_func = lambda fqrec: [ReorderBufferItem(fqrec, [])] - # initialize the qname dictionary to match the fastq file - buf = collections.deque() - qname_read_dict = {} - qname_mate_re = re.compile(r'/(\d)$') - for read in samfh: - # get read num (1 or 2) from the qname field of SAM read - read_qname, readnum = qname_mate_re.split(read.qname)[0:2] - readnum = 
int(readnum) - 1 - # set flags - read.is_paired = True - read.qname = read_qname - if readnum == 0: - read.is_read1 = True - elif readnum == 1: - read.is_read2 = True - else: - assert False - # set key for indexing reads - key = (read_qname, readnum) - # check if this read is already in the buffer - if key not in qname_read_dict: - # if buffer full empty the first entries - while len(buf) >= maxlen: - # get first key in buf - first_key = buf.popleft() - # return reads at this qname, then delete them - yield qname_read_dict[first_key] - del qname_read_dict[first_key] - # add new qnames to buffer - while True: - # get next qname from fastq file and add it to the queue - fqrec = fqiter.next() - next_key = (fqrec.qname, fqrec.readnum-1) - buf.append(next_key) - qname_read_dict[next_key] = buf_init_func(fqrec) - # if the next qname in the fastq file is the same as the - # read qname, then we can exit the loop - if next_key == key: - break - # add read to buffer - qname_read_dict[key][0].reads.append(read) - # empty remaining entries in buffer - while len(buf) > 0: - yield qname_read_dict[buf.popleft()] - - |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/fragment_size_distribution.py --- a/chimerascan/lib/fragment_size_distribution.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,286 +0,0 @@\n-\'\'\'\n-Created on Apr 29, 2011\n-\n-@author: mkiyer\n-\n-chimerascan: chimeric transcript discovery using RNA-seq\n-\n-Copyright (C) 2011 Matthew Iyer\n-\n-This program is free software: you can redistribute it and/or modify\n-it under the terms of the GNU General Public License as published by\n-the Free Software Foundation, either version 3 of the License, or\n-(at your option) any later version.\n-\n-This program is distributed in the hope that it will be useful,\n-but WITHOUT ANY WARRANTY; without even the implied warranty of\n-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-GNU General Public License for more details.\n-\n-You should have received a copy of the GNU General Public License\n-along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\'\'\'\n-import collections\n-import array\n-import logging\n-import random\n-\n-from chimerascan.bx.intersection import Interval, IntervalTree\n-\n-# local imports\n-from sam import parse_pe_reads, CIGAR_N, CIGAR_S, CIGAR_H, CIGAR_P\n-from feature import GeneFeature\n-\n-# SAM CIGAR flags that indicate skipping, padding, or clipping\n-SKIP_CIGAR_FLAGS = set((CIGAR_N, CIGAR_S, CIGAR_H, CIGAR_P)) \n-\n-def build_exon_trees(genes):\n- trees = collections.defaultdict(lambda: IntervalTree())\n- for g in genes: \n- for e in g.exons:\n- start, end = e\n- trees[g.chrom].insert_interval(Interval(start, end, strand=g.strand))\n- return trees\n-\n-def find_unambiguous_exon_intervals(genes):\n- """\n- returns (chrom, start, end, strand) tuples for exon\n- intervals that are unique and have no overlapping\n- transcripts or exons. 
\n- """\n- trees = build_exon_trees(genes) \n- for g in genes:\n- for start,end in g.exons:\n- hits = set((hit.start, hit.end, hit.strand) \n- for hit in trees[g.chrom].find(start, end))\n- hits.add((start, end, g.strand))\n- if len(hits) == 1:\n- yield g.chrom, start, end, g.strand\n-\n-def sample_fragment_sizes(bamfh, genes, min_isize, max_isize):\n- """\n- sample fragment size distribution at genes with exons\n- larger than the maximum insert size\n- """\n- # find all exons that are larger than the maximum estimated fragment size\n- exons = set(coord for coord in find_unambiguous_exon_intervals(genes)\n- if (coord[2] - coord[1]) >= max_isize)\n- logging.info("Found %d exons larger than %d" % (len(exons), max_isize))\n- refs = set(bamfh.references)\n- # stats\n- num_reads = 0\n- unmapped = 0\n- ambiguous = 0\n- spliced = 0\n- outside_range = 0\n- count = 0\n- # fetch reads from BAM file at large exons\n- for chrom,start,end,strand in exons:\n- if chrom not in refs:\n- logging.warning("Skipping exon from reference %s not in BAM" % (chrom))\n- continue \n- qname_dict = collections.defaultdict(lambda: [])\n- for r in bamfh.fetch(chrom, start, end):\n- num_reads += 1\n- # ignore unmapped reads, qc fail reads, or unpaired reads\n- if r.is_unmapped or r.is_qcfail or (not r.is_proper_pair):\n- unmapped += 1\n- continue\n- # ignore multi-mapping reads\n- if r.opt(\'NH\') > 1:\n- ambiguous += 1\n- continue\n- # ignore spliced reads\n- has_skip = any(x[0] in SKIP_CIGAR_FLAGS for x in r.cigar)\n- if has_skip:\n- spliced += 1\n- continue \n- # group paired-end reads by read name\n- qname_dict[r.qname].append(abs(r.isize))\n- # keep paired reads with both mates in region\n- for isizes in qname_dict.itervalues():\n- isizes = set(abs(x) for x in isizes)\n- assert len(isizes) == 1\n- isize = isizes.pop()\n- if (min_isize <= isize <= max_isize):\n- count += 1\n- yield isize\n- else:\n- '..b'h, \'\\t\'.join([str(i + self.min_isize), str(x)]) \n-\n- @staticmethod\n- def 
from_file(fileh):\n- isizes = []\n- counts = []\n- for line in fileh:\n- if line.startswith("#"):\n- continue\n- fields = line.strip().split(\'\\t\')\n- i,x = map(int, fields[0:2])\n- isizes.append(i)\n- counts.append(x)\n- d = InsertSizeDistribution()\n- d.min_isize = isizes[0]\n- d.max_isize = isizes[-1]\n- d.arr = array.array(\'L\', counts) \n- return d\n-\n- @staticmethod\n- def from_random(mean, stdev, min_isize, max_isize, samples=100000):\n- """\n- initialize from a random sample using normal distribution with \n- mean \'mean\' and stdev \'stdev\'\n- """\n- d = InsertSizeDistribution()\n- # implement simple checks\n- assert min_isize < mean < max_isize\n- assert stdev < (max_isize - min_isize)\n- # initialize\n- d.min_isize = min_isize\n- d.max_isize = max_isize\n- d.arr = array.array(\'L\', (0 for x in xrange(min_isize, max_isize+1)))\n- count = 0\n- outside_range = 0\n- while True:\n- if count > samples:\n- break\n- isize = int(round(random.normalvariate(mean, stdev),0))\n- if (min_isize <= isize <= max_isize):\n- # store in array\n- d.arr[isize - min_isize] += 1\n- count += 1\n- else:\n- outside_range += 1\n- return d\n-\n- @staticmethod\n- def from_bam(bamfh, min_isize, max_isize, max_samples=None):\n- # initialize\n- d = InsertSizeDistribution()\n- d.min_isize = min_isize\n- d.max_isize = max_isize\n- d.arr = array.array(\'L\', (0 for x in xrange(min_isize, max_isize+1))) \n- frags = 0 \n- count = 0\n- outside_range = 0\n- unmapped = 0\n- isoforms = 0\n- for pe_reads in parse_pe_reads(bamfh):\n- frags += 1\n- if (max_samples is not None) and (count > max_samples):\n- break\n- # only allow mappings where there is a single\n- # insert size (multiple isoforms are ambiguous)\n- isizes = set() \n- for r in pe_reads[0]:\n- if r.is_unmapped:\n- continue\n- # get insert size\n- isize = r.isize\n- if isize < 0: isize = -isize\n- isizes.add(isize)\n- # insert size must be within range\n- if len(isizes) == 0:\n- unmapped += 1\n- elif len(isizes) > 1:\n- isoforms 
+= 1\n- else:\n- isize = isizes.pop()\n- if (min_isize <= isize <= max_isize):\n- # store in array\n- d.arr[isize - min_isize] += 1\n- count += 1\n- else:\n- outside_range += 1\n- logging.debug("Processed fragments: %d" % (frags))\n- logging.debug("Unique paired frags: %d" % (count))\n- logging.debug("Unmapped: %d" % (unmapped))\n- logging.debug("Ambiguous (isoforms): %d" % (isoforms))\n- logging.debug("Outside range: %d" % (outside_range))\n- return d\n- \n- @staticmethod\n- def from_genome_bam(bamfh, genes, min_isize, max_isize, max_samples=None):\n- # initialize\n- d = InsertSizeDistribution()\n- d.min_isize = min_isize\n- d.max_isize = max_isize\n- d.arr = array.array(\'L\', (0 for x in xrange(min_isize, max_isize+1)))\n- count = 0\n- for isize in sample_fragment_sizes(bamfh, genes, min_isize, max_isize):\n- if (min_isize <= isize <= max_isize):\n- # store in array\n- d.arr[isize - min_isize] += 1\n- count += 1\n- if (max_samples is not None) and (count > max_samples):\n- break\n- return d\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/gene_to_genome.py --- a/chimerascan/lib/gene_to_genome.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,166 +0,0 @@ -''' -Created on Jan 31, 2011 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import logging -import collections - -from chimerascan.bx.cluster import ClusterTree -from chimerascan.bx.intersection import Interval, IntervalTree -# local imports -from feature import GeneFeature - -def build_tid_gene_map(bamfh, genefile, rname_prefix=None): - rname_tid_map = dict((rname,tid) for tid,rname in enumerate(bamfh.references)) - rname_prefix = '' if rname_prefix is None else rname_prefix - tid_tx_map = {} - # build gene and genome data structures for fast lookup - for g in GeneFeature.parse(open(genefile)): - # only use genes that are references in the sam file - rname = rname_prefix + g.tx_name - if rname not in rname_tid_map: - continue - tid = rname_tid_map[rname] - tid_tx_map[tid] = g - return tid_tx_map - -def build_tx_name_gene_map(genefile, rname_prefix=None): - rname_prefix = '' if rname_prefix is None else rname_prefix - tx_map = {} - # build gene and genome data structures for fast lookup - for g in GeneFeature.parse(open(genefile)): - tx_map[rname_prefix + g.tx_name] = g - return tx_map - -def build_genome_tx_trees(genefile): - genome_tx_trees = collections.defaultdict(lambda: IntervalTree()) - # build gene and genome data structures for fast lookup - for g in 
GeneFeature.parse(open(genefile)): - # add gene to interval tree - interval = Interval(g.tx_start, g.tx_end, strand=g.strand, value=g) - genome_tx_trees[g.chrom].insert_interval(interval) - return genome_tx_trees - -def build_transcript_cluster_map(line_iter, rname_prefix=None): - # setup cluster trees - chrom_strand_cluster_trees = \ - collections.defaultdict(lambda: {"+": ClusterTree(0,1), - "-": ClusterTree(0,1)}) - transcripts = [] - index_cluster_map = {} - for transcript in GeneFeature.parse(line_iter): - # insert exons into cluster tree - cluster_tree = chrom_strand_cluster_trees[transcript.chrom][transcript.strand] - i = len(transcripts) - for start,end in transcript.exons: - cluster_tree.insert(start, end, i) - # each transcript is initially in a cluster by itself - index_cluster_map[i] = set([i]) - transcripts.append(transcript) - # extract gene clusters - for strand_cluster_trees in chrom_strand_cluster_trees.itervalues(): - for cluster_tree in strand_cluster_trees.itervalues(): - for start, end, indexes in cluster_tree.getregions(): - # make new cluster by aggregating all existing - # clusters with new indexes - newclust = set(indexes) - for i in indexes: - newclust.update(index_cluster_map[i]) - # map every transcript to the new cluster - for i in newclust: - index_cluster_map[i] = newclust - # enumerate all clusters - rname_prefix = '' if rname_prefix is None else rname_prefix - transcript_cluster_map = {} - for cluster_id, clust in enumerate(index_cluster_map.values()): - for i in clust: - transcript = transcripts[i] - transcript_cluster_map[rname_prefix + transcript.tx_name] = cluster_id - return transcript_cluster_map - -def build_transcript_tid_cluster_map(bamfh, line_iter, rname_prefix=None): - # make the standard cluster map - transcript_cluster_map = build_transcript_cluster_map(line_iter, rname_prefix) - # map reference name to tid - transcript_tid_map = {} - rname_prefix = '' if rname_prefix is None else rname_prefix - for tid,rname in 
enumerate(bamfh.references): - if rname.startswith(rname_prefix): - transcript_tid_map[rname] = tid - # remake the cluster map - tid_cluster_map = {} - for rname, cluster_id in transcript_cluster_map.iteritems(): - if rname not in transcript_tid_map: - continue - tid = transcript_tid_map[rname] - tid_cluster_map[tid] = cluster_id - return tid_cluster_map - -def build_transcript_genome_map(line_iter, rname_prefix=None): - # create arrays to map genes in bed file to genome - rname_prefix = '' if rname_prefix is None else rname_prefix - transcript_genome_map = {} - for g in GeneFeature.parse(line_iter): - rname = rname_prefix + g.tx_name - strand = 1 if g.strand == '-' else 0 - exon_vectors = [(start, end) for start, end in g.exons] - if strand: - exon_vectors.reverse() - if rname in transcript_genome_map: - logging.error("Duplicate references %s found in bed file" % (rname)) - transcript_genome_map[rname] = (g.chrom, strand, exon_vectors) - return transcript_genome_map - -def build_transcript_tid_genome_map(bamfh, line_iter, rname_prefix=None): - # make the standard map - transcript_genome_map = build_transcript_genome_map(line_iter, rname_prefix) - # map reference name to tid - rname_prefix = '' if rname_prefix is None else rname_prefix - transcript_tid_map = {} - for tid,rname in enumerate(bamfh.references): - if rname.startswith(rname_prefix): - transcript_tid_map[rname] = tid - # remap using tid as key - tid_genome_map = {} - for rname, coords in transcript_genome_map.iteritems(): - if rname not in transcript_tid_map: - continue - tid = transcript_tid_map[rname] - tid_genome_map[tid] = coords - return tid_genome_map - -def transcript_to_genome_pos(rname, pos, transcript_genome_map): - ''' - translate gene 'rname' position 'gene_pos' to genomic - coordinates. 
returns a 3-tuple with (chrom, strand, pos) - ''' - chrom, strand, intervals = transcript_genome_map[rname] - offset = 0 - for start, end, in intervals: - exon_size = end - start - if pos < offset + exon_size: - if strand: - return chrom, strand, start + exon_size - (pos - offset) - 1 - else: - return chrom, strand, start + (pos - offset) - #print start, end, offset, pos - offset += exon_size - return None \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/gtf.py --- a/chimerascan/lib/gtf.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,141 +0,0 @@ -''' -Created on Nov 2, 2010 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import subprocess -import os - -GTF_EMPTY_FIELD = '.' -GTF_ATTR_SEP = ';' -GTF_ATTR_TAGVALUE_SEP = ' ' - -def sort_gtf(filename, output_file): - args = ["sort", "-k1,1", "-k4,4n", "-k3,3r", filename] - myenv = os.environ.copy() - myenv["LC_ALL"] = "C" - subprocess.call(args, stdout=open(output_file, "w"), env=myenv) - -def window_overlap(a, b): - if a[0] != b[0]: - return False - return (a[1] <= b[2]) and (b[1] <= a[2]) - -def separate_loci(feature_iter): - try: - # initialize window - window = [feature_iter.next()] - window_range = (window[0].seqid, window[0].start, window[0].end) - # separate into loci - for feature in feature_iter: - # check if next transcript is outside current window - interval = (feature.seqid, feature.start, feature.end) - if not window_overlap(interval, window_range): - # yield current window - yield window - # reset window - window = [feature] - window_range = (feature.seqid, feature.start, feature.end) - else: - # add transcript to window - window.append(feature) - window_range = (feature.seqid, - min(window_range[1], feature.start), - max(window_range[2], feature.end)) - except StopIteration: - pass - # yield last window - if len(window) > 0: - yield window - 
-class GTFFeature(object): - ''' - 1. seqname - The name of the sequence. Must be a chromosome or scaffold. - 2. source - The program that generated this feature. - 3. feature - The name of this type of feature. Some examples of standard feature types are "CDS", "start_codon", "stop_codon", and "exon". - 4. start - The starting position of the feature in the sequence. The first base is numbered 1. - 5. end - The ending position of the feature (inclusive). - 6. score - A score between 0 and 1000. If the track line useScore attribute is set to 1 for this annotation data set, the score value will determine the level of gray in which this feature is displayed (higher numbers = darker gray). If there is no score value, enter ".". - 7. strand - Valid entries include '+', '-', or '.' (for don't know/don't care). - 8. phase - If the feature is a coding exon, frame should be a number between 0-2 that represents the reading frame of the first base. If the feature is not a coding exon, the value should be '.'. - - chr1 Cufflinks transcript 136546 137059 1000 . . 
gene_id "VCAP_SHEZH2.657699"; transcript_id "VCAP_SHEZH2.657699.1"; FPKM "100.7219943204"; frac "1.000000"; conf_lo "80.649925"; conf_hi "120.794064"; cov "2.198209"; - ''' - __slots__ = ('seqid', 'source', 'feature_type', 'start', 'end', 'score', 'strand', 'phase', 'attrs') - - def __str__(self): - line = [self.seqid, - self.source, - self.feature_type, - # convert to 1-based intervals - str(self.start + 1), - str(self.end), - str(self.score), - str(self.strand), - self.phase] - attr_str = ' '.join('%s "%s";' % (k, v) for (k, v) in self.attrs.iteritems()) - line.append(attr_str) - return '\t'.join(line) - - @staticmethod - def from_string(line, attr_defs=None): - f = GTFFeature() - # read the GTF line - fields = line.strip().split('\t') - f.seqid = fields[0] - f.source = fields[1] - f.feature_type = fields[2] - # convert from 1-based (inclusive) to 0-based (exclusive) intervals - f.start = int(fields[3])-1 - f.end = int(fields[4]) - f.score = 0 if (fields[5] == '.') else float(fields[5]) - strand = fields[6] - if not (strand == '+' or strand == '-'): - strand = GTF_EMPTY_FIELD - f.strand = strand - f.phase = fields[7] - attrs = {} - if fields[8] != GTF_EMPTY_FIELD: - attr_strings = fields[8].split(GTF_ATTR_SEP) - for a in attr_strings: - a = a.strip() - if len(a) == 0: - continue - tag, value = a.split(GTF_ATTR_TAGVALUE_SEP, 1) - # remove quotes - value = value.split('"')[1] - # apply parsing function - if (attr_defs != None) and (tag in attr_defs) and (attr_defs[tag] != None): - value = attr_defs[tag](value) - attrs[tag] = value - f.attrs = attrs - return f - - @staticmethod - def parse(line_iter, attr_defs=None): - for line in line_iter: - # read the GTF line - if not line: - continue - if not line.strip(): - continue - if line.startswith("#"): - continue - yield GTFFeature.from_string(line, attr_defs) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/sam.py --- a/chimerascan/lib/sam.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,224 +0,0 @@ -''' -Created on Jun 2, 2011 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import operator - -from chimerascan import pysam -from seq import DNA_reverse_complement - -# -# constants used for CIGAR alignments -# -CIGAR_M = 0 #match Alignment match (can be a sequence match or mismatch) -CIGAR_I = 1 #insertion Insertion to the reference -CIGAR_D = 2 #deletion Deletion from the reference -CIGAR_N = 3 #skip Skipped region from the reference -CIGAR_S = 4 #softclip Soft clip on the read (clipped sequence present in <seq>) -CIGAR_H = 5 #hardclip Hard clip on the read (clipped sequence NOT present in <seq>) -CIGAR_P = 6 #padding Padding (silent deletion from the padded reference sequence) - -def parse_reads_by_qname(samfh): - """ - generator function to parse and return lists of - reads that share the same qname - """ - reads = [] - for read in samfh: - if len(reads) > 0 and read.qname != reads[-1].qname: - yield reads - reads = [] - reads.append(read) - if len(reads) > 0: - yield reads - -def parse_pe_reads(bamfh): - pe_reads = ([], []) - # reads must be sorted by qname - num_reads = 0 - prev_qname = None - for read in bamfh: - # get read attributes - qname = read.qname - readnum = 1 if read.is_read2 else 0 - # if query name changes we have completely finished - # the 
fragment and can reset the read data - if num_reads > 0 and qname != prev_qname: - yield pe_reads - # reset state variables - pe_reads = ([], []) - num_reads = 0 - pe_reads[readnum].append(read) - prev_qname = qname - num_reads += 1 - if num_reads > 0: - yield pe_reads - -def parse_unpaired_pe_reads(bamfh): - """ - parses alignments that were aligned in single read mode - and hence all hits are labeled as 'read1' and lack mate - information. instead the read1 read2 information is - attached to the 'qname' field - """ - pe_reads = ([], []) - num_reads = 0 - prev_qname = None - for read in bamfh: - # extract read1/2 from qname - readnum = int(read.qname[-1]) - if readnum == 1: - read.is_read1 = True - mate = 0 - elif readnum == 2: - mate = 1 - read.is_read2 = True - # reconstitute correct qname - qname = read.qname[:-2] - read.qname = qname - # if query name changes we have completely finished - # the fragment and can reset the read data - if num_reads > 0 and qname != prev_qname: - yield pe_reads - # reset state variables - pe_reads = ([], []) - num_reads = 0 - pe_reads[mate].append(read) - prev_qname = qname - num_reads += 1 - if num_reads > 0: - yield pe_reads - -def select_best_mismatch_strata(reads, mismatch_tolerance=0): - if len(reads) == 0: - return [] - # sort reads by number of mismatches - mapped_reads = [] - unmapped_reads = [] - for r in reads: - if r.is_unmapped: - unmapped_reads.append(r) - else: - mapped_reads.append((r.opt('NM'), r)) - if len(mapped_reads) == 0: - return unmapped_reads - sorted_reads = sorted(mapped_reads, key=operator.itemgetter(0)) - best_nm = sorted_reads[0][0] - worst_nm = sorted_reads[-1][0] - sorted_reads.extend((worst_nm+1, r) for r in unmapped_reads) - # choose reads within a certain mismatch tolerance - best_reads = [] - for mismatches, r in sorted_reads: - if mismatches > (best_nm + mismatch_tolerance): - break - best_reads.append(r) - return best_reads - -def copy_read(r): - a = pysam.AlignedRead() - a.qname = r.qname - 
a.seq = r.seq - a.flag = r.flag - a.rname = r.rname - a.pos = r.pos - a.mapq = r.mapq - a.cigar = r.cigar - a.mrnm = r.mrnm - a.mpos = r.mpos - a.isize = r.isize - a.qual = r.qual - a.tags = r.tags - return a - -def soft_pad_read(fq, r): - """ - 'fq' is the fastq record - 'r' in the AlignedRead SAM read - """ - # make sequence soft clipped - ext_length = len(fq.seq) - len(r.seq) - cigar_softclip = [(CIGAR_S, ext_length)] - cigar = r.cigar - # reconstitute full length sequence in read - if r.is_reverse: - seq = DNA_reverse_complement(fq.seq) - qual = fq.qual[::-1] - if (cigar is not None) and (ext_length > 0): - cigar = cigar_softclip + cigar - else: - seq = fq.seq - qual = fq.qual - if (cigar is not None) and (ext_length > 0): - cigar = cigar + cigar_softclip - # replace read field - r.seq = seq - r.qual = qual - r.cigar = cigar - -def pair_reads(r1, r2, tags=None): - ''' - fill in paired-end fields in SAM record - ''' - if tags is None: - tags = [] - # convert read1 to paired-end - r1.is_paired = True - r1.is_proper_pair = True - r1.is_read1 = True - r1.mate_is_reverse = r2.is_reverse - r1.mate_is_unmapped = r2.is_unmapped - r1.mpos = r2.pos - r1.mrnm = r2.rname - r1.tags = r1.tags + tags - # convert read2 to paired-end - r2.is_paired = True - r2.is_proper_pair = True - r2.is_read2 = True - r2.mate_is_reverse = r1.is_reverse - r2.mate_is_unmapped = r1.is_unmapped - r2.mpos = r1.pos - r2.mrnm = r1.rname - r2.tags = r2.tags + tags - # compute insert size - if r1.rname != r2.rname: - r1.isize = 0 - r2.isize = 0 - elif r1.pos > r2.pos: - isize = r1.aend - r2.pos - r1.isize = -isize - r2.isize = isize - else: - isize = r2.aend - r1.pos - r1.isize = isize - r2.isize = -isize - -def get_clipped_interval(r): - cigar = r.cigar - padstart, padend = r.pos, r.aend - if len(cigar) > 1: - if (cigar[0][0] == CIGAR_S or - cigar[0][0] == CIGAR_H): - padstart -= cigar[0][1] - elif (cigar[-1][0] == CIGAR_S or - cigar[-1][0] == CIGAR_H): - padend += cigar[-1][1] - return padstart, 
padend - |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/seq.py --- a/chimerascan/lib/seq.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,149 +0,0 @@ -''' -Created on Jan 5, 2011 - -@author: Dan Blankenberg - -Code from the Galaxy project (http://galaxy.psu.edu) -Contains methods to transform sequence strings -''' -import string -from math import log10 -from string import maketrans - -# Quality score formats -SANGER_FORMAT = "sanger" -SOLEXA_FORMAT = "solexa" -ILLUMINA_FORMAT = "illumina" -FASTQ_QUAL_FORMATS = [SANGER_FORMAT, SOLEXA_FORMAT, ILLUMINA_FORMAT] - -#Translation table for reverse Complement, with ambiguity codes -DNA_COMPLEMENT = string.maketrans( "ACGTRYKMBDHVacgtrykmbdhv", "TGCAYRMKVHDBtgcayrmkvhdb" ) -RNA_COMPLEMENT = string.maketrans( "ACGURYKMBDHVacgurykmbdhv", "UGCAYRMKVHDBugcayrmkvhdb" ) -#Translation table for DNA <--> RNA -DNA_TO_RNA = string.maketrans( "Tt", "Uu" ) -RNA_TO_DNA = string.maketrans( "Uu", "Tt" ) - -def DNA_complement( sequence ): - '''complement DNA sequence string''' - return sequence.translate( DNA_COMPLEMENT ) -def DNA_reverse_complement( sequence ): - '''returns the reverse complement of the sequence''' - return DNA_complement(sequence[::-1]) -def to_DNA( sequence ): - return sequence.translate( DNA_TO_RNA ) -#complement RNA sequence string -def RNA_complement( sequence ): - return sequence.translate( RNA_COMPLEMENT ) -def RNA_reverse_complement( self, sequence ): - return RNA_complement( sequence[::-1] ) -def to_RNA( sequence ): - return sequence.translate( RNA_TO_DNA ) - -def get_solexa_qual_conversion_table(): - """ - return a translation table that can be used by str.translate() for - converting solexa to sanger quality scores - """ - offset = 64 - conv_table = ['!'] * 256 - conv_table[offset:] = "I" * (256-offset) - for solq in xrange(-5, 40): - phredq = 10*log10(1 + 10**(solq/10.0)) - phredchr = chr(int(round(33 + phredq))) - conv_table[offset + solq] = phredchr - conv_string = ''.join(conv_table) - return maketrans(''.join(map(chr, range(256))), conv_string) - -def get_illumina_qual_conversion_table(): - """Illumina 1.3+ format can encode a Phred 
quality score from 0 to 62 - using ASCII 64 to 126 (although in raw read data Phred scores from 0 - to 40 only are expected). - """ - offset = 64 - conv_table = ['!'] * 256 - for x in xrange(0, 62): - conv_table[offset+x] = chr(33 + x) - conv_table[offset+40:] = "I" * (256-(offset+40)) - conv_string = ''.join(conv_table) - return maketrans(''.join(map(chr, range(256))), conv_string) - -def get_sanger_qual_conversion_table(): - offset = 33 - tbl = map(chr, range(256)) - tbl[:offset] = "!" * offset - tbl[offset+40:] = "I" * (256-(offset+40)) - return maketrans(''.join(map(chr, range(256))), ''.join(tbl)) - -def get_qual_conversion_func(qual_format): - conv_tables = {SANGER_FORMAT: get_sanger_qual_conversion_table(), - ILLUMINA_FORMAT: get_illumina_qual_conversion_table(), - SOLEXA_FORMAT: get_solexa_qual_conversion_table()} - tbl = conv_tables[qual_format] - return lambda q: q.translate(tbl) - -class FASTQRecord: - __slots__ = ("qname", "seq", "qual", "readnum") - def __init__(self, qname, seq, qual, readnum): - self.qname = qname - self.seq = seq - self.qual = qual - self.readnum = readnum - - def to_string(self): - return ("@%s/%d\n%s\n+\n%s" % - (self.qname, self.readnum, self.seq, self.qual)) - -def parse_fastq_record(line_iter, - convert_quals=False, - qual_format=SANGER_FORMAT): - qual_func = get_qual_conversion_func(qual_format) - try: - qname = line_iter.next().rstrip()[1:] - readnum = int(qname[-1]) - qname = qname[:-2] - seq = line_iter.next().rstrip() - line_iter.next() - qual = line_iter.next().rstrip() - if convert_quals: - qual = qual_func(qual) - yield FASTQRecord(qname, seq, qual, readnum) - while True: - # qname - qname = line_iter.next().rstrip()[1:] - readnum = int(qname[-1]) - qname = qname[:-2] - # seq - seq = line_iter.next().rstrip() - # qname again (skip) - line_iter.next() - # qual - qual = line_iter.next().rstrip() - if convert_quals: - qual = qual_func(qual) - yield FASTQRecord(qname, seq, qual, readnum) - except StopIteration: - pass - 
-def calc_homology(seq1, seq2, num_mismatches): - smallest_len = min(len(seq1), len(seq2)) - mm = 0 - i = 0 - for i in xrange(smallest_len): - if seq1[i] != seq2[i]: - mm += 1 - if mm > num_mismatches: - return i - return i + 1 - -BASES_PER_LINE = 50 -def split_seq(seq, chars_per_line=BASES_PER_LINE): - pos = 0 - newseq = [] - while pos < len(seq): - if pos + chars_per_line > len(seq): - endpos = len(seq) - else: - endpos = pos + chars_per_line - newseq.append(seq[pos:endpos]) - pos = endpos - return '\n'.join(newseq) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/lib/stats.py --- a/chimerascan/lib/stats.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,317 +0,0 @@\n-\'\'\'\n-Created on Jan 30, 2011\n-\n-@author: mkiyer\n-\'\'\'\n-import math\n-from math import log\n-from collections import defaultdict\n-\n-def comb(N,k):\n- """\n- This function was taken from scipy 0.9.0rc1\n- \n- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \n- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT \n- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS \n- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE \n- COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, \n- INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \n- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR \n- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) \n- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, \n- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING \n- IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE \n- POSSIBILITY OF SUCH DAMAGE.\n- \n- The number of combinations of N things taken k at a time.\n- This is often expressed as "N choose k".\n-\n- Parameters\n- ----------\n- N : int, array\n- Number of things.\n- k : int, array\n- Number of elements taken.\n-\n- Returns\n- -------\n- val : int, array\n- The total number of combinations.\n-\n- Notes\n- -----\n- - Array arguments accepted only for exact=0 case.\n- - If k > N, N < 0, or k < 0, then a 0 is returned.\n-\n- Examples\n- --------\n- >>> k = np.array([3, 4])\n- >>> n = np.array([10, 10])\n- >>> comb(n, k, exact=False)\n- array([ 120., 210.])\n- >>> comb(10, 3, exact=True)\n- 120L\n- """\n- if (k > N) or (N < 0) or (k < 0):\n- return 0L\n- val = 1L\n- for j in xrange(min(k, N-k)):\n- val = (val*(N-j))//(j+1)\n- return val\n-\n-def normal_pdf(x, m, v):\n- return 1.0/math.sqrt(2*math.pi*v) * math.exp(-(x-m)**2/(2*v))\n-\n-def binomial_pdf(p, n, k):\n- if n < 100:\n- return comb(n, k) * p**k * p**(n-k) # Fall back to your 
current method\n- return normal_pdf(k, n*p, n*p*(1.0-p))\n-\n-def binomial_cdf(p, n, k):\n- return sum(binomial_pdf(p,n,x) for x in xrange(k+1))\n-\n-def _interpolate(a, b, fraction):\n- """\n- This function was taken from scipy 0.9.0rc1\n- \n- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \n- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT \n- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS \n- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE \n- COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, \n- INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \n- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR \n- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) \n- HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, \n- STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING \n- IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE \n- POSSIBILITY OF SUCH DAMAGE.\n-\n- Returns the point at the given fraction between a and b, where\n- \'fraction\' must be between 0 and 1.\n- """\n- return a + (b - a)*fraction;\n-\n-def scoreatpercentile(values, p):\n- """\n- This function was taken from scipy 0.9.0rc1\n- \n- THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \n- "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT \n- LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS \n- FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE \n- COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, \n- INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES \n- (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR \n- SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) \n'..b'for x in arr\n- if x > 0)\n- return kldiv\n-\n-def poisson(m):\n- \'\'\'\n- courtesy (http://telliott99.blogspot.com/2010/02/replot-poisson-example-with-python.html)\n- \'\'\'\n- def f(k):\n- e = math.e**(-m)\n- f = math.factorial(k)\n- g = m**k\n- return g*e/f\n- return f\n-\n-def std(a):\n- # find the mean\n- n = len(a)\n- mean = mean(a)\n- # find the standard deviation\n- std = sum((x - mean)**2 for x in a)\n- std = (std / float(n-1))**0.5\n- return std\n-\n-def normmeanCI(p, xbar, sd, n):\n- """\n- Computes a p x 100 CI for the given arguments\n- p - confidence coefficient, common values are 0.99, 0.95, 0.90\n- xbar - sample point estimate of unknown pop. mean.\n- sd - standard deviation\n- n - sample size\n- """\n- se = sd / (n ** 0.5)\n- alphadiv2 = (1.0- p)/2.0\n- z2 = stat.norm. 
ppf(1-alphadiv2)\n- a = xbar - z2 * se\n- b = xbar + z2 * se\n- return (a, b)\n-\n-def median(a):\n- b = sorted(a)\n- ind,odd = divmod(len(b),2)\n- median = (b[ind] + b[ind+odd]) / 2.0\n-\n-def mean(a):\n- return sum(a)/float(len(a))\n-\n-class EmpiricalCdf3D(object):\n- \n- def prob(self, x, y, z):\n- if self.n == 0:\n- return 0.0\n- # find prob(X = x) by summing all y\'s and z\'a\n- nx = 0\n- ydict = self.D[x]\n- for zdict in ydict.itervalues(): \n- nz_given_y = sum(zdict.itervalues())\n- nx += nz_given_y\n- if nx == 0:\n- return 0.0\n- px = nx / float(self.n) \n- # find prob(Y = y | X = x)\n- ny_given_x = sum(self.D[x][y].itervalues())\n- if ny_given_x == 0:\n- return 0.0\n- py_given_x = ny_given_x / float(nx)\n- # find prob(Z = z | Y=y, X=x)\n- nz_given_xy = self.D[x][y][z]\n- if nz_given_xy == 0:\n- return 0.0\n- pz_given_xy = nz_given_xy / float(ny_given_x) \n- # multiply together\n- return pz_given_xy * py_given_x * px\n-\n- def _count(self, x, y, z):\n- total = 0\n- xkeys = sorted(self.D.iterkeys())\n- for xval in xkeys:\n- if xval > x:\n- break\n- ykeys = sorted(self.D[xval].iterkeys())\n- for yval in ykeys:\n- if yval > y:\n- break\n- zkeys = sorted(self.D[xval][yval].iterkeys())\n- for zval in zkeys:\n- if zval > z:\n- break\n- total += self.D[xval][yval][zval]\n- return total\n-\n- def __init__(self, data_iter):\n- # use dict as sparse matrix for now\n- self.D = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0)))\n- self.n = 0\n- for x,y,z in data_iter:\n- self.n += 1\n- self.D[x][y][z] += 1\n- # turn into dicts\n- for xval, ydict in self.D.iteritems():\n- self.D[xval] = dict(ydict)\n- for yval, zdict in ydict.iteritems():\n- self.D[xval][yval] = dict(zdict)\n- self.CDF = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0))) \n- # turn into cumulative counts\n- xkeys = sorted(self.D.iterkeys())\n- for xval in xkeys: \n- ykeys = sorted(self.D[xval].iterkeys())\n- for yval in ykeys:\n- zkeys = 
sorted(self.D[xval][yval].iterkeys())\n- for zval in zkeys:\n- c = self._count(xval, yval, zval)\n- self.CDF[xval][yval][zval] = c \n-\n- def __call__(self, x, y, z):\n- return self.CDF[x][y][z] / float(self.n)\n-\n-if __name__ == \'__main__\':\n- import random\n- X = [random.randrange(0, 5) for x in xrange(100)]\n- Y = [random.randrange(0, 5) for y in xrange(100)]\n- Z = [random.randrange(0, 5) for z in xrange(100)]\n- import itertools\n- x = EmpiricalCdf3D(itertools.izip(X,Y,Z))\n- print x.n \n- print x(4, 4, 4)\n-\n- \n-\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/align_bowtie.py --- a/chimerascan/pipeline/align_bowtie.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,282 +0,0 @@\n-\'\'\'\n-Created on Jun 1, 2011\n-\n-@author: mkiyer\n-\n-chimerascan: chimeric transcript discovery using RNA-seq\n-\n-Copyright (C) 2011 Matthew Iyer\n-\n-This program is free software: you can redistribute it and/or modify\n-it under the terms of the GNU General Public License as published by\n-the Free Software Foundation, either version 3 of the License, or\n-(at your option) any later version.\n-\n-This program is distributed in the hope that it will be useful,\n-but WITHOUT ANY WARRANTY; without even the implied warranty of\n-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-GNU General Public License for more details.\n-\n-You should have received a copy of the GNU General Public License\n-along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\'\'\'\n-import sys\n-import os\n-import logging\n-import subprocess\n-\n-from chimerascan.lib.base import LibraryTypes\n-from chimerascan.lib.seq import SANGER_FORMAT, SOLEXA_FORMAT, ILLUMINA_FORMAT\n-from chimerascan.lib import config\n-\n-translate_quals = {SOLEXA_FORMAT: \'solexa-quals\',\n- ILLUMINA_FORMAT: \'solexa1.3-quals\',\n- SANGER_FORMAT: \'phred33-quals\'}\n-\n-def translate_library_type(library_type):\n- """\n- returns the bowtie library type option \'--fr\' or \'--ff\' corresponding\n- to the first two characters of the library type string\n- """\n- return library_type[0:2]\n-\n-_sam2bam_script = os.path.join(os.path.dirname(__file__), "sam2bam.py")\n-_fastq_trim_script = os.path.join(os.path.dirname(__file__), "fastq_merge_trim.py")\n-\n-def align_pe(fastq_files, \n- bowtie_index,\n- output_bam_file, \n- unaligned_fastq_param=None,\n- maxmultimap_fastq_param=None,\n- min_fragment_length=0,\n- max_fragment_length=1000,\n- trim5=0,\n- trim3=0,\n- library_type=LibraryTypes.FR_UNSTRANDED,\n- num_processors=1, \n- quals=SANGER_FORMAT,\n- multihits=100, \n- mismatches=2, \n- bowtie_bin="bowtie", \n- bowtie_args=None,\n- log_file=None,\n- 
keep_unmapped=False):\n- args = [bowtie_bin, "-q", "-S", \n- "-p", str(num_processors),\n- "--%s" % translate_quals[quals],\n- "-k", str(multihits),\n- "-m", str(multihits),\n- "-v", str(mismatches),\n- "--minins", min_fragment_length,\n- "--maxins", max_fragment_length,\n- "--trim5", trim5,\n- "--trim3", trim3,\n- "--%s" % translate_library_type(library_type)]\n- if unaligned_fastq_param is not None:\n- args.extend(["--un", unaligned_fastq_param])\n- if maxmultimap_fastq_param is not None:\n- args.extend(["--max", maxmultimap_fastq_param]) \n- if bowtie_args is not None: \n- args.extend(bowtie_args.split())\n- args += [bowtie_index, \n- "-1", fastq_files[0],\n- "-2", fastq_files[1]]\n- args = map(str, args)\n- logging.debug("Bowtie alignment args: %s" % (\' \'.join(args)))\n- # setup logging\n- if log_file is not None:\n- logfh = open(log_file, "w")\n- else:\n- logfh = None\n- aln_p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=logfh)\n- # pipe the bowtie SAM output to a filter that writes BAM format\n- args = [sys.executable, _sam2bam_script, \n- "--multihits", str(multihits),\n- "--quals", quals]\n- if keep_unmapped:\n- args.append("--un")\n- args.extend([output_bam_file, "-"])\n- args.extend(fastq_files)\n- logging.debug("SAM to BAM converter args: %s" % (\' \'.join(args)))\n- retcode = subprocess.call(args, stdin=aln_p.stdout, stderr=logfh) \n- if logfh is not None:\n- logfh.close()\n- if retcode != 0:\n- logging.error("SAM to BAM conversion script failed")\n- aln_p.terminate()\n- # cleanup output file\n- if os.path.exists(output_bam_file):\n- '..b', _sam2bam_script, \n- "--multihits", str(multihits),\n- "--quals", quals]\n- if keep_unmapped:\n- args.append("--un")\n- args.extend([output_bam_file, "-"])\n- args.append(fastq_file) \n- logging.debug("SAM to BAM converter args: %s" % (\' \'.join(args)))\n- fix_p = subprocess.Popen(args, stdin=aln_p.stdout, stderr=logfh)\n- # wait for processes to complete\n- retcode1 = fix_p.wait()\n- if retcode1 != 
0:\n- logging.error("SAM to BAM conversion script failed")\n- # kill alignment process\n- aln_p.kill()\n- # cleanup output file\n- if os.path.exists(output_bam_file):\n- os.remove(output_bam_file)\n- # end logging\n- if logfh is not None:\n- logfh.close()\n- return config.JOB_ERROR\n- retcode2 = aln_p.wait()\n- # end logging\n- if logfh is not None:\n- logfh.close()\n- if retcode2 != 0:\n- logging.error("Alignment process failed")\n- # cleanup output file\n- if os.path.exists(output_bam_file):\n- os.remove(output_bam_file)\n- return config.JOB_ERROR\n- return config.JOB_SUCCESS\n-\n-\n-def trim_align_pe_sr(fastq_files,\n- bowtie_index,\n- output_bam_file,\n- unaligned_fastq_param=None,\n- maxmultimap_fastq_param=None,\n- trim5=0,\n- library_type=LibraryTypes.FR_UNSTRANDED,\n- num_processors=1, \n- quals=SANGER_FORMAT,\n- multihits=100, \n- mismatches=2, \n- bowtie_bin="bowtie", \n- bowtie_args=None,\n- log_file=None,\n- segment_length=25,\n- keep_unmapped=False):\n- # setup logging\n- if log_file is not None:\n- logfh = open(log_file, "w")\n- else:\n- logfh = None\n- #\n- # Merge paired-end reads into single fastq file\n- #\n- args = [sys.executable, _fastq_trim_script, \n- "--trim5", str(trim5), \n- "--segment-length", str(segment_length)]\n- args.extend(fastq_files)\n- args.append("-")\n- logging.debug("FASTQ trimming args: %s" % (\' \'.join(args)))\n- trim_p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=logfh)\n- #\n- # Align the trimmed reads\n- #\n- args = [bowtie_bin, "-q", "-S", \n- "-p", str(num_processors),\n- "--tryhard",\n- "--%s" % translate_quals[quals],\n- "-k", str(multihits),\n- "-m", str(multihits),\n- "-v", str(mismatches),\n- "--%s" % translate_library_type(library_type)]\n- if unaligned_fastq_param is not None:\n- args.extend(["--un", unaligned_fastq_param])\n- if maxmultimap_fastq_param is not None:\n- args.extend(["--max", maxmultimap_fastq_param]) \n- if bowtie_args is not None: \n- args.extend(bowtie_args.split())\n- args += 
[bowtie_index, "-"]\n- logging.debug("Alignment args: %s" % (\' \'.join(args)))\n- aln_p = subprocess.Popen(args, stdin=trim_p.stdout, \n- stdout=subprocess.PIPE,\n- stderr=logfh)\n- #\n- # Fix alignment ordering and convert to BAM, also extend sequences\n- # back to full length by adding padding to CIGAR string\n- #\n- args = [sys.executable, _sam2bam_script, \n- "--multihits", str(multihits),\n- "--quals", quals,\n- "--pesr", \n- "--softclip"] \n- if keep_unmapped:\n- args.append("--un")\n- args.extend([output_bam_file, "-"])\n- args.extend(fastq_files)\n- logging.debug("SAM to BAM converter args: %s" % (\' \'.join(args)))\n- fix_p = subprocess.Popen(args, stdin=aln_p.stdout, stderr=logfh)\n- # wait for processes to complete\n- fix_p.wait()\n- aln_p.wait()\n- trim_p.wait()\n- # end logging\n- if logfh is not None:\n- logfh.close()\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/chimeras_to_breakpoints.py --- a/chimerascan/pipeline/chimeras_to_breakpoints.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,74 +0,0 @@ -''' -Created on Jun 11, 2011 - -@author: mkiyer -''' -import logging -import os -import collections - -from chimerascan import pysam -from chimerascan.lib import config -from chimerascan.lib.chimera import Chimera -from chimerascan.lib.batch_sort import batch_sort -from chimerascan.lib.seq import split_seq - -def chimeras_to_breakpoints(input_file, breakpoint_sorted_chimera_file, - breakpoint_map_file, breakpoint_fasta_file, - tmp_dir): - # sort chimera file by breakpoint name - def sortfunc(line): - fields = line.strip().split('\t') - return fields[Chimera.BREAKPOINT_NAME_FIELD] - tempdirs = [tmp_dir] - batch_sort(input=input_file, - output=breakpoint_sorted_chimera_file, - key=sortfunc, - buffer_size=32000, - tempdirs=tempdirs) - # parse and build breakpoint -> chimera map - fastafh = open(breakpoint_fasta_file, "w") - mapfh = open(breakpoint_map_file, "w") - prev_breakpoint_name = None - prev_seq = None - chimera_names = set() - for c in Chimera.parse(open(breakpoint_sorted_chimera_file)): - seq = c.breakpoint_seq_5p + c.breakpoint_seq_3p - if c.breakpoint_name != prev_breakpoint_name: - if len(chimera_names) > 0: - # write to fasta - print >>fastafh, ">%s\n%s" % (prev_breakpoint_name, split_seq(prev_seq)) - # write to map file - print >>mapfh, "%s\t%s\t%s" % (prev_breakpoint_name, - prev_seq, - ",".join(sorted(chimera_names))) - chimera_names = set() - prev_seq = seq - prev_breakpoint_name = c.breakpoint_name - chimera_names.add(c.name) - if len(chimera_names) > 0: - print >>fastafh, ">%s\n%s" % (prev_breakpoint_name, split_seq(prev_seq)) - print >>mapfh, "%s\t%s\t%s" % (prev_breakpoint_name, prev_seq, ",".join(chimera_names)) - fastafh.close() - mapfh.close() - - -def main(): - from optparse import OptionParser - logging.basicConfig(level=logging.DEBUG, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") - parser = OptionParser("usage: %prog [options] <chimeras.bedpe> " - "<sorted_chimeras.bedpe> " - "<breakpoints.txt> 
<breakpoints.fa> <tmp_dir>") - options, args = parser.parse_args() - input_file = args[0] - breakpoint_sorted_chimera_file = args[1] - breakpoint_map_file = args[2] - breakpoint_fasta_file = args[3] - tmp_dir = args[3] - chimeras_to_breakpoints(input_file, breakpoint_sorted_chimera_file, - breakpoint_map_file, breakpoint_fasta_file, tmp_dir) - - -if __name__ == '__main__': - main() |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/discordant_reads_to_bedpe.py --- a/chimerascan/pipeline/discordant_reads_to_bedpe.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,109 +0,0 @@ -''' -Created on Jul 21, 2011 - -@author: mkiyer -''' -import logging -import os -import sys - -from chimerascan import pysam -from chimerascan.lib import config -from chimerascan.lib.chimera import DiscordantTags, DISCORDANT_TAG_NAME, \ - OrientationTags, ORIENTATION_TAG_NAME, DiscordantRead -from chimerascan.lib.gene_to_genome import build_tid_gene_map -from chimerascan.lib.batch_sort import batch_sort - -def parse_pairs(bamfh): - bam_iter = iter(bamfh) - try: - while True: - r1 = bam_iter.next() - r2 = bam_iter.next() - yield r1,r2 - except StopIteration: - pass - -def parse_gene_discordant_reads(bamfh): - """ - return tuples of (5',3') reads that both align to transcripts - """ - for r1,r2 in parse_pairs(bamfh): - # TODO: - # for now we are only going to deal with gene-gene - # chimeras and leave other chimeras for study at a - # later time - dr1 = r1.opt(DISCORDANT_TAG_NAME) - dr2 = r2.opt(DISCORDANT_TAG_NAME) - if (dr1 != DiscordantTags.DISCORDANT_GENE or - dr2 != DiscordantTags.DISCORDANT_GENE): - continue - # organize key in 5' to 3' order - or1 = r1.opt(ORIENTATION_TAG_NAME) - or2 = r2.opt(ORIENTATION_TAG_NAME) - assert or1 != or2 - if or1 == OrientationTags.FIVEPRIME: - pair = (r1,r2) - else: - pair = (r2,r1) - yield pair - -def discordant_reads_to_bedpe(index_dir, input_bam_file, output_file): - # open BAM alignment file - bamfh = pysam.Samfile(input_bam_file, "rb") - # build a lookup table to get genomic intervals from transcripts - logging.debug("Reading gene information") - gene_file = os.path.join(index_dir, config.GENE_FEATURE_FILE) - tid_gene_map = build_tid_gene_map(bamfh, gene_file, - rname_prefix=config.GENE_REF_PREFIX) - outfh = open(output_file, "w") - logging.debug("Converting BAM to BEDPE format") - for r5p,r3p in parse_gene_discordant_reads(bamfh): - # store pertinent read information in lightweight structure called - # DiscordantRead object. 
this departs from SAM format into a - # custom read format - dr5p = DiscordantRead.from_read(r5p) - dr3p = DiscordantRead.from_read(r3p) - # get gene information - tx5p = tid_gene_map[r5p.rname] - tx3p = tid_gene_map[r3p.rname] - # write bedpe format - fields = [tx5p.tx_name, r5p.pos, r5p.aend, - tx3p.tx_name, r3p.pos, r3p.aend, - r5p.qname, # read name - 0, # score - tx5p.strand, tx3p.strand, # strand 1, strand 2 - ] - fields.append('|'.join(map(str, dr5p.to_list()))) - fields.append('|'.join(map(str, dr3p.to_list()))) - print >>outfh, '\t'.join(map(str, fields)) - outfh.close() - -def sort_bedpe(input_file, output_file, tmp_dir): - # sort BEDPE file by paired chromosome/position - def sortfunc(line): - fields = line.strip().split('\t') - return tuple([fields[0], fields[3], fields[1], fields[4]]) - tempdirs = [tmp_dir] - batch_sort(input=input_file, - output=output_file, - key=sortfunc, - buffer_size=32000, - tempdirs=tempdirs) - - -def main(): - from optparse import OptionParser - logging.basicConfig(level=logging.DEBUG, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") - parser = OptionParser("usage: %prog [options] <index> <pairs.bam> <out.bedpe>") - options, args = parser.parse_args() - index_dir = args[0] - input_bam_file = args[1] - output_file = args[2] - return discordant_reads_to_bedpe(index_dir, - input_bam_file, - output_file) - -if __name__ == '__main__': - sys.exit(main()) \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/fastq_inspect_reads.py --- a/chimerascan/pipeline/fastq_inspect_reads.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,132 +0,0 @@ -''' -Created on Jul 14, 2011 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import logging -import gzip -import bz2 -import zipfile -import os - -from chimerascan.lib.seq import get_qual_conversion_func -from chimerascan.lib.base import parse_lines -import chimerascan.lib.config as config - -def detect_format(f): - if f.endswith(".gz") or f.endswith(".z"): - return "gz" - elif f.endswith(".bz2"): - return "bz2" - elif f.endswith(".zip"): - return "zip" - else: - return "txt" - -def open_compressed(f): - compression_format = detect_format(f) - if compression_format == "gz": - fh = gzip.open(f, "r") - elif compression_format == "bz2": - fh = bz2.BZ2File(f, "r") - elif compression_format == "zip": - fh = zipfile.ZipFile(f, "r") - else: - fh = open(f, "r") - return fh - -def detect_read_length(filename): - fh = open_compressed(filename) - fh.next() - seq = fh.next() - fh.close() - return len(seq) - -def get_min_max_read_lengths(fastq_files, num_samples=10000): - read_lengths = [] - for filename in fastq_files: - f = open_compressed(filename) - count = 0 - samples = 0 - for line in f: - mod = count % 4 - if mod == 1: - read_lengths.append(len(line)) - samples += 1 - if samples >= num_samples: - break - count += 1 - f.close() - return min(read_lengths), 
max(read_lengths) - -def inspect_reads(fastq_files, output_prefix, quals): - """ - uncompresses reads, renames reads, and converts quality scores - to 'sanger' format - """ - # setup file iterators - filehandles = [open_compressed(f) for f in fastq_files] - fqiters = [parse_lines(f, numlines=4) for f in filehandles] - output_files = [(output_prefix + "_%d.fq" % (x+1)) - for x in xrange(len(fastq_files))] - outfhs = [open(f, "w") for f in output_files] - qual_func = get_qual_conversion_func(quals) - linenum = 0 - try: - while True: - pelines = [it.next() for it in fqiters] - for i,lines in enumerate(pelines): - # rename read using line number - lines[0] = "@%d/%d" % (linenum,i+1) - # ignore redundant header - lines[2] = "+" - # convert quality score to sanger - lines[3] = qual_func(lines[3]) - print >>outfhs[i], '\n'.join(lines) - linenum += 1 - except StopIteration: - pass - except: - logging.error("Unexpected error during FASTQ file processing") - for f in output_files: - if os.path.exists(f): - os.remove(f) - return config.JOB_ERROR - for fh in filehandles: - fh.close() - logging.debug("Inspected %d fragments" % (linenum)) - return config.JOB_SUCCESS - -def main(): - logging.basicConfig(level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") - from optparse import OptionParser - parser = OptionParser("usage: %prog [options] <outprefix> <in1.fq> <in2.fq>") - parser.add_option("--quals", dest="quals", choices=["sanger", "solexa", "illumina"], - default="sanger") - options, args = parser.parse_args() - if len(args) < 2: - parser.error("must specify output prefix and at least one fastq file") - output_prefix = args[0] - fastq_files = args[1:] - inspect_reads(fastq_files, output_prefix, options.quals) - -if __name__ == '__main__': - main() |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/fastq_merge_trim.py --- a/chimerascan/pipeline/fastq_merge_trim.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,61 +0,0 @@ -''' -Created on May 23, 2011 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import sys - -def parse_fastq(line_iter): - with line_iter: - while True: - lines = [line_iter.next().rstrip() for x in xrange(4)] - yield lines - -def trim_and_merge_fastq(infiles, outfile, trim5, segment_length): - total_length = trim5 + segment_length - fqiters = [parse_fastq(open(f)) for f in infiles] - if outfile == "-": - outfh = sys.stdout - else: - outfh = open(outfile, "w") - try: - while True: - pe_lines = [fqiter.next() for fqiter in fqiters] - for lines in pe_lines: - seqlen = len(lines[1]) - if seqlen > total_length: - lines[1] = lines[1][trim5:total_length] - lines[3] = lines[3][trim5:total_length] - print >>outfh, '\n'.join(lines) - except StopIteration: - pass - if outfile != "-": - outfh.close() - -def main(): - from optparse import OptionParser - parser = OptionParser("usage: %prog [options] <in1.fq> <in2.fq> <out.fq>") - parser.add_option("--trim5", type="int", dest="trim5", default=0) - parser.add_option("--segment-length", type="int", dest="segment_length", default=25) - options, args = parser.parse_args() - trim_and_merge_fastq(args[:2], args[2], options.trim5, options.segment_length) - -if __name__ == '__main__': - main() |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/filter_chimeras.py --- a/chimerascan/pipeline/filter_chimeras.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,217 +0,0 @@\n-\'\'\'\n-Created on Jan 31, 2011\n-\n-@author: mkiyer\n-\n-chimerascan: chimeric transcript discovery using RNA-seq\n-\n-Copyright (C) 2011 Matthew Iyer\n-\n-This program is free software: you can redistribute it and/or modify\n-it under the terms of the GNU General Public License as published by\n-the Free Software Foundation, either version 3 of the License, or\n-(at your option) any later version.\n-\n-This program is distributed in the hope that it will be useful,\n-but WITHOUT ANY WARRANTY; without even the implied warranty of\n-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-GNU General Public License for more details.\n-\n-You should have received a copy of the GNU General Public License\n-along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\'\'\'\n-import logging\n-import collections\n-import os\n-\n-from chimerascan import pysam\n-from chimerascan.lib.gene_to_genome import build_transcript_genome_map, \\\n- transcript_to_genome_pos, build_transcript_cluster_map\n-from chimerascan.lib.chimera import Chimera\n-from chimerascan.lib import config\n-\n-def filter_unique_frags(c, threshold):\n- """\n- filters chimeras with less than \'threshold\' unique\n- alignment positions supporting the chimera \n- """\n- return c.get_num_unique_positions() >= threshold\n-\n-def get_wildtype_frags_5p(rname, start, end, bamfh):\n- num_wildtype_frags = len(set(r.qname for r in bamfh.fetch(rname, start, end)\n- if (not r.mate_is_unmapped) and (r.mpos >= end)))\n- return num_wildtype_frags\n-\n-def get_wildtype_frags_3p(rname, start, end, bamfh):\n- num_wildtype_frags = len(set(r.qname for r in bamfh.fetch(rname, start, end)\n- if (not r.mate_is_unmapped) and (r.mpos < start)))\n- return num_wildtype_frags\n-\n-def get_wildtype_frags(c, bamfh):\n- rname5p = config.GENE_REF_PREFIX + c.tx_name_5p\n- rname3p = config.GENE_REF_PREFIX + c.tx_name_3p\n- num_wt_frags_5p = get_wildtype_frags_5p(rname5p, c.tx_start_5p, c.tx_end_5p, 
bamfh)\n- num_wt_frags_3p = get_wildtype_frags_3p(rname3p, c.tx_start_3p, c.tx_end_3p, bamfh)\n- return num_wt_frags_5p, num_wt_frags_3p\n-\n-def filter_chimeric_isoform_fraction(c, frac, bamfh):\n- """\n- filters chimeras with fewer than \'threshold\' total\n- unique read alignments\n- """\n- num_wt_frags_5p, num_wt_frags_3p = get_wildtype_frags(c, bamfh)\n- num_chimeric_frags = c.get_num_frags()\n- ratio5p = float(num_chimeric_frags) / (num_chimeric_frags + num_wt_frags_5p)\n- ratio3p = float(num_chimeric_frags) / (num_chimeric_frags + num_wt_frags_3p)\n- #print c.gene_name_5p, c.gene_name_3p, "chimeras", num_chimeric_frags, "wt5p", num_wt_frags_5p, "wt3p", num_wt_frags_3p, "r5p", ratio5p, "r3p", ratio3p\n- return min(ratio5p, ratio3p) >= frac\n-\n-def read_false_pos_file(filename):\n- false_pos_chimeras = set()\n- for line in open(filename):\n- fields = line.strip().split("\\t")\n- tx_name_5p, end5p, tx_name_3p, start3p = fields\n- end5p = int(end5p)\n- start3p = int(start3p)\n- false_pos_chimeras.add((tx_name_5p, end5p, tx_name_3p, start3p))\n- return false_pos_chimeras\n-\n-def filter_encompassing_chimeras(input_file, output_file, min_frags):\n- num_chimeras = 0\n- num_filtered_chimeras = 0\n- f = open(output_file, "w") \n- for c in Chimera.parse(open(input_file)):\n- num_chimeras += 1\n- if c.get_num_frags() < min_frags:\n- continue\n- num_filtered_chimeras += 1\n- print >>f, \'\\t\'.join(map(str, c.to_list()))\n- f.close()\n- logging.debug("\\tchimeras: %d" % (num_chimeras))\n- logging.debug("\\tfiltered chimeras: %d" % (num_filtered_chimeras))\n- return config.JOB_SUCCESS\n-\n-def filter_chimeras(input_file, output_file,\n- index_dir, bam_file,\n- unique_frags,\n- isoform_fraction,\n- false_pos_file):\n- logging.debug("Parameters")\n- logging.debug("\\'..b'coverage_isoforms(input_file, gene_file):\n- # place overlapping chimeras into clusters\n- logging.debug("Building isoform cluster lookup table")\n- transcript_cluster_map = 
build_transcript_cluster_map(open(gene_file))\n- # build a lookup table to get genome coordinates from transcript \n- # coordinates\n- transcript_genome_map = build_transcript_genome_map(open(gene_file))\n- cluster_chimera_dict = collections.defaultdict(lambda: [])\n- for c in Chimera.parse(open(input_file)):\n- # TODO: adjust this to score chimeras differently!\n- key = (c.name, c.get_num_frags())\n- # get cluster of overlapping genes\n- cluster5p = transcript_cluster_map[c.tx_name_5p]\n- cluster3p = transcript_cluster_map[c.tx_name_3p]\n- # get genomic positions of breakpoints\n- coord5p = transcript_to_genome_pos(c.tx_name_5p, c.tx_end_5p-1, transcript_genome_map)\n- coord3p = transcript_to_genome_pos(c.tx_name_3p, c.tx_start_3p, transcript_genome_map)\n- # add to dictionary\n- cluster_chimera_dict[(cluster5p,cluster3p,coord5p,coord3p)].append(key) \n- # choose highest coverage chimeras within each pair of clusters\n- logging.debug("Finding highest coverage isoforms")\n- kept_chimeras = set()\n- for stats_list in cluster_chimera_dict.itervalues():\n- stats_dict = collections.defaultdict(lambda: set())\n- for stats_info in stats_list:\n- # index chimera names\n- stats_dict[stats_info[1:]].add(stats_info[0])\n- # find highest scoring key\n- sorted_keys = sorted(stats_dict.keys(), reverse=True)\n- kept_chimeras.update(stats_dict[sorted_keys[0]])\n- return kept_chimeras\n-\n-def filter_highest_coverage_isoforms(index_dir, input_file, output_file):\n- # find highest coverage chimeras among isoforms\n- gene_file = os.path.join(index_dir, config.GENE_FEATURE_FILE)\n- kept_chimeras = get_highest_coverage_isoforms(input_file, gene_file)\n- num_filtered_chimeras = 0\n- f = open(output_file, "w")\n- for c in Chimera.parse(open(input_file)):\n- if c.name in kept_chimeras:\n- num_filtered_chimeras += 1\n- print >>f, \'\\t\'.join(map(str, c.to_list()))\n- f.close()\n- logging.debug("\\tAfter choosing best isoform: %d" % \n- num_filtered_chimeras)\n- return 
config.JOB_SUCCESS\n-\n-\n-\n-def main():\n- from optparse import OptionParser\n- logging.basicConfig(level=logging.DEBUG,\n- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n- parser = OptionParser("usage: %prog [options] <index_dir> "\n- "<sorted_aligned_reads.bam> <in.txt> <out.txt>")\n- parser.add_option("--unique-frags", type="float", default=2.0,\n- dest="unique_frags", metavar="N",\n- help="Filter chimeras with less than N unique "\n- "aligned fragments [default=%default]")\n- parser.add_option("--isoform-fraction", type="float", \n- default=0.10, metavar="X",\n- help="Filter chimeras with expression ratio "\n- " less than X (0.0-1.0) relative to the wild-type "\n- "5\' transcript level [default=%default]")\n- parser.add_option("--false-pos", dest="false_pos_file",\n- default=None, \n- help="File containing known false positive "\n- "transcript pairs to subtract from output")\n- options, args = parser.parse_args()\n- index_dir = args[0]\n- bam_file = args[1]\n- input_file = args[2]\n- output_file = args[3]\n- return filter_chimeras(input_file, output_file, index_dir, bam_file,\n- unique_frags=options.unique_frags,\n- isoform_fraction=options.isoform_fraction,\n- false_pos_file=options.false_pos_file)\n-\n-if __name__ == "__main__":\n- main()\n\\ No newline at end of file\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/filter_homologous_genes.py --- a/chimerascan/pipeline/filter_homologous_genes.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,146 +0,0 @@ -''' -Created on Aug 1, 2011 - -@author: mkiyer -''' -import logging -import os -import collections -import subprocess - -from chimerascan import pysam -from chimerascan.lib import config -from chimerascan.lib.chimera import Chimera -from chimerascan.bx.intersection import IntervalTree, Interval - -def get_mapped_read_intervals(c, min_isize, max_isize, homolog_segment_length): - start5p = max(0, c.tx_end_5p - min_isize + homolog_segment_length) - end5p = max(0, c.tx_end_5p + max_isize - homolog_segment_length) - if start5p > end5p: - end5p = start5p + homolog_segment_length - start3p = max(0, c.tx_start_3p - max_isize + homolog_segment_length) - end3p = max(0, c.tx_start_3p + min_isize - homolog_segment_length) - if start3p > end3p: - end3p = start3p + homolog_segment_length - return start5p, end5p, start3p, end3p - -def filter_homologous_genes(input_file, output_file, index_dir, - homolog_segment_length, - min_isize, - max_isize, - bowtie_bin, - num_processors, - tmp_dir): - logging.debug("Parameters") - logging.debug("\thomolog segment length: %d" % (homolog_segment_length)) - logging.debug("\tmin fragment size: %d" % (min_isize)) - logging.debug("\tmax fragment size: %d" % (max_isize)) - - # open the reference sequence fasta file - ref_fasta_file = os.path.join(index_dir, config.ALIGN_INDEX + ".fa") - ref_fa = pysam.Fastafile(ref_fasta_file) - bowtie_index = os.path.join(index_dir, config.ALIGN_INDEX) - interval_trees_3p = collections.defaultdict(lambda: IntervalTree()) - - # generate FASTA file of sequences to use in mapping - logging.debug("Generating homologous sequences to test") - fasta5p = os.path.join(tmp_dir, "homologous_5p.fa") - f = open(fasta5p, "w") - for c in Chimera.parse(open(input_file)): - tx_name_5p = config.GENE_REF_PREFIX + c.tx_name_5p - tx_name_3p = config.GENE_REF_PREFIX + c.tx_name_3p - start5p, end5p, start3p, end3p = get_mapped_read_intervals(c, min_isize, max_isize, homolog_segment_length) - # add 3' gene to interval 
trees - interval_trees_3p[tx_name_3p].insert_interval(Interval(start3p, end3p, value=c.name)) - # extract sequence of 5' gene - seq5p = ref_fa.fetch(tx_name_5p, start5p, end5p) - for i in xrange(0, len(seq5p) - homolog_segment_length): - print >>f, ">%s,%s:%d-%d\n%s" % (c.name,c.tx_name_5p, - start5p+i, - start5p+i+homolog_segment_length, - seq5p[i:i+homolog_segment_length]) - f.close() - - # map 5' sequences to reference using bowtie - logging.debug("Mapping homologous sequences") - sam5p = os.path.join(tmp_dir, "homologous_5p.sam") - args = [bowtie_bin, "-p", num_processors, "-f", "-a", "-m", 100, - "-y", "-v", 3, "-S", - bowtie_index, fasta5p, sam5p] - retcode = subprocess.call(map(str,args)) - if retcode != 0: - return config.JOB_ERROR - - # analyze results for homologous genes - logging.debug("Analyzing mapping results") - samfh = pysam.Samfile(sam5p, "r") - tid_rname_map = dict((i,refname) for i,refname in enumerate(samfh.references)) - homologous_chimeras = set() - for r in pysam.Samfile(sam5p, "r"): - if r.is_unmapped: - continue - # reference name must be in list of 3' chimeras - rname = tid_rname_map[r.rname] - if rname not in interval_trees_3p: - continue - # get chimera name from 'qname' - chimera_name = r.qname.split(",")[0] - for hit in interval_trees_3p[rname].find(r.pos,r.aend): - if hit.value == chimera_name: - homologous_chimeras.add(chimera_name) - - # write output - logging.debug("Writing output") - f = open(output_file, "w") - for c in Chimera.parse(open(input_file)): - if c.name in homologous_chimeras: - logging.debug("Removing homologous chimera %s between %s and %s" % - (c.name, c.gene_name_5p, c.gene_name_3p)) - continue - print >>f, '\t'.join(map(str, c.to_list())) - f.close() - - # cleanup - if os.path.exists(fasta5p): - os.remove(fasta5p) - if os.path.exists(sam5p): - os.remove(sam5p) - return config.JOB_SUCCESS - - -def main(): - from optparse import OptionParser - logging.basicConfig(level=logging.DEBUG, - format="%(asctime)s - 
%(name)s - %(levelname)s - %(message)s") - parser = OptionParser("usage: %prog [options] <index_dir> " - "<in.txt> <out.txt>") - parser.add_option("--homolog-segment-length", dest="homolog_segment_length", - type="int", default=25, - help="Segment length to consider when searching for " - "homologous regions [default=%default]") - parser.add_option('--min-fragment-length', dest="min_fragment_length", - type="int", default=100) - parser.add_option('--max-fragment-length', dest="max_fragment_length", - type="int", default=300) - parser.add_option("--bowtie-bin", dest="bowtie_bin", - default="bowtie", - help="Path to bowtie binary [default: %default]") - parser.add_option("-p", type="int", dest="num_processors", default=1, - help="Number of processors to use [default: %default]") - parser.add_option("--tmp-dir", dest="tmp_dir", - default=".", - help="Temporary directory [default=%default]") - options, args = parser.parse_args() - index_dir = args[0] - input_file = args[1] - output_file = args[2] - return filter_homologous_genes(input_file, output_file, index_dir, - homolog_segment_length=options.homolog_segment_length, - min_isize=options.min_fragment_length, - max_isize=options.max_fragment_length, - bowtie_bin=options.bowtie_bin, - num_processors=options.num_processors, - tmp_dir=options.tmp_dir) - -if __name__ == "__main__": - main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/find_discordant_reads.py --- a/chimerascan/pipeline/find_discordant_reads.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,423 +0,0 @@\n-\'\'\'\n-Created on Jun 2, 2011\n-\n-@author: mkiyer\n-\'\'\'\n-import logging\n-import collections\n-import os\n-\n-from chimerascan import pysam\n-from chimerascan.bx.cluster import ClusterTree\n-\n-from chimerascan.lib import config\n-from chimerascan.lib.base import LibraryTypes\n-from chimerascan.lib.sam import parse_pe_reads, pair_reads, copy_read, select_best_mismatch_strata\n-from chimerascan.lib.gene_to_genome import build_transcript_tid_genome_map, \\\n- build_transcript_tid_cluster_map, transcript_to_genome_pos\n-from chimerascan.lib.chimera import DiscordantTags, DISCORDANT_TAG_NAME, \\\n- OrientationTags, ORIENTATION_TAG_NAME, cmp_orientation\n-\n-# globals\n-imin2 = lambda a,b: a if a <= b else b\n-\n-def annotate_multihits(bamfh, reads, transcript_tid_genome_map):\n- hits = set()\n- any_unmapped = False\n- for r in reads:\n- if r.is_unmapped:\n- any_unmapped = True\n- continue\n- if r.rname not in transcript_tid_genome_map:\n- tid = r.rname\n- pos = r.pos\n- else:\n- # use the position that is most 5\' relative to genome\n- left_tid, left_strand, left_pos = transcript_to_genome_pos(r.rname, r.pos, transcript_tid_genome_map)\n- right_tid, right_strand, right_pos = transcript_to_genome_pos(r.rname, r.aend-1, transcript_tid_genome_map)\n- tid = left_tid\n- pos = imin2(left_pos, right_pos)\n- hits.add((tid, pos))\n- #print r.qname, bamfh.getrname(r.rname), r.pos, bamfh.getrname(tid), pos \n- for i,r in enumerate(reads):\n- # annotate reads with \'HI\', and \'IH\' tags\n- r.tags = r.tags + [("HI",i), ("IH",len(reads)), ("NH", len(hits))]\n- return any_unmapped\n-\n-def map_reads_to_references(pe_reads, transcript_tid_cluster_map):\n- """\n- bin reads by transcript cluster and reference (tid)\n- """\n- refdict = collections.defaultdict(lambda: ([], []))\n- genedict = collections.defaultdict(lambda: ([], []))\n- for readnum, reads in enumerate(pe_reads):\n- for r in reads:\n- if r.is_unmapped:\n- continue \n- # get cluster id\n- if 
r.rname in transcript_tid_cluster_map:\n- # add to cluster dict\n- cluster_id = transcript_tid_cluster_map[r.rname]\n- pairs = genedict[cluster_id]\n- pairs[readnum].append(r)\n- # add to reference dict\n- pairs = refdict[r.rname]\n- pairs[readnum].append(r)\n- return refdict, genedict\n-\n-def get_genome_orientation(r, library_type):\n- if library_type == LibraryTypes.FR_FIRSTSTRAND:\n- if r.is_read2:\n- return OrientationTags.FIVEPRIME\n- else:\n- return OrientationTags.THREEPRIME\n- elif library_type == LibraryTypes.FR_SECONDSTRAND:\n- if r.is_read1:\n- return OrientationTags.FIVEPRIME\n- else:\n- return OrientationTags.THREEPRIME\n- return OrientationTags.NONE\n-\n-def get_gene_orientation(r, library_type):\n- if library_type == LibraryTypes.FR_UNSTRANDED:\n- if r.is_reverse:\n- return OrientationTags.THREEPRIME\n- else:\n- return OrientationTags.FIVEPRIME\n- elif library_type == LibraryTypes.FR_FIRSTSTRAND:\n- if r.is_read2:\n- return OrientationTags.FIVEPRIME\n- else:\n- return OrientationTags.THREEPRIME\n- elif library_type == LibraryTypes.FR_SECONDSTRAND:\n- if r.is_read1:\n- return OrientationTags.FIVEPRIME\n- else:\n- return OrientationTags.THREEPRIME\n- logging.error("Unknown library type %s, aborting" % (library_type))\n- assert False\n-\n-def classify_unpaired_reads(reads, transcript_tid_genome_map, library_type):\n- gene_hits_5p = []\n- gene_hits_3p = []\n- genome_hits = []\n- for r in reads:\n- # check to see if this alignment is to a gene, or genomic\n- if (r.rname not in transcript_tid_genome_map):\n- #'..b'nput_bam_file))\n- logging.debug("\\tMax insert size: \'%d\'" % (max_isize))\n- logging.debug("\\tLibrary type: \'%s\'" % (library_type))\n- logging.debug("\\tGene paired file: %s" % (gene_paired_bam_file))\n- logging.debug("\\tGenome paired file: %s" % (genome_paired_bam_file))\n- logging.debug("\\tUnmapped file: %s" % (unmapped_bam_file))\n- logging.debug("\\tComplex file: %s" % (complex_bam_file))\n- # setup input and output files\n- bamfh = 
pysam.Samfile(input_bam_file, "rb")\n- genefh = pysam.Samfile(gene_paired_bam_file, "wb", template=bamfh)\n- genomefh = pysam.Samfile(genome_paired_bam_file, "wb", template=bamfh)\n- unmappedfh = pysam.Samfile(unmapped_bam_file, "wb", template=bamfh)\n- complexfh = pysam.Samfile(complex_bam_file, "wb", template=bamfh)\n- gene_file = os.path.join(index_dir, config.GENE_FEATURE_FILE)\n- # build a lookup table to get all the overlapping transcripts given a\n- # transcript \'tid\'\n- transcript_tid_cluster_map = \\\n- build_transcript_tid_cluster_map(bamfh, open(gene_file), \n- rname_prefix=config.GENE_REF_PREFIX)\n- # build a lookup table to get genome coordinates from transcript \n- # coordinates\n- transcript_tid_genome_map = \\\n- build_transcript_tid_genome_map(bamfh, open(gene_file), \n- rname_prefix=config.GENE_REF_PREFIX)\n- for pe_reads in parse_pe_reads(bamfh):\n- # add hit index and number of multimaps information to read tags\n- # this function also checks for unmapped reads\n- any_unmapped = False\n- for reads in pe_reads:\n- any_unmapped = (any_unmapped or \n- annotate_multihits(bamfh, reads, transcript_tid_genome_map))\n- if any_unmapped:\n- # write to output as discordant reads and continue to \n- # next fragment\n- write_pe_reads(unmappedfh, pe_reads)\n- continue\n- # examine all read pairing combinations and rule out invalid \n- # pairings. 
this returns gene pairs and genome pairs\n- gene_pairs, genome_pairs, unpaired_reads = \\\n- classify_read_pairs(pe_reads, max_isize,\n- library_type, transcript_tid_genome_map,\n- transcript_tid_cluster_map)\n- if len(gene_pairs) > 0 or len(genome_pairs) > 0:\n- write_pairs(genefh, gene_pairs)\n- write_pairs(genomefh, genome_pairs)\n- else:\n- write_pe_reads(complexfh, unpaired_reads)\n- genefh.close()\n- genomefh.close()\n- unmappedfh.close()\n- complexfh.close()\n- bamfh.close() \n- logging.info("Finished pairing reads")\n-\n-\n-def main():\n- from optparse import OptionParser\n- logging.basicConfig(level=logging.DEBUG,\n- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n- parser = OptionParser("usage: %prog [options] <index> <in.bam> "\n- "<gene_paired.bam> <genome_paired.bam> "\n- "<unmapped.bam> <complex.bam>")\n- parser.add_option(\'--max-fragment-length\', dest="max_fragment_length", \n- type="int", default=1000)\n- parser.add_option(\'--library\', dest="library_type", \n- default=LibraryTypes.FR_UNSTRANDED)\n- options, args = parser.parse_args() \n- index_dir = args[0]\n- input_bam_file = args[1]\n- gene_paired_bam_file = args[2]\n- genome_paired_bam_file = args[3]\n- unmapped_bam_file = args[4]\n- complex_bam_file = args[5]\n- find_discordant_fragments(input_bam_file, gene_paired_bam_file,\n- genome_paired_bam_file, unmapped_bam_file, \n- complex_bam_file, index_dir,\n- max_isize=options.max_fragment_length,\n- library_type=options.library_type)\n-\n-if __name__ == \'__main__\':\n- main()\n\\ No newline at end of file\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/merge_spanning_alignments.py --- a/chimerascan/pipeline/merge_spanning_alignments.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,266 +0,0 @@\n-\'\'\'\n-Created on Nov 7, 2010\n-\n-@author: mkiyer\n-\n-chimerascan: chimeric transcript discovery using RNA-seq\n-\n-Copyright (C) 2011 Matthew Iyer\n-\n-This program is free software: you can redistribute it and/or modify\n-it under the terms of the GNU General Public License as published by\n-the Free Software Foundation, either version 3 of the License, or\n-(at your option) any later version.\n-\n-This program is distributed in the hope that it will be useful,\n-but WITHOUT ANY WARRANTY; without even the implied warranty of\n-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-GNU General Public License for more details.\n-\n-You should have received a copy of the GNU General Public License\n-along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\'\'\'\n-import logging\n-import collections\n-import shutil\n-import os\n-\n-# local imports\n-from chimerascan import pysam\n-from chimerascan.lib.chimera import Chimera, DiscordantRead, \\\n- DiscordantTags, DISCORDANT_TAG_NAME, \\\n- OrientationTags, ORIENTATION_TAG_NAME\n-from chimerascan.lib.base import LibraryTypes\n-\n-from chimerascan.pipeline.find_discordant_reads import get_gene_orientation\n-\n-def parse_group_by_attr(myiter, attr):\n- mylist = []\n- prev = None\n- for itm in myiter:\n- cur = getattr(itm, attr)\n- if prev != cur:\n- if len(mylist) > 0:\n- yield prev, mylist\n- mylist = []\n- prev = cur\n- mylist.append(itm)\n- if len(mylist) > 0:\n- yield prev, mylist\n-\n-def parse_sync_by_breakpoint(chimera_file, bam_file):\n- # group reads by reference name (matches breakpoint name)\n- bamfh = pysam.Samfile(bam_file, "rb")\n- tid_rname_map = list(bamfh.references)\n- # initialize iterator through reads\n- read_iter = parse_group_by_attr(bamfh, "rname")\n- read_iter_valid = True\n- try:\n- rname, reads = read_iter.next()\n- read_breakpoint_name = tid_rname_map[rname]\n- except StopIteration:\n- bamfh.close()\n- read_iter_valid = False\n- reads = []\n- 
read_breakpoint_name = "ZZZZZZZZZZZZZZ"\n- # group chimeras by breakpoint name\n- for chimera_breakpoint_name, chimeras in \\\n- parse_group_by_attr(Chimera.parse(open(chimera_file)), \n- "breakpoint_name"):\n- while (read_iter_valid) and (chimera_breakpoint_name > read_breakpoint_name):\n- try:\n- rname, reads = read_iter.next()\n- read_breakpoint_name = tid_rname_map[rname]\n- except StopIteration:\n- read_iter_valid = False\n- reads = []\n- if chimera_breakpoint_name < read_breakpoint_name:\n- yield chimeras, []\n- else:\n- yield chimeras, reads \n- bamfh.close()\n-\n-def get_mismatch_positions(md):\n- x = 0\n- pos = []\n- for y in xrange(len(md)):\n- if md[y].isalpha():\n- offset = int(md[x:y])\n- pos.append(offset)\n- x = y + 1\n- return pos\n-\n-def check_breakpoint_alignment(c, r,\n- anchor_min,\n- anchor_length,\n- anchor_mismatches):\n- """\n- returns True if read \'r\' meets criteria for a valid\n- breakpoint spanning read, False otherwise\n- \n- c - Chimera object\n- r - pysam AlignedRead object\n- """\n- # get position of breakpoint along seq\n- breakpoint_pos = len(c.breakpoint_seq_5p)\n- # check if read spans breakpoint \n- if not (r.pos < breakpoint_pos < r.aend):\n- return False \n- # calculate amount in bp that read overlaps breakpoint\n- # and ensure overlap is sufficient\n- left_anchor_bp = breakpoint_pos - r.pos\n- if left_anchor_bp < max(c.homology_left, anchor_min):\n- return False\n- right_anchor_bp = r.aend - breakpoint_pos\n- if right_anchor_bp < max(c.homology_right, anchor_min):\n- return False\n- # ensure that alignment'..b' for dpair in c.encomp_frags:\n- chimera_qname_dict[c.name][dpair[0].qname] = dpair \n- # find valid spanning reads\n- for c, dr in filter_spanning_reads(chimeras, reads, \n- anchor_min, anchor_length, \n- anchor_mismatches, library_type):\n- # ensure encompassing read is present\n- if dr.qname not in chimera_qname_dict[c.name]:\n- continue\n- # get discordant pair\n- dpair = chimera_qname_dict[c.name][dr.qname]\n- # 
mark correct read (read1/read2) as a spanning read\n- if dr.readnum == dpair[0].readnum:\n- dpair[0].is_spanning = True\n- elif dr.readnum == dpair[1].readnum:\n- dpair[1].is_spanning = True\n- else:\n- assert False\n- filtered_hits += 1\n- # write chimeras back to file\n- for c in chimeras:\n- fields = c.to_list()\n- print >>f, \'\\t\'.join(map(str, fields)) \n- f.close()\n- logging.debug("\\tFound %d hits" % (filtered_hits))\n- #\n- # Process reads that are single-mapped and spanning\n- #\n- logging.debug("Processing single-mapping/spanning reads")\n- tmp_singlemap_chimera_file = os.path.join(tmp_dir, "tmp_singlemap_chimeras.bedpe")\n- f = open(tmp_singlemap_chimera_file, "w")\n- filtered_hits = 0\n- for chimeras, reads in parse_sync_by_breakpoint(tmp_encomp_chimera_file, singlemap_bam_file):\n- # find valid spanning reads\n- for c, dr in filter_spanning_reads(chimeras, reads, \n- anchor_min, anchor_length, \n- anchor_mismatches, library_type):\n- # ensure mate maps to 5\' or 3\' gene\n- # TODO: implement this using sorted/indexed BAM file?\n- # add read as a spanning read\n- c.spanning_reads.append(dr)\n- filtered_hits += 1 \n- # write chimeras back to file\n- for c in chimeras:\n- fields = c.to_list()\n- print >>f, \'\\t\'.join(map(str, fields)) \n- f.close()\n- logging.debug("\\tFound %d hits" % (filtered_hits))\n- # output_chimera_file \n- shutil.copyfile(tmp_singlemap_chimera_file, output_chimera_file)\n- # remove temporary files\n- if os.path.exists(tmp_encomp_chimera_file):\n- os.remove(tmp_encomp_chimera_file)\n- if os.path.exists(tmp_singlemap_chimera_file):\n- os.remove(tmp_singlemap_chimera_file)\n- \n-\n-def main():\n- from optparse import OptionParser\n- logging.basicConfig(level=logging.DEBUG,\n- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") \n- parser = OptionParser("usage: %prog [options] <chimeras.breakpoint_sorted.txt> "\n- "<encomp.bam> <onemap.bam> <chimeras.out.txt>")\n- parser.add_option("--anchor-min", type="int", 
dest="anchor_min", default=4)\n- parser.add_option("--anchor-length", type="int", dest="anchor_length", default=8)\n- parser.add_option("--anchor-mismatches", type="int", dest="anchor_mismatches", default=0)\n- parser.add_option(\'--library\', dest="library_type", \n- default=LibraryTypes.FR_UNSTRANDED)\n- options, args = parser.parse_args()\n- breakpoint_chimera_file = args[0]\n- encomp_bam_file = args[1]\n- singlemap_bam_file = args[2]\n- output_chimera_file = args[4]\n- merge_spanning_alignments(breakpoint_chimera_file,\n- encomp_bam_file,\n- singlemap_bam_file,\n- output_chimera_file,\n- options.anchor_min, \n- options.anchor_length,\n- options.anchor_mismatches,\n- options.library_type)\n-\n-if __name__ == \'__main__\':\n- main()\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/nominate_chimeras.py --- a/chimerascan/pipeline/nominate_chimeras.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,275 +0,0 @@\n-\'\'\'\n-Created on Jul 21, 2011\n-\n-@author: mkiyer\n-\n-Copyright (C) 2011 Matthew Iyer\n-\n-This program is free software: you can redistribute it and/or modify\n-it under the terms of the GNU General Public License as published by\n-the Free Software Foundation, either version 3 of the License, or\n-(at your option) any later version.\n-\n-This program is distributed in the hope that it will be useful,\n-but WITHOUT ANY WARRANTY; without even the implied warranty of\n-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-GNU General Public License for more details.\n-\n-You should have received a copy of the GNU General Public License\n-along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\'\'\'\n-import logging\n-import os\n-import sys\n-import collections\n-import itertools\n-import operator\n-\n-from chimerascan import pysam\n-\n-from chimerascan.lib import config\n-from chimerascan.lib.chimera import DiscordantRead, Chimera, frags_to_encomp_string\n-from chimerascan.lib.gene_to_genome import build_tx_name_gene_map, build_genome_tx_trees\n-from chimerascan.lib.fragment_size_distribution import InsertSizeDistribution\n-from chimerascan.lib.seq import calc_homology\n-\n-def parse_discordant_bedpe_by_transcript_pair(fh):\n- prev_tx5p, prev_tx3p = None,None\n- frags = []\n- for line in fh:\n- fields = line.strip().split(\'\\t\') \n- tx5p = fields[0]\n- tx3p = fields[3]\n- dr5p = DiscordantRead.from_list(fields[10].split("|"))\n- dr3p = DiscordantRead.from_list(fields[11].split("|"))\n- if (tx5p, tx3p) != (prev_tx5p, prev_tx3p):\n- if len(frags) > 0:\n- yield prev_tx5p, prev_tx3p, frags\n- frags = []\n- prev_tx5p, prev_tx3p = tx5p, tx3p\n- frags.append((dr5p, dr3p))\n- if len(frags) > 0:\n- yield tx5p, tx3p, frags \n-\n-def calc_isize_prob(isize, isize_dist):\n- # find percentile of observing this insert size in the reads\n- isize_per = isize_dist.percentile_at_isize(isize)\n- # convert to a probability score 
(0.0-1.0)\n- isize_prob = 1.0 - (2.0 * abs(50.0 - isize_per))/100.0 \n- return isize_prob\n-\n-def choose_best_breakpoints(r5p, r3p, tx5p, tx3p, trim_bp, isize_dist):\n- best_breakpoints = set()\n- best_isize_prob = None\n- # iterate through 5\' transcript exons \n- exon_iter_5p = reversed(tx5p.exons) if tx5p.strand == \'-\' else iter(tx5p.exons)\n- tx_end_5p = 0\n- for exon_num_5p,coords5p in enumerate(exon_iter_5p):\n- genome_start_5p, genome_end_5p = coords5p \n- exon_size_5p = genome_end_5p - genome_start_5p\n- tx_end_5p += exon_size_5p\n- # fast forward on 5\' gene to first exon beyond read \n- if tx_end_5p < (r5p.aend - trim_bp):\n- continue \n- #print "tx end 5p", tx_end_5p, "exon_size_5p", exon_size_5p, "r5p.aend", r5p.aend, "trim_bp", trim_bp\n- # now have a candidate insert size between between 5\' read and\n- # end of 5\' exon\n- isize5p = tx_end_5p - r5p.pos\n- # iterate through 3\' transcript\n- exon_iter_3p = reversed(tx3p.exons) if tx3p.strand == \'-\' else iter(tx3p.exons)\n- tx_start_3p = 0\n- local_best_breakpoints = set()\n- local_best_isize_prob = None\n- for exon_num_3p,coords3p in enumerate(exon_iter_3p):\n- genome_start_3p, genome_end_3p = coords3p\n- #print "\\t", coords3p \n- # stop after going past read on 3\' transcript\n- if tx_start_3p >= (r3p.pos + trim_bp):\n- break\n- # get another candidate insert size between start of 3\'\n- # exon and 3\' read\n- isize3p = r3p.aend - tx_start_3p\n- #print "\\t", isize5p, isize3p, tx_end_5p, tx_start_3p\n- # compare the insert size against the known insert size\n- # distribution\n- isize_prob = calc_isize_prob(isize5p + isize3p, isize_dist)\n- if ((local_best_isize_prob i'..b'akpoint\n- breakpoint_seq_5p, breakpoint_seq_3p, homology_left, homology_right = \\\n- extract_breakpoint_sequence(config.GENE_REF_PREFIX + tx5p.tx_name, tx_end_5p,\n- config.GENE_REF_PREFIX + tx3p.tx_name, tx_start_3p,\n- ref_fa, max_read_length,\n- homology_mismatches) \n- tx3p_length = sum((end - start) for start,end in 
tx3p.exons)\n- # get unique breakpoint id based on sequence\n- breakpoint_seq = breakpoint_seq_5p + breakpoint_seq_3p\n- if breakpoint_seq in breakpoint_seq_name_map:\n- breakpoint_name = breakpoint_seq_name_map[breakpoint_seq]\n- else:\n- breakpoint_name = "B%07d" % (breakpoint_num)\n- breakpoint_seq_name_map[breakpoint_seq] = breakpoint_name\n- breakpoint_num += 1\n- # write gene, breakpoint, and raw reads to a file and follow the\n- # BEDPE format\n- gene_name_5p = \'_\'.join(tx5p.gene_name.split())\n- gene_name_3p = \'_\'.join(tx3p.gene_name.split())\n- fields = [tx5p.tx_name, 0, tx_end_5p, # chrom1, start1, end1\n- tx3p.tx_name, tx_start_3p, tx3p_length, # chrom2, start2, end2\n- "C%07d" % (chimera_num), # name\n- 1.0, # pvalue\n- tx5p.strand, tx3p.strand, # strand1, strand2\n- gene_name_5p, gene_name_3p, # gene names\n- # exon interval information\n- \'%d-%d\' % (0, exon_num_5p),\n- \'%d-%d\' % (exon_num_3p, len(tx3p.exons)),\n- # breakpoint information\n- breakpoint_name, \n- breakpoint_seq_5p, breakpoint_seq_3p, \n- homology_left, homology_right, \n- # fragments\n- frags_to_encomp_string(frags),\n- # spanning reads\n- None]\n- print >>outfh, \'\\t\'.join(map(str, fields))\n- chimera_num += 1\n- outfh.close()\n- ref_fa.close()\n- return config.JOB_SUCCESS\n- \n-\n-def main():\n- from optparse import OptionParser\n- logging.basicConfig(level=logging.DEBUG,\n- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n- parser = OptionParser("usage: %prog [options] <index> <isize_dist.txt> "\n- "<discordant_reads.srt.bedpe> <chimeras.txt>")\n- parser.add_option("--trim", dest="trim", type="int", \n- default=config.EXON_JUNCTION_TRIM_BP,\n- help="apply trimming when choosing exon boundaries to "\n- "to consider possible breakpoints")\n- parser.add_option("--max-read-length", dest="max_read_length", type="int",\n- default=100, metavar="N",\n- help="Reads in the BAM file are guaranteed to have "\n- "length less than N [default=%default]")\n- 
parser.add_option("--homology-mismatches", type="int", \n- dest="homology_mismatches", \n- default=config.BREAKPOINT_HOMOLOGY_MISMATCHES,\n- help="Number of mismatches to tolerate when computing "\n- "homology between gene and its chimeric partner "\n- "[default=%default]")\n- options, args = parser.parse_args()\n- index_dir = args[0]\n- isize_dist_file = args[1]\n- input_file = args[2]\n- output_file = args[3]\n- return nominate_chimeras(index_dir, isize_dist_file, \n- input_file, output_file, \n- options.trim,\n- options.max_read_length,\n- options.homology_mismatches)\n-\n-\n-if __name__ == \'__main__\':\n- sys.exit(main())\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/nominate_spanning_reads.py --- a/chimerascan/pipeline/nominate_spanning_reads.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,290 +0,0 @@\n-\'\'\'\n-Created on Jan 30, 2011\n-\n-@author: mkiyer\n-\n-chimerascan: chimeric transcript discovery using RNA-seq\n-\n-Copyright (C) 2011 Matthew Iyer\n-\n-This program is free software: you can redistribute it and/or modify\n-it under the terms of the GNU General Public License as published by\n-the Free Software Foundation, either version 3 of the License, or\n-(at your option) any later version.\n-\n-This program is distributed in the hope that it will be useful,\n-but WITHOUT ANY WARRANTY; without even the implied warranty of\n-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-GNU General Public License for more details.\n-\n-You should have received a copy of the GNU General Public License\n-along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\'\'\'\n-import logging\n-import os\n-\n-from chimerascan import pysam\n-\n-from chimerascan.lib import config\n-from chimerascan.lib.base import LibraryTypes\n-from chimerascan.lib.sam import parse_pe_reads\n-from chimerascan.lib.chimera import Chimera, OrientationTags, ORIENTATION_TAG_NAME\n-from chimerascan.lib.batch_sort import batch_sort\n-from chimerascan.lib.seq import DNA_reverse_complement\n-from chimerascan.pipeline.find_discordant_reads import get_gene_orientation\n-\n-def to_fastq(qname, readnum, seq, qual, is_reverse=False):\n- if is_reverse:\n- seq = DNA_reverse_complement(seq)\n- qual = qual[::-1]\n- return "@%s/%d\\n%s\\n+\\n%s" % (qname, readnum+1, seq, qual)\n-\n-def nominate_encomp_spanning_reads(chimera_file, output_fastq_file):\n- """\n- find all encompassing reads that should to be remapped to see if they\n- span the breakpoint junction\n- """\n- fqfh = open(output_fastq_file, "w")\n- remap_qnames = set()\n- for c in Chimera.parse(open(chimera_file)):\n- # find breakpoint coords of chimera\n- end5p = c.tx_end_5p\n- start3p = c.tx_start_3p\n- for r5p,r3p in c.encomp_frags: \n- # if 5\' read overlaps breakpoint then it should be remapped\n- if 
r5p.clipstart < end5p < r5p.clipend:\n- key5p = (r5p.qname, r5p.readnum)\n- if key5p not in remap_qnames:\n- remap_qnames.add((r5p.qname, r5p.readnum))\n- print >>fqfh, to_fastq(r5p.qname, r5p.readnum, \n- r5p.seq, "I" * len(r5p.seq),\n- is_reverse=r5p.is_reverse)\n- # if 3\' read overlaps breakpoint then it should be remapped\n- if r3p.clipstart < start3p < r3p.clipend:\n- key3p = (r3p.qname, r3p.readnum)\n- if key3p not in remap_qnames:\n- remap_qnames.add((r3p.qname, r3p.readnum))\n- print >>fqfh, to_fastq(r3p.qname, r3p.readnum, \n- r3p.seq, "I" * len(r3p.seq),\n- is_reverse=r3p.is_reverse)\n- fqfh.close()\n- return config.JOB_SUCCESS\n-\n-def parse_chimeras_by_gene(chimera_file, orientation):\n- clist = []\n- prev_tx_name = None\n- for c in Chimera.parse(open(chimera_file)):\n- tx_name = c.tx_name_5p if (orientation == OrientationTags.FIVEPRIME) else c.tx_name_3p\n- if prev_tx_name != tx_name:\n- if len(clist) > 0:\n- yield prev_tx_name, clist\n- clist = []\n- prev_tx_name = tx_name\n- clist.append(c)\n- if len(clist) > 0:\n- yield prev_tx_name, clist\n-\n-def parse_reads_by_rname(bamfh, orientation):\n- """\n- reads must be sorted and include an orientation tag\n- """\n- reads = []\n- prev_rname = None\n- for r in bamfh:\n- o = r.opt(ORIENTATION_TAG_NAME)\n- if o != orientation:\n- continue\n- if prev_rname != r.rname:\n- if len(reads) > 0:\n- yield reads\n- reads = []\n- prev_rname = r.rname\n- reads.append(r)\n- if len(reads) > 0:\n- yield r'..b't >>f, \'\\t\'.join(map(str, [r.qname, readnum, r.opt("R2"), r.opt("Q2")]))\n- # sort chimeras by 3\' partner\n- logging.debug("Sorting chimeras by 3\' transcript")\n- def sort_by_3p_partner(line):\n- fields = line.strip().split(\'\\t\', Chimera.TX_NAME_3P_FIELD+1)\n- return fields[Chimera.TX_NAME_3P_FIELD]\n- tmp_chimera_file_sorted_3p = os.path.join(tmp_dir, "tmp_chimeras.sorted3p.bedpe")\n- batch_sort(input=chimera_file,\n- output=tmp_chimera_file_sorted_3p,\n- key=sort_by_3p_partner,\n- buffer_size=32000,\n- 
tempdirs=[tmp_dir])\n- # search for matches to 3\' chimeras\n- logging.debug("Matching single-mapped frags to 3\' chimeras")\n- for clist, reads in parse_sync_chimera_with_bam(tmp_chimera_file_sorted_3p, \n- single_mapped_bam_file,\n- OrientationTags.THREEPRIME):\n- # TODO: test more specifically that read has a chance to cross breakpoint\n- for r in reads:\n- # reverse read number\n- readnum = 1 if r.is_read1 else 0\n- print >>f, \'\\t\'.join(map(str, [r.qname, readnum, r.opt("R2"), r.opt("Q2")]))\n- f.close()\n- #\n- # now sort the file of sequences by read name/number to \n- # eliminate duplicates\n- # \n- def sort_by_qname(line):\n- fields = line.strip().split(\'\\t\')\n- return (fields[0], int(fields[1]))\n- tmp_sorted_seqs_to_remap = os.path.join(tmp_dir, "tmp_singlemap_seqs.sorted.txt")\n- batch_sort(input=tmp_seqs_to_remap,\n- output=tmp_sorted_seqs_to_remap,\n- key=sort_by_qname,\n- buffer_size=32000,\n- tempdirs=[tmp_dir])\n- #\n- # read file and write fastq, ignoring duplicates\n- # \n- fqfh = open(single_mapped_fastq_file, "w")\n- prev = None\n- for line in open(tmp_sorted_seqs_to_remap):\n- fields = line.strip().split(\'\\t\')\n- qname, readnum, seq, qual = fields[0], int(fields[1]), fields[2], fields[3]\n- cur = (fields[0], int(fields[1]))\n- if prev != cur:\n- if prev is not None: \n- print >>fqfh, to_fastq(qname, readnum, seq, qual)\n- prev = cur\n- if prev is not None:\n- print >>fqfh, to_fastq(qname, readnum, seq, qual)\n- fqfh.close()\n- # TODO: remove temporary files\n- #os.remove(tmp_chimera_file_sorted_3p)\n- #os.remove(tmp_seqs_to_remap)\n- #os.remove(tmp_sorted_seqs_to_remap)\n- return config.JOB_SUCCESS\n-\n-\n-def main():\n- from optparse import OptionParser\n- logging.basicConfig(level=logging.DEBUG,\n- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n- parser = OptionParser("usage: %prog [options] <chimeras.txt> "\n- "<unmapped_reads.bam> <encomp_remap.fq> "\n- "<singlemap_remap.fq> "\n- "<unmapped_remap.fq> ")\n- 
parser.add_option(\'--library\', dest="library_type", \n- default=LibraryTypes.FR_UNSTRANDED)\n- options, args = parser.parse_args()\n- chimera_file = args[0]\n- bam_file = args[1]\n- encomp_remap_fastq_file = args[2]\n- singlemap_remap_fastq_file = args[3]\n- unmapped_remap_fastq_file = args[4]\n- nominate_encomp_spanning_reads(chimera_file, encomp_remap_fastq_file)\n- extract_single_mapped_reads(chimera_file, \n- bam_file,\n- "single_mapped_reads.srt.bam",\n- unmapped_remap_fastq_file,\n- options.library_type,\n- "/tmp") \n- nominate_single_mapped_spanning_reads(chimera_file, \n- "single_mapped_reads.srt.bam",\n- singlemap_remap_fastq_file, \n- "/tmp")\n-\n-if __name__ == \'__main__\':\n- main()\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/profile_insert_size.py --- a/chimerascan/pipeline/profile_insert_size.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,61 +0,0 @@ -''' -Created on Jan 24, 2011 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import logging -import sys -# local imports -from chimerascan import pysam -from chimerascan.lib.fragment_size_distribution import InsertSizeDistribution - -def main(): - from optparse import OptionParser - logging.basicConfig(level=logging.DEBUG, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") - parser = OptionParser("usage: %prog [options] <bam> <out.bedpe>") - parser.add_option('--min-fragment-length', dest="min_fragment_length", - type="int", default=0) - parser.add_option('--max-fragment-length', dest="max_fragment_length", - type="int", default=1000) - parser.add_option('--max-samples', dest="max_samples", - type="int", default=None) - parser.add_option('-o', dest="output_file", default=None) - options, args = parser.parse_args() - input_bam_file = args[0] - bamfh = pysam.Samfile(input_bam_file, "rb") - isizedist = InsertSizeDistribution.from_bam(bamfh, options.min_fragment_length, - options.max_fragment_length, - options.max_samples) - bamfh.close() - if options.output_file is not None: - f = open(options.output_file, "w") - else: - f = sys.stdout - isizedist.to_file(f) - if options.output_file is not None: - f.close() - logging.info("Insert size samples=%d mean=%f 
std=%f median=%d mode=%d" % - (isizedist.n, isizedist.mean(), isizedist.std(), - isizedist.percentile(50.0), isizedist.mode())) - - -if __name__ == '__main__': - main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/resolve_discordant_reads.py --- a/chimerascan/pipeline/resolve_discordant_reads.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b"@@ -1,287 +0,0 @@\n-'''\n-Created on Jul 28, 2011\n-\n-@author: mkiyer\n-\n-chimerascan: chimeric transcript discovery using RNA-seq\n-\n-Copyright (C) 2011 Matthew Iyer\n-\n-This program is free software: you can redistribute it and/or modify\n-it under the terms of the GNU General Public License as published by\n-the Free Software Foundation, either version 3 of the License, or\n-(at your option) any later version.\n-\n-This program is distributed in the hope that it will be useful,\n-but WITHOUT ANY WARRANTY; without even the implied warranty of\n-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-GNU General Public License for more details.\n-\n-You should have received a copy of the GNU General Public License\n-along with this program. If not, see <http://www.gnu.org/licenses/>.\n-'''\n-import logging\n-import collections\n-import os\n-\n-from chimerascan.lib.chimera import Chimera\n-from chimerascan.lib.fragment_size_distribution import InsertSizeDistribution\n-from chimerascan.lib.batch_sort import batch_sort\n-\n-QNAME_COL = 0\n-CHIMERA_NAME_COL = 5\n-SCORE_FIELDS = (6,7,8,9,10)\n-\n-class ChimeraStats(object):\n- __slots__ = ('qname', 'tid5p', 'pos5p', 'tid3p', 'pos3p', \n- 'chimera_name', 'num_spanning_frags', 'num_unambiguous_frags',\n- 'num_uniquely_aligning_frags', 'neg_mismatches',\n- 'isize_prob')\n-\n- @property\n- def score_tuple(self):\n- return (self.num_spanning_frags,\n- self.num_unambiguous_frags,\n- self.num_uniquely_aligning_frags,\n- self.neg_mismatches,\n- self.isize_prob)\n-\n- def to_list(self):\n- return [self.qname,\n- self.tid5p, self.pos5p,\n- self.tid3p, self.pos3p, \n- self.chimera_name,\n- self.num_spanning_frags,\n- self.num_unambiguous_frags,\n- self.num_uniquely_aligning_frags,\n- self.neg_mismatches,\n- self.isize_prob]\n-\n- @staticmethod\n- def from_list(fields):\n- s = ChimeraStats()\n- s.qname = fields[0]\n- s.tid5p = int(fields[1])\n- s.pos5p = int(fields[2])\n- s.tid3p = int(fields[3])\n- s.pos3p = 
int(fields[4])\n- s.chimera_name = fields[5]\n- s.num_spanning_frags = int(fields[6])\n- s.num_unambiguous_frags = int(fields[7])\n- s.num_uniquely_aligning_frags = int(fields[8])\n- s.neg_mismatches = int(fields[9])\n- s.isize_prob = float(fields[10])\n- return s\n-\n- @staticmethod\n- def parse(line_iter):\n- for line in line_iter:\n- fields = line.strip().split('\\t')\n- yield ChimeraStats.from_list(fields)\n-\n-def calc_isize_prob(isize, isize_dist):\n- # find percentile of observing this insert size in the reads\n- isize_per = isize_dist.percentile_at_isize(isize)\n- # convert to a probability score (0.0-1.0)\n- isize_prob = 1.0 - (2.0 * abs(50.0 - isize_per))/100.0 \n- return isize_prob\n-\n-def group_by_attr(item_iter, attr):\n- mylist = []\n- prev = None\n- for itm in item_iter:\n- cur = getattr(itm, attr)\n- if prev != cur:\n- if len(mylist) > 0:\n- yield prev, mylist\n- mylist = []\n- prev = cur\n- mylist.append(itm)\n- if len(mylist) > 0:\n- yield prev, mylist\n-\n-#def group_by_field(item_iter, colnum):\n-# mylist = []\n-# prev = None\n-# for fields in item_iter:\n-# # parse read stats information\n-# cur = fields[colnum]\n-# if prev != cur:\n-# if len(mylist) > 0:\n-# yield prev, mylist\n-# mylist = []\n-# prev = cur\n-# mylist.append(fields)\n-# if len(mylist) > 0:\n-# yield prev, mylist\n-\n-def parse_sync_chimeras_read_stats(chimera_file, read_stats_file):\n- # group reads by chimera name\n- read_stats_iter = group_by_attr(ChimeraStats.parse(open(read_stats_file)), \n- "..b'- resolved_read_stats_file = os.path.join(tmp_dir, "read_stats.rname_sorted.resolved.txt")\n- f = open(resolved_read_stats_file, "w")\n- for rname,readstats in group_by_attr(ChimeraStats.parse(open(sorted_read_stats_file)), \n- \'qname\'):\n- # build a dictionary of stats -> read/chimeras\n- stats_dict = collections.defaultdict(lambda: [])\n- for s in readstats:\n- # add key/value pairs\n- stats_dict[s.score_tuple].append(s)\n- # sort based on stats\n- sorted_stats_keys = 
sorted(stats_dict.keys(), reverse=True)\n- # use only the best key\n- for s in stats_dict[sorted_stats_keys[0]]:\n- # output read -> chimera relationships\n- print >>f, \'\\t\'.join(map(str, s.to_list()))\n- f.close()\n- #\n- # re-sort by chimera name\n- #\n- logging.debug("Resorting reads by chimera name")\n- def sort_reads_by_chimera_name(line):\n- return line.strip().split(\'\\t\',CHIMERA_NAME_COL+1)[CHIMERA_NAME_COL]\n- sorted_resolved_read_stats_file = os.path.join(tmp_dir, "read_stats.chimera_name_sorted.resolved.txt")\n- batch_sort(input=resolved_read_stats_file,\n- output=sorted_resolved_read_stats_file,\n- key=sort_reads_by_chimera_name,\n- buffer_size=32000,\n- tempdirs=[tmp_dir])\n- logging.debug("Resorting chimeras by name")\n- def sort_chimeras_by_name(line):\n- return line.strip().split(\'\\t\',Chimera.NAME_FIELD+1)[Chimera.NAME_FIELD]\n- sorted_chimera_file = os.path.join(tmp_dir, "spanning_chimeras.name_sorted.txt")\n- batch_sort(input=input_file,\n- output=sorted_chimera_file,\n- key=sort_chimeras_by_name,\n- buffer_size=32000,\n- tempdirs=[tmp_dir])\n- #\n- # parse and rebuild chimeras based on best reads\n- # \n- logging.debug("Rewriting chimeras with lists of \'best\' reads")\n- f = open(output_file, "w")\n- # need to sync chimeras with stats\n- for c,stats in parse_sync_chimeras_read_stats(sorted_chimera_file, sorted_resolved_read_stats_file):\n- # parse and make lookup set of the resolved alignments\n- good_alignments = set()\n- for s in stats:\n- if s.isize_prob < min_isize_prob:\n- continue\n- good_alignments.add((s.qname, s.tid5p, s.pos5p, s.tid3p, s.pos3p))\n- # replace encompassing frags with resolved alignments\n- new_encomp_frags = []\n- for dpair in c.encomp_frags:\n- # get alignment tuple\n- aln = (dpair[0].qname, dpair[0].tid, dpair[0].pos, dpair[1].tid, dpair[1].pos)\n- if aln in good_alignments:\n- new_encomp_frags.append(dpair)\n- c.encomp_frags = new_encomp_frags\n- c.score = c.get_num_frags()\n- print >>f, \'\\t\'.join(map(str, 
c.to_list()))\n- f.close()\n- # remove temporary files\n- #os.remove(read_stats_file)\n- #os.remove(sorted_read_stats_file)\n- #os.remove(resolved_read_stats_file)\n- #os.remove(sorted_resolved_read_stats_file)\n- #os.remove(sorted_chimera_file)\n-\n-def main():\n- from optparse import OptionParser\n- logging.basicConfig(level=logging.DEBUG,\n- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n- parser = OptionParser("usage: %prog [options] <in.txt> <out.txt> <isizedist.txt>")\n- parser.add_option("--min-isize-prob", dest="min_isize_prob", \n- type="float", default=0.01)\n- options, args = parser.parse_args()\n- input_file = args[0]\n- output_file = args[1]\n- isize_dist_file = args[2]\n- # read insert size distribution\n- isize_dist = InsertSizeDistribution.from_file(open(isize_dist_file))\n- resolve_discordant_reads(input_file, output_file, isize_dist, \n- options.min_isize_prob,\n- tmp_dir=".")\n-\n-if __name__ == \'__main__\':\n- main()\n\\ No newline at end of file\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/sam2bam.py --- a/chimerascan/pipeline/sam2bam.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,86 +0,0 @@ -''' -Created on Jun 2, 2011 - -@author: mkiyer -''' -import logging - -# local imports -import chimerascan.pysam as pysam -from chimerascan.lib.fix_alignment_ordering import fix_alignment_ordering, fix_sr_alignment_ordering -from chimerascan.lib.sam import soft_pad_read -from chimerascan.lib.seq import FASTQ_QUAL_FORMATS, SANGER_FORMAT, parse_fastq_record - -def sam_to_bam(input_fastq_files, input_sam_file, output_bam_file, - quals, multihits, pe_sr_mode=False, softclip=True, - keep_unmapped=True): - samfh = pysam.Samfile(input_sam_file, "r") - num_unmapped = 0 - num_multihits = 0 - num_frags = 0 - bamfh = pysam.Samfile(output_bam_file, "wb", template=samfh) - # setup fastq parsing - if softclip and (quals != SANGER_FORMAT): - kwargs = {"convert_quals": True, "qual_format": quals} - else: - kwargs = {"convert_quals": False} - fqiters = [parse_fastq_record(open(fq), **kwargs) for fq in input_fastq_files] - - # handle single-read and paired-end - if len(fqiters) == 1: - reorder_func = fix_sr_alignment_ordering(samfh, fqiters[0]) - else: - reorder_func = fix_alignment_ordering(samfh, fqiters, pe_sr_mode) - # iterate through buffer - for bufitems in reorder_func: - num_frags += 1 - for bufitem in bufitems: - for r in bufitem.reads: - # softclip uses the fastq record to replace the sequence - # and quality scores of the read - if softclip: - soft_pad_read(bufitem.fqrec, r) - # keep statistics of unmapped/multimapped reads and - # suppress output if 'keep_unmapped' is False - if r.is_unmapped: - xm_tag = r.opt('XM') - if xm_tag < multihits: - num_unmapped += 1 - if not keep_unmapped: - continue - else: - num_multihits += 1 - bamfh.write(r) - for fqfh in fqiters: - fqfh.close() - bamfh.close() - samfh.close() - logging.debug("Found %d fragments" % (num_frags)) - logging.debug("\t%d unmapped reads" % (num_unmapped)) - logging.debug("\t%d multimapping (>%dX) reads" % - (num_multihits, multihits)) - -if __name__ == '__main__': - from optparse import 
OptionParser - logging.basicConfig(level=logging.DEBUG, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") - parser = OptionParser("usage: %prog [options] <out.bam> <in.sam> <in1.fq> [<in2.fq>]") - parser.add_option("--multihits", type="int", dest="multihits", default=100) - parser.add_option("--quals", dest="quals", - choices=FASTQ_QUAL_FORMATS, - default=SANGER_FORMAT) - parser.add_option("--pesr", action="store_true", dest="pe_sr_mode", default=False) - parser.add_option("--softclip", action="store_true", dest="softclip", default=False) - parser.add_option("--un", action="store_true", dest="keep_unmapped", default=False) - options, args = parser.parse_args() - output_bam_file = args[0] - input_sam_file = args[1] - input_fastq_files = args[2:] - sam_to_bam(input_fastq_files, - input_sam_file, - output_bam_file, - quals=options.quals, - multihits=options.multihits, - pe_sr_mode=options.pe_sr_mode, - softclip=options.softclip, - keep_unmapped=options.keep_unmapped) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pipeline/write_output.py --- a/chimerascan/pipeline/write_output.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,185 +0,0 @@\n-\'\'\'\n-Created on Jul 1, 2011\n-\n-@author: mkiyer\n-\n-chimerascan: chimeric transcript discovery using RNA-seq\n-\n-Copyright (C) 2011 Matthew Iyer\n-\n-This program is free software: you can redistribute it and/or modify\n-it under the terms of the GNU General Public License as published by\n-the Free Software Foundation, either version 3 of the License, or\n-(at your option) any later version.\n-\n-This program is distributed in the hope that it will be useful,\n-but WITHOUT ANY WARRANTY; without even the implied warranty of\n-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-GNU General Public License for more details.\n-\n-You should have received a copy of the GNU General Public License\n-along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\'\'\'\n-import logging\n-import os\n-import sys\n-import operator\n-import collections\n-\n-from chimerascan import pysam\n-from chimerascan.lib.chimera import Chimera, get_chimera_type\n-from chimerascan.lib import config\n-from chimerascan.lib.gene_to_genome import build_transcript_genome_map, \\\n- build_transcript_cluster_map, build_genome_tx_trees, \\\n- build_tx_name_gene_map, transcript_to_genome_pos\n-\n-from chimerascan.pipeline.filter_chimeras import get_wildtype_frags\n-\n-\n-def get_chimera_groups(input_file, gene_file):\n- # build a lookup table to get gene clusters from transcript name \n- transcript_cluster_map = build_transcript_cluster_map(open(gene_file))\n- # build a lookup table to get genome coordinates from transcript \n- # coordinates\n- # TODO: can either group by exact breakpoint, or just by\n- # gene cluster\n- # transcript_genome_map = build_transcript_genome_map(open(gene_file))\n- # group chimeras in the same genomic cluster with the same\n- # breakpoint\n- cluster_chimera_dict = collections.defaultdict(lambda: [])\n- for c in Chimera.parse(open(input_file)):\n- # get cluster of overlapping genes\n- cluster5p = 
transcript_cluster_map[c.tx_name_5p]\n- cluster3p = transcript_cluster_map[c.tx_name_3p]\n- # get genomic positions of breakpoints\n- #coord5p = transcript_to_genome_pos(c.partner5p.tx_name, c.partner5p.end-1, transcript_genome_map)\n- #coord3p = transcript_to_genome_pos(c.partner3p.tx_name, c.partner3p.start, transcript_genome_map)\n- # add to dictionary\n- cluster_chimera_dict[(cluster5p,cluster3p)].append(c)\n- # TODO: use this grouping instead?\n- #cluster_chimera_dict[(cluster5p,cluster3p,coord5p,coord3p)].append(c)\n- for key,chimeras in cluster_chimera_dict.iteritems():\n- yield key,chimeras\n-\n-def get_best_coverage_chimera(chimeras):\n- stats = []\n- for c in chimeras:\n- # TODO: come up with a way to prioritize here (spanning included?)\n- stats.append((c,\n- c.get_num_unique_positions(),\n- c.get_num_frags()))\n- sorted_stats = sorted(stats, key=operator.itemgetter(1,2), reverse=True)\n- return sorted_stats[0][0]\n-\n-def write_output(input_file, bam_file, output_file, index_dir):\n- gene_file = os.path.join(index_dir, config.GENE_FEATURE_FILE)\n- # build a lookup table to get genome coordinates from transcript \n- # coordinates\n- transcript_genome_map = build_transcript_genome_map(open(gene_file)) \n- tx_name_gene_map = build_tx_name_gene_map(gene_file) \n- genome_tx_trees = build_genome_tx_trees(gene_file)\n- # open BAM file for checking wild-type isoform\n- bamfh = pysam.Samfile(bam_file, "rb") \n- # group chimera isoforms together\n- lines = []\n- chimera_clusters = 0\n- for key,chimeras in get_chimera_groups(input_file, gene_file):\n- txs5p = set()\n- txs3p = set()\n- genes5p = set()\n- genes3p = set()\n- names = set()\n- for c in chimeras:\n- txs5p.add("%s:%d-%d" % (c.tx_name_5p, c.tx_start_5p, c.tx_end_5p-1))\n- txs3p.add("%s:%d-%d" % (c.tx_name_3p, c.tx_start_3p, c.tx_end_3p-1))\n'..b' genome_tx_trees)\n- # get genomic positions of chimera\n- chrom5p,strand5p,start5p = transcript_to_genome_pos(c.tx_name_5p, c.tx_start_5p, 
transcript_genome_map)\n- chrom5p,strand5p,end5p = transcript_to_genome_pos(c.tx_name_5p, c.tx_end_5p-1, transcript_genome_map)\n- if strand5p == 1:\n- start5p,end5p = end5p,start5p\n- chrom3p,strand3p,start3p = transcript_to_genome_pos(c.tx_name_3p, c.tx_start_3p, transcript_genome_map)\n- chrom3p,strand3p,end3p = transcript_to_genome_pos(c.tx_name_3p, c.tx_end_3p-1, transcript_genome_map)\n- if strand3p == 1:\n- start3p,end3p = end3p,start3p\n- # get breakpoint spanning sequences\n- spanning_seqs = set()\n- spanning_fasta_lines = []\n- for dr in c.get_spanning_reads():\n- if dr.seq in spanning_seqs:\n- continue\n- spanning_seqs.add(dr.seq)\n- spanning_fasta_lines.extend([">%s/%d;pos=%d;strand=%s" % \n- (dr.qname, dr.readnum+1, dr.pos, \n- "-" if dr.is_reverse else "+"), \n- dr.seq])\n- # get isoform fraction\n- num_wt_frags_5p, num_wt_frags_3p = get_wildtype_frags(c, bamfh)\n- num_chimeric_frags = c.get_num_frags()\n- frac5p = float(num_chimeric_frags) / (num_chimeric_frags + num_wt_frags_5p)\n- frac3p = float(num_chimeric_frags) / (num_chimeric_frags + num_wt_frags_3p)\n- # setup fields of BEDPE file\n- fields = [chrom5p, start5p, end5p,\n- chrom3p, start3p, end3p,\n- "CLUSTER%d" % (chimera_clusters),\n- c.get_num_frags(),\n- "+" if (strand5p == 0) else "-",\n- "+" if (strand3p == 0) else "-",\n- \',\'.join(txs5p),\n- \',\'.join(txs3p),\n- \',\'.join(genes5p),\n- \',\'.join(genes3p),\n- chimera_type, distance,\n- c.get_num_frags(),\n- c.get_num_spanning_frags(),\n- c.get_num_unique_positions(),\n- frac5p, frac3p,\n- \',\'.join(spanning_fasta_lines),\n- \',\'.join(names)]\n- lines.append(fields)\n- chimera_clusters += 1\n- bamfh.close()\n- logging.debug("Clustered chimeras: %d" % (chimera_clusters))\n- # sort\n- lines = sorted(lines, key=operator.itemgetter(18, 17, 16), reverse=True) \n- f = open(output_file, "w")\n- print >>f, \'\\t\'.join([\'#chrom5p\', \'start5p\', \'end5p\', \n- \'chrom3p\', \'start3p\', \'end3p\',\n- \'chimera_cluster_id\', \'score\', \n- 
\'strand5p\', \'strand3p\',\n- \'transcript_ids_5p\', \'transcript_ids_3p\',\n- \'genes5p\', \'genes3p\',\n- \'type\', \'distance\',\n- \'total_frags\', \n- \'spanning_frags\',\n- \'unique_alignment_positions\',\n- \'isoform_fraction_5p\',\n- \'isoform_fraction_3p\',\n- \'breakpoint_spanning_reads\',\n- \'chimera_ids\'])\n- for fields in lines:\n- print >>f, \'\\t\'.join(map(str, fields))\n- f.close()\n- return config.JOB_SUCCESS\n-\n-def main():\n- from optparse import OptionParser\n- logging.basicConfig(level=logging.DEBUG,\n- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n- parser = OptionParser("usage: %prog [options] <index_dir> <in.txt> <bam_file> <out.txt>")\n- options, args = parser.parse_args()\n- index_dir = args[0]\n- input_file = args[1]\n- bam_file = args[2]\n- output_file = args[3]\n- return write_output(input_file, bam_file, output_file, index_dir)\n-\n-if __name__ == "__main__":\n- sys.exit(main())\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/COPYING --- a/chimerascan/pysam/COPYING Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,21 +0,0 @@ -The MIT License - -Copyright (c) 2008-2009 Genome Research Ltd. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/Pileup.py --- a/chimerascan/pysam/Pileup.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,59 +0,0 @@ -'''Tools for working with files in the samtools pileup -c format.''' -import collections - -PileupSubstitution = collections.namedtuple( "PileupSubstitution", - " ".join( (\ - "chromosome", - "position", - "reference_base", - "consensus_base", - "consensus_quality", - "snp_quality", - "rms_mapping_quality", - "coverage", - "read_bases", - "base_qualities" ) ) ) - -PileupIndel = collections.namedtuple( "PileupIndel", - " ".join( (\ - "chromosome", - "position", - "reference_base", - "genotype", - "consensus_quality", - "snp_quality", - "rms_mapping_quality", - "coverage", - "first_allelle", - "second_allele", - "reads_first", - "reads_second", - "reads_diff" ) ) ) - -def iterate( infile ): - '''iterate over ``samtools pileup -c`` formatted file. - - *infile* can be any iterator over a lines. - - The function yields named tuples of the type :class:`pysam.Pileup.PileupSubstitution` - or :class:`pysam.Pileup.PileupIndel`. - - .. note:: - The parser converts to 0-based coordinates - ''' - - conv_subst = (str,lambda x: int(x)-1,str,str,int,int,int,int,str,str) - conv_indel = (str,lambda x: int(x)-1,str,str,int,int,int,int,str,str,int,int,int) - - for line in infile: - d = line[:-1].split() - if d[2] == "*": - try: - yield PileupIndel( *[x(y) for x,y in zip(conv_indel,d) ] ) - except TypeError: - raise SamtoolsError( "parsing error in line: `%s`" % line) - else: - try: - yield PileupSubstitution( *[x(y) for x,y in zip(conv_subst,d) ] ) - except TypeError: - raise SamtoolsError( "parsing error in line: `%s`" % line) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/__init__.py --- a/chimerascan/pysam/__init__.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,110 +0,0 @@ -from csamtools import * -from ctabix import * -import csamtools -import ctabix -import Pileup -import sys -import os - -class SamtoolsError( Exception ): - '''exception raised in case of an error incurred in the samtools library.''' - - def __init__(self, value): - self.value = value - def __str__(self): - return repr(self.value) - -class SamtoolsDispatcher(object): - '''samtools dispatcher. - - Emulates the samtools command line as module calls. - - Captures stdout and stderr. - - Raises a :class:`pysam.SamtoolsError` exception in case - samtools exits with an error code other than 0. - - Some command line options are associated with parsers. - For example, the samtools command "pileup -c" creates - a tab-separated table on standard output. In order to - associate parsers with options, an optional list of - parsers can be supplied. The list will be processed - in order checking for the presence of each option. - - If no parser is given or no appropriate parser is found, - the stdout output of samtools commands will be returned. - ''' - dispatch=None - parsers=None - - def __init__(self,dispatch, parsers): - self.dispatch = dispatch - self.parsers = parsers - self.stderr = [] - - def __call__(self,*args, **kwargs): - '''execute the samtools command - ''' - retval, stderr, stdout = csamtools._samtools_dispatch( self.dispatch, args ) - if retval: raise SamtoolsError( "\n".join( stderr ) ) - self.stderr = stderr - # samtools commands do not propagate the return code correctly. - # I have thus added this patch to throw if there is output on stderr. - # Note that there is sometimes output on stderr that is not an error, - # for example: [sam_header_read2] 2 sequences loaded. 
- # Ignore messages like these - stderr = [x for x in stderr - if not (x.startswith( "[sam_header_read2]" ) or - x.startswith("[bam_index_load]") or - x.startswith("[bam_sort_core]") or \ - x.startswith("[samopen] SAM header is present"))] - if stderr: raise SamtoolsError( "\n".join( stderr ) ) - # call parser for stdout: - if not kwargs.get("raw") and stdout and self.parsers: - for options, parser in self.parsers: - for option in options: - if option not in args: break - else: - return parser(stdout) - - return stdout - - def getMessages( self ): - return self.stderr - - def usage(self): - '''return the samtools usage information for this command''' - retval, stderr, stdout = csamtools._samtools_dispatch( self.dispatch ) - return "".join(stderr) - -# -# samtools command line options to export in python -# -# import is a python reserved word. -SAMTOOLS_DISPATCH = { - "view" : ( "view", None ), - "sort" : ( "sort", None), - "samimport": ( "import", None), - "pileup" : ( "pileup", ( (("-c",), Pileup.iterate ), ), ), - "faidx" : ("faidx", None), - "tview" : ("tview", None), - "index" : ("index", None), - "fixmate" : ("fixmate", None), - "glfview" : ("glfview", None), - "flagstat" : ("flagstat", None), - "calmd" : ("calmd", None), - "merge" : ("merge", None), - "rmdup" : ("rmdup", None) } - -# instantiate samtools commands as python functions -for key, options in SAMTOOLS_DISPATCH.iteritems(): - cmd, parser = options - globals()[key] = SamtoolsDispatcher(cmd, parser) - -# hack to export all the symbols from csamtools -__all__ = csamtools.__all__ + \ - ctabix.__all__ + \ - [ "SamtoolsError", "SamtoolsDispatcher" ] + list(SAMTOOLS_DISPATCH) +\ - ["Pileup",] - -from version import __version__, __samtools_version__ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/csamtools.c --- a/chimerascan/pysam/csamtools.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,24858 +0,0 @@\n-/* Generated by Cython 0.13 on Mon Jan 31 00:58:16 2011 */\n-\n-#define PY_SSIZE_T_CLEAN\n-#include "Python.h"\n-#ifndef Py_PYTHON_H\n- #error Python headers needed to compile C extensions, please install development version of Python.\n-#else\n-\n-#include <stddef.h> /* For offsetof */\n-#ifndef offsetof\n-#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )\n-#endif\n-\n-#if !defined(WIN32) && !defined(MS_WINDOWS)\n- #ifndef __stdcall\n- #define __stdcall\n- #endif\n- #ifndef __cdecl\n- #define __cdecl\n- #endif\n- #ifndef __fastcall\n- #define __fastcall\n- #endif\n-#endif\n-\n-#ifndef DL_IMPORT\n- #define DL_IMPORT(t) t\n-#endif\n-#ifndef DL_EXPORT\n- #define DL_EXPORT(t) t\n-#endif\n-\n-#ifndef PY_LONG_LONG\n- #define PY_LONG_LONG LONG_LONG\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02040000\n- #define METH_COEXIST 0\n- #define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type)\n- #define PyDict_Contains(d,o) PySequence_Contains(d,o)\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02050000\n- typedef int Py_ssize_t;\n- #define PY_SSIZE_T_MAX INT_MAX\n- #define PY_SSIZE_T_MIN INT_MIN\n- #define PY_FORMAT_SIZE_T ""\n- #define PyInt_FromSsize_t(z) PyInt_FromLong(z)\n- #define PyInt_AsSsize_t(o) PyInt_AsLong(o)\n- #define PyNumber_Index(o) PyNumber_Int(o)\n- #define PyIndex_Check(o) PyNumber_Check(o)\n- #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02060000\n- #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)\n- #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)\n- #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)\n- #define PyVarObject_HEAD_INIT(type, size) \\\n- PyObject_HEAD_INIT(type) size,\n- #define PyType_Modified(t)\n-\n- typedef struct {\n- void *buf;\n- PyObject *obj;\n- Py_ssize_t len;\n- Py_ssize_t itemsize;\n- int readonly;\n- int ndim;\n- char *format;\n- Py_ssize_t *shape;\n- Py_ssize_t *strides;\n- Py_ssize_t *suboffsets;\n- void *internal;\n- } 
Py_buffer;\n-\n- #define PyBUF_SIMPLE 0\n- #define PyBUF_WRITABLE 0x0001\n- #define PyBUF_FORMAT 0x0004\n- #define PyBUF_ND 0x0008\n- #define PyBUF_STRIDES (0x0010 | PyBUF_ND)\n- #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)\n- #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)\n- #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)\n- #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)\n-\n-#endif\n-\n-#if PY_MAJOR_VERSION < 3\n- #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"\n-#else\n- #define __Pyx_BUILTIN_MODULE_NAME "builtins"\n-#endif\n-\n-#if PY_MAJOR_VERSION >= 3\n- #define Py_TPFLAGS_CHECKTYPES 0\n- #define Py_TPFLAGS_HAVE_INDEX 0\n-#endif\n-\n-#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)\n- #define Py_TPFLAGS_HAVE_NEWBUFFER 0\n-#endif\n-\n-#if PY_MAJOR_VERSION >= 3\n- #define PyBaseString_Type PyUnicode_Type\n- #define PyStringObject PyUnicodeObject\n- #define PyString_Type PyUnicode_Type\n- #define PyString_Check PyUnicode_Check\n- #define PyString_CheckExact PyUnicode_CheckExact\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02060000\n- #define PyBytesObject PyStringObject\n- #define PyBytes_Type PyString_Type\n- #define PyBytes_Check PyString_Check\n- #define PyBytes_CheckExact PyString_CheckExact\n- #define PyBytes_FromString PyString_FromString\n- #define PyBytes_FromStringAndSize PyString_FromStringAndSize\n- #define PyBytes_FromFormat PyString_FromFormat\n- #define PyBytes_DecodeEscape PyString_DecodeEscape\n- #define PyBytes_AsString PyString_AsString\n- #define PyBytes_AsStringAndSize PyString_AsStringAndSize\n- #define PyBytes_Size PyString_Size\n- #define PyBytes_AS_STRING PyString_AS_STRING\n- #define PyBytes_GET_SIZE PyString_GET_SIZE\n- #define PyBytes_Repr PyString_Repr\n- #define PyBytes_Concat '..b'\n- if (!py_code) goto bad;\n- py_frame = PyFrame_New(\n- PyThreadState_GET(), /*PyThreadState *tstate,*/\n- py_code, /*PyCodeObject *code,*/\n- py_globals, /*PyObject *globals,*/\n- 0 /*PyObject *locals*/\n- );\n- if (!py_frame) 
goto bad;\n- py_frame->f_lineno = __pyx_lineno;\n- PyTraceBack_Here(py_frame);\n-bad:\n- Py_XDECREF(py_srcfile);\n- Py_XDECREF(py_funcname);\n- Py_XDECREF(py_code);\n- Py_XDECREF(py_frame);\n-}\n-\n-static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {\n- while (t->p) {\n- #if PY_MAJOR_VERSION < 3\n- if (t->is_unicode) {\n- *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);\n- } else if (t->intern) {\n- *t->p = PyString_InternFromString(t->s);\n- } else {\n- *t->p = PyString_FromStringAndSize(t->s, t->n - 1);\n- }\n- #else /* Python 3+ has unicode identifiers */\n- if (t->is_unicode | t->is_str) {\n- if (t->intern) {\n- *t->p = PyUnicode_InternFromString(t->s);\n- } else if (t->encoding) {\n- *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);\n- } else {\n- *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);\n- }\n- } else {\n- *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);\n- }\n- #endif\n- if (!*t->p)\n- return -1;\n- ++t;\n- }\n- return 0;\n-}\n-\n-/* Type Conversion Functions */\n-\n-static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {\n- int is_true = x == Py_True;\n- if (is_true | (x == Py_False) | (x == Py_None)) return is_true;\n- else return PyObject_IsTrue(x);\n-}\n-\n-static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {\n- PyNumberMethods *m;\n- const char *name = NULL;\n- PyObject *res = NULL;\n-#if PY_VERSION_HEX < 0x03000000\n- if (PyInt_Check(x) || PyLong_Check(x))\n-#else\n- if (PyLong_Check(x))\n-#endif\n- return Py_INCREF(x), x;\n- m = Py_TYPE(x)->tp_as_number;\n-#if PY_VERSION_HEX < 0x03000000\n- if (m && m->nb_int) {\n- name = "int";\n- res = PyNumber_Int(x);\n- }\n- else if (m && m->nb_long) {\n- name = "long";\n- res = PyNumber_Long(x);\n- }\n-#else\n- if (m && m->nb_int) {\n- name = "int";\n- res = PyNumber_Long(x);\n- }\n-#endif\n- if (res) {\n-#if PY_VERSION_HEX < 0x03000000\n- if (!PyInt_Check(res) && !PyLong_Check(res)) {\n-#else\n- if (!PyLong_Check(res)) {\n-#endif\n- 
PyErr_Format(PyExc_TypeError,\n- "__%s__ returned non-%s (type %.200s)",\n- name, name, Py_TYPE(res)->tp_name);\n- Py_DECREF(res);\n- return NULL;\n- }\n- }\n- else if (!PyErr_Occurred()) {\n- PyErr_SetString(PyExc_TypeError,\n- "an integer is required");\n- }\n- return res;\n-}\n-\n-static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {\n- Py_ssize_t ival;\n- PyObject* x = PyNumber_Index(b);\n- if (!x) return -1;\n- ival = PyInt_AsSsize_t(x);\n- Py_DECREF(x);\n- return ival;\n-}\n-\n-static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {\n-#if PY_VERSION_HEX < 0x02050000\n- if (ival <= LONG_MAX)\n- return PyInt_FromLong((long)ival);\n- else {\n- unsigned char *bytes = (unsigned char *) &ival;\n- int one = 1; int little = (int)*(unsigned char*)&one;\n- return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0);\n- }\n-#else\n- return PyInt_FromSize_t(ival);\n-#endif\n-}\n-\n-static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) {\n- unsigned PY_LONG_LONG val = __Pyx_PyInt_AsUnsignedLongLong(x);\n- if (unlikely(val == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) {\n- return (size_t)-1;\n- } else if (unlikely(val != (unsigned PY_LONG_LONG)(size_t)val)) {\n- PyErr_SetString(PyExc_OverflowError,\n- "value too large to convert to size_t");\n- return (size_t)-1;\n- }\n- return (size_t)val;\n-}\n-\n-\n-#endif /* Py_PYTHON_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/csamtools.pxd --- a/chimerascan/pysam/csamtools.pxd Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,283 +0,0 @@ - -cdef extern from "string.h": - ctypedef int size_t - void *memcpy(void *dst,void *src,size_t len) - void *memmove(void *dst,void *src,size_t len) - void *memset(void *b,int c,size_t len) - -cdef extern from "stdlib.h": - void free(void *) - void *malloc(size_t) - void *calloc(size_t,size_t) - void *realloc(void *,size_t) - int c_abs "abs" (int) - void qsort(void *base, size_t nmemb, size_t size, - int (*compar)(void *,void *)) - -cdef extern from "stdio.h": - ctypedef struct FILE: - pass - FILE *fopen(char *,char *) - FILE *freopen(char *path, char *mode, FILE *stream) - int fileno(FILE *stream) - int dup2(int oldfd, int newfd) - int fflush(FILE *stream) - - FILE * stderr - FILE * stdout - int fclose(FILE *) - int sscanf(char *str,char *fmt,...) - int printf(char *fmt,...) - int sprintf(char *str,char *fmt,...) - int fprintf(FILE *ifile,char *fmt,...) - char *fgets(char *str,int size,FILE *ifile) - -cdef extern from "ctype.h": - int toupper(int c) - int tolower(int c) - -cdef extern from "unistd.h": - char *ttyname(int fd) - int isatty(int fd) - -cdef extern from "string.h": - int strcmp(char *s1, char *s2) - int strncmp(char *s1,char *s2,size_t len) - char *strcpy(char *dest,char *src) - char *strncpy(char *dest,char *src, size_t len) - char *strdup(char *) - char *strcat(char *,char *) - size_t strlen(char *s) - int memcmp( void * s1, void *s2, size_t len ) - -cdef extern from "Python.h": - long _Py_HashPointer(void*) - -cdef extern from "razf.h": - pass - -cdef extern from "stdint.h": - ctypedef int int64_t - ctypedef int int32_t - ctypedef int uint32_t - ctypedef int uint8_t - ctypedef int uint64_t - - -cdef extern from "bam.h": - - # IF _IOLIB=2, bamFile = BGZF, see bgzf.h - # samtools uses KNETFILE, check how this works - - ctypedef struct tamFile: - pass - - ctypedef struct bamFile: - pass - - ctypedef struct bam1_core_t: - int32_t tid - int32_t pos - uint32_t bin - uint32_t qual - uint32_t l_qname - uint32_t flag - uint32_t n_cigar - 
int32_t l_qseq - int32_t mtid - int32_t mpos - int32_t isize - - ctypedef struct bam1_t: - bam1_core_t core - int l_aux - int data_len - int m_data - uint8_t *data - - ctypedef struct bam_pileup1_t: - bam1_t *b - int32_t qpos - int indel - int level - uint32_t is_del - uint32_t is_head - uint32_t is_tail - - ctypedef int (*bam_pileup_f)(uint32_t tid, uint32_t pos, int n, bam_pileup1_t *pl, void *data) - - ctypedef int (*bam_fetch_f)(bam1_t *b, void *data) - - ctypedef struct bam_header_t: - int32_t n_targets - char **target_name - uint32_t *target_len - void *hash - void *rg2lib - int l_text - char *text - - ctypedef struct bam_index_t: - pass - - ctypedef struct bam_plbuf_t: - pass - - ctypedef struct bam_iter_t: - pass - - bam1_t * bam_init1() - void bam_destroy1(bam1_t *) - - bamFile razf_dopen(int data_fd, char *mode) - - int64_t bam_seek( bamFile fp, uint64_t voffset, int where) - int64_t bam_tell( bamFile fp ) - - # void bam_init_header_hash(bam_header_t *header) - - ############################################### - # stand-ins for samtools macros - uint32_t * bam1_cigar( bam1_t * b) - char * bam1_qname( bam1_t * b) - uint8_t * bam1_seq( bam1_t * b) - uint8_t * bam1_qual( bam1_t * b) - uint8_t * bam1_aux( bam1_t * b) - - ############################################### - # bam iterator interface - bam_iter_t bam_iter_query( bam_index_t *idx, int tid, int beg, int end) - - int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b) - - void bam_iter_destroy(bam_iter_t iter) - - ############################################### - - bam1_t * bam_dup1( bam1_t *src ) - - bam1_t * bam_copy1(bam1_t *bdst, bam1_t *bsrc) - bam_index_t *bam_index_load(char *f ) - - void bam_index_destroy(bam_index_t *idx) - - int bam_parse_region(bam_header_t *header, char *str, int *ref_id, int *begin, int *end) - - ############################################### - bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data) - - int bam_fetch(bamFile fp, bam_index_t *idx, int tid, int beg, 
int end, void *data, bam_fetch_f func) - - int bam_plbuf_push(bam1_t *b, bam_plbuf_t *buf) - - void bam_plbuf_destroy(bam_plbuf_t *buf) - ######################################## - # pileup iterator interface - ctypedef struct bam_plp_t: - pass - - ctypedef int (*bam_plp_auto_f)(void *data, bam1_t *b) - - bam_plp_t bam_plp_init( bam_plp_auto_f func, void *data) - int bam_plp_push( bam_plp_t iter, bam1_t *b) - bam_pileup1_t *bam_plp_next( bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) - bam_pileup1_t *bam_plp_auto( bam_plp_t iter, int *_tid, int *_pos, int *_n_plp) - void bam_plp_set_mask(bam_plp_t iter, int mask) - void bam_plp_reset(bam_plp_t iter) - void bam_plp_destroy(bam_plp_t iter) - - ################################################## - - int bam_read1(bamFile fp, bam1_t *b) - - int bam_write1( bamFile fp, bam1_t *b) - - bam_header_t *bam_header_init() - - int bam_header_write( bamFile fp, bam_header_t *header) - - bam_header_t *bam_header_read( bamFile fp ) - - void bam_header_destroy(bam_header_t *header) - - bam1_t * bam_dup1( bam1_t *src ) - - bam1_t * bam_copy1(bam1_t *bdst, bam1_t *bsrc) - - uint8_t *bam_aux_get(bam1_t *b, char tag[2]) - - int bam_aux2i(uint8_t *s) - float bam_aux2f(uint8_t *s) - double bam_aux2d(uint8_t *s) - char bam_aux2A( uint8_t *s) - char *bam_aux2Z( uint8_t *s) - - int bam_reg2bin(uint32_t beg, uint32_t end) - - uint32_t bam_calend(bam1_core_t *c, uint32_t *cigar) - -cdef extern from "sam.h": - - ctypedef struct samfile_t_un: - tamFile tamr - bamFile bam - FILE *tamw - - ctypedef struct samfile_t: - int type - samfile_t_un x - bam_header_t *header - - samfile_t *samopen( char *fn, char * mode, void *aux) - - int sampileup( samfile_t *fp, int mask, bam_pileup_f func, void *data) - - void samclose(samfile_t *fp) - - int samread(samfile_t *fp, bam1_t *b) - - int samwrite(samfile_t *fp, bam1_t *b) - -cdef extern from "faidx.h": - - ctypedef struct faidx_t: - pass - - int fai_build(char *fn) - - void fai_destroy(faidx_t *fai) - - 
faidx_t *fai_load(char *fn) - - char *fai_fetch(faidx_t *fai, char *reg, int *len) - - int faidx_fetch_nseq(faidx_t *fai) - - char *faidx_fetch_seq(faidx_t *fai, char *c_name, - int p_beg_i, int p_end_i, int *len) - -cdef extern from "pysam_util.h": - - int pysam_pileup_next(bam1_t *b, - bam_plbuf_t *buf, - bam_pileup1_t ** plp, - int * tid, - int * pos, - int * n_plp ) - - - int pysam_dispatch(int argc, char *argv[] ) - - # stand-in functions for samtools macros - void pysam_bam_destroy1( bam1_t * b) - - # add *nbytes* into the variable length data of *src* at *pos* - bam1_t * pysam_bam_update( bam1_t * b, - size_t nbytes_old, - size_t nbytes_new, - uint8_t * pos ) - - # translate char to unsigned char - unsigned char pysam_translate_sequence( char s ) - - |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/csamtools.pyx --- a/chimerascan/pysam/csamtools.pyx Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,2064 +0,0 @@\n-# cython: embedsignature=True\n-# cython: profile=True\n-# adds doc-strings for sphinx\n-\n-import tempfile, os, sys, types, itertools, struct, ctypes\n-\n-from python_string cimport PyString_FromStringAndSize, PyString_AS_STRING\n-from python_exc cimport PyErr_SetString\n-\n-# defines imported from samtools\n-DEF SEEK_SET = 0\n-DEF SEEK_CUR = 1\n-DEF SEEK_END = 2\n-\n-## These are bits set in the flag.\n-## have to put these definitions here, in csamtools.pxd they got ignored\n-## @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */\n-DEF BAM_FPAIRED =1\n-## @abstract the read is mapped in a proper pair */\n-DEF BAM_FPROPER_PAIR =2\n-## @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */\n-DEF BAM_FUNMAP =4\n-## @abstract the mate is unmapped */\n-DEF BAM_FMUNMAP =8\n-## @abstract the read is mapped to the reverse strand */\n-DEF BAM_FREVERSE =16\n-## @abstract the mate is mapped to the reverse strand */\n-DEF BAM_FMREVERSE =32\n-## @abstract this is read1 */\n-DEF BAM_FREAD1 =64\n-## @abstract this is read2 */\n-DEF BAM_FREAD2 =128\n-## @abstract not primary alignment */\n-DEF BAM_FSECONDARY =256\n-## @abstract QC failure */\n-DEF BAM_FQCFAIL =512\n-## @abstract optical or PCR duplicate */\n-DEF BAM_FDUP =1024\n-\n-DEF BAM_CIGAR_SHIFT=4\n-DEF BAM_CIGAR_MASK=((1 << BAM_CIGAR_SHIFT) - 1)\n-\n-DEF BAM_CMATCH = 0\n-DEF BAM_CINS = 1\n-DEF BAM_CDEL = 2\n-DEF BAM_CREF_SKIP = 3\n-DEF BAM_CSOFT_CLIP = 4\n-DEF BAM_CHARD_CLIP = 5\n-DEF BAM_CPAD = 6\n-\n-#####################################################################\n-#####################################################################\n-#####################################################################\n-## private factory methods\n-#####################################################################\n-cdef class AlignedRead\n-cdef makeAlignedRead( bam1_t * src):\n- \'\'\'enter src into AlignedRead.\'\'\'\n- cdef AlignedRead 
dest\n- dest = AlignedRead()\n- # destroy dummy delegate created in constructor\n- # to prevent memory leak.\n- bam_destroy1(dest._delegate)\n- dest._delegate = bam_dup1(src)\n- return dest\n-\n-cdef class PileupProxy\n-cdef makePileupProxy( bam_pileup1_t * plp, int tid, int pos, int n ):\n- cdef PileupProxy dest\n- dest = PileupProxy()\n- dest.plp = plp\n- dest.tid = tid\n- dest.pos = pos\n- dest.n = n\n- return dest\n-\n-cdef class PileupRead\n-cdef makePileupRead( bam_pileup1_t * src ):\n- \'\'\'fill a PileupRead object from a bam_pileup1_t * object.\'\'\'\n- cdef PileupRead dest\n- dest = PileupRead()\n- dest._alignment = makeAlignedRead( src.b )\n- dest._qpos = src.qpos\n- dest._indel = src.indel\n- dest._level = src.level\n- dest._is_del = src.is_del\n- dest._is_head = src.is_head\n- dest._is_tail = src.is_tail\n- return dest\n-\n-#####################################################################\n-#####################################################################\n-#####################################################################\n-## Generic callbacks for inserting python callbacks.\n-#####################################################################\n-cdef int fetch_callback( bam1_t *alignment, void *f):\n- \'\'\'callback for bam_fetch. \n- \n- calls function in *f* with a new :class:`AlignedRead` object as parameter.\n- \'\'\'\n- a = makeAlignedRead( alignment )\n- (<object>f)(a)\n-\n-class PileupColumn(object): \n- \'\'\'A pileup column. 
A pileup column contains \n- all the reads that map to a certain target base.\n-\n- tid \n- chromosome ID as is defined in the header \n- pos \n- the target base coordinate (0-based) \n- n \n- number of reads mapping to this column \n- pileups \n- list of reads (:class:`pysam.PileupRead`) aligned to this column \n- \'\'\' \n- def __str__(self): \n- return "\\t".j'..b'is_tail:\n- def __get__(self):\n- return self._is_tail\n- property level:\n- def __get__(self):\n- return self._level\n-\n-class Outs:\n- \'\'\'http://mail.python.org/pipermail/python-list/2000-June/038406.html\'\'\'\n- def __init__(self, id = 1):\n- self.streams = []\n- self.id = id\n-\n- def setdevice(self, filename):\n- \'\'\'open an existing file, like "/dev/null"\'\'\'\n- fd = os.open(filename, os.O_WRONLY)\n- self.setfd(fd)\n-\n- def setfile(self, filename):\n- \'\'\'open a new file.\'\'\'\n- fd = os.open(filename, os.O_WRONLY|os.O_CREAT, 0660);\n- self.setfd(fd)\n-\n- def setfd(self, fd):\n- ofd = os.dup(self.id) # Save old stream on new unit.\n- self.streams.append(ofd)\n- sys.stdout.flush() # Buffered data goes to old stream.\n- os.dup2(fd, self.id) # Open unit 1 on new stream.\n- os.close(fd) # Close other unit (look out, caller.)\n- \n- def restore(self):\n- \'\'\'restore previous output stream\'\'\'\n- if self.streams:\n- # the following was not sufficient, hence flush both stderr and stdout\n- # os.fsync( self.id )\n- sys.stdout.flush()\n- sys.stderr.flush()\n- os.dup2(self.streams[-1], self.id)\n- os.close(self.streams[-1])\n- del self.streams[-1]\n-\n-def _samtools_dispatch( method, args = () ):\n- \'\'\'call ``method`` in samtools providing arguments in args.\n- \n- .. note:: \n- This method redirects stdout and stderr to capture it \n- from samtools. If for some reason stdout/stderr disappears\n- the reason might be in this method.\n-\n- .. note::\n- The current implementation might only work on linux.\n- \n- .. 
note:: \n- This method captures stdout and stderr using temporary files, \n- which are then read into memory in their entirety. This method\n- is slow and might cause large memory overhead. \n-\n- See http://bytes.com/topic/c/answers/487231-how-capture-stdout-temporarily\n- on the topic of redirecting stderr/stdout.\n- \'\'\'\n-\n- # note that debugging this module can be a problem\n- # as stdout/stderr will not appear\n-\n- # redirect stderr and stdout to file\n-\n- # open files and redirect into it\n- stderr_h, stderr_f = tempfile.mkstemp()\n- stdout_h, stdout_f = tempfile.mkstemp()\n-\n- # patch for `samtools view`\n- # samtools `view` closes stdout, from which I can not\n- # recover. Thus redirect output to file with -o option.\n- if method == "view":\n- if "-o" in args: raise ValueError("option -o is forbidden in samtools view")\n- args = ( "-o", stdout_f ) + args\n-\n- stdout_save = Outs( sys.stdout.fileno() )\n- stdout_save.setfd( stdout_h )\n- stderr_save = Outs( sys.stderr.fileno() )\n- stderr_save.setfd( stderr_h )\n-\n- # do the function call to samtools\n- cdef char ** cargs\n- cdef int i, n, retval\n-\n- n = len(args)\n- # allocate two more for first (dummy) argument (contains command)\n- cargs = <char**>calloc( n+2, sizeof( char *) )\n- cargs[0] = "samtools"\n- cargs[1] = method\n- for i from 0 <= i < n: cargs[i+2] = args[i]\n- retval = pysam_dispatch(n+2, cargs)\n- free( cargs )\n-\n- # restore stdout/stderr. 
This will also flush, so\n- # needs to be before reading back the file contents\n- stdout_save.restore()\n- stderr_save.restore()\n-\n- # capture stderr/stdout.\n- out_stderr = open( stderr_f, "r").readlines()\n- out_stdout = open( stdout_f, "r").readlines()\n-\n- # clean up files\n- os.remove( stderr_f )\n- os.remove( stdout_f )\n-\n- return retval, out_stderr, out_stdout\n-\n-__all__ = ["Samfile", \n- "Fastafile",\n- "IteratorRow", \n- "IteratorRowAll", \n- "IteratorColumn", \n- "AlignedRead", \n- "PileupColumn", \n- "PileupProxy", \n- "PileupRead" ]\n-\n- \n-\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/ctabix.c --- a/chimerascan/pysam/ctabix.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
b'@@ -1,12808 +0,0 @@\n-/* Generated by Cython 0.13 on Mon Jan 31 00:58:34 2011 */\n-\n-#define PY_SSIZE_T_CLEAN\n-#include "Python.h"\n-#ifndef Py_PYTHON_H\n- #error Python headers needed to compile C extensions, please install development version of Python.\n-#else\n-\n-#include <stddef.h> /* For offsetof */\n-#ifndef offsetof\n-#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )\n-#endif\n-\n-#if !defined(WIN32) && !defined(MS_WINDOWS)\n- #ifndef __stdcall\n- #define __stdcall\n- #endif\n- #ifndef __cdecl\n- #define __cdecl\n- #endif\n- #ifndef __fastcall\n- #define __fastcall\n- #endif\n-#endif\n-\n-#ifndef DL_IMPORT\n- #define DL_IMPORT(t) t\n-#endif\n-#ifndef DL_EXPORT\n- #define DL_EXPORT(t) t\n-#endif\n-\n-#ifndef PY_LONG_LONG\n- #define PY_LONG_LONG LONG_LONG\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02040000\n- #define METH_COEXIST 0\n- #define PyDict_CheckExact(op) (Py_TYPE(op) == &PyDict_Type)\n- #define PyDict_Contains(d,o) PySequence_Contains(d,o)\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02050000\n- typedef int Py_ssize_t;\n- #define PY_SSIZE_T_MAX INT_MAX\n- #define PY_SSIZE_T_MIN INT_MIN\n- #define PY_FORMAT_SIZE_T ""\n- #define PyInt_FromSsize_t(z) PyInt_FromLong(z)\n- #define PyInt_AsSsize_t(o) PyInt_AsLong(o)\n- #define PyNumber_Index(o) PyNumber_Int(o)\n- #define PyIndex_Check(o) PyNumber_Check(o)\n- #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02060000\n- #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)\n- #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)\n- #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)\n- #define PyVarObject_HEAD_INIT(type, size) \\\n- PyObject_HEAD_INIT(type) size,\n- #define PyType_Modified(t)\n-\n- typedef struct {\n- void *buf;\n- PyObject *obj;\n- Py_ssize_t len;\n- Py_ssize_t itemsize;\n- int readonly;\n- int ndim;\n- char *format;\n- Py_ssize_t *shape;\n- Py_ssize_t *strides;\n- Py_ssize_t *suboffsets;\n- void *internal;\n- } 
Py_buffer;\n-\n- #define PyBUF_SIMPLE 0\n- #define PyBUF_WRITABLE 0x0001\n- #define PyBUF_FORMAT 0x0004\n- #define PyBUF_ND 0x0008\n- #define PyBUF_STRIDES (0x0010 | PyBUF_ND)\n- #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)\n- #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)\n- #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)\n- #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)\n-\n-#endif\n-\n-#if PY_MAJOR_VERSION < 3\n- #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"\n-#else\n- #define __Pyx_BUILTIN_MODULE_NAME "builtins"\n-#endif\n-\n-#if PY_MAJOR_VERSION >= 3\n- #define Py_TPFLAGS_CHECKTYPES 0\n- #define Py_TPFLAGS_HAVE_INDEX 0\n-#endif\n-\n-#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)\n- #define Py_TPFLAGS_HAVE_NEWBUFFER 0\n-#endif\n-\n-#if PY_MAJOR_VERSION >= 3\n- #define PyBaseString_Type PyUnicode_Type\n- #define PyStringObject PyUnicodeObject\n- #define PyString_Type PyUnicode_Type\n- #define PyString_Check PyUnicode_Check\n- #define PyString_CheckExact PyUnicode_CheckExact\n-#endif\n-\n-#if PY_VERSION_HEX < 0x02060000\n- #define PyBytesObject PyStringObject\n- #define PyBytes_Type PyString_Type\n- #define PyBytes_Check PyString_Check\n- #define PyBytes_CheckExact PyString_CheckExact\n- #define PyBytes_FromString PyString_FromString\n- #define PyBytes_FromStringAndSize PyString_FromStringAndSize\n- #define PyBytes_FromFormat PyString_FromFormat\n- #define PyBytes_DecodeEscape PyString_DecodeEscape\n- #define PyBytes_AsString PyString_AsString\n- #define PyBytes_AsStringAndSize PyString_AsStringAndSize\n- #define PyBytes_Size PyString_Size\n- #define PyBytes_AS_STRING PyString_AS_STRING\n- #define PyBytes_GET_SIZE PyString_GET_SIZE\n- #define PyBytes_Repr PyString_Repr\n- #define PyBytes_Concat '..b'\n- if (!py_code) goto bad;\n- py_frame = PyFrame_New(\n- PyThreadState_GET(), /*PyThreadState *tstate,*/\n- py_code, /*PyCodeObject *code,*/\n- py_globals, /*PyObject *globals,*/\n- 0 /*PyObject *locals*/\n- );\n- if (!py_frame) 
goto bad;\n- py_frame->f_lineno = __pyx_lineno;\n- PyTraceBack_Here(py_frame);\n-bad:\n- Py_XDECREF(py_srcfile);\n- Py_XDECREF(py_funcname);\n- Py_XDECREF(py_code);\n- Py_XDECREF(py_frame);\n-}\n-\n-static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {\n- while (t->p) {\n- #if PY_MAJOR_VERSION < 3\n- if (t->is_unicode) {\n- *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);\n- } else if (t->intern) {\n- *t->p = PyString_InternFromString(t->s);\n- } else {\n- *t->p = PyString_FromStringAndSize(t->s, t->n - 1);\n- }\n- #else /* Python 3+ has unicode identifiers */\n- if (t->is_unicode | t->is_str) {\n- if (t->intern) {\n- *t->p = PyUnicode_InternFromString(t->s);\n- } else if (t->encoding) {\n- *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);\n- } else {\n- *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);\n- }\n- } else {\n- *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);\n- }\n- #endif\n- if (!*t->p)\n- return -1;\n- ++t;\n- }\n- return 0;\n-}\n-\n-/* Type Conversion Functions */\n-\n-static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {\n- int is_true = x == Py_True;\n- if (is_true | (x == Py_False) | (x == Py_None)) return is_true;\n- else return PyObject_IsTrue(x);\n-}\n-\n-static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {\n- PyNumberMethods *m;\n- const char *name = NULL;\n- PyObject *res = NULL;\n-#if PY_VERSION_HEX < 0x03000000\n- if (PyInt_Check(x) || PyLong_Check(x))\n-#else\n- if (PyLong_Check(x))\n-#endif\n- return Py_INCREF(x), x;\n- m = Py_TYPE(x)->tp_as_number;\n-#if PY_VERSION_HEX < 0x03000000\n- if (m && m->nb_int) {\n- name = "int";\n- res = PyNumber_Int(x);\n- }\n- else if (m && m->nb_long) {\n- name = "long";\n- res = PyNumber_Long(x);\n- }\n-#else\n- if (m && m->nb_int) {\n- name = "int";\n- res = PyNumber_Long(x);\n- }\n-#endif\n- if (res) {\n-#if PY_VERSION_HEX < 0x03000000\n- if (!PyInt_Check(res) && !PyLong_Check(res)) {\n-#else\n- if (!PyLong_Check(res)) {\n-#endif\n- 
PyErr_Format(PyExc_TypeError,\n- "__%s__ returned non-%s (type %.200s)",\n- name, name, Py_TYPE(res)->tp_name);\n- Py_DECREF(res);\n- return NULL;\n- }\n- }\n- else if (!PyErr_Occurred()) {\n- PyErr_SetString(PyExc_TypeError,\n- "an integer is required");\n- }\n- return res;\n-}\n-\n-static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {\n- Py_ssize_t ival;\n- PyObject* x = PyNumber_Index(b);\n- if (!x) return -1;\n- ival = PyInt_AsSsize_t(x);\n- Py_DECREF(x);\n- return ival;\n-}\n-\n-static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {\n-#if PY_VERSION_HEX < 0x02050000\n- if (ival <= LONG_MAX)\n- return PyInt_FromLong((long)ival);\n- else {\n- unsigned char *bytes = (unsigned char *) &ival;\n- int one = 1; int little = (int)*(unsigned char*)&one;\n- return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0);\n- }\n-#else\n- return PyInt_FromSize_t(ival);\n-#endif\n-}\n-\n-static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) {\n- unsigned PY_LONG_LONG val = __Pyx_PyInt_AsUnsignedLongLong(x);\n- if (unlikely(val == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) {\n- return (size_t)-1;\n- } else if (unlikely(val != (unsigned PY_LONG_LONG)(size_t)val)) {\n- PyErr_SetString(PyExc_OverflowError,\n- "value too large to convert to size_t");\n- return (size_t)-1;\n- }\n- return (size_t)val;\n-}\n-\n-\n-#endif /* Py_PYTHON_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/ctabix.pxd --- a/chimerascan/pysam/ctabix.pxd Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,171 +0,0 @@ - -cdef extern from "string.h": - ctypedef int size_t - void *memcpy(void *dst,void *src,size_t len) - void *memmove(void *dst,void *src,size_t len) - void *memset(void *b,int c,size_t len) - char *strtok_r(char *str, char *delim, char **saveptr) - char *strncpy(char *dest, char *src, size_t n) - void *memchr(void *s, int c, size_t n) - -cdef extern from "stdlib.h": - void free(void *) - void *malloc(size_t) - void *calloc(size_t,size_t) - void *realloc(void *,size_t) - void qsort(void *base, size_t nmemb, size_t size, - int (*compar)(void *,void *)) - int c_abs "abs" (int) - int atoi( char *nptr) - long atol( char *nptr) - double atof( char *nptr) - -cdef extern from "stdio.h": - ctypedef struct FILE: - pass - FILE *fopen(char *,char *) - FILE *freopen(char *path, char *mode, FILE *stream) - int fileno(FILE *stream) - int dup2(int oldfd, int newfd) - int fflush(FILE *stream) - - FILE * stderr - FILE * stdout - int fclose(FILE *) - int sscanf(char *str,char *fmt,...) - int printf(char *str,char *fmt,...) - int sprintf(char *str,char *fmt,...) - int fprintf(FILE *ifile,char *fmt,...) 
- char *fgets(char *str,int size,FILE *ifile) - -cdef extern from "ctype.h": - int toupper(int c) - int tolower(int c) - -cdef extern from "sys/types.h": - pass - -cdef extern from "sys/stat.h": - pass - -cdef extern from "fcntl.h": - int open(char *pathname, int flags) - -cdef extern from "unistd.h": - ctypedef int ssize_t - char *ttyname(int fd) - int isatty(int fd) - ssize_t read(int fd, void *buf, size_t count) - -cdef extern from "string.h": - int strcmp(char *s1, char *s2) - int strncmp(char *s1,char *s2,size_t len) - char *strcpy(char *dest,char *src) - char *strncpy(char *dest,char *src, size_t len) - char *strdup(char *) - char *strcat(char *,char *) - size_t strlen(char *s) - int memcmp( void * s1, void *s2, size_t len ) - -cdef extern from "stdint.h": - ctypedef int int64_t - ctypedef int int32_t - ctypedef int uint32_t - ctypedef int uint8_t - ctypedef int uint64_t - -cdef extern from "Python.h": - ctypedef struct FILE - FILE* PyFile_AsFile(object) - char *fgets(char *str, int size, FILE *ifile) - int feof(FILE *stream) - size_t strlen(char *s) - size_t getline(char **lineptr, size_t *n, FILE *stream) - char *strstr(char *, char *) - char *strchr(char *string, int c) - int fileno(FILE *stream) - -cdef extern from "bgzf.h": - - ctypedef struct BGZF: - pass - - int64_t bgzf_seek(BGZF* fp, int64_t pos, int where) - - BGZF * bgzf_open(char * path, char * mode) - - int bgzf_write(BGZF * fp, void* data, int length) - - int bgzf_close(BGZF* fp) - -# tabix support -cdef extern from "tabix.h": - - ctypedef struct ti_index_t: - pass - - ctypedef struct tabix_t: - BGZF *fp - ti_index_t *idx - char *fn - char *fnidx - - ctypedef struct ti_iter_t: - pass - - ctypedef struct ti_conf_t: - int32_t preset - int32_t sc, bc, ec - int32_t meta_char, line_skip - - tabix_t *ti_open(char *fn, char *fnidx) - - int ti_lazy_index_load(tabix_t *t) - - void ti_close(tabix_t *t) - - ti_iter_t ti_query(tabix_t *t, char *name, int beg, int end) - ti_iter_t ti_queryi(tabix_t *t, int 
tid, int beg, int end) - ti_iter_t ti_querys(tabix_t *t, char *reg) - char * ti_read(tabix_t *t, ti_iter_t iter, int *len) - - # Get the list of sequence names. Each "char*" pointer points to a - # internal member of the index, so DO NOT modify the returned - # pointer; otherwise the index will be corrupted. The returned - # pointer should be freed by a single free() call by the routine - # calling this function. The number of sequences is returned at *n - char **ti_seqname(ti_index_t *idx, int *n) - - - # Destroy the iterator - void ti_iter_destroy(ti_iter_t iter) - - # Build the index for file <fn>. File <fn>.tbi will be generated - # and overwrite the file of the same name. Return -1 on failure. */ - int ti_index_build(char *fn, ti_conf_t *conf) - - #/* Load the index from file <fn>.tbi. If <fn> is a URL and the index - # * file is not in the working directory, <fn>.tbi will be - # * downloaded. Return NULL on failure. */ - ti_index_t *ti_index_load( char *fn) - - ti_index_t *ti_index_load_local(char *fnidx) - - #/* Destroy the index */ - void ti_index_destroy(ti_index_t *idx) - - #/* Parse a region like: chr2, chr2:100, chr2:100-200. Return -1 on failure. */ - int ti_parse_region( ti_index_t *idx, char *str, int *tid, int *begin, int *end) - - int ti_get_tid( ti_index_t *idx, char *name) - - # /* Get the iterator pointing to the first record at the current file - # * position. If the file is just openned, the iterator points to the - # * first record in the file. */ - ti_iter_t ti_iter_first() - - # /* Get the iterator pointing to the first record in region tid:beg-end */ - ti_iter_t ti_iter_query( ti_index_t *idx, int tid, int beg, int end) - - # /* Get the data line pointed by the iterator and iterate to the next record. */ - # char *ti_iter_read(BGZF *fp, ti_iter_t iter, int *len) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/ctabix.pyx --- a/chimerascan/pysam/ctabix.pyx Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,881 +0,0 @@\n-# cython: embedsignature=True\n-# adds doc-strings for sphinx\n-\n-import tempfile, os, sys, types, itertools, struct, ctypes\n-\n-cdef class Tabixfile:\n- \'\'\'*(filename, mode=\'r\')*\n-\n- opens a :term:`tabix file` for reading. A missing\n- index (*filename* + ".tbi") will raise an exception.\n- \'\'\'\n-\n- cdef char * filename\n-\n- # pointer to tabixfile\n- cdef tabix_t * tabixfile\n-\n- def __cinit__(self, *args, **kwargs ):\n- self.tabixfile = NULL\n- self._open( *args, **kwargs )\n-\n- def _isOpen( self ):\n- \'\'\'return true if samfile has been opened.\'\'\'\n- return self.tabixfile != NULL\n-\n- def _open( self, \n- char * filename, \n- mode =\'r\',\n- ):\n- \'\'\'open a :term:`tabix file` for reading.\n- \'\'\'\n-\n- assert mode in ( "r",), "invalid file opening mode `%s`" % mode\n-\n- # close a previously opened file\n- if self.tabixfile != NULL: self.close()\n- self.tabixfile = NULL\n-\n- self.filename = filename\n- filename_index = filename + ".tbi"\n-\n- if mode[0] == \'w\':\n- # open file for writing\n- pass\n-\n- elif mode[0] == "r":\n- # open file for reading\n- if not os.path.exists( self.filename ):\n- raise IOError( "file `%s` not found" % self.filename)\n-\n- if not os.path.exists( filename_index ):\n- raise IOError( "index `%s` not found" % filename_index)\n-\n- # open file and load index\n- self.tabixfile = ti_open( self.filename, filename_index )\n-\n- if self.tabixfile == NULL:\n- raise IOError("could not open file `%s`" % filename )\n-\n- def _parseRegion( self, \n- reference = None, \n- start = None, \n- end = None, \n- region = None ):\n- \'\'\'parse region information.\n-\n- raise ValueError for for invalid regions.\n-\n- returns a tuple of region, tid, start and end. 
Region\n- is a valid samtools :term:`region` or None if the region\n- extends over the whole file.\n-\n- Note that regions are 1-based, while start,end are python coordinates.\n- \'\'\'\n- ti_lazy_index_load( self.tabixfile )\n-\n- cdef int rtid\n- cdef int rstart\n- cdef int rend\n- cdef int max_pos\n- max_pos = 2 << 29\n-\n- rtid = rstart = rend = 0\n-\n- # translate to a region\n- if reference:\n- if start != None and end != None:\n- region = "%s:%i-%i" % (reference, start+1, end)\n- elif start == None and end != None:\n- region = "%s:%i-%i" % (reference, 1, end)\n- elif end == None and start != None:\n- region = "%s:%i-%i" % (reference, start+1, max_pos-1)\n- else:\n- region = reference\n-\n- if region:\n- ti_parse_region( self.tabixfile.idx, region, &rtid, &rstart, &rend) \n- if rtid < 0: raise ValueError( "invalid region `%s`" % region )\n- if rstart > rend: raise ValueError( \'invalid region: start (%i) > end (%i)\' % (rstart, rend) )\n- if not 0 <= rstart < max_pos: raise ValueError( \'start out of range (%i)\' % rstart )\n- if not 0 <= rend < max_pos: raise ValueError( \'end out of range (%i)\' % rend )\n-\n- return region, rtid, rstart, rend\n-\n- def fetch( self, \n- reference = None,\n- start = None, \n- end = None, \n- region = None,\n- parser = None ):\n- \'\'\'\n- \n- fetch one or more rows in a :term:`region` using 0-based indexing. The region is specified by\n- :term:`reference`, *start* and *end*. Alternatively, a samtools :term:`region` string can be supplied.\n-\n- Without *reference* or *region* all entries will be fetched. 
\n- \n- If only *reference* is s'..b'E = 64 * 1024\n-\n- fp = bgzf_open( filename_out, "w")\n- if fp == NULL:\n- raise IOError( "could not open \'%s\' for writing" )\n-\n- fd_src = open(filename_in, O_RDONLY)\n- if fd_src == 0:\n- raise IOError( "could not open \'%s\' for reading" )\n-\n- buffer = malloc(WINDOW_SIZE)\n-\n- while c > 0:\n- c = read(fd_src, buffer, WINDOW_SIZE)\n- r = bgzf_write(fp, buffer, c)\n- if r < 0:\n- free( buffer )\n- raise OSError("writing failed")\n- \n- free( buffer )\n- r = bgzf_close(fp)\n- if r < 0: raise OSError("writing failed")\n-\n-def tabix_index( filename, \n- force = False,\n- seq_col = None, \n- start_col = None, \n- end_col = None,\n- preset = None,\n- meta_char = "#",\n- zerobased = False,\n- ):\n- \'\'\'\n- index tab-separated *filename* using tabix.\n-\n- An existing index will not be overwritten unless\n- *force* is set.\n-\n- The index will be built from coordinates\n- in columns *seq_col*, *start_col* and *end_col*.\n-\n- The contents of *filename* have to be sorted by \n- contig and position - the method does not check\n- if the file is sorted.\n-\n- Column indices are 0-based. Coordinates in the file\n- are assumed to be 1-based.\n-\n- If *preset* is provided, the column coordinates\n- are taken from a preset. Valid values for preset\n- are "gff", "bed", "sam", "vcf", psltbl", "pileup".\n- \n- Lines beginning with *meta_char* and the first\n- *line_skip* lines will be skipped.\n- \n- If *filename* does not end in ".gz", it will be automatically\n- compressed. The original file will be removed and only the \n- compressed file will be retained. 
\n-\n- If *filename* ends in *gz*, the file is assumed to be already\n- compressed with bgzf.\n-\n- returns the filename of the compressed data\n- \'\'\'\n- \n- if not os.path.exists(filename): raise IOError("No such file \'%s\'" % filename)\n-\n- if not filename.endswith(".gz"): \n- \n- tabix_compress( filename, filename + ".gz", force = force )\n- os.unlink( filename )\n- filename += ".gz"\n-\n- if not force and os.path.exists(filename + ".tbi" ):\n- raise IOError( "Filename \'%s.tbi\' already exists, use *force* to overwrite" )\n-\n- # columns (1-based)\n- # preset-code, contig, start, end, metachar for commends, lines to ignore at beginning\n- # 0 is a missing column\n- preset2conf = {\n- \'gff\' : ( 0, 1, 4, 5, ord(\'#\'), 0 ),\n- \'bed\' : ( 0x10000, 1, 2, 3, ord(\'#\'), 0 ),\n- \'psltbl\' : ( 0x10000, 15, 17, 18, ord(\'#\'), 0 ),\n- \'sam\' : ( 1, 3, 4, 0, ord(\'#\'), 0 ),\n- \'vcf\' : ( 2, 1, 2, 0, ord(\'#\'), 0 ),\n- \'pileup\': (3, 1, 2, 0, ord(\'#\'), 0 ),\n- }\n-\n- if preset:\n- try:\n- conf_data = preset2conf[preset]\n- except KeyError:\n- raise KeyError( "unknown preset \'%s\', valid presets are \'%s\'" % (preset, ",".join(preset2conf.keys() )))\n- else:\n- if end_col == None: end_col = -1\n- preset = 0\n-\n- # note that tabix internally works with 0-based coordinates and open/closed intervals.\n- # When using a preset, conversion is automatically taken care of.\n- # Otherwise, the coordinates are assumed to be 1-based closed intervals and \n- # -1 is subtracted from the start coordinate. To avoid doing this, set\n- # the TI_FLAG_UCSC=0x10000 flag:\n- if zerobased: preset = preset | 0x10000\n-\n- conf_data = (preset, seq_col+1, start_col+1, end_col+1, ord(meta_char), 0)\n- \n- cdef ti_conf_t conf\n- conf.preset, conf.sc, conf.bc, conf.ec, conf.meta_char, conf.line_skip = conf_data\n-\n- ti_index_build( filename, &conf)\n- \n- return filename\n- \n-__all__ = ["tabix_index", \n- "tabix_compress",\n- "Tabixfile", \n- "asTuple",\n- "asGTF",\n- ]\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/namedtuple.py --- a/chimerascan/pysam/namedtuple.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,117 +0,0 @@ -from operator import itemgetter as _itemgetter -from keyword import iskeyword as _iskeyword -import sys as _sys - -def namedtuple(typename, field_names, verbose=False, rename=False): - """Returns a new subclass of tuple with named fields. - - >>> Point = namedtuple('Point', 'x y') - >>> Point.__doc__ # docstring for the new class - 'Point(x, y)' - >>> p = Point(11, y=22) # instantiate with positional args or keywords - >>> p[0] + p[1] # indexable like a plain tuple - 33 - >>> x, y = p # unpack like a regular tuple - >>> x, y - (11, 22) - >>> p.x + p.y # fields also accessable by name - 33 - >>> d = p._asdict() # convert to a dictionary - >>> d['x'] - 11 - >>> Point(**d) # convert from a dictionary - Point(x=11, y=22) - >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields - Point(x=100, y=22) - - """ - - # Parse and validate the field names. Validation serves two purposes, - # generating informative error messages and preventing template injection attacks. 
- if isinstance(field_names, basestring): - field_names = field_names.replace(',', ' ').split() # names separated by whitespace and/or commas - field_names = tuple(map(str, field_names)) - if rename: - names = list(field_names) - seen = set() - for i, name in enumerate(names): - if (not min(c.isalnum() or c=='_' for c in name) or _iskeyword(name) - or not name or name[0].isdigit() or name.startswith('_') - or name in seen): - names[i] = '_%d' % i - seen.add(name) - field_names = tuple(names) - for name in (typename,) + field_names: - if not min(c.isalnum() or c=='_' for c in name): - raise ValueError('Type names and field names can only contain alphanumeric characters and underscores: %r' % name) - if _iskeyword(name): - raise ValueError('Type names and field names cannot be a keyword: %r' % name) - if name[0].isdigit(): - raise ValueError('Type names and field names cannot start with a number: %r' % name) - seen_names = set() - for name in field_names: - if name.startswith('_') and not rename: - raise ValueError('Field names cannot start with an underscore: %r' % name) - if name in seen_names: - raise ValueError('Encountered duplicate field name: %r' % name) - seen_names.add(name) - - # Create and fill-in the class template - numfields = len(field_names) - argtxt = repr(field_names).replace("'", "")[1:-1] # tuple repr without parens or quotes - reprtxt = ', '.join('%s=%%r' % name for name in field_names) - template = '''class %(typename)s(tuple): - '%(typename)s(%(argtxt)s)' \n - __slots__ = () \n - _fields = %(field_names)r \n - def __new__(_cls, %(argtxt)s): - return _tuple.__new__(_cls, (%(argtxt)s)) \n - @classmethod - def _make(cls, iterable, new=tuple.__new__, len=len): - 'Make a new %(typename)s object from a sequence or iterable' - result = new(cls, iterable) - if len(result) != %(numfields)d: - raise TypeError('Expected %(numfields)d arguments, got %%d' %% len(result)) - return result \n - def __repr__(self): - return '%(typename)s(%(reprtxt)s)' %% self 
\n - def _asdict(self): - 'Return a new dict which maps field names to their values' - return dict(zip(self._fields, self)) \n - def _replace(_self, **kwds): - 'Return a new %(typename)s object replacing specified fields with new values' - result = _self._make(map(kwds.pop, %(field_names)r, _self)) - if kwds: - raise ValueError('Got unexpected field names: %%r' %% kwds.keys()) - return result \n - def __getnewargs__(self): - return tuple(self) \n\n''' % locals() - for i, name in enumerate(field_names): - template += ' %s = _property(_itemgetter(%d))\n' % (name, i) - if verbose: - print template - - # Execute the template string in a temporary namespace - namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename, - _property=property, _tuple=tuple) - try: - exec template in namespace - except SyntaxError, e: - raise SyntaxError(e.message + ':\n' + template) - result = namespace[typename] - - # For pickling to work, the __module__ variable needs to be set to the frame - # where the named tuple is created. Bypass this step in enviroments where - # sys._getframe is not defined (Jython for example) or sys._getframe is not - # defined for arguments greater than 0 (IronPython). - try: - result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__') - except (AttributeError, ValueError): - pass - - return result - - - - - |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/pysam_util.c --- a/chimerascan/pysam/pysam_util.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,290 +0,0 @@\n-#include <ctype.h>\n-#include <assert.h>\n-#include "bam.h"\n-#include "khash.h"\n-#include "ksort.h"\n-#include "bam_endian.h"\n-#include "knetfile.h"\n-#include "pysam_util.h"\n-\n-// #######################################################\n-// utility routines to avoid using callbacks in bam_fetch\n-// taken from bam_index.c\n-// The order of the following declarations is important.\n-// #######################################################\n-\n-typedef struct\n-{\n- uint64_t u, v;\n-} pair64_t;\n-\n-#define pair64_lt(a,b) ((a).u < (b).u)\n-\n-typedef struct {\n-\tuint32_t m, n;\n-\tpair64_t *list;\n-} bam_binlist_t;\n-\n-typedef struct {\n-\tint32_t n, m;\n-\tuint64_t *offset;\n-} bam_lidx_t;\n-\n-KSORT_INIT(my_off, pair64_t, pair64_lt);\n-KHASH_MAP_INIT_INT(my_i, bam_binlist_t);\n-\n-struct __bam_index_t\n-{\n- int32_t n;\n- khash_t(my_i) **index;\n- bam_lidx_t *index2;\n-};\n-\n-typedef struct __linkbuf_t {\n-\tbam1_t b;\n-\tuint32_t beg, end;\n-\tstruct __linkbuf_t *next;\n-} lbnode_t;\n-\n-typedef struct {\n-\tint cnt, n, max;\n-\tlbnode_t **buf;\n-} mempool_t;\n-\n-struct __bam_plbuf_t {\n-\tmempool_t *mp;\n-\tlbnode_t *head, *tail, *dummy;\n-\tbam_pileup_f func;\n-\tvoid *func_data;\n-\tint32_t tid, pos, max_tid, max_pos;\n-\tint max_pu, is_eof;\n-\tbam_pileup1_t *pu;\n-\tint flag_mask;\n-};\n-\n-static mempool_t *mp_init()\n-{\n-\tmempool_t *mp;\n-\tmp = (mempool_t*)calloc(1, sizeof(mempool_t));\n-\treturn mp;\n-}\n-static void mp_destroy(mempool_t *mp)\n-{\n-\tint k;\n-\tfor (k = 0; k < mp->n; ++k) {\n-\t\tfree(mp->buf[k]->b.data);\n-\t\tfree(mp->buf[k]);\n-\t}\n-\tfree(mp->buf);\n-\tfree(mp);\n-}\n-static inline lbnode_t *mp_alloc(mempool_t *mp)\n-{\n-\t++mp->cnt;\n-\tif (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t));\n-\telse return mp->buf[--mp->n];\n-}\n-static inline void mp_free(mempool_t *mp, lbnode_t *p)\n-{\n-\t--mp->cnt; p->next = 0; // clear lbnode_t::next here\n-\tif (mp->n == mp->max) {\n-\t\tmp->max = 
mp->max? mp->max<<1 : 256;\n-\t\tmp->buf = (lbnode_t**)realloc(mp->buf, sizeof(lbnode_t*) * mp->max);\n-\t}\n-\tmp->buf[mp->n++] = p;\n-}\n-\n-static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)\n-{\n-\tunsigned k;\n-\tbam1_t *b = p->b;\n-\tbam1_core_t *c = &b->core;\n-\tuint32_t x = c->pos, y = 0;\n-\tint ret = 1, is_restart = 1;\n-\n-\tif (c->flag&BAM_FUNMAP) return 0; // unmapped read\n-\tassert(x <= pos); // otherwise a bug\n-\tp->qpos = -1; p->indel = 0; p->is_del = p->is_head = p->is_tail = 0;\n-\tfor (k = 0; k < c->n_cigar; ++k) {\n-\t\tint op = bam1_cigar(b)[k] & BAM_CIGAR_MASK; // operation\n-\t\tint l = bam1_cigar(b)[k] >> BAM_CIGAR_SHIFT; // length\n-\t\tif (op == BAM_CMATCH) { // NOTE: this assumes the first and the last operation MUST BE a match or a clip\n-\t\t\tif (x + l > pos) { // overlap with pos\n-\t\t\t\tp->indel = p->is_del = 0;\n-\t\t\t\tp->qpos = y + (pos - x);\n-\t\t\t\tif (x == pos && is_restart) p->is_head = 1;\n-\t\t\t\tif (x + l - 1 == pos) { // come to the end of a match\n-\t\t\t\t\tif (k < c->n_cigar - 1) { // there are additional operation(s)\n-\t\t\t\t\t\tuint32_t cigar = bam1_cigar(b)[k+1]; // next CIGAR\n-\t\t\t\t\t\tint op_next = cigar&BAM_CIGAR_MASK; // next CIGAR operation\n-\t\t\t\t\t\tif (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del\n-\t\t\t\t\t\telse if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins\n-\t\t\t\t\t\tif (op_next == BAM_CSOFT_CLIP || op_next == BAM_CREF_SKIP || op_next == BAM_CHARD_CLIP)\n-\t\t\t\t\t\t\tp->is_tail = 1; // tail\n-\t\t\t\t\t} else p->is_tail = 1; // this is the last operation; set tail\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tx += l; y += l;\n-\t\t} else if (op == BAM_CDEL) { // then set ->is_del\n-\t\t\tif (x + l > pos) {\n-\t\t\t\tp->indel = 0; p->is_del = 1;\n-\t\t\t\tp->qpos = y + (pos - x);\n-\t\t\t}\n-\t\t\tx += l;\n-\t\t} else if (op == BAM_CREF_SKIP) x += l;\n-\t\telse if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;\n-\t\tis_restart = (op 
== BAM_CREF_SKIP || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP);\n-\t\tif (x > pos) {\n-\t\t\tif (op == BAM_CREF_SKIP) ret = 0; // then do not put it into pileup at all\n-\t\t\tbreak;\n-\t\t}\n-\t}\n-\tassert(x > pos); // otherwise a bug\n-\treturn ret;\n-\n-}\n-// the following code has been taken from bam_plbuf_'..b'plp);\n- if (plp == NULL) return 0;\n- return 1;\n-}\n-\n-// pysam dispatch function to emulate the samtools\n-// command line within python.\n-// taken from the main function in bamtk.c\n-// added code to reset getopt\n-extern int main_samview(int argc, char *argv[]);\n-extern int main_import(int argc, char *argv[]);\n-extern int bam_pileup(int argc, char *argv[]);\n-extern int bam_merge(int argc, char *argv[]);\n-extern int bam_sort(int argc, char *argv[]);\n-extern int bam_index(int argc, char *argv[]);\n-extern int faidx_main(int argc, char *argv[]);\n-extern int bam_mating(int argc, char *argv[]);\n-extern int bam_rmdup(int argc, char *argv[]);\n-extern int glf3_view_main(int argc, char *argv[]);\n-extern int bam_flagstat(int argc, char *argv[]);\n-extern int bam_fillmd(int argc, char *argv[]);\n-\n-int pysam_dispatch(int argc, char *argv[] )\n-{\n-\n-#ifdef _WIN32\n- setmode(fileno(stdout), O_BINARY);\n- setmode(fileno(stdin), O_BINARY);\n-#ifdef _USE_KNETFILE\n- knet_win32_init();\n-#endif\n-#endif\n-\n- extern int optind;\n- \n- // reset getop\n- optind = 1;\n-\n- if (argc < 2) return 1;\n-\n- if (strcmp(argv[1], "view") == 0) return main_samview(argc-1, argv+1);\n- else if (strcmp(argv[1], "import") == 0) return main_import(argc-1, argv+1);\n- else if (strcmp(argv[1], "pileup") == 0) return bam_pileup(argc-1, argv+1);\n- else if (strcmp(argv[1], "merge") == 0) return bam_merge(argc-1, argv+1);\n- else if (strcmp(argv[1], "sort") == 0) return bam_sort(argc-1, argv+1);\n- else if (strcmp(argv[1], "index") == 0) return bam_index(argc-1, argv+1);\n- else if (strcmp(argv[1], "faidx") == 0) return faidx_main(argc-1, argv+1);\n- else if 
(strcmp(argv[1], "fixmate") == 0) return bam_mating(argc-1, argv+1);\n- else if (strcmp(argv[1], "rmdup") == 0) return bam_rmdup(argc-1, argv+1);\n- else if (strcmp(argv[1], "glfview") == 0) return glf3_view_main(argc-1, argv+1);\n- else if (strcmp(argv[1], "flagstat") == 0) return bam_flagstat(argc-1, argv+1);\n- else if (strcmp(argv[1], "calmd") == 0) return bam_fillmd(argc-1, argv+1);\n- else if (strcmp(argv[1], "fillmd") == 0) return bam_fillmd(argc-1, argv+1);\n-\n-#if _CURSES_LIB != 0\n- else if (strcmp(argv[1], "tview") == 0) return bam_tview_main(argc-1, argv+1);\n-#endif\n- else \n- {\n- fprintf(stderr, "[main] unrecognized command \'%s\'\\n", argv[1]);\n- return 1;\n- }\n- return 0;\n-}\n-\n-// taken from samtools/bam_import.c\n-static inline uint8_t *alloc_data(bam1_t *b, size_t size)\n-{\n- if (b->m_data < size)\n- {\n- b->m_data = size;\n- kroundup32(b->m_data);\n- b->data = (uint8_t*)realloc(b->data, b->m_data);\n- }\n- return b->data;\n-}\n-\n-// update the variable length data within a bam1_t entry.\n-// Adds *nbytes_new* - *nbytes_old* into the variable length data of *src* at *pos*.\n-// Data within the bam1_t entry is moved so that it is\n-// consistent with the data field lengths.\n-bam1_t * pysam_bam_update( bam1_t * b,\n-\t\t\t const size_t nbytes_old,\n-\t\t\t const size_t nbytes_new, \n-\t\t\t uint8_t * pos )\n-{\n- int d = nbytes_new-nbytes_old;\n-\n- // no change\n- if (d == 0) return b;\n-\n- int new_size = d + b->data_len;\n- size_t offset = pos - b->data;\n-\n- //printf("d=%i, old=%i, new=%i, old_size=%i, new_size=%i\\n",\n- // d, nbytes_old, nbytes_new, b->data_len, new_size);\n- \n- // increase memory if required\n- if (d > 0)\n- {\n- alloc_data( b, new_size );\n- pos = b->data + offset;\n- }\n- \n- if (b->data_len != 0)\n- {\n- if (offset < 0 || offset > b->data_len)\n-\tfprintf(stderr, "[pysam_bam_insert] illegal offset: \'%i\'\\n", (int)offset);\n- }\n- \n- // printf("dest=%p, src=%p, n=%i\\n", pos+nbytes_new, pos + nbytes_old, 
b->data_len - (offset+nbytes_old));\n- memmove( pos + nbytes_new,\n-\t pos + nbytes_old,\n-\t b->data_len - (offset + nbytes_old));\n- \n- b->data_len = new_size;\n- \n- return b;\n-}\n-\n-// translate a nucleotide character to binary code\n-unsigned char pysam_translate_sequence( const unsigned char s )\n-{\n- return bam_nt16_table[s];\n-}\n-\n-\n-\n-\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/pysam_util.h --- a/chimerascan/pysam/pysam_util.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,42 +0,0 @@ -#ifndef PYSAM_UTIL_H -#define PYSAM_UTIL_H - -////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////// -// various helper functions -// -// fill pileup buffer for next position. - -int pysam_pileup_next(const bam1_t *b, - bam_plbuf_t *buf, - bam_pileup1_t ** plp, - int * tid, - int * pos, - int * n_plp); - -int pysam_dispatch(int argc, char *argv[] ); - -/*! - @abstract Update the variable length data within a bam1_t entry - - Old data is deleted and the data within b are re-arranged to - make place for new data. - - @discussion Returns b - - @param b bam1_t data - @param nbytes_old size of old data - @param nbytes_new size of new data - @param pos position of data -*/ -bam1_t * pysam_bam_update( bam1_t * b, - const size_t nbytes_old, - const size_t nbytes_new, - uint8_t * pos ); - -// translate a nucleotide character to binary code -unsigned char pysam_translate_sequence( const unsigned char s ); - - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam.c --- a/chimerascan/pysam/samtools/bam.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,308 +0,0 @@\n-#include <stdio.h>\n-#include <ctype.h>\n-#include <errno.h>\n-#include <assert.h>\n-#include "bam.h"\n-#include "bam_endian.h"\n-#include "kstring.h"\n-#include "sam_header.h"\n-\n-int bam_is_be = 0;\n-char *bam_flag2char_table = "pPuUrR12sfd\\0\\0\\0\\0\\0";\n-\n-/**************************\n- * CIGAR related routines *\n- **************************/\n-\n-uint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar)\n-{\n-\tuint32_t k, end;\n-\tend = c->pos;\n-\tfor (k = 0; k < c->n_cigar; ++k) {\n-\t\tint op = cigar[k] & BAM_CIGAR_MASK;\n-\t\tif (op == BAM_CMATCH || op == BAM_CDEL || op == BAM_CREF_SKIP)\n-\t\t\tend += cigar[k] >> BAM_CIGAR_SHIFT;\n-\t}\n-\treturn end;\n-}\n-\n-int32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar)\n-{\n-\tuint32_t k;\n-\tint32_t l = 0;\n-\tfor (k = 0; k < c->n_cigar; ++k) {\n-\t\tint op = cigar[k] & BAM_CIGAR_MASK;\n-\t\tif (op == BAM_CMATCH || op == BAM_CINS || op == BAM_CSOFT_CLIP)\n-\t\t\tl += cigar[k] >> BAM_CIGAR_SHIFT;\n-\t}\n-\treturn l;\n-}\n-\n-/********************\n- * BAM I/O routines *\n- ********************/\n-\n-bam_header_t *bam_header_init()\n-{\n-\tbam_is_be = bam_is_big_endian();\n-\treturn (bam_header_t*)calloc(1, sizeof(bam_header_t));\n-}\n-\n-void bam_header_destroy(bam_header_t *header)\n-{\n-\tint32_t i;\n-\textern void bam_destroy_header_hash(bam_header_t *header);\n-\tif (header == 0) return;\n-\tif (header->target_name) {\n-\t\tfor (i = 0; i < header->n_targets; ++i)\n-\t\t\tfree(header->target_name[i]);\n-\t\tfree(header->target_name);\n-\t\tfree(header->target_len);\n-\t}\n-\tfree(header->text);\n-\tif (header->dict) sam_header_free(header->dict);\n-\tif (header->rg2lib) sam_tbl_destroy(header->rg2lib);\n-\tbam_destroy_header_hash(header);\n-\tfree(header);\n-}\n-\n-bam_header_t *bam_header_read(bamFile fp)\n-{\n-\tbam_header_t *header;\n-\tchar buf[4];\n-\tint magic_len;\n-\tint32_t i = 1, name_len;\n-\t// check EOF\n-\ti = bgzf_check_EOF(fp);\n-\tif (i < 0) 
{\n-\t\t// If the file is a pipe, checking the EOF marker will *always* fail\n-\t\t// with ESPIPE. Suppress the error message in this case.\n-\t\tif (errno != ESPIPE) perror("[bam_header_read] bgzf_check_EOF");\n-\t}\n-\telse if (i == 0) fprintf(stderr, "[bam_header_read] EOF marker is absent.\\n");\n-\t// read "BAM1"\n-\tmagic_len = bam_read(fp, buf, 4);\n-\tif (magic_len != 4 || strncmp(buf, "BAM\\001", 4) != 0) {\n-\t\tfprintf(stderr, "[bam_header_read] invalid BAM binary header (this is not a BAM file).\\n");\n-\t\treturn 0;\n-\t}\n-\theader = bam_header_init();\n-\t// read plain text and the number of reference sequences\n-\tbam_read(fp, &header->l_text, 4);\n-\tif (bam_is_be) bam_swap_endian_4p(&header->l_text);\n-\theader->text = (char*)calloc(header->l_text + 1, 1);\n-\tbam_read(fp, header->text, header->l_text);\n-\tbam_read(fp, &header->n_targets, 4);\n-\tif (bam_is_be) bam_swap_endian_4p(&header->n_targets);\n-\t// read reference sequence names and lengths\n-\theader->target_name = (char**)calloc(header->n_targets, sizeof(char*));\n-\theader->target_len = (uint32_t*)calloc(header->n_targets, 4);\n-\tfor (i = 0; i != header->n_targets; ++i) {\n-\t\tbam_read(fp, &name_len, 4);\n-\t\tif (bam_is_be) bam_swap_endian_4p(&name_len);\n-\t\theader->target_name[i] = (char*)calloc(name_len, 1);\n-\t\tbam_read(fp, header->target_name[i], name_len);\n-\t\tbam_read(fp, &header->target_len[i], 4);\n-\t\tif (bam_is_be) bam_swap_endian_4p(&header->target_len[i]);\n-\t}\n-\treturn header;\n-}\n-\n-int bam_header_write(bamFile fp, const bam_header_t *header)\n-{\n-\tchar buf[4];\n-\tint32_t i, name_len, x;\n-\t// write "BAM1"\n-\tstrncpy(buf, "BAM\\001", 4);\n-\tbam_write(fp, buf, 4);\n-\t// write plain text and the number of reference sequences\n-\tif (bam_is_be) {\n-\t\tx = bam_swap_endian_4(header->l_text);\n-\t\tbam_write(fp, &x, 4);\n-\t\tif (header->l_text) bam_write(fp, header->text, header->l_text);\n-\t\tx = 
bam_swap_endian_4(header->n_targets);\n-\t\tbam_write(fp, &x, 4);\n-\t} else {\n-\t\tbam_write(fp, &header->l_text, 4);\n-\t\tif (header->l_text) bam_write(fp, header->text, header->l_text);\n-\t\tbam_write(fp, &header->n_targets, 4);\n-\t}\n-\t// write sequence names and lengths\n-\tfor (i = 0; i != header->n_targets'..b'32);\n-\tx[0] = c->tid;\n-\tx[1] = c->pos;\n-\tx[2] = (uint32_t)c->bin<<16 | c->qual<<8 | c->l_qname;\n-\tx[3] = (uint32_t)c->flag<<16 | c->n_cigar;\n-\tx[4] = c->l_qseq;\n-\tx[5] = c->mtid;\n-\tx[6] = c->mpos;\n-\tx[7] = c->isize;\n-\tbgzf_flush_try(fp, 4 + block_len);\n-\tif (bam_is_be) {\n-\t\tfor (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i);\n-\t\ty = block_len;\n-\t\tbam_write(fp, bam_swap_endian_4p(&y), 4);\n-\t\tswap_endian_data(c, data_len, data);\n-\t} else bam_write(fp, &block_len, 4);\n-\tbam_write(fp, x, BAM_CORE_SIZE);\n-\tbam_write(fp, data, data_len);\n-\tif (bam_is_be) swap_endian_data(c, data_len, data);\n-\treturn 4 + block_len;\n-}\n-\n-int bam_write1(bamFile fp, const bam1_t *b)\n-{\n-\treturn bam_write1_core(fp, &b->core, b->data_len, b->data);\n-}\n-\n-char *bam_format1_core(const bam_header_t *header, const bam1_t *b, int of)\n-{\n-\tuint8_t *s = bam1_seq(b), *t = bam1_qual(b);\n-\tint i;\n-\tconst bam1_core_t *c = &b->core;\n-\tkstring_t str;\n-\tstr.l = str.m = 0; str.s = 0;\n-\n-\tkputsn(bam1_qname(b), c->l_qname-1, &str); kputc(\'\\t\', &str);\n-\tif (of == BAM_OFDEC) { kputw(c->flag, &str); kputc(\'\\t\', &str); }\n-\telse if (of == BAM_OFHEX) ksprintf(&str, "0x%x\\t", c->flag);\n-\telse { // BAM_OFSTR\n-\t\tfor (i = 0; i < 16; ++i)\n-\t\t\tif ((c->flag & 1<<i) && bam_flag2char_table[i])\n-\t\t\t\tkputc(bam_flag2char_table[i], &str);\n-\t\tkputc(\'\\t\', &str);\n-\t}\n-\tif (c->tid < 0) kputsn("*\\t", 2, &str);\n-\telse { kputs(header->target_name[c->tid], &str); kputc(\'\\t\', &str); }\n-\tkputw(c->pos + 1, &str); kputc(\'\\t\', &str); kputw(c->qual, &str); kputc(\'\\t\', &str);\n-\tif (c->n_cigar == 0) kputc(\'*\', 
&str);\n-\telse {\n-\t\tfor (i = 0; i < c->n_cigar; ++i) {\n-\t\t\tkputw(bam1_cigar(b)[i]>>BAM_CIGAR_SHIFT, &str);\n-\t\t\tkputc("MIDNSHP"[bam1_cigar(b)[i]&BAM_CIGAR_MASK], &str);\n-\t\t}\n-\t}\n-\tkputc(\'\\t\', &str);\n-\tif (c->mtid < 0) kputsn("*\\t", 2, &str);\n-\telse if (c->mtid == c->tid) kputsn("=\\t", 2, &str);\n-\telse { kputs(header->target_name[c->mtid], &str); kputc(\'\\t\', &str); }\n-\tkputw(c->mpos + 1, &str); kputc(\'\\t\', &str); kputw(c->isize, &str); kputc(\'\\t\', &str);\n-\tif (c->l_qseq) {\n-\t\tfor (i = 0; i < c->l_qseq; ++i) kputc(bam_nt16_rev_table[bam1_seqi(s, i)], &str);\n-\t\tkputc(\'\\t\', &str);\n-\t\tif (t[0] == 0xff) kputc(\'*\', &str);\n-\t\telse for (i = 0; i < c->l_qseq; ++i) kputc(t[i] + 33, &str);\n-\t} else kputsn("*\\t*", 3, &str);\n-\ts = bam1_aux(b);\n-\twhile (s < b->data + b->data_len) {\n-\t\tuint8_t type, key[2];\n-\t\tkey[0] = s[0]; key[1] = s[1];\n-\t\ts += 2; type = *s; ++s;\n-\t\tkputc(\'\\t\', &str); kputsn((char*)key, 2, &str); kputc(\':\', &str);\n-\t\tif (type == \'A\') { kputsn("A:", 2, &str); kputc(*s, &str); ++s; }\n-\t\telse if (type == \'C\') { kputsn("i:", 2, &str); kputw(*s, &str); ++s; }\n-\t\telse if (type == \'c\') { kputsn("i:", 2, &str); kputw(*(int8_t*)s, &str); ++s; }\n-\t\telse if (type == \'S\') { kputsn("i:", 2, &str); kputw(*(uint16_t*)s, &str); s += 2; }\n-\t\telse if (type == \'s\') { kputsn("i:", 2, &str); kputw(*(int16_t*)s, &str); s += 2; }\n-\t\telse if (type == \'I\') { kputsn("i:", 2, &str); kputuw(*(uint32_t*)s, &str); s += 4; }\n-\t\telse if (type == \'i\') { kputsn("i:", 2, &str); kputw(*(int32_t*)s, &str); s += 4; }\n-\t\telse if (type == \'f\') { ksprintf(&str, "f:%g", *(float*)s); s += 4; }\n-\t\telse if (type == \'d\') { ksprintf(&str, "d:%lg", *(double*)s); s += 8; }\n-\t\telse if (type == \'Z\' || type == \'H\') { kputc(type, &str); kputc(\':\', &str); while (*s) kputc(*s++, &str); ++s; }\n-\t}\n-\treturn str.s;\n-}\n-\n-char *bam_format1(const bam_header_t *header, const 
bam1_t *b)\n-{\n-\treturn bam_format1_core(header, b, BAM_OFDEC);\n-}\n-\n-void bam_view1(const bam_header_t *header, const bam1_t *b)\n-{\n-\tchar *s = bam_format1(header, b);\n-\tputs(s);\n-\tfree(s);\n-}\n-\n-// FIXME: we should also check the LB tag associated with each alignment\n-const char *bam_get_library(bam_header_t *h, const bam1_t *b)\n-{\n-\tconst uint8_t *rg;\n-\tif (h->dict == 0) h->dict = sam_header_parse2(h->text);\n-\tif (h->rg2lib == 0) h->rg2lib = sam_header2tbl(h->dict, "RG", "ID", "LB");\n-\trg = bam_aux_get(b, "RG");\n-\treturn (rg == 0)? 0 : sam_tbl_get(h->rg2lib, (const char*)(rg + 1));\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam.h --- a/chimerascan/pysam/samtools/bam.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,724 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Genome Research Ltd (GRL).\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Contact: Heng Li <lh3@sanger.ac.uk> */\n-\n-#ifndef BAM_BAM_H\n-#define BAM_BAM_H\n-\n-/*!\n- @header\n-\n- BAM library provides I/O and various operations on manipulating files\n- in the BAM (Binary Alignment/Mapping) or SAM (Sequence Alignment/Map)\n- format. It now supports importing from or exporting to TAM, sorting,\n- merging, generating pileup, and quickly retrieval of reads overlapped\n- with a specified region.\n-\n- @copyright Genome Research Ltd.\n- */\n-\n-#include <stdint.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <stdio.h>\n-\n-#ifndef BAM_LITE\n-#define BAM_VIRTUAL_OFFSET16\n-#include "bgzf.h"\n-/*! 
@abstract BAM file handler */\n-typedef BGZF *bamFile;\n-#define bam_open(fn, mode) bgzf_open(fn, mode)\n-#define bam_dopen(fd, mode) bgzf_fdopen(fd, mode)\n-#define bam_close(fp) bgzf_close(fp)\n-#define bam_read(fp, buf, size) bgzf_read(fp, buf, size)\n-#define bam_write(fp, buf, size) bgzf_write(fp, buf, size)\n-#define bam_tell(fp) bgzf_tell(fp)\n-#define bam_seek(fp, pos, dir) bgzf_seek(fp, pos, dir)\n-#else\n-#define BAM_TRUE_OFFSET\n-#include <zlib.h>\n-typedef gzFile bamFile;\n-#define bam_open(fn, mode) gzopen(fn, mode)\n-#define bam_dopen(fd, mode) gzdopen(fd, mode)\n-#define bam_close(fp) gzclose(fp)\n-#define bam_read(fp, buf, size) gzread(fp, buf, size)\n-/* no bam_write/bam_tell/bam_seek() here */\n-#endif\n-\n-/*! @typedef\n- @abstract Structure for the alignment header.\n- @field n_targets number of reference sequences\n- @field target_name names of the reference sequences\n- @field target_len lengths of the referene sequences\n- @field dict header dictionary\n- @field hash hash table for fast name lookup\n- @field rg2lib hash table for @RG-ID -> LB lookup\n- @field l_text length of the plain text in the header\n- @field text plain text\n-\n- @discussion Field hash points to null by default. It is a private\n- member.\n- */\n-typedef struct {\n-\tint32_t n_targets;\n-\tchar **target_name;\n-\tuint32_t *target_len;\n-\tvoid *dict, *hash, *rg2lib;\n-\tsize_t l_text, n_text;\n-\tchar *text;\n-} bam_header_t;\n-\n-/*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */\n-#define BAM_FPAIRED 1\n-/*! @abstract the read is mapped in a proper pair */\n-#define BAM_FPROPER_PAIR 2\n-/*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */\n-#define BAM_FUNMAP 4\n-/*! @abstract the mate is unmapped */\n-#define BAM_FMUNMAP 8\n-/*! @abstract the read is mapped to the reverse strand */\n-#define BAM_FREVERSE 16\n-/*! @abstract the mate is mapped to the reverse strand */\n-#define BAM_FMREVERSE 32\n-/*! 
@abstract this is read1 */\n-#define BAM_FREAD1 64\n-/*! @abstract this is read2 */\n-#define BAM_FREAD2 128\n-/*! @abstract not primary alignment */\n-#define BA'..b'g, int end);\n-\tint bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b);\n-\tvoid bam_iter_destroy(bam_iter_t iter);\n-\n-\t/*!\n-\t @abstract Parse a region in the format: "chr2:100,000-200,000".\n-\t @discussion bam_header_t::hash will be initialized if empty.\n-\t @param header pointer to the header structure\n-\t @param str string to be parsed\n-\t @param ref_id the returned chromosome ID\n-\t @param begin the returned start coordinate\n-\t @param end the returned end coordinate\n-\t @return 0 on success; -1 on failure\n-\t */\n-\tint bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end);\n-\n-\n-\t/**************************\n-\t * APIs for optional tags *\n-\t **************************/\n-\n-\t/*!\n-\t @abstract Retrieve data of a tag\n-\t @param b pointer to an alignment struct\n-\t @param tag two-character tag to be retrieved\n-\n-\t @return pointer to the type and data. The first character is the\n-\t type that can be \'iIsScCdfAZH\'.\n-\n-\t @discussion Use bam_aux2?() series to convert the returned data to\n-\t the corresponding type.\n-\t*/\n-\tuint8_t *bam_aux_get(const bam1_t *b, const char tag[2]);\n-\n-\tint32_t bam_aux2i(const uint8_t *s);\n-\tfloat bam_aux2f(const uint8_t *s);\n-\tdouble bam_aux2d(const uint8_t *s);\n-\tchar bam_aux2A(const uint8_t *s);\n-\tchar *bam_aux2Z(const uint8_t *s);\n-\n-\tint bam_aux_del(bam1_t *b, uint8_t *s);\n-\tvoid bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data);\n-\tuint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]); // an alias of bam_aux_get()\n-\n-\n-\t/*****************\n-\t * Miscellaneous *\n-\t *****************/\n-\n-\t/*! 
\n-\t @abstract Calculate the rightmost coordinate of an alignment on the\n-\t reference genome.\n-\n-\t @param c pointer to the bam1_core_t structure\n-\t @param cigar the corresponding CIGAR array (from bam1_t::cigar)\n-\t @return the rightmost coordinate, 0-based\n-\t*/\n-\tuint32_t bam_calend(const bam1_core_t *c, const uint32_t *cigar);\n-\n-\t/*!\n-\t @abstract Calculate the length of the query sequence from CIGAR.\n-\t @param c pointer to the bam1_core_t structure\n-\t @param cigar the corresponding CIGAR array (from bam1_t::cigar)\n-\t @return length of the query sequence\n-\t*/\n-\tint32_t bam_cigar2qlen(const bam1_core_t *c, const uint32_t *cigar);\n-\n-#ifdef __cplusplus\n-}\n-#endif\n-\n-/*!\n- @abstract Calculate the minimum bin that contains a region [beg,end).\n- @param beg start of the region, 0-based\n- @param end end of the region, 0-based\n- @return bin\n- */\n-static inline int bam_reg2bin(uint32_t beg, uint32_t end)\n-{\n-\t--end;\n-\tif (beg>>14 == end>>14) return 4681 + (beg>>14);\n-\tif (beg>>17 == end>>17) return 585 + (beg>>17);\n-\tif (beg>>20 == end>>20) return 73 + (beg>>20);\n-\tif (beg>>23 == end>>23) return 9 + (beg>>23);\n-\tif (beg>>26 == end>>26) return 1 + (beg>>26);\n-\treturn 0;\n-}\n-\n-/*!\n- @abstract Copy an alignment\n- @param bdst destination alignment struct\n- @param bsrc source alignment struct\n- @return pointer to the destination alignment struct\n- */\n-static inline bam1_t *bam_copy1(bam1_t *bdst, const bam1_t *bsrc)\n-{\n-\tuint8_t *data = bdst->data;\n-\tint m_data = bdst->m_data; // backup data and m_data\n-\tif (m_data < bsrc->m_data) { // double the capacity\n-\t\tm_data = bsrc->m_data; kroundup32(m_data);\n-\t\tdata = (uint8_t*)realloc(data, m_data);\n-\t}\n-\tmemcpy(data, bsrc->data, bsrc->data_len); // copy var-len data\n-\t*bdst = *bsrc; // copy the rest\n-\t// restore the backup\n-\tbdst->m_data = m_data;\n-\tbdst->data = data;\n-\treturn bdst;\n-}\n-\n-/*!\n- @abstract Duplicate an alignment\n- @param 
src source alignment struct\n- @return pointer to the destination alignment struct\n- */\n-static inline bam1_t *bam_dup1(const bam1_t *src)\n-{\n-\tbam1_t *b;\n-\tb = bam_init1();\n-\t*b = *src;\n-\tb->m_data = b->data_len;\n-\tb->data = (uint8_t*)calloc(b->data_len, 1);\n-\tmemcpy(b->data, src->data, b->data_len);\n-\treturn b;\n-}\n-\n-#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_aux.c --- a/chimerascan/pysam/samtools/bam_aux.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,182 +0,0 @@ -#include <ctype.h> -#include "bam.h" -#include "khash.h" -typedef char *str_p; -KHASH_MAP_INIT_STR(s, int) -KHASH_MAP_INIT_STR(r2l, str_p) - -void bam_aux_append(bam1_t *b, const char tag[2], char type, int len, uint8_t *data) -{ - int ori_len = b->data_len; - b->data_len += 3 + len; - b->l_aux += 3 + len; - if (b->m_data < b->data_len) { - b->m_data = b->data_len; - kroundup32(b->m_data); - b->data = (uint8_t*)realloc(b->data, b->m_data); - } - b->data[ori_len] = tag[0]; b->data[ori_len + 1] = tag[1]; - b->data[ori_len + 2] = type; - memcpy(b->data + ori_len + 3, data, len); -} - -uint8_t *bam_aux_get_core(bam1_t *b, const char tag[2]) -{ - return bam_aux_get(b, tag); -} - -#define __skip_tag(s) do { \ - int type = toupper(*(s)); \ - ++(s); \ - if (type == 'C' || type == 'A') ++(s); \ - else if (type == 'S') (s) += 2; \ - else if (type == 'I' || type == 'F') (s) += 4; \ - else if (type == 'D') (s) += 8; \ - else if (type == 'Z' || type == 'H') { while (*(s)) ++(s); ++(s); } \ - } while (0) - -uint8_t *bam_aux_get(const bam1_t *b, const char tag[2]) -{ - uint8_t *s; - int y = tag[0]<<8 | tag[1]; - s = bam1_aux(b); - while (s < b->data + b->data_len) { - int x = (int)s[0]<<8 | s[1]; - s += 2; - if (x == y) return s; - __skip_tag(s); - } - return 0; -} -// s MUST BE returned by bam_aux_get() -int bam_aux_del(bam1_t *b, uint8_t *s) -{ - uint8_t *p, *aux; - aux = bam1_aux(b); - p = s - 2; - __skip_tag(s); - memmove(p, s, b->l_aux - (s - aux)); - b->data_len -= s - p; - b->l_aux -= s - p; - return 0; -} - -void bam_init_header_hash(bam_header_t *header) -{ - if (header->hash == 0) { - int ret, i; - khiter_t iter; - khash_t(s) *h; - header->hash = h = kh_init(s); - for (i = 0; i < header->n_targets; ++i) { - iter = kh_put(s, h, header->target_name[i], &ret); - kh_value(h, iter) = i; - } - } -} - -void bam_destroy_header_hash(bam_header_t *header) -{ - if (header->hash) - kh_destroy(s, (khash_t(s)*)header->hash); -} - -int32_t bam_get_tid(const 
bam_header_t *header, const char *seq_name) -{ - khint_t k; - khash_t(s) *h = (khash_t(s)*)header->hash; - k = kh_get(s, h, seq_name); - return k == kh_end(h)? -1 : kh_value(h, k); -} - -int bam_parse_region(bam_header_t *header, const char *str, int *ref_id, int *begin, int *end) -{ - char *s, *p; - int i, l, k; - khiter_t iter; - khash_t(s) *h; - - bam_init_header_hash(header); - h = (khash_t(s)*)header->hash; - - l = strlen(str); - p = s = (char*)malloc(l+1); - /* squeeze out "," */ - for (i = k = 0; i != l; ++i) - if (str[i] != ',' && !isspace(str[i])) s[k++] = str[i]; - s[k] = 0; - for (i = 0; i != k; ++i) if (s[i] == ':') break; - s[i] = 0; - iter = kh_get(s, h, s); /* get the ref_id */ - if (iter == kh_end(h)) { // name not found - *ref_id = -1; free(s); - return -1; - } - *ref_id = kh_value(h, iter); - if (i == k) { /* dump the whole sequence */ - *begin = 0; *end = 1<<29; free(s); - return 0; - } - for (p = s + i + 1; i != k; ++i) if (s[i] == '-') break; - *begin = atoi(p); - if (i < k) { - p = s + i + 1; - *end = atoi(p); - } else *end = 1<<29; - if (*begin > 0) --*begin; - free(s); - if (*begin > *end) { - fprintf(stderr, "[bam_parse_region] invalid region.\n"); - return -1; - } - return 0; -} - -int32_t bam_aux2i(const uint8_t *s) -{ - int type; - if (s == 0) return 0; - type = *s++; - if (type == 'c') return (int32_t)*(int8_t*)s; - else if (type == 'C') return (int32_t)*(uint8_t*)s; - else if (type == 's') return (int32_t)*(int16_t*)s; - else if (type == 'S') return (int32_t)*(uint16_t*)s; - else if (type == 'i' || type == 'I') return *(int32_t*)s; - else return 0; -} - -float bam_aux2f(const uint8_t *s) -{ - int type; - type = *s++; - if (s == 0) return 0.0; - if (type == 'f') return *(float*)s; - else return 0.0; -} - -double bam_aux2d(const uint8_t *s) -{ - int type; - type = *s++; - if (s == 0) return 0.0; - if (type == 'd') return *(double*)s; - else return 0.0; -} - -char bam_aux2A(const uint8_t *s) -{ - int type; - type = *s++; - if (s == 0) 
return 0; - if (type == 'A') return *(char*)s; - else return 0; -} - -char *bam_aux2Z(const uint8_t *s) -{ - int type; - type = *s++; - if (s == 0) return 0; - if (type == 'Z' || type == 'H') return (char*)s; - else return 0; -} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_color.c --- a/chimerascan/pysam/samtools/bam_color.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,127 +0,0 @@ -#include <ctype.h> -#include "bam.h" - -/*! - @abstract Get the color encoding the previous and current base - @param b pointer to an alignment - @param i The i-th position, 0-based - @return color - - @discussion Returns 0 no color information is found. - */ -char bam_aux_getCSi(bam1_t *b, int i) -{ - uint8_t *c = bam_aux_get(b, "CS"); - char *cs = NULL; - - // return the base if the tag was not found - if(0 == c) return 0; - - cs = bam_aux2Z(c); - // adjust for strandedness and leading adaptor - if(bam1_strand(b)) i = strlen(cs) - 1 - i; - else i++; - return cs[i]; -} - -/*! - @abstract Get the color quality of the color encoding the previous and current base - @param b pointer to an alignment - @param i The i-th position, 0-based - @return color quality - - @discussion Returns 0 no color information is found. - */ -char bam_aux_getCQi(bam1_t *b, int i) -{ - uint8_t *c = bam_aux_get(b, "CQ"); - char *cq = NULL; - - // return the base if the tag was not found - if(0 == c) return 0; - - cq = bam_aux2Z(c); - // adjust for strandedness - if(bam1_strand(b)) i = strlen(cq) - 1 - i; - return cq[i]; -} - -char bam_aux_nt2int(char a) -{ - switch(toupper(a)) { - case 'A': - return 0; - break; - case 'C': - return 1; - break; - case 'G': - return 2; - break; - case 'T': - return 3; - break; - default: - return 4; - break; - } -} - -char bam_aux_ntnt2cs(char a, char b) -{ - a = bam_aux_nt2int(a); - b = bam_aux_nt2int(b); - if(4 == a || 4 == b) return '4'; - return "0123"[(int)(a ^ b)]; -} - -/*! - @abstract Get the color error profile at the give position - @param b pointer to an alignment - @return the original color if the color was an error, '-' (dash) otherwise - - @discussion Returns 0 no color information is found. 
- */ -char bam_aux_getCEi(bam1_t *b, int i) -{ - int cs_i; - uint8_t *c = bam_aux_get(b, "CS"); - char *cs = NULL; - char prev_b, cur_b; - char cur_color, cor_color; - - // return the base if the tag was not found - if(0 == c) return 0; - - cs = bam_aux2Z(c); - - // adjust for strandedness and leading adaptor - if(bam1_strand(b)) { //reverse strand - cs_i = strlen(cs) - 1 - i; - // get current color - cur_color = cs[cs_i]; - // get previous base. Note: must rc adaptor - prev_b = (cs_i == 1) ? "TGCAN"[(int)bam_aux_nt2int(cs[0])] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i+1)]; - // get current base - cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; - } - else { - cs_i=i+1; - // get current color - cur_color = cs[cs_i]; - // get previous base - prev_b = (0 == i) ? cs[0] : bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i-1)]; - // get current base - cur_b = bam_nt16_rev_table[bam1_seqi(bam1_seq(b), i)]; - } - - // corrected color - cor_color = bam_aux_ntnt2cs(prev_b, cur_b); - - if(cur_color == cor_color) { - return '-'; - } - else { - return cur_color; - } -} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_endian.h --- a/chimerascan/pysam/samtools/bam_endian.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,42 +0,0 @@ -#ifndef BAM_ENDIAN_H -#define BAM_ENDIAN_H - -#include <stdint.h> - -static inline int bam_is_big_endian() -{ - long one= 1; - return !(*((char *)(&one))); -} -static inline uint16_t bam_swap_endian_2(uint16_t v) -{ - return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); -} -static inline void *bam_swap_endian_2p(void *x) -{ - *(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x); - return x; -} -static inline uint32_t bam_swap_endian_4(uint32_t v) -{ - v = ((v & 0x0000FFFFU) << 16) | (v >> 16); - return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); -} -static inline void *bam_swap_endian_4p(void *x) -{ - *(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x); - return x; -} -static inline uint64_t bam_swap_endian_8(uint64_t v) -{ - v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); - v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); - return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); -} -static inline void *bam_swap_endian_8p(void *x) -{ - *(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x); - return x; -} - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_import.c --- a/chimerascan/pysam/samtools/bam_import.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,459 +0,0 @@\n-#include <zlib.h>\n-#include <stdio.h>\n-#include <ctype.h>\n-#include <string.h>\n-#include <stdlib.h>\n-#include <unistd.h>\n-#include <assert.h>\n-#ifdef _WIN32\n-#include <fcntl.h>\n-#endif\n-#include "kstring.h"\n-#include "bam.h"\n-#include "sam_header.h"\n-#include "kseq.h"\n-#include "khash.h"\n-\n-KSTREAM_INIT(gzFile, gzread, 8192)\n-KHASH_MAP_INIT_STR(ref, uint64_t)\n-\n-void bam_init_header_hash(bam_header_t *header);\n-void bam_destroy_header_hash(bam_header_t *header);\n-int32_t bam_get_tid(const bam_header_t *header, const char *seq_name);\n-\n-unsigned char bam_nt16_table[256] = {\n-\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n-\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n-\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n-\t 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15,\n-\t15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,\n-\t15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,\n-\t15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,\n-\t15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15,\n-\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n-\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n-\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n-\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n-\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n-\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n-\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,\n-\t15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15\n-};\n-\n-unsigned short bam_char2flag_table[256] = {\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,BAM_FREAD1,BAM_FREAD2,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\tBAM_FPROPER_PAIR,0,BAM_FMREVERSE,0, 0,BAM_FMUNMAP,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, BAM_FDUP,0,BAM_FQCFAIL,0, 0,0,0,0, 0,0,0,0,\n-\tBAM_FPAIRED,0,BAM_FREVERSE,BAM_FSECONDARY, 0,BAM_FUNMAP,0,0, 
0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,\n-\t0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0\n-};\n-\n-char *bam_nt16_rev_table = "=ACMGRSVTWYHKDBN";\n-\n-struct __tamFile_t {\n-\tgzFile fp;\n-\tkstream_t *ks;\n-\tkstring_t *str;\n-\tuint64_t n_lines;\n-\tint is_first;\n-};\n-\n-char **__bam_get_lines(const char *fn, int *_n) // for bam_plcmd.c only\n-{\n-\tchar **list = 0, *s;\n-\tint n = 0, dret, m = 0;\n-\tgzFile fp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r");\n-\tkstream_t *ks;\n-\tkstring_t *str;\n-\tstr = (kstring_t*)calloc(1, sizeof(kstring_t));\n-\tks = ks_init(fp);\n-\twhile (ks_getuntil(ks, \'\\n\', str, &dret) > 0) {\n-\t\tif (n == m) {\n-\t\t\tm = m? m << 1 : 16;\n-\t\t\tlist = (char**)realloc(list, m * sizeof(char*));\n-\t\t}\n-\t\tif (str->s[str->l-1] == \'\\r\')\n-\t\t\tstr->s[--str->l] = \'\\0\';\n-\t\ts = list[n++] = (char*)calloc(str->l + 1, 1);\n-\t\tstrcpy(s, str->s);\n-\t}\n-\tks_destroy(ks);\n-\tgzclose(fp);\n-\tfree(str->s); free(str);\n-\t*_n = n;\n-\treturn list;\n-}\n-\n-static bam_header_t *hash2header(const kh_ref_t *hash)\n-{\n-\tbam_header_t *header;\n-\tkhiter_t k;\n-\theader = bam_header_init();\n-\theader->n_targets = kh_size(hash);\n-\theader->target_name = (char**)calloc(kh_size(hash), sizeof(char*));\n-\theader->target_len = (uint32_t*)calloc(kh_size(hash), 4);\n-\tfor (k = kh_begin(hash); k != kh_end(hash); ++k) {\n-\t\tif (kh_exist(hash, k)) {\n-\t\t\tint i = (int)kh_value(hash, k);\n-\t\t\theader->target_name[i] = (char*)kh_key(hash, k);\n-\t\t\theader->target_len[i] = kh_value(hash, k)>>32;\n-\t\t}\n-\t}\n-\tbam_init_header_hash(header);\n-\treturn header;\n-}\n-bam_header_t *sam_header_read2(const char *fn)\n-{\n-\tbam_header_t *header;\n-\tint c, dret, ret, 
error = 0;\n-\tgzFile fp;\n-\tkstream_t *ks;\n-\tkstring_t *str;\n-\tkh_ref_t *hash;\n-\tkhiter_t k;\n-\tif (fn == 0) return 0;\n-\tfp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "r") : gzopen(fn, "r");\n-\tif (fp == 0) return 0;\n-\thash ='..b' sequence length are inconsistent");\n-\t\t\tp = (uint8_t*)alloc_data(b, doff + c->l_qseq + (c->l_qseq+1)/2) + doff;\n-\t\t\tmemset(p, 0, (c->l_qseq+1)/2);\n-\t\t\tfor (i = 0; i < c->l_qseq; ++i)\n-\t\t\t\tp[i/2] |= bam_nt16_table[(int)str->s[i]] << 4*(1-i%2);\n-\t\t} else c->l_qseq = 0;\n-\t\tif (ks_getuntil(ks, KS_SEP_TAB, str, &dret) < 0) return -6; // qual\n-\t\tz += str->l + 1;\n-\t\tif (strcmp(str->s, "*") && c->l_qseq != strlen(str->s))\n-\t\t\tparse_error(fp->n_lines, "sequence and quality are inconsistent");\n-\t\tp += (c->l_qseq+1)/2;\n-\t\tif (strcmp(str->s, "*") == 0) for (i = 0; i < c->l_qseq; ++i) p[i] = 0xff;\n-\t\telse for (i = 0; i < c->l_qseq; ++i) p[i] = str->s[i] - 33;\n-\t\tdoff += c->l_qseq + (c->l_qseq+1)/2;\n-\t}\n-\tdoff0 = doff;\n-\tif (dret != \'\\n\' && dret != \'\\r\') { // aux\n-\t\twhile (ks_getuntil(ks, KS_SEP_TAB, str, &dret) >= 0) {\n-\t\t\tuint8_t *s, type, key[2];\n-\t\t\tz += str->l + 1;\n-\t\t\tif (str->l < 6 || str->s[2] != \':\' || str->s[4] != \':\')\n-\t\t\t\tparse_error(fp->n_lines, "missing colon in auxiliary data");\n-\t\t\tkey[0] = str->s[0]; key[1] = str->s[1];\n-\t\t\ttype = str->s[3];\n-\t\t\ts = alloc_data(b, doff + 3) + doff;\n-\t\t\ts[0] = key[0]; s[1] = key[1]; s += 2; doff += 2;\n-\t\t\tif (type == \'A\' || type == \'a\' || type == \'c\' || type == \'C\') { // c and C for backward compatibility\n-\t\t\t\ts = alloc_data(b, doff + 2) + doff;\n-\t\t\t\t*s++ = \'A\'; *s = str->s[5];\n-\t\t\t\tdoff += 2;\n-\t\t\t} else if (type == \'I\' || type == \'i\') {\n-\t\t\t\tlong long x;\n-\t\t\t\ts = alloc_data(b, doff + 5) + doff;\n-\t\t\t\tx = (long long)atoll(str->s + 5);\n-\t\t\t\tif (x < 0) {\n-\t\t\t\t\tif (x >= -127) {\n-\t\t\t\t\t\t*s++ = \'c\'; *(int8_t*)s = 
(int8_t)x;\n-\t\t\t\t\t\ts += 1; doff += 2;\n-\t\t\t\t\t} else if (x >= -32767) {\n-\t\t\t\t\t\t*s++ = \'s\'; *(int16_t*)s = (int16_t)x;\n-\t\t\t\t\t\ts += 2; doff += 3;\n-\t\t\t\t\t} else {\n-\t\t\t\t\t\t*s++ = \'i\'; *(int32_t*)s = (int32_t)x;\n-\t\t\t\t\t\ts += 4; doff += 5;\n-\t\t\t\t\t\tif (x < -2147483648ll)\n-\t\t\t\t\t\t\tfprintf(stderr, "Parse warning at line %lld: integer %lld is out of range.",\n-\t\t\t\t\t\t\t\t\t(long long)fp->n_lines, x);\n-\t\t\t\t\t}\n-\t\t\t\t} else {\n-\t\t\t\t\tif (x <= 255) {\n-\t\t\t\t\t\t*s++ = \'C\'; *s++ = (uint8_t)x;\n-\t\t\t\t\t\tdoff += 2;\n-\t\t\t\t\t} else if (x <= 65535) {\n-\t\t\t\t\t\t*s++ = \'S\'; *(uint16_t*)s = (uint16_t)x;\n-\t\t\t\t\t\ts += 2; doff += 3;\n-\t\t\t\t\t} else {\n-\t\t\t\t\t\t*s++ = \'I\'; *(uint32_t*)s = (uint32_t)x;\n-\t\t\t\t\t\ts += 4; doff += 5;\n-\t\t\t\t\t\tif (x > 4294967295ll)\n-\t\t\t\t\t\t\tfprintf(stderr, "Parse warning at line %lld: integer %lld is out of range.",\n-\t\t\t\t\t\t\t\t\t(long long)fp->n_lines, x);\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t} else if (type == \'f\') {\n-\t\t\t\ts = alloc_data(b, doff + 5) + doff;\n-\t\t\t\t*s++ = \'f\';\n-\t\t\t\t*(float*)s = (float)atof(str->s + 5);\n-\t\t\t\ts += 4; doff += 5;\n-\t\t\t} else if (type == \'d\') {\n-\t\t\t\ts = alloc_data(b, doff + 9) + doff;\n-\t\t\t\t*s++ = \'d\';\n-\t\t\t\t*(float*)s = (float)atof(str->s + 9);\n-\t\t\t\ts += 8; doff += 9;\n-\t\t\t} else if (type == \'Z\' || type == \'H\') {\n-\t\t\t\tint size = 1 + (str->l - 5) + 1;\n-\t\t\t\tif (type == \'H\') { // check whether the hex string is valid\n-\t\t\t\t\tint i;\n-\t\t\t\t\tif ((str->l - 5) % 2 == 1) parse_error(fp->n_lines, "length of the hex string not even");\n-\t\t\t\t\tfor (i = 0; i < str->l - 5; ++i) {\n-\t\t\t\t\t\tint c = toupper(str->s[5 + i]);\n-\t\t\t\t\t\tif (!((c >= \'0\' && c <= \'9\') || (c >= \'A\' && c <= \'F\')))\n-\t\t\t\t\t\t\tparse_error(fp->n_lines, "invalid hex character");\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t\ts = alloc_data(b, doff + size) + 
doff;\n-\t\t\t\t*s++ = type;\n-\t\t\t\tmemcpy(s, str->s + 5, str->l - 5);\n-\t\t\t\ts[str->l - 5] = 0;\n-\t\t\t\tdoff += size;\n-\t\t\t} else parse_error(fp->n_lines, "unrecognized type");\n-\t\t\tif (dret == \'\\n\' || dret == \'\\r\') break;\n-\t\t}\n-\t}\n-\tb->l_aux = doff - doff0;\n-\tb->data_len = doff;\n-\treturn z;\n-}\n-\n-tamFile sam_open(const char *fn)\n-{\n-\ttamFile fp;\n-\tgzFile gzfp = (strcmp(fn, "-") == 0)? gzdopen(fileno(stdin), "rb") : gzopen(fn, "rb");\n-\tif (gzfp == 0) return 0;\n-\tfp = (tamFile)calloc(1, sizeof(struct __tamFile_t));\n-\tfp->str = (kstring_t*)calloc(1, sizeof(kstring_t));\n-\tfp->fp = gzfp;\n-\tfp->ks = ks_init(fp->fp);\n-\treturn fp;\n-}\n-\n-void sam_close(tamFile fp)\n-{\n-\tif (fp) {\n-\t\tks_destroy(fp->ks);\n-\t\tgzclose(fp->fp);\n-\t\tfree(fp->str->s); free(fp->str);\n-\t\tfree(fp);\n-\t}\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_index.c --- a/chimerascan/pysam/samtools/bam_index.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,696 +0,0 @@\n-#include <ctype.h>\n-#include <assert.h>\n-#include "bam.h"\n-#include "khash.h"\n-#include "ksort.h"\n-#include "bam_endian.h"\n-#ifdef _USE_KNETFILE\n-#include "knetfile.h"\n-#endif\n-\n-/*!\n- @header\n-\n- Alignment indexing. Before indexing, BAM must be sorted based on the\n- leftmost coordinate of alignments. In indexing, BAM uses two indices:\n- a UCSC binning index and a simple linear index. The binning index is\n- efficient for alignments spanning long distance, while the auxiliary\n- linear index helps to reduce unnecessary seek calls especially for\n- short alignments.\n-\n- The UCSC binning scheme was suggested by Richard Durbin and Lincoln\n- Stein and is explained by Kent et al. (2002). In this scheme, each bin\n- represents a contiguous genomic region which can be fully contained in\n- another bin; each alignment is associated with a bin which represents\n- the smallest region containing the entire alignment. The binning\n- scheme is essentially another representation of R-tree. A distinct bin\n- uniquely corresponds to a distinct internal node in a R-tree. Bin A is\n- a child of Bin B if region A is contained in B.\n-\n- In BAM, each bin may span 2^29, 2^26, 2^23, 2^20, 2^17 or 2^14 bp. Bin\n- 0 spans a 512Mbp region, bins 1-8 span 64Mbp, 9-72 8Mbp, 73-584 1Mbp,\n- 585-4680 128Kbp and bins 4681-37449 span 16Kbp regions. If we want to\n- find the alignments overlapped with a region [rbeg,rend), we need to\n- calculate the list of bins that may be overlapped the region and test\n- the alignments in the bins to confirm the overlaps. If the specified\n- region is short, typically only a few alignments in six bins need to\n- be retrieved. 
The overlapping alignments can be quickly fetched.\n-\n- */\n-\n-#define BAM_MIN_CHUNK_GAP 32768\n-// 1<<14 is the size of minimum bin.\n-#define BAM_LIDX_SHIFT 14\n-\n-#define BAM_MAX_BIN 37450 // =(8^6-1)/7+1\n-\n-typedef struct {\n-\tuint64_t u, v;\n-} pair64_t;\n-\n-#define pair64_lt(a,b) ((a).u < (b).u)\n-KSORT_INIT(off, pair64_t, pair64_lt)\n-\n-typedef struct {\n-\tuint32_t m, n;\n-\tpair64_t *list;\n-} bam_binlist_t;\n-\n-typedef struct {\n-\tint32_t n, m;\n-\tuint64_t *offset;\n-} bam_lidx_t;\n-\n-KHASH_MAP_INIT_INT(i, bam_binlist_t)\n-\n-struct __bam_index_t {\n-\tint32_t n;\n-\tuint64_t n_no_coor; // unmapped reads without coordinate\n-\tkhash_t(i) **index;\n-\tbam_lidx_t *index2;\n-};\n-\n-// requirement: len <= LEN_MASK\n-static inline void insert_offset(khash_t(i) *h, int bin, uint64_t beg, uint64_t end)\n-{\n-\tkhint_t k;\n-\tbam_binlist_t *l;\n-\tint ret;\n-\tk = kh_put(i, h, bin, &ret);\n-\tl = &kh_value(h, k);\n-\tif (ret) { // not present\n-\t\tl->m = 1; l->n = 0;\n-\t\tl->list = (pair64_t*)calloc(l->m, 16);\n-\t}\n-\tif (l->n == l->m) {\n-\t\tl->m <<= 1;\n-\t\tl->list = (pair64_t*)realloc(l->list, l->m * 16);\n-\t}\n-\tl->list[l->n].u = beg; l->list[l->n++].v = end;\n-}\n-\n-static inline void insert_offset2(bam_lidx_t *index2, bam1_t *b, uint64_t offset)\n-{\n-\tint i, beg, end;\n-\tbeg = b->core.pos >> BAM_LIDX_SHIFT;\n-\tend = (bam_calend(&b->core, bam1_cigar(b)) - 1) >> BAM_LIDX_SHIFT;\n-\tif (index2->m < end + 1) {\n-\t\tint old_m = index2->m;\n-\t\tindex2->m = end + 1;\n-\t\tkroundup32(index2->m);\n-\t\tindex2->offset = (uint64_t*)realloc(index2->offset, index2->m * 8);\n-\t\tmemset(index2->offset + old_m, 0, 8 * (index2->m - old_m));\n-\t}\n-\tif (beg == end) {\n-\t\tif (index2->offset[beg] == 0) index2->offset[beg] = offset;\n-\t} else {\n-\t\tfor (i = beg; i <= end; ++i)\n-\t\t\tif (index2->offset[i] == 0) index2->offset[i] = offset;\n-\t}\n-\tindex2->n = end + 1;\n-}\n-\n-static void merge_chunks(bam_index_t *idx)\n-{\n-#if 
defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)\n-\tkhash_t(i) *index;\n-\tint i, l, m;\n-\tkhint_t k;\n-\tfor (i = 0; i < idx->n; ++i) {\n-\t\tindex = idx->index[i];\n-\t\tfor (k = kh_begin(index); k != kh_end(index); ++k) {\n-\t\t\tbam_binlist_t *p;\n-\t\t\tif (!kh_exist(index, k) || kh_key(index, k) == BAM_MAX_BIN) continue;\n-\t\t\tp = &kh_value(index, k);\n-\t\t\tm = 0;\n-\t\t\tfor (l = 1; l < p->n; ++l) {\n-#ifdef BAM_TRUE_OFFSET\n-\t\t\t\tif (p->list[m].v + BAM_MIN_CHUNK_GA'..b'\tbam_iter_t iter = 0;\n-\n-\tif (beg < 0) beg = 0;\n-\tif (end < beg) return 0;\n-\t// initialize iter\n-\titer = calloc(1, sizeof(struct __bam_iter_t));\n-\titer->tid = tid, iter->beg = beg, iter->end = end; iter->i = -1;\n-\t//\n-\tbins = (uint16_t*)calloc(BAM_MAX_BIN, 2);\n-\tn_bins = reg2bins(beg, end, bins);\n-\tindex = idx->index[tid];\n-\tif (idx->index2[tid].n > 0) {\n-\t\tmin_off = (beg>>BAM_LIDX_SHIFT >= idx->index2[tid].n)? idx->index2[tid].offset[idx->index2[tid].n-1]\n-\t\t\t: idx->index2[tid].offset[beg>>BAM_LIDX_SHIFT];\n-\t\tif (min_off == 0) { // improvement for index files built by tabix prior to 0.1.4\n-\t\t\tint n = beg>>BAM_LIDX_SHIFT;\n-\t\t\tif (n > idx->index2[tid].n) n = idx->index2[tid].n;\n-\t\t\tfor (i = n - 1; i >= 0; --i)\n-\t\t\t\tif (idx->index2[tid].offset[i] != 0) break;\n-\t\t\tif (i >= 0) min_off = idx->index2[tid].offset[i];\n-\t\t}\n-\t} else min_off = 0; // tabix 0.1.2 may produce such index files\n-\tfor (i = n_off = 0; i < n_bins; ++i) {\n-\t\tif ((k = kh_get(i, index, bins[i])) != kh_end(index))\n-\t\t\tn_off += kh_value(index, k).n;\n-\t}\n-\tif (n_off == 0) {\n-\t\tfree(bins); return iter;\n-\t}\n-\toff = (pair64_t*)calloc(n_off, 16);\n-\tfor (i = n_off = 0; i < n_bins; ++i) {\n-\t\tif ((k = kh_get(i, index, bins[i])) != kh_end(index)) {\n-\t\t\tint j;\n-\t\t\tbam_binlist_t *p = &kh_value(index, k);\n-\t\t\tfor (j = 0; j < p->n; ++j)\n-\t\t\t\tif (p->list[j].v > min_off) off[n_off++] = 
p->list[j];\n-\t\t}\n-\t}\n-\tfree(bins);\n-\t{\n-\t\tbam1_t *b = (bam1_t*)calloc(1, sizeof(bam1_t));\n-\t\tint l;\n-\t\tks_introsort(off, n_off, off);\n-\t\t// resolve completely contained adjacent blocks\n-\t\tfor (i = 1, l = 0; i < n_off; ++i)\n-\t\t\tif (off[l].v < off[i].v)\n-\t\t\t\toff[++l] = off[i];\n-\t\tn_off = l + 1;\n-\t\t// resolve overlaps between adjacent blocks; this may happen due to the merge in indexing\n-\t\tfor (i = 1; i < n_off; ++i)\n-\t\t\tif (off[i-1].v >= off[i].u) off[i-1].v = off[i].u;\n-\t\t{ // merge adjacent blocks\n-#if defined(BAM_TRUE_OFFSET) || defined(BAM_VIRTUAL_OFFSET16)\n-\t\t\tfor (i = 1, l = 0; i < n_off; ++i) {\n-#ifdef BAM_TRUE_OFFSET\n-\t\t\t\tif (off[l].v + BAM_MIN_CHUNK_GAP > off[i].u) off[l].v = off[i].v;\n-#else\n-\t\t\t\tif (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v;\n-#endif\n-\t\t\t\telse off[++l] = off[i];\n-\t\t\t}\n-\t\t\tn_off = l + 1;\n-#endif\n-\t\t}\n-\t\tbam_destroy1(b);\n-\t}\n-\titer->n_off = n_off; iter->off = off;\n-\treturn iter;\n-}\n-\n-pair64_t *get_chunk_coordinates(const bam_index_t *idx, int tid, int beg, int end, int *cnt_off)\n-{ // for pysam compatibility\n-\tbam_iter_t iter;\n-\tpair64_t *off;\n-\titer = bam_iter_query(idx, tid, beg, end);\n-\toff = iter->off; *cnt_off = iter->n_off;\n-\tfree(iter);\n-\treturn off;\n-}\n-\n-void bam_iter_destroy(bam_iter_t iter)\n-{\n-\tif (iter) { free(iter->off); free(iter); }\n-}\n-\n-int bam_iter_read(bamFile fp, bam_iter_t iter, bam1_t *b)\n-{\n-\tif (iter->finished) return -1;\n-\tif (iter->from_first) {\n-\t\tint ret = bam_read1(fp, b);\n-\t\tif (ret < 0) iter->finished = 1;\n-\t\treturn ret;\n-\t}\n-\tif (iter->off == 0) return -1;\n-\tfor (;;) {\n-\t\tint ret;\n-\t\tif (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk\n-\t\t\tif (iter->i == iter->n_off - 1) break; // no more chunks\n-\t\t\tif (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug\n-\t\t\tif (iter->i < 0 
|| iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek\n-\t\t\t\tbam_seek(fp, iter->off[iter->i+1].u, SEEK_SET);\n-\t\t\t\titer->curr_off = bam_tell(fp);\n-\t\t\t}\n-\t\t\t++iter->i;\n-\t\t}\n-\t\tif ((ret = bam_read1(fp, b)) > 0) {\n-\t\t\titer->curr_off = bam_tell(fp);\n-\t\t\tif (b->core.tid != iter->tid || b->core.pos >= iter->end) break; // no need to proceed\n-\t\t\telse if (is_overlap(iter->beg, iter->end, b)) return ret;\n-\t\t} else break; // end of file\n-\t}\n-\titer->finished = 1;\n-\treturn -1;\n-}\n-\n-int bam_fetch(bamFile fp, const bam_index_t *idx, int tid, int beg, int end, void *data, bam_fetch_f func)\n-{\n-\tbam_iter_t iter;\n-\tbam1_t *b;\n-\tb = bam_init1();\n-\titer = bam_iter_query(idx, tid, beg, end);\n-\twhile (bam_iter_read(fp, iter, b) >= 0) func(b, data);\n-\tbam_destroy1(b);\n-\treturn 0;\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_lpileup.c --- a/chimerascan/pysam/samtools/bam_lpileup.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,198 +0,0 @@ -#include <stdlib.h> -#include <stdio.h> -#include <assert.h> -#include "bam.h" -#include "ksort.h" - -#define TV_GAP 2 - -typedef struct __freenode_t { - uint32_t level:28, cnt:4; - struct __freenode_t *next; -} freenode_t, *freenode_p; - -#define freenode_lt(a,b) ((a)->cnt < (b)->cnt || ((a)->cnt == (b)->cnt && (a)->level < (b)->level)) -KSORT_INIT(node, freenode_p, freenode_lt) - -/* Memory pool, similar to the one in bam_pileup.c */ -typedef struct { - int cnt, n, max; - freenode_t **buf; -} mempool_t; - -static mempool_t *mp_init() -{ - return (mempool_t*)calloc(1, sizeof(mempool_t)); -} -static void mp_destroy(mempool_t *mp) -{ - int k; - for (k = 0; k < mp->n; ++k) free(mp->buf[k]); - free(mp->buf); free(mp); -} -static inline freenode_t *mp_alloc(mempool_t *mp) -{ - ++mp->cnt; - if (mp->n == 0) return (freenode_t*)calloc(1, sizeof(freenode_t)); - else return mp->buf[--mp->n]; -} -static inline void mp_free(mempool_t *mp, freenode_t *p) -{ - --mp->cnt; p->next = 0; p->cnt = TV_GAP; - if (mp->n == mp->max) { - mp->max = mp->max? 
mp->max<<1 : 256; - mp->buf = (freenode_t**)realloc(mp->buf, sizeof(freenode_t*) * mp->max); - } - mp->buf[mp->n++] = p; -} - -/* core part */ -struct __bam_lplbuf_t { - int max, n_cur, n_pre; - int max_level, *cur_level, *pre_level; - mempool_t *mp; - freenode_t **aux, *head, *tail; - int n_nodes, m_aux; - bam_pileup_f func; - void *user_data; - bam_plbuf_t *plbuf; -}; - -void bam_lplbuf_reset(bam_lplbuf_t *buf) -{ - freenode_t *p, *q; - bam_plbuf_reset(buf->plbuf); - for (p = buf->head; p->next;) { - q = p->next; - mp_free(buf->mp, p); - p = q; - } - buf->head = buf->tail; - buf->max_level = 0; - buf->n_cur = buf->n_pre = 0; - buf->n_nodes = 0; -} - -static int tview_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data) -{ - bam_lplbuf_t *tv = (bam_lplbuf_t*)data; - freenode_t *p; - int i, l, max_level; - // allocate memory if necessary - if (tv->max < n) { // enlarge - tv->max = n; - kroundup32(tv->max); - tv->cur_level = (int*)realloc(tv->cur_level, sizeof(int) * tv->max); - tv->pre_level = (int*)realloc(tv->pre_level, sizeof(int) * tv->max); - } - tv->n_cur = n; - // update cnt - for (p = tv->head; p->next; p = p->next) - if (p->cnt > 0) --p->cnt; - // calculate cur_level[] - max_level = 0; - for (i = l = 0; i < n; ++i) { - const bam_pileup1_t *p = pl + i; - if (p->is_head) { - if (tv->head->next && tv->head->cnt == 0) { // then take a free slot - freenode_t *p = tv->head->next; - tv->cur_level[i] = tv->head->level; - mp_free(tv->mp, tv->head); - tv->head = p; - --tv->n_nodes; - } else tv->cur_level[i] = ++tv->max_level; - } else { - tv->cur_level[i] = tv->pre_level[l++]; - if (p->is_tail) { // then return a free slot - tv->tail->level = tv->cur_level[i]; - tv->tail->next = mp_alloc(tv->mp); - tv->tail = tv->tail->next; - ++tv->n_nodes; - } - } - if (tv->cur_level[i] > max_level) max_level = tv->cur_level[i]; - ((bam_pileup1_t*)p)->level = tv->cur_level[i]; - } - assert(l == tv->n_pre); - tv->func(tid, pos, n, pl, tv->user_data); - // 
sort the linked list - if (tv->n_nodes) { - freenode_t *q; - if (tv->n_nodes + 1 > tv->m_aux) { // enlarge - tv->m_aux = tv->n_nodes + 1; - kroundup32(tv->m_aux); - tv->aux = (freenode_t**)realloc(tv->aux, sizeof(void*) * tv->m_aux); - } - for (p = tv->head, i = l = 0; p->next;) { - if (p->level > max_level) { // then discard this entry - q = p->next; - mp_free(tv->mp, p); - p = q; - } else { - tv->aux[i++] = p; - p = p->next; - } - } - tv->aux[i] = tv->tail; // add a proper tail for the loop below - tv->n_nodes = i; - if (tv->n_nodes) { - ks_introsort(node, tv->n_nodes, tv->aux); - for (i = 0; i < tv->n_nodes; ++i) tv->aux[i]->next = tv->aux[i+1]; - tv->head = tv->aux[0]; - } else tv->head = tv->tail; - } - // clean up - tv->max_level = max_level; - memcpy(tv->pre_level, tv->cur_level, tv->n_cur * 4); - // squeeze out terminated levels - for (i = l = 0; i < n; ++i) { - const bam_pileup1_t *p = pl + i; - if (!p->is_tail) - tv->pre_level[l++] = tv->pre_level[i]; - } - tv->n_pre = l; -/* - fprintf(stderr, "%d\t", pos+1); - for (i = 0; i < n; ++i) { - const bam_pileup1_t *p = pl + i; - if (p->is_head) fprintf(stderr, "^"); - if (p->is_tail) fprintf(stderr, "$"); - fprintf(stderr, "%d,", p->level); - } - fprintf(stderr, "\n"); -*/ - return 0; -} - -bam_lplbuf_t *bam_lplbuf_init(bam_pileup_f func, void *data) -{ - bam_lplbuf_t *tv; - tv = (bam_lplbuf_t*)calloc(1, sizeof(bam_lplbuf_t)); - tv->mp = mp_init(); - tv->head = tv->tail = mp_alloc(tv->mp); - tv->func = func; - tv->user_data = data; - tv->plbuf = bam_plbuf_init(tview_func, tv); - return (bam_lplbuf_t*)tv; -} - -void bam_lplbuf_destroy(bam_lplbuf_t *tv) -{ - freenode_t *p, *q; - free(tv->cur_level); free(tv->pre_level); - bam_plbuf_destroy(tv->plbuf); - free(tv->aux); - for (p = tv->head; p->next;) { - q = p->next; - mp_free(tv->mp, p); p = q; - } - mp_free(tv->mp, p); - assert(tv->mp->cnt == 0); - mp_destroy(tv->mp); - free(tv); -} - -int bam_lplbuf_push(const bam1_t *b, bam_lplbuf_t *tv) -{ - return 
bam_plbuf_push(b, tv->plbuf); -} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_maqcns.c --- a/chimerascan/pysam/samtools/bam_maqcns.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,614 +0,0 @@\n-#include <math.h>\n-#include <assert.h>\n-#include "bam.h"\n-#include "bam_maqcns.h"\n-#include "ksort.h"\n-#include "kaln.h"\n-KSORT_INIT_GENERIC(uint32_t)\n-\n-#define INDEL_WINDOW_SIZE 50\n-#define INDEL_EXT_DEP 0.9\n-\n-typedef struct __bmc_aux_t {\n-\tint max;\n-\tuint32_t *info;\n-} bmc_aux_t;\n-\n-typedef struct {\n-\tfloat esum[4], fsum[4];\n-\tuint32_t c[4];\n-\tuint32_t rms_mapQ;\n-} glf_call_aux_t;\n-\n-char bam_nt16_nt4_table[] = { 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 };\n-\n-/*\n- P(<b1,b2>) = \\theta \\sum_{i=1}^{N-1} 1/i\n- P(D|<b1,b2>) = \\sum_{k=1}^{N-1} p_k 1/2 [(k/N)^n_2(1-k/N)^n_1 + (k/N)^n1(1-k/N)^n_2]\n- p_k = 1/k / \\sum_{i=1}^{N-1} 1/i\n- */\n-static void cal_het(bam_maqcns_t *aa)\n-{\n-\tint k, n1, n2;\n-\tdouble sum_harmo; // harmonic sum\n-\tdouble poly_rate;\n-\n-\tfree(aa->lhet);\n-\taa->lhet = (double*)calloc(256 * 256, sizeof(double));\n-\tsum_harmo = 0.0;\n-\tfor (k = 1; k <= aa->n_hap - 1; ++k)\n-\t\tsum_harmo += 1.0 / k;\n-\tfor (n1 = 0; n1 < 256; ++n1) {\n-\t\tfor (n2 = 0; n2 < 256; ++n2) {\n-\t\t\tlong double sum = 0.0;\n-\t\t\tdouble lC = aa->is_soap? 
0 : lgamma(n1+n2+1) - lgamma(n1+1) - lgamma(n2+1); // \\binom{n1+n2}{n1}\n-\t\t\tfor (k = 1; k <= aa->n_hap - 1; ++k) {\n-\t\t\t\tdouble pk = 1.0 / k / sum_harmo;\n-\t\t\t\tdouble log1 = log((double)k/aa->n_hap);\n-\t\t\t\tdouble log2 = log(1.0 - (double)k/aa->n_hap);\n-\t\t\t\tsum += pk * 0.5 * (expl(log1*n2) * expl(log2*n1) + expl(log1*n1) * expl(log2*n2));\n-\t\t\t}\n-\t\t\taa->lhet[n1<<8|n2] = lC + logl(sum);\n-\t\t}\n-\t}\n-\tpoly_rate = aa->het_rate * sum_harmo;\n-\taa->q_r = -4.343 * log(2.0 * poly_rate / (1.0 - poly_rate));\n-}\n-\n-/** initialize the helper structure */\n-static void cal_coef(bam_maqcns_t *aa)\n-{\n-\tint k, n, q;\n-\tlong double sum_a[257], b[256], q_c[256], tmp[256], fk2[256];\n-\tdouble *lC;\n-\n-\t// aa->lhet will be allocated and initialized \n-\tfree(aa->fk); free(aa->coef);\n-\taa->coef = 0;\n-\taa->fk = (double*)calloc(256, sizeof(double));\n-\taa->fk[0] = fk2[0] = 1.0;\n-\tfor (n = 1; n != 256; ++n) {\n-\t\taa->fk[n] = pow(aa->theta, n) * (1.0 - aa->eta) + aa->eta;\n-\t\tfk2[n] = aa->fk[n>>1]; // this is an approximation, assuming reads equally likely come from both strands\n-\t}\n-\tif (aa->is_soap) return;\n-\taa->coef = (double*)calloc(256*256*64, sizeof(double));\n-\tlC = (double*)calloc(256 * 256, sizeof(double));\n-\tfor (n = 1; n != 256; ++n)\n-\t\tfor (k = 1; k <= n; ++k)\n-\t\t\tlC[n<<8|k] = lgamma(n+1) - lgamma(k+1) - lgamma(n-k+1);\n-\tfor (q = 1; q != 64; ++q) {\n-\t\tdouble e = pow(10.0, -q/10.0);\n-\t\tdouble le = log(e);\n-\t\tdouble le1 = log(1.0-e);\n-\t\tfor (n = 1; n != 256; ++n) {\n-\t\t\tdouble *coef = aa->coef + (q<<16|n<<8);\n-\t\t\tsum_a[n+1] = 0.0;\n-\t\t\tfor (k = n; k >= 0; --k) { // a_k = \\sum_{i=k}^n C^n_k \\epsilon^k (1-\\epsilon)^{n-k}\n-\t\t\t\tsum_a[k] = sum_a[k+1] + expl(lC[n<<8|k] + k*le + (n-k)*le1);\n-\t\t\t\tb[k] = sum_a[k+1] / sum_a[k];\n-\t\t\t\tif (b[k] > 0.99) b[k] = 0.99;\n-\t\t\t}\n-\t\t\tfor (k = 0; k != n; ++k) // log(\\bar\\beta_{nk}(\\bar\\epsilon)^{f_k})\n-\t\t\t\tq_c[k] = -4.343 * 
fk2[k] * logl(b[k] / e);\n-\t\t\tfor (k = 1; k != n; ++k) q_c[k] += q_c[k-1]; // \\prod_{i=0}^k c_i\n-\t\t\tfor (k = 0; k <= n; ++k) { // powl() in 64-bit mode seems broken on my Mac OS X 10.4.9\n-\t\t\t\ttmp[k] = -4.343 * logl(1.0 - expl(fk2[k] * logl(b[k])));\n-\t\t\t\tcoef[k] = (k? q_c[k-1] : 0) + tmp[k]; // this is the final c_{nk}\n-\t\t\t}\n-\t\t}\n-\t}\n-\tfree(lC);\n-}\n-\n-bam_maqcns_t *bam_maqcns_init()\n-{\n-\tbam_maqcns_t *bm;\n-\tbm = (bam_maqcns_t*)calloc(1, sizeof(bam_maqcns_t));\n-\tbm->aux = (bmc_aux_t*)calloc(1, sizeof(bmc_aux_t));\n-\tbm->het_rate = 0.001;\n-\tbm->theta = 0.85;\n-\tbm->n_hap = 2;\n-\tbm->eta = 0.03;\n-\tbm->cap_mapQ = 60;\n-\treturn bm;\n-}\n-\n-void bam_maqcns_prepare(bam_maqcns_t *bm)\n-{\n-\tcal_coef(bm); cal_het(bm);\n-}\n-\n-void bam_maqcns_destroy(bam_maqcns_t *bm)\n-{\n-\tif (bm == 0) return;\n-\tfree(bm->lhet); free(bm->fk); free(bm->coef); free(bm->aux->info);\n-\tfree(bm->aux); free(bm);\n-}\n-\n-glf1_t *bam_maqcns_glfgen(int _n, const bam_pileup1_t *pl, uint8_t ref_base, bam_maqcns_t *bm)\n-{\n-\tglf_call_aux_t *b;\n-\tint i, j, k, w[8], c, n;\n-\tglf1_t *g = (glf1_t*)calloc(1, sizeof(glf1_t));\n-'..b'or (l = 0; l < n_acigar; ++l) fprintf(stderr, "%d%c", acigar[l]>>4, "MIDS"[acigar[l]&0xf]);\n-\t\t\t\t\t\tfprintf(stderr, "\\n");\n-\t\t\t\t\t\tfor (l = 0; l < tend - tbeg + types[i]; ++l) fputc("ACGTN"[ref2[l+tbeg-left]], stderr);\n-\t\t\t\t\t\tfputc(\'\\n\', stderr);\n-\t\t\t\t\t\tfor (l = 0; l < qend - qbeg; ++l) fputc("ACGTN"[rs[l]], stderr);\n-\t\t\t\t\t\tfputc(\'\\n\', stderr);\n-\t\t\t\t\t\t}*/\n-\t\t\t\t\tfree(acigar);\n-\t\t\t\t}\n-\t\t\t}\n-\t\t}\n-\t\t{ // get final result\n-\t\t\tint *sum, max1, max2, max1_i, max2_i;\n-\t\t\t// pick up the best two score\n-\t\t\tsum = (int*)calloc(n_types, sizeof(int));\n-\t\t\tfor (i = 0; i < n_types; ++i)\n-\t\t\t\tfor (j = 0; j < n; ++j)\n-\t\t\t\t\tsum[i] += -pscore[i*n+j];\n-\t\t\tmax1 = max2 = -0x7fffffff; max1_i = max2_i = -1;\n-\t\t\tfor (i = 0; i < n_types; ++i) 
{\n-\t\t\t\tif (sum[i] > max1) {\n-\t\t\t\t\tmax2 = max1; max2_i = max1_i; max1 = sum[i]; max1_i = i;\n-\t\t\t\t} else if (sum[i] > max2) {\n-\t\t\t\t\tmax2 = sum[i]; max2_i = i;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tfree(sum);\n-\t\t\t// write ret\n-\t\t\tret = (bam_maqindel_ret_t*)calloc(1, sizeof(bam_maqindel_ret_t));\n-\t\t\tret->indel1 = types[max1_i]; ret->indel2 = types[max2_i];\n-\t\t\tret->s[0] = (char*)calloc(abs(ret->indel1) + 2, 1);\n-\t\t\tret->s[1] = (char*)calloc(abs(ret->indel2) + 2, 1);\n-\t\t\t// write indel sequence\n-\t\t\tif (ret->indel1 > 0) {\n-\t\t\t\tret->s[0][0] = \'+\';\n-\t\t\t\tfor (k = 0; k < ret->indel1; ++k)\n-\t\t\t\t\tret->s[0][k+1] = bam_nt16_rev_table[(int)inscns[max1_i*max_ins + k]];\n-\t\t\t} else if (ret->indel1 < 0) {\n-\t\t\t\tret->s[0][0] = \'-\';\n-\t\t\t\tfor (k = 0; k < -ret->indel1 && ref[pos + k + 1]; ++k)\n-\t\t\t\t\tret->s[0][k+1] = ref[pos + k + 1];\n-\t\t\t} else ret->s[0][0] = \'*\';\n-\t\t\tif (ret->indel2 > 0) {\n-\t\t\t\tret->s[1][0] = \'+\';\n-\t\t\t\tfor (k = 0; k < ret->indel2; ++k)\n-\t\t\t\t\tret->s[1][k+1] = bam_nt16_rev_table[(int)inscns[max2_i*max_ins + k]];\n-\t\t\t} else if (ret->indel2 < 0) {\n-\t\t\t\tret->s[1][0] = \'-\';\n-\t\t\t\tfor (k = 0; k < -ret->indel2 && ref[pos + k + 1]; ++k)\n-\t\t\t\t\tret->s[1][k+1] = ref[pos + k + 1];\n-\t\t\t} else ret->s[1][0] = \'*\';\n-\t\t\t// write count\n-\t\t\tfor (i = 0; i < n; ++i) {\n-\t\t\t\tconst bam_pileup1_t *p = pl + i;\n-\t\t\t\tif (p->indel == ret->indel1) ++ret->cnt1;\n-\t\t\t\telse if (p->indel == ret->indel2) ++ret->cnt2;\n-\t\t\t\telse ++ret->cnt_anti;\n-\t\t\t}\n-\t\t\t{ // write gl[]\n-\t\t\t\tint tmp, seq_err = 0;\n-\t\t\t\tdouble x = 1.0;\n-\t\t\t\ttmp = max1_i - max2_i;\n-\t\t\t\tif (tmp < 0) tmp = -tmp;\n-\t\t\t\tfor (j = 0; j < tmp + 1; ++j) x *= INDEL_EXT_DEP;\n-\t\t\t\tseq_err = mi->q_indel * (1.0 - x) / (1.0 - INDEL_EXT_DEP);\n-\t\t\t\tret->gl[0] = ret->gl[1] = 0;\n-\t\t\t\tfor (j = 0; j < n; ++j) {\n-\t\t\t\t\tint s1 = pscore[max1_i*n + j], s2 
= pscore[max2_i*n + j];\n-\t\t\t\t\t//fprintf(stderr, "id=%d, %d, %d, %d, %d, %d\\n", j, pl[j].b->core.pos+1, types[max1_i], types[max2_i], s1, s2);\n-\t\t\t\t\tif (s1 > s2) ret->gl[0] += s1 - s2 < seq_err? s1 - s2 : seq_err;\n-\t\t\t\t\telse ret->gl[1] += s2 - s1 < seq_err? s2 - s1 : seq_err;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\t// write cnt_ref and cnt_ambi\n-\t\t\tif (max1_i != 0 && max2_i != 0) {\n-\t\t\t\tfor (j = 0; j < n; ++j) {\n-\t\t\t\t\tint diff1 = score[j] - score[max1_i * n + j];\n-\t\t\t\t\tint diff2 = score[j] - score[max2_i * n + j];\n-\t\t\t\t\tif (diff1 > 0 && diff2 > 0) ++ret->cnt_ref;\n-\t\t\t\t\telse if (diff1 == 0 || diff2 == 0) ++ret->cnt_ambi;\n-\t\t\t\t}\n-\t\t\t}\n-\t\t}\n-\t\tfree(score); free(pscore); free(ref2); free(rs); free(inscns);\n-\t}\n-\t{ // call genotype\n-\t\tint q[3], qr_indel = (int)(-4.343 * log(mi->r_indel) + 0.5);\n-\t\tint min1, min2, min1_i;\n-\t\tq[0] = ret->gl[0] + (ret->s[0][0] != \'*\'? 0 : 0) * qr_indel;\n-\t\tq[1] = ret->gl[1] + (ret->s[1][0] != \'*\'? 0 : 0) * qr_indel;\n-\t\tq[2] = n * 3 + (ret->s[0][0] == \'*\' || ret->s[1][0] == \'*\'? 1 : 1) * qr_indel;\n-\t\tmin1 = min2 = 0x7fffffff; min1_i = -1;\n-\t\tfor (i = 0; i < 3; ++i) {\n-\t\t\tif (q[i] < min1) {\n-\t\t\t\tmin2 = min1; min1 = q[i]; min1_i = i;\n-\t\t\t} else if (q[i] < min2) min2 = q[i];\n-\t\t}\n-\t\tret->gt = min1_i;\n-\t\tret->q_cns = min2 - min1;\n-\t\t// set q_ref\n-\t\tif (ret->gt < 2) ret->q_ref = (ret->s[ret->gt][0] == \'*\')? 0 : q[1-ret->gt] - q[ret->gt] - qr_indel - 3;\n-\t\telse ret->q_ref = (ret->s[0][0] == \'*\')? q[0] - q[2] : q[1] - q[2];\n-\t\tif (ret->q_ref < 0) ret->q_ref = 0;\n-\t}\n-\tfree(types);\n-\treturn ret;\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_maqcns.h --- a/chimerascan/pysam/samtools/bam_maqcns.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,57 +0,0 @@ -#ifndef BAM_MAQCNS_H -#define BAM_MAQCNS_H - -#include "glf.h" - -struct __bmc_aux_t; - -typedef struct { - float het_rate, theta; - int n_hap, cap_mapQ, is_soap; - - float eta, q_r; - double *fk, *coef; - double *lhet; - struct __bmc_aux_t *aux; -} bam_maqcns_t; - -typedef struct { - int q_indel; // indel sequencing error, phred scaled - float r_indel; // indel prior - float r_snp; // snp prior - // hidden parameters, unchangeable from command line - int mm_penalty, indel_err, ambi_thres; -} bam_maqindel_opt_t; - -typedef struct { - int indel1, indel2; - int cnt1, cnt2, cnt_anti; - int cnt_ref, cnt_ambi; - char *s[2]; - // - int gt, gl[2]; - int q_cns, q_ref; -} bam_maqindel_ret_t; - -#ifdef __cplusplus -extern "C" { -#endif - - bam_maqcns_t *bam_maqcns_init(); - void bam_maqcns_prepare(bam_maqcns_t *bm); - void bam_maqcns_destroy(bam_maqcns_t *bm); - glf1_t *bam_maqcns_glfgen(int n, const bam_pileup1_t *pl, uint8_t ref_base, bam_maqcns_t *bm); - uint32_t bam_maqcns_call(int n, const bam_pileup1_t *pl, bam_maqcns_t *bm); - // return: cns<<28 | cns2<<24 | mapQ<<16 | cnsQ<<8 | cnsQ2 - uint32_t glf2cns(const glf1_t *g, int q_r); - - bam_maqindel_opt_t *bam_maqindel_opt_init(); - bam_maqindel_ret_t *bam_maqindel(int n, int pos, const bam_maqindel_opt_t *mi, const bam_pileup1_t *pl, const char *ref, - int _n_types, int *_types); - void bam_maqindel_ret_destroy(bam_maqindel_ret_t*); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_mate.c --- a/chimerascan/pysam/samtools/bam_mate.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,70 +0,0 @@ -#include <stdlib.h> -#include <string.h> -#include "bam.h" - -// currently, this function ONLY works if each read has one hit -void bam_mating_core(bamFile in, bamFile out) -{ - bam_header_t *header; - bam1_t *b[2]; - int curr, has_prev; - - header = bam_header_read(in); - bam_header_write(out, header); - - b[0] = bam_init1(); - b[1] = bam_init1(); - curr = 0; has_prev = 0; - while (bam_read1(in, b[curr]) >= 0) { - bam1_t *cur = b[curr], *pre = b[1-curr]; - if (has_prev) { - if (strcmp(bam1_qname(cur), bam1_qname(pre)) == 0) { // identical pair name - cur->core.mtid = pre->core.tid; cur->core.mpos = pre->core.pos; - pre->core.mtid = cur->core.tid; pre->core.mpos = cur->core.pos; - if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP)) - && !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) - { - uint32_t cur5, pre5; - cur5 = (cur->core.flag&BAM_FREVERSE)? bam_calend(&cur->core, bam1_cigar(cur)) : cur->core.pos; - pre5 = (pre->core.flag&BAM_FREVERSE)? 
bam_calend(&pre->core, bam1_cigar(pre)) : pre->core.pos; - cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5; - } else cur->core.isize = pre->core.isize = 0; - if (pre->core.flag&BAM_FREVERSE) cur->core.flag |= BAM_FMREVERSE; - else cur->core.flag &= ~BAM_FMREVERSE; - if (cur->core.flag&BAM_FREVERSE) pre->core.flag |= BAM_FMREVERSE; - else pre->core.flag &= ~BAM_FMREVERSE; - if (cur->core.flag & BAM_FUNMAP) { pre->core.flag |= BAM_FMUNMAP; pre->core.flag &= ~BAM_FPROPER_PAIR; } - if (pre->core.flag & BAM_FUNMAP) { cur->core.flag |= BAM_FMUNMAP; cur->core.flag &= ~BAM_FPROPER_PAIR; } - bam_write1(out, pre); - bam_write1(out, cur); - has_prev = 0; - } else { // unpaired or singleton - pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0; - if (pre->core.flag & BAM_FPAIRED) { - pre->core.flag |= BAM_FMUNMAP; - pre->core.flag &= ~BAM_FMREVERSE & ~BAM_FPROPER_PAIR; - } - bam_write1(out, pre); - } - } else has_prev = 1; - curr = 1 - curr; - } - if (has_prev) bam_write1(out, b[1-curr]); - bam_header_destroy(header); - bam_destroy1(b[0]); - bam_destroy1(b[1]); -} - -int bam_mating(int argc, char *argv[]) -{ - bamFile in, out; - if (argc < 3) { - fprintf(stderr, "samtools fixmate <in.nameSrt.bam> <out.nameSrt.bam>\n"); - return 1; - } - in = (strcmp(argv[1], "-") == 0)? bam_dopen(fileno(stdin), "r") : bam_open(argv[1], "r"); - out = (strcmp(argv[2], "-") == 0)? bam_dopen(fileno(stdout), "w") : bam_open(argv[2], "w"); - bam_mating_core(in, out); - bam_close(in); bam_close(out); - return 0; -} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_md.c --- a/chimerascan/pysam/samtools/bam_md.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,175 +0,0 @@ -#include <unistd.h> -#include <assert.h> -#include <string.h> -#include <ctype.h> -#include "faidx.h" -#include "sam.h" -#include "kstring.h" - -void bam_fillmd1_core(bam1_t *b, char *ref, int is_equal, int max_nm) -{ - uint8_t *seq = bam1_seq(b); - uint32_t *cigar = bam1_cigar(b); - bam1_core_t *c = &b->core; - int i, x, y, u = 0; - kstring_t *str; - uint8_t *old_md, *old_nm; - int32_t old_nm_i = -1, nm = 0; - - str = (kstring_t*)calloc(1, sizeof(kstring_t)); - for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) { - int j, l = cigar[i]>>4, op = cigar[i]&0xf; - if (op == BAM_CMATCH) { - for (j = 0; j < l; ++j) { - int z = y + j; - int c1 = bam1_seqi(seq, z), c2 = bam_nt16_table[(int)ref[x+j]]; - if (ref[x+j] == 0) break; // out of boundary - if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match - if (is_equal) seq[z/2] &= (z&1)? 0xf0 : 0x0f; - ++u; - } else { - ksprintf(str, "%d", u); - kputc(ref[x+j], str); - u = 0; ++nm; - } - } - if (j < l) break; - x += l; y += l; - } else if (op == BAM_CDEL) { - ksprintf(str, "%d", u); - kputc('^', str); - for (j = 0; j < l; ++j) { - if (ref[x+j] == 0) break; - kputc(ref[x+j], str); - } - u = 0; - if (j < l) break; - x += l; nm += l; - } else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) { - y += l; - if (op == BAM_CINS) nm += l; - } else if (op == BAM_CREF_SKIP) { - x += l; - } - } - ksprintf(str, "%d", u); - // apply max_nm - if (max_nm > 0 && nm >= max_nm) { - for (i = y = 0, x = c->pos; i < c->n_cigar; ++i) { - int j, l = cigar[i]>>4, op = cigar[i]&0xf; - if (op == BAM_CMATCH) { - for (j = 0; j < l; ++j) { - int z = y + j; - int c1 = bam1_seqi(seq, z), c2 = bam_nt16_table[(int)ref[x+j]]; - if (ref[x+j] == 0) break; // out of boundary - if ((c1 == c2 && c1 != 15 && c2 != 15) || c1 == 0) { // a match - seq[z/2] |= (z&1)? 
0x0f : 0xf0; - bam1_qual(b)[z] = 0; - } - } - if (j < l) break; - x += l; y += l; - } else if (op == BAM_CDEL || op == BAM_CREF_SKIP) x += l; - else if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l; - } - } - // update NM - old_nm = bam_aux_get(b, "NM"); - if (c->flag & BAM_FUNMAP) return; - if (old_nm) old_nm_i = bam_aux2i(old_nm); - if (!old_nm) bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm); - else if (nm != old_nm_i) { - fprintf(stderr, "[bam_fillmd1] different NM for read '%s': %d -> %d\n", bam1_qname(b), old_nm_i, nm); - bam_aux_del(b, old_nm); - bam_aux_append(b, "NM", 'i', 4, (uint8_t*)&nm); - } - // update MD - old_md = bam_aux_get(b, "MD"); - if (!old_md) bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s); - else { - int is_diff = 0; - if (strlen((char*)old_md+1) == str->l) { - for (i = 0; i < str->l; ++i) - if (toupper(old_md[i+1]) != toupper(str->s[i])) - break; - if (i < str->l) is_diff = 1; - } else is_diff = 1; - if (is_diff) { - fprintf(stderr, "[bam_fillmd1] different MD for read '%s': '%s' -> '%s'\n", bam1_qname(b), old_md+1, str->s); - bam_aux_del(b, old_md); - bam_aux_append(b, "MD", 'Z', str->l + 1, (uint8_t*)str->s); - } - } - free(str->s); free(str); -} - -void bam_fillmd1(bam1_t *b, char *ref, int is_equal) -{ - bam_fillmd1_core(b, ref, is_equal, 0); -} - -int bam_fillmd(int argc, char *argv[]) -{ - int c, is_equal = 0, tid = -2, ret, len, is_bam_out, is_sam_in, is_uncompressed, max_nm = 0; - samfile_t *fp, *fpout = 0; - faidx_t *fai; - char *ref = 0, mode_w[8], mode_r[8]; - bam1_t *b; - - is_bam_out = is_sam_in = is_uncompressed = 0; - mode_w[0] = mode_r[0] = 0; - strcpy(mode_r, "r"); strcpy(mode_w, "w"); - while ((c = getopt(argc, argv, "eubSn:")) >= 0) { - switch (c) { - case 'e': is_equal = 1; break; - case 'b': is_bam_out = 1; break; - case 'u': is_uncompressed = is_bam_out = 1; break; - case 'S': is_sam_in = 1; break; - case 'n': max_nm = atoi(optarg); break; - default: fprintf(stderr, "[bam_fillmd] unrecognized option 
'-%c'\n", c); return 1; - } - } - if (!is_sam_in) strcat(mode_r, "b"); - if (is_bam_out) strcat(mode_w, "b"); - else strcat(mode_w, "h"); - if (is_uncompressed) strcat(mode_w, "u"); - if (optind + 1 >= argc) { - fprintf(stderr, "\n"); - fprintf(stderr, "Usage: samtools fillmd [-eubS] <aln.bam> <ref.fasta>\n\n"); - fprintf(stderr, "Options: -e change identical bases to '='\n"); - fprintf(stderr, " -u uncompressed BAM output (for piping)\n"); - fprintf(stderr, " -b compressed BAM output\n"); - fprintf(stderr, " -S the input is SAM with header\n\n"); - return 1; - } - fp = samopen(argv[optind], mode_r, 0); - if (fp == 0) return 1; - if (is_sam_in && (fp->header == 0 || fp->header->n_targets == 0)) { - fprintf(stderr, "[bam_fillmd] input SAM does not have header. Abort!\n"); - return 1; - } - fpout = samopen("-", mode_w, fp->header); - fai = fai_load(argv[optind+1]); - - b = bam_init1(); - while ((ret = samread(fp, b)) >= 0) { - if (b->core.tid >= 0) { - if (tid != b->core.tid) { - free(ref); - ref = fai_fetch(fai, fp->header->target_name[b->core.tid], &len); - tid = b->core.tid; - if (ref == 0) - fprintf(stderr, "[bam_fillmd] fail to find sequence '%s' in the reference.\n", - fp->header->target_name[tid]); - } - if (ref) bam_fillmd1_core(b, ref, is_equal, max_nm); - } - samwrite(fpout, b); - } - bam_destroy1(b); - - free(ref); - fai_destroy(fai); - samclose(fp); samclose(fpout); - return 0; -} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_pileup.c --- a/chimerascan/pysam/samtools/bam_pileup.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,396 +0,0 @@\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <ctype.h>\n-#include <assert.h>\n-#include "sam.h"\n-\n-typedef struct __linkbuf_t {\n-\tbam1_t b;\n-\tuint32_t beg, end;\n-\tstruct __linkbuf_t *next;\n-} lbnode_t;\n-\n-/* --- BEGIN: Memory pool */\n-\n-typedef struct {\n-\tint cnt, n, max;\n-\tlbnode_t **buf;\n-} mempool_t;\n-\n-static mempool_t *mp_init()\n-{\n-\tmempool_t *mp;\n-\tmp = (mempool_t*)calloc(1, sizeof(mempool_t));\n-\treturn mp;\n-}\n-static void mp_destroy(mempool_t *mp)\n-{\n-\tint k;\n-\tfor (k = 0; k < mp->n; ++k) {\n-\t\tfree(mp->buf[k]->b.data);\n-\t\tfree(mp->buf[k]);\n-\t}\n-\tfree(mp->buf);\n-\tfree(mp);\n-}\n-static inline lbnode_t *mp_alloc(mempool_t *mp)\n-{\n-\t++mp->cnt;\n-\tif (mp->n == 0) return (lbnode_t*)calloc(1, sizeof(lbnode_t));\n-\telse return mp->buf[--mp->n];\n-}\n-static inline void mp_free(mempool_t *mp, lbnode_t *p)\n-{\n-\t--mp->cnt; p->next = 0; // clear lbnode_t::next here\n-\tif (mp->n == mp->max) {\n-\t\tmp->max = mp->max? 
mp->max<<1 : 256;\n-\t\tmp->buf = (lbnode_t**)realloc(mp->buf, sizeof(lbnode_t*) * mp->max);\n-\t}\n-\tmp->buf[mp->n++] = p;\n-}\n-\n-/* --- END: Memory pool */\n-\n-/* --- BEGIN: Auxiliary functions */\n-\n-static inline int resolve_cigar(bam_pileup1_t *p, uint32_t pos)\n-{\n-\tunsigned k;\n-\tbam1_t *b = p->b;\n-\tbam1_core_t *c = &b->core;\n-\tuint32_t x = c->pos, y = 0;\n-\tint ret = 1, is_restart = 1;\n-\n-\tif (c->flag&BAM_FUNMAP) return 0; // unmapped read\n-\tassert(x <= pos); // otherwise a bug\n-\tp->qpos = -1; p->indel = 0; p->is_del = p->is_head = p->is_tail = 0;\n-\tfor (k = 0; k < c->n_cigar; ++k) {\n-\t\tint op = bam1_cigar(b)[k] & BAM_CIGAR_MASK; // operation\n-\t\tint l = bam1_cigar(b)[k] >> BAM_CIGAR_SHIFT; // length\n-\t\tif (op == BAM_CMATCH) { // NOTE: this assumes the first and the last operation MUST BE a match or a clip\n-\t\t\tif (x + l > pos) { // overlap with pos\n-\t\t\t\tp->indel = p->is_del = 0;\n-\t\t\t\tp->qpos = y + (pos - x);\n-\t\t\t\tif (x == pos && is_restart) p->is_head = 1;\n-\t\t\t\tif (x + l - 1 == pos) { // come to the end of a match\n-\t\t\t\t\tint has_next_match = 0;\n-\t\t\t\t\tunsigned i;\n-\t\t\t\t\tfor (i = k + 1; i < c->n_cigar; ++i) {\n-\t\t\t\t\t\tuint32_t cigar = bam1_cigar(b)[i];\n-\t\t\t\t\t\tint opi = cigar&BAM_CIGAR_MASK;\n-\t\t\t\t\t\tif (opi == BAM_CMATCH) {\n-\t\t\t\t\t\t\thas_next_match = 1;\n-\t\t\t\t\t\t\tbreak;\n-\t\t\t\t\t\t} else if (opi == BAM_CSOFT_CLIP || opi == BAM_CREF_SKIP || opi == BAM_CHARD_CLIP) break;\n-\t\t\t\t\t}\n-\t\t\t\t\tif (!has_next_match) p->is_tail = 1;\n-\t\t\t\t\tif (k < c->n_cigar - 1 && has_next_match) { // there are additional operation(s)\n-\t\t\t\t\t\tuint32_t cigar = bam1_cigar(b)[k+1]; // next CIGAR\n-\t\t\t\t\t\tint op_next = cigar&BAM_CIGAR_MASK; // next CIGAR operation\n-\t\t\t\t\t\tif (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del\n-\t\t\t\t\t\telse if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins\n-\t\t\t\t\t\telse if 
(op_next == BAM_CPAD && k + 2 < c->n_cigar) { // no working for adjacent padding\n-\t\t\t\t\t\t\tcigar = bam1_cigar(b)[k+2]; op_next = cigar&BAM_CIGAR_MASK;\n-\t\t\t\t\t\t\tif (op_next == BAM_CDEL) p->indel = -(int32_t)(cigar>>BAM_CIGAR_SHIFT); // del\n-\t\t\t\t\t\t\telse if (op_next == BAM_CINS) p->indel = cigar>>BAM_CIGAR_SHIFT; // ins\n-\t\t\t\t\t\t}\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tx += l; y += l;\n-\t\t} else if (op == BAM_CDEL) { // then set ->is_del\n-\t\t\tif (x + l > pos) {\n-\t\t\t\tp->indel = 0; p->is_del = 1;\n-\t\t\t\tp->qpos = y + (pos - x);\n-\t\t\t}\n-\t\t\tx += l;\n-\t\t} else if (op == BAM_CREF_SKIP) x += l;\n-\t\telse if (op == BAM_CINS || op == BAM_CSOFT_CLIP) y += l;\n-\t\tif (is_restart) is_restart ^= (op == BAM_CMATCH);\n-\t\telse is_restart ^= (op == BAM_CREF_SKIP || op == BAM_CSOFT_CLIP || op == BAM_CHARD_CLIP);\n-\t\tif (x > pos) {\n-\t\t\tif (op == BAM_CREF_SKIP) ret = 0; // then do not put it into pileup at all\n-\t\t\tbreak;\n-\t\t}\n-\t}\n-\tassert(x > pos); // otherwise a bug\n-\treturn ret;\n-}\n-\n-/* --- END: Auxiliary functions */\n-\n-/*******************\n- * pileup iterator *\n- *******************/\n-\n-struct __bam_plp_t {\n-\tmempool_t *mp;\n-\tlbnode_t *head, *tail, *dummy;\n-\tint32_t tid, pos, max_tid, max_pos;\n-\tint is_eof, flag_mask, max_plp, error;\n-\tbam_pileup1_t *plp;\n-\t// for the "auto"'..b're.tid > iter->tid) {\n-\t\t\titer->tail->next = mp_alloc(iter->mp);\n-\t\t\titer->tail = iter->tail->next;\n-\t\t}\n-\t} else iter->is_eof = 1;\n-\treturn 0;\n-}\n-\n-const bam_pileup1_t *bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp)\n-{\n-\tconst bam_pileup1_t *plp;\n-\tif (iter->func == 0 || iter->error) { *_n_plp = -1; return 0; }\n-\tif ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;\n-\telse {\n-\t\t*_n_plp = 0;\n-\t\tif (iter->is_eof) return 0;\n-\t\twhile (iter->func(iter->data, iter->b) >= 0) {\n-\t\t\tif (bam_plp_push(iter, iter->b) < 0) {\n-\t\t\t\t*_n_plp = 
-1;\n-\t\t\t\treturn 0;\n-\t\t\t}\n-\t\t\tif ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;\n-\t\t}\n-\t\tbam_plp_push(iter, 0);\n-\t\tif ((plp = bam_plp_next(iter, _tid, _pos, _n_plp)) != 0) return plp;\n-\t\treturn 0;\n-\t}\n-}\n-\n-void bam_plp_reset(bam_plp_t iter)\n-{\n-\tlbnode_t *p, *q;\n-\titer->max_tid = iter->max_pos = -1;\n-\titer->tid = iter->pos = 0;\n-\titer->is_eof = 0;\n-\tfor (p = iter->head; p->next;) {\n-\t\tq = p->next;\n-\t\tmp_free(iter->mp, p);\n-\t\tp = q;\n-\t}\n-\titer->head = iter->tail;\n-}\n-\n-void bam_plp_set_mask(bam_plp_t iter, int mask)\n-{\n-\titer->flag_mask = mask < 0? BAM_DEF_MASK : (BAM_FUNMAP | mask);\n-}\n-\n-/*****************\n- * callback APIs *\n- *****************/\n-\n-int bam_pileup_file(bamFile fp, int mask, bam_pileup_f func, void *func_data)\n-{\n-\tbam_plbuf_t *buf;\n-\tint ret;\n-\tbam1_t *b;\n-\tb = bam_init1();\n-\tbuf = bam_plbuf_init(func, func_data);\n-\tbam_plbuf_set_mask(buf, mask);\n-\twhile ((ret = bam_read1(fp, b)) >= 0)\n-\t\tbam_plbuf_push(b, buf);\n-\tbam_plbuf_push(0, buf);\n-\tbam_plbuf_destroy(buf);\n-\tbam_destroy1(b);\n-\treturn 0;\n-}\n-\n-void bam_plbuf_set_mask(bam_plbuf_t *buf, int mask)\n-{\n-\tbam_plp_set_mask(buf->iter, mask);\n-}\n-\n-void bam_plbuf_reset(bam_plbuf_t *buf)\n-{\n-\tbam_plp_reset(buf->iter);\n-}\n-\n-bam_plbuf_t *bam_plbuf_init(bam_pileup_f func, void *data)\n-{\n-\tbam_plbuf_t *buf;\n-\tbuf = calloc(1, sizeof(bam_plbuf_t));\n-\tbuf->iter = bam_plp_init(0, 0);\n-\tbuf->func = func;\n-\tbuf->data = data;\n-\treturn buf;\n-}\n-\n-void bam_plbuf_destroy(bam_plbuf_t *buf)\n-{\n-\tbam_plp_destroy(buf->iter);\n-\tfree(buf);\n-}\n-\n-int bam_plbuf_push(const bam1_t *b, bam_plbuf_t *buf)\n-{\n-\tint ret, n_plp, tid, pos;\n-\tconst bam_pileup1_t *plp;\n-\tret = bam_plp_push(buf->iter, b);\n-\tif (ret < 0) return ret;\n-\twhile ((plp = bam_plp_next(buf->iter, &tid, &pos, &n_plp)) != 0)\n-\t\tbuf->func(tid, pos, n_plp, plp, buf->data);\n-\treturn 
0;\n-}\n-\n-/***********\n- * mpileup *\n- ***********/\n-\n-struct __bam_mplp_t {\n-\tint n;\n-\tuint64_t min, *pos;\n-\tbam_plp_t *iter;\n-\tint *n_plp;\n-\tconst bam_pileup1_t **plp;\n-};\n-\n-bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data)\n-{\n-\tint i;\n-\tbam_mplp_t iter;\n-\titer = calloc(1, sizeof(struct __bam_mplp_t));\n-\titer->pos = calloc(n, 8);\n-\titer->n_plp = calloc(n, sizeof(int));\n-\titer->plp = calloc(n, sizeof(void*));\n-\titer->iter = calloc(n, sizeof(void*));\n-\titer->n = n;\n-\titer->min = (uint64_t)-1;\n-\tfor (i = 0; i < n; ++i) {\n-\t\titer->iter[i] = bam_plp_init(func, data[i]);\n-\t\titer->pos[i] = iter->min;\n-\t}\n-\treturn iter;\n-}\n-\n-void bam_mplp_destroy(bam_mplp_t iter)\n-{\n-\tint i;\n-\tfor (i = 0; i < iter->n; ++i) bam_plp_destroy(iter->iter[i]);\n-\tfree(iter->iter); free(iter->pos); free(iter->n_plp); free(iter->plp);\n-\tfree(iter);\n-}\n-\n-int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp)\n-{\n-\tint i, ret = 0;\n-\tuint64_t new_min = (uint64_t)-1;\n-\tfor (i = 0; i < iter->n; ++i) {\n-\t\tif (iter->pos[i] == iter->min) {\n-\t\t\tint tid, pos;\n-\t\t\titer->plp[i] = bam_plp_auto(iter->iter[i], &tid, &pos, &iter->n_plp[i]);\n-\t\t\titer->pos[i] = (uint64_t)tid<<32 | pos;\n-\t\t}\n-\t\tif (iter->plp[i] && iter->pos[i] < new_min) new_min = iter->pos[i];\n-\t}\n-\titer->min = new_min;\n-\tif (new_min == (uint64_t)-1) return 0;\n-\t*_tid = new_min>>32; *_pos = (uint32_t)new_min;\n-\tfor (i = 0; i < iter->n; ++i) {\n-\t\tif (iter->pos[i] == iter->min) {\n-\t\t\tn_plp[i] = iter->n_plp[i], plp[i] = iter->plp[i];\n-\t\t\t++ret;\n-\t\t} else n_plp[i] = 0, plp[i] = 0;\n-\t}\n-\treturn ret;\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_plcmd.c --- a/chimerascan/pysam/samtools/bam_plcmd.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,570 +0,0 @@\n-#include <math.h>\n-#include <stdio.h>\n-#include <unistd.h>\n-#include <ctype.h>\n-#include "sam.h"\n-#include "faidx.h"\n-#include "bam_maqcns.h"\n-#include "khash.h"\n-#include "glf.h"\n-#include "kstring.h"\n-\n-typedef int *indel_list_t;\n-KHASH_MAP_INIT_INT64(64, indel_list_t)\n-\n-#define BAM_PLF_SIMPLE 0x01\n-#define BAM_PLF_CNS 0x02\n-#define BAM_PLF_INDEL_ONLY 0x04\n-#define BAM_PLF_GLF 0x08\n-#define BAM_PLF_VAR_ONLY 0x10\n-#define BAM_PLF_2ND 0x20\n-#define BAM_PLF_RANBASE 0x40\n-#define BAM_PLF_1STBASE 0x80\n-#define BAM_PLF_ALLBASE 0x100\n-#define BAM_PLF_READPOS 0x200\n-\n-typedef struct {\n-\tbam_header_t *h;\n-\tbam_maqcns_t *c;\n-\tbam_maqindel_opt_t *ido;\n-\tfaidx_t *fai;\n-\tkhash_t(64) *hash;\n-\tuint32_t format;\n-\tint tid, len, last_pos;\n-\tint mask;\n- int max_depth; // for indel calling, ignore reads with the depth too high. 0 for unlimited\n-\tchar *ref;\n-\tglfFile fp_glf; // for glf output only\n-} pu_data_t;\n-\n-char **__bam_get_lines(const char *fn, int *_n);\n-void bam_init_header_hash(bam_header_t *header);\n-int32_t bam_get_tid(const bam_header_t *header, const char *seq_name);\n-\n-static khash_t(64) *load_pos(const char *fn, bam_header_t *h)\n-{\n-\tchar **list;\n-\tint i, j, n, *fields, max_fields;\n-\tkhash_t(64) *hash;\n-\tbam_init_header_hash(h);\n-\tlist = __bam_get_lines(fn, &n);\n-\thash = kh_init(64);\n-\tmax_fields = 0; fields = 0;\n-\tfor (i = 0; i < n; ++i) {\n-\t\tchar *str = list[i];\n-\t\tint chr, n_fields, ret;\n-\t\tkhint_t k;\n-\t\tuint64_t x;\n-\t\tn_fields = ksplit_core(str, 0, &max_fields, &fields);\n-\t\tif (n_fields < 2) continue;\n-\t\tchr = bam_get_tid(h, str + fields[0]);\n-\t\tif (chr < 0) {\n-\t\t\tfprintf(stderr, "[load_pos] unknown reference sequence name: %s\\n", str + fields[0]);\n-\t\t\tcontinue;\n-\t\t}\n-\t\tx = (uint64_t)chr << 32 | (atoi(str + fields[1]) - 1);\n-\t\tk = kh_put(64, hash, x, &ret);\n-\t\tif (ret == 0) {\n-\t\t\tfprintf(stderr, "[load_pos] position %s:%s 
has been loaded.\\n", str+fields[0], str+fields[1]);\n-\t\t\tcontinue;\n-\t\t}\n-\t\tkh_val(hash, k) = 0;\n-\t\tif (n_fields > 2) {\n-\t\t\t// count\n-\t\t\tfor (j = 2; j < n_fields; ++j) {\n-\t\t\t\tchar *s = str + fields[j];\n-\t\t\t\tif ((*s != \'+\' && *s != \'-\') || !isdigit(s[1])) break;\n- \t\t\t}\n-\t\t\tif (j > 2) { // update kh_val()\n-\t\t\t\tint *q, y, z;\n-\t\t\t\tq = kh_val(hash, k) = (int*)calloc(j - 1, sizeof(int));\n-\t\t\t\tq[0] = j - 2; z = j; y = 1;\n-\t\t\t\tfor (j = 2; j < z; ++j)\n-\t\t\t\t\tq[y++] = atoi(str + fields[j]);\n-\t\t\t}\n-\t\t}\n-\t\tfree(str);\n-\t}\n-\tfree(list); free(fields);\n-\treturn hash;\n-}\n-\n-// an analogy to pileup_func() below\n-static int glt3_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pu, void *data)\n-{\n-\tpu_data_t *d = (pu_data_t*)data;\n-\tbam_maqindel_ret_t *r = 0;\n-\tint rb, *proposed_indels = 0;\n-\tglf1_t *g;\n-\tglf3_t *g3;\n-\n-\tif (d->fai == 0) {\n-\t\tfprintf(stderr, "[glt3_func] reference sequence is required for generating GLT. Abort!\\n");\n-\t\texit(1);\n-\t}\n-\tif (d->hash) { // only output a list of sites\n-\t\tkhint_t k = kh_get(64, d->hash, (uint64_t)tid<<32|pos);\n-\t\tif (k == kh_end(d->hash)) return 0;\n-\t\tproposed_indels = kh_val(d->hash, k);\n-\t}\n-\tg3 = glf3_init1();\n-\tif (d->fai && (int)tid != d->tid) {\n-\t\tif (d->ref) { // then write the end mark\n-\t\t\tg3->rtype = GLF3_RTYPE_END;\n-\t\t\tglf3_write1(d->fp_glf, g3);\n-\t\t}\n-\t\tglf3_ref_write(d->fp_glf, d->h->target_name[tid], d->h->target_len[tid]); // write reference\n-\t\tfree(d->ref);\n-\t\td->ref = fai_fetch(d->fai, d->h->target_name[tid], &d->len);\n-\t\td->tid = tid;\n-\t\td->last_pos = 0;\n-\t}\n-\trb = (d->ref && (int)pos < d->len)? 
d->ref[pos] : \'N\';\n-\tg = bam_maqcns_glfgen(n, pu, bam_nt16_table[rb], d->c);\n-\tmemcpy(g3, g, sizeof(glf1_t));\n-\tg3->rtype = GLF3_RTYPE_SUB;\n-\tg3->offset = pos - d->last_pos;\n-\td->last_pos = pos;\n-\tglf3_write1(d->fp_glf, g3);\n- if (pos < d->len) {\n- int m = (!d->max_depth || d->max_depth>n) ? n : d->max_depth;\n-\t\tif (proposed_indels)\n-\t\t\tr = bam_maqindel(m, pos, d->ido, pu, d->ref, proposed_indels[0], proposed_indels+1);\n-\t\telse r = bam_maqindel(m, pos, d->ido, pu, d->ref, 0, 0);\n-\t}\n-\tif (r) { '..b'"[bam_pileup] fail to read the header: non-exisiting file or wrong format.\\n");\n-\t\t\treturn 1;\n-\t\t}\n-\t\td->h = fp->header;\n-\t\tif (fn_pos) d->hash = load_pos(fn_pos, d->h);\n-\t\tsampileup(fp, d->mask, pileup_func, d);\n-\t\tsamclose(fp); // d->h will be destroyed here\n-\t}\n-\n-\t// free\n-\tif (d->format & BAM_PLF_GLF) bgzf_close(d->fp_glf);\n-\tif (fn_pos) { // free the hash table\n-\t\tkhint_t k;\n-\t\tfor (k = kh_begin(d->hash); k < kh_end(d->hash); ++k)\n-\t\t\tif (kh_exist(d->hash, k)) free(kh_val(d->hash, k));\n-\t\tkh_destroy(64, d->hash);\n-\t}\n-\tfree(fn_pos); free(fn_list); free(fn_fa);\n-\tif (d->fai) fai_destroy(d->fai);\n-\tbam_maqcns_destroy(d->c);\n-\tfree(d->ido); free(d->ref); free(d);\n-\treturn 0;\n-}\n-\n-/***********\n- * mpileup *\n- ***********/\n-\n-typedef struct {\n-\tchar *reg;\n-\tfaidx_t *fai;\n-} mplp_conf_t;\n-\n-typedef struct {\n-\tbamFile fp;\n-\tbam_iter_t iter;\n-} mplp_aux_t;\n-\n-static int mplp_func(void *data, bam1_t *b)\n-{\n-\tmplp_aux_t *ma = (mplp_aux_t*)data;\n-\tif (ma->iter) return bam_iter_read(ma->fp, ma->iter, b);\n-\treturn bam_read1(ma->fp, b);\n-}\n-\n-static int mpileup(mplp_conf_t *conf, int n, char **fn)\n-{\n-\tmplp_aux_t **data;\n-\tint i, tid, pos, *n_plp, beg0 = 0, end0 = 1u<<29, ref_len, ref_tid;\n-\tconst bam_pileup1_t **plp;\n-\tbam_mplp_t iter;\n-\tbam_header_t *h = 0;\n-\tchar *ref;\n-\t// allocate\n-\tdata = calloc(n, sizeof(void*));\n-\tplp = calloc(n, 
sizeof(void*));\n-\tn_plp = calloc(n, sizeof(int*));\n-\t// read the header and initialize data\n-\tfor (i = 0; i < n; ++i) {\n-\t\tbam_header_t *h_tmp;\n-\t\tdata[i] = calloc(1, sizeof(mplp_aux_t));\n-\t\tdata[i]->fp = bam_open(fn[i], "r");\n-\t\th_tmp = bam_header_read(data[i]->fp);\n-\t\tif (conf->reg) {\n-\t\t\tint beg, end;\n-\t\t\tbam_index_t *idx;\n-\t\t\tidx = bam_index_load(fn[i]);\n-\t\t\tif (idx == 0) {\n-\t\t\t\tfprintf(stderr, "[%s] fail to load index for %d-th input.\\n", __func__, i+1);\n-\t\t\t\texit(1);\n-\t\t\t}\n-\t\t\tif (bam_parse_region(h_tmp, conf->reg, &tid, &beg, &end) < 0) {\n-\t\t\t\tfprintf(stderr, "[%s] malformatted region or wrong seqname for %d-th input.\\n", __func__, i+1);\n-\t\t\t\texit(1);\n-\t\t\t}\n-\t\t\tif (i == 0) beg0 = beg, end0 = end;\n-\t\t\tdata[i]->iter = bam_iter_query(idx, tid, beg, end);\n-\t\t\tbam_index_destroy(idx);\n-\t\t}\n-\t\tif (i == 0) h = h_tmp;\n-\t\telse {\n-\t\t\t// FIXME: to check consistency\n-\t\t\tbam_header_destroy(h_tmp);\n-\t\t}\n-\t}\n-\t// mpileup\n-\tref_tid = -1; ref = 0;\n-\titer = bam_mplp_init(n, mplp_func, (void**)data);\n-\twhile (bam_mplp_auto(iter, &tid, &pos, n_plp, plp) > 0) {\n-\t\tif (conf->reg && (pos < beg0 || pos >= end0)) continue; // out of the region requested\n-\t\tif (tid != ref_tid) {\n-\t\t\tfree(ref);\n-\t\t\tif (conf->fai) ref = fai_fetch(conf->fai, h->target_name[tid], &ref_len);\n-\t\t\tref_tid = tid;\n-\t\t}\n-\t\tprintf("%s\\t%d\\t%c", h->target_name[tid], pos + 1, (ref && pos < ref_len)? 
ref[pos] : \'N\');\n-\t\tfor (i = 0; i < n; ++i) {\n-\t\t\tint j;\n-\t\t\tprintf("\\t%d\\t", n_plp[i]);\n-\t\t\tif (n_plp[i] == 0) printf("*\\t*");\n-\t\t\telse {\n-\t\t\t\tfor (j = 0; j < n_plp[i]; ++j)\n-\t\t\t\t\tpileup_seq(plp[i] + j, pos, ref_len, ref);\n-\t\t\t\tputchar(\'\\t\');\n-\t\t\t\tfor (j = 0; j < n_plp[i]; ++j) {\n-\t\t\t\t\tconst bam_pileup1_t *p = plp[i] + j;\n-\t\t\t\t\tint c = bam1_qual(p->b)[p->qpos] + 33;\n-\t\t\t\t\tif (c > 126) c = 126;\n-\t\t\t\t\tputchar(c);\n-\t\t\t\t}\n-\t\t\t}\n-\t\t}\n-\t\tputchar(\'\\n\');\n-\t}\n-\tbam_mplp_destroy(iter);\n-\tbam_header_destroy(h);\n-\tfor (i = 0; i < n; ++i) {\n-\t\tbam_close(data[i]->fp);\n-\t\tif (data[i]->iter) bam_iter_destroy(data[i]->iter);\n-\t\tfree(data[i]);\n-\t}\n-\tfree(data); free(plp); free(ref); free(n_plp);\n-\treturn 0;\n-}\n-\n-int bam_mpileup(int argc, char *argv[])\n-{\n-\tint c;\n-\tmplp_conf_t mplp;\n-\tmemset(&mplp, 0, sizeof(mplp_conf_t));\n-\twhile ((c = getopt(argc, argv, "f:r:")) >= 0) {\n-\t\tswitch (c) {\n-\t\tcase \'f\':\n-\t\t\tmplp.fai = fai_load(optarg);\n-\t\t\tif (mplp.fai == 0) return 1;\n-\t\t\tbreak;\n-\t\tcase \'r\': mplp.reg = strdup(optarg);\n-\t\t}\n-\t}\n-\tif (argc == 1) {\n-\t\tfprintf(stderr, "Usage: samtools mpileup [-r reg] [-f in.fa] in1.bam [in2.bam [...]]\\n");\n-\t\treturn 1;\n-\t}\n-\tmpileup(&mplp, argc - optind, argv + optind);\n-\tfree(mplp.reg);\n-\tif (mplp.fai) fai_destroy(mplp.fai);\n-\treturn 0;\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_reheader.c --- a/chimerascan/pysam/samtools/bam_reheader.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,60 +0,0 @@ -#include <stdio.h> -#include <stdlib.h> -#include "bgzf.h" -#include "bam.h" - -#define BUF_SIZE 0x10000 - -int bam_reheader(BGZF *in, const bam_header_t *h, int fd) -{ - BGZF *fp; - bam_header_t *old; - int len; - uint8_t *buf; - if (in->open_mode != 'r') return -1; - buf = malloc(BUF_SIZE); - old = bam_header_read(in); - fp = bgzf_fdopen(fd, "w"); - bam_header_write(fp, h); - if (in->block_offset < in->block_length) { - bgzf_write(fp, in->uncompressed_block + in->block_offset, in->block_length - in->block_offset); - bgzf_flush(fp); - } -#ifdef _USE_KNETFILE - while ((len = knet_read(in->x.fpr, buf, BUF_SIZE)) > 0) -#else - while (!feof(in->file) && (len = fread(buf, 1, BUF_SIZE, in->file)) > 0) -#endif - fwrite(buf, 1, len, fp->x.fpw); - free(buf); - fp->block_offset = in->block_offset = 0; - bgzf_close(fp); - return 0; -} - -int main_reheader(int argc, char *argv[]) -{ - bam_header_t *h; - BGZF *in; - if (argc != 3) { - fprintf(stderr, "Usage: samtools reheader <in.header.sam> <in.bam>\n"); - return 1; - } - { // read the header - tamFile fph = sam_open(argv[1]); - if (fph == 0) { - fprintf(stderr, "[%s] fail to read the header from %s.\n", __func__, argv[1]); - return 1; - } - h = sam_header_read(fph); - sam_close(fph); - } - in = strcmp(argv[2], "-")? bam_open(argv[2], "r") : bam_dopen(fileno(stdin), "r"); - if (in == 0) { - fprintf(stderr, "[%s] fail to open file %s.\n", __func__, argv[2]); - return 1; - } - bam_reheader(in, h, fileno(stdout)); - bgzf_close(in); - return 0; -} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_rmdup.c --- a/chimerascan/pysam/samtools/bam_rmdup.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,206 +0,0 @@ -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <zlib.h> -#include <unistd.h> -#include "sam.h" - -typedef bam1_t *bam1_p; - -#include "khash.h" -KHASH_SET_INIT_STR(name) -KHASH_MAP_INIT_INT64(pos, bam1_p) - -#define BUFFER_SIZE 0x40000 - -typedef struct { - uint64_t n_checked, n_removed; - khash_t(pos) *best_hash; -} lib_aux_t; -KHASH_MAP_INIT_STR(lib, lib_aux_t) - -typedef struct { - int n, max; - bam1_t **a; -} tmp_stack_t; - -static inline void stack_insert(tmp_stack_t *stack, bam1_t *b) -{ - if (stack->n == stack->max) { - stack->max = stack->max? stack->max<<1 : 0x10000; - stack->a = (bam1_t**)realloc(stack->a, sizeof(bam1_t*) * stack->max); - } - stack->a[stack->n++] = b; -} - -static inline void dump_best(tmp_stack_t *stack, samfile_t *out) -{ - int i; - for (i = 0; i != stack->n; ++i) { - samwrite(out, stack->a[i]); - bam_destroy1(stack->a[i]); - } - stack->n = 0; -} - -static void clear_del_set(khash_t(name) *del_set) -{ - khint_t k; - for (k = kh_begin(del_set); k < kh_end(del_set); ++k) - if (kh_exist(del_set, k)) - free((char*)kh_key(del_set, k)); - kh_clear(name, del_set); -} - -static lib_aux_t *get_aux(khash_t(lib) *aux, const char *lib) -{ - khint_t k = kh_get(lib, aux, lib); - if (k == kh_end(aux)) { - int ret; - char *p = strdup(lib); - lib_aux_t *q; - k = kh_put(lib, aux, p, &ret); - q = &kh_val(aux, k); - q->n_checked = q->n_removed = 0; - q->best_hash = kh_init(pos); - return q; - } else return &kh_val(aux, k); -} - -static void clear_best(khash_t(lib) *aux, int max) -{ - khint_t k; - for (k = kh_begin(aux); k != kh_end(aux); ++k) { - if (kh_exist(aux, k)) { - lib_aux_t *q = &kh_val(aux, k); - if (kh_size(q->best_hash) >= max) - kh_clear(pos, q->best_hash); - } - } -} - -static inline int sum_qual(const bam1_t *b) -{ - int i, q; - uint8_t *qual = bam1_qual(b); - for (i = q = 0; i < b->core.l_qseq; ++i) q += qual[i]; - return q; -} - -void bam_rmdup_core(samfile_t *in, samfile_t *out) -{ - bam1_t *b; - 
int last_tid = -1, last_pos = -1; - tmp_stack_t stack; - khint_t k; - khash_t(lib) *aux; - khash_t(name) *del_set; - - aux = kh_init(lib); - del_set = kh_init(name); - b = bam_init1(); - memset(&stack, 0, sizeof(tmp_stack_t)); - - kh_resize(name, del_set, 4 * BUFFER_SIZE); - while (samread(in, b) >= 0) { - bam1_core_t *c = &b->core; - if (c->tid != last_tid || last_pos != c->pos) { - dump_best(&stack, out); // write the result - clear_best(aux, BUFFER_SIZE); - if (c->tid != last_tid) { - clear_best(aux, 0); - if (kh_size(del_set)) { // check - fprintf(stderr, "[bam_rmdup_core] %llu unmatched pairs\n", (long long)kh_size(del_set)); - clear_del_set(del_set); - } - if ((int)c->tid == -1) { // append unmapped reads - samwrite(out, b); - while (samread(in, b) >= 0) samwrite(out, b); - break; - } - last_tid = c->tid; - fprintf(stderr, "[bam_rmdup_core] processing reference %s...\n", in->header->target_name[c->tid]); - } - } - if (!(c->flag&BAM_FPAIRED) || (c->flag&(BAM_FUNMAP|BAM_FMUNMAP)) || (c->mtid >= 0 && c->tid != c->mtid)) { - samwrite(out, b); - } else if (c->isize > 0) { // paired, head - uint64_t key = (uint64_t)c->pos<<32 | c->isize; - const char *lib; - lib_aux_t *q; - int ret; - lib = bam_get_library(in->header, b); - q = lib? get_aux(aux, lib) : get_aux(aux, "\t"); - ++q->n_checked; - k = kh_put(pos, q->best_hash, key, &ret); - if (ret == 0) { // found in best_hash - bam1_t *p = kh_val(q->best_hash, k); - ++q->n_removed; - if (sum_qual(p) < sum_qual(b)) { // the current alignment is better; this can be accelerated in principle - kh_put(name, del_set, strdup(bam1_qname(p)), &ret); // p will be removed - bam_copy1(p, b); // replaced as b - } else kh_put(name, del_set, strdup(bam1_qname(b)), &ret); // b will be removed - if (ret == 0) - fprintf(stderr, "[bam_rmdup_core] inconsistent BAM file for pair '%s'. 
Continue anyway.\n", bam1_qname(b)); - } else { // not found in best_hash - kh_val(q->best_hash, k) = bam_dup1(b); - stack_insert(&stack, kh_val(q->best_hash, k)); - } - } else { // paired, tail - k = kh_get(name, del_set, bam1_qname(b)); - if (k != kh_end(del_set)) { - free((char*)kh_key(del_set, k)); - kh_del(name, del_set, k); - } else samwrite(out, b); - } - last_pos = c->pos; - } - - for (k = kh_begin(aux); k != kh_end(aux); ++k) { - if (kh_exist(aux, k)) { - lib_aux_t *q = &kh_val(aux, k); - dump_best(&stack, out); - fprintf(stderr, "[bam_rmdup_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed, - (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k)); - kh_destroy(pos, q->best_hash); - free((char*)kh_key(aux, k)); - } - } - kh_destroy(lib, aux); - - clear_del_set(del_set); - kh_destroy(name, del_set); - free(stack.a); - bam_destroy1(b); -} - -void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se); - -int bam_rmdup(int argc, char *argv[]) -{ - int c, is_se = 0, force_se = 0; - samfile_t *in, *out; - while ((c = getopt(argc, argv, "sS")) >= 0) { - switch (c) { - case 's': is_se = 1; break; - case 'S': force_se = is_se = 1; break; - } - } - if (optind + 2 > argc) { - fprintf(stderr, "\n"); - fprintf(stderr, "Usage: samtools rmdup [-sS] <input.srt.bam> <output.bam>\n\n"); - fprintf(stderr, "Option: -s rmdup for SE reads\n"); - fprintf(stderr, " -S treat PE reads as SE in rmdup (force -s)\n\n"); - return 1; - } - in = samopen(argv[optind], "rb", 0); - out = samopen(argv[optind+1], "wb", in->header); - if (in == 0 || out == 0) { - fprintf(stderr, "[bam_rmdup] fail to read/write input files\n"); - return 1; - } - if (is_se) bam_rmdupse_core(in, out, force_se); - else bam_rmdup_core(in, out); - samclose(in); samclose(out); - return 0; -} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_rmdupse.c --- a/chimerascan/pysam/samtools/bam_rmdupse.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,159 +0,0 @@ -#include <math.h> -#include "sam.h" -#include "khash.h" -#include "klist.h" - -#define QUEUE_CLEAR_SIZE 0x100000 -#define MAX_POS 0x7fffffff - -typedef struct { - int endpos; - uint32_t score:31, discarded:1; - bam1_t *b; -} elem_t, *elem_p; -#define __free_elem(p) bam_destroy1((p)->data.b) -KLIST_INIT(q, elem_t, __free_elem) -typedef klist_t(q) queue_t; - -KHASH_MAP_INIT_INT(best, elem_p) -typedef khash_t(best) besthash_t; - -typedef struct { - uint64_t n_checked, n_removed; - besthash_t *left, *rght; -} lib_aux_t; -KHASH_MAP_INIT_STR(lib, lib_aux_t) - -static lib_aux_t *get_aux(khash_t(lib) *aux, const char *lib) -{ - khint_t k = kh_get(lib, aux, lib); - if (k == kh_end(aux)) { - int ret; - char *p = strdup(lib); - lib_aux_t *q; - k = kh_put(lib, aux, p, &ret); - q = &kh_val(aux, k); - q->left = kh_init(best); - q->rght = kh_init(best); - q->n_checked = q->n_removed = 0; - return q; - } else return &kh_val(aux, k); -} - -static inline int sum_qual(const bam1_t *b) -{ - int i, q; - uint8_t *qual = bam1_qual(b); - for (i = q = 0; i < b->core.l_qseq; ++i) q += qual[i]; - return q; -} - -static inline elem_t *push_queue(queue_t *queue, const bam1_t *b, int endpos, int score) -{ - elem_t *p = kl_pushp(q, queue); - p->discarded = 0; - p->endpos = endpos; p->score = score; - if (p->b == 0) p->b = bam_init1(); - bam_copy1(p->b, b); - return p; -} - -static void clear_besthash(besthash_t *h, int32_t pos) -{ - khint_t k; - for (k = kh_begin(h); k != kh_end(h); ++k) - if (kh_exist(h, k) && kh_val(h, k)->endpos <= pos) - kh_del(best, h, k); -} - -static void dump_alignment(samfile_t *out, queue_t *queue, int32_t pos, khash_t(lib) *h) -{ - if (queue->size > QUEUE_CLEAR_SIZE || pos == MAX_POS) { - khint_t k; - while (1) { - elem_t *q; - if (queue->head == queue->tail) break; - q = &kl_val(queue->head); - if (q->discarded) { - q->b->data_len = 0; - kl_shift(q, queue, 0); - continue; - } - if ((q->b->core.flag&BAM_FREVERSE) && q->endpos > pos) break; - 
samwrite(out, q->b); - q->b->data_len = 0; - kl_shift(q, queue, 0); - } - for (k = kh_begin(h); k != kh_end(h); ++k) { - if (kh_exist(h, k)) { - clear_besthash(kh_val(h, k).left, pos); - clear_besthash(kh_val(h, k).rght, pos); - } - } - } -} - -void bam_rmdupse_core(samfile_t *in, samfile_t *out, int force_se) -{ - bam1_t *b; - queue_t *queue; - khint_t k; - int last_tid = -2; - khash_t(lib) *aux; - - aux = kh_init(lib); - b = bam_init1(); - queue = kl_init(q); - while (samread(in, b) >= 0) { - bam1_core_t *c = &b->core; - int endpos = bam_calend(c, bam1_cigar(b)); - int score = sum_qual(b); - - if (last_tid != c->tid) { - if (last_tid >= 0) dump_alignment(out, queue, MAX_POS, aux); - last_tid = c->tid; - } else dump_alignment(out, queue, c->pos, aux); - if ((c->flag&BAM_FUNMAP) || ((c->flag&BAM_FPAIRED) && !force_se)) { - push_queue(queue, b, endpos, score); - } else { - const char *lib; - lib_aux_t *q; - besthash_t *h; - uint32_t key; - int ret; - lib = bam_get_library(in->header, b); - q = lib? get_aux(aux, lib) : get_aux(aux, "\t"); - ++q->n_checked; - h = (c->flag&BAM_FREVERSE)? q->rght : q->left; - key = (c->flag&BAM_FREVERSE)? 
endpos : c->pos; - k = kh_put(best, h, key, &ret); - if (ret == 0) { // in the hash table - elem_t *p = kh_val(h, k); - ++q->n_removed; - if (p->score < score) { - if (c->flag&BAM_FREVERSE) { // mark "discarded" and push the queue - p->discarded = 1; - kh_val(h, k) = push_queue(queue, b, endpos, score); - } else { // replace - p->score = score; p->endpos = endpos; - bam_copy1(p->b, b); - } - } // otherwise, discard the alignment - } else kh_val(h, k) = push_queue(queue, b, endpos, score); - } - } - dump_alignment(out, queue, MAX_POS, aux); - - for (k = kh_begin(aux); k != kh_end(aux); ++k) { - if (kh_exist(aux, k)) { - lib_aux_t *q = &kh_val(aux, k); - fprintf(stderr, "[bam_rmdupse_core] %lld / %lld = %.4lf in library '%s'\n", (long long)q->n_removed, - (long long)q->n_checked, (double)q->n_removed/q->n_checked, kh_key(aux, k)); - kh_destroy(best, q->left); kh_destroy(best, q->rght); - free((char*)kh_key(aux, k)); - } - } - kh_destroy(lib, aux); - bam_destroy1(b); - kl_destroy(q, queue); -} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_sort.c --- a/chimerascan/pysam/samtools/bam_sort.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,357 +0,0 @@\n-#include <stdlib.h>\n-#include <ctype.h>\n-#include <assert.h>\n-#include <stdio.h>\n-#include <string.h>\n-#include <unistd.h>\n-#include "bam.h"\n-#include "ksort.h"\n-\n-static int g_is_by_qname = 0;\n-\n-static inline int strnum_cmp(const char *a, const char *b)\n-{\n-\tchar *pa, *pb;\n-\tpa = (char*)a; pb = (char*)b;\n-\twhile (*pa && *pb) {\n-\t\tif (isdigit(*pa) && isdigit(*pb)) {\n-\t\t\tlong ai, bi;\n-\t\t\tai = strtol(pa, &pa, 10);\n-\t\t\tbi = strtol(pb, &pb, 10);\n-\t\t\tif (ai != bi) return ai<bi? -1 : ai>bi? 1 : 0;\n-\t\t} else {\n-\t\t\tif (*pa != *pb) break;\n-\t\t\t++pa; ++pb;\n-\t\t}\n-\t}\n-\tif (*pa == *pb)\n-\t\treturn (pa-a) < (pb-b)? -1 : (pa-a) > (pb-b)? 1 : 0;\n-\treturn *pa<*pb? -1 : *pa>*pb? 1 : 0;\n-}\n-\n-#define HEAP_EMPTY 0xffffffffffffffffull\n-\n-typedef struct {\n-\tint i;\n-\tuint64_t pos, idx;\n-\tbam1_t *b;\n-} heap1_t;\n-\n-#define __pos_cmp(a, b) ((a).pos > (b).pos || ((a).pos == (b).pos && ((a).i > (b).i || ((a).i == (b).i && (a).idx > (b).idx))))\n-\n-static inline int heap_lt(const heap1_t a, const heap1_t b)\n-{\n-\tif (g_is_by_qname) {\n-\t\tint t;\n-\t\tif (a.b == 0 || b.b == 0) return a.b == 0? 
1 : 0;\n-\t\tt = strnum_cmp(bam1_qname(a.b), bam1_qname(b.b));\n-\t\treturn (t > 0 || (t == 0 && __pos_cmp(a, b)));\n-\t} else return __pos_cmp(a, b);\n-}\n-\n-KSORT_INIT(heap, heap1_t, heap_lt)\n-\n-static void swap_header_text(bam_header_t *h1, bam_header_t *h2)\n-{\n-\tint tempi;\n-\tchar *temps;\n-\ttempi = h1->l_text, h1->l_text = h2->l_text, h2->l_text = tempi;\n-\ttemps = h1->text, h1->text = h2->text, h2->text = temps;\n-}\n-\n-/*!\n- @abstract Merge multiple sorted BAM.\n- @param is_by_qname whether to sort by query name\n- @param out output BAM file name\n- @param headers name of SAM file from which to copy \'@\' header lines,\n- or NULL to copy them from the first file to be merged\n- @param n number of files to be merged\n- @param fn names of files to be merged\n-\n- @discussion Padding information may NOT correctly maintained. This\n- function is NOT thread safe.\n- */\n-void bam_merge_core(int by_qname, const char *out, const char *headers, int n, char * const *fn, int add_RG)\n-{\n-\tbamFile fpout, *fp;\n-\theap1_t *heap;\n-\tbam_header_t *hout = 0;\n-\tbam_header_t *hheaders = NULL;\n-\tint i, j, *RG_len = 0;\n-\tuint64_t idx = 0;\n-\tchar **RG = 0;\n-\n-\tif (headers) {\n-\t\ttamFile fpheaders = sam_open(headers);\n-\t\tif (fpheaders == 0) {\n-\t\t\tfprintf(stderr, "[bam_merge_core] Cannot open file `%s\'. 
Continue anyway.\\n", headers);\n-\t\t} else {\n-\t\t\thheaders = sam_header_read(fpheaders);\n-\t\t\tsam_close(fpheaders);\n-\t\t}\n-\t}\n-\n-\tg_is_by_qname = by_qname;\n-\tfp = (bamFile*)calloc(n, sizeof(bamFile));\n-\theap = (heap1_t*)calloc(n, sizeof(heap1_t));\n-\t// prepare RG tag\n-\tif (add_RG) {\n-\t\tRG = (char**)calloc(n, sizeof(void*));\n-\t\tRG_len = (int*)calloc(n, sizeof(int));\n-\t\tfor (i = 0; i != n; ++i) {\n-\t\t\tint l = strlen(fn[i]);\n-\t\t\tconst char *s = fn[i];\n-\t\t\tif (l > 4 && strcmp(s + l - 4, ".bam") == 0) l -= 4;\n-\t\t\tfor (j = l - 1; j >= 0; --j) if (s[j] == \'/\') break;\n-\t\t\t++j; l -= j;\n-\t\t\tRG[i] = calloc(l + 1, 1);\n-\t\t\tRG_len[i] = l;\n-\t\t\tstrncpy(RG[i], s + j, l);\n-\t\t}\n-\t}\n-\t// read the first\n-\tfor (i = 0; i != n; ++i) {\n-\t\theap1_t *h;\n-\t\tbam_header_t *hin;\n-\t\tfp[i] = bam_open(fn[i], "r");\n-\t\tif (fp[i] == 0) {\n-\t\t\tint j;\n-\t\t\tfprintf(stderr, "[bam_merge_core] fail to open file %s\\n", fn[i]);\n-\t\t\tfor (j = 0; j < i; ++j) bam_close(fp[j]);\n-\t\t\tfree(fp); free(heap);\n-\t\t\t// FIXME: possible memory leak\n-\t\t\treturn;\n-\t\t}\n-\t\thin = bam_header_read(fp[i]);\n-\t\tif (i == 0) { // the first SAM\n-\t\t\thout = hin;\n-\t\t\tif (hheaders) {\n-\t\t\t\t// If the text headers to be swapped in include any @SQ headers,\n-\t\t\t\t// check that they are consistent with the existing binary list\n-\t\t\t\t// of reference information.\n-\t\t\t\tif (hheaders->n_targets > 0) {\n-\t\t\t\t\tif (hout->n_targets != hheaders->n_targets)\n-\t\t\t\t\t\tfprintf(stderr, "[bam_merge_core] number of @SQ headers in `%s\' differs from number of target sequences", headers);\n-\t\t\t\t\tfor (j = 0; j < hout->n_targets; ++j)\n-\t\t\t\t\t\tif (strcmp(hout->target_name[j], hheaders'..b' 0 && (((uint64_t)a->core.tid<<32|a->core.pos) < ((uint64_t)b->core.tid<<32|b->core.pos))));\n-\t} else return (((uint64_t)a->core.tid<<32|a->core.pos) < ((uint64_t)b->core.tid<<32|b->core.pos));\n-}\n-KSORT_INIT(sort, 
bam1_p, bam1_lt)\n-\n-static void sort_blocks(int n, int k, bam1_p *buf, const char *prefix, const bam_header_t *h, int is_stdout)\n-{\n-\tchar *name;\n-\tint i;\n-\tbamFile fp;\n-\tks_mergesort(sort, k, buf, 0);\n-\tname = (char*)calloc(strlen(prefix) + 20, 1);\n-\tif (n >= 0) sprintf(name, "%s.%.4d.bam", prefix, n);\n-\telse sprintf(name, "%s.bam", prefix);\n-\tfp = is_stdout? bam_dopen(fileno(stdout), "w") : bam_open(name, "w");\n-\tif (fp == 0) {\n-\t\tfprintf(stderr, "[sort_blocks] fail to create file %s.\\n", name);\n-\t\tfree(name);\n-\t\t// FIXME: possible memory leak\n-\t\treturn;\n-\t}\n-\tfree(name);\n-\tbam_header_write(fp, h);\n-\tfor (i = 0; i < k; ++i)\n-\t\tbam_write1_core(fp, &buf[i]->core, buf[i]->data_len, buf[i]->data);\n-\tbam_close(fp);\n-}\n-\n-/*!\n- @abstract Sort an unsorted BAM file based on the chromosome order\n- and the leftmost position of an alignment\n-\n- @param is_by_qname whether to sort by query name\n- @param fn name of the file to be sorted\n- @param prefix prefix of the output and the temporary files; upon\n-\t sucessess, prefix.bam will be written.\n- @param max_mem approxiate maximum memory (very inaccurate)\n-\n- @discussion It may create multiple temporary subalignment files\n- and then merge them by calling bam_merge_core(). This function is\n- NOT thread safe.\n- */\n-void bam_sort_core_ext(int is_by_qname, const char *fn, const char *prefix, size_t max_mem, int is_stdout)\n-{\n-\tint n, ret, k, i;\n-\tsize_t mem;\n-\tbam_header_t *header;\n-\tbamFile fp;\n-\tbam1_t *b, **buf;\n-\n-\tg_is_by_qname = is_by_qname;\n-\tn = k = 0; mem = 0;\n-\tfp = strcmp(fn, "-")? 
bam_open(fn, "r") : bam_dopen(fileno(stdin), "r");\n-\tif (fp == 0) {\n-\t\tfprintf(stderr, "[bam_sort_core] fail to open file %s\\n", fn);\n-\t\treturn;\n-\t}\n-\theader = bam_header_read(fp);\n-\tbuf = (bam1_t**)calloc(max_mem / BAM_CORE_SIZE, sizeof(bam1_t*));\n-\t// write sub files\n-\tfor (;;) {\n-\t\tif (buf[k] == 0) buf[k] = (bam1_t*)calloc(1, sizeof(bam1_t));\n-\t\tb = buf[k];\n-\t\tif ((ret = bam_read1(fp, b)) < 0) break;\n-\t\tmem += ret;\n-\t\t++k;\n-\t\tif (mem >= max_mem) {\n-\t\t\tsort_blocks(n++, k, buf, prefix, header, 0);\n-\t\t\tmem = 0; k = 0;\n-\t\t}\n-\t}\n-\tif (ret != -1)\n-\t\tfprintf(stderr, "[bam_sort_core] truncated file. Continue anyway.\\n");\n-\tif (n == 0) sort_blocks(-1, k, buf, prefix, header, is_stdout);\n-\telse { // then merge\n-\t\tchar **fns, *fnout;\n-\t\tfprintf(stderr, "[bam_sort_core] merging from %d files...\\n", n+1);\n-\t\tsort_blocks(n++, k, buf, prefix, header, 0);\n-\t\tfnout = (char*)calloc(strlen(prefix) + 20, 1);\n-\t\tif (is_stdout) sprintf(fnout, "-");\n-\t\telse sprintf(fnout, "%s.bam", prefix);\n-\t\tfns = (char**)calloc(n, sizeof(char*));\n-\t\tfor (i = 0; i < n; ++i) {\n-\t\t\tfns[i] = (char*)calloc(strlen(prefix) + 20, 1);\n-\t\t\tsprintf(fns[i], "%s.%.4d.bam", prefix, i);\n-\t\t}\n-\t\tbam_merge_core(is_by_qname, fnout, 0, n, fns, 0);\n-\t\tfree(fnout);\n-\t\tfor (i = 0; i < n; ++i) {\n-\t\t\tunlink(fns[i]);\n-\t\t\tfree(fns[i]);\n-\t\t}\n-\t\tfree(fns);\n-\t}\n-\tfor (k = 0; k < max_mem / BAM_CORE_SIZE; ++k) {\n-\t\tif (buf[k]) {\n-\t\t\tfree(buf[k]->data);\n-\t\t\tfree(buf[k]);\n-\t\t}\n-\t}\n-\tfree(buf);\n-\tbam_header_destroy(header);\n-\tbam_close(fp);\n-}\n-\n-void bam_sort_core(int is_by_qname, const char *fn, const char *prefix, size_t max_mem)\n-{\n-\tbam_sort_core_ext(is_by_qname, fn, prefix, max_mem, 0);\n-}\n-\n-int bam_sort(int argc, char *argv[])\n-{\n-\tsize_t max_mem = 500000000;\n-\tint c, is_by_qname = 0, is_stdout = 0;\n-\twhile ((c = getopt(argc, argv, "nom:")) >= 0) {\n-\t\tswitch (c) 
{\n-\t\tcase \'o\': is_stdout = 1; break;\n-\t\tcase \'n\': is_by_qname = 1; break;\n-\t\tcase \'m\': max_mem = atol(optarg); break;\n-\t\t}\n-\t}\n-\tif (optind + 2 > argc) {\n-\t\tfprintf(stderr, "Usage: samtools sort [-on] [-m <maxMem>] <in.bam> <out.prefix>\\n");\n-\t\treturn 1;\n-\t}\n-\tbam_sort_core_ext(is_by_qname, argv[optind], argv[optind+1], max_mem, is_stdout);\n-\treturn 0;\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_stat.c --- a/chimerascan/pysam/samtools/bam_stat.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,78 +0,0 @@ -#include <unistd.h> -#include <assert.h> -#include "bam.h" - -typedef struct { - long long n_reads, n_mapped, n_pair_all, n_pair_map, n_pair_good; - long long n_sgltn, n_read1, n_read2; - long long n_qcfail, n_dup; - long long n_diffchr, n_diffhigh; -} bam_flagstat_t; - -#define flagstat_loop(s, c) do { \ - ++(s)->n_reads; \ - if ((c)->flag & BAM_FPAIRED) { \ - ++(s)->n_pair_all; \ - if ((c)->flag & BAM_FPROPER_PAIR) ++(s)->n_pair_good; \ - if ((c)->flag & BAM_FREAD1) ++(s)->n_read1; \ - if ((c)->flag & BAM_FREAD2) ++(s)->n_read2; \ - if (((c)->flag & BAM_FMUNMAP) && !((c)->flag & BAM_FUNMAP)) ++(s)->n_sgltn; \ - if (!((c)->flag & BAM_FUNMAP) && !((c)->flag & BAM_FMUNMAP)) { \ - ++(s)->n_pair_map; \ - if ((c)->mtid != (c)->tid) { \ - ++(s)->n_diffchr; \ - if ((c)->qual >= 5) ++(s)->n_diffhigh; \ - } \ - } \ - } \ - if (!((c)->flag & BAM_FUNMAP)) ++(s)->n_mapped; \ - if ((c)->flag & BAM_FQCFAIL) ++(s)->n_qcfail; \ - if ((c)->flag & BAM_FDUP) ++(s)->n_dup; \ - } while (0) - -bam_flagstat_t *bam_flagstat_core(bamFile fp) -{ - bam_flagstat_t *s; - bam1_t *b; - bam1_core_t *c; - int ret; - s = (bam_flagstat_t*)calloc(1, sizeof(bam_flagstat_t)); - b = bam_init1(); - c = &b->core; - while ((ret = bam_read1(fp, b)) >= 0) - flagstat_loop(s, c); - bam_destroy1(b); - if (ret != -1) - fprintf(stderr, "[bam_flagstat_core] Truncated file? Continue anyway.\n"); - return s; -} -int bam_flagstat(int argc, char *argv[]) -{ - bamFile fp; - bam_header_t *header; - bam_flagstat_t *s; - if (argc == optind) { - fprintf(stderr, "Usage: samtools flagstat <in.bam>\n"); - return 1; - } - fp = strcmp(argv[optind], "-")? 
bam_open(argv[optind], "r") : bam_dopen(fileno(stdin), "r"); - assert(fp); - header = bam_header_read(fp); - s = bam_flagstat_core(fp); - printf("%lld in total\n", s->n_reads); - printf("%lld QC failure\n", s->n_qcfail); - printf("%lld duplicates\n", s->n_dup); - printf("%lld mapped (%.2f%%)\n", s->n_mapped, (float)s->n_mapped / s->n_reads * 100.0); - printf("%lld paired in sequencing\n", s->n_pair_all); - printf("%lld read1\n", s->n_read1); - printf("%lld read2\n", s->n_read2); - printf("%lld properly paired (%.2f%%)\n", s->n_pair_good, (float)s->n_pair_good / s->n_pair_all * 100.0); - printf("%lld with itself and mate mapped\n", s->n_pair_map); - printf("%lld singletons (%.2f%%)\n", s->n_sgltn, (float)s->n_sgltn / s->n_pair_all * 100.0); - printf("%lld with mate mapped to a different chr\n", s->n_diffchr); - printf("%lld with mate mapped to a different chr (mapQ>=5)\n", s->n_diffhigh); - free(s); - bam_header_destroy(header); - bam_close(fp); - return 0; -} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bam_tview.c --- a/chimerascan/pysam/samtools/bam_tview.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,424 +0,0 @@\n-#undef _HAVE_CURSES\n-\n-#if _CURSES_LIB == 0\n-#elif _CURSES_LIB == 1\n-#include <curses.h>\n-#ifndef NCURSES_VERSION\n-#warning "_CURSES_LIB=1 but NCURSES_VERSION not defined; tview is NOT compiled"\n-#else\n-#define _HAVE_CURSES\n-#endif\n-#elif _CURSES_LIB == 2\n-#include <xcurses.h>\n-#define _HAVE_CURSES\n-#else\n-#warning "_CURSES_LIB is not 0, 1 or 2; tview is NOT compiled"\n-#endif\n-\n-#ifdef _HAVE_CURSES\n-#include <ctype.h>\n-#include <assert.h>\n-#include <string.h>\n-#include "bam.h"\n-#include "faidx.h"\n-#include "bam_maqcns.h"\n-\n-char bam_aux_getCEi(bam1_t *b, int i);\n-char bam_aux_getCSi(bam1_t *b, int i);\n-char bam_aux_getCQi(bam1_t *b, int i);\n-\n-#define TV_MIN_ALNROW 2\n-#define TV_MAX_GOTO 40\n-#define TV_LOW_MAPQ 10\n-\n-#define TV_COLOR_MAPQ 0\n-#define TV_COLOR_BASEQ 1\n-#define TV_COLOR_NUCL 2\n-#define TV_COLOR_COL 3\n-#define TV_COLOR_COLQ 4\n-\n-#define TV_BASE_NUCL 0\n-#define TV_BASE_COLOR_SPACE 1\n-\n-typedef struct {\n-\tint mrow, mcol;\n-\tWINDOW *wgoto, *whelp;\n-\n-\tbam_index_t *idx;\n-\tbam_lplbuf_t *lplbuf;\n-\tbam_header_t *header;\n-\tbamFile fp;\n-\tint curr_tid, left_pos;\n-\tfaidx_t *fai;\n-\tbam_maqcns_t *bmc;\n-\n-\tint ccol, last_pos, row_shift, base_for, color_for, is_dot, l_ref, ins, no_skip, show_name;\n-\tchar *ref;\n-} tview_t;\n-\n-int tv_pl_func(uint32_t tid, uint32_t pos, int n, const bam_pileup1_t *pl, void *data)\n-{\n-\ttview_t *tv = (tview_t*)data;\n-\tint i, j, c, rb, attr, max_ins = 0;\n-\tuint32_t call = 0;\n-\tif (pos < tv->left_pos || tv->ccol > tv->mcol) return 0; // out of screen\n-\t// print referece\n-\trb = (tv->ref && pos - tv->left_pos < tv->l_ref)? tv->ref[pos - tv->left_pos] : \'N\';\n-\tfor (i = tv->last_pos + 1; i < pos; ++i) {\n-\t\tif (i%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", i+1);\n-\t\tc = tv->ref? 
tv->ref[i - tv->left_pos] : \'N\';\n-\t\tmvaddch(1, tv->ccol++, c);\n-\t}\n-\tif (pos%10 == 0 && tv->mcol - tv->ccol >= 10) mvprintw(0, tv->ccol, "%-d", pos+1);\n-\t// print consensus\n-\tcall = bam_maqcns_call(n, pl, tv->bmc);\n-\tattr = A_UNDERLINE;\n-\tc = ",ACMGRSVTWYHKDBN"[call>>28&0xf];\n-\ti = (call>>8&0xff)/10+1;\n-\tif (i > 4) i = 4;\n-\tattr |= COLOR_PAIR(i);\n-\tif (c == toupper(rb)) c = \'.\';\n-\tattron(attr);\n-\tmvaddch(2, tv->ccol, c);\n-\tattroff(attr);\n-\tif(tv->ins) {\n-\t\t// calculate maximum insert\n-\t\tfor (i = 0; i < n; ++i) {\n-\t\t\tconst bam_pileup1_t *p = pl + i;\n-\t\t\tif (p->indel > 0 && max_ins < p->indel) max_ins = p->indel;\n-\t\t}\n-\t}\n-\t// core loop\n-\tfor (j = 0; j <= max_ins; ++j) {\n-\t\tfor (i = 0; i < n; ++i) {\n-\t\t\tconst bam_pileup1_t *p = pl + i;\n-\t\t\tint row = TV_MIN_ALNROW + p->level - tv->row_shift;\n-\t\t\tif (j == 0) {\n-\t\t\t\tif (!p->is_del) {\n-\t\t\t\t\tif (tv->base_for == TV_BASE_COLOR_SPACE && \n-\t\t\t\t\t\t\t(c = bam_aux_getCSi(p->b, p->qpos))) {\n-\t\t\t\t\t\tc = bam_aux_getCSi(p->b, p->qpos);\n-\t\t\t\t\t\t// assume that if we found one color, we will be able to get the color error\n-\t\t\t\t\t\tif (tv->is_dot && \'-\' == bam_aux_getCEi(p->b, p->qpos)) c = bam1_strand(p->b)? \',\' : \'.\';\n-\t\t\t\t\t} else {\n-\t\t\t\t\t\tif (tv->show_name) {\n-\t\t\t\t\t\t\tchar *name = bam1_qname(p->b);\n-\t\t\t\t\t\t\tc = (p->qpos + 1 >= p->b->core.l_qname)? \' \' : name[p->qpos];\n-\t\t\t\t\t\t} else {\n-\t\t\t\t\t\t\tc = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos)];\n-\t\t\t\t\t\t\tif (tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? 
\',\' : \'.\';\n-\t\t\t\t\t\t}\n-\t\t\t\t\t}\n-\t\t\t\t} else c = \'*\';\n-\t\t\t} else { // padding\n-\t\t\t\tif (j > p->indel) c = \'*\';\n-\t\t\t\telse { // insertion\n-\t\t\t\t\tif (tv->base_for == TV_BASE_NUCL) {\n-\t\t\t\t\t\tif (tv->show_name) {\n-\t\t\t\t\t\t\tchar *name = bam1_qname(p->b);\n-\t\t\t\t\t\t\tc = (p->qpos + j + 1 >= p->b->core.l_qname)? \' \' : name[p->qpos + j];\n-\t\t\t\t\t\t} else {\n-\t\t\t\t\t\t\tc = bam_nt16_rev_table[bam1_seqi(bam1_seq(p->b), p->qpos + j)];\n-\t\t\t\t\t\t\tif (j == 0 && tv->is_dot && toupper(c) == toupper(rb)) c = bam1_strand(p->b)? \',\' : \'.\';\n-\t\t\t\t\t\t}\n-\t\t\t\t\t} else {\n-\t\t\t\t\t\tc = bam_aux_getCSi(p->b, p->qpos + j);\n-\t\t\t\t\t\tif (tv->is_dot && \'-\' == bam_aux_getCEi(p->b, p->qpos + j)) c = bam1_strand(p->b)? \',\' : \'.\';\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t}\n-\t\t\tif (row > TV_MIN_ALNROW && row < tv->mrow) {\n-\t'..b' l = 0;\n-\t\telse if (c == \'\\033\') return;\n-\t\tstr[l] = \'\\0\';\n-\t\tfor (i = 0; i < TV_MAX_GOTO; ++i) mvwaddch(tv->wgoto, 1, 8 + i, \' \');\n-\t\tmvwprintw(tv->wgoto, 1, 8, "%s", str);\n-\t}\n-}\n-\n-static void tv_win_help(tview_t *tv) {\n-\tint r = 1;\n-\tWINDOW *win = tv->whelp;\n-\twborder(win, \'|\', \'|\', \'-\', \'-\', \'+\', \'+\', \'+\', \'+\');\n-\tmvwprintw(win, r++, 2, " -=- Help -=- ");\n-\tr++;\n-\tmvwprintw(win, r++, 2, "? 
This window");\n-\tmvwprintw(win, r++, 2, "Arrows Small scroll movement");\n-\tmvwprintw(win, r++, 2, "h,j,k,l Small scroll movement");\n-\tmvwprintw(win, r++, 2, "H,J,K,L Large scroll movement");\n-\tmvwprintw(win, r++, 2, "ctrl-H Scroll 1k left");\n-\tmvwprintw(win, r++, 2, "ctrl-L Scroll 1k right");\n-\tmvwprintw(win, r++, 2, "space Scroll one screen");\n-\tmvwprintw(win, r++, 2, "backspace Scroll back one screen");\n-\tmvwprintw(win, r++, 2, "g Go to specific location");\n-\tmvwprintw(win, r++, 2, "m Color for mapping qual");\n-\tmvwprintw(win, r++, 2, "n Color for nucleotide");\n-\tmvwprintw(win, r++, 2, "b Color for base quality");\n-\tmvwprintw(win, r++, 2, "c Color for cs color");\n-\tmvwprintw(win, r++, 2, "z Color for cs qual");\n-\tmvwprintw(win, r++, 2, ". Toggle on/off dot view");\n-\tmvwprintw(win, r++, 2, "s Toggle on/off ref skip");\n-\tmvwprintw(win, r++, 2, "r Toggle on/off rd name");\n-\tmvwprintw(win, r++, 2, "N Turn on nt view");\n-\tmvwprintw(win, r++, 2, "C Turn on cs view");\n-\tmvwprintw(win, r++, 2, "i Toggle on/off ins");\n-\tmvwprintw(win, r++, 2, "q Exit");\n-\tr++;\n-\tmvwprintw(win, r++, 2, "Underline: Secondary or orphan");\n-\tmvwprintw(win, r++, 2, "Blue: 0-9 Green: 10-19");\n-\tmvwprintw(win, r++, 2, "Yellow: 20-29 White: >=30");\n-\twrefresh(win);\n-\twgetch(win);\n-}\n-\n-void tv_loop(tview_t *tv)\n-{\n-\tint tid, pos;\n-\ttid = tv->curr_tid; pos = tv->left_pos;\n-\twhile (1) {\n-\t\tint c = getch();\n-\t\tswitch (c) {\n-\t\t\tcase \'?\': tv_win_help(tv); break;\n-\t\t\tcase \'\\033\':\n-\t\t\tcase \'q\': goto end_loop;\n-\t\t\tcase \'/\': \n-\t\t\tcase \'g\': tv_win_goto(tv, &tid, &pos); break;\n-\t\t\tcase \'m\': tv->color_for = TV_COLOR_MAPQ; break;\n-\t\t\tcase \'b\': tv->color_for = TV_COLOR_BASEQ; break;\n-\t\t\tcase \'n\': tv->color_for = TV_COLOR_NUCL; break;\n-\t\t\tcase \'c\': tv->color_for = TV_COLOR_COL; break;\n-\t\t\tcase \'z\': tv->color_for = TV_COLOR_COLQ; break;\n-\t\t\tcase \'s\': tv->no_skip = !tv->no_skip; 
break;\n-\t\t\tcase \'r\': tv->show_name = !tv->show_name; break;\n-\t\t\tcase KEY_LEFT:\n-\t\t\tcase \'h\': --pos; break;\n-\t\t\tcase KEY_RIGHT:\n-\t\t\tcase \'l\': ++pos; break;\n-\t\t\tcase KEY_SLEFT:\n-\t\t\tcase \'H\': pos -= 20; break;\n-\t\t\tcase KEY_SRIGHT:\n-\t\t\tcase \'L\': pos += 20; break;\n-\t\t\tcase \'.\': tv->is_dot = !tv->is_dot; break;\n-\t\t\tcase \'N\': tv->base_for = TV_BASE_NUCL; break;\n-\t\t\tcase \'C\': tv->base_for = TV_BASE_COLOR_SPACE; break;\n-\t\t\tcase \'i\': tv->ins = !tv->ins; break;\n-\t\t\tcase \'\\010\': pos -= 1000; break;\n-\t\t\tcase \'\\014\': pos += 1000; break;\n-\t\t\tcase \' \': pos += tv->mcol; break;\n-\t\t\tcase KEY_UP:\n-\t\t\tcase \'j\': --tv->row_shift; break;\n-\t\t\tcase KEY_DOWN:\n-\t\t\tcase \'k\': ++tv->row_shift; break;\n-\t\t\tcase KEY_BACKSPACE:\n-\t\t\tcase \'\\177\': pos -= tv->mcol; break;\n-\t\t\tcase KEY_RESIZE: getmaxyx(stdscr, tv->mrow, tv->mcol); break;\n-\t\t\tdefault: continue;\n-\t\t}\n-\t\tif (pos < 0) pos = 0;\n-\t\tif (tv->row_shift < 0) tv->row_shift = 0;\n-\t\ttv_draw_aln(tv, tid, pos);\n-\t}\n-end_loop:\n-\treturn;\n-}\n-\n-int bam_tview_main(int argc, char *argv[])\n-{\n-\ttview_t *tv;\n-\tif (argc == 1) {\n-\t\tfprintf(stderr, "Usage: bamtk tview <aln.bam> [ref.fasta]\\n");\n-\t\treturn 1;\n-\t}\n-\ttv = tv_init(argv[1], (argc == 2)? 0 : argv[2]);\n-\ttv_draw_aln(tv, 0, 0);\n-\ttv_loop(tv);\n-\ttv_destroy(tv);\n-\treturn 0;\n-}\n-#else // #ifdef _HAVE_CURSES\n-#include <stdio.h>\n-#warning "No curses library is available; tview is disabled."\n-int bam_tview_main(int argc, char *argv[])\n-{\n-\tfprintf(stderr, "[bam_tview_main] The ncurses library is unavailable; tview is not compiled.\\n");\n-\treturn 1;\n-}\n-#endif // #ifdef _HAVE_CURSES\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bgzf.c --- a/chimerascan/pysam/samtools/bgzf.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,671 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology\n-\n- Permission is hereby granted, free of charge, to any person obtaining a copy\n- of this software and associated documentation files (the "Software"), to deal\n- in the Software without restriction, including without limitation the rights\n- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n- copies of the Software, and to permit persons to whom the Software is\n- furnished to do so, subject to the following conditions:\n-\n- The above copyright notice and this permission notice shall be included in\n- all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n- THE SOFTWARE.\n-*/\n-\n-/*\n- 2009-06-29 by lh3: cache recent uncompressed blocks.\n- 2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP.\n- 2009-06-12 by lh3: support a mode string like "wu" where \'u\' for uncompressed output */\n-\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <unistd.h>\n-#include <fcntl.h>\n-#include <sys/types.h>\n-#include <sys/stat.h>\n-#include "bgzf.h"\n-\n-#include "khash.h"\n-typedef struct {\n-\tint size;\n-\tuint8_t *block;\n-\tint64_t end_offset;\n-} cache_t;\n-KHASH_MAP_INIT_INT64(cache, cache_t)\n-\n-#if defined(_WIN32) || defined(_MSC_VER)\n-#define ftello(fp) ftell(fp)\n-#define fseeko(fp, offset, whence) fseek(fp, offset, whence)\n-#else\n-extern off_t ftello(FILE *stream);\n-extern int fseeko(FILE *stream, off_t 
offset, int whence);\n-#endif\n-\n-typedef int8_t bgzf_byte_t;\n-\n-static const int DEFAULT_BLOCK_SIZE = 64 * 1024;\n-static const int MAX_BLOCK_SIZE = 64 * 1024;\n-\n-static const int BLOCK_HEADER_LENGTH = 18;\n-static const int BLOCK_FOOTER_LENGTH = 8;\n-\n-static const int GZIP_ID1 = 31;\n-static const int GZIP_ID2 = 139;\n-static const int CM_DEFLATE = 8;\n-static const int FLG_FEXTRA = 4;\n-static const int OS_UNKNOWN = 255;\n-static const int BGZF_ID1 = 66; // \'B\'\n-static const int BGZF_ID2 = 67; // \'C\'\n-static const int BGZF_LEN = 2;\n-static const int BGZF_XLEN = 6; // BGZF_LEN+4\n-\n-static const int GZIP_WINDOW_BITS = -15; // no zlib header\n-static const int Z_DEFAULT_MEM_LEVEL = 8;\n-\n-\n-inline\n-void\n-packInt16(uint8_t* buffer, uint16_t value)\n-{\n- buffer[0] = value;\n- buffer[1] = value >> 8;\n-}\n-\n-inline\n-int\n-unpackInt16(const uint8_t* buffer)\n-{\n- return (buffer[0] | (buffer[1] << 8));\n-}\n-\n-inline\n-void\n-packInt32(uint8_t* buffer, uint32_t value)\n-{\n- buffer[0] = value;\n- buffer[1] = value >> 8;\n- buffer[2] = value >> 16;\n- buffer[3] = value >> 24;\n-}\n-\n-static inline\n-int\n-bgzf_min(int x, int y)\n-{\n- return (x < y) ? 
x : y;\n-}\n-\n-static\n-void\n-report_error(BGZF* fp, const char* message) {\n- fp->error = message;\n-}\n-\n-static BGZF *bgzf_read_init()\n-{\n-\tBGZF *fp;\n-\tfp = calloc(1, sizeof(BGZF));\n- fp->uncompressed_block_size = MAX_BLOCK_SIZE;\n- fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);\n- fp->compressed_block_size = MAX_BLOCK_SIZE;\n- fp->compressed_block = malloc(MAX_BLOCK_SIZE);\n-\tfp->cache_size = 0;\n-\tfp->cache = kh_init(cache);\n-\treturn fp;\n-}\n-\n-static\n-BGZF*\n-open_read(int fd)\n-{\n-#ifdef _USE_KNETFILE\n- knetFile *file = knet_dopen(fd, "r");\n-#else\n- FILE* file = fdopen(fd, "r");\n-#endif\n- BGZF* fp;\n-\tif (file == 0) return 0;\n-\tfp = bgzf_read_init();\n- fp->file_descriptor = fd;\n- fp->open_mode = \'r\';\n-#ifdef _USE_KNETFILE\n- fp->x.fpr = file;\n-#else\n- fp->file = file;\n-#endif\n- return fp;\n-}\n-\n-static\n-BGZF*\n-open_write(int'..b'_length = deflate_block(fp, fp->block_offset);\n- if (block_length < 0) return -1;\n-#ifdef _USE_KNETFILE\n- count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);\n-#else\n- count = fwrite(fp->compressed_block, 1, block_length, fp->file);\n-#endif\n- if (count != block_length) {\n- report_error(fp, "write failed");\n- return -1;\n- }\n- fp->block_address += block_length;\n- }\n- return 0;\n-}\n-\n-int bgzf_flush_try(BGZF *fp, int size)\n-{\n-\tif (fp->block_offset + size > fp->uncompressed_block_size)\n-\t\treturn bgzf_flush(fp);\n-\treturn -1;\n-}\n-\n-int bgzf_write(BGZF* fp, const void* data, int length)\n-{\n- if (fp->open_mode != \'w\') {\n- report_error(fp, "file not open for writing");\n- return -1;\n- }\n-\n- if (fp->uncompressed_block == NULL)\n- fp->uncompressed_block = malloc(fp->uncompressed_block_size);\n-\n- const bgzf_byte_t* input = data;\n- int block_length = fp->uncompressed_block_size;\n- int bytes_written = 0;\n- while (bytes_written < length) {\n- int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);\n- bgzf_byte_t* buffer = 
fp->uncompressed_block;\n- memcpy(buffer + fp->block_offset, input, copy_length);\n- fp->block_offset += copy_length;\n- input += copy_length;\n- bytes_written += copy_length;\n- if (fp->block_offset == block_length) {\n- if (bgzf_flush(fp) != 0) {\n- break;\n- }\n- }\n- }\n- return bytes_written;\n-}\n-\n-int bgzf_close(BGZF* fp)\n-{\n- if (fp->open_mode == \'w\') {\n- if (bgzf_flush(fp) != 0) return -1;\n-\t\t{ // add an empty block\n-\t\t\tint count, block_length = deflate_block(fp, 0);\n-#ifdef _USE_KNETFILE\n-\t\t\tcount = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);\n-#else\n-\t\t\tcount = fwrite(fp->compressed_block, 1, block_length, fp->file);\n-#endif\n-\t\t}\n-#ifdef _USE_KNETFILE\n- if (fflush(fp->x.fpw) != 0) {\n-#else\n- if (fflush(fp->file) != 0) {\n-#endif\n- report_error(fp, "flush failed");\n- return -1;\n- }\n- }\n- if (fp->owned_file) {\n-#ifdef _USE_KNETFILE\n-\t\tint ret;\n-\t\tif (fp->open_mode == \'w\') ret = fclose(fp->x.fpw);\n-\t\telse ret = knet_close(fp->x.fpr);\n- if (ret != 0) return -1;\n-#else\n- if (fclose(fp->file) != 0) return -1;\n-#endif\n- }\n- free(fp->uncompressed_block);\n- free(fp->compressed_block);\n-\tfree_cache(fp);\n- free(fp);\n- return 0;\n-}\n-\n-void bgzf_set_cache_size(BGZF *fp, int cache_size)\n-{\n-\tif (fp) fp->cache_size = cache_size;\n-}\n-\n-int bgzf_check_EOF(BGZF *fp)\n-{\n-\tstatic uint8_t magic[28] = "\\037\\213\\010\\4\\0\\0\\0\\0\\0\\377\\6\\0\\102\\103\\2\\0\\033\\0\\3\\0\\0\\0\\0\\0\\0\\0\\0\\0";\n-\tuint8_t buf[28];\n-\toff_t offset;\n-#ifdef _USE_KNETFILE\n-\toffset = knet_tell(fp->x.fpr);\n-\tif (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;\n-\tknet_read(fp->x.fpr, buf, 28);\n-\tknet_seek(fp->x.fpr, offset, SEEK_SET);\n-#else\n-\toffset = ftello(fp->file);\n-\tif (fseeko(fp->file, -28, SEEK_END) != 0) return -1;\n-\tfread(buf, 1, 28, fp->file);\n-\tfseeko(fp->file, offset, SEEK_SET);\n-#endif\n-\treturn (memcmp(magic, buf, 28) == 0)? 
1 : 0;\n-}\n-\n-int64_t bgzf_seek(BGZF* fp, int64_t pos, int where)\n-{\n-\tint block_offset;\n-\tint64_t block_address;\n-\n- if (fp->open_mode != \'r\') {\n- report_error(fp, "file not open for read");\n- return -1;\n- }\n- if (where != SEEK_SET) {\n- report_error(fp, "unimplemented seek option");\n- return -1;\n- }\n- block_offset = pos & 0xFFFF;\n- block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;\n-#ifdef _USE_KNETFILE\n- if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {\n-#else\n- if (fseeko(fp->file, block_address, SEEK_SET) != 0) {\n-#endif\n- report_error(fp, "seek failed");\n- return -1;\n- }\n- fp->block_length = 0; // indicates current block is not loaded\n- fp->block_address = block_address;\n- fp->block_offset = block_offset;\n- return 0;\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/bgzf.h --- a/chimerascan/pysam/samtools/bgzf.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,157 +0,0 @@ -/* The MIT License - - Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -#ifndef __BGZF_H -#define __BGZF_H - -#include <stdint.h> -#include <stdio.h> -#include <stdbool.h> -#include <zlib.h> -#ifdef _USE_KNETFILE -#include "knetfile.h" -#endif - -//typedef int8_t bool; - -typedef struct { - int file_descriptor; - char open_mode; // 'r' or 'w' - bool owned_file, is_uncompressed; -#ifdef _USE_KNETFILE - union { - knetFile *fpr; - FILE *fpw; - } x; -#else - FILE* file; -#endif - int uncompressed_block_size; - int compressed_block_size; - void* uncompressed_block; - void* compressed_block; - int64_t block_address; - int block_length; - int block_offset; - int cache_size; - const char* error; - void *cache; // a pointer to a hash table -} BGZF; - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Open an existing file descriptor for reading or writing. 
- * Mode must be either "r" or "w". - * A subsequent bgzf_close will not close the file descriptor. - * Returns null on error. - */ -BGZF* bgzf_fdopen(int fd, const char* __restrict mode); - -/* - * Open the specified file for reading or writing. - * Mode must be either "r" or "w". - * Returns null on error. - */ -BGZF* bgzf_open(const char* path, const char* __restrict mode); - -/* - * Close the BGZ file and free all associated resources. - * Does not close the underlying file descriptor if created with bgzf_fdopen. - * Returns zero on success, -1 on error. - */ -int bgzf_close(BGZF* fp); - -/* - * Read up to length bytes from the file storing into data. - * Returns the number of bytes actually read. - * Returns zero on end of file. - * Returns -1 on error. - */ -int bgzf_read(BGZF* fp, void* data, int length); - -/* - * Write length bytes from data to the file. - * Returns the number of bytes written. - * Returns -1 on error. - */ -int bgzf_write(BGZF* fp, const void* data, int length); - -/* - * Return a virtual file pointer to the current location in the file. - * No interpetation of the value should be made, other than a subsequent - * call to bgzf_seek can be used to position the file at the same point. - * Return value is non-negative on success. - * Returns -1 on error. - */ -#define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)) - -/* - * Set the file to read from the location specified by pos, which must - * be a value previously returned by bgzf_tell for this file (but not - * necessarily one returned by this file handle). - * The where argument must be SEEK_SET. - * Seeking on a file opened for write is not supported. - * Returns zero on success, -1 on error. - */ -int64_t bgzf_seek(BGZF* fp, int64_t pos, int where); - -/* - * Set the cache size. Zero to disable. By default, caching is - * disabled. The recommended cache size for frequent random access is - * about 8M bytes. 
- */ -void bgzf_set_cache_size(BGZF *fp, int cache_size); - -int bgzf_check_EOF(BGZF *fp); -int bgzf_read_block(BGZF* fp); -int bgzf_flush(BGZF* fp); -int bgzf_flush_try(BGZF *fp, int size); - -#ifdef __cplusplus -} -#endif - -static inline int bgzf_getc(BGZF *fp) -{ - int c; - if (fp->block_offset >= fp->block_length) { - if (bgzf_read_block(fp) != 0) return -2; /* error */ - if (fp->block_length == 0) return -1; /* end-of-file */ - } - c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; - if (fp->block_offset == fp->block_length) { -#ifdef _USE_KNETFILE - fp->block_address = knet_tell(fp->x.fpr); -#else - fp->block_address = ftello(fp->file); -#endif - fp->block_offset = 0; - fp->block_length = 0; - } - return c; -} - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/faidx.c --- a/chimerascan/pysam/samtools/faidx.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,422 +0,0 @@\n-#include <ctype.h>\n-#include <string.h>\n-#include <stdlib.h>\n-#include <stdio.h>\n-#include "faidx.h"\n-#include "khash.h"\n-\n-typedef struct {\n-\tuint64_t len:32, line_len:16, line_blen:16;\n-\tuint64_t offset;\n-} faidx1_t;\n-KHASH_MAP_INIT_STR(s, faidx1_t)\n-\n-#ifndef _NO_RAZF\n-#include "razf.h"\n-#else\n-#ifdef _WIN32\n-#define ftello(fp) ftell(fp)\n-#define fseeko(fp, offset, whence) fseek(fp, offset, whence)\n-#else\n-extern off_t ftello(FILE *stream);\n-extern int fseeko(FILE *stream, off_t offset, int whence);\n-#endif\n-#define RAZF FILE\n-#define razf_read(fp, buf, size) fread(buf, 1, size, fp)\n-#define razf_open(fn, mode) fopen(fn, mode)\n-#define razf_close(fp) fclose(fp)\n-#define razf_seek(fp, offset, whence) fseeko(fp, offset, whence)\n-#define razf_tell(fp) ftello(fp)\n-#endif\n-#ifdef _USE_KNETFILE\n-#include "knetfile.h"\n-#endif\n-\n-struct __faidx_t {\n-\tRAZF *rz;\n-\tint n, m;\n-\tchar **name;\n-\tkhash_t(s) *hash;\n-};\n-\n-#ifndef kroundup32\n-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))\n-#endif\n-\n-static inline void fai_insert_index(faidx_t *idx, const char *name, int len, int line_len, int line_blen, uint64_t offset)\n-{\n-\tkhint_t k;\n-\tint ret;\n-\tfaidx1_t t;\n-\tif (idx->n == idx->m) {\n-\t\tidx->m = idx->m? 
idx->m<<1 : 16;\n-\t\tidx->name = (char**)realloc(idx->name, sizeof(void*) * idx->m);\n-\t}\n-\tidx->name[idx->n] = strdup(name);\n-\tk = kh_put(s, idx->hash, idx->name[idx->n], &ret);\n-\tt.len = len; t.line_len = line_len; t.line_blen = line_blen; t.offset = offset;\n-\tkh_value(idx->hash, k) = t;\n-\t++idx->n;\n-}\n-\n-faidx_t *fai_build_core(RAZF *rz)\n-{\n-\tchar c, *name;\n-\tint l_name, m_name, ret;\n-\tint len, line_len, line_blen, state;\n-\tint l1, l2;\n-\tfaidx_t *idx;\n-\tuint64_t offset;\n-\n-\tidx = (faidx_t*)calloc(1, sizeof(faidx_t));\n-\tidx->hash = kh_init(s);\n-\tname = 0; l_name = m_name = 0;\n-\tlen = line_len = line_blen = -1; state = 0; l1 = l2 = -1; offset = 0;\n-\twhile (razf_read(rz, &c, 1)) {\n-\t\tif (c == \'\\n\') { // an empty line\n-\t\t\tif (state == 1) {\n-\t\t\t\toffset = razf_tell(rz);\n-\t\t\t\tcontinue;\n-\t\t\t} else if ((state == 0 && len < 0) || state == 2) continue;\n-\t\t}\n-\t\tif (c == \'>\') { // fasta header\n-\t\t\tif (len >= 0)\n-\t\t\t\tfai_insert_index(idx, name, len, line_len, line_blen, offset);\n-\t\t\tl_name = 0;\n-\t\t\twhile ((ret = razf_read(rz, &c, 1)) != 0 && !isspace(c)) {\n-\t\t\t\tif (m_name < l_name + 2) {\n-\t\t\t\t\tm_name = l_name + 2;\n-\t\t\t\t\tkroundup32(m_name);\n-\t\t\t\t\tname = (char*)realloc(name, m_name);\n-\t\t\t\t}\n-\t\t\t\tname[l_name++] = c;\n-\t\t\t}\n-\t\t\tname[l_name] = \'\\0\';\n-\t\t\tif (ret == 0) {\n-\t\t\t\tfprintf(stderr, "[fai_build_core] the last entry has no sequence\\n");\n-\t\t\t\tfree(name); fai_destroy(idx);\n-\t\t\t\treturn 0;\n-\t\t\t}\n-\t\t\tif (c != \'\\n\') while (razf_read(rz, &c, 1) && c != \'\\n\');\n-\t\t\tstate = 1; len = 0;\n-\t\t\toffset = razf_tell(rz);\n-\t\t} else {\n-\t\t\tif (state == 3) {\n-\t\t\t\tfprintf(stderr, "[fai_build_core] inlined empty line is not allowed in sequence \'%s\'.\\n", name);\n-\t\t\t\tfree(name); fai_destroy(idx);\n-\t\t\t\treturn 0;\n-\t\t\t}\n-\t\t\tif (state == 2) state = 3;\n-\t\t\tl1 = l2 = 0;\n-\t\t\tdo 
{\n-\t\t\t\t++l1;\n-\t\t\t\tif (isgraph(c)) ++l2;\n-\t\t\t} while ((ret = razf_read(rz, &c, 1)) && c != \'\\n\');\n-\t\t\tif (state == 3 && l2) {\n-\t\t\t\tfprintf(stderr, "[fai_build_core] different line length in sequence \'%s\'.\\n", name);\n-\t\t\t\tfree(name); fai_destroy(idx);\n-\t\t\t\treturn 0;\n-\t\t\t}\n-\t\t\t++l1; len += l2;\n-\t\t\tif (l2 >= 0x10000) {\n-\t\t\t\tfprintf(stderr, "[fai_build_core] line length exceeds 65535 in sequence \'%s\'.\\n", name);\n-\t\t\t\tfree(name); fai_destroy(idx);\n-\t\t\t\treturn 0;\n-\t\t\t}\n-\t\t\tif (state == 1) line_len = l1, line_blen = l2, state = 0;\n-\t\t\telse if (state == 0) {\n-\t\t\t\tif (l1 != line_len || l2 != line_blen) state = 2;\n-\t\t\t}\n-\t\t}\n-\t}\n-\tfai_insert_index(idx, name, len, line_len, line_blen, offset);\n-\tfree(name);\n-\treturn idx;\n-}\n-\n-void fai_save(const faidx_t *fai, FILE *fp)\n-{\n-\tkhint_t k;\n-\tint i;\n-\tfor (i = 0; i < fai->n; ++i) {\n-\t\tfaidx1_t x;\n-\t\tk = kh_get(s, fai->hash, fai->name[i]);\n-\t\tx = kh_value(fai->hash, k);\n-#ifdef _WIN32\n-\t\tfprintf(fp, "%s\\t%'..b' knet_close(fp_remote);\n-\n- return fopen(fn, "r");\n-}\n-#endif\n-\n-faidx_t *fai_load(const char *fn)\n-{\n-\tchar *str;\n-\tFILE *fp;\n-\tfaidx_t *fai;\n-\tstr = (char*)calloc(strlen(fn) + 5, 1);\n-\tsprintf(str, "%s.fai", fn);\n-\n-#ifdef _USE_KNETFILE\n- if (strstr(fn, "ftp://") == fn || strstr(fn, "http://") == fn)\n- {\n- fp = download_and_open(str);\n- if ( !fp )\n- {\n- fprintf(stderr, "[fai_load] failed to open remote FASTA index %s\\n", str);\n- free(str);\n- return 0;\n- }\n- }\n- else\n-#endif\n- fp = fopen(str, "rb");\n-\tif (fp == 0) {\n-\t\tfprintf(stderr, "[fai_load] build FASTA index.\\n");\n-\t\tfai_build(fn);\n-\t\tfp = fopen(str, "rb");\n-\t\tif (fp == 0) {\n-\t\t\tfprintf(stderr, "[fai_load] fail to open FASTA index.\\n");\n-\t\t\tfree(str);\n-\t\t\treturn 0;\n-\t\t}\n-\t}\n-\n-\tfai = fai_read(fp);\n-\tfclose(fp);\n-\n-\tfai->rz = razf_open(fn, "rb");\n-\tfree(str);\n-\tif 
(fai->rz == 0) {\n-\t\tfprintf(stderr, "[fai_load] fail to open FASTA file.\\n");\n-\t\treturn 0;\n-\t}\n-\treturn fai;\n-}\n-\n-char *fai_fetch(const faidx_t *fai, const char *str, int *len)\n-{\n-\tchar *s, *p, c;\n-\tint i, l, k;\n-\tkhiter_t iter;\n-\tfaidx1_t val;\n-\tkhash_t(s) *h;\n-\tint beg, end;\n-\n-\tbeg = end = -1;\n-\th = fai->hash;\n-\tl = strlen(str);\n-\tp = s = (char*)malloc(l+1);\n-\t/* squeeze out "," */\n-\tfor (i = k = 0; i != l; ++i)\n-\t\tif (str[i] != \',\' && !isspace(str[i])) s[k++] = str[i];\n-\ts[k] = 0;\n-\tfor (i = 0; i != k; ++i) if (s[i] == \':\') break;\n-\ts[i] = 0;\n-\titer = kh_get(s, h, s); /* get the ref_id */\n-\tif (iter == kh_end(h)) {\n-\t\t*len = 0;\n-\t\tfree(s); return 0;\n-\t}\n-\tval = kh_value(h, iter);\n-\tif (i == k) { /* dump the whole sequence */\n-\t\tbeg = 0; end = val.len;\n-\t} else {\n-\t\tfor (p = s + i + 1; i != k; ++i) if (s[i] == \'-\') break;\n-\t\tbeg = atoi(p);\n-\t\tif (i < k) {\n-\t\t\tp = s + i + 1;\n-\t\t\tend = atoi(p);\n-\t\t} else end = val.len;\n-\t}\n-\tif (beg > 0) --beg;\n-\tif (beg >= val.len) beg = val.len;\n-\tif (end >= val.len) end = val.len;\n-\tif (beg > end) beg = end;\n-\tfree(s);\n-\n-\t// now retrieve the sequence\n-\tl = 0;\n-\ts = (char*)malloc(end - beg + 2);\n-\trazf_seek(fai->rz, val.offset + beg / val.line_blen * val.line_len + beg % val.line_blen, SEEK_SET);\n-\twhile (razf_read(fai->rz, &c, 1) == 1 && l < end - beg && !fai->rz->z_err)\n-\t\tif (isgraph(c)) s[l++] = c;\n-\ts[l] = \'\\0\';\n-\t*len = l;\n-\treturn s;\n-}\n-\n-int faidx_main(int argc, char *argv[])\n-{\n-\tif (argc == 1) {\n-\t\tfprintf(stderr, "Usage: faidx <in.fasta> [<reg> [...]]\\n");\n-\t\treturn 1;\n-\t} else {\n-\t\tif (argc == 2) fai_build(argv[1]);\n-\t\telse {\n-\t\t\tint i, j, k, l;\n-\t\t\tchar *s;\n-\t\t\tfaidx_t *fai;\n-\t\t\tfai = fai_load(argv[1]);\n-\t\t\tif (fai == 0) return 1;\n-\t\t\tfor (i = 2; i != argc; ++i) {\n-\t\t\t\tprintf(">%s\\n", argv[i]);\n-\t\t\t\ts = fai_fetch(fai, argv[i], 
&l);\n-\t\t\t\tfor (j = 0; j < l; j += 60) {\n-\t\t\t\t\tfor (k = 0; k < 60 && k < l - j; ++k)\n-\t\t\t\t\t\tputchar(s[j + k]);\n-\t\t\t\t\tputchar(\'\\n\');\n-\t\t\t\t}\n-\t\t\t\tfree(s);\n-\t\t\t}\n-\t\t\tfai_destroy(fai);\n-\t\t}\n-\t}\n-\treturn 0;\n-}\n-\n-int faidx_fetch_nseq(const faidx_t *fai) \n-{\n-\treturn fai->n;\n-}\n-\n-char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len)\n-{\n-\tint l;\n-\tchar c;\n- khiter_t iter;\n- faidx1_t val;\n-\tchar *seq=NULL;\n-\n- // Adjust position\n- iter = kh_get(s, fai->hash, c_name);\n- if(iter == kh_end(fai->hash)) return 0;\n- val = kh_value(fai->hash, iter);\n-\tif(p_end_i < p_beg_i) p_beg_i = p_end_i;\n- if(p_beg_i < 0) p_beg_i = 0;\n- else if(val.len <= p_beg_i) p_beg_i = val.len - 1;\n- if(p_end_i < 0) p_end_i = 0;\n- else if(val.len <= p_end_i) p_end_i = val.len - 1;\n-\n- // Now retrieve the sequence \n-\tl = 0;\n-\tseq = (char*)malloc(p_end_i - p_beg_i + 2);\n-\trazf_seek(fai->rz, val.offset + p_beg_i / val.line_blen * val.line_len + p_beg_i % val.line_blen, SEEK_SET);\n-\twhile (razf_read(fai->rz, &c, 1) == 1 && l < p_end_i - p_beg_i + 1)\n-\t\tif (isgraph(c)) seq[l++] = c;\n-\tseq[l] = \'\\0\';\n-\t*len = l;\n-\treturn seq;\n-}\n-\n-#ifdef FAIDX_MAIN\n-int main(int argc, char *argv[]) { return faidx_main(argc, argv); }\n-#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/faidx.h --- a/chimerascan/pysam/samtools/faidx.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,103 +0,0 @@ -/* The MIT License - - Copyright (c) 2008 Genome Research Ltd (GRL). - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -/* Contact: Heng Li <lh3@sanger.ac.uk> */ - -#ifndef FAIDX_H -#define FAIDX_H - -/*! - @header - - Index FASTA files and extract subsequence. - - @copyright The Wellcome Trust Sanger Institute. - */ - -struct __faidx_t; -typedef struct __faidx_t faidx_t; - -#ifdef __cplusplus -extern "C" { -#endif - - /*! - @abstract Build index for a FASTA or razip compressed FASTA file. - @param fn FASTA file name - @return 0 on success; or -1 on failure - @discussion File "fn.fai" will be generated. - */ - int fai_build(const char *fn); - - /*! - @abstract Distroy a faidx_t struct. - @param fai Pointer to the struct to be destroyed - */ - void fai_destroy(faidx_t *fai); - - /*! - @abstract Load index from "fn.fai". - @param fn File name of the FASTA file - */ - faidx_t *fai_load(const char *fn); - - /*! 
- @abstract Fetch the sequence in a region. - @param fai Pointer to the faidx_t struct - @param reg Region in the format "chr2:20,000-30,000" - @param len Length of the region - @return Pointer to the sequence; null on failure - - @discussion The returned sequence is allocated by malloc family - and should be destroyed by end users by calling free() on it. - */ - char *fai_fetch(const faidx_t *fai, const char *reg, int *len); - - /*! - @abstract Fetch the number of sequences. - @param fai Pointer to the faidx_t struct - @return The number of sequences - */ - int faidx_fetch_nseq(const faidx_t *fai); - - /*! - @abstract Fetch the sequence in a region. - @param fai Pointer to the faidx_t struct - @param c_name Region name - @param p_beg_i Beginning position number (zero-based) - @param p_end_i End position number (zero-based) - @param len Length of the region - @return Pointer to the sequence; null on failure - - @discussion The returned sequence is allocated by malloc family - and should be destroyed by end users by calling free() on it. - */ - char *faidx_fetch_seq(const faidx_t *fai, char *c_name, int p_beg_i, int p_end_i, int *len); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/glf.c --- a/chimerascan/pysam/samtools/glf.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,236 +0,0 @@ -#include <string.h> -#include <stdlib.h> -#include "glf.h" - -#ifdef _NO_BGZF -// then alias bgzf_*() functions -#endif - -static int glf3_is_BE = 0; - -static inline uint32_t bam_swap_endian_4(uint32_t v) -{ - v = ((v & 0x0000FFFFU) << 16) | (v >> 16); - return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); -} - -static inline uint16_t bam_swap_endian_2(uint16_t v) -{ - return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); -} - -static inline int bam_is_big_endian() -{ - long one= 1; - return !(*((char *)(&one))); -} - -glf3_header_t *glf3_header_init() -{ - glf3_is_BE = bam_is_big_endian(); - return (glf3_header_t*)calloc(1, sizeof(glf3_header_t)); -} - -glf3_header_t *glf3_header_read(glfFile fp) -{ - glf3_header_t *h; - char magic[4]; - h = glf3_header_init(); - bgzf_read(fp, magic, 4); - if (strncmp(magic, "GLF\3", 4)) { - fprintf(stderr, "[glf3_header_read] invalid magic.\n"); - glf3_header_destroy(h); - return 0; - } - bgzf_read(fp, &h->l_text, 4); - if (glf3_is_BE) h->l_text = bam_swap_endian_4(h->l_text); - if (h->l_text) { - h->text = (uint8_t*)calloc(h->l_text + 1, 1); - bgzf_read(fp, h->text, h->l_text); - } - return h; -} - -void glf3_header_write(glfFile fp, const glf3_header_t *h) -{ - int32_t x; - bgzf_write(fp, "GLF\3", 4); - x = glf3_is_BE? 
bam_swap_endian_4(h->l_text) : h->l_text; - bgzf_write(fp, &x, 4); - if (h->l_text) bgzf_write(fp, h->text, h->l_text); -} - -void glf3_header_destroy(glf3_header_t *h) -{ - free(h->text); - free(h); -} - -char *glf3_ref_read(glfFile fp, int *len) -{ - int32_t n, x; - char *str; - *len = 0; - if (bgzf_read(fp, &n, 4) != 4) return 0; - if (glf3_is_BE) n = bam_swap_endian_4(n); - if (n < 0) { - fprintf(stderr, "[glf3_ref_read] invalid reference name length: %d.\n", n); - return 0; - } - str = (char*)calloc(n + 1, 1); // not necesarily n+1 in fact - x = bgzf_read(fp, str, n); - x += bgzf_read(fp, len, 4); - if (x != n + 4) { - free(str); *len = -1; return 0; // truncated - } - if (glf3_is_BE) *len = bam_swap_endian_4(*len); - return str; -} - -void glf3_ref_write(glfFile fp, const char *str, int len) -{ - int32_t m, n = strlen(str) + 1; - m = glf3_is_BE? bam_swap_endian_4(n) : n; - bgzf_write(fp, &m, 4); - bgzf_write(fp, str, n); - if (glf3_is_BE) len = bam_swap_endian_4(len); - bgzf_write(fp, &len, 4); -} - -void glf3_view1(const char *ref_name, const glf3_t *g3, int pos) -{ - int j; - if (g3->rtype == GLF3_RTYPE_END) return; - printf("%s\t%d\t%c\t%d\t%d\t%d", ref_name, pos + 1, - g3->rtype == GLF3_RTYPE_INDEL? '*' : "XACMGRSVTWYHKDBN"[g3->ref_base], - g3->depth, g3->rms_mapQ, g3->min_lk); - if (g3->rtype == GLF3_RTYPE_SUB) - for (j = 0; j != 10; ++j) printf("\t%d", g3->lk[j]); - else { - printf("\t%d\t%d\t%d\t%d\t%d\t%s\t%s\t", g3->lk[0], g3->lk[1], g3->lk[2], g3->indel_len[0], g3->indel_len[1], - g3->indel_len[0]? g3->indel_seq[0] : "*", g3->indel_len[1]? 
g3->indel_seq[1] : "*"); - } - printf("\n"); -} - -int glf3_write1(glfFile fp, const glf3_t *g3) -{ - int r; - uint8_t c; - uint32_t y[2]; - c = g3->rtype<<4 | g3->ref_base; - r = bgzf_write(fp, &c, 1); - if (g3->rtype == GLF3_RTYPE_END) return r; - y[0] = g3->offset; - y[1] = g3->min_lk<<24 | g3->depth; - if (glf3_is_BE) { - y[0] = bam_swap_endian_4(y[0]); - y[1] = bam_swap_endian_4(y[1]); - } - r += bgzf_write(fp, y, 8); - r += bgzf_write(fp, &g3->rms_mapQ, 1); - if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_write(fp, g3->lk, 10); - else { - int16_t x[2]; - r += bgzf_write(fp, g3->lk, 3); - x[0] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[0]) : g3->indel_len[0]; - x[1] = glf3_is_BE? bam_swap_endian_2(g3->indel_len[1]) : g3->indel_len[1]; - r += bgzf_write(fp, x, 4); - if (g3->indel_len[0]) r += bgzf_write(fp, g3->indel_seq[0], abs(g3->indel_len[0])); - if (g3->indel_len[1]) r += bgzf_write(fp, g3->indel_seq[1], abs(g3->indel_len[1])); - } - return r; -} - -#ifndef kv_roundup32 -#define kv_roundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) -#endif - -int glf3_read1(glfFile fp, glf3_t *g3) -{ - int r; - uint8_t c; - uint32_t y[2]; - r = bgzf_read(fp, &c, 1); - if (r == 0) return 0; - g3->ref_base = c & 0xf; - g3->rtype = c>>4; - if (g3->rtype == GLF3_RTYPE_END) return r; - r += bgzf_read(fp, y, 8); - if (glf3_is_BE) { - y[0] = bam_swap_endian_4(y[0]); - y[1] = bam_swap_endian_4(y[1]); - } - g3->offset = y[0]; - g3->min_lk = y[1]>>24; - g3->depth = y[1]<<8>>8; - r += bgzf_read(fp, &g3->rms_mapQ, 1); - if (g3->rtype == GLF3_RTYPE_SUB) r += bgzf_read(fp, g3->lk, 10); - else { - int16_t x[2], max; - r += bgzf_read(fp, g3->lk, 3); - r += bgzf_read(fp, x, 4); - if (glf3_is_BE) { - x[0] = bam_swap_endian_2(x[0]); - x[1] = bam_swap_endian_2(x[1]); - } - g3->indel_len[0] = x[0]; - g3->indel_len[1] = x[1]; - x[0] = abs(x[0]); x[1] = abs(x[1]); - max = (x[0] > x[1]? 
x[0] : x[1]) + 1; - if (g3->max_len < max) { - g3->max_len = max; - kv_roundup32(g3->max_len); - g3->indel_seq[0] = (char*)realloc(g3->indel_seq[0], g3->max_len); - g3->indel_seq[1] = (char*)realloc(g3->indel_seq[1], g3->max_len); - } - r += bgzf_read(fp, g3->indel_seq[0], x[0]); - r += bgzf_read(fp, g3->indel_seq[1], x[1]); - g3->indel_seq[0][x[0]] = g3->indel_seq[1][x[1]] = 0; - } - return r; -} - -void glf3_view(glfFile fp) -{ - glf3_header_t *h; - char *name; - glf3_t *g3; - int len; - h = glf3_header_read(fp); - g3 = glf3_init1(); - while ((name = glf3_ref_read(fp, &len)) != 0) { - int pos = 0; - while (glf3_read1(fp, g3) && g3->rtype != GLF3_RTYPE_END) { - pos += g3->offset; - glf3_view1(name, g3, pos); - } - free(name); - } - glf3_header_destroy(h); - glf3_destroy1(g3); -} - -int glf3_view_main(int argc, char *argv[]) -{ - glfFile fp; - if (argc == 1) { - fprintf(stderr, "Usage: glfview <in.glf>\n"); - return 1; - } - fp = (strcmp(argv[1], "-") == 0)? bgzf_fdopen(fileno(stdin), "r") : bgzf_open(argv[1], "r"); - if (fp == 0) { - fprintf(stderr, "Fail to open file '%s'\n", argv[1]); - return 1; - } - glf3_view(fp); - bgzf_close(fp); - return 0; -} - -#ifdef GLFVIEW_MAIN -int main(int argc, char *argv[]) -{ - return glf3_view_main(argc, argv); -} -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/glf.h --- a/chimerascan/pysam/samtools/glf.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,56 +0,0 @@ -#ifndef GLF_H_ -#define GLF_H_ - -typedef struct { - unsigned char ref_base:4, dummy:4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */ - unsigned char max_mapQ; /** maximum mapping quality */ - unsigned char lk[10]; /** log likelihood ratio, capped at 255 */ - unsigned min_lk:8, depth:24; /** minimum lk capped at 255, and the number of mapped reads */ -} glf1_t; - -#include <stdint.h> -#include "bgzf.h" -typedef BGZF *glfFile; - -#define GLF3_RTYPE_END 0 -#define GLF3_RTYPE_SUB 1 -#define GLF3_RTYPE_INDEL 2 - -typedef struct { - uint8_t ref_base:4, rtype:4; /** "XACMGRSVTWYHKDBN"[ref_base] gives the reference base */ - uint8_t rms_mapQ; /** RMS mapping quality */ - uint8_t lk[10]; /** log likelihood ratio, capped at 255 */ - uint32_t min_lk:8, depth:24; /** minimum lk capped at 255, and the number of mapped reads */ - int32_t offset; /** the first base in a chromosome has offset zero. */ - // for indel (lkHom1, lkHom2 and lkHet are the first three elements in lk[10]) - int16_t indel_len[2]; - int32_t max_len; // maximum indel len; will be modified by glf3_read1() - char *indel_seq[2]; -} glf3_t; - -typedef struct { - int32_t l_text; - uint8_t *text; -} glf3_header_t; - -#ifdef __cplusplus -extern "C" { -#endif - -#define glf3_init1() ((glf3_t*)calloc(1, sizeof(glf3_t))) -#define glf3_destroy1(g3) do { free((g3)->indel_seq[0]); free((g3)->indel_seq[1]); free(g3); } while (0) - - glf3_header_t *glf3_header_init(); - glf3_header_t *glf3_header_read(glfFile fp); - void glf3_header_write(glfFile fp, const glf3_header_t *h); - void glf3_header_destroy(glf3_header_t *h); - char *glf3_ref_read(glfFile fp, int *len); - void glf3_ref_write(glfFile fp, const char *name, int len); - int glf3_write1(glfFile fp, const glf3_t *g3); - int glf3_read1(glfFile fp, glf3_t *g3); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/kaln.c --- a/chimerascan/pysam/samtools/kaln.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,370 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2003-2006, 2008, 2009, by Heng Li <lh3lh3@gmail.com>\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-#include <stdlib.h>\n-#include <stdio.h>\n-#include <string.h>\n-#include <stdint.h>\n-#include "kaln.h"\n-\n-#define FROM_M 0\n-#define FROM_I 1\n-#define FROM_D 2\n-\n-typedef struct {\n-\tint i, j;\n-\tunsigned char ctype;\n-} path_t;\n-\n-int aln_sm_blosum62[] = {\n-/*\t A R N D C Q E G H I L K M F P S T W Y V * X */\n-\t 4,-1,-2,-2, 0,-1,-1, 0,-2,-1,-1,-1,-1,-2,-1, 1, 0,-3,-2, 0,-4, 0,\n-\t-1, 5, 0,-2,-3, 1, 0,-2, 0,-3,-2, 2,-1,-3,-2,-1,-1,-3,-2,-3,-4,-1,\n-\t-2, 0, 6, 1,-3, 0, 0, 0, 1,-3,-3, 0,-2,-3,-2, 1, 0,-4,-2,-3,-4,-1,\n-\t-2,-2, 1, 6,-3, 0, 2,-1,-1,-3,-4,-1,-3,-3,-1, 0,-1,-4,-3,-3,-4,-1,\n-\t 0,-3,-3,-3, 9,-3,-4,-3,-3,-1,-1,-3,-1,-2,-3,-1,-1,-2,-2,-1,-4,-2,\n-\t-1, 1, 0, 0,-3, 5, 2,-2, 0,-3,-2, 1, 0,-3,-1, 0,-1,-2,-1,-2,-4,-1,\n-\t-1, 0, 0, 2,-4, 2, 5,-2, 0,-3,-3, 
1,-2,-3,-1, 0,-1,-3,-2,-2,-4,-1,\n-\t 0,-2, 0,-1,-3,-2,-2, 6,-2,-4,-4,-2,-3,-3,-2, 0,-2,-2,-3,-3,-4,-1,\n-\t-2, 0, 1,-1,-3, 0, 0,-2, 8,-3,-3,-1,-2,-1,-2,-1,-2,-2, 2,-3,-4,-1,\n-\t-1,-3,-3,-3,-1,-3,-3,-4,-3, 4, 2,-3, 1, 0,-3,-2,-1,-3,-1, 3,-4,-1,\n-\t-1,-2,-3,-4,-1,-2,-3,-4,-3, 2, 4,-2, 2, 0,-3,-2,-1,-2,-1, 1,-4,-1,\n-\t-1, 2, 0,-1,-3, 1, 1,-2,-1,-3,-2, 5,-1,-3,-1, 0,-1,-3,-2,-2,-4,-1,\n-\t-1,-1,-2,-3,-1, 0,-2,-3,-2, 1, 2,-1, 5, 0,-2,-1,-1,-1,-1, 1,-4,-1,\n-\t-2,-3,-3,-3,-2,-3,-3,-3,-1, 0, 0,-3, 0, 6,-4,-2,-2, 1, 3,-1,-4,-1,\n-\t-1,-2,-2,-1,-3,-1,-1,-2,-2,-3,-3,-1,-2,-4, 7,-1,-1,-4,-3,-2,-4,-2,\n-\t 1,-1, 1, 0,-1, 0, 0, 0,-1,-2,-2, 0,-1,-2,-1, 4, 1,-3,-2,-2,-4, 0,\n-\t 0,-1, 0,-1,-1,-1,-1,-2,-2,-1,-1,-1,-1,-2,-1, 1, 5,-2,-2, 0,-4, 0,\n-\t-3,-3,-4,-4,-2,-2,-3,-2,-2,-3,-2,-3,-1, 1,-4,-3,-2,11, 2,-3,-4,-2,\n-\t-2,-2,-2,-3,-2,-1,-2,-3, 2,-1,-1,-2,-1, 3,-3,-2,-2, 2, 7,-1,-4,-1,\n-\t 0,-3,-3,-3,-1,-2,-2,-3,-3, 3, 1,-2, 1,-1,-2,-2, 0,-3,-1, 4,-4,-1,\n-\t-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4, 1,-4,\n-\t 0,-1,-1,-1,-2,-1,-1,-1,-1,-1,-1,-1,-1,-1,-2, 0, 0,-2,-1,-1,-4,-1\n-};\n-\n-int aln_sm_blast[] = {\n-\t1, -3, -3, -3, -2,\n-\t-3, 1, -3, -3, -2,\n-\t-3, -3, 1, -3, -2,\n-\t-3, -3, -3, 1, -2,\n-\t-2, -2, -2, -2, -2\n-};\n-\n-ka_param_t ka_param_blast = { 5, 2, 2, aln_sm_blast, 5, 50 };\n-ka_param_t ka_param_aa2aa = { 10, 2, 2, aln_sm_blosum62, 22, 50 };\n-\n-static uint32_t *ka_path2cigar32(const path_t *path, int path_len, int *n_cigar)\n-{\n-\tint i, n;\n-\tuint32_t *cigar;\n-\tunsigned char last_type;\n-\n-\tif (path_len == 0 || path == 0) {\n-\t\t*n_cigar = 0;\n-\t\treturn 0;\n-\t}\n-\n-\tlast_type = path->ctype;\n-\tfor (i = n = 1; i < path_len; ++i) {\n-\t\tif (last_type != path[i].ctype) ++n;\n-\t\tlast_type = path[i].ctype;\n-\t}\n-\t*n_cigar = n;\n-\tcigar = (uint32_t*)calloc(*n_cigar, 4);\n-\n-\tcigar[0] = 1u << 4 | path[path_len-1].ctype;\n-\tlast_type = path[path_len-1].ctype;\n-\tfor (i = path_len - 2, n = 0; i >= 0; --i) {\n-\t\tif 
(path[i].ctype == last_type) cigar[n] += 1u << 4;\n-\t\telse {\n-\t\t\tcigar[++n] = 1u << 4 | path[i].ctype;\n-\t\t\tlast'..b'= last; last = s;\n-\n-\t/* core dynamic programming, part 1 */\n-\ttmp_end = (b2 < len2)? b2 : len2 - 1;\n-\tfor (j = 1; j <= tmp_end; ++j) {\n-\t\tq = dpcell[j]; s = curr; SET_INF(*s);\n-\t\tset_end_I(s->I, q, last);\n-\t\tend = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;\n-\t\tmat = score_matrix + seq2[j] * N_MATRIX_ROW;\n-\t\t++s; ++q;\n-\t\tfor (i = 1; i != end; ++i, ++s, ++q) {\n-\t\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! */\n-\t\t\tset_I(s->I, q, last + i);\n-\t\t\tset_D(s->D, q, s - 1);\n-\t\t}\n-\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n-\t\tset_D(s->D, q, s - 1);\n-\t\tif (j + b1 - 1 > len1) { /* bug fixed, 040227 */\n-\t\t\tset_end_I(s->I, q, last + i);\n-\t\t} else s->I = MINOR_INF;\n-\t\ts = curr; curr = last; last = s;\n-\t}\n-\t/* last row for part 1, use set_end_D() instead of set_D() */\n-\tif (j == len2 && b2 != len2 - 1) {\n-\t\tq = dpcell[j]; s = curr; SET_INF(*s);\n-\t\tset_end_I(s->I, q, last);\n-\t\tend = (j + b1 <= len1 + 1)? (j + b1 - 1) : len1;\n-\t\tmat = score_matrix + seq2[j] * N_MATRIX_ROW;\n-\t\t++s; ++q;\n-\t\tfor (i = 1; i != end; ++i, ++s, ++q) {\n-\t\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]); /* this will change s->M ! 
*/\n-\t\t\tset_I(s->I, q, last + i);\n-\t\t\tset_end_D(s->D, q, s - 1);\n-\t\t}\n-\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n-\t\tset_end_D(s->D, q, s - 1);\n-\t\tif (j + b1 - 1 > len1) { /* bug fixed, 040227 */\n-\t\t\tset_end_I(s->I, q, last + i);\n-\t\t} else s->I = MINOR_INF;\n-\t\ts = curr; curr = last; last = s;\n-\t\t++j;\n-\t}\n-\n-\t/* core dynamic programming, part 2 */\n-\tfor (; j <= len2 - b2 + 1; ++j) {\n-\t\tSET_INF(curr[j - b2]);\n-\t\tmat = score_matrix + seq2[j] * N_MATRIX_ROW;\n-\t\tend = j + b1 - 1;\n-\t\tfor (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i != end; ++i, ++s, ++q) {\n-\t\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n-\t\t\tset_I(s->I, q, last + i);\n-\t\t\tset_D(s->D, q, s - 1);\n-\t\t}\n-\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n-\t\tset_D(s->D, q, s - 1);\n-\t\ts->I = MINOR_INF;\n-\t\ts = curr; curr = last; last = s;\n-\t}\n-\n-\t/* core dynamic programming, part 3 */\n-\tfor (; j < len2; ++j) {\n-\t\tSET_INF(curr[j - b2]);\n-\t\tmat = score_matrix + seq2[j] * N_MATRIX_ROW;\n-\t\tfor (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {\n-\t\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n-\t\t\tset_I(s->I, q, last + i);\n-\t\t\tset_D(s->D, q, s - 1);\n-\t\t}\n-\t\tset_M(s->M, q, last + len1 - 1, mat[seq1[i]]);\n-\t\tset_end_I(s->I, q, last + i);\n-\t\tset_D(s->D, q, s - 1);\n-\t\ts = curr; curr = last; last = s;\n-\t}\n-\t/* last row */\n-\tif (j == len2) {\n-\t\tSET_INF(curr[j - b2]);\n-\t\tmat = score_matrix + seq2[j] * N_MATRIX_ROW;\n-\t\tfor (i = j - b2 + 1, q = dpcell[j] + i, s = curr + i; i < len1; ++i, ++s, ++q) {\n-\t\t\tset_M(s->M, q, last + i - 1, mat[seq1[i]]);\n-\t\t\tset_I(s->I, q, last + i);\n-\t\t\tset_end_D(s->D, q, s - 1);\n-\t\t}\n-\t\tset_M(s->M, q, last + len1 - 1, mat[seq1[i]]);\n-\t\tset_end_I(s->I, q, last + i);\n-\t\tset_end_D(s->D, q, s - 1);\n-\t\ts = curr; curr = last; last = s;\n-\t}\n-\n-\t*_score = last[len1].M;\n-\tif (n_cigar) { /* backtrace 
*/\n-\t\tpath_t *p, *path = (path_t*)malloc(sizeof(path_t) * (len1 + len2 + 2));\n-\t\ti = len1; j = len2;\n-\t\tq = dpcell[j] + i;\n-\t\ts = last + len1;\n-\t\tmax = s->M; type = q->Mt; ctype = FROM_M;\n-\t\tif (s->I > max) { max = s->I; type = q->It; ctype = FROM_I; }\n-\t\tif (s->D > max) { max = s->D; type = q->Dt; ctype = FROM_D; }\n-\n-\t\tp = path;\n-\t\tp->ctype = ctype; p->i = i; p->j = j; /* bug fixed 040408 */\n-\t\t++p;\n-\t\tdo {\n-\t\t\tswitch (ctype) {\n-\t\t\tcase FROM_M: --i; --j; break;\n-\t\t\tcase FROM_I: --j; break;\n-\t\t\tcase FROM_D: --i; break;\n-\t\t\t}\n-\t\t\tq = dpcell[j] + i;\n-\t\t\tctype = type;\n-\t\t\tswitch (type) {\n-\t\t\tcase FROM_M: type = q->Mt; break;\n-\t\t\tcase FROM_I: type = q->It; break;\n-\t\t\tcase FROM_D: type = q->Dt; break;\n-\t\t\t}\n-\t\t\tp->ctype = ctype; p->i = i; p->j = j;\n-\t\t\t++p;\n-\t\t} while (i || j);\n-\t\tcigar = ka_path2cigar32(path, p - path - 1, n_cigar);\n-\t\tfree(path);\n-\t}\n-\n-\t/* free memory */\n-\tfor (j = b2 + 1; j <= len2; ++j)\n-\t\tdpcell[j] += j - b2;\n-\tfor (j = 0; j <= len2; ++j)\n-\t\tfree(dpcell[j]);\n-\tfree(dpcell);\n-\tfree(curr); free(last);\n-\n-\treturn cigar;\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/kaln.h --- a/chimerascan/pysam/samtools/kaln.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,55 +0,0 @@ -/* The MIT License - - Copyright (c) 2003-2006, 2008, 2009 by Heng Li <lh3@live.co.uk> - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -#ifndef LH3_KALN_H_ -#define LH3_KALN_H_ - -#include <stdint.h> - -#define MINOR_INF -1073741823 - -typedef struct { - int gap_open; - int gap_ext; - int gap_end; - - int *matrix; - int row; - int band_width; -} ka_param_t; - -#ifdef __cplusplus -extern "C" { -#endif - - uint32_t *ka_global_core(uint8_t *seq1, int len1, uint8_t *seq2, int len2, const ka_param_t *ap, int *_score, int *n_cigar); - -#ifdef __cplusplus -} -#endif - -extern ka_param_t ka_param_blast; /* = { 5, 2, 2, aln_sm_blast, 5, 50 }; */ - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/khash.h --- a/chimerascan/pysam/samtools/khash.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,486 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Genome Research Ltd (GRL).\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Contact: Heng Li <lh3@sanger.ac.uk> */\n-\n-/*\n- An example:\n-\n-#include "khash.h"\n-KHASH_MAP_INIT_INT(32, char)\n-int main() {\n-\tint ret, is_missing;\n-\tkhiter_t k;\n-\tkhash_t(32) *h = kh_init(32);\n-\tk = kh_put(32, h, 5, &ret);\n-\tif (!ret) kh_del(32, h, k);\n-\tkh_value(h, k) = 10;\n-\tk = kh_get(32, h, 10);\n-\tis_missing = (k == kh_end(h));\n-\tk = kh_get(32, h, 5);\n-\tkh_del(32, h, k);\n-\tfor (k = kh_begin(h); k != kh_end(h); ++k)\n-\t\tif (kh_exist(h, k)) kh_value(h, k) = 1;\n-\tkh_destroy(32, h);\n-\treturn 0;\n-}\n-*/\n-\n-/*\n- 2008-09-19 (0.2.3):\n-\n-\t* Corrected the example\n-\t* Improved interfaces\n-\n- 2008-09-11 (0.2.2):\n-\n-\t* Improved speed a little in kh_put()\n-\n- 2008-09-10 (0.2.1):\n-\n-\t* Added kh_clear()\n-\t* Fixed a compiling error\n-\n- 2008-09-02 
(0.2.0):\n-\n-\t* Changed to token concatenation which increases flexibility.\n-\n- 2008-08-31 (0.1.2):\n-\n-\t* Fixed a bug in kh_get(), which has not been tested previously.\n-\n- 2008-08-31 (0.1.1):\n-\n-\t* Added destructor\n-*/\n-\n-\n-#ifndef __AC_KHASH_H\n-#define __AC_KHASH_H\n-\n-/*!\n- @header\n-\n- Generic hash table library.\n-\n- @copyright Heng Li\n- */\n-\n-#define AC_VERSION_KHASH_H "0.2.2"\n-\n-#include <stdint.h>\n-#include <stdlib.h>\n-#include <string.h>\n-\n-typedef uint32_t khint_t;\n-typedef khint_t khiter_t;\n-\n-#define __ac_HASH_PRIME_SIZE 32\n-static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =\n-{\n- 0ul, 3ul, 11ul, 23ul, 53ul,\n- 97ul, 193ul, 389ul, 769ul, 1543ul,\n- 3079ul, 6151ul, 12289ul, 24593ul, 49157ul,\n- 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul,\n- 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul,\n- 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,\n- 3221225473ul, 4294967291ul\n-};\n-\n-#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)\n-#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)\n-#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)\n-#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))\n-#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))\n-#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))\n-#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))\n-\n-static const double __ac_HASH_UPPER = 0.77;\n-\n-#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \\\n-\ttypedef struct {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkhint_t n_buckets, size, n_occupied, upper_bound;\t\t\t\t\\\n-\t\tuint32_t *flags;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkhkey_t *keys;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkhval_t *vals;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t} kh_##name##_t;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic inline kh_##name##_t *kh_init_##name() 
{\t\t\t\t\t\t\\\n-\t\treturn (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t));\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic inlin'..b'e, h, k) kh_get_##name(h, k)\n-\n-/*! @function\n- @abstract Remove a key from the hash table.\n- @param name Name of the hash table [symbol]\n- @param h Pointer to the hash table [khash_t(name)*]\n- @param k Iterator to the element to be deleted [khint_t]\n- */\n-#define kh_del(name, h, k) kh_del_##name(h, k)\n-\n-\n-/*! @function\n- @abstract Test whether a bucket contains data.\n- @param h Pointer to the hash table [khash_t(name)*]\n- @param x Iterator to the bucket [khint_t]\n- @return 1 if containing data; 0 otherwise [int]\n- */\n-#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))\n-\n-/*! @function\n- @abstract Get key given an iterator\n- @param h Pointer to the hash table [khash_t(name)*]\n- @param x Iterator to the bucket [khint_t]\n- @return Key [type of keys]\n- */\n-#define kh_key(h, x) ((h)->keys[x])\n-\n-/*! @function\n- @abstract Get value given an iterator\n- @param h Pointer to the hash table [khash_t(name)*]\n- @param x Iterator to the bucket [khint_t]\n- @return Value [type of values]\n- @discussion For hash sets, calling this results in segfault.\n- */\n-#define kh_val(h, x) ((h)->vals[x])\n-\n-/*! @function\n- @abstract Alias of kh_val()\n- */\n-#define kh_value(h, x) ((h)->vals[x])\n-\n-/*! @function\n- @abstract Get the start iterator\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return The start iterator [khint_t]\n- */\n-#define kh_begin(h) (khint_t)(0)\n-\n-/*! @function\n- @abstract Get the end iterator\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return The end iterator [khint_t]\n- */\n-#define kh_end(h) ((h)->n_buckets)\n-\n-/*! @function\n- @abstract Get the number of elements in the hash table\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return Number of elements in the hash table [khint_t]\n- */\n-#define kh_size(h) ((h)->size)\n-\n-/*! 
@function\n- @abstract Get the number of buckets in the hash table\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return Number of buckets in the hash table [khint_t]\n- */\n-#define kh_n_buckets(h) ((h)->n_buckets)\n-\n-/* More conenient interfaces */\n-\n-/*! @function\n- @abstract Instantiate a hash set containing integer keys\n- @param name Name of the hash table [symbol]\n- */\n-#define KHASH_SET_INIT_INT(name)\t\t\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, uint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)\n-\n-/*! @function\n- @abstract Instantiate a hash map containing integer keys\n- @param name Name of the hash table [symbol]\n- @param khval_t Type of values [type]\n- */\n-#define KHASH_MAP_INIT_INT(name, khval_t)\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)\n-\n-/*! @function\n- @abstract Instantiate a hash map containing 64-bit integer keys\n- @param name Name of the hash table [symbol]\n- */\n-#define KHASH_SET_INIT_INT64(name)\t\t\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)\n-\n-/*! @function\n- @abstract Instantiate a hash map containing 64-bit integer keys\n- @param name Name of the hash table [symbol]\n- @param khval_t Type of values [type]\n- */\n-#define KHASH_MAP_INIT_INT64(name, khval_t)\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, uint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)\n-\n-typedef const char *kh_cstr_t;\n-/*! @function\n- @abstract Instantiate a hash map containing const char* keys\n- @param name Name of the hash table [symbol]\n- */\n-#define KHASH_SET_INIT_STR(name)\t\t\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)\n-\n-/*! 
@function\n- @abstract Instantiate a hash map containing const char* keys\n- @param name Name of the hash table [symbol]\n- @param khval_t Type of values [type]\n- */\n-#define KHASH_MAP_INIT_STR(name, khval_t)\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)\n-\n-#endif /* __AC_KHASH_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/klist.h --- a/chimerascan/pysam/samtools/klist.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,96 +0,0 @@ -#ifndef _LH3_KLIST_H -#define _LH3_KLIST_H - -#include <stdlib.h> - -#define KMEMPOOL_INIT(name, kmptype_t, kmpfree_f) \ - typedef struct { \ - size_t cnt, n, max; \ - kmptype_t **buf; \ - } kmp_##name##_t; \ - static inline kmp_##name##_t *kmp_init_##name() { \ - return calloc(1, sizeof(kmp_##name##_t)); \ - } \ - static inline void kmp_destroy_##name(kmp_##name##_t *mp) { \ - size_t k; \ - for (k = 0; k < mp->n; ++k) { \ - kmpfree_f(mp->buf[k]); free(mp->buf[k]); \ - } \ - free(mp->buf); free(mp); \ - } \ - static inline kmptype_t *kmp_alloc_##name(kmp_##name##_t *mp) { \ - ++mp->cnt; \ - if (mp->n == 0) return calloc(1, sizeof(kmptype_t)); \ - return mp->buf[--mp->n]; \ - } \ - static inline void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \ - --mp->cnt; \ - if (mp->n == mp->max) { \ - mp->max = mp->max? mp->max<<1 : 16; \ - mp->buf = realloc(mp->buf, sizeof(void*) * mp->max); \ - } \ - mp->buf[mp->n++] = p; \ - } - -#define kmempool_t(name) kmp_##name##_t -#define kmp_init(name) kmp_init_##name() -#define kmp_destroy(name, mp) kmp_destroy_##name(mp) -#define kmp_alloc(name, mp) kmp_alloc_##name(mp) -#define kmp_free(name, mp, p) kmp_free_##name(mp, p) - -#define KLIST_INIT(name, kltype_t, kmpfree_t) \ - struct __kl1_##name { \ - kltype_t data; \ - struct __kl1_##name *next; \ - }; \ - typedef struct __kl1_##name kl1_##name; \ - KMEMPOOL_INIT(name, kl1_##name, kmpfree_t) \ - typedef struct { \ - kl1_##name *head, *tail; \ - kmp_##name##_t *mp; \ - size_t size; \ - } kl_##name##_t; \ - static inline kl_##name##_t *kl_init_##name() { \ - kl_##name##_t *kl = calloc(1, sizeof(kl_##name##_t)); \ - kl->mp = kmp_init(name); \ - kl->head = kl->tail = kmp_alloc(name, kl->mp); \ - kl->head->next = 0; \ - return kl; \ - } \ - static inline void kl_destroy_##name(kl_##name##_t *kl) { \ - kl1_##name *p; \ - for (p = kl->head; p != kl->tail; p = p->next) \ - kmp_free(name, kl->mp, p); \ - kmp_free(name, kl->mp, p); \ - kmp_destroy(name, kl->mp); \ - 
free(kl); \ - } \ - static inline kltype_t *kl_pushp_##name(kl_##name##_t *kl) { \ - kl1_##name *q, *p = kmp_alloc(name, kl->mp); \ - q = kl->tail; p->next = 0; kl->tail->next = p; kl->tail = p; \ - ++kl->size; \ - return &q->data; \ - } \ - static inline int kl_shift_##name(kl_##name##_t *kl, kltype_t *d) { \ - kl1_##name *p; \ - if (kl->head->next == 0) return -1; \ - --kl->size; \ - p = kl->head; kl->head = kl->head->next; \ - if (d) *d = p->data; \ - kmp_free(name, kl->mp, p); \ - return 0; \ - } - -#define kliter_t(name) kl1_##name -#define klist_t(name) kl_##name##_t -#define kl_val(iter) ((iter)->data) -#define kl_next(iter) ((iter)->next) -#define kl_begin(kl) ((kl)->head) -#define kl_end(kl) ((kl)->tail) - -#define kl_init(name) kl_init_##name() -#define kl_destroy(name, kl) kl_destroy_##name(kl) -#define kl_pushp(name, kl) kl_pushp_##name(kl) -#define kl_shift(name, kl, d) kl_shift_##name(kl, d) - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/knetfile.c --- a/chimerascan/pysam/samtools/knetfile.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,630 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Genome Research Ltd (GRL).\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Contact: Heng Li <lh3@sanger.ac.uk> */\n-\n-/* Probably I will not do socket programming in the next few years and\n- therefore I decide to heavily annotate this file, for Linux and\n- Windows as well. -lh3 */\n-\n-#include <time.h>\n-#include <stdio.h>\n-#include <ctype.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <errno.h>\n-#include <unistd.h>\n-#include <sys/types.h>\n-\n-#ifndef _WIN32\n-#include <netdb.h>\n-#include <arpa/inet.h>\n-#include <sys/socket.h>\n-#endif\n-\n-#include "knetfile.h"\n-\n-/* In winsock.h, the type of a socket is SOCKET, which is: "typedef\n- * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed\n- * integer -1. In knetfile.c, I use "int" for socket type\n- * throughout. 
This should be improved to avoid confusion.\n- *\n- * In Linux/Mac, recv() and read() do almost the same thing. You can see\n- * in the header file that netread() is simply an alias of read(). In\n- * Windows, however, they are different and using recv() is mandatory.\n- */\n-\n-/* This function tests if the file handler is ready for reading (or\n- * writing if is_read==0). */\n-static int socket_wait(int fd, int is_read)\n-{\n-\tfd_set fds, *fdr = 0, *fdw = 0;\n-\tstruct timeval tv;\n-\tint ret;\n-\ttv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out\n-\tFD_ZERO(&fds);\n-\tFD_SET(fd, &fds);\n-\tif (is_read) fdr = &fds;\n-\telse fdw = &fds;\n-\tret = select(fd+1, fdr, fdw, 0, &tv);\n-#ifndef _WIN32\n-\tif (ret == -1) perror("select");\n-#else\n-\tif (ret == 0)\n-\t\tfprintf(stderr, "select time-out\\n");\n-\telse if (ret == SOCKET_ERROR)\n-\t\tfprintf(stderr, "select: %d\\n", WSAGetLastError());\n-#endif\n-\treturn ret;\n-}\n-\n-#ifndef _WIN32\n-/* This function does not work with Windows due to the lack of\n- * getaddrinfo() in winsock. It is addapted from an example in "Beej\'s\n- * Guide to Network Programming" (http://beej.us/guide/bgnet/). */\n-static int socket_connect(const char *host, const char *port)\n-{\n-#define __err_connect(func) do { perror(func); freeaddrinfo(res); return -1; } while (0)\n-\n-\tint on = 1, fd;\n-\tstruct linger lng = { 0, 0 };\n-\tstruct addrinfo hints, *res;\n-\tmemset(&hints, 0, sizeof(struct addrinfo));\n-\thints.ai_family = AF_UNSPEC;\n-\thints.ai_socktype = SOCK_STREAM;\n-\t/* In Unix/Mac, getaddrinfo() is the most convenient way to get\n-\t * server information. */\n-\tif (getaddrinfo(host, port, &hints, &res) != 0) __err_connect("getaddrinfo");\n-\tif ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1) __err_connect("socket");\n-\t/* The following two setsockopt() are used by ftplib\n-\t * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they\n-\t * necessary. 
*/\n-\tif (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) __err_connect("setsockopt");\n-\tif (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1) __err_connect("setsockopt");\n-\tif (connect('..b'fp->fd == -1) {\n-\t\tknet_close(fp);\n-\t\treturn 0;\n-\t}\n-\treturn fp;\n-}\n-\n-knetFile *knet_dopen(int fd, const char *mode)\n-{\n-\tknetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));\n-\tfp->type = KNF_TYPE_LOCAL;\n-\tfp->fd = fd;\n-\treturn fp;\n-}\n-\n-off_t knet_read(knetFile *fp, void *buf, off_t len)\n-{\n-\toff_t l = 0;\n-\tif (fp->fd == -1) return 0;\n-\tif (fp->type == KNF_TYPE_FTP) {\n-\t\tif (fp->is_ready == 0) {\n-\t\t\tif (!fp->no_reconnect) kftp_reconnect(fp);\n-\t\t\tkftp_connect_file(fp);\n-\t\t}\n-\t} else if (fp->type == KNF_TYPE_HTTP) {\n-\t\tif (fp->is_ready == 0)\n-\t\t\tkhttp_connect_file(fp);\n-\t}\n-\tif (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX\n-\t\toff_t rest = len, curr;\n-\t\twhile (rest) {\n-\t\t\tcurr = read(fp->fd, buf + l, rest);\n-\t\t\tif (curr == 0) break;\n-\t\t\tl += curr; rest -= curr;\n-\t\t}\n-\t} else l = my_netread(fp->fd, buf, len);\n-\tfp->offset += l;\n-\treturn l;\n-}\n-\n-off_t knet_seek(knetFile *fp, int64_t off, int whence)\n-{\n-\tif (whence == SEEK_SET && off == fp->offset) return 0;\n-\tif (fp->type == KNF_TYPE_LOCAL) {\n-\t\t/* Be aware that lseek() returns the offset after seeking,\n-\t\t * while fseek() returns zero on success. 
*/\n-\t\toff_t offset = lseek(fp->fd, off, whence);\n-\t\tif (offset == -1) {\n- // Be silent, it is OK for knet_seek to fail when the file is streamed\n- // fprintf(stderr,"[knet_seek] %s\\n", strerror(errno));\n-\t\t\treturn -1;\n-\t\t}\n-\t\tfp->offset = offset;\n-\t\treturn 0;\n-\t}\n- else if (fp->type == KNF_TYPE_FTP) \n- {\n- if (whence==SEEK_CUR)\n- fp->offset += off;\n- else if (whence==SEEK_SET)\n- fp->offset = off;\n- else if ( whence==SEEK_END)\n- fp->offset = fp->file_size+off;\n-\t\tfp->is_ready = 0;\n-\t\treturn 0;\n-\t} \n- else if (fp->type == KNF_TYPE_HTTP) \n- {\n-\t\tif (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?\n-\t\t\tfprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\\n");\n-\t\t\terrno = ESPIPE;\n-\t\t\treturn -1;\n-\t\t}\n- if (whence==SEEK_CUR)\n- fp->offset += off;\n- else if (whence==SEEK_SET)\n- fp->offset = off;\n-\t\tfp->is_ready = 0;\n-\t\treturn 0;\n-\t}\n-\terrno = EINVAL;\n- fprintf(stderr,"[knet_seek] %s\\n", strerror(errno));\n-\treturn -1;\n-}\n-\n-int knet_close(knetFile *fp)\n-{\n-\tif (fp == 0) return 0;\n-\tif (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific\n-\tif (fp->fd != -1) {\n-\t\t/* On Linux/Mac, netclose() is an alias of close(), but on\n-\t\t * Windows, it is an alias of closesocket(). 
*/\n-\t\tif (fp->type == KNF_TYPE_LOCAL) close(fp->fd);\n-\t\telse netclose(fp->fd);\n-\t}\n-\tfree(fp->host); free(fp->port);\n-\tfree(fp->response); free(fp->retr); // FTP specific\n-\tfree(fp->path); free(fp->http_host); // HTTP specific\n-\tfree(fp);\n-\treturn 0;\n-}\n-\n-#ifdef KNETFILE_MAIN\n-int main(void)\n-{\n-\tchar *buf;\n-\tknetFile *fp;\n-\tint type = 4, l;\n-#ifdef _WIN32\n-\tknet_win32_init();\n-#endif\n-\tbuf = calloc(0x100000, 1);\n-\tif (type == 0) {\n-\t\tfp = knet_open("knetfile.c", "r");\n-\t\tknet_seek(fp, 1000, SEEK_SET);\n-\t} else if (type == 1) { // NCBI FTP, large file\n-\t\tfp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");\n-\t\tknet_seek(fp, 2500000000ll, SEEK_SET);\n-\t\tl = knet_read(fp, buf, 255);\n-\t} else if (type == 2) {\n-\t\tfp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");\n-\t\tknet_seek(fp, 1000, SEEK_SET);\n-\t} else if (type == 3) {\n-\t\tfp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");\n-\t\tknet_seek(fp, 1000, SEEK_SET);\n-\t} else if (type == 4) {\n-\t\tfp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");\n-\t\tknet_read(fp, buf, 10000);\n-\t\tknet_seek(fp, 20000, SEEK_SET);\n-\t\tknet_seek(fp, 10000, SEEK_SET);\n-\t\tl = knet_read(fp, buf+10000, 10000000) + 10000;\n-\t}\n-\tif (type != 4 && type != 1) {\n-\t\tknet_read(fp, buf, 255);\n-\t\tbuf[255] = 0;\n-\t\tprintf("%s\\n", buf);\n-\t} else write(fileno(stdout), buf, l);\n-\tknet_close(fp);\n-\tfree(buf);\n-\treturn 0;\n-}\n-#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/knetfile.h --- a/chimerascan/pysam/samtools/knetfile.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,75 +0,0 @@ -#ifndef KNETFILE_H -#define KNETFILE_H - -#include <stdint.h> -#include <fcntl.h> - -#ifndef _WIN32 -#define netread(fd, ptr, len) read(fd, ptr, len) -#define netwrite(fd, ptr, len) write(fd, ptr, len) -#define netclose(fd) close(fd) -#else -#include <winsock2.h> -#define netread(fd, ptr, len) recv(fd, ptr, len, 0) -#define netwrite(fd, ptr, len) send(fd, ptr, len, 0) -#define netclose(fd) closesocket(fd) -#endif - -// FIXME: currently I/O is unbuffered - -#define KNF_TYPE_LOCAL 1 -#define KNF_TYPE_FTP 2 -#define KNF_TYPE_HTTP 3 - -typedef struct knetFile_s { - int type, fd; - int64_t offset; - char *host, *port; - - // the following are for FTP only - int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; - char *response, *retr, *size_cmd; - int64_t seek_offset; // for lazy seek - int64_t file_size; - - // the following are for HTTP only - char *path, *http_host; -} knetFile; - -#define knet_tell(fp) ((fp)->offset) -#define knet_fileno(fp) ((fp)->fd) - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _WIN32 - int knet_win32_init(); - void knet_win32_destroy(); -#endif - - knetFile *knet_open(const char *fn, const char *mode); - - /* - This only works with local files. - */ - knetFile *knet_dopen(int fd, const char *mode); - - /* - If ->is_ready==0, this routine updates ->fd; otherwise, it simply - reads from ->fd. - */ - off_t knet_read(knetFile *fp, void *buf, off_t len); - - /* - This routine only sets ->offset and ->is_ready=0. It does not - communicate with the FTP server. - */ - off_t knet_seek(knetFile *fp, int64_t off, int whence); - int knet_close(knetFile *fp); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/kseq.h --- a/chimerascan/pysam/samtools/kseq.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,227 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Genome Research Ltd (GRL).\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Contact: Heng Li <lh3@sanger.ac.uk> */\n-\n-/*\n- 2009-07-16 (lh3): in kstream_t, change "char*" to "unsigned char*"\n- */\n-\n-/* Last Modified: 12APR2009 */\n-\n-#ifndef AC_KSEQ_H\n-#define AC_KSEQ_H\n-\n-#include <ctype.h>\n-#include <string.h>\n-#include <stdlib.h>\n-\n-#define KS_SEP_SPACE 0 // isspace(): \\t, \\n, \\v, \\f, \\r\n-#define KS_SEP_TAB 1 // isspace() && !\' \'\n-#define KS_SEP_MAX 1\n-\n-#define __KS_TYPE(type_t)\t\t\t\t\t\t\\\n-\ttypedef struct __kstream_t {\t\t\t\t\\\n-\t\tunsigned char *buf;\t\t\t\t\t\t\\\n-\t\tint begin, end, is_eof;\t\t\t\t\t\\\n-\t\ttype_t f;\t\t\t\t\t\t\t\t\\\n-\t} kstream_t;\n-\n-#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)\n-#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)\n-\n-#define __KS_BASIC(type_t, 
__bufsize)\t\t\t\t\t\t\t\t\\\n-\tstatic inline kstream_t *ks_init(type_t f)\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));\t\\\n-\t\tks->f = f;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tks->buf = malloc(__bufsize);\t\t\t\t\t\t\t\t\\\n-\t\treturn ks;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic inline void ks_destroy(kstream_t *ks)\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (ks) {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tfree(ks->buf);\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tfree(ks);\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\n-\n-#define __KS_GETC(__read, __bufsize)\t\t\t\t\t\t\\\n-\tstatic inline int ks_getc(kstream_t *ks)\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (ks->is_eof && ks->begin >= ks->end) return -1;\t\\\n-\t\tif (ks->begin >= ks->end) {\t\t\t\t\t\t\t\\\n-\t\t\tks->begin = 0;\t\t\t\t\t\t\t\t\t\\\n-\t\t\tks->end = __read(ks->f, ks->buf, __bufsize);\t\\\n-\t\t\tif (ks->end < __bufsize) ks->is_eof = 1;\t\t\\\n-\t\t\tif (ks->end == 0) return -1;\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\treturn (int)ks->buf[ks->begin++];\t\t\t\t\t\\\n-\t}\n-\n-#ifndef KSTRING_T\n-#define KSTRING_T kstring_t\n-typedef struct __kstring_t {\n-\tsize_t l, m;\n-\tchar *s;\n-} kstring_t;\n-#endif\n-\n-#ifndef kroundup32\n-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))\n-#endif\n-\n-#define __KS_GETUNTIL(__read, __bufsize)\t\t\t\t\t\t\t\t\\\n-\tstatic int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (dret) *dret = 0;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tstr->l = 0;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (ks->begin >= ks->end && ks->is_eof) return -1;\t\t\t\t\\\n-\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tint i;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (ks->begin >= ks->end) 
{\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (!ks->is_eof) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tks->begin = 0;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tks->end = __read(ks->f, ks->buf, __bufsize);\t\t\\\n-\t\t\t\t\tif (ks->end < __bufsize) ks->is_eof = 1;\t\t\t\\\n-\t\t\t\t\tif (ks->end == 0) break;\t\t\t\t\t\t\t\\\n-\t\t\t\t} else break;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (delimiter > KS_SEP_MAX) {\t\t\t\t\t\t\t\t\\\n-\t\t\t\tfor (i = ks->begin; i < ks->end; ++i)\t\t\t\t\t\\\n-\t\t\t\t\tif (ks->buf[i] == delimiter) break;\t\t\t\t\t\\\n-\t\t\t} else if (delimiter == KS_SEP_SPACE) {\t\t\t\t\t\t\\\n-\t\t\t\tfor (i = ks->begin; i < ks->end; ++i)\t\t\t\t\t\\\n-\t\t\t\t\tif (isspace(ks->buf[i])) break'..b"\t\\\n-\t\t\t\tstr->s = (char*)realloc(str->s, str->m);\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tmemcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \\\n-\t\t\tstr->l = str->l + (i - ks->begin);\t\t\t\t\t\t\t\\\n-\t\t\tks->begin = i + 1;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (i < ks->end) {\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (dret) *dret = ks->buf[i];\t\t\t\t\t\t\t\\\n-\t\t\t\tbreak;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (str->l == 0) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tstr->m = 1;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tstr->s = (char*)calloc(1, 1);\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tstr->s[str->l] = '\\0';\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\treturn str->l;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\n-\n-#define KSTREAM_INIT(type_t, __read, __bufsize) \\\n-\t__KS_TYPE(type_t)\t\t\t\t\t\t\t\\\n-\t__KS_BASIC(type_t, __bufsize)\t\t\t\t\\\n-\t__KS_GETC(__read, __bufsize)\t\t\t\t\\\n-\t__KS_GETUNTIL(__read, __bufsize)\n-\n-#define __KSEQ_BASIC(type_t)\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic inline kseq_t *kseq_init(type_t fd)\t\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkseq_t *s = (kseq_t*)calloc(1, 
sizeof(kseq_t));\t\t\t\t\t\\\n-\t\ts->f = ks_init(fd);\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\treturn s;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic inline void kseq_rewind(kseq_t *ks)\t\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tks->last_char = 0;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tks->f->is_eof = ks->f->begin = ks->f->end = 0;\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic inline void kseq_destroy(kseq_t *ks)\t\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (!ks) return;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tfree(ks->name.s); free(ks->comment.s); free(ks->seq.s);\tfree(ks->qual.s); \\\n-\t\tks_destroy(ks->f);\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tfree(ks);\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\n-\n-/* Return value:\n- >=0 length of the sequence (normal)\n- -1 end-of-file\n- -2 truncated quality string\n- */\n-#define __KSEQ_READ\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic int kseq_read(kseq_t *seq)\t\t\t\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tint c;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkstream_t *ks = seq->f;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (seq->last_char == 0) { /* then jump to the next header line */ \\\n-\t\t\twhile ((c = ks_getc(ks)) != -1 && c != '>' && c != '@');\t\\\n-\t\t\tif (c == -1) return -1; /* end of file */\t\t\t\t\t\\\n-\t\t\tseq->last_char = c;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t} /* the first header char has been read */\t\t\t\t\t\t\\\n-\t\tseq->comment.l = seq->seq.l = seq->qual.l = 0;\t\t\t\t\t\\\n-\t\tif (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1;\t\t\t\\\n-\t\tif (c != '\\n') ks_getuntil(ks, '\\n', &seq->comment, 0);\t\t\t\\\n-\t\twhile ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \\\n-\t\t\tif (isgraph(c)) { /* printable non-space character */\t\t\\\n-\t\t\t\tif (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \\\n-\t\t\t\t\tseq->seq.m = seq->seq.l + 2;\t\t\t\t\t\t\\\n-\t\t\t\t\tkroundup32(seq->seq.m); /* rounded to next 
closest 2^k */ \\\n-\t\t\t\t\tseq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tseq->seq.s[seq->seq.l++] = (char)c;\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */\t\\\n-\t\tseq->seq.s[seq->seq.l] = 0;\t/* null terminated string */\t\t\\\n-\t\tif (c != '+') return seq->seq.l; /* FASTA */\t\t\t\t\t\\\n-\t\tif (seq->qual.m < seq->seq.m) {\t/* allocate enough memory */\t\\\n-\t\t\tseq->qual.m = seq->seq.m;\t\t\t\t\t\t\t\t\t\\\n-\t\t\tseq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\twhile ((c = ks_getc(ks)) != -1 && c != '\\n'); /* skip the rest of '+' line */ \\\n-\t\tif (c == -1) return -2; /* we should not stop here */\t\t\t\\\n-\t\twhile ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l)\t\t\\\n-\t\t\tif (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c;\t\\\n-\t\tseq->qual.s[seq->qual.l] = 0; /* null terminated string */\t\t\\\n-\t\tseq->last_char = 0;\t/* we have not come to the next header line */ \\\n-\t\tif (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \\\n-\t\treturn seq->seq.l;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\n-\n-#define __KSEQ_TYPE(type_t)\t\t\t\t\t\t\\\n-\ttypedef struct {\t\t\t\t\t\t\t\\\n-\t\tkstring_t name, comment, seq, qual;\t\t\\\n-\t\tint last_char;\t\t\t\t\t\t\t\\\n-\t\tkstream_t *f;\t\t\t\t\t\t\t\\\n-\t} kseq_t;\n-\n-#define KSEQ_INIT(type_t, __read)\t\t\t\t\\\n-\tKSTREAM_INIT(type_t, __read, 4096)\t\t\t\\\n-\t__KSEQ_TYPE(type_t)\t\t\t\t\t\t\t\\\n-\t__KSEQ_BASIC(type_t)\t\t\t\t\t\t\\\n-\t__KSEQ_READ\n-\n-#endif\n" |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/ksort.h --- a/chimerascan/pysam/samtools/ksort.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,271 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Genome Research Ltd (GRL).\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Contact: Heng Li <lh3@sanger.ac.uk> */\n-\n-/*\n- 2008-11-16 (0.1.4):\n-\n- * Fixed a bug in introsort() that happens in rare cases.\n-\n- 2008-11-05 (0.1.3):\n-\n- * Fixed a bug in introsort() for complex comparisons.\n-\n-\t* Fixed a bug in mergesort(). The previous version is not stable.\n-\n- 2008-09-15 (0.1.2):\n-\n-\t* Accelerated introsort. 
On my Mac (not on another Linux machine),\n-\t my implementation is as fast as std::sort on random input.\n-\n-\t* Added combsort and in introsort, switch to combsort if the\n-\t recursion is too deep.\n-\n- 2008-09-13 (0.1.1):\n-\n-\t* Added k-small algorithm\n-\n- 2008-09-05 (0.1.0):\n-\n-\t* Initial version\n-\n-*/\n-\n-#ifndef AC_KSORT_H\n-#define AC_KSORT_H\n-\n-#include <stdlib.h>\n-#include <string.h>\n-\n-typedef struct {\n-\tvoid *left, *right;\n-\tint depth;\n-} ks_isort_stack_t;\n-\n-#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }\n-\n-#define KSORT_INIT(name, type_t, __sort_lt)\t\t\t\t\t\t\t\t\\\n-\tvoid ks_mergesort_##name(size_t n, type_t array[], type_t temp[])\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t *a2[2], *a, *b;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tint curr, shift;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ta2[0] = array;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ta2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n);\t\t\\\n-\t\tfor (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) {\t\t\t\\\n-\t\t\ta = a2[curr]; b = a2[1-curr];\t\t\t\t\t\t\t\t\\\n-\t\t\tif (shift == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\ttype_t *p = b, *i, *eb = a + n;\t\t\t\t\t\t\t\\\n-\t\t\t\tfor (i = a; i < eb; i += 2) {\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (i == eb - 1) *p++ = *i;\t\t\t\t\t\t\t\\\n-\t\t\t\t\telse {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tif (__sort_lt(*(i+1), *i)) {\t\t\t\t\t\\\n-\t\t\t\t\t\t\t*p++ = *(i+1); *p++ = *i;\t\t\t\t\t\\\n-\t\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\t\t*p++ = *i; *p++ = *(i+1);\t\t\t\t\t\\\n-\t\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tsize_t i, step = 1ul<<shift;\t\t\t\t\t\t\t\\\n-\t\t\t\tfor (i = 0; i < n; i += step<<1) {\t\t\t\t\t\t\\\n-\t\t\t\t\ttype_t *p, *j, *k, *ea, *eb;\t\t\t\t\t\t\\\n-\t\t\t\t\tif (n < i + step) 
{\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tea = a + n; eb = a;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tea = a + i + step;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\teb = a + (n < i + (step<<1)? n : i + (step<<1)); \\\n-\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tj = a + i; k = a + i + step; p = b + i;\t\t\t\t\\\n-\t\t\t\t\twhile (j < ea && k < eb) {\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tif (__sort_lt(*k, *j)) *p++ = *k++;\t\t\t\t\\\n-\t\t\t\t\t\telse *p++ = *j++;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\twhile (j < ea) *p++ = *j++;\t\t\t\t\t\t\t\\\n-\t\t\t\t\twhile (k < eb) *p++ = *k++;\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tcurr = 1 - curr;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (curr == 1) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\ttype_t *p = a2[0], *i = a2[1], *eb = array + n;\t\t\t\t\\\n-\t\t\tfor (; p < eb; ++i) *p++ = *i;\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (temp == 0) free(a2[1]);\t\t\t\t\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tvoid ks_heapadjust_##name(size_t i, size_t n, type_t l[])\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tsize_t k ='..b'\t\t\t\t\t\t\t\t\\\n-\tvoid ks_introsort_##name(size_t n, type_t a[])\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tint d;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tks_isort_stack_t *top, *stack;\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t rp, swap_tmp;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t *s, *t, *i, *j, *k;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (n < 1) return;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\telse if (n == 2) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \\\n-\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tfor (d = 2; 1ul<<d < n; 
++d);\t\t\t\t\t\t\t\t\t\\\n-\t\tstack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \\\n-\t\ttop = stack; s = a; t = a + (n-1); d <<= 1;\t\t\t\t\t\t\\\n-\t\twhile (1) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (s < t) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (--d == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tks_combsort_##name(t - s + 1, s);\t\t\t\t\t\\\n-\t\t\t\t\tt = s;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tcontinue;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\ti = s; j = t; k = i + ((j-i)>>1) + 1;\t\t\t\t\t\\\n-\t\t\t\tif (__sort_lt(*k, *i)) {\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (__sort_lt(*k, *j)) k = j;\t\t\t\t\t\t\\\n-\t\t\t\t} else k = __sort_lt(*j, *i)? i : j;\t\t\t\t\t\\\n-\t\t\t\trp = *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; }\t\\\n-\t\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tdo ++i; while (__sort_lt(*i, rp));\t\t\t\t\t\\\n-\t\t\t\t\tdo --j; while (i <= j && __sort_lt(rp, *j));\t\t\\\n-\t\t\t\t\tif (j <= i) break;\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tswap_tmp = *i; *i = *j; *j = swap_tmp;\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tswap_tmp = *i; *i = *t; *t = swap_tmp;\t\t\t\t\t\\\n-\t\t\t\tif (i-s > t-i) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \\\n-\t\t\t\t\ts = t-i > 16? i+1 : t;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \\\n-\t\t\t\t\tt = i-s > 16? 
i-1 : s;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (top == stack) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tfree(stack);\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t__ks_insertsort_##name(a, a+n);\t\t\t\t\t\t\\\n-\t\t\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t/* This function is adapted from: http://ndevilla.free.fr/median/ */ \\\n-\t/* 0 <= kk < n */\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\ttype_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk)\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t *low, *high, *k, *ll, *hh, *mid;\t\t\t\t\t\t\t\\\n-\t\tlow = arr; high = arr + n - 1; k = arr + kk;\t\t\t\t\t\\\n-\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (high <= low) return *k;\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (high == low + 1) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n-\t\t\t\treturn *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tmid = low + (high - low) / 2;\t\t\t\t\t\t\t\t\\\n-\t\t\tif (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \\\n-\t\t\tif (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n-\t\t\tif (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low);\t\\\n-\t\t\tKSORT_SWAP(type_t, *mid, *(low+1));\t\t\t\t\t\t\t\\\n-\t\t\tll = low + 1; hh = high;\t\t\t\t\t\t\t\t\t\\\n-\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tdo ++ll; while (__sort_lt(*ll, *low));\t\t\t\t\t\\\n-\t\t\t\tdo --hh; while (__sort_lt(*low, *hh));\t\t\t\t\t\\\n-\t\t\t\tif (hh < ll) break;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tKSORT_SWAP(type_t, *ll, *hh);\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tKSORT_SWAP(type_t, *low, *hh);\t\t\t\t\t\t\t\t\\\n-\t\t\tif (hh <= 
k) low = ll;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (hh >= k) high = hh - 1;\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\n-\n-#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)\n-#define ks_introsort(name, n, a) ks_introsort_##name(n, a)\n-#define ks_combsort(name, n, a) ks_combsort_##name(n, a)\n-#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)\n-#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)\n-#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)\n-#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)\n-\n-#define ks_lt_generic(a, b) ((a) < (b))\n-#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)\n-\n-typedef const char *ksstr_t;\n-\n-#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)\n-#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)\n-\n-#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/kstring.c --- a/chimerascan/pysam/samtools/kstring.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,165 +0,0 @@ -#include <stdarg.h> -#include <stdio.h> -#include <ctype.h> -#include <string.h> -#include <stdint.h> -#include "kstring.h" - -int ksprintf(kstring_t *s, const char *fmt, ...) -{ - va_list ap; - int l; - va_start(ap, fmt); - l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); // This line does not work with glibc 2.0. See `man snprintf'. - va_end(ap); - if (l + 1 > s->m - s->l) { - s->m = s->l + l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - va_start(ap, fmt); - l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); - } - va_end(ap); - s->l += l; - return l; -} - -// s MUST BE a null terminated string; l = strlen(s) -int ksplit_core(char *s, int delimiter, int *_max, int **_offsets) -{ - int i, n, max, last_char, last_start, *offsets, l; - n = 0; max = *_max; offsets = *_offsets; - l = strlen(s); - -#define __ksplit_aux do { \ - if (_offsets) { \ - s[i] = 0; \ - if (n == max) { \ - max = max? max<<1 : 2; \ - offsets = (int*)realloc(offsets, sizeof(int) * max); \ - } \ - offsets[n++] = last_start; \ - } else ++n; \ - } while (0) - - for (i = 0, last_char = last_start = 0; i <= l; ++i) { - if (delimiter == 0) { - if (isspace(s[i]) || s[i] == 0) { - if (isgraph(last_char)) __ksplit_aux; // the end of a field - } else { - if (isspace(last_char) || last_char == 0) last_start = i; - } - } else { - if (s[i] == delimiter || s[i] == 0) { - if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field - } else { - if (last_char == delimiter || last_char == 0) last_start = i; - } - } - last_char = s[i]; - } - *_max = max; *_offsets = offsets; - return n; -} - -/********************** - * Boyer-Moore search * - **********************/ - -// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html -int *ksBM_prep(const uint8_t *pat, int m) -{ - int i, *suff, *prep, *bmGs, *bmBc; - prep = calloc(m + 256, 1); - bmGs = prep; bmBc = prep + m; - { // preBmBc() - for (i = 0; i < 256; ++i) bmBc[i] = m; - for (i = 0; i < m - 1; 
++i) bmBc[pat[i]] = m - i - 1; - } - suff = calloc(m, sizeof(int)); - { // suffixes() - int f = 0, g; - suff[m - 1] = m; - g = m - 1; - for (i = m - 2; i >= 0; --i) { - if (i > g && suff[i + m - 1 - f] < i - g) - suff[i] = suff[i + m - 1 - f]; - else { - if (i < g) g = i; - f = i; - while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g; - suff[i] = f - g; - } - } - } - { // preBmGs() - int j = 0; - for (i = 0; i < m; ++i) bmGs[i] = m; - for (i = m - 1; i >= 0; --i) - if (suff[i] == i + 1) - for (; j < m - 1 - i; ++j) - if (bmGs[j] == m) - bmGs[j] = m - 1 - i; - for (i = 0; i <= m - 2; ++i) - bmGs[m - 1 - suff[i]] = m - 1 - i; - } - free(suff); - return prep; -} - -int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches) -{ - int i, j, *prep, *bmGs, *bmBc; - int *matches = 0, mm = 0, nm = 0; - prep = _prep? _prep : ksBM_prep(pat, m); - bmGs = prep; bmBc = prep + m; - j = 0; - while (j <= n - m) { - for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i); - if (i < 0) { - if (nm == mm) { - mm = mm? mm<<1 : 1; - matches = realloc(matches, mm * sizeof(int)); - } - matches[nm++] = j; - j += bmGs[0]; - } else { - int max = bmBc[str[i+j]] - m + 1 + i; - if (max < bmGs[i]) max = bmGs[i]; - j += max; - } - } - *n_matches = nm; - if (_prep == 0) free(prep); - return matches; -} - -#ifdef KSTRING_MAIN -#include <stdio.h> -int main() -{ - kstring_t *s; - int *fields, n, i; - s = (kstring_t*)calloc(1, sizeof(kstring_t)); - // test ksprintf() - ksprintf(s, " abcdefg: %d ", 100); - printf("'%s'\n", s->s); - // test ksplit() - fields = ksplit(s, 0, &n); - for (i = 0; i < n; ++i) - printf("field[%d] = '%s'\n", i, s->s + fields[i]); - free(s); - - { - static char *str = "abcdefgcdg"; - static char *pat = "cd"; - int n, *matches; - matches = ksBM_search(str, strlen(str), pat, strlen(pat), 0, &n); - printf("%d: \n", n); - for (i = 0; i < n; ++i) - printf("- %d\n", matches[i]); - free(matches); - } - return 0; -} -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/kstring.h --- a/chimerascan/pysam/samtools/kstring.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,102 +0,0 @@ -#ifndef KSTRING_H -#define KSTRING_H - -#include <stdlib.h> -#include <string.h> -#include <stdint.h> - -#ifndef kroundup32 -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) -#endif - -#ifndef KSTRING_T -#define KSTRING_T kstring_t -typedef struct __kstring_t { - size_t l, m; - char *s; -} kstring_t; -#endif - -int ksprintf(kstring_t *s, const char *fmt, ...); -int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); - -// calculate the auxiliary array, allocated by calloc() -int *ksBM_prep(const uint8_t *pat, int m); - -/* Search pat in str and returned the list of matches. The size of the - * list is returned as n_matches. _prep is the array returned by - * ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */ -int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches); - -static inline int kputsn(const char *p, int l, kstring_t *s) -{ - if (s->l + l + 1 >= s->m) { - s->m = s->l + l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } - strncpy(s->s + s->l, p, l); - s->l += l; - s->s[s->l] = 0; - return l; -} - -static inline int kputs(const char *p, kstring_t *s) -{ - return kputsn(p, strlen(p), s); -} - -static inline int kputc(int c, kstring_t *s) -{ - if (s->l + 1 >= s->m) { - s->m = s->l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } - s->s[s->l++] = c; - s->s[s->l] = 0; - return c; -} - -static inline int kputw(int c, kstring_t *s) -{ - char buf[16]; - int l, x; - if (c == 0) return kputc('0', s); - for (l = 0, x = c < 0? 
-c : c; x > 0; x /= 10) buf[l++] = x%10 + '0'; - if (c < 0) buf[l++] = '-'; - if (s->l + l + 1 >= s->m) { - s->m = s->l + l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } - for (x = l - 1; x >= 0; --x) s->s[s->l++] = buf[x]; - s->s[s->l] = 0; - return 0; -} - -static inline int kputuw(unsigned c, kstring_t *s) -{ - char buf[16]; - int l, i; - unsigned x; - if (c == 0) return kputc('0', s); - for (l = 0, x = c; x > 0; x /= 10) buf[l++] = x%10 + '0'; - if (s->l + l + 1 >= s->m) { - s->m = s->l + l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } - for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i]; - s->s[s->l] = 0; - return 0; -} - -static inline int *ksplit(kstring_t *s, int delimiter, int *n) -{ - int max = 0, *offsets = 0; - *n = ksplit_core(s->s, delimiter, &max, &offsets); - return offsets; -} - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/razf.c --- a/chimerascan/pysam/samtools/razf.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,853 +0,0 @@\n-/*\n- * RAZF : Random Access compressed(Z) File\n- * Version: 1.0\n- * Release Date: 2008-10-27\n- *\n- * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk>\n- *\n- * All rights reserved.\n- *\n- * Redistribution and use in source and binary forms, with or without\n- * modification, are permitted provided that the following conditions\n- * are met:\n- * 1. Redistributions of source code must retain the above copyright\n- * notice, this list of conditions and the following disclaimer.\n- * 2. Redistributions in binary form must reproduce the above copyright\n- * notice, this list of conditions and the following disclaimer in the\n- * documentation and/or other materials provided with the distribution.\n- *\n- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS\'\' AND\n- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n- * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE\n- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS\n- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)\n- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\n- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF\n- * SUCH DAMAGE.\n- */\n-\n-#ifndef _NO_RAZF\n-\n-#include <fcntl.h>\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <unistd.h>\n-#include "razf.h"\n-\n-\n-#if ZLIB_VERNUM < 0x1221\n-struct _gz_header_s {\n- int text;\n- uLong time;\n- int xflags;\n- int os;\n- Bytef *extra;\n- uInt extra_len;\n- uInt extra_max;\n- Bytef *name;\n- uInt name_max;\n- Bytef *comment;\n- uInt comm_max;\n- int hcrc;\n- int done;\n-};\n-#warning "zlib < 1.2.2.1; RAZF writing is disabled."\n-#endif\n-\n-#define DEF_MEM_LEVEL 8\n-\n-static inline uint32_t byte_swap_4(uint32_t v){\n-\tv = ((v & 0x0000FFFFU) << 16) | (v >> 16);\n-\treturn ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8);\n-}\n-\n-static inline uint64_t byte_swap_8(uint64_t v){\n-\tv = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32);\n-\tv = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16);\n-\treturn ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8);\n-}\n-\n-static inline int is_big_endian(){\n-\tint x = 0x01;\n-\tchar *c = (char*)&x;\n-\treturn (c[0] != 0x01);\n-}\n-\n-#ifndef _RZ_READONLY\n-static void add_zindex(RAZF *rz, int64_t in, int64_t out){\n-\tif(rz->index->size == rz->index->cap){\n-\t\trz->index->cap = rz->index->cap * 1.5 + 2;\n-\t\trz->index->cell_offsets = realloc(rz->index->cell_offsets, sizeof(int) * rz->index->cap);\n-\t\trz->index->bin_offsets = realloc(rz->index->bin_offsets, sizeof(int64_t) * 
(rz->index->cap/RZ_BIN_SIZE + 1));\n-\t}\n-\tif(rz->index->size % RZ_BIN_SIZE == 0) rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE] = out;\n-\trz->index->cell_offsets[rz->index->size] = out - rz->index->bin_offsets[rz->index->size / RZ_BIN_SIZE];\n-\trz->index->size ++;\n-}\n-\n-static void save_zindex(RAZF *rz, int fd){\n-\tint32_t i, v32;\n-\tint is_be;\n-\tis_be = is_big_endian();\n-\tif(is_be) write(fd, &rz->index->size, sizeof(int));\n-\telse {\n-\t\tv32 = byte_swap_4((uint32_t)rz->index->size);\n-\t\twrite(fd, &v32, sizeof(uint32_t));\n-\t}\n-\tv32 = rz->index->size / RZ_BIN_SIZE + 1;\n-\tif(!is_be){\n-\t\tfor(i=0;i<v32;i++) rz->index->bin_offsets[i] = byte_swap_8((uint64_t)rz->index->bin_offsets[i]);\n-\t\tfor(i=0;i<rz->index->size;i++) rz->index->cell_offsets[i] = byte_swap_4((uint32_t)rz->index->cell_offsets[i]);\n-\t}\n-\twrite(fd, rz->index->bin_offsets, sizeof(int64_t) * v32);\n-\twrite(fd, rz->index->cell_offsets, sizeof(int32_t) * rz->index->size);\n-}\n-#endif\n-\n-#ifdef _USE_KNETFILE\n-static void load_zindex(RAZF *rz, knetFile *fp'..b'E_TYPE_PLAIN){\n-\t\trz->buf_off = rz->buf_len = 0;\n-\t\tpos = block_start + block_offset;\n-#ifdef _USE_KNETFILE\n-\t\tknet_seek(rz->x.fpr, pos, SEEK_SET);\n- pos = knet_tell(rz->x.fpr);\n-#else\n-\t\tpos = lseek(rz->filedes, pos, SEEK_SET);\n-#endif\n-\t\trz->out = rz->in = pos;\n-\t\treturn pos;\n-\t}\n-\tif(block_start == rz->block_pos && block_offset >= rz->block_off) {\n-\t\tblock_offset -= rz->block_off;\n-\t\tgoto SKIP; // Needn\'t reset inflate\n-\t}\n-\tif(block_start == 0) block_start = rz->header_size; // Automaticly revist wrong block_start\n-\t_razf_reset_read(rz, block_start, 0);\n-\tSKIP:\n-\tif(block_offset) razf_skip(rz, block_offset);\n-\treturn rz->block_off;\n-}\n-\n-int64_t razf_seek(RAZF* rz, int64_t pos, int where){\n-\tint64_t idx;\n-\tint64_t seek_pos, new_out;\n-\trz->z_eof = 0;\n-\tif (where == SEEK_CUR) pos += rz->out;\n-\telse if (where == SEEK_END) pos += 
rz->src_end;\n-\tif(rz->file_type == FILE_TYPE_PLAIN){\n-#ifdef _USE_KNETFILE\n-\t\tknet_seek(rz->x.fpr, pos, SEEK_SET);\n- seek_pos = knet_tell(rz->x.fpr);\n-#else\n-\t\tseek_pos = lseek(rz->filedes, pos, SEEK_SET);\n-#endif\n-\t\trz->buf_off = rz->buf_len = 0;\n-\t\trz->out = rz->in = seek_pos;\n-\t\treturn seek_pos;\n-\t} else if(rz->file_type == FILE_TYPE_GZ){\n-\t\tif(pos >= rz->out) goto SKIP;\n-\t\treturn rz->out;\n-\t}\n-\tif(pos == rz->out) return pos;\n-\tif(pos > rz->src_end) return rz->out;\n-\tif(!rz->seekable || !rz->load_index){\n-\t\tif(pos >= rz->out) goto SKIP;\n-\t}\n-\tidx = pos / RZ_BLOCK_SIZE - 1;\n-\tseek_pos = (idx < 0)? rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);\n-\tnew_out = (idx + 1) * RZ_BLOCK_SIZE;\n-\tif(pos > rz->out && new_out <= rz->out) goto SKIP;\n-\t_razf_reset_read(rz, seek_pos, new_out);\n-\tSKIP:\n-\trazf_skip(rz, (int)(pos - rz->out));\n-\treturn rz->out;\n-}\n-\n-uint64_t razf_tell2(RAZF *rz)\n-{\n-\t/*\n-\tif (rz->load_index) {\n-\t\tint64_t idx, seek_pos;\n-\t\tidx = rz->out / RZ_BLOCK_SIZE - 1;\n-\t\tseek_pos = (idx < 0)? 
rz->header_size:(rz->index->cell_offsets[idx] + rz->index->bin_offsets[idx / RZ_BIN_SIZE]);\n-\t\tif (seek_pos != rz->block_pos || rz->out%RZ_BLOCK_SIZE != rz->block_off)\n-\t\t\tfprintf(stderr, "[razf_tell2] inconsistent block offset: (%lld, %lld) != (%lld, %lld)\\n",\n-\t\t\t\t\t(long long)seek_pos, (long long)rz->out%RZ_BLOCK_SIZE, (long long)rz->block_pos, (long long) rz->block_off);\n-\t}\n-\t*/\n-\treturn (uint64_t)rz->block_pos<<16 | (rz->block_off&0xffff);\n-}\n-\n-int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where)\n-{\n-\tif (where != SEEK_SET) return -1;\n-\treturn razf_jump(rz, voffset>>16, voffset&0xffff);\n-}\n-\n-void razf_close(RAZF *rz){\n-\tif(rz->mode == \'w\'){\n-#ifndef _RZ_READONLY\n-\t\trazf_end_flush(rz);\n-\t\tdeflateEnd(rz->stream);\n-#ifdef _USE_KNETFILE\n-\t\tsave_zindex(rz, rz->x.fpw);\n-\t\tif(is_big_endian()){\n-\t\t\twrite(rz->x.fpw, &rz->in, sizeof(int64_t));\n-\t\t\twrite(rz->x.fpw, &rz->out, sizeof(int64_t));\n-\t\t} else {\n-\t\t\tuint64_t v64 = byte_swap_8((uint64_t)rz->in);\n-\t\t\twrite(rz->x.fpw, &v64, sizeof(int64_t));\n-\t\t\tv64 = byte_swap_8((uint64_t)rz->out);\n-\t\t\twrite(rz->x.fpw, &v64, sizeof(int64_t));\n-\t\t}\n-#else\n-\t\tsave_zindex(rz, rz->filedes);\n-\t\tif(is_big_endian()){\n-\t\t\twrite(rz->filedes, &rz->in, sizeof(int64_t));\n-\t\t\twrite(rz->filedes, &rz->out, sizeof(int64_t));\n-\t\t} else {\n-\t\t\tuint64_t v64 = byte_swap_8((uint64_t)rz->in);\n-\t\t\twrite(rz->filedes, &v64, sizeof(int64_t));\n-\t\t\tv64 = byte_swap_8((uint64_t)rz->out);\n-\t\t\twrite(rz->filedes, &v64, sizeof(int64_t));\n-\t\t}\n-#endif\n-#endif\n-\t} else if(rz->mode == \'r\'){\n-\t\tif(rz->stream) inflateEnd(rz->stream);\n-\t}\n-\tif(rz->inbuf) free(rz->inbuf);\n-\tif(rz->outbuf) 
free(rz->outbuf);\n-\tif(rz->header){\n-\t\tfree(rz->header->extra);\n-\t\tfree(rz->header->name);\n-\t\tfree(rz->header->comment);\n-\t\tfree(rz->header);\n-\t}\n-\tif(rz->index){\n-\t\tfree(rz->index->bin_offsets);\n-\t\tfree(rz->index->cell_offsets);\n-\t\tfree(rz->index);\n-\t}\n-\tfree(rz->stream);\n-#ifdef _USE_KNETFILE\n- if (rz->mode == \'r\')\n- knet_close(rz->x.fpr);\n- if (rz->mode == \'w\')\n- close(rz->x.fpw);\n-#else\n-\tclose(rz->filedes);\n-#endif\n-\tfree(rz);\n-}\n-\n-#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/razf.h --- a/chimerascan/pysam/samtools/razf.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,134 +0,0 @@ - /*- - * RAZF : Random Access compressed(Z) File - * Version: 1.0 - * Release Date: 2008-10-27 - * - * Copyright 2008, Jue Ruan <ruanjue@gmail.com>, Heng Li <lh3@sanger.ac.uk> - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - - -#ifndef __RAZF_RJ_H -#define __RAZF_RJ_H - -#include <stdint.h> -#include <stdio.h> -#include "zlib.h" - -#ifdef _USE_KNETFILE -#include "knetfile.h" -#endif - -#if ZLIB_VERNUM < 0x1221 -#define _RZ_READONLY -struct _gz_header_s; -typedef struct _gz_header_s _gz_header; -#define gz_header _gz_header -#endif - -#define WINDOW_BITS 15 - -#ifndef RZ_BLOCK_SIZE -#define RZ_BLOCK_SIZE (1<<WINDOW_BITS) -#endif - -#ifndef RZ_BUFFER_SIZE -#define RZ_BUFFER_SIZE 4096 -#endif - -#ifndef RZ_COMPRESS_LEVEL -#define RZ_COMPRESS_LEVEL 6 -#endif - -#define RZ_BIN_SIZE ((1LLU << 32) / RZ_BLOCK_SIZE) - -typedef struct { - uint32_t *cell_offsets; // i - int64_t *bin_offsets; // i / BIN_SIZE - int size; - int cap; -} ZBlockIndex; -/* When storing index, output bytes in Big-Endian everywhere */ - -#define FILE_TYPE_RZ 1 -#define FILE_TYPE_PLAIN 2 -#define FILE_TYPE_GZ 3 - -typedef struct RandomAccessZFile { - char mode; /* 'w' : write mode; 'r' : read mode */ - int file_type; - /* plain file or rz file, razf_read support plain file as input too, in this case, razf_read work as buffered fread */ -#ifdef _USE_KNETFILE - union { - knetFile *fpr; - int fpw; - } x; -#else - int filedes; /* the file descriptor */ -#endif - z_stream *stream; - ZBlockIndex *index; - int64_t in, out, end, src_end; - /* in: n bytes total in; out: n bytes total out; */ - /* end: the end of all data blocks, while the start of index; src_end: the true end position in uncompressed file */ - int buf_flush; // buffer should be flush, suspend inflate util buffer is empty - int64_t block_pos, block_off, next_block_pos; - /* block_pos: the start postiion of current block in compressed file */ - /* block_off: tell how many bytes have been read from current block */ - void *inbuf, *outbuf; - int header_size; - gz_header *header; - /* header is used to transfer inflate_state->mode from HEAD to TYPE after call inflateReset */ - int buf_off, buf_len; - int z_err, z_eof; - int seekable; - /* Indice where the source 
is seekable */ - int load_index; - /* set has_index to 0 in mode 'w', then index will be discarded */ -} RAZF; - -#ifdef __cplusplus -extern "C" { -#endif - - RAZF* razf_dopen(int data_fd, const char *mode); - RAZF *razf_open(const char *fn, const char *mode); - int razf_write(RAZF* rz, const void *data, int size); - int razf_read(RAZF* rz, void *data, int size); - int64_t razf_seek(RAZF* rz, int64_t pos, int where); - void razf_close(RAZF* rz); - -#define razf_tell(rz) ((rz)->out) - - RAZF* razf_open2(const char *filename, const char *mode); - RAZF* razf_dopen2(int fd, const char *mode); - uint64_t razf_tell2(RAZF *rz); - int64_t razf_seek2(RAZF *rz, uint64_t voffset, int where); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/sam.c --- a/chimerascan/pysam/samtools/sam.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,175 +0,0 @@ -#include <string.h> -#include <unistd.h> -#include "faidx.h" -#include "sam.h" - -#define TYPE_BAM 1 -#define TYPE_READ 2 - -bam_header_t *bam_header_dup(const bam_header_t *h0) -{ - bam_header_t *h; - int i; - h = bam_header_init(); - *h = *h0; - h->hash = h->dict = h->rg2lib = 0; - h->text = (char*)calloc(h->l_text + 1, 1); - memcpy(h->text, h0->text, h->l_text); - h->target_len = (uint32_t*)calloc(h->n_targets, 4); - h->target_name = (char**)calloc(h->n_targets, sizeof(void*)); - for (i = 0; i < h->n_targets; ++i) { - h->target_len[i] = h0->target_len[i]; - h->target_name[i] = strdup(h0->target_name[i]); - } - return h; -} -static void append_header_text(bam_header_t *header, char* text, int len) -{ - int x = header->l_text + 1; - int y = header->l_text + len + 1; // 1 byte null - if (text == 0) return; - kroundup32(x); - kroundup32(y); - if (x < y) header->text = (char*)realloc(header->text, y); - strncpy(header->text + header->l_text, text, len); // we cannot use strcpy() here. - header->l_text += len; - header->text[header->l_text] = 0; -} - -samfile_t *samopen(const char *fn, const char *mode, const void *aux) -{ - samfile_t *fp; - fp = (samfile_t*)calloc(1, sizeof(samfile_t)); - if (mode[0] == 'r') { // read - fp->type |= TYPE_READ; - if (mode[1] == 'b') { // binary - fp->type |= TYPE_BAM; - fp->x.bam = strcmp(fn, "-")? 
bam_open(fn, "r") : bam_dopen(fileno(stdin), "r"); - if (fp->x.bam == 0) goto open_err_ret; - fp->header = bam_header_read(fp->x.bam); - } else { // text - fp->x.tamr = sam_open(fn); - if (fp->x.tamr == 0) goto open_err_ret; - fp->header = sam_header_read(fp->x.tamr); - if (fp->header->n_targets == 0) { // no @SQ fields - if (aux) { // check if aux is present - bam_header_t *textheader = fp->header; - fp->header = sam_header_read2((const char*)aux); - if (fp->header == 0) goto open_err_ret; - append_header_text(fp->header, textheader->text, textheader->l_text); - bam_header_destroy(textheader); - } - if (fp->header->n_targets == 0) - fprintf(stderr, "[samopen] no @SQ lines in the header.\n"); - } else fprintf(stderr, "[samopen] SAM header is present: %d sequences.\n", fp->header->n_targets); - } - } else if (mode[0] == 'w') { // write - fp->header = bam_header_dup((const bam_header_t*)aux); - if (mode[1] == 'b') { // binary - char bmode[3]; - bmode[0] = 'w'; bmode[1] = strstr(mode, "u")? 'u' : 0; bmode[2] = 0; - fp->type |= TYPE_BAM; - fp->x.bam = strcmp(fn, "-")? bam_open(fn, bmode) : bam_dopen(fileno(stdout), bmode); - if (fp->x.bam == 0) goto open_err_ret; - bam_header_write(fp->x.bam, fp->header); - } else { // text - // open file - fp->x.tamw = strcmp(fn, "-")? 
fopen(fn, "w") : stdout; - if (fp->x.tamr == 0) goto open_err_ret; - if (strstr(mode, "X")) fp->type |= BAM_OFSTR<<2; - else if (strstr(mode, "x")) fp->type |= BAM_OFHEX<<2; - else fp->type |= BAM_OFDEC<<2; - // write header - if (strstr(mode, "h")) { - int i; - bam_header_t *alt; - // parse the header text - alt = bam_header_init(); - alt->l_text = fp->header->l_text; alt->text = fp->header->text; - sam_header_parse(alt); - alt->l_text = 0; alt->text = 0; - // check if there are @SQ lines in the header - fwrite(fp->header->text, 1, fp->header->l_text, fp->x.tamw); - if (alt->n_targets) { // then write the header text without dumping ->target_{name,len} - if (alt->n_targets != fp->header->n_targets) - fprintf(stderr, "[samopen] inconsistent number of target sequences.\n"); - } else { // then dump ->target_{name,len} - for (i = 0; i < fp->header->n_targets; ++i) - fprintf(fp->x.tamw, "@SQ\tSN:%s\tLN:%d\n", fp->header->target_name[i], fp->header->target_len[i]); - } - bam_header_destroy(alt); - } - } - } - return fp; - -open_err_ret: - free(fp); - return 0; -} - -void samclose(samfile_t *fp) -{ - if (fp == 0) return; - if (fp->header) bam_header_destroy(fp->header); - if (fp->type & TYPE_BAM) bam_close(fp->x.bam); - else if (fp->type & TYPE_READ) sam_close(fp->x.tamr); - else fclose(fp->x.tamw); - free(fp); -} - -int samread(samfile_t *fp, bam1_t *b) -{ - if (fp == 0 || !(fp->type & TYPE_READ)) return -1; // not open for reading - if (fp->type & TYPE_BAM) return bam_read1(fp->x.bam, b); - else return sam_read1(fp->x.tamr, fp->header, b); -} - -int samwrite(samfile_t *fp, const bam1_t *b) -{ - if (fp == 0 || (fp->type & TYPE_READ)) return -1; // not open for writing - if (fp->type & TYPE_BAM) return bam_write1(fp->x.bam, b); - else { - char *s = bam_format1_core(fp->header, b, fp->type>>2&3); - int l = strlen(s); - fputs(s, fp->x.tamw); fputc('\n', fp->x.tamw); - free(s); - return l + 1; - } -} - -int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void 
*func_data) -{ - bam_plbuf_t *buf; - int ret; - bam1_t *b; - b = bam_init1(); - buf = bam_plbuf_init(func, func_data); - bam_plbuf_set_mask(buf, mask); - while ((ret = samread(fp, b)) >= 0) - bam_plbuf_push(b, buf); - bam_plbuf_push(0, buf); - bam_plbuf_destroy(buf); - bam_destroy1(b); - return 0; -} - -char *samfaipath(const char *fn_ref) -{ - char *fn_list = 0; - if (fn_ref == 0) return 0; - fn_list = calloc(strlen(fn_ref) + 5, 1); - strcat(strcpy(fn_list, fn_ref), ".fai"); - if (access(fn_list, R_OK) == -1) { // fn_list is unreadable - if (access(fn_ref, R_OK) == -1) { - fprintf(stderr, "[samfaipath] fail to read file %s.\n", fn_ref); - } else { - fprintf(stderr, "[samfaipath] build FASTA index...\n"); - if (fai_build(fn_ref) == -1) { - fprintf(stderr, "[samfaipath] fail to build FASTA index.\n"); - free(fn_list); fn_list = 0; - } - } - } - return fn_list; -} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/sam.h --- a/chimerascan/pysam/samtools/sam.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,98 +0,0 @@ -#ifndef BAM_SAM_H -#define BAM_SAM_H - -#include "bam.h" - -/*! - @header - - This file provides higher level of I/O routines and unifies the APIs - for SAM and BAM formats. These APIs are more convenient and - recommended. - - @copyright Genome Research Ltd. - */ - -/*! @typedef - @abstract SAM/BAM file handler - @field type type of the handler; bit 1 for BAM, 2 for reading and bit 3-4 for flag format - @field bam BAM file handler; valid if (type&1) == 1 - @field tamr SAM file handler for reading; valid if type == 2 - @field tamw SAM file handler for writing; valid if type == 0 - @field header header struct - */ -typedef struct { - int type; - union { - tamFile tamr; - bamFile bam; - FILE *tamw; - } x; - bam_header_t *header; -} samfile_t; - -#ifdef __cplusplus -extern "C" { -#endif - - /*! - @abstract Open a SAM/BAM file - - @param fn SAM/BAM file name; "-" is recognized as stdin (for - reading) or stdout (for writing). - - @param mode open mode /[rw](b?)(u?)(h?)([xX]?)/: 'r' for reading, - 'w' for writing, 'b' for BAM I/O, 'u' for uncompressed BAM output, - 'h' for outputing header in SAM, 'x' for HEX flag and 'X' for - string flag. If 'b' present, it must immediately follow 'r' or - 'w'. Valid modes are "r", "w", "wh", "wx", "whx", "wX", "whX", - "rb", "wb" and "wbu" exclusively. - - @param aux auxiliary data; if mode[0]=='w', aux points to - bam_header_t; if strcmp(mode, "rb")!=0 and @SQ header lines in SAM - are absent, aux points the file name of the list of the reference; - aux is not used otherwise. If @SQ header lines are present in SAM, - aux is not used, either. - - @return SAM/BAM file handler - */ - samfile_t *samopen(const char *fn, const char *mode, const void *aux); - - /*! - @abstract Close a SAM/BAM handler - @param fp file handler to be closed - */ - void samclose(samfile_t *fp); - - /*! 
- @abstract Read one alignment - @param fp file handler - @param b alignment - @return bytes read - */ - int samread(samfile_t *fp, bam1_t *b); - - /*! - @abstract Write one alignment - @param fp file handler - @param b alignment - @return bytes written - */ - int samwrite(samfile_t *fp, const bam1_t *b); - - /*! - @abstract Get the pileup for a whole alignment file - @param fp file handler - @param mask mask transferred to bam_plbuf_set_mask() - @param func user defined function called in the pileup process - #param data user provided data for func() - */ - int sampileup(samfile_t *fp, int mask, bam_pileup_f func, void *data); - - char *samfaipath(const char *fn_ref); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/sam_header.c --- a/chimerascan/pysam/samtools/sam_header.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,735 +0,0 @@\n-#include "sam_header.h"\n-#include <stdio.h>\n-#include <string.h>\n-#include <ctype.h>\n-#include <stdlib.h>\n-#include <stdarg.h>\n-\n-#include "khash.h"\n-KHASH_MAP_INIT_STR(str, const char *)\n-\n-struct _HeaderList\n-{\n- struct _HeaderList *last; // Hack: Used and maintained only by list_append_to_end. Maintained in the root node only.\n- struct _HeaderList *next;\n- void *data;\n-};\n-typedef struct _HeaderList list_t;\n-typedef list_t HeaderDict;\n-\n-typedef struct\n-{\n- char key[2];\n- char *value;\n-}\n-HeaderTag;\n-\n-typedef struct\n-{\n- char type[2];\n- list_t *tags;\n-}\n-HeaderLine;\n-\n-const char *o_hd_tags[] = {"SO","GO",NULL};\n-const char *r_hd_tags[] = {"VN",NULL};\n-\n-const char *o_sq_tags[] = {"AS","M5","UR","SP",NULL};\n-const char *r_sq_tags[] = {"SN","LN",NULL};\n-const char *u_sq_tags[] = {"SN",NULL};\n-\n-const char *o_rg_tags[] = {"LB","DS","PU","PI","CN","DT","PL",NULL};\n-const char *r_rg_tags[] = {"ID",NULL};\n-const char *u_rg_tags[] = {"ID",NULL};\n-\n-const char *o_pg_tags[] = {"VN","CL",NULL};\n-const char *r_pg_tags[] = {"ID",NULL};\n-\n-const char *types[] = {"HD","SQ","RG","PG","CO",NULL};\n-const char **optional_tags[] = {o_hd_tags,o_sq_tags,o_rg_tags,o_pg_tags,NULL,NULL};\n-const char **required_tags[] = {r_hd_tags,r_sq_tags,r_rg_tags,r_pg_tags,NULL,NULL};\n-const char **unique_tags[] = {NULL, u_sq_tags,u_rg_tags,NULL,NULL,NULL};\n-\n-\n-static void debug(const char *format, ...)\n-{\n- va_list ap;\n- va_start(ap, format);\n- vfprintf(stderr, format, ap);\n- va_end(ap);\n-}\n-\n-#if 0\n-// Replaced by list_append_to_end\n-static list_t *list_prepend(list_t *root, void *data)\n-{\n- list_t *l = malloc(sizeof(list_t));\n- l->next = root;\n- l->data = data;\n- return l;\n-}\n-#endif\n-\n-// Relies on the root->last being correct. 
Do not use with the other list_*\n-// routines unless they are fixed to modify root->last as well.\n-static list_t *list_append_to_end(list_t *root, void *data)\n-{\n- list_t *l = malloc(sizeof(list_t));\n- l->last = l;\n- l->next = NULL;\n- l->data = data;\n-\n- if ( !root )\n- return l;\n-\n- root->last->next = l;\n- root->last = l;\n- return root;\n-}\n-\n-static list_t *list_append(list_t *root, void *data)\n-{\n- list_t *l = root;\n- while (l && l->next)\n- l = l->next;\n- if ( l ) \n- {\n- l->next = malloc(sizeof(list_t));\n- l = l->next;\n- }\n- else\n- {\n- l = malloc(sizeof(list_t));\n- root = l;\n- }\n- l->data = data;\n- l->next = NULL;\n- return root;\n-}\n-\n-static void list_free(list_t *root)\n-{\n- list_t *l = root;\n- while (root)\n- {\n- l = root;\n- root = root->next;\n- free(l);\n- }\n-}\n-\n-\n-\n-// Look for a tag "XY" in a predefined const char *[] array.\n-static int tag_exists(const char *tag, const char **tags)\n-{\n- int itag=0;\n- if ( !tags ) return -1;\n- while ( tags[itag] )\n- {\n- if ( tags[itag][0]==tag[0] && tags[itag][1]==tag[1] ) return itag; \n- itag++;\n- }\n- return -1;\n-}\n-\n-\n-\n-// Mimics the behaviour of getline, except it returns pointer to the next chunk of the text\n-// or NULL if everything has been read. The lineptr should be freed by the caller. 
The\n-// newline character is stripped.\n-static const char *nextline(char **lineptr, size_t *n, const char *text)\n-{\n- int len;\n- const char *to = text;\n-\n- if ( !*to ) return NULL;\n-\n- while ( *to && *to!=\'\\n\' && *to!=\'\\r\' ) to++;\n- len = to - text + 1;\n-\n- if ( *to )\n- {\n- // Advance the pointer for the next call\n- if ( *to==\'\\n\' ) to++;\n- else if ( *to==\'\\r\' && *(to+1)==\'\\n\' ) to+=2;\n- }\n- if ( !len )\n- return to;\n-\n- if ( !*lineptr ) \n- {\n- *lineptr = malloc(len);\n- *n = len;\n- }\n- else if ( *n<len ) \n- {\n- *lineptr = realloc(*lineptr, len);\n- *n = len;\n- }\n- if ( !*lineptr ) {\n-\t\tdebug("[nextline] Insufficient memory!\\n");\n-\t\treturn 0;\n-\t}\n-\n- memcp'..b'e);\n- else\n- {\n-\t\t\tif (hline) sam_header_line_free(hline);\n-\t\t\tsam_header_free(hlines);\n- if ( buf ) free(buf);\n- return NULL;\n- }\n- }\n- if ( buf ) free(buf);\n-\n- return hlines;\n-}\n-\n-void *sam_header2tbl(const void *_dict, char type[2], char key_tag[2], char value_tag[2])\n-{\n-\tconst HeaderDict *dict = (const HeaderDict*)_dict;\n- const list_t *l = dict;\n- khash_t(str) *tbl = kh_init(str);\n- khiter_t k;\n- int ret;\n-\n-\tif (_dict == 0) return tbl; // return an empty (not null) hash table\n- while (l)\n- {\n- HeaderLine *hline = l->data;\n- if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) \n- {\n- l = l->next;\n- continue;\n- }\n- \n- HeaderTag *key, *value;\n- key = header_line_has_tag(hline,key_tag);\n- value = header_line_has_tag(hline,value_tag); \n- if ( !key || !value )\n- {\n- l = l->next;\n- continue;\n- }\n- \n- k = kh_get(str, tbl, key->value);\n- if ( k != kh_end(tbl) )\n- debug("[sam_header_lookup_table] They key %s not unique.\\n", key->value);\n- k = kh_put(str, tbl, key->value, &ret);\n- kh_value(tbl, k) = value->value;\n-\n- l = l->next;\n- }\n- return tbl;\n-}\n-\n-char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n)\n-{\n-\tconst HeaderDict *dict = (const 
HeaderDict*)_dict;\n- const list_t *l = dict;\n- int max, n;\n-\tchar **ret;\n-\n-\tret = 0; *_n = max = n = 0;\n- while (l)\n- {\n- HeaderLine *hline = l->data;\n- if ( hline->type[0]!=type[0] || hline->type[1]!=type[1] ) \n- {\n- l = l->next;\n- continue;\n- }\n- \n- HeaderTag *key;\n- key = header_line_has_tag(hline,key_tag);\n- if ( !key )\n- {\n- l = l->next;\n- continue;\n- }\n-\n-\t\tif (n == max) {\n-\t\t\tmax = max? max<<1 : 4;\n-\t\t\tret = realloc(ret, max * sizeof(void*));\n-\t\t}\n-\t\tret[n++] = key->value;\n-\n- l = l->next;\n- }\n-\t*_n = n;\n- return ret;\n-}\n-\n-const char *sam_tbl_get(void *h, const char *key)\n-{\n-\tkhash_t(str) *tbl = (khash_t(str)*)h;\n-\tkhint_t k;\n-\tk = kh_get(str, tbl, key);\n-\treturn k == kh_end(tbl)? 0 : kh_val(tbl, k);\n-}\n-\n-int sam_tbl_size(void *h)\n-{\n-\tkhash_t(str) *tbl = (khash_t(str)*)h;\n-\treturn h? kh_size(tbl) : 0;\n-}\n-\n-void sam_tbl_destroy(void *h)\n-{\n-\tkhash_t(str) *tbl = (khash_t(str)*)h;\n-\tkh_destroy(str, tbl);\n-}\n-\n-void *sam_header_merge(int n, const void **_dicts)\n-{\n-\tconst HeaderDict **dicts = (const HeaderDict**)_dicts;\n- HeaderDict *out_dict;\n- int idict, status;\n-\n- if ( n<2 ) return NULL;\n-\n- out_dict = sam_header_clone(dicts[0]);\n-\n- for (idict=1; idict<n; idict++)\n- {\n- const list_t *tmpl_hlines = dicts[idict];\n-\n- while ( tmpl_hlines )\n- {\n- list_t *out_hlines = out_dict;\n- int inserted = 0;\n- while ( out_hlines )\n- {\n- status = sam_header_compare_lines(tmpl_hlines->data, out_hlines->data);\n- if ( status==0 )\n- {\n- out_hlines = out_hlines->next;\n- continue;\n- }\n- \n- if ( status==2 ) \n- {\n- print_header_line(stderr,tmpl_hlines->data);\n- print_header_line(stderr,out_hlines->data);\n- debug("Conflicting lines, cannot merge the headers.\\n");\n-\t\t\t\t\treturn 0;\n- }\n- if ( status==3 )\n- sam_header_line_merge_with(out_hlines->data, tmpl_hlines->data);\n-\n- inserted = 1;\n- break;\n- }\n- if ( !inserted )\n- out_dict = list_append(out_dict, 
sam_header_line_clone(tmpl_hlines->data));\n-\n- tmpl_hlines = tmpl_hlines->next;\n- }\n- }\n-\n- return out_dict;\n-}\n-\n-\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/sam_header.h --- a/chimerascan/pysam/samtools/sam_header.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,24 +0,0 @@ -#ifndef __SAM_HEADER_H__ -#define __SAM_HEADER_H__ - -#ifdef __cplusplus -extern "C" { -#endif - - void *sam_header_parse2(const char *headerText); - void *sam_header_merge(int n, const void **dicts); - void sam_header_free(void *header); - char *sam_header_write(const void *headerDict); // returns a newly allocated string - - char **sam_header2list(const void *_dict, char type[2], char key_tag[2], int *_n); - - void *sam_header2tbl(const void *dict, char type[2], char key_tag[2], char value_tag[2]); - const char *sam_tbl_get(void *h, const char *key); - int sam_tbl_size(void *h); - void sam_tbl_destroy(void *h); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/samtools/sam_view.c --- a/chimerascan/pysam/samtools/sam_view.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,255 +0,0 @@\n-#include <stdlib.h>\n-#include <string.h>\n-#include <stdio.h>\n-#include <unistd.h>\n-#include <math.h>\n-#include "sam_header.h"\n-#include "sam.h"\n-#include "faidx.h"\n-#include "khash.h"\n-KHASH_SET_INIT_STR(rg)\n-\n-typedef khash_t(rg) *rghash_t;\n-\n-rghash_t g_rghash = 0;\n-static int g_min_mapQ = 0, g_flag_on = 0, g_flag_off = 0;\n-static char *g_library, *g_rg;\n-static int g_sol2sanger_tbl[128];\n-\n-static void sol2sanger(bam1_t *b)\n-{\n-\tint l;\n-\tuint8_t *qual = bam1_qual(b);\n-\tif (g_sol2sanger_tbl[30] == 0) {\n-\t\tfor (l = 0; l != 128; ++l) {\n-\t\t\tg_sol2sanger_tbl[l] = (int)(10.0 * log(1.0 + pow(10.0, (l - 64 + 33) / 10.0)) / log(10.0) + .499);\n-\t\t\tif (g_sol2sanger_tbl[l] >= 93) g_sol2sanger_tbl[l] = 93;\n-\t\t}\n-\t}\n-\tfor (l = 0; l < b->core.l_qseq; ++l) {\n-\t\tint q = qual[l];\n-\t\tif (q > 127) q = 127;\n-\t\tqual[l] = g_sol2sanger_tbl[q];\n-\t}\n-}\n-\n-static inline int __g_skip_aln(const bam_header_t *h, const bam1_t *b)\n-{\n-\tif (b->core.qual < g_min_mapQ || ((b->core.flag & g_flag_on) != g_flag_on) || (b->core.flag & g_flag_off))\n-\t\treturn 1;\n-\tif (g_rg || g_rghash) {\n-\t\tuint8_t *s = bam_aux_get(b, "RG");\n-\t\tif (s) {\n-\t\t\tif (g_rg) return (strcmp(g_rg, (char*)(s + 1)) == 0)? 0 : 1;\n-\t\t\tif (g_rghash) {\n-\t\t\t\tkhint_t k = kh_get(rg, g_rghash, (char*)(s + 1));\n-\t\t\t\treturn (k != kh_end(g_rghash))? 0 : 1;\n-\t\t\t}\n-\t\t}\n-\t}\n-\tif (g_library) {\n-\t\tconst char *p = bam_get_library((bam_header_t*)h, b);\n-\t\treturn (p && strcmp(p, g_library) == 0)? 
0 : 1;\n-\t}\n-\treturn 0;\n-}\n-\n-// callback function for bam_fetch()\n-static int view_func(const bam1_t *b, void *data)\n-{\n-\tif (!__g_skip_aln(((samfile_t*)data)->header, b))\n-\t\tsamwrite((samfile_t*)data, b);\n-\treturn 0;\n-}\n-\n-static int usage(int is_long_help);\n-\n-int main_samview(int argc, char *argv[])\n-{\n-\tint c, is_header = 0, is_header_only = 0, is_bamin = 1, ret = 0, is_uncompressed = 0, is_bamout = 0, slx2sngr = 0;\n-\tint of_type = BAM_OFDEC, is_long_help = 0;\n-\tsamfile_t *in = 0, *out = 0;\n-\tchar in_mode[5], out_mode[5], *fn_out = 0, *fn_list = 0, *fn_ref = 0, *fn_rg = 0;\n-\n-\t/* parse command-line options */\n-\tstrcpy(in_mode, "r"); strcpy(out_mode, "w");\n-\twhile ((c = getopt(argc, argv, "Sbt:hHo:q:f:F:ul:r:xX?T:CR:")) >= 0) {\n-\t\tswitch (c) {\n-\t\tcase \'C\': slx2sngr = 1; break;\n-\t\tcase \'S\': is_bamin = 0; break;\n-\t\tcase \'b\': is_bamout = 1; break;\n-\t\tcase \'t\': fn_list = strdup(optarg); is_bamin = 0; break;\n-\t\tcase \'h\': is_header = 1; break;\n-\t\tcase \'H\': is_header_only = 1; break;\n-\t\tcase \'o\': fn_out = strdup(optarg); break;\n-\t\tcase \'f\': g_flag_on = strtol(optarg, 0, 0); break;\n-\t\tcase \'F\': g_flag_off = strtol(optarg, 0, 0); break;\n-\t\tcase \'q\': g_min_mapQ = atoi(optarg); break;\n-\t\tcase \'u\': is_uncompressed = 1; break;\n-\t\tcase \'l\': g_library = strdup(optarg); break;\n-\t\tcase \'r\': g_rg = strdup(optarg); break;\n-\t\tcase \'R\': fn_rg = strdup(optarg); break;\n-\t\tcase \'x\': of_type = BAM_OFHEX; break;\n-\t\tcase \'X\': of_type = BAM_OFSTR; break;\n-\t\tcase \'?\': is_long_help = 1; break;\n-\t\tcase \'T\': fn_ref = strdup(optarg); is_bamin = 0; break;\n-\t\tdefault: return usage(is_long_help);\n-\t\t}\n-\t}\n-\tif (is_uncompressed) is_bamout = 1;\n-\tif (is_header_only) is_header = 1;\n-\tif (is_bamout) strcat(out_mode, "b");\n-\telse {\n-\t\tif (of_type == BAM_OFHEX) strcat(out_mode, "x");\n-\t\telse if (of_type == BAM_OFSTR) strcat(out_mode, "X");\n-\t}\n-\tif 
(is_bamin) strcat(in_mode, "b");\n-\tif (is_header) strcat(out_mode, "h");\n-\tif (is_uncompressed) strcat(out_mode, "u");\n-\tif (argc == optind) return usage(is_long_help); // potential memory leak...\n-\n-\t// read the list of read groups\n-\tif (fn_rg) {\n-\t\tFILE *fp_rg;\n-\t\tchar buf[1024];\n-\t\tint ret;\n-\t\tg_rghash = kh_init(rg);\n-\t\tfp_rg = fopen(fn_rg, "r");\n-\t\twhile (!feof(fp_rg) && fscanf(fp_rg, "%s", buf) > 0) // this is not a good style, but bear me...\n-\t\t\tkh_put(rg, g_rghash, strdup(buf), &ret); // we\'d better check duplicates...\n-\t\tfclose(fp_rg);\n-\t}\n-\n-\t// generate the fn_list if necessary\n-\tif (fn_list == 0 && fn_ref) fn_list = samfaipath(fn_ref);\n-\t// open file handlers\n-\tif ((in = samopen(arg'..b'rr, "[main_samview] fail to get the reference name. Continue anyway.\\n");\n-\t\t\t\tcontinue;\n-\t\t\t}\n-\t\t\tbam_fetch(in->x.bam, idx, tid, beg, end, out, view_func); // fetch alignments\n-\t\t}\n-\t\tbam_index_destroy(idx); // destroy the BAM index\n-\t}\n-\n-view_end:\n-\t// close files, free and return\n-\tfree(fn_list); free(fn_ref); free(fn_out); free(g_library); free(g_rg); free(fn_rg);\n-\tif (g_rghash) {\n-\t\tkhint_t k;\n-\t\tfor (k = 0; k < kh_end(g_rghash); ++k)\n-\t\t\tif (kh_exist(g_rghash, k)) free((char*)kh_key(g_rghash, k));\n-\t\tkh_destroy(rg, g_rghash);\n-\t}\n-\tsamclose(in);\n-\tsamclose(out);\n-\treturn ret;\n-}\n-\n-static int usage(int is_long_help)\n-{\n-\tfprintf(stderr, "\\n");\n-\tfprintf(stderr, "Usage: samtools view [options] <in.bam>|<in.sam> [region1 [...]]\\n\\n");\n-\tfprintf(stderr, "Options: -b output BAM\\n");\n-\tfprintf(stderr, " -h print header for the SAM output\\n");\n-\tfprintf(stderr, " -H print header only (no alignments)\\n");\n-\tfprintf(stderr, " -S input is SAM\\n");\n-\tfprintf(stderr, " -u uncompressed BAM output (force -b)\\n");\n-\tfprintf(stderr, " -x output FLAG in HEX (samtools-C specific)\\n");\n-\tfprintf(stderr, " -X output FLAG in string (samtools-C 
specific)\\n");\n-\tfprintf(stderr, " -t FILE list of reference names and lengths (force -S) [null]\\n");\n-\tfprintf(stderr, " -T FILE reference sequence file (force -S) [null]\\n");\n-\tfprintf(stderr, " -o FILE output file name [stdout]\\n");\n-\tfprintf(stderr, " -R FILE list of read groups to be outputted [null]\\n");\n-\tfprintf(stderr, " -f INT required flag, 0 for unset [0]\\n");\n-\tfprintf(stderr, " -F INT filtering flag, 0 for unset [0]\\n");\n-\tfprintf(stderr, " -q INT minimum mapping quality [0]\\n");\n-\tfprintf(stderr, " -l STR only output reads in library STR [null]\\n");\n-\tfprintf(stderr, " -r STR only output reads in read group STR [null]\\n");\n-\tfprintf(stderr, " -? longer help\\n");\n-\tfprintf(stderr, "\\n");\n-\tif (is_long_help)\n-\t\tfprintf(stderr, "Notes:\\n\\\n-\\n\\\n- 1. By default, this command assumes the file on the command line is in\\n\\\n- the BAM format and it prints the alignments in SAM. If `-t\' is\\n\\\n- applied, the input file is assumed to be in the SAM format. The\\n\\\n- file supplied with `-t\' is SPACE/TAB delimited with the first two\\n\\\n- fields of each line consisting of the reference name and the\\n\\\n- corresponding sequence length. The `.fai\' file generated by `faidx\'\\n\\\n- can be used here. This file may be empty if reads are unaligned.\\n\\\n-\\n\\\n- 2. SAM->BAM conversion: `samtools view -bT ref.fa in.sam.gz\'.\\n\\\n-\\n\\\n- 3. BAM->SAM conversion: `samtools view in.bam\'.\\n\\\n-\\n\\\n- 4. A region should be presented in one of the following formats:\\n\\\n- `chr1\', `chr2:1,000\' and `chr3:1000-2,000\'. When a region is\\n\\\n- specified, the input alignment file must be an indexed BAM file.\\n\\\n-\\n\\\n- 5. Option `-u\' is preferred over `-b\' when the output is piped to\\n\\\n- another samtools command.\\n\\\n-\\n\\\n- 6. 
In a string FLAG, each character represents one bit with\\n\\\n- p=0x1 (paired), P=0x2 (properly paired), u=0x4 (unmapped),\\n\\\n- U=0x8 (mate unmapped), r=0x10 (reverse), R=0x20 (mate reverse)\\n\\\n- 1=0x40 (first), 2=0x80 (second), s=0x100 (not primary), \\n\\\n- f=0x200 (failure) and d=0x400 (duplicate). Note that `-x\' and\\n\\\n- `-X\' are samtools-C specific. Picard and older samtools do not\\n\\\n- support HEX or string flags.\\n\\\n-\\n");\n-\treturn 1;\n-}\n-\n-int main_import(int argc, char *argv[])\n-{\n-\tint argc2, ret;\n-\tchar **argv2;\n-\tif (argc != 4) {\n-\t\tfprintf(stderr, "Usage: bamtk import <in.ref_list> <in.sam> <out.bam>\\n");\n-\t\treturn 1;\n-\t}\n-\targc2 = 6;\n-\targv2 = calloc(6, sizeof(char*));\n-\targv2[0] = "import", argv2[1] = "-o", argv2[2] = argv[3], argv2[3] = "-bt", argv2[4] = argv[1], argv2[5] = argv[2];\n-\tret = main_samview(argc2, argv2);\n-\tfree(argv2);\n-\treturn ret;\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/setup.cfg --- a/chimerascan/pysam/setup.cfg Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,6 +0,0 @@ -[bdist_rpm] -doc_files = README doc/*.html ChangeLog -vendor = TDB -packager = TDB <email@email.com> -distribution-name = Red Hat Linux -requires = python |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/setup.py --- a/chimerascan/pysam/setup.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,116 +0,0 @@ -#!/usr/bin/python -''' - -pysam -***** - -''' - -import os, sys, glob, shutil, hashlib - -name = "pysam" - -# collect pysam version -sys.path.insert( 0, "pysam") -import version - -version = version.__version__ - -samtools_exclude = ( "bamtk.c", "razip.c", "bgzip.c", "errmod.c", "bam_reheader.c", "bam2bcf.c" ) -samtools_dest = os.path.abspath( "samtools" ) -tabix_exclude = ( "main.c", ) -tabix_dest = os.path.abspath( "tabix" ) - -# copy samtools source -if len(sys.argv) >= 2 and sys.argv[1] == "import": - if len(sys.argv) < 3: raise ValueError("missing PATH to samtools source directory") - if len(sys.argv) < 4: raise ValueError("missing PATH to tabix source directory") - - for destdir, srcdir, exclude in zip( - (samtools_dest, tabix_dest), - sys.argv[2:4], - (samtools_exclude, tabix_exclude)): - - srcdir = os.path.abspath( srcdir ) - if not os.path.exists( srcdir ): raise IOError( "samtools src dir `%s` does not exist." % srcdir ) - - cfiles = glob.glob( os.path.join( srcdir, "*.c" ) ) - hfiles = glob.glob( os.path.join( srcdir, "*.h" ) ) - ncopied = 0 - for new_file in cfiles + hfiles: - f = os.path.basename(new_file) - if f in exclude: continue - old_file = os.path.join( destdir, f ) - if os.path.exists( old_file ): - md5_old = hashlib.md5("".join(open(old_file,"r").readlines())).digest() - md5_new = hashlib.md5("".join(open(new_file,"r").readlines())).digest() - if md5_old == md5_new: continue - raise ValueError( "incompatible files for %s and %s" % (old_file, new_file )) - - shutil.copy( new_file, destdir ) - ncopied += 1 - print "installed latest source code from %s: %i files copied" % (srcdir, ncopied) - sys.exit(0) - -from distutils.core import setup, Extension -from Cython.Distutils import build_ext - -classifiers = """ -Development Status :: 2 - Alpha -Operating System :: MacOS :: MacOS X -Operating System :: Microsoft :: Windows :: Windows NT/2000 -Operating System :: OS Independent -Operating System :: POSIX -Operating System :: POSIX 
:: Linux -Operating System :: Unix -Programming Language :: Python -Topic :: Scientific/Engineering -Topic :: Scientific/Engineering :: Bioinformatics -""" - -samtools = Extension( - "csamtools", # name of extension - [ "pysam/csamtools.pyx" ] +\ - [ "pysam/%s" % x for x in ( - "pysam_util.c", )] +\ - glob.glob( os.path.join( "samtools", "*.c" ) ), - library_dirs=[], - include_dirs=[ "samtools", "pysam" ], - libraries=[ "z", ], - language="c", - define_macros = [('FILE_OFFSET_BITS','64'), - ('_USE_KNETFILE','')], - ) - -tabix = Extension( - "ctabix", # name of extension - [ "pysam/ctabix.pyx" ] +\ - [ "pysam/%s" % x for x in ()] +\ - glob.glob( os.path.join( "tabix", "*.c" ) ), - library_dirs=[], - include_dirs=[ "tabix", "pysam" ], - libraries=[ "z", ], - language="c", - ) - -metadata = { - 'name': name, - 'version': version, - 'description': "pysam", - 'long_description': __doc__, - 'author': "Andreas Heger", - 'author_email': "andreas.heger@gmail.com", - 'license': "MIT", - 'platforms': "ALL", - 'url': "http://code.google.com/p/pysam/", - 'py_modules': [ - "pysam/__init__", - "pysam/Pileup", - "pysam/namedtuple", - "pysam/version" ], - 'ext_modules': [samtools, tabix], - 'cmdclass' : {'build_ext': build_ext}, - } - -if __name__=='__main__': - dist = setup(**metadata) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/bam_endian.h --- a/chimerascan/pysam/tabix/bam_endian.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,42 +0,0 @@ -#ifndef BAM_ENDIAN_H -#define BAM_ENDIAN_H - -#include <stdint.h> - -static inline int bam_is_big_endian() -{ - long one= 1; - return !(*((char *)(&one))); -} -static inline uint16_t bam_swap_endian_2(uint16_t v) -{ - return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); -} -static inline void *bam_swap_endian_2p(void *x) -{ - *(uint16_t*)x = bam_swap_endian_2(*(uint16_t*)x); - return x; -} -static inline uint32_t bam_swap_endian_4(uint32_t v) -{ - v = ((v & 0x0000FFFFU) << 16) | (v >> 16); - return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); -} -static inline void *bam_swap_endian_4p(void *x) -{ - *(uint32_t*)x = bam_swap_endian_4(*(uint32_t*)x); - return x; -} -static inline uint64_t bam_swap_endian_8(uint64_t v) -{ - v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); - v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); - return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); -} -static inline void *bam_swap_endian_8p(void *x) -{ - *(uint64_t*)x = bam_swap_endian_8(*(uint64_t*)x); - return x; -} - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/bgzf.c --- a/chimerascan/pysam/tabix/bgzf.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,676 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology\n-\n- Permission is hereby granted, free of charge, to any person obtaining a copy\n- of this software and associated documentation files (the "Software"), to deal\n- in the Software without restriction, including without limitation the rights\n- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell\n- copies of the Software, and to permit persons to whom the Software is\n- furnished to do so, subject to the following conditions:\n-\n- The above copyright notice and this permission notice shall be included in\n- all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR\n- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,\n- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE\n- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER\n- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,\n- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN\n- THE SOFTWARE.\n-*/\n-\n-/*\n- 2009-06-29 by lh3: cache recent uncompressed blocks.\n- 2009-06-25 by lh3: optionally use my knetfile library to access file on a FTP.\n- 2009-06-12 by lh3: support a mode string like "wu" where \'u\' for uncompressed output */\n-\n-#include <stdio.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <unistd.h>\n-#include <fcntl.h>\n-#include <sys/types.h>\n-#include <sys/stat.h>\n-#include "bgzf.h"\n-\n-#include "khash.h"\n-typedef struct {\n-\tint size;\n-\tuint8_t *block;\n-\tint64_t end_offset;\n-} cache_t;\n-KHASH_MAP_INIT_INT64(cache, cache_t)\n-\n-#if defined(_WIN32) || defined(_MSC_VER)\n-#define ftello(fp) ftell(fp)\n-#define fseeko(fp, offset, whence) fseek(fp, offset, whence)\n-#else\n-extern off_t ftello(FILE *stream);\n-extern int fseeko(FILE *stream, off_t 
offset, int whence);\n-#endif\n-\n-typedef int8_t bgzf_byte_t;\n-\n-static const int DEFAULT_BLOCK_SIZE = 64 * 1024;\n-static const int MAX_BLOCK_SIZE = 64 * 1024;\n-\n-static const int BLOCK_HEADER_LENGTH = 18;\n-static const int BLOCK_FOOTER_LENGTH = 8;\n-\n-static const int GZIP_ID1 = 31;\n-static const int GZIP_ID2 = 139;\n-static const int CM_DEFLATE = 8;\n-static const int FLG_FEXTRA = 4;\n-static const int OS_UNKNOWN = 255;\n-static const int BGZF_ID1 = 66; // \'B\'\n-static const int BGZF_ID2 = 67; // \'C\'\n-static const int BGZF_LEN = 2;\n-static const int BGZF_XLEN = 6; // BGZF_LEN+4\n-\n-static const int GZIP_WINDOW_BITS = -15; // no zlib header\n-static const int Z_DEFAULT_MEM_LEVEL = 8;\n-\n-\n-inline\n-void\n-packInt16(uint8_t* buffer, uint16_t value)\n-{\n- buffer[0] = value;\n- buffer[1] = value >> 8;\n-}\n-\n-inline\n-int\n-unpackInt16(const uint8_t* buffer)\n-{\n- return (buffer[0] | (buffer[1] << 8));\n-}\n-\n-inline\n-void\n-packInt32(uint8_t* buffer, uint32_t value)\n-{\n- buffer[0] = value;\n- buffer[1] = value >> 8;\n- buffer[2] = value >> 16;\n- buffer[3] = value >> 24;\n-}\n-\n-static inline\n-int\n-bgzf_min(int x, int y)\n-{\n- return (x < y) ? 
x : y;\n-}\n-\n-static\n-void\n-report_error(BGZF* fp, const char* message) {\n- fp->error = message;\n-}\n-\n-static BGZF *bgzf_read_init()\n-{\n-\tBGZF *fp;\n-\tfp = calloc(1, sizeof(BGZF));\n- fp->uncompressed_block_size = MAX_BLOCK_SIZE;\n- fp->uncompressed_block = malloc(MAX_BLOCK_SIZE);\n- fp->compressed_block_size = MAX_BLOCK_SIZE;\n- fp->compressed_block = malloc(MAX_BLOCK_SIZE);\n-\tfp->cache_size = 0;\n-\tfp->cache = kh_init(cache);\n-\treturn fp;\n-}\n-\n-static\n-BGZF*\n-open_read(int fd)\n-{\n-#ifdef _USE_KNETFILE\n- knetFile *file = knet_dopen(fd, "r");\n-#else\n- FILE* file = fdopen(fd, "r");\n-#endif\n- BGZF* fp;\n-\tif (file == 0) return 0;\n-\tfp = bgzf_read_init();\n- fp->file_descriptor = fd;\n- fp->open_mode = \'r\';\n-#ifdef _USE_KNETFILE\n- fp->x.fpr = file;\n-#else\n- fp->file = file;\n-#endif\n- return fp;\n-}\n-\n-static\n-BGZF*\n-open_write(int'..b'nt\n-flush_block(BGZF* fp)\n-{\n- while (fp->block_offset > 0) {\n- int block_length = deflate_block(fp, fp->block_offset);\n- if (block_length < 0) {\n- return -1;\n- }\n-#ifdef _USE_KNETFILE\n- int count = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);\n-#else\n- int count = fwrite(fp->compressed_block, 1, block_length, fp->file);\n-#endif\n- if (count != block_length) {\n- report_error(fp, "write failed");\n- return -1;\n- }\n- fp->block_address += block_length;\n- }\n- return 0;\n-}\n-\n-int\n-bgzf_write(BGZF* fp, const void* data, int length)\n-{\n- if (fp->open_mode != \'w\') {\n- report_error(fp, "file not open for writing");\n- return -1;\n- }\n-\n- if (fp->uncompressed_block == NULL) {\n- fp->uncompressed_block = malloc(fp->uncompressed_block_size);\n- }\n-\n- const bgzf_byte_t* input = data;\n- int block_length = fp->uncompressed_block_size;\n- int bytes_written = 0;\n- while (bytes_written < length) {\n- int copy_length = bgzf_min(block_length - fp->block_offset, length - bytes_written);\n- bgzf_byte_t* buffer = fp->uncompressed_block;\n- memcpy(buffer + fp->block_offset, 
input, copy_length);\n- fp->block_offset += copy_length;\n- input += copy_length;\n- bytes_written += copy_length;\n- if (fp->block_offset == block_length) {\n- if (flush_block(fp) != 0) {\n- break;\n- }\n- }\n- }\n- return bytes_written;\n-}\n-\n-int\n-bgzf_close(BGZF* fp)\n-{\n- if (fp->open_mode == \'w\') {\n- if (flush_block(fp) != 0) {\n- return -1;\n- }\n-\t\t{ // add an empty block\n-\t\t\tint count, block_length = deflate_block(fp, 0);\n-#ifdef _USE_KNETFILE\n-\t\t\tcount = fwrite(fp->compressed_block, 1, block_length, fp->x.fpw);\n-#else\n-\t\t\tcount = fwrite(fp->compressed_block, 1, block_length, fp->file);\n-#endif\n-\t\t}\n-#ifdef _USE_KNETFILE\n- if (fflush(fp->x.fpw) != 0) {\n-#else\n- if (fflush(fp->file) != 0) {\n-#endif\n- report_error(fp, "flush failed");\n- return -1;\n- }\n- }\n- if (fp->owned_file) {\n-#ifdef _USE_KNETFILE\n-\t\tint ret;\n-\t\tif (fp->open_mode == \'w\') ret = fclose(fp->x.fpw);\n-\t\telse ret = knet_close(fp->x.fpr);\n- if (ret != 0) return -1;\n-#else\n- if (fclose(fp->file) != 0) {\n- return -1;\n- }\n-#endif\n- }\n- free(fp->uncompressed_block);\n- free(fp->compressed_block);\n-\tfree_cache(fp);\n- free(fp);\n- return 0;\n-}\n-\n-void bgzf_set_cache_size(BGZF *fp, int cache_size)\n-{\n-\tif (fp) fp->cache_size = cache_size;\n-}\n-\n-int bgzf_check_EOF(BGZF *fp)\n-{\n-\tstatic uint8_t magic[28] = "\\037\\213\\010\\4\\0\\0\\0\\0\\0\\377\\6\\0\\102\\103\\2\\0\\033\\0\\3\\0\\0\\0\\0\\0\\0\\0\\0\\0";\n-\tuint8_t buf[28];\n-\toff_t offset;\n-#ifdef _USE_KNETFILE\n-\toffset = knet_tell(fp->x.fpr);\n-\tif (knet_seek(fp->x.fpr, -28, SEEK_END) != 0) return -1;\n-\tknet_read(fp->x.fpr, buf, 28);\n-\tknet_seek(fp->x.fpr, offset, SEEK_SET);\n-#else\n-\toffset = ftello(fp->file);\n-\tif (fseeko(fp->file, -28, SEEK_END) != 0) return -1;\n-\tfread(buf, 1, 28, fp->file);\n-\tfseeko(fp->file, offset, SEEK_SET);\n-#endif\n-\treturn (memcmp(magic, buf, 28) == 0)? 
1 : 0;\n-}\n-\n-int64_t\n-bgzf_seek(BGZF* fp, int64_t pos, int where)\n-{\n- if (fp->open_mode != \'r\') {\n- report_error(fp, "file not open for read");\n- return -1;\n- }\n- if (where != SEEK_SET) {\n- report_error(fp, "unimplemented seek option");\n- return -1;\n- }\n- int block_offset = pos & 0xFFFF;\n- int64_t block_address = (pos >> 16) & 0xFFFFFFFFFFFFLL;\n-#ifdef _USE_KNETFILE\n- if (knet_seek(fp->x.fpr, block_address, SEEK_SET) != 0) {\n-#else\n- if (fseeko(fp->file, block_address, SEEK_SET) != 0) {\n-#endif\n- report_error(fp, "seek failed");\n- return -1;\n- }\n- fp->block_length = 0; // indicates current block is not loaded\n- fp->block_address = block_address;\n- fp->block_offset = block_offset;\n- return 0;\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/bgzf.h --- a/chimerascan/pysam/tabix/bgzf.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,156 +0,0 @@ -/* The MIT License - - Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. -*/ - -#ifndef __BGZF_H -#define __BGZF_H - -#include <stdint.h> -#include <stdio.h> -#include <stdbool.h> -#include <zlib.h> -#ifdef _USE_KNETFILE -#include "knetfile.h" -#endif - -//typedef int8_t bool; - -typedef struct { - int file_descriptor; - char open_mode; // 'r' or 'w' - bool owned_file, is_uncompressed; -#ifdef _USE_KNETFILE - union { - knetFile *fpr; - FILE *fpw; - } x; -#else - FILE* file; -#endif - int uncompressed_block_size; - int compressed_block_size; - void* uncompressed_block; - void* compressed_block; - int64_t block_address; - int block_length; - int block_offset; - int cache_size; - const char* error; - void *cache; // a pointer to a hash table -} BGZF; - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Open an existing file descriptor for reading or writing. 
- * Mode must be either "r" or "w". - * A subsequent bgzf_close will not close the file descriptor. - * Returns null on error. - */ -BGZF* bgzf_fdopen(int fd, const char* __restrict mode); - -/* - * Open the specified file for reading or writing. - * Mode must be either "r" or "w". - * Returns null on error. - */ -BGZF* bgzf_open(const char* path, const char* __restrict mode); - -/* - * Close the BGZ file and free all associated resources. - * Does not close the underlying file descriptor if created with bgzf_fdopen. - * Returns zero on success, -1 on error. - */ -int bgzf_close(BGZF* fp); - -/* - * Read up to length bytes from the file storing into data. - * Returns the number of bytes actually read. - * Returns zero on end of file. - * Returns -1 on error. - */ -int bgzf_read(BGZF* fp, void* data, int length); - -/* - * Write length bytes from data to the file. - * Returns the number of bytes written. - * Returns -1 on error. - */ -int bgzf_write(BGZF* fp, const void* data, int length); - -/* - * Return a virtual file pointer to the current location in the file. - * No interpetation of the value should be made, other than a subsequent - * call to bgzf_seek can be used to position the file at the same point. - * Return value is non-negative on success. - * Returns -1 on error. - */ -#define bgzf_tell(fp) ((fp->block_address << 16) | (fp->block_offset & 0xFFFF)) - -/* - * Set the file to read from the location specified by pos, which must - * be a value previously returned by bgzf_tell for this file (but not - * necessarily one returned by this file handle). - * The where argument must be SEEK_SET. - * Seeking on a file opened for write is not supported. - * Returns zero on success, -1 on error. - */ -int64_t bgzf_seek(BGZF* fp, int64_t pos, int where); - -/* - * Set the cache size. Zero to disable. By default, caching is - * disabled. The recommended cache size for frequent random access is - * about 8M bytes. 
- */ -void bgzf_set_cache_size(BGZF *fp, int cache_size); - -int bgzf_check_EOF(BGZF *fp); - -int bgzf_read_block(BGZF* fp); - -#ifdef __cplusplus -} -#endif - -static inline int bgzf_getc(BGZF *fp) -{ - int c; - if (fp->block_offset >= fp->block_length) { - if (bgzf_read_block(fp) != 0) return -2; /* error */ - if (fp->block_length == 0) return -1; /* end-of-file */ - } - c = ((unsigned char*)fp->uncompressed_block)[fp->block_offset++]; - if (fp->block_offset == fp->block_length) { -#ifdef _USE_KNETFILE - fp->block_address = knet_tell(fp->x.fpr); -#else - fp->block_address = ftello(fp->file); -#endif - fp->block_offset = 0; - fp->block_length = 0; - } - return c; -} - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/bgzip.c --- a/chimerascan/pysam/tabix/bgzip.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,201 +0,0 @@ -/* The MIT License - - Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology - - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in - all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - THE SOFTWARE. 
-*/ - -#include <stdlib.h> -#include <string.h> -#include <stdio.h> -#include <fcntl.h> -#include <unistd.h> -#include <errno.h> -#include <sys/select.h> -#include <sys/stat.h> -#include "bgzf.h" - -static const int WINDOW_SIZE = 64 * 1024; - -static int bgzip_main_usage() -{ - fprintf(stderr, "\n"); - fprintf(stderr, "Usage: bgzip [options] [file] ...\n\n"); - fprintf(stderr, "Options: -c write on standard output, keep original files unchanged\n"); - fprintf(stderr, " -d decompress\n"); - fprintf(stderr, " -f overwrite files without asking\n"); - fprintf(stderr, " -b INT decompress at virtual file pointer INT\n"); - fprintf(stderr, " -s INT decompress INT bytes in the uncompressed file\n"); - fprintf(stderr, " -h give this help\n"); - fprintf(stderr, "\n"); - return 1; -} - -static int write_open(const char *fn, int is_forced) -{ - int fd = -1; - char c; - if (!is_forced) { - if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC | O_EXCL, 0666)) < 0 && errno == EEXIST) { - fprintf(stderr, "[bgzip] %s already exists; do you wish to overwrite (y or n)? 
", fn); - scanf("%c", &c); - if (c != 'Y' && c != 'y') { - fprintf(stderr, "[bgzip] not overwritten\n"); - exit(1); - } - } - } - if (fd < 0) { - if ((fd = open(fn, O_WRONLY | O_CREAT | O_TRUNC, 0666)) < 0) { - fprintf(stderr, "[bgzip] %s: Fail to write\n", fn); - exit(1); - } - } - return fd; -} - -static void fail(BGZF* fp) -{ - fprintf(stderr, "Error: %s\n", fp->error); - exit(1); -} - -int main(int argc, char **argv) -{ - int c, compress, pstdout, is_forced; - BGZF *fp; - void *buffer; - long start, end, size; - - compress = 1; pstdout = 0; start = 0; size = -1; end = -1; is_forced = 0; - while((c = getopt(argc, argv, "cdhfb:s:")) >= 0){ - switch(c){ - case 'h': return bgzip_main_usage(); - case 'd': compress = 0; break; - case 'c': pstdout = 1; break; - case 'b': start = atol(optarg); break; - case 's': size = atol(optarg); break; - case 'f': is_forced = 1; break; - } - } - if (size >= 0) end = start + size; - if (end >= 0 && end < start) { - fprintf(stderr, "[bgzip] Illegal region: [%ld, %ld]\n", start, end); - return 1; - } - if (compress == 1) { - struct stat sbuf; - int f_src = fileno(stdin); - int f_dst = fileno(stdout); - - if ( argc>optind ) - { - if ( stat(argv[optind],&sbuf)<0 ) - { - fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); - return 1; - } - - if ((f_src = open(argv[optind], O_RDONLY)) < 0) { - fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); - return 1; - } - - if (pstdout) - f_dst = fileno(stdout); - else - { - char *name = malloc(strlen(argv[optind]) + 5); - strcpy(name, argv[optind]); - strcat(name, ".gz"); - f_dst = write_open(name, is_forced); - if (f_dst < 0) return 1; - free(name); - } - } - else if (!pstdout && isatty(fileno((FILE *)stdout)) ) - return bgzip_main_usage(); - - fp = bgzf_fdopen(f_dst, "w"); - buffer = malloc(WINDOW_SIZE); - while ((c = read(f_src, buffer, WINDOW_SIZE)) > 0) - if (bgzf_write(fp, buffer, c) < 0) fail(fp); - // f_dst will be closed here - if (bgzf_close(fp) < 0) 
fail(fp); - if (argc > optind) unlink(argv[optind]); - free(buffer); - close(f_src); - return 0; - } else { - struct stat sbuf; - int f_dst; - - if ( argc>optind ) - { - if ( stat(argv[optind],&sbuf)<0 ) - { - fprintf(stderr, "[bgzip] %s: %s\n", strerror(errno), argv[optind]); - return 1; - } - char *name; - int len = strlen(argv[optind]); - if ( strcmp(argv[optind]+len-3,".gz") ) - { - fprintf(stderr, "[bgzip] %s: unknown suffix -- ignored\n", argv[optind]); - return 1; - } - fp = bgzf_open(argv[optind], "r"); - if (fp == NULL) { - fprintf(stderr, "[bgzip] Could not open file: %s\n", argv[optind]); - return 1; - } - - name = strdup(argv[optind]); - name[strlen(name) - 3] = '\0'; - f_dst = write_open(name, is_forced); - free(name); - } - else if (!pstdout && isatty(fileno((FILE *)stdin)) ) - return bgzip_main_usage(); - else - { - f_dst = fileno(stdout); - fp = bgzf_fdopen(fileno(stdin), "r"); - if (fp == NULL) { - fprintf(stderr, "[bgzip] Could not read from stdin: %s\n", strerror(errno)); - return 1; - } - } - buffer = malloc(WINDOW_SIZE); - if (bgzf_seek(fp, start, SEEK_SET) < 0) fail(fp); - while (1) { - if (end < 0) c = bgzf_read(fp, buffer, WINDOW_SIZE); - else c = bgzf_read(fp, buffer, (end - start > WINDOW_SIZE)? WINDOW_SIZE:(end - start)); - if (c == 0) break; - if (c < 0) fail(fp); - start += c; - write(f_dst, buffer, c); - if (end >= 0 && start >= end) break; - } - free(buffer); - if (bgzf_close(fp) < 0) fail(fp); - if (!pstdout) unlink(argv[optind]); - return 0; - } -} |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/index.c --- a/chimerascan/pysam/tabix/index.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,954 +0,0 @@\n-#include <ctype.h>\n-#include <assert.h>\n-#include <sys/stat.h>\n-#include "khash.h"\n-#include "ksort.h"\n-#include "kstring.h"\n-#include "bam_endian.h"\n-#ifdef _USE_KNETFILE\n-#include "knetfile.h"\n-#endif\n-#include "tabix.h"\n-\n-#define TAD_MIN_CHUNK_GAP 32768\n-// 1<<14 is the size of minimum bin.\n-#define TAD_LIDX_SHIFT 14\n-\n-typedef struct {\n-\tuint64_t u, v;\n-} pair64_t;\n-\n-#define pair64_lt(a,b) ((a).u < (b).u)\n-KSORT_INIT(off, pair64_t, pair64_lt)\n-\n-typedef struct {\n-\tuint32_t m, n;\n-\tpair64_t *list;\n-} ti_binlist_t;\n-\n-typedef struct {\n-\tint32_t n, m;\n-\tuint64_t *offset;\n-} ti_lidx_t;\n-\n-KHASH_MAP_INIT_INT(i, ti_binlist_t)\n-KHASH_MAP_INIT_STR(s, int)\n-\n-struct __ti_index_t {\n-\tti_conf_t conf;\n-\tint32_t n, max;\n-\tkhash_t(s) *tname;\n-\tkhash_t(i) **index;\n-\tti_lidx_t *index2;\n-};\n-\n-struct __ti_iter_t {\n-\tint from_first; // read from the first record; no random access\n-\tint tid, beg, end, n_off, i, finished;\n-\tuint64_t curr_off;\n-\tkstring_t str;\n-\tconst ti_index_t *idx;\n-\tpair64_t *off;\n-};\n-\n-typedef struct {\n-\tint tid, beg, end, bin;\n-} ti_intv_t;\n-\n-ti_conf_t ti_conf_gff = { 0, 1, 4, 5, \'#\', 0 };\n-ti_conf_t ti_conf_bed = { TI_FLAG_UCSC, 1, 2, 3, \'#\', 0 };\n-ti_conf_t ti_conf_psltbl = { TI_FLAG_UCSC, 15, 17, 18, \'#\', 0 };\n-ti_conf_t ti_conf_sam = { TI_PRESET_SAM, 3, 4, 0, \'@\', 0 };\n-ti_conf_t ti_conf_vcf = { TI_PRESET_VCF, 1, 2, 0, \'#\', 0 };\n-\n-/***************\n- * read a line *\n- ***************/\n-\n-/*\n-int ti_readline(BGZF *fp, kstring_t *str)\n-{\n-\tint c, l = 0;\n-\tstr->l = 0;\n-\twhile ((c = bgzf_getc(fp)) >= 0 && c != \'\\n\') {\n-\t\t++l;\n-\t\tif (c != \'\\r\') kputc(c, str);\n-\t}\n-\tif (c < 0 && l == 0) return -1; // end of file\n-\treturn str->l;\n-}\n-*/\n-\n-/* Below is a faster implementation largely equivalent to the one\n- * commented out above. 
*/\n-int ti_readline(BGZF *fp, kstring_t *str)\n-{\n-\tint l, state = 0;\n-\tunsigned char *buf = (unsigned char*)fp->uncompressed_block;\n-\tstr->l = 0;\n-\tdo {\n-\t\tif (fp->block_offset >= fp->block_length) {\n-\t\t\tif (bgzf_read_block(fp) != 0) { state = -2; break; }\n-\t\t\tif (fp->block_length == 0) { state = -1; break; }\n-\t\t}\n-\t\tfor (l = fp->block_offset; l < fp->block_length && buf[l] != \'\\n\'; ++l);\n-\t\tif (l < fp->block_length) state = 1;\n-\t\tl -= fp->block_offset;\n-\t\tif (str->l + l + 1 >= str->m) {\n-\t\t\tstr->m = str->l + l + 2;\n-\t\t\tkroundup32(str->m);\n-\t\t\tstr->s = (char*)realloc(str->s, str->m);\n-\t\t}\n-\t\tmemcpy(str->s + str->l, buf + fp->block_offset, l);\n-\t\tstr->l += l;\n-\t\tfp->block_offset += l + 1;\n-\t\tif (fp->block_offset >= fp->block_length) {\n-#ifdef _USE_KNETFILE\n-\t\t\tfp->block_address = knet_tell(fp->x.fpr);\n-#else\n-\t\t\tfp->block_address = ftello(fp->file);\n-#endif\n-\t\t\tfp->block_offset = 0;\n-\t\t\tfp->block_length = 0;\n-\t\t} \n-\t} while (state == 0);\n-\tif (str->l == 0 && state < 0) return state;\n-\tstr->s[str->l] = 0;\n-\treturn str->l;\n-}\n-\n-/*************************************\n- * get the interval from a data line *\n- *************************************/\n-\n-static inline int ti_reg2bin(uint32_t beg, uint32_t end)\n-{\n-\t--end;\n-\tif (beg>>14 == end>>14) return 4681 + (beg>>14);\n-\tif (beg>>17 == end>>17) return 585 + (beg>>17);\n-\tif (beg>>20 == end>>20) return 73 + (beg>>20);\n-\tif (beg>>23 == end>>23) return 9 + (beg>>23);\n-\tif (beg>>26 == end>>26) return 1 + (beg>>26);\n-\treturn 0;\n-}\n-\n-static int get_tid(ti_index_t *idx, const char *ss)\n-{\n-\tkhint_t k;\n-\tint tid;\n-\tk = kh_get(s, idx->tname, ss);\n-\tif (k == kh_end(idx->tname)) { // a new target sequence\n-\t\tint ret, size;\n-\t\t// update idx->n, ->max, ->index and ->index2\n-\t\tif (idx->n == idx->max) {\n-\t\t\tidx->max = idx->max? 
idx->max<<1 : 8;\n-\t\t\tidx->index = realloc(idx->index, idx->max * sizeof(void*));\n-\t\t\tidx->index2 = realloc(idx->index2, idx->max * sizeof(ti_lidx_t));\n-\t\t}\n-\t\tmemset(&idx->index2[idx->n], 0, sizeof(ti_lidx_t));\n-\t\tidx->index[idx->n++] = kh_init(i);\n-\t\t// update ->tname\n-\t\ttid = size = kh_size(idx->tname);\n-\t\tk = kh_put(s, idx->tname, strdup(ss), &ret);\n-\t\tkh_value(idx->tname, k) = size;\n-\t\tassert(idx->n == kh_'..b'n_off, off);\n-\t\t// resolve completely contained adjacent blocks\n-\t\tfor (i = 1, l = 0; i < n_off; ++i)\n-\t\t\tif (off[l].v < off[i].v)\n-\t\t\t\toff[++l] = off[i];\n-\t\tn_off = l + 1;\n-\t\t// resolve overlaps between adjacent blocks; this may happen due to the merge in indexing\n-\t\tfor (i = 1; i < n_off; ++i)\n-\t\t\tif (off[i-1].v >= off[i].u) off[i-1].v = off[i].u;\n-\t\t{ // merge adjacent blocks\n-\t\t\tfor (i = 1, l = 0; i < n_off; ++i) {\n-\t\t\t\tif (off[l].v>>16 == off[i].u>>16) off[l].v = off[i].v;\n-\t\t\t\telse off[++l] = off[i];\n-\t\t\t}\n-\t\t\tn_off = l + 1;\n-\t\t}\n-\t}\n-\titer->n_off = n_off; iter->off = off;\n-\treturn iter;\n-}\n-\n-const char *ti_iter_read(BGZF *fp, ti_iter_t iter, int *len)\n-{\n-\tif (iter->finished) return 0;\n-\tif (iter->from_first) {\n-\t\tint ret;\n-\t\tif ((ret = ti_readline(fp, &iter->str)) < 0) {\n-\t\t\titer->finished = 1;\n-\t\t\treturn 0;\n-\t\t} else {\n-\t\t\tif (len) *len = iter->str.l;\n-\t\t\treturn iter->str.s;\n-\t\t}\n-\t}\n-\tif (iter->n_off == 0) return 0;\n-\twhile (1) {\n-\t\tint ret;\n-\t\tif (iter->curr_off == 0 || iter->curr_off >= iter->off[iter->i].v) { // then jump to the next chunk\n-\t\t\tif (iter->i == iter->n_off - 1) break; // no more chunks\n-\t\t\tif (iter->i >= 0) assert(iter->curr_off == iter->off[iter->i].v); // otherwise bug\n-\t\t\tif (iter->i < 0 || iter->off[iter->i].v != iter->off[iter->i+1].u) { // not adjacent chunks; then seek\n-\t\t\t\tbgzf_seek(fp, iter->off[iter->i+1].u, SEEK_SET);\n-\t\t\t\titer->curr_off = 
bgzf_tell(fp);\n-\t\t\t}\n-\t\t\t++iter->i;\n-\t\t}\n-\t\tif ((ret = ti_readline(fp, &iter->str)) >= 0) {\n-\t\t\tti_intv_t intv;\n-\t\t\titer->curr_off = bgzf_tell(fp);\n-\t\t\tif (iter->str.s[0] == iter->idx->conf.meta_char) continue;\n-\t\t\tget_intv((ti_index_t*)iter->idx, &iter->str, &intv);\n-\t\t\tif (intv.tid != iter->tid || intv.beg >= iter->end) break; // no need to proceed\n-\t\t\telse if (intv.end > iter->beg && iter->end > intv.beg) {\n-\t\t\t\tif (len) *len = iter->str.l;\n-\t\t\t\treturn iter->str.s;\n-\t\t\t}\n-\t\t} else break; // end of file\n-\t}\n-\titer->finished = 1;\n-\treturn 0;\n-}\n-\n-void ti_iter_destroy(ti_iter_t iter)\n-{\n-\tif (iter) {\n-\t\tfree(iter->str.s); free(iter->off);\n-\t\tfree(iter);\n-\t}\n-}\n-\n-int ti_fetch(BGZF *fp, const ti_index_t *idx, int tid, int beg, int end, void *data, ti_fetch_f func)\n-{\n-\tti_iter_t iter;\n-\tconst char *s;\n-\tint len;\n-\titer = ti_iter_query(idx, tid, beg, end);\n-\twhile ((s = ti_iter_read(fp, iter, &len)) != 0)\n-\t\tfunc(len, s, data);\n-\tti_iter_destroy(iter);\n-\treturn 0;\n-}\n-\n-/*******************\n- * High-level APIs *\n- *******************/\n-\n-tabix_t *ti_open(const char *fn, const char *fnidx)\n-{\n-\ttabix_t *t;\n-\tBGZF *fp;\n-\tif ((fp = bgzf_open(fn, "r")) == 0) return 0;\n-\tt = calloc(1, sizeof(tabix_t));\n-\tt->fn = strdup(fn);\n-\tif (fnidx) t->fnidx = strdup(fnidx);\n-\tt->fp = fp;\n-\treturn t;\n-}\n-\n-void ti_close(tabix_t *t)\n-{\n-\tif (t) {\n-\t\tbgzf_close(t->fp);\n-\t\tif (t->idx) ti_index_destroy(t->idx);\n-\t\tfree(t->fn); free(t->fnidx);\n-\t\tfree(t);\n-\t}\n-}\n-\n-int ti_lazy_index_load(tabix_t *t)\n-{\n-\tif (t->idx == 0) { // load index\n-\t\tif (t->fnidx) t->idx = ti_index_load_local(t->fnidx);\n-\t\telse t->idx = ti_index_load(t->fn);\n-\t\tif (t->idx == 0) return -1; // fail to load index\n-\t}\n-\treturn 0;\n-}\n-\n-ti_iter_t ti_queryi(tabix_t *t, int tid, int beg, int end)\n-{\n-\tif (tid < 0) return ti_iter_first();\n-\tif 
(ti_lazy_index_load(t) != 0) return 0;\n-\treturn ti_iter_query(t->idx, tid, beg, end);\t\n-}\n-\n-ti_iter_t ti_querys(tabix_t *t, const char *reg)\n-{\n-\tint tid, beg, end;\n-\tif (reg == 0) return ti_iter_first();\n-\tif (ti_lazy_index_load(t) != 0) return 0;\n-\tif (ti_parse_region(t->idx, reg, &tid, &beg, &end) < 0) return 0;\n-\treturn ti_iter_query(t->idx, tid, beg, end);\n-}\n-\n-ti_iter_t ti_query(tabix_t *t, const char *name, int beg, int end)\n-{\n-\tint tid;\n-\tif (name == 0) return ti_iter_first();\n-\t// then need to load the index\n-\tif (ti_lazy_index_load(t) != 0) return 0;\n-\tif ((tid = ti_get_tid(t->idx, name)) < 0) return 0;\n-\treturn ti_iter_query(t->idx, tid, beg, end);\n-}\n-\n-const char *ti_read(tabix_t *t, ti_iter_t iter, int *len)\n-{\n-\treturn ti_iter_read(t->fp, iter, len);\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/khash.h --- a/chimerascan/pysam/tabix/khash.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,486 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Genome Research Ltd (GRL).\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Contact: Heng Li <lh3@sanger.ac.uk> */\n-\n-/*\n- An example:\n-\n-#include "khash.h"\n-KHASH_MAP_INIT_INT(32, char)\n-int main() {\n-\tint ret, is_missing;\n-\tkhiter_t k;\n-\tkhash_t(32) *h = kh_init(32);\n-\tk = kh_put(32, h, 5, &ret);\n-\tif (!ret) kh_del(32, h, k);\n-\tkh_value(h, k) = 10;\n-\tk = kh_get(32, h, 10);\n-\tis_missing = (k == kh_end(h));\n-\tk = kh_get(32, h, 5);\n-\tkh_del(32, h, k);\n-\tfor (k = kh_begin(h); k != kh_end(h); ++k)\n-\t\tif (kh_exist(h, k)) kh_value(h, k) = 1;\n-\tkh_destroy(32, h);\n-\treturn 0;\n-}\n-*/\n-\n-/*\n- 2008-09-19 (0.2.3):\n-\n-\t* Corrected the example\n-\t* Improved interfaces\n-\n- 2008-09-11 (0.2.2):\n-\n-\t* Improved speed a little in kh_put()\n-\n- 2008-09-10 (0.2.1):\n-\n-\t* Added kh_clear()\n-\t* Fixed a compiling error\n-\n- 2008-09-02 
(0.2.0):\n-\n-\t* Changed to token concatenation which increases flexibility.\n-\n- 2008-08-31 (0.1.2):\n-\n-\t* Fixed a bug in kh_get(), which has not been tested previously.\n-\n- 2008-08-31 (0.1.1):\n-\n-\t* Added destructor\n-*/\n-\n-\n-#ifndef __AC_KHASH_H\n-#define __AC_KHASH_H\n-\n-/*!\n- @header\n-\n- Generic hash table library.\n-\n- @copyright Heng Li\n- */\n-\n-#define AC_VERSION_KHASH_H "0.2.2"\n-\n-#include <stdint.h>\n-#include <stdlib.h>\n-#include <string.h>\n-\n-typedef uint32_t khint_t;\n-typedef khint_t khiter_t;\n-\n-#define __ac_HASH_PRIME_SIZE 32\n-static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =\n-{\n- 0ul, 3ul, 11ul, 23ul, 53ul,\n- 97ul, 193ul, 389ul, 769ul, 1543ul,\n- 3079ul, 6151ul, 12289ul, 24593ul, 49157ul,\n- 98317ul, 196613ul, 393241ul, 786433ul, 1572869ul,\n- 3145739ul, 6291469ul, 12582917ul, 25165843ul, 50331653ul,\n- 100663319ul, 201326611ul, 402653189ul, 805306457ul, 1610612741ul,\n- 3221225473ul, 4294967291ul\n-};\n-\n-#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)\n-#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)\n-#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)\n-#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))\n-#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))\n-#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))\n-#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))\n-\n-static const double __ac_HASH_UPPER = 0.77;\n-\n-#define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, __hash_equal) \\\n-\ttypedef struct {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkhint_t n_buckets, size, n_occupied, upper_bound;\t\t\t\t\\\n-\t\tuint32_t *flags;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkhkey_t *keys;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tkhval_t *vals;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t} kh_##name##_t;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic inline kh_##name##_t *kh_init_##name() 
{\t\t\t\t\t\t\\\n-\t\treturn (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t));\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tstatic inlin'..b'e, h, k) kh_get_##name(h, k)\n-\n-/*! @function\n- @abstract Remove a key from the hash table.\n- @param name Name of the hash table [symbol]\n- @param h Pointer to the hash table [khash_t(name)*]\n- @param k Iterator to the element to be deleted [khint_t]\n- */\n-#define kh_del(name, h, k) kh_del_##name(h, k)\n-\n-\n-/*! @function\n- @abstract Test whether a bucket contains data.\n- @param h Pointer to the hash table [khash_t(name)*]\n- @param x Iterator to the bucket [khint_t]\n- @return 1 if containing data; 0 otherwise [int]\n- */\n-#define kh_exist(h, x) (!__ac_iseither((h)->flags, (x)))\n-\n-/*! @function\n- @abstract Get key given an iterator\n- @param h Pointer to the hash table [khash_t(name)*]\n- @param x Iterator to the bucket [khint_t]\n- @return Key [type of keys]\n- */\n-#define kh_key(h, x) ((h)->keys[x])\n-\n-/*! @function\n- @abstract Get value given an iterator\n- @param h Pointer to the hash table [khash_t(name)*]\n- @param x Iterator to the bucket [khint_t]\n- @return Value [type of values]\n- @discussion For hash sets, calling this results in segfault.\n- */\n-#define kh_val(h, x) ((h)->vals[x])\n-\n-/*! @function\n- @abstract Alias of kh_val()\n- */\n-#define kh_value(h, x) ((h)->vals[x])\n-\n-/*! @function\n- @abstract Get the start iterator\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return The start iterator [khint_t]\n- */\n-#define kh_begin(h) (khint_t)(0)\n-\n-/*! @function\n- @abstract Get the end iterator\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return The end iterator [khint_t]\n- */\n-#define kh_end(h) ((h)->n_buckets)\n-\n-/*! @function\n- @abstract Get the number of elements in the hash table\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return Number of elements in the hash table [khint_t]\n- */\n-#define kh_size(h) ((h)->size)\n-\n-/*! 
@function\n- @abstract Get the number of buckets in the hash table\n- @param h Pointer to the hash table [khash_t(name)*]\n- @return Number of buckets in the hash table [khint_t]\n- */\n-#define kh_n_buckets(h) ((h)->n_buckets)\n-\n-/* More conenient interfaces */\n-\n-/*! @function\n- @abstract Instantiate a hash set containing integer keys\n- @param name Name of the hash table [symbol]\n- */\n-#define KHASH_SET_INIT_INT(name)\t\t\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, uint32_t, char, 0, kh_int_hash_func, kh_int_hash_equal)\n-\n-/*! @function\n- @abstract Instantiate a hash map containing integer keys\n- @param name Name of the hash table [symbol]\n- @param khval_t Type of values [type]\n- */\n-#define KHASH_MAP_INIT_INT(name, khval_t)\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, uint32_t, khval_t, 1, kh_int_hash_func, kh_int_hash_equal)\n-\n-/*! @function\n- @abstract Instantiate a hash map containing 64-bit integer keys\n- @param name Name of the hash table [symbol]\n- */\n-#define KHASH_SET_INIT_INT64(name)\t\t\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, uint64_t, char, 0, kh_int64_hash_func, kh_int64_hash_equal)\n-\n-/*! @function\n- @abstract Instantiate a hash map containing 64-bit integer keys\n- @param name Name of the hash table [symbol]\n- @param khval_t Type of values [type]\n- */\n-#define KHASH_MAP_INIT_INT64(name, khval_t)\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, uint64_t, khval_t, 1, kh_int64_hash_func, kh_int64_hash_equal)\n-\n-typedef const char *kh_cstr_t;\n-/*! @function\n- @abstract Instantiate a hash map containing const char* keys\n- @param name Name of the hash table [symbol]\n- */\n-#define KHASH_SET_INIT_STR(name)\t\t\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, kh_cstr_t, char, 0, kh_str_hash_func, kh_str_hash_equal)\n-\n-/*! 
@function\n- @abstract Instantiate a hash map containing const char* keys\n- @param name Name of the hash table [symbol]\n- @param khval_t Type of values [type]\n- */\n-#define KHASH_MAP_INIT_STR(name, khval_t)\t\t\t\t\t\t\t\t\\\n-\tKHASH_INIT(name, kh_cstr_t, khval_t, 1, kh_str_hash_func, kh_str_hash_equal)\n-\n-#endif /* __AC_KHASH_H */\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/knetfile.c --- a/chimerascan/pysam/tabix/knetfile.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,632 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Genome Research Ltd (GRL).\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Contact: Heng Li <lh3@sanger.ac.uk> */\n-\n-/* Probably I will not do socket programming in the next few years and\n- therefore I decide to heavily annotate this file, for Linux and\n- Windows as well. -lh3 */\n-\n-#include <time.h>\n-#include <stdio.h>\n-#include <ctype.h>\n-#include <stdlib.h>\n-#include <string.h>\n-#include <errno.h>\n-#include <unistd.h>\n-#include <sys/types.h>\n-\n-#ifdef _WIN32\n-#include <winsock.h>\n-#else\n-#include <netdb.h>\n-#include <arpa/inet.h>\n-#include <sys/socket.h>\n-#endif\n-\n-#include "knetfile.h"\n-\n-/* In winsock.h, the type of a socket is SOCKET, which is: "typedef\n- * u_int SOCKET". An invalid SOCKET is: "(SOCKET)(~0)", or signed\n- * integer -1. In knetfile.c, I use "int" for socket type\n- * throughout. 
This should be improved to avoid confusion.\n- *\n- * In Linux/Mac, recv() and read() do almost the same thing. You can see\n- * in the header file that netread() is simply an alias of read(). In\n- * Windows, however, they are different and using recv() is mandatory.\n- */\n-\n-/* This function tests if the file handler is ready for reading (or\n- * writing if is_read==0). */\n-static int socket_wait(int fd, int is_read)\n-{\n-\tfd_set fds, *fdr = 0, *fdw = 0;\n-\tstruct timeval tv;\n-\tint ret;\n-\ttv.tv_sec = 5; tv.tv_usec = 0; // 5 seconds time out\n-\tFD_ZERO(&fds);\n-\tFD_SET(fd, &fds);\n-\tif (is_read) fdr = &fds;\n-\telse fdw = &fds;\n-\tret = select(fd+1, fdr, fdw, 0, &tv);\n-#ifndef _WIN32\n-\tif (ret == -1) perror("select");\n-#else\n-\tif (ret == 0)\n-\t\tfprintf(stderr, "select time-out\\n");\n-\telse if (ret == SOCKET_ERROR)\n-\t\tfprintf(stderr, "select: %d\\n", WSAGetLastError());\n-#endif\n-\treturn ret;\n-}\n-\n-#ifndef _WIN32\n-/* This function does not work with Windows due to the lack of\n- * getaddrinfo() in winsock. It is addapted from an example in "Beej\'s\n- * Guide to Network Programming" (http://beej.us/guide/bgnet/). */\n-static int socket_connect(const char *host, const char *port)\n-{\n-#define __err_connect(func) do { perror(func); freeaddrinfo(res); return -1; } while (0)\n-\n-\tint on = 1, fd;\n-\tstruct linger lng = { 0, 0 };\n-\tstruct addrinfo hints, *res;\n-\tmemset(&hints, 0, sizeof(struct addrinfo));\n-\thints.ai_family = AF_UNSPEC;\n-\thints.ai_socktype = SOCK_STREAM;\n-\t/* In Unix/Mac, getaddrinfo() is the most convenient way to get\n-\t * server information. */\n-\tif (getaddrinfo(host, port, &hints, &res) != 0) __err_connect("getaddrinfo");\n-\tif ((fd = socket(res->ai_family, res->ai_socktype, res->ai_protocol)) == -1) __err_connect("socket");\n-\t/* The following two setsockopt() are used by ftplib\n-\t * (http://nbpfaus.net/~pfau/ftplib/). I am not sure if they\n-\t * necessary. 
*/\n-\tif (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == -1) __err_connect("setsockopt");\n-\tif (setsockopt(fd, SOL_SOCKET, SO_LINGER, &lng, sizeof(lng)) == -1) __err_connect("'..b'(fp);\n-\t\treturn 0;\n-\t}\n-\treturn fp;\n-}\n-\n-knetFile *knet_dopen(int fd, const char *mode)\n-{\n-\tknetFile *fp = (knetFile*)calloc(1, sizeof(knetFile));\n-\tfp->type = KNF_TYPE_LOCAL;\n-\tfp->fd = fd;\n-\treturn fp;\n-}\n-\n-off_t knet_read(knetFile *fp, void *buf, off_t len)\n-{\n-\toff_t l = 0;\n-\tif (fp->fd == -1) return 0;\n-\tif (fp->type == KNF_TYPE_FTP) {\n-\t\tif (fp->is_ready == 0) {\n-\t\t\tif (!fp->no_reconnect) kftp_reconnect(fp);\n-\t\t\tkftp_connect_file(fp);\n-\t\t}\n-\t} else if (fp->type == KNF_TYPE_HTTP) {\n-\t\tif (fp->is_ready == 0)\n-\t\t\tkhttp_connect_file(fp);\n-\t}\n-\tif (fp->type == KNF_TYPE_LOCAL) { // on Windows, the following block is necessary; not on UNIX\n-\t\toff_t rest = len, curr;\n-\t\twhile (rest) {\n-\t\t\tcurr = read(fp->fd, buf + l, rest);\n-\t\t\tif (curr == 0) break;\n-\t\t\tl += curr; rest -= curr;\n-\t\t}\n-\t} else l = my_netread(fp->fd, buf, len);\n-\tfp->offset += l;\n-\treturn l;\n-}\n-\n-off_t knet_seek(knetFile *fp, int64_t off, int whence)\n-{\n-\tif (whence == SEEK_SET && off == fp->offset) return 0;\n-\tif (fp->type == KNF_TYPE_LOCAL) {\n-\t\t/* Be aware that lseek() returns the offset after seeking,\n-\t\t * while fseek() returns zero on success. 
*/\n-\t\toff_t offset = lseek(fp->fd, off, whence);\n-\t\tif (offset == -1) {\n- // Be silent, it is OK for knet_seek to fail when the file is streamed\n- // fprintf(stderr,"[knet_seek] %s\\n", strerror(errno));\n-\t\t\treturn -1;\n-\t\t}\n-\t\tfp->offset = offset;\n-\t\treturn 0;\n-\t}\n- else if (fp->type == KNF_TYPE_FTP) \n- {\n- if (whence==SEEK_CUR)\n- fp->offset += off;\n- else if (whence==SEEK_SET)\n- fp->offset = off;\n- else if ( whence==SEEK_END)\n- fp->offset = fp->file_size+off;\n-\t\tfp->is_ready = 0;\n-\t\treturn 0;\n-\t} \n- else if (fp->type == KNF_TYPE_HTTP) \n- {\n-\t\tif (whence == SEEK_END) { // FIXME: can we allow SEEK_END in future?\n-\t\t\tfprintf(stderr, "[knet_seek] SEEK_END is not supported for HTTP. Offset is unchanged.\\n");\n-\t\t\terrno = ESPIPE;\n-\t\t\treturn -1;\n-\t\t}\n- if (whence==SEEK_CUR)\n- fp->offset += off;\n- else if (whence==SEEK_SET)\n- fp->offset = off;\n-\t\tfp->is_ready = 0;\n-\t\treturn fp->offset;\n-\t}\n-\terrno = EINVAL;\n- fprintf(stderr,"[knet_seek] %s\\n", strerror(errno));\n-\treturn -1;\n-}\n-\n-int knet_close(knetFile *fp)\n-{\n-\tif (fp == 0) return 0;\n-\tif (fp->ctrl_fd != -1) netclose(fp->ctrl_fd); // FTP specific\n-\tif (fp->fd != -1) {\n-\t\t/* On Linux/Mac, netclose() is an alias of close(), but on\n-\t\t * Windows, it is an alias of closesocket(). 
*/\n-\t\tif (fp->type == KNF_TYPE_LOCAL) close(fp->fd);\n-\t\telse netclose(fp->fd);\n-\t}\n-\tfree(fp->host); free(fp->port);\n-\tfree(fp->response); free(fp->retr); free(fp->size_cmd); // FTP specific\n-\tfree(fp->path); free(fp->http_host); // HTTP specific\n-\tfree(fp);\n-\treturn 0;\n-}\n-\n-#ifdef KNETFILE_MAIN\n-int main(void)\n-{\n-\tchar *buf;\n-\tknetFile *fp;\n-\tint type = 4, l;\n-#ifdef _WIN32\n-\tknet_win32_init();\n-#endif\n-\tbuf = calloc(0x100000, 1);\n-\tif (type == 0) {\n-\t\tfp = knet_open("knetfile.c", "r");\n-\t\tknet_seek(fp, 1000, SEEK_SET);\n-\t} else if (type == 1) { // NCBI FTP, large file\n-\t\tfp = knet_open("ftp://ftp.ncbi.nih.gov/1000genomes/ftp/data/NA12878/alignment/NA12878.chrom6.SLX.SRP000032.2009_06.bam", "r");\n-\t\tknet_seek(fp, 2500000000ll, SEEK_SET);\n-\t\tl = knet_read(fp, buf, 255);\n-\t} else if (type == 2) {\n-\t\tfp = knet_open("ftp://ftp.sanger.ac.uk/pub4/treefam/tmp/index.shtml", "r");\n-\t\tknet_seek(fp, 1000, SEEK_SET);\n-\t} else if (type == 3) {\n-\t\tfp = knet_open("http://www.sanger.ac.uk/Users/lh3/index.shtml", "r");\n-\t\tknet_seek(fp, 1000, SEEK_SET);\n-\t} else if (type == 4) {\n-\t\tfp = knet_open("http://www.sanger.ac.uk/Users/lh3/ex1.bam", "r");\n-\t\tknet_read(fp, buf, 10000);\n-\t\tknet_seek(fp, 20000, SEEK_SET);\n-\t\tknet_seek(fp, 10000, SEEK_SET);\n-\t\tl = knet_read(fp, buf+10000, 10000000) + 10000;\n-\t}\n-\tif (type != 4 && type != 1) {\n-\t\tknet_read(fp, buf, 255);\n-\t\tbuf[255] = 0;\n-\t\tprintf("%s\\n", buf);\n-\t} else write(fileno(stdout), buf, l);\n-\tknet_close(fp);\n-\tfree(buf);\n-\treturn 0;\n-}\n-#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/knetfile.h --- a/chimerascan/pysam/tabix/knetfile.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,75 +0,0 @@ -#ifndef KNETFILE_H -#define KNETFILE_H - -#include <stdint.h> -#include <fcntl.h> - -#ifndef _WIN32 -#define netread(fd, ptr, len) read(fd, ptr, len) -#define netwrite(fd, ptr, len) write(fd, ptr, len) -#define netclose(fd) close(fd) -#else -#include <winsock2.h> -#define netread(fd, ptr, len) recv(fd, ptr, len, 0) -#define netwrite(fd, ptr, len) send(fd, ptr, len, 0) -#define netclose(fd) closesocket(fd) -#endif - -// FIXME: currently I/O is unbuffered - -#define KNF_TYPE_LOCAL 1 -#define KNF_TYPE_FTP 2 -#define KNF_TYPE_HTTP 3 - -typedef struct knetFile_s { - int type, fd; - int64_t offset; - char *host, *port; - - // the following are for FTP only - int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; - char *response, *retr, *size_cmd; - int64_t seek_offset; // for lazy seek - int64_t file_size; - - // the following are for HTTP only - char *path, *http_host; -} knetFile; - -#define knet_tell(fp) ((fp)->offset) -#define knet_fileno(fp) ((fp)->fd) - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _WIN32 - int knet_win32_init(); - void knet_win32_destroy(); -#endif - - knetFile *knet_open(const char *fn, const char *mode); - - /* - This only works with local files. - */ - knetFile *knet_dopen(int fd, const char *mode); - - /* - If ->is_ready==0, this routine updates ->fd; otherwise, it simply - reads from ->fd. - */ - off_t knet_read(knetFile *fp, void *buf, off_t len); - - /* - This routine only sets ->offset and ->is_ready=0. It does not - communicate with the FTP server. - */ - off_t knet_seek(knetFile *fp, int64_t off, int whence); - int knet_close(knetFile *fp); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/ksort.h --- a/chimerascan/pysam/tabix/ksort.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,271 +0,0 @@\n-/* The MIT License\n-\n- Copyright (c) 2008 Genome Research Ltd (GRL).\n-\n- Permission is hereby granted, free of charge, to any person obtaining\n- a copy of this software and associated documentation files (the\n- "Software"), to deal in the Software without restriction, including\n- without limitation the rights to use, copy, modify, merge, publish,\n- distribute, sublicense, and/or sell copies of the Software, and to\n- permit persons to whom the Software is furnished to do so, subject to\n- the following conditions:\n-\n- The above copyright notice and this permission notice shall be\n- included in all copies or substantial portions of the Software.\n-\n- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\n- EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\n- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n- NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\n- BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\n- ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\n- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\n- SOFTWARE.\n-*/\n-\n-/* Contact: Heng Li <lh3@sanger.ac.uk> */\n-\n-/*\n- 2008-11-16 (0.1.4):\n-\n- * Fixed a bug in introsort() that happens in rare cases.\n-\n- 2008-11-05 (0.1.3):\n-\n- * Fixed a bug in introsort() for complex comparisons.\n-\n-\t* Fixed a bug in mergesort(). The previous version is not stable.\n-\n- 2008-09-15 (0.1.2):\n-\n-\t* Accelerated introsort. 
On my Mac (not on another Linux machine),\n-\t my implementation is as fast as std::sort on random input.\n-\n-\t* Added combsort and in introsort, switch to combsort if the\n-\t recursion is too deep.\n-\n- 2008-09-13 (0.1.1):\n-\n-\t* Added k-small algorithm\n-\n- 2008-09-05 (0.1.0):\n-\n-\t* Initial version\n-\n-*/\n-\n-#ifndef AC_KSORT_H\n-#define AC_KSORT_H\n-\n-#include <stdlib.h>\n-#include <string.h>\n-\n-typedef struct {\n-\tvoid *left, *right;\n-\tint depth;\n-} ks_isort_stack_t;\n-\n-#define KSORT_SWAP(type_t, a, b) { register type_t t=(a); (a)=(b); (b)=t; }\n-\n-#define KSORT_INIT(name, type_t, __sort_lt)\t\t\t\t\t\t\t\t\\\n-\tvoid ks_mergesort_##name(size_t n, type_t array[], type_t temp[])\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t *a2[2], *a, *b;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tint curr, shift;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ta2[0] = array;\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ta2[1] = temp? temp : (type_t*)malloc(sizeof(type_t) * n);\t\t\\\n-\t\tfor (curr = 0, shift = 0; (1ul<<shift) < n; ++shift) {\t\t\t\\\n-\t\t\ta = a2[curr]; b = a2[1-curr];\t\t\t\t\t\t\t\t\\\n-\t\t\tif (shift == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\ttype_t *p = b, *i, *eb = a + n;\t\t\t\t\t\t\t\\\n-\t\t\t\tfor (i = a; i < eb; i += 2) {\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (i == eb - 1) *p++ = *i;\t\t\t\t\t\t\t\\\n-\t\t\t\t\telse {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tif (__sort_lt(*(i+1), *i)) {\t\t\t\t\t\\\n-\t\t\t\t\t\t\t*p++ = *(i+1); *p++ = *i;\t\t\t\t\t\\\n-\t\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\t\t*p++ = *i; *p++ = *(i+1);\t\t\t\t\t\\\n-\t\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tsize_t i, step = 1ul<<shift;\t\t\t\t\t\t\t\\\n-\t\t\t\tfor (i = 0; i < n; i += step<<1) {\t\t\t\t\t\t\\\n-\t\t\t\t\ttype_t *p, *j, *k, *ea, *eb;\t\t\t\t\t\t\\\n-\t\t\t\t\tif (n < i + step) 
{\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tea = a + n; eb = a;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tea = a + i + step;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\teb = a + (n < i + (step<<1)? n : i + (step<<1)); \\\n-\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tj = a + i; k = a + i + step; p = b + i;\t\t\t\t\\\n-\t\t\t\t\twhile (j < ea && k < eb) {\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\tif (__sort_lt(*k, *j)) *p++ = *k++;\t\t\t\t\\\n-\t\t\t\t\t\telse *p++ = *j++;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\twhile (j < ea) *p++ = *j++;\t\t\t\t\t\t\t\\\n-\t\t\t\t\twhile (k < eb) *p++ = *k++;\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tcurr = 1 - curr;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (curr == 1) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\ttype_t *p = a2[0], *i = a2[1], *eb = array + n;\t\t\t\t\\\n-\t\t\tfor (; p < eb; ++i) *p++ = *i;\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (temp == 0) free(a2[1]);\t\t\t\t\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\tvoid ks_heapadjust_##name(size_t i, size_t n, type_t l[])\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tsize_t k ='..b'\t\t\t\t\t\t\t\t\\\n-\tvoid ks_introsort_##name(size_t n, type_t a[])\t\t\t\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tint d;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tks_isort_stack_t *top, *stack;\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t rp, swap_tmp;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t *s, *t, *i, *j, *k;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tif (n < 1) return;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\telse if (n == 2) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (__sort_lt(a[1], a[0])) { swap_tmp = a[0]; a[0] = a[1]; a[1] = swap_tmp; } \\\n-\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\tfor (d = 2; 1ul<<d < n; 
++d);\t\t\t\t\t\t\t\t\t\\\n-\t\tstack = (ks_isort_stack_t*)malloc(sizeof(ks_isort_stack_t) * ((sizeof(size_t)*d)+2)); \\\n-\t\ttop = stack; s = a; t = a + (n-1); d <<= 1;\t\t\t\t\t\t\\\n-\t\twhile (1) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (s < t) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (--d == 0) {\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tks_combsort_##name(t - s + 1, s);\t\t\t\t\t\\\n-\t\t\t\t\tt = s;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tcontinue;\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\ti = s; j = t; k = i + ((j-i)>>1) + 1;\t\t\t\t\t\\\n-\t\t\t\tif (__sort_lt(*k, *i)) {\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (__sort_lt(*k, *j)) k = j;\t\t\t\t\t\t\\\n-\t\t\t\t} else k = __sort_lt(*j, *i)? i : j;\t\t\t\t\t\\\n-\t\t\t\trp = *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (k != t) { swap_tmp = *k; *k = *t; *t = swap_tmp; }\t\\\n-\t\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tdo ++i; while (__sort_lt(*i, rp));\t\t\t\t\t\\\n-\t\t\t\t\tdo --j; while (i <= j && __sort_lt(rp, *j));\t\t\\\n-\t\t\t\t\tif (j <= i) break;\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tswap_tmp = *i; *i = *j; *j = swap_tmp;\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tswap_tmp = *i; *i = *t; *t = swap_tmp;\t\t\t\t\t\\\n-\t\t\t\tif (i-s > t-i) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (i-s > 16) { top->left = s; top->right = i-1; top->depth = d; ++top; } \\\n-\t\t\t\t\ts = t-i > 16? i+1 : t;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tif (t-i > 16) { top->left = i+1; top->right = t; top->depth = d; ++top; } \\\n-\t\t\t\t\tt = i-s > 16? 
i-1 : s;\t\t\t\t\t\t\t\t\\\n-\t\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t} else {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (top == stack) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\tfree(stack);\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t\t__ks_insertsort_##name(a, a+n);\t\t\t\t\t\t\\\n-\t\t\t\t\treturn;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\t} else { --top; s = (type_t*)top->left; t = (type_t*)top->right; d = top->depth; } \\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t/* This function is adapted from: http://ndevilla.free.fr/median/ */ \\\n-\t/* 0 <= kk < n */\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\ttype_t ks_ksmall_##name(size_t n, type_t arr[], size_t kk)\t\t\t\\\n-\t{\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\ttype_t *low, *high, *k, *ll, *hh, *mid;\t\t\t\t\t\t\t\\\n-\t\tlow = arr; high = arr + n - 1; k = arr + kk;\t\t\t\t\t\\\n-\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (high <= low) return *k;\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (high == low + 1) {\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tif (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n-\t\t\t\treturn *k;\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tmid = low + (high - low) / 2;\t\t\t\t\t\t\t\t\\\n-\t\t\tif (__sort_lt(*high, *mid)) KSORT_SWAP(type_t, *mid, *high); \\\n-\t\t\tif (__sort_lt(*high, *low)) KSORT_SWAP(type_t, *low, *high); \\\n-\t\t\tif (__sort_lt(*low, *mid)) KSORT_SWAP(type_t, *mid, *low);\t\\\n-\t\t\tKSORT_SWAP(type_t, *mid, *(low+1));\t\t\t\t\t\t\t\\\n-\t\t\tll = low + 1; hh = high;\t\t\t\t\t\t\t\t\t\\\n-\t\t\tfor (;;) {\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tdo ++ll; while (__sort_lt(*ll, *low));\t\t\t\t\t\\\n-\t\t\t\tdo --hh; while (__sort_lt(*low, *hh));\t\t\t\t\t\\\n-\t\t\t\tif (hh < ll) break;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\t\tKSORT_SWAP(type_t, *ll, *hh);\t\t\t\t\t\t\t\\\n-\t\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tKSORT_SWAP(type_t, *low, *hh);\t\t\t\t\t\t\t\t\\\n-\t\t\tif (hh <= 
k) low = ll;\t\t\t\t\t\t\t\t\t\t\\\n-\t\t\tif (hh >= k) high = hh - 1;\t\t\t\t\t\t\t\t\t\\\n-\t\t}\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\\\n-\t}\n-\n-#define ks_mergesort(name, n, a, t) ks_mergesort_##name(n, a, t)\n-#define ks_introsort(name, n, a) ks_introsort_##name(n, a)\n-#define ks_combsort(name, n, a) ks_combsort_##name(n, a)\n-#define ks_heapsort(name, n, a) ks_heapsort_##name(n, a)\n-#define ks_heapmake(name, n, a) ks_heapmake_##name(n, a)\n-#define ks_heapadjust(name, i, n, a) ks_heapadjust_##name(i, n, a)\n-#define ks_ksmall(name, n, a, k) ks_ksmall_##name(n, a, k)\n-\n-#define ks_lt_generic(a, b) ((a) < (b))\n-#define ks_lt_str(a, b) (strcmp((a), (b)) < 0)\n-\n-typedef const char *ksstr_t;\n-\n-#define KSORT_INIT_GENERIC(type_t) KSORT_INIT(type_t, type_t, ks_lt_generic)\n-#define KSORT_INIT_STR KSORT_INIT(str, ksstr_t, ks_lt_str)\n-\n-#endif\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/kstring.c --- a/chimerascan/pysam/tabix/kstring.c Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,165 +0,0 @@ -#include <stdarg.h> -#include <stdio.h> -#include <ctype.h> -#include <string.h> -#include <stdint.h> -#include "kstring.h" - -int ksprintf(kstring_t *s, const char *fmt, ...) -{ - va_list ap; - int l; - va_start(ap, fmt); - l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); // This line does not work with glibc 2.0. See `man snprintf'. - va_end(ap); - if (l + 1 > s->m - s->l) { - s->m = s->l + l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - va_start(ap, fmt); - l = vsnprintf(s->s + s->l, s->m - s->l, fmt, ap); - } - va_end(ap); - s->l += l; - return l; -} - -// s MUST BE a null terminated string; l = strlen(s) -int ksplit_core(char *s, int delimiter, int *_max, int **_offsets) -{ - int i, n, max, last_char, last_start, *offsets, l; - n = 0; max = *_max; offsets = *_offsets; - l = strlen(s); - -#define __ksplit_aux do { \ - if (_offsets) { \ - s[i] = 0; \ - if (n == max) { \ - max = max? max<<1 : 2; \ - offsets = (int*)realloc(offsets, sizeof(int) * max); \ - } \ - offsets[n++] = last_start; \ - } else ++n; \ - } while (0) - - for (i = 0, last_char = last_start = 0; i <= l; ++i) { - if (delimiter == 0) { - if (isspace(s[i]) || s[i] == 0) { - if (isgraph(last_char)) __ksplit_aux; // the end of a field - } else { - if (isspace(last_char) || last_char == 0) last_start = i; - } - } else { - if (s[i] == delimiter || s[i] == 0) { - if (last_char != 0 && last_char != delimiter) __ksplit_aux; // the end of a field - } else { - if (last_char == delimiter || last_char == 0) last_start = i; - } - } - last_char = s[i]; - } - *_max = max; *_offsets = offsets; - return n; -} - -/********************** - * Boyer-Moore search * - **********************/ - -// reference: http://www-igm.univ-mlv.fr/~lecroq/string/node14.html -int *ksBM_prep(const uint8_t *pat, int m) -{ - int i, *suff, *prep, *bmGs, *bmBc; - prep = calloc(m + 256, 1); - bmGs = prep; bmBc = prep + m; - { // preBmBc() - for (i = 0; i < 256; ++i) bmBc[i] = m; - for (i = 0; i < m - 1; 
++i) bmBc[pat[i]] = m - i - 1; - } - suff = calloc(m, sizeof(int)); - { // suffixes() - int f = 0, g; - suff[m - 1] = m; - g = m - 1; - for (i = m - 2; i >= 0; --i) { - if (i > g && suff[i + m - 1 - f] < i - g) - suff[i] = suff[i + m - 1 - f]; - else { - if (i < g) g = i; - f = i; - while (g >= 0 && pat[g] == pat[g + m - 1 - f]) --g; - suff[i] = f - g; - } - } - } - { // preBmGs() - int j = 0; - for (i = 0; i < m; ++i) bmGs[i] = m; - for (i = m - 1; i >= 0; --i) - if (suff[i] == i + 1) - for (; j < m - 1 - i; ++j) - if (bmGs[j] == m) - bmGs[j] = m - 1 - i; - for (i = 0; i <= m - 2; ++i) - bmGs[m - 1 - suff[i]] = m - 1 - i; - } - free(suff); - return prep; -} - -int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches) -{ - int i, j, *prep, *bmGs, *bmBc; - int *matches = 0, mm = 0, nm = 0; - prep = _prep? _prep : ksBM_prep(pat, m); - bmGs = prep; bmBc = prep + m; - j = 0; - while (j <= n - m) { - for (i = m - 1; i >= 0 && pat[i] == str[i+j]; --i); - if (i < 0) { - if (nm == mm) { - mm = mm? mm<<1 : 1; - matches = realloc(matches, mm * sizeof(int)); - } - matches[nm++] = j; - j += bmGs[0]; - } else { - int max = bmBc[str[i+j]] - m + 1 + i; - if (max < bmGs[i]) max = bmGs[i]; - j += max; - } - } - *n_matches = nm; - if (_prep == 0) free(prep); - return matches; -} - -#ifdef KSTRING_MAIN -#include <stdio.h> -int main() -{ - kstring_t *s; - int *fields, n, i; - s = (kstring_t*)calloc(1, sizeof(kstring_t)); - // test ksprintf() - ksprintf(s, " abcdefg: %d ", 100); - printf("'%s'\n", s->s); - // test ksplit() - fields = ksplit(s, 0, &n); - for (i = 0; i < n; ++i) - printf("field[%d] = '%s'\n", i, s->s + fields[i]); - free(s); - - { - static char *str = "abcdefgcdg"; - static char *pat = "cd"; - int n, *matches; - matches = ksBM_search(str, strlen(str), pat, strlen(pat), 0, &n); - printf("%d: \n", n); - for (i = 0; i < n; ++i) - printf("- %d\n", matches[i]); - free(matches); - } - return 0; -} -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/kstring.h --- a/chimerascan/pysam/tabix/kstring.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,68 +0,0 @@ -#ifndef KSTRING_H -#define KSTRING_H - -#include <stdlib.h> -#include <string.h> -#include <stdint.h> - -#ifndef kroundup32 -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) -#endif - -#ifndef KSTRING_T -#define KSTRING_T kstring_t -typedef struct __kstring_t { - size_t l, m; - char *s; -} kstring_t; -#endif - -int ksprintf(kstring_t *s, const char *fmt, ...); -int ksplit_core(char *s, int delimiter, int *_max, int **_offsets); - -// calculate the auxiliary array, allocated by calloc() -int *ksBM_prep(const uint8_t *pat, int m); - -/* Search pat in str and returned the list of matches. The size of the - * list is returned as n_matches. _prep is the array returned by - * ksBM_prep(). If it is a NULL pointer, ksBM_prep() will be called. */ -int *ksBM_search(const uint8_t *str, int n, const uint8_t *pat, int m, int *_prep, int *n_matches); - -static inline int kputsn(const char *p, int l, kstring_t *s) -{ - if (s->l + l + 1 >= s->m) { - s->m = s->l + l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } - strncpy(s->s + s->l, p, l); - s->l += l; - s->s[s->l] = 0; - return l; -} - -static inline int kputs(const char *p, kstring_t *s) -{ - return kputsn(p, strlen(p), s); -} - -static inline int kputc(int c, kstring_t *s) -{ - if (s->l + 1 >= s->m) { - s->m = s->l + 2; - kroundup32(s->m); - s->s = (char*)realloc(s->s, s->m); - } - s->s[s->l++] = c; - s->s[s->l] = 0; - return c; -} - -static inline int *ksplit(kstring_t *s, int delimiter, int *n) -{ - int max = 0, *offsets = 0; - *n = ksplit_core(s->s, delimiter, &max, &offsets); - return offsets; -} - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tabix/tabix.h --- a/chimerascan/pysam/tabix/tabix.h Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,137 +0,0 @@ -/* The MIT License - - Copyright (c) 2009 Genome Research Ltd (GRL), 2010 Broad Institute - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be - included in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE. -*/ - -/* Contact: Heng Li <lh3@live.co.uk> */ - -#ifndef __TABIDX_H -#define __TABIDX_H - -#include <stdint.h> -#include "kstring.h" -#include "bgzf.h" - -#define TI_PRESET_GENERIC 0 -#define TI_PRESET_SAM 1 -#define TI_PRESET_VCF 2 - -#define TI_FLAG_UCSC 0x10000 - -typedef int (*ti_fetch_f)(int l, const char *s, void *data); - -struct __ti_index_t; -typedef struct __ti_index_t ti_index_t; - -struct __ti_iter_t; -typedef struct __ti_iter_t *ti_iter_t; - -typedef struct { - BGZF *fp; - ti_index_t *idx; - char *fn, *fnidx; -} tabix_t; - -typedef struct { - int32_t preset; - int32_t sc, bc, ec; // seq col., beg col. and end col. 
- int32_t meta_char, line_skip; -} ti_conf_t; - -extern ti_conf_t ti_conf_gff, ti_conf_bed, ti_conf_psltbl, ti_conf_vcf, ti_conf_sam; // preset - -#ifdef __cplusplus -extern "C" { -#endif - - /******************* - * High-level APIs * - *******************/ - - tabix_t *ti_open(const char *fn, const char *fnidx); - int ti_lazy_index_load(tabix_t *t); - void ti_close(tabix_t *t); - ti_iter_t ti_query(tabix_t *t, const char *name, int beg, int end); - ti_iter_t ti_queryi(tabix_t *t, int tid, int beg, int end); - ti_iter_t ti_querys(tabix_t *t, const char *reg); - const char *ti_read(tabix_t *t, ti_iter_t iter, int *len); - - /* Destroy the iterator */ - void ti_iter_destroy(ti_iter_t iter); - - /* Get the list of sequence names. Each "char*" pointer points to a - * internal member of the index, so DO NOT modify the returned - * pointer; otherwise the index will be corrupted. The returned - * pointer should be freed by a single free() call by the routine - * calling this function. The number of sequences is returned at *n. */ - const char **ti_seqname(const ti_index_t *idx, int *n); - - /****************** - * Low-level APIs * - ******************/ - - /* Build the index for file <fn>. File <fn>.tbi will be generated - * and overwrite the file of the same name. Return -1 on failure. */ - int ti_index_build(const char *fn, const ti_conf_t *conf); - - /* Load the index from file <fn>.tbi. If <fn> is a URL and the index - * file is not in the working directory, <fn>.tbi will be - * downloaded. Return NULL on failure. */ - ti_index_t *ti_index_load(const char *fn); - - ti_index_t *ti_index_load_local(const char *fnidx); - - /* Destroy the index */ - void ti_index_destroy(ti_index_t *idx); - - /* Parse a region like: chr2, chr2:100, chr2:100-200. Return -1 on failure. 
*/ - int ti_parse_region(const ti_index_t *idx, const char *str, int *tid, int *begin, int *end); - - int ti_get_tid(const ti_index_t *idx, const char *name); - - /* Get the iterator pointing to the first record at the current file - * position. If the file is just openned, the iterator points to the - * first record in the file. */ - ti_iter_t ti_iter_first(void); - - /* Get the iterator pointing to the first record in region tid:beg-end */ - ti_iter_t ti_iter_query(const ti_index_t *idx, int tid, int beg, int end); - - /* Get the data line pointed by the iterator and iterate to the next record. */ - const char *ti_iter_read(BGZF *fp, ti_iter_t iter, int *len); - - /******************* - * Deprecated APIs * - *******************/ - - /* The callback version for random access */ - int ti_fetch(BGZF *fp, const ti_index_t *idx, int tid, int beg, int end, void *data, ti_fetch_f func); - - /* Read one line. */ - int ti_readline(BGZF *fp, kstring_t *str); - -#ifdef __cplusplus -} -#endif - -#endif |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/00README.txt --- a/chimerascan/pysam/tests/00README.txt Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,32 +0,0 @@ -File ex1.fa contains two sequences cut from the human genome -build36. They were exatracted with command: - - samtools faidx human_b36.fa 2:2043966-2045540 20:67967-69550 - -Sequence names were changed manually for simplicity. File ex1.sam.gz -contains MAQ alignments exatracted with: - - (samtools view NA18507_maq.bam 2:2044001-2045500; - samtools view NA18507_maq.bam 20:68001-69500) - -and processed with `samtools fixmate' to make it self-consistent as a -standalone alignment. - -To try samtools, you may run the following commands: - - samtools faidx ex1.fa # index the reference FASTA - samtools import ex1.fa.fai ex1.sam.gz ex1.bam # SAM->BAM - samtools index ex1.bam # index BAM - samtools tview ex1.bam ex1.fa # view alignment - samtools pileup -cf ex1.fa ex1.bam # pileup and consensus - samtools pileup -cf ex1.fa -t ex1.fa.fai ex1.sam.gz - -In order for the script pysam_test.py to work, you will need pysam -in your PYTHONPATH. - -In order for the script example.py to work, you will need pysam -in your PYTHONPATH and run - - make all - -beforehand. |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/Makefile --- a/chimerascan/pysam/tests/Makefile Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,33 +0,0 @@ -all: ex1.glf ex1.pileup.gz ex1.bam.bai ex1.glfview.gz \ - ex2.sam.gz ex2.sam ex1.sam \ - ex2.bam \ - ex3.bam ex3.bam.bai \ - ex4.bam ex4.bam.bai \ - ex5.bam ex5.bam.bai \ - ex6.bam \ - ex8.bam - -ex2.sam.gz: ex1.bam ex1.bam.bai - samtools view -h ex1.bam | gzip > ex2.sam.gz - -%.bam: %.sam ex1.fa.fai - samtools import ex1.fa.fai $< $@ - -%.sam: %.sam.gz - gunzip < $< > $@ - -ex1.fa.fai:ex1.fa - samtools faidx ex1.fa -ex1.bam:ex1.sam.gz ex1.fa.fai - samtools import ex1.fa.fai ex1.sam.gz ex1.bam -%.bam.bai:%.bam - samtools index $< -ex1.pileup.gz:ex1.bam ex1.fa - samtools pileup -cf ex1.fa ex1.bam | gzip > ex1.pileup.gz -ex1.glf:ex1.bam ex1.fa - samtools pileup -gf ex1.fa ex1.bam > ex1.glf -ex1.glfview.gz:ex1.glf - samtools glfview ex1.glf | gzip > ex1.glfview.gz - -clean: - rm -fr *.bam *.bai *.glf* *.fai *.pileup* *~ calDepth *.dSYM pysam_*.sam ex2.sam ex2.sam.gz ex1.sam |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/ex1.fa --- a/chimerascan/pysam/tests/ex1.fa Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,56 +0,0 @@ ->chr1 -CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCT -GTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCAC -GGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAG -TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTC -AGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAA -CAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACC -AAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCT -CTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA -ATGGACCTGTGATATCTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCTGC -AGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCACAATGAAC -AACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTCATACACACAC -ATGGTTTAGGGGTATAATACCTCTACATGGCTGATTATGAAAACAATGTTCCCCAGATAC -CATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCT -TTTGGCATTTGCCTTCAGACCCTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTT -TCCCATCATGAAGCACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAGGTGCACTAAT -GCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAAT -ACATGAGATTATTAGGAAATGCTTTACTGTCATAACTATGAAGAGACTATTGCCAGATGA -ACCACACATTAATACTATGTTTCTTATCTGCACATTACTACCCTGCAATTAATATAATTG -TGTCCATGTACACACGCTGTCCTATGTACTTATCATGACTCTATCCCAAATTCCCAATTA -CGTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAG -TCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGC -TTGGGCTGTAATGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAATCTC -TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTG -TTTCTTTGTTTGATTTGGTGGAAGACATAATCCCACGCTTCCTATGGAAAGGTTGTTGGG -AGATTTTTAATGATTCCTCAATGTTAAAATGTCTATTTTTGTCTTGACACCCAACTAATA -TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTC -TCCCTCGTCTTCTTA ->chr2 -TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAG -CTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCT -TATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTT -CAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAA -AAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT 
-AGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATAC -ATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAG -GAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCAT -CAGAATAACAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAATT -TTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTTATGCCCTGCTAAACTA -AGCATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATA -ATTCATCATCACTAAACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAAT -TAAAGTTCAATACTCACCATCATAAATACACACAAAAGTACAAAACTCACAGGTTTTATA -AAACAATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTACAACAGGAACAAAACC -TCATATATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGATATA -GATTGGCAGAACAGATTTAAAAACATGAACTAACTATATGCTGTTTACAAGAAACTCATT -AATAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAAACCA -AATGAGAGAAGGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAATCAACAACAGT -AAAATAAAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATAT -AACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAAT -ACTACTAGACCTAAGAGGGATGAGAAATTACCTAATTGGTACAATGTACAATATTCTGAT -GATGGTTACACTAAAAGCCCATACTTTACTGCTACTCAATATATCCATGTAACAAATCTG -CGCTTGTACTTCTAAATCTATAAAAAAATTAAAATTTAACAAAAGTAAATAAAACACATA -GCTAAAACTAAAAAAGCAAAAACAAAAACTATGCTAAGTATTGGTAAAGATGTGGGGAAA -AAAGTAAACTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCACTTTGGAAAACAA -TTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTTGCATGC -CAGAAAAAAATATTTACAGTAACT |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/ex1.sam.gz |
b |
Binary file chimerascan/pysam/tests/ex1.sam.gz has changed |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/ex3.sam --- a/chimerascan/pysam/tests/ex3.sam Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,13 +0,0 @@ -@HD VN:1.0 -@SQ SN:chr1 LN:1575 -@SQ SN:chr2 LN:1584 -@RG ID:L1 PU:SC_1_10 LB:SC_1 SM:NA12891 CN:name:with:colon -@RG ID:L2 PU:SC_2_12 LB:SC_2 SM:NA12891 CN:name:with:colon -@PG ID:P1 VN:1.0 -@PG ID:P2 VN:1.1 -@CO this is a comment -@CO this is another comment -read_28833_29006_6945 99 chr1 33 20 10M1D25M = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 PG:Z:P1 XT:A:U -read_28701_28881_323b 147 chr2 88 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 RG:Z:L2 PG:Z:P2 XT:A:R -read_28701_28881_323c 147 chr2 88 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< -test_clipped1 99 chr2 997 20 4S6M1D20M5S = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 PG:Z:P1 XT:A:U |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/ex4.sam --- a/chimerascan/pysam/tests/ex4.sam Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,9 +0,0 @@ -@HD VN:1.0 -@SQ SN:chr1 LN:100 -@SQ SN:chr2 LN:100 -@RG ID:L1 PU:SC_1_10 LB:SC_1 SM:NA12891 -@RG ID:L2 PU:SC_2_12 LB:SC_2 SM:NA12891 -@CO this is a comment -@CO this is another comment -read_28833_29006_6945 99 chr1 21 20 10M1D25M = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 -read_28701_28881_323b 147 chr2 21 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 RG:Z:L2 |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/ex5.sam --- a/chimerascan/pysam/tests/ex5.sam Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,5 +0,0 @@ -@HD VN:1.0 -@SQ SN:chr1 LN:100 -@SQ SN:chr2 LN:100 -read_28833_29006_6945 0 * * * * * 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< -read_28701_28881_323b 0 * * * * * 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/ex6.sam --- a/chimerascan/pysam/tests/ex6.sam Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,5 +0,0 @@ -@HD VN:1.0 -@SQ SN:chr1 LN:1575 -@SQ SN:chr2 LN:1584 -read_28833_29006_6945 99 chr1 33 20 10M1D25M = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 -read_28701_28881_323b 147 chr2 88 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 RG:Z:L2 |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/ex7.sam --- a/chimerascan/pysam/tests/ex7.sam Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,2 +0,0 @@ -read_28833_29006_6945 99 chr1 33 20 10M1D25M = 200 167 AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG <<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<< NM:i:1 RG:Z:L1 PG:Z:P1 XT:A:U -read_28701_28881_323b 147 chr2 88 30 35M = 500 412 ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA <<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<< MF:i:18 RG:Z:L2 PG:Z:P2 XT:A:R |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/ex8.sam --- a/chimerascan/pysam/tests/ex8.sam Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,3 +0,0 @@ -@HD VN:1.0 -@SQ SN:2 LN:48297693 -GJP00TM04CAQ5W 0 2 38297693 60 45H51M1D13M1D12M1D9M2D5M1D7M4D2M1I6M1D28M1D5M1D2M1D18M55H * 0 0 CATGAAGAACCGCTGGGTATGGAGCACACCTCACCTGATGGACAGTTGATTATGCTCACCTTAACGCTAATTGAGAGCAGCACAAGAGGACTGGAAACTAGAATTTACTCCTCATCTCCGAAGATGTGAATATTCTAAATTCAGCTTGCCTCTTGCTTC IID7757111/=;?///:D>777;EEGAAAEEIHHIIIIIIIIIIIIIIBBBIIIIH==<<<DDGEEE;<<<A><<<DEDDA>>>D?1112544556::03---//25.22=;DD?;;;>BDDDEEEGGGA<888<BAA888<GGGGGEB?9::DD551 NM:i:15 MD:Z:51^T13^A12^A9^AA5^A7^AAAA8^T28^T5^A2^T18 RG:Z:GJP00TM04 |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/example.gtf.gz |
b |
Binary file chimerascan/pysam/tests/example.gtf.gz has changed |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/example.gtf.gz.tbi |
b |
Binary file chimerascan/pysam/tests/example.gtf.gz.tbi has changed |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/example.py --- a/chimerascan/pysam/tests/example.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,121 +0,0 @@ -import sys -import pysam - -samfile = pysam.Samfile( "ex1.bam", "rb" ) - -print "###################" -# check different ways to iterate -print len(list(samfile.fetch())) -print len(list(samfile.fetch( "chr1", 10, 200 ))) -print len(list(samfile.fetch( region="chr1:10-200" ))) -print len(list(samfile.fetch( "chr1" ))) -print len(list(samfile.fetch( region="chr1"))) -print len(list(samfile.fetch( "chr2" ))) -print len(list(samfile.fetch( region="chr2"))) -print len(list(samfile.fetch())) -print len(list(samfile.fetch( "chr1" ))) -print len(list(samfile.fetch( region="chr1"))) -print len(list(samfile.fetch())) - -print len(list(samfile.pileup( "chr1", 10, 200 ))) -print len(list(samfile.pileup( region="chr1:10-200" ))) -print len(list(samfile.pileup( "chr1" ))) -print len(list(samfile.pileup( region="chr1"))) -print len(list(samfile.pileup( "chr2" ))) -print len(list(samfile.pileup( region="chr2"))) -print len(list(samfile.pileup())) -print len(list(samfile.pileup())) - -print "########### fetch with callback ################" -def my_fetch_callback( alignment ): print str(alignment) -samfile.fetch( region="chr1:10-200", callback=my_fetch_callback ) - -print "########## pileup with callback ################" -def my_pileup_callback( column ): print str(column) -samfile.pileup( region="chr1:10-200", callback=my_pileup_callback ) - -print "##########iterator row #################" -iter = pysam.IteratorRow( samfile, 0, 10, 200) -for x in iter: print str(x) - -print "##########iterator col #################" -iter = pysam.IteratorColumn( samfile, 0, 10, 200 ) -for x in iter: print str(x) - -print "#########row all##################" -iter = pysam.IteratorRowAll( samfile ) -for x in iter: print str(x) - - -print "###################" - -class Counter: - mCounts = 0 - def __call__(self, alignment): - self.mCounts += 1 - -c = Counter() -samfile.fetch( "chr1:10-200", c ) -print "counts=", c.mCounts - -sys.exit(0) -print samfile.getTarget( 0 ) -print 
samfile.getTarget( 1 ) - -for p in pysam.pileup( "-c", "ex1.bam" ): - print str(p) - -print pysam.pileup.getMessages() - -for p in pysam.pileup( "-c", "ex1.bam", raw=True ): - print str(p), - - - -print "###########################" - -samfile = pysam.Samfile( "ex2.sam.gz", "r" ) - -print "num targets=", samfile.getNumTargets() - -iter = pysam.IteratorRowAll( samfile ) -for x in iter: print str(x) - -samfile.close() - -print "###########################" -samfile = pysam.Samfile( "ex2.sam.gz", "r" ) -def my_fetch_callback( alignment ): - print str(alignment) - -try: - samfile.fetch( "chr1:10-20", my_fetch_callback ) -except AssertionError: - print "caught fetch exception" - -samfile.close() - -print "###########################" -samfile = pysam.Samfile( "ex2.sam.gz", "r" ) -def my_pileup_callback( pileups ): - print str(pileups) -try: - samfile.pileup( "chr1:10-20", my_pileup_callback ) -except NotImplementedError: - print "caught pileup exception" - -# playing arount with headers -samfile = pysam.Samfile( "ex3.sam", "r" ) -print samfile.targets -print samfile.lengths -print samfile.text -print samdile.header -header = samfile.header -samfile.close() - -header["HD"]["SO"] = "unsorted" -outfile = pysam.Samfile( "out.sam", "wh", - header = header ) - -outfile.close() - |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/pysam_test.py --- a/chimerascan/pysam/tests/pysam_test.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,1008 +0,0 @@\n-#!/usr/bin/env python\n-\'\'\'unit testing code for pysam.\n-\n-Execute in the :file:`tests` directory as it requires the Makefile\n-and data files located there.\n-\'\'\'\n-\n-import pysam\n-import unittest\n-import os, re\n-import itertools\n-import subprocess\n-import shutil\n-\n-\n-def checkBinaryEqual( filename1, filename2 ):\n- \'\'\'return true if the two files are binary equal.\'\'\'\n- if os.path.getsize( filename1 ) != os.path.getsize( filename2 ):\n- return False\n-\n- infile1 = open(filename1, "rb")\n- infile2 = open(filename2, "rb")\n-\n- def chariter( infile ):\n- while 1:\n- c = infile.read(1)\n- if c == "": break\n- yield c\n-\n- found = False\n- for c1,c2 in itertools.izip( chariter( infile1), chariter( infile2) ):\n- if c1 != c2: break\n- else:\n- found = True\n-\n- infile1.close()\n- infile2.close()\n- return found\n-\n-def runSamtools( cmd ):\n- \'\'\'run a samtools command\'\'\'\n-\n- try:\n- retcode = subprocess.call(cmd, shell=True)\n- if retcode < 0:\n- print >>sys.stderr, "Child was terminated by signal", -retcode\n- except OSError, e:\n- print >>sys.stderr, "Execution failed:", e\n-\n-def getSamtoolsVersion():\n- \'\'\'return samtools version\'\'\'\n-\n- pipe = subprocess.Popen("samtools", shell=True, stderr=subprocess.PIPE).stderr\n- lines = "".join(pipe.readlines())\n- return re.search( "Version:\\s+(\\S+)", lines).groups()[0]\n-\n-class BinaryTest(unittest.TestCase):\n- \'\'\'test samtools command line commands and compare\n- against pysam commands.\n-\n- Tests fail, if the output is not binary identical.\n- \'\'\'\n-\n- first_time = True\n-\n- # a list of commands to test\n- mCommands = \\\n- { "faidx" : \\\n- ( \n- ("ex1.fa.fai", "samtools faidx ex1.fa"), \n- ("pysam_ex1.fa.fai", (pysam.faidx, "ex1.fa") ),\n- ),\n- "import" :\n- (\n- ("ex1.bam", "samtools import ex1.fa.fai ex1.sam.gz ex1.bam" ),\n- ("pysam_ex1.bam", (pysam.samimport, "ex1.fa.fai ex1.sam.gz pysam_ex1.bam") ),\n- ),\n- "index":\n- (\n- 
("ex1.bam.bai", "samtools index ex1.bam" ),\n- ("pysam_ex1.bam.bai", (pysam.index, "pysam_ex1.bam" ) ),\n- ),\n- "pileup1" :\n- (\n- ("ex1.pileup", "samtools pileup -cf ex1.fa ex1.bam > ex1.pileup" ),\n- ("pysam_ex1.pileup", (pysam.pileup, "-c -f ex1.fa ex1.bam" ) )\n- ),\n- "pileup2" :\n- (\n- ("ex1.glf", "samtools pileup -gf ex1.fa ex1.bam > ex1.glf" ),\n- ("pysam_ex1.glf", (pysam.pileup, "-g -f ex1.fa ex1.bam" ) )\n- ),\n- "glfview" :\n- (\n- ("ex1.glfview", "samtools glfview ex1.glf > ex1.glfview"),\n- ("pysam_ex1.glfview", (pysam.glfview, "ex1.glf" ) ),\n- ),\n- "view" :\n- (\n- ("ex1.view", "samtools view ex1.bam > ex1.view"),\n- ("pysam_ex1.view", (pysam.view, "ex1.bam" ) ),\n- ),\n- "view2" :\n- (\n- ("ex1.view", "samtools view -bT ex1.fa -o ex1.view2 ex1.sam"),\n- # note that -o ex1.view2 throws exception.\n- ("pysam_ex1.view", (pysam.view, "-bT ex1.fa -oex1.view2 ex1.sam" ) ),\n- ),\n- }\n-\n- # some tests depend on others. The order specifies in which order\n- # the samtools commands are executed.\n- mOrder = (\'faidx\', \'import\', \'index\', \'pileup1\', \'pileup2\', \'glfview\', \'view\', \'view2\' )\n-\n- def setUp( self ):\n- \'\'\'setup tests. \n-\n- For setup, all commands will be run before the first test is\n- executed. 
Individual tests will then just compare the output\n- files.\n- \'\'\'\n- if BinaryTest.first_time:\n- # copy the source \n- shutil.copy( "ex1.fa", "pysam_ex1.fa" )\n-\n- '..b', self.reads):\n- self.checkFieldEqual( other, denovo )\n- self.assertEqual( other.compare( denovo ), 0 )\n-\n- def testSAMPerRead( self ):\n- \'\'\'check if individual reads are binary equal.\'\'\'\n- infile = pysam.Samfile( self.samfile, "r")\n-\n- others = list(infile)\n- for denovo, other in zip( others, self.reads):\n- self.checkFieldEqual( other, denovo )\n- self.assertEqual( other.compare( denovo), 0 )\n- \n- def testBAMWholeFile( self ):\n- \n- tmpfilename = "tmp_%i.bam" % id(self)\n-\n- outfile = pysam.Samfile( tmpfilename, "wb", header = self.header )\n-\n- for x in self.reads: outfile.write( x )\n- outfile.close()\n- \n- self.assertTrue( checkBinaryEqual( tmpfilename, self.bamfile ),\n- "mismatch when construction BAM file, see %s %s" % (tmpfilename, self.bamfile))\n- \n- os.unlink( tmpfilename )\n-\n-\n-class TestDoubleFetch(unittest.TestCase):\n- \'\'\'check if two iterators on the same bamfile are independent.\'\'\'\n- \n- def testDoubleFetch( self ):\n-\n- samfile1 = pysam.Samfile(\'ex1.bam\', \'rb\')\n-\n- for a,b in zip(samfile1.fetch(), samfile1.fetch()):\n- self.assertEqual( a.compare( b ), 0 )\n-\n- def testDoubleFetchWithRegion( self ):\n-\n- samfile1 = pysam.Samfile(\'ex1.bam\', \'rb\')\n- chr, start, stop = \'chr1\', 200, 3000000\n- self.assertTrue(len(list(samfile1.fetch ( chr, start, stop))) > 0) #just making sure the test has something to catch\n-\n- for a,b in zip(samfile1.fetch( chr, start, stop), samfile1.fetch( chr, start, stop)):\n- self.assertEqual( a.compare( b ), 0 ) \n-\n- def testDoubleFetchUntilEOF( self ):\n-\n- samfile1 = pysam.Samfile(\'ex1.bam\', \'rb\')\n-\n- for a,b in zip(samfile1.fetch( until_eof = True), \n- samfile1.fetch( until_eof = True )):\n- self.assertEqual( a.compare( b), 0 )\n-\n-class TestRemoteFileFTP(unittest.TestCase):\n- \'\'\'test 
remote access.\n-\n- \'\'\'\n-\n- # Need to find an ftp server without password on standard\n- # port.\n-\n- url = "ftp://ftp.sanger.ac.uk/pub/rd/humanSequences/CV.bam"\n- region = "1:1-1000"\n-\n- def testFTPView( self ):\n- result = pysam.view( self.url, self.region )\n- self.assertEqual( len(result), 36 )\n- \n- def testFTPFetch( self ):\n- samfile = pysam.Samfile(self.url, "rb") \n- result = list(samfile.fetch( region = self.region ))\n- self.assertEqual( len(result), 36 )\n-\n-class TestRemoteFileHTTP( unittest.TestCase):\n-\n- url = "http://genserv.anat.ox.ac.uk/downloads/pysam/test/ex1.bam"\n- region = "chr1:1-1000"\n- local = "ex1.bam"\n-\n- def testView( self ):\n- self.assertRaises( pysam.SamtoolsError, pysam.view, self.url, self.region )\n- \n- def testFetch( self ):\n- samfile = pysam.Samfile(self.url, "rb") \n- result = list(samfile.fetch( region = self.region ))\n- samfile_local = pysam.Samfile(self.local, "rb") \n- ref = list(samfile_local.fetch( region = self.region ))\n-\n- self.assertEqual( len(ref), len(result) )\n- for x, y in zip(result, ref):\n- self.assertEqual( x.compare( y ), 0 )\n-\n- def testFetchAll( self ):\n- samfile = pysam.Samfile(self.url, "rb") \n- result = list(samfile.fetch())\n- samfile_local = pysam.Samfile(self.local, "rb") \n- ref = list(samfile_local.fetch() )\n-\n- self.assertEqual( len(ref), len(result) )\n- for x, y in zip(result, ref):\n- self.assertEqual( x.compare( y ), 0 )\n-\n-\n-# TODOS\n-# 1. finish testing all properties within pileup objects\n-# 2. check exceptions and bad input problems (missing files, optional fields that aren\'t present, etc...)\n-\n-if __name__ == "__main__":\n- # build data files\n- print "building data files"\n- subprocess.call( "make", shell=True)\n- print "starting tests"\n- unittest.main()\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/segfault_tests.py --- a/chimerascan/pysam/tests/segfault_tests.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,37 +0,0 @@ -#!/usr/bin/env python -'''unit testing code for pysam.''' - -import pysam -import unittest -import os -import itertools -import subprocess -import shutil - -class TestExceptions(unittest.TestCase): - - def setUp(self): - self.samfile=pysam.Samfile( "ex1.bam","rb" ) - - def testOutOfRangeNegativeNewFormat(self): - self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, -10 ) - self.assertRaises( ValueError, self.samfile.fetch, "chr1", 5, 0 ) - self.assertRaises( ValueError, self.samfile.fetch, "chr1", -5, -10 ) - - def testOutOfRangeNegativeOldFormat(self): - self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5-10" ) - self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5-0" ) - self.assertRaises( ValueError, self.samfile.fetch, "chr1:-5--10" ) - - def testOutOfRangeLargeNewFormat(self): - self.assertRaises( ValueError, self.samfile.fetch, "chr1", 99999999999999999, 999999999999999999 ) - - def testOutOfRangeLargeOldFormat(self): - self.assertRaises( ValueError, self.samfile.fetch, "chr1:99999999999999999-999999999999999999" ) - - def tearDown(self): - self.samfile.close() - -if __name__ == "__main__": - unittest.main() - |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/tests/tabix_test.py --- a/chimerascan/pysam/tests/tabix_test.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,225 +0,0 @@ -#!/usr/bin/env python -'''unit testing code for pysam. - -Execute in the :file:`tests` directory as it requires the Makefile -and data files located there. -''' - -import sys, os, shutil, gzip -import pysam -import unittest -import itertools -import subprocess - -def checkBinaryEqual( filename1, filename2 ): - '''return true if the two files are binary equal.''' - if os.path.getsize( filename1 ) != os.path.getsize( filename2 ): - return False - - infile1 = open(filename1, "rb") - infile2 = open(filename2, "rb") - - def chariter( infile ): - while 1: - c = infile.read(1) - if c == "": break - yield c - - found = False - for c1,c2 in itertools.izip( chariter( infile1), chariter( infile2) ): - if c1 != c2: break - else: - found = True - - infile1.close() - infile2.close() - return found - -class TestIndexing(unittest.TestCase): - filename = "example.gtf.gz" - filename_idx = "example.gtf.gz.tbi" - - def setUp( self ): - - self.tmpfilename = "tmp_%i.gtf.gz" % id(self) - shutil.copyfile( self.filename, self.tmpfilename ) - - def testIndexPreset( self ): - '''test indexing via preset.''' - - pysam.tabix_index( self.tmpfilename, preset = "gff" ) - checkBinaryEqual( self.tmpfilename + ".tbi", self.filename_idx ) - - def tearDown( self ): - os.unlink( self.tmpfilename ) - os.unlink( self.tmpfilename + ".tbi" ) - -class TestCompression(unittest.TestCase): - filename = "example.gtf.gz" - filename_idx = "example.gtf.gz.tbi" - - def setUp( self ): - - self.tmpfilename = "tmp_%i.gtf" % id(self) - infile = gzip.open( self.filename, "r") - outfile = open( self.tmpfilename, "w" ) - outfile.write( "".join(infile.readlines()) ) - outfile.close() - infile.close() - - def testIndexPreset( self ): - '''test indexing via preset.''' - - pysam.tabix_index( self.tmpfilename, preset = "gff" ) - checkBinaryEqual( self.tmpfilename + ".gz", self.filename ) - checkBinaryEqual( self.tmpfilename + ".gz.tbi", self.filename_idx ) - - def tearDown( self ): - os.unlink( 
self.tmpfilename + ".gz" ) - os.unlink( self.tmpfilename + ".gz.tbi" ) - -class TestIteration( unittest.TestCase ): - - filename = "example.gtf.gz" - - def setUp( self ): - - self.tabix = pysam.Tabixfile( self.filename ) - lines = gzip.open(self.filename).readlines() - # creates index of contig, start, end, adds content without newline. - self.compare = [ - (x[0][0], int(x[0][3]), int(x[0][4]), x[1]) - for x in [ (y.split("\t"), y[:-1]) for y in lines ] ] - - def getSubset( self, contig = None, start = None, end = None): - - if contig == None: - # all lines - subset = [ x[3] for x in self.compare ] - else: - if start != None and end == None: - # until end of contig - subset = [ x[3] for x in self.compare if x[0] == contig and x[2] > start ] - elif start == None and end != None: - # from start of contig - subset = [ x[3] for x in self.compare if x[0] == contig and x[1] <= end ] - elif start == None and end == None: - subset = [ x[3] for x in self.compare if x[0] == contig ] - else: - # all within interval - subset = [ x[3] for x in self.compare if x[0] == contig and \ - min( x[2], end) - max(x[1], start) > 0 ] - - return subset - - def checkPairwise( self, result, ref ): - - result.sort() - ref.sort() - - a = set(result) - b = set(ref) - - self.assertEqual( len(result), len(ref), - "unexpected number of results: %i, expected %i, differences are %s: %s" \ - % (len(result), len(ref), - a.difference(b), - b.difference(a) )) - - for x, d in enumerate( zip( result, ref )): - - self.assertEqual( d[0], d[1], - "unexpected results in pair %i: '%s', expected '%s'" % \ - (x, - d[0], - d[1]) ) - - - def testAll( self ): - result = list(self.tabix.fetch()) - ref = self.getSubset( ) - self.checkPairwise( result, ref ) - - def testPerContig( self ): - for contig in ("chr1", "chr2", "chr1", "chr2" ): - result = list(self.tabix.fetch( contig )) - ref = self.getSubset( contig ) - self.checkPairwise( result, ref ) - - def testPerContigToEnd( self ): - - end = None - for contig in 
("chr1", "chr2", "chr1", "chr2" ): - for start in range( 0, 200000, 1000): - result = list(self.tabix.fetch( contig, start, end )) - ref = self.getSubset( contig, start, end ) - self.checkPairwise( result, ref ) - - def testPerContigFromStart( self ): - - start = None - for contig in ("chr1", "chr2", "chr1", "chr2" ): - for end in range( 0, 200000, 1000): - result = list(self.tabix.fetch( contig, start, end )) - ref = self.getSubset( contig, start, end ) - self.checkPairwise( result, ref ) - - def testPerContig( self ): - - start, end = None, None - for contig in ("chr1", "chr2", "chr1", "chr2" ): - result = list(self.tabix.fetch( contig, start, end )) - ref = self.getSubset( contig, start, end ) - self.checkPairwise( result, ref ) - - def testPerInterval( self ): - - start, end = None, None - for contig in ("chr1", "chr2", "chr1", "chr2" ): - for start in range( 0, 200000, 2000): - for end in range( start, start + 2000, 500): - result = list(self.tabix.fetch( contig, start, end )) - ref = self.getSubset( contig, start, end ) - self.checkPairwise( result, ref ) - - - def testInvalidIntervals( self ): - - self.assertRaises( ValueError, self.tabix.fetch, "chr1", 0, -10) - self.assertRaises( ValueError, self.tabix.fetch, "chr1", -10, 200) - self.assertRaises( ValueError, self.tabix.fetch, "chr1", 200, 0) - self.assertRaises( ValueError, self.tabix.fetch, "chr1", -10, -20) - self.assertRaises( ValueError, self.tabix.fetch, "chrUn" ) - - def testGetContigs( self ): - self.assertEqual( sorted(self.tabix.contigs), ["chr1", "chr2"] ) - # check that contigs is read-only - self.assertRaises( AttributeError, setattr, self.tabix, "contigs", ["chr1", "chr2"] ) - -class TestParser( unittest.TestCase ): - - filename = "example.gtf.gz" - - def setUp( self ): - - self.tabix = pysam.Tabixfile( self.filename ) - self.compare = [ x[:-1].split("\t") for x in gzip.open( self.filename, "r") ] - - def testGTF( self ): - - for x, r in enumerate(self.tabix.fetch( parser = pysam.asGTF() )): 
- self.assertEqual( "\t".join( self.compare[x]), str(r) ) - - def testTuple( self ): - - for x, r in enumerate(self.tabix.fetch( parser = pysam.asTuple() )): - self.assertEqual( self.compare[x], list(r) ) - - self.assertEqual( len(self.compare[x]), len(r) ) - for c in range(0,len(r)): - self.assertEqual( self.compare[x][c], r[c] ) - -if __name__ == "__main__": - unittest.main() - - |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/pysam/version.py --- a/chimerascan/pysam/version.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -# pysam versioning information -__version__ = "0.3.1" -__samtools_version__ = "0.1.8" -__tabix_version__ = "0.2.1" |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/run.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/run.sh Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,8 @@ +#!/usr/bin/bash + +if [ "$(cat $1/installation_chk 2>&1)" != "true" ]; then + python $1/setup.py build && python $1/setup.py install + mv $1/chimerascan $1/chimerascan_bak + ln -s /usr/local/lib/python2.7/dist-packages/chimerascan $1 + echo true > $1/installation_chk +fi |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/setup-cython.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/setup-cython.py Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,19 @@ +''' +Created on Feb 3, 2011 + +@author: mkiyer +''' +from distutils.core import setup +from distutils.extension import Extension +from Cython.Distutils import build_ext + +# local imports +from setup import get_cython_extension_modules, setup_kwargs + +def main(): + setup(ext_modules=get_cython_extension_modules(), + cmdclass={'build_ext': build_ext}, + **setup_kwargs) + +if __name__ == '__main__': + main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/setup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/setup.py Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,108 @@ +''' +chimerascan + +Created on Jan 5, 2011 + +@author: mkiyer +''' +from distutils.core import setup +from distutils.extension import Extension + +import os +import glob + +# local imports +import chimerascan + +# ------ Setup instructions ------------------------------------------------- + +setup_kwargs = {"name": "chimerascan", + "version": chimerascan.__version__, + "description": "chimeric transcript discovery from RNA-seq", + "long_description": __doc__, + "author": "Matthew Iyer", + "author_email": "mkiyer@umich.edu", + "license": "GPL3", + "platforms": "Linux", + "url": "http://chimerascan.googlecode.com", + "packages": ["chimerascan", + "chimerascan.pysam", + "chimerascan.bx", + "chimerascan.pipeline", + "chimerascan.lib", + "chimerascan.tools"], + "package_data": {'chimerascan.tools': ['table_template.html']}, + "scripts": ["chimerascan/chimerascan_run.py", + "chimerascan/chimerascan_index.py", + "chimerascan/tools/chimerascan_html_table.py", + "chimerascan/tools/gtf_to_genepred.py", + "chimerascan/tools/make_false_positive_file.py"]} + +# ---- Extension Modules ---------------------------------------------------- + +def get_cython_extension_modules(): + # pysam - samtools + samtools = Extension("chimerascan.pysam.csamtools", # name of extension + ["chimerascan/pysam/csamtools.pyx", + "chimerascan/pysam/pysam_util.c"] +\ + glob.glob( os.path.join( "chimerascan", "pysam", "samtools", "*.c" )), + library_dirs=[], + include_dirs=[ "chimerascan/pysam/samtools", "chimerascan/pysam" ], + libraries=[ "z", ], + language="c", + define_macros = [('FILE_OFFSET_BITS','64'), + ('_USE_KNETFILE','')]) + # pysam - tabix + tabix = Extension("chimerascan.pysam.ctabix", # name of extension + ["chimerascan/pysam/ctabix.pyx" ] +\ + glob.glob(os.path.join("chimerascan", "pysam", "tabix", "*.c")), + library_dirs=[], + include_dirs=[ "chimerascan/pysam/tabix", "chimerascan/pysam" ], + libraries=[ "z", ], + language="c", + ) + # Interval clustering + bx_cluster 
= Extension("chimerascan.bx.cluster", + ["chimerascan/bx/cluster.pyx", "chimerascan/bx/intervalcluster.c"], + include_dirs=["chimerascan/bx"]) + # Interval intersection + bx_interval = Extension("chimerascan.bx.intersection", + ["chimerascan/bx/intersection.pyx" ]) + return [samtools, tabix, bx_cluster, bx_interval] + +def get_c_extension_modules(): + # pysam - samtools + samtools = Extension("chimerascan.pysam.csamtools", # name of extension + ["chimerascan/pysam/csamtools.c", + "chimerascan/pysam/pysam_util.c"] +\ + glob.glob( os.path.join( "chimerascan", "pysam", "samtools", "*.c" )), + library_dirs=[], + include_dirs=[ "chimerascan/pysam/samtools", "chimerascan/pysam" ], + libraries=[ "z", ], + language="c", + define_macros = [('FILE_OFFSET_BITS','64'), + ('_USE_KNETFILE','')]) + # pysam - tabix + tabix = Extension("chimerascan.pysam.ctabix", # name of extension + ["chimerascan/pysam/ctabix.c" ] +\ + glob.glob(os.path.join("chimerascan", "pysam", "tabix", "*.c")), + library_dirs=[], + include_dirs=[ "chimerascan/pysam/tabix", "chimerascan/pysam" ], + libraries=[ "z", ], + language="c", + ) + # Interval clustering + bx_cluster = Extension("chimerascan.bx.cluster", + ["chimerascan/bx/cluster.c", "chimerascan/bx/intervalcluster.c"], + include_dirs=["chimerascan/bx"]) + # Interval intersection + bx_interval = Extension("chimerascan.bx.intersection", + ["chimerascan/bx/intersection.c"]) + return [samtools, tabix, bx_cluster, bx_interval] + +def main(): + setup(ext_modules=get_c_extension_modules(), + **setup_kwargs) + +if __name__ == '__main__': + main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/test/test_homology.py --- a/chimerascan/test/test_homology.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,30 +0,0 @@ -''' -Created on Jul 21, 2011 - -@author: mkiyer -''' -import unittest - -from chimerascan.lib.seq import calc_homology - -class TestLibraries(unittest.TestCase): - - def testHomology(self): - a = "AAAAGGGGTTTTCCCC" - b = "AAAAGGGGTTTTCCCC" - self.assertEquals(calc_homology(a, b, 0), 16) - b = "AAAAGGGGTTTTCCCG" - self.assertEquals(calc_homology(a, b, 0), 15) - b = "AAATTTGGTTTTCCCC" - self.assertEquals(calc_homology(a, b, 0), 3) - self.assertEquals(calc_homology(a, b, 1), 4) - self.assertEquals(calc_homology(a, b, 2), 5) - self.assertEquals(calc_homology(a, b, 3), 16) - - - - - -if __name__ == "__main__": - #import sys;sys.argv = ['', 'Test.testName'] - unittest.main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/AHCYL1-RAD51C_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/AHCYL1-RAD51C_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,52 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:7:1277:1620/1 +GTTTATTTATACTTTAAGGTAACAAGTCCACTTGT ++PATHBIO-SOLEXA2_30LEJAAXX:7:7:1277:1620/1 +aaaaaaXaaaabaZaaa\aaa[aa__Zaaaaaa\a +@PATHBIO-SOLEXA2_30LEJAAXX:7:89:1051:1091/1 +ACCGGGGTCGGGGAGGAGCTGAAGCAGGCCAAGGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:89:1051:1091/1 +bbbaaaaWababbbbaaaabbaaaaaaa_`_aaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:24:940:1753/1 +CTGGGTTATAATTCTTCCTCTGGGTCTCGTGACCG ++PATHBIO-SOLEXA2_30LEJAAXX:7:24:940:1753/1 +bbbbabbbbbabbbbbbbbb`VQVaaaaX`U_]_] +@PATHBIO-SOLEXA2_30LEJAAXX:7:19:340:1535/1 +GTTTGCAATGAACATGCAGAAGTAACAACAGTATC ++PATHBIO-SOLEXA2_30LEJAAXX:7:19:340:1535/1 +bbbaaaaaaaaaaaaaaaaaaaaaaaaaa^^_``a +@PATHBIO-SOLEXA2_30LEJAAXX:7:8:639:991/1 +CTGGGTTATAATTCTTCCTCTGGGTCTCGTGACCG ++PATHBIO-SOLEXA2_30LEJAAXX:7:8:639:991/1 +babaaZabaaaabbbaaa`S_a\aaaa`[`aa\aa +@PATHBIO-SOLEXA2_30LEJAAXX:7:56:1468:1938/1 +CTGGGTTATAATTCTTCCTCTGGGTCTCGTGACCG ++PATHBIO-SOLEXA2_30LEJAAXX:7:56:1468:1938/1 +bbWa\a]S]aaaaaaW\[^aaXKHZUXZEKMZ_U] +@PATHBIO-SOLEXA2_30LEJAAXX:7:65:500:1793/1 +CGCTGGCCGGGCCGGCCGGGGAATGTCGATGCCTG ++PATHBIO-SOLEXA2_30LEJAAXX:7:65:500:1793/1 +bbbabbbaaaaaaaaaaaaa^\^^a`_^_^[[[^U +@PATHBIO-SOLEXA2_30LEJAAXX:7:87:688:1556/1 +GCCGGGAGCAGCCAAAACCTGCTAAGTCTCAGAAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:87:688:1556/1 +aaaOaaaWaaaaaa^aaaaaaa\\Zaabaaab\aa +@PATHBIO-SOLEXA2_30LEJAAXX:7:46:1482:684/1 +TACACTTTGAGATTTGTTTCTGGGTTATAATTCTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:46:1482:684/1 +aaaabaaaaZ^aaabaaabaaXSSaaaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:89:237:1128/1 +CGGGGTCGGGGAGGAGCTGAAGCAGGCCAAGGAGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:89:237:1128/1 +baaWaaaaaaaaaaaaaaa^aabaaaaaaaaaaa\ +@PATHBIO-SOLEXA2_30LEJAAXX:7:32:573:692/1 +GGAATGTCGATGCCTGACGCGATGCCGCTGCCCGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:32:573:692/1 +bbaaaaaabbbbaaaaaaaaaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:25:1383:1407/1 +GGCGGGGAATGTCGATGCCTGACGCGATGCCGCTG ++PATHBIO-SOLEXA2_30LEJAAXX:7:25:1383:1407/1 +babaaaaaabaaaaaaaaaaa_`_aa\a[Q^aMEX 
+@PATHBIO-SOLEXA2_30LEJAAXX:7:90:1243:1836/1 +TTCTTCCTCTGGGTCTCGTGACCGTTTCCGGGTGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:90:1243:1836/1 +aaaababbbaaaaaaaa^aaaaaaaaaaaWQQ[SQ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/AHCYL1-RAD51C_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/AHCYL1-RAD51C_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,52 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:7:1277:1620/2 +GTACTCCTTCATGGCCACCGTCACCAAGGCGCCCA ++PATHBIO-SOLEXA2_30LEJAAXX:7:7:1277:1620/2 +abaaaabaL^bbaaaaaaa^\aaaa^Q^[aWa\aU +@PATHBIO-SOLEXA2_30LEJAAXX:7:89:1051:1091/2 +GGGTGCTCAAGGAACCTTATGTTTGCAATGAACAT ++PATHBIO-SOLEXA2_30LEJAAXX:7:89:1051:1091/2 +babbbbbbaaa^aaaaa`]`aaaaaaaaa\a_]_X +@PATHBIO-SOLEXA2_30LEJAAXX:7:24:940:1753/2 +GTCGATGCCTGACGCGATGCCGCTGCCCGGGGTCG ++PATHBIO-SOLEXA2_30LEJAAXX:7:24:940:1753/2 +abbbbaaaaaaaaaaaaaaaaaaa_]]aaaa_XZX +@PATHBIO-SOLEXA2_30LEJAAXX:7:19:340:1535/2 +CGGGGTCGGGGAGGAGCTGAAGCAGGCCAAGGAGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:19:340:1535/2 +bbbabaaaaaaaaaaabaaaaaaaaaa^^^_^[ZU +@PATHBIO-SOLEXA2_30LEJAAXX:7:8:639:991/2 +CGATGCCTGACGCGATGCCGCTGCCCGGGGTCGGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:8:639:991/2 +ab_J`bbbaaabbaaaaaaaaaaaaaabaaaaaaX +@PATHBIO-SOLEXA2_30LEJAAXX:7:56:1468:1938/2 +GAATGTCGATGCCTGACGCGATGCCGCTGCCCGGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:56:1468:1938/2 +aa`X`aaaaaaabaZa^J^aaaaaa^\^UKOKXMK +@PATHBIO-SOLEXA2_30LEJAAXX:7:65:500:1793/2 +GTTCGGGTGCTCAAGGAACCTTCTGTTTGCAATGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:65:500:1793/2 +bbbaaaaaaaaaaaaaaaaaaaaaaaaaa_``__Z +@PATHBIO-SOLEXA2_30LEJAAXX:7:87:688:1556/2 +CTCGTGACCGTTTCCGGGTGCTCAAGGAACCTTCT ++PATHBIO-SOLEXA2_30LEJAAXX:7:87:688:1556/2 +aaaaaaaabaabaaa\baaaaaa[XXaaaaaabaU +@PATHBIO-SOLEXA2_30LEJAAXX:7:46:1482:684/2 +GGAGGGGAGGAGCTGAAGCAGGCCAAGGAGATCGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:46:1482:684/2 +baaaaaaabaaaa^aaaa\aaaZZaaabaaaUMXR +@PATHBIO-SOLEXA2_30LEJAAXX:7:89:237:1128/2 +GTACACTTTGAGATTTGTTTCTGGGTTATAATTCT ++PATHBIO-SOLEXA2_30LEJAAXX:7:89:237:1128/2 +aaabaaaaaaaaaaaaaabaaa^M\aaZaabaaa[ +@PATHBIO-SOLEXA2_30LEJAAXX:7:32:573:692/2 +CTCTGGGTCTCGTGACCGTTTCCGGGTGCTCAAGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:32:573:692/2 +aaaabaaaaaaaaaaaaaaaaaaaaaaaaa[^\aZ +@PATHBIO-SOLEXA2_30LEJAAXX:7:25:1383:1407/2 +CCTCTGGGTCTCGTGACCGTTTCCGGGTGCTCAAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:25:1383:1407/2 +ababaaaaba^aaaba^aaaaa]SUZZ]S]^\VUX 
+@PATHBIO-SOLEXA2_30LEJAAXX:7:90:1243:1836/2 +GGGGAATGTCGATGCCTGACGCGATGCCGCTGCCC ++PATHBIO-SOLEXA2_30LEJAAXX:7:90:1243:1836/2 +abaaaaaaaaaaaaaaaaa[]_aaaaaaaaaPXXR |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/ARFGEF2-SULF2_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/ARFGEF2-SULF2_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,568 @@\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:23:64/1\n+GTCCCGCCCTGCTCCATGATGCGCCGGGTCTTGTT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:23:64/1\n+aaaaaaaab\\\\a\\aaaaa^aaaaaaWabaaaaaa^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:53:140:335/1\n+GCAGTTCTCATTGTTGGTGTAGGTGTTGTGGTTGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:53:140:335/1\n+aa^aaabaaaaaaaaabaa[V^aZbaaXa^^a_M_\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:34:1433:1923/1\n+GGAGCGTGAGGGGCAGCACATGGGTGTGGTCACGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:34:1433:1923/1\n+bbababbaaaabaaaaaaaaaaaaaaaUa_][aaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:72:1296:386/1\n+CAGCACTCCCAGCTGCGCAGGGCCTGCCAGGTGGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:72:1296:386/1\n+babaaaaaaaabbaaaaW`_Xaaaaaaaaaa\\aL^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:44:1403:1658/1\n+GGGCCATGCAGGAGAGCCAGACCAAGAGCATGTTC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:44:1403:1658/1\n+abbbaaaababaabaaaaaabbaaa\\aa^aaaa__\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:97:215:106/1\n+GCCTAGCTCGCCATCTCGCTCACGCCGCCCGCCCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:97:215:106/1\n+aaaab^bbaaabbbbaaaaaaaaaa`Z_a_\\^aaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:49:1199:1607/1\n+CGGGCCCTGGTGAAGATCCTAGCCGACAAGGAGGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:49:1199:1607/1\n+aaaaabbaa[K[abaaaaaa^S^aaaaaaaaaaa`\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:95:1383:1396/1\n+GGGCCATGCAGGAGAGCCAGACCAAGACCATGTTC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:95:1383:1396/1\n+bbabaaaaaaaaaa^aaaaaaaaaaaaaaaaaaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:73:79:1456/1\n+AGGAGGGCGAGGAGCAGTTCTCATTGTTGGTGTAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:73:79:1456/1\n+aabaabaaaaaaaaaaaaaabaaaaaaaaaa^`[L\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:23:1773:1946/1\n+AGCCAGACCAAGAGCATGTTCGTGTCCCGGGCCCT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:23:1773:1946/1\n+aabaaaaababbbabbaaaaaaabaaabaaaabaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:49:241:582/1\n+CATTGTTGGTGTAGGTGTTGTGGTTGTGGACGTAT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:49:241:582/1\n+a\\aaaaaaaaaUQ^_S]aaUaM[MREKQMKPRREK\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:18:965:1429/1\n+CAAGAGCATGTTCGTGTCCCGGGCCCTGGAGAAGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:18:965:1429/1\n+babaabaababaaabaabaaa^a
aaaaaaaaaaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:91:404:1991/1\n+GCAGTTCTCATTGTTGGTGTAGGTGTTGTGGTTGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:91:404:1991/1\n+aZEUaaa^a^aaaa^^aZVVV^[aWaaaaZI^aGE\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:91:289:1577/1\n+GCCAGACCAAGAGCATGTTCGTGTCCCGGGCCCTG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:91:289:1577/1\n+bbaaaaaaaaaaaaaaaaa`__aaaa`_]]_aa^[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:34:1304:1646/1\n+GTGCCTGCCAGGAGGGCGAGGAGCAGTTCTCATTG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:34:1304:1646/1\n+aaaaaaaaaaaaaaaaaaaa`_`aaaaaaa^^[aa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:7:1465:1275/1\n+GCCCCAGCACTCCCAGCTGCGCAGGGCCTGCCAGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:7:1465:1275/1\n+abaaaZ\\aaaaabaaaa\\^aaaaaaa^aa\\aaaa_\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:86:1572:2023/1\n+GTACTTGCCGGTGAGGATGGAGGAGCGTGAGGGGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:86:1572:2023/1\n+aaaaaabaaaaaaaaaaa^aa[a^aaabaaaaa^O\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:16:679:1555/1\n+TTCGTGTCCCGGGCCCTGGAGAAGATCCTAGCCGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:16:679:1555/1\n+aaaLaZabbaaaaZaaaaaLaWLaaa^[J^Zaa^^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:91:915:1083/1\n+TGCCGGTGAGGATGGAGGAGCGTGAGGGGCAGCAC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:91:915:1083/1\n+babbbbaabaa^aaaUaaa^XVS[Q\\aaZRHUKXU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:39:1584:573/1\n+CGCGGCCGGTGCCGGCCGGGACGCCGGGCCCGCAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:39:1584:573/1\n+baabbaaaaaaabaaaaaa``]aZZ^a`X]a\\VQ^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:70:378:1964/1\n+GCACTCCCAGCTGCGCAGGGCCTGCCAGGTGGCGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:70:378:1964/1\n+baaaaab`W`aaaaa^Q^aaaaaaaXMXZOXZ_a_\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:59:129:1017/1\n+GTGTAGGTGTTGTGGTTGTGGACGTACTTGCCGGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:59:129:1017/1\n+baaababaaabbaaaa`X`U___ZXOUXXEMREMS\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:12:365:706/1\n+CCCAGCTGCGCAGGGCCTGCCAGGTGGCGCTCGGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:12:365:706/1\n+aabbaaaaaababaaaaWabbaaaSS`aaaaaWaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:83:888:907/1\n+GCCGGGTCTTGTTCATCACCTGCATGGAACCGAGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:83:888:907/1\n+aabbabbaaaa^aab`]_aaa\\aa`
[Z\\aaaaaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:14:649:1237/1\n+GGTGTAGGTGTTGTGGTTGTGGACGTACTTTCCGG\n++PATHBIO-SOLEXA2_'..b'AAXX:7:44:170:547/1\n+CGCGGGGCCGTCAGCCCCCGCCGGGCCGGGGCCAT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:44:170:547/1\n+aabaaaabbaaa^aaUK[baaaWaaa^a^aaSMEX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:31:327:927/1\n+GTTCGTGTCCCGGGCCCTGGAGAAGATCCTAGCCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:31:327:927/1\n+babaaaaabbaaaaaaaa][_aaaaaa__`aaaa^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:11:670:1627/1\n+GTGGTTGTGGACGTACTTGCCGGTGAGGATGGGGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:11:670:1627/1\n+bbbbbbaaaaaa]\\\\^`^ZZ^XPMUEXZOX]UEXZ\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:81:159:597/1\n+TGCGTCCCGCCCTGCTCCATGATGCGCCGGGTCTT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:81:159:597/1\n+abaaaabaaaa[EKbaa`]`[_aXaa\\XEE_aaaV\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:70:1099:1499/1\n+AGCCAGACCAAGAGCATGTTCGTGTCCCGGGCCCT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:70:1099:1499/1\n+aabaaaaaaaaa\\aaaaaaaaaaaaaaaaaaaaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:42:354:538/1\n+TGTGGTTGTGGACGTACTTGCCGGTGAGGATGGAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:42:354:538/1\n+aaaaabaa[Q[a\\Z^\\aaZUaaa\\aUUEUS]aV[[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:51:460:769/1\n+CTCATTGTTGGTGTAGGTGTTGTGGTTGTTGACGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:51:460:769/1\n+bbaaabaWaa\\aaaaaZaaa`SJMZaUEUJKEOGO\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:60:609:90/1\n+GTGAGGGGCAGCACATGGGTGTGGTCACGAAGGCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:60:609:90/1\n+aOaaaabaaaaaaa^ababaaaaaaaaaaaaZaaX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:9:542:343/1\n+CGTACTTGCCGGTGAGGATGGAGGAGCGTGAGGGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:9:542:343/1\n+aaaaaabbaaaaaaaa^aaaaaabbaaaaaaaaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:26:1767:531/1\n+GCCGGGCCGGGGCCATGCAGGAGAGCCAGACCAAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:26:1767:531/1\n+bbbaaabaaaaababaa_U\\^[^aaa\\__aaa[XX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:20:1032:2010/1\n+GCGAGGAGCAGTTCTCATTGTTGGTGTAGGTGTTG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:20:1032:2010/1\n+bbaaaaaaaaaaaabaaaaaaaXX]aXRR[aa^_S\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:59:426:623/1\n+GCAGGAGAGCCAGACCAAGAGCATGTTCGTGTCCC\n
++PATHBIO-SOLEXA2_30LEJAAXX:7:59:426:623/1\n+baaaaabbbbaaabaaaaaaaaaaaaaaa`__aaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:43:525:1541/1\n+TGTTCGTGTCCCGGGCCCTGGAGAAGATCCTAGCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:43:525:1541/1\n+aaabbaaaaabaaaaaaaaaaaaaaaaa`]`aaa\\\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:100:373:464/1\n+CTTGCCGGTGAGGATGGAGGAGCGTGAGGGGCAGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:100:373:464/1\n+aba^aaaaaaaaaaaaa[aaaaa___a[X^_[Z[a\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:56:1266:1813/1\n+GGGTGAGGAGCAGTTCTCATTGTTGGTGTAGGTGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:56:1266:1813/1\n+aaaaaaaaaabaUaaa_ZZa^VVaZQREZUKEXaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:84:752:1980/1\n+GGAGAAGATCCTAGCCGACAAGGAGGTGAAGCGGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:84:752:1980/1\n+ba\\aaaaabaZaaab_]Ua^Xaa\\a[JX[Xaaaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:13:592:1334/1\n+CAGCACTTTGGGAGGCCGAGGCAGGCGGATCACAA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:13:592:1334/1\n+aaaaaaaZabaaaaaaJaabaaaaaaa^S[\\ZU\\_\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:95:638:809/1\n+GGCGTACTTGCCGGTGAGGATGGAGGAGCGTGAGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:95:638:809/1\n+a\\aaLaaaaaaaaaZ^aaabaaa]_ZW\\ZVV[W[[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:66:513:1615/1\n+CCGGGGCCCTGGAGAAGATCCTAGCCGACAAGGAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:66:513:1615/1\n+aaaaaaaa^aaaaaaaa^aaa\\aaZa^QZaaa_]`\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:302:1352/1\n+GAAGATCCTAGCCGACAAGGAGGTGAAGCGGCCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:302:1352/1\n+aaaaaaabbbabaaaaaZaaXaaaaa^aaaaaaa[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:45:943:526/1\n+GTGAGGGCCAGCACATGGGTGTGGTTACGAAGGCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:45:943:526/1\n+aLWZWa[EKaba^WaaaaWUSS_\\KMEZaVVJJZ[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:63:849:221/1\n+GTGGTTGTGGACGTACTTGCCGGTGAGGATGGAGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:63:849:221/1\n+bbbaaabaaaaaaaaaaaaa^Z^aZKRUR[UXERU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:21:471:627/1\n+CTCGTGCTGTGCCTGCCAGGAGGGCGAGGAGCAGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:21:471:627/1\n+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_Z[aa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:8:216:1709/1\n+TGTGGACGTACTTGCCGGTGAGGATGGAGGAGCGT\n++PATHBIO
-SOLEXA2_30LEJAAXX:7:8:216:1709/1\n+baaaaaaaaaaa```aa_][EXUUXXUU[SU[_[S\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:33:848:276/1\n+GCCGGGCCGGGGCCATGCAGGAGAGCCAGACCAAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:33:848:276/1\n+aaaaaaWWaUa^aa\\a\\aaa]X`aZRR]XV[ZJRE\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/ARFGEF2-SULF2_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/ARFGEF2-SULF2_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,568 @@\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:23:64/2\n+GAAGATCCTAGCCGACAAGGAGGGGAAGCGGCCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:23:64/2\n+aaaaabbaaaaaa^aaaaaaZaaWL\\a^aaaaa\\X\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:53:140:335/2\n+GAAGATCCTAGCCGACAAGGAGGTGAAGCGGCCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:53:140:335/2\n+aaaaaaababaaaaa[H^aaUaaaUMXaaaaOKRX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:34:1433:1923/2\n+GCCAGACCAAGAGCATGTTCGTGTCCCGGGCCCTG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:34:1433:1923/2\n+aaaaaaaaaaaabbaabbbbaaUaaa^LaaZaa^X\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:72:1296:386/2\n+GTGCCTGCCAGGAGGGCGAGGAGCAGTTCTCATTG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:72:1296:386/2\n+aWaaaabaaaaaaaaXV^aaaaaaaaaaaa__`aU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:44:1403:1658/2\n+CAGCACATGGGTGTGGTCACGAAGGCGTTGATGAA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:44:1403:1658/2\n+abbaaaaa\\aaaaaaaba^[^^baaaaaaaUZZVU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:97:215:106/2\n+GCTGGGTCTTGTTCATCACCTGCATGGAACCGAGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:97:215:106/2\n+bbaaa^aabaa\\a_M`XRXaaaXP[aaUHPaa_XP\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:49:1199:1607/2\n+TGTGGACGTACTTGCCGGTGAGGATGGAGGAGCGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:49:1199:1607/2\n+baaaaaaaaaaaaaaaaaaaZa\\WaaaaaZV[\\^R\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:95:1383:1396/2\n+GGGGCAGCACATGGGTGTGGTCACGAAGGCGTTGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:95:1383:1396/2\n+aa\\abaaaaaaaaaaaaaaaa^aaaaaaaaaaaaK\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:73:79:1456/2\n+GGAGGTGAAGCGGCCCCAGCACTCCCCGCTGCGCA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:73:79:1456/2\n+ababaab^aaaaaabbaWaa\\aaaa`S_aa^aa`H\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:23:1773:1946/2\n+GGAGGAGCGTGAGGGGCAGCACATGGGTGTGGTCA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:23:1773:1946/2\n+aaaaaaaaaaaaaWaaaaaa^WWaaaaLaaWaL\\Z\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:49:241:582/2\n+GAGCCGACAAGGAGGTGAAGCGGCCCCAGCACTCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:49:241:582/2\n+HXabaaaaa]]_]aXE[aaUaaUKUa^U[XMX[UX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:18:965:1429/2\n+CTTGCCGGTGAGGATGGAGGAGCGTGAGGGGCAGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:18:965:1429/2\n+aaabaaaaaa\\aaaaaUXUa
aaaaaaW_Z]aaaXE\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:91:404:1991/2\n+CGACAAGGAGGTGAAGCGGCCCCAGCACTCCCCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:91:404:1991/2\n+baZU_Uaaab]W[aa^^I\\aaaaaL[H[EUa_PE[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:91:289:1577/2\n+GTGAGGGGCAGCACATGGGTGTGGTCACGAAGGCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:91:289:1577/2\n+bbaaaaabaaaabaaaaaaaaaaaa^[[^UUX[_U\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:34:1304:1646/2\n+GCTGCGCAGGGCCTCCCAGGTGGCGCTCGGTGGGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:34:1304:1646/2\n+bbbbbbaaaaaaaaaaaaa^[Zaaaaaaa^V\\a_P\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:7:1465:1275/2\n+GCGAGGAGCAGTTCTCATTGTTGGTGTAGGTGTTG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:7:1465:1275/2\n+aaaaabaaaaaaaW\\aaaa^aa\\aaa^[[]aO^[G\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:86:1572:2023/2\n+GTTCGTGTCCCGGGCCCTGGAGAAGATCCTAGCCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:86:1572:2023/2\n+aaaaababaaaaa^a_S]\\aaaaba\\[^[aaURXX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:16:679:1555/2\n+AAGGCGTTGATGAAGTGCGTCCCGACCTGCTCCAT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:16:679:1555/2\n+aaabbaaaaaaaaaaaaaaaaXEKEXaVQUEOUUE\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:91:915:1083/2\n+GTTCGTGTCCCGGGCCCTGGAGAAGATCCTAGCCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:91:915:1083/2\n+bbabaaaabbaaaaaaaa\\aaaaaaa^\\^^ZOZUR\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:39:1584:573/2\n+CGTTGATGAAGTGCGTCCCGCCCTGCTCCATGATG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:39:1584:573/2\n+abaababaaabaaaabbaaaaaaaVQ\\a^Q[X[XX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:70:378:1964/2\n+GGCGAGGAGCAGTTCTCATTGTTGGTGTAGGTGTT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:70:378:1964/2\n+bba`[`_U_aaaaaab_Z^aaaaaa_]XUKX]Z_U\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:59:129:1017/2\n+AGCCGACAAGGAGGTGAAGCGGCCCCAGCACTCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:59:129:1017/2\n+abaabbaaaaaaaaaaaaaaaaaaaZH[^____aX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:12:365:706/2\n+CTCTCGTGCTGTGCCTGCCAGGAGGGCGAGGAGCA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:12:365:706/2\n+bbaabaa^aa\\aaaabaaa^aXEUaa\\_S`[[^[E\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:83:888:907/2\n+GGAGAAGATCCTAGCCGACAAGGAGGTGAAGCGGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:83:888:907/2\n+abaaaaaaa[SS[__aR
OX`aUaZaXEPQQMRXXG\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:14:649:1237/2\n+AGAAGATCCTAGCCGACAAGGAGGGGAAGCCGCCC\n++PATHBIO-SOLEXA2_'..b'AAXX:7:44:170:547/2\n+GGTCACGAAGGCGTTGATGAAGTGCGTCCCGCCCT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:44:170:547/2\n+aaaaaaaaaaaaaa^a\\aaaaWa[]]Zaaaaaa[M\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:31:327:927/2\n+TGAGGATGGAGGAGCGTGAGGGGCAGCACATGGGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:31:327:927/2\n+bbaaabbaaabbbaaabaaaaaa]]`aaaaaaaaK\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:11:670:1627/2\n+GCGGCCCCAGCACTCCCAGCTGCGCAGGGCCTGCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:11:670:1627/2\n+baaaaabaaaaaaaabaaaaaaaa^S^aaa\\V^aX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:81:159:597/2\n+CGCCGCCCGCCCCCGGGGCCGTCAGCCCCCCCCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:81:159:597/2\n+babaWaa_J_aaJa^aaaaZEEUXX[V\\aa^U[HK\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:70:1099:1499/2\n+GTGAGGGGCAGCACATGGGTGTGGTCACGAAGGCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:70:1099:1499/2\n+aaaaaaaaaabbbabaaaaaaaaaa[[Q[ZaaaUR\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:42:354:538/2\n+GTCCCGGGCCCTGGAGAAGATCCTAGCCGACAAGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:42:354:538/2\n+aaa^abba^baaaaaaZaaaaaaaaaaaaaaaXa[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:51:460:769/2\n+GGTGAAGCGGCCCCAGCACTCCCAGCTGCGCAGGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:51:460:769/2\n+aaWbaaaaa^aaaaO\\aLaaab\\LZXHUaaUEE[[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:60:609:90/2\n+CCAAGAGCATGTTCGTGTCCCGGGCCCTGGAGAAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:60:609:90/2\n+baaaabaWaaaa^aaaZaaba^aa^aaaaaaaaaU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:9:542:343/2\n+GCATGTTCGTGTCCCGGGCCCTGGAGAAGATCCTA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:9:542:343/2\n+aaaaabbb\\aZabaaaaaaaabZaaaaaaaaaa[X\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:26:1767:531/2\n+GGGGCAGCACATGGGTGTGGTCACGAAGGCGTTGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:26:1767:531/2\n+aaaaaabbaaaaabaaaaaaaaaa[V^aa\\aXa]K\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:20:1032:2010/2\n+GGTGAAGCGGCCCCAGCACTCCCAGCTGCGCAGGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:20:1032:2010/2\n+aaaaaaaaabaaaaaa[HSHaaXE[XSaZZUEOXR\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:59:426:623/2\n+GGAGCGTGAGGGGCAGCACATGGGTGTGGTCAC
GA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:59:426:623/2\n+baaaabababbaaaaaaaaabaaaaa^aaaaaa^M\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:43:525:1541/2\n+CCCTGCTCAATGATGCGCCGGGTCTTGTTCATCAC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:43:525:1541/2\n+bbababbaaaaaaaaaaaaaaaaaaaXaaaaa`]X\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:100:373:464/2\n+CCAAGAGCATGTTCGTGTCCCGGGCCCTGGAGAAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:100:373:464/2\n+babaaaaaaabbaaaaaabaaa\\aaaa`W_aaaaZ\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:56:1266:1813/2\n+GTGAAGCGGCCCCAGCACTCCCAGCTGCGCAGGGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:56:1266:1813/2\n+aaa\\\\aaaaa]`[a^K^aaWaaRUOa\\aaaXMKZU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:84:752:1980/2\n+GCCGGGTCTTGTTCATCACCTGCATGGAACCGAGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:84:752:1980/2\n+aa\\\\UaaaaLaaa^V^aabaaa^[^XV^]`_[[[P\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:13:592:1334/2\n+GTCTTGCTCTGTCATCAGGATGGAGTGCAGTGGCT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:13:592:1334/2\n+abaaaaaaaaaaaaaaaaUEXaZa\\UK[`X`aaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:95:638:809/2\n+CCCGGGCCCTGGAGAAGAGCCTAGCCGACAAGGAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:95:638:809/2\n+abbbaaabaaaaabaabaLaaaaaaaaaaaa^aaX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:66:513:1615/2\n+GCGGACGTACTTGCCGGTGAGGATGGAGGAGCGTG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:66:513:1615/2\n+aL^aaaaaaaaaaLaaaZ\\W^aaa^baa]W[XaJR\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:302:1352/2\n+CAGGTGTTGTGGTTGTGGACGTACTTGCCGGTGGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:302:1352/2\n+a\\aa\\aabaa^aaaa_WX][Q_ZOR^JH[VVHHPM\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:45:943:526/2\n+GGCCGGGGCCATGCAGGAGAGCCAGACCAAGAGCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:45:943:526/2\n+aaLaWXaaaaa^aa^aaa^aaXEXaaaaa^a^aWE\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:63:849:221/2\n+TGAAGATCCTAGCCGACAAGGAGGTGAAGCGGCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:63:849:221/2\n+bbaaaababaaaaaaaaaaaa``^^^aaaaaa`_X\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:21:471:627/2\n+CCCAGCTGCGCAGGGCCTGCCAGGTGGCGCTCGGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:21:471:627/2\n+bbbaaaaaabbaaaaaaaaa^V^aaaaaaaaaaaU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:8:216:1709/2\n+CGTGTCCCGGGCCCTGGAGAAGATCCTAGCCGACA\n++PA
THBIO-SOLEXA2_30LEJAAXX:7:8:216:1709/2\n+baaaabbbaaaabaaa^S^X]aa\\[^^V[^_[E[E\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:33:848:276/2\n+CGGTGTGGTCACGAAGGCGTTGATGTAGTGCGTCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:33:848:276/2\n+aaW^baaabaa^ZaaZa^QU^EEKGMUKEOEMOM[\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/BC035340-MCF2L_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/BC035340-MCF2L_1.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:73:483:1179/1 +CCTGGATGTCGGCAGCACAGAGCGGGACGATGTCC ++PATHBIO-SOLEXA2_30LEJAAXX:7:73:483:1179/1 +bbbbaaaaaaaaaaaaaaaaaaaaaaaaa\aa\aa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/BC035340-MCF2L_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/BC035340-MCF2L_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:73:483:1179/2 +GCCTCATGTTTGACTGTTGGAGATTTATCCTGTGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:73:483:1179/2 +aaababaabaaabaaaabaaaaaabbaaaaa^aU[ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/BCAS4-BCAS3_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/BCAS4-BCAS3_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,964 @@\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:57:1302:766/1\n+TGCTCGTGGACGCTGATCAGCCGGAGCCCATGCGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:57:1302:766/1\n+aabbbaaa\\aaaabaaaaaaaaaaaaaaaaaa_UX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:32:41:790/1\n+CCACGGGCTCCCAGGCAGCCTCCGCCAGCCGGACC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:32:41:790/1\n+aaaWaaaabaab^aaaaaaaabaaaaaaa^H^aZa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:37:1377:1073/1\n+GCTCCATGGAGGACATGTGCCGCAGCCCGAAGCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:37:1377:1073/1\n+ababbaaabaaaaabaZaaaaaaZXXXX^^\\a^[^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:57:946:2022/1\n+CCCGCACACCTCCAGCAGGGTCACGCTCCTGTCAA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:57:946:2022/1\n+bbbabbabbaababaabaaaabaaaaaaaaaaUXZ\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:64:290:441/1\n+CGGGCTCCCAGGCAGCCTCCGCCAGCCGGACCCCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:64:290:441/1\n+baaabbbbbabbaaabbbbaaaaaaaaaaZaaaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:76:1184:1673/1\n+ATCAGCCGGAGCCCATGCGCAGCGGGGCGCGCGAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:76:1184:1673/1\n+aaaaaaabaaUaaabababaaaaaaLa^baaZHSS\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:18:254:1175/1\n+CCCAGGCAGCCTCCGCCAGCCGGACCCCGTCGCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:18:254:1175/1\n+aaaa[H^aa\\abb`]`aaaaa\\\\_Q_aaUK[a^S^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:52:1132:327/1\n+CCCCGGAGGCCCTCCTCCGTGTGCTCCATGGAGGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:52:1132:327/1\n+babbbaaabaaaabbaaaaaXRUaaa_^__[^^aZ\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:13:221:1785/1\n+GCGTCGGCAAGTCGCTCCCGGAGGCCCTCCTCCGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:13:221:1785/1\n+abbaaaaaaaaWaaaaaaaZa^aaa_ZZ_a^aaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:33:309:789/1\n+AGGCAGCCTCCGCCAGCCGGACCCCGTCGCCCTCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:33:309:789/1\n+aaaaaaaaa^baaaaaaZW^aaaaaZaaaaaaLa]\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:1658:1977/1\n+CGCTGATCAGCCGGAGCCCATGCGCAGCGGGGCGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:1658:1977/1\n+aabaaaaaaaa\\^aaaaaZUZaaa]]Xaa[H^aLa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:59:1326:1906/1\n+CGGGCGCAACCACGGGCTCCCAGGCAGCCTCCGCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:59:1326:1906/1\n+abbaaaaaaaaaaaaa^baa
\\aaaaabaaaLaUaL\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:38:270:183/1\n+CCCCGTCGCCCTCCTGATGCTGCTCGTGGACGCTG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:38:270:183/1\n+abbababaaabbbbbaa^aaaaaaaaa^URXa\\XI\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:31:1692:1137/1\n+CGCTGATCAGCCGGAGCCCATGCGCAGCGGGGCGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:31:1692:1137/1\n+baaaaabbaaaab^aaaaaaaaaaaaa[V^aa_[^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:95:1237:1905/1\n+CAGGCAGCCTCCGCCAGCCGGACCCCGTCGCCCTC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:95:1237:1905/1\n+aaaaabbaaaaaaaba^aaaaUaaaaaabababab\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:14:618:35/1\n+CGGGCTCCCAGGCAGCCTCCGCCAGCCGGACCCCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:14:618:35/1\n+aaaaaaaaaa_]_aaaaaaaaaaaaaaa\\[]_a_X\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:30:837:629/1\n+CTCCGTGTGCTCCATGGAGGACATGTGCCGCAGCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:30:837:629/1\n+abbabaZaaaabbbaaaaaaaabaaaaaaaaaa`_\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:84:1185:804/1\n+CGGGCTCCCAGGCAGCCTCCGCCAGCCGGACCCCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:84:1185:804/1\n+aaabaaaaaaaa_]Saaaaa\\aaaaaaaa[aaaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:78:236:1529/1\n+GCCGGAGCCCATGCGCAGCGGGGCGCGCGAGGTCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:78:236:1529/1\n+baaaa^aaaaaaabaaaaaaaaaaaaWaaO\\MJ_W\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:18:276:1452/1\n+CCCGGAGGCCCTCCTCCGTGTGCTCCATGGAGGAC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:18:276:1452/1\n+babaa^abaabaaaaaaaaaaaaab_U]a__^aaZ\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:32:510:1332/1\n+CGCTCCCGGAGGCCCTCCTCCGTGTGCTCCATGGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:32:510:1332/1\n+aabWabaaa^aaa^aaaa\\aaa^QQaaaaaLa\\[I\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:2:963:1089/1\n+GCGTCGGCAAGTCGCTCCCGGAGGCCCTCCTCCGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:2:963:1089/1\n+bbbbbbbaaaaaaaaaaa`Z[Q^aaa[X[^Z[[OO\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:44:864:680/1\n+TCCTGATGCTGCTCGTGGACGCTGATCAGCCGGAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:44:864:680/1\n+aaabaabbbaaaa^Q[aaaaaaaaaa^XX^XEKU^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:27:1068:808/1\n+CGCACACCTCCAGAAGGGTCACGCTCCTGTCAAAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:27:1068:808/1\n+aaaaaaaaaabaaUZa^HJXJ[a\\^aaZ
aLaLaaV\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:92:694:1379/1\n+GCCAGCCGGACCCCGTCGCCCTCCTGATGCTGCTC\n++PATHBIO-SOLEXA2_30LE'..b'1\n+GACCCCGTCGCCCTCCTGATGCTGCTCGTGGACGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:44:659:963/1\n+babaaaaaaaabbaabaaaaaaaaaaZZ\\aX^[Z[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:5:1524:224/1\n+ACCAGCCGGACCCCGTCGCCCTCCTGATGCTGCGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:5:1524:224/1\n+aaaaaaWaWaaaU`ZZ`S\\Z^Z^`]QXEKMOEPEG\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:13:761:1512/1\n+CTCGTGGACGCTGATCAGCCGGAGCCCATGCGCAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:13:761:1512/1\n+bbababbbabbaaaa__`aa_^[_aaaaaaaa_^U\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:69:1545:865/1\n+CGCTGATCAGCCGGAGCCCATGCGCAGCGGGGCGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:69:1545:865/1\n+a^aaaa\\aaab_X_aaaa_S`aaWa^aaaaabaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:930:1174/1\n+CGCGCTCTTCCTGACCCCCGATCCTGGGGCCGAGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:930:1174/1\n+aaaaaWa^aaaaaaaaaaaaaWaa^VSSaaaaaba\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:83:718:889/1\n+CGCTCCATGGAGGACATGTGCCGCAGCCCGAAGCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:83:718:889/1\n+aabaaaaabaaaaaaaaaaaaaaaaaaaaaaa]_`\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:1417:658/1\n+CACCGGCTAGGTGACTCGGCCATGGCGTCGGCAAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:1417:658/1\n+a^aZaaabaWWLaabaX[[ZHPVQ[J[HXWaZXZU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:66:867:1539/1\n+CCTGATGCTGCTCGTGGACGCTGATCAGCCGGAGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:66:867:1539/1\n+bbaaaaaaaaaa_Z_aaa`_``]_^^^X^XOX^[^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:79:1578:1197/1\n+CCATGCGCAGCGGGGCGCGCGAGCTCGCGCTCTTC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:79:1578:1197/1\n+babaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:33:1021:322/1\n+TTCCTCCGTGTGCTCCATGGAGGACATGTGCCGCA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:33:1021:322/1\n+aaaaaaaaWaaaaaabaaaaaWaaa^aaaaaaaZa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:61:1267:1990/1\n+CGTCGCCCTCCTGATGCTGCTCGTGGACGCTGATC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:61:1267:1990/1\n+aaabbaaaaaa`]`aaa`Z`a^V^\\^_UOU^VX[K\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:91:1242:1241/1\n+CGGCAAGTCGCTCCCGGAGGCCCTCCTCCGTGTGC\n++PATHBIO-SOLE
XA2_30LEJAAXX:7:91:1242:1241/1\n+\\aaaaaaaa^U^aW`Z`aaaZaXEZ[[`]aaa_]_\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:62:1224:1029/1\n+CCCGTCGCCCTCCTGATGCTGCTCGTGGACGCTGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:62:1224:1029/1\n+aaaabaaabaaaaaaaaaaaZaaaa[HXUaaZ`KO\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:38:1108:325/1\n+GAGGACATGTGCCGCAGCCCGAAGCCCTCAGGCCA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:38:1108:325/1\n+abbbaabbbbabbaaaaaaaaaaaaa_X^SX^aa^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:49:970:1046/1\n+CCGGCTAGGTGACTCGGCCATGGCGTCGGCAAGTC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:49:970:1046/1\n+bbabaaaaaaaaaaaaX]Za[X^ZZZZ^ZHOU_PK\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:82:1503/1\n+TCCCGGCTAGGTGACTCGGCCATGGCGTCGGCAAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:82:1503/1\n+bbbababa^aaaabaa`]_aaWaaaaaaa]]UGXZ\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:85:302:1496/1\n+GCCGCAGCCCGAAGCCCTCAGGCCTGCTCCCGCAC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:85:302:1496/1\n+abbaabababaaaababaaaaaabaaaaaaaaZU[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:26:441:786/1\n+CCGGCTAGGTGACTCGGCCATGGCGTCGGCAAGTC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:26:441:786/1\n+aaaaabaaaWaa^aaaaaQI\\b^WaLaaaWaaaaW\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:8:434:166/1\n+ATCAGCCGGAGCCCATGCGCAGCGGGGCGCGCGAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:8:434:166/1\n+abbaaaaaaaaaaaa___a^\\__`___a_`__[UX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:37:940:1981/1\n+GCGCAGCGGGGCGCGCGAGCTCGCGCTCTTCCTGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:37:940:1981/1\n+aabaaaaaabaa`_[^__]XVX^Z\\REEEMOGERK\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1018:1410/1\n+GTCCCGGCTAGGTGACTCGGCCATGGCGTCGGCAA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:17:1018:1410/1\n+aababbabbbaaWabaaaaaa^QX]]Saa`]]aa^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:84:1297:1236/1\n+CGTCGCTCCCGGAGGCCCTCCTCCGTGTGCTCCAT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:84:1297:1236/1\n+aaaabaaaaaaabaaaaaaaaaa^VXa[J^aaZZ^\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:45:1219:280/1\n+TGATGCTGCTCGTGGACGCTGATCAGCCGGAGCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:45:1219:280/1\n+baaaaaaaabaaaaXX[aaaaZaaaXEX[`__`a[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:36:1381:1758/1\n+GGAGGACATGTGCCGCAGCCCGAAGCCCTCAGGCC\n++PATHBIO-SOLEXA2_30
LEJAAXX:7:36:1381:1758/1\n+aaaaaaaWaaaaWaaa\\aaa^aaaaaaaaaaaaWa\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:29:27:808/1\n+GCTGGCCAGTCAGAAGCAGGTTGCTGGTACCTACG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:29:27:808/1\n+bbbaabaaaaaaaaaaaaaaaaaaaa[aa_``aaa\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/BCAS4-BCAS3_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/BCAS4-BCAS3_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,964 @@\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:57:1302:766/2\n+CTCGGCCATGGCGTCGGCAAGTCGCTCCCGGAGGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:57:1302:766/2\n+bababbaaaaaaaaaaaaa__]_Z`aaa__[^[VO\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:32:41:790/2\n+GCTCCATGGAGGACATGTGCCGCAGCCCGAAGCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:32:41:790/2\n+aaaaaaaaabbaaaaaaaaaaaaaaaaaa\\aaaaX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:37:1377:1073/2\n+CGCTGATCAGCCGGAGCCCATGCGCAGCGGGGCGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:37:1377:1073/2\n+aaaaaaaaaaaaaaaaZaaa`SZaaaV[[Z_[H[H\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:57:946:2022/2\n+CGCAACCACGGGCTCCCAGGCAGCCTCCGCCAGCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:57:946:2022/2\n+bbabbbbbabaaaaaaaaaaaaa^^^aa__]aa\\R\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:64:290:441/2\n+GCTCCATGGAGGACATGTGCCGCAGCCCGAAGCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:64:290:441/2\n+abaaaaaaabbbaaaaaaaaaaaaaaaaaaUaaa[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:76:1184:1673/2\n+CCACGACGTCCCGGCTAGGTGACTCGGCCACGGGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:76:1184:1673/2\n+aaabaaaaWa^aaaZLaaLZaaLUUKRHVZIVaLG\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:18:254:1175/2\n+CCGTGTGCTCCATGGAGGACATGTGCCGCAGCCCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:18:254:1175/2\n+aaa\\aaaabaaXaaaa^Q[Q[aWaaaaaaaaaZRU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:52:1132:327/2\n+CGCCAGCCGGACCCCGTCGCCCTCCTGATGCTGCT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:52:1132:327/2\n+abbbbbbbaaaabbaaaaaaaaaaaa[\\^aa`]`Z\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:13:221:1785/2\n+GTCGCCCTCCTGATGCTGCTCGTGGACGCTGATCA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:13:221:1785/2\n+aabbaaa\\abb^aababaaaaaWa^QKSaOEKUaM\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:33:309:789/2\n+GTGCTCCATGGAGGACATGTGCCGCAGCCCGAAGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:33:309:789/2\n+aZ^a\\^aaa\\aaaWaaaLa\\aaaZUa\\aabaL^aZ\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:1658:1977/2\n+GGCTAGGTGACTCGGCCACGGCGTCGGCAAGTCGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:1658:1977/2\n+aaaabaaaaaaabaZaaXEXa^aWW^aaaW^La_E\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:59:1326:1906/2\n+CGCACACTGCTCTACCCTTTACAAGGTGCTTTCAC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:59:1326:1906/2\n+Za^aaabaaaaaZa^Za
aaa^aaa[UWWaaaaLaE\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:38:270:183/2\n+CGCTCCCGGAGGCCCTCCTCCGTGTGCTCCCTGGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:38:270:183/2\n+babbaabababaaaaaabaaa\\\\\\a\\`[_RGRVXR\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:31:1692:1137/2\n+CCCCGCACACCTCCAGCAGGGTCACGCTCCTGTCA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:31:1692:1137/2\n+babbbaaabbaaaa\\aa`ZRXaaaaaaaaaaaaaZ\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:95:1237:1905/2\n+CTCCCGGAGGCCCTCCTCCGTGTGCTCCATGGAGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:95:1237:1905/2\n+baba^abZWaaaaa\\aOaaa`]Maaaa[L^aUEPU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:14:618:35/2\n+GCTCCATGGAGGACATGTGCCGCAGCCCGAAGCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:14:618:35/2\n+aaaa`]`aaaaaaaaaaaaaaaaZaaaaaa^\\[_X\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:30:837:629/2\n+CAGGCAGCCTCCGCCAGCCGGACCCCGTCGCCCTC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:30:837:629/2\n+aabaabbbbaaaaaaaaaaaaaaaaaaaaaaa__[\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:84:1185:804/2\n+GCTCCATGGAGGACATGTGCCGCAGCCCGAAGCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:84:1185:804/2\n+aaabaaaaaaaaaaaaaa^aaaaZZ\\aaa^aaaaR\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:78:236:1529/2\n+CGGCTAGGTGACTCGGCCATGGCGTCGGGAAGGCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:78:236:1529/2\n+bbaaaaa\\\\aaaaaababWaLaaaL^a]UIQVIIU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:18:276:1452/2\n+GGCAGCCTCCGCCAGCCGGACCCCGTCGCCCTCCT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:18:276:1452/2\n+aaaaabbaaaabaaaaaaaaaaabREX^^V^S_XE\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:32:510:1332/2\n+CAGGCAGCCTCCGCCAGCCGGACCCCGTCGCCCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:32:510:1332/2\n+^bbaaaaaaaaa^aaWaaa\\aaabaaLaaZaa\\LU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:2:963:1089/2\n+CTCGTGGACGCTGATCAGCCGGAGCCCATGCGCAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:2:963:1089/2\n+abaabaaaaaaaaaaa`]`aaaaaaa^[[aaa__U\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:44:864:680/2\n+GGCGTCGGCAAGTCGCTCCCGGAGGCCCCCCTCCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:44:864:680/2\n+aWaaabaaa^aSHXQSaUaUEKGKXSWXQVZQQ^G\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:27:1068:808/2\n+GCCGGAGCCCATGCGCAGCGGGGCGCGCGTGCTCG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:27:1068:808/2\n+H[Waaa^ZaaaWaaZa\\ZZI
\\aXHSOa\\EERHQIH\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:92:694:1379/2\n+GTGCTCCATGGAGGACATGTGCCGCAGCCCGAAGC\n++PATHBIO-SOLEXA2_30LE'..b'2\n+TCCTCCGTGTGCTCCATGGAGGACATGTGCCGCAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:44:659:963/2\n+abababaaaaaaabaaaaaaaaaaaaaaaaaa_]U\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:5:1524:224/2\n+ACTCCGTGTGCTCCATGGAGGACATGTGCCGCAGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:5:1524:224/2\n+aaaaaX]N_aaaa^Z^SH^a]ZQUKKXKR[VV[XX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:13:761:1512/2\n+CTCGGCCATGGCGTCGGCAAGTCGCTCCCGGGGGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:13:761:1512/2\n+aaaaaaaaaaaaaaaaa`[^XXUKMVVS[HPEUXK\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:69:1545:865/2\n+GCCATGGCGTCGGCAAGTCGCTCCCGGAGGCCCTC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:69:1545:865/2\n+aaaaaaaaaa_X[S[a^aa`UGRKXZKRUaW[JHO\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:930:1174/2\n+GCCCGAAGCCCTCAGGCCAGCTCCCGCACACCTCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:930:1174/2\n+aaaaaabaa\\aaaa^^aa\\aa^aZaWaaaZaZLaZ\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:83:718:889/2\n+CCCAGGCAGCCTCCGCCAGCCGGACCCCGTCGCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:83:718:889/2\n+bbaaaaabaaabbbabaaababaaaaaaaaabaaZ\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:1417:658/2\n+GTGGACGCTGATCAGCCGGAGCCCATGCGCAGCGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:1417:658/2\n+aZaaaOLaa\\aZaaZaaUa\\a\\aaa^aaaaaaLa_\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:66:867:1539/2\n+GGCCATGGCGTCGGCAAGTCGCTCCCGGAGGCCCT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:66:867:1539/2\n+abbbaaaabaaaaaaaaaaaaaaa_^^^[[^\\^^U\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:79:1578:1197/2\n+CGGATTCCACGATGTCCCGGCTAGGTGACTCGGCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:79:1578:1197/2\n+baabbbabbaaaaaabaaa\\aaaa\\`S_aa^QV[Z\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:33:1021:322/2\n+ACGGGCTCCCAGGCAGCCTCCGCCAGCCGGAGCCC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:33:1021:322/2\n+aaaabaL\\abaaaaaZaa\\a^Q\\a\\aZaWaZJSaX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:61:1267:1990/2\n+GTCGGCAAGTCGCTCCCGGAGGCCCTCCTCCGTGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:61:1267:1990/2\n+baaaaaaaaaaaaaaa`Z]_____a_____`][VR\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:91:1242:1241/2\n+CTCCTGATGCTGCTCGTGGACGCTGATCAGCCG
GA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:91:1242:1241/2\n+aaaa\\aaaaaaWaaaa_UMaaaaaLa__]_[^^`U\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:62:1224:1029/2\n+CGCTCCCGGAGGCCCTCCTCCGTGTGCTCCATGGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:62:1224:1029/2\n+^aa^aabaa]X_aaaaaa^aaaaaaaa[VV_W_RK\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:38:1108:325/2\n+CCACGGGCTCCCAGGCAGCCTCCGCCAGCCGGACC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:38:1108:325/2\n+baaabaaaaaaaaabaaaaabaaaaZERaa\\aaQX\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:49:970:1046/2\n+CTCGTGGACGCTGATCAGCCGGAGCCCATGCGCAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:49:970:1046/2\n+bbaaaabaaaaaaaaaaaaaaaaaaaa[[[a_OZU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:6:82:1503/2\n+TGGACGCTGATCAGCCGGAGCCCATGCGCAGCGGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:6:82:1503/2\n+abaaaababaaaWaaaaaaaaaaZaaaa`]`aX[X\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:85:302:1496/2\n+CCGAGGCCCGGGCGCAACCACGGGCTCCCCGGCAG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:85:302:1496/2\n+abaaaaabbaaaaaaWaaaU]J_aaaaa`J]a[HO\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:26:441:786/2\n+CTGATCAGCCGGAGCCCATGCGCAGCGGGGCGCGC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:26:441:786/2\n+aaaaabaaZaaaaabaabaaabaaaaaaaaaa\\`E\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:8:434:166/2\n+ACGACGTCCCGGCTAGGTGACTCGGCCATGGCGTC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:8:434:166/2\n+bbabbbaaaaaabbaaa`]_aa[MX_aa[[[\\Z^U\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:37:940:1981/2\n+CCCGCACACCTCCAGCAGGGTAACGCTCCTGTCAA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:37:940:1981/2\n+aaaaabbababaaaaaaaaaEZPGU_^[[a_][^U\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1018:1410/2\n+GTGGACGCTGATCAGCCGGAGCCCATGCGCAGCGG\n++PATHBIO-SOLEXA2_30LEJAAXX:7:17:1018:1410/2\n+aaaabaaaaabaaaaaaaaaaa^^QZ^WaZ]_`aU\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:84:1297:1236/2\n+CCCGTCGCCCTCCTGATGCTGCTCGTGGGCGCTGA\n++PATHBIO-SOLEXA2_30LEJAAXX:7:84:1297:1236/2\n+aaaWaa\\aaaZ\\^MVZaIIZ[V^HS[MOEPRaUGP\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:45:1219:280/2\n+CCCGGCTAGGTGACTCGGCCATGGCGTCGGCAAGT\n++PATHBIO-SOLEXA2_30LEJAAXX:7:45:1219:280/2\n+abaaaaaJa\\\\aaa`Q[^aa_V^[a[J`aaWaaXM\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:36:1381:1758/2\n+CACGGGCTCCCAGGCAGCCTCCGCCAGCCGGACCC
\n++PATHBIO-SOLEXA2_30LEJAAXX:7:36:1381:1758/2\n+aaaaaaZaaaaaWaZaaLbaaWaWWaaL][XaLaE\n+@PATHBIO-SOLEXA2_30LEJAAXX:7:29:27:808/2\n+GGCGCGCGAGCTCGCGCTCTTCCTGACCCCCGATC\n++PATHBIO-SOLEXA2_30LEJAAXX:7:29:27:808/2\n+aababbaaaaaaaaaaZaaaaa]JUU_aaa^EEGU\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/CXorf15-SYAP1_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/CXorf15-SYAP1_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,12 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:75:946:1909/1 +GGCGGACGGCGACGTAGCCCGCGGCAGAAGATAAC ++PATHBIO-SOLEXA2_30LEJAAXX:7:75:946:1909/1 +abaabaaabaaaaaaaaaaaaabaVV_Z]aX]X][ +@PATHBIO-SOLEXA2_30LEJAAXX:7:45:1722:953/1 +GCTGTTTCAGCAACTGATTCAGTTATATTTTTTGT ++PATHBIO-SOLEXA2_30LEJAAXX:7:45:1722:953/1 +aaaa^^bbbbaaaaaaaaa\aaXaa`W`a^aa^H^ +@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1450:1241/1 +GGCGACTGAGGCAGGACGGGGCGGACGGCGAGGCA ++PATHBIO-SOLEXA2_30LEJAAXX:7:17:1450:1241/1 +a^\aaaaaa^a^K^WaaLWLaaaaVS[aaLEKU^Z |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/CXorf15-SYAP1_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/CXorf15-SYAP1_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,12 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:75:946:1909/2 +CTTCATCGTTAGTGTCAACCCCTGGGGGCCCAGCT ++PATHBIO-SOLEXA2_30LEJAAXX:7:75:946:1909/2 +abbbaabbbaa\aaaaaaaa^JMUH_aZKEXOKRE +@PATHBIO-SOLEXA2_30LEJAAXX:7:45:1722:953/2 +GAGGAGGCAGCGCGGGGAAGAGGCGGCGGGGGCGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:45:1722:953/2 +aaaabaabaabbbbabaaaabaaba_S]^K\ILQE +@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1450:1241/2 +CGATTTTTCCTTCTTCTACGGATTTCTTTATTTTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:17:1450:1241/2 +XEKQ[a\\L\aaaaaaaQVXHKa^`MXbXPURE[U |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_1.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,8 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:59:769:345/1 +ATTTGAAGCAGCCTATGTACTTGGGATTTGAAAAA ++PATHBIO-SOLEXA2_30LEJAAXX:7:59:769:345/1 +aabaaaa^aa^bbbabaaaabaa^\aaaa\aaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:81:220:1548/1 +GTCACTTGAAAGAGCCTCTACTTACATTTCATCTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:81:220:1548/1 +aaaaaabbaaaaaaabaaaaaa`U^U^aa^V^aaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,8 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:59:769:345/2 +CACAAATACTTCTGGTAGGCTGGCCCCAATGCAGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:59:769:345/2 +babbb^abababa^aaaaabaaaababaaaaaaaX +@PATHBIO-SOLEXA2_30LEJAAXX:7:81:220:1548/2 +CTTATGTGGATGGCGACAATAACAAACTGGACAAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:81:220:1548/2 +abaaabaaaaaabaabaaa_]`a_]`aaaU[Z[VX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/EP300-MRFAP1_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/EP300-MRFAP1_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,64 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1145:806/1 +GAGATTTCCTGAGGATTCTGGTTTTCCTCGCTTGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:17:1145:806/1 +bbbbbbaaaaaaaaaaaaaaaaaa`_]`[Q[`^KU +@PATHBIO-SOLEXA2_30LEJAAXX:7:22:1648:420/1 +GGCCGAGAATGTGGTGGAACCGGGGCCGCCTTCAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:22:1648:420/1 +aaaaaaaaaaaaaaaaZaaaa[S]`aaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:91:590:1587/1 +AAAAATGGCCGAGAATGTGGTGGAACCGGGGCCGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:91:590:1587/1 +baaaaaWaa^aaa^aaaaaaaaa\abaaaa`_Xaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:69:578:501/1 +GCATCTCTTCCCACTGCCTCGGAACCGCAATAGCG ++PATHBIO-SOLEXA2_30LEJAAXX:7:69:578:501/1 +W^aaaaaXabZaaaaZaabaaaaa_]`a_W^V[aa +@PATHBIO-SOLEXA2_30LEJAAXX:7:16:987:1601/1 +GTGGAACCGGGGCCGCCTTCAGCCAAGCGGCCTAA ++PATHBIO-SOLEXA2_30LEJAAXX:7:16:987:1601/1 +aaa^aababaaaaaaaabbaaabaaaaabaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:20:1374:264/1 +CCTGGGTGCGGCGCGGGGACCCCGGGCCGAAGAAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:20:1374:264/1 +Zaa^aaJaaaWaabaaaaaa\aaaaaZabaWaZaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:88:641:1803/1 +CGACGATGTCCAGGGGCCGCATCTCTTCCCACTGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:88:641:1803/1 +baaaaabaabaaaaaabaaaaaaaabbaa_X_a\a +@PATHBIO-SOLEXA2_30LEJAAXX:7:51:1479:654/1 +GTCCTCGCGCATCTCGTTGATGACCGGGAGCAGAA ++PATHBIO-SOLEXA2_30LEJAAXX:7:51:1479:654/1 +aabbbaaababbbbaaaa\a^Q^a^RE[__\^^aZ +@PATHBIO-SOLEXA2_30LEJAAXX:7:63:347:1159/1 +CGGGAATGTGGTGGAACCGGGGCCGCCTTCAGCCA ++PATHBIO-SOLEXA2_30LEJAAXX:7:63:347:1159/1 +aNaaaaaaaaaaaaaaaaaaaaaabaaaaL\a_J] +@PATHBIO-SOLEXA2_30LEJAAXX:7:100:1324:832/1 +CTCTCATCTCCGGCCCTCTCGGCGTCCGCCAGCGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:100:1324:832/1 +aaab\^aaaaaaaaaa^baaaaaaaaXS_PEUKMa +@PATHBIO-SOLEXA2_30LEJAAXX:7:35:460:430/1 +AAAGAATTAAAAATGGCCGAGAATGTGGTGGAACC ++PATHBIO-SOLEXA2_30LEJAAXX:7:35:460:430/1 +aaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:68:216:472/1 +CGCGGGGACCCCGGGCCGAAGAAGAGATTTCCTGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:68:216:472/1 +aaaaaaa^^aba^aaaaaL\a^Z^aaaaaaabaab 
+@PATHBIO-SOLEXA2_30LEJAAXX:7:24:935:1284/1 +CGAGAATGTGGTGGAACCGGGGCCGCCTTCAGCCA ++PATHBIO-SOLEXA2_30LEJAAXX:7:24:935:1284/1 +baaabbbbbbbaaaaaa`Z_aaaa___aa[^^^UX +@PATHBIO-SOLEXA2_30LEJAAXX:7:24:420:2011/1 +GTTGATGACCGGGAGCAGAAACTGCTCGAAATCCT ++PATHBIO-SOLEXA2_30LEJAAXX:7:24:420:2011/1 +aaaaaababbaaaaaaaaaaaaaaaaaa^aaaa[[ +@PATHBIO-SOLEXA2_30LEJAAXX:7:78:1306:986/1 +GCCTCGCGCATCTCGTTGATGACCGGGAGCAGAAA ++PATHBIO-SOLEXA2_30LEJAAXX:7:78:1306:986/1 +abaabbbaaaaabaZabaaaaaaa^aXKX_`_Z`[ +@PATHBIO-SOLEXA2_30LEJAAXX:7:58:580:1277/1 +CGGGCTCCAGCACCTCCACTTCCTCCGGTTCCGCC ++PATHBIO-SOLEXA2_30LEJAAXX:7:58:580:1277/1 +baaaaaaaaaaaaa^aaaXMPaWaaaaOQS[\VVX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/EP300-MRFAP1_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/EP300-MRFAP1_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,64 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1145:806/2 +GCCAGCTCGACGATGTCCAGGGGCCGCATCTCTTC ++PATHBIO-SOLEXA2_30LEJAAXX:7:17:1145:806/2 +abaaabbbaaaaaaaaaaa_]_aaa____aaaa^U +@PATHBIO-SOLEXA2_30LEJAAXX:7:22:1648:420/2 +CGGGAGCAGAAACTGCTCGAAATCCTCCTCGGGCT ++PATHBIO-SOLEXA2_30LEJAAXX:7:22:1648:420/2 +babababbabaaaaaaaabaabbbaabaaaaZaaX +@PATHBIO-SOLEXA2_30LEJAAXX:7:91:590:1587/2 +GGCCGCATCTCTTCCCACTGCCTCGGAACCGCAAT ++PATHBIO-SOLEXA2_30LEJAAXX:7:91:590:1587/2 +babaaa\abaa\aabaMaUaaaaaUMEX]]OaZ[G +@PATHBIO-SOLEXA2_30LEJAAXX:7:69:578:501/2 +CTGGGTGCGGCGCGGGGACCGCGGGCCGAAGAAGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:69:578:501/2 +abaaa^aaaaababaaZaaXH[aVLIULZa[UP[K +@PATHBIO-SOLEXA2_30LEJAAXX:7:16:987:1601/2 +CTCGGTGATGACCGGGAGCAGAAACTGCTCGAAAT ++PATHBIO-SOLEXA2_30LEJAAXX:7:16:987:1601/2 +aZaaaMaa\baabaaaZaaaabaaaaaLUaaa^VP +@PATHBIO-SOLEXA2_30LEJAAXX:7:20:1374:264/2 +CTTCCCACTGCCTCGGAACAGCAATAGCGATGTCC ++PATHBIO-SOLEXA2_30LEJAAXX:7:20:1374:264/2 +aab^aaaaa\aaaaa\aaa\Uaaaaaaaa^WaMaE +@PATHBIO-SOLEXA2_30LEJAAXX:7:88:641:1803/2 +GCTGAGAATGTGGTGGAACCGGGGCCGCCTTCAGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:88:641:1803/2 +aaaaaaaaaaaabaaaaaaaaaaabaaaa\X]_aU +@PATHBIO-SOLEXA2_30LEJAAXX:7:51:1479:654/2 +GCCGCCTTCAGCCAAGCGGCCTAAACTCTCATCTC ++PATHBIO-SOLEXA2_30LEJAAXX:7:51:1479:654/2 +abaaaaaaaaaaaaaaaaaaa^V^aa`__aaaaaX +@PATHBIO-SOLEXA2_30LEJAAXX:7:63:347:1159/2 +TCTCGTTGATGACCGGGAGCAGAAACAGCTCGAAA ++PATHBIO-SOLEXA2_30LEJAAXX:7:63:347:1159/2 +aaaaaaa^aREXaaUaaaUQQVQUaMEOZZaPEUM +@PATHBIO-SOLEXA2_30LEJAAXX:7:100:1324:832/2 +TGCTCGCGCGTCAGCGACGCGATGTCCTCGCGCAT ++PATHBIO-SOLEXA2_30LEJAAXX:7:100:1324:832/2 +aLaaW^aZaZaaaZa^^aaaZ\\^aabOEZabZ]X +@PATHBIO-SOLEXA2_30LEJAAXX:7:35:460:430/2 +CTCGAAATCCTCCTCGGGCTCCAGCACCTCCACTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:35:460:430/2 +aaaaaabaaaZaaaaaaaaaaUEPaaaaaaaa]JK +@PATHBIO-SOLEXA2_30LEJAAXX:7:68:216:472/2 +CGACGATGTCCAGGGGCCGCATCTCTTCCCACTGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:68:216:472/2 +aaaaaba^abba^aaaaaa\aaaaaaWaaaLaa^Z 
+@PATHBIO-SOLEXA2_30LEJAAXX:7:24:935:1284/2 +CCGACGATGTCCAGGGGCCGCATCTCTTCCCACTG ++PATHBIO-SOLEXA2_30LEJAAXX:7:24:935:1284/2 +bbbbbaaabaaaa`]`aaaaaaaaaaaa^\^X^^M +@PATHBIO-SOLEXA2_30LEJAAXX:7:24:420:2011/2 +GGAACCGGGGCCGCCTTCAGCCAAGCGGCCTAAAC ++PATHBIO-SOLEXA2_30LEJAAXX:7:24:420:2011/2 +babaaaabbbaabaabababaaaaaaaaaa[V^\U +@PATHBIO-SOLEXA2_30LEJAAXX:7:78:1306:986/2 +GAACCGGGGCCGCCTTCAGCCAAGCGGCCTAAACT ++PATHBIO-SOLEXA2_30LEJAAXX:7:78:1306:986/2 +baaaaaZ^aaaaaaaa^VVaVQXa^[[a_V[[\UM +@PATHBIO-SOLEXA2_30LEJAAXX:7:58:580:1277/2 +GTTTTCCTCGCTTGTATCTCCGAAAGAATTAAAAA ++PATHBIO-SOLEXA2_30LEJAAXX:7:58:580:1277/2 +babbaaaaaaaa`S[aaaaZ^]]ZK\[ZEZa^aa[ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/EWSR1-TFF1_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/EWSR1-TFF1_1.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:43:757:1645/1 +GAAGAGGTGGCCTCATGGATCGTGGTGGTCCCGGT ++PATHBIO-SOLEXA2_30LEJAAXX:7:43:757:1645/1 +abbbaaa^aaa]J`^aaaaaaaaaaaaaUKRXaXE |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/EWSR1-TFF1_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/EWSR1-TFF1_2.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:43:757:1645/2 +CGGGCGTGACACCAGGAAAACCACAATTCTGTCTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:43:757:1645/2 +aaaaaaa^aabaaaaaaaaaa^aaaaaaaaaa\aZ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/MYO9B-FCHO1_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/MYO9B-FCHO1_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,40 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:91:1384:1511/1 +CGCGTGAACCAGTGCATCGTGATCTCGGGTGAGAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:91:1384:1511/1 +aaaaaaaaaaaaaaaa^aaaaaaa^aaWaLa^aWa +@PATHBIO-SOLEXA2_30LEJAAXX:7:81:1032:792/1 +TGACGTCAAAGCCCTTGGAGCGGTATGTGCTAGAC ++PATHBIO-SOLEXA2_30LEJAAXX:7:81:1032:792/1 +aaaaaabaaaaaaaaaaaaaaaa^aaaaaaaa^]Z +@PATHBIO-SOLEXA2_30LEJAAXX:7:83:340:930/1 +AGTTCTCGCGGGACTTGGGCAGGAGCTGGCTGACG ++PATHBIO-SOLEXA2_30LEJAAXX:7:83:340:930/1 +aababbbabaaabbbaaaaaaaaaaaaaaaa][V^ +@PATHBIO-SOLEXA2_30LEJAAXX:7:10:418:605/1 +GCGCTGGCCGACGTGGCCTACTACACCATGCTCAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:10:418:605/1 +aaaaabaaaaaaaaaabaaaaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:14:562:448/1 +GGACCATCCTGGGTGCTGGCCCTGTGCTGGAGTGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:14:562:448/1 +bbbbbbbbbaaaaaaaaaaaaaaaaaaa_Z_V_Z\ +@PATHBIO-SOLEXA2_30LEJAAXX:7:25:1647:751/1 +CTCGTGGGACTTGGGCAGGAGATGGCTGACGCCCG ++PATHBIO-SOLEXA2_30LEJAAXX:7:25:1647:751/1 +aaaaaaa_X[a^^[aaaaaaUXX][^^aZ[UXU\U +@PATHBIO-SOLEXA2_30LEJAAXX:7:84:437:1347/1 +CCAGTGCATCGTGATCTCGGGTGAGAGCGGCTCCG ++PATHBIO-SOLEXA2_30LEJAAXX:7:84:437:1347/1 +aaaaabaaaaaaaaaabaaaaa\\^`]aaaaaa_Z +@PATHBIO-SOLEXA2_30LEJAAXX:7:45:696:1167/1 +CTACACCATGCTCAGGAAGCGCGTGAACCAGTGCA ++PATHBIO-SOLEXA2_30LEJAAXX:7:45:696:1167/1 +abbabaaaaaabbbaaaaababaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:95:842:1694/1 +CACCTGCACAGCATCCAAGGTGCTCACCACTTCCT ++PATHBIO-SOLEXA2_30LEJAAXX:7:95:842:1694/1 +aaabaaaaaaaaaab^U[^[^aaaaaaaa`_]aaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:49:555:429/1 +GCCGCTCCTGGTCCATGCGACGGTTCAGGTAGTTC ++PATHBIO-SOLEXA2_30LEJAAXX:7:49:555:429/1 +aabaaaaaabbW^aLa\aa^REXLa[Z^[IKVHX[ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/MYO9B-FCHO1_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/MYO9B-FCHO1_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,40 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:91:1384:1511/2 +CAGGAGCTGGCTGACGCCCGAGAGTACCTGAACAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:91:1384:1511/2 +aabaa^aaaaaaaaaaaaa^^ab\WaaaaQQVaaR +@PATHBIO-SOLEXA2_30LEJAAXX:7:81:1032:792/2 +CTTGGCCATGGTAGACAACCTGCAGGGGGACTCTG ++PATHBIO-SOLEXA2_30LEJAAXX:7:81:1032:792/2 +bbaaaaaaaaaaaaaaaaaa[ZZ_Zaaa_UZa^_U +@PATHBIO-SOLEXA2_30LEJAAXX:7:83:340:930/2 +CGCCCTCAGCCAGAAGGGCTACGCCAGCGGCGTCG ++PATHBIO-SOLEXA2_30LEJAAXX:7:83:340:930/2 +aaabbaaaaaaabaaaabbabaaa_U^aaaaaaaU +@PATHBIO-SOLEXA2_30LEJAAXX:7:10:418:605/2 +GCCGCCTTCTTGGTTTTAGTCTCCGCCTCCAGCAC ++PATHBIO-SOLEXA2_30LEJAAXX:7:10:418:605/2 +aaabaaaaaabaaaaaa^[[bab^H[aaaaaaaaU +@PATHBIO-SOLEXA2_30LEJAAXX:7:14:562:448/2 +CGCGGGACTTGGGCAGGAGCTGGCTGACGCCCGAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:14:562:448/2 +aaaabaaaaaaaaaaaaaaaaaaaa\^[[[^ZRRX +@PATHBIO-SOLEXA2_30LEJAAXX:7:25:1647:751/2 +CGTGATCTCGGGTGAGAGCGGCTCCGGCAAGACCC ++PATHBIO-SOLEXA2_30LEJAAXX:7:25:1647:751/2 +baaaaLbaaaaa``____aa^^^XE[aUEU[VURX +@PATHBIO-SOLEXA2_30LEJAAXX:7:84:437:1347/2 +CGCGGGACTTGGGCAGGAGCTGGCTGACGCCCGAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:84:437:1347/2 +babbaaaaaaaaaa^aaaaaaaaaaaaaaaaa[VR +@PATHBIO-SOLEXA2_30LEJAAXX:7:45:696:1167/2 +CCCGAGAGTACCTGCACAGCATCCAAGGTGCTCAC ++PATHBIO-SOLEXA2_30LEJAAXX:7:45:696:1167/2 +abbabaabaabaaaaaaaa^QXaaU\ZaLaZa[JU +@PATHBIO-SOLEXA2_30LEJAAXX:7:95:842:1694/2 +CGCTGGCCGACGTGGCCTACTACACCATGCTCAGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:95:842:1694/2 +bbaaaaabbabaaaaaaaaaaaaaaaaaaaaaaa[ +@PATHBIO-SOLEXA2_30LEJAAXX:7:49:555:429/2 +CCCAGAGCACCAACTTCCTCATCCACTGCCTCACC ++PATHBIO-SOLEXA2_30LEJAAXX:7:49:555:429/2 +aaaaa\abbaaaab\baa^aaaa\\aaabaaa^aX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/NAV1-GPR37L1_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/NAV1-GPR37L1_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,20 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:72:139:396/1 +CGGGTGTCCCCGGGTGTCAGGCGAGAGCGGTCCTG ++PATHBIO-SOLEXA2_30LEJAAXX:7:72:139:396/1 +bbabbbbbbbaaaaaaaaa_]`aaaa`]__]_aaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:74:1328:1715/1 +CGGTGAATGGCCGCCTGAGCCGGGGAAGATGCTTC ++PATHBIO-SOLEXA2_30LEJAAXX:7:74:1328:1715/1 +aaaLaabaaabaaaaaaaaaaaaaaaaaaaaaaa\ +@PATHBIO-SOLEXA2_30LEJAAXX:7:34:988:884/1 +CTCCGCGTGGCCCACAGCTCATACCTTTTCGGGTG ++PATHBIO-SOLEXA2_30LEJAAXX:7:34:988:884/1 +abbabaaabbbabaa^aabaaaaaaaaabaWS[aU +@PATHBIO-SOLEXA2_30LEJAAXX:7:28:728:1661/1 +CTCATGGGCAAGACCATGACGGAGGATGATGACAT ++PATHBIO-SOLEXA2_30LEJAAXX:7:28:728:1661/1 +baaaaaaababaaabaaaaaaaaaaaaaaaa]_]a +@PATHBIO-SOLEXA2_30LEJAAXX:7:16:1595:2032/1 +GGCACAGAGGCTGAAAGTCGTGACTCCCAGCGAGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:16:1595:2032/1 +a\aaaabaaaaaaLaZaaaa^aZaaaaa^^Q^^aX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/NAV1-GPR37L1_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/NAV1-GPR37L1_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,20 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:72:139:396/2 +ATGGGCCTCACCTTGGGCAGGGTGCTGGTGGCCAC ++PATHBIO-SOLEXA2_30LEJAAXX:7:72:139:396/2 +babbbbbbaaabaaaaaaaaaaaaaaaaaaaa[[U +@PATHBIO-SOLEXA2_30LEJAAXX:7:74:1328:1715/2 +AGATGACAGCCAACTTGGCCAGGATGGATTGGCAC ++PATHBIO-SOLEXA2_30LEJAAXX:7:74:1328:1715/2 +aaaaaabaaaaaaaZRUUaa\aaa___ZaUEMJaU +@PATHBIO-SOLEXA2_30LEJAAXX:7:34:988:884/2 +GGCGCAGAGGCTGAAAGTCGTGACTCCCAGAGAGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:34:988:884/2 +aaaaaaaaaaabaaaabaaabaaaaaaaaaX_]aU +@PATHBIO-SOLEXA2_30LEJAAXX:7:28:728:1661/2 +GGCACAGAGGCTGAAAGTCGTGACTCCCAGAGAGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:28:728:1661/2 +aabaabbaaaaababaaaaaaaaaaaaZKUUVV[X +@PATHBIO-SOLEXA2_30LEJAAXX:7:16:1595:2032/2 +GGTTTCCGACCCTCCGCGTGGCCCACAGCTCATAC ++PATHBIO-SOLEXA2_30LEJAAXX:7:16:1595:2032/2 +baaaaaaaaaaaaaaaaba\aaaaaaL^a\aWbaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/PAPOLA-AK7_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/PAPOLA-AK7_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,8 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:69:384:648/1 +GGAGAAGCGCTTAAAGCGGCGGGAGCGGTGCGGGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:69:384:648/1 +baaaaababbaaaaaabaaaaa[HXH^[J^KX^XM +@PATHBIO-SOLEXA2_30LEJAAXX:7:68:1726:1492/1 +CGTGTGTAACATGCCTCCTTCCGCTCCATACTTGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:68:1726:1492/1 +URKQ^aaaaaa[ERS^aaaaaaaaaa^^^^aZEEE |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/PAPOLA-AK7_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/PAPOLA-AK7_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,8 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:69:384:648/2 +GCCTCCTTCCGCTCCATACTGGAGTCCAGCCGCAA ++PATHBIO-SOLEXA2_30LEJAAXX:7:69:384:648/2 +aabababaabaaaaa^aVZZaUKRZZ_XZZ^ZVKU +@PATHBIO-SOLEXA2_30LEJAAXX:7:68:1726:1492/2 +GAGCGCTTAAAGCGGCGGCAGCGGTGCGGGAGAGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:68:1726:1492/2 +ab\ababaaaaaaaaa[UEEZLaaSREXXQH^JSU |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/RPS6KB1-TMEM49_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/RPS6KB1-TMEM49_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,124 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:10:1274:1893/1 +GGCAGGAGTGTTTGACATAGACCTGGACCAGCCAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:10:1274:1893/1 +aaaaaaaaaaaabbaaabaaaaaaaa`]_aaaaaU +@PATHBIO-SOLEXA2_30LEJAAXX:7:100:634:300/1 +CCAGCCAGAGGACGCGGGCTCTGAGGATGAGCTGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:100:634:300/1 +ZaabaZaaaaaaZaaaaa^a^aaLaababaaaa^a +@PATHBIO-SOLEXA2_30LEJAAXX:7:14:202:351/1 +CAAAGAAGGTCCAAAAACGCACCAGAAAGTGTCCA ++PATHBIO-SOLEXA2_30LEJAAXX:7:14:202:351/1 +aaaaaabaaaaaaaWa^IZWaaaaaaaa^JJ^a_M +@PATHBIO-SOLEXA2_30LEJAAXX:7:2:225:1594/1 +CTTCTGCAGAGATGGACCTATGCCGGGGACAGCAA ++PATHBIO-SOLEXA2_30LEJAAXX:7:2:225:1594/1 +abababaaa^aabaaaabbaaaabaaaaaaaa^[U +@PATHBIO-SOLEXA2_30LEJAAXX:7:30:864:1299/1 +GTGCCCATTTCGCTTTTGTGGTGAAGCTTCTGCCG ++PATHBIO-SOLEXA2_30LEJAAXX:7:30:864:1299/1 +aaaaaaaaaababaaa^Z\Z\RKU[KX^SVUEUZa +@PATHBIO-SOLEXA2_30LEJAAXX:7:86:1461:970/1 +CTGCAGAGATGGACCTATGCCGGGGACAGCAAGTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:86:1461:970/1 +bbbbaaaaaaaaaaaaa^X^`]`aaa^^^`^^^a_ +@PATHBIO-SOLEXA2_30LEJAAXX:7:50:550:1851/1 +GCGGGACGGCTTTTACCCAGCCCCGGACTTCCGAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:50:550:1851/1 +baaababababbbaaabaaaaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:39:1115:223/1 +GACGGCTTTTACCCAGCCCCGGACTTCCGAGACAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:39:1115:223/1 +aaaaa\baaZaaaabaaUab\aH[^a^ab^[HZKM +@PATHBIO-SOLEXA2_30LEJAAXX:7:90:883:1309/1 +GGCTTCTGCAGAGATGGACCTATGCCGGGGACAGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:90:883:1309/1 +b^^VbaaLaaaWaa^abWaba^aVVXaaa^aaaa] +@PATHBIO-SOLEXA2_30LEJAAXX:7:48:1382:1277/1 +GCCCATTTCGCTTTTGTGGTGAAGCTTCTGCCGTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:48:1382:1277/1 +baaaaaabaaaababa`UXa^ZVQaaaaaXaaXa_ +@PATHBIO-SOLEXA2_30LEJAAXX:7:41:1024:486/1 +GAGACAGGGAAGCTGAGGACATGGCAGGAGTGTTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:41:1024:486/1 +aaaaaaOZaaaaaa[aaa]__aa[Q^aXEUMXW[] +@PATHBIO-SOLEXA2_30LEJAAXX:7:45:502:1712/1 +GAAGCTTCTGCCGTTGAGCCTCCAGGTACTCCTGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:45:502:1712/1 +bbaabbabaaaaaa]W`aaaaaaaa_U_aaaa^S^ 
+@PATHBIO-SOLEXA2_30LEJAAXX:7:28:1040:80/1 +CAGTTAAATGAAAGCATGGACCATGGGGGAGTTGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:28:1040:80/1 +aaa\baaaaaabaabaaaaaaaaaaaaaaZX[[\a +@PATHBIO-SOLEXA2_30LEJAAXX:7:83:418:1017/1 +GGCTTCTGCAGAGATGGACCTATGCCGGGGACAGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:83:418:1017/1 +babbbbaaaaaaaaaaaaaaaaaaaaaaa``_aaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:86:329:1148/1 +CAGAGAGTGAAGCAAGCAGCATGGCCCACAGTCTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:86:329:1148/1 +a^aaaabLa^abaaaaaaaaaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:12:40:1105/1 +GACGGCTTTTACCCAGCCCCGGACTTCCGAGACAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:12:40:1105/1 +abaaaabaabaaaaaaaaaUZQ\aaaaaa^aa^V^ +@PATHBIO-SOLEXA2_30LEJAAXX:7:43:1027:1268/1 +TTCGCTTTTGTGGTGAAGCTTCTGCCGTTGAGCCT ++PATHBIO-SOLEXA2_30LEJAAXX:7:43:1027:1268/1 +aaaaaaaabaLLW^aaaaabaW_[WXEX[ERGXa\ +@PATHBIO-SOLEXA2_30LEJAAXX:7:77:269:664/1 +GGCTTCTGCAGAGATGGACCTATGCCGCGGACAGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:77:269:664/1 +aabbababbaaababaaaaabaaaaa\I^aaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:27:1158:1997/1 +CGACGAAGGAGGCGGGACGGGTTTTACCCAGCCCC ++PATHBIO-SOLEXA2_30LEJAAXX:7:27:1158:1997/1 +baaaaaaaaa^\aaabaaaOEUaaaaaXEKJZa^b +@PATHBIO-SOLEXA2_30LEJAAXX:7:62:348:653/1 +GCCCATGAGGCGACGAAGGAGGCGGGAGGGCTGTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:62:348:653/1 +aaaaZa[QHbaZaaa\a^K[aWaaaaHEUaa^QVX +@PATHBIO-SOLEXA2_30LEJAAXX:7:94:413:35/1 +TGGTGAAGCTTCTGCCGTTGAGCCTCCAGGTACTC ++PATHBIO-SOLEXA2_30LEJAAXX:7:94:413:35/1 +aaaaaaa^aaaa^Q^aaaaaaWaaa\ZUEPS^U_a +@PATHBIO-SOLEXA2_30LEJAAXX:7:96:1411:1152/1 +ATAGGATGAAGTAACACACCATGACAACGACCAAC ++PATHBIO-SOLEXA2_30LEJAAXX:7:96:1411:1152/1 +bbbbabaaaaaaaaaaaabbaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:68:468:704/1 +GAAGCTTCTGCCGTTGAGCCTCCAGGTACTCCTGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:68:468:704/1 +aaaaaaZaaaaaaaaaLaaXaaaaa[V\aaaaa[X +@PATHBIO-SOLEXA2_30LEJAAXX:7:78:390:343/1 +GCGGGCTCTGAGGATGAGCTGGAGGAGGGGGGTCA ++PATHBIO-SOLEXA2_30LEJAAXX:7:78:390:343/1 +aaaaaaabbaaaabbaaabaaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:80:168:362/1 
+CCGAGACAGGGAAGCTGAGGACATGGCAGGGGTGT ++PATHBIO-SOLEXA2_30LEJAAXX:7:80:168:362/1 +aaaWaaaaZab\aaaabZaaaaaaaaa^aa\aXaL +@PATHBIO-SOLEXA2_30LEJAAXX:7:38:257:225/1 +GCCCATTTCGCTTTTGTGGTGAAGCTTCTGCCGTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:38:257:225/1 +bbbaaabbbaaaa\aa`[X_Z[^aaaaaaXaaaa^ +@PATHBIO-SOLEXA2_30LEJAAXX:7:15:300:871/1 +CTGCCGTTGAGCCTCCAGGTACTCCTGAAATGGCT ++PATHBIO-SOLEXA2_30LEJAAXX:7:15:300:871/1 +baaaaaaaaaaaaaaaaaa_]`aaa_VXUXa^Z^a +@PATHBIO-SOLEXA2_30LEJAAXX:7:96:705:1693/1 +ACCCATTTCGCTTTTGTGGTGAAGCTTCTGCCGTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:96:705:1693/1 +ababaaaabababb`W`aa_ZZaaabbbaaaaaa^ +@PATHBIO-SOLEXA2_30LEJAAXX:7:56:488:257/1 +TGAAATGGCTTCTGCAGAGATGGACCTATGCCGGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:56:488:257/1 +baaaaababbbaaaa]_]`]`aaaaaaa_V_^V[O +@PATHBIO-SOLEXA2_30LEJAAXX:7:33:364:81/1 +GCCAACTTTTCAAACATCCAGGACAACCAGTTTTC ++PATHBIO-SOLEXA2_30LEJAAXX:7:33:364:81/1 +aab^abbbaaabbbaaabbaaaaaaaaaaaaabaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:55:572:1065/1 +GTTAATGATAGATAGGATGAAGTAACACACCATGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:55:572:1065/1 +bbbbbaaabaaaaaaaaaaaaaaaaaaaaa`_`__ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/RPS6KB1-TMEM49_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/RPS6KB1-TMEM49_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,124 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:10:1274:1893/2 +GCCCATTTCGCTTTTGTGGTGAAGCTTCTTCCGTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:10:1274:1893/2 +bbaa^abaaabbbaaaaa\aSX`Uaa`JXEUJ`aX +@PATHBIO-SOLEXA2_30LEJAAXX:7:100:634:300/2 +CCCATTTCGCTTTTGTGGTGAAGCTTCTGCCGTTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:100:634:300/2 +QZaa^aW^aaaaaa^WJMUEMbWaaaaEKXEE^ZE +@PATHBIO-SOLEXA2_30LEJAAXX:7:14:202:351/2 +GTTTGACATAGACATGGACCAGCCAGAGGACGCGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:14:202:351/2 +_JJ`baaabaabXE[aaaabaaZaaaaaaa\IK_Z +@PATHBIO-SOLEXA2_30LEJAAXX:7:2:225:1594/2 +CGCGGGCTCTGAGGATGAGCTGGAGGAGGGGGAAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:2:225:1594/2 +aabbaababaa^\aaaaZaaaaaaaaZaaa^aa^U +@PATHBIO-SOLEXA2_30LEJAAXX:7:30:864:1299/2 +GGGAAGCTGAGGACATGGCAGGAGTGTTTGACATA ++PATHBIO-SOLEXA2_30LEJAAXX:7:30:864:1299/2 +baaaaaaabaaaaaaabaaaZ`]a[a^aa^_^\^P +@PATHBIO-SOLEXA2_30LEJAAXX:7:86:1461:970/2 +GGCGGGCTCTGAGGATGAGCTGGAGGAGGGGGGTC ++PATHBIO-SOLEXA2_30LEJAAXX:7:86:1461:970/2 +aabbbaaaaaaaaaaaaaaaa_[_aa[Z\]_aa[U +@PATHBIO-SOLEXA2_30LEJAAXX:7:50:550:1851/2 +GGCTTCTGCAGAGATGGACCTATGCCGGGGACAGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:50:550:1851/2 +ababbaaaaaaaaaaaaaaaaaaaaaaa^`RMU[K +@PATHBIO-SOLEXA2_30LEJAAXX:7:39:1115:223/2 +GCCTCCAGGTACTCCTGAATTGGCTTCTGCAGAGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:39:1115:223/2 +baaaaaL\aWbaaa\aLa[H[WWaaaaREXaaUEX +@PATHBIO-SOLEXA2_30LEJAAXX:7:90:883:1309/2 +GGCCCATGAGGCGACGAAGGAGGCGGGACGGCTTT ++PATHBIO-SOLEXA2_30LEJAAXX:7:90:883:1309/2 +aaWaaaaaaLaaaaabaaaZaabWaaaEEOaZW^U +@PATHBIO-SOLEXA2_30LEJAAXX:7:48:1382:1277/2 +TGGCAGGAGTGTTTGACATAGACCTGGACCAGCCA ++PATHBIO-SOLEXA2_30LEJAAXX:7:48:1382:1277/2 +aaaaaaaaaaababaaaabaaaaaaaaaaaaaaaU +@PATHBIO-SOLEXA2_30LEJAAXX:7:41:1024:486/2 +TTCGCTTTTGTGGTGAAGCTTCTGCCGTTTAGCCT ++PATHBIO-SOLEXA2_30LEJAAXX:7:41:1024:486/2 +aaa[H[aaPEUOEUE[[Q[aUUZGEUUaXEEEKZE +@PATHBIO-SOLEXA2_30LEJAAXX:7:45:502:1712/2 +CGGCTTTTACCCAGCCCCGGACTTCCGAGACAGGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:45:502:1712/2 +aaaababaaaabaaaaaba[X^aaa^I^Z^aaaaX 
+@PATHBIO-SOLEXA2_30LEJAAXX:7:28:1040:80/2 +GAAGCTTCTGCCGTTGAGCCTCCAGGTACTCCTGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:28:1040:80/2 +aaaaaaabaaa_S`_S^Z[aaaaXRUaaaaaa_QU +@PATHBIO-SOLEXA2_30LEJAAXX:7:83:418:1017/2 +GGCGGGACGGCTTTTACCCAGCCCCGGACTTCCGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:83:418:1017/2 +baabbaabbaaaabaaaaa[V^aaaa[V^aaaaXE +@PATHBIO-SOLEXA2_30LEJAAXX:7:86:329:1148/2 +GTTTGGGGCTGGAATGAAAGGCCATGAATCTGGAA ++PATHBIO-SOLEXA2_30LEJAAXX:7:86:329:1148/2 +aaaaaab^aa^W^aabaaaaaaaaaaa^aaa\aaX +@PATHBIO-SOLEXA2_30LEJAAXX:7:12:40:1105/2 +TGGTGAAGCTTCTGCCGTTGAGCCTCCAGGTACTC ++PATHBIO-SOLEXA2_30LEJAAXX:7:12:40:1105/2 +Oaaaaaaaaabaaa\abaab^^aaaaa^aaLaaaX +@PATHBIO-SOLEXA2_30LEJAAXX:7:43:1027:1268/2 +GGAAGCTGAGGACATGGCAGGAGTGTTTGACATAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:43:1027:1268/2 +aaaaaaaaaWbaaaaaa\abaWa\aa^^aZWa^^Z +@PATHBIO-SOLEXA2_30LEJAAXX:7:77:269:664/2 +GACGAAGGAGGCGGGACGGCTTTTACCCAGCCCCG ++PATHBIO-SOLEXA2_30LEJAAXX:7:77:269:664/2 +baaba^aaaaaaaaa\aaa\aaaaaaaaaaaaaaX +@PATHBIO-SOLEXA2_30LEJAAXX:7:27:1158:1997/2 +CTGAAATGGCTTCTGCAGAGATGGACCTATGCCGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:27:1158:1997/2 +abaaaaaaaaababab\aaaa\aaWaaSQ[ZHSSR +@PATHBIO-SOLEXA2_30LEJAAXX:7:62:348:653/2 +CTTATGCAGAGAGGGACCTATGCCGGGGACAGCAA ++PATHBIO-SOLEXA2_30LEJAAXX:7:62:348:653/2 +aa^H[baXEUZQUXH[[EZaIZZ`ZZaaZJ__[^K +@PATHBIO-SOLEXA2_30LEJAAXX:7:94:413:35/2 +CCGAGACAGGGAAGCTGAGGACATGGCAGGAGTGT ++PATHBIO-SOLEXA2_30LEJAAXX:7:94:413:35/2 +aaaababaaaaaaaaaaaaaaa^baaaaaaaa\aX +@PATHBIO-SOLEXA2_30LEJAAXX:7:96:1411:1152/2 +GTTAAATGAAAGCATGGACCATGGGGGAGTTGGAC ++PATHBIO-SOLEXA2_30LEJAAXX:7:96:1411:1152/2 +bbbbbbaaabaaaaaaaaaaaaaaa^aaaaaaaaX +@PATHBIO-SOLEXA2_30LEJAAXX:7:68:468:704/2 +TTCCGAGACAGGGAAGCTGAGGACATGGCAGGAGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:68:468:704/2 +babbaaab^a^aWaaaaaa^aaa\``_Za\a^QXH +@PATHBIO-SOLEXA2_30LEJAAXX:7:78:390:343/2 +CAACGACCAACTTTTCAAACATCCAGGACAACCAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:78:390:343/2 +aaaaaaaaWabaaaaaZaabaaaaOEU`aUa\aQU +@PATHBIO-SOLEXA2_30LEJAAXX:7:80:168:362/2 
+GAAGCTTCTGCCGTTGAGCCTCCAGGTACTCCTGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:80:168:362/2 +aaaaaaaababaaba^baabaaaaaaabbbaa\\X +@PATHBIO-SOLEXA2_30LEJAAXX:7:38:257:225/2 +GGAGTGTTTGACATAGACCTGGACCAGCCAGAGGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:38:257:225/2 +aa\aabbbbaabba^aaaaaaaaaaaaa_^QaaaU +@PATHBIO-SOLEXA2_30LEJAAXX:7:15:300:871/2 +CTTTTACCCAGCCCCGGACTTCCGAGACAGGGAAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:15:300:871/2 +bbbbaaabaaaabb`]_]`aaa^VV^^^[^_^^^` +@PATHBIO-SOLEXA2_30LEJAAXX:7:96:705:1693/2 +GGAGTGTTTGACATAGACCTGGACCAGCCAGAGGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:96:705:1693/2 +abaaaaabaaababbaaabaaaaaaaaabaaabaX +@PATHBIO-SOLEXA2_30LEJAAXX:7:56:488:257/2 +CTTTTACCCAGCCCCGGACTTCCGAGACAGGGAAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:56:488:257/2 +bbabaaabaaaabbaaaWaaaaaa[]]_]`][PMX +@PATHBIO-SOLEXA2_30LEJAAXX:7:33:364:81/2 +GAGGACGCGGGCTCTGAGGATGAGCTGGAGGAGGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:33:364:81/2 +aaaaaaaaabaaaaaaaab^aaaaaaaabaaaaaU +@PATHBIO-SOLEXA2_30LEJAAXX:7:55:572:1065/2 +TGAAAGCATGGACCATGGGGGAGTTGGACCATATG ++PATHBIO-SOLEXA2_30LEJAAXX:7:55:572:1065/2 +baaaaaabbababaaaaaaaaaaaaaaaa`^H^^U |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/STK11-MIDN_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/STK11-MIDN_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,40 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:91:902:1724/1 +GAGCTTGCATCACGGACTGCTCCGGCCTTGAGGCC ++PATHBIO-SOLEXA2_30LEJAAXX:7:91:902:1724/1 +bbabbabaaaabbaaaaaaabbaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:70:727:1607/1 +CTGGGCGTGGCAGCGGAGCTGGGCCAGCGAAGGGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:70:727:1607/1 +baaabaaabbaaaaaa\aaaaaa\aaaaa]X`aaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:85:1254:2020/1 +AGCCGCGCCGCAAGCGGGCCAAGCTCATCGGCAAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:85:1254:2020/1 +ababababababaabaaaaaZaaaaaUaaaaaaa_ +@PATHBIO-SOLEXA2_30LEJAAXX:7:92:651:457/1 +AGCGGGCCAAGCTCATCGGCAAGTACCTGATGGGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:92:651:457/1 +aaaaabbaaaaabaaaaaaaaaaaaaaa]]`^S^_ +@PATHBIO-SOLEXA2_30LEJAAXX:7:30:916:209/1 +CGGTGGGTACCAAGGTCAGCTTGCTGCCATCACCC ++PATHBIO-SOLEXA2_30LEJAAXX:7:30:916:209/1 +aWaa^Wa^Waaaaaa^aZa\a]J_[SVVQ[\^_aa +@PATHBIO-SOLEXA2_30LEJAAXX:7:86:886:1490/1 +CTGGACTCGGAGACGCTGTGCAGGAGGGCCGTCAA ++PATHBIO-SOLEXA2_30LEJAAXX:7:86:886:1490/1 +bbbaabbaaaaaabbaaaaa[X[_aaa^X[^[RU[ +@PATHBIO-SOLEXA2_30LEJAAXX:7:51:1121:401/1 +GTCAGCTTGCTGCCATCACCCACGCCGAACTCCTG ++PATHBIO-SOLEXA2_30LEJAAXX:7:51:1121:401/1 +baaaabaaaaaaabbaaaabaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:94:660:647/1 +CTGCCATCACCCACGCCGAACTCCTGCAGCTTCCC ++PATHBIO-SOLEXA2_30LEJAAXX:7:94:660:647/1 +bbabbabbbbbaaaaaaaaaaaaaaa_V^aaaaaa +@PATHBIO-SOLEXA2_30LEJAAXX:7:28:565:460/1 +GCCCGCTTCCACGGTGGGTACCAAGGTCAGCTTGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:28:565:460/1 +aabb^aaaaaaaa^aZ\a^aaaaaaaa^^^aaZX] +@PATHBIO-SOLEXA2_30LEJAAXX:7:5:1206:149/1 +GCTGGACTCGGAGACGCTGTGCAGGAGGGCCGTGA ++PATHBIO-SOLEXA2_30LEJAAXX:7:5:1206:149/1 +L]S]La\a_Z`aaaaaaaaaaWaVXUX]`GRUKEX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/STK11-MIDN_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/STK11-MIDN_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,40 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:91:902:1724/2 +GTGAAGGAGGTGCTGGACTCGGAGACGCTGTGCAG ++PATHBIO-SOLEXA2_30LEJAAXX:7:91:902:1724/2 +abbaaaaaabaaaabbabbbaaaaaaaaaaa^MUX +@PATHBIO-SOLEXA2_30LEJAAXX:7:70:727:1607/2 +CCCGGGGCGCCCGCGAGTGAGGCGCGGGGCGGCGG ++PATHBIO-SOLEXA2_30LEJAAXX:7:70:727:1607/2 +aabaaaaaaaaaaa]S_aa[X`a^V[]aaZaaJUK +@PATHBIO-SOLEXA2_30LEJAAXX:7:85:1254:2020/2 +GCCATCACCCACGCCGAACTCCTGCAGCTTCCCCG ++PATHBIO-SOLEXA2_30LEJAAXX:7:85:1254:2020/2 +baaaabaaaa^a\b_[[aaaaaZaaaaaaaaabaP +@PATHBIO-SOLEXA2_30LEJAAXX:7:92:651:457/2 +GCTGCCATCACCCACGCCGAACTCCTGCAGCTTCC ++PATHBIO-SOLEXA2_30LEJAAXX:7:92:651:457/2 +bbabbaaaaaaaaaaaaaaaaaaaaaa^Z_aaaa[ +@PATHBIO-SOLEXA2_30LEJAAXX:7:30:916:209/2 +TCTGGACTCGGAGACGCTGTGCAGGAGGGCCGTCT ++PATHBIO-SOLEXA2_30LEJAAXX:7:30:916:209/2 +abbaabaaLaWaaabbLWaUEER]aa]JM\UUEEK +@PATHBIO-SOLEXA2_30LEJAAXX:7:86:886:1490/2 +GTCAGCTTGCTGCCATCACCCACGCCGAACTCCTG ++PATHBIO-SOLEXA2_30LEJAAXX:7:86:886:1490/2 +abbaaaababaaabbaaaaa^V^aaa\^^aaaa[P +@PATHBIO-SOLEXA2_30LEJAAXX:7:51:1121:401/2 +AGGTGCTGGACTCGGAGACGCTGTGCAGGAGGGCC ++PATHBIO-SOLEXA2_30LEJAAXX:7:51:1121:401/2 +aaaaabbbaaabbaaaabbaabaaaaaaaaaaaaU +@PATHBIO-SOLEXA2_30LEJAAXX:7:94:660:647/2 +CGCAAGCGGGCCAAGCTCATCGGCAAGTACCTGAT ++PATHBIO-SOLEXA2_30LEJAAXX:7:94:660:647/2 +aaaaaaaaaaaaaaaaa`X`aaa`__aaaaaaa_X +@PATHBIO-SOLEXA2_30LEJAAXX:7:28:565:460/2 +GCAAGTACCTGATGGGGGACCTGCTGGGGGAAGGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:28:565:460/2 +baaaaabbaaaaaaZaabaaaababa^aaaaaa]U +@PATHBIO-SOLEXA2_30LEJAAXX:7:5:1206:149/2 +GCTTGCATCACGGACTGTTCCGGCCTTGAGGCCTG ++PATHBIO-SOLEXA2_30LEJAAXX:7:5:1206:149/2 +E[aaaaaaaaaaWabaXaaa_X_aaa^H^QUaa]K |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/SULF2-PRICKLE2_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/SULF2-PRICKLE2_1.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:28:1112:1028/1 +CGCCGAGGGCGAGCAGGAGCGAGAGTGTGTCGAGC ++PATHBIO-SOLEXA2_30LEJAAXX:7:28:1112:1028/1 +bbbaaUaaaaaaabWbaaaaaabbaZaaaaZaZaZ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/mcf7_pe_35bp/SULF2-PRICKLE2_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/mcf7_pe_35bp/SULF2-PRICKLE2_2.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30LEJAAXX:7:28:1112:1028/2 +CCATCTCCTGGTACTGCTGGGCAGAGAGCAGGCTG ++PATHBIO-SOLEXA2_30LEJAAXX:7:28:1112:1028/2 +ZbaZabaaaaaaaaaaa`Z]V^Zaaa^aaaaaaaU |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/APP-AR_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/APP-AR_1.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:33:970:1315/1 +TTTGTGTGTTGCCCACTGGCTGAAGAAAGTGACAATGTGGATTCTGCTGATGC ++PATHBIO-SOLEXA2_30TUEAAXX:3:33:970:1315/1 +ababbbaababaaba^aaaaaaabbabaaaaaaaaaaaaaabbbaaabaaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/APP-AR_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/APP-AR_2.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:33:970:1315/2 +GGACGGCGGCCGAGGGTAGACCCTTCCCAGCCCTAACTGCACTTCCATCCTTG ++PATHBIO-SOLEXA2_30TUEAAXX:3:33:970:1315/2 +aaabaaaaabaaaaaaaaaabbaaabbbaaaaaaaaaaaabaaaaaaaaaaa^ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/BC018860-NDRG1_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/BC018860-NDRG1_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:7:321:939/1 +TGTGGACCACTTCCACGTTACTCTGCATTTCTTCCTTCCCAAAAAGGTGGGAC ++PATHBIO-SOLEXA2_30TUEAAXX:3:7:321:939/1 +aaaaaaaaaababaaaaaaa^K\baaaaaaa[E[aa\aaaaaaaaaaa^X[b] |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/BC018860-NDRG1_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/BC018860-NDRG1_2.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:7:321:939/2 +TGACTCTTACCTCCCTCTCTCCTACTCCTGCTCGCATCTGCTATAGTGGAAGC ++PATHBIO-SOLEXA2_30TUEAAXX:3:7:321:939/2 +baaaaabaaaabbaaaaabbaba^abbbabbbbaaabbaaabaaaaWaaZ\^` |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/BC021729-FRY_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/BC021729-FRY_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,12 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:40:1302:1557/1 +CGCGATTTTGGCTCACGGCAAGCTCCACCTCCTAAATGGCTCACGCCATTCTC ++PATHBIO-SOLEXA2_30TUEAAXX:3:40:1302:1557/1 +abaaaaabaaaababaaaaabababaaaaabaaaabaaaaaaaa^ba^a\aa^ +@PATHBIO-SOLEXA2_30TUEAAXX:3:91:975:1707/1 +GGAAATGACATTAGAGGAACACTTCAAAGAAACATTAAGAACTTGGATCCCAT ++PATHBIO-SOLEXA2_30TUEAAXX:3:91:975:1707/1 +aOa\aa[SZX`aaaaUaaaJaaaa[K[aZZX]UaaaaaZ^a_[QX_\aabaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:77:805:952/1 +AATGATTATAAATCATCAAGAAGGAGTAGCCTGCCACCTGAACTACTCATAGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:77:805:952/1 +abaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_[`aaaaaaaa_]` |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/BC021729-FRY_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/BC021729-FRY_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,12 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:40:1302:1557/2 +TCCAAGGCAGGCAGATCAGGAGGTCAGGAGATCGAGACCATCCTGGCTAACAT ++PATHBIO-SOLEXA2_30TUEAAXX:3:40:1302:1557/2 +aabaaaaaWaabaaaaabaaaabababaaaabaaaaaabaaaaabaa^aaa^b +@PATHBIO-SOLEXA2_30TUEAAXX:3:91:975:1707/2 +TCTGGGCCCACATTGATGGGTAGCATGGTTGGCGGCCCTTTCTCCCTGTGCGT ++PATHBIO-SOLEXA2_30TUEAAXX:3:91:975:1707/2 +XaaaaaM[aa`X`aab^HXbaaaaWaaaaaaaaaaaa^^SVabaZababaaaX +@PATHBIO-SOLEXA2_30TUEAAXX:3:77:805:952/2 +CGGCCCTTTCTCCCTGTGCGTGCCAGAAGCAGGTGGAACCGGAGGCTTGATGT ++PATHBIO-SOLEXA2_30TUEAAXX:3:77:805:952/2 +aaaabaaabaaabbaaaaaaaaaaaaaaaaaaaaaaaaaaa_^_aaa`]]_Z^ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/HJURP-EIF4E2_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/HJURP-EIF4E2_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,24 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:72:1274:1895/1 +ACGAGGGACCATCAGTTCCCTGCAAAAAGACCCAGGCGATCAGAACCCCAGGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:72:1274:1895/1 +bbbaaaabbbbbaaaaaaaaabbaaaaaaaaabaaa[Q^abbaaaaaaaa``` +@PATHBIO-SOLEXA2_30TUEAAXX:3:37:371:2034/1 +GTTTTTTCCTTCTCACCATCTTTCTGTGTGCTGTTTTCTTCATTCTGATCATG ++PATHBIO-SOLEXA2_30TUEAAXX:3:37:371:2034/1 +bbbbbbbbbbbbbbabbbbbbbbbbaabaabbbaaabaaaabbaabbaabab^ +@PATHBIO-SOLEXA2_30TUEAAXX:3:96:73:466/1 +TTTTTTCCTTCTCACCATCTTTCTGTGTGCTGTTTTCTTCATTCTGATCATGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:96:73:466/1 +abbaaababbbbabbbaabbaabbbbabbb_X_aaabaaaabbabbbabbbaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:54:166:83/1 +TGTTTTCTTCATTCTGATCATGGTCCCCACTGTCATCATCTTTCAAACTCTCT ++PATHBIO-SOLEXA2_30TUEAAXX:3:54:166:83/1 +baaaabaa\bbaaaaaaaaaZaaa^aaabaabaaZabZaaaaaaaaaaaaaWa +@PATHBIO-SOLEXA2_30TUEAAXX:3:98:145:271/1 +TTTTCTTCATTCTGATCATGGTCCCCACTGTCATCATCTTTCAAACTCTCTTC ++PATHBIO-SOLEXA2_30TUEAAXX:3:98:145:271/1 +aaabababbaabbaabbabbaaabbbbaaaabbbabbabbabaaabaabaaba +@PATHBIO-SOLEXA2_30TUEAAXX:3:1:1745:1639/1 +GTTTTTTCCTTCTCACCATCTTTCTGTGTGCTGTTTTCTTCATTCTGATCATG ++PATHBIO-SOLEXA2_30TUEAAXX:3:1:1745:1639/1 +babbbbbaaababbabaaaaaabaabaaaaaaaMaaaaaaaaabaaaaaaaa[ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/HJURP-EIF4E2_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/HJURP-EIF4E2_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,24 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:72:1274:1895/2 +TGCTGTTTTCTTCATTCTGATCATGGTCCCCACTGTCATCATCTTTCAAACTC ++PATHBIO-SOLEXA2_30TUEAAXX:3:72:1274:1895/2 +bbbaaabbbbbbaaabbbbbbaaaaaabbbaaabaaaaaaaabaaaa`]]aba +@PATHBIO-SOLEXA2_30TUEAAXX:3:37:371:2034/2 +GAGGGACCATCAGTTCCCTGCAAAAAGACCCAGGCTATCAGAACCCCAGGGCT ++PATHBIO-SOLEXA2_30TUEAAXX:3:37:371:2034/2 +babbaaaaaaaaaaabbbbbaaaaaaaaabaaaaaaaaaaaaaaab`U``aaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:96:73:466/2 +AGTTCCCTGCAAAAAGACCCAGGCTATCAGAACCCCAGGGCTCCGGACGCCAG ++PATHBIO-SOLEXA2_30TUEAAXX:3:96:73:466/2 +[ZQ\`aaaaa\aaaaaabbaaaabaaaaa_X]abaaaaaaaaaaaaaaaaaXX +@PATHBIO-SOLEXA2_30TUEAAXX:3:54:166:83/2 +CCATCACGAGGGATGGCACGAGGGACCATCAGTTCCCTGCAAAAAGACCCAGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:54:166:83/2 +aaaaaaaaaZaa\aaaababaaaaaabaaaaaaaaaaabaaaMUWaaaaaUaZ +@PATHBIO-SOLEXA2_30TUEAAXX:3:98:145:271/2 +CTCGTGCCATCACGAGGGATGGCACGAGGGACCATCAGTTCCCTGCAAAAAGA ++PATHBIO-SOLEXA2_30TUEAAXX:3:98:145:271/2 +aabaa^abbaaaaabaaaaaaababaaaaaabaaaaXabbbbbabaa^W___a +@PATHBIO-SOLEXA2_30TUEAAXX:3:1:1745:1639/2 +AGGGACCATCAGTTCCCTGCAAAAAGACCCAGGCTATCAGAACCCCAGGGCTC ++PATHBIO-SOLEXA2_30TUEAAXX:3:1:1745:1639/2 +aaaaaaaaUaaaaaaaaaaaaZaa\_Z_aaaaaa`X`aaaRXR^aXG[V^[V^ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/INPP4A-HJURP_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/INPP4A-HJURP_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,12 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:46:530:1413/1 +TCTTCCATACTGTAAGACGTGTTCTCTCCTCTGCGCATGCACTCCAGGGCCTG ++PATHBIO-SOLEXA2_30TUEAAXX:3:46:530:1413/1 +baaa_S_a\ab^Z^aaab^JVVaaa[aW^V_^abLaaaaaabaaaaaa_SZaJ +@PATHBIO-SOLEXA2_30TUEAAXX:3:36:797:1728/1 +AAATAACCTAGCTACACACTTTTAGTTTCCAATTTTTCTAGCATGAAATCACT ++PATHBIO-SOLEXA2_30TUEAAXX:3:36:797:1728/1 +ababaabbbabbbbbbbbbbabbababbbaabbbababaaabbabaaaababa +@PATHBIO-SOLEXA2_30TUEAAXX:3:73:1113:793/1 +GAAATAACCTAGCTACACACTTTTAGTTTCCAATTTTTCTAGCATGAAATCAC ++PATHBIO-SOLEXA2_30TUEAAXX:3:73:1113:793/1 +aaaaabbbbbaaabbbbbbbbbbbaaaaabaaaaaabbabaaabbbaaaaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/INPP4A-HJURP_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/INPP4A-HJURP_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,12 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:46:530:1413/2 +ATGCCCGGAAGAATAAGAACGTCGACATTATCTGGCAAGCTGCTGAGAGCTGC ++PATHBIO-SOLEXA2_30TUEAAXX:3:46:530:1413/2 +[aLabbbaaaa_V^aaaaaQU^aLa^Z_^J^aa^S^aaWaaaXa[E[Uaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:36:797:1728/2 +TCACCAGCTGCAAGAGCGCTAAGGACCGTACAGCCATGTCGGTGACACTGGAG ++PATHBIO-SOLEXA2_30TUEAAXX:3:36:797:1728/2 +baaaaaabaaaaaaaaaabaaaaaaabbabaaaaaaaaaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:73:1113:793/2 +GCTAAGGACCGTACAGCCATGTCGGTGACACTGGAGCAGTGCCTGATCCTGCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:73:1113:793/2 +aabbbaaaabbaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaa`_`aaaa`\ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/PIK3C2A-TEAD1_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/PIK3C2A-TEAD1_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,16 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:48:1022:1308/1 +TTGGAATGAAAATCACGAGATTTCCTTCTGGCAAGAACCTGAATGTGACTAGA ++PATHBIO-SOLEXA2_30TUEAAXX:3:48:1022:1308/1 +bbbbabbabbaaababbbbaabbbbbbbaaaaaaaaaaaaaaaaaabaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:36:333:663/1 +CCGAGACGATCTGGGCTGAGGACATGGCCGCCATGTGCTGCAGGGCCTTATCC ++PATHBIO-SOLEXA2_30TUEAAXX:3:36:333:663/1 +baaaaaaaabbaaaaaaaaaaaaaaaababbaaaaaaaaa_]`aaaaaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:74:156:525/1 +GAAGCTTTCGGCGGCGGCTGAGCCAGCTGAGGGGAAAAATGGCTCGGACTGTG ++PATHBIO-SOLEXA2_30TUEAAXX:3:74:156:525/1 +baaaababaaaaaaaaa^a`__aaZa]_X_`aa^X[aa_O[ZPX[R[KK_[MR +@PATHBIO-SOLEXA2_30TUEAAXX:3:3:1578:1127/1 +CGGACTGTGGCGGCTTCGGCGGCTCAGGGTGTGGTAGAGGGGGAGGCTCAAGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:3:1578:1127/1 +aabaaaaaaaaaa^aaaaa]__JXUER]S_OEE[OKUEEUER[[^^HOEEEUE |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/PIK3C2A-TEAD1_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/PIK3C2A-TEAD1_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,16 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:48:1022:1308/2 +TTTCGGCGGCGGCTGAGCCAGCTGAGGGGAAAAATGGCTCGGACTGTGGCGGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:48:1022:1308/2 +bbbbaaaaaaaaaaaaaaaaaaa[^^aa^S^aaa^V_aa`Z[GU]^Z^^V[[G +@PATHBIO-SOLEXA2_30TUEAAXX:3:36:333:663/2 +TTGTGGTAGAGGCGGAGGCTCAGGCAGCTTGAGGTAGGAATGAATTGATAGCC ++PATHBIO-SOLEXA2_30TUEAAXX:3:36:333:663/2 +baaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`_`a_``aaaaaaaa[ +@PATHBIO-SOLEXA2_30TUEAAXX:3:74:156:525/2 +TGGAATGAAAATCACGAGGCTTCCTTCTGGCAAGAACCTGAATGTGACTAGAC ++PATHBIO-SOLEXA2_30TUEAAXX:3:74:156:525/2 +bbbaaaaaaaaababbaXGXaabbabaaaaaaaaaabbaaaaaaaaaaaaaa^ +@PATHBIO-SOLEXA2_30TUEAAXX:3:3:1578:1127/2 +CGCCATGTGCTGCAGGGCCTTATCCTTTGCAGTCTGATCCTTTAGCTTGGAAT ++PATHBIO-SOLEXA2_30TUEAAXX:3:3:1578:1127/2 +aaaabbbaaaaaaaaaabbaaaaaaaabaaaaaaabbaaaaaaaaaaaaaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/RANBP17-DOCK2_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/RANBP17-DOCK2_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,16 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:93:1666:460/1 +GAAGGTCTCAATGTAGAAGGAGTAGTGCTGGTCACCCATCTGGTTTAAGATGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:93:1666:460/1 +aaaaaaaabaaababaabaaaaaaabaaaaaaaaaaaaaaaaaaaaaaabaa_ +@PATHBIO-SOLEXA2_30TUEAAXX:3:4:124:1495/1 +TTGTCTTTGAAGACTGTCGGAACCAGTGGTCAGTATCCAGGCCTCTCCTGGGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:4:124:1495/1 +ababbbbbaaaaaabaaaaaaaa_X_^aaaaZ^IZaaaaaaaaaaaba^Z\ba +@PATHBIO-SOLEXA2_30TUEAAXX:3:46:889:124/1 +GCCTCTCCTGGGGCTCATCCTGCTCAATGAGAAGGCCTTCACCTACCACCATA ++PATHBIO-SOLEXA2_30TUEAAXX:3:46:889:124/1 +aaaaaaaa[U^aabbaaaaaaaa][`a_S]_`[XEMGZaaa\[^aaa^S^^JU +@PATHBIO-SOLEXA2_30TUEAAXX:3:35:1057:977/1 +TGCTGGTCACCCATCTGGTTTAAGATGGCTGTCATACATGCCACAAAGTGACT ++PATHBIO-SOLEXA2_30TUEAAXX:3:35:1057:977/1 +aabbbb^baabbbbaaaaabbbbbbbbaaaaaaaaaaaaaaaaaaa]]Zaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/RANBP17-DOCK2_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/RANBP17-DOCK2_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,16 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:93:1666:460/2 +TCAGTATCCAGGCCTCTCCTGGGGCTCATCCTGCTCAATGAGAAGGCCTTCAC ++PATHBIO-SOLEXA2_30TUEAAXX:3:93:1666:460/2 +baaaaaaabaaaaaaaaaaaaaaaaabaaaaaaaa`__aaaaa]]`aaaaZaR +@PATHBIO-SOLEXA2_30TUEAAXX:3:4:124:1495/2 +TGCTGGTCACCCATCTGGTTTAAGATGGCTGTCATACATGCCACAAAGTGACT ++PATHBIO-SOLEXA2_30TUEAAXX:3:4:124:1495/2 +UUaaaaaa^aa_U_aWaabba][]aaaabaaaaa^aUKU^bXEXERJ\Uaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:46:889:124/2 +TGGAAGGTCTCAATGTAGAAGGAGTAGTGATGGTCACCCATCTGGTTTAAGAT ++PATHBIO-SOLEXA2_30TUEAAXX:3:46:889:124/2 +aaUaa\HKRGOaaaXGU`_]a`_^Q^W^\Q_a_Z_`[_a^V^aaaaUGRQV_a +@PATHBIO-SOLEXA2_30TUEAAXX:3:35:1057:977/2 +ACCATTGTCTTTGAAGACTGTCGGAACCAGTGGTCAGTATCCAGGCCTCTCCT ++PATHBIO-SOLEXA2_30TUEAAXX:3:35:1057:977/2 +aabaabaaaaaaaaabababbaaaaaababaaaaaaaaaaabaaaaaaaaaba |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/RC3H2-RGS3_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/RC3H2-RGS3_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,24 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:37:646:111/1 +AAGGACTCAGATACCCTTTGAAGTCCCACAGTACCCACAGACAGAAGGCAGAG ++PATHBIO-SOLEXA2_30TUEAAXX:3:37:646:111/1 +abbaaaaaaaabaaaabbaaabaaabbaaabaaa^babaaaabbbbaabba^b +@PATHBIO-SOLEXA2_30TUEAAXX:3:7:1283:1650/1 +CAGCACACTCTCTTCTTCTGCTCCTGCGAGTGCGCTTCCAAAGTGAATAAGCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:7:1283:1650/1 +aaabaaabababbaabbbaababbabbaaaaaaaaaabaaa^a^aaaaaaaba +@PATHBIO-SOLEXA2_30TUEAAXX:3:17:1054:638/1 +GGTGATGCTGCCAGCTGTTGCTGCTTGGCGATGTTCTCCGACAGGCACCAGCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:17:1054:638/1 +abbbbbbbaaaaaaaaaaaaaaaaaaaaa^\^_]`__]`^[^^^^]___[[^[ +@PATHBIO-SOLEXA2_30TUEAAXX:3:51:540:507/1 +TGGCATTGGTTGAGTTGACCGGCCCCATCTCCTGCTGGAGAGAGCCGAAAGGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:51:540:507/1 +aababbbaaaaaaaabababaaaabaaaaabaabaaaaaaaaaaaba\aaaaX +@PATHBIO-SOLEXA2_30TUEAAXX:3:84:152:1144/1 +AAAAATCCAGCCCATTTCTAACTAGAGGACCAGTATATCCTCCGCATTCTGAA ++PATHBIO-SOLEXA2_30TUEAAXX:3:84:152:1144/1 +baaaaaaaaaaaa\aaaabbabaaaaaOaaaaaaaaaaaabaaXEOWaaaZaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:96:1139:137/1 +CTTCTATCCTGGGTGGCATTGGTTGAGTTGACCGGCCCCATCTCCTGCTGGAG ++PATHBIO-SOLEXA2_30TUEAAXX:3:96:1139:137/1 +bbbaaaabbaaaaaaaabbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaZ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/RC3H2-RGS3_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/RC3H2-RGS3_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,24 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:37:646:111/2 +ATTGGTTGAGTTGACCGGCCCCATCTCCTGCTGGAGAGAGCCGAAAGGGTGGA ++PATHBIO-SOLEXA2_30TUEAAXX:3:37:646:111/2 +aaabaababaaababaaabbaaaaaabaaabaabaabaaaaaaaaWaaa^aba +@PATHBIO-SOLEXA2_30TUEAAXX:3:7:1283:1650/2 +AGAGCTGAATTCTGTGCCTCAAAAATCCAGCCCATTTCTAACTAGAGTACCAG ++PATHBIO-SOLEXA2_30TUEAAXX:3:7:1283:1650/2 +aaaabaaaabbaabbaabbaaaaa^aaaZababaaaabaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:17:1054:638/2 +CTAACTAGAGTACCAGTATATCCTCCGCATTCTGAAAACATTTAGTATTTTCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:17:1054:638/2 +baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa^Z_aaaaaa_``aaaa^ +@PATHBIO-SOLEXA2_30TUEAAXX:3:51:540:507/2 +GCTGCTGGGCCCTCTGCAGATTCTGTAACTGAAAAAAGGCAGAGTGCTTATTC ++PATHBIO-SOLEXA2_30TUEAAXX:3:51:540:507/2 +bbaabaaaabbbaabbaaaaaabbbaaabbaaaaaaaaaaaaaaaaaaaaab_ +@PATHBIO-SOLEXA2_30TUEAAXX:3:84:152:1144/2 +CGGGGGTGATGCTGCCAGCTGTTGCTGCTTGGCGGGGTGCTCCGACCGGCACC ++PATHBIO-SOLEXA2_30TUEAAXX:3:84:152:1144/2 +a^V_aaLaaabaLaaba\a[QK^aa`UIQ^baaa\IZUJ]aaaaZ^HH[aaa^ +@PATHBIO-SOLEXA2_30TUEAAXX:3:96:1139:137/2 +TCAGATACCCTTTGAAGTCCCACAGTACCCACAGACAGAAGGCAGAGTGCTTA ++PATHBIO-SOLEXA2_30TUEAAXX:3:96:1139:137/2 +aaaaaabbbabbaaaaaaaabaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/TMPRSS2-ERG_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/TMPRSS2-ERG_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,744 @@\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:70:760:740/1\n+CGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:70:760:740/1\n+aaaZLXXa_[_aaaaWaaaaa^aaaa_]`aaaaaZaaa`]_aaaaaaaaaUGR\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:93:627:338/1\n+TGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:93:627:338/1\n+aaabbabbbbaaaaabaaaaaaaaaaaaaaaaaaaaa`Z_aaaaaaaaaaaa`\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:80:610:176/1\n+AGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:80:610:176/1\n+aaaababbaaaabaaaaaaaaaaaaaaaaaaaaaaa^Z\\aaaaaaa_Z]aaa]\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:66:533:945/1\n+GAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCTT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:66:533:945/1\n+baaabaaaabbaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaa__]`Z\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:52:63:295/1\n+GGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:52:63:295/1\n+aa\\Laaaaaaaaaaaaabaaaaa^aaXaaaabZ\\^WaaaaaaaZ\\WZ]_a^QQ\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:9:249:1837/1\n+GGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:9:249:1837/1\n+baZaabaaaababaaaaaababa_V\\aaabbaa\\a^Q^aa`_`\\V[UMK^a\\U\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:50:1383:772/1\n+GCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGA\n++PATHBIO-SOLEXA2_30TUEAAXX:3:50:1383:772/1\n+aaaaabababaaaaabaaaaaaaaaaaaaaaabaaaaaaabbaaaaaaaaaaX\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:75:1678:1421/1\n+CTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAA\n++PATHBIO-SOLEXA2_30TUEAAXX:3:75:1678:1421/1\n+abaaaaaaa\\aaaaaaaaaaaaaaaabaaW^aaaaW\\baa^aaaaaa][Saaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:15:885:1025/1\n+GGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:15:885:1025/1\n+aaaaaaaaaaabaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:6:362:672/1\n+GCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:6:362:672/1\n+aababbbbaaaaabbbaaaa
aaaaaaaaaaa```aaaaaaaaaaaaa_]_aa^\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:64:1391:490/1\n+CAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:64:1391:490/1\n+abab\\aaaaabaaaabaaabaaaaaaabaaaaaab\\aaLZaa^^aaaaLaaaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:38:116:785/1\n+GGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCATGAAGCCT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:38:116:785/1\n+a_UUabaaaaaaaZabbbaaaaaaabaaaabbaLaaaWaabaaaaZa[aa\\a_\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:4:1499:269/1\n+CGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:4:1499:269/1\n+aabbaaaaaaa\\baMaaaaaaaaaaa_]_aaaaaaaaaaaaaaa\\aZ\\QZ]^Q\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:1:216:647/1\n+GCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGTA\n++PATHBIO-SOLEXA2_30TUEAAXX:3:1:216:647/1\n+bbaa[Z^aXa^Q^aaaaaaaXNaaaaa^QXa\\_UX]aaaMHQ^aaaHJ[S^QH\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:64:863:109/1\n+CTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAA\n++PATHBIO-SOLEXA2_30TUEAAXX:3:64:863:109/1\n+bbbbbbaaaaaaaaaaaaaaaaaaaaaa``_aaaaaaaaaaaaaaaaaaa^[_\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:50:1646:1212/1\n+GCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:50:1646:1212/1\n+aaaaaaaaaaaaaaaaaaaaaaaaa`_]aaaaaa__`aaaaa__`\\__^W`RR\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:35:479:925/1\n+GAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGA\n++PATHBIO-SOLEXA2_30TUEAAXX:3:35:479:925/1\n+bbaabbaaaaabaaaabbbaabaabaaaaaaaaaabaaaaabaabaabaaa^Q\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:45:229:1247/1\n+CTAAGCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:45:229:1247/1\n+aaaaababa^aaabaaabaaaa^abaaaaabaaX[`WaaaaaaaU[aaaaaZO\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:36:356:614/1\n+CCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:36:356:614/1\n+abaabbabaaaababbabbbbaaaaabbaabaaaaaaaaaaaaaabaaaaaaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:40:1488:108/1\n+CGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:40:1488:108/1\n+bbba
aaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa[X[`X[\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:89:524:344/1\n+GGAGG'..b'aaaaaaLW^aaaaaaLaaaaaaZaaaaaaaaaaaaaaaXE\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:9:701:1223/1\n+GTTGAGACAGCCAATCCTGCTGAGGGACGCGAGGGCTCATCTTGGAAGTCTGT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:9:701:1223/1\n+aaaaaaaaba^VXabbaaaaaa^V[a`J`bUEX`_`aaaa[JS]WSaaaaaaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:27:1728:775/1\n+GTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:27:1728:775/1\n+aUaaaaa\\aaaWaaab`KHQVQ\\_S_aa_JXSa[[`aaaaaaXXaaa]Z_^Q[\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:38:616:1973/1\n+GCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCA\n++PATHBIO-SOLEXA2_30TUEAAXX:3:38:616:1973/1\n+aaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaa___aa_[]]__\\Za^Q_aZa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:45:1687:1005/1\n+GAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCTT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:45:1687:1005/1\n+abbabaaababbaaaaaaaaaabbaaa^a_[[aaaaaaaaaaaaa_J_^[Va[\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:4:1196:1118/1\n+TTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGTC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:4:1196:1118/1\n+bbabbaaaaabbbbbaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:41:1537:842/1\n+TAAGCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGCGCGCTGCGTG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:41:1537:842/1\n+aaaaUXLaa^aaaXZaWaOaaaaaWaUEUU_a^Waa^baOaLaLaLOLOaLLL\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:60:936:619/1\n+TGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTAGGAAGTCTGTCC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:60:936:619/1\n+bbaaa_Z_[[aaU[U_aaaaaaaaabLaaLaaaaXKX`[`JaXMGKV_aa^[K\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:22:1310:1233/1\n+CCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:22:1310:1233/1\n+bbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_`_a^X[^__`aaaa[\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:86:917:29/1\n+GGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:86:917:29/1\n+aaabaaaaaaaaabaaaaaaaaaaaaaaaaaaa]``aaaaaaaaaaaaaa_\\^\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:72:1
178:1343/1\n+CACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:72:1178:1343/1\n+KZ\\\\a^aa[GXaaaaaa`Z`ZaaaaaaaabLVQVaaaaZaaaaZEURER\\^aW\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:79:1659:1749/1\n+AAGCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:79:1659:1749/1\n+aabaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaa`__aaaa]__\\a_JZ^_^\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:24:1462:302/1\n+CGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:24:1462:302/1\n+aaaaabbaaaabbbbbaaaabaabaaabbbbaaaaaaaaaa_[`aaa_V^aaX\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:28:1231:968/1\n+GGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:28:1231:968/1\n+bbaabbaaaabbaaaaaaabbaaaaabaaaaaaaaaaabaaaaaaaaaaaaa_\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:55:369:1726/1\n+TTACATTCCATTTTGATGGTGACCCTGGCTGGGGGTTGAGACAGCCAATCCTG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:55:369:1726/1\n+aaa^aa^ababaaabaa^QZaabaa]__aaaaaaMaaaaaa[[H`_[aaa^__\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:7:1368:717/1\n+CGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCTTATCA\n++PATHBIO-SOLEXA2_30TUEAAXX:3:7:1368:717/1\n+aaaaaaabaaaaaaaaaa^_^aaaaaaaaaaaaaaaaZaaaaaaaaURZ[[a^\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:48:1643:1117/1\n+CTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAA\n++PATHBIO-SOLEXA2_30TUEAAXX:3:48:1643:1117/1\n+aaaaaaaaaaaaaaaaaaaaaaaaaaaa`__]]aaaaaaaaaaaaaaa`_`_^\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:8:1732:1312/1\n+GAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGA\n++PATHBIO-SOLEXA2_30TUEAAXX:3:8:1732:1312/1\n+aaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaa^_]]aaaa_]`aaaaU\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:12:1770:1382/1\n+CGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:12:1770:1382/1\n+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_]_aaaa`[]aaXXUaaaaaX]_\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:85:88:633/1\n+TTTTGATGGTGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:85:88:633/1\n+aababbbbabaaababbbababbaaaaaaabaaaaaaaaaaaaaaaaaaaaaa\n+@PATHBI
O-SOLEXA2_30TUEAAXX:3:81:768:1069/1\n+TTCAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGACTGTC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:81:768:1069/1\n+aUG[S`aUKXaaaZaaaZaaa^aaaaWOKG\\QQ^]Xaa^Q\\]aWaaXGMaaL[\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/TMPRSS2-ERG_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/TMPRSS2-ERG_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,744 @@\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:70:760:740/2\n+TTGAGACAGCCAATCCTGCCGAGGGACGCGTGGGCTCATCTTGGAAGTCTGTC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:70:760:740/2\n+aaaaaaaaaaaaaaab_J_aaaaababbaaaaaaaaaa^[_aaaaaaLaa^a_\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:93:627:338/2\n+AGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:93:627:338/2\n+bbbaaaabbbabaaaabbaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaa^\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:80:610:176/2\n+GCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGA\n++PATHBIO-SOLEXA2_30TUEAAXX:3:80:610:176/2\n+bbbbbaaaabaaabaababbbbbaaaaaaaaaaababaaaaaaaaaaaaaaaU\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:66:533:945/2\n+TGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:66:533:945/2\n+bbaaaaaaaaabaaabaaaaaaaaabaaaaaaaaaaa^[^aaaaaaaaaaaa_\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:52:63:295/2\n+GCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGACTGGCG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:52:63:295/2\n+aaaaaaaZaa\\VZWa\\WbaWaaQUIUUKU^ZaZWaaWZRKXaEEUaaaHHEPK\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:9:249:1837/2\n+TTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGCAGTCTGTC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:9:249:1837/2\n+baaa_U^V^b^JQ[aaaabb^Z^a^L_aaaaaab^V[EU^aaa[EM_\\aa^X^\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:50:1383:772/2\n+CGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:50:1383:772/2\n+aaaabbaaaaaaaaaaaababaabbbaaabbaaababbaaaaaaaaaaaaKE[\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:75:1678:1421/2\n+GGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:75:1678:1421/2\n+aaaaaaaaaaaaaaaaaaaaaabaabaaa^aaabaaaaaaaaa\\aaaZaaaa\\\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:15:885:1025/2\n+TGGCTGGGGGTTCAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:15:885:1025/2\n+aaabaaaaaaaZEXaaaaabaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaa_\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:6:362:672/2\n+CGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGTAAAC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:6:362:672/2\n+aaaaaaaaaaaaabaaaaaabaaaa
aaaaaaaa`__^^^aaaaa[KXERKX^K\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:64:1391:490/2\n+CCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGTCCATAGTCGC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:64:1391:490/2\n+aaaa^bbaaababaaaaabaaWabZaaaaaabaaa\\aaaaaZ^aaaabbaaaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:38:116:785/2\n+CCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:38:116:785/2\n+a^V^aaaaaaaa`S_]_aa\\aaa_[Mabaaaa\\aaaaaaaaabaaaaVHX[aa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:4:1499:269/2\n+TGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:4:1499:269/2\n+aaaaaaaaaabaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_]]aa^\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:1:216:647/2\n+GGTTGAGACAGCCAATCCTGCTGAGGGACGCGTCGGCTCATCTTGGAAGTCTG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:1:216:647/2\n+X_aaLWaabZaaaa^aaaabbabaaa``_aaaVHXabaaaaWaaa[Q^XG[aV\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:64:863:109/2\n+GCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:64:863:109/2\n+aaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaa`_`aaaaaZ\\^[K[^[\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:50:1646:1212/2\n+TGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCCGAGGGACGCGTGGGCTC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:50:1646:1212/2\n+aaabbaaaaaaaaaaaaaaaaaaaaaaaaaaa_[_a`_`a^^_aa_^_a^[[X\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:35:479:925/2\n+GCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:35:479:925/2\n+bbbaaabbaaaaaaaaaaaaaaaabaaaaaaaaabaaaa\\aaaaaaba\\aabb\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:45:229:1247/2\n+GGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:45:229:1247/2\n+bbbb^H[abaaaabaaaaabaaaaaaaaaaababaaaaababaaabVQ__]]a\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:36:356:614/2\n+GGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:36:356:614/2\n+aaaaaababababaaaaaababaaaaaaabaaaaaaaaaaaaaababaa^aaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:40:1488:108/2\n+CGATGGTGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:40:1488:108/2\n+bbbabaaaaaa
aaaaaaaaaaaabaaaaaaabaaaaaaaaaa`_]aaaaaaaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:89:524:344/2\n+TTGAG'..b'aa\\a^abbbaaaaaaaaababaaaaaaaaaaaaaaaaa[H\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:9:701:1223/2\n+CAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:9:701:1223/2\n+aaaa^aaaaZaabaaaaaaaaaaZaabaaaaUabaaaaaa\\[H^aba__]_`V\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:27:1728:775/2\n+CTAAGCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:27:1728:775/2\n+aZaaaa^aaaabaaaZaaaaabaa^a^a\\aaaaaaaaaLaa\\_JUUZIQbWOM\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:38:616:1973/2\n+GTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:38:616:1973/2\n+aaaaaaababaaaaZabbaaa^aaaaaabbbaaaaaaaaaaaaaaaaaaaaaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:45:1687:1005/2\n+CCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:45:1687:1005/2\n+abbaababaaaabbaaaaaaabbaaabaaaaabbabaaaaaaaaaaaaaaaaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:4:1196:1118/2\n+CGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:4:1196:1118/2\n+aabaaaaaaabaaaabaaaaaaaaaaaaabaZaaaaaa^ZMXaaaaaZaa[V[\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:41:1537:842/2\n+GTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:41:1537:842/2\n+aMabaaaW^abaaaa\\aaabbbbabbabaa^aaa^`J_^bWaaa^\\ba\\aaZL\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:60:936:619/2\n+TAAGCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:60:936:619/2\n+aaaaaabaa]M`aaaaaaaaaaaaaa^aa\\a_]`aaaaa`S`a[ZVaaaaaaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:22:1310:1233/2\n+AGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:22:1310:1233/2\n+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa^V[aaaaaa^Z\\XOX]X\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:86:917:29/2\n+GGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:86:917:29/2\n+aaaaaaaaaaaaaaaaabbaaaaaaaaaa_[_a_Z`aaaaa\\^^aaaaa^V^U\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:72:11
78:1343/2\n+CGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCCGGTGCGCGGCGCC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:72:1178:1343/2\n+aaaaaZaa[QXaaa[KKaaaaaa^aaaZaaaaZa^^aL[HV_JRKUaaWaUSS\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:79:1659:1749/2\n+TTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGTC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:79:1659:1749/2\n+aaaaaaaaabbaaabbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa__`aaa_\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:24:1462:302/2\n+TGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:24:1462:302/2\n+abbbabbbabbabbbaabbb_[_^abaXaaaaaaaaaaaaaaaaaaX_`aaaX\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:28:1231:968/2\n+CGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:28:1231:968/2\n+aaaaabababaaaaaaaabaaaaaaaaaaa_^^aaaaaaaaa_M_aabaa[]`\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:55:369:1726/2\n+GGAGGCGGAGGGCGAGGGGCGGCGAGCGCCGCCTGGAGCGCGGCAGGGAGCCT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:55:369:1726/2\n+aaaababaaabbb^Q^baa[Uaab\\ZZUaaaaZ]_]]VQ^_Z^_Q^XERaa^E\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:7:1368:717/2\n+TTGAGACAGCCAATCCTGCTGAGTGACGCGTGGGCTCATCTTGGAAGTCTGTC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:7:1368:717/2\n+aaaaaaaaaaaaaaabbbaaaa^Q^aaaaaaaaaaaaaaaaaa[^^aaaaaaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:48:1643:1117/2\n+GAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:48:1643:1117/2\n+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa__`aaaaaaaU_ZR[aaaa\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:8:1732:1312/2\n+TTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGTC\n++PATHBIO-SOLEXA2_30TUEAAXX:3:8:1732:1312/2\n+aaaabaaabbaabbaaaaaaaaaaaaaaaaaaa^aaaaababaa^[^__aa\\\\\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:12:1770:1382/2\n+GGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:12:1770:1382/2\n+aaaaaaaaaabaaabaaaaaaaaaaaaaaaaaaaaaaaaaa__`aaaa^[^XU\n+@PATHBIO-SOLEXA2_30TUEAAXX:3:85:88:633/2\n+GGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCTTATCAG\n++PATHBIO-SOLEXA2_30TUEAAXX:3:85:88:633/2\n+aa^aaabaaabaabaaaaaaabbbabbaaaaaaaaaa__`aaa_]_a_\\^aa^\n+@PATHBIO
-SOLEXA2_30TUEAAXX:3:81:768:1069/2\n+CTAAGCAGTAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCT\n++PATHBIO-SOLEXA2_30TUEAAXX:3:81:768:1069/2\n+aa^KV[aaPE[aaaUERaaaZH[aaLUPERH^^GGK[UV^[^a_^V[]aaaaa\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/TYMP-SCO2_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/TYMP-SCO2_1.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:63:1353:655/1 +CTGGCCCTGCCCACCTGTCTCTGCAGGGCCCTGCCTTGACAAAAGCCAGGACC ++PATHBIO-SOLEXA2_30TUEAAXX:3:63:1353:655/1 +aaaaaaaabaaaaaabbababa\^aaaaabbaaaaaabaaaaaaaaabaa^aZ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/TYMP-SCO2_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/TYMP-SCO2_2.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,4 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:63:1353:655/2 +GCGCCCGGGAGCAGGAGGAGCTGCTGGCGCCCGCAGATGGAGCATCAGATCCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:63:1353:655/2 +aaa^aaXEXaaaaaaa^aaaaaaabaa^abbaaaWaWaZXGXaaaaaaZaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/USP10-ZDHHC7_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/USP10-ZDHHC7_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,12 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:1:1485:1153/1 +GCAGCCGTCACGGATGAACCAGACCCGGTCAGCCACGTCAGCCTCGGAGGAGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:1:1485:1153/1 +aaaaabaaaaaaaaaaaaaa``_aa``^^^_^^`]_```_^\^^[^XEZXOXZ +@PATHBIO-SOLEXA2_30TUEAAXX:3:84:1623:1043/1 +AGCCGTCACGGATGAACCAGACCCGGTCAGCCACGTCAGCCTCGGAGGAGGAG ++PATHBIO-SOLEXA2_30TUEAAXX:3:84:1623:1043/1 +abaaaaabbaaaaaaaaaaaaaaaaaaaaaa^aaaZ^aa^XRXaaaaUa[IZU +@PATHBIO-SOLEXA2_30TUEAAXX:3:94:1029:1011/1 +GGCGGGGGAAGCAGCGTGAGCAGCCGGAGGATCGCGGAGTCCCAATGAAACGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:94:1029:1011/1 +aaaaaaa`Z`aaaaaaaaaaaaaaaaaaa`[__\\^^VXEU_`_RMXX\ZHXX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/USP10-ZDHHC7_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/USP10-ZDHHC7_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,12 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:1:1485:1153/2 +CAGCCGGAGGATCGCGGAGTCCCAATGAAACGGGCAGCCATGGCCCTCCACCG ++PATHBIO-SOLEXA2_30TUEAAXX:3:1:1485:1153/2 +aaaaaaaaaaaaaaaaaaaaaaaaaaa_``_``^RRXUXZ[X[[[XOX[ERER +@PATHBIO-SOLEXA2_30TUEAAXX:3:84:1623:1043/2 +GGCGGGGGAAGCAGCGTGAGCAGCCGGAGGATCGCGGAGTCCCAATGAAACGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:84:1623:1043/2 +baaaaaba\aaaaaaaaaaabaWa[V^^aaL^aaaaa\_]Xa]__XV^aa^aa +@PATHBIO-SOLEXA2_30TUEAAXX:3:94:1029:1011/2 +TCGGAGGAGGAGGACGATGAAGAGTCATAGTTGTCATTTTCAGCCAGGAGAGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:94:1029:1011/2 +baaaaaaaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaa_\^_^_aaZ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/VWA2-PRKCH_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/VWA2-PRKCH_1.fq Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,8 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:43:285:459/1 +TCCCACGTGGCAGCCGCGCCCCGGGCGCCCCTCCTGTGATCCCGTAGCGCCCC ++PATHBIO-SOLEXA2_30TUEAAXX:3:43:285:459/1 +aaababaaaabaaaaa_Z`bbaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:13:1139:1547/1 +AGCGCCCCCTGGCCCGAGCCGCGCCCGGGTCTGTGAGTAGAGCCGCCCGGGCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:13:1139:1547/1 +aaaabaaabba\aaaaa^\aaaaaaaaabaaaWUaaa\abaaaaa\Z^aaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/VWA2-PRKCH_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/VWA2-PRKCH_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,8 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:43:285:459/2 +TAAAATGTTTGAAGATCCGGTCTCTCTGGAGAGTAGCTCTGGAAAACAGGAAA ++PATHBIO-SOLEXA2_30TUEAAXX:3:43:285:459/2 +baaa^baabaababbbbbababbabbaaabaaabbabbabaaaaaaaaaaaa_ +@PATHBIO-SOLEXA2_30TUEAAXX:3:13:1139:1547/2 +ATTGATCTGGTGGACTCGCCTTCGCATAGCCCTTTGGCGCTTCCTGGTAAAAT ++PATHBIO-SOLEXA2_30TUEAAXX:3:13:1139:1547/2 +Wabba\aaa^H^aaabaaaaaaaaaa_J_aNZ^[[_ZZaXJH^Q^\IVH[aZ[ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/ZDHHC7-ABCB9_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/ZDHHC7-ABCB9_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,64 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:92:573:1783/1 +CTGCGGCATGATCTGTGCTGTCATGACGTGGCTTCTGGTCGCCTATGCAGACT ++PATHBIO-SOLEXA2_30TUEAAXX:3:92:573:1783/1 +aaaaaaa\aaaabaaaaaaaaa^aaaaaaaaaaaZXXXEXX]aaaaaZS]a\U +@PATHBIO-SOLEXA2_30TUEAAXX:3:13:326:121/1 +GGCTGCGGCATGATCTGTGCTGTCATGACGTGGCTTCTGGTCGCCTATGCAGA ++PATHBIO-SOLEXA2_30TUEAAXX:3:13:326:121/1 +bbbbaaaabbaaaaaaaaaaaaaaaaaaa`__aaaaaaa^^_aa^Q^_]_Z^[ +@PATHBIO-SOLEXA2_30TUEAAXX:3:33:576:1681/1 +CGCTGCGCCGCTGCAAGGTCGGAGCTGAGACTGGCGTGCCTTGAGACAGGCTG ++PATHBIO-SOLEXA2_30TUEAAXX:3:33:576:1681/1 +aababaaabbaaaabaaaaaaaaaaaaaaaaaaaaaa^^[aaaaaaaaa^X^X +@PATHBIO-SOLEXA2_30TUEAAXX:3:1:668:1831/1 +TGCTGTCATGACGTGGCTTCTGGTCGCCTATGCAGACTTCGTGGTGACTTTCG ++PATHBIO-SOLEXA2_30TUEAAXX:3:1:668:1831/1 +aaaaaaaaaaaabbaaaaa__]`]`aa^S^aaXEX_`aaaaaa^[[[^__a]U +@PATHBIO-SOLEXA2_30TUEAAXX:3:33:1105:1973/1 +CGGTCCAGGTGGCTGAAGATATAGATGGCCGTGGTCACGCAGATGTCCACACT ++PATHBIO-SOLEXA2_30TUEAAXX:3:33:1105:1973/1 +bbabbaaaaaaaaaaaaa_V^aabbaaaaaaaaaaaaaaaaa__]a[[[^^a^ +@PATHBIO-SOLEXA2_30TUEAAXX:3:7:647:325/1 +GCAGACTTCGTGGTGACTTTCGTCATGCTGCTGCCTTCCAAAGACTTCTGGTA ++PATHBIO-SOLEXA2_30TUEAAXX:3:7:647:325/1 +aaaaaabbaaaaaabaababababaaaaaaaababaaaabbaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:92:512:1173/1 +CAGGTTGTAGCTCACGGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:92:512:1173/1 +aaaaaabaaaabaaaabaaabaababaaaa\aababababaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:89:1268:572/1 +TCGCCTATGCAGACTTCGTGGTGACTTTCGTCATGCTGCTGCCTTCCAAAGAC ++PATHBIO-SOLEXA2_30TUEAAXX:3:89:1268:572/1 +aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa``_Z_aaaaaaaaaa_^^aaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:38:1633:1991/1 +GGTGACTTTCGTCATGCTGCTGCCTTCCAAAGACTTCTGGTACTCTGTGGTCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:38:1633:1991/1 +bbaaabbbbaaabaaaaaaaaaaaaaaaaaaaaaaaa___aaaaaaaaaa_^X +@PATHBIO-SOLEXA2_30TUEAAXX:3:76:1226:1886/1 +TGGAGGTGGGCGGGTGCTGAAGGCCAGGTTGTAGCTCACGGGGGCAAGGCCAT ++PATHBIO-SOLEXA2_30TUEAAXX:3:76:1226:1886/1 +aaaaaa^aZaabbaa\aabaaaaaaaa^H^aaaaabaaaaaXa^aaaaaaaaa 
+@PATHBIO-SOLEXA2_30TUEAAXX:3:74:1204:1439/1 +CATGATCTGTGCTGTCATGACGTGGCTTCTGGTCGCCTATGCAGACTTCGTGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:74:1204:1439/1 +babbabbbbaabbabaaaabaaabbaaaaZaaaaaaaaaaaaaaaaaaaaaa` +@PATHBIO-SOLEXA2_30TUEAAXX:3:58:213:1628/1 +TGGGCGGGTGCTGAAGGCCAGGTTGTAGCTCACGGGGGCAAGGCCATCATCAT ++PATHBIO-SOLEXA2_30TUEAAXX:3:58:213:1628/1 +ababbaaaaabbaaaaaabaaaaaaaaabbaaaaaaaaaaaaaaaaaaaaaa^ +@PATHBIO-SOLEXA2_30TUEAAXX:3:2:627:209/1 +TTCGTCATGCTGCTGCCTTCCAAAGACTTCTGGTACTCTGTGGTCAACGGGGT ++PATHBIO-SOLEXA2_30TUEAAXX:3:2:627:209/1 +bbaaabbaaaaaaaaaaaaaaaa__`aaaaaaaaaaaaaaa__`_````_^^[ +@PATHBIO-SOLEXA2_30TUEAAXX:3:20:978:442/1 +CCTGCACAGTGAAAACTGGGCTGGACCCAACCCTTGTGGGCATTTGTGGTGAG ++PATHBIO-SOLEXA2_30TUEAAXX:3:20:978:442/1 +aaabaabaaaabaabaaaaaabaaaabbbabaaaaaaaaaaaaaaaaaaaaab +@PATHBIO-SOLEXA2_30TUEAAXX:3:85:572:390/1 +CAGGTTGTAGCTCACGGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:85:572:390/1 +aaaaaaaababbaabaaaaaabaabbaabaaababaaaaaaaaabbaaaabaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:73:901:1886/1 +CTATGCAGACTTCGTGGTGACTTTCGTCATGCTGCTGCCTTCCAAAGACTTCT ++PATHBIO-SOLEXA2_30TUEAAXX:3:73:901:1886/1 +babaaaaaaabaaaaaaabbaaabaaaaaaaaaaaaaaaaaaaaaaaaabbaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/ZDHHC7-ABCB9_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/ZDHHC7-ABCB9_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,64 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:92:573:1783/2 +GGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGGCAGGGTCGGTGAGCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:92:573:1783/2 +aaaaa_KQ\aaaaaaaa`U`b`]_S_````]_aa^Q[`^V[a_]_a]SZHX[G +@PATHBIO-SOLEXA2_30TUEAAXX:3:13:326:121/2 +GGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGGCAGGGTCGGTGAGCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:13:326:121/2 +aaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa^[[a[R[U[XO +@PATHBIO-SOLEXA2_30TUEAAXX:3:33:576:1681/2 +TCTGTGGTCAACGGGGTCATCTTTAACTGCTTGGCCGTGCTTGCCCTGTCATC ++PATHBIO-SOLEXA2_30TUEAAXX:3:33:576:1681/2 +baaaaaaaaaaaaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaa_]` +@PATHBIO-SOLEXA2_30TUEAAXX:3:1:668:1831/2 +CAGGTTGTAGCTCACGGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:1:668:1831/2 +aZaaaaaaaaaa_Z_aaaaaaaaaab^Q^`]_^V[_[GZEUX[a[MHX^XEUK +@PATHBIO-SOLEXA2_30TUEAAXX:3:33:1105:1973/2 +CCCTGTCATCCCACCTGAGAACCATGCTCACCGACCCTGCCTGGCCTCGCCCT ++PATHBIO-SOLEXA2_30TUEAAXX:3:33:1105:1973/2 +bbbbbbbaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_Z^aaaaaaaa_ +@PATHBIO-SOLEXA2_30TUEAAXX:3:7:647:325/2 +CCGCATCCTGCTGGTTGGAGGTGGGCGGGTGCTGAAGGCCAGGTTGTAGCTCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:7:647:325/2 +aaaaaabaaaaaaaaabaabaabaaaabbaaaaaaabaaaa[aaaaaaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:92:512:1173/2 +ATGATCTGTGCTGTCATGACGTGGCTTCTGGTCGCCTATGCAGACTTCGTGGT ++PATHBIO-SOLEXA2_30TUEAAXX:3:92:512:1173/2 +aaaaaaaaaabaaaaaaaabababaaaaaaaababaaaaaaaaaabbaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:89:1268:572/2 +CGCATCCTGCTGGTTGGAGGTGGGCGGGTGCTGAAGGCCAGGTTGTAGCTCAC ++PATHBIO-SOLEXA2_30TUEAAXX:3:89:1268:572/2 +aaaaaabaaaaaaaaaaaaaaaaaaaaaaaa`[^^aaa_^__^^__``_`_`` +@PATHBIO-SOLEXA2_30TUEAAXX:3:38:1633:1991/2 +GAAGGCCAGGTTGTAGCTCACGGGGGCAAGGCCATCATCATCCACAGGGCGAG ++PATHBIO-SOLEXA2_30TUEAAXX:3:38:1633:1991/2 +bbabbbaaaaaaaaaaaaaaaaaaaa`Z`aaaaa_`_aa`_`^V^Z__^V[M[ +@PATHBIO-SOLEXA2_30TUEAAXX:3:76:1226:1886/2 +AGACTTCGTGGTGACTTTCGTCATGCTGCTGCCTTCCAAAGACTTCTGGTACT ++PATHBIO-SOLEXA2_30TUEAAXX:3:76:1226:1886/2 +aaabbabaaabaaaaabaaaaaaaabaaaaabbaaabaaaaaaaaaaaaaaaa 
+@PATHBIO-SOLEXA2_30TUEAAXX:3:74:1204:1439/2 +TTGTAGCTCACGGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGGCAGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:74:1204:1439/2 +aaaaaabbabaaaaaabbbbabaaabaaababaaaaaabbaaaabaaaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:58:213:1628/2 +TGGCTTCTGGTCGCCTATGCAGACTTCGTGGTGACTTTCGTCATGCTGCTGCC ++PATHBIO-SOLEXA2_30TUEAAXX:3:58:213:1628/2 +aabbaaaaaa^aaaa\aabaaaaaabbaaaaaaaaaaaaaa_S_aaWaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:2:627:209/2 +CCGCTGCAAGGTCGGAGCTGAGACTGGCGTGCCTTGAGACAGGCTGGGGGAAA ++PATHBIO-SOLEXA2_30TUEAAXX:3:2:627:209/2 +aaaaaaaaaa```a`__`aaaaa```a`^[^^^^_XU[XXU[^XUPEXXEKXU +@PATHBIO-SOLEXA2_30TUEAAXX:3:20:978:442/2 +TCCCACCCGCGCTGCGCCGCTGCAAGGTCGGAGCTGAGACTGGCGTGCCTTGA ++PATHBIO-SOLEXA2_30TUEAAXX:3:20:978:442/2 +baaaabbaaaaaaabbaaaaabaaaaa^aaaaaabb^bababbaaaaaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:85:572:390/2 +TGCTGTCATGACGTGGCTTCTGGCCGCCTATGCAGACTTCGTGGTGACTTTCG ++PATHBIO-SOLEXA2_30TUEAAXX:3:85:572:390/2 +a^a\\Wa^baaaaaaaaaaaabZI\aaaaaaaaaaaaaaaa^aa\aaaaaaa^ +@PATHBIO-SOLEXA2_30TUEAAXX:3:73:901:1886/2 +GTTGGAGGTGGGCGGGTGCTGAAGGCCAGGTTGTAGCTCACGGGGGCAAGGCC ++PATHBIO-SOLEXA2_30TUEAAXX:3:73:901:1886/2 +baaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaa`]]aa^ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/ZNF649-ZNF577_1.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/ZNF649-ZNF577_1.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,36 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:92:1636:1302/1 +AGGTACTTCTTACTCCACAGTCCATACACTTGCCACCTGGCTAAAATTATTTG ++PATHBIO-SOLEXA2_30TUEAAXX:3:92:1636:1302/1 +abaaaaaabaaabababaabbbaababaaaaabaabaaababaaa^aaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:89:916:968/1 +GCTAGCAACTCTAGTATGTTCTCTCTCTTCTGTCTATTCTGGGCCTTCCCAGA ++PATHBIO-SOLEXA2_30TUEAAXX:3:89:916:968/1 +abaaabaaabaabbaabaab^aabaaabbaabbabaabbababbaaaaaaaLa +@PATHBIO-SOLEXA2_30TUEAAXX:3:79:83:1163/1 +CTCATTGTACATCACAGAACTCACACGGGAGAGAGACCCTATGGCTGTGATGA ++PATHBIO-SOLEXA2_30TUEAAXX:3:79:83:1163/1 +baaabbbbbaaaaaaaaaaabbbbbabaababbaaaabaaaaaaaa`UZU]Z\ +@PATHBIO-SOLEXA2_30TUEAAXX:3:30:232:643/1 +TCTCAGGTCAAGCTACCACTGGAAATGATGATCTTCCCCAGCCTGGAAGCTCC ++PATHBIO-SOLEXA2_30TUEAAXX:3:30:232:643/1 +abbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:20:1494:1497/1 +TCTGTCTATTCTGGGCCTTCCCAGAAGTGGTGGTCAGGTATCATCTCAGGTCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:20:1494:1497/1 +aaaaa^aaaaUaZSV^aa\aaZ_W`^VS[aaa[GUKU[KUXER[[M_[[XMXX +@PATHBIO-SOLEXA2_30TUEAAXX:3:51:343:357/1 +TCTCTTCTGTCTATTCTGGGCCTTCCCAGAAGTGGTGGTCAGGTATCATCTCA ++PATHBIO-SOLEXA2_30TUEAAXX:3:51:343:357/1 +aaaaaabbbabb]U]aaaaaaaaaaaaaaaa^H^aVV[Wa\aa_S]aaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:85:776:696/1 +CTCATTGTACATCACAGAACTCACACGGGAGAGAGACCCTATGGCTGTGATGA ++PATHBIO-SOLEXA2_30TUEAAXX:3:85:776:696/1 +abaaabbbbaaaaaaaaaaaabbaaaaaaaaaaaaaaaaaaaaaaa[KZQ[XV +@PATHBIO-SOLEXA2_30TUEAAXX:3:88:32:1594/1 +TGGGAAAGCCTTCCTTACAAAGACAATGCTCATTGTACATCACAGAACTCACA ++PATHBIO-SOLEXA2_30TUEAAXX:3:88:32:1594/1 +bbaaaaaabbabbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa` +@PATHBIO-SOLEXA2_30TUEAAXX:3:35:417:135/1 +AACCCTATAAATGCAGTGACTGTGGGAAAGCCTTCCTTACAAAGACAATGCTC ++PATHBIO-SOLEXA2_30TUEAAXX:3:35:417:135/1 +aaaabbaaaaabbaaaabbabaaaaaabaaaaaaaaaaaaaaaaaaaaaaaa_ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tests/vcap_pe_53bp/ZNF649-ZNF577_2.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/chimerascan/tests/vcap_pe_53bp/ZNF649-ZNF577_2.fq Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,36 @@ +@PATHBIO-SOLEXA2_30TUEAAXX:3:92:1636:1302/2 +CAGAAGTCAGGACTCATTAGACATCAGAAAATTCACTCAGGAGAGAAACCCTA ++PATHBIO-SOLEXA2_30TUEAAXX:3:92:1636:1302/2 +aaaaaaOaabaaaaaaaaaaaaaaaaaababaaabaaaaaaaaabaZaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:89:916:968/2 +ACAATGCTCATTGTACATCACAGAACTCACACGGGAGAGAGACCCTATGGCTG ++PATHBIO-SOLEXA2_30TUEAAXX:3:89:916:968/2 +aaabaaaaaaa\aWaababaaaabaaababbaabaaaaaaWaaaaaaaWabaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:79:83:1163/2 +GCAACTCTAGTATGTTCTCTCTCTTCTGTCTATTCTGGGCCTTCCCAGAAGTG ++PATHBIO-SOLEXA2_30TUEAAXX:3:79:83:1163/2 +aaaaaabaaaaaaaaabbbabbbbbbbbbbb^abbaaaabbbbbaaaaaaaaa +@PATHBIO-SOLEXA2_30TUEAAXX:3:30:232:643/2 +AGACATCAGAAAATTCACTCAGGAGAGAAACCCTATAAATGCAGTGACTGTGG ++PATHBIO-SOLEXA2_30TUEAAXX:3:30:232:643/2 +aaaaaaaaaaaaaaaaaabaaaaaaaaaaaabbaaaaaaaaaaaaaaaaaaa` +@PATHBIO-SOLEXA2_30TUEAAXX:3:20:1494:1497/2 +CATGATGCCCTCACCAAGTTGGAACAAGGAGAACCACTATGGACACTAGAAGA ++PATHBIO-SOLEXA2_30TUEAAXX:3:20:1494:1497/2 +[EOaaaZaaaZaa_Q_aaaaaXaaaaaa\a_X`^^QX_aaa_]]aa^^^V[_Z +@PATHBIO-SOLEXA2_30TUEAAXX:3:51:343:357/2 +TTCCTTACAAAGACAATGCTCATTGTACATCACAGAACTCACACGGGAGAGAG ++PATHBIO-SOLEXA2_30TUEAAXX:3:51:343:357/2 +aaaaaaaaaaaaa`U]aabbaaaaa_Z_aaaaaa`S]aa[Z^aaaaaaaa_XS +@PATHBIO-SOLEXA2_30TUEAAXX:3:85:776:696/2 +AGCAACTCTAGTATGTTCTCTCTCTTCTGTCTATTCTGGGCCTTCCCAGAAGT ++PATHBIO-SOLEXA2_30TUEAAXX:3:85:776:696/2 +aaaaaabbaaaaaaaabbbbbbbbbbbbbbbaaabbaaaaaaaaabaa^Z^aa +@PATHBIO-SOLEXA2_30TUEAAXX:3:88:32:1594/2 +CTCTCTCTTCTGTCTATTCTGGGCCTTCCCAGAAGTGGTGGTCAGGTATCATC ++PATHBIO-SOLEXA2_30TUEAAXX:3:88:32:1594/2 +bbbabbbbbbbbbbaaabbaaaaaaaabb`X^MEZaaaaa^XXG[[MRXXEXX +@PATHBIO-SOLEXA2_30TUEAAXX:3:35:417:135/2 +TCCTTCTTCCATTACTGAAAATGTCTTGTTCCTATAGGCCAGAACCTCAAATA ++PATHBIO-SOLEXA2_30TUEAAXX:3:35:417:135/2 +aabbbaaaaaabbaaaaaaaaaaabbbbbaaba^aaabbaaaaaaaaa_[`aa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tools/chimerascan_html_table.py --- a/chimerascan/tools/chimerascan_html_table.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,150 +0,0 @@ -#!/usr/bin/env python -''' -Created on Feb 12, 2011 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2011 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import logging -import sys -from jinja2 import Environment, PackageLoader - -# local imports -from chimerascan.lib.chimera import Chimera, ChimeraTypes - -# setup html template environment -env = Environment(loader=PackageLoader("chimerascan", "tools")) - -# URLs for special links -GENECARDS_URL = "http://www.genecards.org/cgi-bin/carddisp.pl?gene=" -UCSC_POS_URL = "http://genome.ucsc.edu/cgi-bin/hgTracks?" 
- -def get_header_row(): - return ["5' genomic region", - "5' strand", - "3' genomic region", - "3' strand", - "Cluster ID", - "5' transcripts", "3' transcripts", - "5' genes", "3' genes", - "Type", "5' -> 3' distance", - "Total frags", - "Spanning frags", - "Unique alignment positions", - "Isoform fraction 5'", - "Isoform fraction 3'", - "Breakpoint spanning reads", - "Chimera IDs"] - -def generate_row_data(line_iter, show_read_throughs, - header_fields): - type_col_num = header_fields.index("type") - txs5p_col_num = header_fields.index("transcript_ids_5p") - txs3p_col_num = header_fields.index("transcript_ids_3p") - genes5p_col_num = header_fields.index("genes5p") - genes3p_col_num = header_fields.index("genes3p") - spanning_reads_col_num = header_fields.index("breakpoint_spanning_reads") - chimera_ids_col_num = header_fields.index("chimera_ids") - for line in line_iter: - fields = line.strip().split('\t') - if ((not show_read_throughs) and - (fields[type_col_num] == ChimeraTypes.READTHROUGH)): - continue - newfields = [] - # 5' position (chr12:65432) and strand - newfields.append(("ucsc_pos", ["%s:%s-%s" % (fields[0], fields[1], fields[2])])) - newfields.append(("string", fields[8])) - # 3' position (chr12:76543) and strand - newfields.append(("ucsc_pos", ["%s:%s-%s" % (fields[3], fields[4], fields[5])])) - newfields.append(("string", fields[9])) - # cluster id - newfields.append(("string", fields[6])) - # transcripts - newfields.append(("ucsc_pos", fields[txs5p_col_num].split(","))) - newfields.append(("ucsc_pos", fields[txs3p_col_num].split(","))) - # genes - newfields.append(("genecards", fields[genes5p_col_num].split(","))) - newfields.append(("genecards", fields[genes3p_col_num].split(","))) - # chimera type - newfields.append(("string", fields[14])) - # distance - newfields.append(("string", fields[15])) - # total frags - newfields.append(("string", fields[16])) - # spanning frags - newfields.append(("string", fields[17])) - # unique alignment positions - 
newfields.append(("string", fields[18])) - # isoform fraction 5p - newfields.append(("string", fields[19])) - # isoform fraction 3p - newfields.append(("string", fields[20])) - # breakpoint spanning reads - newfields.append(("list", fields[21].split(","))) - # chimera ids - newfields.append(("list", fields[22].split(","))) - yield newfields - -def make_html_table(input_file, - ucsc_db, - show_read_throughs=False): - ucsc_pos_url = UCSC_POS_URL + "db=%s&position=" % (ucsc_db) - line_iter = open(input_file) - header_line = line_iter.next()[1:] - header_fields = header_line.strip().split('\t') - row_iter = generate_row_data(line_iter, - show_read_throughs=show_read_throughs, - header_fields=header_fields) - t = env.get_template("table_template.html") - htmlstring = t.render(colnames=get_header_row(), - ucsc_pos_url=ucsc_pos_url, - genecards_url=GENECARDS_URL, - rows=row_iter) - return htmlstring - -def main(): - from optparse import OptionParser - logging.basicConfig(level=logging.DEBUG, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") - parser = OptionParser("usage: %prog [options] <chimeras.txt>") - parser.add_option("-o", dest="output_file", default=None, - help="output file [default=stdout]") - parser.add_option("--ucsc-db", dest="ucsc_db", default="hg19", - help="UCSC Genome Version (specific to organism and " - "revision e.g. 'hg19'") - parser.add_option("--read-throughs", dest="show_read_throughs", - action="store_true", default=False, - help="include read-through chimeras in output " - "[default=%default]") - options, args = parser.parse_args() - input_file = args[0] - if options.output_file is None: - fileh = sys.stdout - else: - fileh = open(options.output_file, "w") - res = make_html_table(input_file, - ucsc_db=options.ucsc_db, - show_read_throughs=options.show_read_throughs) - print >>fileh, res - if options.output_file is not None: - fileh.close() - - -if __name__ == '__main__': - main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tools/gtf_to_genepred.py --- a/chimerascan/tools/gtf_to_genepred.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,84 +0,0 @@ -#!/usr/bin/env python -''' -Created on Feb 6, 2012 - -@author: mkiyer - -chimerascan: chimeric transcript discovery using RNA-seq - -Copyright (C) 2012 Matthew Iyer - -This program is free software: you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation, either version 3 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program. If not, see <http://www.gnu.org/licenses/>. -''' -import logging -import collections -import operator -import os -import sys -from optparse import OptionParser - -from chimerascan.lib import gtf - -def gtf_to_genepred(gtf_file, genepred_file): - # group by transcript id - logging.info("Reading GTF file") - chrom_exon_features = collections.defaultdict(lambda: collections.defaultdict(lambda: [])) - for feature in gtf.GTFFeature.parse(open(gtf_file)): - if feature.feature_type == "exon": - transcript_id = feature.attrs["transcript_id"] - chrom_exon_features[feature.seqid][transcript_id].append(feature) - # convert to genepred - logging.info("Writing GenePred file") - outfh = open(genepred_file, "w") - for chrom in sorted(chrom_exon_features): - logging.debug("Chromosome %s" % (chrom)) - exon_features = chrom_exon_features[chrom].values() - exon_features.sort(key=lambda exon_list: min(x.start for x in exon_list)) - for exons in exon_features: - # sort exons - exons.sort(key=operator.attrgetter('start')) - chrom = exons[0].seqid - tx_start = exons[0].start - tx_end = exons[-1].end - strand = exons[0].strand - transcript_id = exons[0].attrs['transcript_id'] - gene_name = exons[0].attrs['gene_name'] - # write genepred 
fields - fields = [transcript_id, chrom, strand, str(tx_start), - str(tx_end), str(tx_start), str(tx_start), - str(len(exons)), - ",".join(map(str,[x.start for x in exons])) + ",", - ",".join(map(str,[x.end for x in exons])) + ",", - gene_name] - print >>outfh, "\t".join(fields) - outfh.close() - -def main(): - logging.basicConfig(level=logging.DEBUG, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") - parser = OptionParser("usage: %prog <input.gtf> <genepred_output.txt>") - options, args = parser.parse_args() - # check command line arguments - if len(args) < 2: - parser.error("Incorrect number of command line arguments") - gtf_file = args[0] - genepred_file = args[1] - # check that input files exist - if not os.path.isfile(gtf_file): - parser.error("GTF file '%s' not found" % (gtf_file)) - gtf_to_genepred(gtf_file, genepred_file) - return 0 - -if __name__ == '__main__': - sys.exit(main()) |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tools/make_false_positive_file.py --- a/chimerascan/tools/make_false_positive_file.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,47 +0,0 @@ -#!/usr/bin/env python -''' -Created on Jul 6, 2011 - -@author: mkiyer -''' -import logging -import sys -import collections - -from chimerascan.lib.chimera import Chimera - -def main(): - from optparse import OptionParser - logging.basicConfig(level=logging.DEBUG, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s") - parser = OptionParser("usage: %prog [options] <chimeras.txt> [<chimeras2.txt> <chimeras3.txt> ...]") - parser.add_option("-o", dest="output_file", default=None, - help="output file [default=stdout]") - parser.add_option("-n", dest="num_files", type="int", default=1, - help="chimera must be recurrent in N samples " - "to make considered a false positive " - "[default=%default]") - options, args = parser.parse_args() - input_files = args - false_pos_chimeras = collections.defaultdict(lambda: 0) - for input_file in input_files: - logging.info("Processing file %s" % (input_file)) - num_chimeras = 0 - for c in Chimera.parse(open(input_file)): - key = (c.partner5p.tx_name, c.partner5p.end, c.partner3p.tx_name, c.partner3p.start) - false_pos_chimeras[key] += 1 - num_chimeras += 1 - logging.info("\tchimeras in file: %d" % (num_chimeras)) - logging.info("\tcurrent false positive candidates: %d" % (len(false_pos_chimeras))) - if options.output_file is None: - fileh = sys.stdout - else: - fileh = open(options.output_file, "w") - for key,recurrence in false_pos_chimeras.iteritems(): - if recurrence >= options.num_files: - print >>fileh, '\t'.join(map(str,key)) - if options.output_file is not None: - fileh.close() - -if __name__ == '__main__': - main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tools/sortable.js --- a/chimerascan/tools/sortable.js Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,323 +0,0 @@\n-/*\n-Table sorting script by Joost de Valk, check it out at http://www.joostdevalk.nl/code/sortable-table/.\n-Based on a script from http://www.kryogenix.org/code/browser/sorttable/.\n-Distributed under the MIT license: http://www.kryogenix.org/code/browser/licence.html .\n-\n-Copyright (c) 1997-2007 Stuart Langridge, Joost de Valk.\n-\n-Version 1.5.7\n-*/\n-\n-/* You can change these values */\n-var image_path = "http://www.joostdevalk.nl/code/sortable-table/";\n-var image_up = "arrow-up.gif";\n-var image_down = "arrow-down.gif";\n-var image_none = "arrow-none.gif";\n-var europeandate = true;\n-var alternate_row_colors = true;\n-\n-/* Don\'t change anything below this unless you know what you\'re doing */\n-addEvent(window, "load", sortables_init);\n-\n-var SORT_COLUMN_INDEX;\n-var thead = false;\n-\n-function sortables_init() {\n-\t// Find all tables with class sortable and make them sortable\n-\tif (!document.getElementsByTagName) return;\n-\ttbls = document.getElementsByTagName("table");\n-\tfor (ti=0;ti<tbls.length;ti++) {\n-\t\tthisTbl = tbls[ti];\n-\t\tif (((\' \'+thisTbl.className+\' \').indexOf("sortable") != -1) && (thisTbl.id)) {\n-\t\t\tts_makeSortable(thisTbl);\n-\t\t}\n-\t}\n-}\n-\n-function ts_makeSortable(t) {\n-\tif (t.rows && t.rows.length > 0) {\n-\t\tif (t.tHead && t.tHead.rows.length > 0) {\n-\t\t\tvar firstRow = t.tHead.rows[t.tHead.rows.length-1];\n-\t\t\tthead = true;\n-\t\t} else {\n-\t\t\tvar firstRow = t.rows[0];\n-\t\t}\n-\t}\n-\tif (!firstRow) return;\n-\t\n-\t// We have a first row: assume it\'s the header, and make its contents clickable links\n-\tfor (var i=0;i<firstRow.cells.length;i++) {\n-\t\tvar cell = firstRow.cells[i];\n-\t\tvar txt = ts_getInnerText(cell);\n-\t\tif (cell.className != "unsortable" && cell.className.indexOf("unsortable") == -1) {\n-\t\t\tcell.innerHTML = \'<a href="#" class="sortheader" onclick="ts_resortTable(this, \'+i+\');return false;">\'+txt+\'<span class="sortarrow"> <img src="\'+ 
image_path + image_none + \'" alt="↓"/></span></a>\';\n-\t\t}\n-\t}\n-\tif (alternate_row_colors) {\n-\t\talternate(t);\n-\t}\n-}\n-\n-function ts_getInnerText(el) {\n-\tif (typeof el == "string") return el;\n-\tif (typeof el == "undefined") { return el };\n-\tif (el.innerText) return el.innerText;\t//Not needed but it is faster\n-\tvar str = "";\n-\t\n-\tvar cs = el.childNodes;\n-\tvar l = cs.length;\n-\tfor (var i = 0; i < l; i++) {\n-\t\tswitch (cs[i].nodeType) {\n-\t\t\tcase 1: //ELEMENT_NODE\n-\t\t\t\tstr += ts_getInnerText(cs[i]);\n-\t\t\t\tbreak;\n-\t\t\tcase 3:\t//TEXT_NODE\n-\t\t\t\tstr += cs[i].nodeValue;\n-\t\t\t\tbreak;\n-\t\t}\n-\t}\n-\treturn str;\n-}\n-\n-function ts_resortTable(lnk, clid) {\n-\tvar span;\n-\tfor (var ci=0;ci<lnk.childNodes.length;ci++) {\n-\t\tif (lnk.childNodes[ci].tagName && lnk.childNodes[ci].tagName.toLowerCase() == \'span\') span = lnk.childNodes[ci];\n-\t}\n-\tvar spantext = ts_getInnerText(span);\n-\tvar td = lnk.parentNode;\n-\tvar column = clid || td.cellIndex;\n-\tvar t = getParent(td,\'TABLE\');\n-\t// Work out a type for the column\n-\tif (t.rows.length <= 1) return;\n-\tvar itm = "";\n-\tvar i = 0;\n-\twhile (itm == "" && i < t.tBodies[0].rows.length) {\n-\t\tvar itm = ts_getInnerText(t.tBodies[0].rows[i].cells[column]);\n-\t\titm = trim(itm);\n-\t\tif (itm.substr(0,4) == "<!--" || itm.length == 0) {\n-\t\t\titm = "";\n-\t\t}\n-\t\ti++;\n-\t}\n-\tif (itm == "") return; \n-\tsortfn = ts_sort_caseinsensitive;\n-\tif (itm.match(/^\\d\\d[\\/\\.-][a-zA-z][a-zA-Z][a-zA-Z][\\/\\.-]\\d\\d\\d\\d$/)) sortfn = ts_sort_date;\n-\tif (itm.match(/^\\d\\d[\\/\\.-]\\d\\d[\\/\\.-]\\d\\d\\d{2}?$/)) sortfn = ts_sort_date;\n-\tif (itm.match(/^-?[\xa3$\x80\xdb\xa2\xb4]\\d/)) sortfn = ts_sort_numeric;\n-\tif (itm.match(/^-?(\\d+[,\\.]?)+(E[-+][\\d]+)?%?$/)) sortfn = ts_sort_numeric;\n-\tSORT_COLUMN_INDEX = column;\n-\tvar firstRow = new Array();\n-\tvar newRows = new Array();\n-\tfor (k=0;k<t.tBodies.length;k++) {\n-\t\tfor 
(i=0;i<t.tBodies[k].rows[0].length;i++) { \n-\t\t\tfirstRow[i] = t.tBodies[k].rows[0][i]; \n-\t\t}\n-\t}\n-\tfor (k=0;k<t.tBodies.length;k++) {\n-\t\tif (!thead) {\n-\t\t\t// Skip the first row\n-\t\t\tfor (j=1;j<t.tBodies[k].rows.length;j++) { \n-\t\t\t\tnewRows[j-1] = t.tBodies[k].rows[j];\n-\t\t\t}\n-\t\t} else {\n-\t\t\t// Do NOT skip the f'..b'{\n-\t\tmtstr = date.substr(3,3);\n-\t\tmtstr = mtstr.toLowerCase();\n-\t\tswitch(mtstr) {\n-\t\t\tcase "jan": var mt = "01"; break;\n-\t\t\tcase "feb": var mt = "02"; break;\n-\t\t\tcase "mar": var mt = "03"; break;\n-\t\t\tcase "apr": var mt = "04"; break;\n-\t\t\tcase "may": var mt = "05"; break;\n-\t\t\tcase "jun": var mt = "06"; break;\n-\t\t\tcase "jul": var mt = "07"; break;\n-\t\t\tcase "aug": var mt = "08"; break;\n-\t\t\tcase "sep": var mt = "09"; break;\n-\t\t\tcase "oct": var mt = "10"; break;\n-\t\t\tcase "nov": var mt = "11"; break;\n-\t\t\tcase "dec": var mt = "12"; break;\n-\t\t\t// default: var mt = "00";\n-\t\t}\n-\t\tdt = date.substr(7,4)+mt+date.substr(0,2);\n-\t\treturn dt;\n-\t} else if (date.length == 10) {\n-\t\tif (europeandate == false) {\n-\t\t\tdt = date.substr(6,4)+date.substr(0,2)+date.substr(3,2);\n-\t\t\treturn dt;\n-\t\t} else {\n-\t\t\tdt = date.substr(6,4)+date.substr(3,2)+date.substr(0,2);\n-\t\t\treturn dt;\n-\t\t}\n-\t} else if (date.length == 8) {\n-\t\tyr = date.substr(6,2);\n-\t\tif (parseInt(yr) < 50) { \n-\t\t\tyr = \'20\'+yr; \n-\t\t} else { \n-\t\t\tyr = \'19\'+yr; \n-\t\t}\n-\t\tif (europeandate == true) {\n-\t\t\tdt = yr+date.substr(3,2)+date.substr(0,2);\n-\t\t\treturn dt;\n-\t\t} else {\n-\t\t\tdt = yr+date.substr(0,2)+date.substr(3,2);\n-\t\t\treturn dt;\n-\t\t}\n-\t}\n-\treturn dt;\n-}\n-\n-function ts_sort_date(a,b) {\n-\tdt1 = sort_date(ts_getInnerText(a.cells[SORT_COLUMN_INDEX]));\n-\tdt2 = sort_date(ts_getInnerText(b.cells[SORT_COLUMN_INDEX]));\n-\t\n-\tif (dt1==dt2) {\n-\t\treturn 0;\n-\t}\n-\tif (dt1<dt2) { \n-\t\treturn -1;\n-\t}\n-\treturn 1;\n-}\n-function 
ts_sort_numeric(a,b) {\n-\tvar aa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]);\n-\taa = clean_num(aa);\n-\tvar bb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]);\n-\tbb = clean_num(bb);\n-\treturn compare_numeric(aa,bb);\n-}\n-function compare_numeric(a,b) {\n-\tvar a = parseFloat(a);\n-\ta = (isNaN(a) ? 0 : a);\n-\tvar b = parseFloat(b);\n-\tb = (isNaN(b) ? 0 : b);\n-\treturn a - b;\n-}\n-function ts_sort_caseinsensitive(a,b) {\n-\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]).toLowerCase();\n-\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]).toLowerCase();\n-\tif (aa==bb) {\n-\t\treturn 0;\n-\t}\n-\tif (aa<bb) {\n-\t\treturn -1;\n-\t}\n-\treturn 1;\n-}\n-function ts_sort_default(a,b) {\n-\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]);\n-\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]);\n-\tif (aa==bb) {\n-\t\treturn 0;\n-\t}\n-\tif (aa<bb) {\n-\t\treturn -1;\n-\t}\n-\treturn 1;\n-}\n-function addEvent(elm, evType, fn, useCapture)\n-// addEvent and removeEvent\n-// cross-browser event handling for IE5+,\tNS6 and Mozilla\n-// By Scott Andrew\n-{\n-\tif (elm.addEventListener){\n-\t\telm.addEventListener(evType, fn, useCapture);\n-\t\treturn true;\n-\t} else if (elm.attachEvent){\n-\t\tvar r = elm.attachEvent("on"+evType, fn);\n-\t\treturn r;\n-\t} else {\n-\t\talert("Handler could not be removed");\n-\t}\n-}\n-function clean_num(str) {\n-\tstr = str.replace(new RegExp(/[^-?0-9.]/g),"");\n-\treturn str;\n-}\n-function trim(s) {\n-\treturn s.replace(/^\\s+|\\s+$/g, "");\n-}\n-function alternate(table) {\n-\t// Take object table and get all it\'s tbodies.\n-\tvar tableBodies = table.getElementsByTagName("tbody");\n-\t// Loop through these tbodies\n-\tfor (var i = 0; i < tableBodies.length; i++) {\n-\t\t// Take the tbody, and get all it\'s rows\n-\t\tvar tableRows = tableBodies[i].getElementsByTagName("tr");\n-\t\t// Loop through these rows\n-\t\t// Start at 1 because we want to leave the heading row untouched\n-\t\tfor (var j = 0; j < tableRows.length; j++) 
{\n-\t\t\t// Check if j is even, and apply classes for both possible results\n-\t\t\tif ( (j % 2) == 0 ) {\n-\t\t\t\tif ( !(tableRows[j].className.indexOf(\'odd\') == -1) ) {\n-\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'odd\', \'even\');\n-\t\t\t\t} else {\n-\t\t\t\t\tif ( tableRows[j].className.indexOf(\'even\') == -1 ) {\n-\t\t\t\t\t\ttableRows[j].className += " even";\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t} else {\n-\t\t\t\tif ( !(tableRows[j].className.indexOf(\'even\') == -1) ) {\n-\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'even\', \'odd\');\n-\t\t\t\t} else {\n-\t\t\t\t\tif ( tableRows[j].className.indexOf(\'odd\') == -1 ) {\n-\t\t\t\t\t\ttableRows[j].className += " odd";\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t} \n-\t\t}\n-\t}\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tools/sortable_us.js --- a/chimerascan/tools/sortable_us.js Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,323 +0,0 @@\n-/*\n-Table sorting script by Joost de Valk, check it out at http://www.joostdevalk.nl/code/sortable-table/.\n-Based on a script from http://www.kryogenix.org/code/browser/sorttable/.\n-Distributed under the MIT license: http://www.kryogenix.org/code/browser/licence.html .\n-\n-Copyright (c) 1997-2007 Stuart Langridge, Joost de Valk.\n-\n-Version 1.5.7\n-*/\n-\n-/* You can change these values */\n-var image_path = "http://www.joostdevalk.nl/code/sortable-table/";\n-var image_up = "arrow-up.gif";\n-var image_down = "arrow-down.gif";\n-var image_none = "arrow-none.gif";\n-var europeandate = false;\n-var alternate_row_colors = true;\n-\n-/* Don\'t change anything below this unless you know what you\'re doing */\n-addEvent(window, "load", sortables_init);\n-\n-var SORT_COLUMN_INDEX;\n-var thead = false;\n-\n-function sortables_init() {\n-\t// Find all tables with class sortable and make them sortable\n-\tif (!document.getElementsByTagName) return;\n-\ttbls = document.getElementsByTagName("table");\n-\tfor (ti=0;ti<tbls.length;ti++) {\n-\t\tthisTbl = tbls[ti];\n-\t\tif (((\' \'+thisTbl.className+\' \').indexOf("sortable") != -1) && (thisTbl.id)) {\n-\t\t\tts_makeSortable(thisTbl);\n-\t\t}\n-\t}\n-}\n-\n-function ts_makeSortable(t) {\n-\tif (t.rows && t.rows.length > 0) {\n-\t\tif (t.tHead && t.tHead.rows.length > 0) {\n-\t\t\tvar firstRow = t.tHead.rows[t.tHead.rows.length-1];\n-\t\t\tthead = true;\n-\t\t} else {\n-\t\t\tvar firstRow = t.rows[0];\n-\t\t}\n-\t}\n-\tif (!firstRow) return;\n-\t\n-\t// We have a first row: assume it\'s the header, and make its contents clickable links\n-\tfor (var i=0;i<firstRow.cells.length;i++) {\n-\t\tvar cell = firstRow.cells[i];\n-\t\tvar txt = ts_getInnerText(cell);\n-\t\tif (cell.className != "unsortable" && cell.className.indexOf("unsortable") == -1) {\n-\t\t\tcell.innerHTML = \'<a href="#" class="sortheader" onclick="ts_resortTable(this, \'+i+\');return false;">\'+txt+\'<span class="sortarrow"> <img src="\'+ 
image_path + image_none + \'" alt="↓"/></span></a>\';\n-\t\t}\n-\t}\n-\tif (alternate_row_colors) {\n-\t\talternate(t);\n-\t}\n-}\n-\n-function ts_getInnerText(el) {\n-\tif (typeof el == "string") return el;\n-\tif (typeof el == "undefined") { return el };\n-\tif (el.innerText) return el.innerText;\t//Not needed but it is faster\n-\tvar str = "";\n-\t\n-\tvar cs = el.childNodes;\n-\tvar l = cs.length;\n-\tfor (var i = 0; i < l; i++) {\n-\t\tswitch (cs[i].nodeType) {\n-\t\t\tcase 1: //ELEMENT_NODE\n-\t\t\t\tstr += ts_getInnerText(cs[i]);\n-\t\t\t\tbreak;\n-\t\t\tcase 3:\t//TEXT_NODE\n-\t\t\t\tstr += cs[i].nodeValue;\n-\t\t\t\tbreak;\n-\t\t}\n-\t}\n-\treturn str;\n-}\n-\n-function ts_resortTable(lnk, clid) {\n-\tvar span;\n-\tfor (var ci=0;ci<lnk.childNodes.length;ci++) {\n-\t\tif (lnk.childNodes[ci].tagName && lnk.childNodes[ci].tagName.toLowerCase() == \'span\') span = lnk.childNodes[ci];\n-\t}\n-\tvar spantext = ts_getInnerText(span);\n-\tvar td = lnk.parentNode;\n-\tvar column = clid || td.cellIndex;\n-\tvar t = getParent(td,\'TABLE\');\n-\t// Work out a type for the column\n-\tif (t.rows.length <= 1) return;\n-\tvar itm = "";\n-\tvar i = 0;\n-\twhile (itm == "" && i < t.tBodies[0].rows.length) {\n-\t\tvar itm = ts_getInnerText(t.tBodies[0].rows[i].cells[column]);\n-\t\titm = trim(itm);\n-\t\tif (itm.substr(0,4) == "<!--" || itm.length == 0) {\n-\t\t\titm = "";\n-\t\t}\n-\t\ti++;\n-\t}\n-\tif (itm == "") return; \n-\tsortfn = ts_sort_caseinsensitive;\n-\tif (itm.match(/^\\d\\d[\\/\\.-][a-zA-z][a-zA-Z][a-zA-Z][\\/\\.-]\\d\\d\\d\\d$/)) sortfn = ts_sort_date;\n-\tif (itm.match(/^\\d\\d[\\/\\.-]\\d\\d[\\/\\.-]\\d\\d\\d{2}?$/)) sortfn = ts_sort_date;\n-\tif (itm.match(/^-?[\xa3$\x80\xdb\xa2\xb4]\\d/)) sortfn = ts_sort_numeric;\n-\tif (itm.match(/^-?(\\d+[,\\.]?)+(E[-+][\\d]+)?%?$/)) sortfn = ts_sort_numeric;\n-\tSORT_COLUMN_INDEX = column;\n-\tvar firstRow = new Array();\n-\tvar newRows = new Array();\n-\tfor (k=0;k<t.tBodies.length;k++) {\n-\t\tfor 
(i=0;i<t.tBodies[k].rows[0].length;i++) { \n-\t\t\tfirstRow[i] = t.tBodies[k].rows[0][i]; \n-\t\t}\n-\t}\n-\tfor (k=0;k<t.tBodies.length;k++) {\n-\t\tif (!thead) {\n-\t\t\t// Skip the first row\n-\t\t\tfor (j=1;j<t.tBodies[k].rows.length;j++) { \n-\t\t\t\tnewRows[j-1] = t.tBodies[k].rows[j];\n-\t\t\t}\n-\t\t} else {\n-\t\t\t// Do NOT skip the '..b'{\n-\t\tmtstr = date.substr(3,3);\n-\t\tmtstr = mtstr.toLowerCase();\n-\t\tswitch(mtstr) {\n-\t\t\tcase "jan": var mt = "01"; break;\n-\t\t\tcase "feb": var mt = "02"; break;\n-\t\t\tcase "mar": var mt = "03"; break;\n-\t\t\tcase "apr": var mt = "04"; break;\n-\t\t\tcase "may": var mt = "05"; break;\n-\t\t\tcase "jun": var mt = "06"; break;\n-\t\t\tcase "jul": var mt = "07"; break;\n-\t\t\tcase "aug": var mt = "08"; break;\n-\t\t\tcase "sep": var mt = "09"; break;\n-\t\t\tcase "oct": var mt = "10"; break;\n-\t\t\tcase "nov": var mt = "11"; break;\n-\t\t\tcase "dec": var mt = "12"; break;\n-\t\t\t// default: var mt = "00";\n-\t\t}\n-\t\tdt = date.substr(7,4)+mt+date.substr(0,2);\n-\t\treturn dt;\n-\t} else if (date.length == 10) {\n-\t\tif (europeandate == false) {\n-\t\t\tdt = date.substr(6,4)+date.substr(0,2)+date.substr(3,2);\n-\t\t\treturn dt;\n-\t\t} else {\n-\t\t\tdt = date.substr(6,4)+date.substr(3,2)+date.substr(0,2);\n-\t\t\treturn dt;\n-\t\t}\n-\t} else if (date.length == 8) {\n-\t\tyr = date.substr(6,2);\n-\t\tif (parseInt(yr) < 50) { \n-\t\t\tyr = \'20\'+yr; \n-\t\t} else { \n-\t\t\tyr = \'19\'+yr; \n-\t\t}\n-\t\tif (europeandate == true) {\n-\t\t\tdt = yr+date.substr(3,2)+date.substr(0,2);\n-\t\t\treturn dt;\n-\t\t} else {\n-\t\t\tdt = yr+date.substr(0,2)+date.substr(3,2);\n-\t\t\treturn dt;\n-\t\t}\n-\t}\n-\treturn dt;\n-}\n-\n-function ts_sort_date(a,b) {\n-\tdt1 = sort_date(ts_getInnerText(a.cells[SORT_COLUMN_INDEX]));\n-\tdt2 = sort_date(ts_getInnerText(b.cells[SORT_COLUMN_INDEX]));\n-\t\n-\tif (dt1==dt2) {\n-\t\treturn 0;\n-\t}\n-\tif (dt1<dt2) { \n-\t\treturn -1;\n-\t}\n-\treturn 1;\n-}\n-function 
ts_sort_numeric(a,b) {\n-\tvar aa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]);\n-\taa = clean_num(aa);\n-\tvar bb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]);\n-\tbb = clean_num(bb);\n-\treturn compare_numeric(aa,bb);\n-}\n-function compare_numeric(a,b) {\n-\tvar a = parseFloat(a);\n-\ta = (isNaN(a) ? 0 : a);\n-\tvar b = parseFloat(b);\n-\tb = (isNaN(b) ? 0 : b);\n-\treturn a - b;\n-}\n-function ts_sort_caseinsensitive(a,b) {\n-\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]).toLowerCase();\n-\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]).toLowerCase();\n-\tif (aa==bb) {\n-\t\treturn 0;\n-\t}\n-\tif (aa<bb) {\n-\t\treturn -1;\n-\t}\n-\treturn 1;\n-}\n-function ts_sort_default(a,b) {\n-\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]);\n-\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]);\n-\tif (aa==bb) {\n-\t\treturn 0;\n-\t}\n-\tif (aa<bb) {\n-\t\treturn -1;\n-\t}\n-\treturn 1;\n-}\n-function addEvent(elm, evType, fn, useCapture)\n-// addEvent and removeEvent\n-// cross-browser event handling for IE5+,\tNS6 and Mozilla\n-// By Scott Andrew\n-{\n-\tif (elm.addEventListener){\n-\t\telm.addEventListener(evType, fn, useCapture);\n-\t\treturn true;\n-\t} else if (elm.attachEvent){\n-\t\tvar r = elm.attachEvent("on"+evType, fn);\n-\t\treturn r;\n-\t} else {\n-\t\talert("Handler could not be removed");\n-\t}\n-}\n-function clean_num(str) {\n-\tstr = str.replace(new RegExp(/[^-?0-9.]/g),"");\n-\treturn str;\n-}\n-function trim(s) {\n-\treturn s.replace(/^\\s+|\\s+$/g, "");\n-}\n-function alternate(table) {\n-\t// Take object table and get all it\'s tbodies.\n-\tvar tableBodies = table.getElementsByTagName("tbody");\n-\t// Loop through these tbodies\n-\tfor (var i = 0; i < tableBodies.length; i++) {\n-\t\t// Take the tbody, and get all it\'s rows\n-\t\tvar tableRows = tableBodies[i].getElementsByTagName("tr");\n-\t\t// Loop through these rows\n-\t\t// Start at 1 because we want to leave the heading row untouched\n-\t\tfor (var j = 0; j < tableRows.length; j++) 
{\n-\t\t\t// Check if j is even, and apply classes for both possible results\n-\t\t\tif ( (j % 2) == 0 ) {\n-\t\t\t\tif ( !(tableRows[j].className.indexOf(\'odd\') == -1) ) {\n-\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'odd\', \'even\');\n-\t\t\t\t} else {\n-\t\t\t\t\tif ( tableRows[j].className.indexOf(\'even\') == -1 ) {\n-\t\t\t\t\t\ttableRows[j].className += " even";\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t} else {\n-\t\t\t\tif ( !(tableRows[j].className.indexOf(\'even\') == -1) ) {\n-\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'even\', \'odd\');\n-\t\t\t\t} else {\n-\t\t\t\t\tif ( tableRows[j].className.indexOf(\'odd\') == -1 ) {\n-\t\t\t\t\t\ttableRows[j].className += " odd";\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t} \n-\t\t}\n-\t}\n-}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tools/table_style.css --- a/chimerascan/tools/table_style.css Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,35 +0,0 @@ -/* Copyright 2006 Joost de Valk */ -a img { - border: 0; -} -table.sortable { - border-spacing: 0; - border: 1px solid #000; - border-collapse: collapse; -} -table.sortable th, table.sortable td { - text-align: left; - padding: 2px 4px 2px 4px; - width: 100px; - border-style: solid; - border-color: #444; -} -table.sortable th { - border-width: 0px 1px 1px 1px; - background-color: #ccc; -} -table.sortable td { - border-width: 0px 1px 0px 1px; - font: 12px "Lucida Grande", Helvetica, "Arial Unicode MS", "Arial Unicode", Arial, sans-serif; -} -table.sortable tr.odd td { - background-color: #BFEFFF; -} -table.sortable tr.even td { - background-color: #ffffff; -} -table.sortable tr.sortbottom td { - border-top: 1px solid #444; - background-color: #ccc; - font-weight: bold; -} \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan/tools/table_template.html --- a/chimerascan/tools/table_template.html Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,404 +0,0 @@\n-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n-<html xmlns="http://www.w3.org/1999/xhtml">\n-\n-<head>\n-<head>\n- <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />\n- <title>chimerascan results</title>\n- <!-- <link rel="stylesheet" type="text/css" href="table_style.css"/> -->\n- <!-- <script type="text/javascript" src="sortable.js"></script> -->\n- <script type="text/javascript">\n-/*\n-Table sorting script by Joost de Valk, check it out at http://www.joostdevalk.nl/code/sortable-table/.\n-Based on a script from http://www.kryogenix.org/code/browser/sorttable/.\n-Distributed under the MIT license: http://www.kryogenix.org/code/browser/licence.html .\n-\n-Copyright (c) 1997-2007 Stuart Langridge, Joost de Valk.\n-\n-Version 1.5.7\n-*/\n-\n-/* You can change these values */\n-var image_path = "http://www.joostdevalk.nl/code/sortable-table/";\n-var image_up = "arrow-up.gif";\n-var image_down = "arrow-down.gif";\n-var image_none = "arrow-none.gif";\n-var europeandate = false;\n-var alternate_row_colors = true;\n-\n-/* Don\'t change anything below this unless you know what you\'re doing */\n-addEvent(window, "load", sortables_init);\n-\n-var SORT_COLUMN_INDEX;\n-var thead = false;\n-\n-function sortables_init() {\n-\t// Find all tables with class sortable and make them sortable\n-\tif (!document.getElementsByTagName) return;\n-\ttbls = document.getElementsByTagName("table");\n-\tfor (ti=0;ti<tbls.length;ti++) {\n-\t\tthisTbl = tbls[ti];\n-\t\tif (((\' \'+thisTbl.className+\' \').indexOf("sortable") != -1) && (thisTbl.id)) {\n-\t\t\tts_makeSortable(thisTbl);\n-\t\t}\n-\t}\n-}\n-\n-function ts_makeSortable(t) {\n-\tif (t.rows && t.rows.length > 0) {\n-\t\tif (t.tHead && t.tHead.rows.length > 0) {\n-\t\t\tvar firstRow = t.tHead.rows[t.tHead.rows.length-1];\n-\t\t\tthead = true;\n-\t\t} else {\n-\t\t\tvar firstRow = t.rows[0];\n-\t\t}\n-\t}\n-\tif 
(!firstRow) return;\n-\t\n-\t// We have a first row: assume it\'s the header, and make its contents clickable links\n-\tfor (var i=0;i<firstRow.cells.length;i++) {\n-\t\tvar cell = firstRow.cells[i];\n-\t\tvar txt = ts_getInnerText(cell);\n-\t\tif (cell.className != "unsortable" && cell.className.indexOf("unsortable") == -1) {\n-\t\t\tcell.innerHTML = \'<a href="#" class="sortheader" onclick="ts_resortTable(this, \'+i+\');return false;">\'+txt+\'<span class="sortarrow"> <img src="\'+ image_path + image_none + \'" alt="↓"/></span></a>\';\n-\t\t}\n-\t}\n-\tif (alternate_row_colors) {\n-\t\talternate(t);\n-\t}\n-}\n-\n-function ts_getInnerText(el) {\n-\tif (typeof el == "string") return el;\n-\tif (typeof el == "undefined") { return el };\n-\tif (el.innerText) return el.innerText;\t//Not needed but it is faster\n-\tvar str = "";\n-\t\n-\tvar cs = el.childNodes;\n-\tvar l = cs.length;\n-\tfor (var i = 0; i < l; i++) {\n-\t\tswitch (cs[i].nodeType) {\n-\t\t\tcase 1: //ELEMENT_NODE\n-\t\t\t\tstr += ts_getInnerText(cs[i]);\n-\t\t\t\tbreak;\n-\t\t\tcase 3:\t//TEXT_NODE\n-\t\t\t\tstr += cs[i].nodeValue;\n-\t\t\t\tbreak;\n-\t\t}\n-\t}\n-\treturn str;\n-}\n-\n-function ts_resortTable(lnk, clid) {\n-\tvar span;\n-\tfor (var ci=0;ci<lnk.childNodes.length;ci++) {\n-\t\tif (lnk.childNodes[ci].tagName && lnk.childNodes[ci].tagName.toLowerCase() == \'span\') span = lnk.childNodes[ci];\n-\t}\n-\tvar spantext = ts_getInnerText(span);\n-\tvar td = lnk.parentNode;\n-\tvar column = clid || td.cellIndex;\n-\tvar t = getParent(td,\'TABLE\');\n-\t// Work out a type for the column\n-\tif (t.rows.length <= 1) return;\n-\tvar itm = "";\n-\tvar i = 1;\n-\twhile (itm == "" && i < t.tBodies[0].rows.length) {\n-\t\tvar itm = ts_getInnerText(t.tBodies[0].rows[i].cells[column]);\n-\t\titm = trim(itm);\n-\t\tif (itm.substr(0,4) == "<!--" || itm.length == 0) {\n-\t\t\titm = "";\n-\t\t}\n-\t\ti++;\n-\t}\n-\tif (itm == "") return; \n-\t// alert(itm)\n-\tsortfn = ts_sort_caseinsensitive;\n-\tif 
(itm.match(/^\\d\\d[\\/\\.-][a-zA-z][a-zA-Z][a-zA-Z][\\/\\.-]\\d\\d\\d\\d$/)) sortfn = ts_sort_date;\n-\tif (itm.match(/^\\d\\d[\\/\\.-]\\d\\d[\\/\\.-]\\d\\d\\d{2}?$/)) sortfn = ts_sort_date;\n-\tif (itm.match(/^-?[$\\u017d]\\d/)) sortfn = ts_sort_numeric;\n-\tif (itm.match(/'..b'aN(a) ? 0 : a);\n-\tvar b = parseFloat(b);\n-\tb = (isNaN(b) ? 0 : b);\n-\treturn a - b;\n-}\n-function ts_sort_caseinsensitive(a,b) {\n-\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]).toLowerCase();\n-\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]).toLowerCase();\n-\tif (aa==bb) {\n-\t\treturn 0;\n-\t}\n-\tif (aa<bb) {\n-\t\treturn -1;\n-\t}\n-\treturn 1;\n-}\n-function ts_sort_default(a,b) {\n-\taa = ts_getInnerText(a.cells[SORT_COLUMN_INDEX]);\n-\tbb = ts_getInnerText(b.cells[SORT_COLUMN_INDEX]);\n-\tif (aa==bb) {\n-\t\treturn 0;\n-\t}\n-\tif (aa<bb) {\n-\t\treturn -1;\n-\t}\n-\treturn 1;\n-}\n-function addEvent(elm, evType, fn, useCapture)\n-// addEvent and removeEvent\n-// cross-browser event handling for IE5+,\tNS6 and Mozilla\n-// By Scott Andrew\n-{\n-\tif (elm.addEventListener){\n-\t\telm.addEventListener(evType, fn, useCapture);\n-\t\treturn true;\n-\t} else if (elm.attachEvent){\n-\t\tvar r = elm.attachEvent("on"+evType, fn);\n-\t\treturn r;\n-\t} else {\n-\t\talert("Handler could not be removed");\n-\t}\n-}\n-function clean_num(str) {\n-\tstr = str.replace(new RegExp(/[^-?0-9.]/g),"");\n-\treturn str;\n-}\n-function trim(s) {\n-\treturn s.replace(/^\\s+|\\s+$/g, "");\n-}\n-function alternate(table) {\n-\t// Take object table and get all it\'s tbodies.\n-\tvar tableBodies = table.getElementsByTagName("tbody");\n-\t// Loop through these tbodies\n-\tfor (var i = 0; i < tableBodies.length; i++) {\n-\t\t// Take the tbody, and get all it\'s rows\n-\t\tvar tableRows = tableBodies[i].getElementsByTagName("tr");\n-\t\t// Loop through these rows\n-\t\t// Start at 1 because we want to leave the heading row untouched\n-\t\tfor (var j = 0; j < tableRows.length; j++) {\n-\t\t\t// Check 
if j is even, and apply classes for both possible results\n-\t\t\tif ( (j % 2) == 0 ) {\n-\t\t\t\tif ( !(tableRows[j].className.indexOf(\'odd\') == -1) ) {\n-\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'odd\', \'even\');\n-\t\t\t\t} else {\n-\t\t\t\t\tif ( tableRows[j].className.indexOf(\'even\') == -1 ) {\n-\t\t\t\t\t\ttableRows[j].className += " even";\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t} else {\n-\t\t\t\tif ( !(tableRows[j].className.indexOf(\'even\') == -1) ) {\n-\t\t\t\t\ttableRows[j].className = tableRows[j].className.replace(\'even\', \'odd\');\n-\t\t\t\t} else {\n-\t\t\t\t\tif ( tableRows[j].className.indexOf(\'odd\') == -1 ) {\n-\t\t\t\t\t\ttableRows[j].className += " odd";\n-\t\t\t\t\t}\n-\t\t\t\t}\n-\t\t\t} \n-\t\t}\n-\t}\n-}\n- </script>\n- <style type="text/css">\n-a img {\n-\tborder: 0;\n-}\n-table.sortable {\n-\tborder-spacing: 0;\n-\tborder: 1px solid #000;\n-\tborder-collapse: collapse;\n-}\n-table.sortable th, table.sortable td {\n-\ttext-align: left;\n-\tpadding: 2px 4px 2px 4px;\n-\twidth: 100px;\n-\tborder-style: solid;\n-\tborder-color: #444;\n-}\n-table.sortable th {\n-\tborder-width: 0px 1px 1px 1px;\n-\tbackground-color: #ccc;\n-\tfont: 14px "Lucida Grande", Helvetica, "Arial Unicode MS", "Arial Unicode", Arial, sans-serif;\t\n-\tfont-weight: bold;\n-}\n-table.sortable td {\n-\tborder-width: 0px 1px 0px 1px;\n-\tfont: 12px "Lucida Grande", Helvetica, "Arial Unicode MS", "Arial Unicode", Arial, sans-serif;\n-}\n-table.sortable tr.odd td {\n-\tbackground-color: #BFEFFF;\n-}\n-table.sortable tr.even td {\n-\tbackground-color: #ffffff;\n-}\n-table.sortable tr.sortbottom td {\n-\tborder-top: 1px solid #444;\n-\tbackground-color: #ccc;\n-\tfont-weight: bold;\n-} \n- </style>\n-</head>\n-\n-<body>\n-\n-<table class="sortable" id="anyid" cellpadding="0" cellspacing="0">\n-<tr>{% for colname in colnames %}<th>{{ colname }}</th>{% endfor %}</tr>\n-{% for rowdata in rows %}\n-<tr>\n- {% for datatype,col in rowdata %}\n- <td>\n- 
{% if datatype == "ucsc_pos" %}\n- {% for itm in col %}\n- <a href="{{ ucsc_pos_url }}{{ itm }}" target="_blank">{{ itm }}</a><br/>\n- {% endfor %}\n- {% elif datatype == "genecards" %}\n- {% for itm in col %}\n- <a href="{{ genecards_url }}{{ itm }}" target="_blank">{{ itm }}</a><br/>\n- {% endfor %}\n- {% elif datatype == "list" %}\n- {% for itm in col %}{{ itm }}<br/>{% endfor %}\n- {% else %}\n- {{ col }}\n- {% endif %}\n- </td>\n- {% endfor %}\n-</tr>\n-{% endfor %}\n-</table>\n-\n-</body>\n-</html>\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 chimerascan_run.py --- a/chimerascan_run.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,1032 +0,0 @@\n-#!/usr/bin/env python\n-\'\'\'\n-Created on Jan 5, 2011\n-\n-@author: mkiyer\n-\n-chimerascan: chimeric transcript discovery using RNA-seq\n-\n-Copyright (C) 2011 Matthew Iyer\n-\n-This program is free software: you can redistribute it and/or modify\n-it under the terms of the GNU General Public License as published by\n-the Free Software Foundation, either version 3 of the License, or\n-(at your option) any later version.\n-\n-This program is distributed in the hope that it will be useful,\n-but WITHOUT ANY WARRANTY; without even the implied warranty of\n-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n-GNU General Public License for more details.\n-\n-You should have received a copy of the GNU General Public License\n-along with this program. If not, see <http://www.gnu.org/licenses/>.\n-\'\'\'\n-from chimerascan import __version__\n-\n-__author__ = "Matthew Iyer"\n-__copyright__ = "Copyright 2011, chimerascan project"\n-__credits__ = ["Matthew Iyer", "Christopher Maher"]\n-__license__ = "GPL"\n-__maintainer__ = "Matthew Iyer"\n-__email__ = "mkiyer@med.umich.edu"\n-__status__ = "beta"\n-\n-###\n-#\n-# Modified by \n-#\tBaekdoo Kim(baegi7942@gmail.com)\n-#\n-###\n-\n-import logging\n-import os\n-import subprocess\n-import sys\n-import shutil\n-from optparse import OptionParser, OptionGroup\n-import xml.etree.ElementTree as etree\n-\n-# check for python version 2.6.0 or greater\n-if sys.version_info < (2,6,0):\n- sys.stderr.write("You need python 2.6 or later to run chimerascan\\n")\n- sys.exit(1)\n-\n-# local imports\n-from chimerascan import pysam\n-import chimerascan.lib.config as config\n-from chimerascan.lib.config import JOB_SUCCESS, JOB_ERROR, MIN_SEGMENT_LENGTH\n-from chimerascan.lib.base import LibraryTypes, check_executable, \\\n- parse_bool, indent_xml, up_to_date\n-from chimerascan.lib.seq import FASTQ_QUAL_FORMATS, SANGER_FORMAT\n-from chimerascan.lib.fragment_size_distribution import 
InsertSizeDistribution\n-\n-from chimerascan.pipeline.fastq_inspect_reads import inspect_reads, detect_read_length, get_min_max_read_lengths\n-from chimerascan.pipeline.align_bowtie import align_pe, align_sr, trim_align_pe_sr\n-from chimerascan.pipeline.find_discordant_reads import find_discordant_fragments\n-from chimerascan.pipeline.discordant_reads_to_bedpe import discordant_reads_to_bedpe, sort_bedpe\n-from chimerascan.pipeline.nominate_chimeras import nominate_chimeras\n-from chimerascan.pipeline.chimeras_to_breakpoints import chimeras_to_breakpoints\n-from chimerascan.pipeline.nominate_spanning_reads import nominate_encomp_spanning_reads, extract_single_mapped_reads, nominate_single_mapped_spanning_reads\n-from chimerascan.pipeline.merge_spanning_alignments import merge_spanning_alignments\n-from chimerascan.pipeline.resolve_discordant_reads import resolve_discordant_reads\n-from chimerascan.pipeline.filter_chimeras import filter_chimeras, filter_highest_coverage_isoforms, filter_encompassing_chimeras\n-from chimerascan.pipeline.filter_homologous_genes import filter_homologous_genes\n-from chimerascan.pipeline.write_output import write_output\n-\n-# defaults for bowtie\n-DEFAULT_NUM_PROCESSORS = config.BASE_PROCESSORS\n-DEFAULT_BOWTIE_PATH = ""\n-DEFAULT_BOWTIE_ARGS = "--best --strata"\n-DEFAULT_DISCORD_BOWTIE_ARGS = "--best"\n-DEFAULT_MULTIHITS = 100\n-DEFAULT_MISMATCHES = 2\n-DEFAULT_DISCORD_MISMATCHES = 3\n-DEFAULT_SEGMENT_LENGTH = 25\n-DEFAULT_TRIM5 = 0\n-DEFAULT_TRIM3 = 0\n-DEFAULT_MIN_FRAG_LENGTH = 0\n-DEFAULT_MAX_FRAG_LENGTH = 1000\n-DEFAULT_NUM_SAMPLES_TO_DETERMINE_READ_LENGTHS = 10000\n-DEFAULT_FASTQ_QUAL_FORMAT = SANGER_FORMAT\n-DEFAULT_LIBRARY_TYPE = LibraryTypes.FR_UNSTRANDED\n-\n-DEFAULT_ISIZE_MEAN = 200\n-DEFAULT_ISIZE_STDEV = 40\n-DEFAULT_HOMOLOGY_MISMATCHES = config.BREAKPOINT_HOMOLOGY_MISMATCHES\n-DEFAULT_ANCHOR_MIN = 4\n-DEFAULT_ANCHOR_LENGTH = 8\n-DEFAULT_ANCHOR_MISMATCHES = 0\n-DEFAULT_FILTER_ISIZE_PROB = 0.01\n-DEFAULT_FILTER_UNIQUE_FRAGS 
= 2.0\n-DEFAULT_FILTER_ISOFORM_FRACTION = 0.01\n-NUM_POSITIONAL_ARGS = 4\n-DEFAULT_KEEP_TMP = True\n-\n-class RunConfig(object):\n-\n- '..b'E)\n- msg = "Filtering chimeras"\n- if up_to_date(filtered_chimera_file, resolved_spanning_chimera_file):\n- logging.info("[SKIPPED] %s" % (msg))\n- else:\n- logging.info(msg)\n- # get insert size at prob\n- filter_chimeras(input_file=resolved_spanning_chimera_file,\n- output_file=filtered_chimera_file,\n- index_dir=runconfig.index_dir,\n- bam_file=sorted_aligned_bam_file,\n- unique_frags=runconfig.filter_unique_frags,\n- isoform_fraction=runconfig.filter_isoform_fraction,\n- false_pos_file=runconfig.filter_false_pos_file)\n- #\n- # Filter homologous genes\n- #\n- homolog_filtered_chimera_file = os.path.join(tmp_dir, config.HOMOLOG_FILTERED_CHIMERA_FILE)\n- msg = "Filtering homologous chimeras"\n- if up_to_date(homolog_filtered_chimera_file, filtered_chimera_file):\n- logging.info("[SKIPPED] %s" % (msg))\n- else:\n- logging.info(msg)\n- min_isize = isize_dist.isize_at_percentile(1.0)\n- max_isize = isize_dist.isize_at_percentile(99.0)\n- filter_homologous_genes(input_file=filtered_chimera_file,\n- output_file=homolog_filtered_chimera_file,\n- index_dir=runconfig.index_dir,\n- homolog_segment_length=runconfig.segment_length-1,\n- min_isize=min_isize,\n- max_isize=max_isize,\n- bowtie_bin=bowtie_bin,\n- num_processors=runconfig.num_processors,\n- tmp_dir=tmp_dir)\n- #\n- # Choose best isoform for chimeras that share the same breakpoint\n- #\n- best_isoform_chimera_file = os.path.join(tmp_dir, config.BEST_FILTERED_CHIMERA_FILE)\n- msg = "Choosing best isoform for each chimera"\n- if up_to_date(best_isoform_chimera_file, homolog_filtered_chimera_file):\n- logging.info("[SKIPPED] %s" % (msg))\n- else:\n- logging.info(msg)\n- retcode = filter_highest_coverage_isoforms(index_dir=runconfig.index_dir,\n- input_file=homolog_filtered_chimera_file,\n- output_file=best_isoform_chimera_file)\n- #\n- # Write user-friendly output file\n- #\n- 
chimera_output_file = os.path.join(runconfig.output_dir, config.CHIMERA_OUTPUT_FILE)\n- #msg = "Writing chimeras to file %s" % (chimera_output_file)\n- if up_to_date(chimera_output_file, best_isoform_chimera_file):\n- logging.info("[SKIPPED] %s" % (msg))\n- else:\n- logging.info(msg)\n- write_output(best_isoform_chimera_file,\n- bam_file=sorted_aligned_bam_file,\n- output_file=chimera_output_file,\n- index_dir=runconfig.index_dir)\n- \n- #\n- # Move output to Galaxy data file\n- #\n- cmd = "mv %s/chimerascan_tmp/chimeras.bedpe %s/%s" % (os.path.dirname(runconfig.output_file_path), os.path.dirname(runconfig.output_file_path), runconfig.output_file_name)\n- p = subprocess.check_output(cmd.split())\n-\n- #\n- # Cleanup\n- #\n- if not runconfig.keep_tmp:\n- logging.info("Cleaning up temporary files")\n- shutil.rmtree(tmp_dir)\n- cmd_rm = "rm -r %s/chimerascan_tmp" % os.path.dirname(runconfig.output_file_path)\n- p = subprocess.check_output(cmd_rm.split())\n-\n- #\n- # Done\n- #\n- logging.info("Finished run.")\n- return JOB_SUCCESS\n-\n-\n-def main():\n- logging.basicConfig(level=logging.INFO,\n- format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")\n- # parse run parameters in config file and command line\n- runconfig = RunConfig()\n- runconfig.from_args(sys.argv[1:])\n- # run chimerascan\n- sys.exit(run_chimerascan(runconfig))\n-\n-if __name__ == \'__main__\':\n- main()\n-\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 run.sh --- a/run.sh Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,8 +0,0 @@ -#!/usr/bin/bash - -if [ "$(cat $1/installation_chk 2>&1)" != "true" ]; then - python $1/setup.py build && python $1/setup.py install - mv $1/chimerascan $1/chimerascan_bak - ln -s /usr/local/lib/python2.7/dist-packages/chimerascan $1 - echo true > $1/installation_chk -fi |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 setup-cython.py --- a/setup-cython.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,19 +0,0 @@ -''' -Created on Feb 3, 2011 - -@author: mkiyer -''' -from distutils.core import setup -from distutils.extension import Extension -from Cython.Distutils import build_ext - -# local imports -from setup import get_cython_extension_modules, setup_kwargs - -def main(): - setup(ext_modules=get_cython_extension_modules(), - cmdclass={'build_ext': build_ext}, - **setup_kwargs) - -if __name__ == '__main__': - main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 setup.py --- a/setup.py Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,108 +0,0 @@ -''' -chimerascan - -Created on Jan 5, 2011 - -@author: mkiyer -''' -from distutils.core import setup -from distutils.extension import Extension - -import os -import glob - -# local imports -import chimerascan - -# ------ Setup instructions ------------------------------------------------- - -setup_kwargs = {"name": "chimerascan", - "version": chimerascan.__version__, - "description": "chimeric transcript discovery from RNA-seq", - "long_description": __doc__, - "author": "Matthew Iyer", - "author_email": "mkiyer@umich.edu", - "license": "GPL3", - "platforms": "Linux", - "url": "http://chimerascan.googlecode.com", - "packages": ["chimerascan", - "chimerascan.pysam", - "chimerascan.bx", - "chimerascan.pipeline", - "chimerascan.lib", - "chimerascan.tools"], - "package_data": {'chimerascan.tools': ['table_template.html']}, - "scripts": ["chimerascan/chimerascan_run.py", - "chimerascan/chimerascan_index.py", - "chimerascan/tools/chimerascan_html_table.py", - "chimerascan/tools/gtf_to_genepred.py", - "chimerascan/tools/make_false_positive_file.py"]} - -# ---- Extension Modules ---------------------------------------------------- - -def get_cython_extension_modules(): - # pysam - samtools - samtools = Extension("chimerascan.pysam.csamtools", # name of extension - ["chimerascan/pysam/csamtools.pyx", - "chimerascan/pysam/pysam_util.c"] +\ - glob.glob( os.path.join( "chimerascan", "pysam", "samtools", "*.c" )), - library_dirs=[], - include_dirs=[ "chimerascan/pysam/samtools", "chimerascan/pysam" ], - libraries=[ "z", ], - language="c", - define_macros = [('FILE_OFFSET_BITS','64'), - ('_USE_KNETFILE','')]) - # pysam - tabix - tabix = Extension("chimerascan.pysam.ctabix", # name of extension - ["chimerascan/pysam/ctabix.pyx" ] +\ - glob.glob(os.path.join("chimerascan", "pysam", "tabix", "*.c")), - library_dirs=[], - include_dirs=[ "chimerascan/pysam/tabix", "chimerascan/pysam" ], - libraries=[ "z", ], - language="c", - ) - # Interval clustering - bx_cluster 
= Extension("chimerascan.bx.cluster", - ["chimerascan/bx/cluster.pyx", "chimerascan/bx/intervalcluster.c"], - include_dirs=["chimerascan/bx"]) - # Interval intersection - bx_interval = Extension("chimerascan.bx.intersection", - ["chimerascan/bx/intersection.pyx" ]) - return [samtools, tabix, bx_cluster, bx_interval] - -def get_c_extension_modules(): - # pysam - samtools - samtools = Extension("chimerascan.pysam.csamtools", # name of extension - ["chimerascan/pysam/csamtools.c", - "chimerascan/pysam/pysam_util.c"] +\ - glob.glob( os.path.join( "chimerascan", "pysam", "samtools", "*.c" )), - library_dirs=[], - include_dirs=[ "chimerascan/pysam/samtools", "chimerascan/pysam" ], - libraries=[ "z", ], - language="c", - define_macros = [('FILE_OFFSET_BITS','64'), - ('_USE_KNETFILE','')]) - # pysam - tabix - tabix = Extension("chimerascan.pysam.ctabix", # name of extension - ["chimerascan/pysam/ctabix.c" ] +\ - glob.glob(os.path.join("chimerascan", "pysam", "tabix", "*.c")), - library_dirs=[], - include_dirs=[ "chimerascan/pysam/tabix", "chimerascan/pysam" ], - libraries=[ "z", ], - language="c", - ) - # Interval clustering - bx_cluster = Extension("chimerascan.bx.cluster", - ["chimerascan/bx/cluster.c", "chimerascan/bx/intervalcluster.c"], - include_dirs=["chimerascan/bx"]) - # Interval intersection - bx_interval = Extension("chimerascan.bx.intersection", - ["chimerascan/bx/intersection.c"]) - return [samtools, tabix, bx_cluster, bx_interval] - -def main(): - setup(ext_modules=get_c_extension_modules(), - **setup_kwargs) - -if __name__ == '__main__': - main() \ No newline at end of file |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/PostAnalysis --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/PostAnalysis Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,66 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# this script is a wrapup for Post analysis based on the ratio file output + +use strict; +use Getopt::Long; +my $RatioFile=""; +my $OutputFile = ""; +my $JunctionCut=5; +my $CutoffLevel="M"; +my $noIRM = 0; +my $noIRMstr=""; + +GetOptions ( + "i:s"=>\$RatioFile, + "o:s"=>\$OutputFile, + "c:s"=>\$CutoffLevel, + "noIRM|noirm"=>\$noIRM, + "j:i"=>\$JunctionCut +); + +my $InputParaDes=" Usage of the script: + -i input file (.ratio file) + -o output file + -c Cutoff Level:H/[M]/L + Means High, Middle or Low + -j Junction reads per junction requirement for each exon-isoform [5] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($RatioFile eq "") +{ + print $InputParaDes; + exit; +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} +if($noIRM) +{ + $noIRMstr= "noirm"; +} + + +system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut $noIRMstr >$OutputFile.raw"); + +open(rawfile, "$OutputFile.raw"); +open(outfile, ">$OutputFile"); +while(my $line=<rawfile>) +{ + chomp($line); + my @a=split("\t",$line); + if($noIRM) + { + print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } + else + { + print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } +} +close(outfile); +close(rawfile); |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/SpliceChange --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/SpliceChange Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,176 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# compare two outputs from SpliceTrap +use strict; + +# the information needed +# inclusion ratio input file +# filtered out or not input file +# minimal inclusion ratio at least 0.1 for one condition +# minimal splicing changes parameter +# orignial pipeline written by Martin Akerman +# re-organized and re-written by Jie Wu + +use FileHandle; + +use Getopt::Long; + +my @programs = ('grep','mkdir','R','paste','awk','sort'); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); +} + + +my $InputFileName1 = ""; +my $InputFileName2 = ""; +my $OutputFileName = ""; +my $minchange = 0.3; +my $mininc = 0.1; +my $noIRM = 0; + + +GetOptions ( + "1:s"=>\$InputFileName1, + "2:s"=>\$InputFileName2, + "o:s"=>\$OutputFileName, + "noIRM|noirm"=>\$noIRM, + "m:f"=>\$mininc, + "c:f"=>\$minchange +); + +my $InputParaDes=" Usage of the script: + -1 input file 1, output from SpliceTrap, *.raw file in the output folder + -2 input file 2. see above. + -o output file prefix. + -c minimal change required, [default:0.3] + -m minimal inclusion ratio for at least one condition. [defualt:0.1] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") +{ + print $InputParaDes; + exit; +} + + + +if(-d "$OutputFileName.cache" ) +{ + print "Aborted! 
output cache folder exists: $OutputFileName.cache \n"; + exit; +} +else +{ + system("mkdir $OutputFileName.cache"); +} + +# +my %ir1; # records ir from file1 +my %ir2; # records ir from file2 +# only records trios above the cutoffs + +open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; +while(my $line=<input1>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir1{$a[0]} = $a[1]; + } + else + { + $ir1{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; +close(input1); + +open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; +while(my $line=<input2>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir2{$a[0]} = $a[1]; + } + else + { + $ir2{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; + + +close(input2); + + +## +my %mean; +my %sd; + +my %num; + +my %filehandles; + +my @types = ("CA", "IR", "AD","AA"); + +foreach my $type (@types) +{ + my $fh = new FileHandle; + open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; + $filehandles{$type} = $fh; +} + + +foreach my $key (keys %ir1) +{ + if(exists $ir2{$key}) + { + if(($ir1{$key} + $ir2{$key}) > 0) + { + #find the type + my $type = substr($key, 0, 2); + $type = "CA" if $type eq "CS"; + $num{$type}++; + + my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); + $mean{$type} = $mean{$type} + $change; + $sd{$type} = $change*$change + $sd{$type}; + + $change = sprintf("%.4f",$change); + + my $fout = $filehandles{$type}; + print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; + } + } +} + +foreach my $type (keys %filehandles) +{ + close($filehandles{$type}); + if($num{$type} == 0) + { + warn "no AS events passed filters for both files\n"; + next; + } + $mean{$type} = $mean{$type}/$num{$type}; + $sd{$type} = sqrt($sd{$type}/$num{$type}); + 
system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); + system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); + print "$num{$type} $type events processed...\n"; + #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; + +} +system("rm $OutputFileName.cache -rf"); + + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/SpliceTrap.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/SpliceTrap.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,285 @@\n+#!/usr/bin/perl\n+# Author: wuj@cshl.edu\n+# Modified: Baekdoo Kim (baegi7942@gmail.com)\n+use strict;\n+use Getopt::Long;\n+use Data::Dumper;\n+####################\n+use Cwd;\n+my $PROG = $0;\n+my $CUR_DIR = Cwd::abs_path(Cwd::cwd());\n+my $PROG_ABS_PATH = Cwd::abs_path($PROG);\n+#my $SrcFolder=`dirname $PROG_ABS_PATH`;\n+#chomp($SrcFolder);\n+#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini";\n+#my $SrcFolder=$config{SrcFolder};\n+\n+my @programs = (\'R\',\'echo\',\'cat\',\'bash\',\'perl\',\'ln\',\'mkdir\',\'paste\',\'grep\',\'sort\',\'basename\',\'awk\',\'wc\',\'mv\',\'cd\',\'rm\',\'split\',\'head\' );\n+foreach my $program (@programs)\n+{\n+ die ("CHECK: $program not found\\n") if(system("hash $program >/dev/null"));\n+\n+}\n+\n+####################\n+my $SrcFolder="";\n+my $MapSoftware="bowtie";\n+my $DatabasePrefix="hg38";\n+my $ReadFileFormat="";\n+my $ReadFile1Name="";\n+my $ReadFile2Name="";\n+my $CutoffLevel="M";\n+my $Outputfolder=$CUR_DIR;\n+my $OutputPrefix="Result";\n+#my $CutoffOnly=0;\n+my $ReadSize=36;\n+my $JunctionCut=5;\n+my $onGalaxy_raw="";\n+my $onGalaxy_txt="";\n+my $BowtieThreads=1;\n+my $noIRMstr="";\n+my $noIRM = 0;\n+\n+my $num_args = $#ARGV;\n+$onGalaxy_raw = $ARGV[$num_args-1];\n+$onGalaxy_txt = $ARGV[$num_args];\n+\n+GetOptions (\n+\t"l:s"=>\\$SrcFolder,\n+ "m:s"=>\\$MapSoftware,\n+ "d:s"=>\\$DatabasePrefix,\n+# "f:s"=>\\$ReadFileFormat,\n+ "1:s"=>\\$ReadFile1Name,\n+ "2:s"=>\\$ReadFile2Name,\n+ "c:s"=>\\$CutoffLevel,\n+ "outdir:s"=>\\$Outputfolder,\n+ "o:s"=>\\$OutputPrefix,\n+ "j:i"=>\\$JunctionCut,\n+ "s:i"=>\\$ReadSize,\n+ "p:i"=>\\$BowtieThreads,\n+ "noIRM|noirm"=>\\$noIRM\n+# "local:s"=>\\$local,\n+# "rerun"=>\\$CutoffOnly\n+);\n+#-O for galaxy output\n+\n+\n+my $InputParaDes=" Usage of the script:\n+\t-l\tBase Location (required)\n+ -m Mapping software: [bowtie]/rmap\n+ -d Database prefix: [hg18]/mm9/rn4/userdefined\n+ -1 Read File 1\n+ -2 Read File 2\n+ -c Cutoff Level:H/[M]/L\n+ Means High, Middle or 
Low\n+ -j Junction reads requirement per junction for each exon-isoform [5]\n+ -o Output prefix {Result}\n+ -s Read Size [36]\n+ --outdir Output folder [./]\n+ -p Bowtie parameter, threads number, only use this when you don\'t use qsub [1]\n+ --noIRM Skip the IRM correction step\n+ \n+ This is a quick help, please refer to the README file for details.\n+";\n+\n+\n+if($SrcFolder eq "") {\n+\tprint "[CHECK] - Please provide the location of the script (option \'-l\')\\n\\n";\n+\texit;\n+}\n+\n+if($ReadFile2Name eq "")\n+{\n+ $ReadFile2Name = $ReadFile1Name;\n+ #trigger singled end mode\n+}\n+\n+if($ReadFile1Name eq "" or $ReadFile2Name eq "" )\n+{\n+ print $InputParaDes;\n+ exit;\n+}\n+\n+if($BowtieThreads < 1)\n+{\n+ print $InputParaDes;\n+ exit;\n+}\n+\n+if (! -e "$SrcFolder/db/$DatabasePrefix/parallel")\n+{\n+ print "CHECK: Error, the database you specified is not properly installed.\\n";\n+ #print $InputParaDes;\n+ exit;\n+\n+}\n+\n+if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L")\n+{\n+ print $InputParaDes;\n+ exit;\n+}\n+\n+\n+$ReadFile1Name = Cwd::abs_path($ReadFile1Name);\n+$ReadFile2Name = Cwd::abs_path($ReadFile2Name);\n+\n+#check the files\n+open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\\n");\n+my $checkoneline = <check>;\n+if(substr($checkoneline,0,1) eq ">")\n+{\n+ $ReadFileFormat = "fasta";\n+}\n+elsif(substr($checkoneline,0,1) eq "@")\n+{\n+ $ReadFileFormat = "fastq";\n+}\n+else\n+{\n+ die("CHECK: ERROR:Please check $ReadFile1Name\\n");\n+}\n+close(check);\n+\n+open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\\n");\n+my $checkoneline = <check>;\n+if(substr($checkoneline,0,1) eq ">")\n+{\n+ die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\\n") if ($ReadFileFormat ne "fasta");\n+}\n+elsif(substr($checkoneline,0,1) eq "'..b'print "CHECK: checking rmap...\\n";\n+ if(system("type rmap &>/dev/null") ==0 )\n+ {\n+ print "CHECK: rmap found, continue\\n";\n+ }\n+ 
else\n+ {\n+ die "CHECK: No rmap found in PATH, EXIT!\\n";\n+ }\n+}\n+else\n+{\n+ die "CHECK: option -m only takes rmap or bowtie as inputs\\n";\n+}\n+\n+if($ReadSize == 0)\n+{\n+ die "CHECK: Please check option -s Read size\\n";\n+}\n+\n+if($noIRM)\n+{\n+ $noIRMstr= "noirm";\n+}\n+\n+#write more checks later\n+print "PARAMETERS:\\tMapping software: ",$MapSoftware,"\\n";\n+print "PARAMETERS:\\tDatabase prefix: ",$DatabasePrefix,"\\n";\n+print "PARAMETERS:\\tRead end 1: ",$ReadFile1Name,"\\n";\n+print "PARAMETERS:\\tRead end 2: ",$ReadFile2Name,"\\n" if($ReadFile2Name ne $ReadFile1Name);\n+print "PARAMETERS:\\tGalaxy_raw: ",$onGalaxy_raw,"\\n"; #if($onGalaxy_raw ne "");\n+print "PARAMETERS:\\tGalaxy_txt: ",$onGalaxy_txt,"\\n"; #if($onGalaxy_txt ne "");\n+print "PARAMETERS:\\tCutoff level: ",$CutoffLevel,"\\n";\n+print "PARAMETERS:\\tJunction reads.min:",$JunctionCut,"\\n";\n+print "PARAMETERS:\\tOutput folder: ",$Outputfolder,"\\n";\n+print "PARAMETERS:\\tOutput prefix: ",$OutputPrefix,"\\n";\n+print "PARAMETERS:\\tRead size: ",$ReadSize,"\\n";\n+print "PARAMETERS:\\tBowtie threads #: ",$BowtieThreads,"\\n";\n+print "PARAMETERS:\\tNo IRM.\\n" if ($noIRM);\n+\n+if($MapSoftware eq "bowtie")\n+{\n+ print "=================STAGE 1 MAPPING===================\\n";\n+ system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads");\n+ system("bash $SrcFolder/bin/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name);\n+ print "=================STAGE 2 ESTIMATION================\\n";\n+ # ratio, log, nums\n+ system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;\n+ print "=================STAGE 3 CUTOFF====================\\n";\n+ # raw\n+ system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel 
$Outputfolder $JunctionCut $SrcFolder $noIRMstr");\n+\n+\n+}\n+\n+if($MapSoftware eq "rmap")\n+{\n+ print "=================STAGE 1 MAPPING===================\\n";\n+\n+ system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ;\n+ system("bash $SrcFolder/bin/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name);\n+ print "=================STAGE 2 ESTIMATION================\\n";\n+\n+ system("bash $SrcFolder/bin/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ;\n+ print "=================STAGE 3 CUTOFF====================\\n";\n+ system("bash $SrcFolder/bin/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr");\n+\n+\n+}\n+\n+#print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\\n";\n+\n+if($onGalaxy_raw ne "" && $onGalaxy_txt ne "")\n+{\n+ print "OUTPUTFILE:$OutputPrefix.raw\\n";\n+ system("grep -v na $Outputfolder/$OutputPrefix.raw >$onGalaxy_raw");\n+ print "OUTPUTFILE:$OutputPrefix.txt\\n";\n+ system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy_txt");\n+}\n+\n+print "============Clean up\\n";\n+system("rm -r $Outputfolder/$OutputPrefix.*");\n+\n+sub random_sessid {\n+ #my @chars = (0..9,a..z,A..Z);\n+ my @chars = (\'a\'..\'z\',\'A\'..\'Z\');\n+ my $len = 10;\n+ my $string = join \'\', map {$chars[rand(@chars)]} (1..$len);\n+ return $string;\n+} \n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/TXdbgen --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/TXdbgen Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,97 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# this script is to generate TXdb database files from bed/gtf file + +use strict; +use Cwd; +use Getopt::Long; + +my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo'); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); + +} + + +my $genomedir = ""; + +my $annofilename = ""; +my $txdbname = "userdefined"; +my $knownonly = 0; +my $gtfinput = 0; + +GetOptions ( + "g:s"=>\$genomedir, + "a:s"=>\$annofilename, + "n:s"=>\$txdbname, + "gtf"=>\$gtfinput, + "knownonly"=>\$knownonly +); + +my $InputParaDes=" Usage of the script: + -g genome fasta file location + -a annotation file (bed/gtf) + -n txdb name + --gtf specify this if annotation file is in gtf format +"; + +if($genomedir eq "" or $annofilename eq "") +{ + print $InputParaDes; + exit; +} + +$genomedir = Cwd::abs_path($genomedir); +$annofilename = Cwd::abs_path($annofilename); + +my $annofilebase = `basename $annofilename`; +chomp($annofilebase); +#need a cache folder to avoid mess + +my $cachefolder = $annofilebase.".cache"; + +if (! 
-e $cachefolder) +{ + mkdir $cachefolder or die "TXDBGEN: could not create cache folder $cachefolder\n"; +} +if($gtfinput) +{ + print "TXDBGEN: converting gtf file into bed format\n"; + system ("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed"); + $annofilename = "$cachefolder/$annofilebase.bed"; +} + + +print "TXDBGEN: scan $annofilename for AS events...\n"; +system("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp"); +print "TXDBGEN: fetch sequences from $genomedir...\n"; +system("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort"); +#get fasta file list +system("ls $genomedir/*.fa >$cachefolder/chr.list"); + +system("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta"); + +print "TXDBGEN: generate files for parallel computing...\n"; +if (! -e "$cachefolder/parallel") +{ + mkdir "$cachefolder/parallel" or die "TXDBGEN: could not create $cachefolder/parallel\n"; +} +system("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed"); +system("rm $cachefolder/out.bed"); +system("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi"); +system("rm $cachefolder/TXdb.tmp.evi"); +system("bash $SrcFolder/splitdb.sh $cachefolder/parallel"); +print "TXDBGEN: build Bowtie index...\n"; + +if (! -e "$cachefolder/btw") +{ + mkdir "$cachefolder/btw" or die "TXDBGEN: could not create $cachefolder/btw\n"; +} +system("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb"); +system("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list"); +print "TXDBGEN: Copy files to $SrcFolder/../db/$txdbname\n"; + +system("mv $cachefolder $SrcFolder/../db/$txdbname"); +print "TXDBGEN: Done!\n"; + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/ApplyCutoff.jie.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/ApplyCutoff.jie.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,108 @@ +#apply our cuttoff hash table on the IR calculated by Jie +#Modified from Martin's code +use strict; + +use Cwd; +my $PROG = $0; +my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); +my $PROG_ABS_PATH = Cwd::abs_path($PROG); +my $SrcFolder=`dirname $PROG_ABS_PATH`; +chomp($SrcFolder); + +my %cutoff; +my @Exlen; + +my $cutoff_level=$ARGV[1]; +my $JunctionCut = $ARGV[2]; +my $noirm = $ARGV[3]; + +my $cutoff_level_index=7; + + $cutoff_level_index=8 if $cutoff_level eq "H"; +$cutoff_level_index=6 if $cutoff_level eq "L"; + +open(CUT,"$SrcFolder/../cutoffs/cutoff.pair.0".$cutoff_level_index.".txt") || die "cutoff file not found $!\n"; + +while(<CUT>){ + chomp; + my @a=split(/\t/,$_); + push @Exlen,$a[0]; + $cutoff{$a[0]}=$a[1]; +} +close(CUT); + +open(IN,$ARGV[0]); + +while(<IN>){ + chomp; + my @a=split(/\t/,$_); + my $Ez='Ez=yes'; + my $print=$_; + if($a[0]=~m/#/g){next} + my $eventid=substr($a[0],0,2); + my $bir =$a[2]; + $bir =$a[1] if($noirm eq "noirm"); + my $j12 = $a[8]; + my $j23 = $a[9]; + my $j13 = $a[10]; + my $cov1=$a[11]; + my $cov2=$a[12]; + my $cov3=$a[13]; + my $siz1=$a[15]; + my $siz2=$a[16]; + my $siz3=$a[17]; + + + my $stat1='exon1='.cutoff($siz1,$cov1,\@Exlen,%cutoff); + my $stat2='exon2='.cutoff($siz2,$cov2,\@Exlen,%cutoff); + my $stat3='exon3='.cutoff($siz3,$cov3,\@Exlen,%cutoff); + if($stat1 eq "exon1=yes" and $stat3 eq "exon3=yes") + { + #$Ez="passed"; + $Ez=$eventid if $eventid eq "AA"; + $Ez=$eventid if $eventid eq "AD"; + $Ez=$eventid if $eventid eq "IR"; + if ($eventid eq "CS" or $eventid eq "CA" or $eventid eq "ME") + { + if($bir >0.9) + { + $Ez = "CS"; + } + else + { + $Ez = "CA"; + } + } + + } + else + { + #$Ez="declined"; + $Ez = "na"; + } + if( ($j12<$JunctionCut or $j23<$JunctionCut) and $j13 <$JunctionCut) + { + $Ez = "na"; + } + print $print,"\t",$stat1,"\t",$stat2,"\t",$stat3,"\t",$Ez,"\n"; +} +close(IN); +#################################################################### + +sub cutoff{ + my($s,$c,$E,%cutoff)=@_; + my 
@Exlen=@$E; + if($c eq 'NA'){return('NA')} + my $range=$Exlen[$#Exlen]; + foreach my $l(@Exlen){if($s<$l){$range=$l;last}} + if($c<$cutoff{$range}){return('no')} + return('yes') +} + + +sub contain{ + my ($a,@a)=@_; + foreach(@a){if($a eq $_){return(1)}} + return(0) +} + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/Pair_estimate_c |
b |
Binary file splicetrap/bin/Pair_estimate_c has changed |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/PostAnalysis --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/PostAnalysis Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,66 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# this script is a wrapup for Post analysis based on the ratio file output + +use strict; +use Getopt::Long; +my $RatioFile=""; +my $OutputFile = ""; +my $JunctionCut=5; +my $CutoffLevel="M"; +my $noIRM = 0; +my $noIRMstr=""; + +GetOptions ( + "i:s"=>\$RatioFile, + "o:s"=>\$OutputFile, + "c:s"=>\$CutoffLevel, + "noIRM|noirm"=>\$noIRM, + "j:i"=>\$JunctionCut +); + +my $InputParaDes=" Usage of the script: + -i input file (.ratio file) + -o output file + -c Cutoff Level:H/[M]/L + Means High, Middle or Low + -j Junction reads per junction requirement for each exon-isoform [5] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($RatioFile eq "") +{ + print $InputParaDes; + exit; +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} +if($noIRM) +{ + $noIRMstr= "noirm"; +} + + +system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut $noIRMstr >$OutputFile.raw"); + +open(rawfile, "$OutputFile.raw"); +open(outfile, ">$OutputFile"); +while(my $line=<rawfile>) +{ + chomp($line); + my @a=split("\t",$line); + if($noIRM) + { + print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } + else + { + print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } +} +close(outfile); +close(rawfile); |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/PostAnalysis.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/PostAnalysis.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,64 @@ +# this script is a wrapup for Post analysis based on the ratio file output + +use strict; +use Getopt::Long; +my $RatioFile=""; +my $OutputFile = ""; +my $JunctionCut=5; +my $CutoffLevel="M"; +my $noIRM = 0; +my $noIRMstr=""; + +GetOptions ( + "i:s"=>\$RatioFile, + "o:s"=>\$OutputFile, + "c:s"=>\$CutoffLevel, + "noIRM|noirm"=>\$noIRM, + "j:i"=>\$JunctionCut +); + +my $InputParaDes=" Usage of the script: + -i input file (.ratio file) + -o output file + -c Cutoff Level:H/[M]/L + Means High, Middle or Low + -j Junction reads per junction requirement for each exon-isoform [5] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($RatioFile eq "") +{ + print $InputParaDes; + exit; +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} +if($noIRM) +{ + $noIRMstr= "noirm"; +} + + +system("perl $SrcFolder/ApplyCutoff.jie.pl $RatioFile $CutoffLevel $JunctionCut $noIRMstr >$OutputFile.raw"); + +open(rawfile, "$OutputFile.raw"); +open(outfile, ">$OutputFile"); +while(my $line=<rawfile>) +{ + chomp($line); + my @a=split("\t",$line); + if($noIRM) + { + print outfile join("\t",$a[21],$a[1],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } + else + { + print outfile join("\t",$a[21],$a[2],$a[3],$a[4],$a[5],$a[6],$a[7],$a[11],$a[12],$a[13],$a[14]),"\n"; + } +} +close(outfile); +close(rawfile); |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/SpliceChange --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/SpliceChange Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,176 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# compare two outputs from SpliceTrap +use strict; + +# the information needed +# inclusion ratio input file +# filtered out or not input file +# minimal inclusion ratio at least 0.1 for one condition +# minimal splicing changes parameter +# orignial pipeline written by Martin Akerman +# re-organized and re-written by Jie Wu + +use FileHandle; + +use Getopt::Long; + +my @programs = ('grep','mkdir','R','paste','awk','sort'); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); +} + + +my $InputFileName1 = ""; +my $InputFileName2 = ""; +my $OutputFileName = ""; +my $minchange = 0.3; +my $mininc = 0.1; +my $noIRM = 0; + + +GetOptions ( + "1:s"=>\$InputFileName1, + "2:s"=>\$InputFileName2, + "o:s"=>\$OutputFileName, + "noIRM|noirm"=>\$noIRM, + "m:f"=>\$mininc, + "c:f"=>\$minchange +); + +my $InputParaDes=" Usage of the script: + -1 input file 1, output from SpliceTrap, *.raw file in the output folder + -2 input file 2. see above. + -o output file prefix. + -c minimal change required, [default:0.3] + -m minimal inclusion ratio for at least one condition. [defualt:0.1] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") +{ + print $InputParaDes; + exit; +} + + + +if(-d "$OutputFileName.cache" ) +{ + print "Aborted! 
output cache folder exists: $OutputFileName.cache \n"; + exit; +} +else +{ + system("mkdir $OutputFileName.cache"); +} + +# +my %ir1; # records ir from file1 +my %ir2; # records ir from file2 +# only records trios above the cutoffs + +open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; +while(my $line=<input1>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir1{$a[0]} = $a[1]; + } + else + { + $ir1{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; +close(input1); + +open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; +while(my $line=<input2>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir2{$a[0]} = $a[1]; + } + else + { + $ir2{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; + + +close(input2); + + +## +my %mean; +my %sd; + +my %num; + +my %filehandles; + +my @types = ("CA", "IR", "AD","AA"); + +foreach my $type (@types) +{ + my $fh = new FileHandle; + open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; + $filehandles{$type} = $fh; +} + + +foreach my $key (keys %ir1) +{ + if(exists $ir2{$key}) + { + if(($ir1{$key} + $ir2{$key}) > 0) + { + #find the type + my $type = substr($key, 0, 2); + $type = "CA" if $type eq "CS"; + $num{$type}++; + + my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); + $mean{$type} = $mean{$type} + $change; + $sd{$type} = $change*$change + $sd{$type}; + + $change = sprintf("%.4f",$change); + + my $fout = $filehandles{$type}; + print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; + } + } +} + +foreach my $type (keys %filehandles) +{ + close($filehandles{$type}); + if($num{$type} == 0) + { + warn "no AS events passed filters for both files\n"; + next; + } + $mean{$type} = $mean{$type}/$num{$type}; + $sd{$type} = sqrt($sd{$type}/$num{$type}); + 
system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); + system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); + print "$num{$type} $type events processed...\n"; + #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; + +} +system("rm $OutputFileName.cache -rf"); + + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/SpliceChange.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/SpliceChange.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,174 @@ +# compare two outputs from SpliceTrap +use strict; + +# the information needed +# inclusion ratio input file +# filtered out or not input file +# minimal inclusion ratio at least 0.1 for one condition +# minimal splicing changes parameter +# orignial pipeline written by Martin Akerman +# re-organized and re-written by Jie Wu + +use FileHandle; + +use Getopt::Long; + +my @programs = ('grep','mkdir','R','paste','awk','sort'); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); +} + + +my $InputFileName1 = ""; +my $InputFileName2 = ""; +my $OutputFileName = ""; +my $minchange = 0.3; +my $mininc = 0.1; +my $noIRM = 0; + + +GetOptions ( + "1:s"=>\$InputFileName1, + "2:s"=>\$InputFileName2, + "o:s"=>\$OutputFileName, + "noIRM|noirm"=>\$noIRM, + "m:f"=>\$mininc, + "c:f"=>\$minchange +); + +my $InputParaDes=" Usage of the script: + -1 input file 1, output from SpliceTrap, *.raw file in the output folder + -2 input file 2. see above. + -o output file prefix. + -c minimal change required, [default:0.3] + -m minimal inclusion ratio for at least one condition. [defualt:0.1] + --noIRM Use the unadjusted inclusion ratios (before IRM correction) +"; + +if($InputFileName1 eq "" or $InputFileName2 eq "" or $OutputFileName eq "") +{ + print $InputParaDes; + exit; +} + + + +if(-d "$OutputFileName.cache" ) +{ + print "Aborted! 
output cache folder exists: $OutputFileName.cache \n"; + exit; +} +else +{ + system("mkdir $OutputFileName.cache"); +} + +# +my %ir1; # records ir from file1 +my %ir2; # records ir from file2 +# only records trios above the cutoffs + +open(input1, $InputFileName1) or die "$InputFileName1 open error!\n"; +while(my $line=<input1>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir1{$a[0]} = $a[1]; + } + else + { + $ir1{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir1) )," records loaded from $InputFileName1\n"; +close(input1); + +open(input2, $InputFileName2) or die "$InputFileName2 open error!\n"; +while(my $line=<input2>) +{ + chomp($line); + my @a = split("\t", $line); + if($a[21] ne "na") + { + if($noIRM) + { + $ir2{$a[0]} = $a[1]; + } + else + { + $ir2{$a[0]} = $a[2]; + } + } +} +print scalar(keys (%ir2) )," records loaded from $InputFileName2\n"; + + +close(input2); + + +## +my %mean; +my %sd; + +my %num; + +my %filehandles; + +my @types = ("CA", "IR", "AD","AA"); + +foreach my $type (@types) +{ + my $fh = new FileHandle; + open($fh, ">$OutputFileName.cache/$type") or die "Cannot open $OutputFileName.cache/$type\n"; + $filehandles{$type} = $fh; +} + + +foreach my $key (keys %ir1) +{ + if(exists $ir2{$key}) + { + if(($ir1{$key} + $ir2{$key}) > 0) + { + #find the type + my $type = substr($key, 0, 2); + $type = "CA" if $type eq "CS"; + $num{$type}++; + + my $change = ($ir2{$key} - $ir1{$key})/ ($ir1{$key} + $ir2{$key}); + $mean{$type} = $mean{$type} + $change; + $sd{$type} = $change*$change + $sd{$type}; + + $change = sprintf("%.4f",$change); + + my $fout = $filehandles{$type}; + print $fout $key,"\t",$ir1{$key},"\t",$ir2{$key},"\t",$change,"\n"; + } + } +} + +foreach my $type (keys %filehandles) +{ + close($filehandles{$type}); + if($num{$type} == 0) + { + warn "no AS events passed filters for both files\n"; + next; + } + $mean{$type} = $mean{$type}/$num{$type}; + $sd{$type} = sqrt($sd{$type}/$num{$type}); + 
system("R --slave --args $OutputFileName.cache/$type $mean{$type} $sd{$type} $num{$type} <$SrcFolder/calc_pval.R"); + system("paste $OutputFileName.cache/$type $OutputFileName.cache/$type.p |awk '(\$2>$mininc||\$3>$mininc)&&(\$4>$minchange||\$4<-$minchange)' |sort -k4nr >$OutputFileName.$type.report"); + print "$num{$type} $type events processed...\n"; + #print $mean{$type},"\t", $sd{$type} ,"\t",$num{$type},"\n"; + +} +system("rm $OutputFileName.cache -rf"); + + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/SpliceTrap --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/SpliceTrap Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,263 @@ +#!/usr/bin/perl +my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin"; +# Author: wuj@cshl.edu +use strict; +use Getopt::Long; +#################### +use Cwd; +my $PROG = $0; +my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); +my $PROG_ABS_PATH = Cwd::abs_path($PROG); +#my $SrcFolder=`dirname $PROG_ABS_PATH`; +#chomp($SrcFolder); +#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; +#my $SrcFolder=$config{SrcFolder}; + +my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' ); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); + +} + +#################### +my $MapSoftware="bowtie"; +my $DatabasePrefix="hg18"; +my $ReadFileFormat=""; +my $ReadFile1Name=""; +my $ReadFile2Name=""; +my $CutoffLevel="M"; +my $Outputfolder=$CUR_DIR; +my $OutputPrefix="Result"; +#my $CutoffOnly=0; +my $ReadSize=36; +my $JunctionCut=5; +my $onGalaxy=""; +my $BowtieThreads=1; +my $noIRMstr=""; +my $noIRM = 0; + +GetOptions ( + "m:s"=>\$MapSoftware, + "d:s"=>\$DatabasePrefix, +# "f:s"=>\$ReadFileFormat, + "1:s"=>\$ReadFile1Name, + "2:s"=>\$ReadFile2Name, + "c:s"=>\$CutoffLevel, + "outdir:s"=>\$Outputfolder, + "o:s"=>\$OutputPrefix, + "j:i"=>\$JunctionCut, + "s:i"=>\$ReadSize, + "p:i"=>\$BowtieThreads, + "noIRM|noirm"=>\$noIRM, + "g:s"=>\$onGalaxy +# "local:s"=>\$local, +# "rerun"=>\$CutoffOnly +); +#-O for galaxy output + + +my $InputParaDes=" Usage of the script: + -m Mapping software: [bowtie]/rmap + -d Database prefix: [hg18]/mm9/rn4/userdefined + -1 Read File 1 + -2 Read File 2 + -c Cutoff Level:H/[M]/L + Means High, Middle or Low + -j Junction reads requirement per junction for each exon-isoform [5] + -o Output prefix {Result} + -s Read Size [36] + --outdir Output folder [./] + -p Bowtie parameter, threads number, only use this when you don't use qsub [1] + --noIRM Skip the IRM correction step + + This is a quick help, please 
refer to the README file for details. +"; + +if($ReadFile2Name eq "") +{ + $ReadFile2Name = $ReadFile1Name; + #trigger singled end mode +} + +if($ReadFile1Name eq "" or $ReadFile2Name eq "" ) +{ + print $InputParaDes; + exit; +} + +if($BowtieThreads < 1) +{ + print $InputParaDes; + exit; +} + +if (! -e "$SrcFolder/../db/$DatabasePrefix/parallel") +{ + print "CHECK: Error, the database you specified is not properly installed.\n"; + #print $InputParaDes; + exit; + +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} + +$ReadFile1Name = Cwd::abs_path($ReadFile1Name); +$ReadFile2Name = Cwd::abs_path($ReadFile2Name); + +#check the files +open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n"); +my $checkoneline = <check>; +if(substr($checkoneline,0,1) eq ">") +{ + $ReadFileFormat = "fasta"; +} +elsif(substr($checkoneline,0,1) eq "@") +{ + $ReadFileFormat = "fastq"; +} +else +{ + die("CHECK: ERROR:Please check $ReadFile1Name\n"); +} +close(check); + +open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n"); +my $checkoneline = <check>; +if(substr($checkoneline,0,1) eq ">") +{ + die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta"); +} +elsif(substr($checkoneline,0,1) eq "@") +{ + die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq"); +} +else +{ + die("CHECK: ERROR:Please check $ReadFile2Name\n"); +} +close(check); + +$Outputfolder= Cwd::abs_path($Outputfolder); +if($Outputfolder eq "/tmp") +{ + while(-e $Outputfolder) + { + my $random_foldername = random_sessid(); + $Outputfolder = "/tmp/".$random_foldername; + } +} + + +if(! -e $Outputfolder) +{ + mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n"; +} +if(! 
-d $Outputfolder) +{ + die "CHECK: $Outputfolder is not a folder\n"; +} + +if($MapSoftware eq "bowtie") +{ + print "CHECK: whether bowtie installed and in PATH\n"; + my $bowtiechecker=`bowtie --version`; + if($bowtiechecker ne "") + { + print "CHECK: bowtie found, information below:\n"; + print $bowtiechecker,"\n"; + } + else + { + die "CHECK: No bowtie found in PATH, EXIT!\n"; + } +} +elsif($MapSoftware eq "rmap") +{ + print "CHECK: checking rmap...\n"; + if(system("type rmap &>/dev/null") ==0 ) + { + print "CHECK: rmap found, continue\n"; + } + else + { + die "CHECK: No rmap found in PATH, EXIT!\n"; + } +} +else +{ + die "CHECK: option -m only takes rmap or bowtie as inputs\n"; +} + +if($ReadSize == 0) +{ + die "CHECK: Please check option -s Read size\n"; +} + +if($noIRM) +{ + $noIRMstr= "noirm"; +} + +#write more checks later +print "PARAMETERS:\tMapping software: ",$MapSoftware,"\n"; +print "PARAMETERS:\tDatabase prefix: ",$DatabasePrefix,"\n"; +print "PARAMETERS:\tRead end 1: ",$ReadFile1Name,"\n"; +print "PARAMETERS:\tRead end 2: ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name); +print "PARAMETERS:\tCutoff level: ",$CutoffLevel,"\n"; +print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n"; +print "PARAMETERS:\tOutput folder: ",$Outputfolder,"\n"; +print "PARAMETERS:\tOutput prefix: ",$OutputPrefix,"\n"; +print "PARAMETERS:\tRead size: ",$ReadSize,"\n"; +print "PARAMETERS:\tBowtie threads #: ",$BowtieThreads,"\n"; +print "PARAMETERS:\tNo IRM.\n" if ($noIRM); + +if($MapSoftware eq "bowtie") +{ + print "=================STAGE 1 MAPPING===================\n"; + system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads"); + system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name); + print "=================STAGE 2 ESTIMATION================\n"; + + system("bash 
$SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; + print "=================STAGE 3 CUTOFF====================\n"; + system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); + + +} + +if($MapSoftware eq "rmap") +{ + print "=================STAGE 1 MAPPING===================\n"; + + system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ; + system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name); + print "=================STAGE 2 ESTIMATION================\n"; + + system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; + print "=================STAGE 3 CUTOFF====================\n"; + system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); + + +} + +print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n"; + +if($onGalaxy ne "") +{ + system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy"); +} + +sub random_sessid +{ + #my @chars = (0..9,a..z,A..Z); + my @chars = ('a'..'z','A'..'Z'); + my $len = 10; + my $string = join '', map {$chars[rand(@chars)]} (1..$len); + return $string; + } + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/SpliceTrap.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/SpliceTrap.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
@@ -0,0 +1,261 @@ +# Author: wuj@cshl.edu +use strict; +use Getopt::Long; +#################### +use Cwd; +my $PROG = $0; +my $CUR_DIR = Cwd::abs_path(Cwd::cwd()); +my $PROG_ABS_PATH = Cwd::abs_path($PROG); +#my $SrcFolder=`dirname $PROG_ABS_PATH`; +#chomp($SrcFolder); +#my %config=do "$ENV{HOME}/.SpliceTrap.pl.ini"; +#my $SrcFolder=$config{SrcFolder}; + +my @programs = ('R','echo','cat','bash','perl','ln','mkdir','paste','grep','sort','basename','awk','wc','mv','cd','rm','split','head' ); +foreach my $program (@programs) +{ + die ("CHECK: $program not found\n") if(system("hash $program >/dev/null")); + +} + +#################### +my $MapSoftware="bowtie"; +my $DatabasePrefix="hg18"; +my $ReadFileFormat=""; +my $ReadFile1Name=""; +my $ReadFile2Name=""; +my $CutoffLevel="M"; +my $Outputfolder=$CUR_DIR; +my $OutputPrefix="Result"; +#my $CutoffOnly=0; +my $ReadSize=36; +my $JunctionCut=5; +my $onGalaxy=""; +my $BowtieThreads=1; +my $noIRMstr=""; +my $noIRM = 0; + +GetOptions ( + "m:s"=>\$MapSoftware, + "d:s"=>\$DatabasePrefix, +# "f:s"=>\$ReadFileFormat, + "1:s"=>\$ReadFile1Name, + "2:s"=>\$ReadFile2Name, + "c:s"=>\$CutoffLevel, + "outdir:s"=>\$Outputfolder, + "o:s"=>\$OutputPrefix, + "j:i"=>\$JunctionCut, + "s:i"=>\$ReadSize, + "p:i"=>\$BowtieThreads, + "noIRM|noirm"=>\$noIRM, + "g:s"=>\$onGalaxy +# "local:s"=>\$local, +# "rerun"=>\$CutoffOnly +); +#-O for galaxy output + + +my $InputParaDes=" Usage of the script: + -m Mapping software: [bowtie]/rmap + -d Database prefix: [hg18]/mm9/rn4/userdefined + -1 Read File 1 + -2 Read File 2 + -c Cutoff Level:H/[M]/L + Means High, Middle or Low + -j Junction reads requirement per junction for each exon-isoform [5] + -o Output prefix {Result} + -s Read Size [36] + --outdir Output folder [./] + -p Bowtie parameter, threads number, only use this when you don't use qsub [1] + --noIRM Skip the IRM correction step + + This is a quick help, please refer to the README file for details. 
+"; + +if($ReadFile2Name eq "") +{ + $ReadFile2Name = $ReadFile1Name; + #trigger singled end mode +} + +if($ReadFile1Name eq "" or $ReadFile2Name eq "" ) +{ + print $InputParaDes; + exit; +} + +if($BowtieThreads < 1) +{ + print $InputParaDes; + exit; +} + +if (! -e "$SrcFolder/../db/$DatabasePrefix/parallel") +{ + print "CHECK: Error, the database you specified is not properly installed.\n"; + #print $InputParaDes; + exit; + +} + +if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L") +{ + print $InputParaDes; + exit; +} + +$ReadFile1Name = Cwd::abs_path($ReadFile1Name); +$ReadFile2Name = Cwd::abs_path($ReadFile2Name); + +#check the files +open(check,$ReadFile1Name) or die ("CHECK: Error when opening $ReadFile1Name\n"); +my $checkoneline = <check>; +if(substr($checkoneline,0,1) eq ">") +{ + $ReadFileFormat = "fasta"; +} +elsif(substr($checkoneline,0,1) eq "@") +{ + $ReadFileFormat = "fastq"; +} +else +{ + die("CHECK: ERROR:Please check $ReadFile1Name\n"); +} +close(check); + +open(check,$ReadFile2Name) or die ("CHECK: Error when opening $ReadFile2Name\n"); +my $checkoneline = <check>; +if(substr($checkoneline,0,1) eq ">") +{ + die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fasta"); +} +elsif(substr($checkoneline,0,1) eq "@") +{ + die("CHECK: $ReadFile2Name has a different format as $ReadFile1Name\n") if ($ReadFileFormat ne "fastq"); +} +else +{ + die("CHECK: ERROR:Please check $ReadFile2Name\n"); +} +close(check); + +$Outputfolder= Cwd::abs_path($Outputfolder); +if($Outputfolder eq "/tmp") +{ + while(-e $Outputfolder) + { + my $random_foldername = random_sessid(); + $Outputfolder = "/tmp/".$random_foldername; + } +} + + +if(! -e $Outputfolder) +{ + mkdir $Outputfolder or die "CHECK: cannot mkdir $Outputfolder\n"; +} +if(! 
-d $Outputfolder) +{ + die "CHECK: $Outputfolder is not a folder\n"; +} + +if($MapSoftware eq "bowtie") +{ + print "CHECK: whether bowtie installed and in PATH\n"; + my $bowtiechecker=`bowtie --version`; + if($bowtiechecker ne "") + { + print "CHECK: bowtie found, information below:\n"; + print $bowtiechecker,"\n"; + } + else + { + die "CHECK: No bowtie found in PATH, EXIT!\n"; + } +} +elsif($MapSoftware eq "rmap") +{ + print "CHECK: checking rmap...\n"; + if(system("type rmap &>/dev/null") ==0 ) + { + print "CHECK: rmap found, continue\n"; + } + else + { + die "CHECK: No rmap found in PATH, EXIT!\n"; + } +} +else +{ + die "CHECK: option -m only takes rmap or bowtie as inputs\n"; +} + +if($ReadSize == 0) +{ + die "CHECK: Please check option -s Read size\n"; +} + +if($noIRM) +{ + $noIRMstr= "noirm"; +} + +#write more checks later +print "PARAMETERS:\tMapping software: ",$MapSoftware,"\n"; +print "PARAMETERS:\tDatabase prefix: ",$DatabasePrefix,"\n"; +print "PARAMETERS:\tRead end 1: ",$ReadFile1Name,"\n"; +print "PARAMETERS:\tRead end 2: ",$ReadFile2Name,"\n" if($ReadFile2Name ne $ReadFile1Name); +print "PARAMETERS:\tCutoff level: ",$CutoffLevel,"\n"; +print "PARAMETERS:\tJunction reads.min:",$JunctionCut,"\n"; +print "PARAMETERS:\tOutput folder: ",$Outputfolder,"\n"; +print "PARAMETERS:\tOutput prefix: ",$OutputPrefix,"\n"; +print "PARAMETERS:\tRead size: ",$ReadSize,"\n"; +print "PARAMETERS:\tBowtie threads #: ",$BowtieThreads,"\n"; +print "PARAMETERS:\tNo IRM.\n" if ($noIRM); + +if($MapSoftware eq "bowtie") +{ + print "=================STAGE 1 MAPPING===================\n"; + system("bash $SrcFolder/mapping_bowtie.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads"); + system("bash $SrcFolder/mapping_bowtie.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder $BowtieThreads") if($ReadFile2Name ne $ReadFile1Name); + print "=================STAGE 2 ESTIMATION================\n"; + + system("bash 
$SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; + print "=================STAGE 3 CUTOFF====================\n"; + system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); + + +} + +if($MapSoftware eq "rmap") +{ + print "=================STAGE 1 MAPPING===================\n"; + + system("bash $SrcFolder/mapping_rmap.sh $ReadFile1Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") ; + system("bash $SrcFolder/mapping_rmap.sh $ReadFile2Name $ReadFileFormat $DatabasePrefix $Outputfolder $SrcFolder") if($ReadFile2Name ne $ReadFile1Name); + print "=================STAGE 2 ESTIMATION================\n"; + + system("bash $SrcFolder/batch_para_cov10p_fit.sh $ReadFile1Name $ReadFile2Name $OutputPrefix $ReadSize $DatabasePrefix $Outputfolder $SrcFolder $noIRMstr") ; + print "=================STAGE 3 CUTOFF====================\n"; + system("bash $SrcFolder/apply_cutoff.sh $OutputPrefix $CutoffLevel $Outputfolder $JunctionCut $SrcFolder $noIRMstr"); + + +} + +print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n"; + +if($onGalaxy ne "") +{ + system("grep -v na $Outputfolder/$OutputPrefix.txt >$onGalaxy"); +} + +sub random_sessid +{ + #my @chars = (0..9,a..z,A..Z); + my @chars = ('a'..'z','A'..'Z'); + my $len = 10; + my $string = join '', map {$chars[rand(@chars)]} (1..$len); + return $string; + } + |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/SpliceTrap_measure.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/SpliceTrap_measure.pl Thu Oct 12 16:26:36 2017 -0400 |
b |
# Author: wuj@cshl.edu
#
# SpliceTrap measurement driver (v0.82).
# Links the two read files into the working directory and then runs the three
# pipeline stages (mapping, estimation, cutoff) by calling the helper shell
# scripts located in $SrcFolder.
#
# NOTE(review): this driver passes 2-4 positional arguments to the helper
# scripts, while batch_para_cov10p_fit.sh and apply_cutoff.sh in this same
# changeset read up to 8 positional arguments (database prefix, output folder,
# source folder, ...). It looks like a legacy entry point - confirm before use.
use strict;
use File::Basename;
use Getopt::Long;
####################
# The per-user ini file is a Perl fragment that evaluates to a key/value list.
my $IniFile="$ENV{HOME}/.SpliceTrap.pl.ini";
my @IniValues=do $IniFile;
# "do" yields a single undef when the file is missing or broken; treat any
# odd-sized result as "no configuration" instead of building a bogus hash.
my %config=(@IniValues % 2 == 0) ? @IniValues : ();
my $SrcFolder=$config{SrcFolder};
#my $SrcFolder="/data/zhang/wuj/tools/SpliceTrap.0.8";
if(not defined $SrcFolder or $SrcFolder eq "")
{
	die "CHECK: SrcFolder is not defined in $IniFile\n";
}
####################
my $MapSoftware="eland";
my $ReadFileFormat="";
my $ReadFile1Name="";
my $ReadFile2Name="";
my $CutoffLevel="H";
my $OutputPrefix="Result";
my $CutoffOnly=0;
my $ReadSize=36;

GetOptions (
	"m:s"=>\$MapSoftware,
	"f:s"=>\$ReadFileFormat,
	"1:s"=>\$ReadFile1Name,
	"2:s"=>\$ReadFile2Name,
	"c:s"=>\$CutoffLevel,
	"o:s"=>\$OutputPrefix,
	"s:i"=>\$ReadSize,
#	"local:s"=>\$local,
	"rerun"=>\$CutoffOnly
);


my $InputParaDes="	Usage of the script (v0.82):
	-m	Mapping software: eland/bowtie/rmap
	-f	Read File Format: fasta/fastq
	-1	Read File 1
	-2	Read File 2
	-c	Cutoff Level:H/M/L
		Means High, Middle or Low
	-o	Output prefix
	-s	Read Size 36
	--rerun	Only run the last step, which is filtering
";

if($ReadFile1Name eq "" or $ReadFile2Name eq "" or $ReadFileFormat eq "")
{
	print $InputParaDes;
	exit;
}

if($ReadFileFormat ne "fastq" and $ReadFileFormat ne "fasta")
{
	print $InputParaDes;
	exit;
}

if($CutoffLevel ne "H" and $CutoffLevel ne "M" and $CutoffLevel ne "L")
{
	print $InputParaDes;
	exit;
}

# Only bowtie and rmap have a mapping script. Previously any other value
# (including the default "eland") skipped every stage and still printed
# "ALL DONE".
if($MapSoftware ne "bowtie" and $MapSoftware ne "rmap")
{
	die "CHECK: mapping with '$MapSoftware' is not implemented, use -m bowtie or -m rmap\n";
}

# Make both read files reachable by their base name from the working
# directory. dirname()/basename() replace the former backtick calls, whose
# un-chomped output made the 'ne "."' test always true.
foreach my $readfile ($ReadFile1Name, $ReadFile2Name)
{
	my $base=basename($readfile);
	next if(dirname($readfile) eq ".");
	next if(-e $base or -l $base);
	symlink($readfile, $base) or warn "CHECK: could not link $readfile into the working directory: $!\n";
}

$ReadFile1Name = basename($ReadFile1Name);
$ReadFile2Name = basename($ReadFile2Name);

my $start = time;
my $MappingScript="$SrcFolder/mapping_$MapSoftware.sh";

print "=================STAGE 1 MAPPING===================\n";
if(not $CutoffOnly)
{
	run_step($MappingScript, $ReadFile1Name, $ReadFileFormat);
	run_step($MappingScript, $ReadFile2Name, $ReadFileFormat);
}
print "STAGE 1 FINISHED IN ",time-$start," seconds\n";

print "=================STAGE 2 ESTIMATION================\n";
if(not $CutoffOnly)
{
	run_step("$SrcFolder/batch_para_cov10p_fit.sh", $ReadFile1Name, $ReadFile2Name, $OutputPrefix, $ReadSize);
}
print "STAGE 2 FINISHED IN ",time-$start," seconds\n";

print "=================STAGE 3 CUTOFF====================\n";
run_step("$SrcFolder/apply_cutoff.sh", $OutputPrefix, $CutoffLevel);
print "STAGE 3 FINISHED IN ",time-$start," seconds\n";

print "============ALL DONE, OUTPUTFILE:$OutputPrefix.txt\n";

# Run one helper script through bash. The list form of system() keeps file
# names containing spaces or shell metacharacters intact. A failing helper is
# reported but does not abort the run, matching the previous best-effort flow.
sub run_step
{
	my @cmd=@_;
	if(system("bash", @cmd) != 0)
	{
		warn "WARNING: 'bash @cmd' exited with status ".($? >> 8)."\n";
	}
}
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/TXdbgen --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/TXdbgen Thu Oct 12 16:26:36 2017 -0400 |
b |
#!/usr/bin/perl
my $SrcFolder="/home/galaxy/galaxy-dist/tools/SpliceTrap.0.90.1/bin";
# this script is to generate TXdb database files from bed/gtf file
#
# Steps: optional gtf->bed conversion, scan for AS events, fetch sequences,
# split the database for parallel runs, build the bowtie index, and finally
# move the cache folder to $SrcFolder/../db/<txdb name>.

use strict;
use Cwd;
use File::Basename;
use Getopt::Long;

my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
foreach my $program (@programs)
{
	die ("CHECK: $program not found\n") if(system("hash $program >/dev/null 2>&1"));
}


my $genomedir = "";

my $annofilename = "";
my $txdbname = "userdefined";
my $knownonly = 0;	# accepted on the command line but not used below
my $gtfinput = 0;

GetOptions (
	"g:s"=>\$genomedir,
	"a:s"=>\$annofilename,
	"n:s"=>\$txdbname,
	"gtf"=>\$gtfinput,
	"knownonly"=>\$knownonly
);

my $InputParaDes="	Usage of the script:
	-g	genome fasta file location
	-a	annotation file (bed/gtf)
	-n	txdb name
	--gtf	specify this if annotation file is in gtf format
";

if($genomedir eq "" or $annofilename eq "")
{
	print $InputParaDes;
	exit;
}

# Check the inputs before resolving them: Cwd::abs_path can return undef for
# a path that does not exist.
die "TXDBGEN: genome folder $genomedir not found\n" if(! -d $genomedir);
die "TXDBGEN: annotation file $annofilename not found\n" if(! -e $annofilename);
$genomedir = Cwd::abs_path($genomedir);
$annofilename = Cwd::abs_path($annofilename);

# Refuse to continue when the target exists: the final "mv" would otherwise
# silently nest the cache folder inside the existing database directory.
my $dbfolder = "$SrcFolder/../db/$txdbname";
die "TXDBGEN: $dbfolder already exists, choose another name with -n\n" if(-e $dbfolder);

my $annofilebase = basename($annofilename);
#need a cache folder to avoid mess

my $cachefolder = $annofilebase.".cache";

make_dir($cachefolder, "cache folder $cachefolder");
if($gtfinput)
{
	print "TXDBGEN: converting gtf file into bed format\n";
	run("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed");
	$annofilename = "$cachefolder/$annofilebase.bed";
}


print "TXDBGEN: scan $annofilename for AS events...\n";
run("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp");
print "TXDBGEN: fetch sequences from $genomedir...\n";
run("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort");
#get fasta file list
run("ls $genomedir/*.fa >$cachefolder/chr.list");

run("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta");

print "TXDBGEN: generate files for parallel computing...\n";
make_dir("$cachefolder/parallel", "$cachefolder/parallel");
run("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed");
unlink("$cachefolder/out.bed");
run("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi");
unlink("$cachefolder/TXdb.tmp.evi");
# splitdb.sh is not visible here and its exit status is unknown, so a
# non-zero status is only reported, not treated as fatal.
try_run("bash $SrcFolder/splitdb.sh $cachefolder/parallel");
print "TXDBGEN: build Bowtie index...\n";

make_dir("$cachefolder/btw", "$cachefolder/btw");
run("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb");
try_run("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list");
print "TXDBGEN: Copy files to $dbfolder\n";

run("mv $cachefolder $dbfolder");
print "TXDBGEN: Done!\n";

# Run a shell command and stop the build when it fails, so that "Done!" is
# never printed after a broken step.
sub run
{
	my ($cmd) = @_;
	system($cmd) == 0 or die "TXDBGEN: command failed (status ".($? >> 8)."): $cmd\n";
}

# Run a shell command whose failure should only be reported.
sub try_run
{
	my ($cmd) = @_;
	system($cmd) == 0 or warn "TXDBGEN: warning, command returned status ".($? >> 8).": $cmd\n";
}

# Create a directory unless it is already there.
sub make_dir
{
	my ($dir, $label) = @_;
	return if(-e $dir);
	mkdir $dir or die "TXDBGEN: could not create $label\n";
}
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/TXdbgen.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/TXdbgen.pl Thu Oct 12 16:26:36 2017 -0400 |
b |
# this script is to generate TXdb database files from bed/gtf file
#
# Steps: optional gtf->bed conversion, scan for AS events, fetch sequences,
# split the database for parallel runs, build the bowtie index, and finally
# move the cache folder to $SrcFolder/../db/<txdb name>.
#
# NOTE(review): $SrcFolder is used but never declared in this file, so it does
# not compile on its own under "use strict". bin/TXdbgen is the same script
# with a shebang and a "my $SrcFolder=..." line in front - presumably the
# installer prepends those two lines to this template. Confirm before adding a
# declaration here.

use strict;
use Cwd;
use File::Basename;
use Getopt::Long;

my @programs = ('split','bowtie-build','sort', 'uniq', 'ls','bash','rm','mv','cut','grep','echo');
foreach my $program (@programs)
{
	die ("CHECK: $program not found\n") if(system("hash $program >/dev/null 2>&1"));
}


my $genomedir = "";

my $annofilename = "";
my $txdbname = "userdefined";
my $knownonly = 0;	# accepted on the command line but not used below
my $gtfinput = 0;

GetOptions (
	"g:s"=>\$genomedir,
	"a:s"=>\$annofilename,
	"n:s"=>\$txdbname,
	"gtf"=>\$gtfinput,
	"knownonly"=>\$knownonly
);

my $InputParaDes="	Usage of the script:
	-g	genome fasta file location
	-a	annotation file (bed/gtf)
	-n	txdb name
	--gtf	specify this if annotation file is in gtf format
";

if($genomedir eq "" or $annofilename eq "")
{
	print $InputParaDes;
	exit;
}

# Check the inputs before resolving them: Cwd::abs_path can return undef for
# a path that does not exist.
die "TXDBGEN: genome folder $genomedir not found\n" if(! -d $genomedir);
die "TXDBGEN: annotation file $annofilename not found\n" if(! -e $annofilename);
$genomedir = Cwd::abs_path($genomedir);
$annofilename = Cwd::abs_path($annofilename);

# Refuse to continue when the target exists: the final "mv" would otherwise
# silently nest the cache folder inside the existing database directory.
my $dbfolder = "$SrcFolder/../db/$txdbname";
die "TXDBGEN: $dbfolder already exists, choose another name with -n\n" if(-e $dbfolder);

my $annofilebase = basename($annofilename);
#need a cache folder to avoid mess

my $cachefolder = $annofilebase.".cache";

make_dir($cachefolder, "cache folder $cachefolder");
if($gtfinput)
{
	print "TXDBGEN: converting gtf file into bed format\n";
	run("perl $SrcFolder/gtf2bed.pl $annofilename >$cachefolder/$annofilebase.bed");
	$annofilename = "$cachefolder/$annofilebase.bed";
}


print "TXDBGEN: scan $annofilename for AS events...\n";
run("perl $SrcFolder/scanbed2txdb.pl $annofilename $cachefolder/TXdb.tmp");
print "TXDBGEN: fetch sequences from $genomedir...\n";
run("sort -k1,1 $cachefolder/TXdb.tmp >$cachefolder/TXdb.tmp.sort");
#get fasta file list
run("ls $genomedir/*.fa >$cachefolder/chr.list");

run("perl $SrcFolder/get_bed_fa_j.pl $cachefolder/TXdb.tmp.sort $cachefolder/chr.list $cachefolder/out.bed $cachefolder/TXdb.fasta");

print "TXDBGEN: generate files for parallel computing...\n";
make_dir("$cachefolder/parallel", "$cachefolder/parallel");
run("grep L $cachefolder/out.bed >$cachefolder/TXdb.bed");
unlink("$cachefolder/out.bed");
run("sort $cachefolder/TXdb.tmp.evi >$cachefolder/TXdb.evi");
unlink("$cachefolder/TXdb.tmp.evi");
# splitdb.sh is not visible here and its exit status is unknown, so a
# non-zero status is only reported, not treated as fatal.
try_run("bash $SrcFolder/splitdb.sh $cachefolder/parallel");
print "TXDBGEN: build Bowtie index...\n";

make_dir("$cachefolder/btw", "$cachefolder/btw");
run("bowtie-build $cachefolder/TXdb.fasta $cachefolder/btw/TXdb");
try_run("rm $cachefolder/TXdb.tmp* $cachefolder/chr.list");
print "TXDBGEN: Copy files to $dbfolder\n";

run("mv $cachefolder $dbfolder");
print "TXDBGEN: Done!\n";

# Run a shell command and stop the build when it fails, so that "Done!" is
# never printed after a broken step.
sub run
{
	my ($cmd) = @_;
	system($cmd) == 0 or die "TXDBGEN: command failed (status ".($? >> 8)."): $cmd\n";
}

# Run a shell command whose failure should only be reported.
sub try_run
{
	my ($cmd) = @_;
	system($cmd) == 0 or warn "TXDBGEN: warning, command returned status ".($? >> 8).": $cmd\n";
}

# Create a directory unless it is already there.
sub make_dir
{
	my ($dir, $label) = @_;
	return if(-e $dir);
	mkdir $dir or die "TXDBGEN: could not create $label\n";
}
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/apply_cutoff.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/apply_cutoff.sh Thu Oct 12 16:26:36 2017 -0400 |
[ |
#SrcFolder="/data/zhang/wuj/scripts/SpliceTrap.0.8";
#
# Cutoff step: splits <prefix>.ratio into chunks, runs ApplyCutoff.jie.pl on
# every chunk through batchqsub.pl and merges the result into <prefix>.txt.
#
# Usage: bash apply_cutoff.sh <output prefix> <cutoff level> <output folder> \
#                             <junction cut> <src folder> [noirm]

if [ $# -lt 5 ]; then
	echo "CUTOFF: usage: apply_cutoff.sh <output prefix> <cutoff level> <output folder> <junction cut> <src folder> [noirm]" >&2
	exit 1
fi

outputname=$1
CutoffLevel=$2
Outputfolder=$3
JunctionCut=$4
SrcFolder=$5
# SpliceTrap.pl passes the optional "noirm" marker as the 6th argument; the
# script used to read $8, which that caller never sets. $8 is still honoured
# as a fallback for any caller that relied on the old position.
noIRM=${6:-$8}

echo "CUTOFF: Entering cutoff step..."
echo "CUTOFF: Cache folder: $outputname.filter"
# Start from an empty cache folder so chunks left by an aborted run are not
# picked up by the x* glob below.
rm -rf "$Outputfolder/$outputname.filter"
mkdir -p "$Outputfolder/$outputname.filter" || exit 1
cd "$Outputfolder/$outputname.filter" || exit 1
ln -s "../$outputname.ratio"
ln -s "../$outputname.nums"
echo "CUTOFF: spliting file....and generating shell scripts..."
split -l 11000 "$outputname.ratio"

# One command line per chunk; batchqsub.pl runs each line as its own job.
for ratiofiles in x*
do
	# with an empty .ratio file the glob stays unexpanded - skip it
	[ -e "$ratiofiles" ] || continue
	echo "perl $SrcFolder/ApplyCutoff.jie.pl $ratiofiles $CutoffLevel $JunctionCut $noIRM > $ratiofiles.out"
done >filter.sh

echo "CUTOFF: submit scripts..."
perl "$SrcFolder/batchqsub.pl" filter.sh
echo "CUTOFF: merging file...."
cat *.out >"../$outputname.raw"
cd ../ || exit 1
#perl /data/zhang/wuj/tools/SpliceTrap.0.8/ApplyCutoff.jie.pl $outputname.ratio $outputname.nums 8 >$outputname.txt

# The second output column comes from column 2 of the raw file without IRM
# correction and from column 3 otherwise. print with OFS is used instead of
# printf so a literal % in the data is not interpreted as a format.
if [ "$noIRM" ]; then
	ratiocol=2
else
	ratiocol=3
fi
awk -v rc="$ratiocol" 'BEGIN{OFS="\t"} {print $22,$rc,$4,$5,$6,$7,$8,$12,$13,$14,$15}' "$outputname.raw" >"$outputname.txt"
rm -rf "$outputname.filter"
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/batch_para_cov10p_fit.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/batch_para_cov10p_fit.sh Thu Oct 12 16:26:36 2017 -0400 |
[ |
#!/bin/bash
# Estimation step: runs Pair_estimate_c on every database chunk (loop 0) and,
# unless IRM correction is disabled, a second time with the fitted
# distribution (loop 1). Results end up in <output folder>/<prefix>.ratio,
# .log and .nums.
#
# Usage: bash batch_para_cov10p_fit.sh <read file 1> <read file 2> <output prefix> \
#            <read size> <database prefix> <output folder> <src folder> [noirm]
ReadFile1Name=$(basename "$1")
ReadFile2Name=$(basename "$2")
outputname=$3
readsize=$4
DatabasePrefix=$5
Outputfolder=$6
SrcFolder=$7
noIRM=$8

dbdir="$SrcFolder/../db/$DatabasePrefix/parallel"

# Write one Pair_estimate_c command per database chunk into r<loop>.sh.
#   $1  loop index
#   $2  extra options inserted before "-s" (empty, or with a trailing blank)
generate_jobs()
{
	local loop=$1 extra=$2 chr dbfile base
	while read -r chr
	do
		for dbfile in "$dbdir/$chr".*
		do
			# an unmatched glob stays literal - skip it
			[ -e "$dbfile" ] || continue
			base=$(basename "$dbfile")
			echo "$SrcFolder/Pair_estimate_c -f $ReadFile1Name.nomt.fragsize.hist -o $ReadFile1Name.$loop.$base -d $dbfile -1 $ReadFile1Name.nomt.$chr -2 $ReadFile2FinalName.$chr ${extra}-s $readsize"
		done
	done <"$dbdir/chr.list" >"r$loop.sh"
}

# Merge the per-chunk outputs of loop $1 into $2.ratio/.log/.nums and remove
# the pieces.
collect_results()
{
	local loop=$1 dest=$2 ext
	for ext in ratio log nums
	do
		cat "$ReadFile1Name.$loop".*."$ext" >"$dest.$ext"
		rm "$ReadFile1Name.$loop".*."$ext"
	done
}

cd "$Outputfolder" || exit 1
if [ "$ReadFile1Name" != "$ReadFile2Name" ]; then
	echo "ESTIMATE: Getting fragment size information from data..."
	perl "$SrcFolder/get.frag.size.pl" "$ReadFile1Name.nomt" "$ReadFile2Name.nomt" "$readsize"
	perl "$SrcFolder/get.hist.pl" "$ReadFile1Name.nomt.fragsize" -w=1 -c=1
	ReadFile2FinalName=$ReadFile2Name.nomt
else
	echo "ESTIMATE: Generating the other half of reads..."
	# Single-end input: one "NM" placeholder line per read, written in one
	# pass instead of one appending echo per line.
	awk '{print "NM"}' "$ReadFile1Name.nomt" >"$ReadFile1Name.f.nomt"
	echo "#Width:1" >"$ReadFile1Name.nomt.fragsize.hist"
	ReadFile2FinalName=$ReadFile1Name.f.nomt
fi
echo "ESTIMATE: Creating cache folder.."

# Start from an empty cache folder so leftovers of an aborted run cannot mix
# into the merged results.
rm -rf "$ReadFile1Name.result"
mkdir -p "$ReadFile1Name.result" || exit 1
cd "$ReadFile1Name.result" || exit 1
ln -s "../$ReadFile1Name.nomt" ./
ln -s "../$ReadFile2FinalName" ./
ln -s "../$ReadFile1Name.nomt.fragsize.hist" ./
echo "ESTIMATE: Split mapping results via chromosomes..."
perl "$SrcFolder/scan_nomt.pl" "$ReadFile1Name.nomt" "$ReadFile2FinalName"

loopi=0
echo "ESTIMATE: Generating shell scripts for Loop $loopi..."
generate_jobs "$loopi" ""
echo "ESTIMATE: Submit shell scripts for Loop $loopi..."
perl "$SrcFolder/batchqsub.pl" "r$loopi.sh"
echo "ESTIMATE: Loop $loopi done..."
collect_results "$loopi" "$outputname.$loopi"

if [ "$noIRM" ]; then
	echo "ESTIMATE: No IRM correction, skipped..."
	mv "$outputname.$loopi.ratio" "$outputname.ratio"
	mv "$outputname.$loopi.log" "$outputname.log"
	mv "$outputname.$loopi.nums" "$outputname.nums"
else
	echo "ESTIMATE: derive IRMs from data..."
	# keep rows whose 15th column is at least 10; a plain pattern rule prints
	# the line as-is, so a literal % in the data is harmless
	awk '$15>=10' "$outputname.$loopi.ratio" >"$outputname.mle"
	perl "$SrcFolder/get_event_dist_fit.pl" "$outputname.mle" -c=2 -w=0.001

	loopi=1
	echo "ESTIMATE: Generating shell scripts for Loop $loopi..."
	generate_jobs "$loopi" "-b $outputname.mle.fit.hist "
	echo "ESTIMATE: Submit shell scripts for Loop $loopi..."

#perl $SrcFolder/qsub/batchqsub.pl r$loopi.sh $taskname
	perl "$SrcFolder/batchqsub.pl" "r$loopi.sh"
	echo "ESTIMATE: Loop $loopi done..."
	collect_results "$loopi" "$outputname"
fi

mv "$outputname.ratio" "$outputname.log" "$outputname.nums" ../
cd ../ || exit 1
rm -rf "$ReadFile1Name.result"
rm "$ReadFile1Name.nomt" "$ReadFile2FinalName"
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/batchqsub.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/batchqsub.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
# modified from Chenghai Xue's script
#
# Usage: perl batchqsub.pl <command list file> [task name]
#
# Submits every line of the command list as its own qsub job and waits until
# qstat no longer lists them. When no working qsub is found the command list
# is run serially with sh instead.
use strict;
use File::Basename;

my $performListFile = $ARGV[0];
if(not defined $performListFile or $performListFile eq "")
{
	die "Usage: perl $0 <command list file> [task name]\n";
}

#$qsub_checker=0;
if(not qsub_works())
{
	print "QSUB: No GRID qsub found\n";
	print "QSUB: if you are using PBS qsub, please wait for the next version! Thanks.\n";
	print "QSUB: Running in serial mode...\n";
	system("sh", $performListFile);
	# hand the result of the serial run back to the caller
	exit($? == 0 ? 0 : 1);
}

my $taskname = $ARGV[1];
if (not $taskname)
{
	$taskname=random_sessid();
}
#$outfullDir = $ARGV[2];
my $outfullDir ="qsub_cache";

# correct path
if(! (-d $outfullDir) ){
	mkdir $outfullDir or die "can not create folder $outfullDir : $!\n";
}

# create a temp cache
my $prog = basename($0);
my $cache = $outfullDir."/".$prog."_".$taskname;
if(! (-d $cache) ){
	mkdir $cache or die "can not create folder $cache : $!\n";
}

open (my $list_in, "<", $performListFile) or die "can not open file $performListFile to read\n";
my @performList = (<$list_in>);
chomp @performList;
close $list_in or die "can't close the input file : $!";

# One script per command line; their paths are also recorded in a list file.
my @scriptFiles;
my $scriptListFile = $outfullDir."/".$taskname."_scripts.list";
open (my $script_list, ">", $scriptListFile) or die "can not open file $scriptListFile to write\n";
for(my $i=0; $i<@performList; $i++){
	next if($performList[$i] !~ /\S/);	# nothing to run on a blank line
	my $scriptFile = $outfullDir."/".$taskname."_script$i.sh";
	print $script_list "$scriptFile\n";
	open (my $script_out, ">", $scriptFile) or die "can not open file $scriptFile to write\n";
#	print FOUT "#!/usr/bin/sh\n";
	print $script_out "$performList[$i]\n";
	close ($script_out) or die "can't close $scriptFile : $!";
	push @scriptFiles, $scriptFile;
}
close ($script_list) or die "can't close $scriptListFile : $!";

# Job names are <task>_<list file name>.<script file name>.
$taskname=$taskname."_".basename($performListFile);
foreach my $f (@scriptFiles){
	my $base = basename($f);

	#use default queues
	#system ("qsub -l virtual_free=1.7G -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f");
	if(system ("qsub", "-cwd", "-v", "TMPDIR=$cache", "-V", "-e", $cache, "-o", $cache, "-N", "$taskname.$base", $f) != 0)
	{
		warn "QSUB: submission of $f failed\n";
	}
}

#start to check stats of qsub tasks
#######################################
# only the first 10 characters of the job name are used to find it in qstat
my $taskname_query=substr($taskname,0,10);
wait_for_jobs($taskname_query, 60, "tasks running", $taskname);
print "QSUB: done: $taskname \n";
######################################

# Submit a trivial job that creates a marker directory and report whether the
# directory shows up, i.e. whether qsub really executes jobs here.
sub qsub_works
{
	return 0 if(system("hash qsub >/dev/null 2>&1"));

	my $test_randname=random_sessid();
	my $test_script="$test_randname/$test_randname.sh";
	mkdir $test_randname or return 0;
	mkdir "$test_randname/qsub_cache";
	if(open(my $test_out, ">", $test_script))
	{
		print $test_out "mkdir $test_randname/$test_randname\n";
		close($test_out);
	}
	system ("qsub", "-cwd", "-v", "TMPDIR=$test_randname/qsub_cache", "-V", "-e", "$test_randname/qsub_cache", "-o", "$test_randname/qsub_cache", "-N", $test_randname, $test_script);

	wait_for_jobs($test_randname, 5, "testing tasks running", $test_randname);
	print "QSUB: testing done\n";
	print "$test_randname/$test_randname\n";
	my $works=0;
	if(-d "$test_randname/$test_randname")
	{
		$works=1;
		print "QSUB: working well!\n";
	}
#$qsub_checker=`qsub </dev/null 2>&1|grep stdin|wc -l`;
	system("rm", "-rf", $test_randname);
	return $works;
}

# Poll qstat until three polls in a row show no line containing $pattern.
# Sleeps $busy_sleep seconds while jobs are listed and 10 seconds between the
# confirming empty polls.
sub wait_for_jobs
{
	my ($pattern, $busy_sleep, $what, $label) = @_;
	my $idle_polls=0;
	while(1)
	{
		# count matching qstat lines in Perl; no shell pipeline needed
		my $running = grep { index($_, $pattern) >= 0 } `qstat`;
		my $sec;
		if ($running == 0)
		{
			$idle_polls++;
			last if ($idle_polls==3);
			$sec=10;
		}
		else
		{
			$idle_polls=0;
			$sec=$busy_sleep;
		}
		print "QSTAT: $running $what.....$label\n";
		sleep($sec);
	}
}

# Return a random string of 10 ASCII letters.
sub random_sessid
{
	#my @chars = (0..9,a..z,A..Z);
	my @chars = ('a'..'z','A'..'Z');
	my $len = 10;
	my $string = join '', map {$chars[rand(@chars)]} (1..$len);
	return $string;
}
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/batchqsub.pl_orig --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/batchqsub.pl_orig Thu Oct 12 16:26:36 2017 -0400 |
[ |
# modified from Chenghai Xue's script
#
# Usage: perl batchqsub.pl <command list file> [task name]
#
# Submits every line of the command list as its own qsub job and waits until
# qstat no longer lists them. When no working qsub is found the command list
# is run serially with sh instead.
use strict;
use File::Basename;

my $performListFile = $ARGV[0];
if(not defined $performListFile or $performListFile eq "")
{
	die "Usage: perl $0 <command list file> [task name]\n";
}

#$qsub_checker=0;
if(not qsub_works())
{
	print "QSUB: No GRID qsub found\n";
	print "QSUB: if you are using PBS qsub, please wait for the next version! Thanks.\n";
	print "QSUB: Running in serial mode...\n";
	system("sh", $performListFile);
	# hand the result of the serial run back to the caller
	exit($? == 0 ? 0 : 1);
}

my $taskname = $ARGV[1];
if (not $taskname)
{
	$taskname=random_sessid();
}
#$outfullDir = $ARGV[2];
my $outfullDir ="qsub_cache";

# correct path
if(! (-d $outfullDir) ){
	mkdir $outfullDir or die "can not create folder $outfullDir : $!\n";
}

# create a temp cache
my $prog = basename($0);
my $cache = $outfullDir."/".$prog."_".$taskname;
if(! (-d $cache) ){
	mkdir $cache or die "can not create folder $cache : $!\n";
}

open (my $list_in, "<", $performListFile) or die "can not open file $performListFile to read\n";
my @performList = (<$list_in>);
chomp @performList;
close $list_in or die "can't close the input file : $!";

# One script per command line; their paths are also recorded in a list file.
my @scriptFiles;
my $scriptListFile = $outfullDir."/".$taskname."_scripts.list";
open (my $script_list, ">", $scriptListFile) or die "can not open file $scriptListFile to write\n";
for(my $i=0; $i<@performList; $i++){
	next if($performList[$i] !~ /\S/);	# nothing to run on a blank line
	my $scriptFile = $outfullDir."/".$taskname."_script$i.sh";
	print $script_list "$scriptFile\n";
	open (my $script_out, ">", $scriptFile) or die "can not open file $scriptFile to write\n";
#	print FOUT "#!/usr/bin/sh\n";
	print $script_out "$performList[$i]\n";
	close ($script_out) or die "can't close $scriptFile : $!";
	push @scriptFiles, $scriptFile;
}
close ($script_list) or die "can't close $scriptListFile : $!";

# Job names are <task>_<list file name>.<script file name>.
$taskname=$taskname."_".basename($performListFile);
foreach my $f (@scriptFiles){
	my $base = basename($f);

	#use default queues
	#system ("qsub -l virtual_free=1.7G -cwd -v TMPDIR=$cache -V -e $cache -o $cache -N $taskname.$base $f");
	if(system ("qsub", "-cwd", "-v", "TMPDIR=$cache", "-V", "-e", $cache, "-o", $cache, "-N", "$taskname.$base", $f) != 0)
	{
		warn "QSUB: submission of $f failed\n";
	}
}

#start to check stats of qsub tasks
#######################################
# only the first 10 characters of the job name are used to find it in qstat
my $taskname_query=substr($taskname,0,10);
wait_for_jobs($taskname_query, 60, "tasks running", $taskname);
print "QSUB: done: $taskname \n";
######################################

# Submit a trivial job that creates a marker directory and report whether the
# directory shows up, i.e. whether qsub really executes jobs here.
sub qsub_works
{
	return 0 if(system("hash qsub >/dev/null 2>&1"));

	my $test_randname=random_sessid();
	my $test_script="$test_randname/$test_randname.sh";
	mkdir $test_randname or return 0;
	mkdir "$test_randname/qsub_cache";
	if(open(my $test_out, ">", $test_script))
	{
		print $test_out "mkdir $test_randname/$test_randname\n";
		close($test_out);
	}
	system ("qsub", "-cwd", "-v", "TMPDIR=$test_randname/qsub_cache", "-V", "-e", "$test_randname/qsub_cache", "-o", "$test_randname/qsub_cache", "-N", $test_randname, $test_script);

	wait_for_jobs($test_randname, 5, "testing tasks running", $test_randname);
	print "QSUB: testing done\n";
	print "$test_randname/$test_randname\n";
	my $works=0;
	if(-d "$test_randname/$test_randname")
	{
		$works=1;
		print "QSUB: working well!\n";
	}
#$qsub_checker=`qsub </dev/null 2>&1|grep stdin|wc -l`;
	system("rm", "-rf", $test_randname);
	return $works;
}

# Poll qstat until three polls in a row show no line containing $pattern.
# Sleeps $busy_sleep seconds while jobs are listed and 10 seconds between the
# confirming empty polls.
sub wait_for_jobs
{
	my ($pattern, $busy_sleep, $what, $label) = @_;
	my $idle_polls=0;
	while(1)
	{
		# count matching qstat lines in Perl; no shell pipeline needed
		my $running = grep { index($_, $pattern) >= 0 } `qstat`;
		my $sec;
		if ($running == 0)
		{
			$idle_polls++;
			last if ($idle_polls==3);
			$sec=10;
		}
		else
		{
			$idle_polls=0;
			$sec=$busy_sleep;
		}
		print "QSTAT: $running $what.....$label\n";
		sleep($sec);
	}
}

# Return a random string of 10 ASCII letters.
sub random_sessid
{
	#my @chars = (0..9,a..z,A..Z);
	my @chars = ('a'..'z','A'..'Z');
	my $len = 10;
	my $string = join '', map {$chars[rand(@chars)]} (1..$len);
	return $string;
}
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/beta_fit.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/beta_fit.R Thu Oct 12 16:26:36 2017 -0400 |
[ |
# Fit a beta distribution to the first column of the input file and write a
# 1000-value density table to <input_file>.fit (one value per line).
# When there is no usable data, or the fit fails, the flat default table is
# written instead so the .fit file always exists.
args = commandArgs();
# the input file is expected as the 4th element of the full command line
input_file=args[4];
#input_file="control_a.0.1.flt.ratio.tmpca";
#print (input_file);


library(MASS);

# NOTE(review): the original loops assigned p[i] for i in 0:999. R vectors are
# 1-based, so the i=0 assignment is dropped and p[1000] is never set (it stays
# 0). That exact layout is kept here because the consumer of the .fit file is
# not visible from this script - confirm the intended grid before changing it.
grid=1:999;
p=array(0,dim=1000);
p[grid]=0.001;

# a missing file gives NA here, which is treated like an empty file
input_size=file.info(input_file)[["size"]];

if ( !is.na(input_size) && input_size>0 )
{

data=read.table(input_file);
col=1;
x=data[,col];
x1=x;
if (length(x)>10)
{
	# move exact 0/1 values inside the open interval
	x1[x==0] <- .Machine$double.eps;
	x1[x==1] <- (1-.Machine$double.eps);
	# method-of-moments estimates used as starting values for the fit
	xbar=mean(x1)
	xvar=var(x1)
	a <- (xbar*(1-xbar)/xvar - 1)*xbar
	b <- (1-xbar)*a/xbar
	# fitdistr stops with an error on degenerate data (e.g. zero variance);
	# fall back to the default table instead of leaving no output file
	f=tryCatch(fitdistr(x1,"beta",list(shape1=a,shape2=b)),
		error=function(e)
		{
			message("beta_fit: fit failed, writing default table: ", conditionMessage(e));
			NULL
		});
	if (!is.null(f))
	{
		p[grid]=dbeta(grid/1000,f[["estimate"]][["shape1"]],f[["estimate"]][["shape2"]]);
	}
}

}
write(p,file=paste(input_file,"fit",sep="."),ncolumns=1);
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/bowtie2eland.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/bowtie2eland.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
# Convert bowtie's tab-separated alignment output into the eland-like format
# used by the rest of the pipeline.
#
# Usage: perl bowtie2eland.pl <bowtie output> <reads file> <eland output>
#
# Output, one line per read in reads-file order:
#   unmapped:  <read name>\tNA\tNM
#   mapped:    <read name>\tNA\t<hits>:0:0\t/<id>:<pos+1>F0,/<id>:<pos+1>R0,...
#
# The code that followed the unconditional "exit" in the previous version was
# unreachable and has been removed.
use strict;

if(@ARGV < 3)
{
	die "Usage: perl $0 <bowtie output> <reads file> <eland output>\n";
}
my ($bowtiefilename, $readsfilename, $elandfilename) = @ARGV;

open(my $readsfile, "<", $readsfilename) or die "can not open $readsfilename to read: $!\n";
open(my $bowtiefile, "<", $bowtiefilename) or die "can not open $bowtiefilename to read: $!\n";
open(my $elandfile, ">", $elandfilename) or die "can not open $elandfilename to write: $!\n";

# The first character of the reads file marks header lines (the pipeline
# feeds fasta or fastq). It is read directly instead of shelling out to
# "head -c 1" with an unquoted file name.
my $detectformat = "";
read($readsfile, $detectformat, 1);
seek($readsfile, 0, 0);

# hash the positions of the alignments for each read id
my %readposhash;
my $bowtiepos = tell($bowtiefile);
while (my $bowtieline = <$bowtiefile>)
{
	my ($bowtiereadname) = split("\t", $bowtieline);
	if (not exists $readposhash{$bowtiereadname})
	{
		$readposhash{$bowtiereadname} = $bowtiepos;
	}
	$bowtiepos = tell($bowtiefile);
}

my $readfilelinenum = 0;
while (my $readline = <$readsfile>)
{
	$readfilelinenum++;
	# Header lines sit on odd line numbers; the parity test keeps fastq
	# quality lines that start with '@' from being taken for headers.
	next if($readfilelinenum % 2 != 1);
	next if($readline !~ /^\Q$detectformat\E/);

	chomp($readline);
	my $readname = substr($readline, 1);
	if (not exists $readposhash{$readname})
	{
		print $elandfile $readname, "\tNA\tNM\n";
		next;
	}

	# Collect the consecutive alignment lines of this read, starting at the
	# offset recorded during the first pass.
	my @hits;
	seek($bowtiefile, $readposhash{$readname}, 0);
	while (my $bowtieline = <$bowtiefile>)
	{
		my ($bowtiereadname, $strand, $mapped_id, $pos) = split("\t", $bowtieline);
		last if($bowtiereadname ne $readname);
		# the position is written as bowtie's value plus one
		push(@hits, "/".$mapped_id.":".($pos+1).($strand eq "+" ? "F0," : "R0,"));
	}
	print $elandfile $readname, "\tNA\t", scalar(@hits), ":0:0\t", join("", @hits), "\n";
}

close($elandfile) or die "can not finish writing $elandfilename: $!\n";
close($bowtiefile);
close($readsfile);
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/calc_pval.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/calc_pval.R Thu Oct 12 16:26:36 2017 -0400 |
[ |
# For every value in column 4 of the input file, write a one-sided normal tail
# probability to <input_file>.p (one value per line): the lower tail when the
# value is negative, the upper tail otherwise.
#
# Expected on the full command line: element 4 = input file, 5 = mean,
# 6 = standard deviation. Element 7 used to be parsed as well but was never
# used, and neither were the two FDR adjustments - both are dropped.
args = commandArgs();
input_file=args[4];
av=as.numeric(args[5]);
sd=as.numeric(args[6]);


data=read.table(input_file);

col=4;
x=data[,col];
pup=pnorm(x, mean=av, sd=sd, lower.tail = FALSE);
pdn=pnorm(x, mean=av, sd=sd, lower.tail = TRUE);

# Vectorised selection. A missing value no longer stops the script (the old
# per-element "if" failed on NA); its p-value is simply written as NA.
p=ifelse(!is.na(x) & x<0, pdn, pup);

write(p, file=paste(input_file,"p",sep="."),ncolumns=1);
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/downloaddb.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/downloaddb.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
# Interactive helper: asks which prebuilt databases to fetch, downloads and
# unpacks each selected one into the current directory and runs splitdb.sh
# (from the directory of this script) on its "parallel" sub-folder.
use strict;
use Cwd;
use File::Basename;

$| = 1;	# show the progress text that has no trailing newline right away

my %flags=(
	"hg18"=>0,
	"mm9"=>0,
	"rn4"=>0,
);
my $PROG = $0;
my $PROG_ABS_PATH = dirname(Cwd::abs_path($PROG));
print "\n\tPrepare to download databases from CSHL...\n";

print "\tWhich database(s) do you want to download?\n";
print "\tChoose from\t ";
foreach my $key (sort keys %flags)
{
	print $key,"/";
}
print "ALL \n\n\tseparated by blank...don't enter anything if you don't want to download!\n\nPlease enter:[NONE]";

my $answer = <>;
$answer = "" if(not defined $answer);	# end of input means "nothing"
# drop the empty token that split yields when the answer starts with blanks
my @dbnames = grep { $_ ne "" } split(/\s+/,$answer);
foreach my $dbname (@dbnames)
{
	if (uc($dbname) eq "ALL" )
	{
		foreach my $key (keys %flags)
		{
			$flags{$key}=1;
		}
		last;
	}

	# database names are matched case-insensitively, like "ALL"
	my $key = lc($dbname);
	if( exists $flags{$key})
	{
		$flags{$key} = 1;
	}
	else
	{
		print "\tunknown database '$dbname' ignored\n";
	}
	#system ("wget http://rulai.cshl.edu/splicetrap/db/")
}

foreach my $key (sort keys %flags)
{
	next if($flags{$key} != 1);
	my $archive = "$key.tar.gz";

	# Stop working on a database as soon as one step fails instead of
	# unpacking or splitting something that is not there.
	if(system ("wget", "http://rulai.cshl.edu/splicetrap/db/".$archive) != 0)
	{
		warn "download of $archive failed, skipping $key\n";
		unlink($archive);	# remove a partial download
		next;
	}
	print "untar the database file for $key...please wait...";
	if(system ("tar", "-ixzf", $archive) != 0)
	{
		warn "\ncould not unpack $archive, skipping $key\n";
		next;
	}
	unlink($archive);
	if(! -d "$key/parallel")
	{
		mkdir "$key/parallel" or warn "could not create $key/parallel: $!\n";
	}
	print "creating files for parallel computing...\n";
	system("bash", "$PROG_ABS_PATH/splitdb.sh", "$key/parallel");
}
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/get.frag.size.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/get.frag.size.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
#for the results from paired end
#The two inputs are the results from the two ends
#
# Usage: perl get.frag.size.pl <end 1 file> <end 2 file> <read size>
#
# Reads both files line by line in parallel. For every target name found in
# column 4 of both ends the size |pos2 - pos1| + read size is computed; when
# all targets of a pair agree on one size, that size is written to
# <end 1 file>.fragsize.
use strict;

if(@ARGV < 3)
{
	die "Usage: perl $0 <end 1 file> <end 2 file> <read size>\n";
}
my $read_size = $ARGV[2];
open(my $input1, "<", $ARGV[0]) or die "can not open $ARGV[0] to read: $!\n";
open(my $input2, "<", $ARGV[1]) or die "can not open $ARGV[1] to read: $!\n";
open(my $output, ">", "$ARGV[0].fragsize") or die "can not open $ARGV[0].fragsize to write: $!\n";
#open(fusefile,">$ARGV[0].fuse");

#my $LongMarker="L";
#my $ShortMarker="S";

while(my $line1=<$input1>)
{
	my $line2=<$input2>;
	# the second file ended early: no more pairs to compare
	last if(not defined $line2);
	chomp($line1);
	chomp($line2);
	# The old patterns were written /$\NM/ and /$\MT/. Inside a regex "$\" is
	# the output record separator variable, which is unset here, so they
	# matched "NM" / "MT" anywhere in the line. The same test is spelled out
	# explicitly now.
	# NOTE(review): this also skips lines whose read or target names merely
	# contain "NM" or "MT" - confirm that is intended.
	next if($line1=~/NM/ or $line2=~/NM/ or $line1=~/MT/ or $line2=~/MT/);
	my @array1 = split("\t",$line1);
	#my $read_size=length($array1[1]);
	my @array2 = split("\t",$line2);
	my $match1=$array1[3];
	my $match2=$array2[3];
	next if(not defined $match1 or not defined $match2);
#	my $marker=$LongMarker.$ShortMarker;
	my %saw;	# distinct sizes seen for this pair
	#while($match1=~/\/(\S[^,]*\[[$marker]\])\S[^,]*:(\d*)[RF]/g)
	while($match1=~/\/(\S[^,]*\[\w+\])\S[^,]*:(\d*)[RF]/g)
	{
		my $name=$1;
		my $posa=$2;

		# same target on the other end?
		if($match2=~/\Q$name\E\S[^,]*:(\d*)[RF]/)
		{
			my $posb=$1;
			$saw{abs($posb-$posa)+$read_size}=1;
		}
	}
	my @keya= keys %saw;
	# only unambiguous pairs contribute
	if(scalar(@keya)==1)
	{
		print $output $keya[0],"\n";
	}
}
#close(fusefile);
close($output) or die "can not finish writing $ARGV[0].fragsize: $!\n";
close($input2);
close($input1);
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/get.hist.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/get.hist.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
# Usage: get.hist.pl <input file> [-w width] [-c column] [-start x] [-end x] [-v]
# Builds a normalised histogram of one whitespace-separated column and
# writes it to <input file>.hist, one bin fraction per line after a
# "#Width:" header.  Lines starting with '#' are ignored.
use Getopt::Long;
use strict;

my $width=0.01;
my $verbose=1;
my $col=2;	# 1-based column number
# -start / -end are accepted for compatibility; nothing below reads them.
my $start=0;
my $end=1.000;

GetOptions (
	'w:f'=>\$width,
	'c:i'=>\$col,
	'start:f'=>\$start,
	'end:f'=>\$end,
	'v'=>\$verbose
);

# Take the file name only after option parsing: GetOptions removes the
# options from @ARGV, so this works whether the file is given before or
# after them (reading $ARGV[0] first picked up "-w" when options led).
my $InputFileName = $ARGV[0];
die "usage: $0 <input file> [-w width] [-c column]\n" unless defined $InputFileName;
my $OutputFileName = $InputFileName.".hist";

$width=$width*1;
die "bin width must be positive (got $width)\n" unless $width > 0;

$col=$col-1;	# to 0-based index

my @hist;
my $totalnum=0;

open(my $in, "<", $InputFileName) or die "cannot open $InputFileName: $!\n";
while (my $line = <$in>)
{
	next if ($line =~ /^#/);
	chomp($line);
	my @array = split(/\s/, $line);
	my $value = $array[$col];
	# blank lines / missing columns carry no value to count
	next unless defined $value and $value ne "";
	my $bin = int($value / $width);
	if ($bin < 0)
	{
		# a negative subscript would index from the end of @hist
		warn "$InputFileName: negative value $value ignored\n";
		next;
	}
	$hist[$bin]++;
	$totalnum++;
}
close($in);

open(my $out, ">", $OutputFileName) or die "cannot write $OutputFileName: $!\n";
print $out "#Width:$width\n";
for (my $i = 0; $i < @hist; $i++)
{
	# bins that were never hit are undefined and count as 0
	print $out ($hist[$i] || 0) / $totalnum, "\n";
}
close($out);
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/get_bed_fa_j.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/get_bed_fa_j.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,330 @@\n+# Adapted from Chenghai Xue\'s script\n+\n+$starttime=time();\n+\n+$input_file_1 = $ARGV[0];\t# exon junction file\n+$input_file_2 = $ARGV[1];\t# genome file list\n+$output_file_1 = $ARGV[2];\t# exon junction bed (might be less than input_file_1\n+$output_file_2 = $ARGV[3];\t# exon junction fa\n+#$leftLen = $ARGV[4];\n+#$rightLen = $ARGV[5];\n+\r\n+open(IN_1, "$input_file_1") or die "can\'t open the input file : $!";\n+open(IN_2, "$input_file_2") or die "can\'t open the input file : $!";\n+open OUT_1, ">$output_file_1" or die "Can not open output_file : $!";\r\n+open OUT_2, ">$output_file_2" or die "Can not open output_file : $!";\r\n+\n+@chromList = (<IN_2>);\n+chomp(@chromList);\n+$len_chromList = @chromList;\n+print "BED2FA: in $input_file_2, found $len_chromList chromosomes\\n";\n+foreach $one (@chromList){\n+\tif($one =~ /\\/(chr.[^\\/]*?)\\.*fa$/i){\n+\t\t$chr_hash{$1} = $one;\n+\t\t#print $1,"\\n";\t\n+\t}\n+}\n+@key_chr_hash = keys(%chr_hash);\r\n+$len_key_chr_hash = @key_chr_hash;\r\n+@sort_key_chr_hash = sort_chromNo(@key_chr_hash);\n+$len_sort_key_chr_hash = @sort_key_chr_hash;\n+#for($i=0; $i<$len_sort_key_chr_hash; $i++){\r\n+#\tprint "$sort_key_chr_hash[$i]\t$chr_hash{$sort_key_chr_hash[$i]}\\n";\r\n+#}\n+\n+$num_1=0;\n+$num_2=0;\n+$num_count_chrom=0;\n+my ($chrom, $chromStart, $chromEnd, $name, $score, $strand, $thickStart, $thickEnd, $itemRgb, $blockCount, $blockSizes, $blockStarts);\r\n+$current_chrom = "";\n+while(<IN_1>){\n+\t$num_1++;\r\n+\t$line = $_;\n+\tchomp $line;\n+\t#print $line,"\\n";\n+\t@cols = split ("\\t", $line);\n+ if(scalar(@cols)==12)\n+\t{\n+\t($chrom, $chromStart, $chromEnd, $name, $score, $strand, $thickStart, $thickEnd, $itemRgb, $blockCount, $blockSizes, $blockStarts) = @cols;\n+\t}\n+\tif(scalar(@cols)!=12)\n+\t{\n+\t\t($chrom, $chromStart, $chromEnd, $name, $score, $strand)=@cols;\n+\t\t$thickStart=$chromStart;\n+\t#\tprint $thickStart,"\\n";\n+\t\t$thickEnd = 
$chromEnd;\n+\t\t$blockCount=1;\n+\t\t$blockSizes=$chromEnd-$chromStart;\n+\t\t$blockStarts = 0;\t\n+\t}\n+\t$strand="+" if !$strand;\n+\t@a_blockSizes = split (/\\,/, $blockSizes);\n+\t@a_blockStarts = split (/\\,/, $blockStarts);\n+\tif($chrom ne $current_chrom){\n+\t\tif($num_1 != 1){\n+\t\t\tprint "$num_chr_1\t$num_chr_2\t$len_contigSeqStr\\n";\t\t\t\n+\t\t}\n+\t\tprint "BED2FA: $chrom:\t";\n+\t\t\n+\t\t$num_chr_1=0;\n+\t\t$num_chr_2=0;\t\t\n+\n+\t\tif(exists $chr_hash{$chrom}){\n+\t\t\t$num_count_chrom++;\n+\t\t\t$current_chrom = $chrom;\n+\t\t\t#print $current_chrom,"\\n";\n+#=pod\t\t\t\n+\t\t\t$chromFastaFile = $chr_hash{$chrom};\n+\t\t\t#print $chromFastaFile,"\\n";\n+\t\t\topen($fin, "<$chromFastaFile") or die "can\'t open the chrom file : $!";\n+\t\t\tlocal ($/) = undef;\n+\t\t\t$contigSeqStr = <$fin>;\n+\t\t\tclose ($fin);\n+\t\t\t#print $contigSeqStr,"mark\\t";\n+\t\t\t$contigSeqStr =~s/^\\>.*?\\n//g;\n+ #print $contigSeqStr,"mark2\\t";\n+\n+\t\t\t$contigSeqStr =~s/\\s|\\n//g;\n+ #print $contigSeqStr,"mark3\\n";\n+\n+\t\t\t$len_contigSeqStr = length $contigSeqStr;\n+#=cut\n+\t\t}\n+\t\telse{\n+\t\t\t$num_chr_1++;\n+\t\t\tnext;\n+\t\t}\n+\t}\n+\t$num_chr_1++;\n+\t\r\n+# modify from here................................\n+\tmy @Starts;\n+\tmy @Ends;\n+\tmy @JuncSeq;\n+\tmy $ssStrTag=1;\n+\tfor($i_wuj=0;$i_wuj<$blockCount;$i_wuj++)\n+\t{\n+\t\t$Starts[$i_wuj] = $chromStart + $a_blockStarts[$i_wuj];\n+\t\t$Ends[$i_wuj] = $Starts[$i_wuj] + $a_blockSizes[$i_wuj];\n+\t\t$JuncSeq[$i_wuj] = uc substr ($contigSeqStr,$Starts[$i_wuj], $a_blockSizes[$i_wuj]);\n+\t\tif($strand eq "-"){\n+\t\t $JuncSeq[$i_wuj] = uc string_reverse_complement(lc $JuncSeq[$i_wuj]);\n+\t\t}\n+\t}\t \n+ # for($i_wuj=0;$i_wuj<$blockCount-1;$i_wuj++)\n+#\t{\n+#\t $ssStr = uc substr ($contigSeqStr, $Ends[$i_wuj], 2) . 
substr ($contigSeqStr, $Starts[$i_wuj+1] - 2, 2);\n+#\t if($strand eq "-"){\n+ # $ssStr = uc string_reverse_complement(lc $ssStr);\n+ #$ssStr = $rc_ssStr;\n+#\t }\n+#\t\t$ssStrTag = 0 if ($ssStr ne "GTAG");\n+\t\t\n+ # }\n+#\tif($ssStrTag ==1){\n+ if(1){\n+\t\t$num_2++;\n+\t\t$num_chr_2++;\n+\t\tprint OUT_1 "$line\\n";\n+\t\t#print OUT_2 ">$name\\|$chrom\\|$chromStart\\|$chromEnd\\|$strand\\|$ssStr\\|$num_2\\n$junctionSeqStrLeft'..b'ret;\r\n+}\n+\r\n+sub sort_chromNo{\n+\tlocal(@chrom) = @_;\n+\tlocal($len_key_chr_hash, $i, @sort_chr_hash);\n+\tlocal(@digit_random, @words_random, @digit_other_1, @digit_other_2, @words_other_1, @words_other_2, @digit, @words);\n+\tlocal(@sort_digit, @sort_words, @sort_digit_random, @sort_words_random, @sort_digit_other, @sort_words_other);\n+\tlocal($len_digit, $len_words, $len_digit_random, $len_words_random, $len_digit_other, $len_words_other, $term);\n+\t\n+\t$len_key_chr_hash = @chrom;\n+\t# sort via chr number for printing result\r\n+\tfor($i=0; $i<$len_key_chr_hash; $i++){\n+\t\tif($key_chr_hash[$i] =~ /chr(\\d+)\\_random/){\r\n+\t\t\tpush(@digit_random, $1);\r\n+\t\t}\r\n+\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)\\_random/){\r\n+\t\t\tpush(@words_random, $1);\r\n+\t\t}\r\n+\t\telsif($key_chr_hash[$i] =~ /chr(\\d+)\\_([\\w\\d\\_]+)/){\r\n+\t\t\tpush(@digit_other_1, $1);\n+\t\t\tpush(@digit_other_2, $2);\r\n+\t\t}\r\n+\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)\\_([\\w\\d\\_]+)/){\r\n+\t\t\tpush(@words_other_1, $1);\n+\t\t\tpush(@words_other_2, $2);\r\n+\t\t}\r\n+\t\telsif($key_chr_hash[$i] =~ /chr(\\d+)/){\r\n+\t\t\tpush(@digit, $1);\r\n+\t\t}\r\n+\t\telsif($key_chr_hash[$i] =~ /chr(\\w+)/){\r\n+\t\t\tpush(@words, $1);\r\n+\t\t}\r\n+\t\telse{\r\n+\t\t\tprint "BED2FA: There is unknown type of chromosomes: $key_chr_hash[$i]\\n";\r\n+\t\t}\r\n+\t}\r\n+\t@sort_digit = sort by_mostly_numeric @digit;\r\n+\t@sort_words = sort by_mostly_string @words;\n+\t@sort_digit_random = sort by_mostly_numeric 
@digit_random;\r\n+\t@sort_words_random = sort by_mostly_string @words_random;\n+\t@sort_digit_other = sort_2_array_number_string(\\@digit_other_1, \\@digit_other_2);\r\n+\t@sort_words_other = sort_2_array_string_string(\\@words_other_1, \\@words_other_2);\r\n+\t\r\n+\t$len_digit = @sort_digit;\r\n+\tfor($i=0; $i<$len_digit; $i++){\r\n+\t\t$term = "chr".$sort_digit[$i];\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\r\n+\t$len_words = @sort_words;\r\n+\tfor($i=0; $i<$len_words; $i++){\r\n+\t\t$term = "chr".$sort_words[$i];\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\n+\t$len_digit_random = @sort_digit_random;\r\n+\tfor($i=0; $i<$len_digit_random; $i++){\r\n+\t\t$term = "chr".$sort_digit_random[$i]."_random";\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\r\n+\t$len_words_random = @sort_words_random;\r\n+\tfor($i=0; $i<$len_words_random; $i++){\r\n+\t\t$term = "chr".$sort_words_random[$i]."_random";\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\t\r\n+\t$len_digit_other = @sort_digit_other;\r\n+\tfor($i=0; $i<$len_digit_other; $i=$i+2){\r\n+\t\t$term = "chr".$sort_digit_other[$i]."_".$sort_digit_other[$i+1];\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\r\n+\t$len_words_other = @sort_words_other;\r\n+\tfor($i=0; $i<$len_words_other; $i=$i+2){\r\n+\t\t$term = "chr".$sort_words_other[$i]."_".$sort_words_other[$i+1];\r\n+\t\tpush(@sort_chr_hash, $term);\r\n+\t}\t\n+\t\n+\treturn @sort_chr_hash;\n+}\r\n+\n+sub sort_2_array_number_string{\r\n+\tlocal($a, $b) = @_;\r\n+\tlocal($len_a, $len_b, $i, %family, $one, $two);\r\n+\tlocal(@ret);\r\n+\t\r\n+\t$len_a = @$a;\r\n+\t$len_b = @$b;\r\n+\tif($len_a == $len_b){\r\n+\t\tfor($i=0; $i<$len_a; $i++){\r\n+\t\t\t$family{$$a[$i]}{$$b[$i]} = 0;\t\t\t\t\t\t\t\t\t\r\n+\t\t}\r\n+\t\tfor $one (sort by_mostly_numeric keys %family) {\r\n+\t\t\tfor $two (sort by_mostly_string keys %{ $family{$one} }) {\r\n+\t\t\t\t\tpush(@ret, $one);\r\n+\t\t\t\t\tpush(@ret, $two);\r\n+\t\t\t}\t\r\n+\t\t}\r\n+\t}\r\n+\telse{\t\t\r\n+\t\tprint "ERROR: Sort 
array is not same size\\n";\r\n+\t\tprint "a $len_a, b $len_b\\n";\r\n+\t}\t\r\n+\t\r\n+\treturn @ret;\r\n+}\n+\n+sub sort_2_array_string_string{\r\n+\tlocal($a, $b) = @_;\r\n+\tlocal($len_a, $len_b, $i, %family, $one, $two);\r\n+\tlocal(@ret);\r\n+\t\r\n+\t$len_a = @$a;\r\n+\t$len_b = @$b;\r\n+\tif($len_a == $len_b){\r\n+\t\tfor($i=0; $i<$len_a; $i++){\r\n+\t\t\t$family{$$a[$i]}{$$b[$i]} = 0;\t\t\t\t\t\t\t\t\t\r\n+\t\t}\r\n+\t\tfor $one (sort by_mostly_string keys %family) {\r\n+\t\t\tfor $two (sort by_mostly_string keys %{ $family{$one} }) {\r\n+\t\t\t\t\tpush(@ret, $one);\r\n+\t\t\t\t\tpush(@ret, $two);\r\n+\t\t\t}\t\r\n+\t\t}\r\n+\t}\r\n+\telse{\t\t\r\n+\t\tprint "ERROR: Sort array is not same size\\n";\r\n+\t\tprint "a $len_a, b $len_b\\n";\r\n+\t}\t\r\n+\t\r\n+\treturn @ret;\r\n+}\n+\n+sub by_mostly_numeric{\r\n+#\t( $a <=> $b ) || ( $a cmp $b );\r\n+\t( $a <=> $b );\r\n+}\r\n+\r\n+sub by_mostly_string{\r\n+#\t( $a <=> $b ) || ( $a cmp $b );\r\n+\t( $a cmp $b );\r\n+}\n+\r\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/get_event_dist_fit.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/get_event_dist_fit.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
# Usage: get_event_dist_fit.pl <input file> [-w width] [-c column] [-v]
# Splits the ratios in one column of <input file> by event type (taken from
# the prefix of the first column), writes each group to
# <input file>.tmp<type>, runs beta_fit.R on the groups and pastes the
# resulting *.fit files into <input file>.fit.hist.
use Getopt::Long;
use strict;

use Cwd;
use File::Basename;

# beta_fit.R is expected next to this script.
my $SrcFolder = dirname(Cwd::abs_path($0));

my $width=0.001;
my $verbose=1;
my $col=2;	# 1-based column holding the ratio

GetOptions (
	'w:f'=>\$width,
	'c:i'=>\$col,
	'v'=>\$verbose
);

# Read the file name after option parsing so that options may precede it
# (previously $ARGV[0] was taken first and could be an option such as "-w").
my $InputFileName = $ARGV[0];
die "usage: $0 <input file> [-w width] [-c column]\n" unless defined $InputFileName;

$width=$width*1;
$col=$col-1;

# Event classes in the order their temporary files are written,
# each with the first-column pattern that selects it.
my @classes = (
	[ 'ca', qr/^(?:CA|ME)/ ],
	[ 'cs', qr/^CS/ ],
	[ 'ad', qr/^AD/ ],
	[ 'aa', qr/^AA/ ],
	[ 'ai', qr/^AI/ ],
	[ 'ir', qr/^IR/ ],
);
my %ratios = map { $_->[0] => [] } @classes;

open(my $in, "<", $InputFileName) or die "cannot open $InputFileName: $!\n";
while (my $line = <$in>)
{
	next if ($line =~ /^#/);
	chomp($line);
	my @array = split(/\s/, $line);
	# ratios at (or beyond) the extremes are left out of the fit
	next if ($array[$col] <= 0.001 or $array[$col] >= 0.999);
	foreach my $class (@classes)
	{
		my ($tag, $pattern) = @$class;
		push(@{ $ratios{$tag} }, $array[$col]) if $array[0] =~ $pattern;
	}
}
close($in);

foreach my $class (@classes)
{
	my $tag = $class->[0];
	my $tmpname = "$InputFileName.tmp$tag";
	open(my $tmp, ">", $tmpname) or die "cannot write $tmpname: $!\n";
	print $tmp $_, "\n" foreach @{ $ratios{$tag} };
	close($tmp);
}

# The 'ai' group is written above but not fitted or pasted.
my @fitted = qw(ca ad aa ir cs);
foreach my $tag (@fitted)
{
	system("R --slave --args $InputFileName.tmp$tag <$SrcFolder/beta_fit.R") == 0
		or warn "beta_fit.R failed for $InputFileName.tmp$tag\n";
}

# Header first, then the fitted columns appended by paste.
open(my $hist, ">", "$InputFileName.fit.hist") or die "cannot write $InputFileName.fit.hist: $!\n";
print $hist "#Width:$width\n";
close($hist);

system("paste $InputFileName.tmpca.fit $InputFileName.tmpad.fit $InputFileName.tmpaa.fit $InputFileName.tmpir.fit $InputFileName.tmpcs.fit >>$InputFileName.fit.hist");
#system("rm $InputFileName.tmpca* $InputFileName.tmpad* $InputFileName.tmpaa* $InputFileName.tmpai* $InputFileName.tmpir* ");
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/gtf2bed.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/gtf2bed.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
# rewrite on Sep 7th,2022

#part of package SpliceTrap

#Jie Wu
#
# Usage: gtf2bed.pl <file.gtf>   (BED12 is printed to standard output)
use strict;

my $inputfilename = $ARGV[0];

# input file is a gtf file,
# "transcript_id" is required for each line and should not be ambiguous.
# only the "exon" lines are used

my %chr_hash;		# transcript id -> chromosome
my %strand_hash;	# transcript id -> strand
my %tx_exons;		# $tx_exons{$tx_id}{$start} = $size;

my $linenum = 0;

open(my $input, "<", $inputfilename) or die "cannot open $inputfilename: $!\n";

while (my $line = <$input>)
{
	$linenum++;
	my @a = split("\t", $line);
	next unless defined $a[2] and $a[2] eq "exon";

	my $txid;
	if ($a[8] =~ /transcript_id "(\S*?)"/)
	{
		$txid = $1;
	}
	else
	{
		die ("$inputfilename format error! No transcript_id in line $linenum \n");
	}

	if (exists $chr_hash{$txid} and $chr_hash{$txid} ne $a[0])
	{
		warn ("$inputfilename: ambiguous transcript_id in line $linenum: $txid Skipped \n");
		next;
	}
	if (exists $strand_hash{$txid} and $strand_hash{$txid} ne $a[6])
	{
		warn ("$inputfilename: ambiguous transcript_id in line $linenum: $txid Skipped\n");
		# Really skip the line, as the message says; before, the exon was
		# still recorded and the transcript's strand overwritten.
		next;
	}
	$chr_hash{$txid} = $a[0];
	$strand_hash{$txid} = $a[6];
	$tx_exons{$txid}{$a[3]} = $a[4] - $a[3] + 1;	# GTF coordinates are inclusive
}
close($input);

# Sorted ids make the output order reproducible between runs.
foreach my $txid (sort keys %chr_hash)
{
	my @starts;
	my @sizes;
	foreach my $start (sort {$a<=>$b} (keys %{$tx_exons{$txid}} ) )
	{
		push (@starts, $start);
		push (@sizes, $tx_exons{$txid}{$start});
	}
	my $exon_num = scalar(@sizes);
	my $starts_str = "";
	for (my $i = 0; $i < $exon_num; $i++)
	{
		# block starts are relative to the first exon
		$starts_str = $starts_str.($starts[$i] - $starts[0]).",";
		if ($i > 0)
		{
			warn "$txid, intron size..".($starts[$i]-$starts[$i-1])."\n" if ($starts[$i]-$starts[$i-1]>1000000);
		}
	}
	my $sizes_str = join(",",@sizes);
	my $end = $starts[$exon_num-1] + $sizes[$exon_num-1] -1;
	# BED start is 0-based, hence the -1 on the first exon start
	print join("\t",$chr_hash{$txid}, $starts[0]-1, $end, $txid,"0",$strand_hash{$txid},$starts[0]-1, $end, "255,0,0",$exon_num,$sizes_str, $starts_str);
	print "\n";
}
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/mapping_bowtie.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/mapping_bowtie.sh Thu Oct 12 16:26:36 2017 -0400 |
[ |
#!/bin/bash
# Map a read file against the TXdb bowtie index in pieces of 1,000,000 lines.
# Usage: mapping_bowtie.sh <reads> <fasta|fastq> <db prefix> <output folder> <src folder> <threads>
# Result: <output folder>/<basename of reads>.nomt
#SrcFolder='/data/zhang/wuj/scripts/SpliceTrap.0.8'
InputFileName=$1
faorfq=$2
DatabasePrefix=$3
Outputfolder=$4
SrcFolder=$5
Threads=$6
DatabaseFolder=$SrcFolder'/../db/'$DatabasePrefix'/btw/TXdb'
TmpFolderName=`basename "$1"`
#fasta or fastq

# Never continue in the wrong directory: the script ends with an rm -rf.
cd "$Outputfolder" || exit 1
#prepare the folder
if [ -d "$TmpFolderName.result" ];then
	echo "MAPPING: !!!Error, there is already a folder named "$TmpFolderName".result !"
	echo "MAPPING: !!!change the name of that folder first in case I erase them..."
	exit 1
fi
echo "MAPPING: Start mapping $InputFileName...Creating cache folder $TmpFolderName.result"
mkdir "$TmpFolderName.result"
mkdir "$TmpFolderName.result/cache"

cd "$TmpFolderName.result/cache" || exit 1
echo "MAPPING: Split to pieces ..."
split -l 1000000 "$InputFileName"

# bowtie needs -f for fasta input
add=""
if [ "$faorfq" == "fasta" ];then
	add="-f"
fi

# Print the command line that maps one piece ($1) and produces $1.nomt.
job_line() {
	echo "bowtie -p $Threads -a -v 2 $DatabaseFolder $1 $add >$1.btw; perl $SrcFolder/bowtie2eland.pl $1.btw $1 $1.eland;rm $1.btw ;perl $SrcFolder/mark.mt.4eland.pl $1.eland >$1.nomt;rm $1.eland"
}

for name in x*
do
	job_line "$name" >>map.sh
	echo "$name" >>checklist
done

echo "MAPPING: submit scripts..."
perl "$SrcFolder/batchqsub.pl" map.sh

# Count via stdin so that wc prints the number only; "wc -l map.sh" also
# prints the file name, which made the comparison further down fail.
tasknum=$(wc -l <map.sh | tr -d ' \n')
#checking..

echo "MAPPING: mapping $InputFileName to TXdb done...start to check.."
while true
do
	rm -f mapcheck.sh
	while read -r name
	do
		echo "MAPPING: checking $name...."
		readnum=$(wc -l <"$name" | tr -d ' \n')
		# 2 lines per fasta record, 4 per fastq record
		if [ "$faorfq" == "fasta" ];then
			readnum=$((readnum / 2))
		else
			readnum=$((readnum / 4))
		fi
		if [ -f "$name.nomt" ];then
			bowtienum=$(wc -l <"$name.nomt" | tr -d ' \n')
		else
			bowtienum=0
		fi
		# a piece is complete when its .nomt has one line per read
		if [ "$bowtienum" -ne "$readnum" ];then
			job_line "$name" >>mapcheck.sh
		fi
	done <checklist
	if [ -f mapcheck.sh ];then
		checktasknum=$(wc -l <mapcheck.sh | tr -d ' \n')
		if [ "$checktasknum" -eq "$tasknum" ];then
			echo "MAPPING: warning! none of the mapping tasks properly finished!"
		fi
		echo "MAPPING: resubmit TASKS...."
		perl "$SrcFolder/batchqsub.pl" mapcheck.sh
	else
		break
	fi
done
echo "MAPPING: Done.....merging files..."
cat *.nomt >"$Outputfolder/$TmpFolderName.nomt"
cd ../../
rm -rf -- "$TmpFolderName.result"
#/data/zhang/wuj/tools/bowtie-0.12.3/bowtie -a $DatabaseFolderTXdb -f
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/mapping_rmap.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/mapping_rmap.sh Thu Oct 12 16:26:36 2017 -0400 |
[ |
#!/bin/bash
# Map a read file against TXdb.fasta with rmap in pieces of 1,000,000 lines.
# Usage: mapping_rmap.sh <reads> <fasta|fastq> <db prefix> <output folder> <src folder>
# Result: <output folder>/<basename of reads>.nomt
#/data/zhang/wuj/tools/bowtie-0.12.3/bowtie -a --best /data/zhang/wuj/database/hg18/AS/TXdb.2/btw/TXdb -f -t s_1_sequence.txtparta >s_1.map &
#SrcFolder='/data/zhang/wuj/scripts/SpliceTrap.0.8'
InputFileName=$1
faorfq=$2
DatabasePrefix=$3
Outputfolder=$4
SrcFolder=$5
DatabaseFolder=$SrcFolder'/../db/'$DatabasePrefix'/TXdb.fasta'
TmpFolderName=`basename "$1"`

# Never continue in the wrong directory: the script ends with an rm -rf.
cd "$Outputfolder" || exit 1
#prepare the folder
if [ -d "$TmpFolderName.result" ];then
	echo "MAPPING: !!!Error, there is already a folder named "$TmpFolderName".result !"
	echo "MAPPING: !!!change the name of that folder first in case I erase them..."
	exit 1
fi
echo "MAPPING: Start to map $InputFileName....Creating cache folder $TmpFolderName.result"
mkdir "$TmpFolderName.result"
cd "$TmpFolderName.result" || exit 1
mkdir cache stat
cd cache || exit 1
echo "MAPPING: Split file..."
split -l 1000000 "$InputFileName"
echo "MAPPING: generating shell scirpts...."

# Print the command line that maps one piece ($1) and produces $1.nomt.
job_line() {
	echo "rmap -M 100 -m 2 -c $DatabaseFolder -o $1.rmap $1; perl $SrcFolder/rmap2eland.pl $1.rmap $1 $1.eland;rm $1.rmap ;perl $SrcFolder/mark.mt.4eland.pl $1.eland >$1.nomt;rm $1.eland"
}

for name in x*
do
	job_line "$name" >>map.sh
	echo "$name" >>checklist
done
# Count via stdin so that wc prints the number only; "wc -l map.sh" also
# prints the file name, which made the comparison further down fail.
tasknum=$(wc -l <map.sh | tr -d ' \n')
perl "$SrcFolder/batchqsub.pl" map.sh

echo "MAPPING: map $InputFileName to TXdb done...start to check.."
while true
do
	rm -f mapcheck.sh
	while read -r name
	do
		echo "MAPPING: checking $name...."
		readnum=$(wc -l <"$name" | tr -d ' \n')
		# 2 lines per fasta record, 4 per fastq record
		if [ "$faorfq" == "fasta" ];then
			readnum=$((readnum / 2))
		else
			readnum=$((readnum / 4))
		fi
		if [ -f "$name.nomt" ];then
			rmapnum=$(wc -l <"$name.nomt" | tr -d ' \n')
		else
			rmapnum=0
		fi
		# a piece is complete when its .nomt has one line per read
		if [ "$rmapnum" -ne "$readnum" ];then
			job_line "$name" >>mapcheck.sh
		fi
	done <checklist
	if [ -f mapcheck.sh ];then
		checktasknum=$(wc -l <mapcheck.sh | tr -d ' \n')
		if [ "$checktasknum" -eq "$tasknum" ];then
			echo "MAPPING: warning! none of the mapping tasks properly finished!"
		fi
		echo "MAPPING: resubmiting TASKS...."
		perl "$SrcFolder/batchqsub.pl" mapcheck.sh
	else
		break
	fi
done
echo "MAPPING: Mapping is really done....merging files..."

cat *.nomt >"$Outputfolder/$TmpFolderName.nomt"
cd ../../

rm -rf -- "$TmpFolderName.result"
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/mark.mt.4eland.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/mark.mt.4eland.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
# this file is to convert mult mapped reads to nm reads by simply marked it as NM reads.
# its for the convience of inclusion ratio computation, if one read can be mapped to mult positions in the genome, then it will be marked as NM
# later, can be used to add information for dealing with this mult reads, for example, the coverage in the region
#
# Usage: mark.mt.4eland.pl <file.eland>   (result on standard output)
# Every input line is echoed; "\tMT" is appended when the hits of a read lie
# on different chromosomes or their start positions span more than 100000.

use strict;
my $inputfilename=$ARGV[0];

open(my $input, "<", $inputfilename) or die "cannot open $inputfilename: $!\n";
while (my $line = <$input>)
{
	chomp($line);
	my @array = split("\t",$line);
	my $match = $array[3];
	# unmapped reads and reads without a hit list pass through unchanged
	if ((defined $array[2] and $array[2] eq "NM") or !defined $match or $match eq "")
	{
		print $line,"\n";
		next;
	}

	#these arrays store the mapped positions for this read
	my @chr;
	my @start;
	while ($match =~ /(chr\S[^\|]*)\|(\d*)\|(\d*)\|/g)
	{
		push @chr, $1;
		push @start, $2;
	}
	if (scalar(@chr) <= 1)
	{
		print $line,"\n";
		next;
	}

	@chr = sort(@chr);
	# Coordinates must be ordered numerically.  The default string sort put
	# e.g. "100000000" before "9", so the span below could come out negative
	# and distant hits were not flagged.
	@start = sort { $a <=> $b } @start;
	if ($chr[0] ne $chr[-1] or $start[-1] - $start[0] > 100000)
	{
		print $line, "\tMT\n";
	}
	else
	{
		print $line,"\n";
	}
}
close($input);
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/rmap2eland.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/rmap2eland.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
# Usage: rmap2eland.pl <rmap output> <reads file> <eland output>
# Merges the rmap hits (sorted by read name) with the sorted read headers
# and writes one line per read: "<name>\tNA\tNM" for reads without a hit,
# otherwise "<name>\tNA\t<n>:0:0\t/<id>:<pos>F0,/<id>:<pos>R0,...".
use strict;

my $rmapfilename=$ARGV[0];
my $readsfilename=$ARGV[1];
my $elandfilename=$ARGV[2];

# First character of the reads file marks header lines.
open(my $probe, "<", $readsfilename) or die "cannot open $readsfilename: $!\n";
my $detectformat = "";
read($probe, $detectformat, 1);
close($probe);

# Odd-numbered lines of the reads file, and the hits ordered by read name
# (4th column), so both streams can be merged in a single pass.
system("awk 'NR%2==1' $readsfilename |sort >$readsfilename.sort") == 0
	or die "cannot sort $readsfilename\n";
system("sort -k4,4 $rmapfilename >$rmapfilename.sort") == 0
	or die "cannot sort $rmapfilename\n";

open(my $readsfile, "<", $readsfilename.".sort") or die "cannot open $readsfilename.sort: $!\n";
open(my $rmapfile, "<", $rmapfilename.".sort") or die "cannot open $rmapfilename.sort: $!\n";
open(my $elandfile, ">", $elandfilename) or die "cannot write $elandfilename: $!\n";

# One-line push-back buffer for the hit stream.  It replaces the former
# seek() rewind, which at end of file relied on the length of an undefined
# line.
my $pending;
sub next_rmap_line
{
	my $line;
	if (defined $pending)
	{
		$line = $pending;
		undef $pending;
		return $line;
	}
	$line = <$rmapfile>;
	chomp($line) if defined $line;
	return $line;
}

while (defined(my $rmapline = next_rmap_line()))
{
	my ($mapped_id, $start, $end, $rmapreadname, $mismatch, $strand)=split("\t",$rmapline);
	while (my $readline = <$readsfile>)
	{
		next unless $readline =~ /^\Q$detectformat\E/;
		chomp($readline);
		my $readname = substr($readline, 1);

		if ($readname ne $rmapreadname)
		{
			# read sorts before the current hit: it has no hit at all
			print $elandfile $readname,"\tNA\tNM\n";
			next;
		}

		my @mapped_ids = ($mapped_id);
		my @mapped_pos = ($start);
		my @mapped_strand = ($strand);
		# collect the following hits that belong to the same read
		while (1)
		{
			my $nextline = next_rmap_line();
			my @fields = defined $nextline ? split("\t", $nextline) : ();
			if (defined $fields[3] and $fields[3] eq $readname)
			{
				push(@mapped_ids, $fields[0]);
				push(@mapped_pos, $fields[1]);
				push(@mapped_strand, $fields[5]);
				next;
			}
			$pending = $nextline;	# first hit of another read (undef at EOF)
			last;
		}

		print $elandfile $readname,"\t";
		print $elandfile "NA\t";
		print $elandfile scalar(@mapped_ids),":0:0\t";
		for (my $i = 0; $i < @mapped_ids; $i++)
		{
			print $elandfile "/",$mapped_ids[$i];
			print $elandfile ":",$mapped_pos[$i]+1;
			if ($mapped_strand[$i] eq "+")
			{
				print $elandfile "F0,";
			}
			else
			{
				print $elandfile "R0,";
			}
		}
		print $elandfile "\n";
		last;
	}
}

# reads left after the last hit are all unmapped
while (my $readline = <$readsfile>)
{
	if ($readline =~ /^\Q$detectformat\E/)
	{
		chomp($readline);
		my $readname = substr($readline, 1);
		print $elandfile $readname,"\tNA\tNM\n";
	}
}

close($elandfile);
close($rmapfile);
close($readsfile);
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/scan_nomt.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/scan_nomt.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
# Usage: scan_nomt.pl <end1 file> <end2 file>
# Splits a pair of mapping files (line N of each is the same read pair) by
# chromosome into <end1 file>.<chr> and <end2 file>.<chr>.  Pairs whose two
# ends name different chromosomes, or no chromosome at all, are dropped.
use strict;
use FileHandle;

my $File1Name=$ARGV[0];
my $File2Name=$ARGV[1];

my %FileHandle1;	# chromosome -> output handle for end 1
my %FileHandle2;	# chromosome -> output handle for end 2

open(my $file1, "<", $File1Name) or die "cannot open $File1Name: $!\n";
open(my $file2, "<", $File2Name) or die "cannot open $File2Name: $!\n";

while (my $line1 = <$file1>)
{
	my $line2 = <$file2>;
	$line2 = "" unless defined $line2;	# second file shorter than the first
	chomp($line1);
	chomp($line2);
	# the status is the last two characters of the line
	my $read1status=substr($line1,length($line1)-2,2);
	my $read2status=substr($line2,length($line2)-2,2);
	my @array1=split("\t",$line1);
	my @array2=split("\t",$line2);

	# only four-column lines contribute a chromosome
	my $chr1="";
	my $chr2="";
	if (scalar(@array1) == 4 and $array1[3] =~ /(chr\S*?)\|/)
	{
		$chr1 = $1;
	}
	if (scalar(@array2) == 4 and $array2[3] =~ /(chr\S*?)\|/)
	{
		$chr2 = $1;
	}

	my $chr = $chr1 ne "" ? $chr1 : $chr2;
	next if $chr eq "";
	next if ($chr1 ne "" and $chr2 ne "" and $chr1 ne $chr2);

	# Open the two per-chromosome files on first use and then fall through
	# to the writing code: previously the pair that triggered the open was
	# itself never written, losing the first pair of every chromosome.
	if (!exists $FileHandle1{$chr})
	{
		my $fout1 = new FileHandle;
		open($fout1, ">".$File1Name.".".$chr) or die "cannot write $File1Name.$chr: $!\n";
		$FileHandle1{$chr} = $fout1;

		my $fout2 = new FileHandle;
		open($fout2, ">".$File2Name.".".$chr) or die "cannot write $File2Name.$chr: $!\n";
		$FileHandle2{$chr} = $fout2;
	}

	my $fout1 = $FileHandle1{$chr};
	my $fout2 = $FileHandle2{$chr};
	# MT reads are reduced to their first two columns plus the MT mark
	if ($read1status eq "MT")
	{
		print $fout1 $array1[0],"\t",$array1[1],"\tMT\n";
	}
	else
	{
		print $fout1 $line1,"\n";
	}
	if ($read2status eq "MT")
	{
		print $fout2 $array2[0],"\t",$array2[1],"\tMT\n";
	}
	else
	{
		print $fout2 $line2,"\n";
	}
}

# Close the handles themselves; the old loops iterated over the hash keys
# (chromosome names) and passed those strings to close().
foreach my $fout1 (values %FileHandle1)
{
	close($fout1);
}
foreach my $fout2 (values %FileHandle2)
{
	close($fout2);
}

close($file1);
close($file2);
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/scanbed2txdb.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/scanbed2txdb.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,490 @@\n+#argv0: input transcript bed file\n+#argv1: output filename, will be in AS format\n+\n+use strict;\n+my $AnnoFileName = $ARGV[0];\n+my $outputFileName = $ARGV[1];\n+\n+if($AnnoFileName eq "" or $outputFileName eq "")\n+{\n+\tprint "TXDBGEN: Please specify your input files\\n";\n+\texit;\n+}\n+\n+my $cachefolder = `basename $outputFileName`;\n+chomp($cachefolder);\n+my $cachefolder = $cachefolder.".cache";\n+my $AnnoFileBase = `basename $AnnoFileName`; \n+chomp($AnnoFileBase);\n+\n+if(! -e $cachefolder)\n+{\n+\tmkdir $cachefolder or die "CHECK: cannot mkdir $cachefolder\\n";\n+\tprint "TXDBGEN: mkdir $cachefolder\\n";\n+}\n+\n+my $CacheAnnoFileName = $cachefolder."/".$AnnoFileBase.".sort";\n+#sort the annotation file\n+\n+print "TXDBGEN: sort $AnnoFileName \\n";\n+\n+system("sort -k6,6 -k1,1 -k2,2n -k3,3n $AnnoFileName >$CacheAnnoFileName");\n+\n+$AnnoFileName = $CacheAnnoFileName;\n+\n+#read the annotations into hashes\n+\n+open(AnnoFile, $CacheAnnoFileName) or die "can not open",$CacheAnnoFileName;\n+#split the genes into contigs\n+\n+my $contigid = 0;\n+my $end_tmp = 0;\n+my $chr_tmp = "chr";\n+my $strand_tmp="NA";\n+\n+my $TXnumtmp=0;\n+\n+my %eventlist =();\n+#my %eventlist_af = ();\n+#my %eventlist_al = ();\n+my %evidences = ();\n+\n+open(my $output,">$outputFileName");\n+\n+open(my $output2, ">$outputFileName.evi");\n+while(my $line =<AnnoFile>)\n+{\n+\tchomp($line);\n+\tmy @a = split("\\t",$line);\n+\tmy $chr = $a[0];\n+\tmy $start = $a[1];\n+\tmy $end = $a[2];\n+\tmy $name = $a[3];\n+\tmy @sizes = split(",",$a[10]);\n+\tmy $strand = $a[5];\n+\tmy @start_shifts = split(",",$a[11]);\n+\t#my $chrstr = $chr.$strand;\n+\tmy @starts;\n+\tmy @ends;\n+\tfor(my $i=0;$i<@start_shifts;$i++)\n+ {\n+ $starts[$i]=$start_shifts[$i]+$start;\n+ $ends[$i] = $starts[$i]+$sizes[$i];\n+ }\n+\tif($start >$end_tmp or $chr ne $chr_tmp or $strand_tmp ne $strand)\n+ {\n+ $contigid++;\n+ #$ctgmultisonum++ if $TXnumtmp>1;\n+ $TXnumtmp=0;\n+\t\tmy $annos = 
scanevents(\\%eventlist, "inner") ;\n+\t\t#my $annos_af = scanevents(\\%eventlist_af, "af");\n+\t\t#my $annos_al = scanevents(\\%eventlist_al, "al");\n+\t\t\n+\t\tmy $stdout = select ($output);\n+\t\tmy $eventids = printanno($annos,$chr_tmp,$strand_tmp) if(scalar(%$annos)>0);\n+\t\t#printanno($annos_af,$chr_tmp,$strand_tmp) if(scalar(%$annos_af)>0);\n+\t\t#printanno($annos_al,$chr_tmp,$strand_tmp) if(scalar(%$annos_al)>0);\n+\t\tselect($stdout);\n+\t\t$stdout = select ($output2);\n+\t\t#print cross information\n+\t\tforeach my $connect_str (keys %$eventids) \n+\t\t{\n+\t\t\tprint $eventids->{$connect_str},"\\t";\n+\t\t\tforeach my $transcriptid (keys %{$evidences{$connect_str}}) \n+\t\t\t{\n+\t\t\t\tprint $transcriptid,",";\n+\t\t\t}\n+\t\t\tprint "\\n";\n+\t\t}\n+\t\tselect($stdout);\n+\t\tprint "TXDBGEN: Contig ID $contigid at $chr $strand...\\n" if ( $contigid%1000 == 0);\n+\t\t%eventlist = ();\n+\t\t#%eventlist_al = ();\n+\t\t#%eventlist_af = ();\n+ #print CacheContigFile "#ctg$contigid\\n";\n+\n+ }\n+\t$TXnumtmp++;\n+ $end_tmp = $end if ($end > $end_tmp or $chr ne $chr_tmp or $strand_tmp ne $strand);\n+ $chr_tmp = $chr;\n+\t\n+\t$strand_tmp = $strand;\n+\t\n+\t#scan connections, 2 and 3 exons for CA/CS/AF\n+#\tif(scalar(@starts)>2)\n+#\t{\n+#\t\tmy $connectionstr_af =$starts[0]."-".\n+#\t\t\t\t$ends[0].",".\n+#\t\t\t\t$starts[1]."-".\n+#\t\t\t\t$ends[1]."," ;\n+#\t\n+#\t\t$eventlist_af{$connectionstr_af} = $starts[0];\n+#\t\tmy $connectionstr_al = $starts[scalar(@starts)-2]."-".\n+#\t\t\t\t$ends[scalar(@starts)-2].",".\n+#\t\t\t\t$starts[scalar(@starts)-1]."-".\n+#\t\t\t\t$ends[scalar(@starts)-1]."," ;\n+#\t\t$eventlist_al{$connectionstr_al} = $starts[scalar(@starts)-2];\n+#\t}\n+\t\t#didn\'t consider direction yet\n+ #add 1 exon for IR\n+\tfor(my $n=1;$n<4;$n++)\n+ {\n+\t\tfor(my $i=0;$i<scalar(@starts)-$n+1;$i++)\n+ {\n+\t\t\tmy $connectionstr = "";\n+\t\t\tfor(my $j=$i;$j<$i+$n;$j++)\n+ {\n+ $connectionstr = $connectionstr .\n+ $starts[$j]."-".\n+ 
$en'..b'\t\tprint "0,";\n+\t\t\tprint $a[2]-$a[0],",";\n+\t\t\tprint $a[4]-$a[0],"\\n";\n+\t\t\t\n+\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n+ print $id.".".$num,"[S]\\t";\n+\t\t\tmy $connectstr_ca= $a[0]."-".$a[1].",".$a[4]."-".$a[5].",";\n+\t\t\t$eventids{$connectstr_ca} = $id.".".$num."[S]";\n+ print "0\\t";\n+ print $strand,"\\t";\n+ print $a[0],"\\t",$a[5],"\\t";\n+ print "255,0,0\\t";\n+ print "2\\t"; \n+ print $a[1]-$a[0],",";\n+ #print $a[3]-$a[2],",";\n+ print $a[5]-$a[4],"\\t";\n+ print "0,";\n+ #print $a[2]-$a[0],",";\n+ print $a[4]-$a[0],"\\n";\n+\t\t\t\n+\t\t}\n+\t\tif($annos->{$key} eq "ir")\n+\t\t{\n+\t\t\tmy @a=split(/[,-]/,$key);\n+\n+\t\t\tmy $chrid=substr($chr,3,length($chr)-3);\n+\t\t\tmy $id="IR-IR-$chrid"."-".$a[1]."-".$a[2];\n+\t\t\tmy $num=0;\n+\t\t\t$num = $nums_per_isoform{$id} if exists $nums_per_isoform{$id};\n+\t\t\t$nums_per_isoform{$id}++;\n+\n+\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[3],"\\t";\n+\t\t\tprint $id.".".$num,"[L]\\t";\n+\t\t\tprint "0\\t";\n+\t\t\tprint $strand,"\\t";\n+\t\t\tprint $a[0],"\\t",$a[3],"\\t";\n+\t\t\t$eventids{$a[0]."-".$a[3].","} = $id.".".$num."[L]";\n+\t\t\tprint "255,0,0\\t";\n+\t\t\tprint "3\\t";\n+\t\t\tprint $a[1]-$a[0],",";\n+\t\t\tprint $a[2]-$a[1],",";\n+\t\t\tprint $a[3]-$a[2],"\\t";\n+\t\t\tprint "0,";\n+\t\t\tprint $a[1]-$a[0],",";\n+\t\t\tprint $a[2]-$a[0],"\\n";\n+\t\t\t\n+\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[3],"\\t";\n+ print $id.".".$num,"[S]\\t";\n+\t\t\t$eventids{$key} = $id.".".$num."[S]";\n+ print "0\\t";\n+ print $strand,"\\t";\n+ print $a[0],"\\t",$a[3],"\\t";\n+ print "255,0,0\\t";\n+ print "2\\t"; \n+ print $a[1]-$a[0],",";\n+ #print $a[3]-$a[2],",";\n+ print $a[3]-$a[2],"\\t";\n+ print "0,";\n+ #print $a[2]-$a[0],",";\n+ print $a[2]-$a[0],"\\n";\n+\t\t\n+\t\t}\n+\t\tif($annos->{$key} eq "ss")\n+\t\t{\n+\t\t\tmy @a=split(/[,-]/,$key);\n+\t\t\tmy $chrid=substr($chr,3,length($chr)-3);\n+\t\t\tmy $type="AA";\n+\t\t\tif( ($strand eq "+" && $a[1]==$a[2]) or ($strand eq "-" && 
$a[3]==$a[4]))\n+\t\t\t{\n+\t\t\t\t$type="AD";\n+\t\t\t}\n+\t\t\tmy $id="$type-$type-$chrid"."-".$a[2]."-".$a[3];\n+\t\t\tmy $connect_str_L = "";\n+\t\t\tmy $connect_str_S = "";\n+\t\t\tif($a[1]==$a[2])\n+\t\t\t{\n+\t\t\t\t$connect_str_L = $a[0]."-".$a[3].",".$a[4]."-".$a[5].",";\n+\t\t\t\t$connect_str_S = $a[0]."-".$a[1].",".$a[4]."-".$a[5].",";\n+\t\t\t}\n+\t\t\telse\n+\t\t\t{\n+\t\t\t\t$connect_str_L = $a[0]."-".$a[1].",".$a[2]."-".$a[5].",";\n+\t\t\t\t$connect_str_S = $a[0]."-".$a[1].",".$a[3]."-".$a[5].",";\n+\t\t\t\t\n+\t\t\t}\n+\t\t\tmy $num=0;\n+\t\t\t$num = $nums_per_isoform{$id} if exists $nums_per_isoform{$id};\n+\t\t\t$nums_per_isoform{$id}++;\n+\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n+\t\t\tprint $id.".".$num,"[L]\\t";\n+\t\t\t$eventids {$connect_str_L} = $id.".".$num."[L]";\n+\t\t\tprint "0\\t";\n+\t\t\tprint $strand,"\\t";\n+\t\t\tprint $a[0],"\\t",$a[5],"\\t";\n+\t\t\tprint "255,0,0\\t";\n+\t\t\tprint "3\\t";\n+\t\t\tprint $a[1]-$a[0],",";\n+\t\t\tprint $a[3]-$a[2],",";\n+\t\t\tprint $a[5]-$a[4],"\\t";\n+\t\t\tprint "0,";\n+\t\t\tprint $a[2]-$a[0],",";\n+\t\t\tprint $a[4]-$a[0],"\\n";\n+\t\t\t\n+\t\t\tprint $chr,"\\t",$a[0],"\\t",$a[5],"\\t";\n+ print $id.".".$num,"[S]\\t";\n+\t\t\t$eventids {$connect_str_S} = $id.".".$num."[S]";\n+ print "0\\t";\n+ print $strand,"\\t";\n+ print $a[0],"\\t",$a[5],"\\t";\n+ print "255,0,0\\t";\n+ print "2\\t"; \n+ print $a[1]-$a[0],",";\n+ #print $a[3]-$a[2],",";\n+ print $a[5]-$a[4],"\\t";\n+ print "0,";\n+ #print $a[2]-$a[0],",";\n+ print $a[4]-$a[0],"\\n";\n+\t\n+\t\n+\t\t}\n+\n+\t}\n+\treturn(\\%eventids);\n+\t\n+}\n+\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/splitdb.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/splitdb.sh Thu Oct 12 16:26:36 2017 -0400 |
b |
#!/bin/sh
# splitdb.sh -- split ../TXdb.bed into per-chromosome chunks of 3000 lines.
#
# Usage: splitdb.sh <workingfolder>
#
# Changes into <workingfolder>, writes the distinct first-column values of
# ../TXdb.bed to chr.list, and for every value creates chunk files whose
# names start with "<chr>." (split(1) appends its own suffix), each holding
# at most 3000 lines of that chromosome's records.

workingfolder=$1

if [ -z "$workingfolder" ]; then
    echo "usage: $0 <workingfolder>" >&2
    exit 1
fi

# Stop here if the directory is unusable: everything below creates and
# removes files relative to the current directory.
cd "$workingfolder" || exit 1

# Keep the first occurrence of every chromosome name, in file order. Unlike
# plain `uniq`, this also removes duplicates that are not adjacent, so an
# unsorted TXdb.bed does not cause a chromosome to be processed twice.
awk -F '\t' '!seen[$1]++ { print $1 }' ../TXdb.bed > chr.list || exit 1

while IFS= read -r chr; do
    # Ignore blank lines so no files named ".bed" / "." are produced.
    [ -n "$chr" ] || continue

    # Exact string comparison on column 1. `grep -w` treated the name as a
    # regular expression and also matched it in any other column.
    awk -F '\t' -v chr="$chr" '$1 == chr' ../TXdb.bed > "$chr.bed"

    split -l 3000 "$chr.bed" "$chr."
    echo "$chr...."
    rm "$chr.bed"
done < chr.list
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/bin/vslz.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/bin/vslz.pl Thu Oct 12 16:26:36 2017 -0400 |
[ |
# vslz.pl -- write a score-annotated copy of the TXdb BED file.
#
# Usage: perl vslz.pl <ratio_file> <output_prefix>
#
# Reads <ratio_file> (tab separated) and remembers column 3 for every row
# whose column 15 is "passed", keyed by column 1. Then copies
# <SrcFolder>/db/TXdb.1101.bed to <output_prefix>.bed, keeping only records
# whose name field is "<id>[L]" or "<id>[S]" with a remembered <id>, and
# replacing the score column by round(ratio * 1000) for [L] records and
# round((1 - ratio) * 1000) for [S] records.
use strict;
use warnings;

# NOTE(review): the script terminates here, so nothing below is executed.
# Presumably this step was switched off on purpose -- confirm with the
# pipeline owner before removing this line.
exit;

die "Usage: $0 <ratio_file> <output_prefix>\n" unless @ARGV >= 2;

# The ini file is evaluated as Perl and must return a key/value list.
my %config    = do "$ENV{HOME}/.SpliceTrap.pl.ini";
my $SrcFolder = $config{SrcFolder};

my $BedFileName    = $SrcFolder . "/db/TXdb.1101.bed";
my $RatioFileName  = $ARGV[0];
my $BedFileOutName = $ARGV[1];
my %Ratios;

open(my $ratio_fh, '<', $RatioFileName)
    or die "Cannot open ratio file '$RatioFileName': $!\n";

while (my $RatioLine = <$ratio_fh>)
{
    chomp($RatioLine);
    my @array = split("\t", $RatioLine);
    # Short rows have no column 15; treat them as "not passed".
    $Ratios{$array[0]} = $array[2]
        if defined $array[14] && $array[14] eq "passed";
}

close($ratio_fh);

open(my $bed_fh, '<', $BedFileName)
    or die "Cannot open BED file '$BedFileName': $!\n";
open(my $bed_out_fh, '>', $BedFileOutName . ".bed")
    or die "Cannot create '$BedFileOutName.bed': $!\n";

# "description" is the attribute name; the original spelling "discription"
# would not be recognised as the track description.
print {$bed_out_fh} "track name=$BedFileOutName description=$BedFileOutName useScore=1\n";

while (my $BedLine = <$bed_fh>)
{
    # Strip the terminator and keep trailing empty fields (limit -1) so the
    # record is reproduced exactly and the score column never contains "\n".
    chomp($BedLine);
    my @array = split("\t", $BedLine, -1);

    # Skip records whose name is not "<id>[L]" / "<id>[S]". Without this
    # check $1 and $2 would silently keep the values of an earlier match.
    next unless defined $array[3] && $array[3] =~ /^(\S*)\[([LS])\]/;
    my ($id, $LS) = ($1, $2);

    next if not exists $Ratios{$id};

    my $score = $LS eq 'S' ? 1 - $Ratios{$id} : $Ratios{$id};
    $array[4] = sprintf("%.0f", $score * 1000);

    print {$bed_out_fh} join("\t", @array), "\n";
}

close($bed_out_fh)
    or die "Error while writing '$BedFileOutName.bed': $!\n";
close($bed_fh);
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/cutoffs/cutoff.pair.06.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/cutoffs/cutoff.pair.06.txt Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,111 @@ +9 2.7 0.60603248295953 0.171888000000001 +18 2 0.60524889781465 0.183333 +27 2 0.614125387586727 0.176226000000001 +36 1.9 0.607174152514097 0.174107 +45 1.8 0.600015884070526 0.175151 +54 1.5 0.606211216622966 0.181161 +63 1.5 0.630096537351149 0.17829 +72 1.4 0.600041179609881 0.181678 +81 1.4 0.604419014379505 0.180618 +90 1.4 0.605503082593424 0.179083 +99 1.3 0.615709740786329 0.177374 +108 1.3 0.60153811996046 0.178839 +117 1.3 0.624394647716218 0.172525 +126 1.3 0.607071122577313 0.179287 +135 1.2 0.604935986506864 0.180048 +144 1.2 0.606377388150568 0.179824 +153 1.1 0.609872307866415 0.172894 +162 1.1 0.605221663116349 0.176559 +171 1.1 0.60084962792178 0.177017 +180 1.1 0.606042806460085 0.179842 +189 1.1 0.639661884576477 0.165058000000001 +198 1 0.63233306074533 0.172839 +207 1 0.620992090280475 0.17554 +216 1 0.617967295839559 0.177413000000001 +225 1 0.643160025381914 0.165310000000001 +234 1 0.664093921751576 0.161004 +243 1 0.617444847253655 0.179608 +252 0.9 0.600141496641319 0.18301 +261 0.9 0.61458063483465 0.175533 +270 0.9 0.631646650541943 0.175805 +279 0.9 0.633124568165035 0.169468 +288 0.9 0.611511089439885 0.172566 +297 0.9 0.620072975883583 0.176992 +306 0.8 0.632771460485054 0.177489 +315 0.8 0.616480687227451 0.177778 +324 0.8 0.627081395066885 0.173799 +333 0.8 0.637882935696159 0.166883 +342 0.8 0.601440520784007 0.181724 +351 0.8 0.6308436890493 0.169283000000001 +360 0.8 0.639014375873075 0.164053 +369 0.8 0.608995842436592 0.182319 +378 0.7 0.61599095431047 0.179065 +387 0.7 0.632637490417973 0.173393000000001 +396 0.7 0.611914615604433 0.174309000000001 +405 0.7 0.636212590337212 0.168285 +414 0.7 0.601768900444485 0.17904 +423 0.7 0.652306181407458 0.165425 +432 0.7 0.61284769661545 0.17678 +441 0.7 0.620417630501693 0.179685000000001 +450 0.7 0.630731905615349 0.172766000000001 +459 0.7 0.610688367962893 0.174217 +468 0.7 0.614116817389574 0.169074 +477 0.7 0.627626017337352 0.170694000000001 +486 0.7 
0.625196910435048 0.173867 +495 0.7 0.624173913851294 0.170272 +504 0.7 0.600827986205662 0.176072 +513 0.7 0.60691053900984 0.173314 +522 0.7 0.637574555072294 0.164769 +531 0.7 0.623224687213309 0.173131 +540 0.7 0.606854298464756 0.17057 +549 0.7 0.602771105882753 0.176564 +558 0.7 0.635304573393458 0.165681 +567 0.7 0.614272507442932 0.178017 +576 0.7 0.647638749954493 0.161989 +585 0.7 0.635927720935647 0.166479000000001 +594 0.7 0.654872976893549 0.165962 +603 0.6 0.623281215201779 0.169602 +612 0.6 0.620036715664558 0.174000000000001 +621 0.6 0.6207915344063 0.173405000000001 +630 0.6 0.605666404221127 0.172404000000001 +639 0.6 0.605282217899342 0.171117 +648 0.6 0.605873079749684 0.173455 +657 0.6 0.622135854785987 0.171086 +666 0.6 0.609650459959578 0.180212 +675 0.6 0.615727466486383 0.175395 +684 0.6 0.650390040594351 0.157856 +693 0.6 0.600004956285639 0.17354 +702 0.6 0.619480037234101 0.169091000000001 +711 0.6 0.620537197134915 0.170687 +720 0.6 0.63097256862101 0.168571 +729 0.6 0.616862968480703 0.176372 +738 0.6 0.619853798956006 0.169892 +747 0.6 0.668527865891001 0.157351 +756 0.6 0.609299339958315 0.173282 +765 0.6 0.606997082083795 0.176418 +774 0.6 0.60385659737608 0.178646 +783 0.6 0.618166146904953 0.170539000000001 +792 0.6 0.655214353317497 0.162477 +801 0.6 0.667723279232349 0.161535 +810 0.6 0.641501812796978 0.165034000000001 +819 0.6 0.627258070106693 0.167323 +828 0.6 0.611937213679756 0.173702 +837 0.6 0.637164577326402 0.165999 +846 0.6 0.609589936966381 0.16902 +855 0.6 0.605378844036658 0.177104 +864 0.6 0.603894805290932 0.174897 +873 0.6 0.61196823740213 0.177185000000001 +882 0.6 0.645164364920278 0.169894 +891 0.6 0.616946196689952 0.164449 +900 0.6 0.620119902403488 0.173101 +909 0.6 0.612352323569787 0.175357000000001 +918 0.6 0.604523310198474 0.181906 +927 0.6 0.639238406673874 0.161621 +936 0.6 0.626807582940753 0.177092000000001 +945 0.6 0.649699083584984 0.166011 +954 0.6 0.630080161768358 0.175451 +963 0.6 
0.634570582339133 0.174267 +972 0.6 0.613974438568147 0.171149000000001 +981 0.6 0.646832147519767 0.16271 +990 0.6 0.611137442619996 0.170677 +999 0.6 0.609074843250805 0.177417 |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/cutoffs/cutoff.pair.07.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/cutoffs/cutoff.pair.07.txt Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,111 @@ +9 3.8 0.723195172634673 0.136373 +18 3.1 0.704962064416613 0.145611 +27 2.7 0.707557510009516 0.148332 +36 2.7 0.727407566746925 0.139424000000001 +45 2.6 0.700849445263879 0.141565 +54 2.5 0.722912449885175 0.135534 +63 2.2 0.700978069072888 0.148155 +72 2.1 0.701953684982546 0.145638 +81 2.1 0.714519324839634 0.144058 +90 2 0.706725312179025 0.143797 +99 1.8 0.711963141532287 0.146397 +108 1.8 0.735577668185521 0.136275 +117 1.7 0.702810428805111 0.149303 +126 1.7 0.720543078508623 0.142071 +135 1.7 0.70149933958699 0.145076 +144 1.7 0.71795624649094 0.139588 +153 1.7 0.704835091689678 0.148916 +162 1.5 0.701545731183728 0.149502 +171 1.5 0.724190117580076 0.139706 +180 1.5 0.713150501847426 0.146353 +189 1.4 0.701931396757465 0.150412 +198 1.4 0.701219389501589 0.150832 +207 1.4 0.709880139643964 0.146954 +216 1.4 0.745895944323654 0.133521 +225 1.4 0.708094624059256 0.144051000000001 +234 1.4 0.714653417236785 0.141035 +243 1.4 0.702278770668715 0.146489 +252 1.4 0.714249389046789 0.140401 +261 1.4 0.720916794007688 0.140816 +270 1.4 0.704112631476039 0.140986 +279 1.4 0.725725706846333 0.140364 +288 1.4 0.713439724527154 0.142329 +297 1.3 0.703874456990055 0.145868 +306 1.3 0.714083967774953 0.138665 +315 1.3 0.717635749171277 0.139297 +324 1.3 0.70800987989496 0.14435 +333 1.3 0.703608941539679 0.143911 +342 1.3 0.712538846663198 0.142044 +351 1.3 0.736517626110649 0.137995 +360 1.3 0.752400150484099 0.133463 +369 1.3 0.700408117809905 0.149221 +378 1.2 0.710876753570226 0.145758 +387 1.2 0.719606098956021 0.141686 +396 1.1 0.701573537304199 0.149306 +405 1.1 0.711449711776699 0.146484 +414 1.1 0.7219174559929 0.140383 +423 1.1 0.711374621895577 0.147498 +432 1.1 0.706470648418195 0.146516 +441 1.1 0.702119897887331 0.144641 +450 1.1 0.710888241224795 0.14668 +459 1.1 0.700236191312255 0.145443 +468 1.1 0.709289110346942 0.145098000000001 +477 1.1 0.723682944143134 0.137715000000001 +486 1.1 0.711909258694556 0.148433000000001 +495 1.1 
0.706050645629693 0.146412 +504 1.1 0.71026622444805 0.142836 +513 1.1 0.721840069030837 0.140123 +522 1.1 0.721616423306577 0.139145 +531 1.1 0.706599106603714 0.144744 +540 1.1 0.732906503371939 0.138953 +549 1.1 0.710747551039788 0.146028 +558 1.1 0.72403698505923 0.141907 +567 1.1 0.703858944181491 0.142942 +576 1.1 0.732227703846385 0.137018 +585 1.1 0.706081331511759 0.145895 +594 1.1 0.72851699215126 0.140481 +603 1.1 0.735776902337646 0.13646 +612 1 0.703269143581204 0.150933 +621 1 0.705868277579243 0.149988 +630 1 0.735304036557552 0.133891 +639 1 0.701919162079755 0.140775 +648 1 0.714774599695102 0.142638 +657 1 0.702484973766457 0.145888 +666 1 0.714275579611685 0.144289 +675 1 0.704751859660089 0.148070000000001 +684 1 0.725303040977116 0.139712 +693 1 0.7387858141582 0.134658 +702 1 0.737133543332787 0.135015 +711 1 0.71646444290315 0.140297 +720 1 0.735722464691124 0.136141 +729 1 0.709409780786399 0.144924000000001 +738 1 0.732557952972944 0.137912 +747 1 0.70280408850191 0.148749 +756 1 0.736014898114798 0.13601 +765 1 0.719075344267219 0.141568000000001 +774 1 0.705605602403474 0.142286 +783 1 0.714188898670159 0.143287 +792 1 0.708351298492815 0.14399 +801 1 0.701618595782959 0.140845 +810 1 0.70444245319677 0.142743000000001 +819 1 0.701265538602256 0.146322 +828 1 0.731290352078885 0.136012 +837 1 0.717812366249433 0.137717 +846 1 0.724767043040464 0.142316 +855 1 0.742408530369704 0.132985 +864 1 0.715927851943484 0.141312 +873 1 0.70674171896429 0.145039 +882 0.9 0.705311244948852 0.148678 +891 0.9 0.725745115884173 0.136097 +900 0.9 0.717545411117844 0.145251 +909 0.9 0.71139239340832 0.147716 +918 0.9 0.724110435852362 0.14723 +927 0.9 0.730304270252922 0.142741000000001 +936 0.9 0.702790994935148 0.14686 +945 0.9 0.714312901014806 0.142984 +954 0.9 0.718970705842182 0.14072 +963 0.9 0.702918434736422 0.146811 +972 0.9 0.763309367972586 0.135241 +981 0.9 0.70933162192829 0.14406 +990 0.9 0.714425781696018 0.142589 +999 0.9 
0.707307109948989 0.146265 |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/cutoffs/cutoff.pair.08.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/cutoffs/cutoff.pair.08.txt Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,111 @@ +9 5.7 0.807699596880158 0.11075 +18 4.8 0.812102488456895 0.109521 +27 4.8 0.807059148683326 0.109122 +36 4.5 0.810359129334262 0.106039 +45 4.1 0.800899562591127 0.109428 +54 4 0.819734419515034 0.104148 +63 3.6 0.801137818633087 0.112097 +72 3.5 0.805611266779768 0.107837 +81 3.2 0.804721543883597 0.110678 +90 3.2 0.81977543427128 0.105428 +99 3.2 0.80949445276013 0.109698 +108 3.1 0.800113619307504 0.115339 +117 2.9 0.80163357132874 0.111922 +126 2.9 0.807430653105459 0.110639 +135 2.8 0.809101610526483 0.109881 +144 2.8 0.808015366939668 0.109303 +153 2.8 0.815481435245632 0.106845 +162 2.7 0.813972734460064 0.108256 +171 2.7 0.803878583908181 0.111767 +180 2.7 0.814908823498375 0.110845 +189 2.5 0.803934003900257 0.112334 +198 2.5 0.819507987133127 0.106736 +207 2.5 0.816975135145848 0.109487 +216 2.5 0.801969702289987 0.11426 +225 2.5 0.814854242652841 0.106264 +234 2.5 0.809009743289239 0.110027000000001 +243 2.5 0.816859180637723 0.10951 +252 2.5 0.825923196681225 0.104742 +261 2.3 0.805863672217281 0.108068 +270 2.3 0.812114384187481 0.110547 +279 2.3 0.811109163497932 0.109461 +288 2.3 0.80680260883464 0.104169 +297 2.3 0.800054845624205 0.110627 +306 2 0.80196269021579 0.117711 +315 2 0.809945512786952 0.109737 +324 2 0.819364610024312 0.110094 +333 2 0.812712294607599 0.111396 +342 2 0.801568817408624 0.113051 +351 2 0.804296495631151 0.112833 +360 2 0.809648638295373 0.106319 +369 2 0.810216908330331 0.110967 +378 2 0.80081744573067 0.112898 +387 2 0.809614612996421 0.114576 +396 2 0.800940767696918 0.109701 +405 1.8 0.800397828290365 0.115299 +414 1.8 0.80375463158553 0.116013 +423 1.8 0.80204726507529 0.110069 +432 1.8 0.829673313493101 0.109584 +441 1.8 0.801512586472861 0.110591 +450 1.8 0.801317568038407 0.11341 +459 1.8 0.806266605408301 0.11489 +468 1.8 0.804368073911341 0.112083 +477 1.8 0.80034140400705 0.113126 +486 1.8 0.803037870942011 0.112527 +495 1.8 0.816147055378533 0.113451 +504 1.8 0.804009255355535 0.113167 +513 
1.8 0.809536527236661 0.111121 +522 1.8 0.810250462485448 0.113045 +531 1.8 0.813388275601197 0.117925 +540 1.8 0.810529197259932 0.111664 +549 1.8 0.80286846045568 0.114872 +558 1.8 0.805231767462103 0.112439 +567 1.8 0.803715104541846 0.112076 +576 1.8 0.800856311237744 0.113416 +585 1.8 0.815485369320364 0.113187 +594 1.8 0.810404807750137 0.111576 +603 1.8 0.80944688874663 0.114048 +612 1.8 0.813671662890222 0.107955 +621 1.8 0.805581198188585 0.111274 +630 1.5 0.801229670148542 0.121577 +639 1.5 0.812201198793304 0.109719 +648 1.5 0.80667869232878 0.111841 +657 1.5 0.817613487850624 0.110405 +666 1.5 0.816866335035283 0.107114 +675 1.5 0.8101804833249 0.117018 +684 1.5 0.818509280416726 0.115303 +693 1.5 0.80107589244594 0.116714 +702 1.5 0.829601652947073 0.107173 +711 1.5 0.820953668506561 0.108043 +720 1.5 0.811426464962464 0.114567 +729 1.5 0.800211543269215 0.1163 +738 1.5 0.814971683053337 0.111887 +747 1.5 0.810647742818646 0.111346 +756 1.5 0.823589356371534 0.1075 +765 1.5 0.808183381146845 0.115074 +774 1.5 0.804024248189691 0.11739 +783 1.5 0.809214446523965 0.114048 +792 1.5 0.816922307151774 0.116292 +801 1.5 0.817083370511117 0.107909 +810 1.5 0.803218112416446 0.122658 +819 1.5 0.801792365687339 0.115227 +828 1.5 0.824077216658734 0.106899 +837 1.5 0.808010406353855 0.115016 +846 1.5 0.805503181301868 0.111932 +855 1.5 0.810848938809874 0.114185 +864 1.5 0.80077383728855 0.11448 +873 1.5 0.804510616030174 0.115034 +882 1.5 0.800413194808912 0.119586 +891 1.5 0.820672192506787 0.110138 +900 1.5 0.810326379209945 0.112103 +909 1.5 0.811096513155687 0.117781 +918 1.5 0.804732736125644 0.117636 +927 1.5 0.807950118492599 0.113333 +936 1.5 0.807915776288532 0.113945 +945 1.5 0.832853748697962 0.107864 +954 1.5 0.819637006281129 0.112646 +963 1.5 0.807519443743277 0.110865 +972 1.5 0.816853380970702 0.113831 +981 1.5 0.802684401572777 0.116302 +990 1.5 0.809446019377896 0.112676 +999 1.5 0.804655052306582 0.112474 |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/refGenes.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/refGenes.bed Thu Oct 12 16:26:36 2017 -0400 |
b |
b'@@ -0,0 +1,66259 @@\n+chr1\t67092175\t67134971\tNM_001276351\t0\t-\t67093004\t67127240\t0\t8\t1429,187,70,113,158,92,86,42,\t0,3059,4076,23176,33576,34990,38966,42754,\n+chr1\t67092175\t67134971\tNM_001276352\t0\t-\t67093579\t67127240\t0\t9\t1429,70,145,68,113,158,92,86,42,\t0,4076,11062,19401,23176,33576,34990,38966,42754,\n+chr1\t67092175\t67134971\tNR_075077\t0\t-\t67134971\t67134971\t0\t10\t1429,70,145,68,143,113,158,92,86,42,\t0,4076,11062,19401,21438,23176,33576,34990,38966,42754,\n+chr1\t201283451\t201332993\tNM_000299\t0\t+\t201283702\t201328836\t0\t15\t453,104,395,145,208,178,63,115,156,177,154,187,85,107,2920,\t0,10490,29714,33101,34120,35166,36364,36815,38526,39561,40976,41489,42302,45310,46622,\n+chr1\t201283451\t201332993\tNM_001005337\t0\t+\t201283702\t201328836\t0\t14\t453,104,395,145,208,178,115,156,177,154,187,85,107,2920,\t0,10490,29714,33101,34120,35166,36815,38526,39561,40976,41489,42302,45310,46622,\n+chr1\t8352403\t8423687\tNM_001042682\t0\t-\t8355086\t8364133\t0\t13\t2717,181,147,721,223,1379,114,162,200,93,163,81,127,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,71157,\n+chr1\t8352403\t8817640\tNM_001042681\t0\t-\t8355086\t8656297\t0\t23\t2717,181,147,721,223,1379,114,162,200,93,163,81,99,100,125,49,105,97,106,126,71,469,481,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,113521,142659,145001,156223,188810,204071,205014,262157,271906,303569,464756,\n+chr1\t8352403\t8817640\tNM_012102\t0\t-\t8355086\t8656297\t0\t24\t2717,181,147,721,223,1379,114,162,200,93,163,81,99,100,125,49,105,97,106,126,71,469,185,481,\t0,3015,3696,5792,7360,7708,9359,10279,11652,12342,13408,70323,113521,142659,145001,156223,188810,204071,205014,262157,271906,303569,440000,464756,\n+chr1\t33513998\t34165274\tNM_001281956\t0\t-\t33519517\t34165097\t0\t71\t2572,213,139,88,113,162,63,180,112,74,174,174,180,177,183,174,189,174,249,174,195,186,189,147,189,114,81,146,178,189,210,117,70,119,105,97,125,204,96,114,117,195,188,139,192,203,127,192
,157,170,189,216,117,189,188,139,195,327,183,113,104,122,125,88,78,113,208,195,113,217,364,\t0,5466,5813,7464,9308,10883,13197,19051,19797,23023,23437,26526,27131,28721,32038,36178,43735,45301,53594,55375,57533,58507,63297,66754,69643,72505,73089,86866,88370,91283,97042,100505,102907,103500,108168,109371,110520,111052,112487,119423,121215,122361,132649,138324,143947,148891,178931,184754,186518,195090,200588,202287,210198,210517,211350,212548,225141,229281,258570,274601,278424,296744,305714,306470,311698,332885,404095,421761,518595,574978,650912,\n+chr1\t33513998\t34165842\tNM_052896\t0\t-\t33519517\t34165813\t0\t70\t2572,213,139,88,113,162,63,180,112,74,174,174,180,177,183,174,174,174,195,186,189,147,189,114,81,146,178,189,126,210,117,70,119,105,97,125,204,96,114,117,195,188,139,192,203,127,192,157,170,189,216,117,189,188,139,195,327,183,113,104,122,125,88,78,113,208,195,113,217,96,\t0,5466,5813,7464,9308,10883,13197,19051,19797,23023,23437,26526,27131,28721,32038,36178,53594,55375,57533,58507,63297,66754,69643,72505,73089,86866,88370,91283,91864,97042,100505,102907,103500,108168,109371,110520,111052,112487,119423,121215,122361,132649,138324,143947,148891,178931,184754,186518,195090,200588,202287,210198,210517,211350,212548,225141,229281,258570,274601,278424,296744,305714,306470,311698,332885,404095,421761,518595,574978,651748,\n+chr1\t41847188\t42035925\tNR_038261\t0\t-\t42035925\t42035925\t0\t4\t1257,219,112,119,\t0,1682,71224,188618,\n+chr1\t75202130\t75611114\tNM_001130058\t0\t-\t75203726\t75541447\t0\t24\t1703,85,89,71,74,104,95,263,88,93,100,132,113,84,124,61,126,85,85,74,49,39,82,75,\t0,9337,11574,11788,12474,13623,15735,16359,17126,17669,20230,25595,31855,34856,36382,39870,40755,49079,72827,98481,137451,194452,339304,408909,\n+chr1\t75202132\t75611116\tNM_001320285\t0\t-\t75203726\t75300660\t0\t26\t1701,85,89,71,74,104,95,263,88,93,100,132,113,84,124,61,126,85,85,74,91,49,39,67,82,77,\t0,9335,11572,11786,12472,13621,15733,16357,17124,17667,20228,25593,31853,
34854,36380,39868,40753,49077,72825,98479,109482,137449,194450,196222,339302,408907,\n+chr1\t75202132\t75611116\tNM_001320287\t0\t-\t75203726\t75300660\t0\t25\t1701,85,'..b'0123\t230123\t0\t8\t106,146,111,89,40,166,208,100,\t0,3710,5076,6459,7823,9904,12166,12494,\n+chr15_KI270727v1_random\t241092\t241174\tNR_049886\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n+chr15_KI270727v1_random\t241092\t241174\tNR_049895\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n+chr15_KI270727v1_random\t241092\t241174\tNR_128721\t0\t+\t241174\t241174\t0\t1\t82,\t0,\n+chr15_KI270727v1_random\t372321\t373405\tNM_001004719\t0\t+\t372419\t373361\t0\t1\t1084,\t0,\n+chr16_KI270728v1_random\t17231\t19833\tNM_001099687\t0\t+\t17263\t18809\t0\t2\t394,1037,\t0,1565,\n+chr16_KI270728v1_random\t17231\t19833\tNM_016212\t0\t+\t17263\t18809\t0\t2\t394,1037,\t0,1565,\n+chr16_KI270728v1_random\t17231\t19833\tNR_110886\t0\t+\t19833\t19833\t0\t4\t394,167,289,1037,\t0,499,1064,1565,\n+chr16_KI270728v1_random\t17231\t19833\tNR_110897\t0\t+\t19833\t19833\t0\t4\t394,167,289,1037,\t0,499,1064,1565,\n+chr16_KI270728v1_random\t17231\t19833\tNR_110910\t0\t+\t19833\t19833\t0\t3\t394,289,1037,\t0,1064,1565,\n+chr16_KI270728v1_random\t17231\t19833\tNR_110911\t0\t+\t19833\t19833\t0\t3\t394,289,1037,\t0,1064,1565,\n+chr16_KI270728v1_random\t17233\t19273\tNM_001205259\t0\t+\t17263\t18809\t0\t2\t392,477,\t0,1563,\n+chr16_KI270728v1_random\t17233\t19273\tNR_110914\t0\t+\t19273\t19273\t0\t3\t392,289,477,\t0,1062,1563,\n+chr16_KI270728v1_random\t17250\t19837\tNM_001330061\t0\t+\t17263\t18809\t0\t2\t375,1041,\t0,1546,\n+chr16_KI270728v1_random\t17251\t19833\tNM_001330066\t0\t+\t17263\t18809\t0\t2\t374,1037,\t0,1545,\n+chr16_KI270728v1_random\t933855\t936466\tNR_110898\t0\t-\t936466\t936466\t0\t3\t1039,290,268,\t0,1251,2343,\n+chr16_KI270728v1_random\t1001629\t1037618\tNR_135178\t0\t+\t1037618\t1037618\t0\t25\t105,103,322,130,110,172,175,173,104,151,211,133,128,196,134,396,152,192,188,193,183,182,148,197,233,\t0,1009,5706,6177,6753,7361,856
1,11932,12489,13586,16444,16928,17168,17388,20254,20921,21434,22284,28770,30290,31190,34103,35109,35455,35756,\n+chr16_KI270728v1_random\t1331814\t1346848\tNR_130771\t0\t-\t1346848\t1346848\t0\t3\t5057,93,295,\t0,6640,14739,\n+chr16_KI270728v1_random\t1331814\t1346848\tNR_130772\t0\t-\t1346848\t1346848\t0\t2\t5057,295,\t0,14739,\n+chr17_GL000205v2_random\t54856\t57966\tNR_003682\t0\t-\t57966\t57966\t0\t1\t3110,\t0,\n+chr22_KI270731v1_random\t69141\t86923\tNR_003267\t0\t+\t86923\t86923\t0\t13\t189,171,135,81,193,158,150,137,188,128,113,114,280,\t0,732,4094,4362,9648,10233,11930,12603,16240,16660,16890,17084,17502,\n+chr22_KI270733v1_random\t122272\t135645\tNR_046235\t0\t+\t135645\t135645\t0\t1\t13373,\t0,\n+chr22_KI270733v1_random\t130203\t135280\tNR_003287\t0\t+\t135280\t135280\t0\t1\t5077,\t0,\n+chr22_KI270733v1_random\t121580\t121672\tNR_106782\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n+chr22_KI270733v1_random\t121580\t121672\tNR_128715\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n+chr22_KI270733v1_random\t121580\t121672\tNR_128716\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n+chr22_KI270733v1_random\t121580\t121672\tNR_128717\t0\t+\t121672\t121672\t0\t1\t92,\t0,\n+chr22_KI270733v1_random\t125128\t125189\tNR_037458\t0\t+\t125189\t125189\t0\t1\t61,\t0,\n+chr22_KI270733v1_random\t125128\t125189\tNR_128714\t0\t+\t125189\t125189\t0\t1\t61,\t0,\n+chr22_KI270733v1_random\t125930\t127799\tNR_003286\t0\t+\t127799\t127799\t0\t1\t1869,\t0,\n+chr22_KI270733v1_random\t128876\t129032\tNR_003285\t0\t+\t129032\t129032\t0\t1\t156,\t0,\n+chr22_KI270733v1_random\t170214\t170275\tNR_037458\t0\t+\t170275\t170275\t0\t1\t61,\t0,\n+chr22_KI270733v1_random\t170214\t170275\tNR_128714\t0\t+\t170275\t170275\t0\t1\t61,\t0,\n+chr22_KI270733v1_random\t171011\t172880\tNR_003286\t0\t+\t172880\t172880\t0\t1\t1869,\t0,\n+chr22_KI270733v1_random\t173955\t174111\tNR_003285\t0\t+\t174111\t174111\t0\t1\t156,\t0,\n+chr22_KI270734v1_random\t72453\t74335\tNR_136575\t0\t+\t74335\t74335\t0\t1\t1882,\t0,\n+chr22_KI270734v1_r
andom\t90958\t98408\tNR_136574\t0\t-\t98408\t98408\t0\t4\t429,155,73,884,\t0,5076,5960,6566,\n+chr22_KI270734v1_random\t131493\t137393\tNM_005675\t0\t+\t131645\t136994\t0\t5\t262,161,101,141,549,\t0,342,3949,4665,5351,\n+chr22_KI270734v1_random\t138078\t161750\tNM_001195226\t0\t-\t138479\t156446\t0\t14\t589,89,99,176,147,93,82,80,117,65,150,35,209,62,\t0,664,4115,5535,6670,6925,8561,9545,10037,10335,12271,12908,18210,23610,\n+chr22_KI270734v1_random\t138078\t161852\tNM_016335\t0\t-\t138479\t161586\t0\t15\t589,89,99,176,147,93,82,80,117,65,150,35,209,313,164,\t0,664,4115,5535,6670,6925,8561,9545,10037,10335,12271,12908,18210,23235,23610,\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/splice_trap.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/splice_trap.xml Thu Oct 12 16:26:36 2017 -0400 |
b |
<tool id="splice_trap" name="SpliceTrap" version="1.0.0">
    <description>A statistical tool for quantifying exon inclusion ratios in paired-end RNA-seq data, with broad applications for the study of alternative splicing.</description>
    <requirements>
        <requirement type="package" version="1.2.1.1">bowtie</requirement>
    </requirements>
    <!-- The deprecated interpreter="perl" attribute is replaced by an explicit
         "perl <script>" call; dataset and directory paths are single-quoted so
         that paths containing spaces reach SpliceTrap.pl as one argument. -->
    <command detect_errors="exit_code"><![CDATA[
        perl '$__tool_directory__/SpliceTrap.pl'
            -p 8
            -l '$__tool_directory__'
            -d hg38v3
        #if $input_type_conditional.spliceTrap_input_type == "paired"
            -1 '$input_type_conditional.input_1'
            -2 '$input_type_conditional.input_2'
        #else
            -1 '$input_type_conditional.input.forward'
            -2 '$input_type_conditional.input.reverse'
        #end if
            -s $read_size
            '$output1'
            '$output2'
    ]]></command>
    <inputs>
        <conditional name="input_type_conditional">
            <param name="spliceTrap_input_type" type="select" label="Input Type" help="Select between paired and paired collection">
                <option value="paired" selected="true">Paired</option>
                <option value="paired_collection">Paired Collection</option>
            </param>
            <when value="paired">
                <!-- input_1 is passed as -1 and input_2 as -2, the same slots the
                     collection branch fills with "forward" and "reverse"; the
                     labels now follow that mapping (they were swapped).
                     NOTE(review): confirm against SpliceTrap.pl's -1/-2 semantics. -->
                <param format="fastq" name="input_1" type="data" label="FASTQ file, forward reads" />
                <param format="fastq" name="input_2" type="data" label="FASTQ file, reverse reads" />
            </when>
            <when value="paired_collection">
                <param format="fastq" name="input" type="data_collection" collection_type="paired" label="Select a paired collection" help="Specify paired dataset collection containing paired reads"/>
            </when>
        </conditional>

        <param name="read_size" type="integer" value="50" label="Read size" />
    </inputs>
    <outputs>
        <data format="txt" name="output1" />
        <data format="txt" name="output2" />
    </outputs>
    <tests>
        <!-- Test parameter names must match the names declared in <inputs>. -->
        <test>
            <param name="spliceTrap_input_type" value="paired" />
            <param name="input_1" value="input1.fastq"/>
            <param name="input_2" value="input2.fastq"/>
            <output name="output1" file="output1.txt"/>
            <output name="output2" file="output2.txt"/>
        </test>
        <test>
            <param name="spliceTrap_input_type" value="paired_collection" />
            <param name="input">
                <collection type="paired">
                    <element name="forward" value="input1.fastq" />
                    <element name="reverse" value="input2.fastq" />
                </collection>
            </param>
            <output name="output1" file="output1.txt"/>
            <output name="output2" file="output2.txt"/>
        </test>
    </tests>
    <help><![CDATA[
**SpliceTrap**
    ]]></help>
    <citations>
        <!-- A bibtex citation must contain a BibTeX entry, not a bare URL. -->
        <citation type="bibtex">
@misc{splicetrap,
    title = {SpliceTrap},
    url = {http://rulai.cshl.edu/splicetrap}
}
        </citation>
    </citations>
</tool>
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/src/Makefile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/src/Makefile Thu Oct 12 16:26:36 2017 -0400 |
b |
@@ -0,0 +1,3 @@ +all: + g++ -O2 splicetrap.estimate.cpp -o Pair_estimate_c + mv Pair_estimate_c ../bin |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/src/splicetrap.estimate.cpp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/src/splicetrap.estimate.cpp Thu Oct 12 16:26:36 2017 -0400 |
[ |
b'@@ -0,0 +1,854 @@\n+//Author: Jie Wu@CSHL\n+//TO replace the original Perl code\n+\n+#include <stdio.h>\n+#include <stdlib.h>\n+#include <iostream>\n+#include <fstream>\n+#include <string.h>\n+#include <unistd.h>\n+#include <sstream>\n+#include <map>\n+#include <vector>\n+#include <math.h>\n+#include <time.h>\n+#include <limits>\n+#include <iomanip>\n+\n+using namespace std;\n+\n+int MAX_LINE_LEN = 1024*1024;\n+\n+void printusage()\n+{\n+ cout<< "\\tUsage:"<<endl;\n+ cout<<"\\t-s\\tread_size"<<endl;\n+ cout<<"\\t-b\\tIRM file"<<endl;\n+ cout<<"\\t-f\\tFZM file"<<endl;\n+ cout<<"\\t-1\\tmapping result 1"<<endl;\n+ cout<<"\\t-2\\tmapping result 2"<<endl;\n+ cout<<"\\t-d\\tTXdb database file"<<endl;\n+ cout<<"\\t-o\\toutput prefix"<<endl;\n+ \n+}\n+\n+struct ReadPairInfo\n+{\n+\tbool isPaired;\n+\tvector<int> IsoFlag; //flag as 1 if mapped to the isoform\n+\tvector<int> Size; //fragment size in the isoform\n+\tvector<int> Pos;\n+};\n+\n+class TXdb_entry{\n+\tpublic:\n+\t string chrid;\n+\t //int start;\n+\t //int end;\n+\t string id;\n+\t bool iscomplete;\n+\t int start;\n+\t int end;\n+\t int exonstarts[3];\n+\t char strand;\n+\t int exonsize[3];\n+\t int len1;\n+\t int len2;\n+\t vector< ReadPairInfo > pairs;\n+\t int snum1; //single-end reads\n+\t int snum2;\n+\t int snum12;\n+\t float ir, bir;\n+\t int enum1; //exon body reads\n+\t int enum2;\n+\t int enum3;\n+\t \n+\t int jnum12;\n+\t int jnum23;\n+\t int jnum13;\n+\n+\t int totalreadnum;\n+\t void estimate(vector<double> &FZM, map<string, vector<double> > &IRM, int &read_size)\n+\t {\n+\t\t float Maxe=0;\n+\t\t float Max = -numeric_limits<float>::max();\n+\t\t float BMaxe=0;\n+\t\t float BMax=-numeric_limits<float>::max();\n+\t\t for(float e1=0.001; e1<1;e1=e1+0.001)\n+\t\t {\n+\t\t\t float efix1 = e1*len1/\n+\t\t\t (e1*exonsize[1]+len2);\n+\t\t\t float efix2 = 1-efix1;\n+\t\t\t float LL = snum12*log( efix1/(len1-read_size+1)\n+\t\t\t + efix2/(len2-read_size+1) )\n+\t\t\t + snum1*log(efix1/(len1-read_size+1)) 
\n+\t\t\t\t + snum2*log(efix2/(len2-read_size+1));\n+\t\t\t\t //cout<<LL<<endl;\n+\t\t\t\t //cout<<efix1<<"\\t" <<pairs.size()<<"\\t" <<len2<<"\\t"\t<<endl;\n+\t\t //int num1=0, num2=0,num12=0;\n+\t\t for(int i=0;i<pairs.size();i++)\n+\t\t\t {\n+ if(pairs[i].IsoFlag[0] == 1)\n+\t\t\t {\n+\t\t\t\t if(pairs[i].IsoFlag[1] == 0)\n+\t\t\t\t {\n+\t\t\t\t\t // num1++;\n+\t\t\t\t\t float tmp=FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1);\n+\t\t\t\t\t if (tmp == 0)\n+\t\t\t\t\t {\n+\t\t\t\t\t \t LL = LL - 308;\n+\t\t\t\t\t }\n+\t\t\t\t\t else\n+\t\t\t\t\t {\n+\t\t\t\t\t\t LL = LL + log(tmp);\n+\t\t\t\t\t }\n+\t\t\t\t }\n+\t\t\t\t else\n+\t\t\t\t {\n+\t\t\t\t\t // num12++;\n+\t\t\t\t\t float tmp=(FZM[pairs[i].Size[0]]*efix1/(len1-pairs[i].Size[0]+1)) \n+\t\t\t\t\t + (FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1));\n+\t\t\t\t \t if(tmp ==0)\n+\t\t\t\t\t {\n+\t\t\t\t\t LL = LL -308;\n+\t\t\t\t\t }\n+\t\t\t\t\t else\n+\t\t\t\t\t {\n+\t\t\t\t\t\t LL = LL + log(tmp);\n+\t\t\t\t\t }\n+\t\t\t\t }\n+\t\t\t }\n+\t\t\t else\n+\t\t\t {\n+\t\t\t\t //num2++;\n+\t\t\t\t float tmp = FZM[pairs[i].Size[1]]*efix2/(len2-pairs[i].Size[1]+1);\n+\t\t\t\t if(tmp ==0)\n+\t\t\t\t {\n+\t\t\t\t LL= LL - 308; \n+\t\t\t\t }\n+\t\t\t\t else\n+\t\t\t\t {\n+\t\t\t\t\t LL = LL +log(tmp);\n+\t\t\t\t }\n+\t\t\t }\n+\t\t\t }\n+\t\t\t //cout<<num1<<"\\t"<<num2<<"\\t"<<num12<<"\\n";\n+\t\t\t //cout<<LL<<endl;\n+\t\t\t \n+\t\t\t if(IRM["CA"].size() > 0)\n+\t\t\t {\n+\t\t\t\t float BLL;\n+\t\t\t\t string eventtype =id.substr(0,2);\n+\t\t\t\t if(!eventtype.compare("ME"))\n+\t\t\t\t {\n+\t\t\t\t\t eventtype = "CA";\n+\t\t\t\t }\n+\t\t\t\t BLL = LL + log(IRM[eventtype][int(e1/0.001)]);\n+\t\t\t\t if(BLL > BMax)\n+\t\t\t\t { \n+\t\t\t\t\t BMax = BLL;\n+\t\t\t\t\t BMaxe = e1;\n+\t\t\t\t }\n+\t\t\t }\n+\t\t\t\t //cout<<LL<<endl;\n+\t\t\t if(LL > Max)\n+\t\t\t {\n+\t\t\t\t Max= LL;\n+\t\t\t\t Maxe = e1;\n+\n+\t\t\t }\n+\t\t\t\t \n+\t\t }\n+\t\t ir= Maxe;\n+\t\t bir= BMaxe;\n+\t }\n+\t\t\n+\t 
TXdb_entry(char *line)\n+\t {\n+\t\t size_t found;\n+\t\t string linestr(line);\n+\t\t found = linestr.find("[L]");\n+\t\t if(found != string::npos)\n+\t\t {\n+\t\t\t iscomplete=1;\n+\t\t\n+\t\t \tchar *token;\n+\n+\t\t \ttoken = strtok(line,"\\t");\n+\t\t \tif(token==NULL)\n+\t\t \t{\n+\t\t\t\t cout<<"error1"<<endl;\n+\t\t\t\t exit(0);\n+\t\t\t }\n+\t\t\t else\n+\t\t\t {\n+\t\t\t\t c'..b'gle_read_num<<endl;\n+ cout<<"Paired reads used:\\t"<<total_pair_read_num<<endl;\n+\n+ log_file<<"#total_read_num:\\t"<<total_read_num*2<<endl;\n+ log_file<<"#total_single_read_num:\\t"<<total_single_read_num<<endl;\n+ log_file<<"#total_pair_read_num:\\t"<<total_pair_read_num<<endl;\n+ \n+ //Calculate ratios and output\n+ cout<<"Now doing estimations and output to "<<ratio_file_name<<"\\t"<<num_file_name<<endl;\n+ ofstream ratio_file(ratio_file_name.c_str());\n+ ofstream num_file(num_file_name.c_str());\n+ map<string, TXdb_entry>::iterator txdb_it = TXdb_entries.begin();\n+ for(;txdb_it!=TXdb_entries.end();txdb_it++)\n+ {\n+\t string eventid = txdb_it->first;\n+\t //cout<< eventid <<endl; \n+\t txdb_it->second.estimate(FZM,IRM,read_size);\n+/*\t ratio_file<< eventid<<"\\t"\n+\t \t\t<<setprecision(4)<<fixed\n+\t <<txdb_it->second.ir<<"\\t"\n+\t\t\t<<txdb_it->second.bir<<"\\t"\n+\t\t\t<<"NA\\tNA\\tNA\\t"\n+\t\t\t<<txdb_it->second.jnum12<<"\\t"\n+\t\t\t<<txdb_it->second.jnum23<<"\\t"\n+\t\t\t<<txdb_it->second.jnum13<<"\\t"\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\\t"\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\\t"\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] 
)<<"\\t"\n+\t\t\t<<txdb_it->second.exonsize[0]<<"\\t"\n+\t\t\t<<txdb_it->second.exonsize[1]<<"\\t"\n+\t\t\t<<txdb_it->second.exonsize[2]<<"\\t"\n+\t\t<<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<endl;\n+\t\t*/\n+\t ratio_file<<eventid<<"\\t"\n+\t \t\t<<setprecision(4)<<fixed\n+\t\t\t<<txdb_it->second.ir<<"\\t" //1\n+\t\t\t<<txdb_it->second.bir<<"\\t" //2\n+\t\t\t<<txdb_it->second.chrid<<"\\t" //3\n+\t\t\t<<txdb_it->second.start<<"," //\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\\t" //4\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\\t"//5\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\\t"//6\n+\t\t\t<<txdb_it->second.strand<<"\\t"//7\n+\t\t\t<<txdb_it->second.jnum12<<"\\t"//8\n+\t\t\t<<txdb_it->second.jnum23<<"\\t"//9\n+\t\t\t<<txdb_it->second.jnum13<<"\\t"//10\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum1+txdb_it->second.jnum12 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[0] )<<"\\t"//11\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum2+txdb_it->second.jnum12 +txdb_it->second.jnum23 )/txdb_it->second.exonsize[1] )<<"\\t"//12\n+\t\t\t<<(1.0*read_size*( txdb_it->second.enum3+txdb_it->second.jnum23 +txdb_it->second.jnum13 )/txdb_it->second.exonsize[2] )<<"\\t"//13`\n+\t\t\t<<(1.0*read_size*txdb_it->second.totalreadnum/txdb_it->second.len1)<<"\\t"//14\n+\t\t\t<<txdb_it->second.exonsize[0]<<"\\t"//15\n+\t\t\t<<txdb_it->second.exonsize[1]<<"\\t"//16\n+\t\t\t<<txdb_it->second.exonsize[2]<<endl;//17\n+\n+\t num_file<< 
eventid<<"\\t"\n+\t\t\t<<txdb_it->second.chrid<<"\\t"\n+\t\t\t<<txdb_it->second.start<<","\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonsize[0]<<"\\t"\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]<<","\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[1]+txdb_it->second.exonsize[1]<<"\\t"\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]<<","\n+\t\t\t<<txdb_it->second.start+txdb_it->second.exonstarts[2]+txdb_it->second.exonsize[2]<<"\\t"\n+\t\t\t<<txdb_it->second.strand<<"\\t"\n+ <<txdb_it->second.enum1<<"\\t"\n+\t\t\t<<txdb_it->second.enum2<<"\\t"\n+\t\t\t<<txdb_it->second.enum3<<"\\t"\n+\t\t\t<<txdb_it->second.jnum12<<"\\t"\n+\t\t\t<<txdb_it->second.jnum23<<"\\t"\n+\t\t\t<<txdb_it->second.jnum13<<endl;\n+ }\n+ log_file.close();\n+ ratio_file.close();\n+ num_file.close();\n+ clock_finish = clock();\n+ cout<<"Done! time used:"<<(double)(clock_finish-clock_start)/CLOCKS_PER_SEC<<" seconds"<<endl;\n+\n+ \n+}\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/test-data/input1.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/test-data/input1.fastq Thu Oct 12 16:26:36 2017 -0400 |
b |
b"@@ -0,0 +1,200484 @@\n+@ERR030881.107 HWI-BRUNOP16X_0001:2:1:13663:1096#0/1\n+ATCTTTTGTGGCTACAGTAAGTTCAATCTGAAGTCAAAACCAACCAATTT\n++\n+5.544,444344555CC?CAEF@EEFFFFFFFFFFFFFFFFFEFFFEFFF\n+@ERR030881.311 HWI-BRUNOP16X_0001:2:1:18330:1130#0/1\n+TCCATACATAGGCCTCGGGGTGGGGGAGTCAGAAGCCCCCAGACCCTGTG\n++\n+GFFFGFFBFCHHHHHHHHHHIHEEE@@@=GHGHHHHHHHHHHHHHHHHHH\n+@ERR030881.1487 HWI-BRUNOP16X_0001:2:1:4144:1420#0/1\n+GTATAACGCTAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCATAGTA\n++\n+55*'+&&5'55('''888:8FFFFFFFFFF4/1;/4./++FFFFF=5:E#\n+@ERR030881.9549 HWI-BRUNOP16X_0001:2:1:1453:3458#0/1\n+AACGGATCCATTGTTTCGAGAACGTGATCGCCCTCATCTACCTAGCCTCA\n++\n+D<@DDA@A:AHHHHHHHHHHHHHHIHHHHHHHHHHHHHHHHHBHHHHHHH\n+@ERR030881.13497 HWI-BRUNOP16X_0001:2:1:16344:4145#0/1\n+GCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCACCATCGTGC\n++\n+F@FFFGGFGFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHH\n+@ERR030881.14070 HWI-BRUNOP16X_0001:2:1:4377:4232#0/1\n+TGGACAGTTGCTCCTGGCTCCAGAACCTGTCTTGCAAGGGACAGTGGGGT\n++\n+A:AA@HHHHHHHHHHHHHHHHHHHIHHHHHHHHHHGF=GFHHHH@@?AA*\n+@ERR030881.16375 HWI-BRUNOP16X_0001:2:1:2265:4573#0/1\n+ATTAGGAAACATGGAATTTTTTTAAAGGTTTTTCTTGTATCTTTTTTTTT\n++\n+@<><CHHHHHHHHHHHHHHHHHGGHHHHHHHHHHGGGHHHHHHHHGGGGG\n+@ERR030881.18437 HWI-BRUNOP16X_0001:2:1:13904:4828#0/1\n+CAATAGCCAGATGGTTGGTGGGGCAGCCAGGCAGGGAGGACCCAGGGCTG\n++\n+555544555544555;AAAAFFBBEEEE;=FCB9F===<<FFFFEFFEEE\n+@ERR030881.18768 HWI-BRUNOP16X_0001:2:1:15563:4868#0/1\n+GTGCCAAATTGTCACATTCGAGCTTGAGGCTGTGGTACTGAGCTTGCAGT\n++\n+D>BFD@@?>>54454?FFGFGGGGGGGGGGGGGEGGGGGGGGGEGGGGGG\n+@ERR030881.20718 HWI-BRUNOP16X_0001:2:1:12184:5115#0/1\n+CCCGGCCTAACTTTCATTTAATTTCAATGAATTTTCTTTTTTTTTTTTTT\n++\n+56455==@=>HHHHHHHHHGHHHHHHHHGH=HHHHHHEEEECEEEEEEEE\n+@ERR030881.22833 HWI-BRUNOP16X_0001:2:1:13089:5358#0/1\n+GGAGAAGGGGCGAGGGAAGAAGACCTTTGCTATCCCAGATACCAGGACTG\n++\n+55544145444/444GFDFG9A@@@DD>.F@><<=FDD@AGG>GGEGGEG\n+@ERR030881.23643 
HWI-BRUNOP16X_0001:2:1:7921:5452#0/1\n+CGGCCCCCTGCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCA\n++\n+FBDFFHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHH\n+@ERR030881.28299 HWI-BRUNOP16X_0001:2:1:6428:5960#0/1\n+ATGAGAAGGAGCCATCAGGACCTTATGAAAGCGACGAAGACAAGAGTGAT\n++\n+55554DDFFFBBFFFHHGHHHHHHHHHHHHHHHHHHDHH8HHHHHHHHFH\n+@ERR030881.28475 HWI-BRUNOP16X_0001:2:1:14780:5977#0/1\n+CGAAAACCAACTCTTTACCTAACTTTGCATGGTGCTTAGTCAAGGACTCC\n++\n+555,4&4551FFFFFBF3BDFFFFFFEFFFFBEFFFFFFDFFFFFFFFF=\n+@ERR030881.29253 HWI-BRUNOP16X_0001:2:1:1570:6070#0/1\n+GGAATGTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCA\n++\n+HGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.30545 HWI-BRUNOP16X_0001:2:1:4103:6216#0/1\n+CAACTCTTTACCTAACTTTGCATGGTGCTTAGTCAAGGACTCCTGCGACC\n++\n+54-55A@A@@HHHHHFFGGE555558<=;=55555AAAA?HHHHH>8@@>\n+@ERR030881.32582 HWI-BRUNOP16X_0001:2:1:12474:6471#0/1\n+CTTGCCTCACATGTCAGGGCAGGTATCCACCTAACCAGGCTGCAGGGGAG\n++\n+555555544444544HHHHGHHHHHHHHHHHHHHHHHHHHHH5@HFFF*F\n+@ERR030881.33730 HWI-BRUNOP16X_0001:2:1:14154:6628#0/1\n+CCAGCCTTGATACAGCATTTTCCACTTCTCTCTGTAGAGATCAGACGATT\n++\n+55555555(5@>@=:@=8.@04554CCCCC.441445444-555445555\n+@ERR030881.35226 HWI-BRUNOP16X_0001:2:1:3903:6867#0/1\n+CAGCATCCTGCTTAGGGCCCTGGAAACTGGGGAAATAGGTAGCCAGGTGG\n++\n+55555A@AAAGGEGGGGGGGGGGGGGGGGGGGCGGGFEGFGGGGFGGCGG\n+@ERR030881.38182 HWI-BRUNOP16X_0001:2:1:17495:7451#0/1\n+CACCATCGTGCCCGTTCTTGTCTTCCTTGGAGAGGTGGGCCTGGGAACCC\n++\n+5544455,0545445FFFEEFFFFFFFFFEEBC;D6<5-?FFFFFFFFFF\n+@ERR030881.41234 HWI-BRUNOP16X_0001:2:1:14816:8065#0/1\n+CTCTCCTCTAACCCTCCAGGCCTTAGCTTGCCTCACATGTCAGGGCAGGT\n++\n+55,34)4-53HHEHHGGGGG7DC?@GG;BGGEGGGGGGGGGGGGGGGGGA\n+@ERR030881.55301 HWI-BRUNOP16X_0001:2:1:7892:11256#0/1\n+CAAAAATGTAGCTGCCCTGACCTGGTCTCCCCTGACCCTTCCACGGGGCT\n++\n+56624545442525554455FFECECGEDGFF8DF###############\n+@ERR030881.57346 HWI-BRUNOP16X_0001:2:1:20039:11573#0/1\n+GACAGATGATGTCCAAGCCCCTACATGCCCCAGACCCCAGGGCACGGCTG\n++\n+##################################################\n+@ERR030881.57608 
HWI-BRUNOP16X_0001:2:1:16788:11614#0/1\n+ATCTCGTAGTACATCACATAGTGACGCTGCATCTCTGACTTCTCACTGGC\n++\n+5653445555HHHHHHHHHH9;@=@HHHHHHDHHHHHHHHHHHHHHHHDH\n+@ERR030881.58998 HWI-BRUNOP16X_0001:2:1:14252:11816#0/1\n+CACCATTTGACCCTGAGCCAG"..b':6601:197274#0/1\n+CGGCCCCCTGCTAATCCGACTTCTCGCCATCATCCTCCTGGTGGGTGTCA\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHH\n+@ERR030881.74446016 HWI-BRUNOP16X_0001:2:68:6384:197508#0/1\n+TGTGTCTTGTGCTAAACATTCCTTTCTCTCCGTGCCTCTGTCTCCCCTCT\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74446277 HWI-BRUNOP16X_0001:2:68:20062:197534#0/1\n+CAGCCCTCTCACCCTGGTACTGCATGCACGCAATGCTAGCTGCCCCTTTC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHEHHHGHIIHHHHHAHHHHHHHHHGH\n+@ERR030881.74446743 HWI-BRUNOP16X_0001:2:68:3752:197585#0/1\n+CTGGGACCCAGGCAGCTGCCACCTTGTCACCATGAGAGAATTTGGGGAGT\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHG\n+@ERR030881.74446915 HWI-BRUNOP16X_0001:2:68:8353:197599#0/1\n+GGACTGTCCACCAGGTCCCGACGGGCAGGAATGCAGATGGGTACCTTTCC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHFHHHHHHHE\n+@ERR030881.74447547 HWI-BRUNOP16X_0001:2:68:9591:197654#0/1\n+GCCAGTGGTGGGCATGCGGCTGCGGAGCACGTCCTGAGCTGTGGGGACGT\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHBDDBB@9@AAHHGHHHHHGHHDHHH\n+@ERR030881.74449534 HWI-BRUNOP16X_0001:2:68:1488:197840#0/1\n+CTACTCCTTCCGCAGCAGGGAGGTGTGCAGAGCCGTGCTCAGCTTCCTCT\n++\n+HHHHHHHHHHHHHHHHHHHHHHH8HAGFGGFHHHFGGHHHHHGHHHIHGH\n+@ERR030881.74453424 HWI-BRUNOP16X_0001:2:68:5325:198191#0/1\n+GTCCTGCCCTACCTCTCCCAAGAGCACCAGCAGCAGGTCTTGGGAGCCAT\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74454854 HWI-BRUNOP16X_0001:2:68:18716:198301#0/1\n+GCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTTGGGACCTGGTCTC\n++\n+55555?>?>>5555444555444442=5<=55444C=6C>2555551544\n+@ERR030881.74455894 HWI-BRUNOP16X_0001:2:68:18831:198398#0/1\n+CTGGGACCTGCGGGAGGGCCGCCAGCTGCAGCAGCATGACTTCAGCTCCC\n++\n+HHHHHHHHHHHHHHHHHGEHHHHHHHCHHHFHHHHHEFGDFHHHEHBFHH\n+@ERR030881.74457151 
HWI-BRUNOP16X_0001:2:68:9093:198528#0/1\n+AAACAAAACATTTTCCTTTGGGTTTTTTTTTTTCTTTCTTTTTTCTCCGC\n++\n+HHHHHHGGGHHHHHHHHHHHHHHHHHHHGGGGGBGGGBHHGGGGGGGHHH\n+@ERR030881.74458067 HWI-BRUNOP16X_0001:2:68:15716:198600#0/1\n+GTTCCAACCACCGCCGGGGAGGGAGAGGGCCCCTGTCCCTGCAGGGCCGC\n++\n+ADAD?DEFBEHHHHHCCDGDHCEEHCGBGAHHHHHCDCGD5555424554\n+@ERR030881.74460390 HWI-BRUNOP16X_0001:2:68:15056:198815#0/1\n+CCTGGAACTGCCTGACCATAGTCTGATTCTGCAGGTCCCAGACCACAATG\n++\n+?ACDC?DDGG=DDD>55554GGFFADDDA==<==>D=DAD5445544445\n+@ERR030881.74460430 HWI-BRUNOP16X_0001:2:68:19789:198814#0/1\n+CACAAATCCCGTTCAGCCTTTTGACGATCTCAGCCTGTTTGTGCATCTCG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74460883 HWI-BRUNOP16X_0001:2:68:19795:198864#0/1\n+CTGCCTGGCACGCACCCGGTGGCTGCACCATCCACACGCAAGACTGCAAC\n++\n+HHHHHHHHHHHHHHHHHDHHHHHGHHFHHHHHHHHHHHFHHHFHGHFHHH\n+@ERR030881.74463349 HWI-BRUNOP16X_0001:2:68:7211:199081#0/1\n+CGGGGAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGAGAAA\n++\n+HHHHHHHGEHHHHHHHGGGGEHGHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74463429 HWI-BRUNOP16X_0001:2:68:16435:199090#0/1\n+CGGGCTCCTCGCACCTACCCCAGCAACTCAAATTCACCACCTCGGACTCC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74466171 HWI-BRUNOP16X_0001:2:68:1844:199339#0/1\n+ATTTTTTTAAAGGTTTTTCTTGTATCTTTTTTTTTTTTTTTTTTTTTTTT\n++\n+HHHHHHHGGHHHHGHHHGHC83=;><=@=<CCCCCCCCCCCCCCCCCCCC\n+@ERR030881.74466232 HWI-BRUNOP16X_0001:2:68:10444:199339#0/1\n+CCTGGGTCGCCCACCCTCACCCTGCTCCTCCCAGCTCAGCTAAGCTCGTC\n++\n+HHHHHHHHHHHHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74466444 HWI-BRUNOP16X_0001:2:68:18815:199349#0/1\n+GTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCATAGTA\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHIH\n+@ERR030881.74468879 HWI-BRUNOP16X_0001:2:68:9428:199583#0/1\n+CACCAACCAGCCGCGGGCCGCGCAGCTGGTGGACAAGGACAGCACCTTCC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHGHH\n+@ERR030881.74470889 
HWI-BRUNOP16X_0001:2:68:4971:199775#0/1\n+CAGAGCTTAGCGGGGGGCTGAGCTGGTGTCTTTGAACCTCTAGTCCCAGG\n++\n+HHHHHHHHHHHHHHHCGGGHEHHFHHEHHHHHHHHHHHEHHHHHFHHHHH\n+@ERR030881.74471439 HWI-BRUNOP16X_0001:2:68:16981:199816#0/1\n+TGTGTGCCCCATTTCTCCATATAGTCTTCCTCAGGCAGGTCCTAGGTCCC\n++\n+??DDDEDECC<=@><CCC@?<<<=@EGGGGG?GGGGCGCE>@@6=55554\n+@ERR030881.74471978 HWI-BRUNOP16X_0001:2:68:9605:199866#0/1\n+CCCAGGTCCTGCCCTACCTCTCCCAAGAGCACCAGCAGCAGGTCTTGGGA\n++\n+HHHHHHHHHGHHHHHHHHGHHHHHHHHHHHHFHHHHHHHHHHGHHHHAHE\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/test-data/input2.fastq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/test-data/input2.fastq Thu Oct 12 16:26:36 2017 -0400 |
b |
b"@@ -0,0 +1,200484 @@\n+@ERR030881.107 HWI-BRUNOP16X_0001:2:1:13663:1096#0/2\n+CGGATTTCAGCTACTGCAAGCTCAGTACCACAGCCTCAAGCTCGAATGTG\n++\n+HH;HHHHHGHHHHHHHHHHGHDHEHHHHHEHHHHBHHFHHHHHHHHHD0F\n+@ERR030881.311 HWI-BRUNOP16X_0001:2:1:18330:1130#0/2\n+GAGTGCGAGGGAAGTCAGGGGAGGATCGCGAGGGAAGCCAGGGGAGGATC\n++\n+HHHHHBF8G>&4555GGGGGHHGGEHHHHHHHHH=HHHHHHHHHHHGB9H\n+@ERR030881.1487 HWI-BRUNOP16X_0001:2:1:4144:1420#0/2\n+AACCGGGGGACGGGCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTT\n++\n+@FEEH>==9=05544FGFGFHHHBHHHFHF>AAAAHHHHHHHEHHHHHHH\n+@ERR030881.9549 HWI-BRUNOP16X_0001:2:1:1453:3458#0/2\n+TCAGCATGCTTCTTAGGGCCCTGGAAACTGGGGAAATAGGTAGCCAGGTG\n++\n+5515555/5515444FFHHHHHHHHHHHHHHHHHHHHHHHEHHHHGHH@H\n+@ERR030881.13497 HWI-BRUNOP16X_0001:2:1:16344:4145#0/2\n+GGCCAAGCAGGTCACCGCTCCCGAGCTGAACTCTATCATCCGACAGCAGC\n++\n+HHHHFGHHHGFAFFFHHFHHHHH/HHHHGHHEHHEHGFHHDGF=AA=@@8\n+@ERR030881.14070 HWI-BRUNOP16X_0001:2:1:4377:4232#0/2\n+TGGAGTCCTTCATGCCCAGGTCTGGAACCCAGGTTCTGACCCCAGGGCCC\n++\n+FDFFFEGGGGHHHHGHHHHH>AAA8GGGGGHHHGHHHHHHHHHHHGFHHH\n+@ERR030881.16375 HWI-BRUNOP16X_0001:2:1:2265:4573#0/2\n+GGCCAGCCGGGCTCCAGAGGGGTCAGGGCGCGACGAGAACCAACTCTTTA\n++\n+FDFFBDFDDBAAADDGHGHHHHBHHHHHGHGHHHHHHHHHHHHHHHHHFH\n+@ERR030881.18437 HWI-BRUNOP16X_0001:2:1:13904:4828#0/2\n+GGGCTCTCCCTCTGTATCGCCTGGGGAGGCTGCTGAGGTGACTTTTTGGA\n++\n+A?DDABFBFFHGHEHHHHHHHHHIHHDHCC55555BFFCD;:9=;=@=><\n+@ERR030881.18768 HWI-BRUNOP16X_0001:2:1:15563:4868#0/2\n+CACAGTAGGCGTTCTATAAATGTGTCACAAGAATGGCTTCCCTCAGGAAG\n++\n+55444;@=@>HHHDHHHHHFFGHHHHHHHHHIHHHFH=HHBB?<D#####\n+@ERR030881.20718 HWI-BRUNOP16X_0001:2:1:12184:5115#0/2\n+GCCTGGGCAACATAGCGAAACCACATCTCTACAAAAAAATCCTCCAAAAT\n++\n+HGIEHHHHGHF=@FF8A>>@HFHH=HHHHHHHIHHHGGGGH@@HHGGGEG\n+@ERR030881.22833 HWI-BRUNOP16X_0001:2:1:13089:5358#0/2\n+AGCCACTGCCTTTCTGCTCAGATGCTGGCACCTCCGCCCCCGGGGCTGCC\n++\n+EHHHFF?GFDGFFB???DDAD<FC<55555FFGGG<?>>61/5444-555\n+@ERR030881.23643 
HWI-BRUNOP16X_0001:2:1:7921:5452#0/2\n+CGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAGCTGTCCC\n++\n+HHHHHHHHHHHHHHHGHHHHGGHHHHHHHHHHHHHHHHHHHHHHHHDHHH\n+@ERR030881.28299 HWI-BRUNOP16X_0001:2:1:6428:5960#0/2\n+GGAGTCACAGGATTTGGAGGCAGGAGTGCTGGCGGGAAGGGCATTCAGGA\n++\n+HHHHHHHFEH?=DDDHIFHHEHEDE?DAADH@FHHC'@CE##########\n+@ERR030881.28475 HWI-BRUNOP16X_0001:2:1:14780:5977#0/2\n+CTCGGAAGGCAAGGCACATCTTGTGGTAGAAAATTTCGTGCAAATTAGGA\n++\n+HHHHHGGH=IADDADHHGHH444-4A?A?AGHGHHFGFG@/5544HDHEE\n+@ERR030881.29253 HWI-BRUNOP16X_0001:2:1:1570:6070#0/2\n+CTTCGCGAGGTCTTGCTCCCTTGGGACCTGGTCTCCCATCTGACCCTCCA\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.30545 HWI-BRUNOP16X_0001:2:1:4103:6216#0/2\n+GTTTAAAGGTGATACTTATTCTCGGAAGGCAAGGCACATCTTGTGGGAGA\n++\n+EF;GG4445544544FF@FFEHFHFFHGHH####################\n+@ERR030881.32582 HWI-BRUNOP16X_0001:2:1:12474:6471#0/2\n+GGGACAGGGAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGA\n++\n+FF8FFBFFFFFDF@FCD>CFF@@F:HEHEHHHHBHHHHHF==<>5?DDA;\n+@ERR030881.33730 HWI-BRUNOP16X_0001:2:1:14154:6628#0/2\n+GTGAGGGTGGGCGACCCAGGATTCCCCCTCCCCTTCCCAAATAAAGATGA\n++\n+BEFDB44(4411445DA?ADHHHHIFDDC>:::5@DDDC?HHHDEBFFB>\n+@ERR030881.35226 HWI-BRUNOP16X_0001:2:1:3903:6867#0/2\n+CAGAGCGTAAGAAATGGATCCATTGTTCCGAGAACGTGATCGCCCTCATC\n++\n+HH@HHHFDHHHHHHHFHHHGHGHHHHHHHGGHHHHHFHHAHHHHHGHHGH\n+@ERR030881.38182 HWI-BRUNOP16X_0001:2:1:17495:7451#0/2\n+CCTCTCCCGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAG\n++\n+GG/GGHHHHHHHHHHHHHHHHHHHDHHHHHFDHHHHHH@HHEHHHHHHHH\n+@ERR030881.41234 HWI-BRUNOP16X_0001:2:1:14816:8065#0/2\n+GGCAGGTTGGGAGGGGGGACAGAGGGGAGACAGAGGCACGGAGAGAAAGG\n++\n+FFGHH55,5514441>><<BHHEHFF?9F4FFFBFHHHHHHHHGHHFF4H\n+@ERR030881.55301 HWI-BRUNOP16X_0001:2:1:7892:11256#0/2\n+CTTCGCAAATTTGTCCCAGGGATGGATCGCCTGTGCTGCCTTCGCCCGCC\n++\n+D@5AA4453451444GGGFDHH@GEA;DDD=:=+:D@DFDEDHHB#####\n+@ERR030881.57346 HWI-BRUNOP16X_0001:2:1:20039:11573#0/2\n+CCTGTCCAGAGTCTGAGGGGGGAGGCCAGGCCCTGCCTTGGGGTCTGAGG\n++\n+##################################################\n+@ERR030881.57608 
HWI-BRUNOP16X_0001:2:1:16788:11614#0/2\n+GGGGGGCGCCGCAGCTGCGCGGCCGCTCCCTCCTAGCCGGCCCTTGAGGG\n++\n+HHHHHHHEGHIHHHHHHDHF@@<A?FFE@FGGGAG4====HHHHHHHEHB\n+@ERR030881.58998 HWI-BRUNOP16X_0001:2:1:14252:11816#0/2\n+CTGAATCCCTTGCCCAGAGGA"..b':6601:197274#0/2\n+CCGCTCCCGAGCTGAACTCTATCATCCGACAGCAGCTCCAAGCCCACCAG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGH\n+@ERR030881.74446016 HWI-BRUNOP16X_0001:2:68:6384:197508#0/2\n+GGCCCTGCCCTTGACCCCACTACCCGTGGGGCTGCAGCCGCCTTCGCTGC\n++\n+HHHHHHHHHHHHHHHHHHIHHGHHH>A??@FHHHFHHFHDHH=HHB>4FF\n+@ERR030881.74446277 HWI-BRUNOP16X_0001:2:68:20062:197534#0/2\n+CTTTATTTGGGAAGGGGAGGGGGAATCCTGGGTCGCCCACCCTCACCCTG\n++\n+HHHHHHHGGHHHHHHHHFHGGGGGHHHHHHHGHHHHFHHEH9BHEDD###\n+@ERR030881.74446743 HWI-BRUNOP16X_0001:2:68:3752:197585#0/2\n+CGGCCGGCTGCATCCCACACCAGCCTGAGCCCCAGACGGTCAGTCAGTGC\n++\n+HHHHHHHHHHHHIHHHHHHHHHHHHHHHHHHHHHHHHHHEHHHHHHHHHH\n+@ERR030881.74446915 HWI-BRUNOP16X_0001:2:68:8353:197599#0/2\n+CGAGGGGTCCAGAGTGGAGAGAGCCCCGAGCAGGAGTGCATCTCCCTCGC\n++\n+HHHHHHHFHHHHHHHFHHHHHHHHHHHHHHHGHHHHHHHHHHIHFHHHGH\n+@ERR030881.74447547 HWI-BRUNOP16X_0001:2:68:9591:197654#0/2\n+GGCTGCAGATTCCATTCAGCAGGCCCGAGAGCAAGCACCACGCTAGCCTG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHG\n+@ERR030881.74449534 HWI-BRUNOP16X_0001:2:68:1488:197840#0/2\n+CAAGACTGCAACTTCAGATGCTCCGCACGCTGGAGATGCTGGACAGGGGC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFFHHHHHHHEHEHE\n+@ERR030881.74453424 HWI-BRUNOP16X_0001:2:68:5325:198191#0/2\n+CTTCCTTGGAGAGGTGGGCCTGGGAACCCAGCGCGGACAGCGAGAGGAGG\n++\n+HHGHHHHHHHHHHHHHHHHHHGHHGHHHHHHHHHHHHHHHHHHHEHHHHG\n+@ERR030881.74454854 HWI-BRUNOP16X_0001:2:68:18716:198301#0/2\n+GGAATGTTTAGCACAAGACACAGCGGAGCTCGGGATTGGCTAAACTCCCA\n++\n+HF@GHD?>DA=<>;=444444245444445>>@>;BECBF@?A<>@AAA8\n+@ERR030881.74455894 HWI-BRUNOP16X_0001:2:68:18831:198398#0/2\n+GGACTGAGGACGACTCCTTGGACTGGAAAATGCTGGCCCCGTACGGCGTC\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHDHHHHHHIH\n+@ERR030881.74457151 
HWI-BRUNOP16X_0001:2:68:9093:198528#0/2\n+GGAACCTTCTCCGGATTGGGTTCATGAGCATTTTTGTGGGTGTGTATGTG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGGHHHHHHHHHHFH\n+@ERR030881.74458067 HWI-BRUNOP16X_0001:2:68:15716:198600#0/2\n+GGCCGTCTTTGACCTGCTCCTGGCTGTTGGCATTGCTGCCTACCCTGGCA\n++\n+55555<@@@@===<655244A??DAC:C?#####################\n+@ERR030881.74460390 HWI-BRUNOP16X_0001:2:68:15056:198815#0/2\n+GGTGAGGCCAGCACCTTGTCCATTTGGGACCTGGCGGCGCCCACCCCCCG\n++\n+5-5449=;==BFFBFDBFDDC>?>>D?DDDHHHHHBFFC@44244<<<<<\n+@ERR030881.74460430 HWI-BRUNOP16X_0001:2:68:19789:198814#0/2\n+ATGATGTTTCCACAAAGCAGGCATTCGGGCTCCTCGCACCTACCCCAGCA\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74460883 HWI-BRUNOP16X_0001:2:68:19795:198864#0/2\n+TGCTGCGGGTGTCTCCGGCTGGGCATGCGGGGGCCCGGGGACTGCCTGGC\n++\n+HHHHHHHHHHHHGHHHHFDEBDDBB5552*DDBBFHHHHH@FDF######\n+@ERR030881.74463349 HWI-BRUNOP16X_0001:2:68:7211:199081#0/2\n+CTGGTCTCCCATCTGACCCTCCAGGCCTTAGCTTGCCTCACATGTCAGGG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFH\n+@ERR030881.74463429 HWI-BRUNOP16X_0001:2:68:16435:199090#0/2\n+GGACCTGGGCACAAATCCCGTTCAGCCTTTTGACGATCTCAGCCTGTTTG\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH\n+@ERR030881.74466171 HWI-BRUNOP16X_0001:2:68:1844:199339#0/2\n+GGTGGGGGTCGTGGAGTGGGGGAGGGAGGCCAGCCGGGCTCCAGAGGGGT\n++\n+HHHHHHGGGHGBGEFHHHFHHG9GGC;HHEHHHCHFG@FFAA;=9DD;C7\n+@ERR030881.74466232 HWI-BRUNOP16X_0001:2:68:10444:199339#0/2\n+CCGTTTTGAACATGTGTAACCGACAGTCTGCCTGGGCCACAGCCCTCTCA\n++\n+HHHHHHHHHHHHHHHHHHHHHHHHIHHHHHGHDHGHHHBHCFFFFHHHHH\n+@ERR030881.74466444 HWI-BRUNOP16X_0001:2:68:18815:199349#0/2\n+GGAAGGGCCGGGGCTGCTGCGCTTCGCGAGGTCTTGCTCCCTTGGGACCT\n++\n+HHHHIHIHHHHHGHHHHHHHFHDHFGHHHHHEHEHHHHHHHHHHHHGHHH\n+@ERR030881.74468879 HWI-BRUNOP16X_0001:2:68:9428:199583#0/2\n+GGAGGCTGAAGTGCTGGACAGCCACGTAGGCCATGCCGAGGTAGGCAGCA\n++\n+HFHHHHHHHHHHIHGHHHHHHHHHHHHHHHHHEHHHHGGHH?FHHHHHGH\n+@ERR030881.74470889 
HWI-BRUNOP16X_0001:2:68:4971:199775#0/2\n+GACATATTTGAGAGACACTGGGGAGACAGAATCGACCTGACCTTGCTGAC\n++\n+HHHHHHHHHHHHHHHHHHHHHHH@HHHEHHHFHHHHHGHAHFBEHHGFBG\n+@ERR030881.74471439 HWI-BRUNOP16X_0001:2:68:16981:199816#0/2\n+GTGACACTGCATTGCTGCTGCCAGCACCCCTTGTTAGGGTTTGTAATTGC\n++\n+F8HHHFGGG8DC>A>ADD1?##############################\n+@ERR030881.74471978 HWI-BRUNOP16X_0001:2:68:9605:199866#0/2\n+CTTGTCTTCCTTGGAGAGGTGGGCCTGGGAACCCAGCGCGGACAGCGAGA\n++\n+HHHHHHIHHHHHDHHGHHHGHHHHHHHHHHHHFGHHHHFHHDHHHCHHHH\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/test-data/output1.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/test-data/output1.txt Thu Oct 12 16:26:36 2017 -0400 |
b |
b'@@ -0,0 +1,9901 @@\n+na\t0.0010\tchr10\t100260965,100261028\t100261028,100261044\t100261978,100262063\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100273277,100275490\t100275490,100275493\t100280123,100280228\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100286096,100286213\t100286213,100286332\t100286604,100286712\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100329210,100329324\t100329324,100329346\t100329868,100330486\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100523728,100523929\t100523929,100524139\t100526398,100526554\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100827008,100827095\t100827542,100827561\t100827561,100829941\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t100981167,100981229\t100983304,100984489\t100984489,100985616\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101062541,101063103\t101064205,101064260\t101064260,101064421\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101134174,101134376\t101136667,101136690\t101136690,101137789\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101600737,101600862\t101601169,101601202\t101601202,101601336\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101605139,101605212\t101608834,101608866\t101608866,101608937\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101601202,101601336\t101608834,101608870\t101608870,101608937\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775129,101775216\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775739,101775776\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101828626,101828696\t101828696,101828717\t101829074,101829199\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t101829843,101829897\t101829897,101829942\t101831071,101831167\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102137849,102138038\t102138618,102138719\t102138719,102138765\t+\t0.00
00\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102148371,102148521\t102148627,102148633\t102148633,102148694\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102157018,102157074\t102157185,102157188\t102157188,102157328\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102358505,102358729\t102359266,102359269\t102359269,102359435\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102402047,102402159\t102402251,102402254\t102402254,102402529\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102416384,102416631\t102416631,102417121\t102418700,102418759\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t102656105,102656137\t102656244,102656269\t102656269,102656385\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t103451082,103451182\t103451182,103451285\t103452155,103452405\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t106573662,106577285\t106577285,106577555\t106579368,106579474\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t106573662,106577285\t106577285,106577555\t106579090,106579251\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t110875965,110876070\t110881232,110881274\t110881274,110881535\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t112182793,112182887\t112182887,112182891\t112183692,112183779\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t112443514,112443594\t112443594,112443606\t112445169,112445650\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t112446987,112447467\t112448377,112448381\t112448381,112448489\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t113146010,113146097\t113150982,113150997\t113150997,113151123\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t11320856,11320898\t11321188,11321206\t11321206,11321386\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t11314138,11314258\t11321188,11321206\t11321206,11321386\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t113588823,113589079\t113589079,113589082\t113589665,113589797\t-\t0.0000\t0.
0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t113679668,113679978\t113691963,113691996\t113691996,113692070\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t113697493,113697603\t113721000,113721031\t113721031,113721168\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t114129890,114129971\t114129971,114130669\t114131146,114131336\t-\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.0010\tchr10\t114447879,114448'..b'1387,94571429\t94573959,94574187\t94574553,94574736\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94573959,94574187\t94574553,94574736\t94576661,94576709\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94573959,94574187\t94574553,94574736\t94576661,94576805\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94581325,94581522\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576709\t94577081,94577096\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94581325,94581522\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94574553,94574736\t94576661,94576805\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94576661,94576709\t94581325,94581522\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94577081,94577096\t94581325,94581522\t94582962,94583059\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94581325,94581522\t94582962,94583059\t94588228,94588390\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94581325,94581522\t94582962,94583059\t94583987,94584125\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94582962,94583059\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94583987,94584125\t
94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94576661,94576805\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94576661,94576709\t94588228,94588390\t94590412,94590552\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94588228,94588390\t94590412,94590552\t94590637,94590776\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94590412,94590552\t94590637,94590776\t94592228,94592312\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94590637,94590776\t94592228,94592312\t94592394,94592514\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94592228,94592312\t94592394,94592514\t94593498,94593615\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94592394,94592514\t94593498,94593615\t94594694,94594854\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94593498,94593615\t94594694,94594854\t94596859,94596956\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94594694,94594854\t94596859,94596956\t94597034,94597111\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94596859,94596956\t94597034,94597111\t94601527,94602099\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94683493,94683987\t94687769,94687932\t94688124,94688274\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94687769,94687932\t94688124,94688274\t94694916,94695077\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94706783,94706960\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94720395,94720537\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94706783,94706960\t94720395,94720537\t94724345,94724533\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94694916,94695077\t94720395,94720537\t94724345,94724533\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94720395,94720537\t94724345,94724533\t94733296,94733438\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t947
24345,94724533\t94733296,94733438\t94735262,94736191\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94762680,94762873\t94775057,94775220\t94775389,94775539\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94775057,94775220\t94775389,94775539\t94780498,94780659\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94775389,94775539\t94780498,94780659\t94781820,94781997\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94780498,94780659\t94781820,94781997\t94820495,94820637\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94781820,94781997\t94820495,94820637\t94842836,94843024\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+na\t0.9990\tchr10\t94820495,94820637\t94842836,94843024\t94849916,94850058\t+\t0.0000\t0.0000\t0.0000\t0.0000\n+\t0.9990\tchr10\t94842836,94843024\t94849916,94850058\t94852732,94853206\t+\t0.0000\t\t\t\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 splicetrap/test-data/output2.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/splicetrap/test-data/output2.txt Thu Oct 12 16:26:36 2017 -0400 |
b |
b'@@ -0,0 +1,9901 @@\n+AA-AA-10-100261028-100261044.0\t0.0010\t0.0010\tchr10\t100260965,100261028\t100261028,100261044\t100261978,100262063\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t63\t16\t85\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100275490-100275493.0\t0.0010\t0.0010\tchr10\t100273277,100275490\t100275490,100275493\t100280123,100280228\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t2213\t3\t105\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100286213-100286332.0\t0.0010\t0.0010\tchr10\t100286096,100286213\t100286213,100286332\t100286604,100286712\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t117\t119\t108\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100329324-100329346.0\t0.0010\t0.0010\tchr10\t100329210,100329324\t100329324,100329346\t100329868,100330486\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t114\t22\t618\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100523929-100524139.0\t0.0010\t0.0010\tchr10\t100523728,100523929\t100523929,100524139\t100526398,100526554\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t201\t210\t156\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100827542-100827561.0\t0.0010\t0.0010\tchr10\t100827008,100827095\t100827542,100827561\t100827561,100829941\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t87\t19\t2380\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-100983304-100984489.0\t0.0010\t0.0010\tchr10\t100981167,100981229\t100983304,100984489\t100984489,100985616\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t62\t1185\t1127\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101064205-101064260.0\t0.0010\t0.0010\tchr10\t101062541,101063103\t101064205,101064260\t101064260,101064421\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t562\t55\t161\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101136667-101136690.0\t0.0010\t0.0010\tchr10\t101134174,101134376\t101136667,101136690\t101136690,101137789\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t202\t23\t1099\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101601169-101601202.0\t0.0010\t0.0010\tchr10\t101600737,101600862\
t101601169,101601202\t101601202,101601336\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t125\t33\t134\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101608834-101608866.0\t0.0010\t0.0010\tchr10\t101605139,101605212\t101608834,101608866\t101608866,101608937\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t73\t32\t71\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101608834-101608870.0\t0.0010\t0.0010\tchr10\t101601202,101601336\t101608834,101608870\t101608870,101608937\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t134\t36\t67\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101774879-101774912.0\t0.0010\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775129,101775216\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t148\t33\t87\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101774879-101774912.1\t0.0010\t0.0010\tchr10\t101774731,101774879\t101774879,101774912\t101775739,101775776\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t148\t33\t37\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101828696-101828717.0\t0.0010\t0.0010\tchr10\t101828626,101828696\t101828696,101828717\t101829074,101829199\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t70\t21\t125\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-101829897-101829942.0\t0.0010\t0.0010\tchr10\t101829843,101829897\t101829897,101829942\t101831071,101831167\t-\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t54\t45\t96\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-102138618-102138719.0\t0.0010\t0.0010\tchr10\t102137849,102138038\t102138618,102138719\t102138719,102138765\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t189\t101\t46\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-102148627-102148633.0\t0.0010\t0.0010\tchr10\t102148371,102148521\t102148627,102148633\t102148633,102148694\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t6\t61\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-102157185-102157188.0\t0.0010\t0.0010\tchr10\t102157018,102157074\t102157185,102157188\t102157188,102157328\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t56\t3\t140\texon1=no\tex
on2=no\texon3=no\tna\n+AA-AA-10-102359266-102359269.0\t0.0010\t0.0010\tchr10\t102358505,102358729\t102359266,102359269\t102359269,102359435\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t224\t3\t166\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-102402251-102402254.0\t0.0010\t0.0010\tchr10\t102402047,102402159\t102402251,102402254\t102402254,102402529\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t112\t3\t275\texon1=no\texon2=no\texon3=no\tna\n+AA-AA-10-102416631-10'..b'40\t139\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94590637-94590776.0\t0.0010\t0.9990\tchr10\t94590412,94590552\t94590637,94590776\t94592228,94592312\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t140\t139\t84\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94592228-94592312.0\t0.0010\t0.9990\tchr10\t94590637,94590776\t94592228,94592312\t94592394,94592514\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t139\t84\t120\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94592394-94592514.0\t0.0010\t0.9990\tchr10\t94592228,94592312\t94592394,94592514\t94593498,94593615\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t84\t120\t117\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94593498-94593615.0\t0.0010\t0.9990\tchr10\t94592394,94592514\t94593498,94593615\t94594694,94594854\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t120\t117\t160\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94594694-94594854.0\t0.0010\t0.9990\tchr10\t94593498,94593615\t94594694,94594854\t94596859,94596956\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t117\t160\t97\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94596859-94596956.0\t0.0010\t0.9990\tchr10\t94594694,94594854\t94596859,94596956\t94597034,94597111\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t160\t97\t77\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94597034-94597111.0\t0.0010\t0.9990\tchr10\t94596859,94596956\t94597034,94597111\t94601527,94602099\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t97\t77\t572\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94687769-94687932.0\t0.0010\t0.9990\tchr10\t94683493,94683
987\t94687769,94687932\t94688124,94688274\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t494\t163\t150\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94688124-94688274.0\t0.0010\t0.9990\tchr10\t94687769,94687932\t94688124,94688274\t94694916,94695077\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t163\t150\t161\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94694916-94695077.0\t0.0010\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94706783,94706960\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t177\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94694916-94695077.1\t0.0010\t0.9990\tchr10\t94688124,94688274\t94694916,94695077\t94720395,94720537\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t142\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94720395-94720537.0\t0.0010\t0.9990\tchr10\t94706783,94706960\t94720395,94720537\t94724345,94724533\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t177\t142\t188\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94720395-94720537.1\t0.0010\t0.9990\tchr10\t94694916,94695077\t94720395,94720537\t94724345,94724533\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t161\t142\t188\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94724345-94724533.0\t0.0010\t0.9990\tchr10\t94720395,94720537\t94724345,94724533\t94733296,94733438\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t142\t188\t142\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94733296-94733438.0\t0.0010\t0.9990\tchr10\t94724345,94724533\t94733296,94733438\t94735262,94736191\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t188\t142\t929\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94775057-94775220.0\t0.0010\t0.9990\tchr10\t94762680,94762873\t94775057,94775220\t94775389,94775539\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t193\t163\t150\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94775389-94775539.0\t0.0010\t0.9990\tchr10\t94775057,94775220\t94775389,94775539\t94780498,94780659\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t163\t150\t161\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94780498-94780659.
0\t0.0010\t0.9990\tchr10\t94775389,94775539\t94780498,94780659\t94781820,94781997\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t150\t161\t177\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94781820-94781997.0\t0.0010\t0.9990\tchr10\t94780498,94780659\t94781820,94781997\t94820495,94820637\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t161\t177\t142\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94820495-94820637.0\t0.0010\t0.9990\tchr10\t94781820,94781997\t94820495,94820637\t94842836,94843024\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t177\t142\t188\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94842836-94843024.0\t0.0010\t0.9990\tchr10\t94820495,94820637\t94842836,94843024\t94849916,94850058\t+\t0\t0\t0\t0.0000\t0.0000\t0.0000\t0.0000\t142\t188\t142\texon1=no\texon2=no\texon3=no\tna\n+CA-CS-10-94849916-94850058.0\t0.0010\t0.9990\tchr10\t94842836,94843024\t94849916,94850058\t94852732,94853206\t+\t0\t0\t0\t0.0000\t\n\\ No newline at end of file\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/AHCYL1-RAD51C_1.fq --- a/tests/mcf7_pe_35bp/AHCYL1-RAD51C_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,52 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:7:1277:1620/1 -GTTTATTTATACTTTAAGGTAACAAGTCCACTTGT -+PATHBIO-SOLEXA2_30LEJAAXX:7:7:1277:1620/1 -aaaaaaXaaaabaZaaa\aaa[aa__Zaaaaaa\a -@PATHBIO-SOLEXA2_30LEJAAXX:7:89:1051:1091/1 -ACCGGGGTCGGGGAGGAGCTGAAGCAGGCCAAGGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:89:1051:1091/1 -bbbaaaaWababbbbaaaabbaaaaaaa_`_aaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:24:940:1753/1 -CTGGGTTATAATTCTTCCTCTGGGTCTCGTGACCG -+PATHBIO-SOLEXA2_30LEJAAXX:7:24:940:1753/1 -bbbbabbbbbabbbbbbbbb`VQVaaaaX`U_]_] -@PATHBIO-SOLEXA2_30LEJAAXX:7:19:340:1535/1 -GTTTGCAATGAACATGCAGAAGTAACAACAGTATC -+PATHBIO-SOLEXA2_30LEJAAXX:7:19:340:1535/1 -bbbaaaaaaaaaaaaaaaaaaaaaaaaaa^^_``a -@PATHBIO-SOLEXA2_30LEJAAXX:7:8:639:991/1 -CTGGGTTATAATTCTTCCTCTGGGTCTCGTGACCG -+PATHBIO-SOLEXA2_30LEJAAXX:7:8:639:991/1 -babaaZabaaaabbbaaa`S_a\aaaa`[`aa\aa -@PATHBIO-SOLEXA2_30LEJAAXX:7:56:1468:1938/1 -CTGGGTTATAATTCTTCCTCTGGGTCTCGTGACCG -+PATHBIO-SOLEXA2_30LEJAAXX:7:56:1468:1938/1 -bbWa\a]S]aaaaaaW\[^aaXKHZUXZEKMZ_U] -@PATHBIO-SOLEXA2_30LEJAAXX:7:65:500:1793/1 -CGCTGGCCGGGCCGGCCGGGGAATGTCGATGCCTG -+PATHBIO-SOLEXA2_30LEJAAXX:7:65:500:1793/1 -bbbabbbaaaaaaaaaaaaa^\^^a`_^_^[[[^U -@PATHBIO-SOLEXA2_30LEJAAXX:7:87:688:1556/1 -GCCGGGAGCAGCCAAAACCTGCTAAGTCTCAGAAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:87:688:1556/1 -aaaOaaaWaaaaaa^aaaaaaa\\Zaabaaab\aa -@PATHBIO-SOLEXA2_30LEJAAXX:7:46:1482:684/1 -TACACTTTGAGATTTGTTTCTGGGTTATAATTCTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:46:1482:684/1 -aaaabaaaaZ^aaabaaabaaXSSaaaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:89:237:1128/1 -CGGGGTCGGGGAGGAGCTGAAGCAGGCCAAGGAGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:89:237:1128/1 -baaWaaaaaaaaaaaaaaa^aabaaaaaaaaaaa\ -@PATHBIO-SOLEXA2_30LEJAAXX:7:32:573:692/1 -GGAATGTCGATGCCTGACGCGATGCCGCTGCCCGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:32:573:692/1 -bbaaaaaabbbbaaaaaaaaaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:25:1383:1407/1 -GGCGGGGAATGTCGATGCCTGACGCGATGCCGCTG -+PATHBIO-SOLEXA2_30LEJAAXX:7:25:1383:1407/1 -babaaaaaabaaaaaaaaaaa_`_aa\a[Q^aMEX 
-@PATHBIO-SOLEXA2_30LEJAAXX:7:90:1243:1836/1 -TTCTTCCTCTGGGTCTCGTGACCGTTTCCGGGTGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:90:1243:1836/1 -aaaababbbaaaaaaaa^aaaaaaaaaaaWQQ[SQ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/AHCYL1-RAD51C_2.fq --- a/tests/mcf7_pe_35bp/AHCYL1-RAD51C_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,52 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:7:1277:1620/2 -GTACTCCTTCATGGCCACCGTCACCAAGGCGCCCA -+PATHBIO-SOLEXA2_30LEJAAXX:7:7:1277:1620/2 -abaaaabaL^bbaaaaaaa^\aaaa^Q^[aWa\aU -@PATHBIO-SOLEXA2_30LEJAAXX:7:89:1051:1091/2 -GGGTGCTCAAGGAACCTTATGTTTGCAATGAACAT -+PATHBIO-SOLEXA2_30LEJAAXX:7:89:1051:1091/2 -babbbbbbaaa^aaaaa`]`aaaaaaaaa\a_]_X -@PATHBIO-SOLEXA2_30LEJAAXX:7:24:940:1753/2 -GTCGATGCCTGACGCGATGCCGCTGCCCGGGGTCG -+PATHBIO-SOLEXA2_30LEJAAXX:7:24:940:1753/2 -abbbbaaaaaaaaaaaaaaaaaaa_]]aaaa_XZX -@PATHBIO-SOLEXA2_30LEJAAXX:7:19:340:1535/2 -CGGGGTCGGGGAGGAGCTGAAGCAGGCCAAGGAGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:19:340:1535/2 -bbbabaaaaaaaaaaabaaaaaaaaaa^^^_^[ZU -@PATHBIO-SOLEXA2_30LEJAAXX:7:8:639:991/2 -CGATGCCTGACGCGATGCCGCTGCCCGGGGTCGGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:8:639:991/2 -ab_J`bbbaaabbaaaaaaaaaaaaaabaaaaaaX -@PATHBIO-SOLEXA2_30LEJAAXX:7:56:1468:1938/2 -GAATGTCGATGCCTGACGCGATGCCGCTGCCCGGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:56:1468:1938/2 -aa`X`aaaaaaabaZa^J^aaaaaa^\^UKOKXMK -@PATHBIO-SOLEXA2_30LEJAAXX:7:65:500:1793/2 -GTTCGGGTGCTCAAGGAACCTTCTGTTTGCAATGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:65:500:1793/2 -bbbaaaaaaaaaaaaaaaaaaaaaaaaaa_``__Z -@PATHBIO-SOLEXA2_30LEJAAXX:7:87:688:1556/2 -CTCGTGACCGTTTCCGGGTGCTCAAGGAACCTTCT -+PATHBIO-SOLEXA2_30LEJAAXX:7:87:688:1556/2 -aaaaaaaabaabaaa\baaaaaa[XXaaaaaabaU -@PATHBIO-SOLEXA2_30LEJAAXX:7:46:1482:684/2 -GGAGGGGAGGAGCTGAAGCAGGCCAAGGAGATCGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:46:1482:684/2 -baaaaaaabaaaa^aaaa\aaaZZaaabaaaUMXR -@PATHBIO-SOLEXA2_30LEJAAXX:7:89:237:1128/2 -GTACACTTTGAGATTTGTTTCTGGGTTATAATTCT -+PATHBIO-SOLEXA2_30LEJAAXX:7:89:237:1128/2 -aaabaaaaaaaaaaaaaabaaa^M\aaZaabaaa[ -@PATHBIO-SOLEXA2_30LEJAAXX:7:32:573:692/2 -CTCTGGGTCTCGTGACCGTTTCCGGGTGCTCAAGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:32:573:692/2 -aaaabaaaaaaaaaaaaaaaaaaaaaaaaa[^\aZ -@PATHBIO-SOLEXA2_30LEJAAXX:7:25:1383:1407/2 -CCTCTGGGTCTCGTGACCGTTTCCGGGTGCTCAAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:25:1383:1407/2 -ababaaaaba^aaaba^aaaaa]SUZZ]S]^\VUX 
-@PATHBIO-SOLEXA2_30LEJAAXX:7:90:1243:1836/2 -GGGGAATGTCGATGCCTGACGCGATGCCGCTGCCC -+PATHBIO-SOLEXA2_30LEJAAXX:7:90:1243:1836/2 -abaaaaaaaaaaaaaaaaa[]_aaaaaaaaaPXXR |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/ARFGEF2-SULF2_1.fq --- a/tests/mcf7_pe_35bp/ARFGEF2-SULF2_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,568 +0,0 @@\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:23:64/1\n-GTCCCGCCCTGCTCCATGATGCGCCGGGTCTTGTT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:23:64/1\n-aaaaaaaab\\\\a\\aaaaa^aaaaaaWabaaaaaa^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:53:140:335/1\n-GCAGTTCTCATTGTTGGTGTAGGTGTTGTGGTTGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:53:140:335/1\n-aa^aaabaaaaaaaaabaa[V^aZbaaXa^^a_M_\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:34:1433:1923/1\n-GGAGCGTGAGGGGCAGCACATGGGTGTGGTCACGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:34:1433:1923/1\n-bbababbaaaabaaaaaaaaaaaaaaaUa_][aaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:72:1296:386/1\n-CAGCACTCCCAGCTGCGCAGGGCCTGCCAGGTGGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:72:1296:386/1\n-babaaaaaaaabbaaaaW`_Xaaaaaaaaaa\\aL^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:44:1403:1658/1\n-GGGCCATGCAGGAGAGCCAGACCAAGAGCATGTTC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:44:1403:1658/1\n-abbbaaaababaabaaaaaabbaaa\\aa^aaaa__\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:97:215:106/1\n-GCCTAGCTCGCCATCTCGCTCACGCCGCCCGCCCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:97:215:106/1\n-aaaab^bbaaabbbbaaaaaaaaaa`Z_a_\\^aaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:49:1199:1607/1\n-CGGGCCCTGGTGAAGATCCTAGCCGACAAGGAGGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:49:1199:1607/1\n-aaaaabbaa[K[abaaaaaa^S^aaaaaaaaaaa`\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:95:1383:1396/1\n-GGGCCATGCAGGAGAGCCAGACCAAGACCATGTTC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:95:1383:1396/1\n-bbabaaaaaaaaaa^aaaaaaaaaaaaaaaaaaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:73:79:1456/1\n-AGGAGGGCGAGGAGCAGTTCTCATTGTTGGTGTAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:73:79:1456/1\n-aabaabaaaaaaaaaaaaaabaaaaaaaaaa^`[L\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:23:1773:1946/1\n-AGCCAGACCAAGAGCATGTTCGTGTCCCGGGCCCT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:23:1773:1946/1\n-aabaaaaababbbabbaaaaaaabaaabaaaabaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:49:241:582/1\n-CATTGTTGGTGTAGGTGTTGTGGTTGTGGACGTAT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:49:241:582/1\n-a\\aaaaaaaaaUQ^_S]aaUaM[MREKQMKPRREK\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:18:965:1429/1\n-CAAGAGCATGTTCGTGTCCCGGGCCCTGGAGAAGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:18:965:1429/1\n-babaabaababaaabaabaaa^a
aaaaaaaaaaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:91:404:1991/1\n-GCAGTTCTCATTGTTGGTGTAGGTGTTGTGGTTGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:91:404:1991/1\n-aZEUaaa^a^aaaa^^aZVVV^[aWaaaaZI^aGE\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:91:289:1577/1\n-GCCAGACCAAGAGCATGTTCGTGTCCCGGGCCCTG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:91:289:1577/1\n-bbaaaaaaaaaaaaaaaaa`__aaaa`_]]_aa^[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:34:1304:1646/1\n-GTGCCTGCCAGGAGGGCGAGGAGCAGTTCTCATTG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:34:1304:1646/1\n-aaaaaaaaaaaaaaaaaaaa`_`aaaaaaa^^[aa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:7:1465:1275/1\n-GCCCCAGCACTCCCAGCTGCGCAGGGCCTGCCAGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:7:1465:1275/1\n-abaaaZ\\aaaaabaaaa\\^aaaaaaa^aa\\aaaa_\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:86:1572:2023/1\n-GTACTTGCCGGTGAGGATGGAGGAGCGTGAGGGGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:86:1572:2023/1\n-aaaaaabaaaaaaaaaaa^aa[a^aaabaaaaa^O\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:16:679:1555/1\n-TTCGTGTCCCGGGCCCTGGAGAAGATCCTAGCCGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:16:679:1555/1\n-aaaLaZabbaaaaZaaaaaLaWLaaa^[J^Zaa^^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:91:915:1083/1\n-TGCCGGTGAGGATGGAGGAGCGTGAGGGGCAGCAC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:91:915:1083/1\n-babbbbaabaa^aaaUaaa^XVS[Q\\aaZRHUKXU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:39:1584:573/1\n-CGCGGCCGGTGCCGGCCGGGACGCCGGGCCCGCAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:39:1584:573/1\n-baabbaaaaaaabaaaaaa``]aZZ^a`X]a\\VQ^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:70:378:1964/1\n-GCACTCCCAGCTGCGCAGGGCCTGCCAGGTGGCGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:70:378:1964/1\n-baaaaab`W`aaaaa^Q^aaaaaaaXMXZOXZ_a_\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:59:129:1017/1\n-GTGTAGGTGTTGTGGTTGTGGACGTACTTGCCGGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:59:129:1017/1\n-baaababaaabbaaaa`X`U___ZXOUXXEMREMS\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:12:365:706/1\n-CCCAGCTGCGCAGGGCCTGCCAGGTGGCGCTCGGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:12:365:706/1\n-aabbaaaaaababaaaaWabbaaaSS`aaaaaWaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:83:888:907/1\n-GCCGGGTCTTGTTCATCACCTGCATGGAACCGAGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:83:888:907/1\n-aabbabbaaaa^aab`]_aaa\\aa`
[Z\\aaaaaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:14:649:1237/1\n-GGTGTAGGTGTTGTGGTTGTGGACGTACTTTCCGG\n-+PATHBIO-SOLEXA2_'..b'AAXX:7:44:170:547/1\n-CGCGGGGCCGTCAGCCCCCGCCGGGCCGGGGCCAT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:44:170:547/1\n-aabaaaabbaaa^aaUK[baaaWaaa^a^aaSMEX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:31:327:927/1\n-GTTCGTGTCCCGGGCCCTGGAGAAGATCCTAGCCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:31:327:927/1\n-babaaaaabbaaaaaaaa][_aaaaaa__`aaaa^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:11:670:1627/1\n-GTGGTTGTGGACGTACTTGCCGGTGAGGATGGGGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:11:670:1627/1\n-bbbbbbaaaaaa]\\\\^`^ZZ^XPMUEXZOX]UEXZ\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:81:159:597/1\n-TGCGTCCCGCCCTGCTCCATGATGCGCCGGGTCTT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:81:159:597/1\n-abaaaabaaaa[EKbaa`]`[_aXaa\\XEE_aaaV\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:70:1099:1499/1\n-AGCCAGACCAAGAGCATGTTCGTGTCCCGGGCCCT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:70:1099:1499/1\n-aabaaaaaaaaa\\aaaaaaaaaaaaaaaaaaaaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:42:354:538/1\n-TGTGGTTGTGGACGTACTTGCCGGTGAGGATGGAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:42:354:538/1\n-aaaaabaa[Q[a\\Z^\\aaZUaaa\\aUUEUS]aV[[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:51:460:769/1\n-CTCATTGTTGGTGTAGGTGTTGTGGTTGTTGACGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:51:460:769/1\n-bbaaabaWaa\\aaaaaZaaa`SJMZaUEUJKEOGO\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:60:609:90/1\n-GTGAGGGGCAGCACATGGGTGTGGTCACGAAGGCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:60:609:90/1\n-aOaaaabaaaaaaa^ababaaaaaaaaaaaaZaaX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:9:542:343/1\n-CGTACTTGCCGGTGAGGATGGAGGAGCGTGAGGGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:9:542:343/1\n-aaaaaabbaaaaaaaa^aaaaaabbaaaaaaaaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:26:1767:531/1\n-GCCGGGCCGGGGCCATGCAGGAGAGCCAGACCAAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:26:1767:531/1\n-bbbaaabaaaaababaa_U\\^[^aaa\\__aaa[XX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:20:1032:2010/1\n-GCGAGGAGCAGTTCTCATTGTTGGTGTAGGTGTTG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:20:1032:2010/1\n-bbaaaaaaaaaaaabaaaaaaaXX]aXRR[aa^_S\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:59:426:623/1\n-GCAGGAGAGCCAGACCAAGAGCATGTTCGTGTCCC\n
-+PATHBIO-SOLEXA2_30LEJAAXX:7:59:426:623/1\n-baaaaabbbbaaabaaaaaaaaaaaaaaa`__aaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:43:525:1541/1\n-TGTTCGTGTCCCGGGCCCTGGAGAAGATCCTAGCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:43:525:1541/1\n-aaabbaaaaabaaaaaaaaaaaaaaaaa`]`aaa\\\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:100:373:464/1\n-CTTGCCGGTGAGGATGGAGGAGCGTGAGGGGCAGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:100:373:464/1\n-aba^aaaaaaaaaaaaa[aaaaa___a[X^_[Z[a\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:56:1266:1813/1\n-GGGTGAGGAGCAGTTCTCATTGTTGGTGTAGGTGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:56:1266:1813/1\n-aaaaaaaaaabaUaaa_ZZa^VVaZQREZUKEXaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:84:752:1980/1\n-GGAGAAGATCCTAGCCGACAAGGAGGTGAAGCGGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:84:752:1980/1\n-ba\\aaaaabaZaaab_]Ua^Xaa\\a[JX[Xaaaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:13:592:1334/1\n-CAGCACTTTGGGAGGCCGAGGCAGGCGGATCACAA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:13:592:1334/1\n-aaaaaaaZabaaaaaaJaabaaaaaaa^S[\\ZU\\_\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:95:638:809/1\n-GGCGTACTTGCCGGTGAGGATGGAGGAGCGTGAGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:95:638:809/1\n-a\\aaLaaaaaaaaaZ^aaabaaa]_ZW\\ZVV[W[[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:66:513:1615/1\n-CCGGGGCCCTGGAGAAGATCCTAGCCGACAAGGAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:66:513:1615/1\n-aaaaaaaa^aaaaaaaa^aaa\\aaZa^QZaaa_]`\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:302:1352/1\n-GAAGATCCTAGCCGACAAGGAGGTGAAGCGGCCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:302:1352/1\n-aaaaaaabbbabaaaaaZaaXaaaaa^aaaaaaa[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:45:943:526/1\n-GTGAGGGCCAGCACATGGGTGTGGTTACGAAGGCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:45:943:526/1\n-aLWZWa[EKaba^WaaaaWUSS_\\KMEZaVVJJZ[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:63:849:221/1\n-GTGGTTGTGGACGTACTTGCCGGTGAGGATGGAGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:63:849:221/1\n-bbbaaabaaaaaaaaaaaaa^Z^aZKRUR[UXERU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:21:471:627/1\n-CTCGTGCTGTGCCTGCCAGGAGGGCGAGGAGCAGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:21:471:627/1\n-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_Z[aa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:8:216:1709/1\n-TGTGGACGTACTTGCCGGTGAGGATGGAGGAGCGT\n-+PATHBIO
-SOLEXA2_30LEJAAXX:7:8:216:1709/1\n-baaaaaaaaaaa```aa_][EXUUXXUU[SU[_[S\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:33:848:276/1\n-GCCGGGCCGGGGCCATGCAGGAGAGCCAGACCAAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:33:848:276/1\n-aaaaaaWWaUa^aa\\a\\aaa]X`aZRR]XV[ZJRE\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/ARFGEF2-SULF2_2.fq --- a/tests/mcf7_pe_35bp/ARFGEF2-SULF2_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,568 +0,0 @@\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:23:64/2\n-GAAGATCCTAGCCGACAAGGAGGGGAAGCGGCCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:23:64/2\n-aaaaabbaaaaaa^aaaaaaZaaWL\\a^aaaaa\\X\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:53:140:335/2\n-GAAGATCCTAGCCGACAAGGAGGTGAAGCGGCCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:53:140:335/2\n-aaaaaaababaaaaa[H^aaUaaaUMXaaaaOKRX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:34:1433:1923/2\n-GCCAGACCAAGAGCATGTTCGTGTCCCGGGCCCTG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:34:1433:1923/2\n-aaaaaaaaaaaabbaabbbbaaUaaa^LaaZaa^X\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:72:1296:386/2\n-GTGCCTGCCAGGAGGGCGAGGAGCAGTTCTCATTG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:72:1296:386/2\n-aWaaaabaaaaaaaaXV^aaaaaaaaaaaa__`aU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:44:1403:1658/2\n-CAGCACATGGGTGTGGTCACGAAGGCGTTGATGAA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:44:1403:1658/2\n-abbaaaaa\\aaaaaaaba^[^^baaaaaaaUZZVU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:97:215:106/2\n-GCTGGGTCTTGTTCATCACCTGCATGGAACCGAGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:97:215:106/2\n-bbaaa^aabaa\\a_M`XRXaaaXP[aaUHPaa_XP\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:49:1199:1607/2\n-TGTGGACGTACTTGCCGGTGAGGATGGAGGAGCGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:49:1199:1607/2\n-baaaaaaaaaaaaaaaaaaaZa\\WaaaaaZV[\\^R\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:95:1383:1396/2\n-GGGGCAGCACATGGGTGTGGTCACGAAGGCGTTGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:95:1383:1396/2\n-aa\\abaaaaaaaaaaaaaaaa^aaaaaaaaaaaaK\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:73:79:1456/2\n-GGAGGTGAAGCGGCCCCAGCACTCCCCGCTGCGCA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:73:79:1456/2\n-ababaab^aaaaaabbaWaa\\aaaa`S_aa^aa`H\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:23:1773:1946/2\n-GGAGGAGCGTGAGGGGCAGCACATGGGTGTGGTCA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:23:1773:1946/2\n-aaaaaaaaaaaaaWaaaaaa^WWaaaaLaaWaL\\Z\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:49:241:582/2\n-GAGCCGACAAGGAGGTGAAGCGGCCCCAGCACTCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:49:241:582/2\n-HXabaaaaa]]_]aXE[aaUaaUKUa^U[XMX[UX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:18:965:1429/2\n-CTTGCCGGTGAGGATGGAGGAGCGTGAGGGGCAGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:18:965:1429/2\n-aaabaaaaaa\\aaaaaUXUa
aaaaaaW_Z]aaaXE\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:91:404:1991/2\n-CGACAAGGAGGTGAAGCGGCCCCAGCACTCCCCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:91:404:1991/2\n-baZU_Uaaab]W[aa^^I\\aaaaaL[H[EUa_PE[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:91:289:1577/2\n-GTGAGGGGCAGCACATGGGTGTGGTCACGAAGGCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:91:289:1577/2\n-bbaaaaabaaaabaaaaaaaaaaaa^[[^UUX[_U\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:34:1304:1646/2\n-GCTGCGCAGGGCCTCCCAGGTGGCGCTCGGTGGGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:34:1304:1646/2\n-bbbbbbaaaaaaaaaaaaa^[Zaaaaaaa^V\\a_P\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:7:1465:1275/2\n-GCGAGGAGCAGTTCTCATTGTTGGTGTAGGTGTTG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:7:1465:1275/2\n-aaaaabaaaaaaaW\\aaaa^aa\\aaa^[[]aO^[G\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:86:1572:2023/2\n-GTTCGTGTCCCGGGCCCTGGAGAAGATCCTAGCCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:86:1572:2023/2\n-aaaaababaaaaa^a_S]\\aaaaba\\[^[aaURXX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:16:679:1555/2\n-AAGGCGTTGATGAAGTGCGTCCCGACCTGCTCCAT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:16:679:1555/2\n-aaabbaaaaaaaaaaaaaaaaXEKEXaVQUEOUUE\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:91:915:1083/2\n-GTTCGTGTCCCGGGCCCTGGAGAAGATCCTAGCCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:91:915:1083/2\n-bbabaaaabbaaaaaaaa\\aaaaaaa^\\^^ZOZUR\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:39:1584:573/2\n-CGTTGATGAAGTGCGTCCCGCCCTGCTCCATGATG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:39:1584:573/2\n-abaababaaabaaaabbaaaaaaaVQ\\a^Q[X[XX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:70:378:1964/2\n-GGCGAGGAGCAGTTCTCATTGTTGGTGTAGGTGTT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:70:378:1964/2\n-bba`[`_U_aaaaaab_Z^aaaaaa_]XUKX]Z_U\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:59:129:1017/2\n-AGCCGACAAGGAGGTGAAGCGGCCCCAGCACTCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:59:129:1017/2\n-abaabbaaaaaaaaaaaaaaaaaaaZH[^____aX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:12:365:706/2\n-CTCTCGTGCTGTGCCTGCCAGGAGGGCGAGGAGCA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:12:365:706/2\n-bbaabaa^aa\\aaaabaaa^aXEUaa\\_S`[[^[E\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:83:888:907/2\n-GGAGAAGATCCTAGCCGACAAGGAGGTGAAGCGGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:83:888:907/2\n-abaaaaaaa[SS[__aR
OX`aUaZaXEPQQMRXXG\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:14:649:1237/2\n-AGAAGATCCTAGCCGACAAGGAGGGGAAGCCGCCC\n-+PATHBIO-SOLEXA2_'..b'AAXX:7:44:170:547/2\n-GGTCACGAAGGCGTTGATGAAGTGCGTCCCGCCCT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:44:170:547/2\n-aaaaaaaaaaaaaa^a\\aaaaWa[]]Zaaaaaa[M\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:31:327:927/2\n-TGAGGATGGAGGAGCGTGAGGGGCAGCACATGGGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:31:327:927/2\n-bbaaabbaaabbbaaabaaaaaa]]`aaaaaaaaK\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:11:670:1627/2\n-GCGGCCCCAGCACTCCCAGCTGCGCAGGGCCTGCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:11:670:1627/2\n-baaaaabaaaaaaaabaaaaaaaa^S^aaa\\V^aX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:81:159:597/2\n-CGCCGCCCGCCCCCGGGGCCGTCAGCCCCCCCCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:81:159:597/2\n-babaWaa_J_aaJa^aaaaZEEUXX[V\\aa^U[HK\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:70:1099:1499/2\n-GTGAGGGGCAGCACATGGGTGTGGTCACGAAGGCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:70:1099:1499/2\n-aaaaaaaaaabbbabaaaaaaaaaa[[Q[ZaaaUR\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:42:354:538/2\n-GTCCCGGGCCCTGGAGAAGATCCTAGCCGACAAGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:42:354:538/2\n-aaa^abba^baaaaaaZaaaaaaaaaaaaaaaXa[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:51:460:769/2\n-GGTGAAGCGGCCCCAGCACTCCCAGCTGCGCAGGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:51:460:769/2\n-aaWbaaaaa^aaaaO\\aLaaab\\LZXHUaaUEE[[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:60:609:90/2\n-CCAAGAGCATGTTCGTGTCCCGGGCCCTGGAGAAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:60:609:90/2\n-baaaabaWaaaa^aaaZaaba^aa^aaaaaaaaaU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:9:542:343/2\n-GCATGTTCGTGTCCCGGGCCCTGGAGAAGATCCTA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:9:542:343/2\n-aaaaabbb\\aZabaaaaaaaabZaaaaaaaaaa[X\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:26:1767:531/2\n-GGGGCAGCACATGGGTGTGGTCACGAAGGCGTTGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:26:1767:531/2\n-aaaaaabbaaaaabaaaaaaaaaa[V^aa\\aXa]K\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:20:1032:2010/2\n-GGTGAAGCGGCCCCAGCACTCCCAGCTGCGCAGGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:20:1032:2010/2\n-aaaaaaaaabaaaaaa[HSHaaXE[XSaZZUEOXR\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:59:426:623/2\n-GGAGCGTGAGGGGCAGCACATGGGTGTGGTCAC
GA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:59:426:623/2\n-baaaabababbaaaaaaaaabaaaaa^aaaaaa^M\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:43:525:1541/2\n-CCCTGCTCAATGATGCGCCGGGTCTTGTTCATCAC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:43:525:1541/2\n-bbababbaaaaaaaaaaaaaaaaaaaXaaaaa`]X\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:100:373:464/2\n-CCAAGAGCATGTTCGTGTCCCGGGCCCTGGAGAAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:100:373:464/2\n-babaaaaaaabbaaaaaabaaa\\aaaa`W_aaaaZ\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:56:1266:1813/2\n-GTGAAGCGGCCCCAGCACTCCCAGCTGCGCAGGGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:56:1266:1813/2\n-aaa\\\\aaaaa]`[a^K^aaWaaRUOa\\aaaXMKZU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:84:752:1980/2\n-GCCGGGTCTTGTTCATCACCTGCATGGAACCGAGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:84:752:1980/2\n-aa\\\\UaaaaLaaa^V^aabaaa^[^XV^]`_[[[P\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:13:592:1334/2\n-GTCTTGCTCTGTCATCAGGATGGAGTGCAGTGGCT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:13:592:1334/2\n-abaaaaaaaaaaaaaaaaUEXaZa\\UK[`X`aaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:95:638:809/2\n-CCCGGGCCCTGGAGAAGAGCCTAGCCGACAAGGAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:95:638:809/2\n-abbbaaabaaaaabaabaLaaaaaaaaaaaa^aaX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:66:513:1615/2\n-GCGGACGTACTTGCCGGTGAGGATGGAGGAGCGTG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:66:513:1615/2\n-aL^aaaaaaaaaaLaaaZ\\W^aaa^baa]W[XaJR\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:302:1352/2\n-CAGGTGTTGTGGTTGTGGACGTACTTGCCGGTGGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:302:1352/2\n-a\\aa\\aabaa^aaaa_WX][Q_ZOR^JH[VVHHPM\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:45:943:526/2\n-GGCCGGGGCCATGCAGGAGAGCCAGACCAAGAGCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:45:943:526/2\n-aaLaWXaaaaa^aa^aaa^aaXEXaaaaa^a^aWE\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:63:849:221/2\n-TGAAGATCCTAGCCGACAAGGAGGTGAAGCGGCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:63:849:221/2\n-bbaaaababaaaaaaaaaaaa``^^^aaaaaa`_X\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:21:471:627/2\n-CCCAGCTGCGCAGGGCCTGCCAGGTGGCGCTCGGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:21:471:627/2\n-bbbaaaaaabbaaaaaaaaa^V^aaaaaaaaaaaU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:8:216:1709/2\n-CGTGTCCCGGGCCCTGGAGAAGATCCTAGCCGACA\n-+PA
THBIO-SOLEXA2_30LEJAAXX:7:8:216:1709/2\n-baaaabbbaaaabaaa^S^X]aa\\[^^V[^_[E[E\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:33:848:276/2\n-CGGTGTGGTCACGAAGGCGTTGATGTAGTGCGTCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:33:848:276/2\n-aaW^baaabaa^ZaaZa^QU^EEKGMUKEOEMOM[\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/BC035340-MCF2L_1.fq --- a/tests/mcf7_pe_35bp/BC035340-MCF2L_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:73:483:1179/1 -CCTGGATGTCGGCAGCACAGAGCGGGACGATGTCC -+PATHBIO-SOLEXA2_30LEJAAXX:7:73:483:1179/1 -bbbbaaaaaaaaaaaaaaaaaaaaaaaaa\aa\aa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/BC035340-MCF2L_2.fq --- a/tests/mcf7_pe_35bp/BC035340-MCF2L_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:73:483:1179/2 -GCCTCATGTTTGACTGTTGGAGATTTATCCTGTGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:73:483:1179/2 -aaababaabaaabaaaabaaaaaabbaaaaa^aU[ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/BCAS4-BCAS3_1.fq --- a/tests/mcf7_pe_35bp/BCAS4-BCAS3_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,964 +0,0 @@\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:57:1302:766/1\n-TGCTCGTGGACGCTGATCAGCCGGAGCCCATGCGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:57:1302:766/1\n-aabbbaaa\\aaaabaaaaaaaaaaaaaaaaaa_UX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:32:41:790/1\n-CCACGGGCTCCCAGGCAGCCTCCGCCAGCCGGACC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:32:41:790/1\n-aaaWaaaabaab^aaaaaaaabaaaaaaa^H^aZa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:37:1377:1073/1\n-GCTCCATGGAGGACATGTGCCGCAGCCCGAAGCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:37:1377:1073/1\n-ababbaaabaaaaabaZaaaaaaZXXXX^^\\a^[^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:57:946:2022/1\n-CCCGCACACCTCCAGCAGGGTCACGCTCCTGTCAA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:57:946:2022/1\n-bbbabbabbaababaabaaaabaaaaaaaaaaUXZ\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:64:290:441/1\n-CGGGCTCCCAGGCAGCCTCCGCCAGCCGGACCCCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:64:290:441/1\n-baaabbbbbabbaaabbbbaaaaaaaaaaZaaaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:76:1184:1673/1\n-ATCAGCCGGAGCCCATGCGCAGCGGGGCGCGCGAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:76:1184:1673/1\n-aaaaaaabaaUaaabababaaaaaaLa^baaZHSS\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:18:254:1175/1\n-CCCAGGCAGCCTCCGCCAGCCGGACCCCGTCGCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:18:254:1175/1\n-aaaa[H^aa\\abb`]`aaaaa\\\\_Q_aaUK[a^S^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:52:1132:327/1\n-CCCCGGAGGCCCTCCTCCGTGTGCTCCATGGAGGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:52:1132:327/1\n-babbbaaabaaaabbaaaaaXRUaaa_^__[^^aZ\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:13:221:1785/1\n-GCGTCGGCAAGTCGCTCCCGGAGGCCCTCCTCCGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:13:221:1785/1\n-abbaaaaaaaaWaaaaaaaZa^aaa_ZZ_a^aaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:33:309:789/1\n-AGGCAGCCTCCGCCAGCCGGACCCCGTCGCCCTCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:33:309:789/1\n-aaaaaaaaa^baaaaaaZW^aaaaaZaaaaaaLa]\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:1658:1977/1\n-CGCTGATCAGCCGGAGCCCATGCGCAGCGGGGCGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:1658:1977/1\n-aabaaaaaaaa\\^aaaaaZUZaaa]]Xaa[H^aLa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:59:1326:1906/1\n-CGGGCGCAACCACGGGCTCCCAGGCAGCCTCCGCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:59:1326:1906/1\n-abbaaaaaaaaaaaaa^baa
\\aaaaabaaaLaUaL\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:38:270:183/1\n-CCCCGTCGCCCTCCTGATGCTGCTCGTGGACGCTG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:38:270:183/1\n-abbababaaabbbbbaa^aaaaaaaaa^URXa\\XI\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:31:1692:1137/1\n-CGCTGATCAGCCGGAGCCCATGCGCAGCGGGGCGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:31:1692:1137/1\n-baaaaabbaaaab^aaaaaaaaaaaaa[V^aa_[^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:95:1237:1905/1\n-CAGGCAGCCTCCGCCAGCCGGACCCCGTCGCCCTC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:95:1237:1905/1\n-aaaaabbaaaaaaaba^aaaaUaaaaaabababab\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:14:618:35/1\n-CGGGCTCCCAGGCAGCCTCCGCCAGCCGGACCCCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:14:618:35/1\n-aaaaaaaaaa_]_aaaaaaaaaaaaaaa\\[]_a_X\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:30:837:629/1\n-CTCCGTGTGCTCCATGGAGGACATGTGCCGCAGCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:30:837:629/1\n-abbabaZaaaabbbaaaaaaaabaaaaaaaaaa`_\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:84:1185:804/1\n-CGGGCTCCCAGGCAGCCTCCGCCAGCCGGACCCCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:84:1185:804/1\n-aaabaaaaaaaa_]Saaaaa\\aaaaaaaa[aaaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:78:236:1529/1\n-GCCGGAGCCCATGCGCAGCGGGGCGCGCGAGGTCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:78:236:1529/1\n-baaaa^aaaaaaabaaaaaaaaaaaaWaaO\\MJ_W\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:18:276:1452/1\n-CCCGGAGGCCCTCCTCCGTGTGCTCCATGGAGGAC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:18:276:1452/1\n-babaa^abaabaaaaaaaaaaaaab_U]a__^aaZ\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:32:510:1332/1\n-CGCTCCCGGAGGCCCTCCTCCGTGTGCTCCATGGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:32:510:1332/1\n-aabWabaaa^aaa^aaaa\\aaa^QQaaaaaLa\\[I\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:2:963:1089/1\n-GCGTCGGCAAGTCGCTCCCGGAGGCCCTCCTCCGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:2:963:1089/1\n-bbbbbbbaaaaaaaaaaa`Z[Q^aaa[X[^Z[[OO\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:44:864:680/1\n-TCCTGATGCTGCTCGTGGACGCTGATCAGCCGGAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:44:864:680/1\n-aaabaabbbaaaa^Q[aaaaaaaaaa^XX^XEKU^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:27:1068:808/1\n-CGCACACCTCCAGAAGGGTCACGCTCCTGTCAAAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:27:1068:808/1\n-aaaaaaaaaabaaUZa^HJXJ[a\\^aaZ
aLaLaaV\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:92:694:1379/1\n-GCCAGCCGGACCCCGTCGCCCTCCTGATGCTGCTC\n-+PATHBIO-SOLEXA2_30LE'..b'1\n-GACCCCGTCGCCCTCCTGATGCTGCTCGTGGACGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:44:659:963/1\n-babaaaaaaaabbaabaaaaaaaaaaZZ\\aX^[Z[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:5:1524:224/1\n-ACCAGCCGGACCCCGTCGCCCTCCTGATGCTGCGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:5:1524:224/1\n-aaaaaaWaWaaaU`ZZ`S\\Z^Z^`]QXEKMOEPEG\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:13:761:1512/1\n-CTCGTGGACGCTGATCAGCCGGAGCCCATGCGCAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:13:761:1512/1\n-bbababbbabbaaaa__`aa_^[_aaaaaaaa_^U\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:69:1545:865/1\n-CGCTGATCAGCCGGAGCCCATGCGCAGCGGGGCGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:69:1545:865/1\n-a^aaaa\\aaab_X_aaaa_S`aaWa^aaaaabaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:930:1174/1\n-CGCGCTCTTCCTGACCCCCGATCCTGGGGCCGAGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:930:1174/1\n-aaaaaWa^aaaaaaaaaaaaaWaa^VSSaaaaaba\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:83:718:889/1\n-CGCTCCATGGAGGACATGTGCCGCAGCCCGAAGCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:83:718:889/1\n-aabaaaaabaaaaaaaaaaaaaaaaaaaaaaa]_`\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:1417:658/1\n-CACCGGCTAGGTGACTCGGCCATGGCGTCGGCAAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:1417:658/1\n-a^aZaaabaWWLaabaX[[ZHPVQ[J[HXWaZXZU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:66:867:1539/1\n-CCTGATGCTGCTCGTGGACGCTGATCAGCCGGAGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:66:867:1539/1\n-bbaaaaaaaaaa_Z_aaa`_``]_^^^X^XOX^[^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:79:1578:1197/1\n-CCATGCGCAGCGGGGCGCGCGAGCTCGCGCTCTTC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:79:1578:1197/1\n-babaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:33:1021:322/1\n-TTCCTCCGTGTGCTCCATGGAGGACATGTGCCGCA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:33:1021:322/1\n-aaaaaaaaWaaaaaabaaaaaWaaa^aaaaaaaZa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:61:1267:1990/1\n-CGTCGCCCTCCTGATGCTGCTCGTGGACGCTGATC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:61:1267:1990/1\n-aaabbaaaaaa`]`aaa`Z`a^V^\\^_UOU^VX[K\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:91:1242:1241/1\n-CGGCAAGTCGCTCCCGGAGGCCCTCCTCCGTGTGC\n-+PATHBIO-SOLE
XA2_30LEJAAXX:7:91:1242:1241/1\n-\\aaaaaaaa^U^aW`Z`aaaZaXEZ[[`]aaa_]_\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:62:1224:1029/1\n-CCCGTCGCCCTCCTGATGCTGCTCGTGGACGCTGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:62:1224:1029/1\n-aaaabaaabaaaaaaaaaaaZaaaa[HXUaaZ`KO\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:38:1108:325/1\n-GAGGACATGTGCCGCAGCCCGAAGCCCTCAGGCCA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:38:1108:325/1\n-abbbaabbbbabbaaaaaaaaaaaaa_X^SX^aa^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:49:970:1046/1\n-CCGGCTAGGTGACTCGGCCATGGCGTCGGCAAGTC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:49:970:1046/1\n-bbabaaaaaaaaaaaaX]Za[X^ZZZZ^ZHOU_PK\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:82:1503/1\n-TCCCGGCTAGGTGACTCGGCCATGGCGTCGGCAAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:82:1503/1\n-bbbababa^aaaabaa`]_aaWaaaaaaa]]UGXZ\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:85:302:1496/1\n-GCCGCAGCCCGAAGCCCTCAGGCCTGCTCCCGCAC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:85:302:1496/1\n-abbaabababaaaababaaaaaabaaaaaaaaZU[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:26:441:786/1\n-CCGGCTAGGTGACTCGGCCATGGCGTCGGCAAGTC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:26:441:786/1\n-aaaaabaaaWaa^aaaaaQI\\b^WaLaaaWaaaaW\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:8:434:166/1\n-ATCAGCCGGAGCCCATGCGCAGCGGGGCGCGCGAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:8:434:166/1\n-abbaaaaaaaaaaaa___a^\\__`___a_`__[UX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:37:940:1981/1\n-GCGCAGCGGGGCGCGCGAGCTCGCGCTCTTCCTGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:37:940:1981/1\n-aabaaaaaabaa`_[^__]XVX^Z\\REEEMOGERK\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1018:1410/1\n-GTCCCGGCTAGGTGACTCGGCCATGGCGTCGGCAA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:17:1018:1410/1\n-aababbabbbaaWabaaaaaa^QX]]Saa`]]aa^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:84:1297:1236/1\n-CGTCGCTCCCGGAGGCCCTCCTCCGTGTGCTCCAT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:84:1297:1236/1\n-aaaabaaaaaaabaaaaaaaaaa^VXa[J^aaZZ^\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:45:1219:280/1\n-TGATGCTGCTCGTGGACGCTGATCAGCCGGAGCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:45:1219:280/1\n-baaaaaaaabaaaaXX[aaaaZaaaXEX[`__`a[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:36:1381:1758/1\n-GGAGGACATGTGCCGCAGCCCGAAGCCCTCAGGCC\n-+PATHBIO-SOLEXA2_30
LEJAAXX:7:36:1381:1758/1\n-aaaaaaaWaaaaWaaa\\aaa^aaaaaaaaaaaaWa\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:29:27:808/1\n-GCTGGCCAGTCAGAAGCAGGTTGCTGGTACCTACG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:29:27:808/1\n-bbbaabaaaaaaaaaaaaaaaaaaaa[aa_``aaa\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/BCAS4-BCAS3_2.fq --- a/tests/mcf7_pe_35bp/BCAS4-BCAS3_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,964 +0,0 @@\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:57:1302:766/2\n-CTCGGCCATGGCGTCGGCAAGTCGCTCCCGGAGGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:57:1302:766/2\n-bababbaaaaaaaaaaaaa__]_Z`aaa__[^[VO\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:32:41:790/2\n-GCTCCATGGAGGACATGTGCCGCAGCCCGAAGCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:32:41:790/2\n-aaaaaaaaabbaaaaaaaaaaaaaaaaaa\\aaaaX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:37:1377:1073/2\n-CGCTGATCAGCCGGAGCCCATGCGCAGCGGGGCGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:37:1377:1073/2\n-aaaaaaaaaaaaaaaaZaaa`SZaaaV[[Z_[H[H\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:57:946:2022/2\n-CGCAACCACGGGCTCCCAGGCAGCCTCCGCCAGCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:57:946:2022/2\n-bbabbbbbabaaaaaaaaaaaaa^^^aa__]aa\\R\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:64:290:441/2\n-GCTCCATGGAGGACATGTGCCGCAGCCCGAAGCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:64:290:441/2\n-abaaaaaaabbbaaaaaaaaaaaaaaaaaaUaaa[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:76:1184:1673/2\n-CCACGACGTCCCGGCTAGGTGACTCGGCCACGGGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:76:1184:1673/2\n-aaabaaaaWa^aaaZLaaLZaaLUUKRHVZIVaLG\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:18:254:1175/2\n-CCGTGTGCTCCATGGAGGACATGTGCCGCAGCCCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:18:254:1175/2\n-aaa\\aaaabaaXaaaa^Q[Q[aWaaaaaaaaaZRU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:52:1132:327/2\n-CGCCAGCCGGACCCCGTCGCCCTCCTGATGCTGCT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:52:1132:327/2\n-abbbbbbbaaaabbaaaaaaaaaaaa[\\^aa`]`Z\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:13:221:1785/2\n-GTCGCCCTCCTGATGCTGCTCGTGGACGCTGATCA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:13:221:1785/2\n-aabbaaa\\abb^aababaaaaaWa^QKSaOEKUaM\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:33:309:789/2\n-GTGCTCCATGGAGGACATGTGCCGCAGCCCGAAGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:33:309:789/2\n-aZ^a\\^aaa\\aaaWaaaLa\\aaaZUa\\aabaL^aZ\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:1658:1977/2\n-GGCTAGGTGACTCGGCCACGGCGTCGGCAAGTCGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:1658:1977/2\n-aaaabaaaaaaabaZaaXEXa^aWW^aaaW^La_E\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:59:1326:1906/2\n-CGCACACTGCTCTACCCTTTACAAGGTGCTTTCAC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:59:1326:1906/2\n-Za^aaabaaaaaZa^Za
aaa^aaa[UWWaaaaLaE\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:38:270:183/2\n-CGCTCCCGGAGGCCCTCCTCCGTGTGCTCCCTGGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:38:270:183/2\n-babbaabababaaaaaabaaa\\\\\\a\\`[_RGRVXR\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:31:1692:1137/2\n-CCCCGCACACCTCCAGCAGGGTCACGCTCCTGTCA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:31:1692:1137/2\n-babbbaaabbaaaa\\aa`ZRXaaaaaaaaaaaaaZ\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:95:1237:1905/2\n-CTCCCGGAGGCCCTCCTCCGTGTGCTCCATGGAGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:95:1237:1905/2\n-baba^abZWaaaaa\\aOaaa`]Maaaa[L^aUEPU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:14:618:35/2\n-GCTCCATGGAGGACATGTGCCGCAGCCCGAAGCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:14:618:35/2\n-aaaa`]`aaaaaaaaaaaaaaaaZaaaaaa^\\[_X\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:30:837:629/2\n-CAGGCAGCCTCCGCCAGCCGGACCCCGTCGCCCTC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:30:837:629/2\n-aabaabbbbaaaaaaaaaaaaaaaaaaaaaaa__[\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:84:1185:804/2\n-GCTCCATGGAGGACATGTGCCGCAGCCCGAAGCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:84:1185:804/2\n-aaabaaaaaaaaaaaaaa^aaaaZZ\\aaa^aaaaR\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:78:236:1529/2\n-CGGCTAGGTGACTCGGCCATGGCGTCGGGAAGGCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:78:236:1529/2\n-bbaaaaa\\\\aaaaaababWaLaaaL^a]UIQVIIU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:18:276:1452/2\n-GGCAGCCTCCGCCAGCCGGACCCCGTCGCCCTCCT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:18:276:1452/2\n-aaaaabbaaaabaaaaaaaaaaabREX^^V^S_XE\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:32:510:1332/2\n-CAGGCAGCCTCCGCCAGCCGGACCCCGTCGCCCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:32:510:1332/2\n-^bbaaaaaaaaa^aaWaaa\\aaabaaLaaZaa\\LU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:2:963:1089/2\n-CTCGTGGACGCTGATCAGCCGGAGCCCATGCGCAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:2:963:1089/2\n-abaabaaaaaaaaaaa`]`aaaaaaa^[[aaa__U\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:44:864:680/2\n-GGCGTCGGCAAGTCGCTCCCGGAGGCCCCCCTCCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:44:864:680/2\n-aWaaabaaa^aSHXQSaUaUEKGKXSWXQVZQQ^G\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:27:1068:808/2\n-GCCGGAGCCCATGCGCAGCGGGGCGCGCGTGCTCG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:27:1068:808/2\n-H[Waaa^ZaaaWaaZa\\ZZI
\\aXHSOa\\EERHQIH\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:92:694:1379/2\n-GTGCTCCATGGAGGACATGTGCCGCAGCCCGAAGC\n-+PATHBIO-SOLEXA2_30LE'..b'2\n-TCCTCCGTGTGCTCCATGGAGGACATGTGCCGCAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:44:659:963/2\n-abababaaaaaaabaaaaaaaaaaaaaaaaaa_]U\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:5:1524:224/2\n-ACTCCGTGTGCTCCATGGAGGACATGTGCCGCAGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:5:1524:224/2\n-aaaaaX]N_aaaa^Z^SH^a]ZQUKKXKR[VV[XX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:13:761:1512/2\n-CTCGGCCATGGCGTCGGCAAGTCGCTCCCGGGGGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:13:761:1512/2\n-aaaaaaaaaaaaaaaaa`[^XXUKMVVS[HPEUXK\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:69:1545:865/2\n-GCCATGGCGTCGGCAAGTCGCTCCCGGAGGCCCTC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:69:1545:865/2\n-aaaaaaaaaa_X[S[a^aa`UGRKXZKRUaW[JHO\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:930:1174/2\n-GCCCGAAGCCCTCAGGCCAGCTCCCGCACACCTCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:930:1174/2\n-aaaaaabaa\\aaaa^^aa\\aa^aZaWaaaZaZLaZ\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:83:718:889/2\n-CCCAGGCAGCCTCCGCCAGCCGGACCCCGTCGCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:83:718:889/2\n-bbaaaaabaaabbbabaaababaaaaaaaaabaaZ\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:1417:658/2\n-GTGGACGCTGATCAGCCGGAGCCCATGCGCAGCGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:1417:658/2\n-aZaaaOLaa\\aZaaZaaUa\\a\\aaa^aaaaaaLa_\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:66:867:1539/2\n-GGCCATGGCGTCGGCAAGTCGCTCCCGGAGGCCCT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:66:867:1539/2\n-abbbaaaabaaaaaaaaaaaaaaa_^^^[[^\\^^U\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:79:1578:1197/2\n-CGGATTCCACGATGTCCCGGCTAGGTGACTCGGCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:79:1578:1197/2\n-baabbbabbaaaaaabaaa\\aaaa\\`S_aa^QV[Z\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:33:1021:322/2\n-ACGGGCTCCCAGGCAGCCTCCGCCAGCCGGAGCCC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:33:1021:322/2\n-aaaabaL\\abaaaaaZaa\\a^Q\\a\\aZaWaZJSaX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:61:1267:1990/2\n-GTCGGCAAGTCGCTCCCGGAGGCCCTCCTCCGTGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:61:1267:1990/2\n-baaaaaaaaaaaaaaa`Z]_____a_____`][VR\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:91:1242:1241/2\n-CTCCTGATGCTGCTCGTGGACGCTGATCAGCCG
GA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:91:1242:1241/2\n-aaaa\\aaaaaaWaaaa_UMaaaaaLa__]_[^^`U\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:62:1224:1029/2\n-CGCTCCCGGAGGCCCTCCTCCGTGTGCTCCATGGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:62:1224:1029/2\n-^aa^aabaa]X_aaaaaa^aaaaaaaa[VV_W_RK\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:38:1108:325/2\n-CCACGGGCTCCCAGGCAGCCTCCGCCAGCCGGACC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:38:1108:325/2\n-baaabaaaaaaaaabaaaaabaaaaZERaa\\aaQX\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:49:970:1046/2\n-CTCGTGGACGCTGATCAGCCGGAGCCCATGCGCAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:49:970:1046/2\n-bbaaaabaaaaaaaaaaaaaaaaaaaa[[[a_OZU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:6:82:1503/2\n-TGGACGCTGATCAGCCGGAGCCCATGCGCAGCGGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:6:82:1503/2\n-abaaaababaaaWaaaaaaaaaaZaaaa`]`aX[X\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:85:302:1496/2\n-CCGAGGCCCGGGCGCAACCACGGGCTCCCCGGCAG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:85:302:1496/2\n-abaaaaabbaaaaaaWaaaU]J_aaaaa`J]a[HO\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:26:441:786/2\n-CTGATCAGCCGGAGCCCATGCGCAGCGGGGCGCGC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:26:441:786/2\n-aaaaabaaZaaaaabaabaaabaaaaaaaaaa\\`E\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:8:434:166/2\n-ACGACGTCCCGGCTAGGTGACTCGGCCATGGCGTC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:8:434:166/2\n-bbabbbaaaaaabbaaa`]_aa[MX_aa[[[\\Z^U\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:37:940:1981/2\n-CCCGCACACCTCCAGCAGGGTAACGCTCCTGTCAA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:37:940:1981/2\n-aaaaabbababaaaaaaaaaEZPGU_^[[a_][^U\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1018:1410/2\n-GTGGACGCTGATCAGCCGGAGCCCATGCGCAGCGG\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:17:1018:1410/2\n-aaaabaaaaabaaaaaaaaaaa^^QZ^WaZ]_`aU\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:84:1297:1236/2\n-CCCGTCGCCCTCCTGATGCTGCTCGTGGGCGCTGA\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:84:1297:1236/2\n-aaaWaa\\aaaZ\\^MVZaIIZ[V^HS[MOEPRaUGP\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:45:1219:280/2\n-CCCGGCTAGGTGACTCGGCCATGGCGTCGGCAAGT\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:45:1219:280/2\n-abaaaaaJa\\\\aaa`Q[^aa_V^[a[J`aaWaaXM\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:36:1381:1758/2\n-CACGGGCTCCCAGGCAGCCTCCGCCAGCCGGACCC
\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:36:1381:1758/2\n-aaaaaaZaaaaaWaZaaLbaaWaWWaaL][XaLaE\n-@PATHBIO-SOLEXA2_30LEJAAXX:7:29:27:808/2\n-GGCGCGCGAGCTCGCGCTCTTCCTGACCCCCGATC\n-+PATHBIO-SOLEXA2_30LEJAAXX:7:29:27:808/2\n-aababbaaaaaaaaaaZaaaaa]JUU_aaa^EEGU\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/CXorf15-SYAP1_1.fq --- a/tests/mcf7_pe_35bp/CXorf15-SYAP1_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,12 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:75:946:1909/1 -GGCGGACGGCGACGTAGCCCGCGGCAGAAGATAAC -+PATHBIO-SOLEXA2_30LEJAAXX:7:75:946:1909/1 -abaabaaabaaaaaaaaaaaaabaVV_Z]aX]X][ -@PATHBIO-SOLEXA2_30LEJAAXX:7:45:1722:953/1 -GCTGTTTCAGCAACTGATTCAGTTATATTTTTTGT -+PATHBIO-SOLEXA2_30LEJAAXX:7:45:1722:953/1 -aaaa^^bbbbaaaaaaaaa\aaXaa`W`a^aa^H^ -@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1450:1241/1 -GGCGACTGAGGCAGGACGGGGCGGACGGCGAGGCA -+PATHBIO-SOLEXA2_30LEJAAXX:7:17:1450:1241/1 -a^\aaaaaa^a^K^WaaLWLaaaaVS[aaLEKU^Z |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/CXorf15-SYAP1_2.fq --- a/tests/mcf7_pe_35bp/CXorf15-SYAP1_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,12 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:75:946:1909/2 -CTTCATCGTTAGTGTCAACCCCTGGGGGCCCAGCT -+PATHBIO-SOLEXA2_30LEJAAXX:7:75:946:1909/2 -abbbaabbbaa\aaaaaaaa^JMUH_aZKEXOKRE -@PATHBIO-SOLEXA2_30LEJAAXX:7:45:1722:953/2 -GAGGAGGCAGCGCGGGGAAGAGGCGGCGGGGGCGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:45:1722:953/2 -aaaabaabaabbbbabaaaabaaba_S]^K\ILQE -@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1450:1241/2 -CGATTTTTCCTTCTTCTACGGATTTCTTTATTTTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:17:1450:1241/2 -XEKQ[a\\L\aaaaaaaQVXHKa^`MXbXPURE[U |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_1.fq --- a/tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,8 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:59:769:345/1 -ATTTGAAGCAGCCTATGTACTTGGGATTTGAAAAA -+PATHBIO-SOLEXA2_30LEJAAXX:7:59:769:345/1 -aabaaaa^aa^bbbabaaaabaa^\aaaa\aaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:81:220:1548/1 -GTCACTTGAAAGAGCCTCTACTTACATTTCATCTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:81:220:1548/1 -aaaaaabbaaaaaaabaaaaaa`U^U^aa^V^aaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_2.fq --- a/tests/mcf7_pe_35bp/DEPDC1B-ELOVL7_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,8 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:59:769:345/2 -CACAAATACTTCTGGTAGGCTGGCCCCAATGCAGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:59:769:345/2 -babbb^abababa^aaaaabaaaababaaaaaaaX -@PATHBIO-SOLEXA2_30LEJAAXX:7:81:220:1548/2 -CTTATGTGGATGGCGACAATAACAAACTGGACAAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:81:220:1548/2 -abaaabaaaaaabaabaaa_]`a_]`aaaU[Z[VX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/EP300-MRFAP1_1.fq --- a/tests/mcf7_pe_35bp/EP300-MRFAP1_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,64 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1145:806/1 -GAGATTTCCTGAGGATTCTGGTTTTCCTCGCTTGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:17:1145:806/1 -bbbbbbaaaaaaaaaaaaaaaaaa`_]`[Q[`^KU -@PATHBIO-SOLEXA2_30LEJAAXX:7:22:1648:420/1 -GGCCGAGAATGTGGTGGAACCGGGGCCGCCTTCAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:22:1648:420/1 -aaaaaaaaaaaaaaaaZaaaa[S]`aaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:91:590:1587/1 -AAAAATGGCCGAGAATGTGGTGGAACCGGGGCCGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:91:590:1587/1 -baaaaaWaa^aaa^aaaaaaaaa\abaaaa`_Xaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:69:578:501/1 -GCATCTCTTCCCACTGCCTCGGAACCGCAATAGCG -+PATHBIO-SOLEXA2_30LEJAAXX:7:69:578:501/1 -W^aaaaaXabZaaaaZaabaaaaa_]`a_W^V[aa -@PATHBIO-SOLEXA2_30LEJAAXX:7:16:987:1601/1 -GTGGAACCGGGGCCGCCTTCAGCCAAGCGGCCTAA -+PATHBIO-SOLEXA2_30LEJAAXX:7:16:987:1601/1 -aaa^aababaaaaaaaabbaaabaaaaabaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:20:1374:264/1 -CCTGGGTGCGGCGCGGGGACCCCGGGCCGAAGAAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:20:1374:264/1 -Zaa^aaJaaaWaabaaaaaa\aaaaaZabaWaZaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:88:641:1803/1 -CGACGATGTCCAGGGGCCGCATCTCTTCCCACTGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:88:641:1803/1 -baaaaabaabaaaaaabaaaaaaaabbaa_X_a\a -@PATHBIO-SOLEXA2_30LEJAAXX:7:51:1479:654/1 -GTCCTCGCGCATCTCGTTGATGACCGGGAGCAGAA -+PATHBIO-SOLEXA2_30LEJAAXX:7:51:1479:654/1 -aabbbaaababbbbaaaa\a^Q^a^RE[__\^^aZ -@PATHBIO-SOLEXA2_30LEJAAXX:7:63:347:1159/1 -CGGGAATGTGGTGGAACCGGGGCCGCCTTCAGCCA -+PATHBIO-SOLEXA2_30LEJAAXX:7:63:347:1159/1 -aNaaaaaaaaaaaaaaaaaaaaaabaaaaL\a_J] -@PATHBIO-SOLEXA2_30LEJAAXX:7:100:1324:832/1 -CTCTCATCTCCGGCCCTCTCGGCGTCCGCCAGCGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:100:1324:832/1 -aaab\^aaaaaaaaaa^baaaaaaaaXS_PEUKMa -@PATHBIO-SOLEXA2_30LEJAAXX:7:35:460:430/1 -AAAGAATTAAAAATGGCCGAGAATGTGGTGGAACC -+PATHBIO-SOLEXA2_30LEJAAXX:7:35:460:430/1 -aaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:68:216:472/1 -CGCGGGGACCCCGGGCCGAAGAAGAGATTTCCTGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:68:216:472/1 -aaaaaaa^^aba^aaaaaL\a^Z^aaaaaaabaab 
-@PATHBIO-SOLEXA2_30LEJAAXX:7:24:935:1284/1 -CGAGAATGTGGTGGAACCGGGGCCGCCTTCAGCCA -+PATHBIO-SOLEXA2_30LEJAAXX:7:24:935:1284/1 -baaabbbbbbbaaaaaa`Z_aaaa___aa[^^^UX -@PATHBIO-SOLEXA2_30LEJAAXX:7:24:420:2011/1 -GTTGATGACCGGGAGCAGAAACTGCTCGAAATCCT -+PATHBIO-SOLEXA2_30LEJAAXX:7:24:420:2011/1 -aaaaaababbaaaaaaaaaaaaaaaaaa^aaaa[[ -@PATHBIO-SOLEXA2_30LEJAAXX:7:78:1306:986/1 -GCCTCGCGCATCTCGTTGATGACCGGGAGCAGAAA -+PATHBIO-SOLEXA2_30LEJAAXX:7:78:1306:986/1 -abaabbbaaaaabaZabaaaaaaa^aXKX_`_Z`[ -@PATHBIO-SOLEXA2_30LEJAAXX:7:58:580:1277/1 -CGGGCTCCAGCACCTCCACTTCCTCCGGTTCCGCC -+PATHBIO-SOLEXA2_30LEJAAXX:7:58:580:1277/1 -baaaaaaaaaaaaa^aaaXMPaWaaaaOQS[\VVX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/EP300-MRFAP1_2.fq --- a/tests/mcf7_pe_35bp/EP300-MRFAP1_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,64 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:17:1145:806/2 -GCCAGCTCGACGATGTCCAGGGGCCGCATCTCTTC -+PATHBIO-SOLEXA2_30LEJAAXX:7:17:1145:806/2 -abaaabbbaaaaaaaaaaa_]_aaa____aaaa^U -@PATHBIO-SOLEXA2_30LEJAAXX:7:22:1648:420/2 -CGGGAGCAGAAACTGCTCGAAATCCTCCTCGGGCT -+PATHBIO-SOLEXA2_30LEJAAXX:7:22:1648:420/2 -babababbabaaaaaaaabaabbbaabaaaaZaaX -@PATHBIO-SOLEXA2_30LEJAAXX:7:91:590:1587/2 -GGCCGCATCTCTTCCCACTGCCTCGGAACCGCAAT -+PATHBIO-SOLEXA2_30LEJAAXX:7:91:590:1587/2 -babaaa\abaa\aabaMaUaaaaaUMEX]]OaZ[G -@PATHBIO-SOLEXA2_30LEJAAXX:7:69:578:501/2 -CTGGGTGCGGCGCGGGGACCGCGGGCCGAAGAAGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:69:578:501/2 -abaaa^aaaaababaaZaaXH[aVLIULZa[UP[K -@PATHBIO-SOLEXA2_30LEJAAXX:7:16:987:1601/2 -CTCGGTGATGACCGGGAGCAGAAACTGCTCGAAAT -+PATHBIO-SOLEXA2_30LEJAAXX:7:16:987:1601/2 -aZaaaMaa\baabaaaZaaaabaaaaaLUaaa^VP -@PATHBIO-SOLEXA2_30LEJAAXX:7:20:1374:264/2 -CTTCCCACTGCCTCGGAACAGCAATAGCGATGTCC -+PATHBIO-SOLEXA2_30LEJAAXX:7:20:1374:264/2 -aab^aaaaa\aaaaa\aaa\Uaaaaaaaa^WaMaE -@PATHBIO-SOLEXA2_30LEJAAXX:7:88:641:1803/2 -GCTGAGAATGTGGTGGAACCGGGGCCGCCTTCAGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:88:641:1803/2 -aaaaaaaaaaaabaaaaaaaaaaabaaaa\X]_aU -@PATHBIO-SOLEXA2_30LEJAAXX:7:51:1479:654/2 -GCCGCCTTCAGCCAAGCGGCCTAAACTCTCATCTC -+PATHBIO-SOLEXA2_30LEJAAXX:7:51:1479:654/2 -abaaaaaaaaaaaaaaaaaaa^V^aa`__aaaaaX -@PATHBIO-SOLEXA2_30LEJAAXX:7:63:347:1159/2 -TCTCGTTGATGACCGGGAGCAGAAACAGCTCGAAA -+PATHBIO-SOLEXA2_30LEJAAXX:7:63:347:1159/2 -aaaaaaa^aREXaaUaaaUQQVQUaMEOZZaPEUM -@PATHBIO-SOLEXA2_30LEJAAXX:7:100:1324:832/2 -TGCTCGCGCGTCAGCGACGCGATGTCCTCGCGCAT -+PATHBIO-SOLEXA2_30LEJAAXX:7:100:1324:832/2 -aLaaW^aZaZaaaZa^^aaaZ\\^aabOEZabZ]X -@PATHBIO-SOLEXA2_30LEJAAXX:7:35:460:430/2 -CTCGAAATCCTCCTCGGGCTCCAGCACCTCCACTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:35:460:430/2 -aaaaaabaaaZaaaaaaaaaaUEPaaaaaaaa]JK -@PATHBIO-SOLEXA2_30LEJAAXX:7:68:216:472/2 -CGACGATGTCCAGGGGCCGCATCTCTTCCCACTGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:68:216:472/2 -aaaaaba^abba^aaaaaa\aaaaaaWaaaLaa^Z 
-@PATHBIO-SOLEXA2_30LEJAAXX:7:24:935:1284/2 -CCGACGATGTCCAGGGGCCGCATCTCTTCCCACTG -+PATHBIO-SOLEXA2_30LEJAAXX:7:24:935:1284/2 -bbbbbaaabaaaa`]`aaaaaaaaaaaa^\^X^^M -@PATHBIO-SOLEXA2_30LEJAAXX:7:24:420:2011/2 -GGAACCGGGGCCGCCTTCAGCCAAGCGGCCTAAAC -+PATHBIO-SOLEXA2_30LEJAAXX:7:24:420:2011/2 -babaaaabbbaabaabababaaaaaaaaaa[V^\U -@PATHBIO-SOLEXA2_30LEJAAXX:7:78:1306:986/2 -GAACCGGGGCCGCCTTCAGCCAAGCGGCCTAAACT -+PATHBIO-SOLEXA2_30LEJAAXX:7:78:1306:986/2 -baaaaaZ^aaaaaaaa^VVaVQXa^[[a_V[[\UM -@PATHBIO-SOLEXA2_30LEJAAXX:7:58:580:1277/2 -GTTTTCCTCGCTTGTATCTCCGAAAGAATTAAAAA -+PATHBIO-SOLEXA2_30LEJAAXX:7:58:580:1277/2 -babbaaaaaaaa`S[aaaaZ^]]ZK\[ZEZa^aa[ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/EWSR1-TFF1_1.fq --- a/tests/mcf7_pe_35bp/EWSR1-TFF1_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:43:757:1645/1 -GAAGAGGTGGCCTCATGGATCGTGGTGGTCCCGGT -+PATHBIO-SOLEXA2_30LEJAAXX:7:43:757:1645/1 -abbbaaa^aaa]J`^aaaaaaaaaaaaaUKRXaXE |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/EWSR1-TFF1_2.fq --- a/tests/mcf7_pe_35bp/EWSR1-TFF1_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:43:757:1645/2 -CGGGCGTGACACCAGGAAAACCACAATTCTGTCTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:43:757:1645/2 -aaaaaaa^aabaaaaaaaaaa^aaaaaaaaaa\aZ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/MYO9B-FCHO1_1.fq --- a/tests/mcf7_pe_35bp/MYO9B-FCHO1_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,40 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:91:1384:1511/1 -CGCGTGAACCAGTGCATCGTGATCTCGGGTGAGAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:91:1384:1511/1 -aaaaaaaaaaaaaaaa^aaaaaaa^aaWaLa^aWa -@PATHBIO-SOLEXA2_30LEJAAXX:7:81:1032:792/1 -TGACGTCAAAGCCCTTGGAGCGGTATGTGCTAGAC -+PATHBIO-SOLEXA2_30LEJAAXX:7:81:1032:792/1 -aaaaaabaaaaaaaaaaaaaaaa^aaaaaaaa^]Z -@PATHBIO-SOLEXA2_30LEJAAXX:7:83:340:930/1 -AGTTCTCGCGGGACTTGGGCAGGAGCTGGCTGACG -+PATHBIO-SOLEXA2_30LEJAAXX:7:83:340:930/1 -aababbbabaaabbbaaaaaaaaaaaaaaaa][V^ -@PATHBIO-SOLEXA2_30LEJAAXX:7:10:418:605/1 -GCGCTGGCCGACGTGGCCTACTACACCATGCTCAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:10:418:605/1 -aaaaabaaaaaaaaaabaaaaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:14:562:448/1 -GGACCATCCTGGGTGCTGGCCCTGTGCTGGAGTGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:14:562:448/1 -bbbbbbbbbaaaaaaaaaaaaaaaaaaa_Z_V_Z\ -@PATHBIO-SOLEXA2_30LEJAAXX:7:25:1647:751/1 -CTCGTGGGACTTGGGCAGGAGATGGCTGACGCCCG -+PATHBIO-SOLEXA2_30LEJAAXX:7:25:1647:751/1 -aaaaaaa_X[a^^[aaaaaaUXX][^^aZ[UXU\U -@PATHBIO-SOLEXA2_30LEJAAXX:7:84:437:1347/1 -CCAGTGCATCGTGATCTCGGGTGAGAGCGGCTCCG -+PATHBIO-SOLEXA2_30LEJAAXX:7:84:437:1347/1 -aaaaabaaaaaaaaaabaaaaa\\^`]aaaaaa_Z -@PATHBIO-SOLEXA2_30LEJAAXX:7:45:696:1167/1 -CTACACCATGCTCAGGAAGCGCGTGAACCAGTGCA -+PATHBIO-SOLEXA2_30LEJAAXX:7:45:696:1167/1 -abbabaaaaaabbbaaaaababaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:95:842:1694/1 -CACCTGCACAGCATCCAAGGTGCTCACCACTTCCT -+PATHBIO-SOLEXA2_30LEJAAXX:7:95:842:1694/1 -aaabaaaaaaaaaab^U[^[^aaaaaaaa`_]aaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:49:555:429/1 -GCCGCTCCTGGTCCATGCGACGGTTCAGGTAGTTC -+PATHBIO-SOLEXA2_30LEJAAXX:7:49:555:429/1 -aabaaaaaabbW^aLa\aa^REXLa[Z^[IKVHX[ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/MYO9B-FCHO1_2.fq --- a/tests/mcf7_pe_35bp/MYO9B-FCHO1_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,40 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:91:1384:1511/2 -CAGGAGCTGGCTGACGCCCGAGAGTACCTGAACAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:91:1384:1511/2 -aabaa^aaaaaaaaaaaaa^^ab\WaaaaQQVaaR -@PATHBIO-SOLEXA2_30LEJAAXX:7:81:1032:792/2 -CTTGGCCATGGTAGACAACCTGCAGGGGGACTCTG -+PATHBIO-SOLEXA2_30LEJAAXX:7:81:1032:792/2 -bbaaaaaaaaaaaaaaaaaa[ZZ_Zaaa_UZa^_U -@PATHBIO-SOLEXA2_30LEJAAXX:7:83:340:930/2 -CGCCCTCAGCCAGAAGGGCTACGCCAGCGGCGTCG -+PATHBIO-SOLEXA2_30LEJAAXX:7:83:340:930/2 -aaabbaaaaaaabaaaabbabaaa_U^aaaaaaaU -@PATHBIO-SOLEXA2_30LEJAAXX:7:10:418:605/2 -GCCGCCTTCTTGGTTTTAGTCTCCGCCTCCAGCAC -+PATHBIO-SOLEXA2_30LEJAAXX:7:10:418:605/2 -aaabaaaaaabaaaaaa^[[bab^H[aaaaaaaaU -@PATHBIO-SOLEXA2_30LEJAAXX:7:14:562:448/2 -CGCGGGACTTGGGCAGGAGCTGGCTGACGCCCGAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:14:562:448/2 -aaaabaaaaaaaaaaaaaaaaaaaa\^[[[^ZRRX -@PATHBIO-SOLEXA2_30LEJAAXX:7:25:1647:751/2 -CGTGATCTCGGGTGAGAGCGGCTCCGGCAAGACCC -+PATHBIO-SOLEXA2_30LEJAAXX:7:25:1647:751/2 -baaaaLbaaaaa``____aa^^^XE[aUEU[VURX -@PATHBIO-SOLEXA2_30LEJAAXX:7:84:437:1347/2 -CGCGGGACTTGGGCAGGAGCTGGCTGACGCCCGAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:84:437:1347/2 -babbaaaaaaaaaa^aaaaaaaaaaaaaaaaa[VR -@PATHBIO-SOLEXA2_30LEJAAXX:7:45:696:1167/2 -CCCGAGAGTACCTGCACAGCATCCAAGGTGCTCAC -+PATHBIO-SOLEXA2_30LEJAAXX:7:45:696:1167/2 -abbabaabaabaaaaaaaa^QXaaU\ZaLaZa[JU -@PATHBIO-SOLEXA2_30LEJAAXX:7:95:842:1694/2 -CGCTGGCCGACGTGGCCTACTACACCATGCTCAGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:95:842:1694/2 -bbaaaaabbabaaaaaaaaaaaaaaaaaaaaaaa[ -@PATHBIO-SOLEXA2_30LEJAAXX:7:49:555:429/2 -CCCAGAGCACCAACTTCCTCATCCACTGCCTCACC -+PATHBIO-SOLEXA2_30LEJAAXX:7:49:555:429/2 -aaaaa\abbaaaab\baa^aaaa\\aaabaaa^aX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/NAV1-GPR37L1_1.fq --- a/tests/mcf7_pe_35bp/NAV1-GPR37L1_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,20 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:72:139:396/1 -CGGGTGTCCCCGGGTGTCAGGCGAGAGCGGTCCTG -+PATHBIO-SOLEXA2_30LEJAAXX:7:72:139:396/1 -bbabbbbbbbaaaaaaaaa_]`aaaa`]__]_aaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:74:1328:1715/1 -CGGTGAATGGCCGCCTGAGCCGGGGAAGATGCTTC -+PATHBIO-SOLEXA2_30LEJAAXX:7:74:1328:1715/1 -aaaLaabaaabaaaaaaaaaaaaaaaaaaaaaaa\ -@PATHBIO-SOLEXA2_30LEJAAXX:7:34:988:884/1 -CTCCGCGTGGCCCACAGCTCATACCTTTTCGGGTG -+PATHBIO-SOLEXA2_30LEJAAXX:7:34:988:884/1 -abbabaaabbbabaa^aabaaaaaaaaabaWS[aU -@PATHBIO-SOLEXA2_30LEJAAXX:7:28:728:1661/1 -CTCATGGGCAAGACCATGACGGAGGATGATGACAT -+PATHBIO-SOLEXA2_30LEJAAXX:7:28:728:1661/1 -baaaaaaababaaabaaaaaaaaaaaaaaaa]_]a -@PATHBIO-SOLEXA2_30LEJAAXX:7:16:1595:2032/1 -GGCACAGAGGCTGAAAGTCGTGACTCCCAGCGAGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:16:1595:2032/1 -a\aaaabaaaaaaLaZaaaa^aZaaaaa^^Q^^aX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/NAV1-GPR37L1_2.fq --- a/tests/mcf7_pe_35bp/NAV1-GPR37L1_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,20 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:72:139:396/2 -ATGGGCCTCACCTTGGGCAGGGTGCTGGTGGCCAC -+PATHBIO-SOLEXA2_30LEJAAXX:7:72:139:396/2 -babbbbbbaaabaaaaaaaaaaaaaaaaaaaa[[U -@PATHBIO-SOLEXA2_30LEJAAXX:7:74:1328:1715/2 -AGATGACAGCCAACTTGGCCAGGATGGATTGGCAC -+PATHBIO-SOLEXA2_30LEJAAXX:7:74:1328:1715/2 -aaaaaabaaaaaaaZRUUaa\aaa___ZaUEMJaU -@PATHBIO-SOLEXA2_30LEJAAXX:7:34:988:884/2 -GGCGCAGAGGCTGAAAGTCGTGACTCCCAGAGAGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:34:988:884/2 -aaaaaaaaaaabaaaabaaabaaaaaaaaaX_]aU -@PATHBIO-SOLEXA2_30LEJAAXX:7:28:728:1661/2 -GGCACAGAGGCTGAAAGTCGTGACTCCCAGAGAGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:28:728:1661/2 -aabaabbaaaaababaaaaaaaaaaaaZKUUVV[X -@PATHBIO-SOLEXA2_30LEJAAXX:7:16:1595:2032/2 -GGTTTCCGACCCTCCGCGTGGCCCACAGCTCATAC -+PATHBIO-SOLEXA2_30LEJAAXX:7:16:1595:2032/2 -baaaaaaaaaaaaaaaaba\aaaaaaL^a\aWbaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/PAPOLA-AK7_1.fq --- a/tests/mcf7_pe_35bp/PAPOLA-AK7_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,8 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:69:384:648/1 -GGAGAAGCGCTTAAAGCGGCGGGAGCGGTGCGGGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:69:384:648/1 -baaaaababbaaaaaabaaaaa[HXH^[J^KX^XM -@PATHBIO-SOLEXA2_30LEJAAXX:7:68:1726:1492/1 -CGTGTGTAACATGCCTCCTTCCGCTCCATACTTGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:68:1726:1492/1 -URKQ^aaaaaa[ERS^aaaaaaaaaa^^^^aZEEE |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/PAPOLA-AK7_2.fq --- a/tests/mcf7_pe_35bp/PAPOLA-AK7_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,8 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:69:384:648/2 -GCCTCCTTCCGCTCCATACTGGAGTCCAGCCGCAA -+PATHBIO-SOLEXA2_30LEJAAXX:7:69:384:648/2 -aabababaabaaaaa^aVZZaUKRZZ_XZZ^ZVKU -@PATHBIO-SOLEXA2_30LEJAAXX:7:68:1726:1492/2 -GAGCGCTTAAAGCGGCGGCAGCGGTGCGGGAGAGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:68:1726:1492/2 -ab\ababaaaaaaaaa[UEEZLaaSREXXQH^JSU |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/RPS6KB1-TMEM49_1.fq --- a/tests/mcf7_pe_35bp/RPS6KB1-TMEM49_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,124 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:10:1274:1893/1 -GGCAGGAGTGTTTGACATAGACCTGGACCAGCCAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:10:1274:1893/1 -aaaaaaaaaaaabbaaabaaaaaaaa`]_aaaaaU -@PATHBIO-SOLEXA2_30LEJAAXX:7:100:634:300/1 -CCAGCCAGAGGACGCGGGCTCTGAGGATGAGCTGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:100:634:300/1 -ZaabaZaaaaaaZaaaaa^a^aaLaababaaaa^a -@PATHBIO-SOLEXA2_30LEJAAXX:7:14:202:351/1 -CAAAGAAGGTCCAAAAACGCACCAGAAAGTGTCCA -+PATHBIO-SOLEXA2_30LEJAAXX:7:14:202:351/1 -aaaaaabaaaaaaaWa^IZWaaaaaaaa^JJ^a_M -@PATHBIO-SOLEXA2_30LEJAAXX:7:2:225:1594/1 -CTTCTGCAGAGATGGACCTATGCCGGGGACAGCAA -+PATHBIO-SOLEXA2_30LEJAAXX:7:2:225:1594/1 -abababaaa^aabaaaabbaaaabaaaaaaaa^[U -@PATHBIO-SOLEXA2_30LEJAAXX:7:30:864:1299/1 -GTGCCCATTTCGCTTTTGTGGTGAAGCTTCTGCCG -+PATHBIO-SOLEXA2_30LEJAAXX:7:30:864:1299/1 -aaaaaaaaaababaaa^Z\Z\RKU[KX^SVUEUZa -@PATHBIO-SOLEXA2_30LEJAAXX:7:86:1461:970/1 -CTGCAGAGATGGACCTATGCCGGGGACAGCAAGTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:86:1461:970/1 -bbbbaaaaaaaaaaaaa^X^`]`aaa^^^`^^^a_ -@PATHBIO-SOLEXA2_30LEJAAXX:7:50:550:1851/1 -GCGGGACGGCTTTTACCCAGCCCCGGACTTCCGAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:50:550:1851/1 -baaababababbbaaabaaaaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:39:1115:223/1 -GACGGCTTTTACCCAGCCCCGGACTTCCGAGACAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:39:1115:223/1 -aaaaa\baaZaaaabaaUab\aH[^a^ab^[HZKM -@PATHBIO-SOLEXA2_30LEJAAXX:7:90:883:1309/1 -GGCTTCTGCAGAGATGGACCTATGCCGGGGACAGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:90:883:1309/1 -b^^VbaaLaaaWaa^abWaba^aVVXaaa^aaaa] -@PATHBIO-SOLEXA2_30LEJAAXX:7:48:1382:1277/1 -GCCCATTTCGCTTTTGTGGTGAAGCTTCTGCCGTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:48:1382:1277/1 -baaaaaabaaaababa`UXa^ZVQaaaaaXaaXa_ -@PATHBIO-SOLEXA2_30LEJAAXX:7:41:1024:486/1 -GAGACAGGGAAGCTGAGGACATGGCAGGAGTGTTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:41:1024:486/1 -aaaaaaOZaaaaaa[aaa]__aa[Q^aXEUMXW[] -@PATHBIO-SOLEXA2_30LEJAAXX:7:45:502:1712/1 -GAAGCTTCTGCCGTTGAGCCTCCAGGTACTCCTGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:45:502:1712/1 -bbaabbabaaaaaa]W`aaaaaaaa_U_aaaa^S^ 
-@PATHBIO-SOLEXA2_30LEJAAXX:7:28:1040:80/1 -CAGTTAAATGAAAGCATGGACCATGGGGGAGTTGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:28:1040:80/1 -aaa\baaaaaabaabaaaaaaaaaaaaaaZX[[\a -@PATHBIO-SOLEXA2_30LEJAAXX:7:83:418:1017/1 -GGCTTCTGCAGAGATGGACCTATGCCGGGGACAGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:83:418:1017/1 -babbbbaaaaaaaaaaaaaaaaaaaaaaa``_aaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:86:329:1148/1 -CAGAGAGTGAAGCAAGCAGCATGGCCCACAGTCTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:86:329:1148/1 -a^aaaabLa^abaaaaaaaaaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:12:40:1105/1 -GACGGCTTTTACCCAGCCCCGGACTTCCGAGACAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:12:40:1105/1 -abaaaabaabaaaaaaaaaUZQ\aaaaaa^aa^V^ -@PATHBIO-SOLEXA2_30LEJAAXX:7:43:1027:1268/1 -TTCGCTTTTGTGGTGAAGCTTCTGCCGTTGAGCCT -+PATHBIO-SOLEXA2_30LEJAAXX:7:43:1027:1268/1 -aaaaaaaabaLLW^aaaaabaW_[WXEX[ERGXa\ -@PATHBIO-SOLEXA2_30LEJAAXX:7:77:269:664/1 -GGCTTCTGCAGAGATGGACCTATGCCGCGGACAGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:77:269:664/1 -aabbababbaaababaaaaabaaaaa\I^aaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:27:1158:1997/1 -CGACGAAGGAGGCGGGACGGGTTTTACCCAGCCCC -+PATHBIO-SOLEXA2_30LEJAAXX:7:27:1158:1997/1 -baaaaaaaaa^\aaabaaaOEUaaaaaXEKJZa^b -@PATHBIO-SOLEXA2_30LEJAAXX:7:62:348:653/1 -GCCCATGAGGCGACGAAGGAGGCGGGAGGGCTGTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:62:348:653/1 -aaaaZa[QHbaZaaa\a^K[aWaaaaHEUaa^QVX -@PATHBIO-SOLEXA2_30LEJAAXX:7:94:413:35/1 -TGGTGAAGCTTCTGCCGTTGAGCCTCCAGGTACTC -+PATHBIO-SOLEXA2_30LEJAAXX:7:94:413:35/1 -aaaaaaa^aaaa^Q^aaaaaaWaaa\ZUEPS^U_a -@PATHBIO-SOLEXA2_30LEJAAXX:7:96:1411:1152/1 -ATAGGATGAAGTAACACACCATGACAACGACCAAC -+PATHBIO-SOLEXA2_30LEJAAXX:7:96:1411:1152/1 -bbbbabaaaaaaaaaaaabbaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:68:468:704/1 -GAAGCTTCTGCCGTTGAGCCTCCAGGTACTCCTGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:68:468:704/1 -aaaaaaZaaaaaaaaaLaaXaaaaa[V\aaaaa[X -@PATHBIO-SOLEXA2_30LEJAAXX:7:78:390:343/1 -GCGGGCTCTGAGGATGAGCTGGAGGAGGGGGGTCA -+PATHBIO-SOLEXA2_30LEJAAXX:7:78:390:343/1 -aaaaaaabbaaaabbaaabaaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:80:168:362/1 
-CCGAGACAGGGAAGCTGAGGACATGGCAGGGGTGT -+PATHBIO-SOLEXA2_30LEJAAXX:7:80:168:362/1 -aaaWaaaaZab\aaaabZaaaaaaaaa^aa\aXaL -@PATHBIO-SOLEXA2_30LEJAAXX:7:38:257:225/1 -GCCCATTTCGCTTTTGTGGTGAAGCTTCTGCCGTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:38:257:225/1 -bbbaaabbbaaaa\aa`[X_Z[^aaaaaaXaaaa^ -@PATHBIO-SOLEXA2_30LEJAAXX:7:15:300:871/1 -CTGCCGTTGAGCCTCCAGGTACTCCTGAAATGGCT -+PATHBIO-SOLEXA2_30LEJAAXX:7:15:300:871/1 -baaaaaaaaaaaaaaaaaa_]`aaa_VXUXa^Z^a -@PATHBIO-SOLEXA2_30LEJAAXX:7:96:705:1693/1 -ACCCATTTCGCTTTTGTGGTGAAGCTTCTGCCGTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:96:705:1693/1 -ababaaaabababb`W`aa_ZZaaabbbaaaaaa^ -@PATHBIO-SOLEXA2_30LEJAAXX:7:56:488:257/1 -TGAAATGGCTTCTGCAGAGATGGACCTATGCCGGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:56:488:257/1 -baaaaababbbaaaa]_]`]`aaaaaaa_V_^V[O -@PATHBIO-SOLEXA2_30LEJAAXX:7:33:364:81/1 -GCCAACTTTTCAAACATCCAGGACAACCAGTTTTC -+PATHBIO-SOLEXA2_30LEJAAXX:7:33:364:81/1 -aab^abbbaaabbbaaabbaaaaaaaaaaaaabaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:55:572:1065/1 -GTTAATGATAGATAGGATGAAGTAACACACCATGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:55:572:1065/1 -bbbbbaaabaaaaaaaaaaaaaaaaaaaaa`_`__ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/RPS6KB1-TMEM49_2.fq --- a/tests/mcf7_pe_35bp/RPS6KB1-TMEM49_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,124 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:10:1274:1893/2 -GCCCATTTCGCTTTTGTGGTGAAGCTTCTTCCGTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:10:1274:1893/2 -bbaa^abaaabbbaaaaa\aSX`Uaa`JXEUJ`aX -@PATHBIO-SOLEXA2_30LEJAAXX:7:100:634:300/2 -CCCATTTCGCTTTTGTGGTGAAGCTTCTGCCGTTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:100:634:300/2 -QZaa^aW^aaaaaa^WJMUEMbWaaaaEKXEE^ZE -@PATHBIO-SOLEXA2_30LEJAAXX:7:14:202:351/2 -GTTTGACATAGACATGGACCAGCCAGAGGACGCGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:14:202:351/2 -_JJ`baaabaabXE[aaaabaaZaaaaaaa\IK_Z -@PATHBIO-SOLEXA2_30LEJAAXX:7:2:225:1594/2 -CGCGGGCTCTGAGGATGAGCTGGAGGAGGGGGAAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:2:225:1594/2 -aabbaababaa^\aaaaZaaaaaaaaZaaa^aa^U -@PATHBIO-SOLEXA2_30LEJAAXX:7:30:864:1299/2 -GGGAAGCTGAGGACATGGCAGGAGTGTTTGACATA -+PATHBIO-SOLEXA2_30LEJAAXX:7:30:864:1299/2 -baaaaaaabaaaaaaabaaaZ`]a[a^aa^_^\^P -@PATHBIO-SOLEXA2_30LEJAAXX:7:86:1461:970/2 -GGCGGGCTCTGAGGATGAGCTGGAGGAGGGGGGTC -+PATHBIO-SOLEXA2_30LEJAAXX:7:86:1461:970/2 -aabbbaaaaaaaaaaaaaaaa_[_aa[Z\]_aa[U -@PATHBIO-SOLEXA2_30LEJAAXX:7:50:550:1851/2 -GGCTTCTGCAGAGATGGACCTATGCCGGGGACAGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:50:550:1851/2 -ababbaaaaaaaaaaaaaaaaaaaaaaa^`RMU[K -@PATHBIO-SOLEXA2_30LEJAAXX:7:39:1115:223/2 -GCCTCCAGGTACTCCTGAATTGGCTTCTGCAGAGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:39:1115:223/2 -baaaaaL\aWbaaa\aLa[H[WWaaaaREXaaUEX -@PATHBIO-SOLEXA2_30LEJAAXX:7:90:883:1309/2 -GGCCCATGAGGCGACGAAGGAGGCGGGACGGCTTT -+PATHBIO-SOLEXA2_30LEJAAXX:7:90:883:1309/2 -aaWaaaaaaLaaaaabaaaZaabWaaaEEOaZW^U -@PATHBIO-SOLEXA2_30LEJAAXX:7:48:1382:1277/2 -TGGCAGGAGTGTTTGACATAGACCTGGACCAGCCA -+PATHBIO-SOLEXA2_30LEJAAXX:7:48:1382:1277/2 -aaaaaaaaaaababaaaabaaaaaaaaaaaaaaaU -@PATHBIO-SOLEXA2_30LEJAAXX:7:41:1024:486/2 -TTCGCTTTTGTGGTGAAGCTTCTGCCGTTTAGCCT -+PATHBIO-SOLEXA2_30LEJAAXX:7:41:1024:486/2 -aaa[H[aaPEUOEUE[[Q[aUUZGEUUaXEEEKZE -@PATHBIO-SOLEXA2_30LEJAAXX:7:45:502:1712/2 -CGGCTTTTACCCAGCCCCGGACTTCCGAGACAGGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:45:502:1712/2 -aaaababaaaabaaaaaba[X^aaa^I^Z^aaaaX 
-@PATHBIO-SOLEXA2_30LEJAAXX:7:28:1040:80/2 -GAAGCTTCTGCCGTTGAGCCTCCAGGTACTCCTGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:28:1040:80/2 -aaaaaaabaaa_S`_S^Z[aaaaXRUaaaaaa_QU -@PATHBIO-SOLEXA2_30LEJAAXX:7:83:418:1017/2 -GGCGGGACGGCTTTTACCCAGCCCCGGACTTCCGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:83:418:1017/2 -baabbaabbaaaabaaaaa[V^aaaa[V^aaaaXE -@PATHBIO-SOLEXA2_30LEJAAXX:7:86:329:1148/2 -GTTTGGGGCTGGAATGAAAGGCCATGAATCTGGAA -+PATHBIO-SOLEXA2_30LEJAAXX:7:86:329:1148/2 -aaaaaab^aa^W^aabaaaaaaaaaaa^aaa\aaX -@PATHBIO-SOLEXA2_30LEJAAXX:7:12:40:1105/2 -TGGTGAAGCTTCTGCCGTTGAGCCTCCAGGTACTC -+PATHBIO-SOLEXA2_30LEJAAXX:7:12:40:1105/2 -Oaaaaaaaaabaaa\abaab^^aaaaa^aaLaaaX -@PATHBIO-SOLEXA2_30LEJAAXX:7:43:1027:1268/2 -GGAAGCTGAGGACATGGCAGGAGTGTTTGACATAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:43:1027:1268/2 -aaaaaaaaaWbaaaaaa\abaWa\aa^^aZWa^^Z -@PATHBIO-SOLEXA2_30LEJAAXX:7:77:269:664/2 -GACGAAGGAGGCGGGACGGCTTTTACCCAGCCCCG -+PATHBIO-SOLEXA2_30LEJAAXX:7:77:269:664/2 -baaba^aaaaaaaaa\aaa\aaaaaaaaaaaaaaX -@PATHBIO-SOLEXA2_30LEJAAXX:7:27:1158:1997/2 -CTGAAATGGCTTCTGCAGAGATGGACCTATGCCGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:27:1158:1997/2 -abaaaaaaaaababab\aaaa\aaWaaSQ[ZHSSR -@PATHBIO-SOLEXA2_30LEJAAXX:7:62:348:653/2 -CTTATGCAGAGAGGGACCTATGCCGGGGACAGCAA -+PATHBIO-SOLEXA2_30LEJAAXX:7:62:348:653/2 -aa^H[baXEUZQUXH[[EZaIZZ`ZZaaZJ__[^K -@PATHBIO-SOLEXA2_30LEJAAXX:7:94:413:35/2 -CCGAGACAGGGAAGCTGAGGACATGGCAGGAGTGT -+PATHBIO-SOLEXA2_30LEJAAXX:7:94:413:35/2 -aaaababaaaaaaaaaaaaaaa^baaaaaaaa\aX -@PATHBIO-SOLEXA2_30LEJAAXX:7:96:1411:1152/2 -GTTAAATGAAAGCATGGACCATGGGGGAGTTGGAC -+PATHBIO-SOLEXA2_30LEJAAXX:7:96:1411:1152/2 -bbbbbbaaabaaaaaaaaaaaaaaa^aaaaaaaaX -@PATHBIO-SOLEXA2_30LEJAAXX:7:68:468:704/2 -TTCCGAGACAGGGAAGCTGAGGACATGGCAGGAGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:68:468:704/2 -babbaaab^a^aWaaaaaa^aaa\``_Za\a^QXH -@PATHBIO-SOLEXA2_30LEJAAXX:7:78:390:343/2 -CAACGACCAACTTTTCAAACATCCAGGACAACCAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:78:390:343/2 -aaaaaaaaWabaaaaaZaabaaaaOEU`aUa\aQU -@PATHBIO-SOLEXA2_30LEJAAXX:7:80:168:362/2 
-GAAGCTTCTGCCGTTGAGCCTCCAGGTACTCCTGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:80:168:362/2 -aaaaaaaababaaba^baabaaaaaaabbbaa\\X -@PATHBIO-SOLEXA2_30LEJAAXX:7:38:257:225/2 -GGAGTGTTTGACATAGACCTGGACCAGCCAGAGGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:38:257:225/2 -aa\aabbbbaabba^aaaaaaaaaaaaa_^QaaaU -@PATHBIO-SOLEXA2_30LEJAAXX:7:15:300:871/2 -CTTTTACCCAGCCCCGGACTTCCGAGACAGGGAAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:15:300:871/2 -bbbbaaabaaaabb`]_]`aaa^VV^^^[^_^^^` -@PATHBIO-SOLEXA2_30LEJAAXX:7:96:705:1693/2 -GGAGTGTTTGACATAGACCTGGACCAGCCAGAGGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:96:705:1693/2 -abaaaaabaaababbaaabaaaaaaaaabaaabaX -@PATHBIO-SOLEXA2_30LEJAAXX:7:56:488:257/2 -CTTTTACCCAGCCCCGGACTTCCGAGACAGGGAAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:56:488:257/2 -bbabaaabaaaabbaaaWaaaaaa[]]_]`][PMX -@PATHBIO-SOLEXA2_30LEJAAXX:7:33:364:81/2 -GAGGACGCGGGCTCTGAGGATGAGCTGGAGGAGGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:33:364:81/2 -aaaaaaaaabaaaaaaaab^aaaaaaaabaaaaaU -@PATHBIO-SOLEXA2_30LEJAAXX:7:55:572:1065/2 -TGAAAGCATGGACCATGGGGGAGTTGGACCATATG -+PATHBIO-SOLEXA2_30LEJAAXX:7:55:572:1065/2 -baaaaaabbababaaaaaaaaaaaaaaaa`^H^^U |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/STK11-MIDN_1.fq --- a/tests/mcf7_pe_35bp/STK11-MIDN_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,40 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:91:902:1724/1 -GAGCTTGCATCACGGACTGCTCCGGCCTTGAGGCC -+PATHBIO-SOLEXA2_30LEJAAXX:7:91:902:1724/1 -bbabbabaaaabbaaaaaaabbaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:70:727:1607/1 -CTGGGCGTGGCAGCGGAGCTGGGCCAGCGAAGGGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:70:727:1607/1 -baaabaaabbaaaaaa\aaaaaa\aaaaa]X`aaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:85:1254:2020/1 -AGCCGCGCCGCAAGCGGGCCAAGCTCATCGGCAAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:85:1254:2020/1 -ababababababaabaaaaaZaaaaaUaaaaaaa_ -@PATHBIO-SOLEXA2_30LEJAAXX:7:92:651:457/1 -AGCGGGCCAAGCTCATCGGCAAGTACCTGATGGGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:92:651:457/1 -aaaaabbaaaaabaaaaaaaaaaaaaaa]]`^S^_ -@PATHBIO-SOLEXA2_30LEJAAXX:7:30:916:209/1 -CGGTGGGTACCAAGGTCAGCTTGCTGCCATCACCC -+PATHBIO-SOLEXA2_30LEJAAXX:7:30:916:209/1 -aWaa^Wa^Waaaaaa^aZa\a]J_[SVVQ[\^_aa -@PATHBIO-SOLEXA2_30LEJAAXX:7:86:886:1490/1 -CTGGACTCGGAGACGCTGTGCAGGAGGGCCGTCAA -+PATHBIO-SOLEXA2_30LEJAAXX:7:86:886:1490/1 -bbbaabbaaaaaabbaaaaa[X[_aaa^X[^[RU[ -@PATHBIO-SOLEXA2_30LEJAAXX:7:51:1121:401/1 -GTCAGCTTGCTGCCATCACCCACGCCGAACTCCTG -+PATHBIO-SOLEXA2_30LEJAAXX:7:51:1121:401/1 -baaaabaaaaaaabbaaaabaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:94:660:647/1 -CTGCCATCACCCACGCCGAACTCCTGCAGCTTCCC -+PATHBIO-SOLEXA2_30LEJAAXX:7:94:660:647/1 -bbabbabbbbbaaaaaaaaaaaaaaa_V^aaaaaa -@PATHBIO-SOLEXA2_30LEJAAXX:7:28:565:460/1 -GCCCGCTTCCACGGTGGGTACCAAGGTCAGCTTGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:28:565:460/1 -aabb^aaaaaaaa^aZ\a^aaaaaaaa^^^aaZX] -@PATHBIO-SOLEXA2_30LEJAAXX:7:5:1206:149/1 -GCTGGACTCGGAGACGCTGTGCAGGAGGGCCGTGA -+PATHBIO-SOLEXA2_30LEJAAXX:7:5:1206:149/1 -L]S]La\a_Z`aaaaaaaaaaWaVXUX]`GRUKEX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/STK11-MIDN_2.fq --- a/tests/mcf7_pe_35bp/STK11-MIDN_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,40 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:91:902:1724/2 -GTGAAGGAGGTGCTGGACTCGGAGACGCTGTGCAG -+PATHBIO-SOLEXA2_30LEJAAXX:7:91:902:1724/2 -abbaaaaaabaaaabbabbbaaaaaaaaaaa^MUX -@PATHBIO-SOLEXA2_30LEJAAXX:7:70:727:1607/2 -CCCGGGGCGCCCGCGAGTGAGGCGCGGGGCGGCGG -+PATHBIO-SOLEXA2_30LEJAAXX:7:70:727:1607/2 -aabaaaaaaaaaaa]S_aa[X`a^V[]aaZaaJUK -@PATHBIO-SOLEXA2_30LEJAAXX:7:85:1254:2020/2 -GCCATCACCCACGCCGAACTCCTGCAGCTTCCCCG -+PATHBIO-SOLEXA2_30LEJAAXX:7:85:1254:2020/2 -baaaabaaaa^a\b_[[aaaaaZaaaaaaaaabaP -@PATHBIO-SOLEXA2_30LEJAAXX:7:92:651:457/2 -GCTGCCATCACCCACGCCGAACTCCTGCAGCTTCC -+PATHBIO-SOLEXA2_30LEJAAXX:7:92:651:457/2 -bbabbaaaaaaaaaaaaaaaaaaaaaa^Z_aaaa[ -@PATHBIO-SOLEXA2_30LEJAAXX:7:30:916:209/2 -TCTGGACTCGGAGACGCTGTGCAGGAGGGCCGTCT -+PATHBIO-SOLEXA2_30LEJAAXX:7:30:916:209/2 -abbaabaaLaWaaabbLWaUEER]aa]JM\UUEEK -@PATHBIO-SOLEXA2_30LEJAAXX:7:86:886:1490/2 -GTCAGCTTGCTGCCATCACCCACGCCGAACTCCTG -+PATHBIO-SOLEXA2_30LEJAAXX:7:86:886:1490/2 -abbaaaababaaabbaaaaa^V^aaa\^^aaaa[P -@PATHBIO-SOLEXA2_30LEJAAXX:7:51:1121:401/2 -AGGTGCTGGACTCGGAGACGCTGTGCAGGAGGGCC -+PATHBIO-SOLEXA2_30LEJAAXX:7:51:1121:401/2 -aaaaabbbaaabbaaaabbaabaaaaaaaaaaaaU -@PATHBIO-SOLEXA2_30LEJAAXX:7:94:660:647/2 -CGCAAGCGGGCCAAGCTCATCGGCAAGTACCTGAT -+PATHBIO-SOLEXA2_30LEJAAXX:7:94:660:647/2 -aaaaaaaaaaaaaaaaa`X`aaa`__aaaaaaa_X -@PATHBIO-SOLEXA2_30LEJAAXX:7:28:565:460/2 -GCAAGTACCTGATGGGGGACCTGCTGGGGGAAGGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:28:565:460/2 -baaaaabbaaaaaaZaabaaaababa^aaaaaa]U -@PATHBIO-SOLEXA2_30LEJAAXX:7:5:1206:149/2 -GCTTGCATCACGGACTGTTCCGGCCTTGAGGCCTG -+PATHBIO-SOLEXA2_30LEJAAXX:7:5:1206:149/2 -E[aaaaaaaaaaWabaXaaa_X_aaa^H^QUaa]K |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/SULF2-PRICKLE2_1.fq --- a/tests/mcf7_pe_35bp/SULF2-PRICKLE2_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:28:1112:1028/1 -CGCCGAGGGCGAGCAGGAGCGAGAGTGTGTCGAGC -+PATHBIO-SOLEXA2_30LEJAAXX:7:28:1112:1028/1 -bbbaaUaaaaaaabWbaaaaaabbaZaaaaZaZaZ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/mcf7_pe_35bp/SULF2-PRICKLE2_2.fq --- a/tests/mcf7_pe_35bp/SULF2-PRICKLE2_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30LEJAAXX:7:28:1112:1028/2 -CCATCTCCTGGTACTGCTGGGCAGAGAGCAGGCTG -+PATHBIO-SOLEXA2_30LEJAAXX:7:28:1112:1028/2 -ZbaZabaaaaaaaaaaa`Z]V^Zaaa^aaaaaaaU |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/APP-AR_1.fq --- a/tests/vcap_pe_53bp/APP-AR_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:33:970:1315/1 -TTTGTGTGTTGCCCACTGGCTGAAGAAAGTGACAATGTGGATTCTGCTGATGC -+PATHBIO-SOLEXA2_30TUEAAXX:3:33:970:1315/1 -ababbbaababaaba^aaaaaaabbabaaaaaaaaaaaaaabbbaaabaaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/APP-AR_2.fq --- a/tests/vcap_pe_53bp/APP-AR_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:33:970:1315/2 -GGACGGCGGCCGAGGGTAGACCCTTCCCAGCCCTAACTGCACTTCCATCCTTG -+PATHBIO-SOLEXA2_30TUEAAXX:3:33:970:1315/2 -aaabaaaaabaaaaaaaaaabbaaabbbaaaaaaaaaaaabaaaaaaaaaaa^ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/BC018860-NDRG1_1.fq --- a/tests/vcap_pe_53bp/BC018860-NDRG1_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:7:321:939/1 -TGTGGACCACTTCCACGTTACTCTGCATTTCTTCCTTCCCAAAAAGGTGGGAC -+PATHBIO-SOLEXA2_30TUEAAXX:3:7:321:939/1 -aaaaaaaaaababaaaaaaa^K\baaaaaaa[E[aa\aaaaaaaaaaa^X[b] |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/BC018860-NDRG1_2.fq --- a/tests/vcap_pe_53bp/BC018860-NDRG1_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:7:321:939/2 -TGACTCTTACCTCCCTCTCTCCTACTCCTGCTCGCATCTGCTATAGTGGAAGC -+PATHBIO-SOLEXA2_30TUEAAXX:3:7:321:939/2 -baaaaabaaaabbaaaaabbaba^abbbabbbbaaabbaaabaaaaWaaZ\^` |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/BC021729-FRY_1.fq --- a/tests/vcap_pe_53bp/BC021729-FRY_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,12 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:40:1302:1557/1 -CGCGATTTTGGCTCACGGCAAGCTCCACCTCCTAAATGGCTCACGCCATTCTC -+PATHBIO-SOLEXA2_30TUEAAXX:3:40:1302:1557/1 -abaaaaabaaaababaaaaabababaaaaabaaaabaaaaaaaa^ba^a\aa^ -@PATHBIO-SOLEXA2_30TUEAAXX:3:91:975:1707/1 -GGAAATGACATTAGAGGAACACTTCAAAGAAACATTAAGAACTTGGATCCCAT -+PATHBIO-SOLEXA2_30TUEAAXX:3:91:975:1707/1 -aOa\aa[SZX`aaaaUaaaJaaaa[K[aZZX]UaaaaaZ^a_[QX_\aabaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:77:805:952/1 -AATGATTATAAATCATCAAGAAGGAGTAGCCTGCCACCTGAACTACTCATAGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:77:805:952/1 -abaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_[`aaaaaaaa_]` |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/BC021729-FRY_2.fq --- a/tests/vcap_pe_53bp/BC021729-FRY_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,12 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:40:1302:1557/2 -TCCAAGGCAGGCAGATCAGGAGGTCAGGAGATCGAGACCATCCTGGCTAACAT -+PATHBIO-SOLEXA2_30TUEAAXX:3:40:1302:1557/2 -aabaaaaaWaabaaaaabaaaabababaaaabaaaaaabaaaaabaa^aaa^b -@PATHBIO-SOLEXA2_30TUEAAXX:3:91:975:1707/2 -TCTGGGCCCACATTGATGGGTAGCATGGTTGGCGGCCCTTTCTCCCTGTGCGT -+PATHBIO-SOLEXA2_30TUEAAXX:3:91:975:1707/2 -XaaaaaM[aa`X`aab^HXbaaaaWaaaaaaaaaaaa^^SVabaZababaaaX -@PATHBIO-SOLEXA2_30TUEAAXX:3:77:805:952/2 -CGGCCCTTTCTCCCTGTGCGTGCCAGAAGCAGGTGGAACCGGAGGCTTGATGT -+PATHBIO-SOLEXA2_30TUEAAXX:3:77:805:952/2 -aaaabaaabaaabbaaaaaaaaaaaaaaaaaaaaaaaaaaa_^_aaa`]]_Z^ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/HJURP-EIF4E2_1.fq --- a/tests/vcap_pe_53bp/HJURP-EIF4E2_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,24 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:72:1274:1895/1 -ACGAGGGACCATCAGTTCCCTGCAAAAAGACCCAGGCGATCAGAACCCCAGGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:72:1274:1895/1 -bbbaaaabbbbbaaaaaaaaabbaaaaaaaaabaaa[Q^abbaaaaaaaa``` -@PATHBIO-SOLEXA2_30TUEAAXX:3:37:371:2034/1 -GTTTTTTCCTTCTCACCATCTTTCTGTGTGCTGTTTTCTTCATTCTGATCATG -+PATHBIO-SOLEXA2_30TUEAAXX:3:37:371:2034/1 -bbbbbbbbbbbbbbabbbbbbbbbbaabaabbbaaabaaaabbaabbaabab^ -@PATHBIO-SOLEXA2_30TUEAAXX:3:96:73:466/1 -TTTTTTCCTTCTCACCATCTTTCTGTGTGCTGTTTTCTTCATTCTGATCATGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:96:73:466/1 -abbaaababbbbabbbaabbaabbbbabbb_X_aaabaaaabbabbbabbbaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:54:166:83/1 -TGTTTTCTTCATTCTGATCATGGTCCCCACTGTCATCATCTTTCAAACTCTCT -+PATHBIO-SOLEXA2_30TUEAAXX:3:54:166:83/1 -baaaabaa\bbaaaaaaaaaZaaa^aaabaabaaZabZaaaaaaaaaaaaaWa -@PATHBIO-SOLEXA2_30TUEAAXX:3:98:145:271/1 -TTTTCTTCATTCTGATCATGGTCCCCACTGTCATCATCTTTCAAACTCTCTTC -+PATHBIO-SOLEXA2_30TUEAAXX:3:98:145:271/1 -aaabababbaabbaabbabbaaabbbbaaaabbbabbabbabaaabaabaaba -@PATHBIO-SOLEXA2_30TUEAAXX:3:1:1745:1639/1 -GTTTTTTCCTTCTCACCATCTTTCTGTGTGCTGTTTTCTTCATTCTGATCATG -+PATHBIO-SOLEXA2_30TUEAAXX:3:1:1745:1639/1 -babbbbbaaababbabaaaaaabaabaaaaaaaMaaaaaaaaabaaaaaaaa[ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/HJURP-EIF4E2_2.fq --- a/tests/vcap_pe_53bp/HJURP-EIF4E2_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,24 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:72:1274:1895/2 -TGCTGTTTTCTTCATTCTGATCATGGTCCCCACTGTCATCATCTTTCAAACTC -+PATHBIO-SOLEXA2_30TUEAAXX:3:72:1274:1895/2 -bbbaaabbbbbbaaabbbbbbaaaaaabbbaaabaaaaaaaabaaaa`]]aba -@PATHBIO-SOLEXA2_30TUEAAXX:3:37:371:2034/2 -GAGGGACCATCAGTTCCCTGCAAAAAGACCCAGGCTATCAGAACCCCAGGGCT -+PATHBIO-SOLEXA2_30TUEAAXX:3:37:371:2034/2 -babbaaaaaaaaaaabbbbbaaaaaaaaabaaaaaaaaaaaaaaab`U``aaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:96:73:466/2 -AGTTCCCTGCAAAAAGACCCAGGCTATCAGAACCCCAGGGCTCCGGACGCCAG -+PATHBIO-SOLEXA2_30TUEAAXX:3:96:73:466/2 -[ZQ\`aaaaa\aaaaaabbaaaabaaaaa_X]abaaaaaaaaaaaaaaaaaXX -@PATHBIO-SOLEXA2_30TUEAAXX:3:54:166:83/2 -CCATCACGAGGGATGGCACGAGGGACCATCAGTTCCCTGCAAAAAGACCCAGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:54:166:83/2 -aaaaaaaaaZaa\aaaababaaaaaabaaaaaaaaaaabaaaMUWaaaaaUaZ -@PATHBIO-SOLEXA2_30TUEAAXX:3:98:145:271/2 -CTCGTGCCATCACGAGGGATGGCACGAGGGACCATCAGTTCCCTGCAAAAAGA -+PATHBIO-SOLEXA2_30TUEAAXX:3:98:145:271/2 -aabaa^abbaaaaabaaaaaaababaaaaaabaaaaXabbbbbabaa^W___a -@PATHBIO-SOLEXA2_30TUEAAXX:3:1:1745:1639/2 -AGGGACCATCAGTTCCCTGCAAAAAGACCCAGGCTATCAGAACCCCAGGGCTC -+PATHBIO-SOLEXA2_30TUEAAXX:3:1:1745:1639/2 -aaaaaaaaUaaaaaaaaaaaaZaa\_Z_aaaaaa`X`aaaRXR^aXG[V^[V^ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/INPP4A-HJURP_1.fq --- a/tests/vcap_pe_53bp/INPP4A-HJURP_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,12 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:46:530:1413/1 -TCTTCCATACTGTAAGACGTGTTCTCTCCTCTGCGCATGCACTCCAGGGCCTG -+PATHBIO-SOLEXA2_30TUEAAXX:3:46:530:1413/1 -baaa_S_a\ab^Z^aaab^JVVaaa[aW^V_^abLaaaaaabaaaaaa_SZaJ -@PATHBIO-SOLEXA2_30TUEAAXX:3:36:797:1728/1 -AAATAACCTAGCTACACACTTTTAGTTTCCAATTTTTCTAGCATGAAATCACT -+PATHBIO-SOLEXA2_30TUEAAXX:3:36:797:1728/1 -ababaabbbabbbbbbbbbbabbababbbaabbbababaaabbabaaaababa -@PATHBIO-SOLEXA2_30TUEAAXX:3:73:1113:793/1 -GAAATAACCTAGCTACACACTTTTAGTTTCCAATTTTTCTAGCATGAAATCAC -+PATHBIO-SOLEXA2_30TUEAAXX:3:73:1113:793/1 -aaaaabbbbbaaabbbbbbbbbbbaaaaabaaaaaabbabaaabbbaaaaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/INPP4A-HJURP_2.fq --- a/tests/vcap_pe_53bp/INPP4A-HJURP_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,12 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:46:530:1413/2 -ATGCCCGGAAGAATAAGAACGTCGACATTATCTGGCAAGCTGCTGAGAGCTGC -+PATHBIO-SOLEXA2_30TUEAAXX:3:46:530:1413/2 -[aLabbbaaaa_V^aaaaaQU^aLa^Z_^J^aa^S^aaWaaaXa[E[Uaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:36:797:1728/2 -TCACCAGCTGCAAGAGCGCTAAGGACCGTACAGCCATGTCGGTGACACTGGAG -+PATHBIO-SOLEXA2_30TUEAAXX:3:36:797:1728/2 -baaaaaabaaaaaaaaaabaaaaaaabbabaaaaaaaaaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:73:1113:793/2 -GCTAAGGACCGTACAGCCATGTCGGTGACACTGGAGCAGTGCCTGATCCTGCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:73:1113:793/2 -aabbbaaaabbaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaa`_`aaaa`\ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/PIK3C2A-TEAD1_1.fq --- a/tests/vcap_pe_53bp/PIK3C2A-TEAD1_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,16 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:48:1022:1308/1 -TTGGAATGAAAATCACGAGATTTCCTTCTGGCAAGAACCTGAATGTGACTAGA -+PATHBIO-SOLEXA2_30TUEAAXX:3:48:1022:1308/1 -bbbbabbabbaaababbbbaabbbbbbbaaaaaaaaaaaaaaaaaabaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:36:333:663/1 -CCGAGACGATCTGGGCTGAGGACATGGCCGCCATGTGCTGCAGGGCCTTATCC -+PATHBIO-SOLEXA2_30TUEAAXX:3:36:333:663/1 -baaaaaaaabbaaaaaaaaaaaaaaaababbaaaaaaaaa_]`aaaaaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:74:156:525/1 -GAAGCTTTCGGCGGCGGCTGAGCCAGCTGAGGGGAAAAATGGCTCGGACTGTG -+PATHBIO-SOLEXA2_30TUEAAXX:3:74:156:525/1 -baaaababaaaaaaaaa^a`__aaZa]_X_`aa^X[aa_O[ZPX[R[KK_[MR -@PATHBIO-SOLEXA2_30TUEAAXX:3:3:1578:1127/1 -CGGACTGTGGCGGCTTCGGCGGCTCAGGGTGTGGTAGAGGGGGAGGCTCAAGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:3:1578:1127/1 -aabaaaaaaaaaa^aaaaa]__JXUER]S_OEE[OKUEEUER[[^^HOEEEUE |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/PIK3C2A-TEAD1_2.fq --- a/tests/vcap_pe_53bp/PIK3C2A-TEAD1_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,16 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:48:1022:1308/2 -TTTCGGCGGCGGCTGAGCCAGCTGAGGGGAAAAATGGCTCGGACTGTGGCGGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:48:1022:1308/2 -bbbbaaaaaaaaaaaaaaaaaaa[^^aa^S^aaa^V_aa`Z[GU]^Z^^V[[G -@PATHBIO-SOLEXA2_30TUEAAXX:3:36:333:663/2 -TTGTGGTAGAGGCGGAGGCTCAGGCAGCTTGAGGTAGGAATGAATTGATAGCC -+PATHBIO-SOLEXA2_30TUEAAXX:3:36:333:663/2 -baaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa`_`a_``aaaaaaaa[ -@PATHBIO-SOLEXA2_30TUEAAXX:3:74:156:525/2 -TGGAATGAAAATCACGAGGCTTCCTTCTGGCAAGAACCTGAATGTGACTAGAC -+PATHBIO-SOLEXA2_30TUEAAXX:3:74:156:525/2 -bbbaaaaaaaaababbaXGXaabbabaaaaaaaaaabbaaaaaaaaaaaaaa^ -@PATHBIO-SOLEXA2_30TUEAAXX:3:3:1578:1127/2 -CGCCATGTGCTGCAGGGCCTTATCCTTTGCAGTCTGATCCTTTAGCTTGGAAT -+PATHBIO-SOLEXA2_30TUEAAXX:3:3:1578:1127/2 -aaaabbbaaaaaaaaaabbaaaaaaaabaaaaaaabbaaaaaaaaaaaaaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/RANBP17-DOCK2_1.fq --- a/tests/vcap_pe_53bp/RANBP17-DOCK2_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,16 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:93:1666:460/1 -GAAGGTCTCAATGTAGAAGGAGTAGTGCTGGTCACCCATCTGGTTTAAGATGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:93:1666:460/1 -aaaaaaaabaaababaabaaaaaaabaaaaaaaaaaaaaaaaaaaaaaabaa_ -@PATHBIO-SOLEXA2_30TUEAAXX:3:4:124:1495/1 -TTGTCTTTGAAGACTGTCGGAACCAGTGGTCAGTATCCAGGCCTCTCCTGGGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:4:124:1495/1 -ababbbbbaaaaaabaaaaaaaa_X_^aaaaZ^IZaaaaaaaaaaaba^Z\ba -@PATHBIO-SOLEXA2_30TUEAAXX:3:46:889:124/1 -GCCTCTCCTGGGGCTCATCCTGCTCAATGAGAAGGCCTTCACCTACCACCATA -+PATHBIO-SOLEXA2_30TUEAAXX:3:46:889:124/1 -aaaaaaaa[U^aabbaaaaaaaa][`a_S]_`[XEMGZaaa\[^aaa^S^^JU -@PATHBIO-SOLEXA2_30TUEAAXX:3:35:1057:977/1 -TGCTGGTCACCCATCTGGTTTAAGATGGCTGTCATACATGCCACAAAGTGACT -+PATHBIO-SOLEXA2_30TUEAAXX:3:35:1057:977/1 -aabbbb^baabbbbaaaaabbbbbbbbaaaaaaaaaaaaaaaaaaa]]Zaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/RANBP17-DOCK2_2.fq --- a/tests/vcap_pe_53bp/RANBP17-DOCK2_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,16 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:93:1666:460/2 -TCAGTATCCAGGCCTCTCCTGGGGCTCATCCTGCTCAATGAGAAGGCCTTCAC -+PATHBIO-SOLEXA2_30TUEAAXX:3:93:1666:460/2 -baaaaaaabaaaaaaaaaaaaaaaaabaaaaaaaa`__aaaaa]]`aaaaZaR -@PATHBIO-SOLEXA2_30TUEAAXX:3:4:124:1495/2 -TGCTGGTCACCCATCTGGTTTAAGATGGCTGTCATACATGCCACAAAGTGACT -+PATHBIO-SOLEXA2_30TUEAAXX:3:4:124:1495/2 -UUaaaaaa^aa_U_aWaabba][]aaaabaaaaa^aUKU^bXEXERJ\Uaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:46:889:124/2 -TGGAAGGTCTCAATGTAGAAGGAGTAGTGATGGTCACCCATCTGGTTTAAGAT -+PATHBIO-SOLEXA2_30TUEAAXX:3:46:889:124/2 -aaUaa\HKRGOaaaXGU`_]a`_^Q^W^\Q_a_Z_`[_a^V^aaaaUGRQV_a -@PATHBIO-SOLEXA2_30TUEAAXX:3:35:1057:977/2 -ACCATTGTCTTTGAAGACTGTCGGAACCAGTGGTCAGTATCCAGGCCTCTCCT -+PATHBIO-SOLEXA2_30TUEAAXX:3:35:1057:977/2 -aabaabaaaaaaaaabababbaaaaaababaaaaaaaaaaabaaaaaaaaaba |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/RC3H2-RGS3_1.fq --- a/tests/vcap_pe_53bp/RC3H2-RGS3_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,24 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:37:646:111/1 -AAGGACTCAGATACCCTTTGAAGTCCCACAGTACCCACAGACAGAAGGCAGAG -+PATHBIO-SOLEXA2_30TUEAAXX:3:37:646:111/1 -abbaaaaaaaabaaaabbaaabaaabbaaabaaa^babaaaabbbbaabba^b -@PATHBIO-SOLEXA2_30TUEAAXX:3:7:1283:1650/1 -CAGCACACTCTCTTCTTCTGCTCCTGCGAGTGCGCTTCCAAAGTGAATAAGCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:7:1283:1650/1 -aaabaaabababbaabbbaababbabbaaaaaaaaaabaaa^a^aaaaaaaba -@PATHBIO-SOLEXA2_30TUEAAXX:3:17:1054:638/1 -GGTGATGCTGCCAGCTGTTGCTGCTTGGCGATGTTCTCCGACAGGCACCAGCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:17:1054:638/1 -abbbbbbbaaaaaaaaaaaaaaaaaaaaa^\^_]`__]`^[^^^^]___[[^[ -@PATHBIO-SOLEXA2_30TUEAAXX:3:51:540:507/1 -TGGCATTGGTTGAGTTGACCGGCCCCATCTCCTGCTGGAGAGAGCCGAAAGGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:51:540:507/1 -aababbbaaaaaaaabababaaaabaaaaabaabaaaaaaaaaaaba\aaaaX -@PATHBIO-SOLEXA2_30TUEAAXX:3:84:152:1144/1 -AAAAATCCAGCCCATTTCTAACTAGAGGACCAGTATATCCTCCGCATTCTGAA -+PATHBIO-SOLEXA2_30TUEAAXX:3:84:152:1144/1 -baaaaaaaaaaaa\aaaabbabaaaaaOaaaaaaaaaaaabaaXEOWaaaZaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:96:1139:137/1 -CTTCTATCCTGGGTGGCATTGGTTGAGTTGACCGGCCCCATCTCCTGCTGGAG -+PATHBIO-SOLEXA2_30TUEAAXX:3:96:1139:137/1 -bbbaaaabbaaaaaaaabbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaZ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/RC3H2-RGS3_2.fq --- a/tests/vcap_pe_53bp/RC3H2-RGS3_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,24 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:37:646:111/2 -ATTGGTTGAGTTGACCGGCCCCATCTCCTGCTGGAGAGAGCCGAAAGGGTGGA -+PATHBIO-SOLEXA2_30TUEAAXX:3:37:646:111/2 -aaabaababaaababaaabbaaaaaabaaabaabaabaaaaaaaaWaaa^aba -@PATHBIO-SOLEXA2_30TUEAAXX:3:7:1283:1650/2 -AGAGCTGAATTCTGTGCCTCAAAAATCCAGCCCATTTCTAACTAGAGTACCAG -+PATHBIO-SOLEXA2_30TUEAAXX:3:7:1283:1650/2 -aaaabaaaabbaabbaabbaaaaa^aaaZababaaaabaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:17:1054:638/2 -CTAACTAGAGTACCAGTATATCCTCCGCATTCTGAAAACATTTAGTATTTTCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:17:1054:638/2 -baaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa^Z_aaaaaa_``aaaa^ -@PATHBIO-SOLEXA2_30TUEAAXX:3:51:540:507/2 -GCTGCTGGGCCCTCTGCAGATTCTGTAACTGAAAAAAGGCAGAGTGCTTATTC -+PATHBIO-SOLEXA2_30TUEAAXX:3:51:540:507/2 -bbaabaaaabbbaabbaaaaaabbbaaabbaaaaaaaaaaaaaaaaaaaaab_ -@PATHBIO-SOLEXA2_30TUEAAXX:3:84:152:1144/2 -CGGGGGTGATGCTGCCAGCTGTTGCTGCTTGGCGGGGTGCTCCGACCGGCACC -+PATHBIO-SOLEXA2_30TUEAAXX:3:84:152:1144/2 -a^V_aaLaaabaLaaba\a[QK^aa`UIQ^baaa\IZUJ]aaaaZ^HH[aaa^ -@PATHBIO-SOLEXA2_30TUEAAXX:3:96:1139:137/2 -TCAGATACCCTTTGAAGTCCCACAGTACCCACAGACAGAAGGCAGAGTGCTTA -+PATHBIO-SOLEXA2_30TUEAAXX:3:96:1139:137/2 -aaaaaabbbabbaaaaaaaabaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/TMPRSS2-ERG_1.fq --- a/tests/vcap_pe_53bp/TMPRSS2-ERG_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,744 +0,0 @@\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:70:760:740/1\n-CGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:70:760:740/1\n-aaaZLXXa_[_aaaaWaaaaa^aaaa_]`aaaaaZaaa`]_aaaaaaaaaUGR\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:93:627:338/1\n-TGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:93:627:338/1\n-aaabbabbbbaaaaabaaaaaaaaaaaaaaaaaaaaa`Z_aaaaaaaaaaaa`\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:80:610:176/1\n-AGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:80:610:176/1\n-aaaababbaaaabaaaaaaaaaaaaaaaaaaaaaaa^Z\\aaaaaaa_Z]aaa]\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:66:533:945/1\n-GAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCTT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:66:533:945/1\n-baaabaaaabbaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaa__]`Z\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:52:63:295/1\n-GGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:52:63:295/1\n-aa\\Laaaaaaaaaaaaabaaaaa^aaXaaaabZ\\^WaaaaaaaZ\\WZ]_a^QQ\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:9:249:1837/1\n-GGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:9:249:1837/1\n-baZaabaaaababaaaaaababa_V\\aaabbaa\\a^Q^aa`_`\\V[UMK^a\\U\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:50:1383:772/1\n-GCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGA\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:50:1383:772/1\n-aaaaabababaaaaabaaaaaaaaaaaaaaaabaaaaaaabbaaaaaaaaaaX\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:75:1678:1421/1\n-CTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAA\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:75:1678:1421/1\n-abaaaaaaa\\aaaaaaaaaaaaaaaabaaW^aaaaW\\baa^aaaaaa][Saaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:15:885:1025/1\n-GGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:15:885:1025/1\n-aaaaaaaaaaabaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:6:362:672/1\n-GCGTTCCGTAGGCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:6:362:672/1\n-aababbbbaaaaabbbaaaa
aaaaaaaaaaa```aaaaaaaaaaaaa_]_aa^\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:64:1391:490/1\n-CAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:64:1391:490/1\n-abab\\aaaaabaaaabaaabaaaaaaabaaaaaab\\aaLZaa^^aaaaLaaaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:38:116:785/1\n-GGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCATGAAGCCT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:38:116:785/1\n-a_UUabaaaaaaaZabbbaaaaaaabaaaabbaLaaaWaabaaaaZa[aa\\a_\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:4:1499:269/1\n-CGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:4:1499:269/1\n-aabbaaaaaaa\\baMaaaaaaaaaaa_]_aaaaaaaaaaaaaaa\\aZ\\QZ]^Q\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:1:216:647/1\n-GCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGTA\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:1:216:647/1\n-bbaa[Z^aXa^Q^aaaaaaaXNaaaaa^QXa\\_UX]aaaMHQ^aaaHJ[S^QH\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:64:863:109/1\n-CTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAA\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:64:863:109/1\n-bbbbbbaaaaaaaaaaaaaaaaaaaaaa``_aaaaaaaaaaaaaaaaaaa^[_\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:50:1646:1212/1\n-GCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:50:1646:1212/1\n-aaaaaaaaaaaaaaaaaaaaaaaaa`_]aaaaaa__`aaaaa__`\\__^W`RR\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:35:479:925/1\n-GAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGA\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:35:479:925/1\n-bbaabbaaaaabaaaabbbaabaabaaaaaaaaaabaaaaabaabaabaaa^Q\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:45:229:1247/1\n-CTAAGCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:45:229:1247/1\n-aaaaababa^aaabaaabaaaa^abaaaaabaaX[`WaaaaaaaU[aaaaaZO\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:36:356:614/1\n-CCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:36:356:614/1\n-abaabbabaaaababbabbbbaaaaabbaabaaaaaaaaaaaaaabaaaaaaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:40:1488:108/1\n-CGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:40:1488:108/1\n-bbba
aaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa[X[`X[\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:89:524:344/1\n-GGAGG'..b'aaaaaaLW^aaaaaaLaaaaaaZaaaaaaaaaaaaaaaXE\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:9:701:1223/1\n-GTTGAGACAGCCAATCCTGCTGAGGGACGCGAGGGCTCATCTTGGAAGTCTGT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:9:701:1223/1\n-aaaaaaaaba^VXabbaaaaaa^V[a`J`bUEX`_`aaaa[JS]WSaaaaaaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:27:1728:775/1\n-GTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:27:1728:775/1\n-aUaaaaa\\aaaWaaab`KHQVQ\\_S_aa_JXSa[[`aaaaaaXXaaa]Z_^Q[\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:38:616:1973/1\n-GCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCA\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:38:616:1973/1\n-aaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaa___aa_[]]__\\Za^Q_aZa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:45:1687:1005/1\n-GAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCTT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:45:1687:1005/1\n-abbabaaababbaaaaaaaaaabbaaa^a_[[aaaaaaaaaaaaa_J_^[Va[\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:4:1196:1118/1\n-TTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGTC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:4:1196:1118/1\n-bbabbaaaaabbbbbaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:41:1537:842/1\n-TAAGCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGCGCGCTGCGTG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:41:1537:842/1\n-aaaaUXLaa^aaaXZaWaOaaaaaWaUEUU_a^Waa^baOaLaLaLOLOaLLL\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:60:936:619/1\n-TGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTAGGAAGTCTGTCC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:60:936:619/1\n-bbaaa_Z_[[aaU[U_aaaaaaaaabLaaLaaaaXKX`[`JaXMGKV_aa^[K\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:22:1310:1233/1\n-CCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:22:1310:1233/1\n-bbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_`_a^X[^__`aaaa[\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:86:917:29/1\n-GGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:86:917:29/1\n-aaabaaaaaaaaabaaaaaaaaaaaaaaaaaaa]``aaaaaaaaaaaaaa_\\^\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:72:1
178:1343/1\n-CACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCGC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:72:1178:1343/1\n-KZ\\\\a^aa[GXaaaaaa`Z`ZaaaaaaaabLVQVaaaaZaaaaZEURER\\^aW\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:79:1659:1749/1\n-AAGCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:79:1659:1749/1\n-aabaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaa`__aaaa]__\\a_JZ^_^\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:24:1462:302/1\n-CGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:24:1462:302/1\n-aaaaabbaaaabbbbbaaaabaabaaabbbbaaaaaaaaaa_[`aaa_V^aaX\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:28:1231:968/1\n-GGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:28:1231:968/1\n-bbaabbaaaabbaaaaaaabbaaaaabaaaaaaaaaaabaaaaaaaaaaaaa_\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:55:369:1726/1\n-TTACATTCCATTTTGATGGTGACCCTGGCTGGGGGTTGAGACAGCCAATCCTG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:55:369:1726/1\n-aaa^aa^ababaaabaa^QZaabaa]__aaaaaaMaaaaaa[[H`_[aaa^__\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:7:1368:717/1\n-CGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCTTATCA\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:7:1368:717/1\n-aaaaaaabaaaaaaaaaa^_^aaaaaaaaaaaaaaaaZaaaaaaaaURZ[[a^\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:48:1643:1117/1\n-CTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAA\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:48:1643:1117/1\n-aaaaaaaaaaaaaaaaaaaaaaaaaaaa`__]]aaaaaaaaaaaaaaa`_`_^\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:8:1732:1312/1\n-GAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGA\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:8:1732:1312/1\n-aaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaa^_]]aaaa_]`aaaaU\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:12:1770:1382/1\n-CGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:12:1770:1382/1\n-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_]_aaaa`[]aaXXUaaaaaX]_\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:85:88:633/1\n-TTTTGATGGTGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:85:88:633/1\n-aababbbbabaaababbbababbaaaaaaabaaaaaaaaaaaaaaaaaaaaaa\n-@PATHBI
O-SOLEXA2_30TUEAAXX:3:81:768:1069/1\n-TTCAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGACTGTC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:81:768:1069/1\n-aUG[S`aUKXaaaZaaaZaaa^aaaaWOKG\\QQ^]Xaa^Q\\]aWaaXGMaaL[\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/TMPRSS2-ERG_2.fq --- a/tests/vcap_pe_53bp/TMPRSS2-ERG_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
b'@@ -1,744 +0,0 @@\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:70:760:740/2\n-TTGAGACAGCCAATCCTGCCGAGGGACGCGTGGGCTCATCTTGGAAGTCTGTC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:70:760:740/2\n-aaaaaaaaaaaaaaab_J_aaaaababbaaaaaaaaaa^[_aaaaaaLaa^a_\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:93:627:338/2\n-AGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:93:627:338/2\n-bbbaaaabbbabaaaabbaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaa^\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:80:610:176/2\n-GCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGA\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:80:610:176/2\n-bbbbbaaaabaaabaababbbbbaaaaaaaaaaababaaaaaaaaaaaaaaaU\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:66:533:945/2\n-TGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:66:533:945/2\n-bbaaaaaaaaabaaabaaaaaaaaabaaaaaaaaaaa^[^aaaaaaaaaaaa_\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:52:63:295/2\n-GCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGACTGGCG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:52:63:295/2\n-aaaaaaaZaa\\VZWa\\WbaWaaQUIUUKU^ZaZWaaWZRKXaEEUaaaHHEPK\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:9:249:1837/2\n-TTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGCAGTCTGTC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:9:249:1837/2\n-baaa_U^V^b^JQ[aaaabb^Z^a^L_aaaaaab^V[EU^aaa[EM_\\aa^X^\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:50:1383:772/2\n-CGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:50:1383:772/2\n-aaaabbaaaaaaaaaaaababaabbbaaabbaaababbaaaaaaaaaaaaKE[\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:75:1678:1421/2\n-GGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:75:1678:1421/2\n-aaaaaaaaaaaaaaaaaaaaaabaabaaa^aaabaaaaaaaaa\\aaaZaaaa\\\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:15:885:1025/2\n-TGGCTGGGGGTTCAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:15:885:1025/2\n-aaabaaaaaaaZEXaaaaabaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaa_\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:6:362:672/2\n-CGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGTAAAC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:6:362:672/2\n-aaaaaaaaaaaaabaaaaaabaaaa
aaaaaaaa`__^^^aaaaa[KXERKX^K\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:64:1391:490/2\n-CCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGTCCATAGTCGC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:64:1391:490/2\n-aaaa^bbaaababaaaaabaaWabZaaaaaabaaa\\aaaaaZ^aaaabbaaaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:38:116:785/2\n-CCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:38:116:785/2\n-a^V^aaaaaaaa`S_]_aa\\aaa_[Mabaaaa\\aaaaaaaaabaaaaVHX[aa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:4:1499:269/2\n-TGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:4:1499:269/2\n-aaaaaaaaaabaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_]]aa^\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:1:216:647/2\n-GGTTGAGACAGCCAATCCTGCTGAGGGACGCGTCGGCTCATCTTGGAAGTCTG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:1:216:647/2\n-X_aaLWaabZaaaa^aaaabbabaaa``_aaaVHXabaaaaWaaa[Q^XG[aV\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:64:863:109/2\n-GCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:64:863:109/2\n-aaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaa`_`aaaaaZ\\^[K[^[\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:50:1646:1212/2\n-TGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCCGAGGGACGCGTGGGCTC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:50:1646:1212/2\n-aaabbaaaaaaaaaaaaaaaaaaaaaaaaaaa_[_a`_`a^^_aa_^_a^[[X\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:35:479:925/2\n-GCACACTCAAACAACGACTGGTCCTCACTCACAACTGATAAGGCTTCCTGCCG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:35:479:925/2\n-bbbaaabbaaaaaaaaaaaaaaaabaaaaaaaaabaaaa\\aaaaaaba\\aabb\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:45:229:1247/2\n-GGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:45:229:1247/2\n-bbbb^H[abaaaabaaaaabaaaaaaaaaaababaaaaababaaabVQ__]]a\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:36:356:614/2\n-GGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:36:356:614/2\n-aaaaaababababaaaaaababaaaaaaabaaaaaaaaaaaaaababaa^aaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:40:1488:108/2\n-CGATGGTGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:40:1488:108/2\n-bbbabaaaaaa
aaaaaaaaaaaabaaaaaaabaaaaaaaaaa`_]aaaaaaaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:89:524:344/2\n-TTGAG'..b'aa\\a^abbbaaaaaaaaababaaaaaaaaaaaaaaaaa[H\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:9:701:1223/2\n-CAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:9:701:1223/2\n-aaaa^aaaaZaabaaaaaaaaaaZaabaaaaUabaaaaaa\\[H^aba__]_`V\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:27:1728:775/2\n-CTAAGCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:27:1728:775/2\n-aZaaaa^aaaabaaaZaaaaabaa^a^a\\aaaaaaaaaLaa\\_JUUZIQbWOM\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:38:616:1973/2\n-GTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:38:616:1973/2\n-aaaaaaababaaaaZabbaaa^aaaaaabbbaaaaaaaaaaaaaaaaaaaaaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:45:1687:1005/2\n-CCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:45:1687:1005/2\n-abbaababaaaabbaaaaaaabbaaabaaaaabbabaaaaaaaaaaaaaaaaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:4:1196:1118/2\n-CGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:4:1196:1118/2\n-aabaaaaaaabaaaabaaaaaaaaaaaaabaZaaaaaa^ZMXaaaaaZaa[V[\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:41:1537:842/2\n-GTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:41:1537:842/2\n-aMabaaaW^abaaaa\\aaabbbbabbabaa^aaa^`J_^bWaaa^\\ba\\aaZL\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:60:936:619/2\n-TAAGCAGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:60:936:619/2\n-aaaaaabaa]M`aaaaaaaaaaaaaa^aa\\a_]`aaaaa`S`a[ZVaaaaaaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:22:1310:1233/2\n-AGGAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:22:1310:1233/2\n-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa^V[aaaaaa^Z\\XOX]X\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:86:917:29/2\n-GGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:86:917:29/2\n-aaaaaaaaaaaaaaaaabbaaaaaaaaaa_[_a_Z`aaaaa\\^^aaaaa^V^U\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:72:11
78:1343/2\n-CGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCCGGTGCGCGGCGCC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:72:1178:1343/2\n-aaaaaZaa[QXaaa[KKaaaaaa^aaaZaaaaZa^^aL[HV_JRKUaaWaUSS\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:79:1659:1749/2\n-TTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGTC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:79:1659:1749/2\n-aaaaaaaaabbaaabbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa__`aaa_\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:24:1462:302/2\n-TGACCCTGGCTGGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:24:1462:302/2\n-abbbabbbabbabbbaabbb_[_^abaXaaaaaaaaaaaaaaaaaaX_`aaaX\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:28:1231:968/2\n-CGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:28:1231:968/2\n-aaaaabababaaaaaaaabaaaaaaaaaaa_^^aaaaaaaaa_M_aabaa[]`\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:55:369:1726/2\n-GGAGGCGGAGGGCGAGGGGCGGCGAGCGCCGCCTGGAGCGCGGCAGGGAGCCT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:55:369:1726/2\n-aaaababaaabbb^Q^baa[Uaab\\ZZUaaaaZ]_]]VQ^_Z^_Q^XERaa^E\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:7:1368:717/2\n-TTGAGACAGCCAATCCTGCTGAGTGACGCGTGGGCTCATCTTGGAAGTCTGTC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:7:1368:717/2\n-aaaaaaaaaaaaaaabbbaaaa^Q^aaaaaaaaaaaaaaaaaa[^^aaaaaaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:48:1643:1117/2\n-GAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:48:1643:1117/2\n-aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa__`aaaaaaaU_ZR[aaaa\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:8:1732:1312/2\n-TTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGTCTGTC\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:8:1732:1312/2\n-aaaabaaabbaabbaaaaaaaaaaaaaaaaaaa^aaaaababaa^[^__aa\\\\\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:12:1770:1382/2\n-GGGGGTTGAGACAGCCAATCCTGCTGAGGGACGCGTGGGCTCATCTTGGAAGT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:12:1770:1382/2\n-aaaaaaaaaabaaabaaaaaaaaaaaaaaaaaaaaaaaaaa__`aaaa^[^XU\n-@PATHBIO-SOLEXA2_30TUEAAXX:3:85:88:633/2\n-GGAGGGCGAGGGGCGGGGAGCGCCGCCTGGAGCGCGGCAGGAAGCCTTATCAG\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:85:88:633/2\n-aa^aaabaaabaabaaaaaaabbbabbaaaaaaaaaa__`aaa_]_a_\\^aa^\n-@PATHBIO
-SOLEXA2_30TUEAAXX:3:81:768:1069/2\n-CTAAGCAGTAGGCGGAGGCGGAGGCGGAGGGCGAGGGGCGGGGAGCGCCGCCT\n-+PATHBIO-SOLEXA2_30TUEAAXX:3:81:768:1069/2\n-aa^KV[aaPE[aaaUERaaaZH[aaLUPERH^^GGK[UV^[^a_^V[]aaaaa\n' |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/TYMP-SCO2_1.fq --- a/tests/vcap_pe_53bp/TYMP-SCO2_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:63:1353:655/1 -CTGGCCCTGCCCACCTGTCTCTGCAGGGCCCTGCCTTGACAAAAGCCAGGACC -+PATHBIO-SOLEXA2_30TUEAAXX:3:63:1353:655/1 -aaaaaaaabaaaaaabbababa\^aaaaabbaaaaaabaaaaaaaaabaa^aZ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/TYMP-SCO2_2.fq --- a/tests/vcap_pe_53bp/TYMP-SCO2_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,4 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:63:1353:655/2 -GCGCCCGGGAGCAGGAGGAGCTGCTGGCGCCCGCAGATGGAGCATCAGATCCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:63:1353:655/2 -aaa^aaXEXaaaaaaa^aaaaaaabaa^abbaaaWaWaZXGXaaaaaaZaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/USP10-ZDHHC7_1.fq --- a/tests/vcap_pe_53bp/USP10-ZDHHC7_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,12 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:1:1485:1153/1 -GCAGCCGTCACGGATGAACCAGACCCGGTCAGCCACGTCAGCCTCGGAGGAGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:1:1485:1153/1 -aaaaabaaaaaaaaaaaaaa``_aa``^^^_^^`]_```_^\^^[^XEZXOXZ -@PATHBIO-SOLEXA2_30TUEAAXX:3:84:1623:1043/1 -AGCCGTCACGGATGAACCAGACCCGGTCAGCCACGTCAGCCTCGGAGGAGGAG -+PATHBIO-SOLEXA2_30TUEAAXX:3:84:1623:1043/1 -abaaaaabbaaaaaaaaaaaaaaaaaaaaaa^aaaZ^aa^XRXaaaaUa[IZU -@PATHBIO-SOLEXA2_30TUEAAXX:3:94:1029:1011/1 -GGCGGGGGAAGCAGCGTGAGCAGCCGGAGGATCGCGGAGTCCCAATGAAACGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:94:1029:1011/1 -aaaaaaa`Z`aaaaaaaaaaaaaaaaaaa`[__\\^^VXEU_`_RMXX\ZHXX |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/USP10-ZDHHC7_2.fq --- a/tests/vcap_pe_53bp/USP10-ZDHHC7_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,12 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:1:1485:1153/2 -CAGCCGGAGGATCGCGGAGTCCCAATGAAACGGGCAGCCATGGCCCTCCACCG -+PATHBIO-SOLEXA2_30TUEAAXX:3:1:1485:1153/2 -aaaaaaaaaaaaaaaaaaaaaaaaaaa_``_``^RRXUXZ[X[[[XOX[ERER -@PATHBIO-SOLEXA2_30TUEAAXX:3:84:1623:1043/2 -GGCGGGGGAAGCAGCGTGAGCAGCCGGAGGATCGCGGAGTCCCAATGAAACGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:84:1623:1043/2 -baaaaaba\aaaaaaaaaaabaWa[V^^aaL^aaaaa\_]Xa]__XV^aa^aa -@PATHBIO-SOLEXA2_30TUEAAXX:3:94:1029:1011/2 -TCGGAGGAGGAGGACGATGAAGAGTCATAGTTGTCATTTTCAGCCAGGAGAGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:94:1029:1011/2 -baaaaaaaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaa_\^_^_aaZ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/VWA2-PRKCH_1.fq --- a/tests/vcap_pe_53bp/VWA2-PRKCH_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
b |
@@ -1,8 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:43:285:459/1 -TCCCACGTGGCAGCCGCGCCCCGGGCGCCCCTCCTGTGATCCCGTAGCGCCCC -+PATHBIO-SOLEXA2_30TUEAAXX:3:43:285:459/1 -aaababaaaabaaaaa_Z`bbaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:13:1139:1547/1 -AGCGCCCCCTGGCCCGAGCCGCGCCCGGGTCTGTGAGTAGAGCCGCCCGGGCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:13:1139:1547/1 -aaaabaaabba\aaaaa^\aaaaaaaaabaaaWUaaa\abaaaaa\Z^aaaaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/VWA2-PRKCH_2.fq --- a/tests/vcap_pe_53bp/VWA2-PRKCH_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,8 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:43:285:459/2 -TAAAATGTTTGAAGATCCGGTCTCTCTGGAGAGTAGCTCTGGAAAACAGGAAA -+PATHBIO-SOLEXA2_30TUEAAXX:3:43:285:459/2 -baaa^baabaababbbbbababbabbaaabaaabbabbabaaaaaaaaaaaa_ -@PATHBIO-SOLEXA2_30TUEAAXX:3:13:1139:1547/2 -ATTGATCTGGTGGACTCGCCTTCGCATAGCCCTTTGGCGCTTCCTGGTAAAAT -+PATHBIO-SOLEXA2_30TUEAAXX:3:13:1139:1547/2 -Wabba\aaa^H^aaabaaaaaaaaaa_J_aNZ^[[_ZZaXJH^Q^\IVH[aZ[ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/ZDHHC7-ABCB9_1.fq --- a/tests/vcap_pe_53bp/ZDHHC7-ABCB9_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,64 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:92:573:1783/1 -CTGCGGCATGATCTGTGCTGTCATGACGTGGCTTCTGGTCGCCTATGCAGACT -+PATHBIO-SOLEXA2_30TUEAAXX:3:92:573:1783/1 -aaaaaaa\aaaabaaaaaaaaa^aaaaaaaaaaaZXXXEXX]aaaaaZS]a\U -@PATHBIO-SOLEXA2_30TUEAAXX:3:13:326:121/1 -GGCTGCGGCATGATCTGTGCTGTCATGACGTGGCTTCTGGTCGCCTATGCAGA -+PATHBIO-SOLEXA2_30TUEAAXX:3:13:326:121/1 -bbbbaaaabbaaaaaaaaaaaaaaaaaaa`__aaaaaaa^^_aa^Q^_]_Z^[ -@PATHBIO-SOLEXA2_30TUEAAXX:3:33:576:1681/1 -CGCTGCGCCGCTGCAAGGTCGGAGCTGAGACTGGCGTGCCTTGAGACAGGCTG -+PATHBIO-SOLEXA2_30TUEAAXX:3:33:576:1681/1 -aababaaabbaaaabaaaaaaaaaaaaaaaaaaaaaa^^[aaaaaaaaa^X^X -@PATHBIO-SOLEXA2_30TUEAAXX:3:1:668:1831/1 -TGCTGTCATGACGTGGCTTCTGGTCGCCTATGCAGACTTCGTGGTGACTTTCG -+PATHBIO-SOLEXA2_30TUEAAXX:3:1:668:1831/1 -aaaaaaaaaaaabbaaaaa__]`]`aa^S^aaXEX_`aaaaaa^[[[^__a]U -@PATHBIO-SOLEXA2_30TUEAAXX:3:33:1105:1973/1 -CGGTCCAGGTGGCTGAAGATATAGATGGCCGTGGTCACGCAGATGTCCACACT -+PATHBIO-SOLEXA2_30TUEAAXX:3:33:1105:1973/1 -bbabbaaaaaaaaaaaaa_V^aabbaaaaaaaaaaaaaaaaa__]a[[[^^a^ -@PATHBIO-SOLEXA2_30TUEAAXX:3:7:647:325/1 -GCAGACTTCGTGGTGACTTTCGTCATGCTGCTGCCTTCCAAAGACTTCTGGTA -+PATHBIO-SOLEXA2_30TUEAAXX:3:7:647:325/1 -aaaaaabbaaaaaabaababababaaaaaaaababaaaabbaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:92:512:1173/1 -CAGGTTGTAGCTCACGGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:92:512:1173/1 -aaaaaabaaaabaaaabaaabaababaaaa\aababababaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:89:1268:572/1 -TCGCCTATGCAGACTTCGTGGTGACTTTCGTCATGCTGCTGCCTTCCAAAGAC -+PATHBIO-SOLEXA2_30TUEAAXX:3:89:1268:572/1 -aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa``_Z_aaaaaaaaaa_^^aaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:38:1633:1991/1 -GGTGACTTTCGTCATGCTGCTGCCTTCCAAAGACTTCTGGTACTCTGTGGTCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:38:1633:1991/1 -bbaaabbbbaaabaaaaaaaaaaaaaaaaaaaaaaaa___aaaaaaaaaa_^X -@PATHBIO-SOLEXA2_30TUEAAXX:3:76:1226:1886/1 -TGGAGGTGGGCGGGTGCTGAAGGCCAGGTTGTAGCTCACGGGGGCAAGGCCAT -+PATHBIO-SOLEXA2_30TUEAAXX:3:76:1226:1886/1 -aaaaaa^aZaabbaa\aabaaaaaaaa^H^aaaaabaaaaaXa^aaaaaaaaa 
-@PATHBIO-SOLEXA2_30TUEAAXX:3:74:1204:1439/1 -CATGATCTGTGCTGTCATGACGTGGCTTCTGGTCGCCTATGCAGACTTCGTGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:74:1204:1439/1 -babbabbbbaabbabaaaabaaabbaaaaZaaaaaaaaaaaaaaaaaaaaaa` -@PATHBIO-SOLEXA2_30TUEAAXX:3:58:213:1628/1 -TGGGCGGGTGCTGAAGGCCAGGTTGTAGCTCACGGGGGCAAGGCCATCATCAT -+PATHBIO-SOLEXA2_30TUEAAXX:3:58:213:1628/1 -ababbaaaaabbaaaaaabaaaaaaaaabbaaaaaaaaaaaaaaaaaaaaaa^ -@PATHBIO-SOLEXA2_30TUEAAXX:3:2:627:209/1 -TTCGTCATGCTGCTGCCTTCCAAAGACTTCTGGTACTCTGTGGTCAACGGGGT -+PATHBIO-SOLEXA2_30TUEAAXX:3:2:627:209/1 -bbaaabbaaaaaaaaaaaaaaaa__`aaaaaaaaaaaaaaa__`_````_^^[ -@PATHBIO-SOLEXA2_30TUEAAXX:3:20:978:442/1 -CCTGCACAGTGAAAACTGGGCTGGACCCAACCCTTGTGGGCATTTGTGGTGAG -+PATHBIO-SOLEXA2_30TUEAAXX:3:20:978:442/1 -aaabaabaaaabaabaaaaaabaaaabbbabaaaaaaaaaaaaaaaaaaaaab -@PATHBIO-SOLEXA2_30TUEAAXX:3:85:572:390/1 -CAGGTTGTAGCTCACGGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:85:572:390/1 -aaaaaaaababbaabaaaaaabaabbaabaaababaaaaaaaaabbaaaabaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:73:901:1886/1 -CTATGCAGACTTCGTGGTGACTTTCGTCATGCTGCTGCCTTCCAAAGACTTCT -+PATHBIO-SOLEXA2_30TUEAAXX:3:73:901:1886/1 -babaaaaaaabaaaaaaabbaaabaaaaaaaaaaaaaaaaaaaaaaaaabbaa |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/ZDHHC7-ABCB9_2.fq --- a/tests/vcap_pe_53bp/ZDHHC7-ABCB9_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,64 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:92:573:1783/2 -GGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGGCAGGGTCGGTGAGCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:92:573:1783/2 -aaaaa_KQ\aaaaaaaa`U`b`]_S_````]_aa^Q[`^V[a_]_a]SZHX[G -@PATHBIO-SOLEXA2_30TUEAAXX:3:13:326:121/2 -GGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGGCAGGGTCGGTGAGCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:13:326:121/2 -aaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa^[[a[R[U[XO -@PATHBIO-SOLEXA2_30TUEAAXX:3:33:576:1681/2 -TCTGTGGTCAACGGGGTCATCTTTAACTGCTTGGCCGTGCTTGCCCTGTCATC -+PATHBIO-SOLEXA2_30TUEAAXX:3:33:576:1681/2 -baaaaaaaaaaaaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaa_]` -@PATHBIO-SOLEXA2_30TUEAAXX:3:1:668:1831/2 -CAGGTTGTAGCTCACGGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:1:668:1831/2 -aZaaaaaaaaaa_Z_aaaaaaaaaab^Q^`]_^V[_[GZEUX[a[MHX^XEUK -@PATHBIO-SOLEXA2_30TUEAAXX:3:33:1105:1973/2 -CCCTGTCATCCCACCTGAGAACCATGCTCACCGACCCTGCCTGGCCTCGCCCT -+PATHBIO-SOLEXA2_30TUEAAXX:3:33:1105:1973/2 -bbbbbbbaaabaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_Z^aaaaaaaa_ -@PATHBIO-SOLEXA2_30TUEAAXX:3:7:647:325/2 -CCGCATCCTGCTGGTTGGAGGTGGGCGGGTGCTGAAGGCCAGGTTGTAGCTCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:7:647:325/2 -aaaaaabaaaaaaaaabaabaabaaaabbaaaaaaabaaaa[aaaaaaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:92:512:1173/2 -ATGATCTGTGCTGTCATGACGTGGCTTCTGGTCGCCTATGCAGACTTCGTGGT -+PATHBIO-SOLEXA2_30TUEAAXX:3:92:512:1173/2 -aaaaaaaaaabaaaaaaaabababaaaaaaaababaaaaaaaaaabbaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:89:1268:572/2 -CGCATCCTGCTGGTTGGAGGTGGGCGGGTGCTGAAGGCCAGGTTGTAGCTCAC -+PATHBIO-SOLEXA2_30TUEAAXX:3:89:1268:572/2 -aaaaaabaaaaaaaaaaaaaaaaaaaaaaaa`[^^aaa_^__^^__``_`_`` -@PATHBIO-SOLEXA2_30TUEAAXX:3:38:1633:1991/2 -GAAGGCCAGGTTGTAGCTCACGGGGGCAAGGCCATCATCATCCACAGGGCGAG -+PATHBIO-SOLEXA2_30TUEAAXX:3:38:1633:1991/2 -bbabbbaaaaaaaaaaaaaaaaaaaa`Z`aaaaa_`_aa`_`^V^Z__^V[M[ -@PATHBIO-SOLEXA2_30TUEAAXX:3:76:1226:1886/2 -AGACTTCGTGGTGACTTTCGTCATGCTGCTGCCTTCCAAAGACTTCTGGTACT -+PATHBIO-SOLEXA2_30TUEAAXX:3:76:1226:1886/2 -aaabbabaaabaaaaabaaaaaaaabaaaaabbaaabaaaaaaaaaaaaaaaa 
-@PATHBIO-SOLEXA2_30TUEAAXX:3:74:1204:1439/2 -TTGTAGCTCACGGGGGCAAGGCCATCATCATCCACAGGGCGAGGCCAGGCAGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:74:1204:1439/2 -aaaaaabbabaaaaaabbbbabaaabaaababaaaaaabbaaaabaaaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:58:213:1628/2 -TGGCTTCTGGTCGCCTATGCAGACTTCGTGGTGACTTTCGTCATGCTGCTGCC -+PATHBIO-SOLEXA2_30TUEAAXX:3:58:213:1628/2 -aabbaaaaaa^aaaa\aabaaaaaabbaaaaaaaaaaaaaa_S_aaWaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:2:627:209/2 -CCGCTGCAAGGTCGGAGCTGAGACTGGCGTGCCTTGAGACAGGCTGGGGGAAA -+PATHBIO-SOLEXA2_30TUEAAXX:3:2:627:209/2 -aaaaaaaaaa```a`__`aaaaa```a`^[^^^^_XU[XXU[^XUPEXXEKXU -@PATHBIO-SOLEXA2_30TUEAAXX:3:20:978:442/2 -TCCCACCCGCGCTGCGCCGCTGCAAGGTCGGAGCTGAGACTGGCGTGCCTTGA -+PATHBIO-SOLEXA2_30TUEAAXX:3:20:978:442/2 -baaaabbaaaaaaabbaaaaabaaaaa^aaaaaabb^bababbaaaaaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:85:572:390/2 -TGCTGTCATGACGTGGCTTCTGGCCGCCTATGCAGACTTCGTGGTGACTTTCG -+PATHBIO-SOLEXA2_30TUEAAXX:3:85:572:390/2 -a^a\\Wa^baaaaaaaaaaaabZI\aaaaaaaaaaaaaaaa^aa\aaaaaaa^ -@PATHBIO-SOLEXA2_30TUEAAXX:3:73:901:1886/2 -GTTGGAGGTGGGCGGGTGCTGAAGGCCAGGTTGTAGCTCACGGGGGCAAGGCC -+PATHBIO-SOLEXA2_30TUEAAXX:3:73:901:1886/2 -baaaaaaaaaaaaaaaaaabaaaaaaaaaaaaaaaaaaaaaaaaaaa`]]aa^ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/ZNF649-ZNF577_1.fq --- a/tests/vcap_pe_53bp/ZNF649-ZNF577_1.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,36 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:92:1636:1302/1 -AGGTACTTCTTACTCCACAGTCCATACACTTGCCACCTGGCTAAAATTATTTG -+PATHBIO-SOLEXA2_30TUEAAXX:3:92:1636:1302/1 -abaaaaaabaaabababaabbbaababaaaaabaabaaababaaa^aaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:89:916:968/1 -GCTAGCAACTCTAGTATGTTCTCTCTCTTCTGTCTATTCTGGGCCTTCCCAGA -+PATHBIO-SOLEXA2_30TUEAAXX:3:89:916:968/1 -abaaabaaabaabbaabaab^aabaaabbaabbabaabbababbaaaaaaaLa -@PATHBIO-SOLEXA2_30TUEAAXX:3:79:83:1163/1 -CTCATTGTACATCACAGAACTCACACGGGAGAGAGACCCTATGGCTGTGATGA -+PATHBIO-SOLEXA2_30TUEAAXX:3:79:83:1163/1 -baaabbbbbaaaaaaaaaaabbbbbabaababbaaaabaaaaaaaa`UZU]Z\ -@PATHBIO-SOLEXA2_30TUEAAXX:3:30:232:643/1 -TCTCAGGTCAAGCTACCACTGGAAATGATGATCTTCCCCAGCCTGGAAGCTCC -+PATHBIO-SOLEXA2_30TUEAAXX:3:30:232:643/1 -abbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:20:1494:1497/1 -TCTGTCTATTCTGGGCCTTCCCAGAAGTGGTGGTCAGGTATCATCTCAGGTCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:20:1494:1497/1 -aaaaa^aaaaUaZSV^aa\aaZ_W`^VS[aaa[GUKU[KUXER[[M_[[XMXX -@PATHBIO-SOLEXA2_30TUEAAXX:3:51:343:357/1 -TCTCTTCTGTCTATTCTGGGCCTTCCCAGAAGTGGTGGTCAGGTATCATCTCA -+PATHBIO-SOLEXA2_30TUEAAXX:3:51:343:357/1 -aaaaaabbbabb]U]aaaaaaaaaaaaaaaa^H^aVV[Wa\aa_S]aaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:85:776:696/1 -CTCATTGTACATCACAGAACTCACACGGGAGAGAGACCCTATGGCTGTGATGA -+PATHBIO-SOLEXA2_30TUEAAXX:3:85:776:696/1 -abaaabbbbaaaaaaaaaaaabbaaaaaaaaaaaaaaaaaaaaaaa[KZQ[XV -@PATHBIO-SOLEXA2_30TUEAAXX:3:88:32:1594/1 -TGGGAAAGCCTTCCTTACAAAGACAATGCTCATTGTACATCACAGAACTCACA -+PATHBIO-SOLEXA2_30TUEAAXX:3:88:32:1594/1 -bbaaaaaabbabbbaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa` -@PATHBIO-SOLEXA2_30TUEAAXX:3:35:417:135/1 -AACCCTATAAATGCAGTGACTGTGGGAAAGCCTTCCTTACAAAGACAATGCTC -+PATHBIO-SOLEXA2_30TUEAAXX:3:35:417:135/1 -aaaabbaaaaabbaaaabbabaaaaaabaaaaaaaaaaaaaaaaaaaaaaaa_ |
b |
diff -r 6d54abd510d7 -r 37a16ff93dd9 tests/vcap_pe_53bp/ZNF649-ZNF577_2.fq --- a/tests/vcap_pe_53bp/ZNF649-ZNF577_2.fq Wed Sep 13 15:11:27 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 |
[ |
@@ -1,36 +0,0 @@ -@PATHBIO-SOLEXA2_30TUEAAXX:3:92:1636:1302/2 -CAGAAGTCAGGACTCATTAGACATCAGAAAATTCACTCAGGAGAGAAACCCTA -+PATHBIO-SOLEXA2_30TUEAAXX:3:92:1636:1302/2 -aaaaaaOaabaaaaaaaaaaaaaaaaaababaaabaaaaaaaaabaZaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:89:916:968/2 -ACAATGCTCATTGTACATCACAGAACTCACACGGGAGAGAGACCCTATGGCTG -+PATHBIO-SOLEXA2_30TUEAAXX:3:89:916:968/2 -aaabaaaaaaa\aWaababaaaabaaababbaabaaaaaaWaaaaaaaWabaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:79:83:1163/2 -GCAACTCTAGTATGTTCTCTCTCTTCTGTCTATTCTGGGCCTTCCCAGAAGTG -+PATHBIO-SOLEXA2_30TUEAAXX:3:79:83:1163/2 -aaaaaabaaaaaaaaabbbabbbbbbbbbbb^abbaaaabbbbbaaaaaaaaa -@PATHBIO-SOLEXA2_30TUEAAXX:3:30:232:643/2 -AGACATCAGAAAATTCACTCAGGAGAGAAACCCTATAAATGCAGTGACTGTGG -+PATHBIO-SOLEXA2_30TUEAAXX:3:30:232:643/2 -aaaaaaaaaaaaaaaaaabaaaaaaaaaaaabbaaaaaaaaaaaaaaaaaaa` -@PATHBIO-SOLEXA2_30TUEAAXX:3:20:1494:1497/2 -CATGATGCCCTCACCAAGTTGGAACAAGGAGAACCACTATGGACACTAGAAGA -+PATHBIO-SOLEXA2_30TUEAAXX:3:20:1494:1497/2 -[EOaaaZaaaZaa_Q_aaaaaXaaaaaa\a_X`^^QX_aaa_]]aa^^^V[_Z -@PATHBIO-SOLEXA2_30TUEAAXX:3:51:343:357/2 -TTCCTTACAAAGACAATGCTCATTGTACATCACAGAACTCACACGGGAGAGAG -+PATHBIO-SOLEXA2_30TUEAAXX:3:51:343:357/2 -aaaaaaaaaaaaa`U]aabbaaaaa_Z_aaaaaa`S]aa[Z^aaaaaaaa_XS -@PATHBIO-SOLEXA2_30TUEAAXX:3:85:776:696/2 -AGCAACTCTAGTATGTTCTCTCTCTTCTGTCTATTCTGGGCCTTCCCAGAAGT -+PATHBIO-SOLEXA2_30TUEAAXX:3:85:776:696/2 -aaaaaabbaaaaaaaabbbbbbbbbbbbbbbaaabbaaaaaaaaabaa^Z^aa -@PATHBIO-SOLEXA2_30TUEAAXX:3:88:32:1594/2 -CTCTCTCTTCTGTCTATTCTGGGCCTTCCCAGAAGTGGTGGTCAGGTATCATC -+PATHBIO-SOLEXA2_30TUEAAXX:3:88:32:1594/2 -bbbabbbbbbbbbbaaabbaaaaaaaabb`X^MEZaaaaa^XXG[[MRXXEXX -@PATHBIO-SOLEXA2_30TUEAAXX:3:35:417:135/2 -TCCTTCTTCCATTACTGAAAATGTCTTGTTCCTATAGGCCAGAACCTCAAATA -+PATHBIO-SOLEXA2_30TUEAAXX:3:35:417:135/2 -aabbbaaaaaabbaaaaaaaaaaabbbbbaaba^aaabbaaaaaaaaa_[`aa |