Previous changeset 2:7796cbc040c4 (2011-09-12) Next changeset 4:a6c0eb381244 (2011-09-12) |
Commit message:
Uploaded |
added:
bowtie_indices.loc.sample csem csem.xml csem_test1_in.fa csem_test1_in.fq csem_test1_out_original_sorted.bed csem_test1_out_pseudo_sorted.bed csem_wrapper.pl |
b |
diff -r 7796cbc040c4 -r 3ce7ee6f43a7 bowtie_indices.loc.sample --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bowtie_indices.loc.sample Mon Sep 12 10:01:12 2011 -0400 |
b |
@@ -0,0 +1,44 @@ +hg18 hg18 Human (Homo sapiens): hg18 /scratch/dongjun/galaxy/bowtie_indexes/ +mm9 mm9 Mouse (Mus musculus): mm9 /p/keles/SOFTWARE/bowtie-0.12.5/indexes/ +e_coli eschColi_536 Escherichia coli (str. 536) /p/keles/SOFTWARE/bowtie-0.12.5/indexes/ +e_coli_K12 eschColi_K12 Escherichia coli K12 (eschColi_K12) /scratch/dongjun/galaxy/bowtie_indexes/ +c_elegans_ws200 ce7 Caenorhabditis elegans (Feb 2009): WS200/ce7 /p/keles/SOFTWARE/bowtie-0.12.5/indexes/ +a_thaliana a_thaliana Arabidopsis thaliana /p/keles/SOFTWARE/bowtie-0.12.5/indexes/ + +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Bowtie indexed sequences data files. You will +#need to create these data files and then create a bowtie_indices.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bowtie_indices.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg18 indexed stored in +#/depot/data2/galaxy/bowtie/hg18/, +#then the bowtie_indices.loc entry would look like this: +# +#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18 +# +#and your /depot/data2/galaxy/bowtie/hg18/ directory +#would contain hg18.*.ebwt files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt +#...etc... +# +#Your bowtie_indices.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon +#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie/hg18/hg18full +#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie/hg19/hg19 +#...etc... 
+# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +# |
b |
diff -r 7796cbc040c4 -r 3ce7ee6f43a7 csem |
b |
Binary file csem has changed |
b |
diff -r 7796cbc040c4 -r 3ce7ee6f43a7 csem.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/csem.xml Mon Sep 12 10:01:12 2011 -0400 |
b |
b'@@ -0,0 +1,195 @@\n+<tool id="csem" name="CSEM: Multi-read Allocation for ChIP-seq" version="1.0.0">\n+ \n+ <description></description>\n+ \n+ <parallelism method="basic"></parallelism>\n+ \n+ <requirements>\n+\t <requirement type="binary">csem</requirement>\n+\t <requirement type="package">bowtie</requirement>\n+ </requirements>\n+\n+ <command interpreter="perl">\n+ csem_wrapper.pl \n+ ## Input file name\n+ $InputParams.Input\n+ ## Input file format (FASTA or FASTQ)\n+ $InputParams.InfileFormat\n+ ## Output file name\n+ $out_csem\n+ ## Output file format\n+ $OutfileFormat\n+ ## Reference genome index for Bowtie\n+ $index.fields.path$index\n+ ## Generate pseudo-tags?\n+ $pseudoTag\n+ ## Bowtie settings (Max num of mismatches, Max num of aligned positions)\n+ #if $bowtieParams.bSettingsType == "preSet"\n+\t2\n+\t99\n+ #else\n+\t$bowtieParams.Mismatch\n+\t$bowtieParams.SuppressAlign\n+ #end if\n+ ## CSEM settings (window size, number of iterations)\n+ #if $csemParams.cSettingsType == "preSet"\n+ \t101\n+\t200\n+ #else\n+\t$csemParams.windowSize\n+\t$csemParams.nIteration\n+ #end if\n+ ## Number of cores to use\n+ 8\n+ </command>\n+\n+ <inputs>\n+\t<param name="index" type="select" label="Select a reference genome" help="If your genome of interest is not listed - contact Galaxy team.">\n+\t\t<options from_data_table="bowtie_indexes">\n+\t\t\t<filter type="sort_by" column="2" />\n+\t\t\t<validator type="no_options" message="No indexes are available" />\n+\t\t</options>\n+\t</param>\n+\t<conditional name="InputParams">\n+\t\t<param name="InfileFormat" type="select" label="Select file format to process" help="Bowtie accepts FASTA or FASTQ file formats.">\n+\t\t\t<option value="fasta">FASTA</option>\n+\t\t\t<option value="fastq">FASTQ</option>\n+\t\t</param>\n+\t\t<when value="fasta">\n+\t\t\t<param name="Input" type="data" format="fasta" label="FASTA file"/>\n+\t\t</when>\n+\t\t<when value="fastq">\n+\t\t\t<param name="Input" type="data" 
format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="FASTQ file"/>\n+\t\t</when>\n+\t</conditional> <!-- InputParams -->\n+\t<param name="OutfileFormat" type="select" label="Select file format to export" help="Multi-read allocator can export results into BED or GFF file formats, or as a table.">\n+\t\t<option value="bed">BED</option>\n+\t\t<option value="gff">GFF</option>\n+\t\t<option value="table">table</option>\n+\t</param>\n+\t<param name="pseudoTag" type="select" label="Generate pseudo-tags?" help="See section \'Pseudo-tags\' in the help below for more details.">\n+\t\t<option value="N">NO</option>\n+\t\t<option value="Y">YES</option>\n+\t</param>\t\t\n+\t<conditional name="bowtieParams">\n+\t\t<param name="bSettingsType" type="select" label="Bowtie settings to use" help="For most mapping applications, use the \'Commonly used\' settings. If you want full control, use \'Full parameter list\'.">\n+\t\t\t<option value="preSet">Commonly used</option>\n+\t\t\t<option value="full">Full parameter list</option>\n+\t\t</param>\n+\t\t<when value="preSet" />\n+\t\t<when value="full">\n+\t\t\t<param name="Mismatch" type="integer" value="2" label="Maximum number of mismatches permitted (-v)" help="May be 0, 1, 2, or 3." />\n+\t\t\t<param name="SuppressAlign" type="integer" value="99" label="Suppress all alignments for a read if more than n reportable alignments exist (-m)" help="99 is appropriate for most cases. Use -1 for no limit." />\n+\t\t</when> <!-- full -->\n+\t</conditional> <!-- bowtieParams -->\t\t\n+\t<conditional name="csemParams">\n+\t\t<param name="cSettingsType" type="select" label="CSEM settings to use" help="For most multi-read allocation applications, use the \'Commonly used\' settings. 
If you want full control, use \'Full parameter list\'.">\n+\t\t\t<option value="preSet">Commonly used</option>\n+\t\t\t<option value="full">Full parameter list</option>\n+\t\t</param>\n+\t\t<when value="preSet" />\n+\t\t<when value="full">\n+\t\t\t<param name="windowSize" type="integer" value="101" label="Window size for the multi-read allocator" help="Set window size to some o'..b'entifiable with uni-reads, and improves detection of peaks in low mappable regions. The computational and experimental results established that multi-reads can be of critical importance for studying DNA-protein interactions in highly repetitive regions of genomes with ChIP-seq experiments. Output from CSEM can be used with other peak callers such as MOSAiCS and MACS to identify peaks that are in both high and low mappable regions of genomes.\n+\n+Please cite: Chung D, Kuan PF, Li B, SanalKumar R, Liang K, Bresnick E, Dewey C, and Keles S (2011),\n+"Discovering transcription factor binding sites in highly repetitive regions of genomes\n+with multi-read analysis of ChIP-Seq data," PLoS Computational Biology, 7(7): e1002111.\n+\n+------\n+\n+**Input formats**\n+\n+CSEM accepts short reads aligned using bowtie as input. Bowtie accepts single-end reads, in FASTA or FASTQ format, as input. Quality scores of reads are ignored.\n+\n+------\n+\n+**Pseudo-tags**\n+\n+For each read in the alignment file, CSEM estimates the fraction of the read allocated to each of its alignments. This fraction reflects the degree of confidence in each particular alignment. Currently, only the peak caller MOSAiCS can accept fractions of reads as input. However, you can incorporate multi-reads into ChIP-seq analysis with your favorite peak-caller by utilizing this pseudo-tag functionality. Pseudo-tags are generated by assigning each multi-read to the location it maps to with the largest weight and filtering out multi-reads with weights less than 0.5. 
Although summarizing CSEM output as pseudo-tags decreases the number of utilized multi-reads, it still leads to a significant increase in the sequencing depth compared to using uni-reads alone and facilitates identification of peaks in repetitive regions.\n+\n+------\n+\n+**Outputs**\n+\n+Currently, results from CSEM can be exported into BED or GFF file formats, or as a table. Each line of the output file specifies a single alignment. The lines of the output file are ordered such that all of the unique read alignments appear first. If pseudo-tags are generated, *FRAC* equals 1 for all reads if the output is a table and *score* is set to 1000 for all the reads in the BED and GFF formats.\n+\n+If the output is a table, it has the following columns::\n+\n+ Column Description\n+ -------- --------------------------------------------------------\n+ 1 RID ID of a read\n+ 2 CID Chromosome of the alignment\n+ 3 DIR Strand of the alignment (+ or -)\n+ 4 POS Left-most position of the aligned read (the first base in a chromosome is numbered 1)\n+ 5 FRAC Fraction of the read allocated to the alignment (which is 1 for uni-reads)\n+\n+If the output is in BED format, it has the following columns::\n+\n+ Column Description\n+ ------------ --------------------------------------------------------\n+ 1 chrom Chromosome of the alignment\n+ 2 chromStart Start position of the aligned read (the first base in a chromosome is numbered 0)\n+ 3 chromEnd End position of the aligned read (the first base in a chromosome is numbered 0)\n+ 4 name ID of a read\n+ 5 score 1000 * fraction of the read allocated to the alignment (which is 1000 for uni-reads)\n+ 6 strand Strand of the alignment (+ or -)\n+\n+If the output is in GFF format, it has the following columns::\n+\n+ Column Description\n+ --------- --------------------------------------------------------\n+ 1 seqname Chromosome of the alignment\n+ 2 source Always "CSEM"\n+ 3 feature ID of a read\n+ 4 start Start position of the aligned read 
(the first base in a chromosome is numbered 1)\n+ 5 end End position of the aligned read (the first base in a chromosome is numbered 1)\n+ 6 score 1000 * fraction of the read allocated to the alignment (which is 1000 for uni-reads)\n+ 7 strand Strand of the alignment (+ or -)\n+ 8 frame Always "."\n+ 9 group Always "."\n+\n+ \n+ </help>\n+</tool>\n' |
b |
diff -r 7796cbc040c4 -r 3ce7ee6f43a7 csem_test1_in.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/csem_test1_in.fa Mon Sep 12 10:01:12 2011 -0400 |
b |
b'@@ -0,0 +1,2000 @@\n+>r0\n+GAACGATACCCACCCAACTATCGCCATTCCAGCAT\n+>r1\n+CCGAACTGGATGTCTCATGGGATAAAAATCATCCG\n+>r2\n+TCAAAATTGTTATAGTATAACACTGTTGCTTTATG\n+>r3\n+AAAATTTGTGCCTGGATGGCCTGAGTACCNANTAC\n+>r4\n+GCAGAGCAGTTGCTAGAAANNNNNTTGAAGAGGTT\n+>r5\n+CAGCATAAGTGGATATTCAAAGTTTTGCTGTTTTA\n+>r6\n+GGCAGTGATGCAACTGCCCGTTATCAACAGNCNCT\n+>r7\n+GCATATTGCCAATTTTCGCTTCGGGGATCAGGCTA\n+>r8\n+GGTTCAGTTCAGTATACGCCTTATCCGGCCTACGG\n+>r9\n+GGCGATGATTTCATTACCCTCAACGCCGAACAGGC\n+>r10\n+AATCCCACGGCGGCAGCATGGTCCTAGANAGGNCG\n+>r11\n+TTACCACCGAAGTAGCTTACCCATGCGCCGCCGAC\n+>r12\n+AATCACAGGCGGTGAGCAGTAACGATAATTCGGCT\n+>r13\n+CAGCTCGCACGCCACGCCGAACCATGTCATCAATT\n+>r14\n+CGCTTTTGTCCTCGGCGACTTCGGCAACCGATGCG\n+>r15\n+GGGTCTGGCCGTTTTCTGCTTCAACTTCAACAATC\n+>r16\n+ATCCGGTTAAAGATGTTGAGAAATATGTGGTGATG\n+>r17\n+TTTTGTGTGTGTAGTAGGCCTTGGATATTGGGGCT\n+>r18\n+CCTGAAGGCGCGCGTGACTACCTGGTGCCTTCTCG\n+>r19\n+CATATGCCCCAGCACTCTGATGGCATCGCCTTCCA\n+>r20\n+ATAGACGCAAAAGAGCAAATAACATTTCTTCACAA\n+>r21\n+TAATGATAAGGAATCACTGTTTTTGAGAAAAGATA\n+>r22\n+TTGGGATTGTGGGCGTGACAATTTTCTCGATGATT\n+>r23\n+ATCGCGAACACCTTTACCGATTTATCGCCGAAGTG\n+>r24\n+AGATGAAGTTGTTTTGGCGTCATTCCGAAAAATTG\n+>r25\n+TCCGTATTCAGGGCGCTGCGGGAGAAGAAATCNGT\n+>r26\n+ATGGGAACAGTAATCTTTTTTACTGGTTCTGCGTC\n+>r27\n+TATGTCACATTTATTTTTCCTTAAAACTACAATAT\n+>r28\n+TGTTTTCCCGGTTGTCGGGGATCGGTTTGCCGCTG\n+>r29\n+ACCTGGAAATCTGTACCAAACCCCATGCCGGATAA\n+>r30\n+AGCAACATCATTCTCCCGTAAAAAGGGAGTCGATG\n+>r31\n+CCGATAGTGCCGTAGTANAATTAGTTTTTNNTTAG\n+>r32\n+TCGTTTCTGTCTGAGTCGATAGCTTTCTCCTTTGA\n+>r33\n+CGTGGTCATCAGATCATGCGGCAAACCAAANNNNN\n+>r34\n+AGCGGCCAGGTGCTTTCCGGGATAGTAAACAGGCG\n+>r35\n+GCAACGCGCGGGGCTAATCGGGATCTGCCCGGAAG\n+>r36\n+CCGGTTTCACATCCGGTGCAGGCTGTGGTGCTGAC\n+>r37\n+GCATTGCTGCCCATCCTGATTTGCCTGTTAAGCAG\n+>r38\n+GCCGTCTGCACCGTAGCCTGGTATGTGAGTGCGAA\n+>r39\n+GCTATCTGCCGTTCAGTCAGAGTNCGANGCCTNAN\n+>r40\n+CGCAGTCGAAGAGGTGGAAGCATTAATGCNTAATA\n+>r41\n+CGAATCAGGCGATTCGTCAGACGGGAATGTTGNGT\n+>r42\n+TCCACGGAAGTTTTCAGAGATGAGAATGTGCCTTC\n+>r43\n+TTGCAGGAAATTCTGTNACCCTCNACNAAGCATCA\n+>r44\n+
AGAAAATATGACCCCGCAGGATTACAACGACGTCN\n+>r45\n+GAAAAAGTTTCATAAAGACTCCAGATGATCGATGG\n+>r46\n+TTTCGGGGATGCGGTGGCGCGTCTGGATAAGCGTC\n+>r47\n+ATCCGGATTGTATTGAGATCCTGCCCGTGNGCTCA\n+>r48\n+CCAACCTTCTCCGCCTGCTTCGGCGCGGCGTTCCT\n+>r49\n+TGTTTCGCGTCGCAAAGTGGAGCGAGACGTGCAAG\n+>r50\n+GTGGTGCATTTTGATATTCCGCGCAATATCGAATC\n+>r51\n+GTCGTTATAGTCTTCAAGCCACGGGTAGACGAAAG\n+>r52\n+GCGTTTATGCGCGTGAACANNANTCCTCATATTAA\n+>r53\n+GCTGCGGGATCACCTGATCTGCCGGATGTCGTGTA\n+>r54\n+ATCGTTCATTGAGCGCAAAATGACGCTTTTGAGGG\n+>r55\n+AATGACTATCCGCTGGATCACGAATTTATCAGTGT\n+>r56\n+AAGGCAACGAAGAAGCCGATGATCCAGATCCAGCA\n+>r57\n+TGGGTGTAAGCCTGTTCCACTGCCTGCTGTTANCT\n+>r58\n+ACGCGTTTACTGTTCACCAGATCGCCCATCAACAT\n+>r59\n+TGATGTAACGCCTGATGGCCTGCATATCGCCCAGT\n+>r60\n+ACGGGTCAGCAACATCTGCCCGATATGAATGTNGN\n+>r61\n+CGATGAAGAAAAATCGCTGGTGAAACAAGCAGATG\n+>r62\n+GTCGCTCGCTACGATCAGCAAAAATATGACATCTT\n+>r63\n+CATTTCTGTACTCATGCAGCCCCCTTCTGAAATAA\n+>r64\n+CACGCGTTGGGTATGGACGAACTGGAAGGTCATGA\n+>r65\n+TGAGAACGGCTTATCTCATTTTCGCAGTCACTATT\n+>r66\n+GTCCGGCATCCAGGTTTTTCACGGCAGATTATCAG\n+>r67\n+GACATGTGAACCTTCTTTTTCAAGCTGCCAATGAT\n+>r68\n+TGCTATTTATCGAACTGGGGGGGGAGAACCTGCNC\n+>r69\n+ACCCCCTCATACCCACCTCTTTCTCCANGTNNGTN\n+>r70\n+TAATTCGATGCAACGCGAAGAACCTTACCTGGACT\n+>r71\n+TNNNNNNNNNNNNNNNNNNNNNNNNNGNNNNNNNN\n+>r72\n+GTTCACGGGGTTAGCGAAGATCCATCCCGCGTGCA\n+>r73\n+CCAATGANNNNNNNNNNNNNNNNNNNNNNNNNNNN\n+>r74\n+AGTTGAAACATTTTACCTGCATCAGCACACGGTNG\n+>r75\n+ACGTTCAGTTCTTTCGCCAGTGCTTTCAGCTAACG\n+>r76\n+TTTGCAGTAAAGGGTAAACGGATTGCCAGACNCCA\n+>r77\n+ATAGCCGCCCGCCGGAAAGATTTTTGCTGCCGGGA\n+>r78\n+GCAGGTGCTGCTGTATGNNGNAATTAGAATANTNN\n+>r79\n+CAGCTTCGTTGCAATGGCAAAAGCTTGCCGCTCCG\n+>r80\n+TTGGAGCGGGGTGGTTGGCACGCTCGAGAACGTCA\n+>r81\n+GACGCCGCCGCAGAGGCCAACAATCTCTTTCGGCG\n+>r82\n+CGACTCTAACGCGCTGATGGCGGTCACCTCCTNCC\n+>r83\n+AACACCGGGGATGTTATGGGTACCGGCGGATCCAC\n+>r84\n+TGATTTACGCATTATCACTAATAGCCTGCGTGTGG\n+>r85\n+GATAACCGGCCTCCAGCTCAGCCCATTTCGCATCN\n+>r86\n+TACCGCAGCAAAATCGGTCCCNANCGTNTNTNNCN\n+>r87\n+TGGCGTAACAAACGAAGGGCAACTGATGAACAANT\n+>r88\n+AACAGCCATAGCGTCATTTC
CGGACTCATNNGNCT\n+>r89\n+CGTCTTGTCCTGCCTACGCGAATCCAGGTCGGTAC\n+>r90\n+ATTGGCGGTTTATTAGCGAACCGAGTAATAAACCG\n+>r91\n+GGCATTACCGACCCGTGGTACACCCCCGGCGATGT\n+>r92\n+TGCAACGCATTGGCGAAATACATAAAC'..b'9\n+TGGAAGGGGCCCGGGGAGATAATGTCTCGNTTTAC\n+>r910\n+AAAGAAATGCGTTCAGCAGCCAATGCTAAGTTCAT\n+>r911\n+ATTGAAGCGGCTAAACGTTTTGGTCTGAGCGCGCA\n+>r912\n+TTTTAACGACAAGTGGCGGGTANAGATTGAGGGAT\n+>r913\n+GAGCCAACAGAAAACGCTGAAAAAACATCCAAAAG\n+>r914\n+GCATAATAGCTACCCGTCNGAATNTCGGTTCTGAC\n+>r915\n+GCAACCAACCAGGCATCCGCTTCAACTTGGCGTCG\n+>r916\n+GGCAAGTACAGGACAGCATTAGCACAAAACTGTCT\n+>r917\n+TNNNNNNNNNNNNNNANNNNNNNNNTNNNNNNNTN\n+>r918\n+TTCTTCAATAAGTATCTGTTATCTTTTGATCCCNN\n+>r919\n+TNNCNNNNNNNNNNNNNNNNNNNNNNNNTNNNNNN\n+>r920\n+ACGAGTGCCGAGACTTATCTTTCCCGATGTTTTGG\n+>r921\n+TTGCCGCAGGCCANNAGAACCNTGGTANANATCTN\n+>r922\n+CCCTGCCAGCGTTGCGTGTGCTGTACTCATCCGAA\n+>r923\n+TTAAAAAGTAACGCCATAGTCGCTAAATCTGATGA\n+>r924\n+AACGCCGAGGCAAAGATCACCCGCCTGCCGCTGGG\n+>r925\n+AATATTCAAAGTGACAGCGCATGGCAGCAAGCCTG\n+>r926\n+GCCCCCCAGCAACAAATGCAGCGTGCCGTCTATAN\n+>r927\n+CTTCCCGTGCCGAAGATACCGACCTGCAAGACGAT\n+>r928\n+GCTCGCGGACGACGATCNGGAGGATNAGNACAATA\n+>r929\n+GGCATCGGGTATGGCTGCGTGTTAATCCGGGGTAC\n+>r930\n+GTGTTCTCTTTGCCGGAGGTAAAAAAAGAGGATGA\n+>r931\n+CCTGACGCTGATGGCAATTGGCGGCGCGTTTTTCA\n+>r932\n+CCGAACTGGTCGATGGGACGTAAAATTTCTGTCGG\n+>r933\n+CGAAAAAGATCACAAACTTTGCCATANCATCTGTG\n+>r934\n+AATNTNNNNNNNNNNNNNNNNGNNNTNGNNNTNNN\n+>r935\n+TTTTGGGGAAAGTTTTGGGGCAGATTTTACATCAT\n+>r936\n+TGCCGATACTGAATTGATGCTTGCGGATATCATCG\n+>r937\n+TGGTGCTGATTGGTATGGTTGCTAATATCTTCCTG\n+>r938\n+TCTACAAAGCCATCGCGAAGAAATTCTCNATTGTG\n+>r939\n+CGTGCGNCTGTATGTCAGCGAAAACGAGCTGAAAA\n+>r940\n+CCTGCTTATGCTGGTCCGCTCGGTACTGCTGATAA\n+>r941\n+GCCAGAAGAACCAGTCGCGCTGTTTTTCACTTTCC\n+>r942\n+GCAGGAACAGAGAAATAACCAGGATTATGGATGTN\n+>r943\n+ACAGCGCCGGATACGGCGTGAATGCCTTATCCGAT\n+>r944\n+GCTGTTGGCGCTGAAAACATCGCTACTAACCAGAT\n+>r945\n+GCGGTAGCGGCCCTGCCGGGCTTTCGGCTATGGCG\n+>r946\n+CATCGTGTTTATTCCCGGTGACTTCACCCGCGCGG\n+>r947\n+CAACGAAGGGTTCTACTGGTGGATACACATACCTA\n+>r948\n+TCAGCCCGCG
CTCGTAGCCGCTCTGGTCGTCNTAC\n+>r949\n+CAACCTCCCAGTCGACATCGTTTACGGNGTGGACT\n+>r950\n+GCGGACACTCATCACGGGCAAGGGCCCGNTTTAAA\n+>r951\n+GGAGGGGAAGGGGAACTGAGTCCTGACCTGACTCT\n+>r952\n+TAAGTCATAAACCAGAATTATGTNAANGNCTNNTN\n+>r953\n+CGGTGATGTTATATCGCGTTGATTATTGATGCTGT\n+>r954\n+AGGCATCCGTAAGATGCTGGCGCTGAACATCAACC\n+>r955\n+CGTAGTCGGATTCTGGCGGTGGGCCAGATGTCGGA\n+>r956\n+GCGATACCNNNNANNNNNNNGNNNCNNNTNNNNNN\n+>r957\n+GTAGTCGATATCAATACCGACGACTGGAATCTNNT\n+>r958\n+AATTGTCGGCGTAATTGGCCTAACAGGTGAACCAG\n+>r959\n+ACGACAACGGCTTTGCGTTCAACGATGCCAGCGCT\n+>r960\n+TCTGGCCGCAGCCTGATGGACAAAGCGTTCATTAN\n+>r961\n+GCTGTTTAAAGAGAATAGCAACTTCCGTCGTACNN\n+>r962\n+AACCAGGGCGCGCAGGGGGGGCTTNCGCCNTCGAT\n+>r963\n+GGTCGGACAGGTTTTCACACAAGCCGGCTCCTGGC\n+>r964\n+ATTCCGTAATGCCATCTTATTTCGTAATGGATTGA\n+>r965\n+CCTGCTGCTCATTGAGTACGCCTGCTGTATTACNN\n+>r966\n+TGAACGCCTTATCCGACCTACGTTCGCCTTGTTGA\n+>r967\n+TCAAGCTGGCGATTGATCGCTATAAAGCTAACGAG\n+>r968\n+CATCAATTACCTGAATCGTTGCCTCTGCAACAAAC\n+>r969\n+TGCCCTGGCGAGCGGTTTGTTGATCCNNGGAANNG\n+>r970\n+AGGATTTCCTTATAACCATTTATATGTATTTGCGT\n+>r971\n+GATGCCGATGCGGCGGATCTGATATGTCNTTGNTG\n+>r972\n+TAATGCCGGTATTGAAGTTGTCGAAGCCACGGAAG\n+>r973\n+GTACATGTGGAGTTACAGCCTATTGCCGATGAGTT\n+>r974\n+GAATATGCAAAATAGTGCGTATTTTCAGAAGTATT\n+>r975\n+CTGTTGTGGTTGCGCTTTGCACGTTTATTCACCGT\n+>r976\n+AATGAACGCCTTGCTGCTGGGCAGCGTGGATGAAA\n+>r977\n+GTCAATTCGGTCGTGCCCAGTTTTACAGGCGCAGT\n+>r978\n+TCNNNNNNNNNNGNNNNTNNCNNANTNNTNNNNNN\n+>r979\n+TGTTGATCCGGTTTGCATGGCGTACCCGGCACGTA\n+>r980\n+CTGACGCCGTTGATTTCTGCGATCGGCGTGGTGGC\n+>r981\n+TGGATACTGCGCTCCAGCGCCTGCTGGTTAGCGGN\n+>r982\n+GCACCAGCCTCTTTCAGCGTAGCGCAGGCATTGCT\n+>r983\n+ACAGACAAAGTATACCTGTTCGGGTCCCTNAATAT\n+>r984\n+ACTTAGCACCCGCAATTTTACTTNTGTTGGTTGCC\n+>r985\n+ACGCGTACAGATGACTGAATGCAGTGCCCTGGCAA\n+>r986\n+ACGTTGCGTCTGTTCCANGACNGGANNCTTAACTG\n+>r987\n+TCTGGTGGTCAGCGCCAGCGAGTCGCCATCGCCAG\n+>r988\n+TCACTCAGTGGCAAACGATGGATGGACGCTCCTGC\n+>r989\n+GGCGGCACCAGCCCCTGGTGATACAGCACGTAAGA\n+>r990\n+CCNNNNGNNNGNNNNNNNNNNNTNAAGNNNNNNNN\n+>r991\n+CACGCGCTGTTCTGGCTATNGGNACNNCCTGC
AAC\n+>r992\n+AGCCATCATCAAACCGTCACTNGCATTTAAAAATA\n+>r993\n+TGGTAGTCAATAATGACGGCAAGGCGACCGATGCC\n+>r994\n+GCATTTTTTTCGCCAGCCAGGCTTTCGCTTTGGGT\n+>r995\n+TGGCACCTGCCGTTTGCTGTGCGACGAATCAACGC\n+>r996\n+GGTGCGTTACTGTGGCACTTCGNCCTGATGTGGAT\n+>r997\n+AACCAACACGCCAAGCATCGCTTCACGGCTGACTC\n+>r998\n+TATGGTGGATCTCATTACTTACACTAATCGTCTTC\n+>r999\n+AGGATAATGAGGCGAGCCGGGGGAACTGAAANTGG\n' |
b |
diff -r 7796cbc040c4 -r 3ce7ee6f43a7 csem_test1_in.fq --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/csem_test1_in.fq Mon Sep 12 10:01:12 2011 -0400 |
b |
b'@@ -0,0 +1,4000 @@\n+@r0\n+GAACGATACCCACCCAACTATCGCCATTCCAGCAT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r1\n+CCGAACTGGATGTCTCATGGGATAAAAATCATCCG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r2\n+TCAAAATTGTTATAGTATAACACTGTTGCTTTATG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r3\n+AAAATTTGTGCCTGGATGGCCTGAGTACCNANTAC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r4\n+GCAGAGCAGTTGCTAGAAANNNNNTTGAAGAGGTT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r5\n+CAGCATAAGTGGATATTCAAAGTTTTGCTGTTTTA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r6\n+GGCAGTGATGCAACTGCCCGTTATCAACAGNCNCT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r7\n+GCATATTGCCAATTTTCGCTTCGGGGATCAGGCTA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r8\n+GGTTCAGTTCAGTATACGCCTTATCCGGCCTACGG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r9\n+GGCGATGATTTCATTACCCTCAACGCCGAACAGGC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r10\n+AATCCCACGGCGGCAGCATGGTCCTAGANAGGNCG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r11\n+TTACCACCGAAGTAGCTTACCCATGCGCCGCCGAC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r12\n+AATCACAGGCGGTGAGCAGTAACGATAATTCGGCT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r13\n+CAGCTCGCACGCCACGCCGAACCATGTCATCAATT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r14\n+CGCTTTTGTCCTCGGCGACTTCGGCAACCGATGCG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r15\n+GGGTCTGGCCGTTTTCTGCTTCAACTTCAACAATC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r16\n+ATCCGGTTAAAGATGTTGAGAAATATGTGGTGATG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r17\n+TTTTGTGTGTGTAGTAGGCCTTGGATATTGGGGCT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r18\n+CCTGAAGGCGCGCGTGACTACCTGGTGCCTTCTCG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r19\n+CATATGCCCCAGCACTCTGATGGCATCGCCTTCCA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r20\n+ATAGACGCAAAAGAGCAAATAACATTTCTTCACAA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r21\n+TAATGATAAGGAATCACTGTTTTTGAGAAAAGATA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r22\n+TTGGGATTGTGGGCGTGACAATTTTCTCGATGATT\n++\n+EDCCCBAAAA@@@@?>===<;;9:
99987776554\n+@r23\n+ATCGCGAACACCTTTACCGATTTATCGCCGAAGTG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r24\n+AGATGAAGTTGTTTTGGCGTCATTCCGAAAAATTG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r25\n+TCCGTATTCAGGGCGCTGCGGGAGAAGAAATCNGT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r26\n+ATGGGAACAGTAATCTTTTTTACTGGTTCTGCGTC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r27\n+TATGTCACATTTATTTTTCCTTAAAACTACAATAT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r28\n+TGTTTTCCCGGTTGTCGGGGATCGGTTTGCCGCTG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r29\n+ACCTGGAAATCTGTACCAAACCCCATGCCGGATAA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r30\n+AGCAACATCATTCTCCCGTAAAAAGGGAGTCGATG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r31\n+CCGATAGTGCCGTAGTANAATTAGTTTTTNNTTAG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r32\n+TCGTTTCTGTCTGAGTCGATAGCTTTCTCCTTTGA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r33\n+CGTGGTCATCAGATCATGCGGCAAACCAAANNNNN\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r34\n+AGCGGCCAGGTGCTTTCCGGGATAGTAAACAGGCG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r35\n+GCAACGCGCGGGGCTAATCGGGATCTGCCCGGAAG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r36\n+CCGGTTTCACATCCGGTGCAGGCTGTGGTGCTGAC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r37\n+GCATTGCTGCCCATCCTGATTTGCCTGTTAAGCAG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r38\n+GCCGTCTGCACCGTAGCCTGGTATGTGAGTGCGAA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r39\n+GCTATCTGCCGTTCAGTCAGAGTNCGANGCCTNAN\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r40\n+CGCAGTCGAAGAGGTGGAAGCATTAATGCNTAATA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r41\n+CGAATCAGGCGATTCGTCAGACGGGAATGTTGNGT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r42\n+TCCACGGAAGTTTTCAGAGATGAGAATGTGCCTTC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r43\n+TTGCAGGAAATTCTGTNACCCTCNACNAAGCATCA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r44\n+AGAAAATATGACCCCGCAGGATTACAACGACGTCN\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r45\n+GAAAAAGTTTCATAAAGACTCCAGATGATCGATGG\n++\n+EDCCCBAAAA@@@@?>===<;;9
:99987776554\n+@r46\n+TTTCGGGGATGCGGTGGCGCGTCTGGATAAGCGTC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r47\n+ATCCGGATTGTATTGAGATCCTGCCCGTGNGCTCA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r48\n'..b'ANGNCTNNTN\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r953\n+CGGTGATGTTATATCGCGTTGATTATTGATGCTGT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r954\n+AGGCATCCGTAAGATGCTGGCGCTGAACATCAACC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r955\n+CGTAGTCGGATTCTGGCGGTGGGCCAGATGTCGGA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r956\n+GCGATACCNNNNANNNNNNNGNNNCNNNTNNNNNN\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r957\n+GTAGTCGATATCAATACCGACGACTGGAATCTNNT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r958\n+AATTGTCGGCGTAATTGGCCTAACAGGTGAACCAG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r959\n+ACGACAACGGCTTTGCGTTCAACGATGCCAGCGCT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r960\n+TCTGGCCGCAGCCTGATGGACAAAGCGTTCATTAN\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r961\n+GCTGTTTAAAGAGAATAGCAACTTCCGTCGTACNN\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r962\n+AACCAGGGCGCGCAGGGGGGGCTTNCGCCNTCGAT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r963\n+GGTCGGACAGGTTTTCACACAAGCCGGCTCCTGGC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r964\n+ATTCCGTAATGCCATCTTATTTCGTAATGGATTGA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r965\n+CCTGCTGCTCATTGAGTACGCCTGCTGTATTACNN\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r966\n+TGAACGCCTTATCCGACCTACGTTCGCCTTGTTGA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r967\n+TCAAGCTGGCGATTGATCGCTATAAAGCTAACGAG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r968\n+CATCAATTACCTGAATCGTTGCCTCTGCAACAAAC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r969\n+TGCCCTGGCGAGCGGTTTGTTGATCCNNGGAANNG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r970\n+AGGATTTCCTTATAACCATTTATATGTATTTGCGT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r971\n+GATGCCGATGCGGCGGATCTGATATGTCNTTGNTG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r972\n+TAATGCCGGTATTGAAGTTGTCGAAGCCACGGAAG\n++\n+EDCCCBAAAA@@@@?>===<;;9
:99987776554\n+@r973\n+GTACATGTGGAGTTACAGCCTATTGCCGATGAGTT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r974\n+GAATATGCAAAATAGTGCGTATTTTCAGAAGTATT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r975\n+CTGTTGTGGTTGCGCTTTGCACGTTTATTCACCGT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r976\n+AATGAACGCCTTGCTGCTGGGCAGCGTGGATGAAA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r977\n+GTCAATTCGGTCGTGCCCAGTTTTACAGGCGCAGT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r978\n+TCNNNNNNNNNNGNNNNTNNCNNANTNNTNNNNNN\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r979\n+TGTTGATCCGGTTTGCATGGCGTACCCGGCACGTA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r980\n+CTGACGCCGTTGATTTCTGCGATCGGCGTGGTGGC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r981\n+TGGATACTGCGCTCCAGCGCCTGCTGGTTAGCGGN\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r982\n+GCACCAGCCTCTTTCAGCGTAGCGCAGGCATTGCT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r983\n+ACAGACAAAGTATACCTGTTCGGGTCCCTNAATAT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r984\n+ACTTAGCACCCGCAATTTTACTTNTGTTGGTTGCC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r985\n+ACGCGTACAGATGACTGAATGCAGTGCCCTGGCAA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r986\n+ACGTTGCGTCTGTTCCANGACNGGANNCTTAACTG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r987\n+TCTGGTGGTCAGCGCCAGCGAGTCGCCATCGCCAG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r988\n+TCACTCAGTGGCAAACGATGGATGGACGCTCCTGC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r989\n+GGCGGCACCAGCCCCTGGTGATACAGCACGTAAGA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r990\n+CCNNNNGNNNGNNNNNNNNNNNTNAAGNNNNNNNN\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r991\n+CACGCGCTGTTCTGGCTATNGGNACNNCCTGCAAC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r992\n+AGCCATCATCAAACCGTCACTNGCATTTAAAAATA\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r993\n+TGGTAGTCAATAATGACGGCAAGGCGACCGATGCC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r994\n+GCATTTTTTTCGCCAGCCAGGCTTTCGCTTTGGGT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r995\n+TGGCACCTGCCGTTTGCTGTGCGACGAATCAACGC\n++\n
+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r996\n+GGTGCGTTACTGTGGCACTTCGNCCTGATGTGGAT\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r997\n+AACCAACACGCCAAGCATCGCTTCACGGCTGACTC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r998\n+TATGGTGGATCTCATTACTTACACTAATCGTCTTC\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n+@r999\n+AGGATAATGAGGCGAGCCGGGGGAACTGAAANTGG\n++\n+EDCCCBAAAA@@@@?>===<;;9:99987776554\n' |
b |
diff -r 7796cbc040c4 -r 3ce7ee6f43a7 csem_test1_out_original_sorted.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/csem_test1_out_original_sorted.bed Mon Sep 12 10:01:12 2011 -0400 |
b |
b'@@ -0,0 +1,471 @@\n+gi|49175990|ref|NC_000913.2|\t1006768\t1006802\tr335\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1015037\t1015071\tr232\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1018832\t1018866\tr548\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1026963\t1026997\tr397\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1030176\t1030210\tr937\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1033577\t1033611\tr91\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t103400\t103434\tr717\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1041029\t1041063\tr881\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1054869\t1054903\tr537\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1088175\t1088209\tr483\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1089798\t1089832\tr387\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1100970\t1101004\tr626\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1108188\t1108222\tr167\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1119344\t1119378\tr407\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1128435\t1128469\tr215\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1131944\t1131978\tr171\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1155017\t1155051\tr467\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1159448\t1159482\tr542\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1163899\t1163933\tr433\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t117142\t117176\tr925\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1174893\t1174927\tr563\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t117811\t117845\tr473\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1221102\t1221136\tr741\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1225445\t1225479\tr896\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1230269\t1230303\tr541\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1233397\t1233431\tr927\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1234258\t1234292\tr307\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t124140\t124174\tr227\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1272741\t1272775\tr300\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1276713\t1276747\tr849\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1313902\t1313936\tr851\t1000\t+\n+gi|
49175990|ref|NC_000913.2|\t131622\t131656\tr129\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1319239\t1319273\tr565\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1321743\t1321777\tr677\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1322128\t1322162\tr183\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1323134\t1323168\tr188\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1331297\t1331331\tr646\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1334344\t1334378\tr159\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1337479\t1337513\tr93\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1339423\t1339457\tr936\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1348189\t1348223\tr890\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1348663\t1348697\tr79\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1350407\t1350441\tr241\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1355040\t1355074\tr261\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t136417\t136451\tr291\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1366275\t1366309\tr708\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1369121\t1369155\tr444\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1376018\t1376052\tr581\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t138065\t138099\tr372\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1407324\t1407358\tr905\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1408279\t1408313\tr465\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1408731\t1408765\tr518\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t141488\t141522\tr217\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1433307\t1433341\tr662\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1434092\t1434126\tr544\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1467098\t1467132\tr192\t108.251631809475\t+\n+gi|49175990|ref|NC_000913.2|\t1467100\t1467134\tr525\t108.251631809475\t+\n+gi|49175990|ref|NC_000913.2|\t1506317\t1506351\tr398\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1510162\t1510196\tr766\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1515483\t1515517\tr438\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1516377\t1516411\tr720\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1519362\t1519396\tr238\t1000\t-\n+gi|49
175990|ref|NC_000913.2|\t1539532\t1539566\tr616\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1548992\t1549026\tr963\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1550415\t1550449\tr797\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1574320\t1574354\tr305\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1576311\t1576345\tr471\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t161501\t161535\tr667\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1620'..b'1047\t4541081\tr874\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t4542052\t4542086\tr448\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4559188\t4559222\tr875\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4596991\t4597025\tr353\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4606702\t4606736\tr390\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t461250\t461284\tr104\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t4616264\t4616298\tr322\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t462016\t462050\tr470\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t462269\t462303\tr573\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4626854\t4626888\tr663\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t46754\t46788\tr639\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t469387\t469421\tr692\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t490548\t490582\tr348\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t492923\t492957\tr644\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t504941\t504975\tr979\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t532863\t532897\tr411\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t53780\t53814\tr408\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t544257\t544291\tr772\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t552101\t552135\tr502\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t577746\t577780\tr757\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t594219\t594253\tr868\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t599850\t599884\tr270\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t613795\t613829\tr799\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t638008\t638042\tr477\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t646655\t646689\tr157\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t65084\t65118\tr364\t1000\t+
\n+gi|49175990|ref|NC_000913.2|\t656590\t656624\tr138\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t66021\t66055\tr180\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t682795\t682829\tr240\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t68418\t68452\tr694\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t694803\t694837\tr219\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t699778\t699812\tr508\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t710498\t710532\tr109\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t717747\t717781\tr806\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t72007\t72041\tr980\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t723553\t723587\tr366\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t742877\t742911\tr669\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t756851\t756885\tr768\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t757626\t757660\tr685\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t759040\t759074\tr463\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t779810\t779844\tr155\t121.143611695954\t-\n+gi|49175990|ref|NC_000913.2|\t780099\t780133\tr155\t121.339532647512\t-\n+gi|49175990|ref|NC_000913.2|\t780403\t780437\tr155\t121.535526212637\t-\n+gi|49175990|ref|NC_000913.2|\t780625\t780659\tr155\t121.731596160199\t-\n+gi|49175990|ref|NC_000913.2|\t780833\t780867\tr155\t121.927721887041\t-\n+gi|49175990|ref|NC_000913.2|\t801528\t801562\tr114\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t817629\t817663\tr125\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t825691\t825725\tr492\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t834809\t834843\tr72\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t849822\t849856\tr629\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t85595\t85629\tr415\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t856577\t856611\tr273\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t858729\t858763\tr638\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t861184\t861218\tr287\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t864454\t864488\tr378\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t868597\t868631\tr545\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t870333\t870367\tr842\t1000\t-\n+gi|49175990|ref|NC_00091
3.2|\t870713\t870747\tr697\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t895739\t895773\tr139\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t897622\t897656\tr653\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t906010\t906044\tr721\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t908314\t908348\tr422\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t908450\t908484\tr356\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t91138\t91172\tr236\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t940116\t940150\tr474\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t944264\t944298\tr13\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t953105\t953139\tr960\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t981430\t981464\tr242\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t987163\t987197\tr578\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t989927\t989961\tr869\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t994144\t994178\tr313\t1000\t-\n' |
b |
diff -r 7796cbc040c4 -r 3ce7ee6f43a7 csem_test1_out_pseudo_sorted.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/csem_test1_out_pseudo_sorted.bed Mon Sep 12 10:01:12 2011 -0400 |
b |
b'@@ -0,0 +1,412 @@\n+gi|49175990|ref|NC_000913.2|\t1006768\t1006802\tr335\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1015037\t1015071\tr232\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1018832\t1018866\tr548\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1026963\t1026997\tr397\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1030176\t1030210\tr937\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1033577\t1033611\tr91\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t103400\t103434\tr717\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1041029\t1041063\tr881\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1054869\t1054903\tr537\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1088175\t1088209\tr483\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1089798\t1089832\tr387\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1100970\t1101004\tr626\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1108188\t1108222\tr167\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1119344\t1119378\tr407\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1128435\t1128469\tr215\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1131944\t1131978\tr171\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1155017\t1155051\tr467\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1159448\t1159482\tr542\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1163899\t1163933\tr433\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t117142\t117176\tr925\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1174893\t1174927\tr563\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t117811\t117845\tr473\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1221102\t1221136\tr741\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1225445\t1225479\tr896\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1230269\t1230303\tr541\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1233397\t1233431\tr927\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1234258\t1234292\tr307\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t124140\t124174\tr227\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1272741\t1272775\tr300\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1276713\t1276747\tr849\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1313902\t1313936\tr851\t1000\t+\n+gi|
49175990|ref|NC_000913.2|\t131622\t131656\tr129\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1319239\t1319273\tr565\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1321743\t1321777\tr677\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1322128\t1322162\tr183\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1323134\t1323168\tr188\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1331297\t1331331\tr646\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1334344\t1334378\tr159\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1337479\t1337513\tr93\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1339423\t1339457\tr936\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1348189\t1348223\tr890\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1348663\t1348697\tr79\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1350407\t1350441\tr241\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1355040\t1355074\tr261\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t136417\t136451\tr291\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1366275\t1366309\tr708\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1369121\t1369155\tr444\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1376018\t1376052\tr581\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t138065\t138099\tr372\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1407324\t1407358\tr905\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1408279\t1408313\tr465\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1408731\t1408765\tr518\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t141488\t141522\tr217\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1433307\t1433341\tr662\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1434092\t1434126\tr544\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1506317\t1506351\tr398\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1510162\t1510196\tr766\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1515483\t1515517\tr438\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1516377\t1516411\tr720\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1519362\t1519396\tr238\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1539532\t1539566\tr616\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1548992\t1549026\tr963\t1000\t-\n+gi|49175990|ref|NC_000913.2|\
t1550415\t1550449\tr797\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1574320\t1574354\tr305\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1576311\t1576345\tr471\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t161501\t161535\tr667\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t1620675\t1620709\tr484\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1620860\t1620894\tr145\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t1624200\t1624234\tr111\t1000\t-\n'..b'4455334\tr804\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4470446\t4470480\tr341\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4484351\t4484385\tr745\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t448462\t448496\tr56\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4491169\t4491203\tr908\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t4495265\t4495299\tr262\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4541047\t4541081\tr874\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t4542052\t4542086\tr448\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4559188\t4559222\tr875\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4596991\t4597025\tr353\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4606702\t4606736\tr390\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t461250\t461284\tr104\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t4616264\t4616298\tr322\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t462016\t462050\tr470\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t462269\t462303\tr573\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t4626854\t4626888\tr663\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t46754\t46788\tr639\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t469387\t469421\tr692\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t490548\t490582\tr348\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t492923\t492957\tr644\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t504941\t504975\tr979\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t532863\t532897\tr411\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t53780\t53814\tr408\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t544257\t544291\tr772\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t552101\t552135\tr502\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t577746\t577780\tr75
7\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t594219\t594253\tr868\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t599850\t599884\tr270\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t613795\t613829\tr799\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t638008\t638042\tr477\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t646655\t646689\tr157\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t65084\t65118\tr364\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t656590\t656624\tr138\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t66021\t66055\tr180\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t682795\t682829\tr240\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t68418\t68452\tr694\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t694803\t694837\tr219\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t699778\t699812\tr508\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t710498\t710532\tr109\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t717747\t717781\tr806\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t72007\t72041\tr980\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t723553\t723587\tr366\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t742877\t742911\tr669\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t756851\t756885\tr768\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t757626\t757660\tr685\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t759040\t759074\tr463\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t801528\t801562\tr114\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t817629\t817663\tr125\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t825691\t825725\tr492\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t834809\t834843\tr72\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t849822\t849856\tr629\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t85595\t85629\tr415\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t856577\t856611\tr273\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t858729\t858763\tr638\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t861184\t861218\tr287\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t864454\t864488\tr378\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t868597\t868631\tr545\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t870333\t870367\tr842\t1000\t-\n+gi|49175990|re
f|NC_000913.2|\t870713\t870747\tr697\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t895739\t895773\tr139\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t897622\t897656\tr653\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t906010\t906044\tr721\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t908314\t908348\tr422\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t908450\t908484\tr356\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t91138\t91172\tr236\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t940116\t940150\tr474\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t944264\t944298\tr13\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t953105\t953139\tr960\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t981430\t981464\tr242\t1000\t-\n+gi|49175990|ref|NC_000913.2|\t987163\t987197\tr578\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t989927\t989961\tr869\t1000\t+\n+gi|49175990|ref|NC_000913.2|\t994144\t994178\tr313\t1000\t-\n' |
b |
diff -r 7796cbc040c4 -r 3ce7ee6f43a7 csem_wrapper.pl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/csem_wrapper.pl Mon Sep 12 10:01:12 2011 -0400 |
[ |
#!/usr/bin/env perl
# Wrapper for CSEM with Bowtie
# Written by Dongjun Chung, Sep. 8, 2011
#
# Aligns the reads with bowtie, pipes the alignments into csem and converts
# the csem result into a table, BED or GFF file.
#
# Arguments (all eleven are required, in this order):
#   infile_name     FASTA/FASTQ file with the reads
#   infile_format   "fasta" or "fastq"
#   outfile_name    file the converted result is written to
#   outfile_format  "table", "bed" or "gff"
#   ref_genome      bowtie index, handed to bowtie-inspect and bowtie
#   pseudo_tags     "Y": drop alignments with prob <= 0.5, report the rest as 1
#   n_mismatch      passed to bowtie as -v
#   maxpos          passed to bowtie as -m
#   window_size     passed to csem
#   n_iter          passed to csem
#   n_core          passed to bowtie as -p
#
# Exit status is non-zero on any failure; diagnostics go to STDERR.

use strict;
use warnings;
use File::Temp qw/tempfile/;
use File::Temp qw/tmpnam/;

# Name of the csem result file. It is created by csem itself, so File::Temp
# cannot clean it up for us; the END block below does that on every exit path.
my $outfile_temp;
my $owner_pid = $$;

END {
    # Only the process that created the name may remove the file, and the
    # cleanup must not disturb the exit status that is about to be reported.
    if ( defined $outfile_temp && $$ == $owner_pid ) {
        my $exit_status = $?;
        unlink $outfile_temp;
        $? = $exit_status;
    }
}

# parse command arguments

die "Usage: perl csem_wrapper.pl [infile_name] [infile_format] [outfile_name] [outfile_format] [ref_genome] [pseudo_tags] [n_mismatch] [maxpos] [window_size] [n_iter] [n_core]" unless @ARGV == 11;

my ( $infile_name, $infile_format, $outfile_name, $outfile_format, $ref_genome, $pseudo_tags, $n_mismatch, $maxpos, $window_size, $n_iter, $n_core ) = @ARGV;

# Reject unsupported formats before any external program is started, so a
# typo does not cost a full alignment run.

if ( $infile_format ne "fasta" && $infile_format ne "fastq" ) {
    fail("Inappropriate aligned read file format!\n");
}
if ( $outfile_format ne "table" && $outfile_format ne "bed" && $outfile_format ne "gff" ) {
    fail("Inappropriate output file format!\n");
}

# construct ref genome file (adapted from "genRef.pl")
#   line 1: # of chromosomes
#   line 2: chromosome sizes
#   line 3: chromosome names

my ( $names_ref, $lens_ref ) = read_reference_sequences($ref_genome);
my @names = @{$names_ref};
my @lens  = @{$lens_ref};
my $size  = scalar @names;

my ( $fh, $temp_reffile ) = tempfile( UNLINK => 1 );
print {$fh} "$size\n", "@lens\n", "@names\n"
  or die "Cannot write reference file!\n";
close($fh) or die "Cannot write reference file!\n";

# extract read length and read IDs from FASTA/FASTQ files

my ( $read_length, $ids_ref ) = scan_reads( $infile_name, $infile_format );
my @readID = @{$ids_ref};

# run bowtie & csem

$outfile_temp = tmpnam();

my $format_flag = $infile_format eq "fasta" ? "-f" : "-q";

run_pipeline(
    [
        "bowtie", $format_flag, "-v", $n_mismatch, "-a", "-m", $maxpos,
        "-p", $n_core, "--quiet", "--concise", $ref_genome, $infile_name
    ],
    [ "csem", $temp_reffile, $window_size, $n_iter, $outfile_temp ]
) or die "Error occurs while running either bowtie or csem!";

# post-process chromosome & position

open( my $csem_result, "<", $outfile_temp ) or die "Cannot open csem file!\n";
open( my $out, ">", $outfile_name ) or die "Cannot open output file!\n";

while ( my $line = <$csem_result> ) {
    chomp($line);

    # columns are separated by some white space (runs of blanks count once)
    my @element = split( ' ', $line );

    # check for invalid line: may cause error in exporting step

    if ( scalar(@element) < 5 ) {
        next;
    }

    # post-process lines

    my ( $id, $chr, $str, $loc, $prob ) = @element;

    # $id and $chr are used as array indices below; anything that is not a
    # plain non-negative integer is treated like any other invalid line.
    if ( $id !~ /\A\d+\z/ || $chr !~ /\A\d+\z/ ) {
        next;
    }
    if ( $id > $#readID ) {
        die "csem reported read index $id, but the tag file has only "
          . scalar(@readID) . " reads!\n";
    }
    if ( $chr > $#names ) {
        die "csem reported chromosome index $chr, but the reference has only "
          . scalar(@names) . " sequences!\n";
    }

    if ( $outfile_format ne "bed" ) {
        # first base is 0 in bowtie or BED
        # first base is 1 in table or GFF
        $loc++;
    }
    my $chrname = $names[$chr];    # translate chromosome

    # generate pseudo-tags, if necessary (adapted from "round_tag_to_integer.pl")

    if ( $pseudo_tags eq "Y" ) {
        # if we want to generate pseudo-tags,
        # then threshold prob at 0.5 & round prob to integer (i.e., set to one)
        # (exclude prob = 0.5 as well in order to avoid a read appears more than once)

        if ( $prob <= 0.5 ) {
            next;
        }
        $prob = 1;
    }

    # write down results

    my $id_final = $readID[$id];
    my $record;

    if ( $outfile_format eq "table" ) {
        $record = "$id_final\t$chrname $str $loc $prob\n";
    }
    elsif ( $outfile_format eq "bed" ) {
        # BED
        # - name: read ID
        # - score = prob * 1000
        #
        # NOTE(review): BED ends are exclusive, so "$start + $read_length"
        # looks like the intended value here. The "- 1" is kept because the
        # bundled expected outputs were generated with it - confirm before
        # changing.

        my $start = $loc;
        my $end   = $start + $read_length - 1;
        my $score = $prob * 1000;

        $record = "$chrname\t$start\t$end\t$id_final\t$score\t$str\n";
    }
    else {
        # GFF
        # - source: "CSEM"
        # - feature: read ID
        # - score = prob * 1000

        my $start  = $loc;
        my $end    = $start + $read_length - 1;
        my $source = "CSEM";
        my $score  = $prob * 1000;

        $record = "$chrname\t$source\t$id_final\t$start\t$end\t$score\t$str\t.\t.\n";
    }

    print {$out} $record or die "Cannot write output file!\n";
}

close($csem_result);
close($out) or die "Cannot write output file!\n";

# Report a usage-type error on STDERR and stop with exit status 1.
sub fail {
    my ($message) = @_;
    print STDERR $message;
    exit 1;
}

# Ask "bowtie-inspect -s" for the sequences stored in a bowtie index.
#   $index: bowtie index name
# Returns two array references: sequence names and sequence lengths, in the
# order bowtie-inspect lists them. Dies if bowtie-inspect cannot be run,
# exits with an error, or lists no sequence.
sub read_reference_sequences {
    my ($index) = @_;

    # list form: the index name never passes through a shell
    open( my $inspect, "-|", "bowtie-inspect", "-s", $index )
      or die "Cannot run bowtie-inspect!\n";

    my ( @seq_names, @seq_lens );
    while ( my $row = <$inspect> ) {
        chomp($row);

        # Only "Sequence-N <name> <length>" rows describe a sequence; every
        # other summary row is skipped, however many there are. The name is
        # cut at its first white space and the length is the last column, so
        # a description after the name cannot shift the columns.
        if ( $row =~ /\ASequence-\d+[ \t]+(\S+).*?[ \t](\d+)\s*\z/ ) {
            push( @seq_names, $1 );
            push( @seq_lens,  $2 );
        }
    }
    close($inspect) or die "Cannot run bowtie-inspect!\n";

    die "No reference sequences reported by bowtie-inspect!\n"
      unless @seq_names;

    return ( \@seq_names, \@seq_lens );
}

# Read the tag file once and collect what the post-processing step needs.
#   $path:   FASTA/FASTQ file
#   $format: "fasta" or "fastq"
# Returns the length of the first sequence line and a reference to the list
# of read IDs (header text up to the first white space) in file order.
# Dies if the file cannot be opened, holds no read, or is malformed FASTQ.
sub scan_reads {
    my ( $path, $format ) = @_;

    open( my $reads, "<", $path ) or die "Cannot open tag file!\n";

    my @ids;
    my $first_length;

    if ( $format eq "fasta" ) {
        while ( my $row = <$reads> ) {
            $row =~ s/\r?\n\z//;
            if ( $row =~ /^>(\S*)/ ) {
                # a header without a name still counts as a read, otherwise
                # every later read would be reported under the wrong ID
                push( @ids, $1 );
            }
            elsif ( !defined $first_length ) {
                $first_length = length $row;
            }
        }
    }
    else {
        # FASTQ is consumed record by record (header, sequence, separator,
        # quality): a quality line may itself start with "@" and must not be
        # mistaken for a header.
        while ( my $header = <$reads> ) {
            next if $header =~ /^\s*$/;
            if ( $header =~ /^@(\S*)/ ) {
                push( @ids, $1 );
            }
            else {
                die "Malformed FASTQ record at line $. of tag file!\n";
            }

            my $sequence = <$reads>;
            last unless defined $sequence;
            $sequence =~ s/\r?\n\z//;
            $first_length = length $sequence unless defined $first_length;

            # separator and quality lines carry nothing we need
            my $separator = <$reads>;
            my $quality   = <$reads>;
        }
    }
    close($reads);

    die "Tag file contains no reads!\n"
      unless @ids && defined $first_length;

    return ( $first_length, \@ids );
}

# Run "producer | consumer > /dev/null" without a shell.
#   $producer, $consumer: array references holding a command and its arguments
# Returns 1 only if both commands could be started, all producer output was
# handed to the consumer, and both exited with status 0; returns 0 otherwise.
sub run_pipeline {
    my ( $producer, $consumer ) = @_;

    # A consumer that exits early must show up as a failed print, not kill
    # this script. A handler (unlike 'IGNORE') is reset in the exec'ed
    # children, so they keep the default SIGPIPE behaviour.
    local $SIG{PIPE} = sub { };

    # The consumer inherits our STDOUT, so point it at /dev/null just for the
    # moment the consumer is started, then put the real STDOUT back.
    open( my $saved_stdout, ">&", \*STDOUT ) or return 0;

    my $to_consumer;
    my $consumer_pid;
    if ( open( STDOUT, ">", "/dev/null" ) ) {
        $consumer_pid = open( $to_consumer, "|-", @{$consumer} );
    }
    open( STDOUT, ">&", $saved_stdout ) or die "Cannot restore STDOUT: $!\n";
    close($saved_stdout);
    return 0 unless $consumer_pid;

    my $from_producer;
    unless ( open( $from_producer, "-|", @{$producer} ) ) {
        close($to_consumer);
        return 0;
    }

    my $copied = 1;
    while ( my $chunk = <$from_producer> ) {
        unless ( print {$to_consumer} $chunk ) {
            $copied = 0;
            last;
        }
    }

    # close() on a pipe waits for the child and is false on non-zero exit
    my $producer_ok = close($from_producer);
    my $consumer_ok = close($to_consumer);

    return ( $copied && $producer_ok && $consumer_ok ) ? 1 : 0;
}